From c0559e083acb81d4589f3aebfe3f65dec33da88e Mon Sep 17 00:00:00 2001
From: Jan Vales <jan@jvales.net>
Date: Sat, 6 Apr 2019 02:52:34 +0200
Subject: [PATCH] ex 1.4 complete?

---
 ex1/main_4.tex | 132 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/ex1/main_4.tex b/ex1/main_4.tex
index e4c3c23..a8bb3bd 100644
--- a/ex1/main_4.tex
+++ b/ex1/main_4.tex
@@ -175,4 +175,136 @@ Execution time: 8884.429 ms
 \end{verbatim}
 Suddenly nested loop joins become feasible.
 
+
+\item RESET ALL; EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) SELECT a,b,c FROM r NATURAL JOIN t WHERE b IN (SELECT s.b FROM s);\\
+\begin{verbatim}
+Hash Join (cost=578.59..12368.66 rows=1007007 width=12)
+    (actual time=25.336..340.303 rows=987869 loops=1)
+  Output: r.a, r.b, t.c
+  Hash Cond: (t.a = r.a)
+  Buffers: shared hit=135
+  ->  Seq Scan on public.t (cost=0.00..145.00 rows=10000 width=8)
+                    (actual time=0.035..1.820 rows=10000 loops=1)
+    Output: t.a, t.c
+    Buffers: shared hit=45
+  ->  Hash (cost=453.59..453.59 rows=10000 width=8)
+    (actual time=25.235..25.235 rows=10000 loops=1)
+    Output: r.a, r.b
+    Buckets: 16384 Batches: 1 Memory Usage: 519kB
+    Buffers: shared hit=90
+    ->  Hash Join (cost=172.27..453.59 rows=10000 width=8)
+           (actual time=14.607..22.222 rows=10000 loops=1)
+        Output: r.a, r.b
+        Hash Cond: (r.b = s.b)
+        Buffers: shared hit=90
+        ->  Seq Scan on public.r (cost=0.00..145.00 rows=10000 width=8)
+                          (actual time=0.026..1.943 rows=10000 loops=1)
+          Output: r.a, r.b
+          Buffers: shared hit=45
+        ->  Hash (cost=171.01..171.01 rows=101 width=4)
+          (actual time=14.558..14.559 rows=101 loops=1)
+          Output: s.b
+          Buckets: 1024 Batches: 1 Memory Usage: 12kB
+          Buffers: shared hit=45
+          ->  HashAggregate (cost=170.00..171.01 rows=101 width=4)
+                     (actual time=14.423..14.486 rows=101 loops=1)
+              Output: s.b
+              Group Key: s.b
+              Buffers: shared hit=45
+              ->  Seq Scan on public.s (cost=0.00..145.00 rows=10000 width=4)
+                                (actual time=0.028..6.220 rows=10000 loops=1)
+                Output: s.b
+                Buffers: shared hit=45
+Planning time: 0.626 ms
+Execution time: 439.179 ms
+\end{verbatim}
+
+It now takes only 439.7 ms to execute the query, instead of the previous 4323 ms.
+
+
+\item This was the case with (d) already.\\
+But if we create indices we get a merge join + hash semi join combination, which is a little bit slower.
+\begin{verbatim}
+Merge Join (cost=1118.26..12199.68 rows=1007007 width=12)
+      (actual time=23.151..517.241 rows=987869 loops=1)
+  Output: r.a, r.b, t.c
+  Merge Cond: (t.a = r.a)
+  Buffers: shared hit=585 read=29
+  ->  Index Scan using i5 on public.t (cost=0.29..448.89 rows=10000 width=8)
+                               (actual time=0.038..4.266 rows=10000 loops=1)
+    Output: t.a, t.c
+    Buffers: shared hit=495 read=29
+  ->  Sort (cost=1117.98..1142.98 rows=10000 width=8)
+     (actual time=23.107..186.813 rows=987870 loops=1)
+    Output: r.a, r.b
+    Sort Key: r.a
+    Sort Method: quicksort Memory: 853kB
+    Buffers: shared hit=90
+    ->  Hash Join (cost=172.27..453.59 rows=10000 width=8)
+           (actual time=11.223..19.180 rows=10000 loops=1)
+        Output: r.a, r.b
+        Hash Cond: (r.b = s.b)
+        Buffers: shared hit=90
+        ->  Seq Scan on public.r (cost=0.00..145.00 rows=10000 width=8)
+                          (actual time=0.019..2.069 rows=10000 loops=1)
+          Output: r.a, r.b
+          Buffers: shared hit=45
+        ->  Hash (cost=171.01..171.01 rows=101 width=4)
+          (actual time=11.183..11.183 rows=101 loops=1)
+          Output: s.b
+          Buckets: 1024 Batches: 1 Memory Usage: 12kB
+          Buffers: shared hit=45
+          ->  HashAggregate (cost=170.00..171.01 rows=101 width=4)
+                     (actual time=11.079..11.126 rows=101 loops=1)
+              Output: s.b
+              Group Key: s.b
+              Buffers: shared hit=45
+              ->  Seq Scan on public.s (cost=0.00..145.00 rows=10000 width=4)
+                                (actual time=0.019..4.724 rows=10000 loops=1)
+                Output: s.b
+                Buffers: shared hit=45
+Planning time: 1.320 ms
+Execution time: 624.356 ms
+\end{verbatim}
+
+
+\item RESET ALL; EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) SELECT a,b,c FROM r NATURAL JOIN s WHERE a IN (SELECT t.a FROM t);\\
+Without index: hash join + hash semi join
+\begin{verbatim}
+Hash Join (cost=440.25..1943.04 rows=112490 width=12)
+    (actual time=20.751..57.164 rows=105628 loops=1)
+Planning time: 0.836 ms
+Execution time: 67.660 ms
+\end{verbatim}
+
+With index: hash join + merge join.
+\begin{verbatim}
+Hash Join  (cost=440.59..1817.14 rows=112490 width=12)
+     (actual time=19.917..53.721 rows=105628 loops=1)
+Planning time: 1.938 ms
+Execution time: 65.131 ms
+\end{verbatim}
+
+
+RESET ALL; EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) SELECT a,b,c FROM s NATURAL JOIN t WHERE a IN (SELECT r.a FROM r);
+Without index: hash join + hash semi join
+\begin{verbatim}
+Hash Join  (cost=442.27..56192.50 rows=4884102 width=12)
+    (actual time=17.134..1482.398 rows=4884102 loops=1)
+Planning time: 0.547 ms
+Execution time: 1980.768 ms
+\end{verbatim}
+
+With index: hash join + hash join.
+\begin{verbatim}
+Hash Join (cost=442.27..56192.50 rows=4884102 width=12)
+   (actual time=17.631..1541.867 rows=4884102 loops=1)
+Planning time: 0.978 ms
+Execution time: 2040.407 ms
+\end{verbatim}
+
+TODO: Discuss why.\\
+-> Indizes quasi irrelevant.\\
+-> performance unterschiede weil unterschiedliche set mächtigkeiten?
+
 \end{enumerate}
-- 
2.43.0