From 981401fb095f304020b6cf3f84f44b78b09e99a0 Mon Sep 17 00:00:00 2001
From: Jan Vales <jan@jvales.net>
Date: Sat, 6 Apr 2019 00:49:07 +0200
Subject: [PATCH] Ex1.4 a-c

---
 ex1/main_4.tex | 195 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 161 insertions(+), 34 deletions(-)

diff --git a/ex1/main_4.tex b/ex1/main_4.tex
index f674397..e4c3c23 100644
--- a/ex1/main_4.tex
+++ b/ex1/main_4.tex
@@ -1,51 +1,178 @@
 %ex1.4
 
 \begin{enumerate}[label=(\alph*)]
-\item Hash Join, then Merge Join
+\item Hash Join in Merge Join.\\
+EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) SELECT a,b,c FROM r NATURAL JOIN s NATURAL JOIN t;\\
 \begin{verbatim}
-Merge Join  (cost=126688.84..189746.85 rows=4812309 width=12) (actual time=1235.201..3507.532 rows=4790517 loops=1)
+Merge Join (cost=126688.84..189746.85 rows=4812309 width=12)
+      (actual time=1223.252..3564.720 rows=4790517 loops=1)
   Output: r.a, r.b, s.c
   Merge Cond: ((t.a = r.a) AND (t.c = s.c))
   Buffers: shared hit=138, temp read=1320 written=3593
-  ->  Sort  (cost=809.39..834.39 rows=10000 width=8) (actual time=15.180..17.809 rows=10000 loops=1)
+  -> Sort (cost=809.39..834.39 rows=10000 width=8)
+     (actual time=6.542..9.442 rows=10000 loops=1)
+    Output: t.a, t.c
+    Sort Key: t.a, t.c
+    Sort Method: quicksort Memory: 853kB
+    Buffers: shared hit=45
+    -> Seq Scan on public.t (cost=0.00..145.00 rows=10000 width=8)
+                     (actual time=0.013..1.698 rows=10000 loops=1)
         Output: t.a, t.c
-        Sort Key: t.a, t.c
-        Sort Method: quicksort  Memory: 853kB
         Buffers: shared hit=45
-        ->  Seq Scan on public.t  (cost=0.00..145.00 rows=10000 width=8) (actual time=0.036..4.271 rows=10000 loops=1)
-              Output: t.a, t.c
-              Buffers: shared hit=45
-  ->  Materialize  (cost=125878.37..130770.59 rows=978445 width=12) (actual time=1220.009..1810.482 rows=4790518 loops=1)
+  -> Materialize (cost=125878.37..130770.59 rows=978445 width=12)
+            (actual time=1216.698..1848.300 rows=4790518 loops=1)
+    Output: r.a, r.b, s.c
+    Buffers: shared hit=93, temp read=1320 written=3593
+    -> Sort (cost=125878.37..128324.48 rows=978445 width=12)
+       (actual time=1216.694..1265.485 rows=105629 loops=1)
         Output: r.a, r.b, s.c
+        Sort Key: r.a, s.c
+        Sort Method: external merge Disk: 21312kB
         Buffers: shared hit=93, temp read=1320 written=3593
-        ->  Sort  (cost=125878.37..128324.48 rows=978445 width=12) (actual time=1220.003..1267.097 rows=105629 loops=1)
-              Output: r.a, r.b, s.c
-              Sort Key: r.a, s.c
-              Sort Method: external merge  Disk: 21312kB
-              Buffers: shared hit=93, temp read=1320 written=3593
-              ->  Hash Join  (cost=270.00..11799.45 rows=978445 width=12) (actual time=7.133..326.364 rows=993774 loops=1)
-                    Output: r.a, r.b, s.c
-                    Hash Cond: (r.b = s.b)
-                    Buffers: shared hit=93
-                    ->  Seq Scan on public.r  (cost=0.00..145.00 rows=10000 width=8) (actual time=0.023..2.157 rows=10000 loops=1)
-                          Output: r.a, r.b
-                          Buffers: shared hit=45
-                    ->  Hash  (cost=145.00..145.00 rows=10000 width=8) (actual time=7.022..7.022 rows=10000 loops=1)
-                          Output: s.c, s.b
-                          Buckets: 16384  Batches: 1  Memory Usage: 519kB
-                          Buffers: shared hit=45
-                          ->  Seq Scan on public.s  (cost=0.00..145.00 rows=10000 width=8) (actual time=0.021..3.459 rows=10000 loops=1)
-                                Output: s.c, s.b
-                                Buffers: shared hit=45
-Planning time: 0.899 ms
-Execution time: 4040.426 ms
-
+        -> Hash Join (cost=270.00..11799.45 rows=978445 width=12)
+                (actual time=5.707..326.759 rows=993774 loops=1)
+          Output: r.a, r.b, s.c
+          Hash Cond: (r.b = s.b)
+          Buffers: shared hit=93
+          -> Seq Scan on public.r (cost=0.00..145.00 rows=10000 width=8)
+                           (actual time=0.013..2.200 rows=10000 loops=1)
+              Output: r.a, r.b
+              Buffers: shared hit=45
+          -> Hash  (cost=145.00..145.00 rows=10000 width=8)
+              (actual time=5.636..5.636 rows=10000 loops=1)
+              Output: s.c, s.b
+              Buckets: 16384  Batches: 1 Memory Usage: 519kB
+              Buffers: shared hit=45
+              -> Seq Scan on public.s (cost=0.00..145.00 rows=10000 width=8)
+                               (actual time=0.012..2.656 rows=10000 loops=1)
+                Output: s.c, s.b
+                Buffers: shared hit=45
+Planning time: 0.448 ms
+Execution time: 4093.957 ms
 \end{verbatim}
 
+
 \item
+set enable\_hashjoin=1; set enable\_mergejoin=0; set enable\_nestloop=0;
+\begin{verbatim}
+Hash Join (cost=565.00..382720.42 rows=4812309 width=12)
+    (actual time=10.065..2336.284 rows=4790517 loops=1)
+Execution time: 2839.835 ms
+\end{verbatim}
+
+
+set enable\_hashjoin=0; set enable\_mergejoin=1; set enable\_nestloop=0;
+\begin{verbatim}
+Merge Join (cost=127309.80..190367.82 rows=4812309 width=12)
+      (actual time=1400.115..3961.542 rows=4790517 loops=1)
+Planning time: 0.374 ms
+Execution time: 4215.626 ms
+\end{verbatim}
+
+set enable\_hashjoin=0; set enable\_mergejoin=0; set enable\_nestloop=1;
+\begin{verbatim}
+Nested Loop (cost=0.00..172728360.00 rows=4812309 width=12)
+Execution time: query-timeout
+\end{verbatim}
+
+TODO: Explain perfomance diffrences.
+
+TODO: change create numbers to change performance. -> (non)duplicate stuff?
+
+\item create indices.
+\begin{verbatim}
+create index i1 on r(a);
+create index i2 on r(b);
+create index i3 on s(b);
+create index i4 on s(c);
+create index i5 on t(a);
+create index i6 on t(c);
+
+create index ii1 on r(a,b);
+create index ii2 on s(b,c);
+create index ii3 on t(a,c);
+
+REINDEX DATABASE u00726236;
+\end{verbatim}
+
+\newpage
+New query plan for merge join.
+\begin{verbatim}
+Merge Join (cost=126115.03..189598.35 rows=4812309 width=12)
+      (actual time=1471.775..3798.577 rows=4790517 loops=1)
+  Output: r.a, r.b, s.c
+  Merge Cond: ((t.a = r.a) AND (t.c = s.c))
+  Buffers: shared hit=7289 read=58, temp read=1325 written=3598
+  ->  Index Only Scan using ii3 on public.t (cost=0.29..449.50 rows=10000 width=8)
+                                     (actual time=0.034..8.250 rows=10000 loops=1)
+    Output: t.a, t.c
+    Heap Fetches: 10000
+    Buffers: shared hit=6299 read=29
+  ->  Materialize (cost=126114.75..131006.97 rows=978445 width=12)
+             (actual time=1471.732..2093.037 rows=4790518 loops=1)
+    Output: r.a, r.b, s.c
+    Buffers: shared hit=990 read=29, temp read=1325 written=3598
+    ->  Sort (cost=126114.75..128560.86 rows=978445 width=12)
+        (actual time=1471.727..1519.766 rows=105629 loops=1)
+        Output: r.a, r.b, s.c
+        Sort Key: r.a, s.c
+        Sort Method: external merge  Disk: 21312kB
+        Buffers: shared hit=990 read=29, temp read=1325 written=3598
+        ->  Merge Join (cost=809.67..12035.83 rows=978445 width=12)
+                  (actual time=5.865..568.309 rows=993774 loops=1)
+          Output: r.a, r.b, s.c
+          Merge Cond: (r.b = s.b)
+          Buffers: shared hit=990 read=29
+          ->  Index Scan using i2 on public.r (cost=0.29..449.80 rows=10000 width=8)
+                                       (actual time=0.026..7.013 rows=10000 loops=1)
+              Output: r.a, r.b
+              Buffers: shared hit=945 read=29
+          ->  Sort (cost=809.39..834.39 rows=10000 width=8)
+            (actual time=5.836..189.024 rows=993775 loops=1)
+              Output: s.c, s.b
+              Sort Key: s.b
+              Sort Method: quicksort Memory: 853kB
+              Buffers: shared hit=45
+              ->  Seq Scan on public.s (cost=0.00..145.00 rows=10000 width=8)
+                                (actual time=0.013..2.654 rows=10000 loops=1)
+                Output: s.c, s.b
+                Buffers: shared hit=45
+Planning time: 1.060 ms
+Execution time: 4323.022 ms
+\end{verbatim}
+
+It seems like no performance improvement is made: Query planning and execution time is bigger with indices.
 
-Only Merge join: Total cost: 190481.63
-Only Hash join: Total cost: 378418.91
-Only Nestloop: Total cost: 174352535.00
+
+New query plan for merge join.
+\begin{verbatim}
+Nested Loop (cost=0.57..498778.26 rows=4812309 width=12)
+     (actual time=0.096..8351.002 rows=4790517 loops=1)
+  Output: r.a, r.b, s.c
+  Buffers: shared hit=7476815 read=11
+  ->  Nested Loop (cost=0.29..30520.00 rows=1007007 width=12)
+          (actual time=0.076..1017.339 rows=987869 loops=1)
+        Output: r.a, r.b, t.c
+        Buffers: shared hit=950972 read=4
+        ->  Seq Scan on public.t (cost=0.00..145.00 rows=10000 width=8)
+                          (actual time=0.015..2.720 rows=10000 loops=1)
+              Output: t.a, t.c
+              Buffers: shared hit=45
+        ->  Index Only Scan using ii1 on public.r (cost=0.29..2.05 rows=99 width=8)
+                                         (actual time=0.007..0.076 rows=99 loops=10000)
+              Output: r.a, r.b
+              Index Cond: (r.a = t.a)
+              Heap Fetches: 987869
+              Buffers: shared hit=950927 read=4
+  ->  Index Only Scan using ii2 on public.s (cost=0.29..0.41 rows=5 width=8)
+                                   (actual time=0.003..0.006 rows=5 loops=987869)
+        Output: s.b, s.c
+        Index Cond: ((s.b = r.b) AND (s.c = t.c))
+        Heap Fetches: 4790517
+        Buffers: shared hit=6525843 read=7
+Planning time: 0.604 ms
+Execution time: 8884.429 ms
+\end{verbatim}
+Suddenly nested loop joins become feasible.
 
 \end{enumerate}
-- 
2.43.0