From 0df3e268b6355c0d0a06a3ab94891afc01f4feb9 Mon Sep 17 00:00:00 2001
From: Jan Vales <jan@jvales.net>
Date: Mon, 8 Apr 2019 02:28:56 +0200
Subject: [PATCH] added original queries.

---
 ex1/main_5.tex | 161 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 112 insertions(+), 49 deletions(-)

diff --git a/ex1/main_5.tex b/ex1/main_5.tex
index ab0e748..c7273be 100644
--- a/ex1/main_5.tex
+++ b/ex1/main_5.tex
@@ -2,54 +2,87 @@
 
 \begin{enumerate}[label=(\alph*)]
 % (a)
-\item
+\item\begin{verbatim}
+SELECT DISTINCT displayname FROM users u
+WHERE id IN (SELECT owneruserid FROM posts p WHERE p.viewcount > u.views);
+\end{verbatim}
+
+Original results
+\begin{verbatim}
+Planning time: 0.978 ms
+Execution time: 86179.847 ms
+\end{verbatim}
+
+
 Optimized query:
 \begin{verbatim}
-SELECT distinct(displayname) FROM posts p
+SELECT DISTINCT displayname FROM posts p
     JOIN users u ON p.owneruserid = u.id
     WHERE p.viewcount > u.views;
 \end{verbatim}
 
 Improvements:\\
-Just converted the selection to a JOIN.
-This improves the $n_{users} + n_{users} * n_{posts}$ Seq Scans
-to one JOIN and two Seq Scans.
+Converted the selection to a JOIN.\\
+This improves the $n_{users} + n_{users} * n_{posts}$ Seq Scans to one JOIN and two Seq Scans.
 
-Original version:
-\begin{verbatim}
- Planning time: 0.978 ms
- Execution time: 86179.847 ms
-\end{verbatim}
-
-Optimized version:
 \begin{verbatim}
  Planning time: 0.960 ms
  Execution time: 37.354 ms
 \end{verbatim}
 
+
+
 % (b)
-\item
-Optimized query:
+\item\begin{verbatim}
+SELECT score FROM comments WHERE lower(substring(text for 3)) = 'yes';
+\end{verbatim}
+
+Original results
+\begin{verbatim}
+ Planning time: 0.540 ms
+ Execution time: 60.946 ms
+\end{verbatim}
+
+
+Attempt at optimization
 \begin{verbatim}
 SELECT score FROM comments WHERE text ILIKE 'yes%';
 \end{verbatim}
 
-Improvements:\\
-None so far.
 
-Original version:
+If we are allowed to create indexes
 \begin{verbatim}
- Planning time: 0.540 ms
- Execution time: 60.946 ms
+CREATE INDEX ON comments (lower(text) varchar_pattern_ops)
+REINDEX DATABASE u00726236; VACUUM ANALYZE;
+EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
+SELECT score FROM comments WHERE lower(text) like 'yes%';
 \end{verbatim}
 
-Optimized version:
+Results
 \begin{verbatim}
-TODO, current query is slower..
+Bitmap Heap Scan on public.comments  (cost=17.71..321.40 rows=114 width=4)
+                               (actual time=0.462..5.917 rows=363 loops=1)
+Planning time: 0.218 ms
+Execution time: 6.069 ms
 \end{verbatim}
 
-% (b)
-\item
+
+
+% (c)
+\item\begin{verbatim}
+SELECT DISTINCT postid FROM votes v
+WHERE (SELECT COUNT(*) FROM votes v2
+	WHERE v2.postid = v.postid AND v2.votetypeid = 2)
+	 = (SELECT COUNT(*) FROM votes v2 WHERE v2.postid = v.postid);
+\end{verbatim}
+
+Original results
+\begin{verbatim}
+Did not terminate.
+On a different machine with SSD and no time limit it took 16 minutes.
+\end{verbatim}
+
+
 Optimized query:
 \begin{verbatim}
 SELECT DISTINCT postid FROM votes WHERE postid NOT IN (
@@ -61,47 +94,77 @@ Improvements:\\
 Instead of doing about twelve million Seq Scans this is now reduced to
 two Seq Scans by inverting the condition.
 
-Original version:
-\begin{verbatim}
-Did not terminate.
-On a different machine with SSD and no time limit it took 16 minutes.
-\end{verbatim}
-
 Optimized version:
 \begin{verbatim}
  Planning time: 1.093 ms
  Execution time: 81.776 ms
 \end{verbatim}
 
+
+
 % (d)
-\item
-Optimized query:
+\item\begin{verbatim}
+SELECT p.*, c.*, u.* FROM posts p, comments c, users u, badges b
+WHERE c.postid=p.id
+AND u.id=p.owneruserid
+AND u.upvotes+3 >= (SELECT AVG(upvotes)
+    FROM users WHERE u.creationdate > c.creationdate)
+AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id)
+AND u.id = b.userid
+AND (b.name SIMILAR TO 'Autobiographer' OR b.name SIMILAR TO 'Supporter');
+\end{verbatim}
+
+Original results
+\begin{verbatim}
+Hash Join (cost=4231.28..3049237.06 rows=1494 width=1506)
+    (actual time=4259.393..4405.810 rows=10 loops=1)
+Planning time: 4.560 ms
+Execution time: 4406.299 ms
+\end{verbatim}
+
+
+Attempt at optimization
 \begin{verbatim}
 SELECT p.*, c.*, u.* FROM posts p
-    JOIN comments c ON p.id = c.postid
-    JOIN users u ON p.owneruserid = u.id
-    JOIN badges b on u.id = b.userid
-    WHERE u.upvotes+3 >= (
-        SELECT AVG(upvotes) FROM users
-            WHERE u.creationdate > c.creationdate
-        ) AND EXISTS (
-            SELECT 1 FROM postlinks l
-                WHERE l.relatedpostid > p.id
-        ) AND b.name IN ('Autobiographer','Supporter');
+JOIN comments c ON (c.postid = p.id)
+JOIN users u ON (u.id = p.owneruserid)
+JOIN badges b ON (u.id = b.userid)
+WHERE
+u.upvotes+3 >=
+    (SELECT AVG(upvotes) FROM users WHERE u.creationdate > c.creationdate)
+AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id)
+AND b.name IN ('Autobiographer','Supporter');
 \end{verbatim}
 
-Improvements:\\
-Converted the joins via WHERE clause to proper JOINs
+Results
+\begin{verbatim}
+Hash Join (cost=4777.00..3352584.37 rows=1640 width=1506)
+    (actual time=4189.629..4303.507 rows=10 loops=1)
+Planning time: 3.739 ms
+Execution time: 4303.959 ms
+\end{verbatim}
 
-Original version:
+
+Another attempt at optimization
 \begin{verbatim}
- Planning time: 4.084 ms
- Execution time: 4006.044 ms
+
+SELECT p.*, c.*, u.* FROM posts p
+JOIN comments c ON (p.id = c.postid)
+JOIN users u ON (p.owneruserid = u.id)
+JOIN badges b ON (u.id = b.userid)
+WHERE
+u.upvotes+3 >=
+    (SELECT AVG(upvotes) FROM users WHERE u.creationdate > c.creationdate)
+AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id)
+AND b.name IN ('Autobiographer','Supporter');
 \end{verbatim}
 
-Optimized version:
+Results
 \begin{verbatim}
- Planning time: 2.954 ms
- Execution time: 3081.461 ms
+Hash Join (cost=4777.00..3421794.82 rows=28452 width=1506)
+    (actual time=3212.737..3330.359 rows=10 loops=1)
+Planning time: 3.295 ms
+Execution time: 3330.766 ms
 \end{verbatim}
+
 \end{enumerate}
-- 
2.43.0