From 0df3e268b6355c0d0a06a3ab94891afc01f4feb9 Mon Sep 17 00:00:00 2001 From: Jan Vales Date: Mon, 8 Apr 2019 02:28:56 +0200 Subject: [PATCH] added original queries. --- ex1/main_5.tex | 161 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 112 insertions(+), 49 deletions(-) diff --git a/ex1/main_5.tex b/ex1/main_5.tex index ab0e748..c7273be 100644 --- a/ex1/main_5.tex +++ b/ex1/main_5.tex @@ -2,54 +2,87 @@ \begin{enumerate}[label=(\alph*)] % (a) -\item +\item\begin{verbatim} +SELECT DISTINCT displayname FROM users u +WHERE id IN (SELECT owneruserid FROM posts p WHERE p.viewcount > u.views); +\end{verbatim} + +Original results +\begin{verbatim} +Planning time: 0.978 ms +Execution time: 86179.847 ms +\end{verbatim} + + Optimized query: \begin{verbatim} -SELECT distinct(displayname) FROM posts p +SELECT DISTINCT displayname FROM posts p JOIN users u ON p.owneruserid = u.id WHERE p.viewcount > u.views; \end{verbatim} Improvements:\\ -Just converted the selection to a JOIN. -This improves the $n_{users} + n_{users} * n_{posts}$ Seq Scans -to one JOIN and two Seq Scans. +Converted the selection to a JOIN.\\ +This improves the $n_{users} + n_{users} * n_{posts}$ Seq Scans to one JOIN and two Seq Scans. -Original version: -\begin{verbatim} - Planning time: 0.978 ms - Execution time: 86179.847 ms -\end{verbatim} - -Optimized version: \begin{verbatim} Planning time: 0.960 ms Execution time: 37.354 ms \end{verbatim} + + % (b) -\item -Optimized query: +\item\begin{verbatim} +SELECT score FROM comments WHERE lower(substring(text for 3)) = 'yes'; +\end{verbatim} + +Original results +\begin{verbatim} + Planning time: 0.540 ms + Execution time: 60.946 ms +\end{verbatim} + + +Attempt at optimization \begin{verbatim} SELECT score FROM comments WHERE text ILIKE 'yes%'; \end{verbatim} -Improvements:\\ -None so far. -Original version: +If we are allowed to create indexes \begin{verbatim} - Planning time: 0.540 ms - Execution time: 60.946 ms +CREATE INDEX ON comments (lower(text) varchar_pattern_ops) +REINDEX DATABASE u00726236; VACUUM ANALYZE; +EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) +SELECT score FROM comments WHERE lower(text) like 'yes%'; \end{verbatim} -Optimized version: +Results \begin{verbatim} -TODO, current query is slower.. +Bitmap Heap Scan on public.comments (cost=17.71..321.40 rows=114 width=4) + (actual time=0.462..5.917 rows=363 loops=1) +Planning time: 0.218 ms +Execution time: 6.069 ms \end{verbatim} -% (b) -\item + + +% (c) +\item\begin{verbatim} +SELECT DISTINCT postid FROM votes v +WHERE (SELECT COUNT(*) FROM votes v2 + WHERE v2.postid = v.postid AND v2.votetypeid = 2) + = (SELECT COUNT(*) FROM votes v2 WHERE v2.postid = v.postid); +\end{verbatim} + +Original results +\begin{verbatim} +Did not terminate. +On a different machine with SSD and no time limit it took 16 minutes. +\end{verbatim} + + Optimized query: \begin{verbatim} SELECT DISTINCT postid FROM votes WHERE postid NOT IN ( @@ -61,47 +94,77 @@ Improvements:\\ Instead of doing about twelve million Seq Scans this is now reduced to two Seq Scans by inverting the condition. -Original version: -\begin{verbatim} -Did not terminate. -On a different machine with SSD and no time limit it took 16 minutes. -\end{verbatim} - Optimized version: \begin{verbatim} Planning time: 1.093 ms Execution time: 81.776 ms \end{verbatim} + + % (d) -\item -Optimized query: +\item\begin{verbatim} +SELECT p.*, c.*, u.* FROM posts p, comments c, users u, badges b +WHERE c.postid=p.id +AND u.id=p.owneruserid +AND u.upvotes+3 >= (SELECT AVG(upvotes) + FROM users WHERE u.creationdate > c.creationdate) +AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id) +AND u.id = b.userid +AND (b.name SIMILAR TO 'Autobiographer' OR b.name SIMILAR TO 'Supporter'); +\end{verbatim} + +Original results +\begin{verbatim} +Hash Join (cost=4231.28..3049237.06 rows=1494 width=1506) + (actual time=4259.393..4405.810 rows=10 loops=1) +Planning time: 4.560 ms +Execution time: 4406.299 ms +\end{verbatim} + + +Attempt at optimization \begin{verbatim} SELECT p.*, c.*, u.* FROM posts p - JOIN comments c ON p.id = c.postid - JOIN users u ON p.owneruserid = u.id - JOIN badges b on u.id = b.userid - WHERE u.upvotes+3 >= ( - SELECT AVG(upvotes) FROM users - WHERE u.creationdate > c.creationdate - ) AND EXISTS ( - SELECT 1 FROM postlinks l - WHERE l.relatedpostid > p.id - ) AND b.name IN ('Autobiographer','Supporter'); +JOIN comments c ON (c.postid = p.id) +JOIN users u ON (u.id = p.owneruserid) +JOIN badges b ON (u.id = b.userid) +WHERE +u.upvotes+3 >= + (SELECT AVG(upvotes) FROM users WHERE u.creationdate > c.creationdate) +AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id) +AND b.name IN ('Autobiographer','Supporter'); \end{verbatim} -Improvements:\\ -Converted the joins via WHERE clause to proper JOINs +Results +\begin{verbatim} +Hash Join (cost=4777.00..3352584.37 rows=1640 width=1506) + (actual time=4189.629..4303.507 rows=10 loops=1) +Planning time: 3.739 ms +Execution time: 4303.959 ms +\end{verbatim} -Original version: + +Another attempt at optimization \begin{verbatim} - Planning time: 4.084 ms - Execution time: 4006.044 ms + +SELECT p.*, c.*, u.* FROM posts p +JOIN comments c ON (p.id = c.postid) +JOIN users u ON (p.owneruserid = u.id) +JOIN badges b ON (u.id = b.userid) +WHERE +u.upvotes+3 >= + (SELECT AVG(upvotes) FROM users WHERE u.creationdate > c.creationdate) +AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id) +AND b.name IN ('Autobiographer','Supporter'); \end{verbatim} -Optimized version: +Results \begin{verbatim} - Planning time: 2.954 ms - Execution time: 3081.461 ms +Hash Join (cost=4777.00..3421794.82 rows=28452 width=1506) + (actual time=3212.737..3330.359 rows=10 loops=1) +Planning time: 3.295 ms +Execution time: 3330.766 ms \end{verbatim} + \end{enumerate} -- 2.43.0