3 \begin{enumerate}[label=(\alph*)]
6 SELECT DISTINCT displayname FROM users u
7 WHERE id IN (SELECT owneruserid FROM posts p WHERE p.viewcount > u.views);
12 Planning time: 0.978 ms
13 Execution time: 86179.847 ms
19 SELECT DISTINCT displayname FROM posts p
20 JOIN users u ON p.owneruserid = u.id
21 WHERE p.viewcount > u.views;
25 Converted the selection to a JOIN.\\
26 This improves the $n_{users} + n_{users} * n_{posts}$ Seq Scans to one JOIN and two Seq Scans.
29 Planning time: 0.960 ms
30 Execution time: 37.354 ms
37 SELECT score FROM comments WHERE lower(substring(text for 3)) = 'yes';
42 Planning time: 0.540 ms
43 Execution time: 60.946 ms
47 Attempt at optimization
49 SELECT score FROM comments WHERE text ILIKE 'yes%';
53 If we are allowed to create indexes
55 CREATE INDEX ON comments (lower(text) varchar_pattern_ops)
56 REINDEX DATABASE u00726236; VACUUM ANALYZE;
57 EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
58 SELECT score FROM comments WHERE lower(text) like 'yes%';
63 Bitmap Heap Scan on public.comments (cost=17.71..321.40 rows=114 width=4)
64 (actual time=0.462..5.917 rows=363 loops=1)
65 Planning time: 0.218 ms
66 Execution time: 6.069 ms
73 SELECT DISTINCT postid FROM votes v
74 WHERE (SELECT COUNT(*) FROM votes v2
75 WHERE v2.postid = v.postid AND v2.votetypeid = 2)
76 = (SELECT COUNT(*) FROM votes v2 WHERE v2.postid = v.postid);
82 On a different machine with SSD and no time limit it took 16 minutes.
88 SELECT DISTINCT postid FROM votes WHERE postid NOT IN (
89 SELECT postid FROM votes WHERE votetypeid != 2
94 Instead of doing about twelve million Seq Scans this is now reduced to
95 two Seq Scans by inverting the condition.
99 Planning time: 1.093 ms
100 Execution time: 81.776 ms
106 \item\begin{verbatim}
107 SELECT p.*, c.*, u.* FROM posts p, comments c, users u, badges b
109 AND u.id=p.owneruserid
110 AND u.upvotes+3 >= (SELECT AVG(upvotes)
111 FROM users WHERE u.creationdate > c.creationdate)
112 AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id)
114 AND (b.name SIMILAR TO 'Autobiographer' OR b.name SIMILAR TO 'Supporter');
119 Hash Join (cost=4231.28..3049237.06 rows=1494 width=1506)
120 (actual time=4259.393..4405.810 rows=10 loops=1)
121 Planning time: 4.560 ms
122 Execution time: 4406.299 ms
127 Converted the joins via WHERE clause to proper JOINs
129 Attempt at optimization
131 SELECT p.*, c.*, u.* FROM posts p
132 JOIN comments c ON (c.postid = p.id)
133 JOIN users u ON (u.id = p.owneruserid)
134 JOIN badges b ON (u.id = b.userid)
137 (SELECT AVG(upvotes) FROM users WHERE u.creationdate > c.creationdate)
138 AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id)
139 AND b.name IN ('Autobiographer','Supporter');
144 Hash Join (cost=4777.00..3352584.37 rows=1640 width=1506)
145 (actual time=4189.629..4303.507 rows=10 loops=1)
146 Planning time: 3.739 ms
147 Execution time: 4303.959 ms
151 Another attempt at optimization
154 SELECT p.*, c.*, u.* FROM posts p
155 JOIN comments c ON (p.id = c.postid)
156 JOIN users u ON (p.owneruserid = u.id)
157 JOIN badges b ON (u.id = b.userid)
160 (SELECT AVG(upvotes) FROM users WHERE u.creationdate > c.creationdate)
161 AND EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid > p.id)
162 AND b.name IN ('Autobiographer','Supporter');
167 Hash Join (cost=4777.00..3421794.82 rows=28452 width=1506)
168 (actual time=3212.737..3330.359 rows=10 loops=1)
169 Planning time: 3.295 ms
170 Execution time: 3330.766 ms
173 And move that semijoin for each post to a simple value comparison,
174 because ``some $x$ exists where $f(x)$ is larger than $y$'' equals ``$y < max(f(x))$'',
176 SELECT p.*, c.*, u.* FROM posts p
177 JOIN comments c ON p.id = c.postid
178 JOIN users u ON p.owneruserid = u.id
179 JOIN badges b on u.id = b.userid
181 b.name IN ('Autobiographer','Supporter')
182 AND p.id < (SELECT MAX(relatedpostid) FROM postlinks)
184 (SELECT AVG(upvotes) FROM users WHERE u.creationdate > c.creationdate);
189 Hash Join (cost=4274.13..3221469.71 rows=28448 width=1506)
190 (actual time=261.341..369.495 rows=10 loops=1)
191 Planning time: 4.422 ms
192 Execution time: 370.932 ms