4 SET hive.enforce.bucketing = TRUE;
5 SET hive.exec.dynamic.partition = TRUE;
6 SET hive.exec.dynamic.partition.mode = nonstrict;
7 --SET hive.exec.max.dynamic.partitions = 1980;
8 --SET hive.exec.max.dynamic.partitions.pernode = 110;
9 SET hive.exec.max.dynamic.partitions = 2000;
10 SET hive.exec.max.dynamic.partitions.pernode = 400;
12 -- users.reputation has 1519 different values
15 --SELECT p.id FROM posts p, comments c, users u, votes v
16 --WHERE c.postid=p.id AND c.userid=p.owneruserid AND u.id=p.owneruserid
17 --AND u.reputation > 100 AND v.postid = p.id AND v.userid = p.owneruserid
18 --AND NOT EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid = p.id)
20 -- find join column diff values
21 SELECT "COUNT(DISTINCT x)";
22 SELECT COUNT(DISTINCT postid) FROM comments;
23 SELECT COUNT(DISTINCT userid) FROM comments;
24 SELECT COUNT(DISTINCT owneruserid) FROM posts;
25 SELECT COUNT(DISTINCT relatedpostid) FROM postlinks;
28 --CREATE TABLE IF NOT EXISTS users_partitioned (id INT, aboutme STRING, accountid INT, creationdate TIMESTAMP, displayname STRING, downvotes INT, lastaccessdate TIMESTAMP, location STRING, profileimageurl STRING, upvotes INT, views INT, websiteurl STRING) PARTITIONED BY (reputation INT);
29 --INSERT OVERWRITE TABLE users_partitioned PARTITION (reputation) SELECT * FROM users;
33 SELECT "\n##### QUERY 1 UNOPTIMIZED #####\n";
35 --EXPLAIN SELECT p.id FROM posts p, comments c, users u, votes v
36 --WHERE c.postid=p.id AND c.userid=p.owneruserid AND u.id=p.owneruserid
37 --AND u.reputation > 100 AND v.postid = p.id AND v.userid = p.owneruserid
38 --AND NOT EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid = p.id);
40 SELECT "\n##### QUERY 1 OPTIMIZED #####\n";
42 --EXPLAIN SELECT p.id FROM posts p, comments c, users_partitioned u, votes v
43 --WHERE c.postid=p.id AND c.userid=p.owneruserid AND u.id=p.owneruserid
44 --AND u.reputation > 100 AND v.postid = p.id AND v.userid = p.owneruserid
45 --AND NOT EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid = p.id);