From 03cac5edc9bc7a3332362de7a3f7dbee475baeb7 Mon Sep 17 00:00:00 2001
From: David Kaufmann <astra@ionic.at>
Date: Sun, 12 May 2019 23:04:44 +0200
Subject: [PATCH] start partitioning

---
 ex2/hive/hive.sql | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/ex2/hive/hive.sql b/ex2/hive/hive.sql
index 9967b13..d747952 100644
--- a/ex2/hive/hive.sql
+++ b/ex2/hive/hive.sql
@@ -6,8 +6,10 @@ USE e700719f;
 SET hive.enforce.bucketing = TRUE;
 SET hive.exec.dynamic.partition = TRUE;
 SET hive.exec.dynamic.partition.mode = nonstrict;
---SET hive.exec.max.dynamic.partitions = X;
---SET hive.exec.max.dynamic.partitions.pernode = Y;
+SET hive.exec.max.dynamic.partitions = 1980;
+SET hive.exec.max.dynamic.partitions.pernode = 110;
+-- 18 nodes total
+-- users.reputation has 1519 different values
 
 -- badges
 CREATE TABLE IF NOT EXISTS badges (id INT, class INT, `date` DATE, name STRING, tagbased BOOLEAN, userid INT);
@@ -32,3 +34,24 @@ LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/users.csv' OVERWRITE INTO T
 -- votes
 CREATE TABLE IF NOT EXISTS votes (id INT, bountyamount INT, creationdate DATE, postid INT, userid INT, votetypeid INT);
 LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/votes.csv' OVERWRITE INTO TABLE votes;
+
+-- users_partitioned
+CREATE TABLE IF NOT EXISTS users_partitioned (id INT, aboutme STRING, accountid INT, creationdate DATE, displayname STRING, downvotes INT, lastaccessdate DATE, location STRING, profileimageurl STRING, upvotes INT, views INT, websiteurl STRING) PARTITIONED BY (reputation INT);
+INSERT OVERWRITE TABLE users_partitioned PARTITION (reputation) SELECT * FROM users;
+
+SELECT "\n\n\n";
+
+SELECT "\n##### QUERY 1 UNOPTIMIZED #####\n";
+-- query unoptimized
+EXPLAIN SELECT p.id FROM posts p, comments c, users u, votes v
+WHERE c.postid=p.id AND c.userid=p.owneruserid AND u.id=p.owneruserid
+AND u.reputation > 100 AND v.postid = p.id AND v.userid = p.owneruserid
+AND NOT EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid = p.id);
+
+SELECT "\n##### QUERY 1 OPTIMIZED #####\n";
+-- query optimized
+EXPLAIN SELECT p.id FROM posts p, comments c, users_partitioned u, votes v
+WHERE c.postid=p.id AND c.userid=p.owneruserid AND u.id=p.owneruserid
+AND u.reputation > 100 AND v.postid = p.id AND v.userid = p.owneruserid
+AND NOT EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid = p.id);
+
-- 
2.43.0