From 03cac5edc9bc7a3332362de7a3f7dbee475baeb7 Mon Sep 17 00:00:00 2001 From: David Kaufmann Date: Sun, 12 May 2019 23:04:44 +0200 Subject: [PATCH] start partitioning --- ex2/hive/hive.sql | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/ex2/hive/hive.sql b/ex2/hive/hive.sql index 9967b13..d747952 100644 --- a/ex2/hive/hive.sql +++ b/ex2/hive/hive.sql @@ -6,8 +6,10 @@ USE e700719f; SET hive.enforce.bucketing = TRUE; SET hive.exec.dynamic.partition = TRUE; SET hive.exec.dynamic.partition.mode = nonstrict; ---SET hive.exec.max.dynamic.partitions = X; ---SET hive.exec.max.dynamic.partitions.pernode = Y; +SET hive.exec.max.dynamic.partitions = 1980; +SET hive.exec.max.dynamic.partitions.pernode = 110; +-- 18 nodes total +-- users.reputation has 1519 different values -- badges CREATE TABLE IF NOT EXISTS badges (id INT, class INT, `date` DATE, name STRING, tagbased BOOLEAN, userid INT); @@ -32,3 +34,24 @@ LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/users.csv' OVERWRITE INTO T -- votes CREATE TABLE IF NOT EXISTS votes (id INT, bountyamount INT, creationdate DATE, postid INT, userid INT, votetypeid INT); LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/votes.csv' OVERWRITE INTO TABLE votes; + +-- users_partitioned +CREATE TABLE IF NOT EXISTS users_partitioned (id INT, aboutme STRING, accountid INT, creationdate DATE, displayname STRING, downvotes INT, lastaccessdate DATE, location STRING, profileimageurl STRING, upvotes INT, views INT, websiteurl STRING) PARTITIONED BY (reputation INT); +INSERT OVERWRITE TABLE users_partitioned PARTITION (reputation) SELECT * FROM users; + +SELECT "\n\n\n"; + +SELECT "\n##### QUERY 1 UNOPTIMIZED #####\n"; +-- query unoptimized +EXPLAIN SELECT p.id FROM posts p, comments c, users u, votes v +WHERE c.postid=p.id AND c.userid=p.owneruserid AND u.id=p.owneruserid +AND u.reputation > 100 AND v.postid = p.id AND v.userid = p.owneruserid +AND NOT EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid = p.id); + +SELECT "\n##### QUERY 1 OPTIMIZED #####\n"; +-- query optimized +EXPLAIN SELECT p.id FROM posts p, comments c, users_partitioned u, votes v +WHERE c.postid=p.id AND c.userid=p.owneruserid AND u.id=p.owneruserid +AND u.reputation > 100 AND v.postid = p.id AND v.userid = p.owneruserid +AND NOT EXISTS (SELECT 1 FROM postlinks l WHERE l.relatedpostid = p.id); + -- 2.43.0