From a8b6aaa4fee5260bba36f63f24cb6b4e8a9210e4 Mon Sep 17 00:00:00 2001 From: David Kaufmann Date: Sun, 12 May 2019 22:16:47 +0200 Subject: [PATCH] add first hive.sql --- ex2/hive/hive.sql | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 ex2/hive/hive.sql diff --git a/ex2/hive/hive.sql b/ex2/hive/hive.sql new file mode 100644 index 0000000..9967b13 --- /dev/null +++ b/ex2/hive/hive.sql @@ -0,0 +1,34 @@ +DROP DATABASE IF EXISTS e700719f CASCADE; +CREATE DATABASE e700719f; +USE e700719f; + +-- configure bucketing +SET hive.enforce.bucketing = TRUE; +SET hive.exec.dynamic.partition = TRUE; +SET hive.exec.dynamic.partition.mode = nonstrict; +--SET hive.exec.max.dynamic.partitions = X; +--SET hive.exec.max.dynamic.partitions.pernode = Y; + +-- badges +CREATE TABLE IF NOT EXISTS badges (id INT, class INT, `date` DATE, name STRING, tagbased BOOLEAN, userid INT); +LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/badges.csv' OVERWRITE INTO TABLE badges; + +-- comments +CREATE TABLE IF NOT EXISTS comments (id INT, creationdate DATE, postid INT, score INT, text STRING, userdisplayname STRING, userid INT); +LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/comments.csv' OVERWRITE INTO TABLE comments; + +-- postlinks +CREATE TABLE IF NOT EXISTS postlinks (id INT, creationdate DATE, linktypeid INT, postid INT, relatedpostid INT); +LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/postlinks.csv' OVERWRITE INTO TABLE postlinks; + +-- posts +CREATE TABLE IF NOT EXISTS posts (id INT, acceptedanswerid INT, answercount INT, body STRING, closeddate DATE, commentcount INT, communityowneddate DATE, creationdate DATE, favoritecount INT, lastactivitydate DATE, lasteditdate DATE, lasteditordisplayname STRING, lasteditoruserid INT, ownerdisplayname STRING, owneruserid INT, parentid INT, posttypeid INT, score INT, tags STRING, title STRING, viewcount INT); +LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/posts.csv' OVERWRITE INTO TABLE posts; + +-- users +CREATE TABLE IF NOT EXISTS users (id INT, aboutme STRING, accountid INT, creationdate DATE, displayname STRING, downvotes INT, lastaccessdate DATE, location STRING, profileimageurl STRING, reputation INT, upvotes INT, views INT, websiteurl STRING); +LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/users.csv' OVERWRITE INTO TABLE users; + +-- votes +CREATE TABLE IF NOT EXISTS votes (id INT, bountyamount INT, creationdate DATE, postid INT, userid INT, votetypeid INT); +LOAD DATA LOCAL INPATH '/home/adbs/2019S/shared/hive/votes.csv' OVERWRITE INTO TABLE votes; -- 2.43.0