From a5d3277bc3f180897ffcef64b5fda324d6891815 Mon Sep 17 00:00:00 2001 From: David Kaufmann Date: Sun, 12 May 2019 20:44:10 +0200 Subject: [PATCH] update ex2.5 --- ex2/spark/Exercise5_SparkInScala.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ex2/spark/Exercise5_SparkInScala.ipynb b/ex2/spark/Exercise5_SparkInScala.ipynb index 11379a5..e60caf5 100644 --- a/ex2/spark/Exercise5_SparkInScala.ipynb +++ b/ex2/spark/Exercise5_SparkInScala.ipynb @@ -384,8 +384,7 @@ "metadata": {}, "outputs": [], "source": [ - "// does not work yet\n", - "val query5sql = spark.sql(\"SELECT COUNT(singleKeyWords), EXPLODE(keywords) AS singleKeyWords FROM articles JOIN comments ON articles.articleID = comments.articleID GROUP BY singleKeyWords ORDER BY number DESC\")" + "val query5sql = spark.sql(\"SELECT singleKeyWords, COUNT(*) AS number FROM (SELECT EXPLODE(keywords) AS singleKeyWords FROM articles JOIN comments ON articles.articleID = comments.articleID) GROUP BY singleKeyWords ORDER BY number DESC\")" ] }, { -- 2.43.0