From 683893e17ad958aea76bb29fc4ffad5fadcb629d Mon Sep 17 00:00:00 2001
From: David Kaufmann
Date: Sun, 12 May 2019 20:59:01 +0200
Subject: [PATCH] clean broken outputs

---
 ex2/spark/Exercise5_SparkInScala.ipynb | 93 +++++---------------------
 1 file changed, 15 insertions(+), 78 deletions(-)

diff --git a/ex2/spark/Exercise5_SparkInScala.ipynb b/ex2/spark/Exercise5_SparkInScala.ipynb
index e60caf5..45b5ddd 100644
--- a/ex2/spark/Exercise5_SparkInScala.ipynb
+++ b/ex2/spark/Exercise5_SparkInScala.ipynb
@@ -31,17 +31,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val fibs20 = sc.parallelize(List( 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181)) "
    ]
   },
@@ -57,17 +47,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val evenFibs20 = fibs20.filter(x => (x % 2 == 0)).collect()"
    ]
   },
@@ -83,17 +63,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val avg1map = fibs20.map(x => (x, 1))\n",
     "val avg1fold = avg1map.fold (0,0) ((x,y) => (x._1 + y._1, x._2 + y._2))\n",
@@ -161,7 +131,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "val wordlist = words.map(word => permutate(word)).collect().flatMap(x => x)"
+    "val wordlist = words.map(word => permutate(word)).flatMap(x => x).collect()"
    ]
   },
   {
@@ -235,17 +205,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query2 = articlesDF.groupBy(\"sectionName\").count()\n",
     "query2.show(false)\n",
@@ -274,17 +234,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query3 = spark.sql(\n",
     " \"SELECT a.headline, COUNT(c.commentID) AS numComments FROM articles a, comments c WHERE a.articleID = c.articleID GROUP BY a.headline\" )\n",
@@ -312,17 +262,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query4 = spark.sql(\" SELECT headline, byline, pubDate FROM articles WHERE headline RLIKE \\\"2016\\\" \")\n",
     "query4.show(false)\n",
@@ -349,17 +289,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query5 = articlesDF\n",
     " .join(commentsDF, articlesDF(\"articleID\") === commentsDF(\"articleID\"))\n",
@@ -405,6 +335,13 @@
     "\n",
     "#### Use the Spark Internal Web UI to analyse the dependencies and stages of the queries, and try to determine which commands on which Dataframes are executed as wide dependencies and which as narrow dependencies. \n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
-- 
2.43.0