clean broken outputs
author David Kaufmann <astra@ionic.at>
Sun, 12 May 2019 18:59:01 +0000 (20:59 +0200)
committer David Kaufmann <astra@ionic.at>
Sun, 12 May 2019 18:59:01 +0000 (20:59 +0200)
ex2/spark/Exercise5_SparkInScala.ipynb

index e60caf5d4d65b14314aded034418142df7644eae..45b5dddd9352405c1b01a364f962ecb12ccb223e 100644
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val fibs20 = sc.parallelize(List( 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181))  "
    ]
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val evenFibs20 = fibs20.filter(x => (x % 2 == 0)).collect()"
    ]
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val avg1map = fibs20.map(x => (x, 1))\n",
     "val avg1fold = avg1map.fold (0,0) ((x,y) => (x._1 + y._1, x._2 + y._2))\n",
    "metadata": {},
    "outputs": [],
    "source": [
-    "val wordlist = words.map(word => permutate(word)).collect().flatMap(x => x)"
+    "val wordlist = words.map(word => permutate(word)).flatMap(x => x).collect()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query2 = articlesDF.groupBy(\"sectionName\").count()\n",
     "query2.show(false)\n",
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query3  = spark.sql(\n",
     "    \"SELECT a.headline, COUNT(c.commentID) AS numComments FROM articles a, comments c WHERE a.articleID = c.articleID GROUP BY a.headline\" )\n",
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query4 = spark.sql(\" SELECT headline, byline, pubDate FROM articles WHERE headline RLIKE \\\"2016\\\" \")\n",
     "query4.show(false)\n",
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Intitializing Scala interpreter ..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "val query5 = articlesDF\n",
     "      .join(commentsDF, articlesDF(\"articleID\") === commentsDF(\"articleID\"))\n",
     "\n",
     "#### Use the Spark Internal Web UI to analyse the dependencies and stages of the queries, and try to determine which commands on which Dataframes are executed as wide dependencies and which as narrow dependencies. \n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {