From b19fb1a629ed643edaa1113fda8ea4e5014abbdf Mon Sep 17 00:00:00 2001
From: David Kaufmann <astra@ionic.at>
Date: Sat, 6 Apr 2019 01:42:40 +0200
Subject: [PATCH] add most of ex1.3

---
 ex1/main_3.tex | 109 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 2 deletions(-)

diff --git a/ex1/main_3.tex b/ex1/main_3.tex
index f202998..04a071a 100644
--- a/ex1/main_3.tex
+++ b/ex1/main_3.tex
@@ -1,6 +1,111 @@
 %ex1.3
 
 \begin{enumerate}[label=(\alph*)]
-	\item empty item oida!
-	
+% (a)
+	\item
+$ numGroups = 8 $ \\
+$ numRecords = 4000 $ \\
+$ bucketHeight = numRecords / numGroups = 500 $ \\
+
+\begin{enumerate}[label=\roman*)]
+	\item
+$ numBuckets = 3 + \frac{4}{7} = \frac{25}{7} $ \\
+$ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{25}{7} * 500}{4000} \approx 0.446 \approx 44.6\% $ \\
+	\item
+$ numBuckets = \frac{175-140}{175-133} = \frac{35}{42} $ \\
+$ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{35}{42} * 500}{4000} \approx 0.104 \approx 10.4\% $ \\
+\end{enumerate}
+
+% (b)
+	\item
+All values are most likely unique (stored value count is 180, while maximum
+value is 175).
+\begin{enumerate}[label=\roman*)]
+	\item
+Because of the uniqueness assumption, the result set has size 1.\\
+$ selectivity = \frac{1}{180} \approx 0.006 \approx 0.6\% $\\
+	\item
+$ selectivity = (1 - \frac{1}{180}) \approx 0.994 \approx 99.4\% $\\
+\end{enumerate}
+
+Tradeoffs: we have to update the buckets distribution regularly, so we either
+work with outdated data or we update the buckets regularly.\\
+
+% (c)
+\item
+Known Distribution Table: \\
+\begin{tabular}{ c c c }
+ Value & Frequency & Occurrences \\
+ 5 & 11\% & 440 \\
+ 9 & 8\% & 320 \\
+ 15 & 8\% & 320 \\
+ 26 & 3\% & 120
+\end{tabular}
+
+\begin{enumerate}[label=\roman*)]
+\item
+Known Excluded by $ votes != 9 $: $320$ \\
+Known Included by $ votes < 30 $: $440 + 320 + 120 = 880$ \\
+Assumed uniform distributed values: $ numRecords - knownExcluded - knownIncluded = 4000 - 880 - 320 = 2800$\\
+\\
+Number of values below 30: $30$\\
+Number of known values below 30: $4$\\
+Average votes per record: $\frac{2800}{175-4} \approx 16.37$\\
+Assumed uniform values below 30:\\
+$avgVotesPerRecord * (30-4) = 16.37 * 26 \approx 425.73$\\
+
+Total values below 30: $knownIncluded + assumedUniformValues = 880 + 425.73 = 1305.73$\\
+$selectivity = totalValues / numRecords = 1305.73 / 4000 \approx 0.326 \approx 32.6\%$\\
+
+\item
+Known rows from first filter (votes = 15): 320\\
+Assumed rows $> 50$: $avgVotesPerRecord * (175-50) = 16.37 * 125 \approx 2046.78$\\
+Assume no overlaps, so just sum up both selectivities:\\
+$assumedRows = knownIsFifteen + assumedOverFifty = 320 + 2046.78 = 2366.78$\\
+$selectivity = assumedRows / numRecords = 2366.78 / 4000 = 0.5916 \approx 59.2\%$\\
+\end{enumerate}
+
+\item
+\begin{enumerate}[label=\roman*)]
+\item Selector over course:\\
+Filter: $coursename$ $=$ $\textquotedbl$ADBS$\textquotedbl$ $OR$ $ects > 6$\\
+\\
+Again assume no overlap (as ADBS has 6 ects), so OR just gets summed up.\\
+\\
+Assume uniform distribution over $course.coursename$:\\
+$numADBSCourses = (1 / distinctCourses) * numRows = (1/490) * 540 = 1.102$\\
+$bucketSize = (numRows / numBuckets) = 540 / 5 = 108$\\
+$numberOfBucketsOverSix = 1 + \frac{2}{3} = \frac{5}{3}$\\
+$numCoursesOverSixECTS = numberOfBucketsOverSix * bucketSize = \frac{5}{3} * 108 = 180$\\
+$selectivity = (1.102 + 180) / 540 = 181.102 / 540 = 0.335 \approx 33.5\%$\\
+\item Selector over room:\\
+Filter: $capacity < 300$ $AND$ $building$ $!=$ \textquotedbl$Freihaus$\textquotedbl\\
+\\
+$bucketSize = (numRows / numBuckets) = 1700 / 5 = 340$\\
+$bucketsBelow300 = 4.5$\\
+$roomsBelow300 = bucketsBelow300 * bucketSize = 4.5 * 340 = 1530$\\
+$remainingBuildings = 10$\\
+$roomsNotInFreihaus = remainingBuildings / numBuildings * numRooms = 1545.45$\\
+$selectivity = math.min(roomsBelow300, roomsNotInFreihaus) / numRooms$\\
+$selectivity = math.min(1530, 1545.45) / 1700 = 1530 / 1700 = 0.9 = 90\%$\\
+\end{enumerate}
+
+% (d)
+\item
+\begin{verbatim}
+Hash Join
+    Hash Cond: (reservation.room = room.name)
+    ->  Hash Join
+        Hash Cond: (reservation.course = course.courseid)
+        ->  Seq Scan on reservation
+        ->  Hash
+            -> Seq Scan on course
+               Filter: ((coursename)::text = 'ADBS'::text OR (ects)::integer > 6)
+    -> Hash
+       ->  Seq Scan on room
+           Filter: ((capacity)::integer < 300::integer AND
+                    (building)::text != 'Freihaus'::text)
+\end{verbatim}
+
+\item TODO
 \end{enumerate}
-- 
2.43.0