From b19fb1a629ed643edaa1113fda8ea4e5014abbdf Mon Sep 17 00:00:00 2001 From: David Kaufmann Date: Sat, 6 Apr 2019 01:42:40 +0200 Subject: [PATCH] add most of ex1.3 --- ex1/main_3.tex | 109 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/ex1/main_3.tex b/ex1/main_3.tex index f202998..04a071a 100644 --- a/ex1/main_3.tex +++ b/ex1/main_3.tex @@ -1,6 +1,111 @@ %ex1.3 \begin{enumerate}[label=(\alph*)] - \item empty item oida! - +% (a) + \item +$ numGroups = 8 $ \\ +$ numRecords = 4000 $ \\ +$ bucketHeight = numRecords / numGroups = 500 $ \\ + +\begin{enumerate}[label=\roman*)] + \item +$ numBuckets = 3 + \frac{4}{7} = \frac{25}{7} $ \\ +$ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{25}{7} * 500}{4000} \approx 0.446 \approx 44.6\% $ \\ + \item +$ numBuckets = \frac{175-140}{175-133} = \frac{35}{42} $ \\ +$ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{35}{42} * 500}{4000} \approx 0.104 \approx 10.4\% $ \\ +\end{enumerate} + +% (b) + \item +All values are most likely unique (stored value count is 180, while maximum +value is 175). +\begin{enumerate}[label=\roman*)] + \item +Because of the uniqueness assumption, the result set has size 1.\\ +$ selectivity = \frac{1}{180} \approx 0.006 \approx 0.6\% $\\ + \item +$ selectivity = (1 - \frac{1}{180}) \approx 0.994 \approx 99.4\% $\\ +\end{enumerate} + +Tradeoffs: we have to update the buckets distribution regularly, so we either +work with outdated data or we update the buckets regularly.\\ + +% (c) +\item +Known Distribution Table: \\ +\begin{tabular}{ c c c } + Value & Frequency & Occurrences \\ + 5 & 11\% & 440 \\ + 9 & 8\% & 320 \\ + 15 & 8\% & 320 \\ + 26 & 3\% & 120 +\end{tabular} + +\begin{enumerate}[label=\roman*)] +\item +Known Excluded by $ votes != 9 $: $320$ \\ +Known Included by $ votes < 30 $: $440 + 320 + 120 = 880$ \\ +Assumed uniform distributed values: $ numRecords - knownExcluded - knownIncluded = 4000 - 880 - 320 = 2800$\\ +\\ +Number of values below 30: $30$\\ +Number of known values below 30: $4$\\ +Average votes per record: $\frac{2800}{175-4} \approx 16.37$\\ +Assumed uniform values below 30:\\ +$avgVotesPerRecord * (30-4) = 16.37 * 26 \approx 425.73$\\ + +Total values below 30: $knownIncluded + assumedUniformValues = 880 + 425.73 = 1305.73$\\ +$selectivity = totalValues / numRecords = 1305.73 / 4000 \approx 0.326 \approx 32.6\%$\\ + +\item +Known rows from first filter (votes = 15): 320\\ +Assumed rows $> 50$: $avgVotesPerRecord * (175-50) = 16.37 * 125 \approx 2046.78$\\ +Assume no overlaps, so just sum up both selectivities:\\ +$assumedRows = knownIsFifteen + assumedOverFifty = 320 + 2046.78 = 2366.78$\\ +$selectivity = assumedRows / numRecords = 2366.78 / 4000 = 0.5916 \approx 59.2\%$\\ +\end{enumerate} + +\item +\begin{enumerate}[label=\roman*)] +\item Selector over course:\\ +Filter: $coursename$ $=$ $\textquotedbl$ADBS$\textquotedbl$ $OR$ $ects > 6$\\ +\\ +Again assume no overlap (as ADBS has 6 ects), so OR just gets summed up.\\ +\\ +Assume uniform distribution over $course.coursename$:\\ +$numADBSCourses = (1 / distinctCourses) * numRows = (1/490) * 540 = 1.102$\\ +$bucketSize = (numRows / numBuckets) = 540 / 5 = 108$\\ +$numberOfBucketsOverSix = 1 + \frac{2}{3} = \frac{5}{3}$\\ +$numCoursesOverSixECTS = numberOfBucketsOverSix * bucketSize = \frac{5}{3} * 108 = 180$\\ +$selectivity = (1.102 + 180) / 540 = 181.102 / 540 = 0.335 \approx 33.5\%$\\ +\item Selector over room:\\ +Filter: $capacity < 300$ $AND$ $building$ $!=$ \textquotedbl$Freihaus$\textquotedbl\\ +\\ +$bucketSize = (numRows / numBuckets) = 1700 / 5 = 340$\\ +$bucketsBelow300 = 4.5$\\ +$roomsBelow300 = bucketsBelow300 * bucketSize = 4.5 * 340 = 1530$\\ +$remainingBuildings = 10$\\ +$roomsNotInFreihaus = remainingBuildings / numBuildings * numRooms = 1545.45$\\ +$selectivity = math.min(roomsBelow300, roomsNotInFreihaus) / numRooms$\\ +$selectivity = math.min(1530, 1545.45) / 1700 = 1530 / 1700 = 0.9 = 90\%$\\ +\end{enumerate} + +% (d) +\item +\begin{verbatim} +Hash Join + Hash Cond: (reservation.room = room.name) + -> Hash Join + Hash Cond: (reservation.course = course.courseid) + -> Seq Scan on reservation + -> Hash + -> Seq Scan on course + Filter: ((coursename)::text = 'ADBS'::text OR (ects)::integer > 6) + -> Hash + -> Seq Scan on room + Filter: ((capacity)::integer < 300::integer AND + (building)::text != 'Freihaus'::text) +\end{verbatim} + +\item TODO \end{enumerate} -- 2.43.0