From 0f6f0a28d603d02063781235c62aa2b2ced20bc8 Mon Sep 17 00:00:00 2001 From: David Kaufmann Date: Mon, 8 Apr 2019 01:12:33 +0200 Subject: [PATCH] more cleanup --- ex1/main_3.tex | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/ex1/main_3.tex b/ex1/main_3.tex index 7aa5f22..c5b863b 100644 --- a/ex1/main_3.tex +++ b/ex1/main_3.tex @@ -2,29 +2,32 @@ \begin{enumerate}[label=(\alph*)] % (a) - \item +\item $ numGroups = 8 $ \\ $ numRecords = 4000 $ \\ $ bucketHeight = numRecords / numGroups = 500 $ \\ \begin{enumerate}[label=\roman*)] - \item +\item $ numBuckets = 3 + \frac{4}{7} = \frac{25}{7} $ \\ $ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{25}{7} * 500}{4000} \approx 0.446 \approx 44.6\% $ \\ - \item + +\item $ numBuckets = \frac{175-140}{175-133} = \frac{35}{42} $ \\ $ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{35}{42} * 500}{4000} \approx 0.104 \approx 10.4\% $ \\ \end{enumerate} % (b) - \item +\item All values are most likely unique (stored value count is 180, while maximum value is 175). + \begin{enumerate}[label=\roman*)] - \item +\item Because of the uniqueness assumption, the result set has size 1.\\ + $ selectivity = \frac{1}{180} \approx 0.006 \approx 0.6\% $\\ - \item +\item $ selectivity = (1 - \frac{1}{180}) \approx 0.994 \approx 99.4\% $\\ \end{enumerate} @@ -33,7 +36,8 @@ work with outdated data or we update the buckets regularly.\\ % (c) \item -Known Distribution Table: \\ +Known Distribution Table: + \begin{tabular}{ c c c } Value & Frequency & Occurrences \\ 5 & 11\% & 440 \\ @@ -47,7 +51,7 @@ Known Distribution Table: \\ Known Excluded by $votes$ $!=$ $9$: $320$ \\ Known Included by $votes$ $<$ $30$: $440 + 320 + 120 = 880$ \\ Assumed uniform distributed values: $ numRecords - knownExcluded - knownIncluded = 4000 - 880 - 320 = 2800$\\ -\\ + Number of values below 30: $30$\\ Number of known values below 30: $4$\\ Average rows per unique votes value:\\ @@ -94,7 +98,8 @@ $selectivity = math.min(roomsBelow300, roomsNotInFreihaus) / numRooms$\\ $selectivity = math.min(1530, 1545.45) / 1700 = 1530 / 1700 = 0.9 = 90\%$ \end{enumerate} -Join Order:\\ +Join Order: + Everything should be joined to $reservation$, as there are no common columns for $room$ and $course$. @@ -105,6 +110,20 @@ Therefor we'd join first $reservation$ and $course$, and then join to $room$. % (d) \item +Selected Join Strategies: + +Hash Join for join $reservation$ to $course$, this should +reduce the table space quite far. + +Both selections have to be done, because they are connected via OR. + +Hash Join for joining the result to $room$, as we don't know +if any of the results are sorted. + +First the restriction to room capacity is applied (excludes a +little bit more than the other restriction), then the restriction +to buildings other than ``Freihaus''. + \begin{verbatim} Hash Join Hash Cond: (reservation.room = room.name) @@ -119,6 +138,4 @@ Hash Join Filter: ((capacity)::integer < 300::integer AND (building)::text != 'Freihaus'::text) \end{verbatim} - -\item TODO \end{enumerate} -- 2.43.0