ex1/main_3.tex

   1 %ex1.3
   2
   3 \begin{enumerate}[label=(\alph*)]
   4 % (a)
   5         \item
   6 $ numGroups = 8 $ \\
   7 $ numRecords = 4000 $ \\
   8 $ bucketHeight = numRecords / numGroups = 500 $ \\
   9
  10 \begin{enumerate}[label=\roman*)]
  11         \item
  12 $ numBuckets = 3 + \frac{4}{7} = \frac{25}{7} $ \\
  13 $ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{25}{7} * 500}{4000} \approx 0.446 \approx 44.6\% $ \\
  14         \item
  15 $ numBuckets = \frac{175-140}{175-133} = \frac{35}{42} $ \\
  16 $ selectivity = \frac{numBuckets * bucketHeight}{numRecords} = \frac{\frac{35}{42} * 500}{4000} \approx 0.104 \approx 10.4\% $ \\
  17 \end{enumerate}
  18
  19 % (b)
  20         \item
  21 All values are most likely unique (stored value count is 180, while maximum
  22 value is 175).
  23 \begin{enumerate}[label=\roman*)]
  24         \item
  25 Because of the uniqueness assumption, the result set has size 1.\\
  26 $ selectivity = \frac{1}{180} \approx 0.006 \approx 0.6\% $\\
  27         \item
  28 $ selectivity = (1 - \frac{1}{180}) \approx 0.994 \approx 99.4\% $\\
  29 \end{enumerate}
  30
  31 Tradeoffs: we have to update the buckets distribution regularly, so we either
  32 work with outdated data or we update the buckets regularly.\\
  33
  34 % (c)
  35 \item
  36 Known Distribution Table: \\
  37 \begin{tabular}{ c c c }
  38  Value & Frequency & Occurrences \\
  39  5 & 11\% & 440 \\
  40  9 & 8\% & 320 \\
  41  15 & 8\% & 320 \\
  42  26 & 3\% & 120
  43 \end{tabular}
  44
  45 \begin{enumerate}[label=\roman*)]
  46 \item
  47 Known Excluded by $ votes != 9 $: $320$ \\
  48 Known Included by $ votes < 30 $: $440 + 320 + 120 = 880$ \\
  49 Assumed uniform distributed values: $ numRecords - knownExcluded - knownIncluded = 4000 - 880 - 320 = 2800$\\
  50 \\
  51 Number of values below 30: $30$\\
  52 Number of known values below 30: $4$\\
  53 Average votes per record: $\frac{2800}{175-4} \approx 16.37$\\
  54 Assumed uniform values below 30:\\
  55 $avgVotesPerRecord * (30-4) = 16.37 * 26 \approx 425.73$\\
  56
  57 Total values below 30: $knownIncluded + assumedUniformValues = 880 + 425.73 = 1305.73$\\
  58 $selectivity = totalValues / numRecords = 1305.73 / 4000 \approx 0.326 \approx 32.6\%$\\
  59
  60 \item
  61 Known rows from first filter (votes = 15): 320\\
  62 Assumed rows $> 50$: $avgVotesPerRecord * (175-50) = 16.37 * 125 \approx 2046.78$\\
  63 Assume no overlaps, so just sum up both selectivities:\\
  64 $assumedRows = knownIsFifteen + assumedOverFifty = 320 + 2046.78 = 2366.78$\\
  65 $selectivity = assumedRows / numRecords = 2366.78 / 4000 = 0.5916 \approx 59.2\%$\\
  66 \end{enumerate}
  67
  68 \item
  69 \begin{enumerate}[label=\roman*)]
  70 \item Selector over course:\\
  71 Filter: $coursename$ $=$ $\textquotedbl$ADBS$\textquotedbl$ $OR$ $ects > 6$\\
  72 \\
  73 Again assume no overlap (as ADBS has 6 ects), so OR just gets summed up.\\
  74 \\
  75 Assume uniform distribution over $course.coursename$:\\
  76 $numADBSCourses = (1 / distinctCourses) * numRows = (1/490) * 540 = 1.102$\\
  77 $bucketSize = (numRows / numBuckets) = 540 / 5 = 108$\\
  78 $numberOfBucketsOverSix = 1 + \frac{2}{3} = \frac{5}{3}$\\
  79 $numCoursesOverSixECTS = numberOfBucketsOverSix * bucketSize = \frac{5}{3} * 108 = 180$\\
  80 $selectivity = (1.102 + 180) / 540 = 181.102 / 540 = 0.335 \approx 33.5\%$\\
  81 \item Selector over room:\\
  82 Filter: $capacity < 300$ $AND$ $building$ $!=$ \textquotedbl$Freihaus$\textquotedbl\\
  83 \\
  84 $bucketSize = (numRows / numBuckets) = 1700 / 5 = 340$\\
  85 $bucketsBelow300 = 4.5$\\
  86 $roomsBelow300 = bucketsBelow300 * bucketSize = 4.5 * 340 = 1530$\\
  87 $remainingBuildings = 10$\\
  88 $roomsNotInFreihaus = remainingBuildings / numBuildings * numRooms = 1545.45$\\
  89 $selectivity = math.min(roomsBelow300, roomsNotInFreihaus) / numRooms$\\
  90 $selectivity = math.min(1530, 1545.45) / 1700 = 1530 / 1700 = 0.9 = 90\%$\\
  91 \end{enumerate}
  92
  93 % (d)
  94 \item
  95 \begin{verbatim}
  96 Hash Join
  97     Hash Cond: (reservation.room = room.name)
  98     ->  Hash Join
  99         Hash Cond: (reservation.course = course.courseid)
 100         ->  Seq Scan on reservation
 101         ->  Hash
 102             -> Seq Scan on course
 103                Filter: ((coursename)::text = 'ADBS'::text OR (ects)::integer > 6)
 104     -> Hash
 105        ->  Seq Scan on room
 106            Filter: ((capacity)::integer < 300::integer AND
 107                     (building)::text != 'Freihaus'::text)
 108 \end{verbatim}
 109
 110 \item TODO
 111 \end{enumerate}