]> git.somenet.org - pub/jan/adbs.git/blob - ex2/main_1.tex
[ex3.1] communication costs.
[pub/jan/adbs.git] / ex2 / main_1.tex
1 %ex2.1
2
3 \begin{enumerate}[label=(\alph*)]
4         \item\textbf{Write a MapReduce job which takes as input the checkout records and
5                 computes the following:\\
6                 For each author, list the title that was checked out (borrowed) the most}\\     
7
8         Compile and run:
9         \begin{verbatim}rsync -vaPp --delete ~/gitstuff/adbs/ex2/mapreduce/ \
10             e726236f@lbd.zserv.tuwien.ac.at:mapreduce/; \
11         ssh -t e726236f@lbd.zserv.tuwien.ac.at "cd mapreduce; ./build_run.sh '_ex1a' \
12             '/user/adbs/2019S/shared/seattle-checkouts-by-title/checkouts-by-title.csv'"
13         \end{verbatim}
14         
15         Results:\\      
16         http://localhost:19888/jobhistory/job/job\_1557406089646\_5204\\
17         Tasks:\\
18         Map: 53\\
19         Reduce: 1\\
20         HDFS: Number of bytes read: 7118531360\\
21         HDFS: Number of bytes written: 28695222\\
22         Map input records: 32723546\\
23         Reduce output records: 324418\\
24         
25         \item\textbf{Write a MapReduce job which takes as input both the library inventory
26                 and the checkout records and computes the following:\\
27                 For each author, list the title that was checked out (borrowed) the most}\\
28         
29         Compile and run:
30         \begin{verbatim}rsync -vaPp --delete ~/gitstuff/adbs/ex2/mapreduce/ \
31             e726236f@lbd.zserv.tuwien.ac.at:mapreduce/; \
32         ssh -t e726236f@lbd.zserv.tuwien.ac.at "cd mapreduce; ./build_run.sh '_ex2a' \
33             '/user/adbs/2019S/shared/seattle-checkouts-by-title/checkouts-by-title.csv' \
34             '/user/adbs/2019S/shared/seattle-library-collection-inventory/library-collection-inventory.csv'"
35         \end{verbatim}
36         
37         Results:\\      
38         http://localhost:19888/jobhistory/job/job\_1557406089646\_5309\\
39         Map: 109\\
40         Reduce: 1\\
41         HDFS: Number of bytes read: 14621528936\\
42         HDFS: Number of bytes written: 52724095\\
43         Map input records: 55563180\\
44         Reduce output records: 183399\\
45 \end{enumerate}