// Created as a template for Advanced Database Systems 2019
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
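// CSVSplitter (CSV-aware line splitting) and TextPair (a Writable pair of Text fields,
// assumed here to expose getFirst()/getSecond()) are helper classes expected to be
// supplied alongside this template; they are not part of the Hadoop API.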
public class SomeMapReduce_ex1a {
    public static class MyMapper extends Mapper<Object, Text, Text, TextPair> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] result = CSVSplitter.split(value.toString());
            try {
                // Emit key = field 7 of the record, value = (field 6, field 5); parseInt checks that field 5 is numeric (indices are 0-based).
                context.write(new Text(result[7]), new TextPair(result[6], Integer.toString(Integer.parseInt(result[5]))));
            } catch (NumberFormatException e) {
                // Not an integer (CSV header line) or missing value: skip the record.
            }
        }
    }
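    // The reducer (also reused as the combiner below) keeps, for each key, the value
    // pair whose numeric second field is largest.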
    public static class MyReducer extends Reducer<Text, TextPair, Text, TextPair> {
        public void reduce(Text key, Iterable<TextPair> values, Context context) throws IOException, InterruptedException {
            TextPair max = new TextPair("", "0");
            for (TextPair val : values) {
                if (Integer.parseInt(max.getSecond().toString()) < Integer.parseInt(val.getSecond().toString())) {
                    // Copy the fields: Hadoop reuses the same value object on each
                    // iteration, so holding a reference to val would be overwritten.
                    max = new TextPair(val.getFirst().toString(), val.getSecond().toString());
                }
            }
            if (Integer.parseInt(max.getSecond().toString()) > 0) {
                context.write(key, max);
            }
        }
    }
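    // Driver: configures and submits the job. Note the argument order used below:
    // args[0] is the output path and args[1] the input path.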
    public static void main(String[] args) throws Exception {
        Configuration conf1 = new Configuration();
        conf1.set("mapreduce.output.textoutputformat.separator", ","); // Make the output comma-separated.
        Job job = Job.getInstance(conf1);
        job.setJarByClass(SomeMapReduce_ex1a.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TextPair.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setCombinerClass(MyReducer.class); // The reducer is safe to reuse as a combiner: max is associative and its input/output types match.
        // job.setNumReduceTasks(8); // Uncomment to run with more than one reduce task; depending on the system, this may give a speedup.
        FileInputFormat.setInputPaths(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        boolean status = job.waitForCompletion(true);
        System.exit(status ? 0 : 1); // Exit non-zero if the job failed.
    }
}
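// Example invocation (illustrative; the jar name and paths are placeholders):
//   hadoop jar SomeMapReduce.jar SomeMapReduce_ex1a <output_dir> <input_path>
// The output directory must not already exist, or FileOutputFormat will reject the job.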