ex2/mapreduce/SomeMapReduce_ex1a.java

   1 // Created as a template for  Advanced Database Systems 2019
   2
   3 import java.io.*;
   4 import org.apache.hadoop.conf.Configuration;
   5 import org.apache.hadoop.fs.Path;
   6 import org.apache.hadoop.io.*;
   7 import org.apache.hadoop.mapreduce.*;
   8 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
   9 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  10
  11 public class SomeMapReduce_ex1a {
  12     public static class MyMapper extends Mapper<Object, Text, Text, TextPair> {
  13         public static IntWritable one = new IntWritable(1);
  14
  15         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
  16             String[] result = CSVSplitter.split(value.toString());
  17             try{
  18                 context.write(new Text(result[7]), new TextPair(result[6].toString(), Integer.toString(Integer.parseInt(result[5]))));
  19             }catch(NumberFormatException e){} // not an integer (csv header line) or no value.
  20         }
  21     }
  22     public static class MyReducer extends Reducer<Text, TextPair, Text, TextPair> {
  23         public void reduce(Text key, Iterable<TextPair> values, Context context) throws IOException, InterruptedException {
  24             TextPair max = null;
  25             for (TextPair val : values) {
  26                 if (max == null){
  27                     max = val;
  28                     continue;
  29                 }
  30                 if (Integer.parseInt(max.getSecond().toString()) < Integer.parseInt(val.getSecond().toString())){
  31                     max = val;
  32                     continue;
  33                 }
  34             }
  35             if(Integer.parseInt(max.getSecond().toString()) > 0) {
  36                 context.write(key, max);
  37             }
  38         }
  39     }
  40     public static void main(String[] args) throws Exception {
  41         Configuration conf1 = new Configuration();
  42         conf1.set("mapreduce.output.textoutputformat.separator",",");  // This ensures that output is comma separated
  43         Job job = Job.getInstance(conf1);
  44         job.setJarByClass(SomeMapReduce_ex1a.class);
  45         job.setOutputKeyClass(Text.class);
  46         job.setOutputValueClass(TextPair.class);
  47         job.setMapperClass(MyMapper.class);
  48         job.setReducerClass(MyReducer.class);
  49         job.setCombinerClass(MyReducer.class); // To allow the reducer to be used as a Combiner too
  50 //      job.setNumReduceTasks(8);     // Uncomment this to run the job with more than one Reduce tasks. Depending on the system, this may produce a speedup.
  51         FileInputFormat.setInputPaths(job, new Path(args[0]));
  52         FileOutputFormat.setOutputPath(job, new Path(args[1]));
  53         boolean status = job.waitForCompletion(true);
  54         if (status) {
  55             System.exit(0);
  56         } else {
  57             System.exit(1);
  58         }
  59     }
  60 }