// Created as a template for Advanced Database Systems 2019 import java.io.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class SomeMapReduce_ex1a { public static class MyMapper extends Mapper { public void map(Object key, Text value, Context context) throws IOException, InterruptedException { String[] result = CSVSplitter.split(value.toString()); try{ context.write(new Text(result[7]), new TextPair(result[6].toString(), Integer.toString(Integer.parseInt(result[5])))); }catch(NumberFormatException e){} // not an integer (csv header line) or no value. } } public static class MyReducer extends Reducer { public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { TextPair max = null; for (TextPair val : values) { if (max == null){ max = val; continue; } if (Integer.parseInt(max.getSecond().toString()) < Integer.parseInt(val.getSecond().toString())){ max = val; continue; } } if(Integer.parseInt(max.getSecond().toString()) > 0) { context.write(key, max); } } } public static void main(String[] args) throws Exception { Configuration conf1 = new Configuration(); conf1.set("mapreduce.output.textoutputformat.separator",","); // This ensures that output is comma separated Job job = Job.getInstance(conf1); job.setJarByClass(SomeMapReduce_ex1a.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(TextPair.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setCombinerClass(MyReducer.class); // To allow the reducer to be used as a Combiner too // job.setNumReduceTasks(8); // Uncomment this to run the job with more than one Reduce tasks. Depending on the system, this may produce a speedup. FileInputFormat.setInputPaths(job, new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[0])); boolean status = job.waitForCompletion(true); if (status) { System.exit(0); } else { System.exit(1); } } }