1 // Created as a template for Advanced Database Systems 2019
3 import java.io.IOException;
4 import org.apache.hadoop.conf.Configuration;
5 import org.apache.hadoop.fs.Path;
6 import org.apache.hadoop.io.IntWritable;
7 import org.apache.hadoop.io.LongWritable;
8 import org.apache.hadoop.io.Text;
9 import org.apache.hadoop.mapreduce.Job;
10 import org.apache.hadoop.mapreduce.Mapper;
11 import org.apache.hadoop.mapreduce.Reducer;
12 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
13 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15 public class SomeMapReduce_wc {
16 public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
17 public static IntWritable one = new IntWritable(1);
19 public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
20 String[] result = value.toString().split("\\W+");
21 for (String s : result) {
22 context.write(new Text(s.toLowerCase()), one);
26 public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
27 public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
29 for (IntWritable val : values) {
32 context.write(key, new IntWritable(sum));
35 public static void main(String[] args) throws Exception {
36 Configuration conf1 = new Configuration();
37 conf1.set("mapreduce.output.textoutputformat.separator",","); // This ensures that output is comma separated
38 Job job = Job.getInstance(conf1);
39 job.setJarByClass(SomeMapReduce_wc.class);
40 job.setOutputKeyClass(Text.class);
41 job.setOutputValueClass(IntWritable.class);
42 job.setMapperClass(MyMapper.class);
43 job.setReducerClass(MyReducer.class);
44 job.setCombinerClass(MyReducer.class); // To allow the reducer to be used as a Combiner too
// job.setNumReduceTasks(8); // Uncomment this to run the job with more than one reduce task. Depending on the system, this may produce a speedup.
46 FileInputFormat.setInputPaths(job, new Path(args[0]));
47 FileOutputFormat.setOutputPath(job, new Path(args[1]));
48 boolean status = job.waitForCompletion(true);