package com.ccse.hadoop.cmd;
import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Runs the WordCount MapReduce job from the command line.
 * @author woshiccna
 */
public class WordCountApp extends Configured implements Tool {

    public static String INPUT_PATH = null;
    public static String OUTPUT_PATH = null;

    public static void main(String[] args) throws Exception {
        // Propagate the job's exit code so shell scripts can detect failure.
        System.exit(ToolRunner.run(new Configuration(), new WordCountApp(), args));
    }
    /**
     * Splits each input line into whitespace-separated tokens and emits (word, 1).
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private Text word = new Text();
        private LongWritable writable = new LongWritable(1);

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            if (value != null) {
                String line = value.toString();
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    word.set(tokenizer.nextToken());
                    context.write(word, writable);
                }
            }
        }
    }
    /**
     * Sums the counts emitted for each word.
     */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values,
                Reducer<Text, LongWritable, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                sum += value.get();
            }
            context.write(key, new LongWritable(sum));
        }
    }
    @Override
    public int run(String[] args) throws Exception {
        INPUT_PATH = args[0];
        OUTPUT_PATH = args[1];
        // Use the configuration prepared by ToolRunner so -D options from the
        // command line are honored, instead of creating a fresh Configuration.
        Configuration conf = getConf();

        // Remove any previous output directory, otherwise the job fails on startup.
        FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        Job job = new Job(conf, WordCountApp.class.getSimpleName());
        job.setJarByClass(WordCountApp.class);

        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        // Report success or failure through the Tool exit code.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
Run the MapReduce program from the command line with:
hadoop jar jar.jar com.ccse.hadoop.cmd.WordCountApp hdfs://chaoren1:9000/mapinput hdfs://chaoren1:9000/mapoutput
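
After the job completes, the result can be read straight from HDFS. As a purely illustrative sketch (the input contents below are hypothetical), an input file containing the line "hello world hello" would yield one reduce output file:

hadoop fs -cat hdfs://chaoren1:9000/mapoutput/part-r-00000
hello	2
world	1

The file name part-r-00000 comes from the default single reducer; each output line is a word and its total count separated by a tab, the default TextOutputFormat layout.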