public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  // For each key-value pair, the mapper calls map() once, reading in one key-value pair per call.
  StringTokenizer itr = new StringTokenizer(value.toString());
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    // The key is the word, and it is also the key the reducer reads; the pair is written to the "intermediate" output.
    context.write(word, one);
  }
}
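// Note: `word` and `one` above are fields of the enclosing mapper class, not locals. A minimal sketch of that class, following the structure of the linked WordCount example:
public static class TokenizerMapper
    extends Mapper<Object, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1); // constant count of 1, reused for every token
  private Text word = new Text();                            // reusable key object, reset for each token
  // ... the map() method above lives here ...
}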
// Intermediate (shuffle) phase: words/keys with the same hash value are collected together, producing the key -> list-of-values structure that the reducer reads.
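// For example, if the map phase emits (hello, 1), (world, 1), (hello, 1), the shuffle delivers (hello, [1, 1]) and (world, [1]) to the reducer.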
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  // For each key, the reducer calls reduce() once, reading in one key and its values; values that shared the same key in the previous step arrive as one list (values).
  int sum = 0;
  for (IntWritable val : values) {
    sum += val.get(); // each value is a 1, so this counts occurrences
  }
  result.set(sum);
  // Written to the specified (single) output file on HDFS.
  context.write(key, result);
}
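// Note: `result` above is a field of the enclosing reducer class. A minimal sketch, again following the linked example:
public static class IntSumReducer
    extends Reducer<Text, IntWritable, Text, IntWritable> {
  private IntWritable result = new IntWritable(); // reusable output value object
  // ... the reduce() method above lives here ...
}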
Full example: https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java
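To actually run the map and reduce above, a driver sets up the Job. Below is a minimal driver sketch following the structure and API calls of the linked WordCount example (input and output paths come from the command line):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
  // TokenizerMapper and IntSumReducer (sketched above) are nested classes here in the original example.

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class); // optional: pre-aggregates (word, 1) pairs on the map side
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory on HDFS
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output directory on HDFS (must not already exist)
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}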