定义框架接口
由具体实现类实现
1. public interface Tool extends Configurable {
2. int run(String [] args) throws Exception;
3. }
ToolRunner
统一的入口调用
按配置解析参数,调用接口方法
1. public static int run(Configuration conf, Tool tool, String[] args)
2. throws Exception {
3. if(conf == null) {
4. conf = new Configuration();
5. }
6. GenericOptionsParser parser = new GenericOptionsParser(conf, args);
7. //set the configuration back, so that Tool can configure itself
8. tool.setConf(conf);
9.
10. //get the args w/o generic hadoop args
11. String[] toolArgs = parser.getRemainingArgs();
12. return tool.run(toolArgs);
13. }
Mahout 中具体调用示例
1. public static void main(String[] args) throws Exception {
2. ToolRunner.run(new Configuration(), new MinHashDriver(), args);
3. }
覆盖方法,提取参数,调用核心方法
1. @Override
2. public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
3. addInputOption();
4. addOutputOption();
5. //...........
6. runJob(input,
7. output,
8. minClusterSize,
9. minVectorSize,
10. hashType,
11. numHashFunctions,
12. keyGroups,
13. numReduceTasks,
14. debugOutput);
15. return 0;
16. }
核心方法,配置job,开始map reduce任务
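1. private void runJob(Path input,
2. Path output,
3. int minClusterSize,
4. int minVectorSize,
5. String hashType,
6. int numHashFunctions,
7. int keyGroups,
8. int numReduceTasks,
9. boolean debugOutput) throws IOException, ClassNotFoundException, InterruptedException {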
10. Configuration conf = getConf();
11.
12. //配置参数设置........................
13. Job job = new Job(conf, "MinHash Clustering");
14. job.setJarByClass(MinHashDriver.class);
15.
16. //Job参数设置.........................
17. job.waitForCompletion(true);
18. }