Hadoop Learning: A More Efficient Table Join in MapReduce Without Using Reduce


✌✌✌ As the ancients said, the palest ink beats the best memory, and a journey of a thousand miles begins with a single step. A thousand lines of code a day is a must, and so is a daily write-up. The goal is a big tech company; stay hopeful and you'll be unstoppable, hahaha!!! ✌✌✌


I. ✌ Problem Requirements

The record table:

ID    City Code    Air Quality Index
001   03           245
002   02           655
003   05           743
004   04           246
005   02           956
006   01           637
007   05           831
008   03           683
009   02           349

The city table:

City Code    City Name
01           长沙
02           株洲
03           湘潭
04           怀化
05           岳阳

Target table:

ID    City Name    Air Quality Index
001   湘潭         245
002   株洲         655
003   岳阳         743
004   怀化         246
005   株洲         956
006   长沙         637
007   岳阳         831
008   湘潭         683
009   株洲         349

II. ✌ Code Implementation
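
The idea is a map-side join. The city table is tiny, so rather than shuffling both tables to a Reduce phase, the Driver ships it to every map task as a cache file. Each mapper loads it into a HashMap once in setup(), joins every record row against that map in map(), and the job runs with zero reduce tasks, so the entire shuffle and sort step is skipped.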

1. ✌ The Bean Class

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Bean implements Writable {

    private String id;     // record ID
    private String pid;    // city code -- the join key
    private int amount;    // air quality index
    private String pname;  // city name, looked up from the cached city table
    private String type;   // source-table flag; unused in this map-side join but kept in the serialized form

    public Bean() {
        super();
    }

    public Bean(String id, String pid, int amount, String pname, String type) {
        this.id = id;
        this.pid = pid;
        this.amount = amount;
        this.pname = pname;
        this.type = type;
    }

    // With zero reduce tasks, the default TextOutputFormat writes each output key
    // via toString(), so this line format is exactly what lands in the result file.
    @Override
    public String toString() {
        return id + "\t" + pname + "\t\t" + amount;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getPid() {
        return pid;
    }

    public void setPid(String pid) {
        this.pid = pid;
    }

    public int getAmount() {
        return amount;
    }

    public void setAmount(int amount) {
        this.amount = amount;
    }

    public String getPname() {
        return pname;
    }

    public void setPname(String pname) {
        this.pname = pname;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    // Field order here must match readFields() exactly.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(id);
        out.writeUTF(pid);
        out.writeInt(amount);
        out.writeUTF(pname);
        out.writeUTF(type);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readUTF();
        pid = in.readUTF();
        amount = in.readInt();
        pname = in.readUTF();
        type = in.readUTF();
    }

}
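
Since the job runs without reducers, the default TextOutputFormat writes each output key by calling toString() (the NullWritable value contributes nothing), so toString() above determines the final file layout. A minimal sketch to check that layout locally; the BeanToStringDemo class name is mine, not part of the original code:

public class BeanToStringDemo {
    public static void main(String[] args) {
        // Mirrors record 001 after the join: city code 03 resolves to 湘潭
        Bean bean = new Bean("001", "03", 245, "湘潭", "");
        System.out.println(bean);  // prints (tabs shown as escapes): 001\t湘潭\t\t245
    }
}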

2. ✌ The Map Class

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;

public class Map extends Mapper<LongWritable, Text, Bean, NullWritable> {

    // In-memory copy of the small city table: city code -> city name
    private final HashMap<String, String> map = new HashMap<>();

    // setup() runs once per map task, before any map() call,
    // so the city table is read only once per task.
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {

        // Locate the file distributed via job.addCacheFile() in the Driver
        URI[] cacheFiles = context.getCacheFiles();
        String path = cacheFiles[0].getPath();

        // Read the cached city table line by line
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));

        String line;

        // Each line is "cityCode<TAB>cityName"; isNotEmpty() is null-safe, so the loop stops at EOF
        while (StringUtils.isNotEmpty(line = reader.readLine())) {
            String[] words = line.split("\t");
            map.put(words[0], words[1]);
        }

        reader.close();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // Each input line is "id<TAB>cityCode<TAB>airIndex"
        String line = value.toString();
        String[] words = line.split("\t");

        // The join itself: replace the city code with the city name from the in-memory map
        context.write(new Bean(words[0], words[1], Integer.parseInt(words[2]), map.get(words[1]), ""), NullWritable.get());
    }
}
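
For setup() to populate the map correctly, the cached file must contain the city table as tab-separated lines, one city per line, mirroring the city table above (a literal tab separates the two columns):

01	长沙
02	株洲
03	湘潭
04	怀化
05	岳阳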

3. ✌ The Driver Class

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;


public class Driver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {

        // Hard-coded local paths for running inside an IDE; on a cluster, take these from the
        // command line. Note that the output directory must not exist before the job runs.
        args = new String[]{"D:/input/inputword", "D:/output"};

        BasicConfigurator.configure();

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);

        job.setJarByClass(Driver.class);
        job.setMapperClass(Map.class);

        // Ship the small city table to every map task as a cache file
        job.addCacheFile(new URI("file:///D:/input/inputcache/pd.txt"));
        // Map-side join only: zero reduce tasks means no shuffle/sort phase at all
        job.setNumReduceTasks(0);

        job.setOutputKeyClass(Bean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);

    }

}
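
Assuming the record file sits under D:/input/inputword and the city table is the cached pd.txt, a successful run should leave one part-m-NNNNN file per map task in D:/output (map output files, since there are no reducers), and its lines should match the target table, for example:

001	湘潭		245
002	株洲		655
003	岳阳		743

and so on through record 009.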

