Codec
A codec implements a compression-decompression algorithm. In Hadoop, a codec is represented by an implementation of the CompressionCodec interface. The codecs that ship with Hadoop are listed below:
DEFLATE: org.apache.hadoop.io.compress.DefaultCodec
gzip: org.apache.hadoop.io.compress.GzipCodec
bzip2: org.apache.hadoop.io.compress.BZip2Codec
LZ4: org.apache.hadoop.io.compress.Lz4Codec
Snappy: org.apache.hadoop.io.compress.SnappyCodec
Compressing and decompressing a stream with CompressionCodec
The CompressionCodec interface provides two key methods for compressing and decompressing data: createOutputStream() wraps a raw output stream so that data written to it is compressed, and createInputStream() wraps a raw input stream so that data read from it is decompressed.
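The relevant signatures, abridged from org.apache.hadoop.io.compress.CompressionCodec (overloads that take an explicit Compressor or Decompressor are omitted here):

public interface CompressionCodec {
    // wrap 'out' so bytes written to the returned stream are compressed
    CompressionOutputStream createOutputStream(OutputStream out) throws IOException;
    // wrap 'in' so bytes read from the returned stream are decompressed
    CompressionInputStream createInputStream(InputStream in) throws IOException;
    // ... getDefaultExtension() and compressor/decompressor factory methods omitted
}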
Example 1: compressing a file
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

public class FileCompress {
    public static void main(String[] args) throws Exception {
        Class<?> codecClass = Class.forName("org.apache.hadoop.io.compress.GzipCodec");
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        // destination path for the compressed file
        FSDataOutputStream outputStream = fs.create(new Path("hdfs://mycluster:9000/user/hadoop/text.gz"));
        // path of the file to be compressed
        FSDataInputStream in = fs.open(new Path("hdfs://mycluster:9000/user/hadoop/aa.txt"));
        // wrap the raw output stream in a compressing stream
        CompressionOutputStream out = codec.createOutputStream(outputStream);
        IOUtils.copyBytes(in, out, conf);
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}
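One caveat: IOUtils.closeStream(out) closes the CompressionOutputStream, which in turn closes the underlying HDFS stream. If you need to end the compressed data while keeping the underlying stream open, CompressionOutputStream provides finish(). A minimal local sketch, assuming a hypothetical local file demo.gz:

import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class FinishDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        OutputStream raw = new FileOutputStream("demo.gz"); // hypothetical local path
        CompressionOutputStream out = codec.createOutputStream(raw);
        out.write("hello codec".getBytes(StandardCharsets.UTF_8));
        out.finish(); // ends the compressed stream but leaves 'raw' open
        raw.close();  // we close the underlying stream ourselves
    }
}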
Example 2: decompressing a file
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.util.ReflectionUtils;

public class FileUnCompress {
    public static void main(String[] args) throws Exception {
        Class<?> codecClass = Class.forName("org.apache.hadoop.io.compress.GzipCodec");
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        // wrap the raw input stream in a decompressing stream and copy to stdout
        CompressionInputStream in = codec.createInputStream(
                fs.open(new Path("hdfs://mycluster:9000/user/hadoop/text.gz")));
        IOUtils.copyBytes(in, System.out, conf);
        IOUtils.closeStream(in);
    }
}
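In a long-running process that opens many compressed streams, allocating a fresh compressor or decompressor per stream is wasteful. Hadoop's CodecPool lets you borrow and return them. A sketch of Example 2 rewritten with a pooled Decompressor, under the same hypothetical HDFS paths as above (CodecPool.getCompressor() works the same way on the compression side):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class PooledFileUnCompress {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        // borrow a reusable Decompressor from the pool instead of allocating one
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        try {
            CompressionInputStream in = codec.createInputStream(
                    fs.open(new Path("hdfs://mycluster:9000/user/hadoop/text.gz")), decompressor);
            IOUtils.copyBytes(in, System.out, conf);
            IOUtils.closeStream(in);
        } finally {
            // hand the Decompressor back to the pool for reuse
            CodecPool.returnDecompressor(decompressor);
        }
    }
}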
Decompressing a stream with CompressionCodecFactory
The two examples above hard-code GzipCodec. CompressionCodecFactory can instead infer the right codec from a file's extension (text.gz maps to GzipCodec), so the same program can handle any registered compression format.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;

public class FileUnCompress {
    public static void main(String[] args) throws Exception {
        String uri = "hdfs://mycluster:9000/user/hadoop/text.gz";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        Path inputPath = new Path(uri);
        // pick the codec that matches the file's extension (.gz -> GzipCodec)
        CompressionCodec codec = factory.getCodec(inputPath);
        if (codec == null) {
            System.out.println("No codec found for " + uri);
            System.exit(1);
        }
        // strip the codec's default extension from the file name
        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
        CompressionInputStream in = codec.createInputStream(fs.open(inputPath));
        System.out.println(outputUri); // hdfs://mycluster:9000/user/hadoop/text
        IOUtils.copyBytes(in, System.out, conf);
        IOUtils.closeStream(in);
    }
}
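The example above only prints outputUri; a natural extension is to write the decompressed data to that path instead of System.out. A sketch under the same assumptions (the class name FileUnCompressToFile is ours):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;

public class FileUnCompressToFile {
    public static void main(String[] args) throws Exception {
        String uri = "hdfs://mycluster:9000/user/hadoop/text.gz";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        Path inputPath = new Path(uri);
        CompressionCodec codec = factory.getCodec(inputPath);
        if (codec == null) {
            System.err.println("No codec found for " + uri);
            System.exit(1);
        }
        // write the decompressed bytes to the suffix-stripped path, e.g. .../text
        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
        CompressionInputStream in = codec.createInputStream(fs.open(inputPath));
        FSDataOutputStream out = fs.create(new Path(outputUri));
        IOUtils.copyBytes(in, out, conf);
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}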