0
点赞
收藏
分享

微信扫一扫

使用 Apache POI 将 Word(.doc)文档转换为 HTML

fbd4ffd0717b 2022-03-27 阅读 50
java
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * Converts binary Word documents ({@code .doc}, read through {@link HWPFDocument}) to an
 * HTML file using Apache POI's {@link WordToHtmlConverter}.
 *
 * <p>Embedded pictures are written to a directory that sits next to the generated HTML file
 * and is named after it (for {@code report.html} the directory is {@code report}). The HTML
 * refers to the pictures through the relative path {@code <directory>/<picture file name>}.
 */
public class WordToHtml {
    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the source {@code .doc} path and
     *             {@code args[1]} the target HTML path; when fewer than two arguments are
     *             given the built-in sample paths are used
     * @throws IOException                  if reading the document or writing any output fails
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException         if the HTML DOM cannot be serialized
     */
    public static void main(String[] args) throws IOException, ParserConfigurationException, TransformerException {
        String source = "F:\\电脑桌面\\新建文件夹\\电商实操练习题1.doc";
        String target = "F:\\电脑桌面\\新建文件夹\\电商实操练习题1.html";
        if (args != null && args.length >= 2) {
            source = args[0];
            target = args[1];
        }
        convert2Html(source, target);
    }

    /**
     * Writes {@code context} to the file at {@code path}, encoded as UTF-8, replacing any
     * existing content.
     *
     * <p>This method is best-effort: it declares no checked exceptions, so an I/O failure is
     * reported with {@link Throwable#printStackTrace()} and otherwise ignored.
     *
     * @param context text to write
     * @param path    path of the file to create or overwrite
     */
    public static void writerFile(String context, String path) {
        try (FileOutputStream fos = new FileOutputStream(new File(path));
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
            bw.write(context);
        } catch (IOException e) {
            // Kept as report-and-continue so existing callers, which cannot catch a checked
            // exception from this signature, see the same behavior as before.
            e.printStackTrace();
        }
    }

    /**
     * Converts a Word document held in memory to an HTML file.
     *
     * @param bytes      raw content of the {@code .doc} file
     * @param outPutFile path of the HTML file to write; the picture directory is derived
     *                   from it
     * @throws Exception if the document cannot be parsed, or the pictures or the HTML file
     *                   cannot be written
     */
    public static void convert2Html1(byte[] bytes, String outPutFile) throws Exception {
        convert(new ByteArrayInputStream(bytes), outPutFile);
    }

    /**
     * Converts a Word document stored on disk to an HTML file.
     *
     * @param fileName   path of the source {@code .doc} file
     * @param outPutFile path of the HTML file to write; the picture directory is derived
     *                   from it
     * @throws IOException                  if the source cannot be read, or the pictures or
     *                                      the HTML file cannot be written
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException         if the HTML DOM cannot be serialized
     */
    public static void convert2Html(String fileName, String outPutFile) throws IOException, ParserConfigurationException, TransformerException {
        // The input stream is closed even when the conversion fails part-way.
        try (InputStream in = new FileInputStream(fileName)) {
            convert(in, outPutFile);
        }
    }

    /**
     * Shared conversion pipeline: parses the document, saves its pictures and serializes
     * the resulting HTML DOM to {@code outPutFile} as UTF-8.
     * The caller remains responsible for closing {@code wordStream}.
     */
    private static void convert(InputStream wordStream, String outPutFile)
            throws IOException, ParserConfigurationException, TransformerException {
        File htmlFile = new File(outPutFile);
        File imageDir = imageDirFor(htmlFile);
        // File.getName() understands the platform separator, so Windows paths written with
        // backslashes yield just the folder name instead of the whole absolute path.
        final String imageDirName = imageDir.getName();
        if (!imageDir.mkdirs() && !imageDir.isDirectory()) {
            throw new IOException("Cannot create picture directory: " + imageDir);
        }

        HWPFDocument wordDocument = new HWPFDocument(wordStream);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                                      float widthInches, float heightInches) {
                // Only the link is produced here; the picture files themselves are written
                // by the loop below. Always '/' because this value ends up in the HTML.
                return imageDirName + "/" + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                File target = new File(imageDir, pic.suggestFullFileName());
                try (OutputStream fileout = new FileOutputStream(target)) {
                    pic.writeImageContent(fileout);
                }
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        // Stream the serializer's UTF-8 bytes straight to the file. Decoding them into a
        // String with the platform default charset first would corrupt non-ASCII text on
        // JVMs whose default charset is not UTF-8.
        try (OutputStream out = new BufferedOutputStream(new FileOutputStream(htmlFile))) {
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        }
    }

    /**
     * Returns the picture directory for {@code htmlFile}: a sibling directory named after
     * the file without its final extension. A name without an extension gets the suffix
     * {@code _files} so that the directory never collides with the HTML file itself.
     */
    private static File imageDirFor(File htmlFile) {
        String fileName = htmlFile.getName();
        int dot = fileName.lastIndexOf('.');
        String dirName = dot > 0 ? fileName.substring(0, dot) : fileName + "_files";
        // A null parent (bare relative file name) is accepted by this constructor.
        return new File(htmlFile.getParentFile(), dirName);
    }
}
举报

相关推荐

0 条评论