Fill a Word template with Apache POI, convert it to PDF with documents4j, and render the PDF to images with PDFBox
I found many examples online, but most of them were overly complicated or simply did not work, and it took me a day to sort everything out. Without further ado, here is the code.
Required dependencies
<!--
  NOTE: org.apache.pdfbox:pdfbox must be declared only once. The utility class uses
  org.apache.pdfbox.Loader, which only exists in PDFBox 3.x, so the 3.0.0-alpha2
  declaration further down is the one that is kept.
-->
<!-- NOTE(review): pdfbox-tools 2.0.27 belongs to the PDFBox 2.x line and is not referenced
     by the utility class; confirm it is really needed next to pdfbox 3.0.0-alpha2. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.27</version>
</dependency>

<!-- fill word -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.17</version>
</dependency>

<!-- word to pdf, fill word -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.17</version>
</dependency>

<!-- word to pdf -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.poi.xwpf.converter.pdf-gae</artifactId>
    <version>2.0.1</version>
</dependency>
<dependency>
    <groupId>com.documents4j</groupId>
    <artifactId>documents4j-local</artifactId>
    <version>1.0.3</version>
</dependency>
<dependency>
    <groupId>com.documents4j</groupId>
    <artifactId>documents4j-transformer-msoffice-word</artifactId>
    <version>1.0.3</version>
</dependency>

<!-- pdf to picture -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>3.0.0-alpha2</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>fontbox</artifactId>
    <version>3.0.0-alpha2</version>
</dependency>
Utility class
package com.xxx.common.utils;

import java.awt.image.BufferedImage;
import java.io.FileInputStream;
import java.io.FileOutputStream;

import com.documents4j.api.DocumentType;
import com.documents4j.api.IConverter;
import com.documents4j.job.LocalConverter;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.xwpf.usermodel.*;

import javax.imageio.ImageIO;
import java.io.*;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Helpers for filling {@code ${key}} placeholders in a .docx template, converting the
 * resulting document to PDF through documents4j, and rendering a PDF to PNG images
 * through PDFBox.
 */
@Slf4j
public class WordUtils {

    /**
     * Fills the placeholders of a .docx template and writes the result to {@code outPath}.
     *
     * <p>Body paragraphs and the cells of top-level tables are processed. Missing parent
     * directories of {@code outPath} are created.
     *
     * @param path    template path
     * @param outPath output path
     * @param dict    placeholder-to-value map, keyed by the full placeholder text
     *                (e.g. {@code "${name}"}); when {@code null} the template is copied unchanged
     * @throws Exception if the template cannot be read or the output cannot be written
     */
    public static void compile(String path, String outPath, Map<String, Object> dict) throws Exception {
        // try-with-resources: the stream and the document are released even when filling fails.
        try (FileInputStream is = new FileInputStream(path);
             XWPFDocument document = new XWPFDocument(is)) {
            if (dict != null) {
                // Replace text outside the tables (text only)
                compileText(document, dict);
                // Replace text inside the tables
                compileTable(document, dict);
            }

            // getParentFile() copes with paths that have no separator or use "/" on Windows,
            // where substring(0, lastIndexOf(File.separator)) would throw.
            File parent = new File(outPath).getAbsoluteFile().getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }

            try (FileOutputStream out = new FileOutputStream(outPath)) {
                document.write(out);
            }
        }
    }

    /**
     * Replaces the placeholders found in the body paragraphs of the document.
     *
     * @param document parsed .docx document
     * @param dict     placeholder-to-value map; must not be {@code null}
     */
    public static void compileText(XWPFDocument document, Map<String, Object> dict) {
        Iterator<XWPFParagraph> iterator = document.getParagraphsIterator();
        while (iterator.hasNext()) {
            XWPFParagraph paragraph = iterator.next();
            // Only touch paragraphs that can contain a placeholder
            if (checkText(paragraph.getText())) {
                replaceValue(paragraph, dict);
            }
        }
    }

    /**
     * Replaces the placeholders found in the cells of the document's tables.
     *
     * @param document parsed .docx document
     * @param dict     placeholder-to-value map; must not be {@code null}
     */
    public static void compileTable(XWPFDocument document, Map<String, Object> dict) {
        Iterator<XWPFTable> tables = document.getTablesIterator();
        while (tables.hasNext()) {
            XWPFTable table = tables.next();
            if (!checkText(table.getText())) {
                continue;
            }
            for (XWPFTableRow row : table.getRows()) {
                for (XWPFTableCell cell : row.getTableCells()) {
                    // Only touch cells that can contain a placeholder
                    if (!checkText(cell.getText())) {
                        continue;
                    }
                    for (XWPFParagraph paragraph : cell.getParagraphs()) {
                        replaceValue(paragraph, dict);
                    }
                }
            }
        }
    }

    /**
     * Replaces every {@code ${key}} placeholder in the paragraph with its value from
     * {@code dict}; placeholders missing from the map (or mapped to {@code null}) are
     * replaced with an empty string.
     *
     * <p>Word may split one placeholder over several runs. When a placeholder is not closed
     * inside the current run, the following runs are merged into it (and removed from the
     * paragraph) until the closing brace is found. Runs without any placeholder are left
     * untouched, so a literal {@code $} in the text is preserved.
     *
     * @param paragraph paragraph to process
     * @param dict      placeholder-to-value map, keyed by the full placeholder text
     */
    private static void replaceValue(XWPFParagraph paragraph, Map<String, Object> dict) {
        // getRuns() is re-read on every access because removeRun() shrinks the run list.
        for (int i = 0; i < paragraph.getRuns().size(); i++) {
            String text = runText(paragraph, i);
            if (!text.contains("$")) {
                continue;
            }

            StringBuilder result = new StringBuilder();
            boolean modified = false;
            int pos = 0; // start of the part of text not yet copied into result
            int dollar;
            while ((dollar = text.indexOf('$', pos)) >= 0) {
                // The run may end right after "$" while "{" opens the next run.
                if (dollar == text.length() - 1 && i + 1 < paragraph.getRuns().size()) {
                    String next = runText(paragraph, i + 1);
                    if (next.startsWith("{")) {
                        text += next;
                        paragraph.removeRun(i + 1);
                        modified = true;
                    }
                }

                if (!text.startsWith("${", dollar)) {
                    // Literal dollar sign: copy it through and keep scanning.
                    result.append(text, pos, dollar + 1);
                    pos = dollar + 1;
                    continue;
                }

                // Pull in following runs until the placeholder is closed.
                int close = text.indexOf('}', dollar);
                while (close < 0 && i + 1 < paragraph.getRuns().size()) {
                    text += runText(paragraph, i + 1);
                    paragraph.removeRun(i + 1);
                    modified = true;
                    close = text.indexOf('}', dollar);
                }
                if (close < 0) {
                    // Unterminated placeholder: keep the remaining text verbatim.
                    break;
                }

                Object value = dict.get(text.substring(dollar, close + 1));
                result.append(text, pos, dollar);
                if (value != null) {
                    result.append(value);
                }
                // Continue after the placeholder; inserted values are never re-scanned,
                // so a value containing "$" cannot cause an endless loop.
                pos = close + 1;
                modified = true;
            }

            if (modified) {
                result.append(text, pos, text.length());
                paragraph.getRuns().get(i).setText(result.toString(), 0);
            }
        }
    }

    /** Returns the text of the run at {@code index}, or an empty string when it has none. */
    private static String runText(XWPFParagraph paragraph, int index) {
        String text = paragraph.getRuns().get(index).text();
        return text == null ? "" : text;
    }

    /**
     * Checks whether the text can contain a placeholder, i.e. contains {@code "$"}.
     *
     * @param text text to check, may be {@code null}
     * @return {@code true} if the text contains {@code "$"}
     */
    private static boolean checkText(String text) {
        return text != null && text.contains("$");
    }

    /**
     * Converts a .docx document to PDF through documents4j.
     *
     * <p>Failures are logged and not rethrown. The streams and the converter are released
     * whether or not the conversion succeeds.
     *
     * @param sourcePath source file address such as /root/example.docx
     * @param targetPath target file address such as /root/example.pdf
     */
    public static void documents4jWordToPdf(String sourcePath, String targetPath) {
        File inputWord = new File(sourcePath);
        File outputFile = new File(targetPath);
        IConverter converter = null;
        try (InputStream docxInputStream = new FileInputStream(inputWord);
             OutputStream outputStream = new FileOutputStream(outputFile)) {
            converter = LocalConverter.builder().build();
            boolean converted = converter.convert(docxInputStream)
                    .as(DocumentType.DOCX)
                    .to(outputStream)
                    .as(DocumentType.PDF)
                    .schedule()
                    .get();
            if (converted) {
                log.info("Conversion completed targetPath = {}", outputFile.getAbsolutePath());
            } else {
                log.error("[documents4J] word to pdf failed: {}", outputFile.getAbsolutePath());
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interrupt status for the caller.
                Thread.currentThread().interrupt();
            }
            // Passing the exception as the last argument keeps the stack trace in the log.
            log.error("[documents4J] word to pdf failed: {}", e.toString(), e);
        } finally {
            if (converter != null) {
                converter.shutDown();
            }
        }
    }

    /**
     * Converts the generated Word document to PDF format.
     *
     * @param wordPath Word document path
     * @param pdfPath  generated PDF path
     */
    public static void convertToPDF(String wordPath, String pdfPath) {
        documents4jWordToPdf(wordPath, pdfPath);
    }

    /**
     * Renders every page of a PDF file to a PNG image at 296 DPI.
     *
     * <p>The images are written next to the PDF as {@code <name>_<pageIndex>.png}, with a
     * zero-based page index. I/O failures are logged and not rethrown.
     *
     * @param sourcePath PDF file address
     */
    public static void execute(String sourcePath) {
        File file = new File(sourcePath).getAbsoluteFile();
        // Strip the extension from the file name only, so a dot in a directory name or a
        // file without extension does not break the target path.
        String fileName = file.getName();
        int dot = fileName.lastIndexOf('.');
        String baseName = dot > 0 ? fileName.substring(0, dot) : fileName;
        File targetDir = file.getParentFile();

        try (PDDocument doc = Loader.loadPDF(file)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, 296);
                ImageIO.write(image, "PNG", new File(targetDir, baseName + "_" + i + ".png"));
            }
        } catch (IOException e) {
            log.error("pdf to image failed: {}", sourcePath, e);
        }
    }
}
Test Code
/** * Fill word and generate pdf * @throws Exception */ @Test public void fillTemplate() throws Exception {<!-- --> //Fill word needs to be defined in the document: ${name} Map<String, Object> strData = new HashMap<>(); strData.put("${name}", "xxx"); //Customer name strData.put("${createDate}", DateUtil.format(DateUtil.date(), DatePattern.CHINESE_DATE_PATTERN)); strData.put("${openingAccountBalance}", 121);//The opening account balance (yuan) strData.put("${rechargeAmount}", 121);//New recharge amount (yuan) strData.put("${totalConsumptionAmount}", 121);//Consumption during this account period (yuan) strData.put("${balanceAfterSettlement}", 121);//End account balance (yuan) String readPath = "C:\Users\gukt\Desktop\template.docx"; //word template String outPath = "C:\Users\gukt\Desktop\output.docx"; // output fill word path String outPdfPath = "C:\Users\gukt\Desktop\output.pdf"; //Enter word to pdf path WordUtils.compile(readPath, outPath, strData); //Fill in word, if you only want to convert word to pdf, you don’t need it here // conversion operation, can be used alone WordUtils.convertToPDF(outPath,outPdfPath); //word to pdf WordUtils.execute(outPdfPath); //pdf to picture png }