Read pdf content through pdfBox

1. Import dependencies:

<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
<version>1.8.11</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>xmpbox</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>preflight</artifactId>
            <version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>2.0.0</version>
        </dependency>

2. Process documents through the PDFBox API. The code example is as follows:

import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.util.Matrix;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class PdfBoxUtil {<!-- -->

    /**
     * Read text information in pdf (all)
     *
     * @param inputFile
     * @return
     */
    public static String readPdf(String inputFile) {<!-- -->
        //Create document object
        PDDocument doc = null;
        String content = "";
        try {<!-- -->
            //load a pdf object
            doc = PDDocument. load(new File(inputFile));
            //Get a PDFTextStripper text stripping object
            PDFTextStripper textStripper = new PDFTextStripper();
            content = textStripper. getText(doc);
// System.out.println("Content:" + content);
// System.out.println("All pages" + doc.getNumberOfPages());
        } catch (Exception e) {<!-- -->
            e.printStackTrace();
        } finally {<!-- -->
            try {<!-- -->
                // close the document
                if (doc != null) {<!-- -->
                    doc. close();
                }
            } catch (IOException e) {<!-- -->
                e.printStackTrace();
            }
        }
        return content;
    }

    /**
     * insert text into pdf
     *
     * @param inputFilePath
     * @param outputFilePath
     * @param pageNum
     * @param message
     * @throws Exception
     */
    public static void insertWordContent(String inputFilePath, String outputFilePath, Integer pageNum, String message) throws Exception {<!-- -->
        File inputPDFFile = new File(inputFilePath);
        File outputPDFFile = new File(outputFilePath);
        PDDocument doc = null;
        try {<!-- -->
            doc = PDDocument. load(inputPDFFile);
            PDPageTree allPages = doc.getDocumentCatalog().getPages();
// PDFont font = PDType1Font.HELVETICA_BOLD;
            PDFont font = PDType0Font.load(doc, new File("C:\Users\DELL\Desktop\FZLTHJW.TTF"));
            // font size
            float fontSize = 36.0f;
            PDPage page = (PDPage) allPages. get(pageNum - 1);
            PDRectangle pageSize = page. getMediaBox();
            float stringWidth = font. getStringWidth(message) * fontSize / 1000f;
            // Calculate the center position of the page
            int rotation = page. getRotation();
            boolean rotate = rotation == 90 || rotation == 270;
            float pageWidth = rotate ? pageSize. getHeight() : pageSize. getWidth();
            float pageHeight = rotate ? pageSize. getWidth() : pageSize. getHeight();
            double centeredXPosition = rotate ? pageHeight / 2f : (pageWidth - stringWidth) / 2f;
            double centeredYPosition = rotate ? (pageWidth - stringWidth) / 2f : pageHeight / 2f;
            // append the content to the existing stream
            PDPageContentStream contentStream = new PDPageContentStream(doc, page, true, true, true);
            contentStream.beginText();
            // set font and font size
            contentStream.setFont(font, fontSize);
            // Set the font color (red as below)
            contentStream.setNonStrokingColor(255, 0, 0);
            if (rotate) {<!-- -->
                // rotate the text according to the page rotation
                contentStream.setTextRotation(Math.PI / 2, centeredXPosition, centeredYPosition);
            } else {<!-- -->
                contentStream.setTextTranslation(centeredXPosition, centeredYPosition);
            }
            // write text
            contentStream. drawString(message);
            contentStream. endText();
            contentStream. close();
            // save to new document
            doc.save(outputPDFFile);
            System.out.println("Successfully insert text into pdf");
        } finally {<!-- -->
            if (doc != null) {<!-- -->
                doc. close();
            }
        }
    }

    /**
     * Insert pictures in pdf
     *
     * @param inputFilePath
     * @param imagePath
     * @param outputFilePath
     * @param pageNum
     * @throws Exception
     */
    public static void insertImageContent(String inputFilePath, String imagePath, String outputFilePath, Integer pageNum) throws Exception {<!-- -->
        File inputPDFFile = new File(inputFilePath);
        File outputPDFFile = new File(outputFilePath);

        try {<!-- -->
            PDDocument doc = PDDocument. load(inputPDFFile);
            PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);

            PDPage page = doc. getPage(0);
            //The commented line of code will overwrite the original content, and the uncommented line will not be overwritten
// PDPageContentStream contentStream = new PDPageContentStream(doc, page);
            PDPageContentStream contentStream = new PDPageContentStream(doc, page, true, true, true);
            contentStream. drawImage(pdImage, 70, 250);
            contentStream. close();
            doc.save(outputPDFFile);
            doc. close();
            System.out.println("Successfully inserted picture");
        } catch (IOException e) {<!-- -->
            e.printStackTrace();
        }
    }

    /**
     * Merge pdf files
     *
     * @param pathList
     * @param targetPDFPath
     * @throws Exception
     */
    public static void mergePdf(List<String> pathList, String targetPDFPath) throws Exception {<!-- -->
        List<InputStream> inputStreams = new ArrayList<>();
        for (String path : pathList) {<!-- -->
            inputStreams.add(new FileInputStream(new File(path)));
        }
        PDFMergerUtility mergePdf = new PDFMergerUtility();
        File file = new File(targetPDFPath);

        if (!file.exists()) {<!-- -->
            file.delete();
        }

        mergePdf. addSources(inputStreams);
        mergePdf.setDestinationFileName(targetPDFPath);
        mergePdf. mergeDocuments();
        for (InputStream in : inputStreams) {<!-- -->
            if (in != null) {<!-- -->
                in. close();
            }
        }
    }


    /**
     * Split the pdf file into multiple
     *
     * @param sourcePdfPath
     * @param splitPath
     * @param splitFileName
     * @throws Exception
     */
    public static void splitPdf(String sourcePdfPath, String splitPath, String splitFileName) throws Exception {<!-- -->
        File targetDir = new File(splitPath);
        if (!targetDir. exists()) {<!-- -->
            targetDir.mkdirs();
        }
        int j = 1;
        String splitPdf = splitPath + File.separator + splitFileName + "_";

        // Loading an existing PDF document
        File file = new File(sourcePdfPath);
        PDDocument document = PDDocument. load(file);
        // Instantiating Splitter class
        Splitter splitter = new Splitter();
        splitter.setStartPage(1);
        splitter.setSplitAtPage(1);
        splitter. setEndPage(5);
        // splitting the pages of a PDF document
        List<PDDocument> Pages = splitter. split(document);
        // Creating an iterator
        Iterator<PDDocument> iterator = Pages. listIterator();
        // Saving each page as an individual document
        while (iterator.hasNext()) {<!-- -->
            PDDocument pd = iterator. next();
            String pdfName = splitPdf + j + + + ".pdf";
            pd. save(pdfName);
        }
        document. close();
    }


    public static void main(String args[]) throws IOException {<!-- -->
        // 1. Read the pdf file
// String filePath = "F:\image_test\sample.pdf";
// String content = readPdf(filePath);
// System.out.println("read content:" + content);

        // 2. Insert text into pdf
// String inFilePath = "F:\image_test\sample.pdf";
// String outFilePath = "F:\image_test\sample2.pdf";
// try {<!-- -->
// insertWordContent(inFilePath,outFilePath,1,"inserted puppy text");
// } catch (Exception e) {<!-- -->
// e. printStackTrace();
// }

        // 3. Insert the picture into the pdf file
// String inFilePath = "F:\image_test\sample.pdf";
// String imagePath = "F:\image_test\sun1.jpg";
// String outFilePath = "F:\image_test\sample3.pdf";
// try {<!-- -->
// insertImageContent(inFilePath,imagePath,outFilePath,1);
// } catch (Exception e) {<!-- -->
// e. printStackTrace();
// }

        // 4. Merge pdf files
// String filePath1 = "F:\image_test\sample.pdf";
// String filePath2 = "F:\image_test\sample2.pdf";
// String outFilePath = "F:\image_test\sample4.pdf";
// List<String> filePathList = new ArrayList<>();
// filePathList.add(filePath1);
// filePathList.add(filePath2);
// try {<!-- -->
// mergePdf(filePathList, outFilePath);
// } catch (Exception e) {<!-- -->
// e. printStackTrace();
// }

        // 5. Split the pdf file
        String inFilePath = "F:\image_test\sample4.pdf";
        String targetPath = "F:\image_test\11";
        String targetFileName = "aa";
        try {<!-- -->
            splitPdf(inFilePath, targetPath, targetFileName);
        } catch (Exception e) {<!-- -->
            e.printStackTrace();
        }

    }
}

In the sample code above, the numbered examples in the main method are meant to be run one at a time, in order. The results are as follows.
(1) Result of running example 1:

(2) Result of running example 2: a new file is created with the text inserted.

Example 2 requires a font file that you must download yourself, for example from the following address:
https://www.fontke.com/font/10279514/download/
Then point the font path used in insertWordContent at the downloaded font file.

(3) Result of running example 3: the picture is inserted successfully.

(4) Result of running example 4: the merged PDF contains 2 pages.

(5) Result of running example 5: the multi-page PDF file is split page by page into separate files.

The sea of learning has no shore, so keep working hard!