SpringBoot decompresses the zip package and reads the contents of each file

SpringBoot decompresses the zip package and reads the contents of each file

1. Application scenarios

Read a local ZIP archive, extract it, and then process the content of each extracted file according to its name and type.

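2. POM file dependencies (the PDF branch in the code below also requires org.apache.pdfbox:pdfbox 2.x, which is not listed here)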

<!--Read file-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>

<!--Ali ocr-->
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>aliyun-java-sdk-core</artifactId>
<version>3.4.0</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>aws-sdk-java</artifactId>
<version>2.17.0</version>
</dependency>

<!--Baidu ocr-->
<dependency>
<groupId>com.baidu.aip</groupId>
<artifactId>java-sdk</artifactId>
<version>4.11.3</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</exclusion>
</exclusions>
</dependency>

3. Code part

1. Controller method

@GetMapping(value = "/localZipFile")
    public Result localZipFile(){
        String filePath = "C:\Users\Administrator\Desktop\11.zip";
        List<String> list = new ArrayList<>();
        try {
            ZipFile zipFile = new ZipFile(filePath);
            Enumeration<? extends ZipEntry> entries = zipFile.getEntries();
            //Get the file name under the zip package
            while (entries.hasMoreElements()) {
                list.add(entries.nextElement().getName());
            }
            String packFileStr = "C:\Users\Administrator\Desktop\zip";
            File file = new File(filePath);
            String packFilePath = packFileStr + File.separator;
            //Extract to the specified path
            UnPackeUtil.unPackZip(file, null,packFilePath);

            File readFileDir = new File(packFilePath);
            List<String> strings = new ArrayList<>();
            //Get the collection of files under the folder
            File[] files = readFileDir.listFiles();
            for (File file1 : files) {
                String savePath = MeFileUtils.uploadLocal(file1,"");
                FileInputStream inputStream = new FileInputStream(file1);
                String fileExtension =MeFileUtils.getFileExtension(file1.getName());
                //Return the read file content
                String fileContent = MeFileUtils.readGsFile(inputStream,fileExtension,file1);
                strings.add(savePath);
                strings.add(fileContent);
            }
            return Result.OK(strings);
        } catch (IOException e) {
            e.printStackTrace();
            return Result.error(e.getMessage());
        }

2. MeFileUtils utility class

//Upload
public class MeFileUtils{
public static String uploadLocal(File file,String bizPath){
try {
            String ctxPath = uploadpath;
            String fileName = null;
            File file = new File(ctxPath + File.separator + bizPath + File.separator );
            if (!file.exists()) {
                file.mkdirs();//Create file root directory
            }
            // Get file name
            String orgName = mf.getName();
            orgName = CommonUtils.getFileName(orgName);
            if(orgName.indexOf(".")!=-1){
                fileName = orgName.substring(0, orgName.lastIndexOf(".")) + "_" + System.currentTimeMillis() + orgName.substring(orgName.lastIndexOf("."));
            }else{
                fileName = orgName + "_" + System.currentTimeMillis();
            }
            String savePath = file.getPath() + File.separator + fileName;
            File savefile = new File(savePath);
            FileCopyUtils.copy(mf, savefile);
            String dbpath = null;
            if(oConvertUtils.isNotEmpty(bizPath)){
                dbpath = bizPath + File.separator + fileName;
            }else{
                dbpath = fileName;
            }
            if (dbpath.contains("\")) {
                dbpath = dbpath.replace("\", "/");
            }
            return dbpath;
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
        return "";
}

//Get file name
public String getFileExtension(String filename) {
        int dotIndex = filename.lastIndexOf(".");
        if (dotIndex > 0 & amp; & amp; dotIndex < filename.length() - 1) {
            return filename.substring(dotIndex + 1).toLowerCase();
        }
        return "";
    }
\t
//Get file content
public String readGsFile(FileInputStream inputStream,String fileExtension,File txtFile) {
        try {
            if (fileExtension.equalsIgnoreCase("doc") || fileExtension.equalsIgnoreCase("docx")) {
                // Process Word document
                XWPFDocument document = new XWPFDocument(inputStream);
                //Read the text content of each paragraph
                StringBuilder content = new StringBuilder();
                for (XWPFParagraph paragraph : document.getParagraphs()) {
                    for (XWPFRun run : paragraph.getRuns()) {
                        content.append(run.text());
                    }
                }
                //Close document
                document.close();
                return content.toString();
            }else if(fileExtension.equalsIgnoreCase("txt")){
                StringBuffer buffer = new StringBuffer();
                //Create a Scanner object to read the file content
                Scanner scanner = new Scanner(txtFile);
                // Read the file content line by line and output
                while (scanner.hasNextLine()) {
                    String line = scanner.nextLine();
                    System.out.println(line);
                    buffer.append(line).append(",");
                }
                //Close Scanner object
                scanner.close();
                return buffer.toString();
            } else if (fileExtension.equalsIgnoreCase("xls") || fileExtension.equalsIgnoreCase("xlsx")) {
                // Process Excel document
                XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
                return "";
            } else if (fileExtension.equalsIgnoreCase("pdf")) {
                // Process PDF documents
                PDDocument document = PDDocument.load(inputStream);
                //Create PDFTextStripper object
                PDFTextStripper textStripper = new PDFTextStripper();
                //Read document content
                String content = textStripper.getText(document);
                document.close();
                return content;
            }else if (fileExtension.equalsIgnoreCase("jpg") || fileExtension.equalsIgnoreCase("png")) {
                // Process images
                try {
                    // Call the OCR tool class to identify the file content
                    String result = BaiduOCRUtil.recognizeFile(txtFile.getAbsolutePath());
                    return result;
                } catch (Exception e) {
                    e.printStackTrace();
                    return "";
                }
            } else {
                // Other file formats
                inputStream.close();
                return "";
            }
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }
}

3. BaiduOCRUtil utility class

public class BaiduOCRUtil {

    private static final String APP_ID = "";
    private static final String API_KEY = "";
    private static final String SECRET_KEY = "";

    public static String recognizeFile(String filePath) {
        AipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY);

        //Set optional parameters
        HashMap<String, String> options = new HashMap<>();
        options.put("language_type", "CHN_ENG");
        options.put("detect_direction", "true");
        options.put("detect_language", "true");
        options.put("probability", "true");

        // Call Baidu Cloud OCR service to identify file content
        JSONObject response = client.basicGeneral(filePath, options);

        // Parse the recognition results
        StringBuilder result = new StringBuilder();
        JSONArray wordsArray = response.getJSONArray("words_result");
        for (int i = 0; i < wordsArray.length(); i + + ) {
            JSONObject wordsObject = wordsArray.getJSONObject(i);
            String words = wordsObject.getString("words");
            result.append(words).append("\\
");
        }

        return result.toString();
    }
}

I am still learning, so please be kind; feedback and discussion are welcome.