SpringBoot decompresses the zip package and reads the contents of each file
1. Application scenarios
Obtain the local compressed package, decompress it, and perform business processing on the read file content according to the file name and type.
2. POM file dependencies
<!--Read file--> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>4.1.2</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>4.1.2</version> </dependency> <!--Ali ocr--> <dependency> <groupId>com.aliyun</groupId> <artifactId>aliyun-java-sdk-core</artifactId> <version>3.4.0</version> </dependency> <dependency> <groupId>software.amazon.awssdk</groupId> <artifactId>aws-sdk-java</artifactId> <version>2.17.0</version> </dependency> <!--Baidu ocr--> <dependency> <groupId>com.baidu.aip</groupId> <artifactId>java-sdk</artifactId> <version>4.11.3</version> <exclusions> <exclusion> <groupId>org.slf4j</groupId> <artifactId>slf4j-simple</artifactId> </exclusion> </exclusions> </dependency>
3. Code part
1. Control layer method
@GetMapping(value = "/localZipFile") public Result localZipFile(){ String filePath = "C:\Users\Administrator\Desktop\11.zip"; List<String> list = new ArrayList<>(); try { ZipFile zipFile = new ZipFile(filePath); Enumeration<? extends ZipEntry> entries = zipFile.getEntries(); //Get the file name under the zip package while (entries.hasMoreElements()) { list.add(entries.nextElement().getName()); } String packFileStr = "C:\Users\Administrator\Desktop\zip"; File file = new File(filePath); String packFilePath = packFileStr + File.separator; //Extract to the specified path UnPackeUtil.unPackZip(file, null,packFilePath); File readFileDir = new File(packFilePath); List<String> strings = new ArrayList<>(); //Get the collection of files under the folder File[] files = readFileDir.listFiles(); for (File file1 : files) { String savePath = MeFileUtils.uploadLocal(file1,""); FileInputStream inputStream = new FileInputStream(file1); String fileExtension =MeFileUtils.getFileExtension(file1.getName()); //Return the read file content String fileContent = MeFileUtils.readGsFile(inputStream,fileExtension,file1); strings.add(savePath); strings.add(fileContent); } return Result.OK(strings); } catch (IOException e) { e.printStackTrace(); return Result.error(e.getMessage()); }
2. MeFileUtils tool class
//Upload public class MeFileUtils{ public static String uploadLocal(File file,String bizPath){ try { String ctxPath = uploadpath; String fileName = null; File file = new File(ctxPath + File.separator + bizPath + File.separator ); if (!file.exists()) { file.mkdirs();//Create file root directory } // Get file name String orgName = mf.getName(); orgName = CommonUtils.getFileName(orgName); if(orgName.indexOf(".")!=-1){ fileName = orgName.substring(0, orgName.lastIndexOf(".")) + "_" + System.currentTimeMillis() + orgName.substring(orgName.lastIndexOf(".")); }else{ fileName = orgName + "_" + System.currentTimeMillis(); } String savePath = file.getPath() + File.separator + fileName; File savefile = new File(savePath); FileCopyUtils.copy(mf, savefile); String dbpath = null; if(oConvertUtils.isNotEmpty(bizPath)){ dbpath = bizPath + File.separator + fileName; }else{ dbpath = fileName; } if (dbpath.contains("\")) { dbpath = dbpath.replace("\", "/"); } return dbpath; } catch (IOException e) { log.error(e.getMessage(), e); } return ""; } //Get file name public String getFileExtension(String filename) { int dotIndex = filename.lastIndexOf("."); if (dotIndex > 0 & amp; & amp; dotIndex < filename.length() - 1) { return filename.substring(dotIndex + 1).toLowerCase(); } return ""; } \t //Get file content public String readGsFile(FileInputStream inputStream,String fileExtension,File txtFile) { try { if (fileExtension.equalsIgnoreCase("doc") || fileExtension.equalsIgnoreCase("docx")) { // Process Word document XWPFDocument document = new XWPFDocument(inputStream); //Read the text content of each paragraph StringBuilder content = new StringBuilder(); for (XWPFParagraph paragraph : document.getParagraphs()) { for (XWPFRun run : paragraph.getRuns()) { content.append(run.text()); } } //Close document document.close(); return content.toString(); }else if(fileExtension.equalsIgnoreCase("txt")){ StringBuffer buffer = new StringBuffer(); //Create a Scanner object to read the file content 
Scanner scanner = new Scanner(txtFile); // Read the file content line by line and output while (scanner.hasNextLine()) { String line = scanner.nextLine(); System.out.println(line); buffer.append(line).append(","); } //Close Scanner object scanner.close(); return buffer.toString(); } else if (fileExtension.equalsIgnoreCase("xls") || fileExtension.equalsIgnoreCase("xlsx")) { // Process Excel document XSSFWorkbook workbook = new XSSFWorkbook(inputStream); return ""; } else if (fileExtension.equalsIgnoreCase("pdf")) { // Process PDF documents PDDocument document = PDDocument.load(inputStream); //Create PDFTextStripper object PDFTextStripper textStripper = new PDFTextStripper(); //Read document content String content = textStripper.getText(document); document.close(); return content; }else if (fileExtension.equalsIgnoreCase("jpg") || fileExtension.equalsIgnoreCase("png")) { // Process images try { // Call the OCR tool class to identify the file content String result = BaiduOCRUtil.recognizeFile(txtFile.getAbsolutePath()); return result; } catch (Exception e) { e.printStackTrace(); return ""; } } else { // Other file formats inputStream.close(); return ""; } } catch (IOException e) { e.printStackTrace(); return ""; } } }
3. BaiduOCRUtil tool class
public class BaiduOCRUtil { private static final String APP_ID = ""; private static final String API_KEY = ""; private static final String SECRET_KEY = ""; public static String recognizeFile(String filePath) { AipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY); //Set optional parameters HashMap<String, String> options = new HashMap<>(); options.put("language_type", "CHN_ENG"); options.put("detect_direction", "true"); options.put("detect_language", "true"); options.put("probability", "true"); // Call Baidu Cloud OCR service to identify file content JSONObject response = client.basicGeneral(filePath, options); // Parse the recognition results StringBuilder result = new StringBuilder(); JSONArray wordsArray = response.getJSONArray("words_result"); for (int i = 0; i < wordsArray.length(); i + + ) { JSONObject wordsObject = wordsArray.getJSONObject(i); String words = wordsObject.getString("words"); result.append(words).append("\\ "); } return result.toString(); } }
I am a developer who is still learning, so please be kind; feedback and discussion are welcome.