From d088d46cdc3aa1b1c98417302c69d940579a556b Mon Sep 17 00:00:00 2001 From: dengchun Date: Mon, 4 Aug 2025 23:18:07 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=B8=80=E4=B8=AA?= =?UTF-8?q?=E6=89=B9=E9=87=8F=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E7=9A=84?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../physical-system-biz/pom.xml | 2 +- .../system/controller/CommonController.java | 71 ++++++++++++------- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/physical-module-system/physical-system-biz/pom.xml b/physical-module-system/physical-system-biz/pom.xml index 1798e3c..6d3f522 100644 --- a/physical-module-system/physical-system-biz/pom.xml +++ b/physical-module-system/physical-system-biz/pom.xml @@ -80,7 +80,7 @@ org.apache.pdfbox pdfbox - 2.0.24 + 3.0.5 net.sourceforge.tess4j diff --git a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java index 3e07a5c..ce97dd5 100644 --- a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java +++ b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java @@ -7,6 +7,7 @@ import net.sourceforge.tess4j.Tesseract; import org.apache.commons.lang3.StringUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.text.PDFTextStripper; import org.jeecg.common.api.vo.Result; import org.jeecg.common.constant.CommonConstant; import org.jeecg.common.constant.SymbolConstant; @@ -212,7 +213,7 @@ public class CommonController { */ @PostMapping(value = "/uploadAll") public Result uploadAll(HttpServletRequest request) throws Exception { - Result result = new Result<>(); + Result result = new Result<>(); String savePath = "123"; String bizPath = ""; @@ -220,7 +221,7 @@ public class CommonController { // 获取上传文件对象 MultipartFile file = multipartRequest.getFile("file"); - extractExperimentFile(file); + Map resultMap = extractExperimentFile(file); // 获取是否包含必填的文件类型 //savePath = CommonUtils.upload(file, bizPath, uploadType); @@ -230,7 +231,8 @@ public class CommonController { ossFile.setUrl("测试路径.txt"); // ossFile.setUrl(savePath); // ossFileService.save(ossFile); - result.setResult(ossFile); + resultMap.put("file", ossFile); + result.setResult(resultMap); @@ -245,36 +247,53 @@ public class CommonController { return result; } - private List> extractExperimentFile(MultipartFile file) throws Exception { - List> allFileResult = new ArrayList<>(); - PDDocument document = PDDocument.load(file.getInputStream()); - PDFRenderer renderer = new PDFRenderer(document); - Tesseract tesseract = new Tesseract(); - - // 设置Tesseract数据路径(tessdata文件夹路径) - tesseract.setDatapath("/usr/share/tesseract-ocr/5/tessdata"); - tesseract.setLanguage("chi_sim"); - - StringBuilder fullText = new StringBuilder(); - - Map map = new HashMap(); - for (int i = 0; i < document.getNumberOfPages(); i++) { - BufferedImage image = renderer.renderImageWithDPI(i, 300); // 高分辨率 - File tempImage = new File("page_" + i + ".png"); - ImageIO.write(image, "png", tempImage); - - String result = tesseract.doOCR(tempImage); - fullText.append(result).append("\n"); + private Map extractExperimentFile(MultipartFile file) throws Exception { + Map allFileResult = new HashMap<>(); + String fullText = ""; + try (PDDocument document = PDDocument.load(file.getInputStream())) { + PDFTextStripper stripper = new PDFTextStripper(); + fullText = stripper.getText(document); + } catch (IOException e) { + e.printStackTrace(); } if (fullText.length() > 0) { if (fullText.toString().contains("辐照试验委托书")){ - map.put("fzsywts", "1"); - allFileResult.add(map); + allFileResult.put("fzsywts", "1"); + } + + if (fullText.toString().contains("试验大纲")){ + Map map = new HashMap<>(); + allFileResult.put("sydg", "1"); + } + + if (fullText.toString().contains("沟通记录表")){ + Map map = new HashMap<>(); + allFileResult.put("gtjlb", "1"); + } + + if (fullText.toString().contains("委托书等评审表")){ + Map map = new HashMap<>(); + allFileResult.put("htwtspsb", "1"); + } + + if (fullText.toString().contains("试验大纲评审表")){ + Map map = new HashMap<>(); + allFileResult.put("sydgpsb", "1"); + } + + if (fullText.toString().contains("试验流程检查单")){ + Map map = new HashMap<>(); + allFileResult.put("sylcjyd", "1"); + } + + if (fullText.toString().contains("试验流程检查单")){ + Map map = new HashMap<>(); + allFileResult.put("sylcjyd", "1"); } } - document.close(); + System.out.println("解析结果: " + allFileResult); System.out.println("所有文本: " + fullText); From 4a020077d2dad3c1e0793d51978fe2fe09d90859 Mon Sep 17 00:00:00 2001 From: dengchun Date: Mon, 4 Aug 2025 23:39:24 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E8=A7=A3=E5=86=B3maven=20build=E6=8A=A5?= =?UTF-8?q?=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../jeecg/modules/system/controller/CommonController.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java index ce97dd5..967954a 100644 --- a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java +++ b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java @@ -3,10 +3,9 @@ package org.jeecg.modules.system.controller; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; import lombok.extern.slf4j.Slf4j; -import net.sourceforge.tess4j.Tesseract; import org.apache.commons.lang3.StringUtils; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.text.PDFTextStripper; import org.jeecg.common.api.vo.Result; import org.jeecg.common.constant.CommonConstant; @@ -250,7 +249,7 @@ public class CommonController { private Map extractExperimentFile(MultipartFile file) throws Exception { Map allFileResult = new HashMap<>(); String fullText = ""; - try (PDDocument document = PDDocument.load(file.getInputStream())) { + try (PDDocument document = Loader.loadPDF(file.getInputStream().readAllBytes())) { PDFTextStripper stripper = new PDFTextStripper(); fullText = stripper.getText(document); } catch (IOException e) {