diff --git a/physical-module-system/physical-system-biz/pom.xml b/physical-module-system/physical-system-biz/pom.xml index 1798e3c..6d3f522 100644 --- a/physical-module-system/physical-system-biz/pom.xml +++ b/physical-module-system/physical-system-biz/pom.xml @@ -80,7 +80,7 @@ org.apache.pdfbox pdfbox - 2.0.24 + 3.0.5 net.sourceforge.tess4j diff --git a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java index 3e07a5c..ce97dd5 100644 --- a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java +++ b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/system/controller/CommonController.java @@ -7,6 +7,7 @@ import net.sourceforge.tess4j.Tesseract; import org.apache.commons.lang3.StringUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.text.PDFTextStripper; import org.jeecg.common.api.vo.Result; import org.jeecg.common.constant.CommonConstant; import org.jeecg.common.constant.SymbolConstant; @@ -212,7 +213,7 @@ public class CommonController { */ @PostMapping(value = "/uploadAll") public Result uploadAll(HttpServletRequest request) throws Exception { - Result result = new Result<>(); + Result result = new Result<>(); String savePath = "123"; String bizPath = ""; @@ -220,7 +221,7 @@ public class CommonController { // 获取上传文件对象 MultipartFile file = multipartRequest.getFile("file"); - extractExperimentFile(file); + Map resultMap = extractExperimentFile(file); // 获取是否包含必填的文件类型 //savePath = CommonUtils.upload(file, bizPath, uploadType); @@ -230,7 +231,8 @@ public class CommonController { ossFile.setUrl("测试路径.txt"); // ossFile.setUrl(savePath); // ossFileService.save(ossFile); - result.setResult(ossFile); + resultMap.put("file", ossFile); + result.setResult(resultMap); @@ -245,36 +247,53 @@ public class CommonController { return result; } - private List> extractExperimentFile(MultipartFile file) throws Exception { - List> allFileResult = new ArrayList<>(); - PDDocument document = PDDocument.load(file.getInputStream()); - PDFRenderer renderer = new PDFRenderer(document); - Tesseract tesseract = new Tesseract(); - - // 设置Tesseract数据路径(tessdata文件夹路径) - tesseract.setDatapath("/usr/share/tesseract-ocr/5/tessdata"); - tesseract.setLanguage("chi_sim"); - - StringBuilder fullText = new StringBuilder(); - - Map map = new HashMap(); - for (int i = 0; i < document.getNumberOfPages(); i++) { - BufferedImage image = renderer.renderImageWithDPI(i, 300); // 高分辨率 - File tempImage = new File("page_" + i + ".png"); - ImageIO.write(image, "png", tempImage); - - String result = tesseract.doOCR(tempImage); - fullText.append(result).append("\n"); + private Map extractExperimentFile(MultipartFile file) throws Exception { + Map allFileResult = new HashMap<>(); + String fullText = ""; + try (PDDocument document = PDDocument.load(file.getInputStream())) { + PDFTextStripper stripper = new PDFTextStripper(); + fullText = stripper.getText(document); + } catch (IOException e) { + e.printStackTrace(); } if (fullText.length() > 0) { if (fullText.toString().contains("辐照试验委托书")){ - map.put("fzsywts", "1"); - allFileResult.add(map); + allFileResult.put("fzsywts", "1"); + } + + if (fullText.toString().contains("试验大纲")){ + Map map = new HashMap<>(); + allFileResult.put("sydg", "1"); + } + + if (fullText.toString().contains("沟通记录表")){ + Map map = new HashMap<>(); + allFileResult.put("gtjlb", "1"); + } + + if (fullText.toString().contains("委托书等评审表")){ + Map map = new HashMap<>(); + allFileResult.put("htwtspsb", "1"); + } + + if (fullText.toString().contains("试验大纲评审表")){ + Map map = new HashMap<>(); + allFileResult.put("sydgpsb", "1"); + } + + if (fullText.toString().contains("试验流程检查单")){ + Map map = new HashMap<>(); + allFileResult.put("sylcjyd", "1"); + } + + if (fullText.toString().contains("试验流程检查单")){ + Map map = new HashMap<>(); + allFileResult.put("sylcjyd", "1"); } } - document.close(); + System.out.println("解析结果: " + allFileResult); System.out.println("所有文本: " + fullText);