Merge remote-tracking branch 'origin/master'

This commit is contained in:
ls
2025-08-04 23:47:10 +08:00
2 changed files with 47 additions and 29 deletions

View File

@@ -80,7 +80,7 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.24</version>
<version>3.0.5</version>
</dependency>
<dependency>
<groupId>net.sourceforge.tess4j</groupId>

View File

@@ -3,10 +3,10 @@ package org.jeecg.modules.system.controller;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.tess4j.Tesseract;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.jeecg.common.api.vo.Result;
import org.jeecg.common.constant.CommonConstant;
import org.jeecg.common.constant.SymbolConstant;
@@ -212,7 +212,7 @@ public class CommonController {
*/
@PostMapping(value = "/uploadAll")
public Result<?> uploadAll(HttpServletRequest request) throws Exception {
Result<OssFile> result = new Result<>();
Result<Map> result = new Result<>();
String savePath = "123";
String bizPath = "";
@@ -220,7 +220,7 @@ public class CommonController {
// 获取上传文件对象
MultipartFile file = multipartRequest.getFile("file");
extractExperimentFile(file);
Map resultMap = extractExperimentFile(file);
// 获取是否包含必填的文件类型
//savePath = CommonUtils.upload(file, bizPath, uploadType);
@@ -230,7 +230,8 @@ public class CommonController {
ossFile.setUrl("测试路径.txt");
// ossFile.setUrl(savePath);
// ossFileService.save(ossFile);
result.setResult(ossFile);
resultMap.put("file", ossFile);
result.setResult(resultMap);
@@ -245,36 +246,53 @@ public class CommonController {
return result;
}
private List<Map<String, String>> extractExperimentFile(MultipartFile file) throws Exception {
List<Map<String, String>> allFileResult = new ArrayList<>();
PDDocument document = PDDocument.load(file.getInputStream());
PDFRenderer renderer = new PDFRenderer(document);
Tesseract tesseract = new Tesseract();
// 设置Tesseract数据路径tessdata文件夹路径
tesseract.setDatapath("/usr/share/tesseract-ocr/5/tessdata");
tesseract.setLanguage("chi_sim");
StringBuilder fullText = new StringBuilder();
Map map = new HashMap<String, String>();
for (int i = 0; i < document.getNumberOfPages(); i++) {
BufferedImage image = renderer.renderImageWithDPI(i, 300); // 高分辨率
File tempImage = new File("page_" + i + ".png");
ImageIO.write(image, "png", tempImage);
String result = tesseract.doOCR(tempImage);
fullText.append(result).append("\n");
private Map<String, String> extractExperimentFile(MultipartFile file) throws Exception {
Map<String, String> allFileResult = new HashMap<>();
String fullText = "";
try (PDDocument document = Loader.loadPDF(file.getInputStream().readAllBytes())) {
PDFTextStripper stripper = new PDFTextStripper();
fullText = stripper.getText(document);
} catch (IOException e) {
e.printStackTrace();
}
if (fullText.length() > 0) {
if (fullText.toString().contains("辐照试验委托书")){
map.put("fzsywts", "1");
allFileResult.add(map);
allFileResult.put("fzsywts", "1");
}
if (fullText.toString().contains("试验大纲")){
Map<String, String> map = new HashMap<>();
allFileResult.put("sydg", "1");
}
if (fullText.toString().contains("沟通记录表")){
Map<String, String> map = new HashMap<>();
allFileResult.put("gtjlb", "1");
}
if (fullText.toString().contains("委托书等评审表")){
Map<String, String> map = new HashMap<>();
allFileResult.put("htwtspsb", "1");
}
if (fullText.toString().contains("试验大纲评审表")){
Map<String, String> map = new HashMap<>();
allFileResult.put("sydgpsb", "1");
}
if (fullText.toString().contains("试验流程检查单")){
Map<String, String> map = new HashMap<>();
allFileResult.put("sylcjyd", "1");
}
if (fullText.toString().contains("试验流程检查单")){
Map<String, String> map = new HashMap<>();
allFileResult.put("sylcjyd", "1");
}
}
document.close();
System.out.println("解析结果: " + allFileResult);
System.out.println("所有文本: " + fullText);