新功能:

1. 添加识别扫描版 PDF 的接口
This commit is contained in:
dengchun
2025-07-28 14:06:24 +08:00
parent d262f5d77f
commit 1a9662e02d
2 changed files with 100 additions and 5 deletions

View File

@@ -77,11 +77,16 @@
<artifactId>jsoup</artifactId>
<version>1.18.1</version>
</dependency>
<!-- 积木报表 mongo redis 支持包
<dependency>
<groupId>org.jeecgframework.jimureport</groupId>
<artifactId>jimureport-nosql-starter</artifactId>
</dependency>-->
<!-- Apache PDFBox: used by ExperimentFileController to load an uploaded PDF
     (PDDocument) and render each page to an image (PDFRenderer). -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.24</version>
</dependency>
<!-- Tess4J: Tesseract OCR wrapper used to recognise text in the rendered page
     images. The calling code points it at a tessdata directory on the host and
     selects the chi_sim language, so that data must be present at runtime. -->
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>5.4.0</version>
</dependency>
</dependencies>
</project>

View File

@@ -8,7 +8,10 @@ import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.tess4j.Tesseract;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.shiro.authz.annotation.RequiresPermissions;
import org.jeecg.common.api.vo.Result;
import org.jeecg.common.aspect.annotation.AutoLog;
@@ -19,6 +22,8 @@ import org.jeecg.common.util.oConvertUtils;
import org.jeecg.modules.database.constant.ExperimentFileType;
import org.jeecg.modules.database.entity.ExperimentFile;
import org.jeecg.modules.database.service.IExperimentFileService;
import org.jeecg.modules.oss.entity.OssFile;
import org.jeecg.modules.oss.service.IOssFileService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
@@ -26,6 +31,9 @@ import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;
import org.springframework.web.servlet.ModelAndView;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.InputStream;
import java.util.*;
/**
@@ -46,6 +54,9 @@ public class ExperimentFileController extends JeecgController<ExperimentFile, IE
@Autowired
private IExperimentFileService experimentFileService;
@Autowired
private IOssFileService ossFileService;
/**
* 分页列表查询
*
@@ -234,4 +245,83 @@ public class ExperimentFileController extends JeecgController<ExperimentFile, IE
return result;
}
/**
 * Entry point for uploading an experiment file and running recognition on it.
 *
 * <p>Reads the multipart part named {@code file}, passes it to
 * {@link #extractExperimentFile(MultipartFile)}, and returns an {@link OssFile}
 * describing the upload. Persistence is not wired up yet: the returned URL and
 * the save path are placeholders, and the recognition result is currently
 * discarded.
 *
 * @param request the incoming request; expected to be a multipart request
 *                carrying a part named {@code file}
 * @return the upload result; {@code success} is {@code false} when the request
 *         is not multipart or carries no (or an empty) {@code file} part
 * @throws Exception if reading the upload or the PDF/OCR processing fails
 */
@PostMapping(value = "/uploadAll")
public Result<?> uploadAll(HttpServletRequest request) throws Exception {
    Result<OssFile> result = new Result<>();

    // Resolve the uploaded part defensively: a non-multipart request or a
    // missing "file" part must produce a failure result, not a
    // ClassCastException / NullPointerException.
    MultipartFile file = null;
    if (request instanceof MultipartHttpServletRequest) {
        file = ((MultipartHttpServletRequest) request).getFile("file");
    }
    if (file == null || file.isEmpty()) {
        result.setMessage("上传失败!");
        result.setSuccess(false);
        return result;
    }

    // Placeholder until real storage is wired in.
    // TODO: savePath = CommonUtils.upload(file, bizPath, uploadType);
    String savePath = "123";

    // TODO: use the recognition result to check that the required file types are present.
    extractExperimentFile(file);

    OssFile ossFile = new OssFile();
    ossFile.setFileName(file.getOriginalFilename());
    // Placeholder URL.
    // TODO: ossFile.setUrl(savePath); ossFileService.save(ossFile);
    ossFile.setUrl("测试路径.txt");
    result.setResult(ossFile);

    if (oConvertUtils.isNotEmpty(savePath)) {
        result.setMessage(savePath);
        result.setSuccess(true);
    } else {
        result.setMessage("上传失败!");
        result.setSuccess(false);
    }
    return result;
}
/**
 * Runs OCR over every page of an uploaded PDF and reports which known
 * experiment-document types appear in the recognised text.
 *
 * <p>Each page is rendered at 300 DPI and handed to Tesseract directly as an
 * in-memory image, so nothing is written to disk. Currently a single marker is
 * checked: if the text contains "辐照试验委托书", the result holds one map with
 * the entry {@code "fzsywts" -> "1"}.
 *
 * @param file the uploaded PDF; {@code null} or empty yields an empty result
 * @return a list with one map per recognised document type, possibly empty
 * @throws Exception if the PDF cannot be read or rendered, or OCR fails
 */
private List<Map<String, String>> extractExperimentFile(MultipartFile file) throws Exception {
    List<Map<String, String>> allFileResult = new ArrayList<>();
    if (file == null || file.isEmpty()) {
        return allFileResult;
    }

    Tesseract tesseract = new Tesseract();
    // Path of the tessdata directory holding Tesseract's language data.
    tesseract.setDatapath("/usr/share/tesseract-ocr/5/tessdata");
    tesseract.setLanguage("chi_sim");

    StringBuilder fullText = new StringBuilder();
    // try-with-resources releases both the upload stream and the document even
    // when rendering or OCR throws part-way through.
    try (InputStream pdfStream = file.getInputStream();
         PDDocument document = PDDocument.load(pdfStream)) {
        PDFRenderer renderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            // High resolution improves recognition of scanned text.
            BufferedImage image = renderer.renderImageWithDPI(i, 300);
            // OCR the in-memory image: avoids leaving page_<i>.png files in the
            // working directory and avoids name clashes between concurrent requests.
            fullText.append(tesseract.doOCR(image)).append("\n");
        }
    }

    if (fullText.toString().contains("辐照试验委托书")) {
        Map<String, String> map = new HashMap<>();
        map.put("fzsywts", "1");
        allFileResult.add(map);
    }

    // NOTE(review): debug output kept from the original; consider routing it
    // through the class logger instead of stdout.
    System.out.println(allFileResult);
    return allFileResult;
}
}