增加一个批量上传文件的接口
This commit is contained in:
@@ -80,7 +80,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.pdfbox</groupId>
|
<groupId>org.apache.pdfbox</groupId>
|
||||||
<artifactId>pdfbox</artifactId>
|
<artifactId>pdfbox</artifactId>
|
||||||
<version>2.0.24</version>
|
<version>3.0.5</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>net.sourceforge.tess4j</groupId>
|
<groupId>net.sourceforge.tess4j</groupId>
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import net.sourceforge.tess4j.Tesseract;
|
|||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||||
|
import org.apache.pdfbox.text.PDFTextStripper;
|
||||||
import org.jeecg.common.api.vo.Result;
|
import org.jeecg.common.api.vo.Result;
|
||||||
import org.jeecg.common.constant.CommonConstant;
|
import org.jeecg.common.constant.CommonConstant;
|
||||||
import org.jeecg.common.constant.SymbolConstant;
|
import org.jeecg.common.constant.SymbolConstant;
|
||||||
@@ -212,7 +213,7 @@ public class CommonController {
|
|||||||
*/
|
*/
|
||||||
@PostMapping(value = "/uploadAll")
|
@PostMapping(value = "/uploadAll")
|
||||||
public Result<?> uploadAll(HttpServletRequest request) throws Exception {
|
public Result<?> uploadAll(HttpServletRequest request) throws Exception {
|
||||||
Result<OssFile> result = new Result<>();
|
Result<Map> result = new Result<>();
|
||||||
String savePath = "123";
|
String savePath = "123";
|
||||||
String bizPath = "";
|
String bizPath = "";
|
||||||
|
|
||||||
@@ -220,7 +221,7 @@ public class CommonController {
|
|||||||
// 获取上传文件对象
|
// 获取上传文件对象
|
||||||
MultipartFile file = multipartRequest.getFile("file");
|
MultipartFile file = multipartRequest.getFile("file");
|
||||||
|
|
||||||
extractExperimentFile(file);
|
Map resultMap = extractExperimentFile(file);
|
||||||
|
|
||||||
// 获取是否包含必填的文件类型
|
// 获取是否包含必填的文件类型
|
||||||
//savePath = CommonUtils.upload(file, bizPath, uploadType);
|
//savePath = CommonUtils.upload(file, bizPath, uploadType);
|
||||||
@@ -230,7 +231,8 @@ public class CommonController {
|
|||||||
ossFile.setUrl("测试路径.txt");
|
ossFile.setUrl("测试路径.txt");
|
||||||
// ossFile.setUrl(savePath);
|
// ossFile.setUrl(savePath);
|
||||||
// ossFileService.save(ossFile);
|
// ossFileService.save(ossFile);
|
||||||
result.setResult(ossFile);
|
resultMap.put("file", ossFile);
|
||||||
|
result.setResult(resultMap);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -245,36 +247,53 @@ public class CommonController {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Map<String, String>> extractExperimentFile(MultipartFile file) throws Exception {
|
private Map<String, String> extractExperimentFile(MultipartFile file) throws Exception {
|
||||||
List<Map<String, String>> allFileResult = new ArrayList<>();
|
Map<String, String> allFileResult = new HashMap<>();
|
||||||
PDDocument document = PDDocument.load(file.getInputStream());
|
String fullText = "";
|
||||||
PDFRenderer renderer = new PDFRenderer(document);
|
try (PDDocument document = PDDocument.load(file.getInputStream())) {
|
||||||
Tesseract tesseract = new Tesseract();
|
PDFTextStripper stripper = new PDFTextStripper();
|
||||||
|
fullText = stripper.getText(document);
|
||||||
// 设置Tesseract数据路径(tessdata文件夹路径)
|
} catch (IOException e) {
|
||||||
tesseract.setDatapath("/usr/share/tesseract-ocr/5/tessdata");
|
e.printStackTrace();
|
||||||
tesseract.setLanguage("chi_sim");
|
|
||||||
|
|
||||||
StringBuilder fullText = new StringBuilder();
|
|
||||||
|
|
||||||
Map map = new HashMap<String, String>();
|
|
||||||
for (int i = 0; i < document.getNumberOfPages(); i++) {
|
|
||||||
BufferedImage image = renderer.renderImageWithDPI(i, 300); // 高分辨率
|
|
||||||
File tempImage = new File("page_" + i + ".png");
|
|
||||||
ImageIO.write(image, "png", tempImage);
|
|
||||||
|
|
||||||
String result = tesseract.doOCR(tempImage);
|
|
||||||
fullText.append(result).append("\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fullText.length() > 0) {
|
if (fullText.length() > 0) {
|
||||||
if (fullText.toString().contains("辐照试验委托书")){
|
if (fullText.toString().contains("辐照试验委托书")){
|
||||||
map.put("fzsywts", "1");
|
allFileResult.put("fzsywts", "1");
|
||||||
allFileResult.add(map);
|
}
|
||||||
|
|
||||||
|
if (fullText.toString().contains("试验大纲")){
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
allFileResult.put("sydg", "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fullText.toString().contains("沟通记录表")){
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
allFileResult.put("gtjlb", "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fullText.toString().contains("委托书等评审表")){
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
allFileResult.put("htwtspsb", "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fullText.toString().contains("试验大纲评审表")){
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
allFileResult.put("sydgpsb", "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fullText.toString().contains("试验流程检查单")){
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
allFileResult.put("sylcjyd", "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fullText.toString().contains("试验流程检查单")){
|
||||||
|
Map<String, String> map = new HashMap<>();
|
||||||
|
allFileResult.put("sylcjyd", "1");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
document.close();
|
|
||||||
System.out.println("解析结果: " + allFileResult);
|
System.out.println("解析结果: " + allFileResult);
|
||||||
System.out.println("所有文本: " + fullText);
|
System.out.println("所有文本: " + fullText);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user