diff --git a/physical-launcher/src/main/resources/application-dev.yml b/physical-launcher/src/main/resources/application-dev.yml
index 22e4d3c..516c3cb 100644
--- a/physical-launcher/src/main/resources/application-dev.yml
+++ b/physical-launcher/src/main/resources/application-dev.yml
@@ -242,7 +242,7 @@ jeecg:
file-view-domain: http://fileview.jeecg.com
# minio文件上传
minio:
- minio_url: http://127.0.0.1:9000
+ minio_url: http://192.168.50.100:29000
minio_public_url: http://58.215.212.230:8005/oss/
minio_name: root
minio_pass: 12345678
diff --git a/physical-module-system/physical-system-biz/pom.xml b/physical-module-system/physical-system-biz/pom.xml
index a7619d3..c5b91ca 100644
--- a/physical-module-system/physical-system-biz/pom.xml
+++ b/physical-module-system/physical-system-biz/pom.xml
@@ -45,11 +45,25 @@
drag-free-springboot3
1.1.2
-
+
+ cn.hutool
+ hutool-core
+
+
+ cn.hutool
+ hutool-http
+
+
org.jeecgframework.boot
jeecg-boot-starter3-chatgpt
3.7.0
+
+
+ cn.hutool
+ hutool-all
+
+
diff --git a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/entity/CrawlerRecord.java b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/entity/CrawlerRecord.java
old mode 100755
new mode 100644
index 9c53d92..27fff92
--- a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/entity/CrawlerRecord.java
+++ b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/entity/CrawlerRecord.java
@@ -1,77 +1,145 @@
package org.jeecg.modules.database.entity;
-import java.io.Serializable;
-import java.io.UnsupportedEncodingException;
-import java.util.Date;
-import java.math.BigDecimal;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
-import com.baomidou.mybatisplus.annotation.TableLogic;
-import lombok.Data;
import com.fasterxml.jackson.annotation.JsonFormat;
-import org.springframework.format.annotation.DateTimeFormat;
-import org.jeecgframework.poi.excel.annotation.Excel;
-import org.jeecg.common.aspect.annotation.Dict;
import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.Accessors;
+import org.jeecgframework.poi.excel.annotation.Excel;
+import org.springframework.format.annotation.DateTimeFormat;
+
+import java.io.Serializable;
+import java.util.Date;
/**
* @Description: 爬虫记录
* @Author: jeecg-boot
- * @Date: 2024-09-03
+ * @Date: 2024-10-14
* @Version: V1.0
*/
@Data
@TableName("crawler_record")
@Accessors(chain = true)
@EqualsAndHashCode(callSuper = false)
-@Schema(description="爬虫记录")
+@Schema(description = "爬虫记录")
public class CrawlerRecord implements Serializable {
private static final long serialVersionUID = 1L;
- /**主键*/
- @TableId(type = IdType.ASSIGN_ID)
+ /**
+ * Primary key
+ */
+ @TableId(type = IdType.ASSIGN_ID)
@Schema(description = "主键")
private String id;
- /**创建人*/
+ /**
+ * Created by
+ */
@Schema(description = "创建人")
private String createBy;
- /**创建日期*/
- @JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
- @DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
+ /**
+ * Creation date
+ */
+ @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm:ss")
+ @DateTimeFormat(pattern = "yyyy-MM-dd HH:mm:ss")
@Schema(description = "创建日期")
private Date createTime;
- /**更新人*/
+ /**
+ * Updated by
+ */
@Schema(description = "更新人")
private String updateBy;
- /**更新日期*/
- @JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
- @DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
+ /**
+ * Update date
+ */
+ @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm:ss")
+ @DateTimeFormat(pattern = "yyyy-MM-dd HH:mm:ss")
@Schema(description = "更新日期")
private Date updateTime;
- /**所属部门*/
+ /**
+ * Owning department
+ */
@Schema(description = "所属部门")
private String sysOrgCode;
- /**来源*/
- @Excel(name = "来源", width = 15)
- @Schema(description = "来源")
- private String source;
- /**编号*/
- @Excel(name = "编号", width = 15)
+ /**
+ * Type
+ */
+ @Excel(name = "类型", width = 15)
+ @Schema(description = "类型")
+ private String type;
+ /**
+ * Code
+ */
+ @Excel(name = "编号", width = 15)
@Schema(description = "编号")
private String code;
- /**名称*/
- @Excel(name = "名称", width = 15)
- @Schema(description = "名称")
- private String name;
- /**文件名*/
- @Excel(name = "文件名", width = 15)
- @Schema(description = "文件名")
- private String fileName;
- /**文件ID*/
- @Excel(name = "文件ID", width = 15)
- @Schema(description = "文件ID")
- private String fileId;
+ /**
+ * Manufacturer
+ */
+ @Excel(name = "制造商", width = 15)
+ @Schema(description = "制造商")
+ private String manufacturer;
+ /**
+ * File
+ */
+ @Excel(name = "文件", width = 15)
+ @Schema(description = "文件")
+ private String fileUrl;
+ /**
+ * Test type
+ */
+ @Excel(name = "测试类型", width = 15)
+ @Schema(description = "测试类型")
+ private String functionType;
+ /**
+ * Test method
+ */
+ @Excel(name = "测试方式", width = 15)
+ @Schema(description = "测试方式")
+ private String testMethod;
+ /**
+ * Group
+ */
+ @Excel(name = "分组", width = 15)
+ @Schema(description = "分组")
+ private String category;
+ /**
+ * Second-level group
+ */
+ @Excel(name = "二级分组", width = 15)
+ @Schema(description = "二级分组")
+ private String subCategory;
+ /**
+ * Document date
+ */
+ @Excel(name = "文档日期", width = 15)
+ @Schema(description = "文档日期")
+ private String reportDate;
+ /**
+ * Test technology
+ */
+ @Excel(name = "测试技术", width = 15)
+ @Schema(description = "测试技术")
+ private String technology;
+ /**
+ * Report source
+ */
+ @Excel(name = "报告来源", width = 15)
+ @Schema(description = "报告来源")
+ private String reportSource;
+
+ /**
+ * Radiation test type
+ */
+ @Excel(name = "辐射测试类型", width = 15)
+ @Schema(description = "辐射测试类型")
+ private String radiationTestType;
+ /**
+ * Report ID
+ */
+ @Excel(name = "报告ID", width = 15)
+ @Schema(description = "报告ID")
+ private String reportId;
}
diff --git a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/service/impl/CrawlerRecordServiceImpl.java b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/service/impl/CrawlerRecordServiceImpl.java
index f6ae6ee..2bc4f9e 100644
--- a/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/service/impl/CrawlerRecordServiceImpl.java
+++ b/physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/service/impl/CrawlerRecordServiceImpl.java
@@ -1,10 +1,21 @@
package org.jeecg.modules.database.service.impl;
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.core.io.IoUtil;
import cn.hutool.core.thread.ThreadUtil;
+import cn.hutool.core.util.ObjUtil;
+import cn.hutool.core.util.ReUtil;
+import cn.hutool.core.util.StrUtil;
+import cn.hutool.core.util.URLUtil;
+import cn.hutool.http.Header;
+import cn.hutool.http.HttpRequest;
+import cn.hutool.http.HttpResponse;
import cn.hutool.http.HttpUtil;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
+import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
@@ -24,9 +35,16 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.ByteArrayInputStream;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.util.*;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import static org.jeecg.modules.online.auth.b.a.f;
+import static org.jeecg.modules.online.auth.b.a.i;
@Slf4j
@Service
@@ -40,6 +58,22 @@ public class CrawlerRecordServiceImpl extends ServiceImpl dispositions = execute.headerList(Header.CONTENT_DISPOSITION.getValue());
+ String fileName = null;
+ if (CollUtil.isNotEmpty(dispositions)) {
+ for (String disposition : dispositions) {
+ fileName = ReUtil.getGroup1("filename" + "=([^;]+)", disposition);
+ }
+ System.out.println(URLUtil.decode("N2920A%20TID_1009_01.pdf", Charset.defaultCharset()));
+ System.out.println(fileName);
+ // filename* 采用了 RFC 5987 中规定的编码方式,优先读取
+
+ }
+
+ }
+
/**
* https://esarad.esa.int/
*/
@@ -51,7 +85,8 @@ public class CrawlerRecordServiceImpl extends ServiceImpl> tableData = new ArrayList<>();
+
+ List tableData = new ArrayList<>();
Document doc = Jsoup.connect("https://esarad.esa.int").get();
Element table = doc.getElementById("dtReports");
@@ -78,33 +113,71 @@ public class CrawlerRecordServiceImpl extends ServiceImpl 10) {
+ break;
+ }
+ Element row = rows.get(j);
+
// Select all cells in the row
+ CrawlerRecord crawlerRecord = new CrawlerRecord();
+
Elements cells = row.select("td");
if (cells.size() == headerNames.size()) { // Ensure the number of cells matches the number of headers
- Map rowMap = new HashMap<>();
for (int i = 0; i < cells.size(); i++) {
+
String header = headerNames.get(i);
String value = cells.get(i).text();
- rowMap.put(header, value);
+ switch (header) {
+ case "Radiation Test Method":
+ crawlerRecord.setTestMethod(value);
+ break;
+ case "EPPL Familiy":
+ crawlerRecord.setCategory(value);
+ break;
+ case "EPPL Group":
+ crawlerRecord.setSubCategory(value);
+ break;
+ case "DUT Manufacturer":
+ crawlerRecord.setManufacturer(value);
+ break;
+ case "Function":
+ crawlerRecord.setFunctionType(value);
+ break;
+ case "Report Date":
+ crawlerRecord.setReportDate(value);
+ break;
+ case "Report Source":
+ crawlerRecord.setReportSource(value);
+ break;
+ case "Technology":
+ crawlerRecord.setTechnology(value);
+ break;
+ case "Id":
+ crawlerRecord.setReportId(value);
+ break;
+ case "DUT part type":
+ crawlerRecord.setCode(value);
+ break;
+ case "Radiation Test Type":
+ crawlerRecord.setRadiationTestType(value);
+ break;
+ }
+
}
// Add the map to the list
- tableData.add(rowMap);
+ tableData.add(crawlerRecord);
}
}
// Print the list of maps
- for (Map rowMap : tableData) {
- rowMap.put("fileId", rowMap.get("Id"));
- rowMap.put("fileUrl", "https://esarad.esa.int/?id=" + rowMap.get("Id") + "&handler=DownloadDb");
- rowMap.put("fileName", rowMap.get("Id") + ".pdf");
-
- System.out.println(rowMap);
+ for (CrawlerRecord rowMap : tableData) {
+ rowMap.setFileUrl("https://esarad.esa.int/?id=" + rowMap.getReportId() + "&handler=DownloadDb");
}
- saveFiles(tableData, esaradKey);
+ saveEsaradFiles(tableData, esaradKey);
} catch (Exception e) {
redisUtil.del(esaradKey);
@@ -139,20 +212,29 @@ public class CrawlerRecordServiceImpl extends ServiceImpl> tableData = new ArrayList<>();
+ List tableData = new ArrayList<>();
- list.forEach(row -> {
- String fileId = String.valueOf(((JSONArray) row).get(0));
- String fileNames = String.valueOf(((JSONArray) row).get(4));
- Map map = new HashMap<>();
- map.put("fileName", fileNames);
+ for (int i = 0; i < list.size(); i++) {
+
+ if (i > 10) {
+ break;
+ }
+ JSONArray row = (JSONArray) list.get(i);
+ String fileNames = String.valueOf(row.get(4));
+ CrawlerRecord map = new CrawlerRecord();
String fileUrls = fixFileNames(fileNames);
- map.put("fileId", fileId);
- map.put("fileUrl", fileUrls);
- System.out.println(row);
+ map.setFileUrl(fileUrls);
+
+ map.setCode(String.valueOf(row.get(0)));
+ map.setFunctionType(String.valueOf(row.get(1)));
+ map.setManufacturer(String.valueOf(row.get(2)));
+ map.setReportDate(String.valueOf(row.get(3)));
+ map.setTestMethod(String.valueOf(row.get(5)));
+ map.setCategory(String.valueOf(row.get(6)));
+ map.setReportId(map.getCode().replaceAll(" ", ""));
tableData.add(map);
- });
- saveFiles(tableData, radhomeKey);
+ }
+ saveRadhomeFiles(tableData, radhomeKey);
}
@Override
@@ -180,29 +262,65 @@ public class CrawlerRecordServiceImpl extends ServiceImpl> fileList, String type) {
+ /**
+ * Asynchronously copies the files referenced by the given Radhome records into MinIO and saves the records.
+ * <p>
+ * A record's fileUrl may contain several source URLs separated by ";". Each URL is downloaded and
+ * uploaded under the "radhome/" prefix; the record's fileUrl is then replaced by the upload
+ * result(s), joined with ";", and the record is saved. Records without a file URL, or whose
+ * upload result is blank, are skipped.
+ *
+ * @param fileList records to process, each carrying its source file URL(s) in fileUrl
+ * @param type     Redis key that is deleted when the background task ends, on success or failure
+ */
+ private void saveRadhomeFiles(List<CrawlerRecord> fileList, String type) {
ThreadUtil.execute(() -> {
try {
- for (Map map : fileList) {
- String fileId = map.get("fileId");
- String resultStr = "";
- String fileUrl = map.get("fileUrl");
+ for (CrawlerRecord record : fileList) {
+ String fileUploadResult = "";
+ String fileUrl = record.getFileUrl();
+ if (StringUtils.isBlank(fileUrl)) {
+ // Nothing to download for this record.
+ continue;
+ }
if (fileUrl.contains(";")) {
String[] split = fileUrl.split(";");
List<String> result = new ArrayList<>();
for (String s : split) {
byte[] fileBytes = HttpUtil.downloadBytes(s);
InputStream inputStream = new ByteArrayInputStream(fileBytes);
- result.add(MinioUtil.upload(inputStream, s.substring(s.lastIndexOf("/") + 1)));
- resultStr = StringUtils.join(result, ";");
+ result.add(MinioUtil.upload(inputStream, "radhome/" + s.substring(s.lastIndexOf("/") + 1)));
}
+ // Join once, after every part has been uploaded.
+ fileUploadResult = StringUtils.join(result, ";");
} else {
byte[] fileBytes = HttpUtil.downloadBytes(fileUrl);
InputStream inputStream = new ByteArrayInputStream(fileBytes);
- resultStr = MinioUtil.upload(inputStream, fileUrl.substring(fileUrl.lastIndexOf("/") + 1));
+ fileUploadResult = MinioUtil.upload(inputStream, "radhome/" + fileUrl.substring(fileUrl.lastIndexOf("/") + 1));
}
+ log.info("radhome upload result: {}", fileUploadResult);
+ if (StringUtils.isNotBlank(fileUploadResult)) {
+ record.setFileUrl(fileUploadResult);
+ save(record);
+ }
+ }
+ } catch (Exception e) {
+ log.error("Failed to save radhome files", e);
+ } finally {
+ // Release the Redis key on failure too, so a failed run does not leave it set.
+ redisUtil.del(type);
+ }
+ });
+ }
+
+ /**
+ * Asynchronously downloads the report file of each ESARAD record, uploads it to MinIO and saves the record.
+ * <p>
+ * Records whose report ID is already stored are skipped. Each file is downloaded into a
+ * per-report directory under the temp dir and uploaded under the "esarad/" prefix; the record's
+ * fileUrl is replaced by the upload result before the record is saved.
+ *
+ * @param fileList records to process; fileUrl holds the download URL and reportId the report ID
+ * @param type     Redis key that is deleted after all records have been processed
+ */
+ private void saveEsaradFiles(List<CrawlerRecord> fileList, String type) {
+ ThreadUtil.execute(() -> {
+ try {
+ for (CrawlerRecord record : fileList) {
+ // De-duplicate on the report ID (not the report source). throwEx=false tolerates
+ // duplicate rows that already exist instead of failing the whole run.
+ CrawlerRecord dbData = getOne(Wrappers.<CrawlerRecord>lambdaQuery()
+ .eq(CrawlerRecord::getReportId, record.getReportId()), false);
+ if (Objects.nonNull(dbData)) {
+ continue;
+ }
+ String resultStr = "";
+
+ String dest = FileUtil.getTmpDirPath() + "esarad-" + record.getReportId() + "/";
+ FileUtil.mkdir(dest);
+ long fileSize = HttpUtil.downloadFile(record.getFileUrl(), dest);
+
+ if (fileSize > 0) {
+ List<File> files = FileUtil.loopFiles(dest);
+ for (File file : files) {
+ String objectName = "esarad/" + record.getReportId() + "-" + URLUtil.decode(file.getName(), Charset.defaultCharset());
+ // Close the file stream even when the upload fails.
+ try (InputStream in = IoUtil.toStream(file)) {
+ resultStr = MinioUtil.upload(in, objectName);
+ }
+ }
+ }
+
System.out.println(resultStr);
+ if (StringUtils.isNotBlank(resultStr)) {
+ record.setFileUrl(resultStr);
+ save(record);
+ }
}
redisUtil.del(type);
} catch (Exception e) {
diff --git a/pom.xml b/pom.xml
index fd71c01..f50cf53 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
11.2.0.3
4.0
8.0.27
- 5.8.25
+ 5.8.32
9.0.0
8.1.1.49
@@ -343,6 +343,11 @@
hutool-crypto
${hutool.version}
+
+ cn.hutool
+ hutool-http
+ ${hutool.version}
+
io.minio