update
This commit is contained in:
@@ -45,11 +45,25 @@
|
||||
<artifactId>drag-free-springboot3</artifactId>
|
||||
<version>1.1.2</version>
|
||||
</dependency>
|
||||
<!-- chatgpt -->
|
||||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-core</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-http</artifactId>
|
||||
</dependency>
|
||||
<!-- chatgpt -->
|
||||
<dependency>
|
||||
<groupId>org.jeecgframework.boot</groupId>
|
||||
<artifactId>jeecg-boot-starter3-chatgpt</artifactId>
|
||||
<version>3.7.0</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-all</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<!-- jsoup HTML parser library @ https://jsoup.org/ -->
|
||||
|
||||
148
physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/entity/CrawlerRecord.java
Executable file → Normal file
148
physical-module-system/physical-system-biz/src/main/java/org/jeecg/modules/database/entity/CrawlerRecord.java
Executable file → Normal file
@@ -1,77 +1,145 @@
|
||||
package org.jeecg.modules.database.entity;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Date;
|
||||
import java.math.BigDecimal;
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.baomidou.mybatisplus.annotation.TableLogic;
|
||||
import lombok.Data;
|
||||
import com.fasterxml.jackson.annotation.JsonFormat;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
import org.jeecgframework.poi.excel.annotation.Excel;
|
||||
import org.jeecg.common.aspect.annotation.Dict;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.Accessors;
|
||||
import org.jeecgframework.poi.excel.annotation.Excel;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* @Description: Crawler record entity mapped to the crawler_record table
|
||||
* @Author: jeecg-boot
|
||||
* @Date: 2024-09-03
|
||||
* @Date: 2024-10-14
|
||||
* @Version: V1.0
|
||||
*/
|
||||
@Data
|
||||
@TableName("crawler_record")
|
||||
@Accessors(chain = true)
|
||||
@EqualsAndHashCode(callSuper = false)
|
||||
@Schema(description="爬虫记录")
|
||||
@Schema(description = "爬虫记录")
|
||||
public class CrawlerRecord implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/** Primary key */
|
||||
@TableId(type = IdType.ASSIGN_ID)
|
||||
/**
|
||||
* Primary key
|
||||
*/
|
||||
@TableId(type = IdType.ASSIGN_ID)
|
||||
@Schema(description = "主键")
|
||||
private String id;
|
||||
/** Created by */
|
||||
/**
|
||||
* Created by
|
||||
*/
|
||||
@Schema(description = "创建人")
|
||||
private String createBy;
|
||||
/** Creation date */
|
||||
@JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
|
||||
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
|
||||
/**
|
||||
* Creation date
|
||||
*/
|
||||
@JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm:ss")
|
||||
@DateTimeFormat(pattern = "yyyy-MM-dd HH:mm:ss")
|
||||
@Schema(description = "创建日期")
|
||||
private Date createTime;
|
||||
/** Updated by */
|
||||
/**
|
||||
* Updated by
|
||||
*/
|
||||
@Schema(description = "更新人")
|
||||
private String updateBy;
|
||||
/** Update date */
|
||||
@JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
|
||||
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
|
||||
/**
|
||||
* Update date
|
||||
*/
|
||||
@JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm:ss")
|
||||
@DateTimeFormat(pattern = "yyyy-MM-dd HH:mm:ss")
|
||||
@Schema(description = "更新日期")
|
||||
private Date updateTime;
|
||||
/** Owning department */
|
||||
/**
|
||||
* Owning department
|
||||
*/
|
||||
@Schema(description = "所属部门")
|
||||
private String sysOrgCode;
|
||||
/** Source */
|
||||
@Excel(name = "来源", width = 15)
|
||||
@Schema(description = "来源")
|
||||
private String source;
|
||||
/** Code */
|
||||
@Excel(name = "编号", width = 15)
|
||||
/**
|
||||
* Type
|
||||
*/
|
||||
@Excel(name = "类型", width = 15)
|
||||
@Schema(description = "类型")
|
||||
private String type;
|
||||
/**
|
||||
* Code
|
||||
*/
|
||||
@Excel(name = "编号", width = 15)
|
||||
@Schema(description = "编号")
|
||||
private String code;
|
||||
/** Name */
|
||||
@Excel(name = "名称", width = 15)
|
||||
@Schema(description = "名称")
|
||||
private String name;
|
||||
/** File name */
|
||||
@Excel(name = "文件名", width = 15)
|
||||
@Schema(description = "文件名")
|
||||
private String fileName;
|
||||
/** File ID */
|
||||
@Excel(name = "文件ID", width = 15)
|
||||
@Schema(description = "文件ID")
|
||||
private String fileId;
|
||||
/**
|
||||
* Manufacturer
|
||||
*/
|
||||
@Excel(name = "制造商", width = 15)
|
||||
@Schema(description = "制造商")
|
||||
private String manufacturer;
|
||||
/**
|
||||
* File
|
||||
*/
|
||||
@Excel(name = "文件", width = 15)
|
||||
@Schema(description = "文件")
|
||||
private String fileUrl;
|
||||
/**
|
||||
* Test type
|
||||
*/
|
||||
@Excel(name = "测试类型", width = 15)
|
||||
@Schema(description = "测试类型")
|
||||
private String functionType;
|
||||
/**
|
||||
* Test method
|
||||
*/
|
||||
@Excel(name = "测试方式", width = 15)
|
||||
@Schema(description = "测试方式")
|
||||
private String testMethod;
|
||||
/**
|
||||
* Group
|
||||
*/
|
||||
@Excel(name = "分组", width = 15)
|
||||
@Schema(description = "分组")
|
||||
private String category;
|
||||
/**
|
||||
* Sub-group
|
||||
*/
|
||||
@Excel(name = "二级分组", width = 15)
|
||||
@Schema(description = "二级分组")
|
||||
private String subCategory;
|
||||
/**
|
||||
* Document date
|
||||
*/
|
||||
@Excel(name = "文档日期", width = 15)
|
||||
@Schema(description = "文档日期")
|
||||
private String reportDate;
|
||||
/**
|
||||
* Test technology
|
||||
*/
|
||||
@Excel(name = "测试技术", width = 15)
|
||||
@Schema(description = "测试技术")
|
||||
private String technology;
|
||||
/**
|
||||
* Report source
|
||||
*/
|
||||
@Excel(name = "报告来源", width = 15)
|
||||
@Schema(description = "报告来源")
|
||||
private String reportSource;
|
||||
|
||||
/**
|
||||
* Radiation test type
|
||||
*/
|
||||
@Excel(name = "辐射测试类型", width = 15)
|
||||
@Schema(description = "辐射测试类型")
|
||||
private String radiationTestType;
|
||||
/**
|
||||
* Report ID
|
||||
*/
|
||||
@Excel(name = "报告ID", width = 15)
|
||||
@Schema(description = "报告ID")
|
||||
private String reportId;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,21 @@
|
||||
package org.jeecg.modules.database.service.impl;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.io.FileUtil;
|
||||
import cn.hutool.core.io.IoUtil;
|
||||
import cn.hutool.core.thread.ThreadUtil;
|
||||
import cn.hutool.core.util.ObjUtil;
|
||||
import cn.hutool.core.util.ReUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.core.util.URLUtil;
|
||||
import cn.hutool.http.Header;
|
||||
import cn.hutool.http.HttpRequest;
|
||||
import cn.hutool.http.HttpResponse;
|
||||
import cn.hutool.http.HttpUtil;
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import com.alibaba.fastjson2.JSONArray;
|
||||
import com.alibaba.fastjson2.JSONObject;
|
||||
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.*;
|
||||
@@ -24,9 +35,16 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.jeecg.modules.online.auth.b.a.f;
|
||||
import static org.jeecg.modules.online.auth.b.a.i;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@@ -40,6 +58,22 @@ public class CrawlerRecordServiceImpl extends ServiceImpl<CrawlerRecordMapper, C
|
||||
// Redis key used as the "crawl in progress" marker for the esarad.esa.int crawler:
// it is set with a 24-hour expiry when a crawl starts and deleted when the crawl fails or finishes.
private String esaradKey = "crawler-esarad";
|
||||
// Redis key for the Radhome crawler; it is handed to saveRadhomeFiles, which deletes it.
// NOTE(review): presumably set the same way as esaradKey when a Radhome crawl starts — confirm.
private String radhomeKey = "crawler-radhome";
|
||||
|
||||
public static void main(String[] args) {
|
||||
HttpResponse execute = HttpRequest.get("https://esarad.esa.int/?id=76&handler=DownloadDb").execute();
|
||||
final List<String> dispositions = execute.headerList(Header.CONTENT_DISPOSITION.getValue());
|
||||
String fileName = null;
|
||||
if (CollUtil.isNotEmpty(dispositions)) {
|
||||
for (String disposition : dispositions) {
|
||||
fileName = ReUtil.getGroup1("filename" + "=([^;]+)", disposition);
|
||||
}
|
||||
System.out.println(URLUtil.decode("N2920A%20TID_1009_01.pdf", Charset.defaultCharset()));
|
||||
System.out.println(fileName);
|
||||
// filename* 采用了 RFC 5987 中规定的编码方式,优先读取
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* https://esarad.esa.int/
|
||||
*/
|
||||
@@ -51,7 +85,8 @@ public class CrawlerRecordServiceImpl extends ServiceImpl<CrawlerRecordMapper, C
|
||||
throw new RuntimeException("爬虫任务执行中");
|
||||
}
|
||||
redisUtil.set(esaradKey, "T", 24 * 60 * 60);
|
||||
List<Map<String, String>> tableData = new ArrayList<>();
|
||||
|
||||
List<CrawlerRecord> tableData = new ArrayList<>();
|
||||
|
||||
Document doc = Jsoup.connect("https://esarad.esa.int").get();
|
||||
Element table = doc.getElementById("dtReports");
|
||||
@@ -78,33 +113,71 @@ public class CrawlerRecordServiceImpl extends ServiceImpl<CrawlerRecordMapper, C
|
||||
// Select all rows in the tbody
|
||||
Elements rows = tbody.select("tr");
|
||||
|
||||
for (Element row : rows) {
|
||||
for (int j = 0; j < rows.size(); j++) {
|
||||
if (j > 10) {
|
||||
break;
|
||||
}
|
||||
Element row = rows.get(j);
|
||||
|
||||
// Select all cells in the row
|
||||
CrawlerRecord crawlerRecord = new CrawlerRecord();
|
||||
|
||||
Elements cells = row.select("td");
|
||||
|
||||
if (cells.size() == headerNames.size()) { // Ensure the number of cells matches the number of headers
|
||||
Map<String, String> rowMap = new HashMap<>();
|
||||
|
||||
for (int i = 0; i < cells.size(); i++) {
|
||||
|
||||
String header = headerNames.get(i);
|
||||
String value = cells.get(i).text();
|
||||
rowMap.put(header, value);
|
||||
switch (header) {
|
||||
case "Radiation Test Method":
|
||||
crawlerRecord.setTestMethod(value);
|
||||
break;
|
||||
case "EPPL Familiy":
|
||||
crawlerRecord.setCategory(value);
|
||||
break;
|
||||
case "EPPL Group":
|
||||
crawlerRecord.setSubCategory(value);
|
||||
break;
|
||||
case "DUT Manufacturer":
|
||||
crawlerRecord.setManufacturer(value);
|
||||
break;
|
||||
case "Function":
|
||||
crawlerRecord.setFunctionType(value);
|
||||
break;
|
||||
case "Report Date":
|
||||
crawlerRecord.setReportDate(value);
|
||||
break;
|
||||
case "Report Source":
|
||||
crawlerRecord.setReportSource(value);
|
||||
break;
|
||||
case "Technology":
|
||||
crawlerRecord.setTechnology(value);
|
||||
break;
|
||||
case "Id":
|
||||
crawlerRecord.setReportId(value);
|
||||
break;
|
||||
case "DUT part type":
|
||||
crawlerRecord.setCode(value);
|
||||
break;
|
||||
case "Radiation Test Type":
|
||||
crawlerRecord.setRadiationTestType(value);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Add the map to the list
|
||||
tableData.add(rowMap);
|
||||
tableData.add(crawlerRecord);
|
||||
}
|
||||
}
|
||||
|
||||
// Print the list of maps
|
||||
for (Map<String, String> rowMap : tableData) {
|
||||
rowMap.put("fileId", rowMap.get("Id"));
|
||||
rowMap.put("fileUrl", "https://esarad.esa.int/?id=" + rowMap.get("Id") + "&handler=DownloadDb");
|
||||
rowMap.put("fileName", rowMap.get("Id") + ".pdf");
|
||||
|
||||
System.out.println(rowMap);
|
||||
for (CrawlerRecord rowMap : tableData) {
|
||||
rowMap.setFileUrl("https://esarad.esa.int/?id=" + rowMap.getReportId() + "&handler=DownloadDb");
|
||||
}
|
||||
saveFiles(tableData, esaradKey);
|
||||
saveEsaradFiles(tableData, esaradKey);
|
||||
|
||||
} catch (Exception e) {
|
||||
redisUtil.del(esaradKey);
|
||||
@@ -139,20 +212,29 @@ public class CrawlerRecordServiceImpl extends ServiceImpl<CrawlerRecordMapper, C
|
||||
JSONArray list = jsonObject.getJSONArray("ROWS");
|
||||
System.out.println("total count " + total);
|
||||
System.out.println("total list " + list.get(0));
|
||||
List<Map<String, String>> tableData = new ArrayList<>();
|
||||
List<CrawlerRecord> tableData = new ArrayList<>();
|
||||
|
||||
list.forEach(row -> {
|
||||
String fileId = String.valueOf(((JSONArray) row).get(0));
|
||||
String fileNames = String.valueOf(((JSONArray) row).get(4));
|
||||
Map<String, String> map = new HashMap<>();
|
||||
map.put("fileName", fileNames);
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
|
||||
if (i > 10) {
|
||||
break;
|
||||
}
|
||||
JSONArray row = (JSONArray) list.get(i);
|
||||
String fileNames = String.valueOf(row.get(4));
|
||||
CrawlerRecord map = new CrawlerRecord();
|
||||
String fileUrls = fixFileNames(fileNames);
|
||||
map.put("fileId", fileId);
|
||||
map.put("fileUrl", fileUrls);
|
||||
System.out.println(row);
|
||||
map.setFileUrl(fileUrls);
|
||||
|
||||
map.setCode(String.valueOf(row.get(0)));
|
||||
map.setFunctionType(String.valueOf(row.get(1)));
|
||||
map.setManufacturer(String.valueOf(row.get(2)));
|
||||
map.setReportDate(String.valueOf(row.get(3)));
|
||||
map.setTestMethod(String.valueOf(row.get(5)));
|
||||
map.setCategory(String.valueOf(row.get(6)));
|
||||
map.setReportId(map.getCode().replaceAll(" ", ""));
|
||||
tableData.add(map);
|
||||
});
|
||||
saveFiles(tableData, radhomeKey);
|
||||
}
|
||||
saveRadhomeFiles(tableData, radhomeKey);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -180,29 +262,65 @@ public class CrawlerRecordServiceImpl extends ServiceImpl<CrawlerRecordMapper, C
|
||||
return StringUtils.join(result, ";");
|
||||
}
|
||||
|
||||
|
||||
private void saveFiles(List<Map<String, String>> fileList, String type) {
|
||||
private void saveRadhomeFiles(List<CrawlerRecord> fileList, String type) {
|
||||
ThreadUtil.execute(() -> {
|
||||
try {
|
||||
for (Map<String, String> map : fileList) {
|
||||
String fileId = map.get("fileId");
|
||||
String resultStr = "";
|
||||
String fileUrl = map.get("fileUrl");
|
||||
for (CrawlerRecord record : fileList) {
|
||||
String fileUploadResult = "";
|
||||
String fileUrl = record.getFileUrl();
|
||||
if (fileUrl.contains(";")) {
|
||||
String[] split = fileUrl.split(";");
|
||||
List<String> result = new ArrayList<>();
|
||||
for (String s : split) {
|
||||
byte[] fileBytes = HttpUtil.downloadBytes(s);
|
||||
InputStream inputStream = new ByteArrayInputStream(fileBytes);
|
||||
result.add(MinioUtil.upload(inputStream, s.substring(s.lastIndexOf("/") + 1)));
|
||||
resultStr = StringUtils.join(result, ";");
|
||||
result.add(MinioUtil.upload(inputStream, "radhome/" + s.substring(s.lastIndexOf("/") + 1)));
|
||||
fileUploadResult = StringUtils.join(result, ";");
|
||||
}
|
||||
} else {
|
||||
byte[] fileBytes = HttpUtil.downloadBytes(fileUrl);
|
||||
InputStream inputStream = new ByteArrayInputStream(fileBytes);
|
||||
resultStr = MinioUtil.upload(inputStream, fileUrl.substring(fileUrl.lastIndexOf("/") + 1));
|
||||
fileUploadResult = MinioUtil.upload(inputStream, "radhome/" + fileUrl.substring(fileUrl.lastIndexOf("/") + 1));
|
||||
}
|
||||
System.out.println(fileUploadResult);
|
||||
if (StringUtils.isNotBlank(fileUploadResult)) {
|
||||
record.setFileUrl(fileUploadResult);
|
||||
save(record);
|
||||
}
|
||||
}
|
||||
redisUtil.del(type);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void saveEsaradFiles(List<CrawlerRecord> fileList, String type) {
|
||||
ThreadUtil.execute(() -> {
|
||||
try {
|
||||
for (CrawlerRecord record : fileList) {
|
||||
CrawlerRecord dbData = getOne(Wrappers.<CrawlerRecord>lambdaQuery().eq(CrawlerRecord::getReportSource, record.getReportId()));
|
||||
if (Objects.nonNull(dbData)) {
|
||||
continue;
|
||||
}
|
||||
String resultStr = "";
|
||||
|
||||
String dest = FileUtil.getTmpDirPath() + "esarad-" + record.getReportId() + "/";
|
||||
FileUtil.mkdir(dest);
|
||||
long fileSize = HttpUtil.downloadFile(record.getFileUrl(), dest);
|
||||
|
||||
if (fileSize > 0) {
|
||||
List<File> files = FileUtil.loopFiles(dest);
|
||||
for (File file : files) {
|
||||
resultStr = MinioUtil.upload(IoUtil.toStream(file), "esarad/" + record.getReportId() + "-" + URLUtil.decode(file.getName(), Charset.defaultCharset()) );
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(resultStr);
|
||||
if (StringUtils.isNotBlank(resultStr)) {
|
||||
record.setFileUrl(resultStr);
|
||||
save(record);
|
||||
}
|
||||
}
|
||||
redisUtil.del(type);
|
||||
} catch (Exception e) {
|
||||
|
||||
Reference in New Issue
Block a user