This commit is contained in:
ls
2024-10-23 16:56:01 +08:00
parent 0f65911496
commit 5e0bbbd757
3 changed files with 277 additions and 220 deletions

View File

@@ -79,11 +79,9 @@ public class ImportRecord implements Serializable {
@Schema(description = "生产厂家")
private String manufacturer;
/**试验时间*/
@Excel(name = "试验时间", width = 20, format = "yyyy-MM-dd HH:mm:ss")
@JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
@Excel(name = "试验时间", width = 20)
@Schema(description = "试验时间")
private Date experimentDate;
private String experimentDate;
/**数据来源*/
@Excel(name = "数据来源", width = 15)
@Schema(description = "数据来源")

View File

@@ -1,9 +1,14 @@
import os
import requests
import csv
import mysql.connector
from bs4 import BeautifulSoup
import hashlib
import os
import re
from datetime import datetime
import mysql.connector
import requests
from bs4 import BeautifulSoup
from minio import Minio
from minio.error import S3Error
# Directory where downloaded files are stored
download_dir = 'downloaded_files'
@@ -11,57 +16,94 @@ os.makedirs(download_dir, exist_ok=True)
# MySQL connection settings. Every value can be overridden through the
# environment; the literal defaults are used when a variable is unset.
# NOTE(review): the default credentials are weak and are kept in source —
# set the MYSQL_* / MINIO_* variables for any shared deployment.
db_config = {
    'host': os.environ.get('MYSQL_HOST', '192.168.50.100'),
    'port': int(os.environ.get('MYSQL_PORT', '23306')),
    'user': os.environ.get('MYSQL_USER', 'root'),
    'password': os.environ.get('MYSQL_PASSWORD', '123456'),
    'database': os.environ.get('MYSQL_DATABASE', 'physical-boot'),
}
# Module-level connection and cursor, opened once at import time.
connection = mysql.connector.connect(**db_config)
cursor = connection.cursor()
# MinIO client settings
minio_client = Minio(
    os.environ.get('MINIO_ENDPOINT', '192.168.50.100:29000'),  # MinIO server host:port
    access_key=os.environ.get('MINIO_ACCESS_KEY', 'root'),
    secret_key=os.environ.get('MINIO_SECRET_KEY', '12345678'),
    secure=False,  # False when the server is reached over plain HTTP
)
bucket_name = 'physical'
def get_md5(input_string):
    """Return the hexadecimal MD5 digest of ``input_string``.

    The string is encoded as UTF-8 before hashing, because hashlib
    operates on bytes rather than text.
    """
    encoded = input_string.encode('utf-8')
    return hashlib.md5(encoded).hexdigest()
def save_to_mysql(data):
    """Insert one record into the ``import_record`` table.

    A dedicated connection is opened from ``db_config`` for the call and
    is always closed again, even when the insert fails.

    Args:
        data: Sequence of 17 values in the column order ``id, create_by,
            create_time, update_by, update_time, sys_org_code, device_type,
            device_name, device_mode, device_function, device_batch,
            manufacturer, experiment_date, data_source, experiment_user,
            total_count, file_list``.

    Returns:
        None. The function is best-effort: any error is printed and
        swallowed so that one bad row does not abort the caller.
    """
    insert_query = """INSERT INTO `import_record` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `sys_org_code`, `device_type`, `device_name`, `device_mode`, `device_function`, `device_batch`, `manufacturer`, `experiment_date`, `data_source`, `experiment_user`, `total_count`, `file_list`)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"""
    try:
        connection = mysql.connector.connect(**db_config)
        try:
            cursor = connection.cursor()
            try:
                cursor.execute(insert_query, data)
                connection.commit()
            finally:
                # Release the cursor whether or not the insert succeeded.
                cursor.close()
        finally:
            # Release the connection whether or not the insert succeeded.
            connection.close()
    except Exception as e:
        # Report the failure (connect, insert, commit or close) and carry on.
        print(e)
# URL of the page whose content is fetched
url = 'https://esarad.esa.int/'
def save_to_db_file(data):
    """Insert one file record into the ``oss_file`` table.

    Uses the module-level ``cursor`` and ``connection`` and commits
    immediately after the insert.

    Args:
        data: Sequence of five values in the column order ``id, file_name,
            url, create_by, create_time``.

    Returns:
        None. The function is best-effort: any error is printed and
        swallowed so the caller can continue with the next file.
    """
    insert_query = """INSERT INTO `oss_file` (`id`,`file_name`,`url`,`create_by`,`create_time` )
    VALUES (%s, %s, %s, %s, %s);"""
    try:
        cursor.execute(insert_query, data)
        connection.commit()
    except Exception as e:
        # Report the failure and carry on.
        print(e)
def upload_to_minio(folder_path):
    """Upload every regular file in ``folder_path`` to MinIO and record it.

    Each file is stored under the object name ``ESA/<folder>/<file>`` in
    ``bucket_name`` (the bucket is created when missing). For every
    successful upload a row is written through ``save_to_db_file`` whose id
    is the MD5 of the object name.

    Args:
        folder_path: Directory whose direct children are uploaded.
            Sub-directories and other non-file entries are ignored.

    Returns:
        list[str]: Ids of the files that were uploaded successfully, in the
        order ``os.listdir`` returned them.
    """
    if not minio_client.bucket_exists(bucket_name):
        minio_client.make_bucket(bucket_name)
    # normpath drops a trailing separator; without it basename('dir/') is ''
    # and the object name would become 'ESA//<file>'.
    folder_name = os.path.basename(os.path.normpath(folder_path))
    file_ids = []
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        # Only regular files are uploaded.
        if not os.path.isfile(file_path):
            continue
        object_name = f"ESA/{folder_name}/{file_name}"
        try:
            minio_client.fput_object(bucket_name, object_name, file_path)
        except S3Error as err:
            # A failed upload is reported and skipped; the remaining files
            # are still processed.
            print(f"上传 {file_name} 时出错: {err}")
            continue
        print(f"已上传: {file_path} -> {bucket_name}/{object_name}")
        file_id = get_md5(object_name)
        file_ids.append(file_id)
        db_file = [
            file_id,
            file_name,
            f"http://58.215.212.230:8005/oss/{bucket_name}/{object_name}",
            'admin',
            datetime.now(),
        ]
        save_to_db_file(db_file)
    return file_ids
def scrape():
try:
response = requests.get(url)
# 确保请求成功
@@ -237,9 +279,26 @@ def scrape():
'' # 其他需要说明的事项(可以根据需要填充)
]
writer.writerow(data_row)
save_to_mysql(data_row)
# (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `sys_org_code`, `device_type`,
# `device_name`, `device_mode`, `device_function`, `device_batch`, `manufacturer`, `experiment_date`, `data_source`, `experiment_user`, `total_count`, `file_list`)
print(f'CSV created and data saved to MySQL: {csv_file_path}')
else:
print(f'Failed to download: {download_url}')
upload_ids = upload_to_minio(folder_path)
data_db = ['ESA-' + cells[0].get_text(strip=True), 'Crawler', datetime.now(), None, None, None,
cells[5].get_text(strip=True),
cells[1].get_text(strip=True), cells[1].get_text(strip=True),
cells[8].get_text(strip=True),
cells[7].get_text(strip=True), cells[2].get_text(strip=True),
cells[11].get_text(strip=True),
'ESA', None, None, ','.join(upload_ids)
]
save_to_mysql(data_db)
else:
print(f'Error: {response.status_code}')
finally:
# 关闭游标和连接
cursor.close()
connection.close()

View File

@@ -4,4 +4,4 @@ urllib3
flask
lxml
mysql-connector-python
gunicorn
minio