update
This commit is contained in:
@@ -79,11 +79,9 @@ public class ImportRecord implements Serializable {
|
||||
@Schema(description = "生产厂家")
|
||||
private String manufacturer;
|
||||
/**试验时间*/
|
||||
@Excel(name = "试验时间", width = 20, format = "yyyy-MM-dd HH:mm:ss")
|
||||
@JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
|
||||
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
|
||||
@Excel(name = "试验时间", width = 20)
|
||||
@Schema(description = "试验时间")
|
||||
private Date experimentDate;
|
||||
private String experimentDate;
|
||||
/**数据来源*/
|
||||
@Excel(name = "数据来源", width = 15)
|
||||
@Schema(description = "数据来源")
|
||||
|
||||
147
scripts/esa.py
147
scripts/esa.py
@@ -1,9 +1,14 @@
|
||||
import os
|
||||
import requests
|
||||
import csv
|
||||
import mysql.connector
|
||||
from bs4 import BeautifulSoup
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import mysql.connector
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
|
||||
# 设置下载目录
|
||||
download_dir = 'downloaded_files'
|
||||
@@ -11,57 +16,94 @@ os.makedirs(download_dir, exist_ok=True)
|
||||
|
||||
# MySQL connection settings.
# Each value can be overridden through an environment variable so that the
# credentials do not have to be edited in (or committed with) this script; the
# defaults are the values this script was previously hard-coded with.
db_config = {
    'host': os.environ.get('ESA_DB_HOST', '192.168.50.100'),
    'port': int(os.environ.get('ESA_DB_PORT', '23306')),
    'user': os.environ.get('ESA_DB_USER', 'root'),
    'password': os.environ.get('ESA_DB_PASSWORD', '123456'),
    'database': os.environ.get('ESA_DB_NAME', 'physical-boot'),
}
|
||||
|
||||
|
||||
def save_to_mysql(data):
|
||||
"""保存数据到 MySQL 数据库"""
|
||||
connection = mysql.connector.connect(**db_config)
|
||||
cursor = connection.cursor()
|
||||
|
||||
insert_query = """
|
||||
INSERT INTO your_table (
|
||||
sequence_number, test_object_type, test_start_date, test_end_date, test_object_name,
|
||||
test_object_model, test_object_quantity, test_nature, test_purpose, device_name,
|
||||
data_provider, test_commissioner, failure_criteria, failure_quantity, test_result_description,
|
||||
outcome, source_project_name, source_project_type, classification, component_name,
|
||||
component_model, component_batch_number, manufacturer, is_domestic, component_maturity,
|
||||
wafer_material, wafer_batch_number, package_material, package_technology, is_flip_chip,
|
||||
manufacturing_process, process_feature_size, process_platform, process_code, process_version,
|
||||
quality_grade, reinforcement_measures, working_principle, supply_capacity, application_experience,
|
||||
specification_manual, device_image, electronic_system_classification, electronic_system_name,
|
||||
electronic_system_model, manufacturer, electronic_system_function, electronic_system_reinforcement_measures,
|
||||
electronic_system_image, material_name, material_model, material_components, material_purpose,
|
||||
material_manufacturer, material_physical_structure, material_usage_experience, irradiation_test_outline,
|
||||
outline_expert_category, standard_specifications, test_steps_description, irradiation_process_power,
|
||||
dc_bias_condition_description, ac_bias_condition_description, clock_frequency, test_pattern,
|
||||
other_bias_conditions, irradiation_bias_schematic, test_method, test_schematic,
|
||||
test_instrument_name, test_instrument_model, test_instrument_manufacturer, test_instrument_certificate,
|
||||
test_software_name, test_software_developer, test_software_version, test_site_photo,
|
||||
tester_name, tester_organization, tester_phone, device_operator, third_party_person, third_party_organization,
|
||||
third_party_phone, other_notes, lead_aluminum_shielding, dose_rate, total_dose, dose_equivalent_material,
|
||||
test_object_number, test_parameter_name, test_parameter_unit, test_parameter_result,
|
||||
is_accelerated_test_data, is_annealing_data, annealing_temperature, annealing_time, raw_data,
|
||||
data_processing_method, other_notes_additional
|
||||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
|
||||
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
|
||||
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
|
||||
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||
"""
|
||||
# MinIO (S3-compatible object storage) client configuration.
minio_client = Minio(
    "192.168.50.100:29000",  # MinIO server address (host:port)
    access_key="root",       # replace with your access key
    secret_key="12345678",   # replace with your secret key
    secure=False,            # False because the endpoint is plain HTTP
)
# Bucket that receives every uploaded file.
bucket_name = 'physical'
|
||||
|
||||
|
||||
def get_md5(input_string):
    """Return the hexadecimal MD5 digest of ``input_string``.

    Args:
        input_string: Text to hash (encoded as UTF-8 before hashing), or
            raw ``bytes`` which are hashed as-is.

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    # hashlib works on bytes, so text has to be encoded first.
    if isinstance(input_string, str):
        input_string = input_string.encode('utf-8')
    return hashlib.md5(input_string).hexdigest()
|
||||
|
||||
|
||||
def save_to_mysql(data):
    """Insert one row into the ``import_record`` table and commit it.

    Runs on the shared ``cursor`` / ``connection`` objects that this
    function does not create itself. They are intentionally left open here:
    the same objects are used again by later inserts, and ``scrape()``
    closes them in its ``finally`` clause. Closing them after the first
    insert made every subsequent insert fail.

    Args:
        data: Sequence of 17 values, in the column order of the INSERT
            statement below (``id`` first, ``file_list`` last).

    A database error is reported on stdout and otherwise ignored, so one
    bad row does not abort the whole crawl.
    """
    insert_query = """INSERT INTO `import_record` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `sys_org_code`, `device_type`, `device_name`, `device_mode`, `device_function`, `device_batch`, `manufacturer`, `experiment_date`, `data_source`, `experiment_user`, `total_count`, `file_list`)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"""

    try:
        cursor.execute(insert_query, data)
        connection.commit()
    except mysql.connector.Error as e:
        # Best effort: report the failed row and let the caller continue.
        print(f'Failed to save import_record row: {e}')
|
||||
|
||||
|
||||
# 获取网页内容
|
||||
url = 'https://esarad.esa.int/'
|
||||
|
||||
|
||||
def save_to_db_file(data):
    """Insert one uploaded-file record into the ``oss_file`` table and commit it.

    Runs on the shared ``cursor`` / ``connection`` objects, which this
    function neither creates nor closes.

    Args:
        data: Sequence of 5 values in the column order
            ``(id, file_name, url, create_by, create_time)``.

    A database error is reported on stdout and otherwise ignored, so a
    failed bookkeeping insert does not abort the remaining uploads.
    """
    insert_query = """INSERT INTO `oss_file` (`id`,`file_name`,`url`,`create_by`,`create_time` )
    VALUES (%s, %s, %s, %s, %s);"""

    try:
        cursor.execute(insert_query, data)
        connection.commit()
    except mysql.connector.Error as e:
        # Best effort: report the failed record and let the caller continue.
        print(f'Failed to save oss_file row: {e}')
|
||||
|
||||
|
||||
def upload_to_minio(folder_path):
    """Upload every regular file in ``folder_path`` to MinIO and record it.

    The bucket ``bucket_name`` is created if it does not exist yet. Each
    file is stored under the object name ``ESA/<folder name>/<file name>``;
    after a successful upload a matching row is written through
    ``save_to_db_file``.

    Args:
        folder_path: Directory whose direct children are uploaded.
            Sub-directories and other non-file entries are skipped.

    Returns:
        List of file ids (MD5 of the object name) for the files that were
        uploaded successfully, in directory-listing order.
    """
    if not minio_client.bucket_exists(bucket_name):
        minio_client.make_bucket(bucket_name)

    # normpath drops a trailing separator; without it basename('dir/') is ''
    # and the object names would become 'ESA//<file>'.
    folder_name = os.path.basename(os.path.normpath(folder_path))

    file_ids = []
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        # Only regular files are uploaded.
        if not os.path.isfile(file_path):
            continue

        object_name = f"ESA/{folder_name}/{file_name}"
        try:
            minio_client.fput_object(bucket_name, object_name, file_path)
        except S3Error as err:
            # A failed upload is reported and skipped; it gets no id and no DB row.
            print(f"上传 {file_name} 时出错: {err}")
            continue

        print(f"已上传: {file_path} -> {bucket_name}/{object_name}")
        file_id = get_md5(object_name)
        file_ids.append(file_id)
        file_url = 'http://58.215.212.230:8005/oss/' + bucket_name + '/' + object_name
        save_to_db_file([file_id, file_name, file_url, 'admin', datetime.now()])
    return file_ids
|
||||
|
||||
|
||||
def scrape():
|
||||
try:
|
||||
response = requests.get(url)
|
||||
|
||||
# 确保请求成功
|
||||
@@ -237,9 +279,26 @@ def scrape():
|
||||
'' # 其他需要说明的事项(可以根据需要填充)
|
||||
]
|
||||
writer.writerow(data_row)
|
||||
save_to_mysql(data_row)
|
||||
# (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `sys_org_code`, `device_type`,
|
||||
# `device_name`, `device_mode`, `device_function`, `device_batch`, `manufacturer`, `experiment_date`, `data_source`, `experiment_user`, `total_count`, `file_list`)
|
||||
|
||||
print(f'CSV created and data saved to MySQL: {csv_file_path}')
|
||||
else:
|
||||
print(f'Failed to download: {download_url}')
|
||||
|
||||
upload_ids = upload_to_minio(folder_path)
|
||||
data_db = ['ESA-' + cells[0].get_text(strip=True), 'Crawler', datetime.now(), None, None, None,
|
||||
cells[5].get_text(strip=True),
|
||||
cells[1].get_text(strip=True), cells[1].get_text(strip=True),
|
||||
cells[8].get_text(strip=True),
|
||||
cells[7].get_text(strip=True), cells[2].get_text(strip=True),
|
||||
cells[11].get_text(strip=True),
|
||||
'ESA', None, None, ','.join(upload_ids)
|
||||
]
|
||||
save_to_mysql(data_db)
|
||||
else:
|
||||
print(f'Error: {response.status_code}')
|
||||
finally:
|
||||
# 关闭游标和连接
|
||||
cursor.close()
|
||||
connection.close()
|
||||
|
||||
@@ -4,4 +4,4 @@ urllib3
|
||||
flask
|
||||
lxml
|
||||
mysql-connector-python
|
||||
gunicorn
|
||||
minio
|
||||
|
||||
Reference in New Issue
Block a user