update
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import csv
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
@@ -7,102 +6,19 @@ from datetime import datetime
|
||||
import mysql.connector
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
|
||||
from common import upload_to_minio, save_to_db_import_record, db_config
|
||||
|
||||
# Local directory for downloaded files; created at import time if missing.
download_dir = 'downloaded_files'
os.makedirs(download_dir, exist_ok=True)

# MySQL connection settings.
# NOTE(review): credentials used to be hard-coded only. Each value can now be
# overridden through an environment variable; the previous literals remain as
# defaults so existing deployments behave exactly as before.
db_config = {
    'host': os.environ.get('PHYSICAL_DB_HOST', '192.168.50.100'),
    'port': int(os.environ.get('PHYSICAL_DB_PORT', '23306')),
    'user': os.environ.get('PHYSICAL_DB_USER', 'root'),
    'password': os.environ.get('PHYSICAL_DB_PASSWORD', '123456'),
    'database': os.environ.get('PHYSICAL_DB_NAME', 'physical-boot'),
}
# Module-level connection and cursor shared by the save_* helpers below.
connection = mysql.connector.connect(**db_config)
cursor = connection.cursor()

# MinIO client settings (same override scheme as the MySQL settings above).
minio_client = Minio(
    os.environ.get('PHYSICAL_MINIO_ENDPOINT', "192.168.50.100:29000"),  # MinIO server address
    access_key=os.environ.get('PHYSICAL_MINIO_ACCESS_KEY', "root"),
    secret_key=os.environ.get('PHYSICAL_MINIO_SECRET_KEY', "12345678"),
    secure=False,  # plain HTTP endpoint
)
bucket_name = 'physical'
|
||||
|
||||
|
||||
def get_md5(input_string):
    """Return the hexadecimal MD5 digest of *input_string*.

    The string is encoded as UTF-8 before hashing.

    Args:
        input_string: Text to hash.

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    # hashlib works on bytes, so the text is encoded first.
    return hashlib.md5(input_string.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def save_to_mysql(data):
    """Insert one row into the ``import_record`` table.

    Uses the module-level ``cursor`` and ``connection``. The insert is
    best-effort: any failure is printed and swallowed so the caller can
    continue with the next record.

    Args:
        data: Sequence of 17 values, in the column order of the INSERT
            statement below (``id`` ... ``file_list``).
    """
    insert_query = """INSERT INTO `import_record` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `sys_org_code`, `device_type`, `device_name`, `device_mode`, `device_function`, `device_batch`, `manufacturer`, `experiment_date`, `data_source`, `experiment_user`, `total_count`, `file_list`)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"""
    try:
        cursor.execute(insert_query, data)
        connection.commit()
    except Exception as e:
        # Report the failure and keep going (deliberate best-effort insert).
        print(e)
        try:
            # Discard any half-applied statement so it cannot be committed
            # together with a later, unrelated insert.
            connection.rollback()
        except mysql.connector.Error as rollback_error:
            print(rollback_error)
|
||||
|
||||
|
||||
# URL of the web page whose content scrape() fetches
|
||||
url = 'https://esarad.esa.int/'
|
||||
|
||||
|
||||
def save_to_db_file(data):
    """Insert one row into the ``oss_file`` table.

    Uses the module-level ``cursor`` and ``connection``. The insert is
    best-effort: any failure is printed and swallowed so the caller can
    continue with the next file.

    Args:
        data: Sequence of 5 values in the order ``id``, ``file_name``,
            ``url``, ``create_by``, ``create_time``.
    """
    insert_query = """INSERT INTO `oss_file` (`id`,`file_name`,`url`,`create_by`,`create_time` )
                    VALUES (%s, %s, %s, %s, %s);"""
    try:
        cursor.execute(insert_query, data)
        connection.commit()
    except Exception as e:
        # Report the failure and keep going (deliberate best-effort insert).
        print(e)
        try:
            # Discard any half-applied statement so it cannot be committed
            # together with a later, unrelated insert.
            connection.rollback()
        except mysql.connector.Error as rollback_error:
            print(rollback_error)
|
||||
|
||||
|
||||
def upload_to_minio(folder_path):
    """Upload every regular file in *folder_path* to MinIO and record it.

    Objects are stored in ``bucket_name`` under ``ESA/<folder name>/<file name>``;
    the bucket is created first if it does not exist. For each uploaded file a
    row is written to ``oss_file`` via ``save_to_db_file``. Files whose upload
    raises ``S3Error`` are reported and skipped.

    Args:
        folder_path: Directory whose direct children are uploaded
            (sub-directories are ignored).

    Returns:
        List of file ids (MD5 of the object name) for the files that were
        uploaded successfully.
    """
    if not minio_client.bucket_exists(bucket_name):
        minio_client.make_bucket(bucket_name)

    folder_name = os.path.basename(folder_path)
    file_ids = []
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        # Only regular files are uploaded.
        if not os.path.isfile(file_path):
            continue

        object_name = f"ESA/{folder_name}/{file_name}"
        try:
            minio_client.fput_object(bucket_name, object_name, file_path)
        except S3Error as err:
            print(f"上传 {file_name} 时出错: {err}")
            continue

        print(f"已上传: {file_path} -> {bucket_name}/{object_name}")
        # The id is derived from the object name, so re-uploading the same
        # object yields the same id.
        file_id = get_md5(object_name)
        file_ids.append(file_id)
        db_file = [
            file_id,
            file_name,
            f"http://58.215.212.230:8005/oss/{bucket_name}/{object_name}",
            'admin',
            datetime.now(),
        ]
        save_to_db_file(db_file)
    return file_ids
|
||||
|
||||
|
||||
def scrape():
|
||||
esa_connection = mysql.connector.connect(**db_config)
|
||||
try:
|
||||
response = requests.get(url)
|
||||
|
||||
@@ -286,7 +202,7 @@ def scrape():
|
||||
else:
|
||||
print(f'Failed to download: {download_url}')
|
||||
|
||||
upload_ids = upload_to_minio(folder_path)
|
||||
upload_ids = upload_to_minio(esa_connection, folder_path,'ESA')
|
||||
data_db = ['ESA-' + cells[0].get_text(strip=True), 'Crawler', datetime.now(), None, None, None,
|
||||
cells[5].get_text(strip=True),
|
||||
cells[1].get_text(strip=True), cells[1].get_text(strip=True),
|
||||
@@ -295,10 +211,9 @@ def scrape():
|
||||
cells[11].get_text(strip=True),
|
||||
'ESA', None, None, ','.join(upload_ids)
|
||||
]
|
||||
save_to_mysql(data_db)
|
||||
save_to_db_import_record(esa_connection, data_db)
|
||||
else:
|
||||
print(f'Error: {response.status_code}')
|
||||
finally:
|
||||
# 关闭游标和连接
|
||||
cursor.close()
|
||||
connection.close()
|
||||
esa_connection.close()
|
||||
|
||||
Reference in New Issue
Block a user