This commit is contained in:
ls
2024-11-12 15:46:28 +08:00
parent d02773609f
commit 835218ffa6
8 changed files with 161 additions and 133 deletions

View File

@@ -44,7 +44,7 @@ def scrape():
# 下载文件并获取文件名
file_response = requests.get(download_url)
if file_response.status_code == 200:
if file_response and file_response.status_code == 200:
# 从响应头获取文件名
content_disposition = file_response.headers.get('Content-Disposition')
filename = ''
@@ -202,16 +202,17 @@ def scrape():
else:
print(f'Failed to download: {download_url}')
upload_ids = upload_to_minio(esa_connection, folder_path,'ESA')
data_db = ['ESA-' + cells[0].get_text(strip=True), 'Crawler', datetime.now(), None, None, None,
cells[5].get_text(strip=True),
cells[1].get_text(strip=True), cells[1].get_text(strip=True),
cells[8].get_text(strip=True),
cells[7].get_text(strip=True), cells[2].get_text(strip=True),
cells[11].get_text(strip=True),
'ESA', None, None, ','.join(upload_ids)
]
save_to_db_import_record(esa_connection, data_db)
upload_ids = upload_to_minio(esa_connection, folder_path, 'ESA')
origin_data = ','.join([c.get_text(strip=True) for c in cells])
data_db = ['ESA-' + cells[0].get_text(strip=True), 'Crawler', datetime.now(), None, None, None,
cells[5].get_text(strip=True),
cells[1].get_text(strip=True), cells[1].get_text(strip=True),
cells[8].get_text(strip=True),
cells[7].get_text(strip=True), cells[2].get_text(strip=True),
cells[11].get_text(strip=True),
'ESA', None, None, str(upload_ids), origin_data
]
save_to_db_import_record(esa_connection, data_db)
else:
print(f'Error: {response.status_code}')
finally: