之前分享过一个使用Aspera自动监测数据上传NCBI情况的Python脚本,但它存在一个缺陷:上传结束后不会自动退出,而是会不断重启上传任务。可以点击这里查看之前分享的脚本:Aspera上传数据到NCBI,使用Python脚本保持传输无中断-Wslll Blog
这里对脚本进行了优化:可以自动识别传输是否完成,完成后会自动停止。
import subprocess
import time
import psutil
import re
def is_process_running(process_name):
    """Report whether any running process has exactly the given name.

    Args:
        process_name: Executable name to look for, e.g. ``"ascp.exe"``.
            The comparison is an exact, case-sensitive string match.

    Returns:
        True if at least one process reported by ``psutil`` carries that
        name, otherwise False.
    """
    # Only the 'name' attribute is requested, so psutil pre-fetches just
    # that field into each process's ``info`` dict.
    return any(
        candidate.info['name'] == process_name
        for candidate in psutil.process_iter(['name'])
    )
def run_aspera_transfer():
    """Run one blocking ``ascp`` upload and report how it ended.

    The call does not return until the ``ascp`` process exits. Paths, key
    file and destination are hard-coded placeholders that must be edited
    before use.

    Returns:
        A ``(return_code, output)`` tuple: the exit status of ``ascp`` and
        everything it printed. stderr is merged into the captured text so
        that error messages are not lost when the caller prints the output
        of a failed run.

    Raises:
        FileNotFoundError: If the configured ``ascp.exe`` path does not exist.
    """
    # NOTE(review): option meanings are taken from the ascp manual; verify
    # against the installed version: -Q fair transfer policy, -T disable
    # encryption, -l500m cap the target rate at 500 Mbit/s, -k1 resume
    # partially transferred files, -d create the destination directory.
    command = [
        r"C:\Program Files\IBM\Aspera Connect\bin\ascp.exe",  # replace with the path to ascp.exe in your Aspera installation
        "-i", r"D:\data\aspera.openssh",  # replace with the location of the transfer key downloaded from NCBI
        "-QT", "-l500m", "-k1", "-d",  # transfer options, including the rate limit
        # A raw string cannot end in a single backslash, so this literal
        # ends with two backslash characters.
        r"D:\data\single-celldata\\",  # replace with the folder to upload; every file inside it is uploaded
        "subasp@upload.ncbi.nlm.nih.gov:uploads/mail_adress_code/file_you_add_at_root_catalog",  # replace with the upload path obtained from NCBI; remember to create a new folder under the main path
    ]
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # keep error messages together with the normal log
        text=True,
        errors="replace",  # undecodable bytes must not abort the supervisor
    )
    return result.returncode, result.stdout
def transfer_successful(return_code):
    """Tell whether a process exit status denotes success.

    Args:
        return_code: Exit status of the ``ascp`` process.

    Returns:
        True only when ``return_code`` equals 0; any other value is
        treated as an error.
    """
    succeeded = return_code == 0
    return succeeded
def check_transfer_log(output):
    """Check the captured transfer log for evidence that nothing is left to upload.

    The log passes only when it contains BOTH of the following:
      * a ``Completed: <digits>[K] bytes transferred`` summary line, and
      * at least one literal ``(skipped)`` marker.

    NOTE(review): ``(skipped)`` is presumably what ascp prints for files
    already present at the destination; confirm against real ascp output.

    Args:
        output: Text captured from the ``ascp`` run.

    Returns:
        True if both markers are found, otherwise False.
    """
    required_markers = (
        r'Completed: \d+K? bytes transferred',
        r'\(skipped\)',
    )
    return all(
        re.search(marker, output) is not None
        for marker in required_markers
    )
# Supervisor loop: whenever no ascp.exe process is running, launch a
# (blocking) upload and stop once a run both exits with status 0 and passes
# the log check; otherwise wait and try again.
while True:
    if is_process_running("ascp.exe"):
        # Another ascp instance is still active; look again in 60 seconds.
        time.sleep(60)
        continue

    print("Aspera transfer interrupted. Restarting...")
    return_code, output = run_aspera_transfer()

    if transfer_successful(return_code) and check_transfer_log(output):
        print("Aspera transfer completed successfully.")
        break  # upload finished and verified: leave the loop

    print(f"Aspera transfer failed with return code {return_code}. Output: {output}")
    print("Restarting transfer...")
    time.sleep(60)  # check once every 60 seconds