Batch-downloading Sentinel-1 precise orbit files (.EOF) with Python

Building on Xiaolong's hard work on RS, this version adds three things: reading the acquisition dates directly from the SLC image file names (readSLCFileName, which saves you from working out by hand which dates the downloaded Sentinel images cover), checking the size of each downloaded orbit file (getFileSize), and re-downloading the files that failed or came down incomplete (downloadWrongOribits).
For step-by-step instructions, see:
https://blog.csdn.net/qq_44932630/article/details/124202144

WARNING:
Before running this example you must fill in the Cookie entry of the headers dictionary inside the download function taken from the original author's source code.
The headers in that download function must be replaced with your own information (P.S. you also need to replace them again if you switch to a different computer).

# coding:utf-8
import datetime
import os
import re
import sys
import tkinter as tk
import urllib.parse
import urllib.request
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from dateutil.parser import parse

# Start time of the run; the script subtracts it from the end time to report
# the total running time when it finishes.
timestart = datetime.datetime.now()


def downloadWrongOribits(wrongList, outPath, maxRounds=3,
                         baseUrl='https://s1qc.asf.alaska.edu/aux_poeorb/'):
    """Retry the orbit-file downloads listed in *wrongList*.

    Each entry may be either a full URL or a bare ``.EOF`` file name; bare
    names are turned into URLs by prefixing *baseUrl*.  Every entry is
    attempted at most *maxRounds* times, so a permanently failing URL can no
    longer keep the script looping forever.

    Args:
        wrongList: URLs or file names to download again.  The list itself is
            only read here (it may be the global ``error_url`` list, which
            ``download`` mutates).
        outPath: Directory the files are saved into.
        maxRounds: Maximum number of retry passes over the failing entries.
        baseUrl: URL prefix used for entries that are bare file names.

    Returns:
        The entries that still failed after the last pass (empty on success).
    """
    # Work on a de-duplicated snapshot: download() appends to / removes from
    # the global error_url list, which is frequently the very list passed in,
    # and mutating a list while iterating over it skips elements.
    pending = list(dict.fromkeys(wrongList))
    rounds = 0
    while pending and rounds < maxRounds:
        rounds += 1
        print("----------------------------------------")
        print("Start downloading the data with the error.")
        print("The data that went wrong is:")
        print(pending)
        stillFailing = []
        for fileIndex, eof in enumerate(pending, start=1):
            url = eof if '://' in eof else baseUrl + eof
            # Save under the last path component of the URL; a fixed-width
            # slice such as eof[50:] silently breaks when the URL prefix
            # has a different length.
            saveFile = os.path.join(outPath, url.rsplit('/', 1)[-1])
            print("(" + str(fileIndex) + "/" + str(len(pending)) + ")" + " Start downloading the Precision track files: " + eof)
            # download() reports failure only by leaving the URL in the
            # global error_url list, so clear stale entries first; after the
            # call, membership then reflects this attempt alone.
            while url in error_url:
                error_url.remove(url)
            download(saveFile, url)
            if url in error_url:
                stillFailing.append(eof)
        pending = stillFailing
        print("The data download for the error is complete.")
        print("----------------------------------------")
    return pending


def getFileSize(filePath):
    """Return the size of the file at *filePath* as a ``(value, unit)`` tuple.

    The unit is ``'Byte'``, ``'K'``, ``'M'`` or ``'G'`` (powers of 1024); the
    value is expressed in that unit and rounded to two decimals.

    Args:
        filePath: Path of an existing file.

    Returns:
        ``(value, unit)`` with the largest unit for which value < 1024
        (``'G'`` is the last unit, so its value may exceed 1024).

    Raises:
        OSError: If the file does not exist or cannot be accessed.
    """
    size = os.path.getsize(filePath)
    if size < 1024:
        return round(size, 2), 'Byte'
    for unit in ('K', 'M'):
        size /= 1024
        if size < 1024:
            return round(size, 2), unit
    # Keep two decimals here as well: rounding to a whole number would
    # report e.g. 1.5 GB as 2, unlike every other unit.
    return round(size / 1024, 2), 'G'


def readSLCFileName(parentPath, path):
    """Collect the distinct dates encoded in the SLC zip names of a folder.

    Lists ``parentPath + path``, keeps the entries whose name ends in
    ``zip``, writes those names (one per line) to ``zip_file_list.txt`` in
    the current working directory, and extracts characters 17-24 of every
    name.  For standard Sentinel-1 product names such as
    ``S1A_IW_SLC__1SDV_20210208T...`` that slice is the acquisition date in
    ``YYYYMMDD`` form.

    Args:
        parentPath: Base directory.
        path: Sub-path appended verbatim to *parentPath*; the caller is
            expected to include the leading path separator.

    Returns:
        The date strings without duplicates, ordered by file name.

    Raises:
        OSError: If the folder cannot be listed or the log file cannot be
            written.
    """
    # Sorted so the result does not depend on the arbitrary os.listdir order.
    zipNames = sorted(
        name for name in os.listdir(parentPath + path) if name.endswith("zip")
    )

    # Plain-text record of every zip that was considered, one name per line.
    with open("zip_file_list.txt", "w") as logFile:
        logFile.writelines(name + "\n" for name in zipNames)

    # dict.fromkeys drops repeated dates while keeping first-seen order.
    return list(dict.fromkeys(name[17:25] for name in zipNames))


def download(dest_dir, url, cookie=None, timeout=60):
    """Download *url* and save it to the file path *dest_dir*.

    Failures are recorded in the module-level ``error_url`` list (each URL at
    most once); a successful download removes the URL from that list.

    Args:
        dest_dir: Path of the file to write (despite the name, not a folder).
        url: URL of the orbit file.
        cookie: Optional ``Cookie`` header value that replaces the embedded
            default below.
        timeout: Socket timeout in seconds for the HTTP request.

    Returns:
        True if the file was downloaded and written, False otherwise.
    """
    print(url)
    print(dest_dir)
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        # No "Accept-Encoding" header: urllib does not decompress responses,
        # so asking for gzip/deflate/br could yield a compressed file on disk.
        # No "Host" header either: urllib derives it from the URL, whereas a
        # hard-coded value would also be sent to other hosts after a redirect.
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Connection": "keep-alive",
        # NOTE(security): this is a personal session cookie embedded in the
        # source. Replace it with your own (or pass ``cookie=``) and avoid
        # committing real credentials.
        "Cookie": "_ga_XCPHL9DW7E=GS1.1.1696731884.3.1.1696732536.0.0.0; _ga=GA1.2.1813036397.1695265545; "
                  "_ce.s=v~998cce5ddc7dea52e816b862b1f59664c97255b0~lcw~1696732434540~vpv~2~v11.fhb~1696731892368~v11"
                  ".lhb~1696732494573~lcw~1696732494574; "
                  "asf-urs=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9"
                  "..T3w_cX26656amley8u6brpP3-QM8hEfsbXO1ZVvHHYSluKkiLnPBbHjFFdm0l7pbTgpN8hoafq0ZImb_oTBNCb5RNaSCMY8OqV6Yw7AeRT8lFE1ZH9IewGfHt_LbMkfrQYeR0KrQRObEjT 8xUdyESWJDPSOvNywqXYlncnjKxceIK0fxaHtBsBJmKK4gZOiYJVUc6WXmGPxZAtb7wPmPb7YjcqFFnxfJbdkEUBqXh_uwDU000thixMuDN2_tfuxT4tDML5v6CsYMNhq4CxSq5FYzfyxftPhLT qS2yGQsG-xGs-9PuGCSLwrQjxxh89Birk0PlVsbExGVS9HLikEyFAl23dh-3J5lOE-T7ZdKcm3G5a0J5oGuul8SGZTe91S4X7RELSomkEMhH5mHk1zTQ_DW7UC1p5RUKD0H7uLr6IrAMHEsPbovl11Vsa KtLl5SIuTqpuEyXolx5GR6Us-DUEu9F-HD3HCbYL5AMyrKPI0NI29SU_YhNZ6jlxOmaSujzWH3nAI7p5wbZib6tf1PU6M_ih3LJSter7bD3lmj43yttA7MNVGsZGZ8z3xKJdNikA0CBrYotBd3dUSKSBhu urs-access-token=eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYX J0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiT0F1dGgiLCJjbGllbnRfaWQiOiJCT19uN25USWxNbGpkdlU2a1JSQjNnIiwiZXhw IjoxNjk5NTg4NTczLCJpYXQiOjE2OTY5OTY1NzMsImlzcyI6IkVhcnRoZGF0YSBMb2dpbiIsInVpZCI6Imdpc2VyY2huIn0.qzWXgBhInPPZFn_-4VGtunZlFBUFMVLp0s5pM-1yQgEFGjb3mLzD-WX iiaGo9qZAI7yQIQGYL0E-IkeT4HFSVQf_PYMRtna7rXeNOBcVMiFXXnri9by8lb65hY4 -tSeiPk7YXVDKYZJJ2BsEcUXnWIvsKNhjt-r4nnbvETCVpEPa1RVInmQLJKKviTYuGB-0R13gzzyQsLKWPBZaUCbU3uMMGQbQYKWqZBQeqtlkC7aL1ZJnt5io-rkh23RkJTltw4i4FCJ8MGUjyx7qQ s1Hc__jCP5zxpICTAgUTVvHmHciB41jCpYYbJe4kmoY0ojQcL3Ho7ChhcuMbJzAFiAxoA",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0",
    }
    if cookie is not None:
        headers["Cookie"] = cookie

    try:
        request = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(request, timeout=timeout) as response:
            # Read everything before touching the destination so that a
            # failed transfer does not leave a truncated file behind.
            payload = response.read()
        # Binary mode keeps the file byte-identical to what the server sent
        # (no newline translation, no locale-dependent re-encoding).
        with open(dest_dir, "wb") as f:
            f.write(payload)
    except Exception as exc:
        # Deliberately broad: this is the retry boundary, and any network,
        # HTTP or file-system failure just marks the URL for another attempt.
        # Unlike a bare ``except`` it lets Ctrl-C through.
        if url not in error_url:
            error_url.append(url)
        print("\tError retrieving the URL:", dest_dir, "-", exc)
        return False

    # Success: drop every stale failure record for this URL.
    while url in error_url:
        error_url.remove(url)
    return True


if __name__ == '__main__':
    # Folder layout, relative to the script's own directory:
    #   <script dir>/slc     - the Sentinel-1 SLC zip files
    #   <script dir>/orbits  - where the .EOF orbit files are stored
    current_path = sys.path[0]
    # readSLCFileName() concatenates its two arguments verbatim, so the
    # sub-paths carry their own leading separator. os.sep replaces the
    # hard-coded backslash, which is Windows-only and an invalid escape.
    slc_path = os.sep + "slc"
    orbits_path = os.sep + "orbits"
    # NOTE(review): not read anywhere in this script - download() has its
    # cookie hard-coded.
    cookie_path = os.path.join(current_path, "cookie.txt")

    base_url = 'https://s1qc.asf.alaska.edu/aux_poeorb/'
    mission = 'S1A'

    # Shared with download(): URLs whose download failed.
    error_url = []
    wrongDownloadOrbitsList = []

    out_path = current_path + orbits_path
    os.makedirs(out_path, exist_ok=True)

    # Fetch the index page that lists the available orbit files.
    url_param_json = {
        'sentinel1__mission': mission,
        'validity_start': '2015-01-01',
    }
    url = base_url + '?%s' % urllib.parse.urlencode(url_param_json)
    reply = requests.get(url, timeout=60)
    reply.raise_for_status()
    dom = BeautifulSoup(reply.content, "lxml")
    # href=True skips anchors without an href instead of raising KeyError.
    eof_lists = [a['href'] for a in dom.find_all("a", href=True)
                 if a['href'].endswith('.EOF')]

    NeedTimeArray = readSLCFileName(current_path, slc_path)
    needed_dates = set(NeedTimeArray)
    matched_dates = set()
    fileIndex = 1
    for eof in eof_lists:
        if os.path.basename(eof)[0:3] != mission:
            continue
        # The second-to-last field of the file name is a timestamp; the day
        # before it is the date compared against the SLC dates.
        # NOTE(review): presumably the validity stop time - confirm against
        # the POEORB naming convention.
        SplitEOF = re.split(r'[_,.,\s ]\s*', eof)
        NeedTime = parse(SplitEOF[-2]) + datetime.timedelta(days=-1)
        NeedTimeNum = NeedTime.strftime('%Y%m%d')
        if NeedTimeNum not in needed_dates:
            continue

        matched_dates.add(NeedTimeNum)
        savefile = os.path.join(out_path, os.path.basename(eof))
        eof_url = base_url + eof
        print("----------------------------------------")
        print("(" + str(fileIndex) + "/" + str(len(NeedTimeArray)) + ")" + " Start downloading the Precision track files: " + eof)
        download(savefile, eof_url)
        # download() leaves the URL in error_url when the transfer failed.
        if eof_url not in error_url:
            print("The precision track data: " + eof + " download is complete.")
        print("----------------------------------------")
        fileIndex += 1
        if len(matched_dates) == len(needed_dates):
            print("The required precision track data is downloaded, totaling %d files" % (len(matched_dates)))
            print("----------------------------------------")
            break

    # First retry everything that failed outright.
    downloadWrongOribits(error_url, out_path)

    # Then re-download files that look truncated. The 4 MB lower bound is
    # the threshold the original script used for a complete orbit file.
    for orbitsFile in os.listdir(out_path):
        if not orbitsFile.endswith('.EOF'):
            continue
        size = getFileSize(os.path.join(out_path, orbitsFile))
        if size[1] != 'M' or float(size[0]) < 4.0:
            wrongDownloadOrbitsList.append(base_url + orbitsFile)

    if wrongDownloadOrbitsList:
        downloadWrongOribits(wrongDownloadOrbitsList, out_path)

    timeend = datetime.datetime.now()
    print('Running time: %.1f Seconds' % (timeend - timestart).total_seconds())