# LAADS + Selenium: download MODIS data
# Originally published 2021-06-09 (recovered from a blog scrape)

from selenium import webdriver

from time import sleep

import tempfile

import os,sys

import pandas as pd

import geopandas as gpd

import time

# 構建查詢地址

def GetURL(ProductID,StartTime,EndTime,search_file):
    """Build the LAADS search-page URL for one product over the query area.

    Parameters
    ----------
    ProductID : str
        LAADS product path segment, e.g. 'MOD021KM--61/'.
    StartTime, EndTime : str
        Acquisition window as 'YYYY-MM-DD' strings.
    search_file : str
        Path to a shapefile whose bounds define the query area.

    Returns
    -------
    str
        URL of the LAADS search page listing the matching granules.
    """
    # Query boundary: bounding box of the first (only) feature.
    data = gpd.GeoDataFrame.from_file(search_file)
    bbox = (data.bounds.values)[0].tolist()
    # Area string: upper-left then lower-right corner, rounded to 1 decimal.
    # (Computed as in the original script; kept for reference even though the
    # reconstructed URL below selects by tile rather than by bounding box.)
    Area = str(round(bbox[0],1))+','+str(round(bbox[3],1))+','+str(round(bbox[2],1))+','+str(round(bbox[1],1))
    # MODIS sinusoidal grid tiles (local shapefile carrying H/V indices).
    modis_grid_file = 'E:\***\modis_WGS84_grid_world.shp'
    modis_grid = gpd.GeoDataFrame.from_file(modis_grid_file)
    # Tiles that intersect the query geometry.
    modis_intersection = modis_grid[modis_grid.intersects(data.geometry[0])]
    path_row = 'Tile:'
    for mv in modis_intersection.values:
        path_row += "H"+str(mv[1])+"V"+str(mv[2])+","
    path_row = path_row[0:-1]  # drop the trailing comma
    # NOTE(review): the original URL literal was destroyed when this file was
    # scraped from the blog ("url='").  Reconstructed from the LAADS search
    # page URL scheme — verify against
    # https://ladsweb.modaps.eosdis.nasa.gov/search/ before relying on it.
    url = ('https://ladsweb.modaps.eosdis.nasa.gov/search/order/2/'
           + ProductID + StartTime + '..' + EndTime + '/DB/' + path_row)
    return url

# 使用 Selenium 查詢影像

def SearchFileList(url):
    """Open the LAADS search page with Selenium and download the result CSV.

    Parameters
    ----------
    url : str
        LAADS search-page URL built by GetURL().

    Returns
    -------
    str
        Path of the newly created folder that holds the downloaded CSV.
    """
    # Selenium 4 removed the executable_path= argument and the
    # find_element_by_* helpers; use Service and By instead.
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.common.by import By

    # One folder per run, named after the current timestamp, so that the
    # CSV saved by Chrome lands in a known, empty location.
    csvdir = 'E:\\***\\' + str(time.time()).replace('.','')
    os.mkdir(csvdir)
    # Route Chrome downloads into that folder and suppress popups.
    options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': csvdir}
    options.add_experimental_option('prefs', prefs)
    chromedriver = r"C:\***\Google\Chrome\Application\chromedriver.exe"  # local path of chromedriver.exe
    # options.add_argument('--headless')  # enable for browser-less runs
    driver = webdriver.Chrome(service=Service(chromedriver), options=options)
    driver.get(url)
    try:
        # Leave time for the server-side search to finish after the page
        # loads; tune to your bandwidth.  A WebDriverWait on the csv link
        # would be more robust than a fixed sleep.
        sleep(50)
        # The second anchor under the download tab is the CSV export link
        # (the original also noted find_element_by_link_text('csv') works).
        csvElement = driver.find_element(By.XPATH, '// *[ @ id = "tab4download"] / a[2]')
        csvElement.click()
        # Leave time for the CSV download to complete.
        sleep(20)
    finally:
        # Always release the browser, even if the download step fails.
        driver.quit()
    return csvdir

# 下載影像

def MODISDown(FileDir):
    """Read the downloaded CSV granule list and fetch every listed file.

    Parameters
    ----------
    FileDir : str
        Folder created by SearchFileList() containing exactly one CSV.

    Side effects: deletes the CSV and its folder once parsed, creates one
    output folder per path component, and writes each granule to disk via
    geturl(); files already present are skipped so interrupted runs resume.
    """
    # The folder holds exactly one file: the CSV saved by the browser.
    csvfilename = os.listdir(FileDir)[0]
    csvfilepath = os.path.join(FileDir, csvfilename)
    csvvalues = pd.read_csv(csvfilepath).values
    # The list is now in memory; remove the temporary CSV and its folder.
    os.remove(csvfilepath)
    os.rmdir(FileDir)
    file_count = 0
    for cv in csvvalues:
        file_count += 1
        # cv[1] is the granule's archive path, e.g.
        # /archive/allData/61/MOD021KM/2020/153/MOD021KM.A2020153...hdf
        # NOTE(review): the download-link expression was destroyed when this
        # file was scraped from the blog ("modislink='[1]").  Reconstructed
        # as LAADS archive host + cv[1] — verify against the CSV contents.
        modislink = 'https://ladsweb.modaps.eosdis.nasa.gov' + cv[1]
        parts = cv[1].split("/")
        # Output folder keyed on the 6th path component of the granule path.
        outdir = 'E:/***/MODIS/' + parts[5]
        # outdir = 'E:/Temp/' + parts[5]
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
        path = outdir + '/' + parts[7]
        # Skip granules already on disk.
        if not os.path.exists(path):
            print("({0}/{1}) Downloading {2}".format(file_count, len(csvvalues), modislink.split("/")[-1]))
            with open(path, 'w+b') as out:
                geturl(modislink, out)

# 獲取下載連結並下載影像資料

def geturl(url,out=None):
    """Fetch *url* with Earthdata bearer-token authentication.

    Parameters
    ----------
    url : str
        Download link for one granule.
    out : binary file object or None
        When None, return the response body decoded as UTF-8; otherwise
        stream the body into *out* (with progress reporting) and return None.
    """
    USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r', '')
    headers = { 'user-agent' : USERAGENT }
    # Your Earthdata token (obtainable from your profile on the Earthdata site).
    token = '******'
    headers['Authorization'] = 'Bearer ' + token
    try:
        import ssl
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        from urllib.request import urlopen, Request
        # HTTPError/URLError live in urllib.error, not urllib.request.
        from urllib.error import URLError, HTTPError
        try:
            response = urlopen(Request(url, headers=headers), context=CTX)
            if out is None:
                return response.read().decode('utf-8')
            else:
                start = time.time()
                # Stream the payload into the file, reporting progress.
                chunk_read(response, out, report_hook=chunk_report)
                elapsed = max(time.time() - start,1.0)
                # Average download rate in MB/s.
                rate = (get_total_size(response) / 1024 ** 2) / elapsed
                print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(get_total_size(response), elapsed, rate))
                # shutil.copyfileobj(response, out)
        except HTTPError as e:
            # e.code is an int attribute (the original called it as e.code(),
            # a TypeError), and Python 3's HTTPError has no .message — use
            # .reason for the textual explanation.
            print('HTTP GET error code: %d' % e.code, file=sys.stderr)
            print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
        except URLError as e:
            print('Failed to make request: %s' % e.reason, file=sys.stderr)
        return None
    except AttributeError:
        # OS X Python 2 and 3 don't support tlsv1.1+ therefore... curl
        import subprocess
        try:
            args = ['curl', '--fail', '-sS', '-L', '--get', url]
            for (k,v) in headers.items():
                args.extend(['-H', ': '.join([k, v])])
            if out is None:
                # python3's subprocess.check_output returns stdout as a byte string
                result = subprocess.check_output(args)
                return result.decode('utf-8') if isinstance(result, bytes) else result
            else:
                subprocess.call(args, stdout=out)
        except subprocess.CalledProcessError as e:
            # The original's format string was broken ('%' with no
            # conversion); CalledProcessError carries the output attribute.
            print('curl GET error message: %s' % (e.output,), file=sys.stderr)
        return None

#  chunk_read modified from

def chunk_read( response, local_file, chunk_size=10240, report_hook=None):
    """Copy *response* into *local_file* in fixed-size chunks.

    Parameters
    ----------
    response : HTTP response object supporting .read(n)
    local_file : open file object (binary, or text with an .encoding)
    chunk_size : int
        Bytes requested per read.
    report_hook : callable(bytes_so_far, file_size) or None
        Invoked after each chunk to report progress.

    Returns
    -------
    int
        Total number of bytes read.
    """
    # Total size from the Content-Length header (None when unavailable).
    file_size = get_total_size(response)
    # Bytes downloaded so far.
    bytes_so_far = 0
    while 1:
        try:
            # Read one fixed-size chunk from the connection.
            chunk = response.read(chunk_size)
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate; read failures end the loop as best-effort.
            sys.stdout.write("\n > There was an error reading data. \n")
            break
        try:
            # Write the chunk to the local file.
            local_file.write(chunk)
        except TypeError:
            # Text-mode file: decode the bytes with the file's own encoding.
            local_file.write(chunk.decode(local_file.encoding))
        # Update the running total after each successful write.
        bytes_so_far += len(chunk)

        if not chunk:
            break
        if report_hook:
            # Report download progress.
            report_hook(bytes_so_far, file_size)
    return bytes_so_far

def chunk_report( bytes_so_far, file_size):
    """Write a one-line, carriage-returned download progress report."""
    if file_size is None:
        # Size unknown (no Content-Length): report the byte count only.
        sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far))
        return
    # Percentage complete, rounded to two decimals.
    fraction = float(bytes_so_far) / file_size
    percent = round(fraction * 100, 2)
    sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, file_size, percent))

def get_total_size(response):
    """Return the response's Content-Length in bytes, or None when it
    cannot be read from the headers."""
    try:
        # Python 2 urllib responses expose headers via info().getheader().
        size_text = response.info().getheader('Content-Length').strip()
    except AttributeError:
        try:
            # Python 3 http.client responses expose getheader() directly.
            size_text = response.getheader('Content-Length').strip()
        except AttributeError:
            print ("> Problem getting size")
            return None
    return int(size_text)

if __name__ == "__main__":

     # Parameters describing the data to download.

     ProductID = 'MOD021KM--61/'  # product ID #sys.argv[1]#

     # Start and end of the acquisition window (simple date strings).

     StartTime = '2020-06-01'  # start date #sys.argv[2]#

     EndTime = '2020-06-03'  # end date #sys.argv[3]#

     search_file = r'E:\***\ 北京市 .shp'  # query-area shapefile #sys.argv[4]#

     # Build the LAADS search URL.

     url = GetURL(ProductID,StartTime,EndTime,search_file)

     # Download the CSV listing the matching granules.

     csvdir = SearchFileList(url)

     # Download each granule named in the list.

     MODISDown(csvdir)


# Source: ITPUB blog, http://blog.itpub.net/69946337/viewspace-2776198/
# (attribution required for reuse, per the original post)