首先安裝:DrissionPage
pip install DrissionPage
安裝ddddocr:
pip install ddddocr
程式碼示例:
import random
import time

import ddddocr
from DrissionPage import ChromiumPage, ChromiumOptions

# Path to the local Chrome executable.
path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
co = ChromiumOptions().set_browser_path(path)
# Open the browser once at module level; the rest of the script shares `page`.
page = ChromiumPage(co)
# Load the page that shows the slider captcha.
page.get("https://cszg.mca.gov.cn/biz/ma/csmh/filter/slideCaptchaindex.html", retry=3, interval=2, timeout=15)


def get_tracks(distance):
    """Return a list of integer horizontal steps that together cover *distance*.

    The motion is simulated in fixed 0.3 time-unit ticks: it accelerates with
    a random acceleration in [2.5, 3.5] until a randomly chosen fraction
    (0.55-0.78) of the distance is covered, then decelerates with a random
    value in [2.0, 3.0].

    Each returned step is the change in the *rounded cumulative* position,
    capped at ``distance``, so ``sum(result) == round(distance)``. Rounding
    every step on its own would let the rounding error and the final
    overshoot accumulate, leaving the slider several units off target.

    Args:
        distance: Total horizontal distance to cover.

    Returns:
        list[int]: Per-tick offsets; empty when ``distance`` is not positive.
    """
    switch_ratio = round(random.uniform(0.55, 0.78), 2)
    velocity, tick = 0, 0.3
    travelled = 0  # exact (float) distance covered by the simulation
    emitted = 0    # whole units already handed out in `tracks`
    tracks = []
    switch_point = distance * switch_ratio
    while travelled < distance:
        if travelled < switch_point:
            # Speed-up phase.
            accel = round(random.uniform(2.5, 3.5), 1)
        else:
            # Slow-down phase.
            accel = -round(random.uniform(2.0, 3.0), 1)
        step = velocity * tick + 0.5 * accel * (tick ** 2)
        velocity = velocity + accel * tick
        travelled += step
        # Cap at `distance` so the last tick lands on the target instead of
        # overshooting it.
        target = round(min(travelled, distance))
        tracks.append(target - emitted)
        emitted = target
    return tracks


# Build the matcher once; its configuration does not change between attempts.
det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)

# Try to solve the captcha up to five times.
for attempt in range(5):
    # Fetch the captcha background image.
    background_bytes = page.ele('xpath://*[@id="oriImg"]').src()
    # Fetch the cut-out (slider piece) image.
    cut_bytes = page.ele('xpath://*[@id="cutImg"]').src()
    # Match the piece against the background to locate the gap.
    result = det.slide_match(cut_bytes, background_bytes, simple_target=True)
    print("滑塊距離:", result)
    # The first element of result['target'] is used as the horizontal offset.
    offset = result['target'][0]
    tracks = get_tracks(offset)
    print("滑動軌跡:", tracks)

    # Press and hold the left mouse button on the slider handle.
    page.actions.hold('xpath://*[@id="slider"]')
    for track in tracks:
        # Move relative to the current pointer position.
        # NOTE(review): offset_y is always in [1.0, 3.0], so the pointer
        # drifts in one vertical direction on every step - confirm intended.
        page.actions.move(offset_x=track, offset_y=round(random.uniform(1.0, 3.0), 1), duration=.1)
    # Short pause before letting go.
    time.sleep(0.1)
    # Release the left mouse button.
    page.actions.release('xpath://*[@id="slider"]')

    # Save a screenshot of the captcha widget.
    page.ele('xpath://*[@id="captchadiv"]').get_screenshot(path='./captcha.jpg')
    time.sleep(5)

    # The text of this element is checked for the marker below to decide
    # whether the attempt failed.
    # NOTE(review): the marker is written in Traditional Chinese; if the site
    # serves Simplified text this never matches and every attempt is reported
    # as a success - verify against the live page.
    if '驗證' in page.ele('xpath:/html/body/div[6]/div/div[2]').text:
        print('滑動失敗,重新整理滑塊')
        # Click the refresh image to load a new captcha.
        page.ele('xpath://*[@id="captchadiv"]/div[1]/img').click()
    else:
        print("滑動成功")
        break

print('開始採集')