# coding:utf-8
"""Scrape per-person item boxes from izaiwen.cn with Selenium.

For every name in ``list_name`` the script opens the search page, injects a
fixed set of cookies, checks whether the captcha page was served (and, if so,
sends a screenshot of the captcha to the Chaojiying service and clicks the
returned coordinate), then appends the text it finds to a CSV file.

NOTE(review): this file was re-indented from a whitespace-mangled copy.  The
nesting level of a few trailing ``time.sleep`` calls could not be recovered
with certainty; they are marked with NOTE(review) comments below.
"""
import random
import time
from csv import writer
from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

from chaojiying import Chaojiying_Client

# Where the captcha screenshot is written and later read back from.
CAPTCHA_IMAGE_PATH = 'D:/SeleniumX/yzm.png'

# A single browser session shared by every function in this script.
driver = webdriver.Chrome()


def open_web(search_name):
    """Open the search page for *search_name* and reload it with cookies set.

    Args:
        search_name: Value substituted into the ``psnname`` query parameter.
    """
    driver.get("https://www.izaiwen.cn/pro/sonE-stwE08?psnname={}".format(search_name))
    time.sleep(6)

    # Cookies are hard-coded here; an alternative would be to load them from a
    # "cookies.json" file with json.load.
    cookies = [
        {'name': 'HMACCOUNT', 'value': 'xxxxxxxxx'},
        {'name': 'heiheihei', 'value': 'xxxxxxxxx'},
        {'name': 'hahaha', 'value': 'xxxxxxx'},
        {'name': '_', 'value': 'xxxxxxxxxxx'},
        {'name': 'acw', 'value': 'xxxxxxxxxxxx'},
        {'name': 'cer', 'value': 'xxxxxxxxxx'},
        {'name': 'ession', 'value': 'xxxxxxxxxx'},
        {'name': 'userId', 'value': 'xxxxx'},
        {'name': 'uuid', 'value': 'xxxxxxxxxxxxxxxxxxx'},
    ]
    for cookie in cookies:
        driver.add_cookie(cookie)

    # NOTE(review): assumed to run once after all cookies are added, not once
    # per cookie -- confirm against the original layout.
    time.sleep(5)
    # Reload so the page is requested again with the cookies in place.
    driver.refresh()
    time.sleep(random.randrange(5, 10))


def check_condition():
    """Report whether the browser is currently on the captcha page.

    Returns:
        ``'f'`` when the page title equals the captcha page title,
        ``'t'`` otherwise.
    """
    # driver.title is used because reading the <title> element's .text via
    # XPath only yields an empty string.
    header = driver.title
    print(header)
    # NOTE(review): the title is compared exactly; confirm the literal matches
    # what the site actually serves.
    if header == '請完成安全驗證':
        return 'f'
    return 't'


def get_information(person_name=None):
    """Collect the text of every item box on the page and append it to the CSV.

    For each item box, the text of its ``layui-col-xs4`` cells is joined with a
    trailing comma after every cell, and one row ``[person_name, joined_text]``
    is appended to the CSV file.

    Args:
        person_name: Name written in the first CSV column.  When omitted, the
            module-level ``name`` variable (set by the main loop) is used, which
            keeps calls without arguments working.
    """
    if person_name is None:
        # Fall back to the main loop's variable for argument-less callers.
        person_name = name

    item_boxes = driver.find_elements(By.XPATH, './/div[@class="item-box layui-card "]')
    for item_box in item_boxes:
        cells = item_box.find_elements(By.XPATH, './/div[@class="layui-col-xs4"]')
        print(person_name)
        # Every cell's text is followed by a comma (including the last one).
        info = "".join(cell.text + ',' for cell in cells)
        print(info)

        # Append one row per item box.
        # NOTE(review): no explicit encoding is given, so the platform default
        # is used -- confirm that is what downstream consumers expect.
        with open("資訊.csv", "a", newline="") as f_object:
            writer(f_object).writerow([person_name, info])

    # NOTE(review): assumed to run once per call rather than once per item
    # box -- confirm against the original layout.
    time.sleep(5)


def anti_anti_spider():
    """Solve the click captcha via Chaojiying and click the returned point.

    Takes a screenshot of the captcha element, submits it to Chaojiying, parses
    the ``"x,y"`` answer and clicks that position inside the element.

    Raises:
        IndexError, ValueError: If the returned ``pic_str`` is not of the form
            ``"x,y"`` with numeric parts.
    """
    # Locate the element that contains the captcha and screenshot just it.
    img = driver.find_element(By.XPATH, './/div[@id="aliyunCaptcha-window-embed"]')
    img.screenshot(CAPTCHA_IMAGE_PATH)

    # Newer Selenium versions measure the click offset from the element's
    # centre, so the half-size is needed to express a top-left based position.
    img_half_width = float(img.rect['width']) / 2
    img_half_height = float(img.rect['height']) / 2

    # Chaojiying client; its module must sit next to this file.
    # Arguments: account, password, software ID.
    chaojiying = Chaojiying_Client('', '', '')

    # Submit the screenshot and read the answer; the file is closed promptly.
    with open(CAPTCHA_IMAGE_PATH, 'rb') as image_file:
        im = image_file.read()
    yzm_result = chaojiying.PostPic(im, 9101)['pic_str']
    time.sleep(10)
    print(yzm_result)

    # Only one coordinate is returned, so there is no need to split on "|"
    # and loop over several points.
    coordinates = yzm_result.split(',')
    x = float(coordinates[0])  # x coordinate
    y = float(coordinates[1])  # y coordinate

    # Simulate the click with an action chain, converting the top-left based
    # coordinate into a centre based offset.
    action = ActionChains(driver)
    action.move_to_element_with_offset(
        img, x - img_half_width, y - img_half_height
    ).click().perform()
    time.sleep(10)


# Main program
list_name = []  # names of the people to scrape; used to build the page URL
for name in list_name:
    open_web(name)  # open the page for this person
    flag = check_condition()  # was the captcha page triggered?
    print(flag)
    if flag == 'f':
        # Captcha was triggered: solve and click it before scraping.
        time.sleep(30)
        anti_anti_spider()
        time.sleep(15)
    else:
        time.sleep(5)
    get_information(name)
    time.sleep(random.randrange(10, 30))
    print(name)