爬蟲—有道翻譯案例分析

專注的阿熊發表於2021-09-03

#coding:utf-8

import requests

import hashlib

import time

import random

class Youdao(object):
    """Small client for the Youdao web-translation form endpoint.

    Typical use is ``Youdao(word).run()``, which builds the signed form
    payload, POSTs it, and prints the first translated segment.

    Attributes:
        url: Endpoint the form is POSTed to.
        headers: HTTP headers (Cookie, User-Agent, Referer) sent with the
            request.
        formdata: Form payload dict; ``None`` until ``generate_formdata``
            has been called.
        word: The text to translate.
    """

    # NOTE(review): the endpoint literal was truncated to an unterminated
    # string in the original source. This value is the `translate_o` endpoint
    # that the fanyideskweb salt/sign/lts form is normally posted to --
    # confirm against a fresh browser capture.
    _API_URL = 'https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    # Constant prefix and suffix that the site's JS feeds into the md5 "sign".
    _CLIENT = 'fanyideskweb'
    _SIGN_KEY = 'Y2FYu%TNSbMCxc3t2u^XT'

    # Seconds to wait for the server; without it requests can block forever.
    _TIMEOUT = 10

    def __init__(self, word):
        """Store the word to translate and prepare the static request parts.

        Args:
            word: Text to translate.
        """
        self.url = self._API_URL
        self.headers = {
            'Cookie': 'OUTFOX_SEARCH_USER_ID=-676104602@10.108.160.100; JSESSIONID=aaac40qHRBaDr_iGhSLUx; OUTFOX_SEARCH_USER_ID_NCOO=696608045.4734024; fanyi-ad-id=115021; fanyi-ad-closed=1; ___rl__test__cookies=1630572993167',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
            # The original value contained stray spaces ("https: // ... /"),
            # which is not a valid URL.
            'Referer': 'https://fanyi.youdao.com/',
        }
        self.formdata = None
        self.word = word

    def generate_formdata(self):
        """Build the signed form payload and store it in ``self.formdata``.

        How the dynamic fields were derived:

        1. Compare several captured requests to find the form fields that
           change between submissions.
        2. Search the site's scripts globally for those field names/values.
        3. Locate the JS that produces them and step through it:

               ts:   "" + (new Date).getTime()
               salt: ts + parseInt(10 * Math.random(), 10)
               sign: md5("fanyideskweb" + word + salt + "Y2FYu%TNSbMCxc3t2u^XT")

        4. Port that JS to Python (the code below).
        """
        word = self.word

        # (new Date).getTime() is an integer millisecond count, so truncate
        # the float instead of stringifying it with a fractional part.
        lts = str(int(time.time() * 1000))

        # parseInt(10 * Math.random(), 10) yields a single digit 0-9.
        salt = lts + str(random.randint(0, 9))

        # 32-character hex md5 over client + word + salt + key.
        raw = self._CLIENT + word + salt + self._SIGN_KEY
        sign = hashlib.md5(raw.encode('utf-8')).hexdigest()

        self.formdata = {
            'i': word,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': self._CLIENT,
            'salt': salt,
            'sign': sign,
            'lts': lts,
            'bv': '89e18957825871c419be045180c67d3b',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_CLICKBUTTION',
        }

    def get_data(self):
        """POST ``self.formdata`` to ``self.url`` and return the decoded JSON.

        Returns:
            The response body parsed by ``response.json()``.
        """
        response = requests.post(
            url=self.url,
            headers=self.headers,
            data=self.formdata,
            timeout=self._TIMEOUT,
        )
        return response.json()

    def run(self):
        """Build the payload, send it, and print the first translated segment."""
        # url and headers were prepared in __init__; only the form is dynamic.
        self.generate_formdata()

        # Send the request and fetch the response.
        data = self.get_data()

        # Parse the data: only the first entry of the first group is read.
        result = data['translateResult'][0][0]['tgt']
        print(result)

if __name__ == '__main__':
    # Script entry point: read the text to translate from stdin and print
    # its translation.
    word = input(' 請輸入需要翻譯的漢語 :')

    # Bind the instance to its own name; assigning it to `Youdao` would
    # rebind the class name and make the class unusable afterwards.
    translator = Youdao(word)
    translator.run()


來自“ITPUB部落格”,連結:http://blog.itpub.net/69946337/viewspace-2790349/ ,如需轉載,請註明出處,否則將追究法律責任。

相關文章