☀️SVG對映反爬示例練習⚡直接提取SVG文字圖片的文字⚡

專注的阿熊發表於2021-09-08

import re

from requests_html import HTMLSession

from selenium import webdriver

from bisect import bisect

def parseAndReplaceSvgNode(d_tags):
    """Replace each SVG-sprite node with the plain character it displays.

    For every element in ``d_tags`` the CSS ``background-position`` is read,
    its two integer offsets are located in the module-level ``xs`` / ``ys``
    coordinate lists with ``bisect``, and the matching character from
    ``data`` is swapped into the DOM as a text node.

    Relies on the module-level names ``browser``, ``xs``, ``ys`` and ``data``
    being populated before the call.

    Args:
        d_tags: Iterable of Selenium elements; each must support
            ``value_of_css_property``.
    """
    for d_tag in d_tags:
        position = d_tag.value_of_css_property("background-position")
        # Only the digit runs are kept, so any sign on the offsets is dropped.
        x, y = map(int, re.findall(r"\d+", position))
        num = data[bisect(ys, y)][bisect(xs, x)]
        # Replace the node with an ordinary text node.  The character is
        # passed as a script argument instead of being formatted into the
        # JavaScript source, so quotes, backslashes or braces in the SVG
        # data cannot break or alter the script.
        browser.execute_script(
            """
            var element = arguments[0];
            element.parentNode.replaceChild(
                document.createTextNode(arguments[1]), element);
            """,
            d_tag,
            num,
        )

browser = webdriver.Chrome()
# TODO: the target page URL is missing from this copy of the script (the
# string literal was left unterminated); fill it in before running.
url = ''
browser.get(url)

# A single sprite node is enough to discover the shared background image.
d_tag = browser.find_element_by_css_selector('d[class^="vhk"]')
background_image_url = d_tag.value_of_css_property("background-image")
# Drop the first 5 and last 2 characters around the address (presumably the
# `url("` / `")` wrapper -- confirm against what the browser reports).
svg_url = background_image_url[5:-2]

session = HTMLSession()
html_session = session.get(svg_url)

# Lookup tables read by parseAndReplaceSvgNode:
#   xs   - x offsets, taken from the first <text> element only
#   ys   - one y offset per <text> element
#   data - one list of characters per <text> element
xs = []
ys = []
data = []
for text_tag in html_session.html.xpath(r"//text"):
    if not xs:
        xs.extend(map(int, text_tag.xpath(".//@x")[0].split()))
    ys.append(int(text_tag.xpath(".//@y")[0]))
    data.append(list(text_tag.xpath(".//text()")[0]))

# Swap every SVG-sprite node in the DOM for its plain-text character in one go.
parseAndReplaceSvgNode(browser.find_elements_by_css_selector('d[class^="vhk"]'))

# Remove the <a> tag inside the title so it does not end up in the title text.
element = browser.find_element_by_css_selector('.title a')
browser.execute_script(
    "\n\nvar element = arguments[0];\n\n"
    "element.parentNode.removeChild(element);\n\n",
    element,
)

# Title.
title = browser.find_element_by_class_name("title").text

# Comments.
comment = browser.find_element_by_class_name("comments").text

# Average price per person.
avgPrice = browser.find_element_by_class_name('avgPriceTitle').text

# Taste, environment and service scores: the first three score items, in order.
comment_score_tags = browser.find_elements_by_css_selector(".comment_score .item")
taste, environment, service = (comment_score_tags[i].text for i in range(3))

# Address.
address = browser.find_element_by_css_selector('.address .address_detail').text

# Speciality.
characteristic = browser.find_element_by_css_selector(
    '.characteristic .info-name'
).text

# Phone.
phone = browser.find_element_by_class_name("more").text

print(
    title,
    comment,
    avgPrice,
    taste,
    environment,
    service,
    address,
    characteristic,
    phone,
)


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/69946337/viewspace-2791096/,如需轉載,請註明出處,否則將追究法律責任。

相關文章