對顯示搜尋結果的網址傳送請求:
import requests

# Search-results URL to scrape.
url = 'https://www.dianping.com/search/keyword/150/0_%E6%84%8F%E9%9D%A2'

# Browser-like headers sent with the request.
# NOTE(review): the Cookie below looks like a captured browser session
# (it appears to include a login token). It will expire, and it should
# not be committed to version control -- consider loading it from
# configuration instead. Confirm before publishing this file.
headers = {
    'Cookie':'baiduappugc_ab=ugcdetail%3AA%3A1; _lxsdk_cuid=18fee40b7a3c8-005f5aa16f3f6f-26001c51-144000-18fee40b7a37e; _lxsdk=18fee40b7a3c8-005f5aa16f3f6f-26001c51-144000-18fee40b7a37e; _hc.v=dad10692-6e0c-4402-5850-9256da170739.1717689171; fspop=test; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1719247152; s_ViewType=10; WEBDFPID=69y55w298vw15vvy1zw2734zu370z9288095y8v097u97958v3u94896-2034607213206-1719247213206OOSGICKfd79fef3d01d5e9aadc18ccd4d0c95072558; qruuid=e15e73da-9b7e-47ba-860c-22c7789f6580; dper=02023625f81edb23ecbb0420185188f1dde6080fbba05896da553ead0fc74a16e8fc188e79d114ca9c696820d09910f5ea932f09015d590fc94500000000fb200000f08fa513af0aa991e42172c624e809f0f396f2a8ab764fe6daa2d1c7baecdb3df371a116ac704b96c72a511bba45c3ca; ll=7fd06e815b796be3df069dec7836c3df; cy=150; cye=jining; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1719247398; _lxsdk_s=1904b1da218-9f6-f23-939%7C%7C135',
    'Host':'www.dianping.com',
    # Anti-hotlinking: tells the site which page the request came from.
    'Referer':'https://www.dianping.com/search/keyword/150/0_%E6%84%8F%E9%9D%A2',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page
# further down as if it were search results.
response.raise_for_status()
# print(response.text)  # uncomment to inspect the raw HTML
解析出店家詳情頁資訊:
import parsel

# Wrap the search-results HTML in a selector so it can be queried.
selector1 = parsel.Selector(text=response.text)

# Use a CSS query to pick out the link of every listed shop, then read
# the href attribute of each match to get all detail-page URLs.
detail_links = selector1.css('.shop-list ul li .pic a::attr(href)')
href = detail_links.getall()
遍歷詳情頁列表,提取店家詳情資訊:
# Visit every detail page collected in `href` and print the shop name.
for detail_url in href:
    # Request the detail page with the same headers as the search request.
    # The timeout keeps one unresponsive page from stalling the whole run.
    html_data = requests.get(url=detail_url, headers=headers, timeout=10)
    # Parse the returned HTML.
    selector2 = parsel.Selector(html_data.text)
    # Text of the <h1> inside the element with id "basic-info".
    # `.get()` yields None when nothing matches, so None is printed for a
    # page that lacks that element.
    name = selector2.xpath('//*[@id="basic-info"]/h1/text()').get()
    # TODO: also extract the score, e.g.
    #   selector2.xpath('//*[@id="basic-info"]/div[1]/div/div[2]').getall()
    print(name)