import requests
import re
from requests.exceptions import RequestException
import sys
import json
# Base URL of the category listing; the page number is appended by the caller.
url = 'https://2018.cn/m/index.php?mod=category&catid=1&page='

# Request headers sent with every page fetch.
# The Cookie value is written as adjacent string literals (one per cookie) so
# that it forms a single logical line: a raw newline inside the value is both
# a Python syntax error and an illegal HTTP header value. The value must not
# start with "Cookie: " because the dict key already supplies the header name.
# NOTE(review): the cookie looks like it was copied from a browser session and
# may expire - confirm whether the site actually requires it.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Mobile Safari/537.36'
    ),
    'Cookie': (
        'EMARTICLE1=null; EMARTICLE2=null; EMARTICLE0=null; '
        'EMARTICLE3=05-14%2013%3A27%3A33@%23%24%u5168%u53A6%u95E8%u4E0A%u95E8%u56DE%u6536%u9EC4%u91D1%20%u6E56%u91CC%20%u601D%u660E%20%u6D77%u6CA7%20%u7FD4%u5B89%20%u540C%u5B89%20%u96C6%u7F8E%20%u674F%u6797%20-%20%u53A6%u95E8%u7269%u54C1%u56DE%u6536%20-%20%u53A6%u95E82018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4405759; '
        'EMARTICLE4=05-14%2013%3A28%3A33@%23%24%u4E8C%u624B%u949B%u5408%u91D1%u5C55%u67DC%u56DE%u6536%u5DE5%u827A%u54C1%u6536%u8D2D%u73E0%u5B9D%u5C55%u793A%u67DC%u4ED3%u5E93%u5C55%u793A%u67DC%u56DE%u6536%u5B9E%u6728%u67DC%u53F0%20-%20%u5317%u4EAC%u7269%u54C1%u56DE%u6536%20-%20%u5317%u4EAC2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4448010; '
        'EMARTICLE5=05-14%2013%3A29%3A19@%23%24%u5168%u5317%u4EAC%u9AD8%u4EF7%u4E0A%u95E8%u56DE%u6536%u529E%u516C%u5BB6%u5177%u4E2D%u592E%u7A7A%u8C03%u56DE%u6536%u529E%u516C%u7528%u54C1%20-%20%u5317%u4EAC%u5BB6%u5177/%u529E%u516C%u5BB6%u5177%20-%20%u5317%u4EAC2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4448013; '
        'EMARTICLE7=05-14%2019%3A39%3A05@%23%24%u5E93%u535A%u58EB%u71C3%u6C14%u7076%u79E6%u7687%u5C9B%u7EF4%u4FEE%u7535%u8BDD%u591A%u5C11%uFF1F%u7EF4%u4FEE%u5E08%u508524%u5C0F%u65F6%u4E0A%u95E8%u7EF4%u4FEE%20-%20%u79E6%u7687%u5C9B%u5176%u4ED6%u54C1%u724C%20-%20%u79E6%u7687%u5C9B2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456631; '
        'EMARTICLE8=05-14%2013%3A34%3A58@%23%24%u65E0%u9521%u535A%u4E16%u6EDA%u7B52%u6D17%u8863%u673A%u5168%u5E02%u7EDF%u4E00%u670D%u52A1%u7F51%u70B924%u5C0F%u65F6%u62A5%u4FEE%u7535%u8BDD%20-%20%u65E0%u9521%u6D17%u8863%u673A%u7EF4%u4FEE%20-%20%u65E0%u95212018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456634; '
        'EMARTICLE9=05-14 19:39:11@#$%u4E1C%u839E%u4E1C%u57CE%u5E9F%u94C1%u5E9F%u94A2%u56DE%u6536%u516C%u53F8%u9AD8%u4EF7%u56DE%u6536%u5851%u80F6%u5E9F%u6A21%u5177%u62A5%u4EF7%20-%20%u4E1C%u839E%u7269%u54C1%u56DE%u6536%20-%20%u4E1C%u839E2018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456318; '
        'EMARTICLE6=05-15 20:31:48@#$%u5409%u6797%u5E02%u5404%u79CD%u9AD8%u4F4E%u7AEF%u914D%u7F6E%u7535%u8111%u56DE%u6536%u8BC4%u4F30%u4E0A%u95E8%u53D6%u8D27%u4EE5%u4E00%u6761%u9F99%20-%20%u5409%u6797%u7269%u54C1%u56DE%u6536%20-%20%u5409%u67972018%u4FE1%u606F%u6E2F%u624B%u673A%u7248@%23%24https%3A//2018.cn/m/index.php%3Fmod%3Dinformation%26id%3D4456630; '
        'UM_distinctid=17208040fa497-05e8cc9963ce1-d373666-1fa400-17208040fa5564; '
        'CNZZDATA1253450280=2048944961-1589270953-https%253A%252F%252Fwww.baidu.com%252F%7C1589545383'
    ),
}
def grt_one_page(url, timeout=10):
    """Fetch one listing page and return its body as text.

    Args:
        url: Full URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        The decoded response body when the server answers with HTTP 200;
        ``None`` for any other status code or when the request fails
        (connection error, timeout, ...).
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        # Best-effort crawl: a failed page is reported to the caller as None.
        return None
    if response.status_code == 200:
        return response.text
    return None
# Compiled once at import time instead of on every call. Captures, per
# listing <li>: link prefix, 7-digit goods id, thumbnail URL, name, price.
_ITEM_PATTERN = re.compile(
    r'<li class="">.*?<a\shref="(.*?)(\d{7})">.*?<img\sclass="thumbnail"\ssrc="(.*?)"\salt="([\u4E00-\u9FA5]+)</strong>">.*?price">.*?(\d+).*?</li>',
    re.S,
)


def parse_one_page(html, limit=3):
    """Yield one dict per goods entry found in a listing page.

    Args:
        html: Page source as text. ``None`` or an empty string (for example
            after a failed download) yields nothing instead of raising.
        limit: Maximum number of entries to yield, counted from the top of
            the page. Defaults to 3; pass ``None`` to yield every match.

    Yields:
        Dicts with the keys ``link``, ``goods_id``, ``good_small_log``,
        ``goods_name`` and ``goods_price``; all values are strings taken
        from the regex capture groups.
    """
    if not html:
        return
    matches = _ITEM_PATTERN.findall(html)
    for link_prefix, goods_id, thumbnail, name, price in matches[:limit]:
        yield {
            # The id is captured separately, so re-attach it to rebuild the
            # complete href.
            'link': link_prefix + goods_id,
            'goods_id': goods_id,
            'good_small_log': thumbnail,
            'goods_name': name.strip(),
            'goods_price': price,
        }
def write_to_file(content, path='F://result.json'):
    """Append one record to the result file as a single JSON line.

    Each record is written on its own line (JSON Lines), so the file can be
    read back record by record. Writing the objects back-to-back with no
    separator produces a file no JSON parser can load.

    Args:
        content: JSON-serialisable object to store.
        path: Destination file, opened in append mode with UTF-8 encoding.
            Defaults to the original hard-coded location.
    """
    with open(path, 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
if __name__ == '__main__':
    # Crawl listing pages 1 through 4; store and echo every parsed record.
    for page in range(1, 5):
        html = grt_one_page(url + str(page))
        if html is None:
            # The fetch helper returns None on failure; skip the page rather
            # than handing None to the regex parser, which would raise.
            print('failed to fetch page %d, skipping' % page, file=sys.stderr)
            continue
        for result in parse_one_page(html):
            write_to_file(result)
            print(result)
# 本作品採用《CC 協議》，轉載必須註明作者和本文連結