因為我的python版本為3.12
所以安裝一些軟體包命令 與之前有些許不同
pip install beautifulsoup4
pip install demjson3
pip install requests
話不多說 程式碼奉上
"""Scrape reply letters from www.beijing.gov.cn and append them to yijian.txt.

Flow: POST the paginated listing endpoint, decode its (non-strict) JSON with
demjson3, then GET each letter's detail page and extract fields with
BeautifulSoup. One pipe-delimited record per letter is appended to yijian.txt.
"""
import json

import demjson3
import requests
from bs4 import BeautifulSoup
import csv  # kept from the original file; not used by this script

# Headers for the listing API. NOTE: the original hard-coded
# 'Content-Length': '155', which is wrong for the 2-byte '{}' body actually
# sent — requests computes Content-Length itself, so it is omitted here.
HEADERS = {
    'Host': 'www.beijing.gov.cn',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Accept-Encoding': 'gzip, deflate',
    'Content-Type': 'text/json',
    'X-Requested-With': 'XMLHttpRequest',
    'Origin': 'http://www.beijing.gov.cn',
    'Connection': 'keep-alive',
    'Referer': 'http://www.beijing.gov.cn/hudong/hdjl/',
}

LIST_URL_TMPL = (
    "https://www.beijing.gov.cn/hudong/hdjl/sindex/"
    "bjah-index-hdjl!replyLetterListJson.action"
    "?page.pageNo={page}&page.pageSize=6&orgtitleLength=26"
)

OUTPUT_FILE = "yijian.txt"
LAST_PAGE = 174  # original loop condition: while page < 175


def _first_text(soup, css_class, prefix="", collapse=False):
    """Return the text of the first <div> whose class attribute is *css_class*.

    The *prefix* (e.g. a field label) is removed with str.removeprefix — the
    original code used str.lstrip, which strips any of the given CHARACTERS
    and can eat leading content characters, not just the label. Leading and
    trailing whitespace is trimmed; with collapse=True, embedded CR/LF are
    removed too. Returns "" when no matching <div> exists.
    """
    divs = soup.find_all("div", {"class": css_class})
    if not divs:
        return ""
    text = divs[0].get_text().removeprefix(prefix).strip()
    if collapse:
        text = text.replace("\r", "").replace("\n", "")
    return text


def _detail_url(letter_type_name, original_id):
    """Build the detail-page URL for one letter.

    '諮詢' (consult) letters use the consult flow; everything else uses the
    suggest flow. NOTE(review): the site may return Simplified Chinese type
    names — confirm the comparison string matches what the API actually sends,
    otherwise the consult branch is never taken.
    """
    if letter_type_name == '諮詢':
        return ("http://www.beijing.gov.cn/hudong/hdjl/com.web.consult."
                f"consultDetail.flow?originalId={original_id}")
    return ("http://www.beijing.gov.cn/hudong/hdjl/com.web.suggest."
            f"suggesDetail.flow?originalId={original_id}")


def _scrape_detail(original_id, letter_type_name):
    """Fetch one detail page and return its pipe-joined record.

    Returns None when the HTTP status is not 200. Missing fields become "".
    """
    r = requests.get(_detail_url(letter_type_name, original_id),
                     headers={'user-agent': 'Mozilla/5.0'})
    if r.status_code != 200:
        return None

    soup = BeautifulSoup(r.text, "html.parser")

    strong = soup.find("strong")
    title = strong.get_text().replace("\n", "") if strong else ""

    from_people = _first_text(
        soup, "col-xs-10 col-lg-3 col-sm-3 col-md-4 text-muted", prefix='來信人:')
    from_time = _first_text(
        soup, "col-xs-5 col-lg-3 col-sm-3 col-md-3 text-muted", prefix='時間:')
    # BUGFIX: the original guarded this field with {"class", "..."} — a SET,
    # not a dict — so the attrs filter was broken for the letter body.
    problem = _first_text(
        soup, "col-xs-12 col-md-12 column p-2 text-muted mx-2", collapse=True)
    office = _first_text(
        soup, "col-xs-9 col-sm-7 col-md-5 o-font4 my-2", collapse=True)
    answer_time = _first_text(
        soup, "col-xs-12 col-sm-3 col-md-3 my-2", prefix='答覆時間:')
    answer = _first_text(
        soup, "col-xs-12 col-md-12 column p-4 text-muted my-3", collapse=True)

    return "|".join([str(original_id), str(letter_type_name), title,
                     from_people, from_time, problem, office,
                     answer_time, answer])


def main():
    """Walk listing pages 1..LAST_PAGE and append one record per letter."""
    payload = json.dumps({})  # API expects an (empty) JSON body
    # Open the output file once instead of reopening it per record.
    with open(OUTPUT_FILE, 'a', encoding='utf-8') as fp:
        for page in range(1, LAST_PAGE + 1):
            print(page)
            resp = requests.post(LIST_URL_TMPL.format(page=page),
                                 data=payload, headers=HEADERS)
            # demjson3 tolerates the API's non-strict JSON.
            for item in demjson3.decode(resp.text).get("result", []):
                original_id = item.get("originalId")      # letter id
                letter_type = item.get("letterTypeName")  # letter type
                record = _scrape_detail(original_id, letter_type)
                if record is None:
                    print(f"Failed to retrieve details for ID: {original_id}")
                else:
                    fp.write(record + '\n')


if __name__ == "__main__":
    main()