視訊地址 點這裡
# 豆瓣讀書列表採集程式碼
import requests
from requests.structures import CaseInsensitiveDict
from lxml import etree
import csv
def parse_price(text):
    """Extract the numeric price from a buy-info label such as '纸质版 39.50元'.

    The number is taken from the text between the last '版' (when present)
    and the first '元'.

    Args:
        text: Raw label text taken from the buy-info link.

    Returns:
        The price as a float, or -1 when the label has no '元' marker or the
        text in front of it is not a number.
    """
    head, marker, _ = text.strip().partition('元')
    if not marker:
        return -1
    # rpartition yields the whole head when '版' is missing, so a label like
    # '39.50元' keeps its first digit instead of being sliced from index 1.
    number = head.rpartition('版')[2].strip()
    try:
        return float(number)
    except ValueError:
        # One odd label must not abort the whole run before the CSV is written.
        return -1


def parse_book(block):
    """Build one CSV row from a single `subject-item` list element.

    Args:
        block: An lxml element for one `<li class="subject-item">`.

    Returns:
        `[title, cover, price]`; title and cover default to '' and price to
        -1 when the corresponding node is not found.
    """
    title = ''
    elem_title = block.xpath('.//h2/a')
    if elem_title:
        # The link text is spread over nested nodes; join it and drop all
        # newlines and spaces.
        title = ''.join(elem_title[0].itertext()).replace('\n', '').replace(' ', '')

    price = -1
    elem_price = block.xpath(".//span[@class='buy-info']/a/text()")
    if elem_price:
        price = parse_price(elem_price[0])

    cover = ''
    elem_cover = block.xpath(".//img/@src")
    if elem_cover:
        cover = elem_cover[0]

    return [title, cover, price]


def scrape_douban_books(pages=5, page_size=20, timeout=10):
    """Collect `[title, cover, price]` rows from the Douban tag listing.

    Args:
        pages: Number of listing pages to request, starting from the first.
        page_size: Step used for the `start` offset between pages.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A list of rows, one per book found on the requested pages. Paging
        stops early when a request does not return a success status, and the
        rows gathered so far are returned.
    """
    # The headers never change, so build them once instead of once per page.
    headers = CaseInsensitiveDict()
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36"

    rows = []
    for page in range(1, pages + 1):
        page_url = f"https://book.douban.com/tag/%E7%BC%96%E7%A8%8B?start={(page - 1) * page_size}&type=T"
        print(page_url)
        # Without a timeout a stalled connection would block the script forever.
        res = requests.get(page_url, headers=headers, timeout=timeout)
        if not res.ok:
            # Do not parse an error page as a listing; keep what we have.
            print(f"request failed with HTTP {res.status_code}, stopping")
            break
        sel = etree.HTML(res.text)
        rows.extend(
            parse_book(block)
            for block in sel.xpath("//li[@class='subject-item']")
        )
    return rows


if __name__ == "__main__":
    book_info = scrape_douban_books()
    # On Windows pass both encoding and newline:
    # without encoding the default is gbk and the output is garbled;
    # without newline a blank line is written after every row.
    with open('book.csv', 'w', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerows(book_info)
# B站熱門列表採集程式碼
import requests
import csv
def extract_hot_rows(payload):
    """Turn one decoded API response into CSV rows.

    Args:
        payload: The decoded JSON body of one popular-list response.

    Returns:
        A list of `[bvid, pic, title, owner name]` rows, one per entry of
        `payload['data']['list']`. Returns an empty list when `data` or
        `list` is missing or empty instead of raising.
    """
    data = payload.get('data') if isinstance(payload, dict) else None
    items = data.get('list') if isinstance(data, dict) else None
    return [
        [
            item['bvid'],
            item['pic'],
            item['title'],
            item['owner']['name'],
        ]
        for item in items or []
    ]


def scrape_bilibili_hot(pages=11, page_size=20, timeout=10):
    """Collect rows from the Bilibili popular-list API.

    Args:
        pages: Number of pages to request, starting from page 1.
        page_size: Value sent as the `ps` query parameter.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A list of `[bvid, pic, title, owner name]` rows. Paging stops early
        on a failed request, a non-JSON body, or a page with no entries, and
        the rows gathered so far are returned.
    """
    rows = []
    for page in range(1, pages + 1):
        page_url = f"https://api.bilibili.com/x/web-interface/popular?ps={page_size}&pn={page}"
        print(page_url)
        # Without a timeout a stalled connection would block the script forever.
        res = requests.get(page_url, timeout=timeout)
        if not res.ok:
            print(f"request failed with HTTP {res.status_code}, stopping")
            break
        try:
            payload = res.json()
        except ValueError:
            # Body was not JSON; keep the rows already collected.
            print("response is not valid JSON, stopping")
            break
        page_rows = extract_hot_rows(payload)
        if not page_rows:
            # Nothing usable on this page, so further pages are not requested.
            print("no entries returned, stopping")
            break
        rows.extend(page_rows)
    return rows


if __name__ == "__main__":
    hot_list = scrape_bilibili_hot()
    with open('bilibili.csv', 'w', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerows(hot_list)
本作品採用《CC 協議》,轉載必須註明作者和本文連結