from bs4 import BeautifulSoup
import requests
import time
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

# NOTE(review): the scheme/host/path prefix of this URL was missing from the
# original literal (it started at the category name). It has been restored to
# the NetEase Cloud Music playlist index, which is presumably the page the
# selectors below ('#m-pl-container li', '.dec a', '.nb') target -- confirm.
PLAYLIST_INDEX_URL = (
    'https://music.163.com/discover/playlist/'
    '?cat={category}&order=hot&limit={limit}&offset={offset}'
)


def build_index_url(offset, category='華語', limit=35):
    """Return the URL of one page of the playlist index.

    Args:
        offset: Index of the first playlist on the page.
        category: Playlist category to list; change this to scrape another
            category. NOTE(review): must match the site's category name
            exactly -- confirm the default spelling.
        limit: Number of playlists per page.
    """
    return PLAYLIST_INDEX_URL.format(
        category=category, limit=limit, offset=offset
    )


def parse_index_page(html):
    """Extract ``(url, title, play, user)`` tuples from an index page.

    Raises:
        IndexError: If a list item lacks the play-count or contributor
            element.
        KeyError: If a playlist link lacks an ``href`` or ``title`` attribute.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Anchors carrying the detail-page href and the playlist title.
    anchors = soup.select('.dec a')
    # List items carrying the play count and the contributor name.
    items = soup.select('#m-pl-container li')
    rows = []
    # zip() stops at the shorter sequence, so a page where the two selectors
    # return different counts no longer raises IndexError.
    for anchor, item in zip(anchors, items):
        play = item.select('.nb')[0].get_text()
        user = item.select('p')[1].select('a')[0].get_text()
        rows.append((anchor['href'], anchor['title'], play, user))
    return rows


def append_index_rows(path, rows):
    """Append *rows* to the CSV file at *path* (UTF-8 with BOM).

    The csv module quotes fields that contain commas, quotes or newlines, so
    a title such as ``"a, b"`` stays in a single column.
    """
    import csv

    with open(path, 'a', encoding='utf-8-sig', newline='') as f:
        csv.writer(f).writerows(rows)


def scrape_playlist_index(start=0, stop=1330, step=35,
                          out_path='playlist.csv', delay=2, timeout=10):
    """Scrape every index page and append its playlists to *out_path*.

    Args:
        start, stop, step: Range of page offsets to request.
        out_path: CSV file the rows are appended to.
        delay: Seconds to sleep before each request.
        timeout: Seconds before a request is abandoned; without it a stalled
            connection would block the run indefinitely.
    """
    for offset in range(start, stop, step):
        print(offset)
        time.sleep(delay)
        response = requests.get(
            url=build_index_url(offset), headers=headers, timeout=timeout
        )
        rows = parse_index_page(response.text)
        print(len(rows))
        for row in rows:
            print(*row)
        append_index_rows(out_path, rows)


if __name__ == '__main__':
    scrape_playlist_index()
from bs4 import BeautifulSoup
import pandas as pd
import requests
import time
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

# NOTE(review): the original literal that prefixed each stored href was lost
# (the line was an unterminated string). Restored to the NetEase Cloud Music
# host, which is presumably where the relative playlist hrefs live -- confirm.
PLAYLIST_SITE = 'https://music.163.com'


def read_playlist_urls(path='playlist.csv'):
    """Return the playlist detail hrefs stored in the index CSV at *path*.

    Rows with too many fields are skipped instead of aborting the read, and
    rows whose url column is empty are dropped.
    """
    columns = ['url', 'title', 'play', 'user']
    try:
        df = pd.read_csv(path, header=None, on_bad_lines='skip', names=columns)
    except TypeError:
        # Older pandas releases do not know `on_bad_lines`; fall back to the
        # keyword they used for the same purpose.
        df = pd.read_csv(path, header=None, error_bad_lines=False, names=columns)
    return df['url'].dropna().astype(str).tolist()


def build_detail_url(href):
    """Return the absolute detail-page URL for a stored relative *href*."""
    return PLAYLIST_SITE + href


def format_tags(tags):
    """Join tag names with '-'; an empty list yields an empty string."""
    return '-'.join(tags)


def parse_playlist_detail(html):
    """Extract one playlist's details from its detail page.

    Returns:
        ``(title, tag, text, collection, play, songs, comments)`` as strings.

    Raises:
        IndexError: If an element other than the tags or the description is
            missing from the page.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Playlist title. The comma padding is kept so the field text matches
    # what earlier runs wrote; row integrity comes from CSV quoting instead.
    title = soup.select('h2')[0].get_text().replace(',', ' , ')
    # Tags; a playlist without any no longer raises IndexError.
    tag = format_tags([node.get_text() for node in soup.select('.u-tag i')])
    # Description, with a placeholder when the page has none.
    description = soup.select('#album-desc-more')
    if description:
        text = description[0].get_text().replace('\n', '').replace(',', ' , ')
    else:
        text = ' 無 '
    # Favourite count, with the surrounding parentheses stripped.
    collection = (
        soup.select('#content-operation i')[1]
        .get_text().replace('(', '').replace(')', '')
    )
    # Play count.
    play = soup.select('.s-fc6')[0].get_text()
    # Number of songs in the playlist.
    songs = soup.select('#playlist-track-count')[0].get_text()
    # Number of comments.
    comments = soup.select('#cnt_comment_count')[0].get_text()
    return title, tag, text, collection, play, songs, comments


def append_detail_row(path, row):
    """Append one detail *row* to the CSV file at *path* (UTF-8).

    The csv module quotes fields containing commas, so such fields no longer
    spill into extra columns.
    """
    import csv

    with open(path, 'a', encoding='utf-8', newline='') as f:
        csv.writer(f).writerow(row)


def scrape_playlist_details(index_path='playlist.csv',
                            out_path='music_message.csv',
                            delay=2, timeout=10):
    """Fetch every playlist listed in *index_path* and append its details.

    Args:
        index_path: CSV produced by the index scraper.
        out_path: CSV file the detail rows are appended to.
        delay: Seconds to sleep before each request.
        timeout: Seconds before a request is abandoned; without it a stalled
            connection would block the run indefinitely.
    """
    for href in read_playlist_urls(index_path):
        time.sleep(delay)
        response = requests.get(
            url=build_detail_url(href), headers=headers, timeout=timeout
        )
        row = parse_playlist_detail(response.text)
        print(*row)
        append_detail_row(out_path, row)


if __name__ == '__main__':
    scrape_playlist_details()
來自“ITPUB部落格”，連結：http://blog.itpub.net/69946337/viewspace-2931002/，如需轉載，請註明出處，否則將追究法律責任。
相關文章
- Python模組(module)
- SICP:符號求導、集合表示和Huffman樹(Python實現)
- Python Kconfiglib初次學習
- 虛擬現實 VR 碰撞 3D 視覺化,圖撲打造一體化管控平臺
- 嘉倫諮詢:後疫情時代家庭投資理財策略分析及資產配置最佳化建議(附下載)
- 簡單介紹python中使用正規表示式的方法
- POSTGRESQL 小版本升級失敗後的原因分析
- 故障分析 | MySQL死鎖案例分析
- 一篇文章帶你瞭解高可用架構分析
- 【驗證碼逆向專欄】某驗四代滑塊驗證碼逆向分析
- 從編譯到可執行,eBPF 加速容器網路的原理分析 | 龍蜥技術
- python如何求最大值?常用方法有哪些?
- 一屏統管 智慧交管三維視覺化大屏雲控系統
- acw_sc__v2引數生成分析並python實現演算法