Python 大作業 網易雲歌單資料分析及視覺化(參考多位博主文章)

專注的阿熊 發表於 2023-01-05
Python 視覺化

from bs4 import BeautifulSoup  

import requests  

import time  

# --- Step 1: crawl the playlist index pages and append one row per playlist
# --- to playlist.csv (columns: detail-page path, title, play count, creator).
import csv

# Base address of the playlist index page.
# NOTE(review): the scheme/host part of this URL was missing from the original
# text (the request could not work without it); it is reconstructed here as
# the NetEase Cloud Music playlist discovery endpoint -- confirm before use.
PLAYLIST_INDEX_URL = 'https://music.163.com/discover/playlist/'

# Playlist category to crawl; change this value to crawl another category.
# NOTE(review): presumably the site expects its own (Simplified Chinese)
# category label -- verify against the live site.
CATEGORY = '华语'

# Number of playlists per index page; also the step of the paging offset.
PAGE_SIZE = 35

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

for i in range(0, 1330, PAGE_SIZE):
    print(i)
    # Pause between requests so the crawl does not hammer the server.
    time.sleep(2)

    # Let requests build and percent-encode the query string.
    response = requests.get(
        url=PLAYLIST_INDEX_URL,
        params={'cat': CATEGORY, 'order': 'hot', 'limit': PAGE_SIZE, 'offset': i},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    # Anchors carrying each playlist's detail-page path and title.
    ids = soup.select('.dec a')
    # List items carrying the index-page information of each playlist.
    lis = soup.select('#m-pl-container li')
    print(len(lis))

    rows = []
    # zip() pairs each anchor with its list item and stops at the shorter
    # list, so a mismatch between the two selections cannot raise IndexError.
    for anchor, item in zip(ids, lis):
        detail_path = anchor['href']
        title = anchor['title']
        # Play count shown on the playlist cover.
        play = item.select('.nb')[0].get_text()
        # Name of the user who contributed the playlist.
        user = item.select('p')[1].select('a')[0].get_text()
        print(detail_path, title, play, user)
        rows.append([detail_path, title, play, user])

    # Append this page's rows. csv.writer quotes fields that contain commas,
    # quotes or newlines, so such titles no longer break the column layout.
    # newline='' is what the csv module requires for files it writes.
    with open('playlist.csv', 'a', encoding='utf-8-sig', newline='') as f:
        csv.writer(f).writerows(rows)

from bs4 import BeautifulSoup  

import pandas as pd  

import requests  

import time  

# --- Step 2: visit every playlist detail page listed in playlist.csv and
# --- append its details to music_message.csv (columns: title, tags,
# --- description, favourites, plays, song count, comment count).
import csv

# Host that the relative detail-page paths stored in playlist.csv belong to.
# NOTE(review): this prefix was missing from the original text (the line was
# an unterminated string literal); reconstructed here -- confirm before use.
BASE_URL = 'https://music.163.com'

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# on_bad_lines='skip' replaces error_bad_lines=False, which was removed in
# pandas 2.0 (requires pandas >= 1.3): malformed rows are dropped, not fatal.
df = pd.read_csv(
    'playlist.csv',
    header=None,
    on_bad_lines='skip',
    names=['url', 'title', 'play', 'user'],
)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

for i in df['url']:
    # Pause between requests so the crawl does not hammer the server.
    time.sleep(2)

    response = requests.get(url=BASE_URL + i, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Playlist title; commas are replaced to keep the existing output format.
    title = soup.select('h2')[0].get_text().replace(',', ' ')

    # Tags joined with '-'. join() also covers the single-tag case and yields
    # an empty string for a playlist without tags instead of an IndexError.
    tag = '-'.join(p.get_text() for p in soup.select('.u-tag i'))

    # Playlist description, flattened to one line; a single space when absent.
    description = soup.select('#album-desc-more')
    if description:
        text = description[0].get_text().replace('\n', '').replace(',', ' ')
    else:
        text = ' '

    # Number of times the playlist was added to favourites, without the
    # surrounding parentheses.
    collection = soup.select('#content-operation i')[1].get_text().replace('(', '').replace(')', '')
    # Playlist play count.
    play = soup.select('.s-fc6')[0].get_text()
    # Number of songs in the playlist.
    songs = soup.select('#playlist-track-count')[0].get_text()
    # Number of comments on the playlist.
    comments = soup.select('#cnt_comment_count')[0].get_text()

    print(title, tag, text, collection, play, songs, comments)

    # Append the row. csv.writer quotes any field that still contains a
    # comma, quote or newline, so the seven-column layout stays intact.
    # newline='' is what the csv module requires for files it writes.
    with open('music_message.csv', 'a', encoding='utf-8', newline='') as f:
        csv.writer(f).writerow([title, tag, text, collection, play, songs, comments])


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/69946337/viewspace-2931002/,如需轉載,請註明出處,否則將追究法律責任。