import os

import re

import json

import requests

from lxml import etree

def _safe_name(name):
    """Return *name* with characters that are illegal in file names replaced by '_'."""
    cleaned = re.sub(r'[\\/:*?"<>|\r\n]+', '_', name).strip()
    return cleaned or 'untitled'


def download_songs(url=None):
    """Download every track listed on a NetEase Cloud Music page as an MP3 file.

    The page is fetched and parsed, the tracks are read from the hidden
    ``<ul class="f-hide">`` list, and each track is saved into a folder under
    the current directory.  The folder is named after the artist when the
    page has an ``artist-name`` heading, otherwise after the playlist title.

    Args:
        url: Address of the page to scrape.  ``None`` selects a built-in
            default page.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched.  Failures of individual tracks are printed and skipped.
    """
    if url is None:
        # NOTE(review): the original default URL was lost from this listing;
        # this value is a reconstruction - confirm or replace it.
        url = 'https://music.163.com/#/discover/toplist'
    # Drop the '/#' marker and switch the scheme from https to http.
    url = url.replace('/#', '').replace('https', 'http')

    # NetEase Cloud Music "outer link" endpoint; the song id is appended to it.
    # NOTE(review): the literal was lost from this listing; reconstructed - confirm.
    out_link = 'http://music.163.com/song/media/outer/url?id='

    # Request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        # NOTE(review): the Referer value was lost from this listing; reconstructed - confirm.
        'Referer': 'https://music.163.com/',
        'Host': 'music.163.com',
    }

    # Fetch and parse the page source.
    res = requests.get(url=url, headers=headers, timeout=30).text
    tree = etree.HTML(res)

    # Track list.
    song_list = tree.xpath('//ul[@class="f-hide"]/li/a')

    # Artist page: the heading carries the artist name.
    artist_name_tree = tree.xpath('//h2[@id="artist-name"]/text()')
    artist_name = str(artist_name_tree[0]) if artist_name_tree else None

    # Playlist page: the heading carries the playlist title.
    # Alternative xpath kept from the original listing:
    # tree.xpath('//*[@id="m-playlist"]/div[1]/div/div/div[2]/div[2]/div/div[1]/table/tbody')
    song_list_name_tree = tree.xpath('//h2[contains(@class,"f-ff2")]/text()')
    song_list_name = str(song_list_name_tree[0]) if song_list_name_tree else None

    # Save into a folder named after the artist or the playlist.  Fall back to
    # a fixed name when the page exposes neither, instead of raising TypeError.
    folder_name = artist_name or song_list_name or 'netease_music'
    folder = os.path.join('.', _safe_name(folder_name))
    os.makedirs(folder, exist_ok=True)

    for i, s in enumerate(song_list):
        hrefs = s.xpath('./@href')
        titles = s.xpath('./text()')
        if not hrefs or not titles:
            # Anchor without a link or without a visible title: nothing to save.
            continue

        song_id = str(hrefs[0]).split('=')[-1]
        src = out_link + song_id  # real audio resource address
        # The title comes from the page, so strip path separators and other
        # characters that cannot appear in a file name.
        filename = _safe_name(str(titles[0])) + '.mp3'
        filepath = os.path.join(folder, filename)

        print(' 開始下載第 {} 首音樂： {}\n'.format(i + 1, filename))

        # Lyrics could be fetched here as well: download_lyric(title, song_id)
        try:
            response = requests.get(src, timeout=30)
            # Do not write an HTTP error page to disk as if it were audio.
            response.raise_for_status()
            with open(filepath, 'wb') as f:
                f.write(response.content)  # raw audio bytes
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and continue with the next track.
            print(e)

    print('{} 首全部歌曲已經下載完畢！ '.format(len(song_list)))

def download_lyric(song_name, song_id):
    """Fetch the lyrics of one song and print them with the LRC tags removed.

    Args:
        song_name: Title of the song; printed in front of the lyrics.
        song_id: NetEase Cloud Music id of the song, inserted into the
            request URL.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    # NOTE(review): only the query suffix of this URL survived in the listing;
    # the endpoint prefix is a reconstruction - confirm.
    url = 'http://music.163.com/api/song/lyric?id={}&lv=-1&kv=-1&tv=-1'.format(song_id)

    # Request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        # NOTE(review): the Referer value was lost from this listing; reconstructed - confirm.
        'Referer': 'https://music.163.com/',
        'Host': 'music.163.com',
        # An 'Origin' header was commented out in the original; its value is not shown.
    }

    # Fetch the response and decode its JSON body.
    res = requests.get(url=url, headers=headers, timeout=30).text
    json_obj = json.loads(res)

    # Songs without lyrics may lack the 'lrc' entry or its 'lyric' text;
    # treat that as empty lyrics rather than raising.
    lyric = ''
    if isinstance(json_obj, dict):
        lyric = (json_obj.get('lrc') or {}).get('lyric') or ''

    # Strip the bracketed tags that open each line.  The pattern is anchored
    # to the line start so that text between two brackets on the same line is
    # kept (a greedy r'\[.*\]' would delete it).
    reg = re.compile(r'^(?:\[[^\]\n]*\])+', re.MULTILINE)
    lrc_text = reg.sub('', lyric).strip()

    print(song_name, lrc_text)

if __name__ == '__main__':
    import sys

    # NOTE(review): the page URLs that were assigned here were lost from this
    # listing.  The original offered three alternatives:
    #   - a song list (playlist) page,
    #   - an artist chart page (the one that was active),
    #   - a search listing, of which only the fragment
    #     ' 全部 &limit=435&offset=435&s= 梁靜茹 ' survived.
    # Until they are restored, the page URL is taken from the command line;
    # without an argument, None lets download_songs use its own default.
    music_list = sys.argv[1] if len(sys.argv) > 1 else None

    download_songs(music_list)

# Python爬蟲實踐--爬取網易雲音樂

# 相關文章