前言
本文的文字及圖片來源於網路，僅供學習、交流使用，不具有任何商業用途，版權歸原作者所有，如有問題請及時聯絡我們以作處理。
專案目標
爬取酷燃網視訊資料
https://krcom.cn/
環境
Python 3.6
PyCharm
爬蟲程式碼
# Crawler for krcom.cn: requests the /aj/hot/loadingmore listing, extracts the
# video ids and titles from it, resolves every id to its media URLs and saves
# one .mp4 and one .mp3 file per video.
import os
import pprint  # not used by the code below; kept in case other code relies on it
import re

import requests

# Folder that receives every downloaded file.
SAVE_DIR = 'C:\\Users\\Administrator\\Desktop\\酷燃網\\'

# Seconds to wait on a silent server; without a timeout requests blocks forever.
REQUEST_TIMEOUT = 30

# Media files are written in pieces of this size instead of being held in
# memory as one bytes object.
DOWNLOAD_CHUNK_SIZE = 1024 * 1024

LIST_URL = 'https://krcom.cn/aj/hot/loadingmore?ajwvr=6&cursor=0;2020102014&YmdH=&__rnd=1603176486876'
MEDIA_URL = 'https://krcom.cn/aj/dash/media?media_ids={}&protocols=dash&watermarks=krcom'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}

# Compiled once here instead of on every pass through the page loop.
VID_PATTERN = re.compile(r'vid=(.*?)"', re.S)
TITLE_PATTERN = re.compile('<h3 class="V_autocut_2l">(.*?)<', re.S)

# Characters that Windows rejects in a file name, plus line breaks and tabs
# that the DOTALL title pattern can capture.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _save_media(title, url, extension):
    """Stream the resource at *url* into ``SAVE_DIR + <title> + extension``.

    Args:
        title: Scraped video title; characters that are illegal in a file
            name are replaced with ``_`` before it is used in the path.
        url: Address of the media file to download.
        extension: File suffix including the dot, e.g. ``'.mp4'``.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status (so an error page is never saved as media).
        OSError: If the target directory or file cannot be created.
    """
    safe_title = _ILLEGAL_FILENAME_CHARS.sub('_', title).strip() or 'untitled'
    os.makedirs(SAVE_DIR, exist_ok=True)
    filename = SAVE_DIR + safe_title + extension
    with requests.get(url=url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(filename, mode='wb') as f:
            for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                f.write(chunk)


def download_video(title, url):
    """Download the video at *url* and save it as ``<title>.mp4`` in SAVE_DIR."""
    _save_media(title, url, '.mp4')


def download_mp3(title, url):
    """Download the audio at *url* and save it as ``<title>.mp3`` in SAVE_DIR."""
    _save_media(title, url, '.mp3')


def _fetch_media_urls(vid):
    """Return ``(video_url, mp3_url)`` for the video id *vid*.

    Raises:
        requests.RequestException: If the media endpoint cannot be fetched.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError: If the JSON lacks the expected structure.
    """
    response = requests.get(url=MEDIA_URL.format(vid), headers=HEADERS,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    details = response.json()['data']['list'][0]['details']
    # The script takes the second entry as the video and the last entry as the
    # audio track -- presumably that is the API's ordering; TODO confirm.
    return details[1]['play_info']['url'], details[-1]['play_info']['url']


# NOTE(review): `page` is never inserted into LIST_URL, so every pass requests
# the same listing (cursor=0) and saves the same videos again. The real
# pagination parameter has to be confirmed against the site before this loop
# can fetch distinct pages.
for page in range(0, 101, 20):
    response = requests.get(url=LIST_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Turn the escape sequences (e.g. \uXXXX, \") in the response text back
    # into real characters so the patterns below can match the markup.
    html_data = response.text.encode('utf-8').decode('unicode_escape')
    vids = VID_PATTERN.findall(html_data)
    titles = TITLE_PATTERN.findall(html_data)
    # zip() pairs ids and titles by position and stops at the shorter list.
    for vid, title in zip(vids, titles):
        try:
            video_url, mp3_url = _fetch_media_urls(vid)
            download_video(title, video_url)
            download_mp3(title, mp3_url)
        except (requests.RequestException, KeyError, IndexError, ValueError) as error:
            # One broken entry should not abort the remaining downloads.
            print('skipped {!r}: {}'.format(title, error))
            continue
        print(title)