Python採集某網站內容, m3u8內容下載

專注的阿熊發表於2022-05-31

import time

import requests  # 資料請求模組

import re  # 正規表示式模組

import json

import pprint

# Crawl a paginated video listing, open every video page found on it, and
# print each video's title and the URL of its m3u8 playlist.
#
# NOTE(review): this script was recovered from a web page whose extraction
# stripped every URL literal and injected advertising text into the code.
# The ad text has been removed; the stripped URLs are left as explicit
# placeholders (see the TODO notes) and must be filled in before running.

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'

# Seconds to wait for any single HTTP request; requests has no default
# timeout, so without one a stalled server would hang the script forever.
REQUEST_TIMEOUT = 10

# Patterns are raw strings (so `\d` / `\.` are regex escapes, not invalid
# string escapes) and compiled once instead of once per page / per video.
AC_ID_RE = re.compile(r'atomid.*?:.*?"(\d+).*?"')
# Capture everything between <title> and the " - AcFun" site suffix. The
# original pattern spelled out the whole suffix, but that text was garbled
# in extraction, so only its stable prefix is matched here.
TITLE_RE = re.compile(r'<title\s*>(.*?) - AcFun')
# The JSON object assigned to window.videoInfo in the page's inline script.
# As in the original, the capture stops at the first ';'.
VIDEO_INFO_RE = re.compile(r'window\.pageInfo\s*=\s*window\.videoInfo\s*=\s*(.*?);')

for page in range(3, 29):
    print(f' 正在採集第 {page} 頁的資料 ')
    time.sleep(1)  # pause between listing pages

    # TODO: the listing URL was stripped from the source; fill it in.
    link = ''
    data = {
        'quickViewId': 'ac-space-video-list',
        'reqID': page + 1,
        'ajaxpipe': '1',
        'type': 'video',
        'order': 'newest',
        'page': page,
        'pageSize': '20',
        't': '1653659024877',  # hard-coded value copied from a captured request
    }
    list_headers = {
        # TODO: the referer URL was stripped from the source; fill it in.
        'referer': '',
        'user-agent': USER_AGENT,
    }
    list_response = requests.get(
        url=link, params=data, headers=list_headers, timeout=REQUEST_TIMEOUT
    )
    # pprint.pprint(list_response.text)

    ac_id_list = AC_ID_RE.findall(list_response.text)
    print(ac_id_list)

    for ac_id in ac_id_list:
        # TODO: the URL prefix in front of the id was stripped from the
        # source, for both the page URL and the referer below. Presumably
        # the site's video page address for this id -- confirm and restore.
        url = f'{ac_id}'
        video_headers = {
            'referer': f'{ac_id}',
            'user-agent': USER_AGENT,
        }
        video_response = requests.get(
            url=url, headers=video_headers, timeout=REQUEST_TIMEOUT
        )

        title_match = TITLE_RE.search(video_response.text)
        info_match = VIDEO_INFO_RE.search(video_response.text)
        if title_match is None or info_match is None:
            # A page without the expected markup should not abort the crawl.
            print(f'skipping {ac_id}: title or videoInfo not found in page')
            continue
        title = title_match.group(1)

        try:
            json_data = json.loads(info_match.group(1))
            # ksPlayJson is itself a JSON document stored as a string.
            play_info = json.loads(json_data['currentVideoInfo']['ksPlayJson'])
            m3u8_url = play_info['adaptationSet'][0]['representation'][0]['backupUrl'][0]
        except (ValueError, KeyError, IndexError, TypeError) as err:
            print(f'skipping {ac_id}: unexpected videoInfo layout ({err!r})')
            continue

        m3u8_data = requests.get(
            url=m3u8_url, headers=video_headers, timeout=REQUEST_TIMEOUT
        ).text
        # Drop the playlist's "#E..." tag lines; what remains, split on
        # whitespace, is the list of .ts segment references.
        ts_segments = re.sub('#E.*', '', m3u8_data).split()

        print(title)
        print(m3u8_url)

        # Download step, disabled in the original. The base URL prepended to
        # each segment was stripped from the source and must be restored
        # before enabling this.
        # for ts in ts_segments:
        #     ts_url = '' + ts
        #     ts_content = requests.get(url=ts_url, headers=video_headers).content
        #     with open('video\\' + title + '.mp4', mode='ab') as f:
        #         f.write(ts_content)
        #     print(ts_url)


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/69946337/viewspace-2898138/,如需轉載,請註明出處,否則將追究法律責任。

相關文章