python爬蟲實戰,爬蟲之路,永無止境

專注的阿熊發表於2022-01-27

#!/usr/bin/env python

# -*-coding:utf8-*-

from threading import Thread

import tkinter as tk

import os, requests, jsonpath

from requests_html import HTMLSession

# Module-level requests_html session created at import time.
# NOTE(review): nothing in the visible code references `session` -- confirm
# whether it is still needed before relying on or removing it.
session = HTMLSession()

class kuwoSpider(object):
    """Tkinter front end that searches songs by singer and saves them as MP3 files.

    The window holds two entry fields (singer name, number of result pages),
    a "crawl" button that starts the download in a worker thread, a "clear"
    button, and a text box that receives one status line per song.

    The class attributes below must be filled in before the downloader can
    reach the service: the original listing left the Referer/Cookie/csrf/Host
    values as fill-in placeholders, and built its URLs starting directly with
    the singer name / song rid (no scheme or host).
    """

    # Request headers the user has to supply; empty values are not sent.
    REFERER = ''
    COOKIE = ''
    CSRF = ''
    HOST = ''

    # Text placed in front of the query part of the two request URLs.
    SEARCH_URL_PREFIX = ''
    SONG_INFO_URL_PREFIX = ''

    # Seconds to wait for each HTTP request before giving up.
    REQUEST_TIMEOUT = 15

    # Fixed window size, shared by the constructor and center().
    WIDTH = 800
    HEIGHT = 600

    def __init__(self):
        """Build the main window and lay out all of its widgets."""
        self.window = tk.Tk()
        self.window.title(' 音樂下載器 ')
        self.window.geometry('{}x{}'.format(self.WIDTH, self.HEIGHT))

        # Prompt label and entry field for the singer name.
        self.label_user = tk.Label(self.window, text=' 請輸入要下載的歌手名字: ',
                                   font=('Arial', 12), width=130, height=2)
        self.label_user.pack()
        self.entry_user = tk.Entry(self.window, show=None, font=('Arial', 14))
        self.entry_user.pack(after=self.label_user)

        # Prompt label and entry field for the number of pages to crawl.
        self.label_passwd = tk.Label(self.window, text=" 請輸入爬取頁數 : ",
                                     font=('Arial', 12), width=30, height=2)
        self.label_passwd.pack()
        self.entry_passwd = tk.Entry(self.window, show=None, font=('Arial', 14))
        self.entry_passwd.pack(after=self.label_passwd)

        # Text box that shows the result of each download.
        self.text1 = tk.Text(self.window, font=('Arial', 12), width=85, height=22)
        self.text1.pack()

        # "Crawl" button, packed above the text box.
        self.button_1 = tk.Button(self.window, text=' 爬取 ', font=('Arial', 12),
                                  width=10, height=1, command=self.run)
        self.button_1.pack(before=self.text1)

        # "Clear" button, anchored to the right edge.
        self.button_2 = tk.Button(self.window, text=' 清除 ', font=('Arial', 12),
                                  width=10, height=1, command=self.parse_hit_click_2)
        self.button_2.pack(anchor="e")

    def run(self):
        """Start the crawl in a separate thread so the window stays responsive."""
        Thread(target=self.parse_hit_click_1).start()

    def parse_hit_click_1(self):
        """Read both entry fields, validate them, and call main().

        Invalid input is reported in the text box instead of raising inside
        the worker thread, where the error would never reach the user.
        """
        singer_name = self.entry_user.get().strip()
        if not singer_name:
            self._log(' 請先輸入歌手名字 ')
            return
        try:
            page = int(self.entry_passwd.get())
        except ValueError:
            page = 0
        if page < 1:
            self._log(' 請輸入有效的爬取頁數(正整數) ')
            return
        self.main(singer_name, page)

    def main(self, singer_name, page):
        """Download every song found on the first ``page`` search result pages.

        Args:
            singer_name: Search keyword; also used as the output directory name.
            page: Number of result pages to walk (pages 1..page, 30 hits each).

        Failures are reported per page / per song in the text box and the
        crawl continues with the next item.
        """
        headers = self._build_headers()
        for page_no in range(1, page + 1):
            start_url = (
                f'{self.SEARCH_URL_PREFIX}{singer_name}&pn='
                f'{page_no}&rn=30&httpsStatus=1&reqId=d301af60-6e1e-11ec-840f-dfca3a2ceb68'
            )
            try:
                response = requests.get(start_url, headers=headers,
                                        timeout=self.REQUEST_TIMEOUT).json()
            except Exception as exc:  # per-page boundary: report and move on
                self._log(' 第 {} 頁搜尋失敗: {} '.format(page_no, exc))
                continue

            # jsonpath.jsonpath() yields False (not an empty list) on no match.
            # NOTE(review): '$..name' / '$..rid' match at any depth; pairing
            # them with zip assumes both come back in the same order and count.
            song_names = jsonpath.jsonpath(response, '$..name') or []
            song_rids = jsonpath.jsonpath(response, '$..rid') or []
            if not song_names or not song_rids:
                self._log(' 第 {} 頁沒有找到歌曲 '.format(page_no))
                continue

            for song_name, song_rid in zip(song_names, song_rids):
                try:
                    self._download_song(singer_name, song_name, song_rid, headers)
                except Exception as exc:  # per-song boundary: report and move on
                    self._log('****{}**** 下載失敗: {} '.format(song_name, exc))
                else:
                    self._log('****{}**** 下載成功 '.format(song_name))

    def _build_headers(self):
        """Return the request headers, leaving out any value that is still empty."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                          '87.0.4280.88 Safari/537.36',
        }
        optional = {
            'Referer': self.REFERER,
            'Cookie': self.COOKIE,
            'csrf': self.CSRF,
            'Host': self.HOST,
        }
        headers.update({key: value for key, value in optional.items() if value})
        return headers

    def _download_song(self, singer_name, song_name, song_rid, headers):
        """Fetch one song's audio and write it to ./<singer_name>/<song_name>.mp3.

        Raises whatever the request, JSON decoding, or file write raises, plus
        LookupError when the song info response carries no download URL.
        """
        song_info_url = (
            f'{self.SONG_INFO_URL_PREFIX}{song_rid}'
            '&type=music&httpsStatus=1&reqId=c0ac92a0-6e35-11ec-b428-05be0a87bc11'
        )
        song_info = requests.get(song_info_url, headers=headers,
                                 timeout=self.REQUEST_TIMEOUT).json()
        song_urls = jsonpath.jsonpath(song_info, '$..url')
        if not song_urls:
            raise LookupError('no download url in song info response')

        audio = requests.get(song_urls[0], timeout=self.REQUEST_TIMEOUT)
        # Without this check an HTTP error page would be saved as an .mp3 file.
        audio.raise_for_status()

        target_dir = os.path.join('.', self._safe_filename(singer_name))
        os.makedirs(target_dir, exist_ok=True)
        target_path = os.path.join(target_dir, self._safe_filename(song_name) + '.mp3')
        with open(target_path, 'wb') as f:
            f.write(audio.content)

    @staticmethod
    def _safe_filename(name):
        """Return ``name`` as text with path separators and other reserved characters replaced by '_'."""
        forbidden = '\\/:*?"<>|'
        return ''.join('_' if ch in forbidden else ch for ch in str(name))

    def _log(self, message):
        """Append ``message`` followed by two line breaks to the result text box."""
        # NOTE(review): this is called from the worker thread, as the original
        # code did; Tkinter is not guaranteed to be safe for that on all builds.
        try:
            self.text1.insert("insert", message)
            self.text1.insert("insert", '\n ')
            self.text1.insert("insert", '\n ')
        except (tk.TclError, RuntimeError):
            # The window may already be closed while downloads are still
            # running; losing the status line must not abort the crawl.
            pass

    def parse_hit_click_2(self):
        """Clear both entry fields and the result text box."""
        self.entry_user.delete(0, "end")
        self.entry_passwd.delete(0, "end")
        self.text1.delete("1.0", "end")

    def center(self):
        """Move the window so that it is centered on the screen."""
        ws = self.window.winfo_screenwidth()
        hs = self.window.winfo_screenheight()
        x = int((ws / 2) - (self.WIDTH / 2))
        y = int((hs / 2) - (self.HEIGHT / 2))
        self.window.geometry('{}x{}+{}+{}'.format(self.WIDTH, self.HEIGHT, x, y))

    def run_loop(self):
        """Lock the window size, center it, and enter the Tk main loop."""
        self.window.resizable(False, False)
        self.center()
        self.window.mainloop()

if __name__ == '__main__':
    # Script entry point: create the downloader window, then hand control to
    # run_loop(), which blocks until the window is closed.
    spider_app = kuwoSpider()
    spider_app.run_loop()


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/69946337/viewspace-2854295/,如需轉載,請註明出處,否則將追究法律責任。

相關文章