Python 爬取飄花電影下載地址

Conan_ft發表於2020-11-22

import requests
from lxml import etree
from lxml import html
#from lxml.html import fromstring, tostring
# Listing page the spider starts from (the site's `kehuan` category).
# NOTE: must use plain ASCII quotes; typographic quotes are a SyntaxError.
url = "https://www.piaohua.com/html/kehuan/"
class Spider(object):
    """Crawl the listing page at module-level ``url`` and print film links.

    ``start_requests`` fetches the listing page, extracts each film's title
    and relative detail-page href, and hands every pair to ``next_file``,
    which fetches the detail page and prints the link texts found there.
    """

    # Site root prepended to the relative hrefs found on the listing page.
    _BASE_URL = "https://www.piaohua.com"

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` can block forever on an unresponsive host.
    _TIMEOUT = 10

    def start_requests(self):
        """Fetch the listing page and process every film entry on it.

        Prints ``title absolute_detail_url`` for each entry, then calls
        ``next_file`` with the title and the *relative* href.
        """
        response = requests.get(url, timeout=self._TIMEOUT)
        # Force UTF-8 decoding instead of relying on the guessed encoding.
        response.encoding = 'utf-8'
        xml = etree.HTML(response.text)

        film_tit_list = xml.xpath('//li[@class="col-md-6"]/div[@class="txt"]/h3/a/b/font/text()')
        film_src_list = xml.xpath('//li[@class="col-md-6"]/div[@class="txt"]/h3/a/@href')
        # NOTE(review): the two lists are paired positionally; an entry whose
        # title lacks the <b><font> wrapper would shift the pairing - confirm
        # against the live markup.
        for tit_list, src_list in zip(film_tit_list, film_src_list):
            final_src_list = self._BASE_URL + src_list
            print(tit_list, final_src_list)
            self.next_file(tit_list, src_list)

    def next_file(self, tit_list, src_list):
        """Fetch one film's detail page and print each link found on it.

        Args:
            tit_list: the film title (a single string, despite the name).
            src_list: the film's detail-page href, relative to the site root.

        Prints one line per link: ``title link_text relative_href``.
        """
        response = requests.get(self._BASE_URL + src_list, timeout=self._TIMEOUT)
        response.encoding = 'utf-8'
        xml = etree.HTML(response.text)
        # Text of every anchor inside <div class="bot">; presumably the
        # download/magnet links - verify against the page markup.
        magnet = xml.xpath('//div[@class="bot"]/a/text()')
        # The title and href are plain strings, so they must NOT be zipped
        # with the link list: zipping would iterate them character by
        # character and cut the output to the shortest of the three.
        for last_url in magnet:
            print(tit_list, last_url, src_list)

# Script entry: build the spider and start crawling. This runs at import
# time and performs network requests.
spider = Spider()
spider.start_requests()

在這裡插入圖片描述

相關文章