爬蟲01:爬取豆瓣電影TOP 250基本資訊

冷淡的蛋黃醬 發表於 2020-12-29
# coding:utf-8
from urllib import request
from lxml import etree
import urllib

def page(i):
    """Fetch one page of the Douban Top 250 list and print each movie title.

    Args:
        i: Zero-based page index. The request asks for the listing starting
            at offset ``25 * i``.

    Returns:
        None. For every ``<li>`` under ``<ol class="grid_view">`` this prints
        a ``NO:<rank>`` line followed by the list of text nodes matched for
        the first title ``<span>``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on a failed request, and
        ``UnicodeDecodeError`` if the response body is not valid UTF-8.
    """
    first_rank = 25 * i + 1
    url = 'https://movie.douban.com/top250?start=%d&filter=' % (25 * i)
    # Browser-like headers; presumably the site rejects the default
    # urllib User-Agent -- TODO confirm.
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1;WOW64) AppleWebKit/537.36 (KHTML,like GeCKO) Chrome/45.0.2454.85 Safari/537.36 115Broswer/6.0.3',
        'Referer': 'https://movie.douban.com/',
        'Connection': 'keep-alive',
    }
    rec = urllib.request.Request(url, headers=head)

    # Use the response as a context manager so the connection is closed
    # deterministically instead of lingering until garbage collection.
    with request.urlopen(rec) as response:
        body = response.read().decode('utf-8')
    tree = etree.HTML(body)

    datas = tree.xpath('//ol[@class="grid_view"]/li')
    # enumerate() supplies the overall rank directly, replacing a
    # hand-maintained counter.
    for rank, data in enumerate(datas, start=first_rank):
        data_title = data.xpath('div/div[2]/div[@class="hd"]/a/span[1]/text()')
        print('NO:%d' % rank)
        print(data_title)


# Walk page indices 0..9; page() requests 25 entries per index, which
# covers the 250 entries of the list.
for page_index in range(10):
    page(page_index)

相關文章