Python Selenium+PhantomJS 爬煎蛋妹子圖

s990649505發表於2020-04-06

第一次爬蟲,程式碼很簡單,借鑑了很多前輩的程式碼。

import re
import os
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
# Set the path to the PhantomJS executable. This starts the browser session at
# import time; jiandanSpider uses it through the module-level name `driver`.
driver = webdriver.PhantomJS(executable_path='D:/phantomjs-2.1.1-windows/bin/phantomjs')

def _extract_image_urls(html_text):
    """Return absolute URLs for the non-GIF ``a.view_img_link`` anchors in a page."""
    soup = BeautifulSoup(html_text, 'html.parser')
    img_url = []
    for link in soup.select("a.view_img_link"):
        href = link.get('href')
        # Anchors without an href cannot be downloaded; any link mentioning
        # 'gif' is skipped, as before.
        if not href or 'gif' in str(href):
            continue
        # Only prepend the scheme when the href does not already carry one
        # (e.g. protocol-relative '//host/path' links).
        if not href.startswith(('http://', 'https://')):
            href = 'http:' + href
        img_url.append(href)
    return img_url


def _download_image(url, path):
    """Download ``url`` to ``path``; return True on success, False on failure."""
    try:
        # A timeout keeps one unresponsive server from hanging the whole run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('下載失敗: %s (%s)' % (url, exc))
        return False
    # Open the file only after a successful fetch so that a failed download
    # does not leave an empty or bogus image behind.
    with open(path, 'wb') as f:
        f.write(response.content)
    return True


def jiandanSpider(Url, basePath):
    """Download every non-GIF image linked from the given pages.

    Each page is rendered with the module-level ``driver``, its
    ``a.view_img_link`` anchors are collected, and every image is saved as
    ``basePath + '妹子圖 <page>-<n><ext>'``, where ``page`` and ``n`` are
    1-based counters. A failed download is reported and skipped.

    Args:
        Url: Iterable of page URLs to visit, in order.
        basePath: Prefix prepended to every output file name (typically a
            directory path ending with a separator).
    """
    # basePath is used as a plain string prefix, so only its directory part
    # has to exist before files can be written.
    target_dir = os.path.dirname(basePath)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # The implicit wait is a session-wide setting, so set it once up front.
    driver.implicitly_wait(10)

    for page, urlPools in enumerate(Url, start=1):
        driver.get(urlPools)
        img_url = _extract_image_urls(driver.page_source)

        for n, url in enumerate(img_url, start=1):
            print("第 %s 張" % (n))
            # Take the real extension (ignoring any query string) instead of
            # blindly slicing the last four characters of the URL. When there
            # is none, the file is saved without an extension.
            ext = os.path.splitext(url.split('?', 1)[0])[1]
            path = basePath + '妹子圖 %s-%s' % (page, n) + ext
            if _download_image(url, path):
                print('下載完成')

if __name__ == '__main__':
    # Lazily yields the comment-page URLs for pages 1 through 4.
    urlPool = ('http://jandan.net/ooxx/page-{}#comments'.format(i) for i in range(1, 5))
    basePath = 'D:/jiandanImage/'
    try:
        jiandanSpider(urlPool, basePath)
    finally:
        # Always shut the browser down, even if scraping fails, so the
        # PhantomJS process is not left running.
        driver.quit()

相關文章