python-python爬取豆果網(菜譜資訊)

峰寒發表於2019-01-22
# -*- coding: utf-8 -*-
#獲取豆果網圖片
import io
from bs4 import BeautifulSoup
import requests

# URL of the recipe page to scrape.
url = "https://www.douguo.com/cookbook/2029254.html"
# Browser-like User-Agent, intended to get past the site's anti-crawler check.
header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0"}
# Download the page.
html = requests.get(url, headers=header)
# Parse the raw response bytes with the lxml parser.
text = BeautifulSoup(html.content, "lxml")
# Find the <img> elements under the element with id "banner"; only the first
# match is used, so a single image URL is extracted here.
img_title = text.select("#banner img")
imgg = img_title[0].get("src")
# Save the image found at a URL to local disk.
def get_img_data(ul, filename="1.jpg"):
    """Download the resource at ``ul`` and write its raw bytes to ``filename``.

    The request is sent with the module-level ``header`` dict as HTTP headers.

    Args:
        ul: URL of the image to download.
        filename: Path of the output file. Defaults to "1.jpg", the name the
            original implementation hard-coded; pass another value to save
            elsewhere. An existing file at that path is overwritten.
    """
    htm = requests.get(ul, headers=header)
    # The context manager closes the file even if the write raises.
    with open(filename, "wb") as f:
        f.write(htm.content)
get_img_data(imgg)

# Extract the individual parts of the recipe from the parsed page.
menu_title_0 = text.select(".title.text-lips")[0].text
menu_intro = text.select(".intro")[0].text
# The first and second ".mini-title" elements are printed as headings before
# the scname/scnum list and the step list respectively.
menu_title_1 = text.select(".mini-title")[0].text
# Presumably ingredient names ("scname") and their amounts ("scnum") --
# inferred from the class names; confirm against the page markup.
menu_content_scname = text.find_all("span", class_="scname")
menu_content_scnum = text.find_all("span", class_="scnum")
menu_title_2 = text.select(".mini-title")[1].text
menu_step = text.select(".stepinfo")

print(menu_title_0)
print(menu_intro)
print(menu_title_1)
# Pair each scname span with the scnum span at the same position. zip stops
# at the shorter list, so a missing scnum no longer raises IndexError.
for scname, scnum in zip(menu_content_scname, menu_content_scnum):
    print(scname.text, " ", scnum.text)
print(menu_title_2)
for menu_step_i in menu_step:
    print(menu_step_i.text)

 

 

相關文章