最近寫了個專門爬百度的爬蟲,後來又想爬京東的商品,這次還是採用上次的 BeautifulSoup + requests 模組。
下面直接上程式碼,看不懂的可以看這篇文章或者程式碼中的註釋來學習。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#written by DY
#http://dyblog.tk
#e-mail:duyun888888@qq.com
########import###############
import requests
from bs4 import BeautifulSoup
#from openpyxl import *#寫入表格使用,寫入txt時報錯
import time
from tkinter import *
import tkinter.messagebox
from tkinter import ttk
########import結束############
#---------- Global variables -----------
# Scheme prefix that searchstart() prepends to scraped href values.
https = 'https:'
headers = {
"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.16 Safari/537.36",
}# Request headers sent with the search request, intended to keep the site from blocking it
# Result columns: filled by searchstart() and read index-by-index by genxin().
# Product names, taken from the "title" attribute of the product links.
name = []
# Price strings, taken from the <i> tags inside the "p-price" divs.
price = []
# Description strings, taken from the <em> tags inside the product-name divs.
introduct = []
# Product link URLs, built from the anchors' href values.
urlss = []
#---------- End of global variables -------
#===============函式區域==============
#--------圖形介面函式開始--------
def genxin():
    """Open a result window listing the scraped products in a table.

    Reads the module-level lists ``name``, ``price``, ``introduct`` and
    ``urlss`` and shows one row per index, stopping at the shortest list.
    The window title contains the current text of the ``E1`` entry.
    """
    # Use a Toplevel owned by the existing root window instead of a second
    # Tk() instance: a second Tk() creates an independent Tcl interpreter,
    # and running its own mainloop() inside a button callback nests event
    # loops. The already-running root mainloop serves this window.
    top = Toplevel()
    top.title("'%s'在'京東'中查詢結果"%E1.get())
    top.geometry("800x600+600+100")

    columns = ("物品名", "價格", "簡介", "連結")
    widths = (200, 50, 200, 50)
    treeview = ttk.Treeview(top, show="headings", columns=columns, height='100')
    for column, width in zip(columns, widths):
        treeview.column(column, width=width, anchor='center')
        treeview.heading(column, text=column)
    treeview.pack()

    # Debug output of the raw columns.
    print(name)
    print(price)
    print(introduct)
    print(urlss)

    # zip() stops at the shortest list, so a row is only shown when all four
    # columns have a value for it.
    for row in zip(name, price, introduct, urlss):
        treeview.insert('', 'end', values=row)
#--------圖形介面函式結束--------
def searchstart():
    """Search JD for the keyword typed into ``E1`` and show the results.

    Downloads the search result page, extracts prices, descriptions, product
    names and links into the module-level lists ``price``, ``introduct``,
    ``name`` and ``urlss`` (replacing the previous search's contents), then
    calls ``genxin()`` to display them.

    An empty keyword or a failed request is reported in a message box and
    the function returns without opening a result window.
    """
    keyword = E1.get().strip()
    if not keyword:
        tkinter.messagebox.showwarning('提示', '請輸入商品名')
        return

    try:
        # Passing the keyword through ``params`` lets requests URL-encode it,
        # so characters such as '&', '#' or spaces cannot break the query.
        response = requests.get(
            'https://search.jd.com/Search',
            params={'keyword': keyword},
            headers=headers,
            timeout=10,  # do not freeze the GUI forever on a dead connection
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        tkinter.messagebox.showerror('錯誤', '查詢失敗: %s' % exc)
        return

    # The lists are shared module state; empty them in place so results of
    # an earlier search are not mixed into this one.
    for results in (name, price, introduct, urlss):
        results.clear()

    soup = BeautifulSoup(response.text, 'html.parser')
    for wrap in soup.find_all('div', class_="ml-wrap"):
        for goods in wrap.find_all('div', class_="goods-list-v2 gl-type-1 J-goods-list"):
            # Prices
            for price_div in goods.find_all('div', class_="p-price"):
                for price_tag in price_div.find_all('i'):
                    price.append(price_tag.text.strip() or '無')

            for name_div in goods.find_all('div', class_="p-name p-name-type-2"):
                # Descriptions
                for em_tag in name_div.find_all('em'):
                    introduct.append(em_tag.text.strip() or '無')
                # Product names; a missing or empty title becomes the placeholder.
                for link in name_div.find_all('a', target="_blank"):
                    name.append(link.get('title') or '無')
                # Links
                for link in name_div.find_all('a'):
                    href = link.get('href')
                    if not href:
                        # Keep the column aligned even when an anchor has no href.
                        urlss.append('無')
                    elif href.startswith('//'):
                        # Protocol-relative link: add the scheme.
                        urlss.append(https + href)
                    else:
                        # Already carries its own scheme (or is not protocol-relative).
                        urlss.append(href)

    # Debug output of the scraped columns.
    print(introduct)
    print(name)
    genxin()
#===============函式區域結束==========
##########圖形介面開始#########
# Main window: a keyword entry plus a confirm button that starts the search.
root = Tk()
root.title('京東商品 查詢')
root.geometry('250x160')
L1 = Label(root, text="商品名: ")
L1.place(x = 5,y = 15)
E1 = Entry(root, bd =2)
E1.place(x = 60,y = 15)
A = Button(root, text ="確定",font=('Arial', 12), width=10, height=1,command=searchstart)
# The window is only 250 px wide, so the button must be placed inside that
# area; at x=350 it was outside the visible window and could not be clicked.
A.place(x = 75,y = 60)# confirm button
root.mainloop()
###########圖形介面結束#########
#written by DY
#http://dyblog.tk
#e-mail:duyun888888@qq.com
執行截圖:
如果有不懂的,可以直接留言或者聯絡duyun888888@qq.com