把豆瓣TOP250裡面的 序號/電影名/評分/推薦語/連結 都爬取下來,結果就是全部展示列印出來
# 6. Douban crawler
# Scrape the rank / title / rating / one-line recommendation / link of every
# film in the Douban Top 250 and print all of them.
# URL https://movie.douban.com/top250?start=

import requests
from bs4 import BeautifulSoup

# The list is paginated 25 films per page through the ``start`` query value.
PAGE_URL = 'https://movie.douban.com/top250?start={}'
PAGE_SIZE = 25
TOTAL_FILMS = 250

# NOTE(review): Douban reportedly rejects the default python-requests
# User-Agent (HTTP 418), so a browser-like one is sent -- confirm against the
# live site.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0 Safari/537.36'
    ),
}
# Seconds to wait for the server; without a timeout requests can block forever.
TIMEOUT = 10

for start in range(0, TOTAL_FILMS, PAGE_SIZE):
    res = requests.get(PAGE_URL.format(start), headers=HEADERS, timeout=TIMEOUT)
    # Fail loudly on an error page instead of silently printing nothing.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    for item in soup.find_all('div', class_='item'):
        # Some films have no recommendation line; find() returns None then.
        quote = item.find('span', class_='inq')
        # One film per line, fields separated by tabs:
        # rank, title, rating, recommendation (may be empty), link.
        print(
            item.find('em').text,
            # The first <span class="title"> holds the primary title.
            item.find('span', class_='title').text,
            item.find('span', class_='rating_num').text,
            quote.text if quote is not None else '',
            item.find('a')['href'],
            sep='\t',
        )

# Reference solution from the course instructor, kept below as an inert
# string (never executed) for comparison with the version above.
'''
老師的程式碼

import requests
# 引用requests模組
from bs4 import BeautifulSoup
for x in range(10):
    url = 'https://movie.douban.com/top250?start=' + str(x*25) + '&filter='
    res = requests.get(url)
    bs = BeautifulSoup(res.text, 'html.parser')
    tag_num = bs.find_all('div', class_="item")
    # 查詢包含序號,電影名,連結的<div>標籤
    tag_comment = bs.find_all('div', class_='star')
    # 查詢包含評分的<div>標籤
    tag_word = bs.find_all('span', class_='inq')
    # 查詢推薦語


    list_all = []
    for x in range(len(tag_num)):
        if tag_num[x].text[2:5] == '223' or tag_num[x].text[2:5] =='244':
            list_movie = [tag_num[x].text[2:5], tag_num[x].find('img')['alt'], tag_comment[x].text[2:5], tag_num[x].find('a')['href'] ]
        else:
            list_movie = [tag_num[x].text[2:5], tag_num[x].find('img')['alt'], tag_comment[x].text[2:5], tag_word[x].text, tag_num[x].find('a')['href']]
        list_all.append(list_movie)
    print(list_all)
'''
items中每個Tag的內容如下
<div class="item">
    <div class="pic">
        <em class="">151</em>
        <a href="https://movie.douban.com/subject/24750126/">
            <img width="100" alt="荒蠻故事"
                 src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2192834364.webp" class="">
        </a>
    </div>
    <div class="info">
        <div class="hd">
            <a href="https://movie.douban.com/subject/24750126/" class="">
                <span class="title">荒蠻故事</span>
                <span class="title"> / Relatos salvajes</span>
                <span class="other"> / 蠻荒故事 / 生命中最抓狂的小事(臺)</span>
            </a>
            <span class="playable">[可播放]</span>
        </div>
        <div class="bd">
            <p class="">
                導演: 達米安·斯茲弗隆 Damián Szifron 主演: 達里奧·葛蘭帝內提 Darío...

                2014 / 阿根廷 西班牙 / 劇情 喜劇 犯罪
            </p>
            <div class="star">
                <span class="rating45-t"></span>
                <span class="rating_num" property="v:average">8.8</span>
                <span property="v:best" content="10.0"></span>
                <span>203246人評價</span>
            </div>
            <p class="quote">
                <span class="inq">始於荒誕,止於更荒誕。</span>
            </p>
        </div>
    </div>
</div>