Python爬蟲百度新聞標題

迷心兔發表於2020-11-29

原文:https://blog.csdn.net/weixin_43881394/article/details/108200983
本文使用新學的 requests-html 模組,抓取百度新聞首頁的新聞標題與熱搜詞,並將結果輸出為 CSV 檔案。

import pandas as pd
from requests_html import HTMLSession

# Scrape the Baidu News front page: collect the headline links and the
# hot-search keywords, then write them side by side into a CSV file.


def _extract_links(section):
    """Return ``(texts, hrefs)`` for every ``<a>`` element inside *section*.

    *section* is the result of ``r.html.find(selector, first=True)``, which
    is ``None`` when the selector matched nothing. In that case two empty
    lists are returned so the corresponding CSV columns are simply empty
    instead of the script dying with ``AttributeError``.
    """
    if section is None:
        return [], []
    anchors = section.find('a')
    texts = [anchor.text for anchor in anchors]
    # ``.get`` keeps texts and hrefs the same length (row-aligned) even when
    # an anchor carries no ``href`` attribute; indexing would raise KeyError.
    hrefs = [anchor.attrs.get('href') for anchor in anchors]
    return texts, hrefs


session = HTMLSession()
news_dict = {}
# Without a timeout the request can block indefinitely on a stalled connection.
r = session.get('http://news.baidu.com/', timeout=10)
# Fail loudly on an HTTP error rather than writing a CSV built from an error page.
r.raise_for_status()

# Front-page news: headline text and target link of each anchor.
hot_news = r.html.find('div#pane-news', first=True)
news_dict['首頁新聞標題'], news_dict['首頁新聞連結'] = _extract_links(hot_news)

# Hot-search keywords: keyword text and target link of each anchor.
hot_news_words = r.html.find('ul.hotwords', first=True)
news_dict['熱搜新聞詞'], news_dict['熱搜連結'] = _extract_links(hot_news_words)

# Write the CSV file. The columns have different lengths, so each list is
# wrapped in a Series; pandas then pads the shorter columns with missing
# values. ``dtype=object`` matches what string data gets anyway and avoids
# the empty-Series dtype warning on older pandas when a section is missing.
dataframe = pd.DataFrame(
    {key: pd.Series(values, dtype=object) for key, values in news_dict.items()}
)
# 'utf-8-sig' adds a BOM so spreadsheet applications detect the encoding of
# the Chinese text.
dataframe.to_csv('首頁新聞.csv', sep=',', encoding='utf-8-sig')

輸出:

(原文此處為輸出結果的截圖,圖片未能載入。)

相關文章