前言
每天寫一個爬蟲練練手,今天就來爬取 LearnKu 個人部落格頁面上的相關資料吧。
程式碼
import requests
from lxml import etree
import re
import chardet
# Profile page to scrape. Every XPath expression below is tied to the markup
# of this page, so a site redesign will surface as a ValueError from
# _first_match rather than as silently wrong numbers.
url = 'https://learnku.com/blog/SilenceHL'

# XPath prefixes shared by several fields (kept byte-identical to the
# selectors the script has always used).
_STATS_XPATH = '//div [@class = "ui four statistics"]'
_GRID_XPATH = '//div [@class = "ui two column grid text-center"]'


def _first_match(tree, xpath, pattern):
    """Return the first regex match inside the first node selected by *xpath*.

    Args:
        tree: Parsed lxml document to query.
        xpath: XPath expression; only the first selected node is inspected.
        pattern: Regular expression searched for in that node's text.

    Returns:
        The matched substring.

    Raises:
        ValueError: If the XPath selects nothing, or the first selected node
            does not contain a match for *pattern*.
    """
    nodes = tree.xpath(xpath)
    if not nodes:
        raise ValueError('no node matched XPath: {}'.format(xpath))
    found = re.search(pattern, str(nodes[0]))
    if found is None:
        raise ValueError(
            'pattern {!r} not found in first node of XPath: {}'.format(pattern, xpath)
        )
    return found.group()


# requests has no default timeout; without one a stalled server would hang
# the script forever.
_http_reply = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of trying to parse an error page.
_http_reply.raise_for_status()
response = _http_reply.content.decode()
result = etree.HTML(response)

name = _first_match(result, '//div [@class = "header"]/a[1]/text()', r'\w+')
post_count = _first_match(result, _STATS_XPATH + '/div[1]/div[2]/text()', r'\d+')
fans_count = _first_match(result, _STATS_XPATH + '/div[2]/div[2]/text()', r'\d+')
likes_count = _first_match(result, _STATS_XPATH + '/div[3]/div[2]/text()', r'\d+')
favorites_count = _first_match(result, _STATS_XPATH + '/div[4]/div[2]/text()', r'\d+')
rank = _first_match(result, _GRID_XPATH + '/div[1]//text()', r'\d+')
access = _first_match(result, _GRID_XPATH + '/div[2]//text()', r'\d+')

# The template must have one placeholder per argument: str.format silently
# ignores surplus positional arguments, which is how rank and access used to
# be scraped but never shown.
print(
    '{}的部落格,博文為:{}篇,粉絲數為:{}人,獲得喜歡的數量為:{},獲得收藏的數量為:{}'
    ',排名為:{},訪問量為:{}'.format(
        name, post_count, fans_count, likes_count, favorites_count, rank, access
    )
)
本作品採用《CC 協議》,轉載必須註明作者和本文連結