用Python爬取新型冠狀病毒肺炎實時資料,pyecharts v1.x繪製省市區疫情地圖

蒜泥的冬天發表於2020-02-14

執行結果(2020-2-8資料)

全國疫情地圖
福建省疫情地圖
福州市疫情地圖

基本方案

資料格式

請參考以下兩篇中的抓包分析
用Python抓新型冠狀病毒肺炎實時資料,繪製市內疫情地圖
用Python抓新型冠狀病毒肺炎疫情資料,繪製全國疫情分佈圖

全國疫情地圖實現

#%%

import time, json, requests
import jsonpath
from pyecharts.charts import Map
import pyecharts.options as opts

#%%

# 全國疫情地區分佈(各省確診病例)
def catch_cn_disease_dis():
    """Fetch the Tencent epidemic feed and extract per-province confirmed counts.

    Returns:
        tuple: ``(ls_province_confirm, world_data)`` where
        ``ls_province_confirm`` is a list of ``(province_name, confirmed)``
        tuples taken from ``areaTree[0].children`` and ``world_data`` is the
        decoded ``data`` payload of the response.

    Raises:
        requests.exceptions.RequestException: on network failure, timeout or
            a non-2xx HTTP status.
        ValueError: if the payload contains no province nodes.
        KeyError: if a province node lacks ``name`` or ``total.confirm``.
    """
    # Millisecond timestamp appended as the `_` query parameter
    # (presumably a cache-buster; the endpoint is not documented here).
    timestamp = '%d' % int(time.time() * 1000)
    url_area = ('https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
                '&callback=&_=') + timestamp

    # A timeout keeps the script from hanging forever on a stalled connection.
    rsp = requests.get(url=url_area, timeout=10)
    rsp.raise_for_status()

    # The `data` field is itself a JSON-encoded string, hence the second decode.
    world_data = json.loads(rsp.json()['data'])

    china_data = jsonpath.jsonpath(world_data,
                                   expr='$.areaTree[0].children[*]')
    # jsonpath returns a falsy value instead of raising when nothing matches.
    if not china_data:
        raise ValueError('no province nodes found at $.areaTree[0].children')

    # Read name and count from the same node so the pairs cannot drift apart
    # when one node is missing a field.
    ls_province_confirm = [(province['name'], province['total']['confirm'])
                           for province in china_data]
    return ls_province_confirm, world_data

#%%

# Fetch once at module level; map_cn_disease_dis() reads ls_province_cfm
# as a global.
ls_province_cfm, dic_world_data = catch_cn_disease_dis()
print(ls_province_cfm)

#%%

# 繪製全國疫情地圖
def map_cn_disease_dis() -> Map:
    """Build the national choropleth of confirmed cases per province.

    Reads the module-level ``ls_province_cfm`` list of
    ``(province_name, confirmed)`` pairs.

    Returns:
        Map: the configured pyecharts map (not yet rendered).
    """
    # Piecewise legend buckets. The ranges are contiguous and do not overlap:
    # previously 10000 belonged to two buckets and 6-9 belonged to none.
    pieces = [
        {'min': 10000, 'color': '#7f1818'},  # open-ended top bucket (no max)
        {'min': 1000, 'max': 9999},
        {'min': 500, 'max': 999},
        {'min': 100, 'max': 499},
        {'min': 10, 'max': 99},
        {'min': 0, 'max': 9},
    ]
    c = (
        Map()
        .add('中國', ls_province_cfm, 'china')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='全國新型冠狀病毒疫情地圖(確診數)'),
            visualmap_opts=opts.VisualMapOpts(is_show=True,
                                              split_number=6,
                                              is_piecewise=True,  # piecewise (bucketed) legend
                                              pos_top='center',
                                              pieces=pieces,
                                              ),
        )
    )
    return c
# Build the national map and write it out as an HTML file.
map_cn_disease_dis().render('全國疫情地圖.html')

福建省疫情地圖實現

# 獲取福建省確診分佈資料
def catch_fj_disease_dis():
    """Collect confirmed-case totals for each city listed under Fujian.

    Calls ``catch_cn_disease_dis()`` for a fresh payload, selects the child
    nodes of the province named "福建" and sums ``total.confirm`` per city
    name (entries sharing a name are added together).

    Returns:
        dict: city name -> confirmed count (int).
    """
    world = catch_cn_disease_dis()[1]
    fj_cities = jsonpath.jsonpath(
        world,
        expr='$.areaTree[0].children[?(@.name=="福建")].children[*]')

    confirmed_by_city = {}
    for city in fj_cities:
        city_name = city['name']
        # Start from 0 the first time a name is seen, then accumulate.
        confirmed_by_city[city_name] = (
            confirmed_by_city.get(city_name, 0) + int(city['total']['confirm']))
    return confirmed_by_city

# Module-level result; map_fj_disease_dis() reads dic_fj_cfm as a global.
dic_fj_cfm = catch_fj_disease_dis()

#%%

# 繪製福建省疫情地圖
def map_fj_disease_dis() -> Map:
    """Build the Fujian province choropleth of confirmed cases per city.

    Reads the module-level ``dic_fj_cfm`` mapping (city name -> count) and
    appends '市' to every key before handing the rows to the '福建' map.

    Returns:
        Map: the configured pyecharts map (not yet rendered).
    """
    city_rows = [[city + '市', count] for city, count in dic_fj_cfm.items()]

    legend = opts.VisualMapOpts(
        is_show=True,
        split_number=6,
        is_piecewise=True,  # piecewise (bucketed) legend
        pos_top='center',
        pieces=[
            {'min': 50},
            {'min': 30, 'max': 49},
            {'min': 20, 'max': 29},
            {'min': 10, 'max': 19},
            {'min': 1, 'max': 9},
            {'value': 0, "label": '無確診病例', "color": 'green'},
        ],
    )

    chart = Map()
    chart.add('福建省', city_rows, '福建')
    # Label every region with its name and count on separate lines.
    chart.set_series_opts(
        label_opts=opts.LabelOpts(is_show=True, formatter='{b}\n{c}例'))
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title='福建新型冠狀病毒疫情地圖(確診數)'),
        visualmap_opts=legend,
    )
    return chart
# Build the Fujian map and write it out as an HTML file.
map_fj_disease_dis().render('福建疫情地圖.html')

福州市疫情地圖實現

from pyquery import PyQuery as pq
from bs4 import BeautifulSoup
import datetime
import numpy as np
import re
# 獲取福建省疾病預防控制中心官網疫情通告列表
def catch_fz_disease_rpt_list() -> bytes:
    """Download the Fujian CDC search-result page listing epidemic bulletins.

    Sends the fixed search keyword to ``http://www.fjcdc.com.cn/search`` and
    retries a bounded number of times when the response body is cut short.

    Returns:
        bytes: the raw response body (``rsp.content``).

    Raises:
        requests.exceptions.ChunkedEncodingError: if every attempt was cut
            short.
        requests.exceptions.RequestException: on timeout, other network
            errors, or a non-2xx HTTP status.
    """
    keyword = {'txtkeyword': '福建省新增新型冠狀病毒感染的肺炎疫情情況'}
    max_attempts = 5  # bounded, so a permanently broken server cannot spin forever

    # The context manager closes the session's pooled connections on exit.
    with requests.session() as session:
        for attempt in range(1, max_attempts + 1):
            try:
                rsp = session.get('http://www.fjcdc.com.cn/search',
                                  params=keyword, timeout=10)
            except requests.exceptions.ChunkedEncodingError:
                if attempt == max_attempts:
                    raise
                continue

            rsp.raise_for_status()  # raise on any non-2xx status
            return rsp.content

#%%

# Module-level page body; catch_fz_disease_latest_rpt() reads html as a global.
html = catch_fz_disease_rpt_list()

#%%

# 獲取最新一期的疫情通告連結地址
def catch_fz_disease_latest_rpt():
    """Return the absolute URL of the most recent bulletin in the list page.

    Parses the module-level ``html``, reads the date in each ``.list li``
    entry's ``<span>``, picks the latest date and returns the link of the
    entry containing that date, prefixed with the site root.

    Simpler alternatives noted by the original author:
      1. take the first entry: ``doc('.list li a').attr.href``
      2. pick a known date: ``doc('.list li:contains("2020-02-02") a').attr.href``
    The longer route is kept deliberately as an exercise.

    Returns:
        str: URL of the latest bulletin.
    """
    doc = pq(html)
    date_format = '%Y-%m-%d'

    # One datetime per list entry, parsed from the entry's <span> text.
    dates = [
        datetime.datetime.strptime(entry('span').text().strip(), date_format)
        for entry in doc('.list li').items()
    ]

    latest_date_str = max(dates).strftime(date_format)
    link = doc('.list li:contains("{0}") a'.format(latest_date_str)).attr.href
    return 'http://www.fjcdc.com.cn' + link
 

#%%
# Show the URL of the latest bulletin.
print(catch_fz_disease_latest_rpt())
#%%

# 解析網頁,獲取確診和疑似病例資料文字
def catch_fz_disease_dis():
    """Download the latest bulletin and pull out the Fuzhou text fragments.

    Fetches the page returned by ``catch_fz_disease_latest_rpt()``, looks
    inside ``<div class="showCon">`` for text nodes mentioning "福州市" and
    expects exactly four of them. Only the third and fourth are kept.

    Returns:
        dict: ``{'confirm': <3rd match>, 'suspend': <4th match>}``; the
        values are the text nodes returned by BeautifulSoup.

    Raises:
        requests.exceptions.ChunkedEncodingError: if every attempt was cut
            short.
        requests.exceptions.RequestException: on timeout, other network
            errors, or a non-2xx HTTP status.
        ValueError: if the page has no ``div.showCon`` container.
        Exception: if the number of matching text nodes is not 4.
    """
    latest_date_url = catch_fz_disease_latest_rpt()
    max_attempts = 5  # bounded, so a permanently broken server cannot spin forever

    # One session for all attempts; closed automatically on exit.
    with requests.session() as session:
        for attempt in range(1, max_attempts + 1):
            try:
                rsp = session.get(latest_date_url, timeout=10)
            except requests.exceptions.ChunkedEncodingError:
                if attempt == max_attempts:
                    raise
                continue

            rsp.raise_for_status()  # raise on any non-2xx status
            break

    soup = BeautifulSoup(rsp.content, 'lxml')
    article = soup.find('div', class_='showCon')
    if article is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('bulletin page has no <div class="showCon"> container')

    tag = article.find_all(text=re.compile('.*福州市.*'))
    if len(tag) != 4:
        raise Exception('查詢到值的次數必須等於 4. 實際值為: {}'.format(len(tag)))

    # tag[0] and tag[1] are not used (the original left them commented out
    # as 'confirm_added' / 'suspend_added').
    return {'confirm': tag[2], 'suspend': tag[3]}
#%%
# Module-level result; exact_towns_dis() reads fz_data['confirm'] as a global.
fz_data = catch_fz_disease_dis()
print(fz_data)
#%%
import re
# 解析各區縣資料
def exact_towns_dis():
    """Parse per-district confirmed counts out of the Fuzhou bulletin text.

    Reads the module-level ``fz_data['confirm']`` string and extracts every
    ``<name ending in 市/縣/區><digits>例`` entry that directly follows a
    list separator ``、`` or an opening parenthesis (full-width or ASCII).

    Returns:
        dict: district name -> confirmed count, plus the key '福州市區'
        holding the sum of all entries whose name ends in '區'. '長樂區' is
        stored as '長樂市' (the name used by the map) and is therefore not
        included in that sum.
    """
    # Group 1 is the district name, group 2 the count. The lookbehind
    # anchors each entry to the start of a list item.
    entry_pattern = re.compile(r'(?<=[、((])(\D+[市縣區])(\d+)例')

    # Pingtan is reported separately and is not handled here.
    town_data = {'福州市區': 0}
    for town_name, count_text in entry_pattern.findall(fz_data['confirm']):
        count = int(count_text)

        if town_name == '長樂區':  # the map still lists Changle as a city
            town_name = '長樂市'

        town_data[town_name] = count

        # Urban districts (晉安、鼓樓、馬尾、倉山、臺江) roll up into the city total.
        if town_name.endswith('區'):
            town_data['福州市區'] += count
    return town_data
    
   
#%%
# Module-level result; map_fz_disease_dis() reads fz_town_data as a global.
fz_town_data = exact_towns_dis()
print(fz_town_data)


#%%
from pyecharts.commons.utils import JsCode
def map_fz_disease_dis() -> Map:
    """Build the Fuzhou city choropleth of confirmed cases per district.

    Reads the module-level ``fz_town_data`` mapping (district name -> count),
    including its '福州市區' entry, which is also quoted in the subtitle.

    Returns:
        Map: the configured pyecharts map (not yet rendered).
    """
    town_rows = [[name, count] for name, count in fz_town_data.items()]

    # Regions with no data row get their plain region name as the label.
    # The earlier version reached the same result by evaluating the invalid
    # expression `params.value[2]`, relying on the chart falling back to its
    # default label.
    label_formatter = JsCode("""
        function(params){
            if (typeof(params.data) == 'undefined') {
                return params.name;
            } else {
                return params.data.name + params.data.value + '例';
            }
        }""")

    c = (
        Map()
        .add('福州市(不含平潭)', town_rows, '福州')
        .set_series_opts(label_opts=opts.LabelOpts(is_show=True,
                                                   formatter=label_formatter))
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title='福州市新型冠狀病毒疫情地圖',
                subtitle='其中,福州主城區(晉安、馬尾、鼓樓、倉山、臺江)\n共確診{}例'.format(fz_town_data['福州市區'])),
            visualmap_opts=opts.VisualMapOpts(is_show=True,
                                              split_number=6,
                                              is_piecewise=True,  # piecewise (bucketed) legend
                                              pos_top='center',
                                              pieces=[
                                                  {'min': 20},
                                                  {'min': 10, 'max': 19},
                                                  {'min': 5, 'max': 9},
                                                  {'min': 1, 'max': 4}]
                                              )
        )
    )
    return c
#%%
# Build the Fuzhou map and write it out as an HTML file.
map_fz_disease_dis().render('福州疫情地圖.html')

其他

第一篇:用Python抓新型冠狀病毒肺炎實時資料,基於Basemap繪製省級疫情分佈圖
第二篇:用Python抓新型冠狀病毒肺炎疫情資料,繪製全國疫情分佈(Basemap)和走勢圖
第三篇:用Python抓新型冠狀病毒肺炎實時資料,基於Basemap繪製市內疫情地圖

另外,這兩天寫的pyecharts案例:
用Python pyecharts v1.x 繪製圖形(一):柱狀圖、柱狀堆疊圖、條形圖、直方圖、帕累托圖、餅圖、圓環圖、玫瑰圖
用Python pyecharts v1.x 繪製圖形(二):折線圖、折線面積圖、散點圖、雷達圖、箱線圖、詞雲圖

眾志成城戰疫魔!
天佑湖北!天佑中國!

相關文章