前言
騰訊網頁資料有點變化,所以重新爬取了一下
基於pyecharts的視覺化,這次更新新加了註釋
資料來源 騰訊疫情實時追蹤
後期會繼續更新pyecharts的繪圖,搭建視覺化大屏,感興趣的可以先關注一下
原始檔下載連結無需積分
第一部分 網頁分析
今天重新整理了一下,發現資料結構和之前的有所變化,把具體的操作步驟也說一下吧!
開啟網址(推薦使用火狐瀏覽器),按 F12 進入開發者工具,然後重新整理一下頁面,如下:
分析請求頭
name=disease_h5 是資料位置
callback=jQuery341021795676971428168_1580642523637 和 _=1580642523638 是帶有當前時間戳的回呼函式名與引數,請求資料時可以省略
所以我們請求的URL為:https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5
(自己嘗試一下,這裡需要找規律)
第二部分 資料準備
匯入模組
import time
import json
import requests
from datetime import datetime
import pandas as pd
import numpy as np
抓取資料
def catch_data():
    """Download the Tencent ``disease_h5`` payload and decode it.

    The response is a JSON envelope whose ``data`` field is itself a
    JSON-encoded string, so the payload is decoded in two steps.

    Returns:
        The object decoded from the envelope's ``data`` field.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        KeyError: if the envelope has no ``data`` field.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url=url, timeout=10)
    # Fail on HTTP errors here instead of later as an obscure decode error.
    response.raise_for_status()
    envelope = response.json()
    return json.loads(envelope['data'])
# Fetch the payload once; the later processing steps all read from `data`.
data = catch_data( )
# Inspect the top-level sections of the payload.
data. keys( )
dict_keys(['chinaTotal', 'chinaAdd', 'lastUpdateTime', 'areaTree', 'chinaDayList', 'chinaDayAddList'])
資料處理
# Pull the update timestamp and the two nationwide summary records out of
# the payload in one pass.
lastUpdateTime, chinaTotal, chinaAdd = (
    data[key] for key in ('lastUpdateTime', 'chinaTotal', 'chinaAdd')
)
# Echo both summaries, one per line.
print(chinaTotal, chinaAdd, sep='\n')
{'confirm': 17238, 'suspect': 21558, 'dead': 361, 'heal': 475}
{'confirm': 2858, 'suspect': 2014, 'dead': 57, 'heal': 147}
國內資料處理 第一步
# The first entry of the area tree holds the provinces; each province in
# turn holds its cities under 'children'.
areaTree = data['areaTree']
china_data = areaTree[0]['children']

# One flat record per (province, city) pair, keeping the nested 'total' and
# 'today' records untouched for now.
china_list = [
    {
        'province': province_node['name'],
        'city': city_node['name'],
        'total': city_node['total'],
        'today': city_node['today'],
    }
    for province_node in china_data
    for city_node in province_node['children']
]

# From here on `china_data` is the tabular view of those records.
china_data = pd.DataFrame(china_list)
china_data.head()
city
province
today
total
0
武漢
湖北
{'confirm': 1033, 'suspect': 0, 'dead': 41, 'h...
{'confirm': 5142, 'suspect': 0, 'dead': 265, '...
1
黃岡
湖北
{'confirm': 244, 'suspect': 0, 'dead': 2, 'hea...
{'confirm': 1246, 'suspect': 0, 'dead': 17, 'h...
2
孝感
湖北
{'confirm': 169, 'suspect': 0, 'dead': 0, 'hea...
{'confirm': 918, 'suspect': 0, 'dead': 14, 'he...
3
襄陽
湖北
{'confirm': 107, 'suspect': 0, 'dead': 0, 'hea...
{'confirm': 548, 'suspect': 0, 'dead': 0, 'hea...
4
荊州
湖北
{'confirm': 166, 'suspect': 0, 'dead': 2, 'hea...
{'confirm': 499, 'suspect': 0, 'dead': 6, 'hea...
國內資料處理 第二步
def confirm(x):
    """Return the ``'confirm'`` entry of one case-count record.

    Args:
        x: A dict holding a ``'confirm'`` key, or the textual form of a
            dict literal holding one.

    Returns:
        The value stored under ``'confirm'``.

    Raises:
        KeyError: if the record has no ``'confirm'`` key.
        ValueError: if a textual record is not a plain Python literal.
        SyntaxError: if a textual record is not valid Python syntax.
    """
    # Records that are already dicts are indexed directly; no need to
    # round-trip them through str() and eval().
    if isinstance(x, dict):
        return x['confirm']
    # Function-scope import keeps this block self-contained.
    import ast
    # literal_eval only accepts Python literals, so text that originated
    # from the remote service cannot execute as code the way eval() allows.
    return ast.literal_eval(str(x))['confirm']
def suspect(x):
    """Return the ``'suspect'`` entry of one case-count record.

    Args:
        x: A dict holding a ``'suspect'`` key, or the textual form of a
            dict literal holding one.

    Returns:
        The value stored under ``'suspect'``.

    Raises:
        KeyError: if the record has no ``'suspect'`` key.
        ValueError: if a textual record is not a plain Python literal.
        SyntaxError: if a textual record is not valid Python syntax.
    """
    # Records that are already dicts are indexed directly; no need to
    # round-trip them through str() and eval().
    if isinstance(x, dict):
        return x['suspect']
    # Function-scope import keeps this block self-contained.
    import ast
    # literal_eval only accepts Python literals, so text that originated
    # from the remote service cannot execute as code the way eval() allows.
    return ast.literal_eval(str(x))['suspect']
def dead(x):
    """Return the ``'dead'`` entry of one case-count record.

    Args:
        x: A dict holding a ``'dead'`` key, or the textual form of a dict
            literal holding one.

    Returns:
        The value stored under ``'dead'``.

    Raises:
        KeyError: if the record has no ``'dead'`` key.
        ValueError: if a textual record is not a plain Python literal.
        SyntaxError: if a textual record is not valid Python syntax.
    """
    # Records that are already dicts are indexed directly; no need to
    # round-trip them through str() and eval().
    if isinstance(x, dict):
        return x['dead']
    # Function-scope import keeps this block self-contained.
    import ast
    # literal_eval only accepts Python literals, so text that originated
    # from the remote service cannot execute as code the way eval() allows.
    return ast.literal_eval(str(x))['dead']
def heal(x):
    """Return the ``'heal'`` entry of one case-count record.

    Args:
        x: A dict holding a ``'heal'`` key, or the textual form of a dict
            literal holding one.

    Returns:
        The value stored under ``'heal'``.

    Raises:
        KeyError: if the record has no ``'heal'`` key.
        ValueError: if a textual record is not a plain Python literal.
        SyntaxError: if a textual record is not valid Python syntax.
    """
    # Records that are already dicts are indexed directly; no need to
    # round-trip them through str() and eval().
    if isinstance(x, dict):
        return x['heal']
    # Function-scope import keeps this block self-contained.
    import ast
    # literal_eval only accepts Python literals, so text that originated
    # from the remote service cannot execute as code the way eval() allows.
    return ast.literal_eval(str(x))['heal']
# Flatten the nested per-city records into scalar columns: un-prefixed
# names are read from 'total', 'add'-prefixed names are read from 'today'.
extractors = (
    ('confirm', confirm),
    ('suspect', suspect),
    ('dead', dead),
    ('heal', heal),
)
for prefix, source in (('', 'total'), ('add', 'today')):
    for field, extract in extractors:
        china_data[prefix + field] = china_data[source].map(extract)

# Keep only the identifying columns and the flattened counters.
china_columns = [
    "province", "city",
    "confirm", "suspect", "dead", "heal",
    "addconfirm", "addsuspect", "adddead", "addheal",
]
china_data = china_data[china_columns]
china_data.head()
province
city
confirm
suspect
dead
heal
addconfirm
addsuspect
adddead
addheal
0
湖北
武漢
5142
0
265
181
1033
0
41
0
1
湖北
黃岡
1246
0
17
27
244
0
2
0
2
湖北
孝感
918
0
14
2
169
0
0
0
3
湖北
襄陽
548
0
0
0
107
0
0
0
4
湖北
荊州
499
0
6
1
166
0
2
0
國際資料處理
# Top-level entries of the area tree become the rows of the global table.
global_data = pd.DataFrame(data['areaTree'])

# Flatten the nested records into scalar columns: un-prefixed names are
# read from 'total', 'add'-prefixed names are read from 'today'.
extractors = (
    ('confirm', confirm),
    ('suspect', suspect),
    ('dead', dead),
    ('heal', heal),
)
for prefix, source in (('', 'total'), ('add', 'today')):
    for field, extract in extractors:
        global_data[prefix + field] = global_data[source].map(extract)

# Attach the English name for each row via the Chinese/English lookup
# sheet; the inner join keeps only rows present in both tables.
world_name = pd.read_excel("世界各國中英文對照.xlsx")
global_data = global_data.merge(
    world_name,
    left_on="name",
    right_on="中文",
    how="inner",
)

global_columns = [
    "name", "英文",
    "confirm", "suspect", "dead", "heal",
    "addconfirm", "addsuspect", "adddead", "addheal",
]
global_data = global_data[global_columns]
global_data.head()
name
英文
confirm
suspect
dead
heal
addconfirm
addsuspect
adddead
addheal
0
中國
China
17219
0
361
480
2732
0
57
53
1
日本
Japan
20
0
0
1
0
0
0
0
2
泰國
Thailand
19
0
0
7
0
0
0
0
3
新加坡
Singapore
18
0
0
0
0
0
0
0
4
韓國
Korea(republic of)
15
0
0
0
0
0
0
0
每日累計資料處理
# Tabulate the 'chinaDayList' series, keeping only the date and the four
# counters.
day_columns = ['date', 'confirm', 'suspect', 'dead', 'heal']
chinaDayList = pd.DataFrame(data['chinaDayList'])[day_columns]
chinaDayList.head()
date
confirm
suspect
dead
heal
0
01.13
41
0
1
0
1
01.14
41
0
1
0
2
01.15
41
0
2
5
3
01.16
45
0
2
8
4
01.17
62
0
2
12
每日新增資料處理
# Tabulate the 'chinaDayAddList' series, keeping only the date and the
# four counters.
day_add_columns = ['date', 'confirm', 'suspect', 'dead', 'heal']
chinaDayAddList = pd.DataFrame(data['chinaDayAddList'])[day_add_columns]
chinaDayAddList.head()
date
confirm
suspect
dead
heal
0
01.20
77
27
0
0
1
01.21
149
53
3
0
2
01.22
131
257
8
0
3
01.23
259
680
8
6
4
01.24
444
1118
16
3
第三部分 資料視覺化
總資料明細
from pyecharts. charts import *
from pyecharts import options as opts
from pyecharts. globals import ThemeType
# Pie chart of the nationwide summary, one slice per key of `chinaTotal`.
total_pie_init = opts.InitOpts(
    theme=ThemeType.WESTEROS,
    width='500px',
    height='350px',
)
total_pairs = [[label, count] for label, count in chinaTotal.items()]
total_pie = (
    Pie(init_opts=total_pie_init)
    .add("", total_pairs, center=["50%", "50%"], radius=[50, 80])
    .set_global_opts(
        # The subtitle carries the payload's last-update timestamp.
        title_opts=opts.TitleOpts(
            title="全國總量",
            subtitle="截止" + lastUpdateTime,
        )
    )
    # Label each slice with its raw value.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{c}"))
)
total_pie.render_notebook()
<div id="df41be401be54cb6bd113d776a0d5a49" style="width:500px; height:350px;"></div>
# Pie chart of the newly added counts, one slice per key of `chinaAdd`.
totaladd_pie_init = opts.InitOpts(
    theme=ThemeType.WESTEROS,
    width='500px',
    height='350px',
)
added_pairs = [[label, count] for label, count in chinaAdd.items()]
totaladd_pie = (
    Pie(init_opts=totaladd_pie_init)
    .add("", added_pairs, center=["50%", "50%"], radius=[50, 80])
    .set_global_opts(title_opts=opts.TitleOpts(title="昨日新增"))
    # Label each slice with its raw value.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{c}"))
)
totaladd_pie.render_notebook()
<div id="e7f89ced2eee4f72aabf78c05ab56dc1" style="width:500px; height:350px;"></div>
全球疫情熱圖
# World map coloured by confirmed count; rows are keyed by the English
# country name from the merged lookup column.
world_pairs = [
    [country, cases]
    for country, cases in zip(global_data["英文"], global_data["confirm"])
]
# Piecewise legend buckets for the confirmed count.
world_pieces = [
    {"min": 101, "label": '>100'},
    {"min": 10, "max": 100, "label": '10-100'},
    {"min": 0, "max": 9, "label": '0-9'},
]
world_map = Map(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
world_map.add("", world_pairs, "world", is_map_symbol_show=False)
world_map.set_global_opts(
    title_opts=opts.TitleOpts(title="2019_nCoV-世界疫情地圖"),
    visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pieces=world_pieces),
)
# Hide the per-region text labels.
world_map.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
world_map.render_notebook()
<div id="c938cdb9be164ce89a16c8c3788edf61" style="width:900px; height:500px;"></div>
中國疫情地圖熱圖繪製
# Sum the city-level confirmed counts up to one row per province.
province_totals = china_data.groupby("province")["confirm"].sum()
area_data = province_totals.reset_index()
area_data.columns = ["province", "confirm"]

area_pairs = [
    [province_name, cases]
    for province_name, cases in zip(area_data["province"], area_data["confirm"])
]
# Piecewise legend buckets, each with its own fill colour.
area_pieces = [
    {"min": 1001, "label": '>1000', "color": "#893448"},
    {"min": 500, "max": 1000, "label": '500-1000', "color": "#ff585e"},
    {"min": 101, "max": 499, "label": '101-499', "color": "#fb8146"},
    {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
    {"min": 0, "max": 9, "label": '0-9', "color": "#fff2d1"},
]
area_map = Map(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
area_map.add("", area_pairs, "china", is_map_symbol_show=False)
area_map.set_global_opts(
    title_opts=opts.TitleOpts(title="2019_nCoV中國疫情地圖"),
    visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pieces=area_pieces),
)
area_map.render_notebook()
<div id="4afca4394dc74d64aa320c7831ca4bd1" style="width:900px; height:500px;"></div>
繪製每日資料趨勢
每日累計資料趨勢
# Smoothed line chart of the 'heal' and 'dead' columns over the dates in
# `chinaDayList`.
line1 = (
    Line(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
    .add_xaxis(chinaDayList["date"].tolist())
    .add_yaxis("治癒", chinaDayList["heal"].tolist(), is_smooth=True)
    .add_yaxis("死亡", chinaDayList["dead"].tolist(), is_smooth=True)
    .set_global_opts(title_opts=opts.TitleOpts(title="Line1-治癒與死亡趨勢"))
)
line1.render_notebook()
<div id="8723f81496e14ff1aa3e7ff4b84eaa94" style="width:900px; height:500px;"></div>
每日累計確診趨勢
# Line chart of the 'confirm' and 'suspect' columns over the dates in
# `chinaDayList`.
line2_dates = chinaDayList["date"].tolist()
line2_confirmed = chinaDayList["confirm"].tolist()
line2_suspected = chinaDayList["suspect"].tolist()

line2 = Line(init_opts=opts.InitOpts(theme=ThemeType.SHINE))
line2.add_xaxis(line2_dates)
line2.add_yaxis("確診", line2_confirmed)
line2.add_yaxis("疑似", line2_suspected)
line2.set_global_opts(title_opts=opts.TitleOpts(title="Line2-確診與疑似趨勢"))
line2.render_notebook()
<div id="70d5c115f4344fffa84cf3aec7357c6a" style="width:900px; height:500px;"></div>
每日新增
# Grouped bar chart with one series per counter in `chinaDayAddList`.
bar = Bar(
    init_opts=opts.InitOpts(
        theme=ThemeType.WESTEROS,
        width='900px',
        height='400px',
    )
)
bar.add_xaxis(chinaDayAddList["date"].tolist())
# (legend label, source column), added in display order.
bar_series = (
    ("確診", "confirm"),
    ("疑似", "suspect"),
    ("死亡", "dead"),
    ("治癒", "heal"),
)
for series_label, column in bar_series:
    bar.add_yaxis(series_label, chinaDayAddList[column].tolist())
bar.set_global_opts(title_opts=opts.TitleOpts(title="每日新增資料趨勢"))
bar.render_notebook()
<div id="f361d22988f248c3a8dccd9da4c05962" style="width:900px; height:400px;"></div>
第四部分 圖片彙總
# Collect every chart built above, in order, onto one page and write it
# out as a single HTML file.
page = Page()
page.add(
    total_pie,
    totaladd_pie,
    world_map,
    area_map,
    line1,
    line2,
    bar,
)
page.render("2019_nCoV 視覺化.html")