什麼是散點圖?可以用來呈現哪些資料關係?在資料分析過程中可以解決哪些問題?怎樣用Python繪製散點圖?本文逐一為你解答。
# Minimal scatter plot: five points drawn as semi-transparent navy circles.
from bokeh.plotting import figure, show

# Data
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]
# Canvas size. `width`/`height` are used instead of `plot_width`/`plot_height`
# to match the other figures in this file; the `plot_*` spellings were removed
# in Bokeh 3.0.
p = figure(width=400, height=400)
# Draw the markers
p.scatter(x, y,
          size=20,  # marker size in screen units (pixels)
          # radius=1,  # alternative: marker size in data-space (axis) units
          marker="circle", color="navy", alpha=0.5)
# Equivalent marker-method form:
# p.circle(x, y, size=20, color="navy", alpha=0.5)
# Display
show(p)
x (str or seq[float]) : 離散點的x座標,列名或列表
y (str or seq[float]) : 離散點的y座標
size (str or list[float]) : 離散點的大小,螢幕畫素單位
marker (str, or list[str]) : 離散點標記型別名稱或名稱列表
color (color value, optional) : 填充及輪廓線的顏色
source (`~bokeh.models.sources.ColumnDataSource`) : Bokeh專屬資料格式
**kwargs: 其他自定義屬性
其中標記點型別marker的預設值為"circle";除了以螢幕畫素為單位的"size"之外,也可以改用"radius"定義圓的半徑大小(單位為座標軸單位)。這在Web資料視覺化中非常有用:"size"與"radius"兩種方式在不同裝置上的展示效果會有些許差異。
x (:class:`~bokeh.core.properties.NumberSpec` ) : x座標
y (:class:`~bokeh.core.properties.NumberSpec` ) : y座標
angle (:class:`~bokeh.core.properties.AngleSpec` ) : 旋轉角度
angle_units (:class:`~bokeh.core.enums.AngleUnits`) : (default: 'rad') 預設:弧度,也可以採用度('deg')
fill_alpha (:class:`~bokeh.core.properties.NumberSpec` ) : (default: 1.0) 填充透明度,預設:不透明
fill_color (:class:`~bokeh.core.properties.ColorSpec` ) : (default: 'gray') 填充顏色,預設:灰色
line_alpha (:class:`~bokeh.core.properties.NumberSpec` ) : (default: 1.0) 輪廓線透明度,預設:不透明
line_cap (:class:`~bokeh.core.enums.LineCap` ) : (default: 'butt') 線端(帽)樣式
line_color (:class:`~bokeh.core.properties.ColorSpec` ) : (default: 'black') 輪廓線顏色,預設:黑色
line_dash (:class:`~bokeh.core.properties.DashPattern` ) : (default: []) 虛線樣式,預設:實線
line_dash_offset (:class:`~bokeh.core.properties.Int` ) : (default: 0) 虛線偏移
line_join (:class:`~bokeh.core.enums.LineJoin` ) : (default: 'bevel') 線段連線處的樣式
line_width (:class:`~bokeh.core.properties.NumberSpec` ) : (default: 1) 線寬,預設:1
# Two side-by-side plots of the same random points, colored by position.
import numpy as np
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show

# Data
N = 4000
x = np.random.random(size=N) * 100  # random x coordinates
y = np.random.random(size=N) * 100  # random y coordinates
radii = np.random.random(size=N) * 1.5  # random radii
# Toolbar
TOOLS = "hover,crosshair,pan,wheel_zoom,box_zoom,reset,tap,save,box_select,poly_select,lasso_select"

# Figure 1: red and green channels follow x and y, blue is fixed at 150.
# Named colors1 (the original reused the name colors2 for both lists).
colors1 = ["#%02x%02x%02x" % (int(r), int(g), 150) for r, g in zip(50+2*x, 30+2*y)]
p1 = figure(width=300, height=300, tools=TOOLS)
# circle() is the glyph method sized by `radius`; scatter() is sized by `size`.
p1.circle(x, y, radius=radii, fill_color=colors1, fill_alpha=0.6, line_color=None)

# Figure 2: red is fixed at 150, green and blue channels follow x and y.
colors2 = ["#%02x%02x%02x" % (150, int(g), int(b)) for g, b in zip(50+2*x, 30+2*y)]
p2 = figure(width=300, height=300, tools=TOOLS)
p2.circle(x, y, radius=radii, fill_color=colors2, fill_alpha=0.6, line_color=None)

# To show each figure on its own instead:
# show(p1)
# show(p2)
# Grid layout: one row holding both figures
grid = gridplot([[p1, p2]])
show(grid)
# Iris scatter plot: petal length against petal width, colored by species.
from bokeh.plotting import figure, show
from bokeh.sampledata.iris import flowers

# One color per species
colormap = {'setosa': 'red', 'versicolor': 'green', 'virginica': 'blue'}
colors = [colormap[species] for species in flowers['species']]
# Canvas
p = figure(title="Iris Morphology")
# Draw. scatter(size=...) is used rather than circle(size=...) so the call
# matches the scatter examples elsewhere in this file.
p.scatter(flowers["petal_length"], flowers["petal_width"],
          color=colors, fill_alpha=0.2, size=10)
# Axis labels
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Petal Width'
# Display
show(p)
執行結果如圖5所示。
# Selection demo: four sine-wave scatter plots in a 2x2 grid under an HTML
# caption, each showing a different box-select / highlight configuration.
import numpy as np
from bokeh.layouts import column, gridplot
from bokeh.models import BoxSelectTool, Div
from bokeh.plotting import figure, show

# Data
x = np.linspace(0, 4*np.pi, 100)
y = np.sin(x)

# Toolbar
TOOLS = "wheel_zoom,save,box_select,lasso_select,reset"

# HTML caption. The markup is placed in the layout as an element of its own;
# it could equally be a complete HTML document. The text is split across
# adjacent literals only for line length; the resulting string is unchanged.
div = Div(text=(
    " <p>Bokeh中的畫布可透過多種佈局方式進行顯示;</p>"
    " <p>透過配置引數BoxSelectTool,在圖中用滑鼠選擇資料,採用不同方式進行互動。</p> "
))

# Shared figure options. `width`/`height` are used instead of
# `plot_width`/`plot_height`, which were removed in Bokeh 3.0.
opts = dict(tools=TOOLS, width=350, height=350)

# Plot 1
p1 = figure(title="selection on mouseup", **opts)
p1.scatter(x, y, marker="circle", color="navy", size=6, alpha=0.6)

# Plot 2
p2 = figure(title="selection on mousemove", **opts)
p2.scatter(x, y, marker="square", color="olive", size=6, alpha=0.6)
# NOTE(review): newer Bokeh releases may expose this flag under a different
# property name - confirm against the installed version.
p2.select_one(BoxSelectTool).select_every_mousemove = True

# Plot 3
p3 = figure(title="default highlight", **opts)
p3.scatter(x, y, marker="circle", color="firebrick", alpha=0.5, size=6)

# Plot 4: non-selected points are recolored orange
p4 = figure(title="custom highlight", **opts)
p4.scatter(x, y, marker="square", color="navy", size=6, alpha=0.6,
           nonselection_color="orange", nonselection_alpha=0.6)

# Layout: caption above the 2x2 grid; sizing_mode="scale_width" scales the
# layout with the window width.
layout = column(div,
                gridplot([[p1, p2], [p3, p4]], toolbar_location="right"),
                sizing_mode="scale_width")

# Display
show(layout)
# Olympic sprint medalists chart: each medal is a circle at (MetersBack, Year),
# where MetersBack is derived from the time in the first row of the data set
# (labelled "2012 Bolt" in the chart). Hovering shows details; tapping a medal
# opens a Wikipedia URL built from the athlete's name.
from bokeh.models import (
    ColumnDataSource, Range1d, DataRange1d, LinearAxis, SingleIntervalTicker,
    FixedTicker, Label, Arrow, NormalHead, HoverTool, TapTool, CustomJS)
from bokeh.plotting import figure, show
from bokeh.sampledata.sprint import sprint

# Country abbreviation -> display name
abbrev_to_country = {
    "USA": "United States",
    "GBR": "Britain",
    "JAM": "Jamaica",
    "CAN": "Canada",
    "TRI": "Trinidad and Tobago",
    "AUS": "Australia",
    "GER": "Germany",
    "CUB": "Cuba",
    "NAM": "Namibia",
    "URS": "Soviet Union",
    "BAR": "Barbados",
    "BUL": "Bulgaria",
    "HUN": "Hungary",
    "NED": "Netherlands",
    "NZL": "New Zealand",
    "PAN": "Panama",
    "POR": "Portugal",
    "RSA": "South Africa",
    "EUA": "United Team of Germany",
}

# Medal fill / outline colors
gold_fill = "#efcf6d"
gold_line = "#c8a850"
silver_fill = "#cccccc"
silver_line = "#b0b0b1"
bronze_fill = "#c59e8a"
bronze_line = "#98715d"

fill_color = {"gold": gold_fill, "silver": silver_fill, "bronze": bronze_fill}
line_color = {"gold": gold_line, "silver": silver_line, "bronze": bronze_line}


def selected_name(name, medal, year):
    """Return `name` for gold medals of 1988, 1968, 1936 or 1896, else ""."""
    return name if medal == "gold" and year in [1988, 1968, 1936, 1896] else ""


# Reference time: the first row of the data set
t0 = sprint.Time[0]

# Derived columns (added to the sample DataFrame in place)
sprint["Abbrev"] = sprint.Country
sprint["Country"] = sprint.Abbrev.map(lambda abbr: abbrev_to_country[abbr])
sprint["Medal"] = sprint.Medal.map(lambda medal: medal.lower())
sprint["Speed"] = 100.0/sprint.Time
sprint["MetersBack"] = 100.0*(1.0 - t0/sprint.Time)
sprint["MedalFill"] = sprint.Medal.map(lambda medal: fill_color[medal])
sprint["MedalLine"] = sprint.Medal.map(lambda medal: line_color[medal])
sprint["SelectedName"] = sprint[["Name", "Medal", "Year"]].apply(tuple, axis=1).map(lambda args: selected_name(*args))

source = ColumnDataSource(sprint)

# The x range runs from the largest MetersBack down to 0 (start > end).
xdr = Range1d(start=sprint.MetersBack.max()+2, end=0)  # XXX: +2 is poor-man's padding (otherwise misses last tick)
ydr = DataRange1d(range_padding=4, range_padding_units="absolute")

# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# removed in Bokeh 3.0.
plot = figure(
    x_range=xdr, y_range=ydr,
    width=1000, height=600,
    toolbar_location=None,
    outline_line_color=None, y_axis_type=None)

# The title literal was split by a stray line break in the source; rejoined.
plot.title.text = "Usain Bolt vs. 116 years of Olympic sprinters"
plot.title.text_font_size = "14pt"

plot.xaxis.ticker = SingleIntervalTicker(interval=5, num_minor_ticks=0)
plot.xaxis.axis_line_color = None
plot.xaxis.major_tick_line_color = None
plot.xgrid.grid_line_dash = "dashed"

# The default y axis is disabled above (y_axis_type=None); add a custom one
# on the right with fixed year ticks.
yticker = FixedTicker(ticks=[1900, 1912, 1924, 1936, 1952, 1964, 1976, 1988, 2000, 2012])
yaxis = LinearAxis(ticker=yticker, major_tick_in=-5, major_tick_out=10)
plot.add_layout(yaxis, "right")

# Medal markers: radius of 5 in screen units
medal = plot.circle(x="MetersBack", y="Year", radius=dict(value=5, units="screen"),
                    fill_color="MedalFill", line_color="MedalLine", fill_alpha=0.5,
                    source=source, level="overlay")

# Name labels (SelectedName is empty for most rows, see selected_name)
plot.text(x="MetersBack", y="Year", x_offset=10, y_offset=-5, text="SelectedName",
          text_align="left", text_baseline="middle", text_font_size="9pt",
          source=source)

no_olympics_label = Label(
    x=7.5, y=1942,
    text="No Olympics in 1940 or 1944",
    text_align="center", text_baseline="middle",
    text_font_size="9pt", text_font_style="italic", text_color="silver")
no_olympics = plot.add_layout(no_olympics_label)

# Arrow on the 1900 row, starting just past the smallest 1900 MetersBack value
x = sprint[sprint.Year == 1900].MetersBack.min() - 0.5
arrow = Arrow(x_start=x, x_end=5, y_start=1900, y_end=1900,
              start=NormalHead(fill_color="black", size=6), end=None,
              line_width=1.5)
plot.add_layout(arrow)

meters_back = Label(
    x=5, x_offset=10, y=1900,
    text="Meters behind 2012 Bolt",
    text_align="left", text_baseline="middle",
    text_font_size="10pt", text_font_style="bold")
plot.add_layout(meters_back)

# The disclaimer literal was split by a stray line break in the source;
# rejoined. Adjacent literals are used only for line length.
disclaimer = Label(
    x=0, y=0, x_units="screen", y_units="screen",
    text=("This chart includes medals for the United States and Australia in the "
          "\"Intermediary\" Games of 1906, which the I.O.C. does not formally recognize."),
    text_font_size="8pt", text_color="silver")
plot.add_layout(disclaimer, "below")

# Hover tooltip HTML. Split across adjacent literals for line length only; the
# resulting string is unchanged.
tooltips = (
    ' <div>'
    ' <span style="font-size: 15px;">@Name</span>'
    ' <span style="font-size: 10px; color: #666;">(@Abbrev)</span>'
    ' </div>'
    ' <div>'
    ' <span style="font-size: 17px; font-weight: bold;">@Time{0.00}</span>'
    ' <span style="font-size: 10px; color: #666;">@Year</span>'
    ' </div>'
    ' <div style="font-size: 11px; color: #666;">@{MetersBack}{0.00} meters behind</div> '
)

plot.add_tools(HoverTool(tooltips=tooltips, renderers=[medal]))

# Tap callback (JavaScript): builds a Wikipedia URL from each inspected row's
# Name and opens it. The JS text is unchanged from the original.
# NOTE(review): `source.inspected._1d.indices` looks like an older BokehJS
# selection API - confirm it exists in the Bokeh version in use.
open_url = CustomJS(args=dict(source=source), code=(
    ' source.inspected._1d.indices.forEach(function(index) {'
    ' var name = source.data["Name"][index];'
    ' var url = "http://en.wikipedia.org/wiki/" + encodeURIComponent(name);'
    ' window.open(url);'
    ' }); '
))

plot.add_tools(TapTool(callback=open_url, renderers=[medal], behavior="inspect"))

show(plot)
關於作者:屈希峰,資深Python工程師,Bokeh領域的實踐者和佈道者,對Bokeh有深入的研究。擅長Flask、MongoDB、Sklearn等技術,實踐經驗豐富。知乎多個專欄(Python中文社群、Python程式設計師、大資料分析挖掘)作者,專欄累計關注使用者十餘萬人。
本文摘編自《Python資料視覺化:基於Bokeh的視覺化繪圖》,經出版方授權釋出。