matplotlib
一、繪製餅圖
1.直接生成餅圖
import matplotlib.pyplot as plt
# Pie chart of education-level shares, built directly from Python lists.
edu = [0.2515, 0.3724, 0.3336, 0.0368, 0.0057]
labels = ['中專', '大專', '本科', '碩士', '其他']
# `explode` was passed to plt.pie() but never defined, which raised NameError.
# One offset per wedge, in the same order as `edu`.
# NOTE(review): the offsets below pull out only the second (largest) wedge;
# the intended values were not present in the notes -- adjust as needed.
explode = [0, 0.1, 0, 0, 0]
colors = ['#9999ff', '#ff9999', '#7777aa', '#2442aa', '#dd5555']

# Use a font that contains CJK glyphs so the Chinese labels can be rendered,
# and keep the minus sign as a plain ASCII hyphen with that font.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# Equal aspect ratio keeps the pie circular instead of elliptical.
plt.axes(aspect='equal')
plt.pie(
    x=edu,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%.1f%%',        # percentage text with one decimal place
    pctdistance=0.8,         # percentage text position, as a fraction of the radius
    labeldistance=1.1,       # label position, as a fraction of the radius
    startangle=180,
    radius=1.2,
    counterclock=False,      # lay the wedges out clockwise
    wedgeprops={'linewidth': 1.5, 'edgecolor': 'green'},
    textprops={'fontsize': 10, 'color': 'black'},
)
plt.title('失信使用者的教育水平')
plt.show()
2.用pandas序列(Series)生成餅圖
import matplotlib.pyplot as plt
import pandas as pd
# Pie chart drawn through the pandas plotting API from a Series.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'axes.unicode_minus': False,
})

# The Series name is set to an empty string.
data1 = pd.Series(
    {'中專': 0.2515, '大專': 0.3724, '本科': 0.3336, '碩士': 0.0368, '其他': 0.0057},
    name='',
)

# Equal aspect ratio keeps the pie circular.
plt.axes(aspect='equal')

# Styling options are collected first and handed to Series.plot in one go.
pie_options = dict(
    autopct='%.1f%%',
    radius=1,
    startangle=180,
    counterclock=False,
    title='失信使用者的受教育水平分佈',
    wedgeprops={'linewidth': 1.5, 'edgecolor': 'green'},
    textprops={'fontsize': 10, 'color': 'black'},
)
data1.plot(kind='pie', **pie_options)
plt.show()
二、繪製條形圖
1.繪製垂直條形圖
# Vertical bar chart: one bar per row of the GDP workbook.
GDP = pd.read_excel(r'E:\Province GDP 2017.xlsx')
plt.style.use('ggplot')
plt.bar(
    x=range(GDP.shape[0]),
    height=GDP.GDP,
    tick_label=GDP.Province,
    color='steelblue',
)
plt.ylabel('GDP(萬億)')
plt.title('2017年度6個省份GDP分佈')
# Write each value slightly above its bar. The loop body must be indented;
# the flattened original raised IndentationError here.
for x, y in enumerate(GDP.GDP):
    plt.text(x, y + 0.1, '%s' % round(y, 1), ha='center')
plt.show()
2.繪製水平條形圖
# Horizontal bar chart of the same GDP table, sorted ascending by GDP.
GDP.sort_values(by='GDP', inplace=True)
plt.barh(
    y=range(GDP.shape[0]),
    width=GDP.GDP,
    tick_label=GDP.Province,
    color='steelblue',
)
plt.xlabel('GDP(萬億)')
plt.title('2017年度6個省份GDP分佈')
# enumerate() yields (bar position, GDP value). For horizontal bars the
# position is the y coordinate and the value is the x coordinate; the
# original unpacked them the other way round, so the labels were misplaced
# and showed the bar index instead of the GDP value.
for y, x in enumerate(GDP.GDP):
    plt.text(x + 0.1, y, '%s' % round(x, 1), va='center')
plt.show()
3.繪製堆疊條形圖
堆疊條形圖原理:在同一位置依次疊加多段水平或垂直條形圖,後一段以前面各段的累計值作為起點(bottom/left),每段分別使用不同的顏色
import pandas as pd
import matplotlib.pyplot as plt
# Stacked bar chart: one bar per quarter, one coloured segment per industry.
Industry_GDP = pd.read_excel(r'E:\Industry_GDP.xlsx')
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
Quarters = Industry_GDP.Quarter.unique()

# NOTE(review): the value column is accessed as "GPD" -- presumably that is
# how it is spelled in the workbook; confirm before renaming.
# Each industry's series is re-indexed to 0..n-1 so the three series align
# position-by-position when they are added together for `bottom`.
Industry1 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第一產業']
Industry1.index = range(len(Quarters))
Industry2 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第二產業']
Industry2.index = range(len(Quarters))
Industry3 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第三產業']
# The original skipped this reset for the third series only.
Industry3.index = range(len(Quarters))

# Each later segment starts where the sum of the earlier ones ends.
plt.bar(x=range(len(Quarters)), height=Industry1, color='steelblue',
        label='第一產業', tick_label=Quarters)
plt.bar(x=range(len(Quarters)), height=Industry2, bottom=Industry1,
        color='green', label='第二產業')
plt.bar(x=range(len(Quarters)), height=Industry3, bottom=Industry1 + Industry2,
        color='red', label='第三產業')
plt.ylabel('生成總值(億)')
plt.title('2017年各季度三產業總值')
plt.legend()
plt.show()
4.sns生成水平or垂直條形圖
import seaborn as sns
# Horizontal bar chart of provincial GDP drawn with seaborn.
GDP = pd.read_excel(r'E:\Province GDP 2017.xlsx')
sns.barplot(
    y='Province',
    x='GDP',
    data=GDP,
    color='steelblue',
    orient='horizontal',
)
plt.xlabel('GDP(萬億)')
plt.ylabel('省份')
plt.title('2017年度6個省份GDP分佈')
# Label each bar with its value: the enumerate index is the bar's y position.
# The loop body must be indented; the flattened original raised
# IndentationError here.
for y, x in enumerate(GDP.GDP):
    plt.text(x, y, '%s' % round(x, 1), va='center')
plt.show()
5.sns生成水平交錯條形圖
# Grouped bar chart: Age per passenger class, split by sex.
Titanic = pd.read_csv(r'E:\titanic_train.csv')

# Error-bar appearance is kept apart from the data mapping.
error_bar_style = {'errcolor': 'blue', 'errwidth': 2, 'capsize': 0.05}
sns.barplot(
    data=Titanic,
    x='Pclass',
    y='Age',
    hue='Sex',
    palette='RdBu',
    saturation=1,
    **error_bar_style,
)
plt.title('各船艙等級中男女乘客的年齡差異')
plt.show()
三.matplotlib繪製直方圖
# Histogram of passenger ages drawn with matplotlib.
Titanic = pd.read_csv(r'E:\titanic_train.csv')

# Check for missing ages (the result is only useful when run interactively).
Titanic.Age.isnull().any()
# Drop the rows whose Age is missing before plotting.
Titanic = Titanic.dropna(subset=['Age'])

hist_options = {'bins': 20, 'color': 'steelblue', 'edgecolor': 'black'}
plt.hist(x=Titanic.Age, **hist_options)

plt.xlabel('年齡')
plt.ylabel('頻數')
plt.title('乘客年齡分佈')
plt.show()
四、核密度圖
1、pandas繪製直方圖核密度圖
# Histogram of passenger ages with a kernel density curve on the same axes,
# both drawn through the pandas plotting API.
Titanic = pd.read_csv(r'E:\titanic_train.csv')
# density=True normalises the histogram so it shares the density scale of
# the KDE curve. Without it the bars are raw counts and the KDE line is
# squashed flat against the x axis.
Titanic.Age.plot(kind='hist', bins=20, color='steelblue', edgecolor='black',
                 density=True, label='直方圖')
Titanic.Age.plot(kind='kde', color='red', label='核密度圖')
plt.xlabel('年齡')
plt.ylabel('核密度圖')
plt.title('乘客年齡分佈')
plt.legend()
plt.show()
2.seaborn模組繪製直方圖核密度圖(此例kde=False,僅繪製男女乘客的直方圖)
# Overlaid age histograms for male and female passengers (no KDE curve).
Age_Male = Titanic.Age[Titanic.Sex == 'male']
Age_female = Titanic.Age[Titanic.Sex == 'female']

# One distplot call per group; only the data, colour and legend label differ.
for ages, shade, legend_text in (
    (Age_Male, 'steelblue', '男性'),
    (Age_female, 'purple', '女性'),
):
    sns.distplot(ages, bins=20, kde=False, hist_kws={'color': shade}, label=legend_text)

plt.title('男女乘客的年齡直方圖')
plt.legend()
plt.show()
五、繪製箱線圖(盒型圖)
# Single box plot of the price_unit column.
Sec_Buildings = pd.read_excel(r'E:/sec_buildings.xlsx')

# Appearance of the individual box-plot elements.
box_style = {'color': 'black', 'facecolor': 'steelblue'}
outlier_style = {'marker': 'o', 'markerfacecolor': 'red', 'markersize': 3}
mean_style = {'marker': 'D', 'markerfacecolor': 'indianred', 'markersize': 4}
median_style = {'linestyle': '--', 'color': 'orange'}

plt.boxplot(
    x=Sec_Buildings.price_unit,
    patch_artist=True,      # needed so the box can take a face colour
    showmeans=True,
    boxprops=box_style,
    flierprops=outlier_style,
    meanprops=mean_style,
    medianprops=median_style,
    labels=[''],            # no tick label under the single box
)
plt.title('二手房單價分佈的箱線圖')
plt.show()
六、分組箱線圖
1.plt生成分組箱線圖
import numpy as np
# Grouped box plot: one box per region, ordered by descending mean unit price.
group_region = Sec_Buildings.groupby('region')
avg_price = group_region.aggregate({'price_unit': np.mean}).sort_values('price_unit', ascending=False)

# One price series per region, in the same order as avg_price.index.
# The original built this list with a loop whose body had lost its
# indentation, which raised IndentationError.
region_price = [
    Sec_Buildings.price_unit[Sec_Buildings.region == region]
    for region in avg_price.index
]

plt.boxplot(
    x=region_price,
    patch_artist=True,
    labels=avg_price.index,
    showmeans=True,
    boxprops={'color': 'black', 'facecolor': 'steelblue'},
    flierprops={'marker': 'o', 'markerfacecolor': 'red', 'markersize': 3},
    meanprops={'marker': 'D', 'markerfacecolor': 'indianred', 'markersize': 4},
    medianprops={'linestyle': '--', 'color': 'orange'},
)
plt.ylabel('單價(元)')
plt.title('不同行政區域的二手房單價對比')
plt.show()
2.sns生成分組箱線圖
import seaborn as sns
# Grouped box plot with seaborn, boxes ordered by avg_price.index.
marker_styles = {
    'flierprops': {'marker': 'o', 'markerfacecolor': 'red', 'markersize': 3},
    'meanprops': {'marker': 'D', 'markerfacecolor': 'indianred', 'markersize': 4},
    'medianprops': {'linestyle': '--', 'color': 'orange'},
}
sns.boxplot(
    data=Sec_Buildings,
    x='region',
    y='price_unit',
    order=avg_price.index,
    showmeans=True,
    color='steelblue',
    **marker_styles,
)
plt.xlabel('')  # blank out the x-axis label
plt.ylabel('單價(元)')
plt.title('不同行政區域的二手房單價對比')
plt.show()
七、分組小提琴圖
# Split violin plot of total_bill per day, one half per sex.
tips = pd.read_csv(r'E:\tips.csv')

weekday_order = ['Thur', 'Fri', 'Sat', 'Sun']
sns.violinplot(
    data=tips,
    x="total_bill",
    y="day",
    hue="sex",
    order=weekday_order,
    scale='count',
    split=True,       # draw the two hue levels as halves of one violin
    palette='RdBu',
)
plt.title('每天不同性別客戶的消費額情況')
plt.legend(loc='upper center', ncol=2)
plt.show()
八、繪製單條折線圖
# Single line chart of Counts against Date.
wechat = pd.read_excel(r'E:\wechat.xlsx')

# Line and marker appearance are described separately and merged in the call.
line_style = dict(linestyle='-', linewidth=2, color='steelblue')
marker_style = dict(
    marker='o',
    markersize=6,
    markeredgecolor='black',
    markerfacecolor='brown',
)
plt.plot(wechat.Date, wechat.Counts, **line_style, **marker_style)

plt.ylabel('人數')
plt.title('每天微信文章閱讀人數趨勢')
plt.show()
九、繪製兩條折線圖
import matplotlib as mpl
# Two line series (Counts and Times) against Date on the same axes.
plt.plot(
    wechat.Date,
    wechat.Counts,
    linestyle='-',
    color='steelblue',
    label='閱讀人數',
)
plt.plot(
    wechat.Date,
    wechat.Times,
    linestyle='--',
    color='indianred',
    label='閱讀人次',
)

ax = plt.gca()
# Show tick labels as month-day.
date_format = mpl.dates.DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_format)
# Place a major tick every 7 axis units. The original first assigned
# LinearLocator(10) and overwrote it on the next line; that dead
# assignment is dropped.
xlocator = mpl.ticker.MultipleLocator(7)
ax.xaxis.set_major_locator(xlocator)
# Rotate the tick labels 45 degrees.
plt.xticks(rotation=45)

# The original had a stray full-width "、" after this call, which is a
# SyntaxError.
plt.ylabel('人數')
plt.title('每天微信文章閱讀人數與人次趨勢')
plt.legend()
plt.show()
十、繪製散點圖
1.plt繪製散點圖
# Scatter plot of Petal_Length against Petal_Width with matplotlib.
iris = pd.read_csv(r'C:\Users\Administrator\Desktop\iris.csv')

petal_width = iris.Petal_Width
petal_length = iris.Petal_Length
plt.scatter(x=petal_width, y=petal_length, color='steelblue')

plt.xlabel('花瓣寬度')
plt.ylabel('花瓣長度')
plt.title('鳶尾花的花瓣寬度與長度關係')
plt.show()
2.pandas繪製散點圖
# The same scatter plot drawn through the pandas DataFrame.plot API.
scatter_options = {
    'kind': 'scatter',
    'x': 'Petal_Width',
    'y': 'Petal_Length',
    'title': '鳶尾花的花瓣寬度與長度關係',
}
iris.plot(**scatter_options)
plt.xlabel('花瓣寬度')
plt.ylabel('花瓣長度')
plt.show()
3.seaborn模組繪製分組散點圖
# Scatter plot grouped by Species, drawn with seaborn's lmplot.
lmplot_options = dict(
    data=iris,
    x='Petal_Width',
    y='Petal_Length',
    hue='Species',
    legend_out=False,
    truncate=True,
)
sns.lmplot(**lmplot_options)

plt.xlabel('花瓣寬度')
plt.ylabel('花瓣長度')
plt.title('鳶尾花的花瓣寬度與長度關係')
plt.show()
十一、氣泡圖
# Bubble chart: Sales against Profit, with bubble area driven by profit ratio.
Prod_Category = pd.read_excel(r'C:\Users\Administrator\Desktop\SuperMarket.xlsx')

# Min-max scale Profit_Ratio; the small constant keeps the minimum above zero.
range_diff = Prod_Category.Profit_Ratio.max() - Prod_Category.Profit_Ratio.min()
Prod_Category['std_ratio'] = (Prod_Category.Profit_Ratio - Prod_Category.Profit_Ratio.min()) / range_diff + 0.001

# One scatter layer per product category; only the category and colour differ.
for category, shade in (
    ('辦公用品', 'steelblue'),
    ('技術產品', 'indianred'),
    ('傢俱產品', 'black'),
):
    in_category = Prod_Category.Category == category
    plt.scatter(
        x=Prod_Category.Sales[in_category],
        y=Prod_Category.Profit[in_category],
        s=Prod_Category.std_ratio[in_category] * 1000,
        color=shade,
        label=category,
        alpha=0.6,
    )

plt.xlabel('銷售額')
plt.ylabel('利潤')
plt.title('銷售額、利潤及利潤率的氣泡圖')
plt.legend()
plt.show()
十二、熱力圖
# Heat map of summed Sales by month (rows) and year (columns).
Sales = pd.read_excel(r'C:\Users\Administrator\Desktop\Sales.xlsx')

# Derive the calendar year and month from the Date column.
order_dates = Sales.Date.dt
Sales['year'] = order_dates.year
Sales['month'] = order_dates.month

Summary = Sales.pivot_table(
    values='Sales',
    index='month',
    columns='year',
    aggfunc=np.sum,
)

heatmap_options = dict(
    cmap='PuBuGn',
    linewidths=.1,
    annot=True,     # print the value inside each cell
    fmt='.1e',      # scientific notation, one decimal place
)
sns.heatmap(data=Summary, **heatmap_options)
plt.title('每年各月份銷售總額熱力圖')
plt.show()
十三、多圖組合(訂單等級餅圖、月銷售趨勢、運輸成本箱線圖、客單價分佈圖)
# Four-panel figure on a 2x3 grid: pie, line, box plot and histogram.
Prod_Trade = pd.read_excel(r'C:\Users\Administrator\Desktop\Prod_Trade.xlsx')
trade_dates = Prod_Trade.Date.dt
Prod_Trade['year'] = trade_dates.year
Prod_Trade['month'] = trade_dates.month

plt.figure(figsize=(12, 6))
grid_shape = (2, 3)
# Rows belonging to 2012, reused by three of the four panels.
is_2012 = Prod_Trade.year == 2012

# Top-left: share of each order class in 2012.
ax1 = plt.subplot2grid(shape=grid_shape, loc=(0, 0))
Class_Counts = Prod_Trade.Order_Class[is_2012].value_counts()
Class_Percent = Class_Counts / Class_Counts.sum()
ax1.set_aspect(aspect='equal')
ax1.pie(x=Class_Percent.values, labels=Class_Percent.index, autopct='%.1f%%')
ax1.set_title('各等級訂單比例')

# Top-middle: summed Sales per month in 2012.
ax2 = plt.subplot2grid(shape=grid_shape, loc=(0, 1))
Month_Sales = Prod_Trade[is_2012].groupby(by='month').aggregate({'Sales': np.sum})
Month_Sales.plot(title='2012年各月銷售趨勢', ax=ax2, legend=False)
ax2.set_xlabel('')

# Right column, spanning both rows: Trans_Cost per Transport value.
ax3 = plt.subplot2grid(shape=grid_shape, loc=(0, 2), rowspan=2)
sns.boxplot(x='Transport', y='Trans_Cost', data=Prod_Trade, ax=ax3)
ax3.set_title('各運輸方式成本分佈')
ax3.set_xlabel('')
ax3.set_ylabel('運輸成本')

# Bottom row, spanning two columns: distribution of 2012 Sales.
ax4 = plt.subplot2grid(shape=grid_shape, loc=(1, 0), colspan=2)
sns.distplot(
    Prod_Trade.Sales[is_2012],
    bins=40,
    norm_hist=True,
    ax=ax4,
    hist_kws={'color': 'steelblue'},
    kde_kws={'linestyle': '--', 'color': 'red'},
)
ax4.set_title('2012年客單價分佈圖')
ax4.set_xlabel('銷售額')

# Adjust the vertical and horizontal spacing between the panels.
plt.subplots_adjust(hspace=0.6, wspace=0.3)
plt.show()