Matplotlib 是 Python 的資料視覺化函式庫
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
1、字串轉化為日期
# Read unrate.csv into a DataFrame, then convert the DATE column from
# strings to datetime values.
unrate = pd.read_csv("unrate.csv")
unrate["DATE"] = pd.to_datetime(unrate["DATE"])
2、折線圖
# Line chart of the first 12 rows of `unrate`.
data1 = unrate[0:12]

plt.plot(data1["DATE"], data1["VALUE"])  # x-axis data, y-axis data
plt.xticks(rotation=45)                  # rotate the x tick labels by 45 degrees
plt.xlabel("Date Month")                 # x-axis description
plt.ylabel("Rate Value")                 # y-axis description
plt.title("my first plt")                # title
plt.show()
3、多圖拼接(子圖)
# One figure holding two subplots arranged in a 2-row, 1-column grid.
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)  # top cell
ax2 = fig.add_subplot(2, 1, 2)  # bottom cell

# Top: five random integers drawn from [1, 5) on x against 0..4 on y.
ax1.plot(np.random.randint(1, 5, 5), np.arange(5))
# Bottom: 0, 3, ..., 27 on x against 0..9 on y.
ax2.plot(np.arange(10) * 3, np.arange(10))

plt.show()
4、一圖多線
# Two 12-row slices of `unrate` drawn as separate coloured lines on one figure,
# with the month number on the x axis.
fig = plt.figure(figsize=(6, 3))  # set the figure size

# Copy each slice before adding a column: assigning into a bare slice of
# `unrate` triggers pandas' SettingWithCopyWarning and may write into a view.
data1 = unrate[0:12].copy()
data1["MONTH"] = data1["DATE"].dt.month
plt.plot(data1["MONTH"], data1["VALUE"], c="red")

data2 = unrate[12:24].copy()
data2["MONTH"] = data2["DATE"].dt.month
plt.plot(data2["MONTH"], data2["VALUE"], c="blue")

plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.xlabel("Date Month")
plt.ylabel("Rate Value")
plt.title("my first plt")
plt.show()
5、一圖多線 - 自動跑程式碼(帶圖例)
# Five consecutive 12-row slices of `unrate`, one coloured line per slice,
# each labelled 1948, 1949, ... and listed in a legend.
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']

for i, color in enumerate(colors):
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)
    # Take the month straight from DATE: nothing shown here adds a MONTH
    # column to `unrate` itself, so subset['MONTH'] would raise KeyError.
    plt.plot(subset["DATE"].dt.month, subset['VALUE'], c=color, label=label)

# Legend position; loc='best' would let matplotlib choose the location.
plt.legend(loc='upper left')
plt.show()
6、條形圖
# Vertical bar chart of the rating columns for the row labelled 0 of `fand_new`.
fand_col = [
    "Fandango_Stars",
    "Fandango_Ratingvalue",
    "Metacritic_norm",
    "RT_user_norm_round",
    "IMDB_norm_round",
]

# DataFrame.ix has been removed from pandas; .loc performs the same
# label-based lookup (row label 0, the listed columns).
bar_heights = fand_new.loc[0, fand_col].values  # bar heights
bar_positions = np.arange(5) + 0.75             # left edge of each bar
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# 0.5 is the bar width. align="edge" makes bar_positions the left edges, so
# every bar spans [k - 0.25, k + 0.25] and is centred on its tick at k;
# matplotlib's current default (align="center") would shift the bars left.
ax.bar(bar_positions, bar_heights, 0.5, align="edge")
ax.set_xticks(tick_positions)
ax.set_xticklabels(fand_col, rotation=90)
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
7、條形圖 - 橫向
# Horizontal bar chart of the rating columns for the row labelled 0 of `fand_new`.
fand_col = [
    "Fandango_Stars",
    "Fandango_Ratingvalue",
    "Metacritic_norm",
    "RT_user_norm_round",
    "IMDB_norm_round",
]

# DataFrame.ix has been removed from pandas; .loc performs the same
# label-based lookup (row label 0, the listed columns).
bar_heights = fand_new.loc[0, fand_col].values  # bar lengths along the x axis
bar_positions = np.arange(5) + 0.75             # lower edge of each bar
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# Horizontal bars, 0.5 thick. align="edge" anchors each bar at its lower edge
# so that it ends up centred on the matching y tick.
ax.barh(bar_positions, bar_heights, 0.5, align="edge")
ax.set_yticks(tick_positions)
ax.set_yticklabels(fand_col, rotation=0)
# In a horizontal chart the rating values run along x and the sources along y,
# so the axis descriptions are the reverse of the vertical bar chart.
ax.set_xlabel('Average Rating')
ax.set_ylabel('Rating Source')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
8、散點圖
# Scatter plot of Fandango_Stars (x) against Metacritic_norm (y).
fig, ax = plt.subplots()
ax.scatter(fand_new['Fandango_Stars'], fand_new['Metacritic_norm'])
ax.set_xlabel('Fandango')
# The y values come from the Metacritic_norm column, so label the axis to
# match (it previously read 'Rotten Tomatoes').
ax.set_ylabel('Metacritic')
plt.show()
9、直方圖
# Frequency tables for two rating columns, ordered by rating value.
fandango_distribution = fand_new['Fandango_Stars'].value_counts().sort_index()
imdb_distribution = fand_new['IMDB_norm_round'].value_counts().sort_index()

# What are bins? Put simply, groups: the data is split into that many groups.
# Default: bins=10.
fig, ax = plt.subplots()
# range is the span of x values to include in the histogram.
ax.hist(fand_new['Fandango_Stars'], range=(4, 5), bins=5)
plt.show()
10、多圖
# 2x2 grid of histograms, one per rating column, sharing the same bins,
# x range and y limits.
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)

# Each title names the column actually drawn in that panel; the IMDB and
# Rotten Tomatoes titles were previously attached to each other's data.
panels = [
    (ax1, 'Fandango_Stars', 'Distribution of Fandango Ratings'),
    (ax2, 'IMDB_norm_round', 'Distribution of IMDB Ratings'),
    (ax3, 'Metacritic_norm', 'Distribution of Metacritic Ratings'),
    (ax4, 'RT_user_norm_round', 'Distribution of Rotten Tomatoes Ratings'),
]
for axis, column, title in panels:
    axis.hist(fand_new[column], bins=20, range=(0, 5))
    axis.set_title(title)
    axis.set_ylim(0, 50)

plt.show()
11、箱形圖(四分位圖)
# Box plot of the Metacritic_norm column.
fig, ax = plt.subplots()
ax.boxplot(fand_new['Metacritic_norm'])
# The tick label names the column being plotted (it previously read
# 'Rotten Tomatoes' although the data is Metacritic_norm).
ax.set_xticklabels(['Metacritic'])
ax.set_ylim(0, 5)
plt.show()
12、多圖 - 透過陣列
# Side-by-side box plots: passing a 2-D array draws one box per column.
num_cols = [
    'Fandango_Stars',
    'IMDB_norm_round',
    'Metacritic_norm',
    'RT_user_norm_round',
]

fig, ax = plt.subplots()
ax.boxplot(fand_new[num_cols].values)
ax.set_xticklabels(num_cols, rotation=90)  # label each box with its column name
ax.set_ylim(0, 5)
plt.show()
13、資料視覺化 - 簡潔一些
# Women vs. men share for the Biology column, drawn without tick marks or
# spines to reduce clutter.
fig, ax = plt.subplots()
ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# The men's line is plotted as 100 minus the women's value.
ax.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')

# Configurable: which sides show tick marks. Real booleans are needed here;
# recent matplotlib releases no longer treat the string "off" as False, so
# the ticks would stay visible.
ax.tick_params(bottom=False, top=False, left=False, right=False)

# Hide all four border lines of the axes.
for spine in ax.spines.values():
    spine.set_visible(False)

ax.legend(loc='upper right')
plt.show()
14、資料視覺化 - 多圖 - 透過程式
# 2x2 grid of women/men line plots, one subplot per column in major_cats.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)  # subplot numbers start at 1
    women_share = women_degrees[major]
    ax.plot(women_degrees['Year'], women_share, c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_share, c='green', label='Men')

# Called once after the loop, so the legend lands on the current (last) axes.
plt.legend(loc='upper right')
plt.show()
15、資料視覺化 - 多圖 - 透過程式跑 - 多圖 簡潔
# 2x2 grid of women/men line plots with spines and tick marks removed and
# identical axis limits on every subplot.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')

    # Hide all four border lines of the axes.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Real booleans are needed: recent matplotlib releases no longer treat
    # the string "off" as False, so the ticks would stay visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called once after the loop, so the legend lands on the current (last) axes.
plt.legend(loc='upper right')
plt.show()
16、如何使圖表更好看?
# Same 2x2 grid, drawn with custom RGB colours.
# Custom colours: matplotlib takes RGB components as floats in [0, 1],
# hence the division by 255.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(12, 12))
for sp in range(4):
    major = major_cats[sp]
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men')

    # Hide all four border lines of the axes.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Real booleans are needed: recent matplotlib releases no longer treat
    # the string "off" as False, so the ticks would stay visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called once after the loop, so the legend lands on the current (last) axes.
plt.legend(loc='upper right')
plt.show()
17、加粗線
# Four subplots in a single row, drawn with thicker lines.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(18, 3))
for sp in range(4):
    major = major_cats[sp]
    ax = fig.add_subplot(1, 4, sp + 1)
    # linewidth sets the line thickness.
    ax.plot(women_degrees['Year'], women_degrees[major],
            c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[major],
            c=cb_orange, label='Men', linewidth=3)

    # Hide all four border lines of the axes.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Real booleans are needed: recent matplotlib releases no longer treat
    # the string "off" as False, so the ticks would stay visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called once after the loop, so the legend lands on the current (last) axes.
plt.legend(loc='upper right')
plt.show()
18、加註釋
# Four subplots in a single row; the first and last carry in-plot text
# annotations naming the two lines, and no legend is drawn.
fig = plt.figure(figsize=(18, 3))
for sp in range(4):
    major = major_cats[sp]
    ax = fig.add_subplot(1, 4, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major],
            c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[major],
            c=cb_orange, label='Men', linewidth=3)

    # Hide all four border lines of the axes.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # Real booleans are needed: recent matplotlib releases no longer treat
    # the string "off" as False, so the ticks would stay visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

    # Annotations: ax.text(x, y, s) places s at data coordinates (x, y).
    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
    elif sp == 3:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')

plt.show()