Random Forest: the n_estimators Learning Curve

Posted by ThankCAT on 2023-04-05

Random Forest

Score comparison: a single tree vs. a random forest

# Imports
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# Load the wine dataset
wine = load_wine()
# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3)
# Instantiate the decision tree and the random forest with random_state=0
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)
# Fit both models
clf.fit(x_train, y_train)
rfc.fit(x_train, y_train)
# Score both models on the test set
clf_score = clf.score(x_test, y_test)
rfc_score = rfc.score(x_test, y_test)
print("single tree: {0}\nrandom forest: {1}".format(clf_score, rfc_score))
single tree: 0.9074074074074074
random forest: 0.9629629629629629
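Note that train_test_split above is not seeded, so these numbers will shift from run to run. A minimal sketch, assuming you want the comparison itself to be reproducible, is to pin the split as well:

# Assumption: seeding the split (random_state=0) to make the scores repeatable
x_train, x_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3, random_state=0
)
clf = DecisionTreeClassifier(random_state=0).fit(x_train, y_train)
rfc = RandomForestClassifier(random_state=0).fit(x_train, y_train)
print(clf.score(x_test, y_test), rfc.score(x_test, y_test))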

Single tree vs. random forest under cross-validation

# Import cross-validation and plotting tools
%matplotlib inline
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
# Instantiate the decision tree and the random forest
clf = DecisionTreeClassifier()
rfc = RandomForestClassifier(n_estimators=25)  # a forest of 25 trees
# Run 10-fold cross-validation for each model
clf_cross = cross_val_score(clf, wine.data, wine.target, cv=10)
rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10)
# Compare the mean cross-validation scores
print("single tree mean score: {}\nrandom forest mean score: {}".format(clf_cross.mean(), rfc_cross.mean()))
single tree mean score: 0.8705882352941178
random forest mean score: 0.9722222222222221
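For a classifier, cv=10 means stratified 10-fold splitting with a fixed fold order, so the run-to-run variation here comes from the models, not the folds. If you want to shuffle and seed the folds explicitly, a sketch (this splitter setup is my addition, not part of the original post):

from sklearn.model_selection import StratifiedKFold

# Assumption: an explicit, shuffled, seeded 10-fold splitter in place of cv=10
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
scores = cross_val_score(RandomForestClassifier(n_estimators=25, random_state=0),
                         wine.data, wine.target, cv=skf)
print(scores.mean())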
# Plot the per-fold scores of both models
plt.plot(range(1, 11), clf_cross, label="single tree")
plt.plot(range(1, 11), rfc_cross, label="random forest")
plt.xticks(range(1, 11))
plt.legend()
[Figure: per-fold cross-validation scores, single tree vs. random forest]


clf_cross = cross_val_score(clf, wine.data, wine.target, cv=10)
clf_cross
array([0.88888889, 0.88888889, 0.72222222, 0.88888889, 0.83333333,
       0.83333333, 1.        , 0.94444444, 0.94117647, 0.76470588])
rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10)
rfc_cross
array([1.        , 1.        , 0.94444444, 0.94444444, 0.88888889,
       1.        , 1.        , 1.        , 1.        , 1.        ])
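Since cross_val_score returns NumPy arrays, the fold-by-fold gap can be read off directly; a small sketch on top of the two arrays above:

# Per-fold advantage of the forest (positive means the forest wins that fold)
diff = rfc_cross - clf_cross
print(diff)
print("folds where the forest is at least as good:", (diff >= 0).sum(), "out of 10")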

Decision tree vs. random forest over ten rounds of cross-validation

# Collect the mean score of each round
clf_list = []
rfc_list = []
for i in range(10):
    clf = DecisionTreeClassifier()
    rfc = RandomForestClassifier(n_estimators=25)
    clf_cross_mean = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    rfc_cross_mean = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    clf_list.append(clf_cross_mean)
    rfc_list.append(rfc_cross_mean)
# Plot the per-round mean scores of both models
plt.plot(range(1, 11), clf_list, label="single tree")
plt.plot(range(1, 11), rfc_list, label="random forest")
plt.xticks(range(1, 11))
plt.legend()
[Figure: mean cross-validation score per round, single tree vs. random forest]
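Each point above is already a mean over 10 folds, so the ten rounds can be condensed further into a mean and spread per model. A quick sketch (the summary format is my own):

import numpy as np

# Summarize the ten repetitions as mean +/- standard deviation
print("single tree:   {:.4f} +/- {:.4f}".format(np.mean(clf_list), np.std(clf_list)))
print("random forest: {:.4f} +/- {:.4f}".format(np.mean(rfc_list), np.std(rfc_list)))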

The n_estimators learning curve

# Learning curve over forests of 1 to 200 trees
superpa = []
for i in range(200):
    rfc = RandomForestClassifier(n_estimators=i+1, n_jobs=-1)
    rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_cross)
# Note: list.index() is 0-based, so the best n_estimators is the printed index + 1
print(max(superpa), superpa.index(max(superpa)))
plt.figure(figsize=(20, 8))
plt.plot(range(1, 201), superpa, label="rfc_cross_mean")
plt.legend()
0.9888888888888889 20
[Figure: n_estimators learning curve, mean 10-fold CV score for 1 to 200 trees]
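So the best mean score (about 0.9889) sits at index 20, i.e. n_estimators = 21. Fitting all 200 forests is slow; one common refinement, sketched here as a suggestion rather than part of the original run, is a coarse scan followed by a fine scan around the best coarse value:

# Coarse scan: try n_estimators in steps of 10 first
coarse_grid = range(1, 201, 10)
coarse = []
for n in coarse_grid:
    rfc = RandomForestClassifier(n_estimators=n, n_jobs=-1)
    coarse.append(cross_val_score(rfc, wine.data, wine.target, cv=10).mean())
best_n = coarse_grid[coarse.index(max(coarse))]
# Fine scan: search one step's width around the coarse optimum
fine_grid = range(max(1, best_n - 10), best_n + 10)
fine = []
for n in fine_grid:
    rfc = RandomForestClassifier(n_estimators=n, n_jobs=-1)
    fine.append(cross_val_score(rfc, wine.data, wine.target, cv=10).mean())
print(max(fine), fine_grid[fine.index(max(fine))])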
