【機器學習】實現層面 決策樹 並用graphviz視覺化樹

CHNwldcmzy發表於2020-10-28
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
# Load the training spreadsheet and discard the 'uid' column in one chained
# expression; the bare name on the last line displays the frame in a notebook.
df_t = pd.read_excel(
    r'D:\EdgeDownloadPlace\3dd40612152202ee8440f82a3d277008\train.xlsx'
).drop(columns='uid')
df_t

df_t

# Replace every '?' placeholder with the most frequent real value of its
# column.
for col in df_t.columns:
    is_missing = df_t[col] == '?'
    if not is_missing.any():
        # Nothing to fill in this column.
        continue
    # value_counts() orders by descending frequency, so the first index entry
    # of the non-missing values is the column's mode.
    observed = df_t.loc[~is_missing, col].value_counts()
    if observed.empty:
        raise ValueError(
            f"column {col!r} contains no value other than '?' to impute from"
        )
    # A single .loc assignment writes into df_t itself.  The chained form
    # df_t[col][mask] = ... may write to a temporary copy, and never updates
    # the frame when pandas Copy-on-Write is enabled.
    df_t.loc[is_missing, col] = observed.index[0]
df_t

df_t2

# Turn the whole frame into one float32 NumPy matrix so it can be sliced into
# features and labels below.
arr_t = df_t.to_numpy().astype(np.float32)
arr_t

array([[61.,  0.,  2., ...,  0.,  7.,  0.],
       [64.,  1.,  3., ...,  0.,  7.,  1.],
       [40.,  0.,  4., ...,  0.,  6.,  1.],
       ...,
       [65.,  0.,  3., ...,  1.,  3.,  0.],
       [63.,  1.,  4., ...,  0.,  7.,  0.],
       [55.,  0.,  4., ...,  1.,  7.,  1.]], dtype=float32)

# Every column but the last is a feature; the last column is the label.
features = arr_t[:, :-1]
labels = arr_t[:, -1]

# Hold out 30% of the rows for evaluation.  No random_state is given, so the
# split (and therefore the score) differs from run to run.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, labels, test_size=0.3)

# Entropy-based decision tree, at most 4 levels deep, that only splits nodes
# holding at least 10 samples.
dtc = tree.DecisionTreeClassifier(
    criterion="entropy",
    max_depth=4,
    min_samples_split=10,
)
dtc.fit(Xtrain, Ytrain)

# Mean accuracy on the held-out rows.
score = dtc.score(Xtest, Ytest)
score

0.8140703517587939

# Render the fitted tree with Graphviz.  The graphviz package is imported
# here because nothing earlier in the script brings it into scope.
import graphviz

# With no output file given, export_graphviz returns the DOT source as a
# string; graphviz.Source wraps it so a notebook can display the drawing.
dot_source = tree.export_graphviz(
    dtc,
    # Every column except the last (the label) is a feature.  A plain list is
    # passed because some scikit-learn releases reject a pandas Index here.
    feature_names=list(df_t.columns[:-1]),
    # Names are matched to the classes in ascending numerical order.
    # NOTE(review): confirm that the smaller label value really means '患病'.
    class_names=['患病', '不患病'],
    filled=True,
    rounded=True,
)
graph_tree = graphviz.Source(dot_source)
graph_tree

graph_tree

相關文章