day8

DO1AKK發表於2024-09-16

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.font_manager import FontProperties
from sklearn import datasets

# Font handle pointing at the SimHei TrueType file, at 14 pt.
font = FontProperties(size=14, fname=r"C:\Windows\Fonts\simhei.ttf")

# Load the iris dataset bundled with scikit-learn and dump the whole object.
iris = datasets.load_iris()
print(iris)

# x holds the feature rows, y the class label of each row.
x, y = iris.data, iris.target

# Sample count and the first five feature rows.
print('x個數:{}'.format(len(x)))
print('x:{}'.format(x[:5]))

# Label count and the full label vector.
print('y個數:{}'.format(len(y)))
print('y:{}'.format(y))

視覺化

# Keep only the first two feature columns (plotted below as sepal length and
# sepal width) so the data can be drawn in 2-D; x_ is reused further down.
x_ = x[:, [0, 1]]

# Choose the SimHei font family up front so the Chinese labels can render.
plt.rcParams['font.sans-serif'] = ['simHei']

# Select each species by its class label in y rather than by fixed row ranges
# (0:50, 50:100, 100:150), which only worked while rows were sorted by class.
for class_id, color, name in ((0, 'r', '山鳶尾'),
                              (1, 'g', '雜色鳶尾'),
                              (2, 'b', '維吉尼亞鳶尾')):
    members = x_[y == class_id]
    plt.scatter(members[:, 0], members[:, 1], color=color, label=name, s=10)

plt.xlabel('萼片長度')
plt.ylabel('萼片寬度')
plt.title('鳶尾花資料')
plt.legend()
plt.show()

資料切割

from sklearn.model_selection import train_test_split
# Hold out one third of the two-feature samples for testing. No random_state
# is passed, so the partition differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(x_, y, test_size=1/3)
# Report both subset sizes in one message; the previous format string carried
# a stray, value-less duplicate of the test-set label.
print('訓練集個數:{},測試集個數:{}'.format(len(y_train), len(y_test)))

from sklearn.svm import SVC
from sklearn.svm import LinearSVC

# Linear-kernel SVC; probability=True is required for predict_proba later on.
clf = SVC(probability=True, kernel='linear')

訓練資料

# Train the classifier on the training split.
clf.fit(X=x_train, y=y_train)

預測資料分類結果

# Predict a class for every held-out sample, then print the predictions
# followed by their element-wise difference from the true labels (0 = match).
y_prd = clf.predict(X=x_test)
print(y_prd, y_prd - y_test, sep='\n')

獲取參數

# Print the estimator's parameter dict and its C setting. As bare expressions
# these values were computed and then discarded when run as a script.
print(clf.get_params())
print(clf.C)

看機率

# Print the class-probability rows for the first five test samples; as a bare
# expression the result was discarded when run as a script.
print(clf.predict_proba(x_test)[0:5, :])

看準確率

# Print the classifier's score on the held-out split; as a bare expression
# the result was discarded when run as a script.
print(clf.score(x_test, y_test))

測試模型(交叉驗證)

from sklearn.model_selection import cross_val_score
# 10-fold cross-validation of the current classifier on the two-feature data.
scores = cross_val_score(estimator=clf, X=x_, y=y, cv=10)
# Mean score with a +/- two-standard-deviation spread.
print('準確率:{:.4f}(+/-{:.4f})'.format(scores.mean(), 2 * scores.std()))

最佳化模型

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Base estimator whose settings the grid search will vary.
svc = SVC()

# Two sub-grids: linear kernels vary C only; RBF kernels vary C and gamma.
param_grid = [
    dict(C=[0.1, 1, 10, 20], kernel=['linear']),
    dict(C=[0.1, 1, 10, 20], kernel=['rbf'], gamma=[0.1, 1, 10, 20]),
]

# Metric used to rank the candidates.
scoring = 'accuracy'

# Search with 10-fold CV on the full feature matrix; fit() hands back the
# fitted search object, which replaces the earlier clf.
clf = GridSearchCV(svc, param_grid, scoring=scoring, cv=10).fit(x, y)
print(clf.predict(x))

檢視最優參數

# Winning parameter combination, then its cross-validated score.
print(clf.best_params_, clf.best_score_, sep='\n')

持久化模型

import pickle

序列化

# Serialise the fitted search object to an in-memory bytes value and show
# the first 100 bytes of the payload.
pkl_str = pickle.dumps(clf)
print(pkl_str[0:100])

反序列化

# Rebuild the estimator from the pickled bytes and predict with the copy.
# NOTE: pickle.loads must only be given trusted data; pkl_str was produced
# by this same script.
clf2 = pickle.loads(pkl_str)
print(clf2.predict(X=x))