Chapter 3: Logistic Regression, Manual and sklearn Versions

Posted by cute_Learner on 2022-02-04

1 Importing numpy

import numpy as np

2 The sigmoid function

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

demox = np.array([1, 2, 3])
print(sigmoid(demox))
# Passing a plain Python list raises an error,
# because unary negation is not defined for lists:
# demox = [1, 2, 3]
# print(sigmoid(demox))

Result

[0.73105858 0.88079708 0.95257413]
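One caveat worth noting: for large negative inputs, np.exp(-x) overflows to inf (NumPy emits a RuntimeWarning, although the returned value of 0 is still correct). A minimal numerically stable variant, shown here as a sketch (stable_sigmoid is a name introduced for illustration, not part of the original), branches on the sign of the input:

def stable_sigmoid(x):
    # Sketch: evaluate the sigmoid without exponentiating large positive numbers
    x = np.asarray(x, dtype=float)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1 / (1 + np.exp(-x[pos]))   # safe: exponent is <= 0
    expx = np.exp(x[~pos])                 # safe: exponent is < 0
    out[~pos] = expx / (1 + expx)          # algebraically equal to 1/(1+exp(-x))
    return out

print(stable_sigmoid(np.array([-1000.0, 0.0, 1000.0])))  # [0.  0.5 1. ]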

3 Defining the logistic regression model body

### Core of the logistic regression model
def logistic(x, y, w, b):
    # Number of training samples
    num_train = x.shape[0]
    # Model output
    y_hat = sigmoid(np.dot(x, w) + b)
    # Cross-entropy loss
    cost = -1 / num_train * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
    # Gradient of the weights
    dW = np.dot(x.T, (y_hat - y)) / num_train
    # Gradient of the bias
    db = np.sum(y_hat - y) / num_train
    # Squeeze the loss array down to a scalar
    cost = np.squeeze(cost)
    return y_hat, cost, dW, db
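For reference, the quantities computed by logistic correspond directly to the binary cross-entropy loss and its gradients. With m training samples and model output \hat{y} = \sigma(Xw + b):

L(w, b) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y_i \log \hat{y}_i + (1 - y_i) \log(1 - \hat{y}_i) \right]

\frac{\partial L}{\partial w} = \frac{1}{m} X^{\top} (\hat{y} - y), \qquad
\frac{\partial L}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} (\hat{y}_i - y_i)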

4 The initialization function

def init_parm(dims):
    # Zero-initialize the weight vector and bias
    w = np.zeros((dims, 1))
    b = 0
    return w, b

5 Defining the training process

### Training loop for logistic regression
def logistic_train(X, y, learning_rate, epochs):
    # Initialize the model parameters
    W, b = init_parm(X.shape[1])
    cost_list = []
    for i in range(epochs):
        # Current model output, loss, and parameter gradients
        a, cost, dW, db = logistic(X, y, W, b)
        # Gradient-descent parameter update
        W = W - learning_rate * dW
        b = b - learning_rate * db
        # Record and report the loss every 100 epochs
        if i % 100 == 0:
            cost_list.append(cost)
            print('epoch %d cost %f' % (i, cost))
    params = {
        'W': W,
        'b': b
    }
    grads = {
        'dW': dW,
        'db': db
    }
    return cost_list, params, grads

6 Defining the prediction function

def predict(X, params):
    # Probability estimates, thresholded at 0.5
    y_pred = sigmoid(np.dot(X, params['W']) + params['b'])
    y_preds = [1 if y_pred[i] > 0.5 else 0 for i in range(len(y_pred))]
    return y_preds

7 Generating the data

# Import the matplotlib plotting library
import matplotlib.pyplot as plt
# Import the classification data generator
from sklearn.datasets import make_classification
# Generate a simulated 100x2 binary classification dataset
x, label = make_classification(
    n_samples=100,           # number of samples
    n_classes=2,             # number of classes
    n_features=2,            # number of features
    n_redundant=0,           # number of redundant features (random combinations of informative ones)
    n_informative=2,         # number of informative (useful) features
    n_repeated=0,            # number of repeated features (random picks from informative and redundant ones)
    n_clusters_per_class=2,  # number of clusters per class
    random_state=1,
)
print("x.shape =", x.shape)
print("label.shape = ", label.shape)
print("np.unique(label) =", np.unique(label))
print(set(label))
# Seed a random number generator
rng = np.random.RandomState(2)
# Add uniformly distributed noise to the features
# (see https://blog.csdn.net/vicdd/article/details/52667709)
x += 2 * rng.uniform(size=x.shape)
# Set of label classes
unique_label = set(label)
# Pick one color per class
print(np.linspace(0, 1, len(unique_label)))
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_label)))
print(colors)
# Scatter plot of the simulated data
for k, col in zip(unique_label, colors):
    x_k = x[label == k]
    plt.plot(x_k[:, 0], x_k[:, 1], 'o', markerfacecolor=col, markeredgecolor="k",
             markersize=14)
plt.title('Simulated binary data set')
plt.show()

Result

x.shape = (100, 2)
label.shape =  (100,)
np.unique(label) = [0 1]
{0, 1}
[0. 1.]
[[0.61960784 0.00392157 0.25882353 1.        ]
 [0.36862745 0.30980392 0.63529412 1.        ]]

[Figure: scatter plot of the simulated binary data set]

Review

# Review: stack the features and labels into one array
mylabel = label.reshape((-1, 1))
data = np.concatenate((x, mylabel), axis=1)
print(data.shape)

Result

(100, 3)

8 Splitting the dataset

# 70/30 train/test split by slicing
offset = int(x.shape[0] * 0.7)
x_train, y_train = x[:offset], label[:offset].reshape((-1, 1))
x_test, y_test = x[offset:], label[offset:].reshape((-1, 1))
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

Result

(70, 2)
(70, 1)
(30, 2)
(30, 1)
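A slicing split like this preserves the sample order. As an alternative sketch, sklearn's train_test_split shuffles before splitting (the variable names below are introduced here to avoid clobbering the ones above):

# Alternative split: shuffle-and-split with sklearn
from sklearn.model_selection import train_test_split

x_train2, x_test2, y_train2, y_test2 = train_test_split(
    x, label.reshape((-1, 1)),  # keep the column-vector labels used above
    test_size=0.3,              # same 70/30 proportion as the manual split
    random_state=1,             # fixed seed for reproducibility
)
print(x_train2.shape, x_test2.shape)  # (70, 2) (30, 2)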

9 Training

cost_list, params, grads = logistic_train(x_train, y_train, 0.01, 1000)
print(params['b'])

Result

epoch 0 cost 0.693147
epoch 100 cost 0.568743
epoch 200 cost 0.496925
epoch 300 cost 0.449932
epoch 400 cost 0.416618
epoch 500 cost 0.391660
epoch 600 cost 0.372186
epoch 700 cost 0.356509
epoch 800 cost 0.343574
epoch 900 cost 0.332689
-0.6646648941379839
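To confirm that the loss decreases steadily, the values recorded in cost_list can be plotted (a minimal sketch; recall that one value was stored every 100 epochs):

# Plot the training loss recorded every 100 epochs
plt.plot(np.arange(len(cost_list)) * 100, cost_list)
plt.xlabel('epoch')
plt.ylabel('cross-entropy loss')
plt.title('Training loss')
plt.show()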

10 Computing the accuracy

from sklearn.metrics import accuracy_score, classification_report

y_pred = predict(x_test, params)
print("y_pred = ", y_pred)
print(y_test.shape)
# accuracy_score accepts the (30, 1) column vector; it appears to squeeze it automatically
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Result

y_pred =  [0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0]
(30, 1)
0.9333333333333333
              precision    recall  f1-score   support

           0       0.92      0.92      0.92        12
           1       0.94      0.94      0.94        18

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30
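Beyond the overall accuracy, a confusion matrix shows where the 2 misclassified test points fall (a short sketch using sklearn.metrics.confusion_matrix):

from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes;
# ravel() flattens the (30, 1) labels to a plain 1-D array
print(confusion_matrix(y_test.ravel(), y_pred))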

11 Plotting the logistic regression decision boundary

### Plot the logistic regression decision boundary
def plot_logistic(X_train, y_train, params):
    # Number of training samples
    n = X_train.shape[0]
    xcord1, ycord1, xcord2, ycord2 = [], [], [], []
    # Collect the coordinates of each class into separate lists
    for i in range(n):
        if y_train[i] == 1:
            xcord1.append(X_train[i][0])
            ycord1.append(X_train[i][1])
        else:
            xcord2.append(X_train[i][0])
            ycord2.append(X_train[i][1])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    # Range of x1 values to draw the boundary over
    x = np.arange(-1.5, 3, 0.1)
    # Decision boundary: the line where W1*x1 + W2*x2 + b = 0
    y = (-params['b'] - params['W'][0] * x) / params['W'][1]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

plot_logistic(x_train, y_train, params)

Result

[Figure: training points with the fitted decision boundary]
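The boundary formula in the code follows from the 0.5 classification threshold: \sigma(z) > 0.5 exactly when z > 0, so the boundary is the line where the linear score is zero:

\sigma(W_1 x_1 + W_2 x_2 + b) = 0.5
\;\Longleftrightarrow\;
W_1 x_1 + W_2 x_2 + b = 0
\;\Longleftrightarrow\;
x_2 = \frac{-b - W_1 x_1}{W_2}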

12 sklearn implementation

from sklearn.linear_model import LogisticRegression

# ravel() flattens the (70, 1) labels to avoid sklearn's column-vector warning
clf = LogisticRegression(random_state=0).fit(x_train, y_train.ravel())
y_pred = clf.predict(x_test)
print(y_pred)
accuracy_score(y_test, y_pred)

Result

[0 0 1 1 1 1 0 0 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0]
0.9333333333333333
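As a final sanity check, the fitted sklearn parameters can be compared with the manually trained ones via the coef_ and intercept_ attributes; the values will differ somewhat, since sklearn's LogisticRegression applies L2 regularization by default and trains to convergence rather than for a fixed 1000 epochs:

# Compare the sklearn parameters with the manually trained ones
print("sklearn  W =", clf.coef_, " b =", clf.intercept_)
print("manual   W =", params['W'].ravel(), " b =", params['b'])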
