[Machine Learning] Solving for Logistic Regression Parameters (Three Methods, with Code)

Posted by Daycym on 2018-06-08

Quick Review

  1. What is logistic regression?
  2. What is logistic regression used for?
  3. How do we solve for the parameters of logistic regression?

Links:
Logistic Regression Model
Solving for Logistic Regression Parameters
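For reference (notation mine, chosen to match the code below): with two features x1 and x2, the logistic regression model estimates

    P(y = 1 | x1, x2) = 1 / (1 + exp(-(theta_1*x1 + theta_2*x2 + theta_3)))

where theta_3 acts as the intercept. All three methods below estimate theta_1, theta_2, and theta_3 from the same simulated data.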

Data Preparation

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

np.random.seed(12)
num_observations = 5000

# Generate data from the given means and covariances
# np.random.multivariate_normal draws samples from a multivariate normal (Gaussian) distribution
x1 = np.random.multivariate_normal([0, 0], [[1, .75],[.75, 1]], num_observations)
x2 = np.random.multivariate_normal([1, 4], [[1, .75],[.75, 1]], num_observations)


simulated_separableish_features = np.vstack((x1, x2)).astype(np.float32)
simulated_labels = np.hstack((np.zeros(num_observations),np.ones(num_observations)))

plt.figure(figsize=(12,8))
plt.scatter(simulated_separableish_features[:, 0], simulated_separableish_features[:, 1],
            c = simulated_labels, alpha = .4)

(Figure: scatter plot of the two simulated clusters, colored by label)

Method 1: Solve for the Parameters with sklearn's Built-in LogisticRegression

# 1. Import
from sklearn.linear_model import LogisticRegression

# 2. Configure the estimator
#    C is the inverse of the regularization strength; a very large C (1e15)
#    effectively turns regularization off, making the result comparable to
#    the unregularized solvers below
clf = LogisticRegression(fit_intercept=True, C = 1e15)

# 3. Fit (train)
clf.fit(simulated_separableish_features, simulated_labels)

# 4. Predict
##  No prediction needed here

print(clf.coef_, clf.intercept_)

(Figure: printed output showing the fitted coefficients and intercept)
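Although prediction is skipped here, a quick sanity check on the fit is cheap; a minimal sketch (the exact accuracy will vary with the simulated data):

# Predict on the training data and measure accuracy
preds = clf.predict(simulated_separableish_features)
print('training accuracy:', (preds == simulated_labels).mean())
# Equivalently: clf.score(simulated_separableish_features, simulated_labels)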

Method 2: Solve for the Parameters with Gradient Descent
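The update rule used below (stated here for reference, with the same notation as the code): each iteration moves every parameter a small step alpha in the direction that increases the average log-likelihood,

    theta_j := theta_j + alpha * (1/n) * sum_i (y_i - sigmoid(z_i)) * x_ij

where z_i = theta_1*x1_i + theta_2*x2_i + theta_3, and x_ij is taken as 1 for the intercept theta_3. Strictly speaking this is gradient ascent on the log-likelihood, which is equivalent to gradient descent on the cross-entropy cost; that is why the updates below use a plus sign.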

# Sigmoid (logistic) function applied to the linear combination of the two features
def sigmoid_eg(x1, x2, theta_1, theta_2, theta_3):
    z = (theta_1*x1 + theta_2*x2 + theta_3).astype("float64")
    return 1.0 / (1.0 + np.exp(-z))

# Gradient of the average log-likelihood with respect to theta_1, theta_2, theta_3
def gradient_eg(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return (1/len(y)*np.sum((y - sigmoid_probs)*x1),
            1/len(y)*np.sum((y - sigmoid_probs)*x2),
            1/len(y)*np.sum((y - sigmoid_probs)))

def GradDe_eg(x1, x2, y, Max_Loop=20, alpha=0.1):
    # Arbitrary starting values for the parameters
    theta_1 = 0.1
    theta_2 = -0.4
    theta_3 = 0.56
    for l in range(Max_Loop):
        delta1, delta2, delta3 = gradient_eg(x1, x2, y, theta_1, theta_2, theta_3)
        # Gradient ascent on the log-likelihood, hence the plus sign
        theta_1 = theta_1 + alpha*delta1
        theta_2 = theta_2 + alpha*delta2
        theta_3 = theta_3 + alpha*delta3
        # Log progress every 1000 iterations
        if l % 1000 == 0:
            print('delta%d =' % (l), [delta1, delta2, delta3])
            print('theta%d =' % (l), [theta_1, theta_2, theta_3], '\n')
    return [theta_1, theta_2, theta_3]

weights = GradDe_eg(simulated_separableish_features[:,0],simulated_separableish_features[:,1],simulated_labels,200000,0.9)
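The per-parameter arithmetic above can be collapsed into matrix operations, which is both shorter and faster. A minimal vectorized sketch (the helper name grad_ascent_vec and the bias column are my own additions, not from the original):

def grad_ascent_vec(X, y, max_loop=200000, alpha=0.9):
    # Append a column of ones so the intercept is handled like any other weight
    Xb = np.hstack([X, np.ones((X.shape[0], 1))])
    theta = np.array([0.1, -0.4, 0.56])
    for _ in range(max_loop):
        p = 1.0 / (1.0 + np.exp(-(Xb @ theta)))
        # Ascend the average log-likelihood
        theta = theta + alpha * (Xb.T @ (y - p)) / len(y)
    return theta

# weights_vec = grad_ascent_vec(simulated_separableish_features, simulated_labels)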

Method 3: Newton's Method
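Newton's method uses curvature as well as slope: with gradient g and Hessian H, each step solves a local quadratic approximation, giving the update

    theta := theta - H^{-1} g    (when minimizing the cost)

One subtlety in the code below: gradient_nt returns the gradient of the log-likelihood (the negative of the cost gradient), while hessian returns the Hessian of the cost, so the two sign flips cancel and the step delta = H_inv @ g is simply added to the parameters. Because it uses second-order information, Newton's method typically converges here in a handful of iterations, versus hundreds of thousands for gradient descent.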

# Same sigmoid as in Method 2
def sigmoid_eg(x1, x2, theta_1, theta_2, theta_3):
    z = (theta_1*x1 + theta_2*x2 + theta_3).astype("float64")
    return 1.0 / (1.0 + np.exp(-z))

# Cross-entropy cost (negative average log-likelihood)
def Cost(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return -np.mean(y * np.log(sigmoid_probs) + (1 - y) * np.log(1 - sigmoid_probs))
    
# Gradient of the log-likelihood (no 1/n factor here, unlike Method 2)
def gradient_nt(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return np.array([np.sum((y - sigmoid_probs) * x1),
                     np.sum((y - sigmoid_probs) * x2),
                     np.sum(y - sigmoid_probs)])

# 3x3 Hessian of the cost, built entry by entry
def hessian(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    w = sigmoid_probs * (1 - sigmoid_probs)
    d1 = np.sum(w * x1 * x1)
    d2 = np.sum(w * x1 * x2)
    d3 = np.sum(w * x1)
    d4 = np.sum(w * x2 * x1)
    d5 = np.sum(w * x2 * x2)
    d6 = np.sum(w * x2)
    d7 = np.sum(w * x1)
    d8 = np.sum(w * x2)
    d9 = np.sum(w)
    H = np.array([[d1, d2, d3], [d4, d5, d6], [d7, d8, d9]])
    return H
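As a compact cross-check (the variable names f1, f2, Xb, p are my own): with design matrix Xb = [x1, x2, 1] and weights p*(1-p), the nine sums above are exactly the matrix product Xb.T @ diag(p*(1-p)) @ Xb:

# Equivalent matrix form of the Hessian (sanity check)
f1 = simulated_separableish_features[:, 0]
f2 = simulated_separableish_features[:, 1]
Xb = np.column_stack([f1, f2, np.ones_like(f1)])
p = sigmoid_eg(f1, f2, 0.001, -0.4, 0.6)
H_matrix = Xb.T @ (Xb * (p * (1 - p))[:, None])
# H_matrix should match hessian(f1, f2, simulated_labels, 0.001, -0.4, 0.6)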
    
# Sanity check: evaluate the sigmoid at the starting parameters
theta_1 = 0.001
theta_2 = -0.4
theta_3 = 0.6
sigmoid_probs = sigmoid_eg(simulated_separableish_features[:,0], simulated_separableish_features[:,1], theta_1, theta_2, theta_3)
sigmoid_probs

def newtons_method(x1, x2, y):
    # Initialize cost and parameters
    theta_1 = 0.001
    theta_2 = -0.4
    theta_3 = 0.6
    delta_l = np.inf
    l = Cost(x1, x2, y, theta_1, theta_2, theta_3)
    # Convergence conditions: stop when the cost barely changes
    δ = 1e-10
    max_iterations = 15
    i = 0
    while abs(delta_l) > δ and i < max_iterations:
        i += 1
        g = gradient_nt(x1, x2, y, theta_1, theta_2, theta_3)
        hess = hessian(x1, x2, y, theta_1, theta_2, theta_3)
        H_inv = np.linalg.inv(hess)
        # @ is syntactic sugar for np.dot(H_inv, g)
        delta = H_inv @ g
        delta_theta_1 = delta[0]
        delta_theta_2 = delta[1]
        delta_theta_3 = delta[2]
        print(theta_1, theta_2, theta_3, l, g)

        # Perform the update step
        theta_1 += delta_theta_1
        theta_2 += delta_theta_2
        theta_3 += delta_theta_3

        # Recompute the cost and track how much it changed
        l_new = Cost(x1, x2, y, theta_1, theta_2, theta_3)
        delta_l = l - l_new
        l = l_new
    return np.array([theta_1, theta_2, theta_3])
weights_nt = newtons_method(simulated_separableish_features[:,0],simulated_separableish_features[:,1],simulated_labels)

Comparison of the Parameters from the Three Methods

(Figure: printed output comparing the parameters found by the three methods)
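To reproduce that comparison yourself, something like the following works (a minimal sketch; note that sklearn stores the intercept separately from the coefficients):

print('sklearn:          ', list(clf.coef_[0]) + list(clf.intercept_))
print('gradient descent: ', weights)
print("newton's method:  ", list(weights_nt))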

As the results above show, all three methods arrive at nearly the same parameters. If you want to see which solver converges fastest, you can instrument the code with timing.

Note:
Each function definition comes directly from the formulas; you can read the code side by side with the formulas in the linked posts.
Mistakes are hard to avoid here and there; your understanding is appreciated, as the author is still learning.
