Linear Regression - Code Walkthrough

Posted by 金字塔下的蜗牛 on 2024-08-27
import torch
import numpy as np

def synthetic_data(w, b, num_examples):
    '''
    Generate synthetic features and labels: y = Xw + b, plus Gaussian noise.
    '''
    features = torch.normal(0, 1, (num_examples, len(w)))
    labels = torch.matmul(features, w) + b
    labels += torch.normal(0, 0.001, labels.shape)  # add a small amount of noise
    return features, labels
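
# A quick shape check for synthetic_data (a sketch with illustrative sizes,
# not part of the original script):
#
#   w0 = torch.tensor([2.0, -1.0])
#   X0, y0 = synthetic_data(w0, 4.2, 5)
#   print(X0.shape, y0.shape)  # torch.Size([5, 2]) torch.Size([5])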

def data_iterator(batch_size, features, labels):
    '''
    Data generator: yields shuffled mini-batches of (features, labels).
    '''
    n = len(labels)
    indices = np.arange(n)
    np.random.shuffle(indices)  # shuffle the sample order

    for i in range(0, n, batch_size):
        batch_indices = indices[i:min(i + batch_size, n)]  # guard against running past the end
        yield features[batch_indices], labels[batch_indices]
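
# Usage sketch (illustrative sizes only). Note that data_iterator returns a
# one-shot generator: once exhausted it yields nothing more, which is why
# fit() below rebuilds it at every epoch, and why the DataLoader-based
# variants later in the file construct their loader differently.
#
#   X0, y0 = synthetic_data(torch.ones(3), 1.0, 10)
#   for Xb, yb in data_iterator(4, X0, y0):
#       print(Xb.shape)  # torch.Size([4, 3]), torch.Size([4, 3]), torch.Size([2, 3])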

def linear_reg(X, w, b):
    '''
    The linear model: returns Xw + b.
    '''
    return torch.matmul(X, w) + b

def mse(y_hat, y):
    '''
    Mean squared error loss (returns per-example values; sum before backward).
    '''
    return (y_hat - y.reshape(y_hat.shape))**2 * 0.5 / len(y_hat)  # y_hat and y must end up the same shape
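
# Worked example (a sketch): with y_hat = [1., 2.] and y = [0., 0.], mse
# returns [1*0.5/2, 4*0.5/2] = [0.25, 1.0], which sums to 1.25. Because of
# the division by len(y_hat), loss.sum().backward() already yields
# batch-averaged gradients, so the SGD step below needn't divide again.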

def SGD(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad
            param.grad.zero_()  # reset the gradient to zero
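
# Since mse already divides by the batch size, SGD above ignores its
# batch_size argument. An equivalent common variant (a sketch, not used in
# this script) keeps the raw summed loss and averages inside the update:
#
#   def sgd_avg(params, lr, batch_size):
#       with torch.no_grad():
#           for param in params:
#               param -= lr * param.grad / batch_size
#               param.grad.zero_()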

def fit(num_epoch, features, labels, batch_size, lr, net, criterion, w, b):
    for i in range(num_epoch):
        for feature, label in data_iterator(batch_size, features, labels):
            y_pred = net(feature, w, b)
            loss = criterion(y_pred, label)
            loss.sum().backward()
            SGD([w, b], lr, batch_size)

        with torch.no_grad():
            loss = criterion(net(features, w, b), labels).sum()
            print(f'epoch {i}, loss {loss}, w {w}, b {b}')

# -----------------------------  1. Load data with DataLoader -----------------------------
'''
Summary: the code above is a complete, from-scratch implementation of linear regression.
Next, the network is rebuilt along the following path, swapping in one component at a time:
  1. Load data with DataLoader --> use PyTorch's built-in MSE loss --> use the built-in optimizer --> replace the hand-written model with networks built on nn.Module
  2. Branch:
    (a) Build the network with nn.Parameter vs. without nn.Parameter
'''

from torch import nn 
from torch.utils import data

def data_iter_sys(features, labels, v_batch_size, is_train=True):
    '''
    1. TensorDataset packs tensors into a dataset that the DataLoader class can consume.
    2. * unpacks the (features, labels) tuple into separate arguments.
    3. DataLoader returns an iterable of shuffled mini-batches.
    '''
    dataset = data.TensorDataset(*(features, labels))
    return data.DataLoader(dataset, batch_size=v_batch_size, shuffle=is_train)
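
# Unlike the one-shot generator from data_iterator, a DataLoader can be
# re-iterated once per epoch. A quick check (illustrative sizes only):
#
#   X0, y0 = synthetic_data(torch.ones(3), 1.0, 10)
#   loader = data_iter_sys(X0, y0, 4)
#   for epoch in range(2):      # works: each pass restarts the loader
#       for Xb, yb in loader:
#           print(Xb.shape, yb.shape)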

def fit_dataloader(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, w, b):
    for i in range(num_epoch):
        for feature, label in dataloader:
            y_pred = net(feature, w, b)
            loss = criterion(y_pred, label)
            loss.sum().backward()
            SGD([w, b], lr, batch_size)

        with torch.no_grad():
            loss = criterion(net(features, w, b), labels).sum()
            print(f'epoch {i}, loss {loss}, w {w}, b {b}')


def fit_optimizer(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, params, optimizer):
    for i in range(num_epoch):
        for feature, label in dataloader:
            y_pred = net(feature, params[0], params[1])
            loss = criterion(y_pred, label)
            loss.sum().backward()

            optimizer.step()
            optimizer.zero_grad()

        with torch.no_grad():
            loss = criterion(net(features, params[0], params[1]), labels).sum()
            print(f'epoch {i}, loss {loss}, w {params[0]}, b {params[1]}')
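
# Note on ordering: backward() accumulates gradients, optimizer.step()
# consumes them, and optimizer.zero_grad() clears them. Clearing right
# after the step (as above) or just before backward() are both standard.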

# -----------------------------  2. Custom networks -----------------------------
class Liner_net(nn.Module):
    def __init__(self, dim):
        super(Liner_net, self).__init__()
        # Plain tensors: trainable, but NOT registered as module parameters
        self.weight = torch.randn(dim, requires_grad=True)
        self.bias = torch.randn(1, requires_grad=True)

    def forward(self, X):
        return torch.matmul(X, self.weight) + self.bias

class Liner_net_1(nn.Module):
    def __init__(self, dim):
        super(Liner_net_1, self).__init__()
        # nn.Parameter: registered with the module, visible to net.parameters()
        self.weight = nn.Parameter(torch.randn(dim))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, X):
        return torch.matmul(X, self.weight) + self.bias
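
# Why nn.Parameter matters: plain-tensor attributes are not registered with
# the module, so net.parameters() misses them and an optimizer built from
# net.parameters() would update nothing. A quick check (sketch):
#
#   print(list(Liner_net(4).parameters()))        # [] -- tensors are invisible
#   print(len(list(Liner_net_1(4).parameters()))) # 2  -- weight and bias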

class Liner_net_2(nn.Module):
    def __init__(self, dim):
        super(Liner_net_2, self).__init__()
        self.hidden = nn.Linear(in_features=dim, out_features=1, bias=True)

    def forward(self, X):
        return self.hidden(X)
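
# nn.Linear stores its weight as an (out_features, in_features) matrix, so
# the forward output has shape (N, 1) rather than (N,). A quick check
# (sketch):
#
#   m = nn.Linear(4, 1)
#   print(m.weight.shape, m.bias.shape)  # torch.Size([1, 4]) torch.Size([1])
#   print(m(torch.randn(8, 4)).shape)    # torch.Size([8, 1])
#
# This is why fit_net_selfdefine below reshapes y_pred to the label's shape,
# and why step 6 initializes the weight as a 1 x dim matrix.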

def fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, params, optimizer):
    for i in range(num_epoch):
        for feature, label in dataloader:
            y_pred = net(feature).reshape(label.shape)  # if y_pred is 32x1 while label is 32, broadcasting silently gives a wrong loss
            loss = criterion(y_pred, label)
            loss.sum().backward()

            optimizer.step()
            optimizer.zero_grad()

        with torch.no_grad():
            loss = criterion(net(features).reshape(labels.shape), labels).sum()
            print(f'epoch {i}, loss {loss}, w {params[0]}, b {params[1]}')
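
# Optional sanity check (a sketch, not called anywhere in this script): the
# least-squares fit has a closed form, so the weights recovered by training
# can be compared against torch.linalg.lstsq.
#
#   def lstsq_check(features, labels):
#       # Append a ones column so the bias becomes the last coefficient.
#       X_aug = torch.cat([features, torch.ones(len(features), 1)], dim=1)
#       sol = torch.linalg.lstsq(X_aug, labels.reshape(-1, 1)).solution
#       return sol[:-1].flatten(), sol[-1]  # (w_hat, b_hat)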

# -----------------------------  Global configuration ----------------------------
num_epoch = 100
num_examples = 2000
batch_size = 32
lr = 0.001
net = linear_reg
criterion = mse

if __name__ == '__main__':
    # 0. Generate the data
    w_true = torch.tensor([3, -2, 4, 1], dtype=torch.float)
    b_true = torch.tensor([0.5])
    features, labels = synthetic_data(w_true, b_true, num_examples)

    w = torch.randn(w_true.shape, requires_grad=True)
    b = torch.randn(1, requires_grad=True)
    # 1. Linear regression implemented by hand
    '''
    # Train the model
    fit(num_epoch, features, labels, batch_size, lr, net, criterion, w, b)
    print(10*'*', 'Results', 10*'*')
    print('[w_true,w]', [w_true, w])
    print('[b_true,b]', [b_true, b])
    '''
    
    # 2. Replace the custom data generator with DataLoader
    '''
    print(10*'*', '2. Use DataLoader', 10*'*')
    num_epoch = 50
    dataloader = data_iter_sys(features, labels, batch_size)
    fit_dataloader(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, w, b)
    print(10*'*', 'Results', 10*'*')
    print('[w_true,w]', [w_true, w])
    print('[b_true,b]', [b_true, b])
    '''
    # 3. Use PyTorch's built-in loss function
    '''
    print(10*'*', "3. Use PyTorch's built-in loss function", 10*'*')
    criterion = nn.MSELoss()
    fit(num_epoch, features, labels, batch_size, lr, net, criterion, w, b)
    print(10*'*', 'Results', 10*'*')
    print('[w_true,w]', [w_true, w])
    print('[b_true,b]', [b_true, b])
    '''
    # 4. Use the built-in optimizer
    '''
    print(10*'*', '4. Use the built-in optimizer', 10*'*')
    w = torch.randn(w_true.shape, requires_grad=True)
    b = torch.randn(1, requires_grad=True)
    print([w, b])
    optimizer = torch.optim.SGD([w, b], lr=lr)
    criterion = mse  # or criterion = nn.MSELoss()
    dataloader = data_iter_sys(features, labels, batch_size)  # data_iterator(batch_size, features, labels) would NOT work here: it is a one-shot generator, exhausted after the first epoch, while a DataLoader can be re-iterated every epoch
    fit_optimizer(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, [w, b], optimizer)
    print(10*'*', 'Results', 10*'*')
    print('[w_true,w]', [w_true, w])
    print('[b_true,b]', [b_true, b])
    '''
    
    # 5. Custom network: without vs. with nn.Parameter
    '''
    print(10*'*', '5. Custom network: without vs. with nn.Parameter', 10*'*')
    criterion = nn.MSELoss()   # or criterion = mse
    num_epoch = 100
    #net = Liner_net(w.shape[0])   # network built from plain tensors
    net = Liner_net_1(w.shape[0])  # network built with nn.Parameter
    optimizer = torch.optim.SGD([net.weight, net.bias], lr=lr)
    dataloader = data_iter_sys(features, labels, batch_size)  # see step 4: a one-shot generator cannot be re-iterated across epochs
    fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, [net.weight, net.bias], optimizer)
    print(10*'*', 'Results', 10*'*')
    print('[w_true,w]', [w_true, net.weight.data])
    print('[b_true,b]', [b_true, net.bias.data])
    '''
    # 6. Use nn.Linear inside nn.Sequential
    '''
    print(10*'*', '6. Custom network using nn.Sequential', 10*'*')
    criterion = nn.MSELoss()   # or criterion = mse
    num_epoch = 100
    net = nn.Sequential(nn.Linear(in_features=w.shape[0], out_features=1, bias=True))
    net[0].weight.data = torch.randn(w.shape[0], 1, dtype=torch.float).T  # must be a (1, dim) matrix, not a vector
    net[0].bias.data = torch.randn(1)
    optimizer = torch.optim.SGD([net[0].weight, net[0].bias], lr=lr)
    dataloader = data_iter_sys(features, labels, batch_size)  # see step 4: a one-shot generator cannot be re-iterated across epochs
    fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, [net[0].weight, net[0].bias], optimizer)
    print(10*'*', 'Results', 10*'*')
    print('[w_true,w]', [w_true, net[0].weight.data])
    print('[b_true,b]', [b_true, net[0].bias.data])
    '''
    # 7. Custom network using nn.Linear
    print(10*'*', '7. Custom network using nn.Linear', 10*'*')
    criterion = nn.MSELoss()   # or criterion = mse
    num_epoch = 100
    net = Liner_net_2(w.shape[0])
    params = [net.hidden.weight, net.hidden.bias]
    optimizer = torch.optim.SGD(params, lr=lr)
    dataloader = data_iter_sys(features, labels, batch_size)  # see step 4: a one-shot generator cannot be re-iterated across epochs
    fit_net_selfdefine(num_epoch, dataloader, features, labels, batch_size, lr, net, criterion, params, optimizer)
    print(10*'*', 'Results', 10*'*')
    print('[w_true,w]', [w_true, params[0].data])
    print('[b_true,b]', [b_true, params[1].data])