softmax-regression

Mr小明同学發表於2024-06-23
import torch
from d2l import torch as d2l


# Mini-batch size shared by the training and test iterators.
batch_size = 50
# Download Fashion-MNIST (if needed) and get one iterator per split.
train_iter , test_iter = d2l.load_data_fashion_mnist(batch_size )
# Print the helper's signature and docstring for reference.
help(d2l.load_data_fashion_mnist)
Help on function load_data_fashion_mnist in module d2l.torch:

load_data_fashion_mnist(batch_size, resize=None)
    Download the Fashion-MNIST dataset and then load it into memory.
    
    Defined in :numref:`sec_fashion_mnist`

for X , Y in train_iter:
    print(X.shape , Y.shape)
    break
torch.Size([50, 1, 28, 28]) torch.Size([50])
# Each 1x28x28 image is flattened into a vector of 784 features.
input_dim = 28*28 
# One output score per class (10 classes).
output_dim = 10
# Weights drawn from N(0, 0.1) and zero biases; both track gradients so they
# can be trained.
W = torch.normal( 0,0.1 , (input_dim , output_dim) , requires_grad = True )
b = torch.zeros( (output_dim) , requires_grad = True )
# Sanity check: one flattened sample through the affine map gives shape (1, 10).
x = torch.randn((1,28*28  ))
(x@W+b).shape
torch.Size([1, 10])
28*28
784
# Demo input for torch.sum(): the values 0..11 arranged as a 2x6 float tensor.
# torch.range (deprecated) included its end point; torch.arange excludes it,
# so 12 values are requested to still cover 0..11.
x = torch.arange(12, dtype=torch.float32).reshape(2, 6)
x
tensor([[ 0.,  1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10., 11.]])
torch.sum(x,dim = 0) , torch.sum(x,dim = 1) 
(tensor([ 6.,  8., 10., 12., 14., 16.]), tensor([15., 51.]))
torch.sum(x,dim=0).shape , torch.sum(x,dim=0,keepdim = True).shape
(torch.Size([6]), torch.Size([1, 6]))

dim: 要被歸約(消去)的維度的位置, 從0開始。例如 shape 為 (A,B,C) 時, dim = 1 -> (A,C)
若加入了 keepdim=True, 則 (A,B,C) -> (A,1,C), 即被歸約的維度會保留下來, 大小為 1

# softmax
def softmax(x):
    """Apply a numerically stable softmax along dim 1.

    Args:
        x: Tensor with at least two dimensions; each slice along dim 1 is
            normalized independently.

    Returns:
        Tensor of the same shape as ``x`` whose entries along dim 1 are
        non-negative and sum to 1.
    """
    # Softmax is invariant to subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so exp() cannot overflow to inf
    # (which would otherwise produce inf / inf = nan).
    shifted = x - x.max(dim=1, keepdim=True).values
    x_exp = torch.exp(shifted)

    # keepdim=True keeps the reduced axis with size 1 so the division below
    # broadcasts row by row; reshaping the sum to (len(x), 1) would be an
    # equivalent alternative.
    x_sum = torch.sum(x_exp, dim=1, keepdim=True)

    return x_exp / x_sum
    
# Demo input for softmax(): the values 0..5 arranged as a 2x3 float tensor.
# torch.arange excludes its end point (unlike the deprecated torch.range),
# so 6 values are requested to cover 0..5.
x = torch.arange(6, dtype=torch.float32).reshape((2, 3))
x
tensor([[0., 1., 2.],
        [3., 4., 5.]])
softmax(x) ,torch.sum( softmax(x),dim = 1 )
(tensor([[0.0900, 0.2447, 0.6652],
         [0.0900, 0.2447, 0.6652]]),
 tensor([1., 1.]))
def net(X):
    """Softmax-regression forward pass using the module-level ``W`` and ``b``.

    ``X`` is reshaped to rows of ``W.shape[0]`` features, passed through the
    affine map ``X @ W + b``, and normalized with ``softmax``.
    """
    flattened = X.reshape(-1, W.shape[0])
    scores = flattened @ W + b
    return softmax(scores)
net( torch.randn( (2,784)  ) ).shape
torch.Size([2, 10])
# loss function

def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: Per-row predicted class probabilities, indexed as
            ``y_hat[row, class]``.
        y: Class index of the true label for each row.

    Returns:
        Tensor holding ``-log`` of the probability assigned to the true class,
        one entry per row (no reduction is applied).
    """
    row_ids = range(len(y_hat))
    # Pair each row index with its label to pick one probability per sample.
    true_class_prob = y_hat[row_ids, y]
    return torch.log(true_class_prob).neg()
text_x = torch.randn( (2,784) )
text_y = torch.tensor([1,2])
text_x.shape , text_y.shape
(torch.Size([2, 784]), torch.Size([2]))
cross_entropy( net(text_x) ,text_y )
tensor([8.5022, 5.7716], grad_fn=<NegBackward0>)
def accuracy(y_hat, y):
    """Count how many predictions match the labels.

    Args:
        y_hat: Per-row class scores; the predicted class is the argmax over
            dim 1.
        y: True labels, one per row.

    Returns:
        Scalar tensor with the number (not the fraction) of correct rows.
    """
    predicted = torch.argmax(y_hat, dim=1)
    # Cast to the label dtype so the element-wise comparison is like-for-like.
    hits = torch.eq(predicted.type(y.dtype), y)
    return hits.sum()
accuracy( net( torch.randn( (2,784)  ) ), text_y )/2
tensor(0.)
class Accumulator:
    """Keep running float sums for a fixed number of quantities."""

    def __init__(self, n):
        """Start with ``n`` sums, all equal to 0.0."""
        self.count = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional value (converted with ``float``) to its slot.

        Slots and values are paired with ``zip``, so the stored list ends up
        as long as the shorter of the two.
        """
        updated = []
        for running, increment in zip(self.count, args):
            updated.append(running + float(increment))
        self.count = updated

    def reset(self):
        """Set every stored sum back to 0.0, keeping the current length."""
        self.count = [0.0 for _ in self.count]

    def __getitem__(self, index):
        """Return the running sum stored at ``index``."""
        return self.count[index]
def evaluate_accracy(net , data_iter):
    """Return the fraction of correctly classified samples in ``data_iter``.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores; may be a
            plain function or a ``torch.nn.Module``.
        data_iter: Iterable yielding ``(X, y)`` batches.

    Returns:
        Number of correct predictions divided by the number of samples seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    # Mirror train_epoch, which switches modules to training mode: put the
    # module in evaluation mode so mode-dependent layers behave correctly
    # while measuring accuracy.
    if isinstance(net , torch.nn.Module):
        net.eval()
    accu = Accumulator(2)  # slots: (correct predictions, samples seen)
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for X , y in data_iter:
            y_hat = net(X)
            acc = accuracy(y_hat , y)
            accu.add(acc , len(X)  )
    return accu[0]/accu[1]
evaluate_accracy(net , test_iter )
0.083
def train_epoch(net, train_iter, loss, optimizer):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: Model; if it is a ``torch.nn.Module`` it is put in training mode.
        train_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(y_hat, y)`` returning a tensor of losses.
        optimizer: Either a ``torch.optim.Optimizer`` or a callable that takes
            the batch size and updates the parameters itself.

    Returns:
        Tuple ``(summed loss / samples, correct predictions / samples)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(optimizer, torch.optim.Optimizer)

    totals = Accumulator(3)  # slots: (loss sum, correct predictions, samples)
    for features, labels in train_iter:
        predictions = net(features)
        batch_loss = loss(predictions, labels)

        if not uses_torch_optimizer:
            # Custom updater: back-propagate the summed loss and hand the
            # batch size to the updater.
            batch_loss.sum().backward()
            optimizer(features.shape[0])
        else:
            # Built-in optimizer: back-propagate the mean loss.
            optimizer.zero_grad()
            batch_loss.mean().backward()
            optimizer.step()

        correct = accuracy(predictions, labels)
        totals.add(batch_loss.sum(), correct, len(labels))

    sample_count = totals[2]
    return totals[0] / sample_count, totals[1] / sample_count
lr = 0.1
def updater(batch_size):
    """Run one ``d2l.sgd`` step on the module-level ``W`` and ``b``.

    The module-level ``lr`` is used as the learning rate and ``batch_size``
    is forwarded to ``d2l.sgd`` unchanged.
    """
    params = [W, b]
    d2l.sgd(params, lr, batch_size)
train_epoch(net , train_iter , cross_entropy , updater)
(0.6210742252667745, 0.7871166666666667)
def train_ch3(net , train_iter , test_iter ,loss , optimizer , num_epoch):
    """Train ``net`` for ``num_epoch`` epochs, printing metrics after each one.

    Args:
        net: Model to train.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` evaluation batches.
        loss: Loss callable forwarded to ``train_epoch``.
        optimizer: A ``torch.optim.Optimizer`` or a custom updater callable,
            forwarded to ``train_epoch``.
        num_epoch: Number of passes over ``train_iter``.
    """
    for _ in range(num_epoch):
        # Use the caller-supplied loss. Hard-coding cross_entropy here silently
        # ignored the argument and applies -log to raw logits when the model
        # has no softmax layer, yielding nan / negative losses.
        l , acc = train_epoch(net , train_iter , loss , optimizer )
        test_acc = evaluate_accracy(net , test_iter)
        # `:2f` is a minimum field width of 2 with the default 6 decimals.
        print(f" train loss: {l:2f} , train acc: {acc:2f} , test acc: {test_acc:2f}")
train_ch3(net , train_iter , test_iter , cross_entropy, updater , 5)
 train loss: 0.494678 , train acc: 0.829333 , test acc: 0.795800
 train loss: 0.466205 , train acc: 0.839683 , test acc: 0.827200
 train loss: 0.455145 , train acc: 0.843700 , test acc: 0.833100
 train loss: 0.443355 , train acc: 0.847467 , test acc: 0.836400
 train loss: 0.435866 , train acc: 0.848167 , test acc: 0.828300
  • 畫圖相關的內容都略了, 後續專門整一個notebook 畫圖

重點函式

  • torch.sum函式 注意dim 和keepdim 的用法, 具體說明在本文前面
  • 注意這裡:
    • x_sum = torch.sum(x_exp , dim = 1 , keepdim = True)
    • x_sum = torch.sum(x_exp , dim = 1 ).reshape((len(x),1))
    • softmax 中要保證 x_sum 的形狀為 (N,1), 才能與形狀 (N,C) 的 x_exp 逐行廣播; 若形狀為 (N,) 則會出錯(或在 N 等於 C 時得到錯誤結果), 下有示例
# Tensors of different shapes used to show which divisions broadcast against
# the (2, 4) tensor a1.
a1 = torch.randn((2,4)) 
a2 = torch.randn(2,1)
a3 = torch.randn(1,4)
a4 = torch.randn(2)
a5 = torch.randn(4)
# (2, 1) and (1, 4) both broadcast against (2, 4): the size-1 axis is expanded.
(a1/a2).shape , (a1/a3).shape
(torch.Size([2, 4]), torch.Size([2, 4]))
(a1/a4).shape  # error
---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

Cell In[280], line 1
----> 1 (a1/a4).shape  # error


RuntimeError: The size of tensor a (4) must match the size of tensor b (2) at non-singleton dimension 1
(a1/a5).shape 

簡潔版

import torch
from d2l import torch as d2l


# Mini-batch size shared by the training and test iterators.
batch_size = 50
train_iter  , test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
# Flatten each image to a vector, then apply one linear layer (784 -> 10).
# The model contains no softmax layer, so it outputs raw scores.
net = torch.nn.Sequential( torch.nn.Flatten() , torch.nn.Linear(784,10) )


def init_weight(p):
    """Initialize the weight of a linear layer from N(0, 0.01**2), in place.

    Intended for ``Module.apply``, which calls it on every submodule; modules
    that are not ``torch.nn.Linear`` (or a subclass of it) are left untouched.

    Args:
        p: The module being visited.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of torch.nn.Linear.
    if isinstance(p, torch.nn.Linear):
        torch.nn.init.normal_(p.weight, std=0.01)
net.apply(init_weight)
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)
loss = torch.nn.CrossEntropyLoss(reduction="none")

lr = 0.1
trainer = torch.optim.SGD(net.parameters() , lr)
help(d2l.train_ch3)
Help on function train_ch3 in module d2l.torch:

train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
    Train a model (defined in Chapter 3).
    
    Defined in :numref:`sec_softmax_scratch`

num_epoch =5
# d2l's helper takes (net, train_iter, test_iter, loss, num_epochs, updater).
d2l.train_ch3(net,train_iter , test_iter , loss , num_epoch , trainer)

# The train_ch3 defined in this file takes the last two arguments in the
# opposite order: (..., loss, optimizer, num_epoch). This call continues
# training the same `net` that the call above already trained.
train_ch3(net,train_iter , test_iter , loss , trainer,num_epoch)
 train loss: nan , train acc: 0.825700 , test acc: 0.798500
 train loss: -4.093576 , train acc: 0.804967 , test acc: 0.783000
 train loss: -4.349966 , train acc: 0.793533 , test acc: 0.773700
 train loss: -4.517625 , train acc: 0.784267 , test acc: 0.766200
 train loss: -4.642702 , train acc: 0.779450 , test acc: 0.760700