softmax-regression

Mr小明同学發表於2024-06-23

原文網址 : https://www.cnblogs.com/cndccm/p/18263366

import torch
from d2l import torch as d2l

batch_size = 50
train_iter , test_iter = d2l.load_data_fashion_mnist(batch_size )

help(d2l.load_data_fashion_mnist)

Help on function load_data_fashion_mnist in module d2l.torch:

load_data_fashion_mnist(batch_size, resize=None)
    Download the Fashion-MNIST dataset and then load it into memory.
    
    Defined in :numref:`sec_fashion_mnist`

for X , Y in train_iter:
    print(X.shape , Y.shape)
    break

torch.Size([50, 1, 28, 28]) torch.Size([50])

input_dim = 28*28 
output_dim = 10
W = torch.normal( 0,0.1 , (input_dim , output_dim) , requires_grad = True )
b = torch.zeros( (output_dim) , requires_grad = True )

x = torch.randn((1,28*28  ))
(x@W+b).shape

torch.Size([1, 10])

28*28

# 測試 torch.sum() 用法
x = torch.range(0,11).reshape(2,6)  # 這裡是包含11

C:\Users\陳昌明\AppData\Local\Temp\ipykernel_4784\3677883941.py:2: UserWarning: torch.range is deprecated and will be removed in a future release because its behavior is inconsistent with Python's range builtin. Instead, use torch.arange, which produces values in [start, end).
  x = torch.range(0,11).reshape(2,6)  # 這裡是包含11

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10., 11.]])

torch.sum(x,dim = 0) , torch.sum(x,dim = 1)

(tensor([ 6.,  8., 10., 12., 14., 16.]), tensor([15., 51.]))

torch.sum(x,dim=0).shape , torch.sum(x,dim=0,keepdim = True).shape

(torch.Size([6]), torch.Size([1, 6]))

dim：要被歸約（犧牲）的維度位置，從 0 開始計數。例如 shape 為 (A,B,C) 時，dim = 1 的結果 shape 為 (A,C)。
若加入 keepdim=True，則 (A,B,C) -> (A,1,C)，即被歸約的維度保留為 1。

# softmax
def softmax(x):
    """Return the softmax of `x` taken along dimension 1.

    Every slice along dim 1 of the result is non-negative and sums to 1.

    Args:
        x: tensor with at least two dimensions; dim 1 is normalised.

    Returns:
        A tensor with the same shape as `x`.
    """
    # Nothing to normalise when dim 1 is empty; max() would raise on it.
    if x.shape[1] == 0:
        return torch.exp(x)

    # Softmax is invariant to subtracting a per-row constant. Shifting by the
    # row maximum keeps exp() from overflowing to inf (inf / inf -> nan).
    # The shift is detached because it does not change the result.
    x_max = torch.max(x, dim=1, keepdim=True).values.detach()
    x_exp = torch.exp(x - x_max)

    # keepdim=True keeps the reduced axis as size 1 so the division below
    # broadcasts per row; reshaping the sum to (len(x), 1) is equivalent
    # for 2-D input.
    x_sum = torch.sum(x_exp, dim=1, keepdim=True)

    return x_exp / x_sum

x = torch.range(0,5).reshape((2,3))

C:\Users\陳昌明\AppData\Local\Temp\ipykernel_4784\4097049283.py:1: UserWarning: torch.range is deprecated and will be removed in a future release because its behavior is inconsistent with Python's range builtin. Instead, use torch.arange, which produces values in [start, end).
  x = torch.range(0,5).reshape((2,3))

tensor([[0., 1., 2.],
        [3., 4., 5.]])

softmax(x) ,torch.sum( softmax(x),dim = 1 )

(tensor([[0.0900, 0.2447, 0.6652],
         [0.0900, 0.2447, 0.6652]]),
 tensor([1., 1.]))

def net(X ):
    """Softmax-regression forward pass using the module-level `W` and `b`.

    `X` is reshaped to rows of `W.shape[0]` features, multiplied by `W`,
    offset by `b`, and passed through `softmax`.
    """
    flat = X.reshape(-1, W.shape[0])
    logits = flat @ W + b
    return softmax(logits)

net( torch.randn( (2,784)  ) ).shape

torch.Size([2, 10])

# loss function

def cross_entropy(y_hat , y):
    """Per-sample negative log of the value `y_hat` holds at each label.

    Args:
        y_hat: tensor indexed as [row, class].
        y: one class index per row of `y_hat`.

    Returns:
        A tensor with one loss value per row (no reduction is applied).
    """
    rows = range(len(y_hat))
    # Pair row i with label y[i] to pick one entry per row.
    picked = y_hat[rows, y]
    return -picked.log()

text_x = torch.randn( (2,784) )
text_y = torch.tensor([1,2])

text_x.shape , text_y.shape

(torch.Size([2, 784]), torch.Size([2]))

cross_entropy( net(text_x) ,text_y )

tensor([8.5022, 5.7716], grad_fn=<NegBackward0>)

def accuracy(y_hat,y):
    """Count the rows of `y_hat` whose arg-max over dim 1 equals the label.

    Returns:
        A 0-dim tensor holding the number of matches (a count, not a ratio).
    """
    predicted = torch.argmax(y_hat, dim=1)
    # Cast predictions to the label dtype before comparing.
    matches = predicted.to(y.dtype) == y
    return matches.sum()

accuracy( net( torch.randn( (2,784)  ) ), text_y )/2

tensor(0.)

class Accumulator:
    """Keeps `n` running float totals, readable by index."""

    def __init__(self , n):
        # One zero-initialised total per slot.
        self.count = [0.0 for _ in range(n)]

    def add(self , *args):
        """Add each positional value (converted with float()) to its slot."""
        updated = []
        for total, value in zip(self.count, args):
            updated.append(total + float(value))
        self.count = updated

    def reset(self):
        """Set every total back to 0.0, keeping the number of slots."""
        self.count = [0.0 for _ in self.count]

    def __getitem__(self , index):
        return self.count[index]

def evaluate_accracy(net , data_iter):
    """Return the fraction of samples in `data_iter` that `net` classifies correctly.

    Args:
        net: a callable mapping a batch `X` to per-class scores; may be a
            `torch.nn.Module` or a plain function.
        data_iter: iterable yielding `(X, y)` batches.

    Returns:
        Correct predictions divided by the number of samples seen, as a float.

    Raises:
        ZeroDivisionError: if `data_iter` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = is_module and net.training
    if is_module:
        # Evaluate in eval mode, mirroring the net.train() call in train_epoch.
        net.eval()

    accu = Accumulator(2)  # (correct predictions, samples seen)
    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for X, y in data_iter:
                accu.add(accuracy(net(X), y), len(X))
    finally:
        # Leave the module in the mode the caller had it in.
        if was_training:
            net.train()
    return accu[0] / accu[1]

evaluate_accracy(net , test_iter )

0.083

def train_epoch(net , train_iter , loss , optimizer):
    """Run one pass over `train_iter`, updating the model after every batch.

    Args:
        net: callable model; put into train mode if it is a `torch.nn.Module`.
        train_iter: iterable yielding `(X, y)` batches.
        loss: callable `loss(y_hat, y)` returning a tensor of losses.
        optimizer: either a `torch.optim.Optimizer`, or a callable that is
            invoked with the batch size after the backward pass.

    Returns:
        `(summed loss / samples seen, correct predictions / samples seen)`.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(optimizer, torch.optim.Optimizer)

    totals = Accumulator(3)  # (summed loss, correct predictions, samples seen)
    for X, y in train_iter:
        y_hat = net(X)
        batch_loss = loss(y_hat, y)

        if uses_torch_optimizer:
            # Built-in optimizer: back-propagate the mean loss.
            optimizer.zero_grad()
            batch_loss.mean().backward()
            optimizer.step()
        else:
            # Custom updater: back-propagate the summed loss and pass the
            # batch size to the updater.
            batch_loss.sum().backward()
            optimizer(X.shape[0])

        correct = accuracy(y_hat, y)
        totals.add(batch_loss.sum(), correct, len(y))

    seen = totals[2]
    return totals[0] / seen, totals[1] / seen

lr = 0.1
def updater(batch_size):
    """Call `d2l.sgd` on the module-level `W` and `b` with the global `lr`.

    `batch_size` is forwarded unchanged to `d2l.sgd`.
    """
    params = [W, b]
    d2l.sgd(params, lr, batch_size)

train_epoch(net , train_iter , cross_entropy , updater)

(0.6210742252667745, 0.7871166666666667)

def train_ch3(net , train_iter , test_iter ,loss , optimizer , num_epoch):
    """Train `net` for `num_epoch` epochs and print metrics after each epoch.

    Args:
        net: callable model, forwarded to `train_epoch` and `evaluate_accracy`.
        train_iter: iterable of training `(X, y)` batches.
        test_iter: iterable of test `(X, y)` batches.
        loss: callable `loss(y_hat, y)` used for training.
        optimizer: a `torch.optim.Optimizer` or a callable taking the batch
            size, as accepted by `train_epoch`.
        num_epoch: number of passes over `train_iter`.
    """
    for _ in range(num_epoch):
        # Use the caller's loss: it has to match what `net` outputs, so a
        # hard-coded loss gives meaningless values for other models.
        l, acc = train_epoch(net, train_iter, loss, optimizer)
        test_acc = evaluate_accracy(net, test_iter)
        print(f" train loss: {l:2f} , train acc: {acc:2f} , test acc: {test_acc:2f}")

train_ch3(net , train_iter , test_iter , cross_entropy, updater , 5)

 train loss: 0.494678 , train acc: 0.829333 , test acc: 0.795800
 train loss: 0.466205 , train acc: 0.839683 , test acc: 0.827200
 train loss: 0.455145 , train acc: 0.843700 , test acc: 0.833100
 train loss: 0.443355 , train acc: 0.847467 , test acc: 0.836400
 train loss: 0.435866 , train acc: 0.848167 , test acc: 0.828300

畫圖相關的內容都略了，後續專門整一個notebook 畫圖

重點函式

torch.sum 函式：注意 dim 和 keepdim 的用法，具體說明在本文前面。
注意這裡：
- x_sum = torch.sum(x_exp , dim = 1 , keepdim = True)
- x_sum = torch.sum(x_exp , dim = 1 ).reshape((len(x),1))
- softmax 中要保證維度匹配，方便進行廣播，否則會出錯，下方有示例。

a1 = torch.randn((2,4)) 
a2 = torch.randn(2,1)
a3 = torch.randn(1,4)
a4 = torch.randn(2)
a5 = torch.randn(4)
(a1/a2).shape , (a1/a3).shape

(torch.Size([2, 4]), torch.Size([2, 4]))

(a1/a4).shape  # error

---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

Cell In[280], line 1
----> 1 (a1/a4).shape  # error


RuntimeError: The size of tensor a (4) must match the size of tensor b (2) at non-singleton dimension 1

(a1/a5).shape

簡潔版

import torch
from d2l import torch as d2l

batch_size = 50
train_iter  , test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

net = torch.nn.Sequential( torch.nn.Flatten() , torch.nn.Linear(784,10) )


def init_weight(p):
    """Re-initialise the weight of an exact `torch.nn.Linear` module in place.

    Weights are drawn from a normal distribution with std 0.01; any other
    module type is left untouched.
    """
    if type(p) is not torch.nn.Linear:
        return
    torch.nn.init.normal_(p.weight, std=0.01)
net.apply(init_weight)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)

loss = torch.nn.CrossEntropyLoss(reduction="none")

lr = 0.1
trainer = torch.optim.SGD(net.parameters() , lr)

help(d2l.train_ch3)

Help on function train_ch3 in module d2l.torch:

train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
    Train a model (defined in Chapter 3).
    
    Defined in :numref:`sec_softmax_scratch`

num_epoch =5
d2l.train_ch3(net,train_iter , test_iter , loss , num_epoch , trainer)

train_ch3(net,train_iter , test_iter , loss , trainer,num_epoch)

 train loss: nan , train acc: 0.825700 , test acc: 0.798500
 train loss: -4.093576 , train acc: 0.804967 , test acc: 0.783000
 train loss: -4.349966 , train acc: 0.793533 , test acc: 0.773700
 train loss: -4.517625 , train acc: 0.784267 , test acc: 0.766200
 train loss: -4.642702 , train acc: 0.779450 , test acc: 0.760700