import torch
from d2l import torch as d2l
batch_size = 50
train_iter , test_iter = d2l.load_data_fashion_mnist(batch_size )
help(d2l.load_data_fashion_mnist)
Help on function load_data_fashion_mnist in module d2l.torch:
load_data_fashion_mnist(batch_size, resize=None)
Download the Fashion-MNIST dataset and then load it into memory.
Defined in :numref:`sec_fashion_mnist`
for X , Y in train_iter:
print(X.shape , Y.shape)
break
torch.Size([50, 1, 28, 28]) torch.Size([50])
# Parameters of a single linear layer mapping a flattened 28x28 image
# to 10 class scores.
input_dim = 28 * 28
output_dim = 10
# Weights drawn from N(0, 0.1^2); bias starts at zero. Both track gradients.
W = torch.normal(0, 0.1, size=(input_dim, output_dim), requires_grad=True)
b = torch.zeros(output_dim, requires_grad=True)

# Shape check: one flattened sample through the affine map gives (1, 10).
x = torch.randn(1, input_dim)
(torch.matmul(x, W) + b).shape
torch.Size([1, 10])
28*28
784
# Try out torch.sum().
# torch.range is deprecated; torch.arange excludes its end point, so asking
# for 12 values yields 0..11 (11 is still included). float32 keeps the dtype
# that torch.range produced.
x = torch.arange(12, dtype=torch.float32).reshape(2, 6)
C:\Users\陳昌明\AppData\Local\Temp\ipykernel_4784\3677883941.py:2: UserWarning: torch.range is deprecated and will be removed in a future release because its behavior is inconsistent with Python's range builtin. Instead, use torch.arange, which produces values in [start, end).
x = torch.range(0,11).reshape(2,6) # 這裡是包含11
x
tensor([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.]])
torch.sum(x,dim = 0) , torch.sum(x,dim = 1)
(tensor([ 6., 8., 10., 12., 14., 16.]), tensor([15., 51.]))
torch.sum(x,dim=0).shape , torch.sum(x,dim=0,keepdim = True).shape
(torch.Size([6]), torch.Size([1, 6]))
dim:要被歸約(消去)的維度索引,從 0 開始。例如 shape 為 (A, B, C) 的張量在 dim = 1 上求和後,shape 變為 (A, C)。
若加上 keepdim=True,則 (A, B, C) -> (A, 1, C),即被歸約的維度保留為大小 1,方便之後做廣播。
# softmax
def softmax(x):
    """Return the row-wise softmax of ``x``, computed in a numerically stable way.

    Args:
        x: Tensor with at least two dimensions; normalization runs over dim 1.

    Returns:
        Tensor of the same shape as ``x`` whose entries along dim 1 sum to 1.
    """
    # Softmax is invariant to adding a constant per row, so subtracting the
    # row maximum changes nothing mathematically but keeps torch.exp from
    # overflowing to inf (which would turn the division below into nan).
    shifted = x - torch.max(x, dim=1, keepdim=True).values
    x_exp = torch.exp(shifted)
    # keepdim=True leaves a size-1 dim 1 so the division broadcasts per row;
    # reshaping the plain sum to (len(x), 1) would work as well.
    x_sum = torch.sum(x_exp, dim=1, keepdim=True)
    return x_exp / x_sum
# torch.range is deprecated; torch.arange(6) yields the same values 0..5,
# and float32 keeps the dtype that torch.range produced.
x = torch.arange(6, dtype=torch.float32).reshape((2, 3))
C:\Users\陳昌明\AppData\Local\Temp\ipykernel_4784\4097049283.py:1: UserWarning: torch.range is deprecated and will be removed in a future release because its behavior is inconsistent with Python's range builtin. Instead, use torch.arange, which produces values in [start, end).
x = torch.range(0,5).reshape((2,3))
x
tensor([[0., 1., 2.],
[3., 4., 5.]])
softmax(x) ,torch.sum( softmax(x),dim = 1 )
(tensor([[0.0900, 0.2447, 0.6652],
[0.0900, 0.2447, 0.6652]]),
tensor([1., 1.]))
def net(X):
    """Softmax-regression forward pass using the module-level ``W`` and ``b``.

    ``X`` is reshaped to rows of ``W.shape[0]`` features, passed through the
    affine map and normalized with ``softmax``.
    """
    flattened = X.reshape(-1, W.shape[0])
    scores = flattened @ W + b
    return softmax(scores)
net( torch.randn( (2,784) ) ).shape
torch.Size([2, 10])
# loss function
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy: minus the log of the entry picked by the label.

    Args:
        y_hat: Tensor indexed as ``y_hat[i, y[i]]``; presumably one row of
            class probabilities per sample.
        y: Integer class indices, one per row of ``y_hat``.

    Returns:
        Tensor with one (unreduced) loss value per sample.
    """
    row_index = range(len(y_hat))
    # Pair every row with its label to pick the entry of the true class.
    picked = y_hat[row_index, y]
    return -torch.log(picked)
text_x = torch.randn( (2,784) )
text_y = torch.tensor([1,2])
text_x.shape , text_y.shape
(torch.Size([2, 784]), torch.Size([2]))
cross_entropy( net(text_x) ,text_y )
tensor([8.5022, 5.7716], grad_fn=<NegBackward0>)
def accuracy(y_hat, y):
    """Count how many rows of ``y_hat`` have their arg-max at the label in ``y``.

    Returns the number of correct predictions as a 0-dim tensor (a count,
    not a ratio).
    """
    predicted = torch.argmax(y_hat, dim=1)
    # Cast to the label dtype before the element-wise comparison.
    hits = predicted.to(y.dtype) == y
    return hits.sum()
accuracy( net( torch.randn( (2,784) ) ), text_y )/2
tensor(0.)
class Accumulator:
    """Keep running float totals for a fixed number of quantities."""

    def __init__(self, n):
        # One running total per tracked quantity, all starting at zero.
        self.count = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each argument (converted with ``float``) to its matching total."""
        updated = []
        for total, value in zip(self.count, args):
            updated.append(total + float(value))
        self.count = updated

    def reset(self):
        """Set every total back to zero, keeping the number of slots."""
        self.count = [0.0 for _ in self.count]

    def __getitem__(self, index):
        return self.count[index]
def evaluate_accracy(net, data_iter):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores; may be a
            plain function or a ``torch.nn.Module``.
        data_iter: Iterable yielding ``(X, y)`` batches.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = is_module and net.training
    if is_module:
        # Evaluate in eval mode so layers that behave differently while
        # training (e.g. dropout) do not distort the measurement.
        net.eval()
    accu = Accumulator(2)  # [correct predictions, samples seen]
    try:
        with torch.no_grad():
            for X, y in data_iter:
                accu.add(accuracy(net(X), y), len(X))
    finally:
        # Hand the module back in the mode the caller left it in.
        if was_training:
            net.train()
    return accu[0] / accu[1]
evaluate_accracy(net , test_iter )
0.083
def train_epoch(net, train_iter, loss, optimizer):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: Callable model; put into train mode when it is a ``torch.nn.Module``.
        train_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable returning a tensor of losses for ``(y_hat, y)``.
        optimizer: Either a ``torch.optim.Optimizer`` or a callable that is
            invoked with the batch size after the backward pass.

    Returns:
        Tuple ``(summed loss / samples seen, correct predictions / samples seen)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(optimizer, torch.optim.Optimizer)
    # Running totals: [summed loss, correct predictions, samples seen].
    totals = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        batch_loss = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom updater: backpropagate the summed loss and pass the
            # batch size along to the updater.
            batch_loss.sum().backward()
            optimizer(X.shape[0])
        else:
            # Built-in optimizer: backpropagate the mean loss.
            optimizer.zero_grad()
            batch_loss.mean().backward()
            optimizer.step()
        totals.add(batch_loss.sum(), accuracy(y_hat, y), len(y))
    loss_total, correct, seen = totals[0], totals[1], totals[2]
    return loss_total / seen, correct / seen
# Learning rate used by the hand-written updater below.
lr = 0.1


def updater(batch_size):
    """Run ``d2l.sgd`` on the module-level ``W`` and ``b`` with learning rate ``lr``."""
    params = [W, b]
    d2l.sgd(params, lr, batch_size)
train_epoch(net , train_iter , cross_entropy , updater)
(0.6210742252667745, 0.7871166666666667)
def train_ch3(net, train_iter, test_iter, loss, optimizer, num_epoch):
    """Train ``net`` for ``num_epoch`` epochs, printing metrics after each one.

    Args:
        net: Model passed on to ``train_epoch`` and ``evaluate_accracy``.
        train_iter: Iterable of training ``(X, y)`` batches.
        test_iter: Iterable of test ``(X, y)`` batches.
        loss: Loss callable used for training.
        optimizer: Optimizer or updater passed on to ``train_epoch``.
        num_epoch: Number of passes over ``train_iter``.
    """
    for _ in range(num_epoch):
        # Use the loss supplied by the caller. Hard-coding the module-level
        # cross_entropy here applies it to whatever the model outputs, which
        # gives nan/negative losses for a model that returns raw logits.
        l, acc = train_epoch(net, train_iter, loss, optimizer)
        test_acc = evaluate_accracy(net, test_iter)
        print(f" train loss: {l:2f} , train acc: {acc:2f} , test acc: {test_acc:2f}")
train_ch3(net , train_iter , test_iter , cross_entropy, updater , 5)
train loss: 0.494678 , train acc: 0.829333 , test acc: 0.795800
train loss: 0.466205 , train acc: 0.839683 , test acc: 0.827200
train loss: 0.455145 , train acc: 0.843700 , test acc: 0.833100
train loss: 0.443355 , train acc: 0.847467 , test acc: 0.836400
train loss: 0.435866 , train acc: 0.848167 , test acc: 0.828300
- 畫圖相關的內容都略了, 後續專門整一個notebook 畫圖
重點函式
- torch.sum函式 注意dim 和keepdim 的用法 具體在本文前面有
- 注意這裡:
- x_sum = torch.sum(x_exp , dim = 1 , keepdim = True)
- x_sum = torch.sum(x_exp , dim = 1 ).reshape((len(x),1))
- softmax 中要保證維度匹配, 方便進行廣播 否則會出錯 下有示例
a1 = torch.randn((2,4))
a2 = torch.randn(2,1)
a3 = torch.randn(1,4)
a4 = torch.randn(2)
a5 = torch.randn(4)
(a1/a2).shape , (a1/a3).shape
(torch.Size([2, 4]), torch.Size([2, 4]))
(a1/a4).shape # error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[280], line 1
----> 1 (a1/a4).shape # error
RuntimeError: The size of tensor a (4) must match the size of tensor b (2) at non-singleton dimension 1
(a1/a5).shape
簡潔版
import torch
from d2l import torch as d2l
batch_size = 50
train_iter , test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
net = torch.nn.Sequential( torch.nn.Flatten() , torch.nn.Linear(784,10) )
def init_weight(p):
    """Initialize the weight of a linear layer from N(0, 0.01^2).

    Written to be passed to ``Module.apply``; modules that are not
    ``torch.nn.Linear`` (or a subclass of it) are left untouched.
    """
    # isinstance also matches subclasses of Linear, which an exact type
    # comparison would silently skip.
    if isinstance(p, torch.nn.Linear):
        torch.nn.init.normal_(p.weight, std=0.01)
net.apply(init_weight)
Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=10, bias=True)
)
loss = torch.nn.CrossEntropyLoss(reduction="none")
lr = 0.1
trainer = torch.optim.SGD(net.parameters() , lr)
help(d2l.train_ch3)
Help on function train_ch3 in module d2l.torch:
train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
Train a model (defined in Chapter 3).
Defined in :numref:`sec_softmax_scratch`
num_epoch =5
d2l.train_ch3(net,train_iter , test_iter , loss , num_epoch , trainer)
train_ch3(net,train_iter , test_iter , loss , trainer,num_epoch)
train loss: nan , train acc: 0.825700 , test acc: 0.798500
train loss: -4.093576 , train acc: 0.804967 , test acc: 0.783000
train loss: -4.349966 , train acc: 0.793533 , test acc: 0.773700
train loss: -4.517625 , train acc: 0.784267 , test acc: 0.766200
train loss: -4.642702 , train acc: 0.779450 , test acc: 0.760700