Chapter 2: PyTorch Basics

Posted by denngamin on 2024-04-13

2.1 PyTorch Tensors

Study notes:

A scalar is a 0-dimensional tensor.
A vector is a 1-dimensional tensor (axis 0), e.g. shape (4,).
A 2-D matrix is a 2-dimensional tensor (top to bottom is axis 0, left to right is axis 1), e.g. shape (4, 3).
A 3-D array is a 3-dimensional tensor (top to bottom is axis 0, left to right is axis 1, outer to inner is axis 2), e.g. shape (4, 3, 2).
Each case is checked in the sketch below.
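To make this concrete, here is a minimal sketch (variable names are my own) that prints the number of axes and the shape for each case:

import torch

scalar = torch.tensor(3.0)      # 0-D tensor
vector = torch.zeros(4)         # 1-D tensor, shape (4,)
matrix = torch.zeros(4, 3)      # 2-D tensor, shape (4, 3)
cube = torch.zeros(4, 3, 2)     # 3-D tensor, shape (4, 3, 2)

for t in (scalar, vector, matrix, cube):
    print(t.ndim, tuple(t.shape))
# 0 ()
# 1 (4,)
# 2 (4, 3)
# 3 (4, 3, 2)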

Initializing tensors

import torch
x = torch.tensor([[1,2]])
y = torch.tensor([[1],[2]])

print(x.shape)
# torch.Size([1,2]) # one entity of two items
print(y.shape)
# torch.Size([2,1]) # two entities of one item each

torch.zeros(3, 4)
# tensor([[0., 0., 0., 0.],
#         [0., 0., 0., 0.],
#         [0., 0., 0., 0.]])
torch.ones(3, 4)
# tensor([[1., 1., 1., 1.],
#         [1., 1., 1., 1.],
#         [1., 1., 1., 1.]])
torch.randint(low=0, high=10, size=(3,4))
# tensor([[8, 2, 5, 9],
#         [6, 1, 6, 0],
#         [5, 6, 9, 5]])
torch.rand(3, 4) # uniform random values between 0 and 1
# tensor([[0.3196, 0.9387, 0.9268, 0.1246],
#         [0.6700, 0.7529, 0.8687, 0.3948],
#         [0.2279, 0.2309, 0.0151, 0.0339]])
torch.randn(3,4) # random values drawn from a standard normal distribution
# tensor([[-0.4039, -1.8015,  0.9784, -1.5263],
#         [ 0.9577, -1.2826,  0.2746, -0.2621],
#         [-1.4713,  0.6437,  0.3326, -1.0703]])

import numpy as np
x = np.array([[10,20,30],[2,3,4]])
# np.ndarray
y = torch.tensor(x)
# convert the NumPy array to a tensor
print(type(x), type(y))
# <class 'numpy.ndarray'> <class 'torch.Tensor'>

Tensor operations

x = torch.tensor([[1,2,3,4], [5,6,7,8]]) 
print(x * 10)
# tensor([[10, 20, 30, 40],
#         [50, 60, 70, 80]])

x = torch.tensor([[1,2,3,4], [5,6,7,8]]) 
y = x.add(10)
print(y)
# tensor([[11, 12, 13, 14],
#         [15, 16, 17, 18]])

Reshaping tensors

y = torch.tensor([2, 3, 1, 0]) 
y = y.view(4,1)
y 

# tensor([[2],
#         [3],
#         [1],
#         [0]])

# Another way to reshape is the squeeze method, which only works on an axis whose size is 1
x = torch.randn(10,1,10) # 3-D tensor with axis 0, axis 1, axis 2
z1 = torch.squeeze(x, 1) # 1 means axis 1
# z1 = x.squeeze(1)
z1.shape
# torch.Size([10, 10])

x = torch.randn(10,10) # a 2-D tensor with axis 0 and axis 1
z1 = torch.unsqueeze(x, 0) # 0 means insert a new axis at position 0
# z1 = x.unsqueeze(0)
# torch.Size([1, 10, 10])

Besides unsqueeze, you can also index with None, as shown below:

z2, z3, z4 = x[None,:,:], x[:,None,:], x[:,:,None]
# torch.Size([1, 10, 10])
# torch.Size([10, 1, 10])
# torch.Size([10, 10, 1])
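A quick check (my own addition) that None indexing and unsqueeze produce the same result:

print(torch.equal(x[None, :, :], x.unsqueeze(0)))  # True
print(torch.equal(x[:, None, :], x.unsqueeze(1)))  # True
print(torch.equal(x[:, :, None], x.unsqueeze(2)))  # True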

Matrix multiplication of tensors

y = torch.tensor([2, 3, 1, 0])
x = torch.tensor([[1,2,3,4], [5,6,7,8]])
print(torch.matmul(x, y))
# tensor([11, 35])
# or equivalently: print(x @ y)

Concatenating tensors

x = torch.randn(10,10,10)
z = torch.cat([x,x], axis=0) # the NumPy analogue is np.concatenate()
print('Cat axis 0:', z.shape)
# torch.Size([20, 10, 10])
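Concatenating along a different axis grows that axis instead (a small extension of the example above):

z1 = torch.cat([x,x], axis=1)
print('Cat axis 1:', z1.shape)
# torch.Size([10, 20, 10])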

Extracting the maximum value of a tensor

x = torch.arange(25).reshape(5,5)
print('Max:', x.shape, x.max())    
# Max: torch.Size([5, 5]) tensor(24)

x.max(dim=0) # maximum along axis 0
# torch.return_types.max(
# values=tensor([20, 21, 22, 23, 24]),
# indices=tensor([4, 4, 4, 4, 4]))

x.max(dim=1) # maximum along axis 1
# torch.return_types.max(
# values=tensor([ 4,  9, 14, 19, 24]),
# indices=tensor([4, 4, 4, 4, 4]))

Permuting tensor dimensions (do not swap dimensions by reshaping the tensor)

x = torch.randn(10,20,30) # axes (0, 1, 2)
z = x.permute(2,0,1) # the NumPy analogue is np.transpose()
print('Permute dimensions:', z.shape)
# torch.Size([30, 10, 20])
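To see why reshaping is not a substitute for permuting, here is a small sketch (my own addition): permute reorders the axes while keeping each element attached to its original row and column, whereas reshape only reinterprets the flat memory layout, so the two give different tensors:

a = torch.arange(6).reshape(2, 3)
print(a.permute(1, 0))
# tensor([[0, 3],
#         [1, 4],
#         [2, 5]])
print(a.reshape(3, 2))
# tensor([[0, 1],
#         [2, 3],
#         [4, 5]])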

dir(torch.Tensor)

Lists the methods available on a tensor.

help(torch.Tensor.<method>)

Shows how to use a specific method.

2.2 Automatic Gradients of Tensors

x = torch.tensor([[2., -1.], [1., 1.]], requires_grad=True)
# requires_grad=True specifies that gradients should be computed for this tensor object
# tensor([[ 2., -1.],
#         [ 1.,  1.]], requires_grad=True)
out = x.pow(2).sum()
out.backward()
# compute the gradients of out
x.grad
# the gradient of out with respect to x
# tensor([[ 4., -2.],
#         [ 2.,  2.]])
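A quick sanity check (my own addition): since out = (x ** 2).sum(), the analytical gradient is 2 * x, which matches the values printed above:

print(torch.allclose(x.grad, 2 * x.detach()))
# True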

In general, Torch tensor operations are still faster than NumPy even on the CPU, and Torch on the GPU is the fastest of all; a rough benchmark sketch follows.
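A minimal sketch (my own) for checking this claim yourself; the exact numbers depend heavily on hardware, matrix size, and the BLAS build backing NumPy:

import time
import numpy as np
import torch

size = 2000
a_np = np.random.rand(size, size).astype(np.float32)
a_t = torch.from_numpy(a_np)

start = time.time(); _ = a_np @ a_np; print('NumPy CPU:', time.time() - start)
start = time.time(); _ = a_t @ a_t; print('Torch CPU:', time.time() - start)

if torch.cuda.is_available():
    a_gpu = a_t.to('cuda')
    _ = a_gpu @ a_gpu                # warm-up run
    torch.cuda.synchronize()
    start = time.time()
    _ = a_gpu @ a_gpu
    torch.cuda.synchronize()         # wait for the asynchronous kernel to finish
    print('Torch GPU:', time.time() - start)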

2.3 Building a Neural Network with PyTorch

import torch
x = [[1,2],[3,4],[5,6],[7,8]]
y = [[3],[7],[11],[15]]

# convert the lists to float tensors
X = torch.tensor(x).float()
Y = torch.tensor(y).float()
print(X,Y)

# move the data to the device (GPU if available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
X = X.to(device)
Y = Y.to(device)

# Define the network architecture
import torch.nn as nn

# Inheriting from nn.Module is mandatory, since it is the base class for all neural networks
# super().__init__() must be called so the class inherits nn.Module's pre-built functionality
class MyNeuralNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(2,8) # roughly equivalent to nn.Parameter(torch.rand(2,8))
        # specifically, Linear(in_features=2, out_features=8, bias=True)
        self.hidden_layer_activation = nn.ReLU()
        self.hidden_to_output_layer = nn.Linear(8,1) # nn.Parameter(torch.rand(8,1))
    # The method must be named forward, because PyTorch reserves that name; any other name will raise an error!
    def forward(self, x):
        x = self.input_to_hidden_layer(x)
        # with nn.Parameter(torch.rand(2,8)) this would instead be x = x @ self.input_to_hidden_layer
        x = self.hidden_layer_activation(x)
        x = self.hidden_to_output_layer(x)
        # with nn.Parameter(torch.rand(8,1)) this would instead be x = x @ self.hidden_to_output_layer
        return x

# Create an instance and move it to the GPU
mynet = MyNeuralNet().to(device)

# Take a look at a layer's weights
mynet.input_to_hidden_layer.weight

# Look at the parameters in more detail
mynet.parameters()
for i in  mynet.parameters():
    print(i)

# Define the MSE loss
loss_func = nn.MSELoss()
# nn.CrossEntropyLoss() # multi-class classification loss
# nn.BCELoss() # binary classification loss

# Compute the loss
_Y = mynet(X)
loss_value = loss_func(_Y,Y)
# PyTorch's convention is to pass the prediction first and the ground truth second
print(loss_value)

# Optimizer: stochastic gradient descent
from torch.optim import SGD
opt = SGD(mynet.parameters(), lr = 0.001)   

loss_history = []
for _ in range(50):
    opt.zero_grad() # flush the gradients computed in the previous step
    loss_value = loss_func(mynet(X),Y)
    loss_value.backward() # compute the gradients
    opt.step() # update the weights using the gradients
    loss_history.append(loss_value.item())

# Plot the loss curve
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(loss_history)
plt.title('Loss variation over increasing epochs')
plt.xlabel('epochs')
plt.ylabel('loss value')

2.4 Datasets, DataLoaders, and Batch Size

Batch size: the number of data points used to compute the loss and update the weights in a single step.
Working in batches is especially useful when there are millions of data points, since the whole dataset cannot be processed in one update.

from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self,x,y):
        self.x = torch.tensor(x).float()
        self.y = torch.tensor(y).float()
    def __len__(self):
        return len(self.x)
    def __getitem__(self, ix):
        return self.x[ix], self.y[ix]
ds = MyDataset(X, Y)

# Fetch two data points at a time from ds
dl = DataLoader(ds, batch_size=2, shuffle=True)
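To confirm what the DataLoader yields, you can peek at a single batch (my own quick check):

for xb, yb in dl:
    print(xb.shape, yb.shape)
    # torch.Size([2, 2]) torch.Size([2, 1])
    break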

# Everything else can reuse the code above
# The changes are shown below
import time
loss_history = []
start = time.time()
for _ in range(50):
    for data in dl:
        x, y = data
        opt.zero_grad()
        loss_value = loss_func(mynet(x),y)
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value.item())
end = time.time()
print(end - start)

Predicting on a new data point

val_x = [[10,11]]
val_xf = torch.tensor(val_x).float().to(device)
mynet(val_xf)

Custom loss functions

def my_mean_squared_error(_y, y):
    loss = (_y-y)**2
    loss = loss.mean()
    return loss

my_mean_squared_error(mynet(X),Y)
# This gives the same result as the built-in loss:
loss_func = nn.MSELoss()
loss_value = loss_func(mynet(X),Y)
print(loss_value)
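A quick check (my own addition) that the two implementations agree:

print(torch.allclose(my_mean_squared_error(mynet(X), Y), loss_func(mynet(X), Y)))
# True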

Fetching the values of intermediate layers (fetching parameters was covered above)

# Set the random seed
torch.random.manual_seed(10)

# Method 1: call the layers directly, one after another
input_to_hidden = mynet.input_to_hidden_layer(X)
hidden_activation = mynet.hidden_layer_activation(input_to_hidden)
x = mynet.hidden_to_output_layer(hidden_activation)
x

# Method 2: modify the class's forward function to also return the intermediate value
class MyNeuralNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(2,8)
        self.hidden_layer_activation = nn.ReLU()
        self.hidden_to_output_layer = nn.Linear(8,1)
    def forward(self, x):
        hidden1 = self.input_to_hidden_layer(x)
        hidden2 = self.hidden_layer_activation(hidden1)
        x = self.hidden_to_output_layer(hidden2)
        return x, hidden1

# Call it (after re-instantiating mynet from the new class definition)
_Y, _Y_hidden = mynet(X)

2.5 Building a Neural Network with the Sequential Class

Previously the neural network was built by defining a class; with the Sequential class the same architecture can be declared directly as an ordered stack of layers.

model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
).to(device)

# This package prints a summary (the overall architecture) of a model
!pip install torch_summary
from torchsummary import summary

# Pass the model and a dummy input of the expected shape
summary(model, torch.zeros(2,2))

# Everything else is unchanged except the model name
loss_func = nn.MSELoss()
from torch.optim import SGD
opt = SGD(model.parameters(), lr = 0.001)
import time
loss_history = []
start = time.time()
for _ in range(50):
    for ix, iy in dl:
        opt.zero_grad()
        loss_value = loss_func(model(ix),iy)
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value.item())
end = time.time()
print(end - start)

# Predict on new data
val = [[8,9],[10,11],[1.5,2.5]]
val = torch.tensor(val).float().to(device)
model(val)

2.6 Saving and Loading PyTorch Models

Saving a model

save_path = 'mymodel.pth'
torch.save(model.state_dict(), save_path)
# torch.save(model.state_dict(), 'mymodel.pth')
# "state" refers to the current snapshot of the model
# model.state_dict() returns a dictionary mapping parameter names to values

# Note: a good saving practice is to move the model to the CPU before calling torch.save();
# this helps the model load on any machine (a kindness to whoever loads it later)
# So you can write: torch.save(model.to('cpu').state_dict(), save_path)
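To see what is actually stored, you can list the keys of the state_dict (my own quick check; for the Sequential model above the keys are indexed by layer position):

print(model.state_dict().keys())
# odict_keys(['0.weight', '0.bias', '2.weight', '2.bias'])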

Loading a model

# Before loading, the model must first be defined with freshly initialized weights
model = nn.Sequential(
        nn.Linear(2, 8),
        nn.ReLU(),
        nn.Linear(8, 1)
).to(device)

# Then load the saved parameters
state_dict = torch.load('mymodel.pth')
model.load_state_dict(state_dict)

# Predict on data
val = [[8,9],[10,11],[1.5,2.5]]
val = torch.tensor(val).float().to(device)
model(val)
