transformers / torch training demo

细雨微光發表於2024-04-16

The basic logic of training a model with PyTorch:

import torch
import torch.nn as nn
#from torch.utils.tensorboard import SummaryWriter

vocabList = ["0","1","2","3","4","5","6","7","8","9"]  # unused in this demo

class TwoLayerNet(nn.Module):
    def __init__(self, dim_in, dim_hide_1, dim_hide_2):
        super(TwoLayerNet, self).__init__()
        self.linear1 = nn.Linear(dim_in, dim_hide_1, bias=True)
        self.linear2 = nn.Linear(dim_hide_1, dim_hide_2, bias=True)
        # The final output is a dim_hide_2-dimensional embedding.

    def forward(self, x):
        # clamp(min=0) acts as a ReLU between the two linear layers.
        y_predict = self.linear2(self.linear1(x).clamp(min=0))
        return y_predict

if __name__ == "__main__":
    #writer = SummaryWriter('log')
    N = 5        # batch size
    D_in = 10    # input feature dimension
    H1 = 10      # hidden units in the first layer
    H2 = 15      # hidden units in the second layer (embedding size)

    # Build training data; here it is simply random.
    x1_data = torch.randn(N, D_in)   # "query" batch
    x2_data = torch.randn(N, D_in)   # "title" batch
    y = torch.randn(N)               # one similarity target per pair,
                                     # matching the (N,) shape of the logits

    model = TwoLayerNet(D_in, H1, H2)                          # model
    loss_fn = nn.MSELoss(reduction='sum')                      # loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # optimizer
    #writer.add_graph(model, input_to_model=torch.rand(5, 10))
    #writer.close()
    for t in range(5):
        print("Start train : ---------- ", t)
        y_query = model(x1_data)   # forward pass for the queries
        y_title = model(x2_data)   # forward pass for the titles
        # Cosine similarity of the two embeddings, shape (N,)
        logits = torch.cosine_similarity(y_query, y_title)
        loss = loss_fn(logits, y)  # compute the loss
        optimizer.zero_grad()      # clear stale gradients before backward
        loss.backward()            # backward pass
        # Inspect parameters and their gradients before the update;
        # after zero_grad() the gradients would be gone.
        for name, param in model.named_parameters():
            print(name)
            print(param)
            if param.grad is not None:
                print(param.grad)
                print(param.grad.shape)
                print(param.grad.detach().numpy())
        optimizer.step()           # update the weights
        print(t, loss.item())      # print the loss
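
The script above is pure PyTorch, even though the title also mentions transformers. As a minimal sketch of how the same two-tower cosine-similarity setup looks with the Hugging Face transformers library: the checkpoint name "bert-base-chinese" and the embed() mean-pooling helper below are illustrative assumptions, not part of the original demo.

import torch
from transformers import AutoTokenizer, AutoModel

# Illustrative checkpoint; any encoder-style model works the same way.
tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")
encoder = AutoModel.from_pretrained("bert-base-chinese")

def embed(texts):
    # Tokenize a batch of strings and mean-pool the last hidden states
    # over the non-padding tokens, yielding one vector per text.
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        output = encoder(**batch)
    mask = batch["attention_mask"].unsqueeze(-1)            # (N, L, 1)
    summed = (output.last_hidden_state * mask).sum(dim=1)   # (N, H)
    return summed / mask.sum(dim=1)                         # mean over real tokens

queries = ["1 2 3", "4 5 6"]
titles = ["1 2 3", "9 8 7"]
logits = torch.cosine_similarity(embed(queries), embed(titles))
print(logits)  # one similarity score per (query, title) pair, in [-1, 1]

To fine-tune rather than just score, the torch.no_grad() block would be removed so gradients reach the encoder; the loss / backward / step loop is then the same as in the demo above.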
