The logic of training a model with PyTorch:

The example below builds a small two-tower setup: one network (two linear layers with a ReLU in between) embeds both inputs, pairs are scored by the cosine similarity of their embeddings, and the model is trained against random targets with a summed MSE loss and Adam. After training, every parameter is printed together with its gradient.
import torch
import torch.nn as nn
# from torch.utils.tensorboard import SummaryWriter

vocabList = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]  # not used in this example


class TwoLayerNet(nn.Module):
    """Two linear layers with a ReLU in between; outputs a dim_hide_2-dimensional embedding."""

    def __init__(self, dim_in, dim_hide_1, dim_hide_2):
        super().__init__()
        self.linear1 = nn.Linear(dim_in, dim_hide_1, bias=True)
        self.linear2 = nn.Linear(dim_hide_1, dim_hide_2, bias=True)

    def forward(self, x):
        # clamp(min=0) applies a ReLU to the first layer's output
        return self.linear2(self.linear1(x).clamp(min=0))


if __name__ == "__main__":
    # writer = SummaryWriter('log')
    N = 5      # batch size
    D_in = 10  # input dimension
    H1 = 10    # units in the first hidden layer
    H2 = 15    # embedding dimension produced by the second layer

    # Build training data; here it is just randomly initialized dummy data.
    x1_data = torch.randn(N, D_in)
    x2_data = torch.randn(N, D_in)
    # One random target similarity per pair; note cosine similarity lies in
    # [-1, 1], so these targets only exercise the loop, they are not realistic.
    y = torch.randn(N)

    model = TwoLayerNet(D_in, H1, H2)                          # model
    loss_fn = nn.MSELoss(reduction='sum')                      # loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # optimizer

    # writer.add_graph(model, input_to_model=torch.rand(5, 10))
    # writer.close()

    for t in range(5):
        print("Start train : ---------- ", t)
        optimizer.zero_grad()      # clear gradients left over from the previous step
        y_query = model(x1_data)   # forward pass for the query tower
        y_title = model(x2_data)   # forward pass for the title tower
        logits = torch.cosine_similarity(y_query, y_title)  # shape (N,), matches y
        loss = loss_fn(logits, y)  # compute the loss
        loss.backward()            # backward pass
        optimizer.step()           # update the weights
        print(t, loss.item())

    # Inspect the parameters and the gradients left over from the last step.
    for name, param in model.named_parameters():
        print(name)
        print(param)
        if param.grad is not None:
            print(param.grad)
            print(param.grad.shape)
            paramGradValue = [row.numpy() for row in param.grad]
            print(paramGradValue)
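Stripped of the two-tower details, each iteration above follows the standard PyTorch pattern: clear gradients, run the forward pass, compute the loss, backpropagate, and apply the update. Below is a minimal sketch of that skeleton on its own; the single nn.Linear model and the random batch are placeholders for illustration, not part of the example above.

import torch
import torch.nn as nn

model = nn.Linear(10, 1)  # placeholder model
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

x, y = torch.randn(5, 10), torch.randn(5, 1)  # placeholder batch

for step in range(5):
    optimizer.zero_grad()     # 1. clear old gradients
    y_hat = model(x)          # 2. forward pass
    loss = loss_fn(y_hat, y)  # 3. compute the loss
    loss.backward()           # 4. backpropagate (populates param.grad)
    optimizer.step()          # 5. update the weights
    print(step, loss.item())

Calling optimizer.zero_grad() at the top of each iteration rather than at the end is what leaves the last step's gradients intact after the loop, which is exactly what the inspection pass over model.named_parameters() in the full example relies on.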