PyTorch入門-殘差卷積神經網路

tanyuyang發表於2023-04-18

利用PyTorch實現的深度學習解決MNIST資料集識別,並利用GPU訓練

深度學習網路一般分為4個部分:

  1. 資料集的準備和處理
  2. 定義網路模型
  3. 定義損失函式和最佳化器
  4. 訓練和測試
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 1 data
batch_size = 64  # mini-batch size shared by both loaders
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])  # convert PIL image to Tensor, then normalize (0.1307/0.3081 are the usual MNIST mean/std)

train_dataset = datasets.MNIST(root='../dataset/mnist/', train=True, download=True, transform=transform)  # downloads the dataset on first run
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # shuffle each epoch for training

test_dataset = datasets.MNIST(root='../dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# 2 model
# 把殘差塊定義成一個類,方便程式碼複用
# Residual block as a reusable class so the same structure can be stacked.
class ResidualBlock(nn.Module):
    """Two 3x3 same-padding convolutions with an identity skip connection.

    Input and output both have ``channels`` channels and identical spatial
    size, so the skip addition ``x + F(x)`` is always shape-compatible.
    """

    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.channels = channels
        # padding='same' keeps H and W unchanged, which the skip addition requires
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding='same')
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding='same')
        self.relu = nn.ReLU()

    def forward(self, x):
        residual = self.conv2(self.relu(self.conv1(x)))
        return self.relu(x + residual)


class Net(nn.Module):
    """CNN for 28x28 single-channel MNIST digits: two conv/pool stages,
    each followed by a residual block, then a linear classifier.

    Spatial sizes: 28 -> conv5 -> 24 -> pool -> 12 -> conv5 -> 8 -> pool -> 4,
    giving 32 * 4 * 4 = 512 features for the final layer.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)  # 1 grayscale input channel -> 16
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.res1 = ResidualBlock(16)  # residual blocks keep the channel count unchanged
        self.res2 = ResidualBlock(32)
        self.relu = nn.ReLU()
        self.mp = nn.MaxPool2d(2)  # halves H and W
        self.fc = nn.Linear(512, 10)  # 10 digit classes; returns raw logits

    def forward(self, x):
        batch = x.size(0)
        x = self.res1(self.mp(self.relu(self.conv1(x))))
        x = self.res2(self.mp(self.relu(self.conv2(x))))
        return self.fc(x.view(batch, -1))


model = Net()
device = torch.device("cuda:0"if torch.cuda.is_available() else "cpu")
model.to(device)  # move model parameters to the GPU when one is available


# 3 loss and optimizer
loss = nn.CrossEntropyLoss()  # cross-entropy: expects raw logits and integer class labels
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # momentum accelerates SGD convergence


# 4 training and testing
def train(t_epoch):
    """Run one training epoch over ``train_loader``, printing the running
    average loss every 300 mini-batches.

    t_epoch: zero-based epoch index, used only for progress output.
    """
    running_loss = 0.0
    for batch_index, data in enumerate(train_loader, 0):
        inputs, targets = data  # 'targets' are the ground-truth class labels
        inputs, targets = inputs.to(device), targets.to(device)  # move batch to the model's device
        optimizer.zero_grad()

        # forward + backward + update
        y_hat = model(inputs)
        t_loss = loss(y_hat, targets)
        # (no .to(device) needed on the loss — it is already computed on the
        # device its inputs live on)
        t_loss.backward()
        optimizer.step()

        running_loss += t_loss.item()
        if batch_index % 300 == 299:  # report every 300 batches
            print('[%d, %5d] loss: %.3f' % (t_epoch + 1, batch_index + 1, running_loss / 300))
            running_loss = 0.0


def test():
    """Evaluate the model on ``test_loader`` and print overall accuracy."""
    correct = 0
    total = 0
    with torch.no_grad():  # inference only: skip gradient bookkeeping
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)  # move batch to the model's device
            # the class with the highest logit in each row is the prediction;
            # the legacy .data access is unnecessary inside no_grad
            outputs = model(images)
            predicted = torch.argmax(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('[%d / %d]' % (correct, total))
    print('Accuracy on test set: %d %%' % (100 * correct / total))


if __name__ == '__main__':
    # Train for 10 epochs, evaluating on the test set after each one.
    for epoch in range(10):
        train(epoch)
        test()

    print('Program finished')
    print('Using device: ', device)
    # BUG FIX: torch.cuda.get_device_name() raises a RuntimeError on
    # CPU-only machines, even though the script supports CPU fallback —
    # only query the GPU name when CUDA is actually available.
    if torch.cuda.is_available():
        print('Using GPU: ', torch.cuda.get_device_name())

相關文章