MNIST digit recognition with a deep network implemented in PyTorch, trained on the GPU.
A deep learning program is generally organized into four parts:
- preparing and preprocessing the dataset
- defining the network model
- defining the loss function and the optimizer
- training and testing
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# 1 data
batch_size = 64  # mini-batch size
# Convert the images to tensors first, then normalize them with the MNIST mean and standard deviation
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root='../dataset/mnist/', train=True, download=True, transform=transform)  # download the training set
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # wrap the dataset in a DataLoader
test_dataset = datasets.MNIST(root='../dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
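As a side note, the constants 0.1307 and 0.3081 are the mean and standard deviation of the raw MNIST training pixels after ToTensor scales them to [0, 1]. The short sketch below is illustrative only (not part of the pipeline); it re-reads the training set without normalization to show where those numbers come from.

# Illustrative sketch: recompute the normalization constants from the raw training images
raw_train = datasets.MNIST(root='../dataset/mnist/', train=True, download=True, transform=transforms.ToTensor())
pixels = torch.stack([img for img, _ in raw_train])   # shape (60000, 1, 28, 28)
print(pixels.mean().item(), pixels.std().item())      # roughly 0.1307 and 0.3081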
# 2 model
# The residual block is defined as its own class so it can be reused
class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.channels = channels  # number of channels (input and output are the same)
        # 3x3 convolutions; padding='same' keeps the spatial size unchanged
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding='same')
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding='same')
        self.relu = nn.ReLU()

    def forward(self, x):
        y = self.relu(self.conv1(x))
        y = self.conv2(y)
        return self.relu(x + y)  # skip connection: add the input before the final activation
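A quick way to see why the skip connection x + y is valid: both convolutions use padding='same' and keep the channel count, so the block's output has exactly the same shape as its input. The check below with a random tensor is just an illustration.

block = ResidualBlock(16)
dummy = torch.randn(1, 16, 12, 12)   # (batch, channels, height, width)
print(block(dummy).shape)            # torch.Size([1, 16, 12, 12]) - same shape as the input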
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)  # 1 input channel, 16 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.res1 = ResidualBlock(16)  # residual blocks keep the channel count unchanged
        self.res2 = ResidualBlock(32)
        self.relu = nn.ReLU()
        self.mp = nn.MaxPool2d(2)  # 2x2 max pooling
        self.fc = nn.Linear(512, 10)  # 32 channels * 4 * 4 spatial size = 512 flattened features

    def forward(self, x):
        in_size = x.size(0)
        x = self.mp(self.relu(self.conv1(x)))  # convolution, then activation, then pooling
        x = self.res1(x)
        x = self.mp(self.relu(self.conv2(x)))
        x = self.res2(x)
        x = x.view(in_size, -1)  # flatten to a vector; -1 infers the size
        x = self.fc(x)
        return x
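The in_features=512 of the fully connected layer follows from the spatial sizes: 28x28 shrinks to 24x24 after conv1 (5x5 kernel, no padding), to 12x12 after pooling, to 8x8 after conv2, and to 4x4 after the second pooling, with 32 channels, so 32 * 4 * 4 = 512. A throwaway check of this arithmetic (illustrative only):

tmp = Net()
feat = tmp.mp(tmp.relu(tmp.conv1(torch.randn(1, 1, 28, 28))))   # -> (1, 16, 12, 12)
feat = tmp.mp(tmp.relu(tmp.conv2(tmp.res1(feat))))              # -> (1, 32, 4, 4)
print(tmp.res2(feat).view(1, -1).shape)                         # torch.Size([1, 512])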
model = Net()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)  # move the model to the GPU if one is available
# 3 loss and optimizer
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # momentum speeds up convergence
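One detail worth noting: nn.CrossEntropyLoss applies log-softmax internally, which is why Net's forward returns raw logits from self.fc and the labels are plain class indices rather than one-hot vectors. A tiny standalone illustration with made-up data:

logits = torch.randn(4, 10)             # a fake batch of 4 predictions over 10 classes
targets = torch.tensor([3, 7, 0, 9])    # ground-truth digits as integer class indices
print(nn.CrossEntropyLoss()(logits, targets).item())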
# 4 training and testing
def train(t_epoch):
    running_loss = 0.0
    # enumerate pairs each mini-batch with its index, counting from 0
    for batch_index, data in enumerate(train_loader, 0):
        inputs, targets = data
        inputs, targets = inputs.to(device), targets.to(device)  # move the data to the GPU
        optimizer.zero_grad()
        # forward + backward + update
        y_hat = model(inputs)
        t_loss = criterion(y_hat, targets)  # the loss is already on the same device as the model and the data
        t_loss.backward()
        optimizer.step()
        running_loss += t_loss.item()
        if batch_index % 300 == 299:  # print the average loss every 300 mini-batches
            print('[%d, %5d] loss: %.3f' % (t_epoch + 1, batch_index + 1, running_loss / 300))
            running_loss = 0.0
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed during evaluation
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)  # move the data to the GPU
            outputs = model(images)
            predicted = torch.argmax(outputs, dim=1)  # index of the largest logit in each row
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('[%d / %d]' % (correct, total))
    print('Accuracy on test set: %d %%' % (100 * correct / total))
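To make the accuracy bookkeeping in test() concrete, here is a tiny worked example with made-up numbers: argmax along dim=1 picks the highest-scoring class per row, and summing the boolean comparison counts the correct predictions.

scores = torch.tensor([[0.1, 2.0, 0.3],
                       [1.5, 0.2, 0.1]])    # fake logits for 2 samples, 3 classes
labels = torch.tensor([1, 2])
pred = torch.argmax(scores, dim=1)          # tensor([1, 0])
print((pred == labels).sum().item())        # 1 correct out of 2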
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
    print('Program finished')
    print('Using device: ', device)
    if torch.cuda.is_available():
        print('Using GPU: ', torch.cuda.get_device_name())  # print the GPU model name