PyTorch MNIST Multi-layer

Posted by jerry173985 on 2020-12-04
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
%matplotlib inline
dataset = MNIST(root='data/', download=True, transform=ToTensor())
val_size = 10000
train_size = len(dataset) - val_size

train_ds, val_ds = random_split(dataset, [train_size, val_size])
len(train_ds), len(val_ds)
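
Note that random_split shuffles indices randomly, so the split changes between runs. For a reproducible split, random_split also accepts a seeded generator (a minimal sketch; the seed 42 is arbitrary):

generator = torch.Generator().manual_seed(42)  # arbitrary seed, for reproducibility
train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=generator)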

batch_size = 128
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size*2, num_workers=4, pin_memory=True)
for images, _ in train_loader:
    print('images.shape:', images.shape)
    plt.figure(figsize=(16,8))
    plt.axis('off')
    grid = make_grid(images, nrow=16)
    print(grid.permute(1, 2, 0).shape)
    plt.imshow(grid.permute(1, 2, 0))
    break
images.shape: torch.Size([128, 1, 28, 28])
torch.Size([242, 482, 3])
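
The grid shape follows from make_grid's default 2-pixel padding: 128 images at 16 per row form 8 rows, so the height is 8 * (28 + 2) + 2 = 242 and the width is 16 * (28 + 2) + 2 = 482. make_grid also replicates the single grayscale channel to 3 channels, and the permute to (H, W, C) puts channels last, as plt.imshow expects.
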
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))
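
As a quick sanity check: torch.max along dim=1 returns a (values, indices) pair, and the indices are the predicted classes. With some made-up logits (hypothetical values, for illustration only):

dummy_outputs = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # 3 samples, 2 classes
dummy_labels = torch.tensor([1, 0, 0])  # predictions are [1, 0, 1], so 2 of 3 match
print(accuracy(dummy_outputs, dummy_labels))  # tensor(0.6667)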

class MnistModel(nn.Module):
    """Feedfoward neural network with 1 hidden layer"""
    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        # hidden layer
        self.linear1 = nn.Linear(in_size, hidden_size)
        # output layer
        self.linear2 = nn.Linear(hidden_size, out_size)
        
    def forward(self, xb):
        # Flatten the image tensors
        xb = xb.view(xb.size(0), -1)
        # Get intermediate outputs using hidden layer
        out = self.linear1(xb)
        # Apply activation function
        out = F.relu(out)
        # Get predictions using output layer
        out = self.linear2(out)
        return out
    
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss, 'val_acc': acc}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))
        
        
input_size = 784
hidden_size = 32 # you can change this
num_classes = 10
model = MnistModel(input_size, hidden_size=hidden_size, out_size=num_classes)
for t in model.parameters():
    print(t.shape)
torch.Size([32, 784])
torch.Size([32])
torch.Size([10, 32])
torch.Size([10])
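
Each weight matrix is paired with a bias vector, so the total parameter count is 784*32 + 32 + 32*10 + 10 = 25,450, which you can verify with numel():

print(sum(p.numel() for p in model.parameters()))  # 25450
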
for images, labels in train_loader:
    outputs = model(images)
    loss = F.cross_entropy(outputs, labels)
    print('Loss:', loss.item())
    break
    

print('outputs.shape : ', outputs.shape)
print('Sample outputs :\n', outputs[:2].data)
Loss: 2.2856671810150146
outputs.shape :  torch.Size([128, 10])
Sample outputs :
 tensor([[ 0.0361,  0.0111,  0.0389, -0.2147, -0.0217, -0.1094, -0.0970,  0.0067,
          0.0022,  0.0643],
        [ 0.1817,  0.0952,  0.1207, -0.2252, -0.0403, -0.2785, -0.1168,  0.0983,
         -0.1024,  0.0505]])
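
Note that these outputs are raw logits, not probabilities; F.cross_entropy applies log-softmax internally, so the output layer needs no activation. To read the logits as class probabilities, you could apply softmax explicitly (a minimal sketch):

probs = F.softmax(outputs[:2], dim=1)
print('Sample probabilities:\n', probs.data)
print('Row sums:', probs.sum(dim=1).data)  # each row sums to 1
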
torch.cuda.is_available()
True

Let’s define a helper function to ensure that our code uses the GPU if available, and defaults to using the CPU if it isn’t.

def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')

device = get_default_device()

Next, let’s define a function that can move data and model to a chosen device.

def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)
for images, labels in train_loader:
    print(images.shape)
    images = to_device(images, device)
    print(images.device)
    break
torch.Size([128, 1, 28, 28])
cuda:0

Finally, we define a DeviceDataLoader class to wrap our existing data loaders and move data to the selected device as batches are accessed. Interestingly, we don't need to extend an existing class to create a PyTorch data loader. All we need is an __iter__ method to retrieve batches of data and a __len__ method to get the number of batches.

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)
train_loader = DeviceDataLoader(train_loader, device)
val_loader = DeviceDataLoader(val_loader, device)
for xb, yb in val_loader:
    print('xb.device:', xb.device)
    print('yb:', yb)
    break
xb.device: cuda:0
yb: tensor([2, 7, 9, 5, 0, 7, 9, 5, 5, 5, 8, 6, 0, 9, 1, 1, 5, 9, 3, 0, 8, 3, 7, 3,
        5, 7, 4, 6, 8, 4, 0, 7, 1, 5, 6, 1, 1, 7, 8, 7, 8, 3, 3, 1, 8, 8, 1, 4,
        6, 0, 2, 1, 0, 4, 2, 3, 9, 3, 7, 7, 6, 0, 6, 3, 8, 6, 7, 1, 5, 2, 9, 1,
        7, 6, 1, 1, 6, 7, 5, 8, 7, 6, 0, 8, 4, 8, 9, 6, 6, 9, 7, 6, 7, 0, 0, 5,
        1, 1, 6, 0, 2, 3, 8, 9, 7, 3, 8, 0, 3, 4, 8, 4, 7, 2, 0, 7, 5, 2, 1, 7,
        2, 2, 7, 7, 4, 8, 0, 7, 7, 0, 2, 2, 5, 6, 0, 6, 9, 5, 6, 6, 0, 8, 1, 0,
        8, 5, 6, 3, 9, 5, 5, 0, 3, 9, 4, 3, 9, 5, 5, 8, 9, 3, 2, 5, 8, 8, 7, 4,
        5, 0, 2, 5, 6, 0, 8, 1, 7, 5, 6, 3, 4, 3, 2, 7, 2, 1, 1, 6, 8, 4, 4, 9,
        5, 1, 4, 1, 8, 4, 8, 9, 3, 5, 3, 6, 5, 0, 2, 2, 2, 6, 1, 3, 8, 9, 0, 7,
        3, 3, 6, 1, 7, 3, 4, 0, 0, 0, 4, 4, 3, 9, 6, 4, 7, 4, 7, 0, 1, 8, 4, 8,
        3, 7, 2, 1, 5, 9, 0, 2, 8, 7, 4, 2, 0, 1, 6, 0], device='cuda:0')

Train the model

def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history
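
Because fit takes the optimizer class as opt_func, you can swap optimizers without touching the loop. For example, Adam with a smaller learning rate (a sketch, not run here; 1e-3 is an arbitrary choice):

# history += fit(5, 1e-3, model, train_loader, val_loader, opt_func=torch.optim.Adam)
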
# Model (on GPU)
model = MnistModel(input_size, hidden_size=hidden_size, out_size=num_classes)
to_device(model, device)
MnistModel(
  (linear1): Linear(in_features=784, out_features=32, bias=True)
  (linear2): Linear(in_features=32, out_features=10, bias=True)
)
history = [evaluate(model, val_loader)]
history
[{'val_loss': 2.3016114234924316, 'val_acc': 0.12548828125}]
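
The untrained model scores about 12.5%, close to the 10% chance level you would expect from random guessing over 10 classes, so it gives a useful baseline for the training runs below.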
history += fit(5, 0.5, model, train_loader, val_loader)
Epoch [0], val_loss: 0.2720, val_acc: 0.9210
Epoch [1], val_loss: 0.2028, val_acc: 0.9426
Epoch [2], val_loss: 0.1622, val_acc: 0.9529
Epoch [3], val_loss: 0.1651, val_acc: 0.9527
Epoch [4], val_loss: 0.1627, val_acc: 0.9542
history += fit(5, 0.1, model, train_loader, val_loader)
Epoch [0], val_loss: 0.1421, val_acc: 0.9620
Epoch [1], val_loss: 0.1411, val_acc: 0.9620
Epoch [2], val_loss: 0.1409, val_acc: 0.9623
Epoch [3], val_loss: 0.1410, val_acc: 0.9616
Epoch [4], val_loss: 0.1382, val_acc: 0.9612
history += fit(500, 0.01, model, train_loader, val_loader)

import matplotlib.pyplot as plt
%matplotlib inline
losses = [x['val_loss'] for x in history]
plt.plot(losses, '-x')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss vs. No. of epochs');
accuracies = [x['val_acc'] for x in history]
plt.plot(accuracies, '-x')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Accuracy vs. No. of epochs');
!pip install jovian --upgrade -q
import jovian
jovian.commit(project='jupyter-notebook-04-feedforward-nn', environment=None)
