torch -- Image Classification with a Multilayer Perceptron

Published by 星空28 on 2024-11-04

Method 1: concise implementation with torch.nn

"""
Multilayer perceptron for Fashion-MNIST classification (concise implementation using nn.Sequential)
"""
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
from IPython import display
import matplotlib.pyplot as plt
from torch import nn


def get_fashion_mnist_labels(labels):
    """Return the text labels of the Fashion-MNIST dataset"""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]


def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a list of images"""
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # image tensor
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes


def get_dataloader_workers():
    """Use 4 worker processes to read the data"""
    return 4


# Prepare the data
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
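    # Note (runtime assumption): with num_workers > 0 on Windows, the top-level
    # training code must run under an `if __name__ == "__main__":` guard;
    # otherwise have get_dataloader_workers() return 0.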
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))


class Accumulator:
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


class Animator:
    """Plot data in an animation"""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5), pic_name=None):
        # Incrementally plot multiple lines
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda to capture the axis-configuration arguments
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts
        self.pic_name = pic_name

    def add(self, x, y):
        # Add multiple data points to the chart
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        plt.draw()
        plt.pause(0.1)
        plt.savefig(self.pic_name)
        display.display(self.fig)
        display.clear_output(wait=True)

    def show(self):
        display.display(self.fig)


# Metrics
def accuracy(y_hat, y):
    """Compute the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())


def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of the model on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval()  # Set the model to evaluation mode
    metric = Accumulator(2)  # number of correct predictions, total number of predictions
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]


# Training
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train the model for one epoch (defined in Chapter 3)"""
    # Set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Use PyTorch's built-in optimizer and loss function
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Use a custom optimizer and loss function
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return the training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model (defined in Chapter 3)"""
    animator = Animator(xlabel='epoch',
                        xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'],
                        pic_name="Simple_Multilayer_Fashion_mnist")
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss      # raise an error if the training loss exceeds 0.5
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc


def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = ["True:" + true + '\n' + "Pred:" + pred for true, pred in zip(trues, preds)]
    d2l.show_images(
        X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])
    d2l.plt.show()


batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)     # no resize here; pass e.g. resize=64 to upscale the images


# Network model
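# Flatten turns each 1x28x28 image into a 784-dimensional vector; a hidden layer of
# 256 ReLU units is followed by a 10-way output layer (one logit per class).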
net = nn.Sequential(nn.Flatten(),
                    nn.Linear(784, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10))


# Initialize model parameters
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)


net.apply(init_weights)


num_inputs, num_outputs, num_hiddens = 784, 10, 256
batch_size, lr, num_epochs = 256, 0.1, 10
# Loss function
loss = nn.CrossEntropyLoss(reduction='none')
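# reduction='none' keeps the per-sample losses, so train_epoch_ch3 can decide whether
# to take l.mean() (built-in optimizer branch) or l.sum() (custom updater branch).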
trainer = torch.optim.SGD(net.parameters(), lr=lr)

train_iter, test_iter = load_data_fashion_mnist(batch_size)
train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
predict_ch3(net, test_iter)
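
To keep the trained weights after the script finishes, a minimal sketch (the file name "mlp_fashion_mnist.pt" is only an example, not part of the original post):

# Save the parameters, then restore them into an identically shaped network.
torch.save(net.state_dict(), "mlp_fashion_mnist.pt")
clone = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
clone.load_state_dict(torch.load("mlp_fashion_mnist.pt"))
clone.eval()  # switch to evaluation mode before running inference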



Method 2: implementation from scratch

"""
Multilayer perceptron for Fashion-MNIST classification, implemented from scratch
"""
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
from IPython import display
import matplotlib.pyplot as plt
from torch import nn


def get_fashion_mnist_labels(labels):
    """Return the text labels of the Fashion-MNIST dataset"""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]


def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):  #@save
    """Plot a list of images"""
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # image tensor
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes


def get_dataloader_workers():
    """Use 4 worker processes to read the data"""
    return 4


# Prepare the data
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))


batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)     # no resize here; pass e.g. resize=64 to upscale the images


# Initialize model parameters
num_inputs, num_outputs, num_hiddens = 784, 10, 256
W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens, requires_grad=True))
W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs, requires_grad=True) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
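# Scaling torch.randn by 0.01 keeps the initial weights small (std 0.01), matching the
# nn.init.normal_(std=0.01) initialization used in Method 1; the biases start at zero.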

params = [W1, b1, W2, b2]


# Activation function
def relu(X):
    a = torch.zeros_like(X)
    return torch.max(X, a)


# Network model
def net(X):
    X = X.reshape((-1, num_inputs))
    H = relu(X@W1 + b1)  # "@" denotes matrix multiplication
    return (H@W2 + b2)
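
# Note: net returns raw logits; no softmax is applied here because
# nn.CrossEntropyLoss below computes log-softmax internally.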


# Loss function
loss = nn.CrossEntropyLoss(reduction='none')


class Accumulator:
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


class Animator:
    """Plot data in an animation"""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5), pic_name=None):
        # Incrementally plot multiple lines
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda to capture the axis-configuration arguments
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts
        self.pic_name = pic_name

    def add(self, x, y):
        # Add multiple data points to the chart
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        plt.draw()
        plt.pause(0.1)
        plt.savefig(self.pic_name)
        display.display(self.fig)
        display.clear_output(wait=True)

    def show(self):
        display.display(self.fig)


# Metrics
def accuracy(y_hat, y):
    """Compute the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())


def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of the model on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval()  # Set the model to evaluation mode
    metric = Accumulator(2)  # number of correct predictions, total number of predictions
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]


# Training
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train the model for one epoch (defined in Chapter 3)"""
    # Set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Use PyTorch's built-in optimizer and loss function
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Use a custom optimizer and loss function
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return the training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model (defined in Chapter 3)"""
    animator = Animator(xlabel='epoch',
                        xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'],
                        pic_name="Multilayer_Fashion_mnist")
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc


def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = ["True:" + true + '\n' + "Pred:" + pred for true, pred in zip(trues, preds)]
    d2l.show_images(
        X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])
    d2l.plt.show()


num_epochs, lr = 10, 0.1
updater = torch.optim.SGD(params, lr=lr)
train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
predict_ch3(net, test_iter)
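
Since train_epoch_ch3 also accepts a non-Optimizer updater (its l.sum() branch), the torch.optim.SGD call above can be swapped for a hand-written update step. A minimal sketch under that assumption; the helper name sgd is not part of the original post:

def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent, applied in place."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()

# train_epoch_ch3 calls updater(X.shape[0]) in its custom branch, so bind params and lr here.
updater = lambda batch_size: sgd(params, lr, batch_size)
train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)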
