機器學習筆記(4):多類邏輯迴歸-使用gluon

菩提樹下的楊過發表於2017-12-13

接上一篇機器學習筆記(3):多類邏輯迴歸繼續,這次改用gluon來實現關鍵處理,原文見這裡,程式碼如下:

import matplotlib.pyplot as plt
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet import autograd

def transform(data, label):
    """Cast one sample to float32 and rescale its pixel values.

    Args:
        data: Object with an ``astype`` method holding the raw pixel values.
        label: Object with an ``astype`` method holding the class id(s).

    Returns:
        A ``(data, label)`` tuple: ``data`` cast to float32 and divided by
        255, ``label`` cast to float32.
    """
    scaled = data.astype('float32') / 255
    label_f32 = label.astype('float32')
    return scaled, label_f32

# Build the Fashion-MNIST training split first, then the test split; both
# are handed the `transform` function as their transform argument.
mnist_train, mnist_test = (
    gluon.data.vision.FashionMNIST(train=is_train, transform=transform)
    for is_train in (True, False)
)

def show_images(images):
    """Display a batch of images side by side in a single row.

    Args:
        images: Batch indexed along its first axis. Each item must support
            ``reshape((28, 28))`` followed by ``asnumpy()``.

    Returns:
        None. The figure is shown with ``plt.show()``; an empty batch is a
        no-op.
    """
    n = images.shape[0]
    if n == 0:
        # Nothing to draw; a figure with zero columns cannot be created.
        return
    # squeeze=False makes subplots always return a 2-D array of Axes. With
    # the default, a single-image batch yields a bare Axes object that
    # cannot be indexed, which broke the n == 1 case.
    _, figs = plt.subplots(1, n, figsize=(15, 15), squeeze=False)
    for i, ax in enumerate(figs[0]):
        ax.imshow(images[i].reshape((28, 28)).asnumpy())
        # Hide the tick axes so only the image is visible.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
    plt.show()

def get_text_labels(label):
    """Translate numeric class ids into their display names.

    Args:
        label: Iterable of values convertible with ``int()``; each is used
            as an index into the ten-entry name table.

    Returns:
        List of name strings, one per element of ``label``, in order.
    """
    names = (
        'T 恤', '長 褲', '套頭衫', '裙 子', '外 套',
        '涼 鞋', '襯 衣', '運動鞋', '包 包', '短 靴',
    )
    result = []
    for class_id in label:
        result.append(names[int(class_id)])
    return result

# Preview the first ten training samples: print their shape and raw labels,
# draw the images, then print the readable class names.
data, label = mnist_train[0:10]
sample_names = get_text_labels(label)

print('example shape: ', data.shape, 'label:', label)
show_images(data)
print(sample_names)

# Mini-batch size used by both data loaders.
batch_size = 256

# Only the training loader shuffles its samples.
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

num_inputs, num_outputs = 784, 10

# Randomly initialised weight matrix and bias vector, each with a gradient
# buffer attached.
# NOTE(review): nothing else in this listing reads W, b or params; they look
# like a leftover from the hand-written version. Confirm before removing,
# since creating them also consumes random-number state.
W = nd.random_normal(shape=(num_inputs, num_outputs))
b = nd.random_normal(shape=num_outputs)
params = [W, b]
for tensor in params:
    tensor.attach_grad()

def accuracy(output, label):
    """Return the fraction of rows whose arg-max along axis 1 equals ``label``.

    Args:
        output: Score array supporting ``argmax(axis=1)``.
        label: Array compared element-wise against the arg-max indices.

    Returns:
        The mean of the element-wise equality result, as a scalar.
    """
    predicted = output.argmax(axis=1)
    hits = predicted == label
    return nd.mean(hits).asscalar()

def _get_batch(batch):
    """Split a batch into its ``(data, label)`` pair.

    Args:
        batch: Either an ``mx.io.DataBatch`` or an iterable that unpacks
            into exactly two items.

    Returns:
        Tuple ``(data, label)``. For a ``DataBatch`` these are the first
        entries of ``batch.data`` and ``batch.label``.
    """
    if not isinstance(batch, mx.io.DataBatch):
        data, label = batch
        return data, label
    return batch.data[0], batch.label[0]

def evaluate_accuracy(data_iterator, net):
    """Return the mean per-batch accuracy of ``net`` over ``data_iterator``.

    Args:
        data_iterator: Iterable of batches accepted by ``_get_batch``. An
            ``mx.io.MXDataIter`` is reset before it is iterated.
        net: Callable that maps a data batch to the scores passed to
            ``accuracy``.

    Returns:
        The unweighted average of the per-batch accuracies, or 0.0 when the
        iterator yields no batches.
    """
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    acc = 0.
    num_batches = 0
    for batch in data_iterator:
        data, label = _get_batch(batch)
        acc += accuracy(net(data), label)
        num_batches += 1
    # Count batches explicitly: relying on the loop index left it unbound
    # (and raised) when the iterator was empty.
    if num_batches == 0:
        return 0.
    # NOTE: every batch carries equal weight, so a smaller final batch
    # counts as much as a full one.
    return acc / num_batches

# Define the model with gluon: a Flatten layer followed by a Dense layer
# with 10 outputs.
net = gluon.nn.Sequential()
with net.name_scope():
    for layer in (gluon.nn.Flatten(), gluon.nn.Dense(10)):
        net.add(layer)
net.initialize()

# Loss function: softmax cross-entropy.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Build a trainer that runs SGD over the network's parameters with the
# learning rate set to 0.1.
learning_rate = 0.1
trainer = gluon.Trainer(
    net.collect_params(), 'sgd', {'learning_rate': learning_rate})

# Train for five epochs; after each one print the average training loss,
# the average training accuracy and the test accuracy.
for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            # Compute the loss.
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Take one SGD step. Normalise by the number of samples actually in
        # this batch: the final batch of an epoch can be smaller than
        # batch_size, and dividing its gradient by the nominal size would
        # shrink that update.
        trainer.step(data.shape[0])

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))

# Compare true and predicted classes on the first ten test samples.
data, label = mnist_test[0:10]
show_images(data)
print('true labels')
print(get_text_labels(label))

# The predicted class is the index of the largest score in each row.
scores = net(data)
predicted_labels = scores.argmax(axis=1)
print('predicted labels')
print(get_text_labels(predicted_labels.asnumpy()))

相對上一版原始手動方法,使用gluon修改的地方都加了註釋,不多解釋。執行效果如下:

相對之前的版本可以發現,幾乎相同的引數,但是準確度有所提升,從0.7幾上升到0.8幾,10個裡錯誤的預測數從4個下降到3個,說明gluon在一些細節上做了更好的優化。關於優化的細節,這裡有一些討論,供參考。

相關文章