Continuing from the previous post, Machine Learning Notes (3): Multiclass Logistic Regression, this time the key steps are reimplemented with gluon. See the original tutorial here. The code is as follows:
import matplotlib.pyplot as plt
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet import autograd

def transform(data, label):
    return data.astype('float32') / 255, label.astype('float32')

mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)

def show_images(images):
    n = images.shape[0]
    _, figs = plt.subplots(1, n, figsize=(15, 15))
    for i in range(n):
        figs[i].imshow(images[i].reshape((28, 28)).asnumpy())
        figs[i].axes.get_xaxis().set_visible(False)
        figs[i].axes.get_yaxis().set_visible(False)
    plt.show()

def get_text_labels(label):
    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'
    ]
    return [text_labels[int(i)] for i in label]

data, label = mnist_train[0:10]
print('example shape: ', data.shape, 'label:', label)
show_images(data)
print(get_text_labels(label))

batch_size = 256
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

# leftover from the hand-written version; unused once the model
# below is defined and initialized through gluon
num_inputs = 784
num_outputs = 10
W = nd.random_normal(shape=(num_inputs, num_outputs))
b = nd.random_normal(shape=num_outputs)
params = [W, b]
for param in params:
    param.attach_grad()

def accuracy(output, label):
    return nd.mean(output.argmax(axis=1) == label).asscalar()

def _get_batch(batch):
    if isinstance(batch, mx.io.DataBatch):
        data = batch.data[0]
        label = batch.label[0]
    else:
        data, label = batch
    return data, label

def evaluate_accuracy(data_iterator, net):
    acc = 0.
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for i, batch in enumerate(data_iterator):
        data, label = _get_batch(batch)
        output = net(data)
        acc += accuracy(output, label)
    return acc / (i + 1)

# define the model with gluon
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(10))
net.initialize()

# loss function (softmax cross-entropy)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# build an SGD trainer with learning rate 0.1
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            # compute the loss
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # let the sgd trainer take one step forward
        trainer.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)
    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))

data, label = mnist_test[0:10]
show_images(data)
print('true labels')
print(get_text_labels(label))
predicted_labels = net(data).argmax(axis=1)
print('predicted labels')
print(get_text_labels(predicted_labels.asnumpy()))
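One detail worth calling out in the code above: `trainer.step(batch_size)` replaces the hand-written parameter update from the previous note. Passing `batch_size` tells the trainer to rescale the gradient by `1/batch_size`, because `loss.backward()` accumulates the sum of gradients over the batch. As a rough sketch only (not gluon's actual implementation), the plain-SGD update it performs is roughly equivalent to the following, with `lr` matching the 0.1 configured above:

# illustrative hand-rolled equivalent of trainer.step(batch_size) for plain SGD
lr = 0.1
for param in net.collect_params().values():
    # loss.backward() left the summed batch gradient in param.grad()
    param.set_data(param.data() - lr * param.grad() / batch_size)

Letting the Trainer do this keeps the update rule, learning-rate handling, and parameter bookkeeping in one place, which is exactly the boilerplate the gluon version removes.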
Compared with the previous hand-written version, every place changed to use gluon is marked with a comment in the code, so I won't go through them one by one. The output of a run is as follows:
Compared with the previous version, with almost identical parameters the accuracy improves, rising from roughly 0.7x to 0.8x, and the number of wrong predictions among the 10 sample images drops from 4 to 3. This suggests that gluon handles some implementation details better. There is some discussion of those optimization details here, for reference.
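One plausible contributor to the improvement (an assumption on my part, not something the post states) is numerical stability: `gluon.loss.SoftmaxCrossEntropyLoss` fuses the softmax and the cross-entropy into a single log-softmax computation, whereas the hand-written version exponentiated first and took the log afterwards, which can overflow or underflow for large logits. A minimal sketch of the stable form using the log-sum-exp identity (the function name is illustrative, not gluon's API):

from mxnet import ndarray as nd

def stable_cross_entropy(output, label):
    # log-softmax via the log-sum-exp trick: subtracting the row max
    # keeps every argument to nd.exp at or below zero
    shifted = output - nd.max(output, axis=1, keepdims=True)
    log_prob = shifted - nd.log(nd.sum(nd.exp(shifted), axis=1, keepdims=True))
    # pick the log-probability of the true class for each sample
    return -nd.pick(log_prob, label)

Subtracting the per-row maximum leaves the softmax unchanged mathematically, but prevents `nd.exp` from overflowing, so the loss (and hence its gradients) stays well-behaved even for extreme logits.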