TensorFlow 實戰Google深度學習框架 (2nd Edition), Chapter 6: MNIST Digit Recognition with the LeNet-5 Model

Posted by 大笨牛 on 2020-12-15

This post records and summarizes the problems I ran into while working through Chapter 6 of 《TensorFlow 實戰Google深度學習框架(第2版)》, which implements MNIST digit recognition with the LeNet-5 model.


The input data format in mnist_train_cnn.py needs to be adjusted in two places:

  1. When calling tf.placeholder, the first dimension of the shape must be given explicitly (BATCH_SIZE); it can no longer be None as before, otherwise you get the error “Failed to convert object of type <class 'list'> to Tensor. Contents: [None, 3136]. Consider casting …”. (An alternative that keeps None is sketched after this list.)
    x = tf.placeholder(tf.float32, 
                       [BATCH_SIZE, 
                        mnist_inference_cnn.IMAGE_SIZE, 
                        mnist_inference_cnn.IMAGE_SIZE, 
                        mnist_inference_cnn.NUM_CHANNELS],
                      name = "x-input")
    y_ = tf.placeholder(tf.float32, 
                        [BATCH_SIZE, 
                         mnist_inference_cnn.NUM_LABELS],name = "y-input")
  2. The feed data cannot be reshaped with tf.reshape; np.reshape (the reshape from NumPy) must be used instead, otherwise you get the error “TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, numpy ndarrays, or TensorHandles. For reference, the tensor object was Tensor…”. The message explains it clearly: a value passed through feed_dict must not be a tensor, and np.reshape returns a plain numpy array rather than a tensor.
reshaped_xs = np.reshape(xs,
                         [BATCH_SIZE, 
                         mnist_inference_cnn.IMAGE_SIZE, 
                         mnist_inference_cnn.IMAGE_SIZE, 
                         mnist_inference_cnn.NUM_CHANNELS])
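
As an aside (my own variation, not from the book): the first change is only needed because inference() flattens the pooling output with tf.reshape(pool2, [pool_shape[0], nodes]), and pool_shape[0] is None when the placeholder's first dimension is None. If tf.reshape is instead allowed to infer the batch dimension with -1, the placeholder can keep None as its first dimension:

# In mnist_inference_cnn.py: let tf.reshape infer the batch dimension (my variation, not the book's code)
pool_shape = pool2.get_shape().as_list()
nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
reshaped = tf.reshape(pool2, [-1, nodes])

# The placeholder can then keep None as its batch dimension again
x = tf.placeholder(tf.float32,
                   [None,
                    mnist_inference_cnn.IMAGE_SIZE,
                    mnist_inference_cnn.IMAGE_SIZE,
                    mnist_inference_cnn.NUM_CHANNELS],
                   name = "x-input")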

The complete code is given below. First, the forward-propagation module mnist_inference_cnn.py:

# _*_ coding:utf-8 _*_

import tensorflow as tf

IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10

# Size and depth of the first convolutional layer
CONV1_SIZE = 5
CONV1_DEEP = 32

# Size and depth of the second convolutional layer
CONV2_SIZE = 5
CONV2_DEEP = 64

# Number of nodes in the fully-connected layer
FC_SIZE = 512

def inference(input_tensor, train, regularizer):
    
    # Layer 1: convolutional layer. Filter (kernel) size 5*5, depth 32, stride 1, zero padding ("SAME")
    # Input:  a BATCH_SIZE*28*28*1 tensor
    # Output: a BATCH_SIZE*28*28*32 tensor
    with tf.variable_scope("layer1-conv1"):
        conv1_weight = tf.get_variable("weight",[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP], 
                                       initializer = tf.truncated_normal_initializer(stddev=0.1))
        conv1_bias = tf.get_variable("bias",[CONV1_DEEP], initializer = tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weight, strides = [1,1,1,1], padding="SAME")
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
       
    # Layer 2: max-pooling layer. Filter size 2*2, stride 2
    # Input:  a BATCH_SIZE*28*28*32 tensor
    # Output: a BATCH_SIZE*14*14*32 tensor
    with tf.variable_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(relu1, ksize=[1,2,2,1], strides = [1,2,2,1],padding="SAME")
        
    # Layer 3: convolutional layer. Filter (kernel) size 5*5, depth 64, stride 1, zero padding ("SAME")
    # Input:  a BATCH_SIZE*14*14*32 tensor
    # Output: a BATCH_SIZE*14*14*64 tensor
    with tf.variable_scope("layer3-conv2"):
        conv2_weight = tf.get_variable("weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
                                      initializer = tf.truncated_normal_initializer(stddev=0.1))
        conv2_bias = tf.get_variable("bias", [CONV2_DEEP], initializer = tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weight, strides = [1,1,1,1], padding="SAME")
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))   
        
    # Layer 4: max-pooling layer. Filter size 2*2, stride 2
    # Input:  a BATCH_SIZE*14*14*64 tensor
    # Output: a BATCH_SIZE*7*7*64 tensor
    with tf.variable_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize = [1,2,2,1], strides = [1,2,2,1], padding="SAME")
        
    # Flatten the BATCH_SIZE*7*7*64 four-dimensional tensor into a BATCH_SIZE*3136 two-dimensional tensor
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1]*pool_shape[2]*pool_shape[3]
    reshaped = tf.reshape(pool2,[pool_shape[0],nodes])
    
    # Layer 5: fully-connected layer
    # Input:  a BATCH_SIZE*3136 tensor
    # Output: a BATCH_SIZE*512 tensor
    with tf.variable_scope("layer5-cf1"):
        fc1_weight = tf.get_variable("weight", [nodes, FC_SIZE], 
                                     initializer = tf.truncated_normal_initializer(stddev=0.1))
        fc1_bias = tf.get_variable("bias",[FC_SIZE], initializer = tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weight)+fc1_bias)
        if regularizer is not None:
            tf.add_to_collection("losses", regularizer(fc1_weight))
        if train: 
            fc1 = tf.nn.dropout(fc1,0.5)
        
    # Layer 6: fully-connected layer
    # Input:  a BATCH_SIZE*512 tensor
    # Output: a BATCH_SIZE*10 tensor
    with tf.variable_scope("layer6-cf2"):
        fc2_weight = tf.get_variable("weight", [FC_SIZE, NUM_LABELS], 
                                     initializer = tf.truncated_normal_initializer(stddev=0.1))
        fc2_bias = tf.get_variable("bias", [NUM_LABELS], initializer = tf.constant_initializer(0.1))
        fc2 = tf.matmul(fc1, fc2_weight) + fc2_bias
        if regularizer is not None:
            tf.add_to_collection("losses", regularizer(fc2_weight))
        
    return fc2

Next, the training script mnist_train_cnn.py:

# _*_ coding: utf-8 _*_

import os
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

import mnist_inference_cnn

BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.001
MOVING_AVERAGE_DECAY = 0.99
TRAINING_STEPS = 30000

# Path and file name for the saved model
MODEL_SAVE_PATH = "/path" # replace with your own path
MODEL_NAME = "model.ckpt"

def train(mnist):
    
    x = tf.placeholder(tf.float32, 
                       [BATCH_SIZE, 
                        mnist_inference_cnn.IMAGE_SIZE, 
                        mnist_inference_cnn.IMAGE_SIZE, 
                        mnist_inference_cnn.NUM_CHANNELS],
                        name = "x-input")
    y_ = tf.placeholder(tf.float32, 
                        [BATCH_SIZE, 
                         mnist_inference_cnn.NUM_LABELS],name = "y-input")
    
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    
    # Forward propagation, with dropout enabled
    y = mnist_inference_cnn.inference(x, 1, regularizer)
    
    global_step = tf.Variable(0, trainable=False)
    
    # Compute the cross-entropy and the total loss (y_ is one-hot, so tf.argmax converts it to class indices for the sparse version)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = y, labels = tf.argmax(y_,1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))                                                                
    
    # Set up the moving average of the trainable variables
    variables_average_op = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,global_step).apply(tf.trainable_variables())
    
    # Backpropagation
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, 
                                               global_step, 
                                               mnist.train.num_examples / BATCH_SIZE, 
                                               LEARNING_RATE_DECAY, 
                                               staircase = True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
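    # Run the gradient-descent step and the moving-average update together as a single training op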
    train_op = tf.group(train_step, variables_average_op)
    
    saver = tf.train.Saver()
    with tf.Session() as sess:
        
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            reshaped_xs = np.reshape(xs,
                                     [BATCH_SIZE, 
                                      mnist_inference_cnn.IMAGE_SIZE, 
                                      mnist_inference_cnn.IMAGE_SIZE, 
                                      mnist_inference_cnn.NUM_CHANNELS])
            
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x:reshaped_xs, y_:ys})
            
            # Save the model every 1000 steps
            if i % 1000 == 0:
                # Print the current training status
                print("After %d training steps, loss on the training batch is %g" % (step, loss_value))

                # Save the current model
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step = global_step)
    

def main(argv=None):
    mnist = input_data.read_data_sets("/path", one_hot = True) # replace with your own path
    train(mnist)
    
if __name__ == "__main__":
    tf.app.run()
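
For completeness, here is a minimal evaluation sketch (my own addition, not part of the book's code quoted above; the paths and the use of the plain variables rather than their moving averages are simplifications). It rebuilds the graph, restores the latest checkpoint saved by the training script, and measures accuracy on one BATCH_SIZE-sized chunk of the validation set, since the placeholder's first dimension is fixed to BATCH_SIZE:

# _*_ coding: utf-8 _*_
# Minimal evaluation sketch (not from the book): restore the latest checkpoint
# and check accuracy on one BATCH_SIZE-sized chunk of the validation set.

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

import mnist_inference_cnn

BATCH_SIZE = 100
MODEL_SAVE_PATH = "/path"  # same path used by the training script

def evaluate(mnist):
    x = tf.placeholder(tf.float32,
                       [BATCH_SIZE,
                        mnist_inference_cnn.IMAGE_SIZE,
                        mnist_inference_cnn.IMAGE_SIZE,
                        mnist_inference_cnn.NUM_CHANNELS],
                       name = "x-input")
    y_ = tf.placeholder(tf.float32,
                        [BATCH_SIZE,
                         mnist_inference_cnn.NUM_LABELS], name = "y-input")

    # No dropout and no regularization at evaluation time
    y = mnist_inference_cnn.inference(x, False, None)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Feed one BATCH_SIZE-sized chunk of the validation set, reshaped to 4-D with np.reshape
            xs = mnist.validation.images[:BATCH_SIZE]
            ys = mnist.validation.labels[:BATCH_SIZE]
            reshaped_xs = np.reshape(xs,
                                     [BATCH_SIZE,
                                      mnist_inference_cnn.IMAGE_SIZE,
                                      mnist_inference_cnn.IMAGE_SIZE,
                                      mnist_inference_cnn.NUM_CHANNELS])
            print("validation accuracy = %g" % sess.run(accuracy, feed_dict={x: reshaped_xs, y_: ys}))
        else:
            print("No checkpoint file found")

def main(argv=None):
    mnist = input_data.read_data_sets("/path", one_hot = True)  # same data path as in training
    evaluate(mnist)

if __name__ == "__main__":
    tf.app.run()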

