TensorFlow搭建神經網路最佳實踐樣例

marsjhao發表於2020-04-06

 

一、TensorFlow完整樣例

 

在MNIST資料集上搭建一個簡單的神經網路:一個以ReLU單元進行非線性化處理的兩層神經網路。訓練時使用帶指數衰減的學習率、使用L2正則化來避免過擬合,並使用滑動平均模型使最終的模型更加健壯。

程式將計算神經網路前向傳播的部分單獨定義一個函式inference,訓練部分定義一個train函式,再定義一個主函式main。

完整程式:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May 25 08:56:30 2017

@author: marsjhao
"""

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

INPUT_NODE = 784  # number of input nodes
OUTPUT_NODE = 10  # number of output nodes
LAYER1_NODE = 500  # number of hidden-layer nodes
BATCH_SIZE = 100  # examples requested from the training set per step
LEARNING_RATE_BASE = 0.8  # base learning rate
LEARNING_RATE_DECAY = 0.99  # decay rate of the learning rate
# "RETE" is the original, misspelled name of the two constants above. The old
# names are kept as aliases so code that still refers to them keeps working.
LEARNING_RETE_BASE = LEARNING_RATE_BASE
LEARNING_RETE_DECAY = LEARNING_RATE_DECAY
REGULARIZATION_RATE = 0.0001  # weight of the regularization term in the loss
TRAINING_STEPS = 10000  # number of training iterations
MOVING_AVERAGE_DECAY = 0.99  # decay coefficient of the moving average

# Forward pass of the network, given its weights and biases.
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward-propagation result of the two-layer network.

    Args:
        input_tensor: tensor that is matrix-multiplied with ``weights1``.
        avg_class: ``None`` to use the parameters exactly as passed in, or an
            object whose ``average(variable)`` method supplies the value to
            use for each parameter (``train`` passes a
            ``tf.train.ExponentialMovingAverage``).
        weights1: weights of the hidden layer.
        biases1: biases of the hidden layer.
        weights2: weights of the output layer.
        biases2: biases of the output layer.

    Returns:
        The output-layer tensor ``matmul(layer1, weights2) + biases2`` where
        ``layer1`` is the ReLU-activated hidden layer. No softmax is applied
        here.
    """
    # Identity test: comparing with ``==`` would dispatch to whatever
    # ``__eq__`` the passed object defines instead of checking for None.
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2

    # An averaging object was supplied: use the averaged value of every
    # parameter instead of the parameter itself.
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weights1))
        + avg_class.average(biases1))
    return (tf.matmul(layer1, avg_class.average(weights2))
            + avg_class.average(biases2))

# Training procedure of the neural network model.
def train(mnist):
    """Build the network graph, train it and print accuracy figures.

    Validation accuracy is printed every 1000 steps and test accuracy once
    after the last step; both are computed with the moving-average parameters.

    Args:
        mnist: dataset object providing ``train`` (with ``num_examples`` and
            ``next_batch``), ``validation`` and ``test`` splits (each with
            ``images`` and ``labels``). Labels are fed into a placeholder of
            width ``OUTPUT_NODE`` and reduced with ``tf.argmax``.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Parameters of the network.
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE],
                                               stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE],
                                               stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass with the raw (non-averaged) parameters; used for the loss.
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Holds the current training step; excluded from the trainable variables.
    global_step = tf.Variable(0, trainable=False)

    # Cross-entropy loss. The labels placeholder is reduced to class indices
    # with argmax because the sparse variant of the loss is used.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # L2 regularization of both weight matrices (biases are not regularized).
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization  # total loss

    # Exponentially decaying learning rate; the decay period is the number of
    # batches in the training set. ``LEARNING_RETE_*`` is the spelling used by
    # this script's constants.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RETE_BASE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RETE_DECAY)
    # Passing global_step makes minimize() increment it on every step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
                         .minimize(loss, global_step=global_step)

    # Moving averages of all trainable variables. The update op is created
    # under a control dependency on train_step so that the averages are always
    # refreshed AFTER the gradient step; merely grouping the two ops leaves
    # their relative order undefined.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    with tf.control_dependencies([train_step]):
        train_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass with the moving-average parameters; used for prediction.
    average_y = inference(x, variable_averages,
                          weights1, biases1, weights2, biases2)

    # Accuracy of the moving-average model.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Open a session and run the training iterations.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Data fed when measuring validation / test accuracy.
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print('After %d training steps, validation accuracy'
                      ' using average model is %f' % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print('After %d training steps, test accuracy'
              ' using average model is %f' % (TRAINING_STEPS, test_acc))

# Program entry point.
def main(argv=None):
    """Read the MNIST data from "MNIST_data" and start training.

    Args:
        argv: accepted for the caller's benefit; not used in the body.
    """
    train(input_data.read_data_sets("MNIST_data", one_hot=True))


# Run only when this file is executed as a script, not when it is imported
# as a module.
if __name__ == '__main__':
    tf.app.run()  # calls main and hands it the argument list

二、分析與改進設計

1. 程式分析改進

第一,計算前向傳播的函式inference中需要將所有的變數以引數的形式傳入函式,當神經網路結構變得更加複雜、引數更多的時候,程式的可讀性將變得非常差。

第二,在程式退出時,訓練好的模型就無法再利用,且大型神經網路的訓練時間都比較長,在訓練過程中需要每隔一段時間儲存一次模型訓練的中間結果,這樣如果在訓練過程中程式當機,當機前的最新的模型引數仍能保留,杜絕了時間和資源的浪費。

第三,將訓練和測試分成兩個獨立的程式,將訓練和測試都會用到的前向傳播的過程抽象成單獨的庫函式。這樣就保證了在訓練和預測兩個過程中所呼叫的前向傳播計算程式是一致的。

2. 改進後程式設計

mnist_inference.py

該檔案定義了神經網路的前向傳播過程inference,其中多次用到的weights變數建立過程被單獨封裝成函式get_weight_variable。

通過tf.get_variable函式來獲取變數,在神經網路訓練時建立這些變數,在測試時會通過儲存的模型載入這些變數的取值,而且可以在變數載入時將滑動平均值重新命名。所以可以直接通過同樣的名字在訓練時使用變數自身,在測試時使用變數的滑動平均值。

mnist_train.py

該程式給出了神經網路的完整訓練過程。

mnist_eval.py

在滑動平均模型上做測試。

通過tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)讀取checkpoint檔案的內容,再由其model_checkpoint_path欄位取得最新模型的檔名。

三、TensorFlow最佳實踐樣例

 

mnist_inference.py 

 

import tensorflow as tf

# Layer widths of the network, listed from input to output.
INPUT_NODE = 784  # rows of the 'layer1' weight matrix
LAYER1_NODE = 500  # units in the hidden layer ('layer1')
OUTPUT_NODE = 10  # units in the output layer ('layer2')

def get_weight_variable(shape, regularizer):
    """Get the variable named "weights" in the current variable scope.

    The variable is obtained through ``tf.get_variable`` with a
    truncated-normal initializer (stddev 0.1).

    Args:
        shape: shape passed to ``tf.get_variable``.
        regularizer: ``None``, or a callable applied to the weights; its
            result is added to the 'losses' collection.

    Returns:
        The weights variable.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Identity test against None instead of ``!=``, which would go through
    # the regularizer object's own comparison operators.
    if regularizer is not None:
        # Put the regularization term of these weights into the collection
        # of losses.
        tf.add_to_collection('losses', regularizer(weights))
    return weights

def inference(input_tensor, regularizer):
    """Build the forward pass of the two-layer network.

    Variables are created or looked up by name under the variable scopes
    'layer1' and 'layer2'.

    Args:
        input_tensor: tensor multiplied with the 'layer1' weights.
        regularizer: forwarded to ``get_weight_variable`` for both weight
            matrices; may be ``None``.

    Returns:
        The output of 'layer2' (no activation is applied to it).
    """
    # Hidden layer: affine transform followed by ReLU.
    with tf.variable_scope('layer1'):
        hidden_weights = get_weight_variable([INPUT_NODE, LAYER1_NODE],
                                             regularizer)
        hidden_biases = tf.get_variable(
            "biases", [LAYER1_NODE],
            initializer=tf.constant_initializer(0.0))
        hidden = tf.nn.relu(
            tf.matmul(input_tensor, hidden_weights) + hidden_biases)

    # Output layer: affine transform only.
    with tf.variable_scope('layer2'):
        output_weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE],
                                             regularizer)
        output_biases = tf.get_variable(
            "biases", [OUTPUT_NODE],
            initializer=tf.constant_initializer(0.0))
        output = tf.matmul(hidden, output_weights) + output_biases

    return output

 

mnist_train.py

 

import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference

# Training loop.
TRAINING_STEPS = 10000  # iterations of the training loop
BATCH_SIZE = 100  # examples requested from the training set per iteration

# Optimization settings.
LEARNING_RATE_BASE = 0.8  # initial value of the decaying learning rate
LEARNING_RATE_DECAY = 0.99  # decay rate of the learning rate
REGULARIZATION_RATE = 0.0001  # scale given to the L2 regularizer
MOVING_AVERAGE_DECAY = 0.99  # decay of the exponential moving average

# Checkpoint location: directory and file-name prefix.
MODEL_SAVE_PATH = "Model_Folder/"
MODEL_NAME = "model.ckpt"

def train(mnist):
    """Train the network and periodically checkpoint it.

    Every 1000 iterations, and additionally on the last iteration, the loss
    on the current training batch is printed and a checkpoint is written to
    ``MODEL_SAVE_PATH``.

    Args:
        mnist: dataset object whose ``train`` split provides ``num_examples``
            and ``next_batch``. Labels are fed into a placeholder of width
            ``mnist_inference.OUTPUT_NODE`` and reduced with ``tf.argmax``.
    """
    # Input placeholders.
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE],
                       name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE],
                        name='y-input')
    # Regularizer and forward pass.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    # Current training step; excluded from the trainable variables.
    global_step = tf.Variable(0, trainable=False)

    # Loss: mean cross entropy plus everything in the 'losses' collection
    # (the regularization terms added while building the forward pass).
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    # Exponentially decaying learning rate; the decay period is the number of
    # batches in the training set.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
                   .minimize(loss, global_step=global_step)

    # Moving averages of all trainable variables. The update op is created
    # under a control dependency on train_step so the averages are always
    # refreshed AFTER the gradient step; merely grouping the two ops leaves
    # their relative order undefined.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    with tf.control_dependencies([train_step]):
        train_op = variable_averages.apply(tf.trainable_variables())

    # Saver used to persist the model.
    saver = tf.train.Saver()

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
    checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)

    # Open a session and run the training loop.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={x: xs, y_: ys})
            # Checkpointing only on multiples of 1000 would stop at
            # i == 9000 and discard the remaining steps, so the last
            # iteration is saved as well.
            if i % 1000 == 0 or i == TRAINING_STEPS - 1:
                print("After %d training step(s), loss on training batch is %g."
                      % (step, loss_value))
                # The global_step argument appends the current training step
                # to the name of every saved checkpoint file.
                saver.save(sess, checkpoint_prefix, global_step=global_step)

def main(argv=None):
    """Read the MNIST data from "MNIST_data" and start training.

    Args:
        argv: accepted for the caller's benefit; not used in the body.
    """
    train(input_data.read_data_sets("MNIST_data", one_hot=True))


# Start the program only when this file is run as a script.
if __name__ == '__main__':
    tf.app.run()

mnist_eval.py

 

import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import mnist_train

# Seconds evaluate() sleeps between two consecutive evaluation passes.
EVAL_INTERVAL_SECS = 10

def evaluate(mnist):
    """Repeatedly measure validation accuracy of the latest checkpoint.

    Builds the network in a fresh graph, then loops: look up the checkpoint
    state in ``mnist_train.MODEL_SAVE_PATH``, restore the moving-average
    values into the network variables, print the validation accuracy and
    sleep ``EVAL_INTERVAL_SECS`` seconds. Returns as soon as no checkpoint is
    found; otherwise the loop never ends.

    Args:
        mnist: dataset object whose ``validation`` split provides ``images``
            and ``labels``.
    """
    with tf.Graph().as_default():
        # Input placeholders and the data fed into them.
        x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE],
                           name='x-input')
        y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE],
                            name='y-input')
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}

        # No regularization loss is needed when only evaluating.
        y = mnist_inference.inference(x, None)

        # Fraction of examples whose predicted class matches the label.
        is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

        # Restore using the moving-average name mapping, so the variables
        # are loaded with their averaged values.
        ema = tf.train.ExponentialMovingAverage(
            mnist_train.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # One session per pass, one pass every EVAL_INTERVAL_SECS seconds.
        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(
                    mnist_train.MODEL_SAVE_PATH)
                if not (ckpt and ckpt.model_checkpoint_path):
                    print('No checkpoint file found')
                    return

                saver.restore(sess, ckpt.model_checkpoint_path)
                # The training step is the text after the last '-' in the
                # checkpoint file name.
                file_name = ckpt.model_checkpoint_path.split('/')[-1]
                global_step = file_name.split('-')[-1]
                accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                print("After %s training step(s), validation accuracy = %g"
                      % (global_step, accuracy_score))
            time.sleep(EVAL_INTERVAL_SECS)

def main(argv=None):
    """Read the MNIST data from "MNIST_data" and start the evaluation loop.

    Args:
        argv: accepted for the caller's benefit; not used in the body.
    """
    evaluate(input_data.read_data_sets("MNIST_data", one_hot=True))


# Start the program only when this file is run as a script.
if __name__ == '__main__':
    tf.app.run()

 

相關文章