【分散式Tensorflow(0.11.0)問題 未解決】 alexnet_v2/pool1/MaxPool : tensor_in must be 4-dimensional
系統:Linux CentOS 7.1
TensorFlow 版本:0.11.0(以 whl 套件安裝)
出錯資訊:
tensorflow.python.framework.errors_impl.InvalidArgumentError: tensor_in must be 4-dimensional
[[Node: alexnet_v2/pool1/MaxPool = MaxPool[T=DT_FLOAT, data_format="NHWC", ksize=[1, 3, 3, 1], padding="VALID", strides=[1, 2, 2, 1], _device="/job:worker/replica:0/task:0/cpu:0"](alexnet_v2/conv1/Relu)]]
原始碼:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
from datetime import datetime
import math
import sys
import time
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
# Cluster description: host lists for the "ps" and "worker" jobs, plus the role
# and index of the process being launched. main() reads all four.
tf.app.flags.DEFINE_string("ps_hosts", "", "Comma-separated list of hostname:port pairs")
tf.app.flags.DEFINE_string("worker_hosts", "", "Comma-separated list of hostname:port pairs")
tf.app.flags.DEFINE_string("job_name", "", "One of 'ps', 'worker'")
tf.app.flags.DEFINE_integer("task_index", 0, "Index of task within the job")
# Benchmark sizing: images per forward pass, and how many passes are timed
# after the burn-in steps in main().
tf.app.flags.DEFINE_integer("batch_size", 100, "Training batch size")
tf.app.flags.DEFINE_integer('num_batches', 100, "Number of batches to run.")
# Accessor for the parsed flag values.
FLAGS = tf.app.flags.FLAGS
# Short alias for the TF-Slim layer library used to build the network.
slim = tf.contrib.slim
# Factory for a zero-mean truncated-normal weight initializer with the given
# standard deviation. tf.truncated_normal_initializer takes (mean, stddev, seed)
# positionally, so stddev must be the second argument; passing it third would
# leave the deviation at zero and use the value as the random seed instead.
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
def alexnet_v2_arg_scope(weight_decay=0.0005):
    """Build the default slim argument scope for the AlexNet v2 layers.

    Three scopes are stacked: conv2d and fully_connected get ReLU activation,
    a constant 0.1 bias initializer and an L2 weight regularizer; conv2d
    additionally defaults to 'SAME' padding; max_pool2d defaults to 'VALID'
    padding.

    Args:
      weight_decay: Coefficient handed to `slim.l2_regularizer`.

    Returns:
      The scope object yielded by the innermost `slim.arg_scope`, captured
      while all three scopes are active, for use as
      `with slim.arg_scope(alexnet_v2_arg_scope()):`.
    """
    weighted_layers = [slim.conv2d, slim.fully_connected]
    bias_init = tf.constant_initializer(0.1)
    l2_penalty = slim.l2_regularizer(weight_decay)
    with slim.arg_scope(weighted_layers,
                        activation_fn=tf.nn.relu,
                        biases_initializer=bias_init,
                        weights_regularizer=l2_penalty):
        with slim.arg_scope([slim.conv2d], padding='SAME'):
            with slim.arg_scope([slim.max_pool2d],
                                padding='VALID') as layer_defaults:
                return layer_defaults
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """Assemble the AlexNet v2 graph on top of `inputs`.

    The network is five convolutions interleaved with three max-pools,
    followed by three "fully connected" layers (fc6-fc8) expressed as
    convolutions, with dropout after fc6 and fc7.

    Args:
      inputs: Image batch tensor. Presumably 4-D NHWC; main() in this file
        feeds [batch_size, 224, 224, 3] - confirm for other callers.
      num_classes: Number of output channels of the final fc8 layer.
      is_training: Forwarded to `slim.dropout`.
      dropout_keep_prob: Keep probability forwarded to `slim.dropout`.
      spatial_squeeze: When true, axes 1 and 2 of the fc8 output are squeezed
        away and the squeezed tensor is stored in the end-point dict under
        '<scope name>/fc8'.
      scope: Variable scope for the network.

    Returns:
      A tuple `(net, end_points)`: the fc8 output (squeezed if requested) and
      a dict built from the outputs collection of the conv2d,
      fully_connected and max_pool2d layers.
    """
    with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as var_scope:
        collection_name = var_scope.name + '_end_points'
        # Every layer of these kinds records its output in the collection.
        collecting_layers = [slim.conv2d, slim.fully_connected,
                             slim.max_pool2d]
        with slim.arg_scope(collecting_layers,
                            outputs_collections=[collection_name]):
            # Convolutional feature extractor.
            layer_out = slim.conv2d(inputs, 64, [11, 11], 4,
                                    padding='VALID', scope='conv1')
            layer_out = slim.max_pool2d(layer_out, [3, 3], 2, scope='pool1')
            layer_out = slim.conv2d(layer_out, 192, [5, 5], scope='conv2')
            layer_out = slim.max_pool2d(layer_out, [3, 3], 2, scope='pool2')
            layer_out = slim.conv2d(layer_out, 384, [3, 3], scope='conv3')
            layer_out = slim.conv2d(layer_out, 384, [3, 3], scope='conv4')
            layer_out = slim.conv2d(layer_out, 256, [3, 3], scope='conv5')
            layer_out = slim.max_pool2d(layer_out, [3, 3], 2, scope='pool5')

            # Classifier head: convolutions stand in for dense layers.
            fc_defaults = dict(
                weights_initializer=trunc_normal(0.005),
                biases_initializer=tf.constant_initializer(0.1))
            with slim.arg_scope([slim.conv2d], **fc_defaults):
                layer_out = slim.conv2d(layer_out, 4096, [5, 5],
                                        padding='VALID', scope='fc6')
                layer_out = slim.dropout(layer_out, dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                layer_out = slim.conv2d(layer_out, 4096, [1, 1], scope='fc7')
                layer_out = slim.dropout(layer_out, dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                layer_out = slim.conv2d(
                    layer_out, num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=tf.zeros_initializer,
                    scope='fc8')

            # Turn the outputs collection into a name -> tensor dict.
            end_points = slim.utils.convert_collection_to_dict(
                collection_name)
            if spatial_squeeze:
                layer_out = tf.squeeze(layer_out, [1, 2],
                                       name='fc8/squeezed')
                end_points[var_scope.name + '/fc8'] = layer_out
            return layer_out, end_points
def _duration_stats(total, total_squared, count):
    """Return (mean, standard deviation) of `count` samples from their sums."""
    mean = total / count
    # Floating-point rounding can push E[x^2] - E[x]^2 slightly below zero
    # when the samples are nearly identical; clamp so sqrt cannot raise.
    variance = max(total_squared / count - mean * mean, 0.0)
    return mean, math.sqrt(variance)


def main(_):
    """Start this process's cluster role and, on workers, time forward passes.

    A "ps" process joins its server and blocks. A "worker" process builds the
    AlexNet v2 graph on a randomly initialised image batch, opens a
    Supervisor-managed session against its server and runs the logits
    `FLAGS.num_batches` times after 10 untimed burn-in steps, printing the
    duration every tenth step and the mean +/- standard deviation at the end.
    The summary line is skipped when `FLAGS.num_batches` is not positive,
    because no step was timed.

    Args:
      _: Unused positional argument supplied by `tf.app.run()`.
    """
    # Construct the cluster and start the server.
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        with tf.device(tf.train.replica_device_setter(worker_device="/job:worker/task:%d" % FLAGS.task_index, cluster=cluster)):
            image_size = 224
            images = tf.Variable(tf.random_normal([FLAGS.batch_size, image_size, image_size, 3], dtype=tf.float32, stddev=1e-1))
            with slim.arg_scope(alexnet_v2_arg_scope()):
                logits, _ = alexnet_v2(images, is_training=False)
            saver = tf.train.Saver()
            summary_op = tf.merge_all_summaries()
            init_op = tf.global_variables_initializer()

        # Create a Supervisor that will checkpoint the model and compute
        # summaries; task 0 acts as the chief.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0), logdir="./alexnet_train_logs", init_op=init_op, summary_op=summary_op, saver=saver, save_model_secs=600)

        # Get a TensorFlow session managed by the supervisor.
        with sv.managed_session(server.target) as sess:
            num_steps_burn_in = 10
            total_duration = 0.0
            total_duration_squared = 0.0
            for i in xrange(FLAGS.num_batches + num_steps_burn_in):
                start_time = time.time()
                sess.run(logits)
                duration = time.time() - start_time
                # Burn-in steps are executed but excluded from the statistics.
                if i >= num_steps_burn_in:
                    if not i % 10:
                        print ('%s: step %d, duration = %.3f' % (datetime.now(), i - num_steps_burn_in, duration))
                    total_duration += duration
                    total_duration_squared += duration * duration
            if FLAGS.num_batches > 0:
                mn, sd = _duration_stats(total_duration, total_duration_squared, FLAGS.num_batches)
                print ('%s: across %d steps, %.3f +/- %.3f sec / batch' % (datetime.now(), FLAGS.num_batches, mn, sd))

        # Stop TensorFlow Session
        sv.stop()
# Script entry point. tf.app.run() is documented to parse the command-line
# flags and then call main() - confirm against the installed TensorFlow version.
if __name__ == "__main__":
    tf.app.run()
目前還不清楚出錯的原因。如果有正在研究 TensorFlow 的朋友知道問題出在哪裡,請告訴我,謝謝!
另外,我已在 GitHub 上提問,但目前還沒有人回答。
相關文章
- 【分散式Tensorflow(0.11.0)問題 未解決】Segmentation fault (core dumped)分散式Segmentation
- 用分散式鎖解決併發問題分散式
- Redis分散式鎖解決搶購問題Redis分散式
- redis實現分散式鎖---實操---問題解決Redis分散式
- Spring Boot 整合 Seata 解決分散式事務問題Spring Boot分散式
- 分散式事務系列 - 解決跨庫轉賬問題分散式
- redis分散式鎖的問題和解決Redis分散式
- 分散式冪等問題解決方案三部曲分散式
- 分散式(Distributed)Scrum團隊的問題及解決方案分散式Scrum
- Go Mysql Driver 整合 Seata-Golang 解決分散式事務問題MySqlGolang分散式
- 關於分散式事務帶來的問題及解決方案分散式
- 分散式環境下利用快取解決重複性問題分散式快取
- 靈活運用分散式鎖解決資料重複插入問題分散式
- 分散式系統中常見技術解決的問題是什麼?分散式
- TensorFlow分散式實踐分散式
- 專案總結(幾大未解決問題)
- 解決「問題」,不要解決問題
- 使用Seata徹底解決Spring Cloud中的分散式事務問題!SpringCloud分散式
- 使用redis分散式鎖解決併發執行緒資源共享問題Redis分散式執行緒
- 分散式系統的問題分散式
- 多OA的分散式問題分散式
- 簡單介紹redis分散式鎖解決表單重複提交的問題Redis分散式
- ontape之後沒有反應(問題未解決)
- 分散式鎖的解決方案分散式
- Redis分散式鎖解決方案Redis分散式
- Redis 分散式鎖解決方案Redis分散式
- SAP HANA分散式解決方案分散式
- 分散式事務解決方案分散式
- TensorFlow學習指南四、分散式分散式
- 【原創】分散式之一行程式碼解決快取擊穿問題分散式行程快取
- 如何用分散式鎖解決陪玩平臺原始碼中的併發問題?分散式原始碼
- ceph解讀:crush分散式資料分佈的問題分散式
- vue scoped 解決樣式不生效問題Vue
- [未解決]fedora9無法關機問題
- 開機後mysql服務未啟動問題解決MySql
- [分散式][Dubbo]Dubbo常見問題分散式
- 分散式系統的核心問題分散式
- 分散式下的WebSocket解決方案分散式Web