TensorFlow 資料清洗

來路與歸途 發表於 2019-11-02

原文網址 : https://blog.csdn.net/qq_42233538/article/details/89290058

import tensorflow as tf
import numpy as np
import random
import os
import math

from matplotlib import pyplot as plt

def get_files(file_dir, ratio=0.3):
    """Build shuffled train/validation lists of image paths and labels.

    Every class lives in its own sub-directory of ``file_dir`` (``daisy``,
    ``dandelion``, ``roses``, ``sunflowers``, ``tulips``); a file's label is
    the position of its class in that order (0-4).

    :param file_dir: root directory containing the five class folders
    :param ratio: fraction of all samples held out for validation
        (the validation count is rounded up)
    :return: ``(tra_images, tra_labels, val_images, val_labels)`` -- lists of
        image paths and the matching lists of integer labels
    :raises ValueError: if ``ratio`` is outside ``[0, 1]``
    """
    if not 0 <= ratio <= 1:
        raise ValueError("ratio must be between 0 and 1, got %r" % (ratio,))

    class_names = ("daisy", "dandelion", "roses", "sunflowers", "tulips")

    # step 1: collect every file path together with the label of its folder
    image_paths = []
    labels = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(file_dir, class_name)
        for file_name in os.listdir(class_dir):
            image_paths.append(os.path.join(class_dir, file_name))
            labels.append(label)

    # step 2: shuffle paths and labels with one shared permutation so the
    # pairs stay aligned and the labels never leave their int type
    order = np.random.permutation(len(image_paths))
    image_list = [image_paths[i] for i in order]
    label_list = [labels[i] for i in order]

    # step 3: split into a training part and a validation part
    n_sample = len(label_list)
    n_val = int(math.ceil(n_sample * ratio))
    n_train = n_sample - n_val
    tra_images = image_list[:n_train]
    tra_labels = label_list[:n_train]
    # slice to the very end: a "-1" stop index would drop the last sample
    val_images = image_list[n_train:]
    val_labels = label_list[n_train:]
    return tra_images, tra_labels, val_images, val_labels

#return image_list,label_list_new

def get_batch(image, label, image_W, image_H, channel, batch_size, capacity):
    """Build a TF1 queue-based pipeline that yields image/label batches.

    :param image: list of JPEG file paths
    :param label: list of integer labels, aligned with ``image``
    :param image_W: target image width in pixels
    :param image_H: target image height in pixels
    :param channel: number of colour channels to decode (e.g. 3 for RGB)
    :param batch_size: number of samples per batch
    :param capacity: maximum number of elements held in the batch queue
    :return: ``(image_batch, label_batch)`` tensors; ``image_batch`` is
        float32 with shape ``[batch_size, image_H, image_W, channel]`` and
        ``label_batch`` is int32 with shape ``[batch_size]``
    """
    # step 1: convert the Python lists to tensors and feed them to an
    # input queue that hands out one (path, label) pair at a time
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    # step 2: decode the image. Formats must not be mixed (JPEG only here).
    # Requesting `channel` explicitly keeps grayscale/CMYK files consistent
    # with the static shape set below.
    images_value = tf.image.decode_jpeg(image_contents, channels=channel)

    # step 3: preprocessing. resize_images expects size=[height, width],
    # so the height goes first.
    image = tf.image.resize_images(images_value, size=[image_H, image_W])
    image.set_shape(shape=[image_H, image_W, channel])
    # standardize each resized image
    image = tf.image.per_image_standardization(image)

    # step 4: assemble batches
    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=1,
        capacity=capacity,
    )
    # make the label batch explicitly 1-D with batch_size entries
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

if __name__=="__main__":
    # Demo: read a couple of training batches and display every image.
    BATCH_SIZE = 2
    CAPACITY = 256
    IMG_W = 208
    IMG_H = 208
    CHANNEL = 3  # assumes the dataset consists of RGB JPEGs -- TODO confirm
    N_BATCHES = 2  # number of batches to fetch and display
    # directory that contains the class folders
    mypath = "/home/sunxiaoming/PycharmProjects/data/flower_photos"
    # get_files returns four lists: train images/labels and val images/labels
    tra_images, tra_labels, val_images, val_labels = get_files(mypath)
    print(len(tra_images))
    print(len(tra_labels))
    image_batch, label_batch = get_batch(
        tra_images, tra_labels, IMG_W, IMG_H, CHANNEL, BATCH_SIZE, CAPACITY)
    print(image_batch)
    with tf.Session() as sess:
        # start the queue-runner threads under a coordinator
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(N_BATCHES):
                images, labels = sess.run([image_batch, label_batch])
                for j in range(BATCH_SIZE):
                    plt.imshow(images[j, :, :, :])
                    plt.show()
        finally:
            # always stop and join the threads, even if sess.run raised
            coord.request_stop()
            coord.join(threads)

    #with tf.Session() as sess:
        # 開啟執行緒
        # 執行緒協調元
        #coord = tf.train.Coordinator()
        #threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        #i=0
        #while not coord.should_stop() and i < 2:

            #lable, image = sess.run([image_batch,label_batch])
            #print(type(image))
            #"""
                        #for j in np.arange(BATCH_SIZE):
               # print('label: %d' % lable[j])

                #plt.imshow(image[j, :, :, :])
                #plt.show()
            #i += 1

#"""

        # 回收執行緒
        #coord.request_stop()
        #coord.join(threads)

資料清洗和資料處理
2020-03-03
資料清洗如何測試？
2024-06-04
資料清洗有哪些方法？
2021-10-19
資料治理為什麼要清洗資料
2024-01-23
機器學習-資料清洗
2019-03-02
機器學習
資料清洗的方法有哪些？
2019-03-08
資料清洗處理-常用操作
2020-03-24
python 操作 excel 之資料清洗
2021-03-02
PythonExcel
一個spark清洗資料的demo
2018-07-30
Spark
常用資料清洗方法大盤點
2018-08-22
資料清洗和準備 (待更新)
2020-06-22
使用Mysql工具進行資料清洗
2020-12-04
MySql
資料分析-pandas資料處理清洗常用總結
2018-04-12
資料管理：業務資料清洗，落地實現方案
2021-06-09
八個機器學習資料清洗
2019-06-19
機器學習
DolphinDB +Python Airflow 高效實現資料清洗
2023-04-14
PythonAI
爬蟲中資料清洗的選擇
2021-06-12
爬蟲
Scikit-Learn 與 TensorFlow 機器學習實用指南學習筆記 3 —— 資料獲取與清洗
2018-12-04
機器學習筆記
TensorFlow讀寫資料
2019-03-16
大資料之路 ——（一）演算法建模中的資料清洗
2021-08-05
大資料演算法
清洗資料，我習慣用這 7 步！
2020-03-21
如何讓資料清洗工作變得簡單
2024-10-17
從MySQL大量資料清洗到TiDB說起
2021-08-23
MySql
pandas資料處理清洗案例：中文地址拆分
2021-06-15
使用tensorflow操作MNIST資料
2019-07-24
做資料分析必須瞭解的獲取資料與清洗資料技巧
2018-05-21
R資料分析：資料清洗的思路和核心函式介紹
2022-02-19
函式
機器學習中資料清洗的藝術
2019-08-23
機器學習
資料清洗與預處理：使用 Python Pandas 庫
2024-07-26
Python
LLM大模型: 常用的資料清洗方法總結
2024-07-10
大模型
日誌服務之資料清洗與入湖
2022-04-27
大資料技術與應用課堂測試-資料清洗同步
2024-04-09
大資料
TensorFlow 入門（MNIST資料集）
2018-08-17
TensorFlow學習資料彙總
2018-06-17
mxnet資料格式轉換為tensorflow，pytorch資料
2018-12-14
PyTorch
還在為資料清洗抓狂？這裡有一個簡單實用的清洗程式碼集
2019-01-22
[影像處理] 基於CleanVision庫清洗影像資料集
2024-10-24
5款優秀的資料清洗工具任你選擇
2021-10-15

tensorflow資料清洗

相關文章