• TensorFlow 读取、存储图像数据的 TFRecord 方法(示例)


    1. 利用 TFRecord 格式读取、存储 MNIST 数据集的方法

    存储 MNIST 数据集的方法(TFRecord 格式)

    import tensorflow as tf
    from tensorflow.examples.tutorials.mnist import input_data
    import numpy as np
    
    def _float32_feature(value):
        """Wrap a single float in a ``tf.train.Feature`` with a one-element FloatList."""
        float_list = tf.train.FloatList(value=[value])
        return tf.train.Feature(float_list=float_list)
    
    def _int64_feature(value):
        """Wrap a single integer in a ``tf.train.Feature`` with a one-element Int64List."""
        int64_list = tf.train.Int64List(value=[value])
        return tf.train.Feature(int64_list=int64_list)
    
    def _bytes_feature(value):
        """Wrap a single bytes object in a ``tf.train.Feature`` with a one-element BytesList."""
        bytes_list = tf.train.BytesList(value=[value])
        return tf.train.Feature(bytes_list=bytes_list)
    
    # Load MNIST with raw uint8 pixels and one-hot encoded labels.
    mnist=input_data.read_data_sets('./data', dtype=tf.uint8, one_hot=True)
    # For debugging, print mnist.train.images / mnist.train.labels /
    # mnist.test.images / mnist.test.labels here.

    train_images=mnist.train.images
    train_labels=mnist.train.labels
    train_num=mnist.train.num_examples
    # The test split is available the same way through mnist.test.

    pixels=train_images.shape[1]   # 784 = 28*28

    file_out='./data/output.tfrecords'

    # The context manager closes the writer even if building or writing a
    # record fails part-way through the loop.
    with tf.python_io.TFRecordWriter(file_out) as writer:
        for index in range(train_num):
            # Serialize the image to a bytes sequence; tobytes() replaces
            # the deprecated ndarray.tostring().
            image_raw=train_images[index].tobytes()

            example=tf.train.Example(features=tf.train.Features(feature={
                       'pixels': _int64_feature(pixels),
                       # Labels are one-hot, so argmax recovers the class index.
                       'label':_int64_feature(int(np.argmax(train_labels[index]))),
                       'x':_float32_feature(0.1),
                       'image_raw':_bytes_feature(image_raw)}))

            writer.write(example.SerializeToString())
    
    
    
     

    读取 MNIST 数据集的方法(TFRecord 格式)

    import tensorflow as tf
    
    reader=tf.TFRecordReader()

    # match_filenames_once keeps the matched names in a local variable, which
    # is why local_variables_initializer() is run before reading below.
    files=tf.train.match_filenames_once('./data/output.*')

    # A fixed list also works:
    #   tf.train.string_input_producer(['./data/output.tfrecords'])
    filename_queue=tf.train.string_input_producer(files)

    _, serialized_example=reader.read(filename_queue)

    # Parse one serialized Example back into its four scalar features.
    features=tf.parse_single_example(serialized_example,
                       features={
                               'image_raw':tf.FixedLenFeature([], tf.string),
                               'pixels':tf.FixedLenFeature([], tf.int64),
                               'label':tf.FixedLenFeature([], tf.int64),
                               'x':tf.FixedLenFeature([], tf.float32)
                                })

    # Required step: decode the raw bytes string into a uint8 tensor.
    image=tf.decode_raw(features['image_raw'], tf.uint8)

    label=tf.cast(features['label'], tf.int32)
    pixels=tf.cast(features['pixels'], tf.int32)
    # tf.train.batch needs a static shape; decode_raw yields an unknown length.
    image.set_shape([784])

    batch_size=128
    image_batch, label_batch, pixels_batch=tf.train.batch([image, label, pixels], batch_size=batch_size, capacity=1000+3*batch_size)

    coord=tf.train.Coordinator()

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        # The queues are only filled once the queue runners are started.
        threads=tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            for i in range(3):
                print(sess.run([image_batch, label_batch, pixels_batch]))
        finally:
            # Always stop and join the queue threads, even if a run() raised.
            coord.request_stop()
            coord.join(threads)

    ==================================================================

    2. 利用 TFRecord 格式存储文件夹内图片的方法

    import matplotlib.pyplot as plt
    import tensorflow as tf   
    import numpy as np
    
    
    def _float32_feature(value):
        """Build a ``tf.train.Feature`` whose FloatList holds just ``value``."""
        wrapped = tf.train.FloatList(value=[value])
        return tf.train.Feature(float_list=wrapped)
    
    def _int64_feature(value):
        """Build a ``tf.train.Feature`` whose Int64List holds just ``value``."""
        wrapped = tf.train.Int64List(value=[value])
        return tf.train.Feature(int64_list=wrapped)
    
    def _bytes_feature(value):
        """Build a ``tf.train.Feature`` whose BytesList holds just ``value``."""
        wrapped = tf.train.BytesList(value=[value])
        return tf.train.Feature(bytes_list=wrapped)
    
    
    file_out='./data/output1.tfrecords'

    files = tf.gfile.Glob('./data/*.jpg')

    # Build the decode op once and feed each file's bytes into it; creating a
    # new decode_jpeg op per file would add a node to the graph every iteration.
    jpeg_bytes = tf.placeholder(tf.string, shape=[])
    decoded_image = tf.image.decode_jpeg(jpeg_bytes)  # tensor

    # Both context managers release their resources on error. The writer is
    # closed only after ALL images are written (closing it inside the loop
    # would make the second write fail).
    with tf.Session() as sess, tf.python_io.TFRecordWriter(file_out) as writer:
        for image_path in files:
            with tf.gfile.FastGFile(image_path,'rb') as image_file:
                image_raw_data = image_file.read()

            img_data = sess.run(decoded_image, feed_dict={jpeg_bytes: image_raw_data})  # np.array
            # Raw pixel bytes; tobytes() replaces the deprecated tostring().
            resized = img_data.tobytes()

            example=tf.train.Example(features=tf.train.Features(feature={
                       'y':_int64_feature(1),
                       'x':_float32_feature(0.1),
                       'image_raw':_bytes_feature(resized)}))

            writer.write(example.SerializeToString())

    文件读取过程使用输入队列:

    import matplotlib.pyplot as plt
    import tensorflow as tf   
    import numpy as np
    
    
    def _float32_feature(value):
        """Return a ``tf.train.Feature`` carrying one float, ``value``."""
        floats = tf.train.FloatList(value=[value])
        return tf.train.Feature(float_list=floats)
    
    def _int64_feature(value):
        """Return a ``tf.train.Feature`` carrying the integers in ``value``.

        ``value`` must already be a list of ints: it is handed to
        ``Int64List`` as-is. To accept a single scalar instead, wrap it
        first, i.e. ``Int64List(value=[value])``.
        """
        ints = tf.train.Int64List(value=value)
        return tf.train.Feature(int64_list=ints)
    
    
    def _bytes_feature(value):
        """Return a ``tf.train.Feature`` carrying one bytes object, ``value``."""
        raw = tf.train.BytesList(value=[value])
        return tf.train.Feature(bytes_list=raw)
    
    
    file_out='./data/output2.tfrecords'

    files = tf.train.match_filenames_once('./data/*.jpg')

    # string_input_producer creates a queue of file names.
    filename_queue = tf.train.string_input_producer(files, shuffle=False, num_epochs=3)
    # The reader pulls from the file-name queue via reader.read; each read
    # yields one (file name, file contents) pair.
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)

    # Decode the JPEG bytes into an image tensor.
    img_data = tf.image.decode_jpeg(value)
    img_data.set_shape([None, None, 3])

    # NOTE(review): these float/resize ops are built but never evaluated
    # below; the records store the undecorated img_data pixels.
    img_float = tf.image.convert_image_dtype(img_data, tf.float32)
    img_float = tf.image.resize_images(img_float, [300, 300], method=0)

    # The writer and session are context-managed so both are released on error.
    with tf.Session() as sess, tf.python_io.TFRecordWriter(file_out) as writer:
        # With num_epochs set, string_input_producer defines a local epoch
        # counter variable that has to be initialized.
        tf.local_variables_initializer().run()

        x=np.array([[1,1,1,1],[1,1,1,1]])

        coord = tf.train.Coordinator()

        # The queue is only filled after start_queue_runners is called.
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            for _ in range(3):
                resized=sess.run(img_data)
                # Raw pixel bytes; tobytes() replaces the deprecated tostring().
                resized = resized.tobytes()

                example=tf.train.Example(features=tf.train.Features(feature={
                       # Flatten the 2-D array into a plain list of ints.
                       'x':_int64_feature(x.reshape(x.size).tolist()),
                       'x2':_int64_feature([1,1]),
                       'y':_float32_feature(0.1),
                       'image_raw':_bytes_feature(resized)}))

                writer.write(example.SerializeToString())
        finally:
            # Always stop and join the queue threads, even if a run() raised.
            coord.request_stop()
            coord.join(threads)
  • 相关阅读:
    hibernate缓存清除(转)
    hibernate缓存
    hibernate延迟加载
    session进行增删改查操作
    curl命令详解
    CURL 宏定义列表
    CURL常用命令---样例
    打印 上一主题 下一主题 利用cURL实现单个文件分多段同时下载,支持断点续传(修订版)
    curl断点续载
    CURL常用命令
  • 原文地址:https://www.cnblogs.com/devilmaycry812839668/p/12749799.html
Copyright © 2020-2023  润新知