1. 利用 TFRecord 格式存取（写入、读取）MNIST 数据集的方法
写入 MNIST 数据集的方法 (TFRecord 格式)
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np


def _float32_feature(value):
    """Wrap a single float in a ``tf.train.Feature`` (FloatList of length 1)."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` (Int64List of length 1)."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    """Wrap a single bytes object in a ``tf.train.Feature`` (BytesList of length 1)."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


# Load MNIST with uint8 pixel values and one-hot encoded labels.
mnist = input_data.read_data_sets('./data', dtype=tf.uint8, one_hot=True)

train_images = mnist.train.images
train_labels = mnist.train.labels
train_num = mnist.train.num_examples

pixels = train_images.shape[1]  # 784 = 28*28

file_out = './data/output.tfrecords'

# The `with` block guarantees the record file is flushed and closed even if
# serialization of some example raises part-way through the loop.
with tf.python_io.TFRecordWriter(file_out) as writer:
    for index in range(train_num):
        # Serialize the image row to a raw byte string. `tobytes()` is the
        # non-deprecated spelling of `tostring()` and yields the same bytes.
        image_raw = train_images[index].tobytes()
        example = tf.train.Example(features=tf.train.Features(feature={
            'pixels': _int64_feature(pixels),
            # Labels are one-hot, so argmax recovers the class index; cast to
            # a plain int before handing it to the protobuf list.
            'label': _int64_feature(int(np.argmax(train_labels[index]))),
            'x': _float32_feature(0.1),
            'image_raw': _bytes_feature(image_raw),
        }))
        writer.write(example.SerializeToString())
读取 Mnist数据集的方法 (TFRecord格式)
import tensorflow as tf

# Read serialized examples from every file matching the pattern via a
# filename queue.
reader = tf.TFRecordReader()
files = tf.train.match_filenames_once('./data/output.*')
filename_queue = tf.train.string_input_producer(files)
_, serialized_example = reader.read(filename_queue)

# Parse one serialized tf.train.Example into its four scalar features.
features = tf.parse_single_example(serialized_example, features={
    'image_raw': tf.FixedLenFeature([], tf.string),
    'pixels': tf.FixedLenFeature([], tf.int64),
    'label': tf.FixedLenFeature([], tf.int64),
    'x': tf.FixedLenFeature([], tf.float32),
})

# Required step: reinterpret the raw byte string as a uint8 vector.
image = tf.decode_raw(features['image_raw'], tf.uint8)
label = tf.cast(features['label'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

# decode_raw cannot infer the vector length statically; fix it here so that
# tf.train.batch sees a fully defined shape.
image.set_shape([784])

batch_size = 128
image_batch, label_batch, pixels_batch = tf.train.batch(
    [image, label, pixels],
    batch_size=batch_size,
    capacity=1000 + 3 * batch_size)

coord = tf.train.Coordinator()
with tf.Session() as sess:
    # Local variables are initialized before the queue runners start
    # (match_filenames_once keeps its result in one).
    sess.run(tf.local_variables_initializer())
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for i in range(3):
            print(sess.run([image_batch, label_batch, pixels_batch]))
    finally:
        # Always stop and join the queue-runner threads, even when a
        # sess.run call above fails, so no background thread outlives us.
        coord.request_stop()
        coord.join(threads)
==================================================================
2. 利用TFRecord 格式 存取 文件夹内图片的方法
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np


def _float32_feature(value):
    """Wrap a single float in a ``tf.train.Feature`` (FloatList of length 1)."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` (Int64List of length 1)."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    """Wrap a single bytes object in a ``tf.train.Feature`` (BytesList of length 1)."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


file_out = './data/output1.tfrecords'
files = tf.gfile.Glob('./data/*.jpg')

# Build the decode op ONCE and feed each file's bytes through a placeholder.
# Calling tf.image.decode_jpeg inside the loop would add a new node to the
# graph for every file, so the graph (and memory use) would grow without bound.
jpeg_bytes = tf.placeholder(tf.string, shape=[])
decoded_image = tf.image.decode_jpeg(jpeg_bytes)

# Both the session and the record writer are context-managed so they are
# released even if decoding one of the files fails.
with tf.Session() as sess, tf.python_io.TFRecordWriter(file_out) as writer:
    for path in files:
        # Close each image file as soon as its bytes have been read.
        with tf.gfile.FastGFile(path, 'rb') as image_file:
            image_raw_data = image_file.read()

        # Evaluate the decode op to get the pixels as a numpy array.
        img_data = sess.run(decoded_image,
                            feed_dict={jpeg_bytes: image_raw_data})

        # Raw pixel bytes of the decoded array (`tobytes()` is the
        # non-deprecated spelling of `tostring()`).
        # NOTE(review): the image height/width/channels are not stored in the
        # record, so a reader must know the shape some other way -- confirm
        # this is intended.
        resized = img_data.tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            'y': _int64_feature(1),
            'x': _float32_feature(0.1),
            'image_raw': _bytes_feature(resized),
        }))
        writer.write(example.SerializeToString())
文件读取过程使用 输入队列 :
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np


def _float32_feature(value):
    """Wrap a single float in a ``tf.train.Feature`` (FloatList of length 1)."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def _int64_feature(value):
    """Wrap a list of integers in a ``tf.train.Feature`` (Int64List).

    Unlike the single-value variant, ``value`` is passed through unchanged,
    so it must already be a list of ints.
    """
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def _bytes_feature(value):
    """Wrap a single bytes object in a ``tf.train.Feature`` (BytesList of length 1)."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


file_out = './data/output2.tfrecords'

files = tf.train.match_filenames_once('./data/*.jpg')

# string_input_producer creates a queue of file names.
filename_queue = tf.train.string_input_producer(
    files, shuffle=False, num_epochs=3)

# The reader pulls from the filename queue; WholeFileReader.read returns the
# file name as the key and the whole file content as the value.
reader = tf.WholeFileReader()
key, value = reader.read(filename_queue)

# Decode the JPEG bytes into an image tensor and declare it as 3-channel.
img_data = tf.image.decode_jpeg(value)
img_data.set_shape([None, None, 3])

# NOTE(review): img_float (the 300x300 float image) is built but never
# evaluated or written; the loop below serializes the un-resized img_data.
# Confirm which of the two was meant to be stored.
img_float = tf.image.convert_image_dtype(img_data, tf.float32)
img_float = tf.image.resize_images(img_float, [300, 300], method=0)

# The writer is context-managed so the record file is closed on any exit path.
with tf.python_io.TFRecordWriter(file_out) as writer, tf.Session() as sess:
    # string_input_producer(num_epochs=...) defines an epoch-counter local
    # variable, which must be initialized before the queue is used.
    tf.local_variables_initializer().run()

    x = np.array([[1, 1, 1, 1], [1, 1, 1, 1]])

    coord = tf.train.Coordinator()
    # The queues only start being filled once start_queue_runners is called.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Writes exactly three records, one per dequeued image.
        for _ in range(3):
            resized = sess.run(img_data)
            # `tobytes()` is the non-deprecated spelling of `tostring()`.
            resized = resized.tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'x': _int64_feature(x.reshape(x.size).tolist()),
                'x2': _int64_feature([1, 1]),
                'y': _float32_feature(0.1),
                'image_raw': _bytes_feature(resized),
            }))
            writer.write(example.SerializeToString())
    finally:
        # Stop and join the queue-runner threads even if a read or write
        # above raised, so no background thread is left running.
        coord.request_stop()
        coord.join(threads)