1. Convolutional neural network: satellite image classification
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import pathlib  # object-oriented path handling, similar to os

data_dir = './2_class'
data_root = pathlib.Path(data_dir)  # create a Path object
data_root  # WindowsPath('2_class')

# iterate over the directory
for item in data_root.iterdir():  # Path.iterdir() lists the entries
    print(item)  # 2_class\airplane, 2_class\lake

# collect every file path under every sub-directory
all_image_path = list(data_root.glob('*/*'))  # glob pattern: all files in all sub-directories
image_count = len(all_image_path)  # 1400 images in total
all_image_path[:3]
all_image_path[-3:]

# convert the Path objects to plain path strings
all_image_path = [str(path) for path in all_image_path]

# shuffle the image paths
import random
random.shuffle(all_image_path)

# build the labels
label_names = sorted(item.name for item in data_root.glob('*/'))  # the names of the two class sub-directories
label_names
label_to_index = dict((name, index) for index, name in enumerate(label_names))  # map each class name (folder name) to an integer
label_to_index
all_image_label = [label_to_index[pathlib.Path(p).parent.name] for p in all_image_path]
all_image_label[:5]
all_image_path[:5]

import IPython.display as display
index_to_label = dict((v, k) for k, v in label_to_index.items())
index_to_label
for n in range(3):
    image_index = random.choice(range(len(all_image_path)))
    display.display(display.Image(all_image_path[image_index]))
    print(index_to_label[all_image_label[image_index]])
    print()

img_path = all_image_path[0]
img_path
img_raw = tf.io.read_file(img_path)  # raw image bytes as a string tensor
# decode the image
img_tensor = tf.image.decode_image(img_raw)
print(img_tensor.shape)
img_tensor.dtype
img_tensor
# normalize the image
img_tensor = tf.cast(img_tensor, tf.float32)
img_tensor = img_tensor / 255
img_tensor.numpy().max()
img_tensor.numpy().min()

# wrap the steps above into a single function
def load_preprosess_image(img_path):
    img_raw = tf.io.read_file(img_path)
    img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
    img_tensor = tf.image.resize(img_tensor, [256, 256])
    img_tensor = tf.cast(img_tensor, tf.float32)
    img = img_tensor / 255
    return img

plt.imshow(load_preprosess_image(all_image_path[100]))  # inspect the 100th image

path_ds = tf.data.Dataset.from_tensor_slices(all_image_path)
path_ds
image_dataset = path_ds.map(load_preprosess_image)  # image data
# label data
label_dataset = tf.data.Dataset.from_tensor_slices(all_image_label)
for label in label_dataset.take(10):
    print(label.numpy())
image_dataset
for img in image_dataset.take(1):
    print(img)

# zip images and labels into one dataset
dataset = tf.data.Dataset.zip((image_dataset, label_dataset))
dataset

# train/test split
test_count = int(image_count * 0.2)
train_count = image_count - test_count
test_count, train_count
train_dataset = dataset.skip(test_count)
test_dataset = dataset.take(test_count)

BATCH_SIZE = 32
# repeat() makes the dataset yield data indefinitely
train_dataset = train_dataset.repeat().shuffle(buffer_size=train_count).batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)

# build the model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(64, (3, 3), input_shape=(256, 256, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(1024, (3, 3), activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(1024, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # single sigmoid unit for the 2-class (airplane vs. lake) task, matching binary_crossentropy
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
steps_per_epoch = train_count // BATCH_SIZE
validation_steps = test_count // BATCH_SIZE
history = model.fit(train_dataset,
                    epochs=1,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=test_dataset,
                    validation_steps=validation_steps)
history
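As a side note, the map/shuffle/batch pipeline above can usually be sped up by decoding images on several threads and prefetching the next batch while the GPU trains. A minimal sketch, assuming the `path_ds`, `label_dataset`, `load_preprosess_image`, `test_count`, `train_count` and `BATCH_SIZE` defined above (in the TF 2.0/2.1 releases this code targets, the AUTOTUNE constant lives under tf.data.experimental):

AUTOTUNE = tf.data.experimental.AUTOTUNE  # let tf.data choose the degree of parallelism

# decode/resize images on several threads instead of one
image_dataset = path_ds.map(load_preprosess_image, num_parallel_calls=AUTOTUNE)
dataset = tf.data.Dataset.zip((image_dataset, label_dataset))

train_dataset = (dataset.skip(test_count)
                        .repeat()
                        .shuffle(buffer_size=train_count)
                        .batch(BATCH_SIZE)
                        .prefetch(AUTOTUNE))  # overlap preprocessing with training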
2. Sequence problems: the IMDB movie-review dataset
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=10000)  # keep only the 10,000 most frequent words, so every index is below 10000
x_train.shape
x_train[0]
y_train.shape
[len(x) for x in x_train]  # length of each review
x_train = keras.preprocessing.sequence.pad_sequences(x_train, 300)  # pad/truncate every review to 300 tokens
x_test = keras.preprocessing.sequence.pad_sequences(x_test, 300)
[len(x) for x in x_train]  # length of each review after padding

# toy example: building a word -> index mapping
test = 'i am a student'
dict((word, test.split().index(word)) for word in test.split())

model = keras.models.Sequential()
model.add(layers.Embedding(10000, 50, input_length=300))  # vocabulary size 10000, 50-dim dense vectors: (25000, 300) -> (25000, 300, 50)
# model.add(layers.Flatten())  # (25000, 300, 50) -> (25000, 300*50)
model.add(layers.GlobalAveragePooling1D())  # can replace Flatten: (25000, 300, 50) -> (25000, 50)
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()

model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.001),
              loss='binary_crossentropy',
              metrics=['acc'])
history = model.fit(x_train, y_train, epochs=10, batch_size=256, validation_data=(x_test, y_test))
history.history.keys()

import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(history.epoch, history.history['loss'], 'r')
plt.plot(history.epoch, history.history['val_loss'], 'b--')
plt.plot(history.epoch, history.history['acc'], 'r')
plt.plot(history.epoch, history.history['val_acc'], 'b--')
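To make the integer sequences less abstract, the word index that ships with the dataset can be used to decode a review back into text. A minimal sketch, assuming `x_train` from `keras.datasets.imdb.load_data` above; with the default loader settings the first three indices are reserved (padding, start-of-sequence, unknown), hence the offset of 3:

word_index = keras.datasets.imdb.get_word_index()           # word -> integer
reverse_word_index = {v: k for k, v in word_index.items()}  # integer -> word

def decode_review(sequence):
    # indices 0, 1, 2 are reserved markers; anything unknown prints as '?'
    return ' '.join(reverse_word_index.get(i - 3, '?') for i in sequence)

print(decode_review(x_train[0]))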
3. Cats-vs-dogs dataset: image data augmentation
import keras
from keras import layers
import numpy as np
import os
import shutil

base_dir = './dataset/cat_dog'
train_dir = os.path.join(base_dir, 'train')
train_dir_dog = os.path.join(train_dir, 'dog')
train_dir_cat = os.path.join(train_dir, 'cat')
test_dir = os.path.join(base_dir, 'test')
test_dir_dog = os.path.join(test_dir, 'dog')
test_dir_cat = os.path.join(test_dir, 'cat')
dc_dir = './dataset/dc/train'

if not os.path.exists(base_dir):
    os.mkdir(base_dir)
    os.mkdir(train_dir)
    os.mkdir(train_dir_dog)
    os.mkdir(train_dir_cat)
    os.mkdir(test_dir)
    os.mkdir(test_dir_dog)
    os.mkdir(test_dir_cat)

# copy the first 1000 cats/dogs for training, the next 500 of each for testing
fnames = ['cat.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    s = os.path.join(dc_dir, fname)
    d = os.path.join(train_dir_cat, fname)
    shutil.copyfile(s, d)

fnames = ['cat.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    s = os.path.join(dc_dir, fname)
    d = os.path.join(test_dir_cat, fname)
    shutil.copyfile(s, d)

fnames = ['dog.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    s = os.path.join(dc_dir, fname)
    d = os.path.join(train_dir_dog, fname)
    shutil.copyfile(s, d)

fnames = ['dog.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    s = os.path.join(dc_dir, fname)
    d = os.path.join(test_dir_dog, fname)
    shutil.copyfile(s, d)

The ImageDataGenerator pipeline covers: (1) reading the image files, (2) decoding them, (3) preprocessing / resizing, (4) normalizing the pixel values.

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1/255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   brightness_range=(0.6, 1),
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True)
test_datagen = ImageDataGenerator(rescale=1/255)

train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(200, 200),
                                                    batch_size=20,
                                                    class_mode='binary')

import matplotlib.pyplot as plt
%matplotlib inline
# preview one augmented image from the first batch
for im_batch, label_batch in train_generator:
    plt.imshow(im_batch[0])
    break

test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(200, 200),
                                                  batch_size=20,
                                                  class_mode='binary')

model = keras.Sequential()
model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(200, 200, 3)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Dropout(0.25))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Dropout(0.25))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D())
model.add(layers.Dropout(0.25))
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()

model.compile(optimizer=keras.optimizers.Adam(lr=1e-4),  # the original left the learning rate unspecified; 1e-4 is a reasonable default here
              loss='binary_crossentropy',
              metrics=['acc'])

history = model.fit_generator(train_generator,
                              epochs=30,
                              steps_per_epoch=100,
                              validation_data=test_generator,
                              validation_steps=50)
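To judge whether the augmentation actually curbs overfitting, the train and validation curves can be plotted from the returned `history`. A small sketch, assuming the `history` object above (the keys are 'acc' / 'val_acc' because the model was compiled with metrics=['acc']):

import matplotlib.pyplot as plt
%matplotlib inline

plt.plot(history.epoch, history.history['acc'], 'r', label='train acc')
plt.plot(history.epoch, history.history['val_acc'], 'b--', label='val acc')
plt.legend()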
4. Eager execution
import tensorflow as tf
tf.executing_eagerly()

x = [[2,]]
m = tf.matmul(x, x)  # matrix multiplication
m  # a Tensor has two key attributes, shape and dtype, and is immutable
m.numpy()  # convert the tensor to a NumPy array

a = tf.constant([[1, 2], [3, 4]])
a
b = tf.add(a, 1)
b
c = tf.multiply(a, b)  # element-wise multiplication
c

num = tf.convert_to_tensor(10)  # convert a Python number to a tensor; tensors can live on CPU or GPU
num
for i in range(num.numpy()):
    i = tf.constant(i)
    if int(i % 2) == 0:  # tensors convert automatically when used in NumPy-style computations; ndarrays and tensors interoperate directly
        print('even')
    else:
        print('odd')

v = tf.Variable(0.0)  # define a variable
v + 1
v.assign(5)       # assign a new value
v.assign_add(1)   # increment by 1
# read the value
v.read_value()

# GradientTape records operations so gradients can be computed; tracked variables must be float
w = tf.Variable([[1.0]])
with tf.GradientTape() as t:
    loss = w * w
grad = t.gradient(loss, w)
grad

# tracking operations on constants requires watch()
w = tf.constant(3.0)
with tf.GradientTape() as t:
    t.watch(w)
    loss = w * w
dloss_dw = t.gradient(loss, w)
dloss_dw

w = tf.constant(3.0)
with tf.GradientTape(persistent=True) as t:  # a persistent tape can be used for several gradients
    t.watch(w)
    y = w * w
    z = y * y
dy_dw = t.gradient(y, w)
dy_dw
dz_dw = t.gradient(z, w)
dz_dw

(train_image, train_labels), (test_image, test_labels) = tf.keras.datasets.mnist.load_data()
train_image.shape
train_labels

# add a channel dimension to the images
train_image = tf.expand_dims(train_image, -1)  # expand the last axis
test_image = tf.expand_dims(test_image, -1)
train_image.shape

# cast to float and scale to [0, 1]
train_image = tf.cast(train_image/255, tf.float32)
test_image = tf.cast(test_image/255, tf.float32)
# cast the labels
train_labels = tf.cast(train_labels, tf.int64)
test_labels = tf.cast(test_labels, tf.int64)

# load into tf.data (the argument must be a tuple)
dataset = tf.data.Dataset.from_tensor_slices((train_image, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_image, test_labels))
dataset
dataset = dataset.shuffle(10000).batch(32)  # shuffle and batch
test_dataset = test_dataset.batch(32)
dataset

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, [3, 3], activation='relu', input_shape=(None, None, 1)),
    tf.keras.layers.Conv2D(32, [3, 3], activation='relu'),
    tf.keras.layers.GlobalMaxPooling2D(),
    tf.keras.layers.Dense(10)
])
model.trainable_variables

# custom training loop
optimizer = tf.keras.optimizers.Adam()
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

features, labels = next(iter(dataset))
features.shape
labels.shape
predictions = model(features)
predictions.shape
tf.argmax(predictions, axis=1)  # index of the maximum value in each row

def loss(model, x, y):
    y_ = model(x)
    return loss_func(y, y_)

train_loss = tf.keras.metrics.Mean('train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')
test_loss = tf.keras.metrics.Mean('test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy')

def train_step(model, images, labels):
    with tf.GradientTape() as t:
        pred = model(images)
        loss_step = loss_func(labels, pred)
    grads = t.gradient(loss_step, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss_step)
    train_accuracy(labels, pred)

def test_step(model, images, labels):
    pred = model(images)
    loss_step = loss_func(labels, pred)
    test_loss(loss_step)
    test_accuracy(labels, pred)

def train():
    for epoch in range(10):
        for (batch, (images, labels)) in enumerate(dataset):
            train_step(model, images, labels)
        print('Epoch{} loss is {}, acc is {}'.format(epoch, train_loss.result(), train_accuracy.result()))
        for (batch, (images, labels)) in enumerate(test_dataset):
            test_step(model, images, labels)
        print('Epoch{} test_loss is {}, acc is {}'.format(epoch, test_loss.result(), test_accuracy.result()))
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()

train()

# tf.keras.metrics: aggregation utilities
m = tf.keras.metrics.Mean('acc')  # running-mean object; m.result() reads it, m.reset_states() resets it
a = tf.keras.metrics.SparseCategoricalAccuracy('acc')
a(labels, model(features))
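A tiny sketch of how these aggregation objects behave, with made-up values purely to show the update/result/reset cycle:

m = tf.keras.metrics.Mean('demo_mean')
m(10.0)                       # running mean is now 10
m(20.0)                       # running mean is now 15
print(m.result().numpy())     # 15.0
m.reset_states()              # clear the accumulated state, e.g. at the end of an epoch
print(m.result().numpy())     # 0.0

acc = tf.keras.metrics.SparseCategoricalAccuracy('demo_acc')
acc([0, 1], [[0.9, 0.1], [0.2, 0.8]])   # both predictions are correct
print(acc.result().numpy())             # 1.0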
5. TensorBoard visualization
import tensorflow as tf
import datetime
tf.__version__

(train_image, train_labels), (test_image, test_labels) = tf.keras.datasets.mnist.load_data()
train_image.shape
train_image = tf.expand_dims(train_image, -1)
test_image = tf.expand_dims(test_image, -1)
train_image.shape
train_image = tf.cast(train_image/255, tf.float32)
test_image = tf.cast(test_image/255, tf.float32)
train_labels = tf.cast(train_labels, tf.int64)
test_labels = tf.cast(test_labels, tf.int64)

dataset = tf.data.Dataset.from_tensor_slices((train_image, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_image, test_labels))
dataset
dataset = dataset.repeat().shuffle(60000).batch(128)   # repeat indefinitely; fit() is told how many steps make one epoch
test_dataset = test_dataset.repeat().batch(128)
dataset

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, [3, 3], activation='relu', input_shape=(None, None, 1)),
    tf.keras.layers.Conv2D(32, [3, 3], activation='relu'),
    tf.keras.layers.GlobalMaxPooling2D(),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

import datetime
import os
log_dir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()

def lr_schedule(epoch):
    """Returns a custom learning rate that decreases as epochs progress."""
    learning_rate = 0.2
    if epoch > 5:
        learning_rate = 0.02
    if epoch > 10:
        learning_rate = 0.01
    if epoch > 20:
        learning_rate = 0.005
    tf.summary.scalar('learning rate', data=learning_rate, step=epoch)
    return learning_rate

lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

model.fit(dataset,
          epochs=5,
          steps_per_epoch=60000//128,
          validation_data=test_dataset,
          validation_steps=10000//128,
          callbacks=[tensorboard_callback])

%load_ext tensorboard
%matplotlib inline
%tensorboard --logdir logs

'''
The SCALARS panel tracks single values such as accuracy, loss and learning rate over time; in code they are written with tf.summary.scalar().
Each chart has three small icons in its lower-right corner: the first opens an enlarged view, the second toggles a logarithmic y-axis, and the third resets the axes after you have dragged or zoomed them.

The GRAPHS panel shows the overall structure of the network, the direction and size of the data flow, and the time, memory and parameter count of each node during training. The default view has two parts: the Main Graph and Auxiliary Nodes. The main graph is the network structure itself; the auxiliary nodes cover initialization, training, saving and so on. Double-click a node, or click the + in its top-right corner, to expand it; the view can also be zoomed.

DISTRIBUTIONS shows how each parameter changes as the number of training steps grows; essentially a stack of multi-quantile line charts.

HISTOGRAMS and DISTRIBUTIONS present the same data in different ways; HISTOGRAMS is a stack of frequency histograms.

# Logging custom scalars
To re-tune the model and log a custom learning rate:
1. Create a file writer with tf.summary.create_file_writer().
2. Define a custom learning-rate function; it will be passed to the Keras LearningRateScheduler callback.
3. Inside that function, use tf.summary.scalar() to log the custom learning rate.
4. Pass the LearningRateScheduler callback to Model.fit().
In general, logging a custom scalar requires tf.summary.scalar() together with a file writer. The file writer writes the run's data to the chosen directory and is used implicitly by tf.summary.scalar().
'''

model.fit(dataset,
          epochs=30,
          steps_per_epoch=60000//128,
          validation_data=test_dataset,
          validation_steps=10000//128,
          callbacks=[tensorboard_callback, lr_callback])

# Using TensorBoard in a custom training loop
optimizer = tf.keras.optimizers.Adam()
loss_func = tf.keras.losses.SparseCategoricalCrossentropy()

def loss(model, x, y):
    y_ = model(x)
    return loss_func(y, y_)

train_loss = tf.keras.metrics.Mean('train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')
test_loss = tf.keras.metrics.Mean('test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy')

def train_step(model, images, labels):
    with tf.GradientTape() as t:
        pred = model(images)
        loss_step = loss_func(labels, pred)
    grads = t.gradient(loss_step, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss_step)
    train_accuracy(labels, pred)

def test_step(model, images, labels):
    pred = model(images)
    loss_step = loss_func(labels, pred)
    test_loss(loss_step)
    test_accuracy(labels, pred)

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

def train():
    for epoch in range(10):
        for (batch, (images, labels)) in enumerate(dataset):
            train_step(model, images, labels)
            print('.', end='')
        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

        for (batch, (images, labels)) in enumerate(test_dataset):
            test_step(model, images, labels)
            print('*', end='')
        with test_summary_writer.as_default():
            tf.summary.scalar('loss', test_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch+1,
                              train_loss.result(),
                              train_accuracy.result()*100,
                              test_loss.result(),
                              test_accuracy.result()*100))

        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()

train()

%tensorboard --logdir logs/gradient_tape
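Besides scalars, the same file writers can log other summary types. As one hedged example, a short sketch of writing a few input images so they show up under TensorBoard's IMAGES tab; it reuses the `train_summary_writer` and `dataset` from above, and the summary name is illustrative:

sample_images, _ = next(iter(dataset))   # one batch of (128, 28, 28, 1) float images in [0, 1]
with train_summary_writer.as_default():
    tf.summary.image('training samples', sample_images, step=0, max_outputs=9)  # log the first 9 images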