过拟合:
在真实的应用中,我们并不是要让模型尽量模拟训练数据的行为,而是希望通过训练数据学到的模型能够对未知数据做出判断。
模型过于复杂后,会去记忆训练数据中每一个噪声的部分,而不是学习数据中的通用趋势。当一个模型的参数比训练数据还要多的时候,这个模型就可以记住所有训练数据的结果,从而使损失函数为 0。
避免过拟合的常用方法:正则化。在损失函数中加入刻画模型复杂程度的指标。损失函数:
J(θ)
引入正则化损失:J(θ)+λR(ω)
λ 代表模型复杂度损失在总损失中所占的比例,R(ω) 刻画的是模型的复杂程度。
一般来说,模型的复杂程度只由权重 ω 决定。
常用的刻画模型复杂程度的函数 R(ω) 有两种:L1 正则化 R(ω)=Σ|ω_i| 和 L2 正则化 R(ω)=Σω_i²。
loss = tf.reduce_mean(tf.square(y_ - y)) + tf.contrib.layers.l2_regularizer(lam)(w) —— 带 L2 正则项的损失函数定义(lam 即正则化系数 λ;lambda 是 Python 关键字,不能用作变量名)。
除了引入正则化损失,避免过拟合的方法还有增加训练数据和使用 Dropout。
import tensorflow as tf
from sklearn.datasets import load_digits  # handwritten-digit dataset bundled with sklearn
from sklearn.model_selection import train_test_split  # splits a dataset into train / test parts
from sklearn.preprocessing import LabelBinarizer  # label preprocessing (binarization)


# --- Data loading ---
digits = load_digits()  # load the handwritten-digit dataset
X = digits.data  # feature vectors
y = digits.target  # sample labels
y = LabelBinarizer().fit_transform(y)  # binarize the labels (one indicator column per class)
# Aside: sklearn.preprocessing.LabelBinarizer().fit_transform() on a two-class input
# >>> lb = preprocessing.LabelBinarizer()
# >>> lb.fit_transform(['yes', 'no', 'no', 'yes'])
# array([[1],
#        [0],
#        [0],
#        [1]])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
# test_size: a float is the fraction of samples held out for testing
# (default 0.25); an int is the absolute number of test samples.
# The data is split into a training set and a test set; the labels are
# binarized so that they line up with the per-class outputs of the classifier.


# --- Layer factory ---
def add_layer(input, in_size, out_size, n_layer='layer', activation_function=None):
    """Build one fully connected layer with dropout and return its output.

    Computes ``matmul(input, Weights) + biases``, applies dropout to that
    pre-activation, then applies the optional activation. Histogram summaries
    are recorded for the weights, the biases and the layer output.

    The dropout keep probability is read from the module-level ``keep_prob``
    placeholder, which therefore has to exist before this function is called.

    Args:
        input: tensor of shape [batch, in_size] feeding the layer (the name
            shadows the builtin but is kept so existing callers still work).
        in_size: number of input features (rows of the weight matrix).
        out_size: number of units in this layer (columns of the weight matrix).
        n_layer: label used to build the summary tag prefix ``layer_<n_layer>``.
        activation_function: callable applied to the pre-activation, or
            ``None`` to leave the layer linear.

    Returns:
        The output tensor of the layer.
    """
    # Underscore instead of a space: spaces are not legal in summary names.
    layer_name = 'layer_%s' % n_layer
    # Background on tf.name_scope:
    # Wrapper for Graph.name_scope() using the default graph.
    # A string (not ending with '/') will create a new name scope, in which
    # name is appended to the prefix of all operations created in the context.
    # If name has been used before, it will be made unique by calling
    # self.unique_name(name).
    with tf.name_scope('weights'):
        # tf.random_normal draws the initial weights from a normal distribution.
        Weights = tf.Variable(tf.random_normal([in_size, out_size]), name='w')
        # tf.summary.histogram emits a histogram summary of the tensor.
        tf.summary.histogram(layer_name + '/weights', Weights)
    with tf.name_scope('biases'):
        biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
        tf.summary.histogram(layer_name + '/biases', biases)
    with tf.name_scope('Wx_plus_b'):
        Wx_plus_b = tf.matmul(input, Weights) + biases
        # Dropout is applied in every layer, before the activation function.
        # keep_prob is the probability that each element is kept.
        Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    tf.summary.histogram(layer_name + '/output', outputs)
    return outputs


# --- Accuracy ---
# (no accuracy op is defined in this script)

# --- Placeholders ---
keep_prob = tf.placeholder(tf.float32)
xs = tf.placeholder(tf.float32, [None, 64])
ys = tf.placeholder(tf.float32, [None, 10])

# --- Network ---
# Author's note: other activation functions reportedly raise errors here.
# NOTE(review): softmax is an unusual choice for a hidden layer; tanh/relu are
# the common ones. The reported errors may come from NaN values in the loss
# rather than from the activation itself -- confirm before changing it.
l1 = add_layer(xs, 64, 50, 'l1', activation_function=tf.nn.softmax)
# Softmax output layer, used for classification. Labelled 'l2' so its
# summaries sit next to those of 'l1'.
prediction = add_layer(l1, 50, 10, 'l2', activation_function=tf.nn.softmax)

# --- Loss ---
# Cross-entropy loss; its inputs are the predicted values and the true labels.
# Aside: tf.reduce_mean
# 'x' is [[1., 1.],
#         [2., 2.]]
# tf.reduce_mean(x) ==> 1.5
# tf.reduce_mean(x, 0) ==> [1.5, 1.5]
# tf.reduce_mean(x, 1) ==> [1., 2.]
# Mean cross-entropy over the batch. The predicted probabilities are clipped
# away from 0 before the log: tf.log(0) is -inf and 0 * -inf is NaN, which
# would turn the loss (and every gradient computed from it) into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
                   reduction_indices=[1]))
tf.summary.scalar('loss', cross_entropy)
# Aside: tf.reduce_sum
# 'x' is [[1, 1, 1],
#         [1, 1, 1]]
# tf.reduce_sum(x) ==> 6
# tf.reduce_sum(x, 0) ==> [2, 2, 2]
# tf.reduce_sum(x, 1) ==> [3, 3]
# tf.reduce_sum(x, 1, keep_dims=True) ==> [[3], [3]]
# tf.reduce_sum(x, [0, 1]) ==> 6
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
# tf.summary.merge_all: merges all summaries collected in the default graph.
merged = tf.summary.merge_all()

# --- Session ---
with tf.Session() as sess:
    print("merged initialize sunccessfulliy")
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    print("train_writer initialize sunccessfulliy")
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    print("test_writer initialize sunccessfulliy")
    try:
        sess.run(tf.global_variables_initializer())
        print("variables initialize sunccessfulliy")
        for i in range(1000):
            # batch_xs, batch_ys = mnist.train.next_batch(100)  # fetch the data batch by batch
            # Train on the full training set with dropout enabled (keep 60%).
            sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.6})
            if i % 50 == 0:
                # Summary logging left disabled, as in the original:
                # print(compute_accuracy(mnist.test.images, mnist.test.labels))
                # train_result = sess.run(merged, feed_dict={xs: X_train, ys: y_train, keep_prob: 1})
                # test_result = sess.run(merged, feed_dict={xs: X_test, ys: y_test, keep_prob: 1})
                # train_writer.add_summary(train_result, i)
                # test_writer.add_summary(test_result, i)
                # Report the test loss with dropout switched off (keep_prob = 1).
                print(sess.run(cross_entropy, feed_dict={xs: X_test, ys: y_test, keep_prob: 1}))
                print("the {}".format(i))
    finally:
        # Flush and release the event files even if training raises.
        train_writer.close()
        test_writer.close()