• TensorFlow 基于 CSV 数据集实现多元线性回归并预测


    #coding:utf8
    import tensorflow as tf
    from sklearn import linear_model
    from sklearn import preprocessing
    import numpy as np
    
    
    def read_data(file_queue):
        '''
        Read one CSV record from the filename queue and turn it into a
        feature vector plus its label, using TensorFlow's own CSV ops.

        :param file_queue: queue of CSV filenames to read from
        :return: features, label -- a stacked tensor of six values
                 (latitude, longitude, price, encoded building type,
                 encoded trade type, expected deal price) and the
                 daysOnMarket column
        '''
        # The first line of each file is skipped (header row).
        line_reader = tf.TextLineReader(skip_header_lines=1)
        _, csv_line = line_reader.read(file_queue)

        # One default per CSV column; used when a field is empty.
        column_defaults = [
            [''], [''], [''], [''],      # province, city, address, postCode
            [0.], [0.], [0.], [0.],      # longitude, latitude, price, buildingTypeId
            [''], [0], [''],             # buildingTypeName, tradeTypeId, tradeTypeName
            [0.], [''], [''], [0],       # expectedDealPrice, listingDate, delislingDate, daysOnMarket
        ]
        # Each read consumes one line; decode_csv splits it into one tensor per column.
        columns = tf.decode_csv(csv_line, column_defaults)
        longitude, latitude, price = columns[4], columns[5], columns[6]
        building_type_name = columns[8]
        trade_type_name = columns[10]
        expected_deal_price = columns[11]
        days_on_market = columns[14]

        def encode_category(name_tensor, labels):
            # Map each known label to its position in `labels` (as a float);
            # anything else falls through to the default branch, -1.
            branches = {
                tf.equal(name_tensor, tf.constant(label)): (lambda code=float(position): tf.constant(code))
                for position, label in enumerate(labels)
            }
            return tf.case(branches, lambda: tf.constant(-1.00), exclusive=True)

        # Encode the non-numeric column buildingTypeName.
        building_type_code = encode_category(building_type_name, (
            'Residential',
            'Condo',
            'Mobile Home',
            'No Building',
            'Row / Townhouse',
            'Duplex',
            'Manufactured Home',
            'Commercial',
            'Other',
        ))
        # Encode tradeTypeName: Sale or Lease.
        trade_type_code = encode_category(trade_type_name, ('Sale', 'Lease'))

        feature_vector = tf.stack([
            latitude,
            longitude,
            price,
            building_type_code,
            trade_type_code,
            expected_deal_price,
        ])
        return feature_vector, days_on_market
    
    
    def create_pipeline(filename,batch_size,num_epochs=None):
        '''
        Build the input pipeline that yields batches of examples and labels.

        Batches come from tf.train.batch, i.e. records are taken in order;
        tf.train.shuffle_batch would be the randomised alternative.

        :param filename: path of the CSV file to read
        :param batch_size: number of samples in each returned batch
        :param num_epochs: passed to string_input_producer (None by default)
        :return: example_batch, label_batch
        '''
        filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
        # A single (features, label) pair read from the queue.
        features, days_on_market = read_data(filename_queue)
        # Queue length: a fixed margin of 1000 elements plus one batch.
        queue_capacity = 1000 + batch_size
        # Take the batches sequentially.
        feature_batch, label_batch = tf.train.batch(
            [features, days_on_market],
            batch_size=batch_size,
            capacity=queue_capacity,
        )
        return feature_batch, label_batch
    
    
    def train(batch_size, feature_num,learn_rate,filename):
        '''
        Fit a linear model y = x.w + b with gradient descent on batches read
        from a CSV file, and return the learned parameters.

        For every batch a scikit-learn LinearRegression is also fitted and
        printed, purely as a reference for comparison.

        :param batch_size: number of samples per batch
        :param feature_num: number of features per sample
        :param learn_rate: learning rate of the gradient-descent optimizer
        :param filename: name of the CSV file
        :return: w, b -- flattened numpy arrays (feature_num values and 1 value)
        '''

        # Placeholders that are fed with each batch of samples and labels.
        x_data = tf.placeholder(tf.float32, [batch_size, feature_num])
        y_data = tf.placeholder(tf.float32, [batch_size])
        # Model parameters w and b.
        w = tf.Variable(tf.random_uniform((feature_num, 1), -1.0, 1.0))
        b = tf.Variable(tf.random_uniform((1, 1), -1.0, 1.0))
        # Predicted y, shape [batch_size, 1].
        y = tf.add(tf.matmul(x_data, w), b)
        # Labels arrive as a flat [batch_size] vector. Make them a column so the
        # subtraction is element-wise; otherwise [batch, 1] - [batch] broadcasts
        # to a [batch, batch] matrix and the loss compares every prediction
        # with every label.
        y_true = tf.reshape(y_data, [batch_size, 1])
        # Loss: half of the mean squared error.
        loss = tf.reduce_mean(tf.square(y - y_true)) / 2
        # Plain gradient descent.
        optimizer = tf.train.GradientDescentOptimizer(learn_rate)
        train_op = optimizer.minimize(loss)
        # Tensors that produce the next batch of samples and labels.
        example_batch, daysOnMarket_batch = create_pipeline(filename, batch_size)
        # Initialise both global and local variables.
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        print('.........................>>>>开始会话')
        # The context manager closes the session automatically.
        with tf.Session() as sess:
            sess.run(init_op)
            # Start the queue-runner threads that fill the input queues.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                for step in range(100):
                    # Fetch the actual samples and labels of this batch.
                    example, label = sess.run([example_batch, daysOnMarket_batch])
                    print('第%d批数据'%(step))
                    print(example, label)
                    print('.......这一批数据的直接参数')
                    reg = linear_model.LinearRegression()
                    reg.fit(example, label)
                    print("Coefficients of sklearn: W=%s, b=%f" % (reg.coef_, reg.intercept_))
                    # Standardise the features of this batch.
                    # NOTE(review): the scaler is re-fitted on every batch and is
                    # not returned, so callers cannot apply the same scaling to
                    # data they want to predict on.
                    scaler = preprocessing.StandardScaler().fit(example)
                    print(scaler.mean_, scaler.scale_)
                    x_data_standard = scaler.transform(example)

                    sess.run(train_op, feed_dict={x_data: x_data_standard, y_data: label})
                    # Print w and b every ten steps; variables need no feed.
                    if step % 10 == 0:
                        print('当前w值和b值')
                        current_w, current_b = sess.run([w, b])
                        print(current_w, current_b)
                print('。。。。。。。》》》训练后得到w和b')
                theta = sess.run(w).flatten()
                intercept = sess.run(b).flatten()
                print('W:%s' % theta)
                # intercept is a length-1 array; index it for %f formatting.
                print('b:%f' % intercept[0])
            finally:
                # Always stop and join the reader threads, even if a step failed.
                coord.request_stop()
                coord.join(threads)
        return theta, intercept
    
    def predict(data, theta,intercept, feature_num):
        '''
        Predict the label (daysOnMarket) as data . theta + intercept.

        :param data: samples to predict, shape (rows, feature_num)
        :param theta: trained weights, shape (feature_num, 1)
        :param intercept: trained intercept, shape (1, 1)
        :param feature_num: number of features (independent variables)
        :return: result -- predicted labels, shape (rows, 1)
        '''
        # Build the ops in a private graph so repeated calls do not keep adding
        # nodes to the process-wide default graph.
        with tf.Graph().as_default():
            theta1 = tf.placeholder(tf.float32, [feature_num, 1])
            intercept1 = tf.placeholder(tf.float32, [1, 1])
            # None lets the caller pass one row or several at once.
            x_data = tf.placeholder(tf.float32, [None, feature_num])

            y = tf.add(tf.matmul(x_data, theta1), intercept1)

            # This graph holds no variables, so there is nothing to initialise.
            with tf.Session() as sess:
                result = sess.run(y, feed_dict={x_data: data, theta1: theta, intercept1: intercept})
                print(result)
        return result
    
    
    def data_type_conversion(data,theta,intercept,feature_num):
        '''
        Cast the prediction inputs to float32 and reshape them into matrices.

        astype() returns a new array, so the cast and the reshape are chained;
        reshaping the original argument instead would discard the cast.

        :param data: one sample with feature_num values
        :param theta: trained weights with feature_num values
        :param intercept: trained intercept with a single value
        :param feature_num: number of features
        :return: data as (1, feature_num), theta as (feature_num, 1) and
                 intercept as (1, 1), all float32
        '''
        real_data = np.asarray(data).astype(np.float32).reshape(1, feature_num)
        theta_real = np.asarray(theta).astype(np.float32).reshape(feature_num, 1)
        intercept_real = np.asarray(intercept).astype(np.float32).reshape(1, 1)
        return real_data, theta_real, intercept_real
    
    
    
    
    if __name__ == '__main__':
        # Script entry point: read one listing from the console, train on
        # house_info.csv, then predict the listing's days on market.
        # Coordinates and prices may contain decimals, so parse them as floats.
        input_longitude = float(input('请输入经度'))
        input_latitude = float(input('请输入纬度'))
        input_price = float(input('请输入价格'))
        # The category codes must match the encoding used by read_data
        # (building types 0..8, trade types 0..1), and are parsed as numbers
        # so the sample array stays numeric.
        input_buildingtype = float(input('请输入房源类型名称:只有9种类型:Residential:0 ,Condo:1 ,Mobile Home:2 ,No Building:3 ,Row / Townhouse:4 ,Duplex:5 ,Manufactured Home:6 ,Commercial:7 ,Other:8'))
        input_tradetype = float(input('请输入交易形式:只有两种Sale:0,Lease:1'))
        input_expected_deal_price = float(input('请输入期望的交易价格'))

        # Same feature order as the training features built in read_data:
        # latitude, longitude, price, building type, trade type, expected price.
        data = np.array([input_latitude, input_longitude, input_price, input_buildingtype, input_tradetype, input_expected_deal_price], dtype=np.float32)
        # NOTE(review): train() standardises every batch before fitting, but
        # this sample is passed to predict() unscaled -- confirm whether the
        # same scaling should be applied here.
        theta, intercept = train(10, 6, 0.3, 'house_info.csv')
        data_real, theta_real, intercept_real = data_type_conversion(data, theta, intercept,6)
        daysOnmarket = predict(data_real, theta_real, intercept_real, 6)
        # predict returns a (1, 1) array; take the scalar before formatting.
        print('预测的天数:%d'%int(daysOnmarket[0][0]))
  • 相关阅读:
    Swift3 ——S3 API中间件兼容性测试
    解决 Python.h:没有那个文件或目录 错误的方法
    Swift云存储特性研究
    解决updateaptxapi占用资源过高的问题
    dll开发及调用
    git批量备份
    UDP端口扫描
    将markdown文件转换为pdf
    指定ssh key访问git
    CentOS6.2调整home分区大小
  • 原文地址:https://www.cnblogs.com/bluesl/p/9215749.html
Copyright © 2020-2023  润新知