• 数据标准化+网格搜索+交叉验证+预测(Python)


    Download the dataset iris_training.csv from: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/tutorials/monitors

    Method: SVR

    # -*- coding: utf-8 -*-
    # Standardize the features, grid-search an RBF-kernel SVR with 10-fold
    # cross-validation, then report the mean squared error and R^2 score of
    # the selected model on a held-out 30% test split.

    import numpy as np
    import pandas as pd
    from sklearn import metrics
    from sklearn import svm, datasets
    # GridSearchCV is provided by sklearn.model_selection; the former
    # sklearn.grid_search module is no longer shipped with scikit-learn.
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.utils import shuffle

    df = pd.read_csv('iris_training.csv', header=0)

    # Search grid: gamma in 2^-5 .. 2^0 and C in 2^-5 .. 2^5 (powers of two).
    parameters = {
        'kernel': ['rbf'],
        'gamma': np.logspace(-5, 0, num=6, base=2.0),
        'C': np.logspace(-5, 5, num=11, base=2.0),
    }
    # scikit-learn scorers are "greater is better", so the MSE criterion is
    # spelled 'neg_mean_squared_error'; the bare 'mean_squared_error' name is
    # not accepted by current releases.
    grid_search = GridSearchCV(svm.SVR(), parameters, cv=10, n_jobs=4,
                               scoring='neg_mean_squared_error')

    # The column named 'virginica' is used as the regression target and every
    # other column as a feature.
    X = df[df.columns.drop('virginica')]
    y = df['virginica']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    random_seed = 13
    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so no test-set information leaks into training.
    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)

    grid_search.fit(X_train, y_train)
    y_pred = grid_search.predict(X_test)

    # A single parenthesized string prints identically on Python 2 and 3 and
    # replaces the original two-line print statement, whose indented
    # continuation line was a syntax error.
    print('mean_squared_error:' + str(metrics.mean_squared_error(y_test, y_pred))
          + ' r2_score:' + str(metrics.r2_score(y_test, y_pred)))

    Neural Network:

    # -*- coding: utf-8 -*-
    # Standardize the features, grid-search the hidden-layer width of an
    # MLPRegressor with 10-fold cross-validation, then report the mean squared
    # error and R^2 score of the selected model on a held-out 30% test split.

    import numpy as np
    import pandas as pd
    from sklearn import metrics
    # GridSearchCV is provided by sklearn.model_selection; the former
    # sklearn.grid_search module is no longer shipped with scikit-learn.
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.neural_network import MLPRegressor
    from sklearn.preprocessing import StandardScaler
    from sklearn.utils import shuffle

    df = pd.read_csv('iris_training.csv', header=0)

    # Neural network for regression: candidate hidden-layer sizes, ReLU only.
    parameters = {
        'hidden_layer_sizes': [200, 250, 300, 400, 500, 600],
        'activation': ['relu'],
    }
    # scikit-learn scorers are "greater is better", so the MSE criterion is
    # spelled 'neg_mean_squared_error'; the bare 'mean_squared_error' name is
    # not accepted by current releases.
    grid_search = GridSearchCV(MLPRegressor(), parameters, cv=10, n_jobs=4,
                               scoring='neg_mean_squared_error')

    # The column named 'virginica' is used as the regression target and every
    # other column as a feature.
    X = df[df.columns.drop('virginica')]
    y = df['virginica']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    random_seed = 13
    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so no test-set information leaks into training.
    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)

    grid_search.fit(X_train, y_train)
    y_pred = grid_search.predict(X_test)

    # A single parenthesized string prints identically on Python 2 and 3 and
    # replaces the original two-line print statement, whose indented
    # continuation line was a syntax error.
    print('mean_squared_error:' + str(metrics.mean_squared_error(y_test, y_pred))
          + ' r2_score:' + str(metrics.r2_score(y_test, y_pred)))
    
  • 相关阅读:
    spark连接MongoDB
    idea+scala+spark遇到的一些问题
    linux环境变量的配置
    sqoop的导入导出
    hive中一些常用的sql语句
    Unity 插件制作笔记(持续更新)
    linux-shutdown命令说明
    linux中的redis缓存服务器
    IceScrum敏捷开发工具的安装文档-官方最新版
    PHP设计模式系列
  • 原文地址:https://www.cnblogs.com/huadongw/p/6380482.html
Copyright © 2020-2023  润新知