• 随机森林


    复制代码
    # Baseline: fit a random-forest regressor on the California housing data.
    from sklearn.ensemble import RandomForestRegressor
    import pandas as pd

    # Load the California housing dataset.
    # The loader is imported from the public sklearn.datasets namespace; the
    # private module path sklearn.datasets.california_housing is not available
    # in current scikit-learn releases.
    from sklearn.datasets import fetch_california_housing
    housing = fetch_california_housing()

    # Split into training and hold-out sets; test_size=0.1 keeps 10% of the
    # rows for testing, random_state=42 makes the split reproducible.
    from sklearn.model_selection import train_test_split
    data_train, data_test, target_train, target_test = train_test_split(
        housing.data, housing.target, test_size=0.1, random_state=42
    )

    # Build the baseline forest; a fixed random_state yields the same trees
    # on every run.
    rfr = RandomForestRegressor(random_state=42)
    rfr.fit(data_train, target_train)      # train the model
    rfr_predict = rfr.predict(data_test)   # predictions on the hold-out set

    # score() is the estimator's default metric (R^2 for regressors). The
    # training-set score is optimistic, so the hold-out score is reported too.
    print("train R^2:", rfr.score(data_train, target_train))
    print("test R^2:", rfr.score(data_test, target_test))

    # Hyper-parameter tuning for the random forest via exhaustive grid search.
    # GridSearchCV is imported from sklearn.model_selection; the former
    # sklearn.grid_search module no longer exists in current scikit-learn.
    from sklearn.model_selection import GridSearchCV

    # Candidate values for each hyper-parameter to be tuned.
    tree_pram_grad = {
        'min_samples_split': [3, 6, 9],
        'n_estimators': [10, 50, 100],
    }

    # First argument: the estimator whose parameters are tuned.
    # param_grid: the candidate parameter values; cv=5: 5-fold cross-validation.
    # random_state=42 keeps the search reproducible, like the rest of the script.
    grid = GridSearchCV(
        RandomForestRegressor(random_state=42),
        param_grid=tree_pram_grad,
        cv=5,
    )
    grid.fit(data_train, target_train)

    # Print the mean CV score of every parameter combination, the best
    # combination, and the best mean CV score. cv_results_ replaces the
    # removed grid_scores_ attribute.
    print(
        list(zip(grid.cv_results_['params'], grid.cv_results_['mean_test_score'])),
        grid.best_params_,
        grid.best_score_,
    )

    # 输出结果显示:当 min_samples_split=3、n_estimators=100 时模型表现最好

    # 重新构建随机森林并进行预测

    # Rebuild the forest with the best hyper-parameters found by the grid
    # search (grid.best_params_); without them this model would be identical
    # to the untuned baseline. random_state=42 keeps the run reproducible.
    rfr = RandomForestRegressor(random_state=42, **grid.best_params_)
    rfr.fit(data_train, target_train)          # train the tuned model
    new_rfr_predict = rfr.predict(data_test)   # predictions on the hold-out set

    # Show feature importances, most important first, labelled by feature name.
    feature_importance = pd.Series(
        rfr.feature_importances_, index=housing.feature_names
    ).sort_values(ascending=False)
    print(feature_importance)

    复制代码
  • 相关阅读:
    SequenceInputStream
    BufferedReader
    FileWriter
    FileReader
    BufferedOutputStream
    javascript/html 禁止图片缓存
    localStorage util
    FormData上传文件 带进度条
    javascript 一些函数的实现 Function.prototype.bind, Array.prototype.map
    替代jquery中的几个函数
  • 原文地址:https://www.cnblogs.com/litieshuai/p/11388298.html
Copyright © 2020-2023  润新知