• 随机森林—回归


    import datetime
    
    import numpy as np
    import pandas as pd
    from matplotlib import pyplot as plt
    from sklearn.model_selection import train_test_split, GridSearchCV
    from sklearn.ensemble import RandomForestRegressor
    
    # SimHei is a CJK font; presumably selected so the Chinese title and
    # legend labels used by the plot in this script render correctly.
    plt.rcParams.update({"font.sans-serif": ["SimHei"]})
    
    # Step 1: read the data set from a CSV file (relative path).
    file_path = r"../../机器学习数据/data_temps1.csv"
    df = pd.read_csv(filepath_or_buffer=file_path)
    # 1.1 Data inspection (disabled, kept for reference): check the format,
    #     the composition and the missing values of the data.
    # print(df.head())  # first five rows
    # print(df.shape)  # shape of the data
    # print(df.tail())
    # print(df.describe())  # summary statistics
    # print(df.info)
    # print(df.isnull())
    # print(df.isnull().sum())
    # plt.hist(df["周"], bins=7, linewidth=0.5, edgecolor='white', align='left', alpha=0.6)
    
    
    # Step 2: data preprocessing
    # 2.1 Merge year / month / day into one date string (disabled)
    # df['date1'] = df["年"].map(str) + "-" + df["月"].map(str) + "-" + df["日"].map(str)
    # print(df)
    
    # 2.2 Convert the date strings to datetime objects (disabled)
    # dates = [datetime.datetime.strptime(date, "%Y-%m-%d") for date in df["date1"]]
    
    # 2.3 Plot the raw temperature series in a 2x2 grid (disabled)
    # plt.figure(figsize=[12, 8])
    # plt.subplot(2, 2, 1)
    # plt.plot(dates, df["当天最高温度"])
    # plt.title("当天最高温度")
    #
    # plt.subplot(2, 2, 2)
    # plt.plot(dates, df["前一天最高温度"])
    # plt.title("前一天最高温度")
    #
    # plt.subplot(2, 2, 3)
    # plt.plot(dates, df["前两天最高温度"])
    # plt.title("前两天最高温度")
    #
    # plt.subplot(2, 2, 4)
    # plt.plot(dates, df["当地气象台预测值"])
    # plt.title("当地气象台预测值")
    #
    # plt.show()
    
    # 2.4 One-hot encode the data; the original note names df["周"] as the
    #     column this is meant for (presumably the only non-numeric column;
    #     verify against the CSV).
    df = pd.get_dummies(data=df)
    
    # Step 3: split into a training set and a test set
    # Shuffled split holding out 30 % of the rows, with a fixed seed.
    data = train_test_split(df, test_size=0.3, shuffle=True, random_state=100)
    train_data, test_data = data  # training rows, test rows
    
    # Column used as the regression target; all remaining columns are features.
    target_column = "当天最高温度"
    
    train_label = train_data[target_column]
    train_feature = train_data.drop(columns=[target_column])
    
    test_label = test_data[target_column]
    test_feature = test_data.drop(columns=[target_column])
    
    # Step 4: modelling
    # Candidate hyper-parameter values explored by the grid search.
    n_estimators = list(range(10, 101, 10))  # 10, 20, ..., 100 trees
    max_depth = [2, 4]
    bootstrap = [True, False]
    
    param_grid = {
        "n_estimators": n_estimators,
        "max_depth": max_depth,
        "bootstrap": bootstrap,
    }
    
    # Seed the forest so the search outcome is reproducible from run to run;
    # the same seed value is used for the train/test split in this script.
    rf = RandomForestRegressor(random_state=100)
    
    # Exhaustive search over param_grid with 5-fold cross-validation;
    # verbose=5 makes the search print progress for the individual fits.
    clf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, verbose=5)
    clf.fit(train_feature, train_label)
    print(clf.best_params_)
    
    # Step 5: evaluation
    # Print the tuned model's score on the training set, then on the test set.
    for features, labels in (
        (train_feature, train_label),
        (test_feature, test_label),
    ):
        print(clf.score(features, labels))
    
    # Step 6: prediction
    pre_label = clf.predict(test_feature)
    # Convert the label Series to a plain array; presumably so the plot uses
    # positional x-values instead of the shuffled row index.
    test_label = test_label.to_numpy()
    
    
    # Step 7: visualisation
    # Each curve carries its own label. The labels were previously passed to
    # legend() as a set, whose iteration order is not guaranteed, so they
    # could end up attached to the wrong curves.
    plt.plot(pre_label, label="预测曲线")
    plt.plot(test_label, label="真实曲线")
    plt.title("拟合图")
    plt.legend()
    plt.show()
    
  • 相关阅读:
    歌德巴赫猜想
    Dice Possibility
    ACboy needs your help(简单DP)
    Bag of mice(概率DP)
    合唱队形(LIS)
    地震预测(模拟链表)
    关于KMP算法的感想
    Card Collector
    LOOPS
    Aeroplane chess(简单概率dp)
  • 原文地址:https://www.cnblogs.com/mysterygust/p/16408436.html
Copyright © 2020-2023  润新知