• kaggle House_Price_XGBoost


    kaggle House_Price_final

    代码

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeRegressor
    from xgboost import XGBRegressor

    # `Imputer` was removed from sklearn.preprocessing in scikit-learn 0.22.
    # Prefer its replacement and fall back to the legacy class on old installs;
    # either way the name `Imputer` stays available to the rest of the script.
    try:
        from sklearn.impute import SimpleImputer as Imputer
    except ImportError:
        from sklearn.preprocessing import Imputer
    
    # Kaggle "House Prices" pipeline: load the CSVs, keep numeric and
    # low-cardinality categorical columns, one-hot encode, impute missing
    # values, fit an XGBoost regressor and write the submission file.

    # Raw strings so that "\t" in "\train.csv" / "\test.csv" is not read as a tab.
    train_path = r"C:\Users\cbattle\Desktop\train.csv"
    test_path = r"C:\Users\cbattle\Desktop\test.csv"
    out_path = r"C:\Users\cbattle\Desktop\out.csv"

    # Load the data.
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    print('train:', train.shape)
    print('test :', test.shape)

    # Split into features and target; 'Id' is an identifier, not a feature.
    X = train.drop(['Id', 'SalePrice'], axis=1)
    y = train['SalePrice']
    Xtest = test.drop(['Id'], axis=1)
    print('X    :', X.shape)
    print('y    :', y.shape)
    print('Xtest:', Xtest.shape)

    # Keep numeric columns plus categorical columns with fewer than 10 distinct
    # values, so one-hot encoding does not blow up the feature count.
    key = [
        col for col in X
        if X[col].dtype in ['int64', 'float64']
        or (X[col].dtype == 'object' and X[col].nunique() < 10)
    ]
    X = X[key]
    Xtest = Xtest[key]

    # One-hot encode the categorical columns.
    print(X.shape, Xtest.shape)
    X = pd.get_dummies(X)
    Xtest = pd.get_dummies(Xtest)
    # Give the test matrix exactly the training columns. A dummy column that is
    # missing from the test set means "category never occurs", so it is filled
    # with 0 rather than left as NaN for the imputer to replace with a mean.
    Xtest = Xtest.reindex(columns=X.columns, fill_value=0)
    print(X.shape, Xtest.shape)

    # Fill missing values; statistics are learned on the training set only.
    my_imputer = Imputer()
    X = my_imputer.fit_transform(X)
    Xtest = my_imputer.transform(Xtest)
    print(X.shape, Xtest.shape)

    # Fit XGBoost with default hyper-parameters and predict the test set.
    # (Baseline alternative: DecisionTreeRegressor().fit(X, y).)
    xgb = XGBRegressor()
    xgb.fit(X, y, verbose=False)
    ans = xgb.predict(Xtest)

    # Write the submission file.
    myAns = pd.DataFrame({'Id': test['Id'], 'SalePrice': ans})
    myAns.to_csv(out_path, index=False)
    print('ok')
    
  • 相关阅读:
    针对数据库索引的优化
    acd
    HDOJ 5045 Contest
    《计算机时代》2015年第7期刊登出《基于数据仓库星形模式的广东省快速公路一张网资金结算情况分析系统》
    为什么大多数编程语言中的数组都从0開始
    十年,青春就是一转眼的事
    电子商务系统的设计与实现(十四):菜单高亮
    最近1个月的财务计划没有做好,囧啊
    最近1个月的财务计划没有做好,囧啊
    雷观(十九):我的人生观
  • 原文地址:https://www.cnblogs.com/cbattle/p/8810851.html
Copyright © 2020-2023  润新知