• 慈善人数预测(不同算法简单暴力代码)


    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    
    # Location of the census CSV on the author's machine.
    # NOTE(review): there are no separators after the drive letter - they may
    # have been lost when this snippet was published; confirm before running.
    path = r'F:数据分析专用机器学习监督学习项目census.csv'
    # Read the file as UTF-8 text and parse it into a DataFrame.
    with open(path, mode='r', encoding='utf-8') as csv_file:
        data = pd.read_csv(csv_file)
    
    
    def replace_str(data):
        """Encode every text (object-dtype) column of ``data`` as numbered labels.

        Each distinct value of such a column is replaced by the string form of
        its 1-based rank in order of first appearance (``'1'``, ``'2'``, ...).
        Columns of any other dtype are left untouched.

        The whole column is translated in one pass through a lookup table.
        Replacing one value at a time would let a label that was just written
        be mistaken for an original value that looks the same (for example a
        column that already contains ``'1'`` and ``'2'``), silently merging
        distinct categories.

        Args:
            data: DataFrame to encode; it is modified in place.

        Returns:
            The same DataFrame object that was passed in.
        """
        for index, dtype in enumerate(data.dtypes):
            if dtype != 'object':
                continue
            # Address the column by position so the label lookup is built from
            # exactly the column that is written back.
            column = data.iloc[:, index]
            labels = {
                value: str(rank)
                for rank, value in enumerate(column.drop_duplicates(), start=1)
            }
            data.iloc[:, index] = column.map(labels)
        return data
    
    
    # Turn the text columns into numbered labels, then treat every column but
    # the last as predictors and the last column as the target.
    data = replace_str(data)
    feature_frame, target_series = data.iloc[:, :-1], data.iloc[:, -1]
    X = feature_frame.values
    y = target_series.values
    # Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=0,
    )
    # Ridge classifier (the author recorded a test accuracy of about 0.80)
    from sklearn.linear_model import RidgeClassifier
    regressor = RidgeClassifier().fit(X_train, y_train)  # fit() hands back the fitted estimator
    ridge_accuracy = regressor.score(X_test, y_test)
    print(ridge_accuracy)
    # Decision tree (the author recorded a test accuracy of about 0.818)
    from sklearn.tree import DecisionTreeClassifier
    regressor = DecisionTreeClassifier().fit(X_train, y_train)  # fit() hands back the fitted estimator
    tree_accuracy = regressor.score(X_test, y_test)
    print(tree_accuracy)
    # Support vector machine (left disabled by the author)
    # from sklearn.svm import SVC
    # regressor = SVC(kernel='linear',degree=3)
    # regressor.fit(X_train, y_train)
    # print(regressor.score(X_test,y_test))
    # Random forest (the author recorded about 0.84), compared with a single decision tree
    from sklearn.ensemble import RandomForestClassifier
    import sklearn.tree
    from sklearn.model_selection import cross_val_score
    import datetime
    estimators = {
        # criterion: split-quality measure (gini/entropy)
        'tree': sklearn.tree.DecisionTreeClassifier(criterion='gini', random_state=8),  # decision tree
        # n_estimators: number of trees
        # bootstrap: whether to sample with replacement
        # n_jobs: number of jobs that may run in parallel
        'forest': RandomForestClassifier(
            n_estimators=20,
            criterion='gini',
            bootstrap=True,
            n_jobs=2,
            random_state=8,
        ),  # random forest
    }
    for k, model in estimators.items():
        # fit() trains the estimator object already stored in the dict.
        model.fit(X_train, y_train)
        # Predictions are computed as before; nothing in the visible code reads them.
        pred = model.predict(X_test)
        holdout_accuracy = model.score(X_test, y_test)
        print("%s Score: %0.2f" % (k, holdout_accuracy))
        # 10-fold cross-validated accuracy on the training portion only.
        scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=10)
        print("%s Cross Avg. Score: %0.2f (+/- %0.2f)" % (k, scores.mean(), scores.std() * 2))
    # K-nearest neighbours (the author recorded a test accuracy of about 0.82)
    from sklearn.neighbors import KNeighborsClassifier
    regressor = KNeighborsClassifier().fit(X_train, y_train)  # fit() hands back the fitted estimator
    knn_accuracy = regressor.score(X_test, y_test)
    print(knn_accuracy)
    # Neural network, multi-layer perceptron (the author recorded a test accuracy of about 0.80)
    from sklearn.neural_network import MLPClassifier
    regressor = MLPClassifier().fit(X_train, y_train)  # fit() hands back the fitted estimator
    mlp_accuracy = regressor.score(X_test, y_test)
    print(mlp_accuracy)
    # Linear classifier trained with stochastic gradient descent; the author
    # labelled it "regression model" and recorded a test accuracy of about 0.77
    from sklearn.linear_model import SGDClassifier
    regressor = SGDClassifier(max_iter=1000).fit(X_train, y_train)  # fit() hands back the fitted estimator
    sgd_accuracy = regressor.score(X_test, y_test)
    print(sgd_accuracy)
    Win a contest, win a challenge
  • 相关阅读:
    tomcat对sessionId的处理分析
    MySQL 5.5 新增SIGNAL异常处理
    jetty对sessionId的处理分析
    Python3.x和Python2.x的区别
    java actor模型和消息传递简单示例
    构建工具scons让一切变得简单
    HTML元素控件事件表
    开始新的旅途
    JS各种常见知识点
    C#代码与javaScript函数的相互调用
  • 原文地址:https://www.cnblogs.com/pandaboy1123/p/10286586.html
Copyright © 2020-2023  润新知