• 预处理算法_12_分组聚合


    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    
    # <editable>
    
    def execute():
        # <editable>
        '''
        载入模块
        '''
        import pandas as pd
        from sqlalchemy import create_engine
        '''
        连接数据库
        '''
        engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')
        '''
        选择目标数据
        '''
    
        params = {
            "features": "score as fea",
            "label": 'score',
            "method": "count",
        }
        inputs = {"table": 'test'}
        sql = 'select ' + params['features'] + ',' + params['label'] + ' from ' + inputs['table']
        data_in = pd.read_sql_query(sql, engine)
        data_in = data_in.fillna(float(20))
        print(data_in)
    
        '''
        分组聚合
        用法:
        obj.groupby(‘key’)
        obj.groupby([‘key1’,’key2’])
        '''
        b = params['label'].split(',')
        if params['method'] == 'count':
            data_out = data_in.groupby(b).count().reset_index()
        elif params['method'] == 'max':
            data_out = data_in.groupby(b).max().reset_index()
        elif params['method'] == 'mean':
            data_out = data_in.groupby(b).mean().reset_index()
        elif params['method'] == 'median':
            data_out = data_in.groupby(b).median().reset_index()
        elif params['method'] == 'size':
            data_out = data_in.groupby(b).size().reset_index()
        elif params['method'] == 'min':
            data_out = data_in.groupby(b).min().reset_index()
        elif params['method'] == 'std':
            data_out = data_in.groupby(b).std().reset_index()
        else:
            data_out = data_in.groupby(b).sum().reset_index()
    
        '''
        将结果写出
        '''
        print(data_out)
        '''
        数据示例
            fea  score
        0  80.0   80.0
        1  20.0   20.0
        2  20.0   20.0
        3   5.0    5.0
        4   4.0    4.0
        5  20.0   20.0
           score  fea
        0    4.0    1
        1    5.0    1
        2   20.0    3
        3   80.0    1
        '''
    
    # </editable>
    
    if __name__ == '__main__':
        execute()
    作者:沐禹辰
    出处:http://www.cnblogs.com/renfanzi/
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接。
  • 相关阅读:
    修改输入框placeholder文字默认颜色-webkit-input-placeholder
    命令行
    一看就懂的ReactJs入门教程(精华版)
    JPG、PNG和GIF图片的基本原理及优化方法
    spring中的两个数据库事务DataSourceTransactionManager 和 JtaTransactionManager区别
    classpath和classpath*的区别
    转:log4j的使用简介
    javascript学习笔记
    Spring MVC 中 HandlerInterceptorAdapter的使用
    SVN代码管理发布
  • 原文地址:https://www.cnblogs.com/renfanzi/p/14476680.html
Copyright © 2020-2023  润新知