• 大作业 数据清洗(缺省值处理)


    这次实现了缺省值处理部分的内容,主要包括:将缺省值按中位数或平均值补全,以及将含有缺省值的行或列去掉。

    # Missing-value completion endpoint
    @app.route('/data_clean_supply')
    def data_clean_supply():
        """Fill missing values in the module-level ``data_clean`` frame.

        Reads the ``supply_type`` request parameter, hands it to
        ``dataclean.data_clean_supply`` and stores the returned frame back
        into the global ``data_clean``.

        Returns:
            A JSON response ``{"cog": "1"}`` on success, or ``{"cog": "0"}``
            when the helper raised an exception.
        """
        global data_clean
        supply_type = request.values.get("supply_type")
        flag = '1'
        try:
            # Keep the processed frame so later cleaning steps operate on it.
            data_clean = dataclean.data_clean_supply(data_clean, supply_type)
        except Exception:
            # Report the failure to the client through the flag; the full
            # traceback is printed on the server side.
            print(traceback.format_exc())
            flag = '0'
        # format() rather than "+": a missing request parameter (None) must
        # not raise TypeError here, outside the try block.
        print("{}flag:{}".format(supply_type, flag))
        return jsonify({"cog": flag})
    # Missing-value removal endpoint
    @app.route("/data_clean_remove")
    def data_clean_remove():
        """Drop rows or columns with missing values from ``data_clean``.

        Reads the ``action_on``, ``type_on`` and ``min_num`` request
        parameters (each coerced to ``str``), hands them to
        ``dataclean.data_clean_remove`` and stores the returned frame back
        into the global ``data_clean``.

        Returns:
            A JSON response ``{"cog": "1"}`` on success, or ``{"cog": "0"}``
            when the helper raised an exception.
        """
        global data_clean
        action_on = str(request.values.get("action_on"))
        type_on = str(request.values.get("type_on"))
        min_num = str(request.values.get("min_num"))
        flag = '1'
        try:
            # The helper returns a new frame; it must be stored, otherwise
            # the removal is silently lost.
            data_clean = dataclean.data_clean_remove(data_clean, action_on, type_on, min_num)
        except Exception:
            # Report the failure to the client through the flag; the full
            # traceback is printed on the server side.
            print(traceback.format_exc())
            flag = '0'
        print("min_num:" + min_num + ",type_on:" + type_on + ",action_on:" + action_on + ",flag:" + flag)
        return jsonify({"cog": flag})
    # Compute the value used to fill missing entries
    def data_clean_supply_num(temp_col_not_nan,suplly_type):
        """Return the fill value for one column.

        Args:
            temp_col_not_nan: The column's non-missing values; it must
                provide ``median()``, ``mean()`` and ``mode()``.
            suplly_type: ``"median"``, ``"mean"`` (``"average"`` is accepted
                as an alias, since the front-end select uses that value) or
                ``"mode"``.

        Returns:
            The requested statistic. For ``"mode"`` with several equally
            frequent values, the first one reported by ``mode()`` is used;
            NaN is returned when ``mode()`` yields nothing.

        Raises:
            ValueError: If ``suplly_type`` is not a supported name.
        """
        if suplly_type == "median":
            return temp_col_not_nan.median()
        if suplly_type in ("mean", "average"):
            return temp_col_not_nan.mean()
        if suplly_type == "mode":
            modes = temp_col_not_nan.mode()
            return modes.iloc[0] if len(modes) else float("nan")
        # Fail loudly instead of returning None, which would be used as a
        # fill value by the caller.
        raise ValueError("unsupported supply type: {!r}".format(suplly_type))
    # Fill in missing values
    def data_clean_supply(data_clean,suplly_type):
        """Fill missing values column by column and return the frame.

        For every column that contains missing values and whose present
        values can be converted to float, the fill value is obtained from
        ``data_clean_supply_num`` and written into the missing positions.
        Columns that cannot be converted to float are left untouched.

        Args:
            data_clean: The DataFrame to fill; it is modified in place.
            suplly_type: Statistic name forwarded to
                ``data_clean_supply_num``.

        Returns:
            The same ``data_clean`` object.

        Raises:
            ValueError: If no fill value could be computed for the
                requested ``suplly_type``.
        """
        for i in range(data_clean.shape[1]):  # iterate over column positions
            temp_col = data_clean.iloc[:, i]
            missing = temp_col.isna()
            if not missing.any():
                continue  # nothing to fill in this column
            try:
                temp_col_not_nan = temp_col[~missing].astype('float')
            except (TypeError, ValueError):
                # Non-numeric column: skip it rather than abort the whole run.
                continue
            if temp_col_not_nan.empty:
                continue  # every entry is missing, so no statistic exists
            fill_value = data_clean_supply_num(temp_col_not_nan, suplly_type)
            if fill_value is None:
                # Do not silently "succeed" without filling anything.
                raise ValueError("unsupported supply type: {!r}".format(suplly_type))
            # Assign the whole column at once, as a plain list, so the write
            # is positional.
            data_clean.iloc[:, i] = [
                fill_value if is_missing else value
                for value, is_missing in zip(temp_col, missing)
            ]
        return data_clean
    # Drop rows or columns with missing values
    def data_clean_remove(data_clean,action_on,type_on,minnum):
        """Return a copy of ``data_clean`` with sparse rows/columns dropped.

        Args:
            data_clean: The DataFrame to filter; it is not modified.
            action_on: Axis passed to ``dropna`` (e.g. ``"index"`` or
                ``"columns"``).
            type_on: ``"any"`` or ``"all"``; used only when no threshold is
                given.
            minnum: Minimum number of non-missing values a row/column needs
                in order to be kept. ``None`` or a blank string means "no
                threshold", in which case ``type_on`` decides.

        Returns:
            The filtered DataFrame.

        Raises:
            ValueError: If ``minnum`` is non-blank but not an integer.
        """
        threshold = "" if minnum is None else str(minnum).strip()
        # pandas rejects how= and thresh= together, so pass exactly one.
        if threshold:
            return data_clean.dropna(axis=action_on, thresh=int(threshold))
        return data_clean.dropna(axis=action_on, how=type_on)

    有一点要注意:由于从数据库中读出的缺省值是空字符串 '' 而不是 NaN,所以需要先把它们转换为 NaN。

     #把‘’转换为nan
        for j in range(num_1):
            flag_list=[]
            for i in range(num_0):
                if(df.iloc[i][j]==''):
                    flag_list.append(np.nan)
                else:
                    flag_list.append(df.iloc[i][j])
            df.iloc[:,j]=flag_list#必须整列赋值,如果单个赋值则会失败

    前台部分:

         <blockquote class="layui-elem-quote layui-text">
                <h2>缺省值处理</h2>
            </blockquote>
            <fieldset class="layui-elem-field layui-field-title" style="margin-top: 30px;">
                <legend>缺省值补全</legend>
            </fieldset>
            <!-- Missing-value completion form: the selected option value is sent
                 as the supply_type parameter by the #supply_submit click handler. -->
            <form class="layui-form" action="">
                <div class="layui-form-item">
                    <label class="layui-form-label">补全方式</label>
                    <div class="layui-input-block">
                        <select name="interest" lay-filter="aihao" id="supply_type">
                            <option value=""></option>
                            <option value="median">中位数</option>
                            <!-- NOTE(review): confirm the backend supports "mode" before relying on this default. -->
                            <option value="mode" selected="">众数</option>
                            <!-- The value must be "mean": that is the name the backend helper checks for. -->
                            <option value="mean">平均数</option>
                        </select>
                        <!-- No inline onclick: the click handler is bound by id in the script. -->
                        <button type="button" class="layui-btn layui-btn-normal" id="supply_submit">确定</button>
                    </div>
                </div>
            </form>
            <!-- Missing-value removal form: the fields below are read by the
                 #remove_submit click handler and sent to /data_clean_remove. -->
            <fieldset class="layui-elem-field layui-field-title" style="margin-top: 30px;">
                <legend>缺省值去除</legend>
            </fieldset>
            <form class="layui-form" action="">
                <div class="layui-form-item">
                    <label class="layui-form-label">作用于</label>
                    <div class="layui-input-block">
                        <!-- action_on: "index" (labelled rows) or "columns" (labelled columns). -->
                        <input type="radio" name="action_on" value="index" title="行" checked="">
                        <input type="radio" name="action_on" value="columns" title="列">
                    </div>
                </div>
                <div class="layui-form-item">
                    <label class="layui-form-label">非空值最小数</label>
                    <div class="layui-input-block">
                        <!-- min_num: per the placeholder, rows/columns with fewer non-null values than this are removed. -->
                        <input type="text" id="min_num" lay-verify="title" autocomplete="off" placeholder="非空值小于此数目的行或列将被删除" class="layui-input">
                    </div>
                </div>
                <div class="layui-form-item">
                    <label class="layui-form-label">方式</label>
                    <div class="layui-input-block">
                        <!-- type_on: "all" or "any". -->
                        <input type="radio" name="type_on" value="all" title="all" checked="">
                        <input type="radio" name="type_on" value="any" title="any">
                    </div>
                    <button type="button" class="layui-btn layui-btn-normal" id="remove_submit" >确定</button>
                </div>
            </form>
    <script>
        // Send the chosen completion method to the server and report the result.
        $("#supply_submit").click(function() {
            // Declared locally so the handler does not create a global variable.
            var supplyType = $("#supply_type").find("option:selected").val();
            $.ajax({
                type: "GET",
                url: "/data_clean_supply",
                // Passed through `data` so jQuery URL-encodes the value.
                data: {supply_type: supplyType},
                dataType: "json",
                success: function (data) {
                    // Loose comparison on purpose: the server sends the flag as the string "1" or "0".
                    if (data.cog == 1) {
                        alert("操作成功");
                    } else {
                        alert("操作失败");
                    }
                },
                // Network or server errors would otherwise give the user no feedback.
                error: function () {
                    alert("操作失败");
                }
            });
        });
        // Send the removal settings to the server and report the result.
        $("#remove_submit").click(function() {
            // Declared locally so the handler does not create global variables.
            var actionOn = $("input[name='action_on']:checked").val();
            var typeOn = $("input[name='type_on']:checked").val();
            var minNum = $("#min_num").val();
            $.ajax({
                type: "GET",
                url: "/data_clean_remove",
                // Passed through `data` so jQuery URL-encodes the free-text min_num value.
                data: {action_on: actionOn, type_on: typeOn, min_num: minNum},
                dataType: "json",
                success: function (data) {
                    // Loose comparison on purpose: the server sends the flag as the string "1" or "0".
                    if (data.cog == 1) {
                        alert("操作成功");
                    } else {
                        alert("操作失败");
                    }
                },
                // Network or server errors would otherwise give the user no feedback.
                error: function () {
                    alert("操作失败");
                }
            });
        });
    </script>
  • 相关阅读:
    2020/10/29
    2020/10/24
    2020/10/28
    2020/10/31周报
    linux shell 中判断字符串为空的正确方法
    20201107 千锤百炼软工人
    20201103 千锤百炼软工人
    20201109 千锤百炼软工人
    20201111 千锤百炼软工人
    20201105 千锤百炼软工人
  • 原文地址:https://www.cnblogs.com/fengchuiguobanxia/p/15679315.html
Copyright © 2020-2023  润新知