• pandas高级多表操作


    import pandas as pd
    import numpy as np
    # Load the two source tables; both CSV files are read as GBK-encoded text.
    # NOTE(review): the paths contain no directory separators — they may have
    # been lost when this snippet was published; confirm before running.
    position = pd.read_csv("D:mycode用pandasdataposition.csv", encoding="gbk")
    company = pd.read_csv("D:mycode用pandasdatacompany_sql.csv", encoding="gbk")
    # Rename the first column of `company` to "id": copy the labels into a
    # list, patch the first entry, then assign the list back.
    col = company.columns.tolist()
    col[0] = "id"
    company.columns = col
    print("1,------", company)
    # Inner join: keep rows where position.companyId matches company.id.
    matched = pd.merge(
        position,
        company,
        how="inner",
        left_on="companyId",
        right_on="id",
    )
    print("2,-----", matched)
    # Index-based join: rows are aligned on the two frames' indexes.
    joined_on_index = company.join(position)
    print("3,----", joined_on_index)
    # Side-by-side concatenation (axis=1) of the two frames.
    side_by_side = pd.concat([company, position], axis=1)
    print("4,--------", side_by_side)
    # Build the first demo frame: columns A and B, four single-letter rows.
    df1 = pd.DataFrame({"A": ["a", "b", "c", "d"], "B": ["e", "f", "g", "h"]})
    print("5,--------", df1)
    # Build the second demo frame: same values, but under columns C and D.
    df2 = pd.DataFrame({"C": ["a", "b", "c", "d"], "D": ["e", "f", "g", "h"]})
    print("6,--------", df2)
    # Concatenate the two frames three ways. The frames share no column
    # names, so stacking them row-wise leaves NaN where a column is absent.
    frames = [df1, df2]
    stacked_unsorted = pd.concat(frames, sort=False)
    print("7,----", stacked_unsorted)
    stacked_sorted = pd.concat(frames, sort=True)
    print("8,----", stacked_sorted)
    # axis=1 places the frames next to each other instead of stacking them.
    side_by_side = pd.concat(frames, axis=1)
    print("9,----", side_by_side)
    # Look up values in a MultiIndex result by slicing.
    # The per-(city, education) means are computed once and reused below.
    # numeric_only=True is required on pandas >= 2.0, where mean() raises a
    # TypeError if the frame still contains non-numeric columns.
    city_edu_mean = position.groupby(by=["city", "education"]).mean(numeric_only=True)
    print(city_edu_mean)
    # The `avg` column as a Series indexed by (city, education).
    print(city_edu_mean.avg)
    # Select the outer level first, then the inner level.
    print(city_edu_mean.avg["上海"])
    print(city_edu_mean.avg["上海"]["博士"])
    # The same selections on the whole frame via .loc.
    print(city_edu_mean.loc["上海"])
    print(city_edu_mean.loc["上海", "博士"])
    print("10,-------")
    # Build a two-level index with set_index: sort by the two key columns,
    # then promote those same columns to the index.
    index_cols = ["city", "education"]
    sorted_position = position.sort_values(by=index_cols)
    print(sorted_position.set_index(index_cols))
    print("11,------")
    # Show the raw label column that the string examples below work on.
    labels = position["positionLables"]
    print(labels)
    print("12,------")
    # Count occurrences of a substring in every value.
    analyst_hits = labels.str.count("分析师")
    print(analyst_hits)
    print("13,--------")
    # Position of the first occurrence of a substring in every value.
    data_offset = labels.str.find("数据")
    print(data_offset)
    print("14,-------")
    # Element-wise slicing: drop the first and the last character.
    trimmed = labels.str.slice(1, -1)
    print(trimmed)
    print("15,--------")
    # Whole-value replacement across the frame: cells equal to 80307 become "".
    print(position.replace(to_replace=80307, value=""))
    print("16,-------")
    # Substring replacement inside values: strip the single quotes as well.
    print(labels.str.slice(1, -1).str.replace("'", ""))
    print("17,------------")
    # Turn every "深圳" city value into a missing value.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    position.loc[position.city == "深圳", "city"] = np.nan
    print(position)
    print("18,----------")
    # Fill every missing value in the frame with 1 (returns a new frame).
    print(position.fillna(1))
    print("19,--------")
    # Fill missing values in one named column only. fillna returns a new
    # object, so the result is printed directly; `position` keeps its missing
    # values for the dropna example that follows.
    print(position.fillna({"city": "abc"}))
    print("20,----------")
    # Drop every row that contains a missing value (returns a new frame).
    print(position.dropna())
    print("21,--------------")
    # String concatenation on a column.
    # Keep only rows that have an `avg` value. The explicit copy makes the
    # filtered frame independent, so the column assignment below does not
    # trigger SettingWithCopyWarning.
    position = position[position["avg"].notna()].copy()
    position["avg"] = position["avg"].astype(str) + "k"
    print(position)
    print(position.avg)
    print("22,----------")
    # The same concatenation written with an anonymous function. apply
    # returns a new Series and does not modify `position`, so the result is
    # printed directly. `avg` already carries the "k" suffix from the step
    # above, so the suffix appears twice in this output.
    print(position.avg.apply(lambda x: str(x) + "k"))
    print("23,----------")
    # Top-N salary ranking within each city: helper applied to every group.
    def func(x, n):
        """Return the first ``n`` rows of ``x`` after sorting by ``avg`` descending.

        Args:
            x: DataFrame with an ``avg`` column (a single group when used
                with ``groupby(...).apply``).
            n: Number of leading rows to keep.

        Returns:
            DataFrame with the rows ordered by ``avg`` from highest to
            lowest, cut to the first ``n`` rows.
        """
        ranked = x.sort_values("avg", ascending=False)
        # Positional slice: a group with fewer than n rows is returned whole.
        return ranked.iloc[:n]
    # Rank within every city and print the ranking itself: apply returns a
    # new frame and leaves `position` unchanged, so printing `position` would
    # not show the result.
    # NOTE(review): `avg` is converted to strings with a "k" suffix earlier in
    # this script, so the sort inside func is lexicographic at this point —
    # confirm whether the ranking should run on the numeric values instead.
    top_by_city = position.groupby("city").apply(func, n=5)
    print(top_by_city)
    print("24,-------------")

  • 相关阅读:
    rpmdb open failed 的解决办法
    centos7 搭建vsftpd服务并锁定用户的家目录
    centos7 搭建samba服务
    python检测是否为数字
    python的random模块
    利用python编写一个简单的猜数字游戏
    在centos7中利用kvm创建虚拟机并设置为桥接模式
    支付宝提现
    冒泡排序(数组排序)
    全选获取对应值
  • 原文地址:https://www.cnblogs.com/zhang-da/p/14243583.html
Copyright © 2020-2023  润新知