• 基于MySQL分析线上充值留存率


    1.数据清洗

    步骤:

    1.从charge_record表中查询业务类型为充值(buss_type=0)、订单状态为成功(charge_status=2)且charge_prod_id不为空的数据

    2.将上述数据转移到本地数据库

    使用如下脚本:

    # coding=utf-8
    import pymysql

    # Copy the filtered recharge rows (uid, update_time) from the source
    # database into the local `test` database used for the analysis.
    #
    # Each connection is closed in a `finally` block so that a failed query or
    # insert does not leak it, and cursors are closed via context managers.

    # Source database connection (credentials masked in the write-up).
    db1 = pymysql.connect(
        host='***',
        port=3306, user='***',
        passwd='***',
        db='***',
        charset='utf8')
    try:
        with db1.cursor() as cursor1:
            # Per the surrounding write-up: buss_type=0 is "recharge" and
            # charge_status=2 is "success"; rows without a product id are skipped.
            cursor1.execute('select uid,update_time from charge_record where buss_type=0 and charge_status=2 and charge_prod_id is not null')
            # Load the whole result set into memory.
            # NOTE(review): fine for a modest table; confirm the row count is
            # small enough before running this against a large source.
            data2 = cursor1.fetchall()
    finally:
        db1.close()

    # Destination (local) database connection.
    db2 = pymysql.connect(
        host='127.0.0.1',
        port=3306, user='root',
        passwd='123456',
        db='test',
        charset='utf8')
    try:
        # Parameterized insert statement, executed once per fetched row.
        sql = 'insert into charge_record(uid,update_time) value(%s, %s)'
        with db2.cursor() as cursor2:
            cursor2.executemany(sql, data2)

        # Persist the inserted rows.
        db2.commit()
    finally:
        db2.close()

    2.计算留存率

    使用的MySQL语句如下:

    USE test;

    -- Earliest recharge time (first payment) for each user.
    SELECT
        uid,
        MIN(update_time) AS u_time
    FROM charge_record
    GROUP BY uid;

    -- Attach each user's first-payment time to every one of their recharge rows,
    -- together with the month offset from that first payment (m_diff) and the
    -- first-payment year/month label (y_m).
    --
    -- The derived table is capped at 7000 users; ORDER BY uid makes that sample
    -- deterministic (without it the LIMIT picks an arbitrary set of users).
    -- Rows are kept only when a first-payment time exists, so the join is written
    -- as the INNER JOIN it effectively is.
    --
    -- NOTE(review): TIMESTAMPDIFF(MONTH, ...) counts whole elapsed months, not
    -- calendar-month boundaries -- confirm this is the intended definition.
    SELECT
        a.uid,
        b.u_time,
        TIMESTAMPDIFF(MONTH, b.u_time, a.update_time) AS m_diff,
        CONCAT(YEAR(b.u_time), '年', MONTH(b.u_time), '月') AS y_m
    FROM charge_record AS a
    INNER JOIN (
        SELECT
            uid,
            MIN(update_time) AS u_time
        FROM charge_record
        GROUP BY uid
        ORDER BY uid
        LIMIT 0, 7000
    ) AS b
        ON a.uid = b.uid
    WHERE b.u_time IS NOT NULL;

    -- Build the cohort table: for every first-payment month and month offset,
    -- count the distinct users who recharged at that offset.
    --
    -- Resulting columns (names are referenced by the retention queries):
    --   `首付月份` : first-payment year/month label
    --   `月份差`   : month offset from the first payment
    --   `留存量`   : number of distinct users active at that offset
    --
    -- String literals use single quotes and aliases use backticks so the
    -- statement also works when sql_mode contains ANSI_QUOTES.
    --
    -- NOTE(review): TIMESTAMPDIFF(MONTH, ...) counts whole elapsed months, not
    -- calendar-month boundaries -- confirm this is the intended definition.
    CREATE TABLE cohort AS
    SELECT
        c.y_m                 AS `首付月份`,
        c.m_diff              AS `月份差`,
        COUNT(DISTINCT c.uid) AS `留存量`
    FROM (
        SELECT
            a.uid,
            TIMESTAMPDIFF(MONTH, b.u_time, a.update_time) AS m_diff,
            CONCAT(YEAR(b.u_time), '年', MONTH(b.u_time), '月') AS y_m
        FROM charge_record AS a
        INNER JOIN (
            -- First-payment time per user.
            SELECT
                uid,
                MIN(update_time) AS u_time
            FROM charge_record
            GROUP BY uid
        ) AS b
            ON a.uid = b.uid
        WHERE b.u_time IS NOT NULL
    ) AS c
    GROUP BY
        c.y_m,
        c.m_diff;

    -- Retention rate per cohort and month offset:
    --   users active at the offset / users in the cohort's first month (offset 0).
    --
    -- `月份差` is included in the output so the several rows of one cohort can be
    -- told apart, and the result is ordered for a stable, readable listing.
    SELECT
        c.`首付月份`,
        c.`月份差`,
        CONCAT(ROUND((c.`留存量` / m.`留存量`) * 100, 2), '%') AS `留存率`
    FROM cohort AS c
    LEFT JOIN (
        -- Cohort size: distinct users at month offset 0.
        SELECT
            `首付月份`,
            `留存量`
        FROM cohort
        WHERE `月份差` = 0
    ) AS m
        ON c.`首付月份` = m.`首付月份`
    ORDER BY
        c.`首付月份`,
        c.`月份差`;

    -- Retention report, one row per first-payment month:
    --   `本月新增` : cohort size (distinct users at month offset 0)
    --   `+N月`     : retention rate N months after the first payment, as "xx.xx%"
    --
    -- The inner query computes the rate for each (cohort, offset) row; the outer
    -- query pivots offsets 1..5 into columns with conditional aggregation.
    -- Offsets with no matching row contribute 0 to the sum.
    --
    -- The cohort size is the same on every row of a cohort, so MAX() carries it
    -- through the GROUP BY as an integer (AVG() would return a DECIMAL such as
    -- 120.0000 for a user count).
    SELECT
        r.`首付月份`,
        MAX(r.`首月用户量`) AS `本月新增`,
        CONCAT(SUM(CASE WHEN r.`月份差` = 1 THEN r.`留存率` ELSE 0 END), '%') AS `+1月`,
        CONCAT(SUM(CASE WHEN r.`月份差` = 2 THEN r.`留存率` ELSE 0 END), '%') AS `+2月`,
        CONCAT(SUM(CASE WHEN r.`月份差` = 3 THEN r.`留存率` ELSE 0 END), '%') AS `+3月`,
        CONCAT(SUM(CASE WHEN r.`月份差` = 4 THEN r.`留存率` ELSE 0 END), '%') AS `+4月`,
        CONCAT(SUM(CASE WHEN r.`月份差` = 5 THEN r.`留存率` ELSE 0 END), '%') AS `+5月`
    FROM (
        -- Retention rate for every (first-payment month, month offset) pair.
        SELECT
            c.`首付月份`,
            c.`月份差`,
            m.`留存量` AS `首月用户量`,
            ROUND((c.`留存量` / m.`留存量`) * 100, 2) AS `留存率`
        FROM cohort AS c
        LEFT JOIN (
            -- Cohort size: distinct users at month offset 0.
            SELECT
                `首付月份`,
                `留存量`
            FROM cohort
            WHERE `月份差` = 0
        ) AS m
            ON c.`首付月份` = m.`首付月份`
    ) AS r
    GROUP BY r.`首付月份`;

    最终结果如下:

  • 相关阅读:
    jsp <img src="“> src 相对路径的问题
    记一次Intellij-IDEA配置JDK1.8,支持Lambda新特性
    centOS6.5 查看 启动 关闭防火墙
    java设计模式之--工厂方法模式
    BlockingQueue之---ArrayBlockingQueue
    两个线程,一个为123456,一个为ABCDEF、交替打印出1A2B3C...
    JUC之---超好用的阻塞锁
    JUC之---读写锁
    java设计模式之--代理模式
    java设计模式之--线程安全的单例模式
  • 原文地址:https://www.cnblogs.com/wanyuan/p/13107172.html
Copyright © 2020-2023  润新知