• 23 友盟项目--sparkstreaming对接kafka、集成redis--从redis中查询月留存率


    从redis中查询月留存率

    StatRemainRatioMonth

     1 package com.oldboy.umeng.spark.stat;
     2 
     3 
     4 
     5 /**
     6  * 统计月留存率
     7  */
     8 public class StatRemainRatioMonth {
     9     public static void main(String[] args) throws Exception {
    10         SparkConf conf = new SparkConf();
    11         conf.setAppName("statNew");
    12         conf.setMaster("local[4]");
    13         SparkSession sess = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate();
    14 
    15         //sess.sql("select * from row_log");
    16         //注册函数
    17         ExecSQLUtil.execRegisterFuncs(sess);
    18 
    19         String day = "20181105" ;
    20         Jedis redis = new Jedis("s101" , 6379) ;
    21         redis.select(1) ;
    22         Set<String> keys = redis.keys("*") ;
    23         //创建Row行类型List集合
    24         List<Row> rows = new ArrayList<Row>() ;
    25         for(String key :keys){
    26             String[] arr = key.split("#") ;
    27             String value = redis.get(key) ;
    28             long mintime = Long.parseLong(value.split(",")[0]) ;//redis中取出最小值
    29             System.out.println(DateUtil.formatDay(mintime , 0 , "yyyyMMdd"));
    30             rows.add(RowFactory.create(arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], arr[6],mintime)) ;
    31         }
    32         //创建java rdd
    33         JavaRDD<Row> rdd1 = new JavaSparkContext(sess.sparkContext()).parallelize(rows);
    34         //结构化字段
    35         StructField[] fields = new StructField[8];
    36         fields[0] = new StructField("appid", DataTypes.StringType, false, Metadata.empty());
    37         fields[1] = new StructField("appversion", DataTypes.StringType, false, Metadata.empty());
    38         fields[2] = new StructField("brand", DataTypes.StringType, false, Metadata.empty());
    39         fields[3] = new StructField("appplatform", DataTypes.StringType, false, Metadata.empty());
    40         fields[4] = new StructField("devicestyle", DataTypes.StringType, false, Metadata.empty());
    41         fields[5] = new StructField("ostype", DataTypes.StringType, false, Metadata.empty());
    42         fields[6] = new StructField("deviceid", DataTypes.StringType, false, Metadata.empty());
    43         fields[7] = new StructField("mintime", DataTypes.LongType, false, Metadata.empty());
    44         //指定schame类型
    45         StructType type = new StructType(fields);
    46         //创建Dataset  数据表
    47         Dataset<Row> ds1 = sess.createDataFrame(rdd1 ,type ) ;
    48         //数据表创建为临时表
    49         ds1.createOrReplaceTempView("_tt");
    50 
    51         //新增设备 各个维度下的   条件:最小值所在的月 = 给出查询的时间所在的月
    52         String sql = "select appid ,appversion,brand,appplatform,devicestyle,ostype,deviceid " +
    53                              "from _tt " +
    54                              "where formatbymonth(mintime, 0 , 'yyyyMM') = formatbymonth('"+ day+"' , 'yyyyMMdd' , 0 , 'yyyyMM')" +
    55                              "group by appid ,appversion,brand,appplatform,devicestyle,ostype,deviceid " +
    56                              "with cube" ;
    57 
    58         sess.sql(sql).createOrReplaceTempView("_t2");//再创建一个临时表
    59         System.out.println("========================");
    60         //有效的新增设备,注册v1视图  //设备id 和app 不能为空   其他字段如果为null,转为NULL,因为null不参与统计
    61         sess.sql("select ifnull(ss.appid ,'NULLL') appid  ," +
    62                          "ifnull(ss.appversion  ,'NULLL') appversion   ," +
    63                          "ifnull(ss.appplatform ,'NULLL') appplatform  ," +
    64                          "ifnull(ss.brand ,'NULLL') brand  ," +
    65                          "ifnull(ss.devicestyle ,'NULLL') devicestyle  ," +
    66                          "ifnull(ss.ostype,'NULLL') ostype ," +
    67                          "ifnull(ss.deviceid    ,'NULLL') deviceid " +
    68                          " from _t2 ss" +
    69                          " where ss.appid is not null and ss.deviceid is not null")
    70                 .createOrReplaceTempView("v1");
    71         sess.sql("select * from v1").show(1000,false);
    72 //
    73         String sql2 = ResourceUtil.readResourceAsString("stat_remainratio_month2.sql") ;
    74         sql2 = sql2.replace("${ymd}" , day) ;
    75 //
    76         //执行sql语句
    77         ExecSQLUtil.execSQLString(sess , sql2);
    78     }
    79 }

    执行sql语句

    -- 计算留存率
    use big12_umeng ;
    
    -- 查询经过一个月后的活跃用户-
    CREATE OR  replace TEMPORARY view v2 as
    SELECT
      ifnull(s.appid       ,'NULLL') appid        ,
      ifnull(s.appversion  ,'NULLL') appversion   ,
      ifnull(s.appplatform ,'NULLL') appplatform  ,
      ifnull(s.brand       ,'NULLL') brand        ,
      ifnull(s.devicestyle ,'NULLL') devicestyle  ,
      ifnull(s.ostype      ,'NULLL') ostype       ,
      ifnull(s.deviceid    ,'NULLL') deviceid
    FROM
    (
      SELECT
        appid       ,
        appversion  ,
        appplatform ,
        brand       ,
        devicestyle ,
        ostype      ,
        deviceid
      FROM
        appstartuplogs
      WHERE
        ym = formatbymonth('${ymd}' , 'yyyyMMdd' , 1 , 'yyyyMM')
      group by
        appid       ,
        appversion  ,
        appplatform ,
        brand       ,
        devicestyle ,
        ostype      ,
        deviceid
        with cube
    )s
    WHERE
      s.appid is not NULL
      and s.deviceid is not null ;
    
    --
    -- 查询交集()
    CREATE  OR  replace TEMPORARY view v3 as
    SELECT
      v1.appid          ,
      v1.appversion     ,
      v1.appplatform    ,
      v1.brand          ,
      v1.devicestyle    ,
      v1.ostype         ,
      count(v1.deviceid) cnt
    FROM
      v1,v2
    WHERE
          v1.appid       = v2.appid
      and v1.appversion  = v2.appversion
      and v1.appplatform = v2.appplatform
      and v1.brand       = v2.brand
      and v1.devicestyle = v2.devicestyle
      and v1.ostype      = v2.ostype
      and v1.deviceid    = v2.deviceid
    GROUP BY
      v1.appid       ,
      v1.appversion  ,
      v1.appplatform ,
      v1.brand       ,
      v1.devicestyle ,
      v1.ostype ;
    
    CREATE  OR  replace TEMPORARY view v4 as
    SELECT
      v1.appid          ,
      v1.appversion     ,
      v1.appplatform    ,
      v1.brand          ,
      v1.devicestyle    ,
      v1.ostype         ,
      count(v1.deviceid) cnt
    FROM
      v1
    GROUP BY
      v1.appid       ,
      v1.appversion  ,
      v1.appplatform ,
      v1.brand       ,
      v1.devicestyle ,
      v1.ostype ;
    
    select * from v4 ;
    
    SELECT
      v4.appid      ,
      v4.appversion ,
      v4.appplatform,
      v4.brand      ,
      v4.devicestyle,
      v4.ostype     ,
      ifnull(v3.cnt , 0) / v4.cnt
    FROM
      v4 left outer join v3
    on
          v3.appid       = v4.appid
      and v3.appversion  = v4.appversion
      and v3.appplatform = v4.appplatform
      and v3.brand       = v4.brand
      and v3.devicestyle = v4.devicestyle
      and v3.ostype      = v4.ostype
  • 相关阅读:
    面试题 16.07. 最大数值
    461. 汉明距离
    1290. 二进制链表转整数
    1486. 数组异或操作
    1480. 一维数组的动态和
    面试题 17.04. 消失的数字
    626. 换座位
    125. 验证回文串
    530. 二叉搜索树的最小绝对差
    ASP.NET页面之间传递值的几种方式
  • 原文地址:https://www.cnblogs.com/star521/p/10003513.html
Copyright © 2020-2023  润新知