从redis中查询月留存率
StatRemainRatioMonth
1 package com.oldboy.umeng.spark.stat; 2 3 4 5 /** 6 * 统计月留存率 7 */ 8 public class StatRemainRatioMonth { 9 public static void main(String[] args) throws Exception { 10 SparkConf conf = new SparkConf(); 11 conf.setAppName("statNew"); 12 conf.setMaster("local[4]"); 13 SparkSession sess = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate(); 14 15 //sess.sql("select * from row_log"); 16 //注册函数 17 ExecSQLUtil.execRegisterFuncs(sess); 18 19 String day = "20181105" ; 20 Jedis redis = new Jedis("s101" , 6379) ; 21 redis.select(1) ; 22 Set<String> keys = redis.keys("*") ; 23 //创建Row行类型List集合 24 List<Row> rows = new ArrayList<Row>() ; 25 for(String key :keys){ 26 String[] arr = key.split("#") ; 27 String value = redis.get(key) ; 28 long mintime = Long.parseLong(value.split(",")[0]) ;//redis中取出最小值 29 System.out.println(DateUtil.formatDay(mintime , 0 , "yyyyMMdd")); 30 rows.add(RowFactory.create(arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], arr[6],mintime)) ; 31 } 32 //创建java rdd 33 JavaRDD<Row> rdd1 = new JavaSparkContext(sess.sparkContext()).parallelize(rows); 34 //结构化字段 35 StructField[] fields = new StructField[8]; 36 fields[0] = new StructField("appid", DataTypes.StringType, false, Metadata.empty()); 37 fields[1] = new StructField("appversion", DataTypes.StringType, false, Metadata.empty()); 38 fields[2] = new StructField("brand", DataTypes.StringType, false, Metadata.empty()); 39 fields[3] = new StructField("appplatform", DataTypes.StringType, false, Metadata.empty()); 40 fields[4] = new StructField("devicestyle", DataTypes.StringType, false, Metadata.empty()); 41 fields[5] = new StructField("ostype", DataTypes.StringType, false, Metadata.empty()); 42 fields[6] = new StructField("deviceid", DataTypes.StringType, false, Metadata.empty()); 43 fields[7] = new StructField("mintime", DataTypes.LongType, false, Metadata.empty()); 44 //指定schame类型 45 StructType type = new StructType(fields); 46 //创建Dataset 数据表 47 Dataset<Row> ds1 = sess.createDataFrame(rdd1 ,type ) ; 48 //数据表创建为临时表 49 ds1.createOrReplaceTempView("_tt"); 50 51 //新增设备 各个维度下的 条件:最小值所在的月 = 给出查询的时间所在的月 52 String sql = "select appid ,appversion,brand,appplatform,devicestyle,ostype,deviceid " + 53 "from _tt " + 54 "where formatbymonth(mintime, 0 , 'yyyyMM') = formatbymonth('"+ day+"' , 'yyyyMMdd' , 0 , 'yyyyMM')" + 55 "group by appid ,appversion,brand,appplatform,devicestyle,ostype,deviceid " + 56 "with cube" ; 57 58 sess.sql(sql).createOrReplaceTempView("_t2");//再创建一个临时表 59 System.out.println("========================"); 60 //有效的新增设备,注册v1视图 //设备id 和app 不能为空 其他字段如果为null,转为NULL,因为null不参与统计 61 sess.sql("select ifnull(ss.appid ,'NULLL') appid ," + 62 "ifnull(ss.appversion ,'NULLL') appversion ," + 63 "ifnull(ss.appplatform ,'NULLL') appplatform ," + 64 "ifnull(ss.brand ,'NULLL') brand ," + 65 "ifnull(ss.devicestyle ,'NULLL') devicestyle ," + 66 "ifnull(ss.ostype,'NULLL') ostype ," + 67 "ifnull(ss.deviceid ,'NULLL') deviceid " + 68 " from _t2 ss" + 69 " where ss.appid is not null and ss.deviceid is not null") 70 .createOrReplaceTempView("v1"); 71 sess.sql("select * from v1").show(1000,false); 72 // 73 String sql2 = ResourceUtil.readResourceAsString("stat_remainratio_month2.sql") ; 74 sql2 = sql2.replace("${ymd}" , day) ; 75 // 76 //执行sql语句 77 ExecSQLUtil.execSQLString(sess , sql2); 78 } 79 }
执行sql语句
-- 计算留存率 use big12_umeng ; -- 查询经过一个月后的活跃用户- CREATE OR replace TEMPORARY view v2 as SELECT ifnull(s.appid ,'NULLL') appid , ifnull(s.appversion ,'NULLL') appversion , ifnull(s.appplatform ,'NULLL') appplatform , ifnull(s.brand ,'NULLL') brand , ifnull(s.devicestyle ,'NULLL') devicestyle , ifnull(s.ostype ,'NULLL') ostype , ifnull(s.deviceid ,'NULLL') deviceid FROM ( SELECT appid , appversion , appplatform , brand , devicestyle , ostype , deviceid FROM appstartuplogs WHERE ym = formatbymonth('${ymd}' , 'yyyyMMdd' , 1 , 'yyyyMM') group by appid , appversion , appplatform , brand , devicestyle , ostype , deviceid with cube )s WHERE s.appid is not NULL and s.deviceid is not null ; -- -- 查询交集() CREATE OR replace TEMPORARY view v3 as SELECT v1.appid , v1.appversion , v1.appplatform , v1.brand , v1.devicestyle , v1.ostype , count(v1.deviceid) cnt FROM v1,v2 WHERE v1.appid = v2.appid and v1.appversion = v2.appversion and v1.appplatform = v2.appplatform and v1.brand = v2.brand and v1.devicestyle = v2.devicestyle and v1.ostype = v2.ostype and v1.deviceid = v2.deviceid GROUP BY v1.appid , v1.appversion , v1.appplatform , v1.brand , v1.devicestyle , v1.ostype ; CREATE OR replace TEMPORARY view v4 as SELECT v1.appid , v1.appversion , v1.appplatform , v1.brand , v1.devicestyle , v1.ostype , count(v1.deviceid) cnt FROM v1 GROUP BY v1.appid , v1.appversion , v1.appplatform , v1.brand , v1.devicestyle , v1.ostype ; select * from v4 ; SELECT v4.appid , v4.appversion , v4.appplatform, v4.brand , v4.devicestyle, v4.ostype , ifnull(v3.cnt , 0) / v4.cnt FROM v4 left outer join v3 on v3.appid = v4.appid and v3.appversion = v4.appversion and v3.appplatform = v4.appplatform and v3.brand = v4.brand and v3.devicestyle = v4.devicestyle and v3.ostype = v4.ostype