• 根据PV统计出排名前三的热门板块,并统计出每个热门板块下访问次数最多的前三名用户--方式一



    根据PV统计出排名前三的热门板块,并统计出每个热门板块下访问次数最多的前三名用户--方式一

    测试数据
    java代码
      1 package com.hzf.spark.study;
      2 
      3 import java.util.ArrayList;
      4 import java.util.Collections;
      5 import java.util.Comparator;
      6 import java.util.HashMap;
      7 import java.util.Iterator;
      8 import java.util.List;
      9 import java.util.Map;
     10 import java.util.Set;
     11 
     12 import org.apache.spark.SparkConf;
     13 import org.apache.spark.api.java.JavaPairRDD;
     14 import org.apache.spark.api.java.JavaRDD;
     15 import org.apache.spark.api.java.JavaSparkContext;
     16 import org.apache.spark.api.java.function.Function;
     17 import org.apache.spark.api.java.function.PairFunction;
     18 import org.apache.spark.api.java.function.VoidFunction;
     19 import org.apache.spark.broadcast.Broadcast;
     20 
     21 import scala.Tuple2;
     22 
     23 public class HotChannel01 {
     24     public static void main(String[] args) {
     25         SparkConf conf = new SparkConf()
     26                 .setAppName("HotChannel")
     27                 .setMaster("local")
     28                 .set("spark.testing.memory", "2147480000");
     29         JavaSparkContext sc = new JavaSparkContext(conf);
     30         JavaRDD<String> logRDD = sc.textFile("userLog1");
     31         String str = "View";
     32         final Broadcast<String> broadcast = sc.broadcast(str);
     33         hotChannel(sc, logRDD, broadcast);
     34     }
     35     private static void hotChannel(JavaSparkContext sc,JavaRDD<String> logRDD, final Broadcast<String> broadcast) {
     36         JavaRDD<String> filteredLogRDD = logRDD.filter(new Function<String, Boolean>() {
     37             
     38             private static final long serialVersionUID = 1L;
     39 
     40             @Override
     41             public Boolean call(String v1) throws Exception {
     42                 String actionParam = broadcast.value();
     43                 String action = v1.split("	")[5];
     44                 return actionParam.equals(action);
     45             }
     46         });
     47         
     48         JavaPairRDD<String, String> channel2nullRDD = filteredLogRDD.mapToPair(new PairFunction<String, String,String>() {
     49 
     50             private static final long serialVersionUID = 1L;
     51 
     52             @Override
     53             public Tuple2<String, String> call(String val) throws Exception {
     54                 String channel = val.split("	")[4];
     55                 
     56                 return new Tuple2<String, String>(channel,null);
     57             }
     58         });
     59         Map<String, Object> channelPVMap = channel2nullRDD.countByKey();
     60         Set<String> keySet = channelPVMap.keySet();
     61         List<SortObj> channels  = new ArrayList<>();
     62         for(String channel : keySet){ 
     63             channels.add(new SortObj(channel, Integer.valueOf(channelPVMap.get(channel)+"")));
     64         }
     65         Collections.sort(channels, new Comparator<SortObj>() {
     66 
     67             @Override
     68             public int compare(SortObj o1, SortObj o2) {
     69                 return o2.getValue() - o1.getValue();
     70             }
     71         });
     72         
     73         List<String> hotChannelList = new ArrayList<>();
     74         for (int i = 0; i < 3; i++) {
     75             hotChannelList.add(channels.get(i).getKey());
     76         }
     77         for(String channle : hotChannelList){
     78             System.out.println("channle:" + channle);
     79         }
     80         
     81         final Broadcast<List<String>> hotChannelListBroadcast = sc.broadcast(hotChannelList);
     82         
     83          
     84         JavaRDD<String> filtedRDD = logRDD.filter(new Function<String, Boolean>() {
     85 
     86             @Override
     87             public Boolean call(String v1) throws Exception {
     88                 List<String> hostChannels = hotChannelListBroadcast.value();
     89                 String channel = v1.split("	")[4];
     90                 String userId = v1.split("	")[2];
     91                 return hostChannels.contains(channel) && !"null".equals(userId);
     92             }
     93         });
     94         
     95         JavaPairRDD<String, String> channel2UserRDD = filtedRDD.mapToPair(new PairFunction<String, String, String>() {
     96 
     97             @Override
     98             public Tuple2<String, String> call(String v1) throws Exception {
     99                 String[] splited = v1.split("	");
    100                 String channel = splited[4];
    101                 String userId = splited[2];
    102                 return new Tuple2<String, String>(channel,userId);
    103             }
    104         });
    105         
    106         channel2UserRDD.groupByKey().foreach(new VoidFunction<Tuple2<String,Iterable<String>>>() {
    107             
    108             private static final long serialVersionUID = 1L;
    109 
    110             @Override
    111             public void call(Tuple2<String, Iterable<String>> tuple) throws Exception {
    112                 String channel = tuple._1;
    113                 Iterator<String> iterator = tuple._2.iterator();
    114                 Map<String, Integer> userNumMap = new HashMap<>();
    115                 while(iterator.hasNext()){
    116                     String userId = iterator.next();
    117                     Integer count = userNumMap.get(userId);
    118                     if(count == null){
    119                         count = 1;
    120                     }else{
    121                         count ++;
    122                     }
    123                     userNumMap.put(userId, count);
    124                 }
    125                 
    126                 List<SortObj> lists = new ArrayList<>();
    127                 Set<String> keys = userNumMap.keySet();
    128                 for(String key : keys){
    129                     lists.add(new SortObj(key, userNumMap.get(key)));
    130                 }
    131                 
    132                 Collections.sort(lists,new Comparator<SortObj>() {
    133 
    134                     @Override
    135                     public int compare(SortObj O1, SortObj O2) {
    136                         return O2.getValue() - O1.getValue();
    137                     }
    138                 });
    139                 
    140                 System.out.println("HOT_CHANNEL:"+channel);
    141                 for(int i = 0 ; i < 3 ; i++){
    142                     SortObj sortObj = lists.get(i);
    143                     System.out.println(sortObj.getKey()+"=="+sortObj.getValue());
    144                 }
    145             }
    146         });
    147     }
    148 }
    View Code
    result
     
  • 相关阅读:
    js模块化历程
    夜深
    出差(六)开会
    高情商的十大典型表现
    出差(五)调整
    HighCharts简单应用
    出差(四)适应
    出差(三)尝试
    出差(二)熟悉
    ZTree简单应用
  • 原文地址:https://www.cnblogs.com/haozhengfei/p/dc318c55e428640229a5ef270d78a5f7.html
Copyright © 2020-2023  润新知