• 商圈标签


    商圈标签

    一、使用百度地图开放平台(LBS),根据经纬度查询商圈

    注:中国的经纬度范围大约为:纬度3.86~53.55,经度73.66~135.05;不在该范围内的数据可不做处理。

    第一步:注册百度地图开放平台的账号,申请地址:http://lbsyun.baidu.com/

     

    第二步:填写注册信息

     

     

     

     第三步:创建应用(申请密钥)

    第四步:生成SN

    package test;

    import java.io.UnsupportedEncodingException;

    import java.net.URLEncoder;

    import java.security.NoSuchAlgorithmException;

    import java.util.LinkedHashMap;

    import java.util.Map;

    import java.util.Map.Entry;

    /**
     * Java sample that computes the "sn" request signature for the Baidu Maps (LBS) web API.
     *
     * <p>The signature depends on the order of the query parameters. For GET requests keep them
     * in a {@link LinkedHashMap}, inserted in exactly the order they appear in the request URL;
     * for POST requests use a TreeMap so that keys are ordered a-z. In both cases the sn
     * parameter itself must come last in the request.
     */
    public class SnCal {

        /**
         * Prints the signature for the sample request
         * http://api.map.baidu.com/geocoder/v2/?address=百度大厦&output=json&ak=yourak
         * ("yourak" / "yoursk" are placeholders for the real access key and secret key).
         */
        public static void main(String[] args) throws UnsupportedEncodingException,
                NoSuchAlgorithmException {
            SnCal snCal = new SnCal();

            // Insertion order must match the parameter order of the GET request.
            Map<String, String> paramsMap = new LinkedHashMap<String, String>();
            paramsMap.put("address", "百度大厦");
            paramsMap.put("output", "json");
            paramsMap.put("ak", "yourak");

            // address=%E7%99%BE%E5%BA%A6%E5%A4%A7%E5%8E%A6&output=json&ak=yourak
            String paramsStr = snCal.toQueryString(paramsMap);

            // Prefix the request path and append the secret key directly (no separator):
            // /geocoder/v2/?address=...&output=json&ak=yourakyoursk
            String wholeStr = "/geocoder/v2/?" + paramsStr + "yoursk";

            // URL-encode the whole string once more before hashing.
            String tempStr = URLEncoder.encode(wholeStr, "UTF-8");

            // The MD5 hex digest of the encoded string is the sn signature.
            System.out.println(snCal.MD5(tempStr));
        }

        /**
         * URL-encodes (UTF-8) every value of the map and joins the pairs as key=value&key=value,
         * in the map's iteration order. Keys are appended as-is.
         *
         * @param data parameter map; every value must be a String
         * @return the query string, or an empty string for an empty map
         * @throws UnsupportedEncodingException declared by URLEncoder.encode
         */
        public String toQueryString(Map<?, ?> data)
                throws UnsupportedEncodingException {
            StringBuilder queryString = new StringBuilder();
            for (Entry<?, ?> pair : data.entrySet()) {
                if (queryString.length() > 0) {
                    queryString.append('&');
                }
                queryString.append(pair.getKey()).append('=')
                        .append(URLEncoder.encode((String) pair.getValue(), "UTF-8"));
            }
            return queryString.toString();
        }

        /**
         * Returns the lowercase hexadecimal MD5 digest of the given text.
         *
         * @param md5 text to hash; its UTF-8 bytes are digested
         * @return 32-character lowercase hex string
         * @throws IllegalStateException if the JVM provides no MD5 implementation
         */
        public String MD5(String md5) {
            try {
                java.security.MessageDigest md = java.security.MessageDigest
                        .getInstance("MD5");
                // Fixed charset so the digest does not depend on the platform default encoding.
                byte[] digest = md.digest(md5.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                StringBuilder sb = new StringBuilder(digest.length * 2);
                for (byte b : digest) {
                    // OR-ing with 0x100 forces three hex digits; dropping the first keeps the
                    // leading zero of values below 0x10.
                    sb.append(Integer.toHexString((b & 0xFF) | 0x100).substring(1, 3));
                }
                return sb.toString();
            } catch (java.security.NoSuchAlgorithmException e) {
                // Fail loudly instead of returning null and printing/sending a "null" signature.
                throw new IllegalStateException("MD5 digest is not available in this JVM", e);
            }
        }
    }

     

    二、建立商圈字典

    pom.xml 新增如下配置

    <!-- FastJson: JSON parsing (BusinessUtil uses JSON.parseObject on the geocoder response).
         NOTE(review): fastjson releases up to 1.2.47 have publicly reported autoType
         deserialization issues; check current advisories and consider a newer release.
         NOTE(review): BusinessUtil in this article also imports org.apache.commons.httpclient
         and org.apache.commons.lang; confirm those artifacts are already declared in the pom. -->

            <dependency>

                <groupId>com.alibaba</groupId>

                <artifactId>fastjson</artifactId>

                <version>1.2.47</version>

            </dependency>

            <!-- GeoHash: geographic position hashing, used to derive the Redis key from lat/lon -->

            <dependency>

                <groupId>ch.hsr</groupId>

                <artifactId>geohash</artifactId>

                <version>1.3.0</version>

            </dependency>

    BusinessUtil

    package cn.bw.dmp.util;

    import com.alibaba.fastjson.JSON;

    import com.alibaba.fastjson.JSONObject;

    import org.apache.commons.httpclient.HttpClient;

    import org.apache.commons.httpclient.methods.GetMethod;

    import org.apache.commons.lang.StringUtils;

    import java.io.UnsupportedEncodingException;

    import java.net.URLEncoder;

    import java.util.LinkedHashMap;

    import java.util.Map;

    public class BusinessUtil {

        public static String getBusniss(String lonAndLat) throws Exception{

            // 计算sn跟参数对出现顺序有关,get请求请使用LinkedHashMap保存<key,value>,该方法根据key的插入顺序排序;post请使用TreeMap保存<key,value>,该方法会自动将key按照字母a-z顺序排序。所以get请求可自定义参数顺序(sn参数必须在最后)发送请求,但是post请求必须按照字母a-z顺序填充bodysn参数必须在最后)。以get请求为例:http://api.map.baidu.com/geocoder/v2/?address=百度大厦&output=json&ak=yourakparamsMap中先放入address,再放output,然后放ak,放入顺序必须跟get请求中对应参数的出现顺序保持一致。

            Map paramsMap = new LinkedHashMap<String, String>();

            //paramsMap.put("address", "百度大厦");

            paramsMap.put("callback", "renderReverse");

            paramsMap.put("location", lonAndLat);

            paramsMap.put("output", "json");

            paramsMap.put("pois", "1");

            paramsMap.put("extensions_town", "true");

            paramsMap.put("ak", "cZDWGxNBoUOsOusVflIqee2YD1CZmGdA");

            // 调用下面的toQueryString方法,对LinkedHashMap内所有valueutf8编码,拼接返回结果address=%E7%99%BE%E5%BA%A6%E5%A4%A7%E5%8E%A6&output=json&ak=yourak

            String paramsStr = toQueryString(paramsMap);

            // paramsStr前面拼接上/geocoder/v2/?,后面直接拼接yoursk得到/geocoder/v2/?address=%E7%99%BE%E5%BA%A6%E5%A4%A7%E5%8E%A6&output=json&ak=yourakyoursk

            String wholeStr = new String("/geocoder/v2/?" + paramsStr + "8ZhxmMycfBliDffZTITX5T13p7c8Bepw");

            // 对上面wholeStr再作utf8编码

            String tempStr = URLEncoder.encode(wholeStr, "UTF-8");

            String sn = MD5(tempStr);

            //String url = "http://api.map.baidu.com"+ tempStr + "&sn="+ sn;

            String url = "http://api.map.baidu.com/geocoder/v2/?"+paramsStr + "&sn=" + sn;

            //调用HttpClient访问Baidu LBS 百度地图开放平台

            HttpClient httpClient = new HttpClient();

            GetMethod get = new GetMethod(url);

            int status = httpClient.executeMethod(get);

            String business = "";

            if(status == 200){

                String response = get.getResponseBodyAsString();

                response = response.replaceAll("renderReverse&&renderReverse\(","");

                response = response.substring(0,response.length()-1);

                JSONObject jo = JSON.parseObject(response);

                JSONObject result = jo.getJSONObject("result");

                //获取商圈

                business = result.getString("business");

                //如果商圈为空,获取具体的地址最小到镇

                if(StringUtils.isEmpty(business)){

                    StringBuffer buffer = new StringBuffer();

                    JSONObject addr = result.getJSONObject("addressComponent");

                    String province = addr.getString("province");

                    String city = addr.getString("city");

                    String district = addr.getString("district");

                    String town = addr.getString("town");

                    if(StringUtils.isNotEmpty(province)){

                        buffer.append(province+";");

                    }

                    if(StringUtils.isNotEmpty(province)){

                        buffer.append(city+";");

                    }

                    if(StringUtils.isNotEmpty(province)){

                        buffer.append(district+";");

                    }

                    if(StringUtils.isNotEmpty(province)){

                        buffer.append(town);

                    }

                    business = buffer.toString();

                }

            }

            return business;

        }

        // Map内所有valueutf8编码,拼接返回结果

        public static  String toQueryString(Map<?, ?> data)

                throws UnsupportedEncodingException {

            StringBuffer queryString = new StringBuffer();

            for (Map.Entry<?, ?> pair : data.entrySet()) {

                queryString.append(pair.getKey() + "=");

                queryString.append(URLEncoder.encode((String) pair.getValue(),

                        "UTF-8") + "&");

            }

            if (queryString.length() > 0) {

                queryString.deleteCharAt(queryString.length() - 1);

            }

            return queryString.toString();

        }

        // 来自stackoverflowMD5计算方法,调用了MessageDigest库函数,并把byte数组结果转换成16进制

        public static String MD5(String md5) {

            try {

                java.security.MessageDigest md = java.security.MessageDigest

                        .getInstance("MD5");

                byte[] array = md.digest(md5.getBytes());

                StringBuffer sb = new StringBuffer();

                for (int i = 0; i < array.length; ++i) {

                    sb.append(Integer.toHexString((array[i] & 0xFF) | 0x100)

                            .substring(1, 3));

                }

                return sb.toString();

            } catch (java.security.NoSuchAlgorithmException e) {

            }

            return null;

        }

        public static void main(String[] args) throws  Exception{

            System.out.println(BusinessUtil.getBusniss("40.499603,116.420812"));

        }

    }

    建立经纬度字典

    package cn.bw.dmp.tools

    import ch.hsr.geohash.GeoHash
    import cn.bw.dmp.utils.{BusinessUtil, JedisUtil}
    import org.apache.commons.lang.StringUtils
    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession
    import redis.clients.jedis.{Jedis, JedisPool}

    import scala.tools.scalap.scalax.util.StringUtil

    /**
      * Created by zcw on 2018/10/15
      *
      * Builds the coordinate -> business-circle dictionary: reads the distinct (lat, lon) pairs
      * of the parquet log, asks BusinessUtil for the business circle of each pair and stores it
      * in Redis under the 8-character GeoHash of the coordinate.
      *
      * Expects exactly one argument: the parquet log input path.
      */
    object LatLon2Bussiness {
      def main(args: Array[String]): Unit = {
        // 1. Validate arguments
        if (args.length != 1) {
          println(
            """
              |cn.bw.dmp.tools.LatLon2Bussiness
              |参数错误!!!
              |需要:LogInputPath
            """.stripMargin)
          // Non-zero status so schedulers and shell scripts see the failure.
          sys.exit(1)
        }
        // 2. Read arguments
        val Array(logInputPath) = args
        // 3. Create the Spark session
        val conf: SparkConf = new SparkConf().setAppName(s"${this.getClass.getSimpleName}").setMaster("local")
        val spark: SparkSession = SparkSession
          .builder()
          .config(conf)
          .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
          .getOrCreate()
        try {
          // 4. Read the log and keep only distinct coordinates inside the accepted range
          spark.read.parquet(logInputPath)
            .select("lat", "lon")
            .where("lat >= 3 and lat < 54 and lat != '' and lon >= 73 and lon <= 136 and lon != ''")
            .distinct()
            // The explicit parameter type selects the Scala-function overload of foreachPartition.
            .foreachPartition((rows: Iterator[org.apache.spark.sql.Row]) => {
              // One Redis connection per partition, returned even if a lookup fails.
              val jedis: Jedis = JedisUtil.getJedis
              try {
                rows.foreach(row => {
                  val lat: String = row.getAs[String]("lat")
                  val lon: String = row.getAs[String]("lon")
                  // Ask Baidu for the business circles of this coordinate,
                  // e.g. "火焰山美食城,点点超市,花冠超市"
                  val business: String = BusinessUtil.getBusniss(s"$lat,$lon")
                  // The GeoHash of the coordinate is the Redis key
                  val geocode: String = GeoHash.withCharacterPrecision(lat.toDouble, lon.toDouble, 8).toBase32
                  if (StringUtils.isNotEmpty(business)) {
                    jedis.set(geocode, business)
                  }
                })
              } finally {
                jedis.close()
              }
            })
        } finally {
          // Stop the SparkSession even when the job fails
          spark.stop()
        }
      }
    }

    三、商圈标签的开发

    package cn.bw.dmp.tags

    import ch.hsr.geohash.GeoHash
    import org.apache.commons.lang.StringUtils
    import org.apache.spark.sql.Row
    import redis.clients.jedis.Jedis

    import scala.collection.mutable

    /**
      * Created by zcw on 2018/10/16
      *
      * Business-circle tags: looks up the GeoHash of a row's coordinate in Redis and emits one
      * "B<name>" tag with weight 1 for every comma-separated business circle stored there.
      */
    object Tags4Business extends Tags {

      /**
        * @param args exactly two values: the log Row (with string columns "lat" and "lon")
        *             and an open Jedis connection
        * @return the business-circle tags of the row; empty when the arguments do not match,
        *         the coordinate is missing, malformed or out of range, or Redis has no entry
        */
      override def makeTag(args: Any*): Map[String, Int] = args match {
        case Seq(row: Row, jedis: Jedis) =>
          val tags = for {
            lat <- parseCoordinate(row.getAs[String]("lat"))
            lon <- parseCoordinate(row.getAs[String]("lon"))
            // Same bounds as the dictionary builder's filter
            // (lat >= 3 and lat < 54 and lon >= 73 and lon <= 136), so every stored key is reachable.
            if lat >= 3 && lat < 54 && lon >= 73 && lon <= 136
            geoCode = GeoHash.withCharacterPrecision(lat, lon, 8).toBase32
            business <- Option(jedis.get(geoCode)) // null when the key is absent
          } yield business.split(",").filter(_.nonEmpty).map(b => ("B" + b) -> 1).toMap
          tags.getOrElse(Map.empty[String, Int])
        case _ =>
          Map.empty[String, Int]
      }

      /** Parses a coordinate column; None for null, blank or non-numeric values. */
      private def parseCoordinate(raw: String): Option[Double] =
        Option(raw).map(_.trim).filter(_.nonEmpty).flatMap(s => scala.util.Try(s.toDouble).toOption)
    }

    四、将商圈标签合并上下文标签

    package cn.bw.dmp.tags

    import cn.bw.dmp.utils.{JedisUtil, OutputPathUtil, TagsUtil}
    import org.apache.spark.broadcast.Broadcast
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
    import redis.clients.jedis.Jedis

    import scala.collection.mutable.ListBuffer

    /**
      * Created by zcw on 2018/10/10
      *
      * Tags every log row (ads, app, device, keywords, area, business circle), merges the tags
      * of rows that share the same user id by summing their weights, and writes the result as
      * text files.
      *
      * Expects four arguments: LogInputPath, AppDicInputPath, StopWordsDicInputPath,
      * ResultOutputPath.
      */
    object ContextTag {
      def main(args: Array[String]): Unit = {
        // 1. Validate arguments
        if (args.length != 4) {
          println(
            """
              |cn.bw.dmp.tags.ContextTag
              |参数错误!!!
              |需要:
              |LogInputPath
              |AppDicInputPath
              |StopWordsDicInputPath
              |ResultOutputPath
            """.stripMargin)
          // Stop here: the destructuring below would otherwise fail with a MatchError.
          sys.exit(1)
        }
        // 2. Read arguments
        val Array(logInputPath, appDicInputPath, stopWordsDicInputPath, resultOutputPath) = args
        // 3. Create the Spark context
        val conf: SparkConf = new SparkConf()
          .setAppName(s"${this.getClass.getSimpleName}")
          .setMaster("local")
          .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
        val sc: SparkContext = spark.sparkContext
        try {
          // App dictionary, one "key:value" entry per line; lines without a ':' are skipped
          val appDic: Map[String, String] = sc.textFile(appDicInputPath)
            .map(_.split(":"))
            .filter(_.length >= 2)
            .map(fields => (fields(0), fields(1)))
            .collect.toMap
          val appDicBC: Broadcast[Map[String, String]] = sc.broadcast(appDic)
          // Stop-words dictionary, one word per line
          val stopwordsDic: Map[String, Int] = sc.textFile(stopWordsDicInputPath).map(line => (line, 1)).collect().toMap
          val stopwordsDicBC: Broadcast[Map[String, Int]] = sc.broadcast(stopwordsDic)
          // 4. Read the parquet log
          val rawDF: DataFrame = spark.read.parquet(logInputPath)
          // 5. Drop rows that carry no user identifier
          val filterdDS: Dataset[Row] = rawDF.where(TagsUtil.hasUserIdCondition)
          import spark.implicits._
          // 6. Tag every row
          val tagedRDD: RDD[(String, List[(String, Int)])] = filterdDS.mapPartitions(it => {
            // One Redis connection per partition for the business-circle lookups
            val jedis: Jedis = JedisUtil.getJedis
            try {
              it.flatMap(row => {
                // The first available id identifies the user; rows without any id are skipped
                TagsUtil.getAllUserId(row).headOption.map(userId => {
                  val tagsAds: Map[String, Int] = Tags4Ads.makeTag(row)
                  val tagsApp: Map[String, Int] = Tags4App.makeTag(row, appDicBC.value)
                  val tagsDevice: Map[String, Int] = Tags4Device.makeTag(row)
                  val tagKeyWords: Map[String, Int] = Tags4KeyWords.makeTag(row, stopwordsDicBC.value)
                  val tagArea: Map[String, Int] = Tags4Area.makeTag(row)
                  val tagBusiness: Map[String, Int] = Tags4Business.makeTag(row, jedis)
                  (userId, (tagsAds ++ tagsApp ++ tagsDevice ++ tagKeyWords ++ tagArea ++ tagBusiness).toList)
                })
              }).toList.iterator // materialise before the connection is closed below
            } finally {
              jedis.close()
            }
          }).rdd
          // 7. Merge the tag lists of each user, summing the weights per tag, e.g.
          //    List(("K偶像剧",1),("K偶像剧",1),("ZP河北",1)) => List(("K偶像剧",2),("ZP河北",1))
          val reduceRDD: RDD[(String, List[(String, Int)])] = tagedRDD.reduceByKey((a, b) =>
            (a ++ b).groupBy(_._1).map {
              case (tag, hits) => (tag, hits.foldLeft(0)(_ + _._2))
            }.toList
          )
          // 8. Write the result, replacing any previous output
          OutputPathUtil.deleteOutputPath(resultOutputPath, sc)
          reduceRDD.saveAsTextFile(resultOutputPath)
        } finally {
          // Stop the SparkSession even when the job fails
          spark.stop()
        }
      }

    }

  • 相关阅读:
    两个栈实现一个队列
    DacningLinks实现
    boost::implicit_cast
    hibernate查询之Criteria实现分页方法(GROOVY语法)
    VS2015 android 设计器不能可视化问题解决。
    当Eclipse爱上SVN
    你不知道的getComputedStyle
    推荐的软件
    React之表单
    理解javascript中的Function.prototype.bind
  • 原文地址:https://www.cnblogs.com/JBLi/p/11548909.html
Copyright © 2020-2023  润新知