• A first Spark + Scala program


    import org.apache.spark._
    import org.apache.spark.SparkContext._

    import java.util.{Calendar, Date, Locale}
    import java.text.SimpleDateFormat
    import java.math.BigDecimal
    import java.math.RoundingMode

    // Popularity weighting: subscriptions 2, posting frequency 3, article quality 5
    // The maximum score is 10; anything above 10 is clamped to 10
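    // (The three weights sum to 2 + 3 + 5 = 10, which is where the 10-point
    // scale below comes from.)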

    object WordCount {

    // Reference: http://blog.chinaunix.net/uid-25885064-id-3430852.html

    // Scala date handling (today, yesterday, this week, this month, timestamp
    // to date conversion, date comparison):
    // http://blog.csdn.net/springlustre/article/details/47273353

    // SQL templates this job generates:
    //   update xrk_wx_openaccounts set hscore='' where openid='';
    //   update xrk_wx_openaccounts set fscore='',qscore='' where openid='';

    // Format a Double as a two-decimal string, rounding half-up.
    def format1(value: Double): String = {
      val bd = new BigDecimal(value).setScale(2, RoundingMode.HALF_UP)
      bd.toString
    }
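    // e.g. format1(3.14159) == "3.14", format1(10.0) == "10.00"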

    // Build the UPDATE statement for an account's popularity score (hscore).
    def rethscoreSql(hscore: Double, openid: String): String = {
      "update xrk_wx_openaccounts set hscore='" + format1(hscore) +
        "' where openid='" + openid + "';"
    }

    // Build the UPDATE statement for the frequency (fscore) and quality (qscore) scores.
    def retfscoreqscoreSql(fscore: Double, qscore: Double, openid: String): String = {
      "update xrk_wx_openaccounts set fscore='" + format1(fscore) +
        "',qscore='" + format1(qscore) + "' where openid='" + openid + "';"
    }
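    // e.g. rethscoreSql(7.5, "oabc123") yields (with a hypothetical openid):
    //   update xrk_wx_openaccounts set hscore='7.50' where openid='oabc123';
    // Note: openid is spliced into the SQL verbatim, so these statements should
    // only be built from trusted, internally generated ids.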

    // Today's timestamp as "yyyy-MM-dd HH:mm:ss"
    def getNowDate(): String = {
      val now = new Date()
      val dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      dateFormat.format(now)
    }

    // Yesterday's date as "yyyy-MM-dd"
    def getYesterday(): String = {
      val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
      val cal = Calendar.getInstance()
      cal.add(Calendar.DATE, -1)
      dateFormat.format(cal.getTime())
    }

    // The date 7 days ago as "yyyy-MM-dd"
    def get_7day(): String = {
      val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
      val cal = Calendar.getInstance()
      cal.add(Calendar.DATE, -7)
      dateFormat.format(cal.getTime())
    }

    // Parse a "yyyy-MM-dd HH:mm:ss" string into a Date
    def strtoDate(tm: String): Date = {
      // For locale-sensitive inputs such as "30/Jul/2015:05:00:50", use e.g.
      // new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", new Locale("en"))
      val fm = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      fm.parse(tm)
    }

    // Parse a "yyyy-MM-dd HH:mm:ss" string into epoch milliseconds
    def strtoDatetolong(tm: String): Long = {
      strtoDate(tm).getTime()
    }
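    // e.g. strtoDate("2015-07-30 05:00:50") parses in the JVM's default time
    // zone, and strtoDatetolong of the same string returns the corresponding
    // epoch milliseconds.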



    def main(args: Array[String]) {
      /*
       * Metric names used below:
       *   TotalArticle    total number of articles
       *   TotalClick      total number of clicks
       *   TotalReadNum    total number of reads
       *   TotalOpenNum    total number of subscription accounts
       *
       *   AvgArticle      average article count
       *   AvgClick        average click count
       *   AvgReadNum      average read count
       *
       *   OpenArticle     article count per account
       *   OpenClick       total clicks per account
       *   OpenReadNum     total reads per account
       *
       *   AvgOpenArticle  average article count per account
       *   AvgOpenClick    average clicks per account
       *   AvgOpenReadNum  average reads per account
       */

      // Six positional arguments: master URL, app name, two inputs, two outputs
      if (args.length < 6) {
        println("usage: WordCount <master> <appName> <articlesPath> <userorderPath> <out1> <out2>")
        println("e.g.   spark://192.168.16.119:7077 SparkSubmit_Demo")
        println("       /wxcontentdb/xrk_wx_articles/part-m-00000 /xrk_wx_userorder/part-m-00000")
        println("       outtxt1 outtxt2")
        println("got " + args.length + " argument(s)")
        return
      }

      // Returns 1 if the article's publish time (field 3) falls within the last
      // 7 days, else 0. Defined here but not actually used below.
      def _float(line: String): Int = {
        val fileds = line.split("\t")
        val timeLong = strtoDatetolong(fileds(3))
        // get_7day() is date-only, so pad it to match the "yyyy-MM-dd HH:mm:ss" pattern
        val _7dayTime = strtoDatetolong(get_7day() + " 00:00:00")
        if (timeLong > _7dayTime) 1 else 0
      }



      // Score a value against a baseline: the ratio fenzi/fenmu, clamped so that
      // anything above 10 becomes 10 and anything below 0.1 becomes 0.
      def fenzhi(fenzi: Double, fenmu: Double): Double = {
        val ratio = fenzi / fenmu
        if (ratio > 10) 10.00
        else if (ratio < 0.1) 0.00
        else format1(ratio).toDouble
      }
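      // e.g. fenzhi(30, 2) == 10.0 (clamped), fenzhi(1, 100) == 0.0, fenzhi(3, 4) == 0.75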


      val conf = new SparkConf()
        .setMaster(args(0))
        .setAppName(args(1))
        .set("spark.executor.memory", "3g")
      val sc = new SparkContext(conf)

      // Subscription orders; field 2 is the openid of the subscribed account
      val xrk_wx_userorder = sc.textFile(args(3))

      // Total rows, distinct accounts, and the average orders per account
      val xrk_wx_userorder_total_num = xrk_wx_userorder.count()

      // (openid, subscription count), sorted ascending by count
      val openid_num = xrk_wx_userorder
        .map(line => line.split("\t")(2))
        .map((_, 1))
        .reduceByKey(_ + _)
        .map(x => (x._2, x._1))
        .sortByKey(true)
        .map(x => (x._2, x._1))

      val xrk_wx_userorder_total_record = openid_num.count()
      // Long/Long division, so the average is truncated
      val xrk_wx_userorder_avg = (xrk_wx_userorder_total_num / xrk_wx_userorder_total_record).toLong

      // (openid, count, average, hscore) where hscore = count/average clamped to [0, 10]
      val openid_num_ex = openid_num.map(x =>
        (x._1, x._2, xrk_wx_userorder_avg, fenzhi(x._2, xrk_wx_userorder_avg)))

    /////////////////////////
      /////////////////////////
      // Articles export (wxcontentdb): field 1 = openid, field 3 = publish time,
      // field 4 = clicks, field 5 = reads
      val lines = sc.textFile(args(2))

      // Article count per (openid, publish time) pair
      val openid_time = lines
        .map(line => { val fileds = line.split("\t"); fileds(1) + "\t" + fileds(3) })
        .map((_, 1))
        .reduceByKey(_ + _)

      // Number of distinct publish times per openid, keyed by openid for joining
      val _openid_time = openid_time
        .map(x => x._1.split("\t")(0))
        .map((_, 1))
        .reduceByKey(_ + _)
        .keyBy(top => top._1)

      val TotalArticle = lines.count()

      val TotalClick = lines.map(line => line.split("\t")(4).toLong).reduce(_ + _)
      val TotalReadNum = lines.map(line => line.split("\t")(5).toLong).reduce(_ + _)

      // (openid, article count)
      val OpenArticle = lines.map(_.split("\t")(1)).map((_, 1)).reduceByKey(_ + _)

      val TotalOpenNum = OpenArticle.count()

      // (openid, total clicks) and (openid, total reads)
      val OpenClick = lines
        .map(line => { val fileds = line.split("\t"); (fileds(1), fileds(4).toLong) })
        .reduceByKey(_ + _)
      val OpenReadNum = lines
        .map(line => { val fileds = line.split("\t"); (fileds(1), fileds(5).toLong) })
        .reduceByKey(_ + _)

      // Key each per-account RDD by openid so they can all be joined
      val _OpenClick = OpenClick.keyBy(top => top._1)
      val _OpenReadNum = OpenReadNum.keyBy(top => top._1)

      // Join into (openid, articles, clicks, reads, distinct publish times)
      val list = OpenArticle.keyBy(top => top._1)
        .join(_OpenClick)
        .join(_OpenReadNum)
        .join(_openid_time)
        .map(f => (f._1, f._2._1._1._1._2, f._2._1._1._2._2, f._2._1._2._2, f._2._2._2))
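      // After the three joins the value nests as:
      //   (openid, ((((openid, articles), (openid, clicks)), (openid, reads)), (openid, times)))
      // which is why the projection above digs out f._2._1._1._1._2 and friends.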


      val AvgClick = TotalClick / TotalOpenNum
      val AvgReadNum = TotalReadNum / TotalOpenNum

      // txt: (openid, click score, read score, quality score). The quality score
      // rates the article count against 8*14 (which reads like a full mark of
      // 8 articles a day over a 14-day window) for one half, plus the number of
      // distinct publish times out of 14 for the other half.
      val txt = list.map(f => (f._1,
        fenzhi(f._3, AvgClick),
        fenzhi(f._4, AvgReadNum),
        fenzhi(f._2 * 10, 8 * 14) / 2.00 + f._5 * 10.00 / 14.00 / 2.00))

      // fscore = mean of the click and read scores; qscore = the quality score
      val _txt = txt.map(f => retfscoreqscoreSql((f._2 + f._3) / 2, f._4, f._1))
      // hscore from the subscription counts computed earlier
      val _openid_num_ex = openid_num_ex.map(f => rethscoreSql(f._4, f._1))

      // Write the generated SQL: hscore updates to args(4), fscore/qscore to args(5)
      _openid_num_ex.saveAsTextFile(args(4))
      _txt.saveAsTextFile(args(5))
      sc.stop()

    }
    }

    //////////////////////////////////////////////////////////// Submitting the job ///////////////////////////////////////////

     /spark-1.0.2/bin/spark-submit  --class WordCount  spark-wordcount-in-scala.jar    spark://192.168.16.119:7077  SparkSubmit_Demo    /user/root/wxcontentdb/part-m-00000 /user/root/xrk_wx_userorder/part-m-00000    outtxt1 outtxt2  --num-workers 1 --master-memory 2g --worker-memory 2g
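Everything after the application jar is handed to the program itself: args(0) is the master URL, args(1) the app name, args(2) the wxcontentdb articles export, args(3) the xrk_wx_userorder export, and args(4)/args(5) the two output directories (hscore SQL and fscore/qscore SQL respectively). Note that the trailing --num-workers/--master-memory/--worker-memory flags therefore also land in args(6) onward rather than being parsed by spark-submit; this program simply ignores them.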

  • Original post: https://www.cnblogs.com/bigdata007/p/4955799.html