• 通过sparkstreaming分析url的数据


    spark version 1.6.2

    scala version 2.10.6

    此代码参考官方例子----

    自定义接收器

    import java.io.BufferedReader
    
    
    import org.apache.spark.Logging
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.receiver.Receiver
    
    import scala.io.{BufferedSource, Source}
    
    /**
      * Custom Spark Streaming receiver that reads the text served at a URL
      * line by line and hands every line to Spark via `store`.
      *
      * Received data is stored with `StorageLevel.MEMORY_AND_DISK`.
      *
      * @param URL address to read; it is opened with `Source.fromURL` as UTF-8
      */
    class UrlReceiver(URL:String) extends Receiver[String](StorageLevel.MEMORY_AND_DISK) with Logging{

      /** Starts a background thread that performs the reading, so this call returns immediately. */
      override def onStart(): Unit = {
        new Thread("创建一个线程"){
          override def run(): Unit = {
            url()
          }
        }.start()
      }

      /** Nothing to release here: the reading loop in `url()` ends once it observes `isStopped`. */
      override def onStop(): Unit = {
      }

      /**
        * Opens the URL, stores every line until the receiver is stopped or the
        * stream ends, then asks Spark to restart the receiver.
        *
        * The underlying stream is always closed, including when reading or
        * storing fails. Failures are passed to `restart` together with the
        * causing exception.
        */
      private def url(): Unit = {
        try {
          val fileContent: BufferedSource = Source.fromURL(URL, "utf-8")
          try {
            val reader: BufferedReader = fileContent.bufferedReader()
            var input: String = reader.readLine()
            while (!isStopped && input != null) {
              // Push the line to Spark Streaming.
              store(input)
              input = reader.readLine()
            }
          } finally {
            // Closes the source's input stream on both success and failure.
            fileContent.close()
          }
          logInfo("停止接受")
          restart("尝试再次连接~~~~~~~~~~~~~")
        } catch {
          // The specific case must come first; behind a Throwable case it could never match.
          // IOException covers failures to open the URL as well as read failures on the stream.
          case s: java.io.IOException =>
            restart(s"连接出现错误${URL}:${s}", s)
          // NonFatal lets fatal JVM errors (e.g. OutOfMemoryError) propagate instead of being swallowed.
          case scala.util.control.NonFatal(t) =>
            restart(s"接受数据错误${t}", t)
        }
      }
    }
    

     写streaming程序

    import org.apache.spark.streaming.dstream.ReceiverInputDStream
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.{SparkConf, SparkContext}
    
    
    /**
      * Driver program: creates a local StreamingContext with a 3-second batch
      * interval, attaches a `UrlReceiver`, and prints every received line.
      */
    object urlstreaming {

      /** URL read when none is given on the command line (Baidu is used as the example). */
      private val DefaultUrl = "https://www.baidu.com"

      /**
        * Entry point.
        *
        * @param args optional; `args(0)` overrides the URL to read, otherwise
        *             the default URL is used
        */
      def main(args: Array[String]): Unit = {
        val URL = args.headOption.getOrElse(DefaultUrl)

        val conf: SparkConf = new SparkConf()
          .setMaster("local[*]")
          .setAppName(this.getClass.getSimpleName)
          .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
          .set("spark.streaming.stopGracefullyOnShutdown", "true")

        val ssc = new StreamingContext(conf,Seconds(3))

        val value: ReceiverInputDStream[String] = ssc.receiverStream(new UrlReceiver(URL))

        // Print each line of every batch.
        value.foreachRDD(
          rdd=>{
            rdd.foreach(println)
          }
        )

        ssc.start()
        // Blocks until the streaming context is stopped or fails.
        ssc.awaitTermination()
      }
    }
    
  • 相关阅读:
    unable to start kestrel System.Net.Sockets.SocketException (10013): 以一种访问权限不允许的方式做了一个访问套接字的尝试。
    c# 复制文件夹内所有文件到另外一个文件夹
    git初始化
    c# 递归获取所有目录,所有文件,并替换文件
    新增项目 提交到gitee
    netcore3.1 跨域请求
    netcore appsettings.json 绑定对象
    nuget安装包
    做人六字诀:静,缓,忍,让,淡,平
    docker安装部署
  • 原文地址:https://www.cnblogs.com/hekang-14/p/10248719.html
Copyright © 2020-2023  润新知