• A problem encountered when running Spark SQL


    Running the following code fails with an org.apache.spark.sql.AnalysisException:

    import org.apache.log4j.{Level, Logger}
    import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
    
    /**
      * IP location lookup implemented with Spark SQL
      * Created by lq on 2018/9/29 17:04.
      */
    object SQLIPLocation {
      val rulesFilePath = "f:\\data\\ip.txt"
      val accessFilePath = "f:\\data\\access.log"
    
      def main(args: Array[String]): Unit = {
        Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
        val spark = SparkSession.builder().appName("SQLIPLocation").master("local[*]").getOrCreate()
    
        // Read the IP rules data
        val ipRulesLine: Dataset[String] = spark.read.textFile(rulesFilePath)
    
        // Parse the IP rules data
        import spark.implicits._
        val tpRDDs: Dataset[(Long, Long, String)] = ipRulesLine.map(line => {
          val fields = line.split("[|]")
          val startNum = fields(2).toLong
          val endNum = fields(3).toLong
          val province = fields(6)
          (startNum, endNum, province)
        })
    
        val ipRulesDF: DataFrame = tpRDDs.toDF("start_num", "end_num", "province")
        // Register the IP rules data as a temporary view
        ipRulesDF.createTempView("v_ip_rules")
    
        // Read the access log data
        val accessLogLine: Dataset[String] = spark.read.textFile(accessFilePath)
    
        // Parse the access log data
        import cn.edu360.spark.day06.MyUtils
        val ips: DataFrame = accessLogLine.map(line=> {
          val fields = line.split("[|]")
          val ip = fields(1)
          MyUtils.ip2Long(ip)
        }).toDF("ip")
    
        // Register the access log data as a temporary view
        ips.createTempView("v_access_ip")
    
        // Join the two views with SQL
        val result = spark.sql("SELECT province, COUNT(*) counts FROM v_ip_rules JOIN v_access_ip ON ip>=start_num AND ip<=end_num GROUP BY province ORDER BY counts DESC")
    
        // Trigger the action
        result.show()
    
        // Release resources
        spark.stop()
      }
    }
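
    The MyUtils.ip2Long helper imported above is not shown in the post. A typical implementation converts the dotted-quad IP string into the same numeric form as start_num/end_num so the range join can compare them. A minimal sketch follows; the actual contents of cn.edu360.spark.day06.MyUtils are an assumption, not taken from the original project:

    object MyUtils {
      // Convert an IPv4 address such as "1.2.3.4" to its numeric (Long) form,
      // matching the start_num/end_num columns in the IP rules file.
      def ip2Long(ip: String): Long = {
        val fragments = ip.split("[.]")
        var ipNum = 0L
        for (i <- fragments.indices) {
          ipNum = fragments(i).toLong | ipNum << 8L
        }
        ipNum
      }
    }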
    

      

    Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
    Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`word`' given input columns: [value]; line 1 pos 56;
    'Sort ['counts DESC NULLS LAST], true
    +- 'Aggregate ['word], [value#10 AS word#13, count(1) AS counts#14L]
       +- SubqueryAlias w_words, `w_words`
          +- SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#10]
             +- MapPartitions <function1>, obj#9: java.lang.String
                +- DeserializeToObject cast(value#0 as string).toString, obj#8: java.lang.String
                   +- Project [value#0]
                      +- Relation[value#0] text
    
    at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:86)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:83)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:290)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:290)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:289)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:255)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:255)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:266)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:276)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:280)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:280)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$6.apply(QueryPlan.scala:285)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:285)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:255)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:83)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:76)
    at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:128)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:127)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:127)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:76)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:57)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:52)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:63)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
    at cn.edu360.spark.day08.SQLWorkCount$.main(SQLWorkCount.scala:28)
    at cn.edu360.spark.day08.SQLWorkCount.main(SQLWorkCount.scala)
    

      

    I tried every fix I could find online, without success.

    In the end the problem went away after changing the Spark version in pom.xml from

    <spark.version>2.1.1</spark.version>

    to

    <spark.version>2.2.1</spark.version>
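
    For reference, here is a minimal sketch of how such a version property is usually wired into the Spark dependencies in pom.xml; the artifact list and Scala version suffix below are assumptions, not copied from the original project:

    <properties>
        <spark.version>2.2.1</spark.version>
    </properties>

    <dependencies>
        <!-- spark-sql brings in spark-core transitively; both resolve through the shared property -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
    </dependencies>

    After changing the version, re-import the Maven project (or run mvn clean compile) so the new jars are actually picked up.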

    
    
  • Original article: https://www.cnblogs.com/lq0310/p/9727913.html