• Spark Notes: A Simple Example of Reading Hive Data with Local Spark


    Note: copy the MySQL driver jar into spark/lib, copy hive-site.xml into the project's resources directory, and use the cluster's IP address rather than its hostname when debugging against the remote cluster.
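    A minimal hive-site.xml sketch for the resources directory, assuming a MySQL-backed metastore on 192.168.66.66; the host, port, database name, and driver version below are placeholders to adapt to your environment:

    <configuration>
      <!-- Address of the remote metastore service; matches the commented-out hive.metastore.uris setting in the code below. -->
      <property>
        <name>hive.metastore.uris</name>
        <value>thrift://192.168.66.66:9083</value>
      </property>
      <!-- JDBC connection for the metastore database; this is why the MySQL driver jar must be on the classpath. -->
      <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://192.168.66.66:3306/hive</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
    </configuration>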

    import org.apache.spark.SparkConf
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.hive.HiveContext
    import java.io.FileNotFoundException
    import java.io.IOException

    object HiveSelect {
      def main(args: Array[String]) {
        System.setProperty("hadoop.home.dir", "D:\\hadoop") // point at the local Hadoop installation (winutils)
        val conf = new SparkConf().setAppName("HiveApp")
          .setMaster("spark://192.168.66.66:7077")
          .set("spark.executor.memory", "1g")
          .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
          .setJars(Seq("D:\\workspace\\scala\\out\\scala.jar")) // ship the application jar to the remote Spark cluster
          //.set("hive.metastore.uris", "thrift://192.168.66.66:9083") // remote Hive metastore address
          //.set("spark.driver.extraClassPath", "D:\\json\\mysql-connector-java-5.1.39.jar")
        val sparkContext = new SparkContext(conf)
        try {
          val hiveContext = new HiveContext(sparkContext)
          hiveContext.sql("use siat") // switch to the target database
          hiveContext.sql("DROP TABLE IF EXISTS src") // drop the table
          hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) " +
            "ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '") // create the table
          hiveContext.sql("LOAD DATA LOCAL INPATH 'D:\\workspace\\scala\\src.txt' INTO TABLE src") // load the data
          hiveContext.sql("SELECT * FROM src").collect().foreach(println) // query the data
        } catch {
          // specific exceptions first; the Throwable catch-all must come last or it shadows the other cases
          case e: FileNotFoundException => println("Missing file exception")
          case e: IOException => println("IO exception")
          case e: NumberFormatException => println(e)
          case e: ArithmeticException => println(e)
          case e: IllegalArgumentException => println("illegal argument exception")
          case e: IllegalStateException => println("illegal state exception")
          case e: Exception => println(e)
          case e: Throwable => println("found an unknown exception: " + e)
        } finally {
          sparkContext.stop()
        }
      }
    }
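    The file src.txt should contain space-separated pairs matching the table schema, e.g. a line like "1 hello", since the table is declared with FIELDS TERMINATED BY ' '. On Spark 2.0 and later, HiveContext is deprecated in favor of SparkSession with Hive support enabled; a minimal sketch of the equivalent query, assuming the same cluster address and database (not verified against this setup):

    import org.apache.spark.sql.SparkSession

    object HiveSelectSession {
      def main(args: Array[String]): Unit = {
        // enableHiveSupport() replaces HiveContext and picks up hive-site.xml from the classpath
        val spark = SparkSession.builder()
          .appName("HiveApp")
          .master("spark://192.168.66.66:7077")
          .enableHiveSupport()
          .getOrCreate()
        try {
          spark.sql("use siat")
          spark.sql("SELECT * FROM src").collect().foreach(println)
        } finally {
          spark.stop()
        }
      }
    }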

     Appendix 1: Scala Spark API: http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package

    org.apache.spark
    org.apache.spark.api.java
    org.apache.spark.api.java.function
    org.apache.spark.broadcast
    org.apache.spark.graphx
    org.apache.spark.graphx.impl
    org.apache.spark.graphx.lib
    org.apache.spark.graphx.util
    org.apache.spark.input
    org.apache.spark.internal
    org.apache.spark.internal.io
    org.apache.spark.io
    org.apache.spark.launcher
    org.apache.spark.mapred
    org.apache.spark.metrics.source
    org.apache.spark.ml
    org.apache.spark.ml.attribute
    org.apache.spark.ml.classification
    org.apache.spark.ml.clustering
    org.apache.spark.ml.evaluation
    org.apache.spark.ml.feature
    org.apache.spark.ml.fpm
    org.apache.spark.ml.linalg
    org.apache.spark.ml.param
    org.apache.spark.ml.recommendation
    org.apache.spark.ml.regression
    org.apache.spark.ml.source.libsvm
    org.apache.spark.ml.stat
    org.apache.spark.ml.stat.distribution
    org.apache.spark.ml.tree
    org.apache.spark.ml.tuning
    org.apache.spark.ml.util
    org.apache.spark.mllib
    org.apache.spark.mllib.classification
    org.apache.spark.mllib.clustering
    org.apache.spark.mllib.evaluation
    org.apache.spark.mllib.feature
    org.apache.spark.mllib.fpm
    org.apache.spark.mllib.linalg
    org.apache.spark.mllib.linalg.distributed
    org.apache.spark.mllib.optimization
    org.apache.spark.mllib.pmml
    org.apache.spark.mllib.random
    org.apache.spark.mllib.rdd
    org.apache.spark.mllib.recommendation
    org.apache.spark.mllib.regression
    org.apache.spark.mllib.stat
    org.apache.spark.mllib.stat.distribution
    org.apache.spark.mllib.stat.test
    org.apache.spark.mllib.tree
    org.apache.spark.mllib.tree.configuration
    org.apache.spark.mllib.tree.impurity
    org.apache.spark.mllib.tree.loss
    org.apache.spark.mllib.tree.model
    org.apache.spark.mllib.util
    org.apache.spark.partial
    org.apache.spark.rdd
    org.apache.spark.scheduler
    org.apache.spark.scheduler.cluster
    org.apache.spark.security
    org.apache.spark.serializer
    org.apache.spark.sql
    org.apache.spark.sql.api.java
    org.apache.spark.sql.catalog
    org.apache.spark.sql.expressions
    org.apache.spark.sql.expressions.javalang
    org.apache.spark.sql.expressions.scalalang
    org.apache.spark.sql.hive
    org.apache.spark.sql.hive.execution
    org.apache.spark.sql.hive.orc
    org.apache.spark.sql.jdbc
    org.apache.spark.sql.sources
    org.apache.spark.sql.streaming
    org.apache.spark.sql.types
    org.apache.spark.sql.util
    org.apache.spark.status.api.v1
    org.apache.spark.status.api.v1.streaming
    org.apache.spark.storage
    org.apache.spark.streaming
    org.apache.spark.streaming.api.java
    org.apache.spark.streaming.dstream
    org.apache.spark.streaming.flume
    org.apache.spark.streaming.kafka
    org.apache.spark.streaming.kinesis
    org.apache.spark.streaming.receiver
    org.apache.spark.streaming.scheduler
    org.apache.spark.streaming.scheduler.rate
    org.apache.spark.streaming.util
    org.apache.spark.ui.env
    org.apache.spark.ui.exec
    org.apache.spark.ui.jobs
    org.apache.spark.ui.storage
    org.apache.spark.util
    org.apache.spark.util.random
    org.apache.spark.util.sketch
    

      

  • Original article: https://www.cnblogs.com/xinfang520/p/7832274.html