• Spark SQL:以 Hive 作为数据源


    根据官方文档的说法,需要把 hive-site.xml、core-site.xml、hdfs-site.xml 拷贝到 Spark 安装目录的 conf 目录下;如果 Hive 的元数据库使用的是 MySQL,还要保证 MySQL 已经启动。

    java

     1 public class Demo {
     2     private static SparkSession session = SparkSession.builder().appName("demo").enableHiveSupport()
     3             .config("spark.sql.warehouse.dir", "/user/hive/warehouse").getOrCreate();
     4 
     5     public static void main(String[] args) {
     6         session.sql("drop table if exists students_info");
     7         session.sql("create table if not exists students_info(name string,age int) "
     8                 + "row format delimited fields terminated by '	' 
    ");
     9 
    10         // 将数据导入学生信息表
    11         session.sql(
    12                 "load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info");
    13 
    14         session.sql("drop table if exists students_score");
    15         session.sql("create table if not exists students_score(name string,score int)  
    "
    16                 + "row format delimited fields terminated by '	' 
    ");
    17 
    18         // 将数据导入学生成绩表
    19         session.sql(
    20                 "load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score");
    21 
    22         // 查询
    23         Dataset<Row> dataset = session.sql(
    24                 "select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score>80");
    25 
    26         // 将dataset中的数据保存到hive中
    27         session.sql("drop table if exists students_result");
    28         dataset.write().saveAsTable("students_result");
    29 
    30         // 将hive中的表转成dataset,查看数据是否成功保存
    31         Dataset<Row> table = session.table("students_result");
    32         table.show();
    33 
    34         session.stop();
    35 
    36     }
    37 }

    scala

     /** Spark SQL example that uses Hive tables as both source and sink.
       *
       * It (re)creates two tab-delimited Hive tables, loads a local text file into
       * each, joins them on `name`, keeps rows whose score is above 90, saves the
       * result as the Hive table `students_result`, and prints it.
       *
       * Requires `org.apache.spark.sql.SparkSession` to be imported by the enclosing file.
       */
     object Demo {
       def main(args: Array[String]): Unit = {
         // Hive-enabled session; the warehouse directory is set explicitly.
         val session = SparkSession.builder()
           .appName("demo")
           .enableHiveSupport()
           .config("spark.sql.warehouse.dir", "/user/hive/warehouse")
           .getOrCreate()

         try {
           session.sql("drop table if exists students_info")
           // A plain "..." literal cannot span lines, so the line break and the tab
           // delimiter are written with the \n and \t escapes.
           session.sql("create table if not exists students_info(name string,age int) \n row format delimited fields terminated by '\t'")

           session.sql("load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info")

           session.sql("drop table if exists students_score")
           session.sql("create table if not exists students_score(name string,score int) \n row format delimited fields terminated by '\t'")

           session.sql("load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score")

           // Save the join result into Hive; the target is dropped first so
           // saveAsTable does not hit an already-existing table.
           session.sql("drop table if exists students_result")
           session
             .sql("select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score >90")
             .write
             .saveAsTable("students_result")

           // Read the table back to check that the data was saved.
           val df = session.table("students_result")
           df.show()
         } finally {
           // Stop the session even when one of the statements above fails.
           session.stop()
         }
       }
     }
  • 相关阅读:
    第一次个人作业-热身
    OO总结
    oo第三单元总结
    oo第二单元总结
    软件工程 —— 课程回顾与个人总结
    BUAA软件案例分析——智能表单抽取识别
    软件工程—结对项目博客
    轨迹预测文献阅读整理(轨迹多样性、车辆轨迹、图神经网络、潜码)
    软件工程
    软件工程
  • 原文地址:https://www.cnblogs.com/tele-share/p/10397525.html
Copyright © 2020-2023  润新知