• SparkSql


    pom

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>

        <groupId>org.example</groupId>
        <artifactId>test3-24</artifactId>
        <version>1.0-SNAPSHOT</version>

        <dependencies>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <version>1.18.16</version>
            </dependency>
            <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-library</artifactId>
                <version>2.12.4</version>
            </dependency>
            <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-compiler</artifactId>
                <version>2.12.4</version>
            </dependency>
            <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-reflect</artifactId>
                <version>2.12.4</version>
            </dependency>
            <dependency>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
                <version>1.2.12</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.12</artifactId>
                <version>3.0.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.12</artifactId>
                <version>3.0.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-hive_2.12</artifactId>
                <version>3.0.0</version>
            </dependency>
            <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>5.1.6</version>
                <scope>runtime</scope>
            </dependency>
        </dependencies>

        <build>
            <plugins>
                <!-- org.scala-tools:maven-scala-plugin is unmaintained; its
                     maintained successor is net.alchim31.maven:scala-maven-plugin. -->
                <plugin>
                    <groupId>org.scala-tools</groupId>
                    <artifactId>maven-scala-plugin</artifactId>
                    <version>2.15.2</version>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>

    </project>
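
    The mysql-connector-java dependency is declared with runtime scope but is never exercised in the code below. As a hedged sketch of why it might be on the classpath, the DataFrames built later could be written out to MySQL through Spark's JDBC writer; the URL, database, table, and credentials here are assumptions, not values from the original post:

    import java.util.Properties;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SaveMode;

    // Hypothetical helper: appends any DataFrame to a MySQL table over JDBC.
    public class JdbcSink {
        public static void save(Dataset<Row> df, String table) {
            Properties props = new Properties();
            props.setProperty("user", "root");                    // assumed credentials
            props.setProperty("password", "123456");              // assumed credentials
            props.setProperty("driver", "com.mysql.jdbc.Driver"); // 5.1.x driver class
            df.write()
              .mode(SaveMode.Append)
              .jdbc("jdbc:mysql://localhost:3306/test", table, props);
        }
    }

    Note that com.mysql.jdbc.Driver is the right class name for the 5.1.x connector pinned in the pom; the 8.x connector renamed it to com.mysql.cj.jdbc.Driver.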

    bean

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    // Note: this bean shadows java.util.Date, which is harmless here because
    // java.util.Date is never imported.
    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public class Date {
        // Date.txt defines the date dimension: each day is tagged with the
        // month, week, quarter, etc. that it belongs to.
        // Fields: date, year-month, year, month, day, day of week, week number,
        // quarter, ten-day period, half-month
        private String data;
        private String year_month;
        private String year;
        private String month;
        private String day;
        private String week;
        private String week_th;
        private String quarter;
        private String a_period_of_ten_days;
        private String meniscus;
    }

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    @AllArgsConstructor
    @NoArgsConstructor
    @Data
    public class Details {
        // Fields: order number, row number, product, quantity, price, amount
        private String orderNo;
        private String rowkey;
        private String shop;
        private String num;
        private String price;
        private String amount;
    }
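
    One detail worth knowing before the driver code below: Spark infers DataFrame columns from the JavaBean getters that Lombok's @Data generates, and bean-derived columns come out sorted by property name rather than in declaration order. A minimal sketch to verify the inferred schema, assuming the Details bean above is on the classpath:

    import java.util.Collections;
    import org.apache.spark.sql.SparkSession;

    public class SchemaCheck {
        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                    .master("local[1]").appName("schema-check").getOrCreate();
            // Build a one-row DataFrame from the bean and print the inferred schema;
            // expected columns: amount, num, orderNo, price, rowkey, shop (all strings).
            spark.createDataFrame(
                    Collections.singletonList(new Details("o1", "1", "pen", "2", "3.5", "7.0")),
                    Details.class).printSchema();
            spark.stop();
        }
    }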

    test

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.function.Function;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class SparkSql {

        public static void main(String[] args) throws Exception {
            // A single SparkSession is enough: getOrCreate() always returns the
            // same underlying session, so a separate SparkContext and extra
            // SparkSession builders would just yield duplicates.
            SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("app");
            SparkSession session = SparkSession.builder().config(conf).getOrCreate();

            // Read the date dimension file from the local (Windows) file system.
            Dataset<String> dataset = session.read().textFile("C:\\Date.txt");
            // Map each CSV line to a Date bean.
            JavaRDD<Date> dateMap = dataset.toJavaRDD().map(new Function<String, Date>() {
                @Override
                public Date call(String v1) throws Exception {
                    String[] split = v1.split(",");
                    return new Date(split[0], split[1], split[2], split[3], split[4],
                            split[5], split[6], split[7], split[8], split[9]);
                }
            });

            // Read the order detail file from HDFS.
            JavaRDD<String> stringJavaRDD = session.sparkContext()
                    .textFile("hdfs://hadoop106:8020/StockDetail.txt", 1)
                    .toJavaRDD();
            // Map each CSV line to a Details bean.
            JavaRDD<Details> detailMap = stringJavaRDD.map(new Function<String, Details>() {
                @Override
                public Details call(String s) throws Exception {
                    String[] split = s.split(",");
                    return new Details(split[0], split[1], split[2], split[3], split[4], split[5]);
                }
            });

            // Turn the bean RDDs into DataFrames and register them as temp views.
            Dataset<Row> dateDataFrame = session.createDataFrame(dateMap, Date.class);
            Dataset<Row> detailDataFrame = session.createDataFrame(detailMap, Details.class);

            dateDataFrame.createTempView("date");
            detailDataFrame.createTempView("detail");

            // Query the views with plain SQL and print the results.
            Dataset<Row> dateSql = session.sql("select * from date");
            Dataset<Row> detailSql = session.sql("select * from detail");

            dateSql.show();
            detailSql.show();
        }
    }
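
    Once the temp views are registered, any Spark SQL statement can run against them, not just select *. Because every Details field is a String, numeric work needs explicit casts. A hedged example that could be appended inside main above (the column names follow from the bean; the actual file contents are not shown in the post):

    // Total quantity and amount per product; num and amount arrive as strings,
    // so cast them before aggregating.
    Dataset<Row> perShop = session.sql(
            "select shop, sum(cast(num as int)) as total_num, " +
            "sum(cast(amount as double)) as total_amount " +
            "from detail group by shop");
    perShop.show();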