• spring-boot集成spark并使用spark-sql


    首先添加相关依赖:

    <?xml version="1.0" encoding="UTF-8"?>
    
    <!-- Build descriptor for spark-example: a Spring Boot 1.5.6 application that depends on
         Spark 1.6.1 (Scala 2.10 artifacts) and is packaged with maven-shade-plugin. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
    
      <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.5.6.RELEASE</version>
        <relativePath />
      </parent>
    
      <groupId>com.cord</groupId>
      <artifactId>spark-example</artifactId>
      <version>1.0-SNAPSHOT</version>
    
      <name>spark-example</name>
      <!-- FIXME change it to the project's website -->
      <url>http://www.example.com</url>
    
      <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
        <!-- Version applied to the scala-library dependency declared below. -->
        <scala.version>2.10.3</scala.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
      </properties>
    
      <dependencies>
        <dependency>
          <groupId>org.springframework.boot</groupId>
          <artifactId>spring-boot-starter</artifactId>
          <version>1.5.6.RELEASE</version>
            <!-- Drop Spring Boot's default logging starter from the classpath. -->
            <exclusions>
                <exclusion>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-starter-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- The Spark and Scala artifacts below use provided scope: they are needed to compile
             but are expected to come from the Spark runtime rather than from this build's jar. -->
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-core_2.10</artifactId>
          <version>1.6.1</version>
          <scope>provided</scope>
            <!-- Exclude the slf4j/log4j binding and log4j itself from spark-core's transitive dependencies. -->
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-sql_2.10</artifactId>
          <version>1.6.1</version>
          <scope>provided</scope>
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-hive_2.10</artifactId>
          <version>1.6.1</version>
          <scope>provided</scope>
        </dependency>
        <dependency>
          <groupId>org.scala-lang</groupId>
          <artifactId>scala-library</artifactId>
          <version>${scala.version}</version>
          <scope>provided</scope>
        </dependency>
        <!-- yarn-cluster mode (original author's note; presumably the MySQL JDBC driver has to be
             bundled into the application jar when running in that mode. TODO confirm) -->
        <dependency>
          <groupId>mysql</groupId>
          <artifactId>mysql-connector-java</artifactId>
          <version>5.1.22</version>
        </dependency>
      </dependencies>
    
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-shade-plugin</artifactId>
                    <!-- Puts org.springframework.boot.maven.PropertiesMergingResourceTransformer
                         (referenced in the transformers below) on the shade plugin's classpath. -->
                    <dependencies>
                        <dependency>
                            <groupId>org.springframework.boot</groupId>
                            <artifactId>spring-boot-maven-plugin</artifactId>
                            <version>1.5.6.RELEASE</version>
                        </dependency>
                    </dependencies>
                    <configuration>
                        <keepDependenciesWithProvidedScope>false</keepDependenciesWithProvidedScope>
                        <createDependencyReducedPom>false</createDependencyReducedPom>
                        <!-- Leave jar signature files out of the merged jar; the filter applies to every artifact. -->
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <!-- Combine the Spring metadata files and service descriptors that several jars
                             contribute under the same path, and write the Main-Class manifest entry. -->
                        <transformers>
                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>META-INF/spring.handlers</resource>
                            </transformer>
                            <transformer
                                    implementation="org.springframework.boot.maven.PropertiesMergingResourceTransformer">
                                <resource>META-INF/spring.factories</resource>
                            </transformer>
                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>META-INF/spring.schemas</resource>
                            </transformer>
                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <mainClass>com.cord.StartApplication</mainClass>
                            </transformer>
                        </transformers>
                    </configuration>
                    <!-- Run the shade goal during the package phase. -->
                    <executions>
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>shade</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    </project>
    

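    需要注意两点:一是依赖中排除了日志模块(spring-boot-starter 中的 spring-boot-starter-logging,以及 spark-core 中的 slf4j-log4j12 和 log4j);二是打包方式比较特殊,使用 maven-shade-plugin 将应用打成单个 jar,并合并 META-INF 下的 spring.handlers、spring.factories、spring.schemas 等文件。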

    定义配置类:

    SparkContextBean.class

    /**
     * Spring configuration that exposes the Spark entry points ({@link SparkConf},
     * {@link JavaSparkContext} and {@link HiveContext}) as beans.
     *
     * <p>Every bean method is annotated with {@code @ConditionalOnMissingBean}, so a bean
     * of the same type defined elsewhere replaces the default created here.
     */
    @Configuration
    public class SparkContextBean {
    
        /** Application name set on the Spark configuration. */
        private String appName = "sparkExp";
    
        /** Fallback master URL, used only when {@code spark.master} is not already set. */
        private String master = "local";
    
        /**
         * Builds the Spark configuration.
         *
         * <p>The master is applied with {@code setIfMissing} rather than {@code setMaster}:
         * values set explicitly on a {@link SparkConf} take precedence over the flags passed
         * to {@code spark-submit}, so an unconditional {@code setMaster("local")} would make
         * the job run locally even when it is submitted with a different {@code --master}.
         *
         * @return the configuration carrying the application name and the effective master
         */
        @Bean
        @ConditionalOnMissingBean(SparkConf.class)
        public SparkConf sparkConf() throws Exception {
            return new SparkConf()
                    .setAppName(appName)
                    .setIfMissing("spark.master", master);
        }
    
        /**
         * Creates the Java Spark context from {@link #sparkConf()}.
         *
         * @return a new {@link JavaSparkContext}
         */
        @Bean
        @ConditionalOnMissingBean
        public JavaSparkContext javaSparkContext() throws Exception {
            return new JavaSparkContext(sparkConf());
        }
    
        /**
         * Creates the Hive-enabled SQL context on top of {@link #javaSparkContext()}.
         *
         * @return a new {@link HiveContext}
         */
        @Bean
        @ConditionalOnMissingBean
        public HiveContext hiveContext() throws Exception {
            return new HiveContext(javaSparkContext());
        }
        ......
    }
    

    启动类:

    StartApplication.class

    /**
     * Spring Boot entry point that runs one Hive query after the application context
     * has started and prints the result.
     */
    @SpringBootApplication
    public class StartApplication implements CommandLineRunner {
    
        /** Hive-backed SQL context injected by Spring. */
        @Autowired
        private HiveContext hc;
    
        /**
         * Starts the Spring application.
         *
         * @param args command-line arguments, passed through to Spring Boot
         */
        public static void main(String[] args) {
            SpringApplication.run(StartApplication.class, args);
        }
    
        /**
         * Queries the row count of {@code LCS_DB.STAFF_INFO}, collects the result
         * and prints every collected value on its own line to standard output.
         *
         * @param args command-line arguments (not used)
         * @throws Exception declared by the overridden method
         */
        @Override
        public void run(String... args) throws Exception {
            DataFrame countFrame = hc.sql("select count(1) from LCS_DB.STAFF_INFO");
            // The first (and only) column of each row holds the count as a long.
            List<Long> counts = countFrame.javaRDD()
                    .map((Function<Row, Long>) row -> row.getLong(0))
                    .collect();
            for (Long count : counts) {
                System.out.println(count);
            }
        }
    }
    

    执行方式:

    # Trailing backslashes join the options into a single command; without them
    # the shell would run "spark-submit" with no arguments and then try to
    # execute each option line as a separate command.
    spark-submit \
        --class com.cord.StartApplication \
        --executor-memory 4G \
        --num-executors 8 \
        --master yarn-client \
        /data/cord/spark-example-1.0-SNAPSHOT.jar
    

    参考链接:

    https://stackoverflow.com/questions/45189701/submitting-spring-boot-application-jar-to-spark-submit

    https://my.oschina.net/woter/blog/1843755

  • 相关阅读:
    1.17 Python基础知识
    反射
    面向对象——类
    异常处理
    面向对象——静态方法、类方法、属性方法,其他特殊方法
    面向对象——继承
    question
    configparser模块
    hashlib模块
    shelve模块
  • 原文地址:https://www.cnblogs.com/cord/p/9530404.html
Copyright © 2020-2023  润新知