• spark_to_es


    package es
    
    import java.io.InputStream
    import java.text.SimpleDateFormat
    import java.util.{Calendar, Date, Properties}
    
    import org.elasticsearch.spark.rdd.EsSpark
    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession
    import org.slf4j.LoggerFactory
    
    object ShoppingcartMarketToEs {
    
      private val log = LoggerFactory.getLogger(ShoppingcartMarketToEs.getClass)

      /** Settings loaded from the classpath resource `/elastic.properties`. */
      val prop = new Properties()

      /**
       * Stream the settings are read from. It is closed as soon as loading finishes;
       * the member is kept only so existing references to `is` still compile.
       */
      val is: InputStream = this.getClass().getResourceAsStream("/elastic.properties")
      // getResourceAsStream returns null when the resource is absent; fail with a
      // message that names the file instead of a bare NullPointerException.
      if (is == null) {
        throw new IllegalStateException("Classpath resource /elastic.properties not found")
      }
      try prop.load(is) finally is.close()

      /** Property key under which the Elasticsearch host is looked up. */
      val ENVIRONMENT_SETING = "es_host_sit"

      /** Elasticsearch node address read from `prop` under `ENVIRONMENT_SETING`. */
      val host = prop.getProperty(ENVIRONMENT_SETING)
      // A missing key would otherwise only surface later, when the null is handed to SparkConf.
      if (host == null) {
        throw new IllegalStateException(
          s"Property '$ENVIRONMENT_SETING' is missing from /elastic.properties")
      }
    
      /**
       * Copies the goods rows selected from Hive into Elasticsearch.
       *
       * Steps, in order:
       *  1. build a Hive-enabled SparkSession whose `es.nodes` is the configured `host`;
       *  2. register the zero-argument SQL function `get_utc_time`;
       *  3. run the select over `sospdm.tdm_wbank_opts_t_goods_info_newest_ed`
       *     (and print 10 rows unless the environment key contains "prd");
       *  4. convert every row to a `(document id, field map)` pair and write the pairs
       *     with `EsSpark.saveToEsWithMeta`.
       *
       * Every selected column is indexed as a string. Columns whose value is NULL are
       * left out of the document. The document id is `shop_id + gds_cd + gds_add_time`,
       * so those three columns must be non-null.
       *
       * @param args command-line arguments; not read.
       */
      def main(args: Array[String]): Unit = {
        val sparkConf = new SparkConf().setAppName("ReadSnCategoryToEs")
        sparkConf.set("es.nodes", host)
        sparkConf.set("es.nodes.wan.only", "true")
        // Optional settings that were left disabled:
        //    sparkConf.set("spark.sql.hive.metastorePartitionPruning", "false")
        //    sparkConf.set("es.port", "9200")
        //    sparkConf.set("es.index.auto.create", "true")
        //    sparkConf.set("es.batch.size.entries", "5000")
        //    sparkConf.set("es.write.operation", "upsert")

        val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()
        session.sql("use sospdm")

        // Current instant rendered as e.g. 2019-04-25T08:15:30.123+0000.
        // The formatter is pinned to UTC rather than shifting the clock by the local
        // zone offset and formatting in the default zone, which can be off by an hour
        // around daylight-saving transitions.
        session.udf.register("get_utc_time", () => {
          val utcFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS")
          utcFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"))
          utcFormat.format(new Date()) + "+0000"
        })

        val querySql = "select pid,shop_id,gds_cd,gds_nm,gds_add_num,gds_add_time,gds_price,expect_tran_price,l4_gds_grp_cd,l4_gds_grp_nm,category_cd,category_nm,brand_cd,brand_nm,'null' as create_user,'null' as update_user,create_time,update_time,get_utc_time() as `@timestamp` from sospdm.tdm_wbank_opts_t_goods_info_newest_ed"
        val resultDF = session.sql(querySql)
        if (!ENVIRONMENT_SETING.contains("prd")) {
          resultDF.show(10)
        }

        // Columns copied into each Elasticsearch document, under the same names.
        // update_time was selected and read before but never indexed; it is now included.
        // NOTE(review): if the target index uses a strict mapping, confirm it accepts update_time.
        val fieldNames = Seq(
          "pid", "shop_id", "gds_cd", "gds_nm", "gds_add_num", "gds_add_time",
          "gds_price", "expect_tran_price", "l4_gds_grp_cd", "l4_gds_grp_nm",
          "category_cd", "category_nm", "brand_cd", "brand_nm",
          "create_user", "update_user", "create_time", "update_time", "@timestamp")
        // Columns concatenated, in this order, to form the document id.
        val idFields = Seq("shop_id", "gds_cd", "gds_add_time")

        val tuple = resultDF.rdd.map { row =>
          // getAs[Any] avoids a cast to String, so non-string columns are stringified too;
          // Option(...) drops NULL columns instead of throwing a NullPointerException.
          val map: Map[String, Object] = fieldNames.flatMap { name =>
            Option(row.getAs[Any](name)).map(value => name -> (value.toString: Object))
          }.toMap

          // A NULL id component would otherwise yield colliding ids; fail with the column name.
          val docId = idFields.map { name =>
            map.getOrElse(name, throw new IllegalArgumentException(
              s"Column '$name' is null but is required to build the document id"))
          }.mkString

          (docId, map)
        }
        EsSpark.saveToEsWithMeta(tuple, "idx_shop_goods_addcart/idx_shop_goods_addcart")
      }
    
    
    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>spark-hive</groupId>
        <artifactId>spark-hive</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <!-- Build-wide properties: file encodings plus the Scala and Spark version coordinates. -->
        <properties>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
            <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
            <scala.version>2.11.8</scala.version>
            <!-- NOTE(review): 2.1.0.9 looks like an internal/vendor Spark build rather than an
                 upstream release number; confirm it resolves from the configured repositories. -->
            <spark.version>2.1.0.9</spark.version>
            <!-- Scala binary-version suffix appended to artifact ids such as spark-hive_2.11. -->
            <spark.artifactId.version>2.11</spark.artifactId.version>
        </properties>
        <!-- Project dependencies.
             NOTE(review): the Scala job shown with this POM imports only Spark, slf4j and
             elasticsearch-spark classes; the Kafka, Flink, HBase, MySQL and JSON artifacts are
             presumably used by other jobs in the project. Confirm before pruning. -->
        <dependencies>
            <!-- Logging and general-purpose utilities -->
            <dependency>
                <groupId>commons-logging</groupId>
                <artifactId>commons-logging</artifactId>
                <version>1.1.1</version>
                <type>jar</type>
            </dependency>
    
            <dependency>
                <groupId>org.apache.commons</groupId>
                <artifactId>commons-lang3</artifactId>
                <version>3.1</version>
            </dependency>
    
            <dependency>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
                <version>1.2.17</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>2.6.2</version>
            </dependency>
            <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>5.1.21</version>
            </dependency>
            <!-- Spark artifacts pinned literally to 2.1.0 (default compile scope) -->
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.11</artifactId>
                <version>2.1.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-streaming_2.11</artifactId>
                <version>2.1.0</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
                <version>2.1.0</version>
            </dependency>
            <dependency>
                <groupId>com.google.code.gson</groupId>
                <artifactId>gson</artifactId>
                <version>2.8.2</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.11</artifactId>
                <version>2.1.0</version>
            </dependency>
    
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.29</version>
            </dependency>
            <!-- spark-hive is the only Spark artifact here that takes its version from
                 ${spark.version} and uses provided scope.
                 NOTE(review): confirm the version/scope difference from spark-core/spark-sql
                 above is intended. -->
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-hive_${spark.artifactId.version}</artifactId>
                <version>${spark.version}</version>
                <scope>provided</scope>
            </dependency>
            <!--flink dependency-->
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-java</artifactId>
                <version>1.5.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-java_2.11</artifactId>
                <version>1.5.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-clients_2.11</artifactId>
                <version>1.5.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-connector-wikiedits_2.11</artifactId>
                <version>1.5.0</version>
            </dependency>
            <!--hbase dependency-->
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase</artifactId>
                <version>0.98.8-hadoop2</version>
                <type>pom</type>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-client</artifactId>
                <version>0.98.8-hadoop2</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-common</artifactId>
                <version>0.98.8-hadoop2</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-server</artifactId>
                <version>0.98.8-hadoop2</version>
            </dependency>
            <!-- Elasticsearch connector for Spark 2.x; the job imports
                 org.elasticsearch.spark.rdd.EsSpark from it. -->
            <dependency>
                <groupId>org.elasticsearch</groupId>
                <artifactId>elasticsearch-spark-20_${spark.artifactId.version}</artifactId>
                <version>6.7.1</version>
            </dependency>
            <!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
        </dependencies>
        <build>
            <plugins>
                <!-- Configures the jar-with-dependencies assembly descriptor. No execution is
                     declared here, so this section only supplies configuration. -->
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                    </configuration>
                </plugin>
                <!-- Registers the Scala source roots with Maven during generate-sources. -->
                <plugin>
                    <groupId>org.codehaus.mojo</groupId>
                    <artifactId>build-helper-maven-plugin</artifactId>
                    <version>1.8</version>
                    <executions>
                        <!-- Main sources: only src/main/scala. src/test/scala is no longer listed
                             here, because adding it as a main source root compiled test code
                             into the production artifact. -->
                        <execution>
                            <id>add-source</id>
                            <phase>generate-sources</phase>
                            <goals>
                                <goal>add-source</goal>
                            </goals>
                            <configuration>
                                <sources>
                                    <source>src/main/scala</source>
                                </sources>
                            </configuration>
                        </execution>
                        <!-- Test sources: src/test/scala. -->
                        <execution>
                            <id>add-test-source</id>
                            <phase>generate-sources</phase>
                            <goals>
                                <goal>add-test-source</goal>
                            </goals>
                            <configuration>
                                <sources>
                                    <source>src/test/scala</source>
                                </sources>
                            </configuration>
                        </execution>
                    </executions>
                </plugin>
                <!-- Java compiler settings: source/target level 1.7, using the project source encoding. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>2.3.2</version>
                    <configuration>
                        <!-- NOTE(review): confirm level 1.7 is still compatible with the dependency versions in use. -->
                        <source>1.7</source>
                        <target>1.7</target>
                        <encoding>${project.build.sourceEncoding}</encoding>
                    </configuration>
                </plugin>
                <!-- Compiles the Scala main and test sources.
                     NOTE(review): no plugin version is declared; consider pinning one so builds
                     are reproducible. -->
                <plugin>
                    <groupId>org.scala-tools</groupId>
                    <artifactId>maven-scala-plugin</artifactId>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>add-source</goal>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                    <configuration>
                        <!-- Reuses the scala.version property so the compiler version is
                             declared in exactly one place. -->
                        <scalaVersion>${scala.version}</scalaVersion>
                        <sourceDir>src/main/scala</sourceDir>
                        <!-- Heap limits passed to the JVM that runs the Scala compiler. -->
                        <jvmArgs>
                            <jvmArg>-Xms64m</jvmArg>
                            <jvmArg>-Xmx1024m</jvmArg>
                        </jvmArgs>
                    </configuration>
                </plugin>
                <!-- Pins the release plugin version; no further configuration is given. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-release-plugin</artifactId>
                    <version>2.5.3</version>
                </plugin>
                <!-- Deploy plugin with skip explicitly set to false, i.e. deployment stays enabled. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-deploy-plugin</artifactId>
                    <configuration>
                        <skip>false</skip>
                    </configuration>
                </plugin>
                <!-- Builds the shaded (fat) jar during the package phase. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-shade-plugin</artifactId>
                    <version>2.4.1</version>
                    <executions>
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>shade</goal>
                            </goals>
                            <configuration>
                                <filters>
                                    <!-- Strip signature files from every bundled artifact. The stray
                                         text "org.apache.hive" that sat between the exclude
                                         elements was not a valid entry and has been removed.
                                         NOTE(review): if excluding Hive classes was intended, add
                                         an explicit exclude pattern for it. -->
                                    <filter>
                                        <artifact>*:*</artifact>
                                        <excludes>
                                            <exclude>META-INF/*.SF</exclude>
                                            <exclude>META-INF/*.DSA</exclude>
                                            <exclude>META-INF/*.RSA</exclude>
                                        </excludes>
                                    </filter>
                                </filters>
                                <minimizeJar>false</minimizeJar>
                            </configuration>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
            <!-- Resource directories copied into the build output. -->
            <resources>
                <!-- Common resources, with Maven property filtering enabled. -->
                <resource>
                    <directory>src/main/resources</directory>
                    <filtering>true</filtering>
                </resource>
                <!-- Per-environment resources.
                     NOTE(review): profiles.active is not defined anywhere in the visible POM;
                     presumably it is supplied by a profile or -D flag. Confirm. -->
                <resource>
                    <directory>src/main/resources/${profiles.active}</directory>
                </resource>
            </resources>
    
            <!-- Fixes the Eclipse m2e error "Plugin execution not covered by lifecycle configuration" -->
            <pluginManagement>
                <plugins>
                    <plugin>
                        <groupId>org.eclipse.m2e</groupId>
                        <artifactId>lifecycle-mapping</artifactId>
                        <version>1.0.0</version>
                        <configuration>
                            <lifecycleMappingMetadata>
                                <pluginExecutions>
                                    <!-- Tell m2e to ignore the build-helper source-registration goals. -->
                                    <pluginExecution>
                                        <pluginExecutionFilter>
                                            <groupId>org.codehaus.mojo</groupId>
                                            <artifactId>build-helper-maven-plugin</artifactId>
                                            <versionRange>[1.8,)</versionRange>
                                            <goals>
                                                <goal>add-source</goal>
                                                <goal>add-test-source</goal>
                                            </goals>
                                        </pluginExecutionFilter>
                                        <action>
                                            <ignore></ignore>
                                        </action>
                                    </pluginExecution>
    
                                    <!-- Tell m2e to ignore the Scala plugin's compile goals. -->
                                    <pluginExecution>
                                        <pluginExecutionFilter>
                                            <groupId>org.scala-tools</groupId>
                                            <artifactId>maven-scala-plugin</artifactId>
                                            <versionRange>[1.8,)</versionRange>
                                            <goals>
                                                <goal>compile</goal>
                                                <goal>add-source</goal>
                                                <goal>testCompile</goal>
                                            </goals>
                                        </pluginExecutionFilter>
                                        <action>
                                            <ignore></ignore>
                                        </action>
                                    </pluginExecution>
                                </pluginExecutions>
                            </lifecycleMappingMetadata>
                        </configuration>
                    </plugin>
                </plugins>
            </pluginManagement>
        </build>
    </project>
    View Code
  • 相关阅读:
    通过Maven简单搭建SSM框架
    javaWeb常用面试题
    到底什么是对象,什么是对象的引用?对象和对象的引用有那些区别?
    第二章 python中重要的数据结构(下)
    第一章 python中重要的数据结构(上)
    springboot 集成完整的swagger2
    JAVA -> 数据加密和解密 留存
    mac rar文件解压缩
    java 图片合成文字或者awt包下的对话框引入自定义字体库
    java中list或数组中随机子集工具类
  • 原文地址:https://www.cnblogs.com/yin-fei/p/10769897.html
Copyright © 2020-2023  润新知