• Spark: Reading Configuration to Connect to MySQL and Upload to HDFS


    1. Reading the configuration

    driver.properties

    #mysql
    driver=com.mysql.jdbc.Driver
    url=jdbc:mysql://192.168.56.111:3306/myshops2
    user=root
    password=root
    
    #hadoop
    hadoop_url=hdfs://192.168.56.111:9000
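
    This file sits on the classpath (typically under src/main/resources), which is where the utility below loads it from.
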
    ReadPropertiesFileTool.scala

    package com.njbdqn.util

    import java.util.Properties

    object ReadPropertiesFileTool {
      // Load driver.properties from the classpath and return the settings
      // for the requested subsystem: "mysql", or anything else for Hadoop
      def readProperties(flag: String): Map[String, String] = {
        val prop = new Properties()
        // load via the classloader stream so this also works when packaged in a jar
        prop.load(ReadPropertiesFileTool.getClass.getClassLoader
          .getResourceAsStream("driver.properties"))
        var map: Map[String, String] = Map.empty
        if (flag.equalsIgnoreCase("mysql")) {
          map += ("driver" -> prop.getProperty("driver"))
          map += ("url" -> prop.getProperty("url"))
          map += ("user" -> prop.getProperty("user"))
          map += ("password" -> prop.getProperty("password"))
        } else {
          map += ("hadoop_url" -> prop.getProperty("hadoop_url"))
        }
        map
      }
    }
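
    A quick sanity check of the reader; ReadPropertiesDemo below is not part of the original post, just a minimal sketch:

    package com.njbdqn.util

    object ReadPropertiesDemo extends App {
      // "mysql" returns the four JDBC settings
      val mysqlConf = ReadPropertiesFileTool.readProperties("mysql")
      println(mysqlConf("url"))         // jdbc:mysql://192.168.56.111:3306/myshops2

      // any other flag returns the Hadoop settings
      val hadoopConf = ReadPropertiesFileTool.readProperties("hadoop")
      println(hadoopConf("hadoop_url")) // hdfs://192.168.56.111:9000
    }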

    2. Reading the configuration from resources to work with MySQL

    MYSQLConnection.scala

    package com.njbdqn.util

    import java.util.Properties

    import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

    object MYSQLConnection {

      val paramMap = ReadPropertiesFileTool.readProperties("mysql")

      // Read the given database table into a DataFrame
      def readMySql(spark: SparkSession, tableName: String): DataFrame = {
        val map: Map[String, String] = Map(
          "driver" -> paramMap("driver"),
          "url" -> paramMap("url"),
          "user" -> paramMap("user"),
          "password" -> paramMap("password"),
          "dbtable" -> tableName
        )
        spark.read.format("jdbc").options(map) // adds input options for the JDBC data source
          .load()
      }

      // Write df to the given table, overwriting any existing data;
      // credentials and URL come from driver.properties rather than being hardcoded
      def writeTable(df: DataFrame, tableName: String): Unit = {
        val prop = new Properties()
        prop.put("driver", paramMap("driver"))
        prop.put("user", paramMap("user"))
        prop.put("password", paramMap("password"))
        df.write.mode(SaveMode.Overwrite).jdbc(paramMap("url"), tableName, prop)
      }

    }
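
    A usage sketch; the demo object and the table names user_info and user_info_bak are illustrative assumptions, not from the original post:

    package com.njbdqn.util

    import org.apache.spark.sql.SparkSession

    object MysqlDemo extends App {
      val spark = SparkSession.builder()
        .appName("mysql-demo")
        .master("local[*]")
        .getOrCreate()

      // read a table through the config-driven connection (table name is hypothetical)
      val df = MYSQLConnection.readMySql(spark, "user_info")
      df.show(5)

      // write it back out to another table (also hypothetical)
      MYSQLConnection.writeTable(df, "user_info_bak")

      spark.stop()
    }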

    3. Uploading to / downloading from HDFS

    HDFSConnection.scala

    package com.njbdqn.util

    import org.apache.spark.ml.classification.LogisticRegressionModel
    import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

    /**
     * HDFS operations
     */
    object HDFSConnection {

      val paramMap = ReadPropertiesFileTool.readProperties("hadoop")

      /**
       * Write a DataFrame to HDFS (Parquet, Spark's default save format)
       */
      def writeDataToHDFS(path: String, df: DataFrame): Unit = {
        df.write.mode(SaveMode.Overwrite).save(paramMap("hadoop_url") + path)
      }

      /**
       * Read Parquet data from the given HDFS path into memory
       */
      def readDataToHDFS(spark: SparkSession, path: String): DataFrame = {
        spark.read.parquet(paramMap("hadoop_url") + path)
      }

      /**
       * Load a saved LogisticRegression model from HDFS
       */
      def readLRModelToHDFS(path: String): LogisticRegressionModel = {
        LogisticRegressionModel.read.load(paramMap("hadoop_url") + path)
      }

      /**
       * Save a LogisticRegression model to HDFS
       */
      def writeLRModelToHDFS(lr: LogisticRegressionModel, path: String): Unit = {
        lr.save(paramMap("hadoop_url") + path)
      }

    }
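
    An end-to-end sketch tying the helpers together; the demo object, the toy training data, and the /data/training and /models/lr paths are illustrative assumptions:

    package com.njbdqn.util

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.linalg.Vectors
    import org.apache.spark.sql.SparkSession

    object HdfsDemo extends App {
      val spark = SparkSession.builder()
        .appName("hdfs-demo")
        .master("local[*]")
        .getOrCreate()
      import spark.implicits._

      // tiny labeled dataset, just enough to fit a model (made up for illustration)
      val training = Seq(
        (1.0, Vectors.dense(0.0, 1.1)),
        (0.0, Vectors.dense(2.0, 1.0)),
        (1.0, Vectors.dense(0.1, 1.2))
      ).toDF("label", "features")

      // round-trip the DataFrame as Parquet (path is illustrative)
      HDFSConnection.writeDataToHDFS("/data/training", training)
      HDFSConnection.readDataToHDFS(spark, "/data/training").show()

      // fit, save, and reload a LogisticRegression model (path is illustrative)
      val model = new LogisticRegression().fit(training)
      HDFSConnection.writeLRModelToHDFS(model, "/models/lr")
      println(HDFSConnection.readLRModelToHDFS("/models/lr").coefficients)

      spark.stop()
    }
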
  • Original post: https://www.cnblogs.com/sabertobih/p/13874061.html