• spark以rdd方式读写mysql


    1、读取mysql数据。从mysql读取的时候需要传入数据边界,数据类型是long,一般建议主键列,如果是时间列,需要转换成时间戳。

    参考demo:https://github.com/asker124143222/spark-demo

    package com.home.spark
    
    
    import java.sql.DriverManager
    import java.time.{LocalDateTime, ZoneOffset}
    
    import org.apache.spark.rdd.{JdbcRDD, RDD}
    import org.apache.spark.{SparkConf, SparkContext}
    
    
    object Ex_mysql {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(true).setMaster("local[*]").setAppName("spark mysql demo")
    
        val sc = new SparkContext(conf)
    
        val driverClassName = "com.mysql.jdbc.Driver"
        val url = "jdbc:mysql://localhost:3306/busdata?characterEncoding=utf8&useSSL=false"
        val user = "root"
        val password = "root"
    
        //mysql里时间类型为datetime,传入的条件为时间戳
        val sql = "select userId,userName,name from user where createTime > from_unixtime(?) and createTime < from_unixtime(?)"
    
    
        val connection = () => {
          Class.forName(driverClassName)
          DriverManager.getConnection(url, user, password)
        }
    
        val startTime = LocalDateTime.of(2018, 11, 3, 0, 0, 0)
        val endTime = LocalDateTime.of(2018, 11, 4, 0, 0)
        //mysql的时间戳只有10位,需要把java里的13位时间戳降低精度,直接除以1000
        val startTimeStamp = startTime.toInstant(ZoneOffset.ofHours(8)).toEpochMilli / 1000
        val endTimeStamp = endTime.toInstant(ZoneOffset.ofHours(8)).toEpochMilli / 1000
    
        println("startTime: " + startTime + ", endTime: " + endTime)
        println("startTime: " + startTimeStamp + ", endTime: " + endTimeStamp)
    
        //读取
        val result: JdbcRDD[(Int, String, String)] = new JdbcRDD[(Int, String, String)](
          sc,
          connection,
          sql,
          startTimeStamp,
          endTimeStamp,
          2,
          rs => {
    
            val userId = rs.getInt(1)
            val userName = rs.getString(2)
            val name = rs.getString(3)
            //        println(s"id:${userId},userName:${userName},name:${name}")
            (userId, userName, name)
          }
        )
        result.collect().foreach(println)
    
        sc.stop()
      }
    
    }

    2、写mysql。减少连接创建次数,使用foreachPartition,而不是foreach

    package com.home.spark
    
    import java.sql.{DriverManager, PreparedStatement}
    import java.time.LocalDateTime
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    import scala.collection.mutable
    
    object Ex_mysql2 {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(true).setMaster("local[*]").setAppName("spark mysql demo")
    
        val sc = new SparkContext(conf)
    
        val driverClassName = "com.mysql.jdbc.Driver"
        val url = "jdbc:mysql://localhost:3306/busdata?characterEncoding=utf8&useSSL=false"
        val user = "root"
        val password = "root"
    
    
        //写入
        val logBuffer = mutable.ListBuffer[(String, String, String, String, String, String)]()
        import java.time.format.DateTimeFormatter
        val ofPattern = DateTimeFormatter.ofPattern("yyyy-MM-dd hh:mm:ss")
        for (i <- 1 to 100) {
          logBuffer.+=(("write" + i, "写入测试" + i, "localhost" + i, LocalDateTime.now().format(ofPattern), "spark", LocalDateTime.now().format(ofPattern)))
        }
    
        //    logBuffer.foreach(println)
        val logRDD: RDD[(String, String, String, String, String, String)] = sc.makeRDD(logBuffer)
    
    
        //为了减少连接创建次数,使用foreachPartition,而不是foreach
        //缺陷:所有按Partition方式传输整个迭代器的方式都有OOM的风险
        logRDD.foreachPartition(logData => {
          Class.forName(driverClassName)
          val connection = DriverManager.getConnection(url, user, password)
          val sql = "insert into syslog(action, event, host, insertTime, userName, update_Time) values(?,?,?,?,?,?)"
          val statement: PreparedStatement = connection.prepareStatement(sql)
          try {
            logData.foreach {
              case (action, event, host, insertTime, userName, updateTime) => {
    
                statement.setString(1, action)
                statement.setString(2, event)
                statement.setString(3, host)
                statement.setString(4, insertTime)
                statement.setString(5, userName)
                statement.setString(6, updateTime)
                statement.executeUpdate()
              }
            }
          }
          finally {
            if(statement!=null) statement.close()
            if(connection!=null) connection.close()
          }
    
    
          connection.close()
        }
        )
    
        sc.stop()
      }
    
    }
  • 相关阅读:
    WPF开发学生信息管理系统【WPF+Prism+MAH+WebApi】(三)
    WPF开发学生信息管理系统【WPF+Prism+MAH+WebApi】(四)
    WPF开发学生信息管理系统【WPF+Prism+MAH+WebApi】(完)
    Backend Serverside Programing (Nginx)
    Sec资产管理——SwebUI开源应用解决方案
    SwebONE 项目还原部署教程
    SWebONE ,OA办公——SwebUI开源应用解决方案
    使用activityViewModels()或viewModels()报错的原因
    Inheritance from an interface with '@JvmDefault' members is only allowed with Xjvmdefault option
    Android Studio终端通过命令在gitlab上创建分支
  • 原文地址:https://www.cnblogs.com/asker009/p/12092362.html
Copyright © 2020-2023  润新知