• JDBC的ResultSet游标转spark的DataFrame,数据类型的映射以TeraData数据库为例


    1.编写给ResultSet添加spark的schema成员及DF(DataFrame)成员

    /*
        spark、sc对象因为是全局的,没有导入,需自行定义
        teradata的字段类型转换成spark的数据类型
    */
    
    import java.sql.{ResultSet, ResultSetMetaData}
    
    import org.apache.spark.sql.types._
    import org.apache.spark.sql.{DataFrame, Row}
    
    object addDataframeMember {
    
      trait ResultSetMetaDataToSchema {
        def columnCount: Int
    
        def schema: StructType
      }
    
      implicit def wrapResultSetMetaData(rsmd: ResultSetMetaData) = {
        new ResultSetMetaDataToSchema {
          def columnCount = rsmd.getColumnCount
    
          def schema = {
            def tdCovert(tdDpeStr: String, precision: Int = 0, scale: Int = 0, className: String = ""): DataType = {
              tdDpeStr match {
                case "BYTEINT" => IntegerType
                case "SMALLINT" => Integerype
                case "INTEGER" => IntegerType
                case "BIGINT" => LongType
                case "FLOAT" => DoubleType
                case "CHAR" => StringType
                case "DECIMAL" => DecimalType(precision, scale)
                case "VARCHAR" => StringType
                case "BYTE" => ByteType
                case "VARBYTE" => ByteType
                case "DATE" => DateType
                case "TIME" => TimestampType
                case "TIMESTAMP" => TimestampType
                case "CLOB" => StringType
                case "BLOB" => BinaryType
                case "Structured UDT" => ObjectType(Class.forName(className))
              }
            }
    
            def col2StructField(rsmd: ResultSetMetaData, i: Int): StructField = StructField(rsmd.getColumnName(i), tdCovert(rsmd.getColumnTypeName(i), rsmd.getPrecision(i), rsmd.getScale(i), rsmd.getColumnClassName(i)), rsmd.isNullable(i) match { case 1 => true case 0 => false }).withComment(rsmd.getColumnLabel(i))
    
            def rsmd2Schema(rsmd: ResultSetMetaData): StructType = (1 to columnCount).map(col2StructField(rsmd, _)).foldLeft(new StructType)((s: StructType, i: StructField) => s.add(i))
    
            rsmd2Schema(rsmd)
          }
        }
      }
    
      trait ResultSetToDF {
        def schema: StructType
    
        def DF: DataFrame
      }
    
      implicit def wrapResultSet(rs: ResultSet) = {
        def rsmd = rs.getMetaData
    
        def toList[T](retrieve: ResultSet => T): List[T] = Iterator.continually((rs.next(), rs)).takeWhile(_._1).map(r => r._2).map(retrieve).toList
    
        def rsContent2Row(rs: ResultSet): Row = Row.fromSeq(Array.tabulate[Object](rsmd.columnCount)(i => rs.getObject(i + 1)).toSeq)
    
        new ResultSetToDF {
          def schema = rsmd.schema
    
          def DF = spark.createDataFrame(sc.parallelize(toList(rsContent2Row)), schema)
        }
    
      }
    
    
    }
    

      

    2.正常基于JDBC连接并且获得数据集游标

    import java.sql.{Connection, DriverManager}
    
    /*
        获取TeraData的连接
    */
    
    val (dialect, host, user, passwd, database, charset) = ("teradata", "ip", "user", "password", "database", "ASCII")
    val tdConf = collection.immutable.Map(
      "driver" -> "com.ncr.teradata.TeraDriver",
      "uri" -> s"jdbc:$dialect://$host/CLIENT_CHARSET=EUC_CN,TMODE=TERA,COLUMN_NAME=ON,CHARSET=ASCII,database=$database",
      "username" -> user,
      "password" -> passwd
    )
    
    def getTeraConn: Connection = {
      Class.forName(tdConf("driver"))
      DriverManager.getConnection(tdConf("uri"), tdConf("username"), tdConf("password"))
    }
    val sql = "SELECT TOP 10 * FROM xxx"
    var conn = getTeraConn
    val stmt = conn.createStatement()
    val rs = stmt.executeQuery(sql)
    

    3.导入隐式转换,调用成员

    import addDataframeMember.wrapResultSet
    rs.DF.show()
    

      

  • 相关阅读:
    DBImport v3.44 中文版发布:数据库数据互导及文档生成工具(IT人员必备)
    IT人生知识分享:博弈论的理性思维
    IT人生知识分享:概率与运气
    开源:秋式广告杀手源码
    浅说秋色园域名被国家互联网应急中心封与解的过程
    自定义可视化调试工具(Microsoft.VisualStudio.DebuggerVisualizers)
    Excel导入导出的业务进化场景及组件化的设计方案(上)
    回忆录:30岁那年,你成长了吗?(上篇)
    Excel导入导出组件的设计
    DBImport v3.3 中文版发布:数据库数据互导及文档生成工具(IT人员必备)
  • 原文地址:https://www.cnblogs.com/shld/p/11803503.html
Copyright © 2020-2023  润新知