• SparkSQL实验5.2


    2.编程实现将 RDD 转换为 DataFrame
    import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
    import org.apache.spark.sql.Encoder
    import spark.implicits._
    // Method 1 (reflection): map each text line to a case class and call toDF(),
    // letting Spark infer the DataFrame schema from the Employee fields.
    object RDDtoDF {
      import org.apache.spark.sql.SparkSession

      // NOTE: the case class must live OUTSIDE main(). Declaring it inside a
      // method prevents Spark from deriving an Encoder for it
      // ("No TypeTag available for Employee").
      case class Employee(id: Long, name: String, age: Long)

      def main(args: Array[String]): Unit = {
        // A standalone application must build its own SparkSession; the
        // pre-created `spark` value only exists inside spark-shell.
        val spark = SparkSession.builder().appName("RDDtoDF").getOrCreate()
        import spark.implicits._ // enables .toDF() and the String encoder below

        // Each input line is "id,name,age"; trim numeric fields before parsing.
        val employeeDF = spark.sparkContext
          .textFile("file:///usr/local/spark/employee.txt")
          .map(_.split(","))
          .map(attributes =>
            Employee(attributes(0).trim.toLong, attributes(1), attributes(2).trim.toLong))
          .toDF()

        employeeDF.createOrReplaceTempView("employee")
        val employeeRDD = spark.sql("select id,name,age from employee")
        // Render each row as "id:..,name:..,age:.." and print the first rows.
        employeeRDD.map(t => "id:" + t(0) + "," + "name:" + t(1) + "," + "age:" + t(2)).show()

        spark.stop()
      }
    }
    

      

    方法二:使用编程接口,构造一个 schema 并将其应用在已知的 RDD 上。
    import org.apache.spark.sql.{Encoder, Row, SparkSession}
    import org.apache.spark.sql.types._
    // Method 2 (programmatic schema): build a StructType by hand and apply it
    // to an RDD[Row] via createDataFrame — used when the schema is only known
    // at runtime and a case class cannot be written in advance.
    object RDDtoDF {
      import org.apache.spark.sql.{Row, SparkSession}
      import org.apache.spark.sql.types.{StringType, StructField, StructType}

      def main(args: Array[String]): Unit = {
        // A standalone application must build its own SparkSession; the
        // pre-created `spark` value only exists inside spark-shell.
        val spark = SparkSession.builder().appName("RDDtoDF").getOrCreate()
        // Needed for the Encoder[String] used by results.map(...).show() below.
        import spark.implicits._

        val employeeRDD =
          spark.sparkContext.textFile("file:///usr/local/spark/employee.txt")

        // One nullable StringType column per whitespace-separated field name.
        val schemaString = "id name age"
        val fields = schemaString.split(" ").map(fieldName =>
          StructField(fieldName, StringType, nullable = true))
        val schema = StructType(fields)

        // Convert each "id,name,age" line into a Row matching the schema's column order.
        val rowRDD = employeeRDD
          .map(_.split(","))
          .map(attributes => Row(attributes(0).trim, attributes(1), attributes(2).trim))

        val employeeDF = spark.createDataFrame(rowRDD, schema)
        employeeDF.createOrReplaceTempView("employee")
        val results = spark.sql("SELECT id,name,age FROM employee")
        results.map(t => "id:" + t(0) + "," + "name:" + t(1) + "," + "age:" + t(2)).show()

        spark.stop()
      }
    }
    

      

  • 相关阅读:
    使用公钥登录SSL
    javascript看你能够做对几题
    windows 与fedora时间差
    Linux 启动直接进入 console,
    fedora -- java多版本切换
    fedora 解决yumBackend.py进程CPU占用过高
    fedora 禁止nouveau加载
    联邦学习中的隐私研究
    优秀博客链接
    【论文学习11】GIANT: Globally Improved Approximate Newton Method for Distributed Optimization
  • 原文地址:https://www.cnblogs.com/huaobin/p/15945500.html
Copyright © 2020-2023  润新知