• 利用 Python(PySpark)将两张表连接(join)




    from pyspark.sql import SparkSession
    from pyspark.sql.types import *
    import os


    def getUser(spark, path):
        """Load the space-delimited user file and expose it as a temp view.

        The file is read with an explicit three-column schema and its first
        line is treated as a header row. The loaded data is registered as the
        temporary view ``users1`` and then read back through Spark SQL.

        Args:
            spark: Session object used for reading the file and running SQL.
            path: Location of the user file.

        Returns:
            The result of ``select * from users1``.
        """
        # "vedios" is the column name used at runtime; the spelling is kept
        # as-is so existing queries against this column keep working.
        user_schema = StructType([
            StructField("user", StringType(), True),
            StructField("vedios", StringType(), True),
            StructField("id", IntegerType(), True),
        ])
        df = spark.read.csv(path, schema=user_schema, sep=" ", header=True)
        # Register the view so the data can also be queried by name via SQL.
        df.createOrReplaceTempView("users1")
        return spark.sql("select * from users1")


    def getMovies(spark, path):
        """Load the movies CSV file and expose it as a temp view.

        The file is read with its first line treated as a header row; no
        explicit schema is supplied. The loaded data is registered as the
        temporary view ``movies`` and then read back through Spark SQL.

        Args:
            spark: Session object used for reading the file and running SQL.
            path: Location of the movies CSV file.

        Returns:
            The result of ``select * from movies``.
        """
        df = spark.read.csv(path, header=True)
        # Register the view so the data can also be queried by name via SQL.
        df.createOrReplaceTempView("movies")
        return spark.sql("select * from movies ")


    if __name__ == '__main__':
        # Raw string so the backslashes of the Windows path are preserved;
        # without them the JDK location cannot be resolved.
        os.environ['JAVA_HOME'] = r'C:\Program Files\Java\jdk1.8.0_211'
        # Debug output: prints the path module object in use.
        print(os.path)

        # Parentheses let the builder chain span several lines.
        spark = (
            SparkSession.builder
            .appName("Python Spark SQL basic example")
            .config("spark.some.config.option", "some-value")
            .getOrCreate()
        )
        try:
            path_user = "C:/Users/Administrator/Desktop/guiliVideo/user/2008/0903/user.txt"
            path_movies = "C:/Users/Administrator/Desktop/vedios.txt"

            df1 = getUser(spark, path_user)
            df2 = getMovies(spark, path_movies)

            # Inner join: keep only rows where the user name matches the
            # uploader column of the movies data.
            df3 = df1.join(df2, df1.user == df2.uploader, how='inner')
            df3.createOrReplaceTempView('table1')

            df4 = spark.sql('select * from table1 limit 10')
            df4.show()
        finally:
            # Release the session's resources even if a step above fails.
            spark.stop()
     
    ---------------------

  • 相关阅读:
    bzoj3574[Hnoi2014]抄卡组
    bzoj3576[Hnoi2014]江南乐
    [GDKOI2016]小学生数学题
    bzoj3572[Hnoi2014]世界树
    bzoj3571[Hnoi2014]画框
    bzoj3573[Hnoi2014]米特运输
    指数循环节
    bzoj4013[HNOI2015]实验比较
    bzoj4012[HNOI2015]开店
    bzoj1095[ZJOI2007]Hide 捉迷藏
  • 原文地址:https://www.cnblogs.com/ly570/p/11357427.html
Copyright © 2020-2023  润新知