一、pyspark.sql.DataFrame 与 pandas.DataFrame 之间的相互转换:
# pandas -> Spark: build a Spark DataFrame from the pandas rows and column names.
# Assumes `spark` is an existing SparkSession and `pandas_df` an existing
# pandas.DataFrame; neither is created in this snippet.
# NOTE(review): spark.createDataFrame(pandas_df) also accepts a pandas DataFrame
# directly per the PySpark docs — confirm whether the explicit list form is needed.
values = pandas_df.values.tolist()
columns = pandas_df.columns.tolist()
spark_df = spark.createDataFrame(values, columns)

# Spark -> pandas: toPandas() gathers the whole DataFrame onto the driver,
# so the data has to fit in driver memory.
pandas_df = spark_df.toPandas()