from pyspark.sql import SparkSession,Row
from pyspark.sql.types import StructField, StructType, StringType, IntegerType, LongType
# Reuse the active session when one already exists (pyspark shell, notebook);
# otherwise create one so the script also runs standalone, where no `spark`
# global is predefined.
spark = SparkSession.builder.getOrCreate()

# Sample records given as a mix of tuples and a list.
data = [('Alex', 'male', 3), ('Nancy', 'female', 6), ['Jack', 'male', 9]]
rdd_ = spark.sparkContext.parallelize(data)
# Explicit schema for the three columns. The third argument is `nullable`:
# True means the column is allowed to contain nulls.
# NOTE(review): the sample `num` values are ints but the column is declared
# StringType - confirm whether IntegerType was intended.
schema = (
    StructType()
    .add("name", StringType(), True)
    .add("gender", StringType(), True)
    .add("num", StringType(), True)
)
# Build the DataFrame from the RDD using the explicit schema; this works as
# long as every record has the same structure.
df = spark.createDataFrame(rdd_, schema=schema)
# show() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" line after the table).
df.show()