// Print the number of partitions backing the DataFrame.
println(s"--------------------${data.rdd.getNumPartitions}")

// Summarise every partition on the executors. glom() turns each partition into a
// single array; the map keeps only that array's runtime class name and its length,
// so collect() brings one small pair per partition back to the driver instead of
// every row of the DataFrame.
val partitions = data.rdd
  .glom()
  .map(part => (part.getClass.getName, part.length))
  .collect()

// One output line per partition: array class name, separator, row count.
partitions.foreach { case (className, rowCount) =>
  println(className + "::::::::" + rowCount)
}
结果:
--------------------100 [Lorg.apache.spark.sql.Row;::::::::61516 [Lorg.apache.spark.sql.Row;::::::::61656 [Lorg.apache.spark.sql.Row;::::::::61991 [Lorg.apache.spark.sql.Row;::::::::61269 [Lorg.apache.spark.sql.Row;::::::::61654 [Lorg.apache.spark.sql.Row;::::::::61780 [Lorg.apache.spark.sql.Row;::::::::62059 [Lorg.apache.spark.sql.Row;::::::::61675 [Lorg.apache.spark.sql.Row;::::::::61339 [Lorg.apache.spark.sql.Row;::::::::61783 [Lorg.apache.spark.sql.Row;::::::::61620 [Lorg.apache.spark.sql.Row;::::::::61883 [Lorg.apache.spark.sql.Row;::::::::61631 [Lorg.apache.spark.sql.Row;::::::::61930 [Lorg.apache.spark.sql.Row;::::::::61451 [Lorg.apache.spark.sql.Row;::::::::61797 [Lorg.apache.spark.sql.Row;::::::::61367 [Lorg.apache.spark.sql.Row;::::::::61647 [Lorg.apache.spark.sql.Row;::::::::61488 [Lorg.apache.spark.sql.Row;::::::::61584 [Lorg.apache.spark.sql.Row;::::::::61733 [Lorg.apache.spark.sql.Row;::::::::61491 [Lorg.apache.spark.sql.Row;::::::::61809 [Lorg.apache.spark.sql.Row;::::::::61062 [Lorg.apache.spark.sql.Row;::::::::61658 [Lorg.apache.spark.sql.Row;::::::::61599 [Lorg.apache.spark.sql.Row;::::::::61911 [Lorg.apache.spark.sql.Row;::::::::61602 [Lorg.apache.spark.sql.Row;::::::::61348 [Lorg.apache.spark.sql.Row;::::::::61677 [Lorg.apache.spark.sql.Row;::::::::61722 [Lorg.apache.spark.sql.Row;::::::::61482 [Lorg.apache.spark.sql.Row;::::::::61714 [Lorg.apache.spark.sql.Row;::::::::61241 [Lorg.apache.spark.sql.Row;::::::::61737 [Lorg.apache.spark.sql.Row;::::::::62015 [Lorg.apache.spark.sql.Row;::::::::62062 [Lorg.apache.spark.sql.Row;::::::::61557 [Lorg.apache.spark.sql.Row;::::::::61607 [Lorg.apache.spark.sql.Row;::::::::61175 [Lorg.apache.spark.sql.Row;::::::::61653 [Lorg.apache.spark.sql.Row;::::::::61460 [Lorg.apache.spark.sql.Row;::::::::61705 [Lorg.apache.spark.sql.Row;::::::::61492 [Lorg.apache.spark.sql.Row;::::::::61340 [Lorg.apache.spark.sql.Row;::::::::61767 [Lorg.apache.spark.sql.Row;::::::::61756 [Lorg.apache.spark.sql.Row;::::::::61793 
[Lorg.apache.spark.sql.Row;::::::::61417 [Lorg.apache.spark.sql.Row;::::::::61376 [Lorg.apache.spark.sql.Row;::::::::62039 [Lorg.apache.spark.sql.Row;::::::::61571 [Lorg.apache.spark.sql.Row;::::::::61849 [Lorg.apache.spark.sql.Row;::::::::61553 [Lorg.apache.spark.sql.Row;::::::::61612 [Lorg.apache.spark.sql.Row;::::::::61980 [Lorg.apache.spark.sql.Row;::::::::61714 [Lorg.apache.spark.sql.Row;::::::::62376 [Lorg.apache.spark.sql.Row;::::::::61884 [Lorg.apache.spark.sql.Row;::::::::61273 [Lorg.apache.spark.sql.Row;::::::::61669 [Lorg.apache.spark.sql.Row;::::::::61695 [Lorg.apache.spark.sql.Row;::::::::61515 [Lorg.apache.spark.sql.Row;::::::::61247 [Lorg.apache.spark.sql.Row;::::::::61909 [Lorg.apache.spark.sql.Row;::::::::61879 [Lorg.apache.spark.sql.Row;::::::::61913 [Lorg.apache.spark.sql.Row;::::::::61199 [Lorg.apache.spark.sql.Row;::::::::61678 [Lorg.apache.spark.sql.Row;::::::::61619 [Lorg.apache.spark.sql.Row;::::::::61909 [Lorg.apache.spark.sql.Row;::::::::61406 [Lorg.apache.spark.sql.Row;::::::::61775 [Lorg.apache.spark.sql.Row;::::::::61559 [Lorg.apache.spark.sql.Row;::::::::61773 [Lorg.apache.spark.sql.Row;::::::::61888 [Lorg.apache.spark.sql.Row;::::::::61634 [Lorg.apache.spark.sql.Row;::::::::61786 [Lorg.apache.spark.sql.Row;::::::::61666 [Lorg.apache.spark.sql.Row;::::::::61519 [Lorg.apache.spark.sql.Row;::::::::61563 [Lorg.apache.spark.sql.Row;::::::::61481 [Lorg.apache.spark.sql.Row;::::::::61295 [Lorg.apache.spark.sql.Row;::::::::61343 [Lorg.apache.spark.sql.Row;::::::::61750 [Lorg.apache.spark.sql.Row;::::::::61328 [Lorg.apache.spark.sql.Row;::::::::61650 [Lorg.apache.spark.sql.Row;::::::::61541 [Lorg.apache.spark.sql.Row;::::::::61397 [Lorg.apache.spark.sql.Row;::::::::61505 [Lorg.apache.spark.sql.Row;::::::::61761 [Lorg.apache.spark.sql.Row;::::::::61795 [Lorg.apache.spark.sql.Row;::::::::62291 [Lorg.apache.spark.sql.Row;::::::::61566 [Lorg.apache.spark.sql.Row;::::::::61213 [Lorg.apache.spark.sql.Row;::::::::62028 
[Lorg.apache.spark.sql.Row;::::::::62634 [Lorg.apache.spark.sql.Row;::::::::61838 [Lorg.apache.spark.sql.Row;::::::::61243 [Lorg.apache.spark.sql.Row;::::::::61585
样例:
--------------------100
[Lorg.apache.spark.sql.Row;::::::::61516 [Lorg.apache.spark.sql.Row;::::::::61656 [Lorg.apache.spark.sql.Row;::::::::61991 [Lorg.apache.spark.sql.Row;::::::::61269 [Lorg.apache.spark.sql.Row;::::::::61654 [Lorg.apache.spark.sql.Row;::::::::61780