• pyspark 日期格式


    1. 获取当前日期

    from pyspark.sql.functions import current_date
    
    spark.range(3).withColumn('date',current_date()).show()
    # +---+----------+
    # | id| date|
    # +---+----------+
    # | 0|2018-03-23|
    # | 1|2018-03-23|
    # | 2|2018-03-23|
    # +---+----------+

    2. 获取当前日期和时间
    from pyspark.sql.functions import current_timestamp
    
    spark.range(3).withColumn('date',current_timestamp()).show()
    # +---+--------------------+
    # | id| date|
    # +---+--------------------+
    # | 0|2018-03-23 17:40:...|
    # | 1|2018-03-23 17:40:...|
    # | 2|2018-03-23 17:40:...|
    # +---+--------------------+

    3. 日期格式转换

    from pyspark.sql.functions import date_format
    
    df = spark.createDataFrame([('2015-04-08',)], ['a'])
    
    df.select(date_format('a', 'MM/dd/yyy').alias('date')).show()

    # +----------+
    # |      date|
    # +----------+
    # |04/08/2015|
    # +----------+
    4. 字符串转日期

    from pyspark.sql.functions import to_date, to_timestamp
    
    # 1.转日期
    df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
    df.select(to_date(df.t).alias('date')).show()
    # 若改用 collect(),结果为: [Row(date=datetime.date(1997, 2, 28))]

    # 2.带时间的日期

    df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
    df.select(to_timestamp(df.t).alias('dt')).show()
    # 若改用 collect(),结果为: [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
    
    # 还可以指定日期格式
    df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
    df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).show()
    # 若改用 collect(),结果为: [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]

    5. 获取日期中的年月日

    from pyspark.sql.functions import year, month, dayofmonth
    
    df = spark.createDataFrame([('2015-04-08',)], ['a'])
    df.select(year('a').alias('year'), 
    month('a').alias('month'),
    dayofmonth('a').alias('day')
    ).show()

    6. 获取时分秒

    from pyspark.sql.functions import hour, minute, second
    df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    df.select(hour('a').alias('hour'),
    minute('a').alias('minute'),
    second('a').alias('second')
    ).show()

    7. 获取日期对应的季度

    from pyspark.sql.functions import quarter
    
    df = spark.createDataFrame([('2015-04-08',)], ['a'])
    df.select(quarter('a').alias('quarter')).show()

    8. 日期加减

    from pyspark.sql.functions import date_add, date_sub
    df = spark.createDataFrame([('2015-04-08',)], ['d'])
    df.select(date_add(df.d, 1).alias('d-add'),
    date_sub(df.d, 1).alias('d-sub')
    ).show()

    9. 月份加减

    from pyspark.sql.functions import add_months
    df = spark.createDataFrame([('2015-04-08',)], ['d'])
    
    df.select(add_months(df.d, 1).alias('d')).show()

    10. 日期差,月份差

    from pyspark.sql.functions import datediff, months_between
    
    # 1.日期差
    df = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2'])
    df.select(datediff(df.d2, df.d1).alias('diff')).show()
    
    # 2.月份差
    df = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['t', 'd'])
    df.select(months_between(df.t, df.d).alias('months')).show()

    11. 计算下一个指定星期几的日期

    计算给定日期之后的第一个指定星期几(星期一至星期日)所对应的具体日期,属于实用函数
    
    from pyspark.sql.functions import next_day
    
    # next_day 的第二个参数(星期几)可取: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
    df = spark.createDataFrame([('2015-07-27',)], ['d'])
    df.select(next_day(df.d, 'Sun').alias('date')).show()

    12. 本月的最后一个日期

    from pyspark.sql.functions import last_day
    
    df = spark.createDataFrame([('1997-02-10',)], ['d'])
    df.select(last_day(df.d).alias('date')).show()
  • 相关阅读:
    刚体动力学
    碰撞检测系统
    动画系统II
    动画系统
    Game Develop Books
    光照技术
    LR参数组取值操作方法
    loadrunner测试ajax框架
    ​Web(click and script) 与 Web(HTTP/HTML)协议区别
    性能测试常用的linux命令
  • 原文地址:https://www.cnblogs.com/xzjf/p/10082677.html
Copyright © 2020-2023  润新知