1、对于时间格式数据的处理
有些时候 time_stamp 列是 object(字符串)格式,需要先转换/解析,再提取相应的日期、小时、星期等:
方法1
from datetime import datetime
# Method 1: parse the string-typed time_stamp column into datetime objects
# so the .dt accessor can be used. Expected layout: 'YYYY-MM-DD HH:MM'.
user_trad['time_stamp'] = user_trad['time_stamp'].apply(
    lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M')
)

# Derive the calendar features once each from the parsed column.
user_trad['hour'] = user_trad['time_stamp'].dt.hour
user_trad['date'] = user_trad['time_stamp'].dt.date
user_trad['weekday'] = user_trad['time_stamp'].dt.weekday  # pandas: 0 = Monday ... 6 = Sunday

# Map every hour to a category via am_pm_hour.
# NOTE(review): shop_hour_user_cnt and am_pm_hour are defined outside this
# snippet; presumably shop_hour_user_cnt is an aggregate built from
# user_trad that already carries an 'hour' column -- confirm.
shop_hour_user_cnt['hour_cate'] = shop_hour_user_cnt['hour'].apply(am_pm_hour)
方法2
# Method 2: derive the features by slicing the textual timestamp.
# The slice positions used below are [0:4] year, [5:7] month, [8:10] day
# and [11:13] hour.
# Every feature is computed per row rather than from row 0 only, so the
# result is also correct when join_table holds more than one row.
ts_text = join_table['time_stamp'].astype(str)

# Add the hour of day as an integer.
join_table['hour'] = ts_text.str[11:13].astype(int)
join_table['hour_type'] = join_table['hour'].map(am_pm_hour)

# Add the compact date string 'YYYYMMDD' and the day of the week
# (1 = Monday ... 7 = Sunday, hence the +1 on datetime.weekday()).
join_table['date'] = ts_text.str[0:4] + ts_text.str[5:7] + ts_text.str[8:10]
join_table['weekday'] = join_table['date'].map(
    lambda day: datetime.strptime(day, "%Y%m%d").weekday() + 1
)
2、
# De-duplicate shop visitors per hour: keep a single row for each
# (shop_id, user_id, hour) combination (pandas keeps the first occurrence
# by default).
tmp = user_trad.drop_duplicates(subset=['shop_id', 'user_id', 'hour'])