功能说明
前置条件
代码示例
# -*- coding: utf-8 -*-
import os
import datetime
import pandas as pd
import numpy as np
import math
def transform_time(file_dir):
    """Print a human-readable local time for every ``pcd`` file in *file_dir*.

    The last 16 characters of each file stem are read as a Unix timestamp in
    microseconds. For every matching file one line is printed: the position
    of the entry in the sorted directory listing, the formatted local time
    and the raw timestamp string.

    Args:
        file_dir: Directory whose entries are scanned (not recursive).

    Returns:
        None. The result is only printed.
    """
    # Sorted so the printed index is reproducible; os.listdir() gives no
    # ordering guarantee.
    for n, file_nm in enumerate(sorted(os.listdir(file_dir))):
        if not file_nm.endswith("pcd"):
            continue
        pcd_timestamp = os.path.splitext(file_nm)[0][-16:]
        if not pcd_timestamp.isdecimal():
            # The stem does not end in a numeric timestamp, so there is
            # nothing to convert; skip it rather than crash in int().
            continue
        # Microseconds -> seconds, then to a naive local datetime.
        timedata = int(pcd_timestamp) / 1000000
        time_local = datetime.datetime.fromtimestamp(timedata)
        # Rendered like 2016-05-05 20:28:54:123456
        dt = time_local.strftime("%Y-%m-%d %H:%M:%S:%f")
        print(n, dt, pcd_timestamp)
if __name__ == "__main__":
    # For every second of data keep at most two frames (the first one and
    # the middle one) and print their file names.
    file_dir = r"/home/test/data/"
    data_orig = []
    for file_nm in sorted(os.listdir(file_dir)):
        if file_nm.endswith("jpg"):
            stem = os.path.splitext(file_nm)[0]
            # Last 16 characters: microsecond timestamp; dropping its final
            # 6 digits leaves whole seconds.
            data_orig.append((file_nm, int(stem[-16:]), int(stem[-16:-6])))
    orig_df = pd.DataFrame(data_orig, columns=['file', 'pcd_micro', 'pcd_second'])
    # Convert whole seconds to datetimes so rows can be bucketed by time
    # (10-second windows here).
    orig_df['pcd_second'] = pd.to_datetime(orig_df['pcd_second'], unit='s')
    datt = orig_df.groupby(pd.Grouper(key='pcd_second', freq="10s"))
    for _window_start, or_data in datt:
        sig_list = []
        # Inside a window, group again by the exact second.
        for _second, sub_group_data in or_data.groupby('pcd_second'):
            k = len(sub_group_data) // 2
            if len(sub_group_data) >= 2:
                sig_list.append(sub_group_data.iloc[[0]])
                sig_list.append(sub_group_data.iloc[[k]])
            else:
                sig_list.append(sub_group_data)
        for data in sig_list:
            # Each entry is a one-row frame; print its file name.
            print(data['file'].iloc[0])
传统编程
##01.构建字典 -分组
字典映射-一个key映射多个value,通过将value组合成list或者set的方式,进行分组
想保持元素的插入顺序就应该使用列表,如果想去掉重复元素就使用集合
可以使用collections模块中的defaultdict来构造这样的字典。defaultdict的一个特点是:访问不存在的key时,会自动用工厂函数(如list或set)初始化该key对应的值,因此只需关注添加元素的操作。
##001.利用传统的dict
# Grouping with a plain dict: the list for a key has to be created by hand
# the first time that key is seen.
d = {}
for key, value in pairs:
    if key not in d:
        d[key] = []
    d[key].append(value)
###002.利用defaultdict的代码如下:
from collections import defaultdict

# defaultdict(list) creates the empty list on first access to a missing key,
# so no membership check is needed before appending.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)
##02.对各个分组--进行操作
# Indices of the frames to extract: choose_total_num evenly spaced positions from 0 to file_k-1 (both ends included).
# NOTE(review): with dtype=int fractional positions are rounded down, not up as the original note said — confirm the intent.
choose_num_index = np.linspace(0, file_k-1, choose_total_num, dtype=int)
numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
start:返回样本数据开始点
stop:返回样本数据结束点
num:生成的样本数据量,默认为50
endpoint:True则包含stop;False则不包含stop
retstep:If True, return (samples, step), where step is the spacing between samples.(即如果为True则结果会给出数据间隔)
dtype:输出数组类型
axis:0(默认)或-1
# numpy.logspace函数生成等比数列
## range函数 range(start, stop [,step]) 可以使用3个参数:起始值,终止值,间隔
range生成的是左闭右开区间 [start, stop):包含start,不包含stop。
Python2 中 range() 返回的是列表;本文基于Python3,range() 返回的是惰性的可迭代range对象,不会一次性生成整个列表。
类似的代码-进行实际应用
# Group image files by the second they belong to (plain-dict version).
image_group = {}
for image_file in image_list:
    # Presumably get_sec_time returns the whole-second part of a name such
    # as 1659173359.123156.jpg — verify against its definition.
    time_sec = get_sec_time(image_file)
    if time_sec not in image_group:
        image_group[time_sec] = []
    image_group[time_sec].append(image_file)
from collections import defaultdict

# Group image files by second; defaultdict creates each list on demand.
d = defaultdict(list)
for img_file in img_list:
    # Use the loop variable itself: the key must be derived from the file
    # that is appended on the next line.
    time_sec = get_sec_time(img_file)
    d[time_sec].append(img_file)
示例代码
# -*- coding: utf-8 -*-
import os
from glob import glob
def image_sample(image_path, interval_ps=2):
    """Pick up to *interval_ps* evenly spaced ``.jpg`` files for each second.

    File names are expected to look like ``1659173359.123156.jpg``: the text
    before the first dot is the second the frame belongs to, and the name
    without its ``.jpg`` suffix must parse as a float (used for ordering).

    Args:
        image_path: Directory searched for ``*.jpg`` files. glob only looks
            at this directory, not at sub-directories.
        interval_ps: Maximum number of frames kept per second (an integer).

    Returns:
        The selected paths, as produced by glob, ordered by timestamp.
        Seconds holding ``interval_ps`` frames or fewer are kept whole;
        larger ones contribute exactly ``interval_ps`` frames. An empty list
        is returned when ``interval_ps`` is smaller than 1.

    Raises:
        ValueError: If a file name without ``.jpg`` is not a valid float.
    """
    if interval_ps < 1:
        # Nothing may be kept; this also avoids a division by zero below.
        return []

    image_list = sorted(
        glob(os.path.join(image_path, '*.jpg')),
        key=lambda f: float(os.path.basename(f)[:-4]),
    )

    # Bucket by whole second; dict insertion order keeps the time ordering.
    image_group = {}
    for image_file in image_list:
        time_sec = os.path.basename(image_file).split('.')[0]
        image_group.setdefault(time_sec, []).append(image_file)

    choose_img = []
    for image_group_list in image_group.values():
        total = len(image_group_list)
        if total <= interval_ps:
            choose_img.extend(image_group_list)
            continue
        # i * total // interval_ps spreads interval_ps distinct indices over
        # the bucket. A rounded stride can overshoot the end of the bucket
        # and deliver fewer frames than requested.
        choose_img.extend(
            image_group_list[i * total // interval_ps]
            for i in range(interval_ps)
        )
    return choose_img
if __name__ == "__main__":
    # Keep at most three frames per second from the chosen directory.
    extra_interval = 3
    # NOTE(review): "/dta" looks like a typo for "/data" — confirm the path.
    choose_file = "/dta"
    time_dat = image_sample(choose_file, extra_interval)
    print(time_dat)
示例代码
##构建不同的数据结构,注意解耦合和扩展性
# Build one record per file so that path handling (source/target) stays
# separate from the per-second sampling further down.
frame_files = []
seconds = []
for filenames in os.listdir(image_file_path):
    file_second = get_file_second(filenames)
    # Remember each second once, in first-seen order.
    if file_second not in seconds:
        seconds.append(file_second)
    second_file = {
        'name': filenames,
        'second': file_second,
        'source': os.path.join(image_file_path, filenames),
        'target': os.path.join(image_file_path.replace('source', 'dest'), filenames),
    }
    frame_files.append(second_file)

interval_ps = 2
choose_img = []
for second in seconds:
    second_files = [image for image in frame_files if image['second'] == second]
    image_sec_len = len(second_files)
    if image_sec_len < interval_ps:
        pass
    else:
        # Stride between the frames to keep within this second.
        # NOTE(review): the snippet stops here — choose_img is never filled;
        # the selection step still has to be written.
        offset = round(image_sec_len / interval_ps)
参考
详解pd.Grouper()以及时间分组groupby() https://blog.csdn.net/weixin_46713695/article/details/125416343