下面是一些工作过程中比较常用的工具方法,仅供参考,并不代表唯一或最终的答案。希望能对你有所帮助;如果你有更好、更多的工具方法,欢迎推荐!
1. 按行读取带json字符串的文件
# -*- coding:utf-8 -*-
import json
import sys
# Read a file that holds one JSON document per line and decode each line.
with open("./aldwx-log-20.20190619194001.json.COMPLETED", 'r') as load_f:
    for line in load_f:
        try:
            load_dict = json.loads(line)
        except ValueError:
            # Best effort: skip lines that are not valid JSON (blank lines,
            # truncated records). json.JSONDecodeError is a subclass of
            # ValueError, so this works on Python 2 and 3 without hiding
            # unrelated errors such as KeyboardInterrupt.
            continue
        # Example field access using a conditional expression as fallback:
        # server_time = load_dict['server_time']
        # et = load_dict['et'] if 'et' in load_dict else server_time
2. 获取每日的时间列表
# -*- coding:utf-8 -*-
import datetime
def getBetweenDay(days=7):
    """Return the most recent full days as ``YYYYMMDD`` strings.

    The list runs from ``days`` days ago up to and including yesterday, in
    ascending order. Today is never included. Dates are taken from the
    local clock (``datetime.datetime.now()``).

    Args:
        days: Number of days to return. Defaults to 7, which reproduces the
            original "last 7 days" behaviour. Zero or a negative value
            yields an empty list.

    Returns:
        A list of ``days`` date strings formatted with ``%Y%m%d``.
    """
    today = datetime.datetime.now().date()
    # Count the offset down so the oldest day comes first and yesterday last.
    return [
        (today - datetime.timedelta(days=offset)).strftime("%Y%m%d")
        for offset in range(days, 0, -1)
    ]
3. 计量单位格式化输出
# -*- coding:utf-8 -*-
#计量单位格式化输出
def formatData(value):
    """Format a byte count as a human-readable string such as ``'1.50 KB'``.

    Args:
        value: The size in bytes; anything accepted by ``float()``.

    Returns:
        A string with two decimals and a unit from B to PB, using 1024 as
        the step between units. Values of 1024 PB and above stay in PB
        (e.g. ``'1024.00 PB'``). Returns ``False`` when ``value`` cannot be
        converted to a number or is negative.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    try:
        size = float(value)
    except (TypeError, ValueError, OverflowError):
        return False
    if size < 0:
        return False
    # Only step through the units below the largest one: dividing again on
    # the last unit would label a value 1024 times too small as "PB".
    for unit in units[:-1]:
        if size < 1024:
            return '{0:.2f} {1}'.format(size, unit)
        # Intermediate results are rounded to 4 decimal places, as in the
        # original implementation.
        size = float('%.4f' % (size / 1024))
    # Very large values are capped at the PB unit.
    return '{0:.2f} {1}'.format(size, units[-1])
4. 元组转换为字典
# -*- coding:utf-8 -*-
def tuple_to_dic(results):
    """Group a sequence of pairs into a dict of lists.

    Args:
        results: An iterable of indexable rows. Element 0 of each row is
            used as the dict key and element 1 as the value.

    Returns:
        A dict mapping each distinct first element to the list of second
        elements that appeared with it, in input order.
    """
    tmp_dic = {}
    for row in results:
        # setdefault replaces dict.has_key(), which is gone in Python 3.
        tmp_dic.setdefault(row[0], []).append(row[1])
    return tmp_dic
示例:
代码如下
# -*- coding:utf-8 -*-
# Sample input: (date, value) pairs, as a database query might return them.
results = ((20190713, 1), (20190713, 2), (20190713, 3), (20190714, 4))


def tuple_to_dic(results):
    """Group a sequence of pairs into a dict of lists.

    Args:
        results: An iterable of indexable rows. Element 0 of each row is
            used as the dict key and element 1 as the value.

    Returns:
        A dict mapping each distinct first element to the list of second
        elements that appeared with it, in input order.
    """
    tmp_dic = {}
    for row in results:
        # setdefault replaces dict.has_key(), which is gone in Python 3.
        tmp_dic.setdefault(row[0], []).append(row[1])
    return tmp_dic


dic = tuple_to_dic(results)
# Call form of print works on both Python 2 and Python 3.
print(dic)
输出结果为
{20190713: [1, 2, 3], 20190714: [4]}
持续更新中...