安装依赖(版本可根据自己的环境调整):
pip install IPython==5.10.0 -i http://mirrors.sangfor.org/pypi/web/simple/ --trusted-host mirrors.sangfor.org
pip install setuptools==45.0.0 -i http://mirrors.sangfor.org/pypi/web/simple/ --trusted-host mirrors.sangfor.org
@profile  # line_profiler decorator; profile this function with `kernprof -l -v netflow.py`
def translate_one(self, log_name, log_param, file_list):
    """
    Parse one batch of files and buffer the resulting rows for batched insertion.

    :param log_name: key used to look up CK_INPUT_PREFIX_SQL and self.data_info
    :param log_param: mapping; its 'log_type' and 'file_type' entries are read here
    :param file_list: files passed to AvroReader.get_record_list; when empty,
        only the already-buffered rows are considered for flushing
    :return: True when the batch was handled, False if the record handler is None
    """
    sql_prefix = CK_INPUT_PREFIX_SQL[log_name]
    self._init_data_info(log_name)
    trans_info = self.data_info[log_name]
    if not file_list:
        # Nothing new to parse: flush the buffered rows once they have waited
        # at least DELAY_INSERT_TIME since the last insert.
        # The condition is parenthesised so it can legally span two lines.
        if (int(time.time()) - trans_info['last_insert_time'] >= DELAY_INSERT_TIME
                and trans_info['insert_index'] != 0):
            self.insert_data(sql_prefix, trans_info['data_list'][:trans_info['insert_index']])
            trans_info['insert_index'] = 0
            trans_info['last_insert_time'] = int(time.time())
        return True
    self.has_files = True  # there are files to parse
    record_handle = NetflowRecordDeal()
    if record_handle is None:
        logging.error('init deal handle error, logname: %s', log_name)
        return False

    total_count = 0  # number of records read
    insert_count = 0  # number of records buffered for insertion
    filtered_count = 0  # number of records filtered out
    start_time = time.time()  # start time, passed to the statistics writer
    avro_reader = AvroReader(log_name, log_param['log_type'], log_param['file_type'])
    for record in avro_reader.get_record_list(file_list):
        total_count += 1
        record_handle.deal_record(record, {}, log_param['log_type'])
        if not record or not record_handle.filter_record(record, {}):
            filtered_count += 1
            continue
        # data_list is written by index; insert_index is the next free slot.
        trans_info['data_list'][trans_info['insert_index']] = record_handle.record2str(record)
        trans_info['insert_index'] += 1
        insert_count += 1
        if trans_info['insert_index'] >= CK_BATCH_COUNT:
            # Buffer reached the batch size: insert the filled slice and reset it.
            self.insert_data(sql_prefix, trans_info['data_list'][:trans_info['insert_index']])
            trans_info['insert_index'] = 0
            trans_info['last_insert_time'] = int(time.time())
    # Rows still below CK_BATCH_COUNT stay buffered; they are not flushed here.
    Utils.save_stat([], start_time, total_count, insert_count, filtered_count, NETFLOW_PARSE_STAT_PATH)
    return True
.... # kernprof -l -v netflow.py
Wrote profile results to netflow.py.lprof
Timer unit: 1e-06 s

Total time: 3.96648 s
File: netflow.py
Function: translate_one at line 66

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    66                                           @profile
    67                                           def translate_one(self, log_name, log_param, file_list):
    68                                               """
    69                                               解析一批文件
    70                                               :param log_name:
    71                                               :param log_param:
    72                                               :param file_list:
    73                                               :return:
    74                                               """
    75         1          1.0      1.0      0.0      sql_prefix = CK_INPUT_PREFIX_SQL[log_name]
    76         1         31.0     31.0      0.0      self._init_data_info(log_name)
    77         1          1.0      1.0      0.0      trans_info = self.data_info[log_name]
    78         1          1.0      1.0      0.0      if not file_list:
    79                                                   if int(time.time()) - trans_info['last_insert_time'] >= DELAY_INSERT_TIME
    80                                                           and trans_info['insert_index'] != 0:
    81                                                       self.insert_data(sql_prefix, trans_info['data_list'][:trans_info['insert_index']])
    82                                                       trans_info['insert_index'] = 0
    83                                                       trans_info['last_insert_time'] = int(time.time())
    84                                                   return True
    85         1          1.0      1.0      0.0      self.has_files = True
    86         1         14.0     14.0      0.0      record_handle = NetflowRecordDeal()
    87         1          1.0      1.0      0.0      if record_handle is None:
    88                                                   logging.error('init deal handle error, logname: %s', log_name)
    89                                                   return False
    90
    91         1          1.0      1.0      0.0      total_count = 0
    92         1          1.0      1.0      0.0      insert_count = 0
    93         1          0.0      0.0      0.0      filtered_count = 0
    94         1          2.0      2.0      0.0      start_time = time.time()
    95         1          7.0      7.0      0.0      avro_reader = AvroReader(log_name,log_param['log_type'],log_param['file_type'])
    96     50001    2900011.0     58.0     73.1      for record in avro_reader.get_record_list(file_list):
    97     50000      44874.0      0.9      1.1          total_count += 1
    98     50000     262267.0      5.2      6.6          record_handle.deal_record(record, {}, log_param['log_type'])
    99     50000      72572.0      1.5      1.8          if not record or not record_handle.filter_record(record, {}):
   100                                                       filtered_count += 1
   101                                                       continue
   102     50000     293543.0      5.9      7.4          trans_info['data_list'][trans_info['insert_index']] = record_handle.record2str(record)
   103     50000      48448.0      1.0      1.2          trans_info['insert_index'] += 1
   104     50000      42369.0      0.8      1.1          insert_count += 1
   105     50000      44861.0      0.9
在需要分析性能的函数上加上装饰器 @profile,
然后使用以下命令运行:
kernprof -l -v netflow.py