# My own project
import pandas as pd
import numpy as np
import altair as alt
import pymysql
from sqlalchemy import create_engine
import seaborn
import datetime
# Load every report row from the MySQL database.
# NOTE(review): the connection URL carries the account and password in plain
# text - consider reading them from configuration instead.
sql = ''' select case_name ,result, platform_name, error_msg ,report_create_time from report_reportdetail; '''
engine = create_engine('mysql+pymysql://root:nokia123@10.101.35.249:3306/db5')
df = pd.read_sql_query(sql, engine)

# Keep only the last two weeks of data: index the rows by their creation
# time, order them chronologically and cut off everything before the limit.
# The limit is 14 days back even though the variable is called `week_ago`.
week_ago = datetime.date.today() - datetime.timedelta(days=14)
df = df.set_index(pd.to_datetime(df["report_create_time"]))
df = df.drop("report_create_time", axis=1)
df = df.sort_index()
df = df.truncate(before=week_ago)
# Break the failures down by error message, most frequent first.
# SQL equivalent:
#   select count(result) as i, error_msg from report_reportdetail
#   where result = 'fail' group by error_msg order by i desc;
is_failed = df['result'] == 'fail'
ErrorMsg = (
    df.loc[is_failed, :]
    .groupby(['error_msg'])
    .count()
    .sort_values(by=['result'], ascending=[False])
)

# Failed cases: every platform (df0), AEQE only (df1) and AEQV only (df2).
# Copies are taken so the column added below does not touch `df`.
df0 = df.loc[is_failed, :].copy()
df1 = df.loc[is_failed & (df['platform_name'] == 'AEQE'), :].copy()
df2 = df.loc[is_failed & (df['platform_name'] == 'AEQV'), :].copy()

# The error messages are too long to use as labels: keep the first 60
# characters. Each frame derives the short message from its OWN error_msg
# column, so every row of that frame gets a value.
df0.loc[:, "error_msg_short"] = df0["error_msg"].str[0:60]
df1.loc[:, "error_msg_short"] = df1["error_msg"].str[0:60]
df2.loc[:, "error_msg_short"] = df2["error_msg"].str[0:60]
# Give each frame a two-level index and count the rows per index pair.

# All platforms: error_msg_short is the leading level. Per the original
# author's note, the ordering stays intact this way, whereas leading with
# case_name scrambles it.
df0 = df0.set_index(['error_msg_short', 'case_name'])
df0 = df0.groupby(level=df0.index.names).count()

# Per-platform frames: case_name is the leading level.
df1 = df1.set_index(['case_name', 'error_msg_short'])
df1 = df1.groupby(level=df1.index.names).count()

df2 = df2.set_index(['case_name', 'error_msg_short'])
df2 = df2.groupby(level=df2.index.names).count()
# Analyse the grouped counts.
def _occurrences(counts, keep):
    """Return the rows of *counts* selected by *keep* as one '出现次数' column.

    counts: grouped frame whose 'result' column holds the per-group count.
    keep:   boolean mask over the rows of *counts*.

    The 'result' column is renamed to '出现次数' (occurrence count) and every
    other column is dropped.
    """
    return counts.loc[keep, :].rename(columns={'result': '出现次数'}).loc[:, ['出现次数']]


# Disabled variant that keeps every failure (count > 0):
# HW = _occurrences(df0, df0["result"] > 0).sort_values(by=['出现次数'], ascending=[False])

# Error message / case pairs that failed more than once, most frequent first.
# Original author's note: with error_msg_short as the leading index level the
# ordering does not get scrambled.
ErrorMsgByCase = _occurrences(df0, df0["result"] > 1).sort_values(by=['出现次数'], ascending=[False])

# Per-platform tables split by occurrence threshold.
# NOTE(review): AEQE (> 1) and AEQE_ENV (<= 1) are complementary, but
# AEQV (> 0) and AEQV_ENV (<= 3) overlap - confirm the thresholds are intended.
AEQE = _occurrences(df1, df1["result"] > 1)
AEQV = _occurrences(df2, df2["result"] > 0)
AEQE_ENV = _occurrences(df1, df1["result"] <= 1)
AEQV_ENV = _occurrences(df2, df2["result"] <= 3)

# Rows whose second index level contains "PR" followed by six digits and a
# colon. The pattern is a raw string so that \d is not an invalid string escape.
AEQE_PR = df1.iloc[df1.index.get_level_values(1).str.contains(r'PR\d{6}:')]
AEQV_PR = df2.iloc[df2.index.get_level_values(1).str.contains(r'PR\d{6}:')]
# Bare expression: presumably left over from a notebook cell, where it shows
# the table; it has no effect when the file runs as a plain script.
# Other tables that were inspected the same way: df, ErrorMsg, AEQE, AEQV,
# AEQV_PR, AEQE_PR, AEQE_ENV.
ErrorMsgByCase

# Bar chart of the occurrence count per case, one facet per short error message.
data = ErrorMsgByCase.reset_index()
# seaborn renamed `factorplot` to `catplot` and later removed the old name.
# Prefer `catplot` and fall back to `factorplot` only on releases that lack it.
# `kind='bar'` is explicit, so both entry points draw the same chart.
_facet_plot = getattr(seaborn, 'catplot', None) or seaborn.factorplot
fg = _facet_plot(y='case_name', x='出现次数', col='error_msg_short', data=data, kind='bar')
fg.fig.set_size_inches(18, 3)
fg.set_xlabels('')