• 数据探查postgresql数据库


      1 import pandas as pd
      2 import xlrd
      3 import psycopg2
      4 
      5 class Expedition(object):
      6     def __init__(self, database, user, psd, ip, port):
      7         self.database = database
      8         self.user = user
      9         self.psd = psd
     10         self.ip = ip
     11         self.port = port
     12         self.tables = []
     13         self.schema = 'schema名'
     14 
     15     def get_table_info(self):
     16         dblink = psycopg2.connect(database=self.database, user=self.user, password=self.psd, host=self.ip, port=self.port)
     17         print("链接成功")
     18         cur = dblink.cursor()
     19         print("创建游标成功")
     20         for table in self.tables:
     21             print(table)
     22             cur.execute(f"""SELECT cast(obj_description(relfilenode,'pg_class') as varchar)  FROM    pg_class b 
     23 WHERE  relname = '{table}'""")
     24             table_value = cur.fetchall()
     25             if table_value == []:
     26                 continue
     27             # 获取表备注
     28             table_comment = table_value[0][0]
     29             print (f"获取表备注{table_comment}成功")
     30 
     31             # 获取字段列表
     32             cur.execute(f"""select column_name  from information_schema.columns where  table_catalog ='{self.database}' and table_name = '{table}'""")
     33             columns = cur.fetchall()
     34             print(f"获取字段列表:{columns}成功")
     35             if columns == []:
     36                 continue
     37             
     38             # 从字段列表中取出包裹在集合的字段名
     39             for column in columns:
     40                 # 得到字段名
     41                 column_name = column[0]
     42                 print (f"获得字段成功{column_name}")
     43 
     44                 # 获得该字段的备注信息和字段类型
     45                 print(f"""SELECT col_description(a.attrelid,a.attnum),format_type(a.atttypid,a.atttypmod) as type
     46 FROM pg_class as c,pg_attribute as a where c.relname = '{table}' and a.attrelid = c.oid and a.attnum>0 and a.attname = '{column_name}'""")
     47                 cur.execute(f"""SELECT cast(col_description(a.attrelid,a.attnum)as varchar),format_type(a.atttypid,a.atttypmod) as type
     48 FROM pg_class as c,pg_attribute as a where c.relname = '{table}' and a.attrelid = c.oid and a.attnum>0 and a.attname = '{column_name}'""")
     49                 comment = cur.fetchall()
     50                 
     51                 # 获得该字段备注和字段类型列表
     52                 print(f"获得备注类型列表:{comment}")
     53 
     54                 # 获得字段的备注和类型
     55                 column_comment = comment[0][0]
     56                 column_type = comment[0][1]
     57                 print(f"获得字段备注:{column_comment}成功,获得字段类型成功:{column_type}")
     58 
     59                 # 获得记录数
     60                 print(f"SELECT count(1),count(DISTINCT cast({column_name} as varchar)) from {self.schema}.{table}")
     61                 cur.execute(f"""SELECT count(1),count(DISTINCT cast("{column_name}" as varchar)) from {self.schema}.{table}""")
     62                 field_num = cur.fetchall()
     63                 all_num = field_num[0][0]
     64                 only_num = field_num[0][1]
     65                 print(f"记录数:{all_num},唯一记录数:{only_num}")
     66 
     67                 # 唯一率
     68                 try:
     69 
     70                     only_rate = only_num / all_num
     71                 except Exception as result:
     72                     print(result)
     73                     only_rate = 0
     74 
     75 
     76                 # 空值数量
     77                 try:
     78                     print(f"SELECT count(1) from self.schema where CAST({column_name} as varchar) != ''")
     79                     cur.execute(f"""SELECT count(1) from {self.schema}.{table}
     80 where cast("{column_name}" as varchar) = ''or "{column_name}" is null""")
     81                     null_resule = cur.fetchall()
     82                     null_num = null_resule[0][0]
     83 
     84                     null_rate = (all_num - null_num) / all_num
     85                     print(f"获得null值数量;{null_num}成功")
     86                 except Exception as result:
     87                     print(result)
     88                     null_num = "未获得数据"
     89                     null_rate = "未获得数据"
     90 
     91                 # # 获取最大值/最小值
     92                 
     93                 if column_type.startswith("i") or column_type.startswith("f") or column_type.startswith("do") or column_type.startswith("n") or column_type.startswith("bi"):
     94                     try:
     95                         print(
     96                             f"select MAX(cast({table}.{column_name} as varchar)),MIN(cast({table}.{column_name} as varchar)) from {self.schema}.{table}")
     97                         cur.execute(
     98                             f"""select MAX(cast({table}."{column_name}" as float)),MIN(cast({table}."{column_name}" as float)) from {self.schema}.{table}""")
     99                         extreme_result = cur.fetchall()
    100                         max_value = extreme_result[0][0]
    101                         min_value = extreme_result[0][1]
    102                         print(extreme_result)
    103                         print(f"最大值:{max_value},最小值:{min_value}")
    104                     except:
    105 
    106                         print("错误信息")
    107 
    108                 else:
    109                     try:
    110                         print(
    111                             f"select MAX(cast({table}.{column_name} as varchar)),MIN(cast({table}.{column_name} as varchar)) from {self.schema}.{table}")
    112                         cur.execute(
    113                             f"""select MAX(cast({table}."{column_name}" as varchar)),MIN(cast({table}."{column_name}" as varchar)) from {self.schema}.{table}""")
    114                         extreme_result = cur.fetchall()
    115                         max_value = extreme_result[0][0]
    116                         min_value = extreme_result[0][1]
    117                         print(extreme_result)
    118                         print(f"最大值:{max_value},最小值:{min_value}")
    119                     except:
    120 
    121                         print("错误信息")
    122                 
    123                 # # 获取最大长度和最小长度
    124                 try:
    125                     cur.execute(f"""SELECT max(char_length(CAST("{column_name}" as VARCHAR))),
    126                 min(char_length(CAST("{column_name}" as VARCHAR))) from {self.schema}.{table}""")
    127                     column_lengtn = cur.fetchall()
    128                     max_lenth = column_lengtn[0][0]
    129                     min_lenth = column_lengtn[0][1]
    130                     print(f"最大长度:{max_lenth},最小长度:{min_lenth}")
    131                 except Exception as err:
    132                     max_lenth = "未获得数据"
    133                     min_lenth = "未获得数据"
    134                 
    135                 # 获得样例数据
    136                 wordlist = []
    137                 try:
    138                     print(f"""SELECT distinct cast({column_name} as varchar) from {self.schema}.{table} where CAST('{column_name}' as varchar) != ''
    139 LIMIT 4""")
    140                     cur.execute(f"""SELECT distinct cast("{column_name}" as varchar) from {self.schema}.{table} where CAST("{column_name}" as varchar) != ''
    141 LIMIT 4""")
    142                     words = cur.fetchall()
    143                     for word in words:
    144                         print(word)
    145                         wordlist.append(word[0])
    146                     print(wordlist)
    147                 except:
    148                     wordlist = ['未获得数据']
    149                 
    150 
    151                
    152                 
    153 
    154                 # print(table, table_comment, column_name, column_comment, column_type, all_num, null_num, only_num, only_rate, max_value, min_value, max_lenth, min_lenth, f"{wordlist}")
    155                 # 调用写入
    156                 self.write_into_excel(table, table_comment, column_name, column_comment, column_type, all_num, null_num, null_rate, only_num, 
    157                                       only_rate, max_value, min_value, max_lenth, min_lenth, f"{wordlist}")
    158                 print("写入成功")
    159             
    160             
    161 
    162 
    163 
    164     # 从表格获取表明
    165     def get_excel_value(self):
    166         # 打开
    167         data = xlrd.open_workbook(r"C:UsersAdministratorDesktop未探查总.xlsx")
    168         # 获取sheet对象
    169         table = data.sheet_by_name(U"Sheet1")
    170 
    171         # row = table.row_values(0)
    172         # 读取第一列数据,返回一个列表
    173         col = table.col_values(0)
    174         self.tables = col
    175         print (f"共从excel中读取{len(self.tables)}个表名")
    176 
    177 
    178     # 将数据写入表格
    179     def write_into_excel(self, table_name, table_comment, column_name, column_comment, column_type, all_num, null_num, null_rate, only_num, only_rate, max_value, min_value,max_lenth,min_lenth,word_list):
    180 
    181         df = pd.DataFrame(pd.read_excel('C:\UsersAdministratorDesktop探查模板.xlsx'))  # 读取原数据
    182         df_rows = df.shape[0]  # 获取行数
    183         # 增加一条数据
    184         df.loc[df_rows] = [table_name, table_comment, column_name, column_comment, column_type, all_num, null_num,null_rate, only_num, only_rate, max_value, min_value, max_lenth, min_lenth, word_list]  # 与原数据同格式
    185         df.to_excel('C:\UsersAdministratorDesktop探查模板.xlsx', sheet_name='sheet1', index=False, header=True)
    186 
    187     
    188 
    189 
    190 if __name__=='__main__':
    191     run = Expedition("数据库名", "账户名", "密码", "ip地址", 端口号)
    192     run.get_excel_value()
    193     run.get_table_info()
  • 相关阅读:
    Spring Security教程(一)
    java报错:The reference to entity "characterEncoding" must end with the ';' delimiter.
    SpringBoot定时任务升级篇(动态添加修改删除定时任务)
    SpringBoot几种定时任务的实现方式
    JDK中的Timer和TimerTask详解
    SpringMVC拦截器与SpringBoot自定义拦截器
    Java注解入门
    Servlet 4.0 入门
    spring邮件发送
    class path resource [spring/ApplicationContext-springmvc.xml] cannot be opened because it does not exist
  • 原文地址:https://www.cnblogs.com/luweilehei/p/12508792.html
Copyright © 2020-2023  润新知