• 根据标注真值(GT)由带线表格生成无线表格


    JSON 解析练习,Python 图像处理练习;表格包围框的毛刺问题待解决。

      1 # -*- coding: utf-8 -*-
      2 # coding: utf-8
      3 from PIL import Image, ImageDraw
      4 import cv2
      5 import os
      6 import csv
      7 import json
# Lookup table of three colour triples keyed 0-2 (pure red, green and blue if
# the triples are read as RGB). Nothing in the visible code references it.
color = {
    0: [255, 0, 0],
    1: [0, 255, 0],
    2: [0, 0, 255]
}
     13 from PIL import Image
     14 import numpy as np
     15 
     16 # 取批量点取RGB众数,来推算背景色值
     17 def publicnum(num, d=0):
     18     dictnum = {}
     19     for i in range(len(num)):
     20         if str(num[i]) in dictnum.keys():
     21             dictnum[str(num[i])] += 1
     22         else:
     23             dictnum.setdefault(str(num[i]), 1)
     24     maxnum = 0
     25     maxkey = '[255 255 255]'
     26     for k, v in dictnum.items():
     27         if v >= maxnum:
     28             maxnum = v
     29             maxkey = k
     30     return maxkey
     31 
     32 page_img_dir = "JPG"
     33 output_dir = "年报_PDF_TABLE_JPG_eliminate_lines-5-15-final"
     34 
     35 isExists = os.path.exists(output_dir)
     36 if not isExists:
     37     os.makedirs(output_dir)
     38 csv_file = csv.reader(open('表格结构标注-带线年报_UTF-8.csv', 'r', encoding="gbk"))
     39 data_list = []
     40 for data in csv_file:
     41     data_list.append(data)
     42 
     43 print("page number: ", len(data_list) - 1)
     44 
     45 for data in data_list[1:]:  # 跳过第一行
     46     img_path = data[0]
     47     img_name = img_path.split('/')[-1]  # /分割后最后一个为名字
     48     pdf_name = img_name.split('_')[0]  # -分割后 第一个是名字
     49     local_img_path = os.path.join(page_img_dir, img_name)  # 拼接路径
     50     print(local_img_path)
     51     annotation = json.loads(data[2])  # json单元格读取
     52     objects = annotation['objects']  # object是一个列表,读取该列表
     53     cnt = 0
     54     tu = Image.open(local_img_path)
     55     page_img = np.array(tu)
     56     for page_object in objects:
     57         if 'cur' in page_object.keys():
     58             cur = page_object['cur']
     59         else:
     60             cur = cnt
     61         polygon = page_object['polygon']['ptList']
     62         x_list = [p['x'] for p in polygon]
     63         y_list = [p['y'] for p in polygon]
     64         x_min = min(x_list)
     65         x_max = max(x_list)
     66         y_min = min(y_list)
     67         y_max = max(y_list)
     68         if abs(x_max - x_min) < 20:  # 纵向线条
     69             xx = int((x_min + x_max) / 2)
     70             inline_y_list = [y_max+20, y_min]
     71             #寻找相交横线分割点
     72             for in_page_object in objects:
     73                 in_polygon = in_page_object['polygon']['ptList']
     74                 in_x_list = [in_p['x'] for in_p in in_polygon]
     75                 in_y_list = [in_p['y'] for in_p in in_polygon]
     76                 in_x_min = min(in_x_list)
     77                 in_x_max = max(in_x_list)
     78                 in_y_min = min(in_y_list)
     79                 in_y_max = max(in_y_list)
     80                 if in_y_max - in_y_min < 20:  # 判断为横线
     81                     if in_x_max+5 >= xx and in_x_min-5 <= xx:  # 判断相交
     82                         point_y = in_y_min
     83                         inline_y_list.append(point_y)
     84                         if 0<abs(y_max-point_y)< 10:
     85                             try:
     86                                 inline_y_list.remove(max(y_max+20, point_y))
     87                                 inline_y_list.append(min(y_max+20, point_y))
     88                             except:
     89                                 pass
     90                         elif 0 < abs(y_min-point_y) < 10:
     91                             try:
     92                                 inline_y_list.remove(min(y_min, point_y))
     93                                 inline_y_list.append(max(y_min, point_y))
     94                             except:
     95                                 pass
     96             inline_y_list = list({}.fromkeys(inline_y_list).keys())
     97             inline_y_list.sort()
     98             inline_y_list[-1]+=5
     99             if inline_y_list[-1]>2339:
    100                 inline_y_list[-1]=2339
    101             # 线条分割结束
    102             for i in range(0, inline_y_list.__len__()):
    103                 if i < inline_y_list.__len__() - 1:
    104                     # 开始取样
    105                     back_colors = []
    106                     for yy in range(inline_y_list[i], inline_y_list[i + 1]):
    107                         if xx + 8 < 1654:
    108                             back_colors.append(page_img[yy, xx + 8])
    109                         else:
    110                             back_colors.append(page_img[yy, xx - 8])
    111                     back_color = publicnum(back_colors)
    112                     back_color = back_color[1:-1]
    113                     try:
    114                         back_color = back_color.split(' ')
    115                     except:
    116                         back_color = back_color.split('   ')
    117                         print(type(back_color))
    118                     if len(back_color) > 3:
    119                         back_color = list(filter(None, back_color))
    120                     # 取样结束
    121                     # 纵向填色
    122 
    123                     for yy in range(inline_y_list[i]-4, inline_y_list[i + 1]-4):
    124                         if y_min-20<inline_y_list[i]<y_max+20 :
    125                             for ranging in range(-4, x_max-x_min+5):
    126                                 if x_min+ranging < 1654 and x_min+ranging >= 0:
    127                                         page_img[yy, x_min + ranging] = back_color
    128                                 else:
    129                                     pass
    130                         else:
    131                             pass
    132         elif abs(y_max - y_min) < 20:  # 横向线条
    133             yy = int((y_min + y_max) / 2)
    134             inline_x_list = [x_max+20, x_min]
    135             # 寻找相交横线分割点
    136             for in_page_object2 in objects:
    137                 polygon2 = in_page_object2['polygon']['ptList']
    138                 in_x_list2 = [p['x'] for p in polygon2]
    139                 in_y_list2 = [p['y'] for p in polygon2]
    140                 in_x_min = min(in_x_list2)
    141                 in_x_max = max(in_x_list2)
    142                 in_y_min = min(in_y_list2)
    143                 in_y_max = max(in_y_list2)
    144                 if abs(in_x_max - in_x_min) < 20:  # 判断为纵线
    145                     if in_y_max+5 >= y_min and in_y_min-5 <= y_max:  # 判断相交
    146                         point_x = in_x_min
    147                         inline_x_list.append(point_x)
    148                         if 0<abs(x_max-point_x)<10:
    149                             try:
    150                                 inline_x_list.remove(max(x_max+20, point_x))
    151                                 inline_x_list.append(min(x_max+20, point_x))
    152                             except:
    153                                 pass
    154                         elif 0<abs(x_min-point_x)<10:
    155                             try:
    156                                 inline_x_list.remove(min(x_min, point_x))
    157                                 inline_x_list.append(max(x_min, point_x))
    158                             except:
    159                                 pass
    160                         else:
    161                             pass
    162             inline_x_list = list({}.fromkeys(inline_x_list).keys())
    163             inline_x_list.sort()
    164             #inline_x_list[-1]+=5
    165             # 线条分割结束
    166             for i in range(0, inline_x_list.__len__()):
    167                 if i < inline_x_list.__len__() - 1:
    168                     # 开始取样
    169                     back_colors = []
    170                     for xx in range(inline_x_list[i], inline_x_list[i + 1]):
    171                         if yy+8 < 2339:
    172                             back_colors.append(page_img[yy + 8, xx])
    173                         else:
    174                             back_colors.append(page_img[yy - 8, xx])
    175                     back_color = publicnum(back_colors)
    176                     back_color = back_color[1:-1]
    177                     try:
    178                         back_color = back_color.split(' ')
    179                     except:
    180                         back_color = back_color.split('   ')
    181 
    182                     if len(back_color) > 3:
    183                         back_color = list(filter(None, back_color))
    184                     # 取样结束
    185                     # 横线填色
    186                     for xx in range(inline_x_list[i]-4, inline_x_list[i + 1]-4):
    187                         if x_min-20<inline_x_list[i]<x_max+20:
    188                             for ranging in range(-4, y_max-y_min+5):
    189                                 if y_min+ranging < 2339 and y_min+ranging >= 0:
    190                                     page_img[y_min+ranging, xx] = back_color
    191                                 elif y_min+ranging>=2339:
    192                                     page_img[2338, xx] = back_color
    193                                 else:
    194                                     page_img[0, xx] = back_color
    195                         else:
    196                             pass
    197         else:
    198             print("no such line", 'x_min:', x_min,'x_max:', x_max, 'y_max:', y_max, 'y_min:', y_min)
    199     tu = Image.fromarray(page_img.astype('uint8'))
    200     output_path = os.path.join(output_dir, img_name.split('.')[0] + '_' + str(cur) + ".jpg")
    201     tu.save(output_path)
    202     cv2.imwrite(output_path, page_img)
    203     cnt += 1
  • 相关阅读:
    表单分页,默认第一页,点击第5页,返回,如何跳转到第1页
    2019面试题
    企业微信中,获取外部联系人信息
    js vue 在页面中将摄像头放在一个标签里展示,(模仿手机拍照功能)
    微信小程序 自定义三列城市弹窗
    微信小程序 密码键盘
    vue 上传图片视频组件,可拍照选择照片,解决苹果手机拍照旋转问题
    vue 上拉加载自定义组件,超好用哦
    vue 模仿机票自定义日历组件,区间选择
    vue 日历组件只显示本月和下个月 -- 多选日期
  • 原文地址:https://www.cnblogs.com/wind-chaser/p/10868935.html
Copyright © 2020-2023  润新知