forked from jainammm/E-Invoicing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
144 lines (104 loc) · 3.96 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def getMaxConfidence(model_output, class_name):
    """Return the id and text of the most confident detection of a class.

    Args:
        model_output: Iterable of detection dicts. Every dict must have a
            'class_name' key; dicts of the requested class must also have
            'confidence', and the ones that get selected 'text' and 'id'.
        class_name: Class label to look for.

    Returns:
        A ``(id, text)`` tuple taken from the matching detection with the
        highest confidence. On ties the earliest detection wins. When no
        detection matches, or no match has a confidence above 0, the
        result is ``(-1, '')``.
    """
    candidates = [det for det in model_output
                  if det['class_name'] == class_name]

    best_id, best_text, best_score = -1, '', 0
    for det in candidates:
        score = det['confidence']
        # Strict comparison: a later detection must beat the current best.
        if score > best_score:
            best_text = det['text']
            best_id = det['id']
            best_score = score

    return best_id, best_text
def getAddress(model_output, class_name):
    """Assemble an address from detections lying near the best match.

    The most confident detection of ``class_name`` acts as an anchor. A
    vertical band is built around it, reaching 2.5 times the anchor's
    height (truncated to an int) above its top edge and below its bottom
    edge. Every detection of the class whose vertical extent overlaps that
    band contributes its text.

    Args:
        model_output: Iterable of detection dicts with 'class_name',
            'confidence', 'text' and 'bounding_box' keys. Elements 1 and 3
            of 'bounding_box' are used as the top and bottom y-coordinates.
        class_name: Class label of the address detections.

    Returns:
        The selected texts in detection order, each followed by one space.
        An empty string when no detection of the class has a confidence
        above 0.
    """
    candidates = [det for det in model_output
                  if det['class_name'] == class_name]

    anchor_box = []
    top_score = 0
    for det in candidates:
        if det['confidence'] > top_score:
            anchor_box = det['bounding_box']
            top_score = det['confidence']

    if anchor_box == []:
        return ''

    margin = int(2.5 * (anchor_box[3] - anchor_box[1]))
    band_top = anchor_box[1] - margin
    band_bottom = anchor_box[3] + margin

    pieces = []
    for det in candidates:
        box = det['bounding_box']
        top, bottom = box[1], box[3]
        # Keep anything that overlaps the band, even partially.
        if top < band_bottom and bottom > band_top:
            pieces.append(det['text'] + ' ')

    return ''.join(pieces)
def getTotalAmount(model_output, class_name):
    """Assemble the total amount from detections on the best match's line.

    The most confident detection of ``class_name`` acts as an anchor. A
    vertical band is built around it, padded by a quarter of the anchor's
    height (truncated to an int) on each side. Every detection of the class
    that lies entirely inside that band contributes its text.

    Args:
        model_output: Iterable of detection dicts with 'class_name',
            'confidence', 'text' and 'bounding_box' keys. Elements 1 and 3
            of 'bounding_box' are used as the top and bottom y-coordinates.
        class_name: Class label of the amount detections.

    Returns:
        The selected texts in detection order, each followed by one space.
        An empty string when no detection of the class has a confidence
        above 0.
    """
    candidates = [det for det in model_output
                  if det['class_name'] == class_name]

    anchor_box = []
    top_score = 0
    for det in candidates:
        if det['confidence'] > top_score:
            anchor_box = det['bounding_box']
            top_score = det['confidence']

    if len(anchor_box) == 0:
        return ''

    margin = int(0.25 * (anchor_box[3] - anchor_box[1]))
    band_top = anchor_box[1] - margin
    band_bottom = anchor_box[3] + margin

    pieces = []
    for det in candidates:
        box = det['bounding_box']
        top, bottom = box[1], box[3]
        # Inclusive bounds: when the anchor is shorter than 4 units the
        # margin truncates to 0 and the band equals the anchor box itself;
        # strict comparisons would then reject the anchor.
        if bottom <= band_bottom and top >= band_top:
            pieces.append(det['text'] + ' ')

    return ''.join(pieces)
def getTable(model_output, class_names):
    """Group detections into table rows anchored on 'HSN' detections.

    Each 'HSN' detection defines one row. A vertical window is computed for
    every row from its anchor box and, except for the last row, from the
    top of the next anchor. For each requested class, the detections whose
    vertical extent overlaps the row window are gathered:

    * 'TITLE': the texts of all such detections are concatenated, each
      followed by one space.
    * any other class: the text of the most confident detection is used,
      as selected by ``getMaxConfidence`` ('' when there is none).

    Args:
        model_output: Re-iterable collection (it is scanned several times)
            of detection dicts with 'class_name', 'confidence', 'text',
            'id' and 'bounding_box' keys. Elements 1 and 3 of
            'bounding_box' are used as the top and bottom y-coordinates.
        class_names: Iterable of class labels to extract for every row.

    Returns:
        A list with one dict per 'HSN' detection, ordered top to bottom,
        mapping each class label to its extracted text. Empty when there is
        no 'HSN' detection.
    """
    base_class = 'HSN'

    # Row windows depend on the *next* anchor, so the anchors must be in
    # top-to-bottom order. sorted() is stable, so input that is already
    # ordered produces exactly the same windows as before.
    hsn_boxes = sorted(
        (item['bounding_box'] for item in model_output
         if item['class_name'] == base_class),
        key=lambda box: box[1],
    )

    row_bounds = []
    last_index = len(hsn_boxes) - 1
    for i, box in enumerate(hsn_boxes):
        word_height = box[3] - box[1]

        # The first row gets a full word height of headroom; later rows
        # only a quarter.
        if i == 0:
            row_ymin = box[1] - word_height
        else:
            row_ymin = box[1] - int(0.25 * word_height)

        # The last row extends two word heights down. Other rows reach
        # almost to the next anchor, but never less than a quarter word
        # height below their own anchor.
        if i == last_index:
            row_ymax = box[3] + 2 * word_height
        else:
            row_ymax = max(
                hsn_boxes[i + 1][1] - 0.2 * word_height,
                box[3] + int(0.25 * word_height),
            )

        row_bounds.append((row_ymin, row_ymax))

    final_rows = []
    for row_ymin, row_ymax in row_bounds:
        row_dict = {}
        for class_name in class_names:
            # Detections of this class that vertically overlap the row.
            in_row = []
            for item in model_output:
                if item['class_name'] != class_name:
                    continue
                ymin, ymax = item['bounding_box'][1], item['bounding_box'][3]
                if ymin < row_ymax and ymax > row_ymin:
                    in_row.append(item)

            if class_name == 'TITLE':
                row_dict[class_name] = ''.join(
                    item['text'] + ' ' for item in in_row)
            else:
                _, item_value = getMaxConfidence(in_row, class_name)
                row_dict[class_name] = item_value
        final_rows.append(row_dict)

    return final_rows