-
Notifications
You must be signed in to change notification settings - Fork 39
/
json2pdf.py
107 lines (99 loc) · 4.24 KB
/
json2pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from reportlab.pdfgen import canvas # pip install reportlab==3.6.8
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# from reportlab.lib.utils import ImageReader
import json
import os
from PIL import Image
from imp import reload
def _collect_styles(style_rules):
    """Flatten the JSON style rules into a ``{class_id: {property: value}}`` map."""
    styles = {}
    for rule in style_rules:
        for class_id in rule['c']:
            # Rules seen later override properties set by earlier rules.
            styles.setdefault(class_id, {}).update(rule['s'])
    return styles


def _draw_word(c, item, styles, page_height):
    """Draw one ``word`` item of the page JSON onto canvas *c*."""
    # Merge every style class referenced by the item, in order.
    style = {}
    for class_id in item['r']:
        style.update(styles[class_id])

    # TODO: bold does not work (wrapping the text in <b>...</b> has no effect).
    textobject = c.beginText()
    textobject.setTextOrigin(
        item['p']['x'],
        # Flip y for reportlab's bottom-left origin. The extra 14 is an
        # unexplained vertical offset kept from the original code.
        # TODO: derive it from the font metrics instead.
        page_height - item['p']['y'] - 14
    )
    if style.get('font-family'):
        textobject.setFont(
            style['font-family'],
            float(style['font-size']) if style.get('font-size') else 16
        )
    if style.get('letter-spacing'):
        textobject.setCharSpace(float(style['letter-spacing']))
    if style.get('color'):
        # Colour is parsed as a '#rrggbb' string.
        textobject.setFillColorRGB(
            int(style['color'][1: 3], 16) / 255,
            int(style['color'][3: 5], 16) / 255,
            int(style['color'][5: 7], 16) / 255
        )
    # NOTE(review): this unconditionally resets the fill colour to black, so
    # the style colour applied just above never takes effect. Kept as-is
    # because it may be deliberate -- confirm before removing.
    textobject.setFillColorRGB(0, 0, 0)
    textobject.textLine(item['c'])
    c.drawText(textobject)


def _draw_picture(c, item, img, out_dir, page_height):
    """Crop one ``pic`` item out of the page image *img* and draw it on *c*."""
    content = item['c']
    pos = item['p']

    # Passing a PIL image straight to drawImage (via ImageReader) did not work
    # for the original author, so the crop goes through a PNG file on disk.
    # See https://groups.google.com/g/reportlab-users/c/SmIzKYdCodo
    cropped = img.crop((
        int(content['ix']),
        int(content['iy']),
        int(content['iw'] + content['ix']),
        int(content['ih'] + content['iy'])
    ))

    # Only pass an explicit size when the target box differs from the crop;
    # the height follows the crop's aspect ratio at the target width.
    img_width = None
    img_height = None
    if int(content['iw']) != int(pos['w']) or int(content['ih']) != int(pos['h']):
        img_width = pos['w']
        img_height = content['ih'] / content['iw'] * pos['w']

    png_path = os.path.join(out_dir, '{}-{}.png'.format(pos['x'], pos['y']))
    cropped.save(png_path)

    # Height actually drawn: the scaled height if any, else the crop height.
    # It must be chosen BEFORE the subtraction; the previous expression
    # `ph - y - img_height if img_height else ih` evaluated to just `ih`
    # for unscaled pictures because of operator precedence.
    drawn_height = img_height if img_height else int(content['ih'])
    c.drawImage(
        png_path,
        int(pos['x']),
        page_height - int(pos['y']) - drawn_height,
        width=img_width,
        height=img_height,
        mask='auto'
    )


def save_pdf(tempdir, pagenum):
    """Render page *pagenum* from its JSON description into a one-page PDF.

    Reads ``<tempdir>/<pagenum>.json`` and writes ``<tempdir>/<pagenum>.pdf``.
    Fonts come from the ``*.ttf`` files in *tempdir* whose four characters
    before the extension are the page number in hexadecimal. Pictures are
    cropped out of ``<tempdir>/<pagenum>.png`` and stored as PNG files in
    ``<tempdir>/<pagenum>/`` before being placed on the page.

    Args:
        tempdir: Directory holding the page's JSON, PNG and TTF files; the
            PDF and the cropped pictures are written here as well.
        pagenum: Integer page number used to build all the file names.

    Raises:
        OSError: If the page contains a ``pic`` item but the page image is
            missing or unreadable.
    """
    page_name = str(pagenum)
    with open(os.path.join(tempdir, page_name + ".json")) as f:
        data = json.load(f)

    page_height = data['page']['ph']
    c = canvas.Canvas(
        os.path.join(tempdir, page_name + ".pdf"),
        pagesize=(data['page']['pw'], page_height),
    )
    styles = _collect_styles(data['style'])

    ttfs = [
        x for x in os.listdir(tempdir)
        if x[-4:] == '.ttf' and int(x[-8: -4], 16) == pagenum
    ]
    # TODO: without this reload() the fonts only work on page 1.
    reload(pdfmetrics)
    for ttf in ttfs:
        # The font is registered under the file name without its extension.
        pdfmetrics.registerFont(TTFont(ttf[:-4], os.path.join(tempdir, ttf)))

    # A page without pictures may have no PNG at all, so a missing image is
    # only an error once a 'pic' item actually needs it.
    img_path = os.path.join(tempdir, page_name + '.png')
    try:
        img = Image.open(img_path)
    except OSError:
        img = None

    pic_dir = os.path.join(tempdir, page_name)
    os.makedirs(pic_dir, exist_ok=True)

    try:
        # Draw in ascending z-order so higher items end up on top.
        for item in sorted(data['body'], key=lambda each: each['p']['z']):
            if item['t'] == 'word':
                _draw_word(c, item, styles, page_height)
            elif item['t'] == 'pic':
                if img is None:
                    raise OSError(
                        'page image {} is missing or unreadable but page {} '
                        'contains a picture'.format(img_path, pagenum)
                    )
                _draw_picture(c, item, img, pic_dir, page_height)
        c.showPage()
        c.save()
    finally:
        if img is not None:
            img.close()