forked from chaonan99/ppt_presenter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pptx2mp4.py
executable file
·149 lines (124 loc) · 5.67 KB
/
pptx2mp4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3.10
# -*- coding: utf-8 -*-
import os
import tempfile
import argparse
import subprocess
import shutil
from subprocess import call
from PIL import Image
from pdf2image import convert_from_path
from gtts import gTTS
from pptx import Presentation
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import site
__author__ = ['chaonan99','Mathieu']
def ppt_presenter(pptx_path, language, use_gtts, output_path):
if use_gtts:
print("Using gtts\n")
else:
print("Using TTS\n")
location = site.getsitepackages()[0]
path = location + "/TTS/.models.json"
model_manager = ModelManager(path)
if language == "fr":
model_path, config_path, model_item = model_manager.download_model("tts_models/fr/css10/vits")
syn = Synthesizer(
tts_checkpoint=model_path,
tts_config_path=config_path,
)
else:
model_path, config_path, model_item = model_manager.download_model("tts_models/en/ljspeech/tacotron2-DDC")
voc_path, voc_config_path, _ = model_manager.download_model(model_item["default_vocoder"])
syn = Synthesizer(
tts_checkpoint=model_path,
tts_config_path=config_path,
vocoder_checkpoint=voc_path,
vocoder_config=voc_config_path
)
with tempfile.TemporaryDirectory(dir=os.getcwd()) as temp_path:
filename = os.path.splitext(os.path.basename(pptx_path))[0]
pdf_path = os.path.join(temp_path, filename + '.pdf')
print('Converting pptx to pdf: ' + pdf_path + '\n')
call(['soffice', '--headless', '--convert-to', 'pdf', '--outdir' , temp_path, '--convert-images-to', 'jpg', pptx_path])
print('pdf created\n')
# temp_path = "./temp"
images_from_path = convert_from_path(pdf_path)
prs = Presentation(pptx_path)
assert len(images_from_path) == len(prs.slides)
count = len(prs.slides)
for i, (slide, image) in enumerate(zip(prs.slides, images_from_path)):
print('Processing {}/{}\n'.format(i + 1, count))
if slide.has_notes_slide:
image_path = os.path.join(temp_path, 'frame_{}.jpg'.format(i))
image.save(image_path)
make_jpeg_even(image_path)
notes = slide.notes_slide.notes_text_frame.text
if use_gtts:
audio_path = os.path.join(temp_path, 'frame_{}.mp3'.format(i))
tts = gTTS(text=notes, lang=language)
tts.save(audio_path)
else:
audio_path = os.path.join(temp_path, 'frame_{}.wav'.format(i))
outputs = syn.tts(notes)
syn.save_wav(outputs, audio_path)
ffmpeg_call(image_path, audio_path, temp_path, i)
print('Concatenating videos...\n')
video_list = [os.path.join(temp_path, 'frame_{}.ts'.format(i)) \
for i in range(len(images_from_path))]
video_list_str = 'concat:' + '|'.join(video_list)
ffmpeg_concat(video_list_str, output_path)
print('Generation completed\n')
def make_jpeg_even(image_path):
img = Image.open(image_path)
# Crop image to have even dimensions
even_width = img.size[0] - (img.size[0] % 2)
even_height = img.size[1] - (img.size[1] % 2)
img = img.crop((0, 0, even_width, even_height))
# Save cropped image
img.save(image_path)
def ffmpeg_call(image_path, audio_path, temp_path, i):
out_path_mp4 = os.path.join(temp_path, 'frame_{}.mp4'.format(i))
out_path_ts = os.path.join(temp_path, 'frame_{}.ts'.format(i))
print('Checking audio duration...\n')
duration = subprocess.check_output([
'ffprobe',
'-i', audio_path,
'-show_entries',
'format=duration',
'-v', 'quiet',
'-of', 'csv=p=0'])
duration = float(duration)
print("Audio duration: " + str(duration) + "\n")
call(['ffmpeg', '-loop', '1', '-y', '-i', image_path, '-i', audio_path,
'-c:v', 'libx264', '-tune', 'stillimage', '-c:a', 'aac',
'-b:a', '192k', '-pix_fmt', 'yuv420p', '-t', str(duration) , out_path_mp4])
print('Converting to ts...\n')
call(['ffmpeg', '-y', '-i', out_path_mp4, '-c', 'copy',
'-bsf:v', 'h264_mp4toannexb', '-f', 'mpegts', out_path_ts])
def ffmpeg_concat(video_list_str, out_path):
call(['ffmpeg', '-y', '-f', 'mpegts', '-i', '{}'.format(video_list_str),
'-c', 'copy', '-bsf:a', 'aac_adtstoasc', out_path])
def is_executable_in_path(executable_name):
return shutil.which(executable_name) is not None
def main():
parser = argparse.ArgumentParser(description='PPTX 2 mp4 help.')
parser.add_argument('-i', '--input', help='input pptx path')
parser.add_argument('-l', '--lang', help='text language (fr, en, etc...)')
parser.add_argument('-o', '--output', help='output path')
parser.add_argument('-t', '--tts', help='Text to speech engine to use (gtts or tts). Default is tts (CoquiTTS)', required=False, default='tts')
args = parser.parse_args()
error = False
if not is_executable_in_path('soffice'):
print('Please, install LibreOffice and ensure "soffice" is in your path.\n')
error = True
if not is_executable_in_path('ffmpeg'):
print('Please, install ffmpeg and ensure "ffmpeg" is in your path.\n')
error = True
if error:
print('Error: missing software. Please install required software and try again.\n')
exit(1)
ppt_presenter(args.input, args.lang, args.tts == 'gtts', args.output)
if __name__ == '__main__':
main()