From 96557076a2502f7d0c1c629909cde365f7fd63c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jordi=20Pujol=20Ahull=C3=B3?= Date: Mon, 26 Oct 2020 16:57:25 +0100 Subject: [PATCH] MDL-70038 assign: add support for pdftoppm tool pdftoppm (from poppler-utils package) is several orders of magnitude quicker than ghostscript at extracting PNG images from PDF documents. We add support for this tool and use it whenever it is set up, falling back to gs as before if it is missing. Revisited code with peer review comments. In particular: 1. Make default path for pdftoppm empty string. 2. Fix a typo on the method description for get_gs_command_for_image. 3. Added more information on why pdftoppm is useful to the string pathtopdftoppm_help. 4. Check that path for pdftoppm is executable to prevent errors. Otherwise, use gs. --- admin/settings/server.php | 2 + lang/en/admin.php | 2 + mod/assign/feedback/editpdf/classes/pdf.php | 67 +++++++++++++++++---- 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/admin/settings/server.php b/admin/settings/server.php index beb70f3f974b2..179cc982abb14 100644 --- a/admin/settings/server.php +++ b/admin/settings/server.php @@ -38,6 +38,8 @@ new lang_string('pathtodot_help', 'admin'), '')); $temp->add(new admin_setting_configexecutable('pathtogs', new lang_string('pathtogs', 'admin'), new lang_string('pathtogs_help', 'admin'), '/usr/bin/gs')); + $temp->add(new admin_setting_configexecutable('pathtopdftoppm', new lang_string('pathtopdftoppm', 'admin'), + new lang_string('pathtopdftoppm_help', 'admin'), '')); $temp->add(new admin_setting_configexecutable('pathtopython', new lang_string('pathtopython', 'admin'), new lang_string('pathtopythondesc', 'admin'), '')); $ADMIN->add('server', $temp); diff --git a/lang/en/admin.php b/lang/en/admin.php index 6de019dca6525..d6617ed08ac5f 100644 --- a/lang/en/admin.php +++ b/lang/en/admin.php @@ -949,6 +949,8 @@ $string['pathtodu'] = 'Path to du'; $string['pathtogs'] = 'Path to ghostscript'; $string['pathtogs_help'] = 'On most 
Linux installs, this can be left as \'/usr/bin/gs\'. On Windows it will be something like \'c:\\gs\\bin\\gswin32c.exe\' (make sure there are no spaces in the path - if necessary copy the files \'gswin32c.exe\' and \'gsdll32.dll\' to a new folder without a space in the path)'; +$string['pathtopdftoppm'] = 'Path to pdftoppm'; +$string['pathtopdftoppm_help'] = '\'pdftoppm\' is a tool that converts PDF pages to PNG at least as fast as \'gs\' does. However, you will probably have a better performance when converting large documents. If present, \'pdftoppm\' will be used instead of \'gs\' for this task. On most Linux installs, this can be left as \'/usr/bin/pdftoppm\'. If not present, install the poppler-utils or poppler package, depending on the Linux distribution. On Windows it will be provided by Cygwin installs. See Poppler project for more details.'; $string['pathtopgdump'] = 'Path to pg_dump'; $string['pathtopgdumpdesc'] = 'This is only necessary to enter if you have more than one pg_dump on your system (for example if you have more than one version of postgresql installed)'; $string['pathtopgdumpinvalid'] = 'Invalid path to pg_dump - either wrong path or not executable'; diff --git a/mod/assign/feedback/editpdf/classes/pdf.php b/mod/assign/feedback/editpdf/classes/pdf.php index c3b6921088cc5..615845ee62f56 100644 --- a/mod/assign/feedback/editpdf/classes/pdf.php +++ b/mod/assign/feedback/editpdf/classes/pdf.php @@ -536,8 +536,6 @@ public function set_image_folder($folder) { * @return string the filename of the generated image */ public function get_image($pageno) { - global $CFG; - if (!$this->filename) { throw new \coding_exception('Attempting to generate a page image without first setting the PDF filename'); } @@ -560,15 +558,7 @@ public function get_image($pageno) { } if ($generate) { - // Use ghostscript to generate an image of the specified page. 
- $gsexec = \escapeshellarg($CFG->pathtogs); - $imageres = \escapeshellarg(100); - $imagefilearg = \escapeshellarg($imagefile); - $filename = \escapeshellarg($this->filename); - $pagenoinc = \escapeshellarg($pageno + 1); - $command = "$gsexec -q -sDEVICE=png16m -dSAFER -dBATCH -dNOPAUSE -r$imageres -dFirstPage=$pagenoinc -dLastPage=$pagenoinc ". - "-dDOINTERPOLATE -dGraphicsAlphaBits=4 -dTextAlphaBits=4 -sOutputFile=$imagefilearg $filename"; - + $command = $this->get_command_for_image($pageno, $imagefile); $output = null; $result = exec($command, $output); if (!file_exists($imagefile)) { @@ -585,6 +575,61 @@ public function get_image($pageno) { return self::IMAGE_PAGE . $pageno . '.png'; } + /** + * Gets the command to use to extract as image the given $pageno page number + * from a PDF document into the $imagefile file. + * @param int $pageno Page number to extract from document. + * @param string $imagefile Target filename for the PNG image as absolute path. + * @return string The command to use to extract a page as PNG image. + */ + private function get_command_for_image(int $pageno, string $imagefile): string { + global $CFG; + + // First, quickest conversion option. + if (!empty($CFG->pathtopdftoppm) && is_executable($CFG->pathtopdftoppm)) { + return $this->get_pdftoppm_command_for_image($pageno, $imagefile); + } + + // Otherwise, rely on default behaviour. + return $this->get_gs_command_for_image($pageno, $imagefile); + } + + /** + * Gets the pdftoppm command to use to extract as image the given $pageno page number + * from a PDF document into the $imagefile file. + * @param int $pageno Page number to extract from document. + * @param string $imagefile Target filename for the PNG image as absolute path. + * @return string The pdftoppm command to use to extract a page as PNG image. 
+ */ + private function get_pdftoppm_command_for_image(int $pageno, string $imagefile): string { + global $CFG; + $pdftoppmexec = \escapeshellarg($CFG->pathtopdftoppm); + $imageres = \escapeshellarg(100); + $imagefile = substr($imagefile, 0, -4); // Pdftoppm tool automatically adds extension file. + $imagefilearg = \escapeshellarg($imagefile); + $filename = \escapeshellarg($this->filename); + $pagenoinc = \escapeshellarg($pageno + 1); + return "$pdftoppmexec -q -r $imageres -f $pagenoinc -l $pagenoinc -png -singlefile $filename $imagefilearg"; + } + + /** + * Gets the ghostscript (gs) command to use to extract as image the given $pageno page number + * from a PDF document into the $imagefile file. + * @param int $pageno Page number to extract from document. + * @param string $imagefile Target filename for the PNG image as absolute path. + * @return string The ghostscript (gs) command to use to extract a page as PNG image. + */ + private function get_gs_command_for_image(int $pageno, string $imagefile): string { + global $CFG; + $gsexec = \escapeshellarg($CFG->pathtogs); + $imageres = \escapeshellarg(100); + $imagefilearg = \escapeshellarg($imagefile); + $filename = \escapeshellarg($this->filename); + $pagenoinc = \escapeshellarg($pageno + 1); + return "$gsexec -q -sDEVICE=png16m -dSAFER -dBATCH -dNOPAUSE -r$imageres -dFirstPage=$pagenoinc -dLastPage=$pagenoinc ". + "-dDOINTERPOLATE -dGraphicsAlphaBits=4 -dTextAlphaBits=4 -sOutputFile=$imagefilearg $filename"; + } + /** * Check to see if PDF is version 1.4 (or below); if not: use ghostscript to convert it *