Skip to content

Commit

Permalink
MDL-52954 core: Add a document converter to file_storage
Browse files Browse the repository at this point in the history
This lets us convert between common office formats, e.g. docx -> pdf,
html -> pdf, html -> odt.

This commit also updates assignment editpdf plugin to use this converter
on all compatible submission files.
  • Loading branch information
Damyon Wiese committed Mar 30, 2016
1 parent 1fca8a7 commit 2e76c14
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 4 deletions.
1 change: 1 addition & 0 deletions admin/settings/server.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// Executable-path settings; the last argument of each is the default path ('' means no default).
$temp->add(new admin_setting_configexecutable('aspellpath', new lang_string('aspellpath', 'admin'), new lang_string('edhelpaspellpath'), ''));
$temp->add(new admin_setting_configexecutable('pathtodot', new lang_string('pathtodot', 'admin'), new lang_string('pathtodot_help', 'admin'), ''));
$temp->add(new admin_setting_configexecutable('pathtogs', new lang_string('pathtogs', 'admin'), new lang_string('pathtogs_help', 'admin'), '/usr/bin/gs'));
// Path to the pandoc document converter, defaulting to /usr/bin/pandoc.
$temp->add(new admin_setting_configexecutable('pathtopandoc', new lang_string('pathtopandoc', 'admin'), new lang_string('pathtopandoc_help', 'admin'), '/usr/bin/pandoc'));
// Add the settings collected in $temp to $ADMIN under 'server'.
$ADMIN->add('server', $temp);


Expand Down
6 changes: 6 additions & 0 deletions config-dist.php
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,12 @@
// Note that, for now, this is only used by the profiling features
// (Development->Profiling) built into Moodle.
// $CFG->pathtodot = '';
//
// Path to pandoc.
// Probably something like /usr/bin/pandoc. Used to convert between document formats.
// It is recommended to install the latest stable release of pandoc.
// Download packages for all platforms are available from http://pandoc.org/
// $CFG->pathtopandoc = '';

//=========================================================================
// ALL DONE! To continue installation, visit your main page with a browser
Expand Down
2 changes: 2 additions & 0 deletions lang/en/admin.php
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@
$string['passwordresettime'] = 'Maximum time to validate password reset request';
$string['passwordreuselimit'] = 'Password rotation limit';
$string['passwordreuselimit_desc'] = 'Number of times a user must change their password before they are allowed to reuse a password. Hashes of previously used passwords are stored in local database table. This feature might not be compatible with some external authentication plugins.';
// Label and help text for the $CFG->pathtopandoc executable setting.
$string['pathtopandoc'] = 'Path to pandoc document converter';
$string['pathtopandoc_help'] = 'Path to pandoc document converter. This is an executable that is capable of converting between document formats. This is optional, but if specified, Moodle will be able to perform automated conversion of documents from a wide range of file formats. This is used by the Assignment module "Annotate PDF" feature.';
$string['pathtodot'] = 'Path to dot';
$string['pathtodot_help'] = 'Path to dot. Probably something like /usr/bin/dot. To be able to generate graphics from DOT files, you must have installed the dot executable and point to it here. Note that, for now, this is only used by the profiling features (Development->Profiling) built into Moodle.';
$string['pathtodu'] = 'Path to du';
Expand Down
2 changes: 1 addition & 1 deletion lib/behat/lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ function behat_clean_init_config() {
'umaskpermissions', 'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix',
'dboptions', 'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword',
'proxybypass', 'theme', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot', 'skiplangupgrade',
'altcacheconfigpath'
'altcacheconfigpath', 'pathtopandoc'
));

// Add extra allowed settings.
Expand Down
142 changes: 142 additions & 0 deletions lib/filestorage/file_storage.php
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,128 @@ public function get_file_instance(stdClass $filerecord) {
return $storedfile;
}

/**
 * Get converted document.
 *
 * Get an alternate version of the specified document, if it is possible to convert.
 *
 * Conversions are looked up in the system context, component 'core', file area
 * 'documentconversion', under the path '/<format>/' with the content hash of the
 * source file as the file name. When no stored conversion exists, one is created on demand.
 *
 * @param stored_file $file the file we want to convert
 * @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension.
 * @return stored_file|bool false if unable to create the conversion, stored file otherwise
 */
public function get_converted_document(stored_file $file, $format) {
    // Normalise the requested format exactly as the pandoc format checks do (trim + lowercase),
    // so that 'PDF' or ' pdf ' resolve to the same stored conversion as 'pdf' instead of
    // producing a duplicate under a different file path.
    $format = trim(strtolower($format));

    $context = context_system::instance();
    $path = '/' . $format . '/';
    $conversion = $this->get_file($context->id, 'core', 'documentconversion', 0, $path, $file->get_contenthash());

    if ($conversion) {
        // Already converted earlier - reuse it.
        return $conversion;
    }

    $conversion = $this->create_converted_document($file, $format);

    return $conversion ? $conversion : false;
}

/**
 * Check whether pandoc can be given a file of this type as its source.
 *
 * The comparison ignores case and surrounding whitespace.
 *
 * @param string $format The source format - e.g. 'docx'. Formats are specified by file extension.
 * @return bool - True if the format is supported for input.
 */
protected function is_input_format_supported_by_pandoc($format) {
    $accepted = array('md', 'html', 'tex', 'docx', 'odt', 'epub', 'png', 'jpg', 'gif');

    $candidate = strtolower($format);
    $candidate = trim($candidate);

    return in_array($candidate, $accepted);
}

/**
 * Check whether pandoc can be asked to produce a file of this type.
 *
 * The comparison ignores case and surrounding whitespace.
 *
 * @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension.
 * @return bool - True if the format is supported for output.
 */
protected function is_output_format_supported_by_pandoc($format) {
    $wanted = trim(strtolower($format));

    foreach (array('md', 'pdf', 'html', 'tex', 'docx', 'odt', 'odf', 'epub') as $extension) {
        if ($wanted == $extension) {
            return true;
        }
    }

    return false;
}

/**
 * Perform a file format conversion on the specified document.
 *
 * The file content is copied into the directory returned by make_request_directory(),
 * the executable configured in $CFG->pathtopandoc is run on it, and the result is stored
 * in the system context, component 'core', file area 'documentconversion', under
 * '/<format>/' with the content hash of the source file as the file name.
 *
 * @param stored_file $file the file we want to convert
 * @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension.
 * @return stored_file|bool false if unable to create the conversion, stored file otherwise
 */
protected function create_converted_document(stored_file $file, $format) {
    global $CFG;

    if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) {
        // No conversions are possible, sorry.
        return false;
    }

    $fileextension = strtolower(pathinfo($file->get_filename(), PATHINFO_EXTENSION));
    if (!$this->is_input_format_supported_by_pandoc($fileextension)) {
        return false;
    }

    if (!$this->is_output_format_supported_by_pandoc($format)) {
        return false;
    }

    // Copy the file to the local tmp dir.
    $tmp = make_request_directory();
    // Safety: never let the stored file name escape the tmp dir.
    $localfilename = clean_param($file->get_filename(), PARAM_FILE);

    $filename = $tmp . '/' . $localfilename;
    $file->copy_content_to($filename);
    if (!file_exists($filename)) {
        // The source could not be materialised on disk, so there is nothing to convert.
        return false;
    }

    if (in_array($fileextension, array('gif', 'jpg', 'png'))) {
        // We wrap images in a tiny html file - pandoc will generate documents from them.
        $htmlwrapperfile = $tmp . '/wrapper.html';

        $wrapper = "<html><body><img src=\"$localfilename\"></body></html>";
        if (file_put_contents($htmlwrapperfile, $wrapper) === false) {
            return false;
        }

        $filename = $htmlwrapperfile;
    }

    $newtmpfile = pathinfo($filename, PATHINFO_FILENAME) . '.' . $format;

    // Safety.
    $newtmpfile = $tmp . '/' . clean_param($newtmpfile, PARAM_FILE);

    $cmd = escapeshellcmd(trim($CFG->pathtopandoc)) . ' ' .
           escapeshellarg('-o') . ' ' .
           escapeshellarg($newtmpfile) . ' ' .
           escapeshellarg($filename);

    // Run pandoc from inside the tmp dir so the relative image reference in the
    // html wrapper resolves, then go back to where we were.
    $output = null;
    $returncode = null;
    $currentdir = getcwd();
    chdir($tmp);
    exec($cmd, $output, $returncode);
    if ($currentdir !== false) {
        chdir($currentdir);
    }

    // A non-zero exit status means pandoc failed; do not store whatever partial
    // output it may have left behind, because stored conversions are reused.
    if ($returncode !== 0 || !file_exists($newtmpfile)) {
        return false;
    }

    $context = context_system::instance();
    $record = array(
        'contextid' => $context->id,
        'component' => 'core',
        'filearea'  => 'documentconversion',
        'itemid'    => 0,
        'filepath'  => '/' . $format . '/',
        'filename'  => $file->get_contenthash(),
    );

    return $this->create_file_from_pathname($record, $newtmpfile);
}

/**
* Returns an image file that represent the given stored file as a preview
*
Expand Down Expand Up @@ -2282,6 +2404,26 @@ public function cron() {
$rs->close();
mtrace('done.');

// remove orphaned converted files (that is files in the core documentconversion filearea without
// the existing original file)
mtrace('Deleting orphaned document conversion files... ', '');
cron_trace_time_and_memory();
$sql = "SELECT p.*
FROM {files} p
LEFT JOIN {files} o ON (p.filename = o.contenthash)
WHERE p.contextid = ? AND p.component = 'core' AND p.filearea = 'documentconversion' AND p.itemid = 0
AND o.id IS NULL";
$syscontext = context_system::instance();
$rs = $DB->get_recordset_sql($sql, array($syscontext->id));
foreach ($rs as $orphan) {
$file = $this->get_file_instance($orphan);
if (!$file->is_directory()) {
$file->delete();
}
}
$rs->close();
mtrace('done.');

// remove trash pool files once a day
// if you want to disable purging of trash put $CFG->fileslastcleanup=time(); into config.php
if (empty($CFG->fileslastcleanup) or $CFG->fileslastcleanup < time() - 60*60*24) {
Expand Down
3 changes: 2 additions & 1 deletion lib/phpunit/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@
$allowed = array('wwwroot', 'dataroot', 'dirroot', 'admin', 'directorypermissions', 'filepermissions',
'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix', 'dboptions',
'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword', 'proxybypass', // keep proxy settings from config.php
'altcacheconfigpath', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot'
'altcacheconfigpath', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot',
'pathtopandoc'
);
$productioncfg = (array)$CFG;
$CFG = new stdClass();
Expand Down
59 changes: 57 additions & 2 deletions mod/assign/feedback/editpdf/classes/document_services.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

namespace assignfeedback_editpdf;

use DOMDocument;

/**
* Functions for generating the annotated pdf.
*
Expand All @@ -40,6 +42,8 @@ class document_services {
const FINAL_PDF_FILEAREA = 'download';
/** File area for combined pdf */
const COMBINED_PDF_FILEAREA = 'combined';
/** File area for importing html */
const IMPORT_HTML_FILEAREA = 'importhtml';
/** File area for page images */
const PAGE_IMAGE_FILEAREA = 'pages';
/** File area for readonly page images */
Expand Down Expand Up @@ -84,6 +88,32 @@ private static function hash($assignment, $userid, $attemptnumber) {
return sha1($assignmentid . '_' . $userid . '_' . $attemptnumber);
}

/**
 * Use a DOM parser to accurately replace images with their alt text.
 *
 * Every <img> element is replaced by a text node: ' [ alt text ] ' when the image has an
 * alt attribute, a single space otherwise. The input is parsed as UTF-8. Empty input is
 * returned as an empty string without being parsed.
 *
 * @param string $html
 * @return string New html with no image tags.
 */
protected static function strip_images($html) {
    $html = (string)$html;
    if (trim($html) === '') {
        // DOMDocument::loadHTML() cannot parse an empty string, and there is nothing to strip.
        return '';
    }

    // Submitted markup is rarely valid html; collect parser complaints internally
    // instead of letting them surface as PHP warnings, then restore the previous setting.
    $previouserrorsetting = libxml_use_internal_errors(true);

    $dom = new DOMDocument();
    // Without an explicit charset the parser assumes ISO-8859-1 and mangles UTF-8 text,
    // so declare the encoding up front.
    $dom->loadHTML('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $html);

    libxml_clear_errors();
    libxml_use_internal_errors($previouserrorsetting);

    $images = $dom->getElementsByTagName('img');

    // The node list is live, so walk it backwards while replacing nodes.
    for ($i = ($images->length - 1); $i >= 0; $i--) {
        $node = $images->item($i);

        if ($node->hasAttribute('alt')) {
            $replacement = ' [ ' . $node->getAttribute('alt') . ' ] ';
        } else {
            $replacement = ' ';
        }

        $text = $dom->createTextNode($replacement);
        $node->parentNode->replaceChild($text, $node);
    }
    return $dom->saveHTML();
}

/**
* This function will search for all files that can be converted
* and concatenated into a PDF (1.4) - for any submission plugin
Expand Down Expand Up @@ -116,13 +146,38 @@ public static function list_compatible_submission_files_for_attempt($assignment,
if (!$submission) {
return $files;
}

$fs = get_file_storage();
// Ask each plugin for its list of files.
foreach ($assignment->get_submission_plugins() as $plugin) {
if ($plugin->is_enabled() && $plugin->is_visible()) {
$pluginfiles = $plugin->get_files($submission, $user);
foreach ($pluginfiles as $filename => $file) {
if (($file instanceof \stored_file) && ($file->get_mimetype() === 'application/pdf')) {
$files[$filename] = $file;
if ($file instanceof \stored_file) {
if ($file->get_mimetype() === 'application/pdf') {
$files[$filename] = $file;
} else if ($convertedfile = $fs->get_converted_document($file, 'pdf')) {
$files[$filename] = $convertedfile;
}
} else {
// Create a tmp stored_file from this html string.
$file = reset($file);
// Strip image tags, because they will not be resolvable.
$file = self::strip_images($file);
$record = new \stdClass();
$record->contextid = $assignment->get_context()->id;
$record->component = 'assignfeedback_editpdf';
$record->filearea = self::IMPORT_HTML_FILEAREA;
$record->itemid = $submission->id;
$record->filepath = '/';
$record->filename = $plugin->get_type() . '-' . $filename;

$htmlfile = $fs->create_file_from_string($record, $file);
$convertedfile = $fs->get_converted_document($htmlfile, 'pdf');
$htmlfile->delete();
if ($convertedfile) {
$files[$filename] = $convertedfile;
}
}
}
}
Expand Down

0 comments on commit 2e76c14

Please sign in to comment.