Skip to content

Commit

Permalink
New XML parser
Browse files Browse the repository at this point in the history
  • Loading branch information
yurydelendik committed Mar 20, 2018
1 parent 6662985 commit 655c8d3
Show file tree
Hide file tree
Showing 3 changed files with 382 additions and 133 deletions.
127 changes: 0 additions & 127 deletions src/display/dom_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,132 +135,6 @@ class DOMSVGFactory {
}
}

/**
 * Minimal DOM-like node used by the simple XML parser in this file.
 *
 * Element nodes get a `childNodes` array assigned by the parser after
 * construction; text nodes never receive one, so every accessor below has to
 * cope with `childNodes` being undefined.
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName - Tag name, or '#text' for text nodes.
   * @param {string} [nodeValue] - Text payload; left undefined for elements.
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;

    // Defined through a descriptor so the back-reference is writable but
    // non-enumerable (it does not show up when enumerating the node's keys).
    Object.defineProperty(this, 'parentNode', { value: null, writable: true, });
  }

  /**
   * @returns {SimpleDOMNode|undefined} The first child, or undefined when the
   *   node has no children (including nodes without a `childNodes` array).
   */
  get firstChild() {
    return this.childNodes ? this.childNodes[0] : undefined;
  }

  /**
   * @returns {SimpleDOMNode|undefined} The node following this one in its
   *   parent's child list, or undefined when there is no parent, the node is
   *   not attached to the parent's list, or it is the last child.
   */
  get nextSibling() {
    const parent = this.parentNode;
    if (!parent || !parent.childNodes) {
      return undefined;
    }
    const index = parent.childNodes.indexOf(this);
    if (index === -1) {
      // Without this guard, `index + 1` would be 0 and the parent's first
      // child would be reported as the sibling.
      return undefined;
    }
    return parent.childNodes[index + 1];
  }

  /**
   * @returns {string} The node's own value for leaf nodes, otherwise the
   *   concatenated text content of all descendants.
   */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || '';
    }
    return this.childNodes.map((child) => child.textContent).join('');
  }

  /**
   * @returns {boolean} True when the node has at least one child.
   */
  hasChildNodes() {
    return Boolean(this.childNodes) && this.childNodes.length > 0;
  }
}

/**
 * Regular-expression based XML parser that builds a tree of `SimpleDOMNode`
 * objects.
 *
 * The parser works by repeatedly collapsing the innermost constructs of the
 * input into "N," placeholders, where N is an index into a flat `nodes`
 * array. Attributes are matched but discarded, and closing tag names are not
 * checked against their opening tag.
 */
class SimpleXMLParser {
  /**
   * Parses an XML string.
   *
   * @param {string} data - The XML source text.
   * @returns {{documentElement: (SimpleDOMNode|undefined)}} A document-like
   *   object whose `documentElement` is the last node that was created, or
   *   undefined when no node could be created at all.
   */
  parseFromString(data) {
    const nodes = [];

    // Remove all comments and processing instructions.
    let xml = data.replace(/<\?[\s\S]*?\?>|<!--[\s\S]*?-->/g, '').trim();
    xml = xml.replace(/<!DOCTYPE[^>\[]+(\[[^\]]+)?[^>]+>/g, '').trim();

    // Extract all text nodes and replace them with a numeric index in
    // the nodes.
    xml = xml.replace(/>([^<][\s\S]*?)</g, (all, text) => {
      const index = nodes.length;
      const value = this._decodeXML(text);
      // The node is registered even when it turns out to be whitespace-only;
      // in that case no placeholder is emitted, so nothing refers to it.
      nodes.push(new SimpleDOMNode('#text', value));
      if (value.trim().length === 0) {
        return '><'; // Ignore whitespace.
      }
      return '>' + index + ',<';
    });

    // Extract all CDATA nodes. Their content is taken verbatim (no entity
    // decoding).
    xml = xml.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, (all, text) => {
      const index = nodes.length;
      nodes.push(new SimpleDOMNode('#text', text));
      return index + ',';
    });

    // Until nodes without '<' and '>' content are present, replace them
    // with a numeric index in the nodes.
    const elementRegex =
      /<([\w\:]+)((?:[\s\w:=]|'[^']*'|"[^"]*")*)(?:\/>|>([\d,]*)<\/[^>]+>)/g;
    let previousCount;
    do {
      previousCount = nodes.length;
      xml = xml.replace(elementRegex, (all, name, attrs, content) => {
        const index = nodes.length;
        const node = new SimpleDOMNode(name);
        const children = [];
        if (content) {
          // `content` looks like "3,7,": drop the empty entry produced by
          // the trailing comma before resolving the indices.
          const indices = content.split(',');
          indices.pop();
          for (const childIndex of indices) {
            const childNode = nodes[+childIndex];
            childNode.parentNode = node;
            children.push(childNode);
          }
        }

        node.childNodes = children;
        nodes.push(node);
        return index + ',';
      });
      // Stop as soon as a full pass collapses no further element.
    } while (previousCount < nodes.length);

    // We should only have one root index left, which will be last in the nodes.
    return {
      documentElement: nodes.pop(),
    };
  }

  /**
   * Decodes the five predefined XML entities and numeric character
   * references in `text`.
   *
   * Numeric references are decoded as full Unicode code points, so values
   * above U+FFFF yield the proper surrogate pair. The hexadecimal marker is
   * accepted in either case because the matching regex is case-insensitive.
   * Unknown named entities and numeric references beyond U+10FFFF are left
   * in the output unchanged.
   *
   * @param {string} text - Raw text taken from between two tags.
   * @returns {string} The decoded text.
   */
  _decodeXML(text) {
    if (!text.includes('&')) {
      return text;
    }

    return text.replace(/&(#(x[0-9a-f]+|\d+)|\w+);/gi,
      (all, entityName, number) => {
        if (number) {
          const isHex = number[0] === 'x' || number[0] === 'X';
          const codePoint = isHex ? parseInt(number.substring(1), 16) :
                                    parseInt(number, 10);
          // String.fromCodePoint throws a RangeError above U+10FFFF, so such
          // references are passed through verbatim instead.
          if (codePoint > 0x10FFFF) {
            return all;
          }
          return String.fromCodePoint(codePoint);
        }

        switch (entityName) {
          case 'amp':
            return '&';
          case 'lt':
            return '<';
          case 'gt':
            return '>';
          case 'quot':
            return '\"';
          case 'apos':
            return '\'';
        }
        return '&' + entityName + ';';
      });
  }
}

var RenderingCancelledException = (function RenderingCancelledException() {
function RenderingCancelledException(msg, type) {
this.message = msg;
Expand Down Expand Up @@ -411,7 +285,6 @@ export {
DOMCanvasFactory,
DOMCMapReaderFactory,
DOMSVGFactory,
SimpleXMLParser,
StatTimer,
DummyStatTimer,
};
14 changes: 8 additions & 6 deletions src/display/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
*/

import { assert } from '../shared/util';
import { SimpleXMLParser } from './dom_utils';
import { SimpleXMLParser } from './xml_parser';

class Metadata {
constructor(data) {
Expand All @@ -23,13 +23,15 @@ class Metadata {
// Ghostscript may produce invalid metadata, so try to repair that first.
data = this._repair(data);

// Convert the string to a DOM `Document`.
// Convert the string to an XML document.
let parser = new SimpleXMLParser();
data = parser.parseFromString(data);
const xmlDocument = parser.parseFromString(data);

this._metadata = Object.create(null);

this._parse(data);
if (xmlDocument) {
this._parse(xmlDocument);
}
}

_repair(data) {
Expand Down Expand Up @@ -68,8 +70,8 @@ class Metadata {
});
}

_parse(domDocument) {
let rdf = domDocument.documentElement;
_parse(xmlDocument) {
let rdf = xmlDocument.documentElement;

if (rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in <xmpmeta>
rdf = rdf.firstChild;
Expand Down
Loading

0 comments on commit 655c8d3

Please sign in to comment.