diff --git a/src/lib/Parser.ts b/src/lib/Parser.ts
index 7e884e9..ab7a103 100644
--- a/src/lib/Parser.ts
+++ b/src/lib/Parser.ts
@@ -9,7 +9,6 @@ import { XmlText } from './XmlText.js';
import type { XmlNode } from './XmlNode.js';
-
const emptyString = '';
/**
@@ -29,11 +28,18 @@ export class Parser {
* @param options Parser options.
*/
constructor(xml: string, options: ParserOptions = {}) {
- this.document = new XmlDocument();
- this.currentNode = this.document;
+ let doc = this.document = new XmlDocument();
+ let scanner = this.scanner = new StringScanner(xml);
+
+ this.currentNode = doc;
this.options = options;
- this.scanner = new StringScanner(normalizeXmlString(xml));
+ if (this.options.includeOffsets) {
+ doc.start = 0;
+ doc.end = xml.length;
+ }
+
+ scanner.consumeStringFast('\uFEFF'); // byte order mark
this.consumeProlog();
if (!this.consumeElement()) {
@@ -42,7 +48,7 @@ export class Parser {
while (this.consumeMisc()) {} // eslint-disable-line no-empty
- if (!this.scanner.isEnd) {
+ if (!scanner.isEnd) {
throw this.error('Extra content at the end of the document');
}
}
@@ -50,9 +56,14 @@ export class Parser {
/**
* Adds the given `XmlNode` as a child of `this.currentNode`.
*/
- addNode(node: XmlNode) {
+ addNode(node: XmlNode, charIndex: number) {
node.parent = this.currentNode;
+ if (this.options.includeOffsets) {
+ node.start = this.scanner.charIndexToByteIndex(charIndex);
+ node.end = this.scanner.charIndexToByteIndex();
+ }
+
// @ts-expect-error: XmlDocument has a more limited set of possible children
// than XmlElement so TypeScript is unhappy, but we always do the right
// thing.
@@ -63,10 +74,12 @@ export class Parser {
* Adds the given _text_ to the document, either by appending it to a
* preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
*/
- addText(text: string) {
+ addText(text: string, charIndex: number) {
let { children } = this.currentNode;
let { length } = children;
+ text = normalizeLineBreaks(text);
+
if (length > 0) {
let prevNode = children[length - 1];
@@ -74,11 +87,16 @@ export class Parser {
// The previous node is a text node, so we can append to it and avoid
// creating another node.
prevNode.text += text;
+
+ if (this.options.includeOffsets) {
+ prevNode.end = this.scanner.charIndexToByteIndex();
+ }
+
return;
}
}
- this.addNode(new XmlText(text));
+ this.addNode(new XmlText(text), charIndex);
}
/**
@@ -199,6 +217,7 @@ export class Parser {
*/
consumeCdataSection(): boolean {
let { scanner } = this;
+ let startIndex = scanner.charIndex;
if (!scanner.consumeStringFast('`');
}
- this.addText(charData);
+ this.addText(charData, startIndex);
return true;
}
@@ -252,6 +272,7 @@ export class Parser {
*/
consumeComment(): boolean {
let { scanner } = this;
+ let startIndex = scanner.charIndex;
if (!scanner.consumeStringFast('`, { preserveComments: true });
+ assert.strictEqual(root.children[0].start, -1);
+ });
+ });
+
+ describe('end', () => {
+ it('is `-1`', () => {
+ let { root } = parseXml(``, { preserveComments: true });
+ assert.strictEqual(root.children[0].end, -1);
+ });
+ });
+ });
+
+ describe('when `options.includeOffsets` is `true`', () => {
+ describe('start', () => {
+ it('is the starting byte offset of the comment', () => {
+ let { root } = parseXml(`<root><!-- I'm a comment! --></root>`, { includeOffsets: true, preserveComments: true });
+ assert.strictEqual(root.children[0].start, 6);
+ });
+ });
+
+ describe('end', () => {
+ it('is the ending byte offset of the comment', () => {
+ let { root } = parseXml(`<root><!-- I'm a comment! --></root>`, { includeOffsets: true, preserveComments: true });
+ assert.strictEqual(root.children[0].end, 29);
+ });
+ });
+ });
+
describe('parent', () => {
it('is the parent node', () => {
let { root } = parseXml(``, { preserveComments: true });
diff --git a/tests/lib/XmlDocument.test.js b/tests/lib/XmlDocument.test.js
index 21eafc7..86bb755 100644
--- a/tests/lib/XmlDocument.test.js
+++ b/tests/lib/XmlDocument.test.js
@@ -39,6 +39,38 @@ describe('XmlDocument', () => {
});
});
+ describe('when `options.includeOffsets` is `false`', () => {
+ describe('start', () => {
+ it('is `-1`', () => {
+ let doc = parseXml('');
+ assert.strictEqual(doc.start, -1);
+ });
+ });
+
+ describe('end', () => {
+ it('is `-1`', () => {
+ let doc = parseXml('');
+ assert.strictEqual(doc.end, -1);
+ });
+ });
+ });
+
+ describe('when `options.includeOffsets` is `true`', () => {
+ describe('start', () => {
+ it('is the starting byte offset of the document', () => {
+ let doc = parseXml('<root />', { includeOffsets: true });
+ assert.strictEqual(doc.start, 0);
+ });
+ });
+
+ describe('end', () => {
+ it('is the ending byte offset of the document', () => {
+ let doc = parseXml('<root />', { includeOffsets: true });
+ assert.strictEqual(doc.end, 8);
+ });
+ });
+ });
+
describe('parent', () => {
it('is `null`', () => {
assert.strictEqual(parseXml('').parent, null);
diff --git a/tests/lib/XmlElement.test.js b/tests/lib/XmlElement.test.js
index 05eb751..7443e7c 100644
--- a/tests/lib/XmlElement.test.js
+++ b/tests/lib/XmlElement.test.js
@@ -129,6 +129,42 @@ describe('XmlElement', () => {
});
});
+ describe('when `options.includeOffsets` is `false`', () => {
+ describe('start', () => {
+ it('is `-1`', () => {
+ let { root } = parseXml('');
+ assert.strictEqual(root.start, -1);
+ });
+ });
+
+ describe('end', () => {
+ it('is `-1`', () => {
+ let { root } = parseXml('');
+ assert.strictEqual(root.end, -1);
+ });
+ });
+ });
+
+ describe('when `options.includeOffsets` is `true`', () => {
+ describe('start', () => {
+ it('is the starting byte offset of the element', () => {
+ let { root } = parseXml('<a><b><c /></b></a>', { includeOffsets: true });
+ assert.strictEqual(root.start, 0);
+ assert.strictEqual(root.children[0].start, 3);
+ assert.strictEqual(root.children[0].children[0].start, 6);
+ });
+ });
+
+ describe('end', () => {
+ it('is the ending byte offset of the element', () => {
+ let { root } = parseXml('<a><b><c /></b></a>', { includeOffsets: true });
+ assert.strictEqual(root.end, 19);
+ assert.strictEqual(root.children[0].end, 15);
+ assert.strictEqual(root.children[0].children[0].end, 11);
+ });
+ });
+ });
+
describe('parent', () => {
describe('when the element is the root element', () => {
it('is the document', () => {
diff --git a/tests/lib/XmlNode.test.js b/tests/lib/XmlNode.test.js
index c79ff99..39a6a1f 100644
--- a/tests/lib/XmlNode.test.js
+++ b/tests/lib/XmlNode.test.js
@@ -15,6 +15,28 @@ describe('XmlNode', () => {
});
});
+ describe('toJSON()', () => {
+ describe('when `start` is `-1`', () => {
+ it('doesn\'t include the `start` or `end` properties', () => {
+ let json = new XmlNode().toJSON();
+ assert.strictEqual(json.start, undefined);
+ assert.strictEqual(json.end, undefined);
+ });
+ });
+
+ describe('when `start` is greater than -1', () => {
+ it('includes the `start` and `end` properties', () => {
+ let node = new XmlNode();
+ node.start = 0;
+ node.end = 3;
+
+ let json = node.toJSON();
+ assert.strictEqual(json.start, 0);
+ assert.strictEqual(json.end, 3);
+ });
+ });
+ });
+
describe('type', () => {
it('is an empty string', () => {
let node = new XmlNode();
diff --git a/tests/lib/XmlProcessingInstruction.test.js b/tests/lib/XmlProcessingInstruction.test.js
index 681605c..ab8a24a 100644
--- a/tests/lib/XmlProcessingInstruction.test.js
+++ b/tests/lib/XmlProcessingInstruction.test.js
@@ -30,6 +30,38 @@ describe('XmlProcessingInstruction', () => {
});
});
+ describe('when `options.includeOffsets` is `false`', () => {
+ describe('start', () => {
+ it('is `-1`', () => {
+ let { root } = parseXml('');
+ assert.strictEqual(root.children[0].start, -1);
+ });
+ });
+
+ describe('end', () => {
+ it('is `-1`', () => {
+ let { root } = parseXml('');
+ assert.strictEqual(root.children[0].end, -1);
+ });
+ });
+ });
+
+ describe('when `options.includeOffsets` is `true`', () => {
+ describe('start', () => {
+ it('is the starting byte offset of the processing instruction', () => {
+ let { root } = parseXml('<root><?foo?></root>', { includeOffsets: true });
+ assert.strictEqual(root.children[0].start, 6);
+ });
+ });
+
+ describe('end', () => {
+ it('is the ending byte offset of the processing instruction', () => {
+ let { root } = parseXml('<root><?foo?></root>', { includeOffsets: true });
+ assert.strictEqual(root.children[0].end, 13);
+ });
+ });
+ });
+
describe('parent', () => {
it('is the parent element', () => {
let { root } = parseXml('');
diff --git a/tests/lib/XmlText.test.js b/tests/lib/XmlText.test.js
index 4a5cb7a..ef86860 100644
--- a/tests/lib/XmlText.test.js
+++ b/tests/lib/XmlText.test.js
@@ -23,6 +23,43 @@ describe('XmlText', () => {
});
});
+ describe('when `options.includeOffsets` is `false`', () => {
+ describe('start', () => {
+ it('is `-1`', () => {
+ let { root } = parseXml('<root> foo &amp; bar\r\nbaz </root>');
+ assert.strictEqual(root.children[0].start, -1);
+ });
+ });
+
+ describe('end', () => {
+ it('is `-1`', () => {
+ let { root } = parseXml('<root> foo &amp; bar\r\nbaz </root>');
+ assert.strictEqual(root.children[0].end, -1);
+ });
+ });
+ });
+
+ describe('when `options.includeOffsets` is `true`', () => {
+ describe('start', () => {
+ it('is the starting byte offset of the text node', () => {
+ let { root } = parseXml('<root> foo </root>', { includeOffsets: true });
+ assert.strictEqual(root.children[0].start, 6);
+ });
+ });
+
+ describe('end', () => {
+ it('is the ending byte offset of the text node', () => {
+ let { root } = parseXml('<root> foo </root>', { includeOffsets: true });
+ assert.strictEqual(root.children[0].end, 11);
+ });
+
+ it('is correct after multiple text nodes have been merged', () => {
+ let { root } = parseXml('one&twothree', { includeOffsets: true });
+ assert.strictEqual(root.children[0].end, 38);
+ });
+ });
+ });
+
describe('text', () => {
it('is the text content of the text node', () => {
let { root } = parseXml(' foo & bar\r\nbaz ');