From 0c816dd03a058be11ccb589df942288ece835749 Mon Sep 17 00:00:00 2001 From: Colin O'Dell Date: Sun, 7 Feb 2016 07:40:54 -0500 Subject: [PATCH] Fix tab handling This commit mirrors several changes from jgm/commonmark.js: - Fix tabs in list indentation (jgm/commonmark.js#86) - Fix handling of partially-consumed tabs - Proper tab handling with blockquotes, fenced code, lists --- src/Block/Element/BlockQuote.php | 4 +- src/Block/Parser/BlockQuoteParser.php | 4 +- src/Block/Parser/ListParser.php | 13 +++--- src/Cursor.php | 59 +++++++++++++++++++-------- tests/unit/CursorTest.php | 11 +++-- 5 files changed, 58 insertions(+), 33 deletions(-) diff --git a/src/Block/Element/BlockQuote.php b/src/Block/Element/BlockQuote.php index 21d2c8fe38..c212653ac3 100644 --- a/src/Block/Element/BlockQuote.php +++ b/src/Block/Element/BlockQuote.php @@ -55,9 +55,7 @@ public function matchesNextLine(Cursor $cursor) if (!$cursor->isIndented() && $cursor->getFirstNonSpaceCharacter() === '>') { $cursor->advanceToFirstNonSpace(); $cursor->advance(); - if ($cursor->getCharacter() === ' ') { - $cursor->advance(); - } + $cursor->advanceBySpaceOrTab(); return true; } diff --git a/src/Block/Parser/BlockQuoteParser.php b/src/Block/Parser/BlockQuoteParser.php index 17593270e4..c7615af315 100644 --- a/src/Block/Parser/BlockQuoteParser.php +++ b/src/Block/Parser/BlockQuoteParser.php @@ -38,9 +38,7 @@ public function parse(ContextInterface $context, Cursor $cursor) $cursor->advanceToFirstNonSpace(); $cursor->advance(); - if ($cursor->getCharacter() === ' ') { - $cursor->advance(); - } + $cursor->advanceBySpaceOrTab(); $context->addBlock(new BlockQuote()); diff --git a/src/Block/Parser/ListParser.php b/src/Block/Parser/ListParser.php index 24319a1c68..38107412d0 100644 --- a/src/Block/Parser/ListParser.php +++ b/src/Block/Parser/ListParser.php @@ -91,19 +91,18 @@ private function calculateListMarkerPadding(Cursor $cursor, $markerLength) $start = $cursor->saveState(); $spacesStartCol = $cursor->getColumn(); - do { - $cursor->advanceBy(1, true); - $nextChar = $cursor->getCharacter(); - } while ($cursor->getColumn() - $spacesStartCol < 5 && ($nextChar === ' ' || $nextChar === "\t")); + while ($cursor->getColumn() - $spacesStartCol < 5) { + if (!$cursor->advanceBySpaceOrTab()) { + break; + } + } $blankItem = $cursor->peek() === null; $spacesAfterMarker = $cursor->getColumn() - $spacesStartCol; if ($spacesAfterMarker >= 5 || $spacesAfterMarker < 1 || $blankItem) { $cursor->restoreState($start); - if ($cursor->peek() === ' ') { - $cursor->advanceBy(1, true); - } + $cursor->advanceBySpaceOrTab(); return $markerLength + 1; } diff --git a/src/Cursor.php b/src/Cursor.php index 411f9bb6ca..ca941a0e3b 100644 --- a/src/Cursor.php +++ b/src/Cursor.php @@ -53,6 +53,11 @@ class Cursor */ private $firstNonSpaceCache; + /** + * @var bool + */ + private $partiallyConsumedTab = false; + /** * @param string $line */ @@ -186,30 +191,42 @@ public function advanceBy($characters, $advanceByColumns = false) return; } + $this->previousPosition = $this->currentPosition; $this->firstNonSpaceCache = null; - $i = 0; - $cols = 0; - while ($advanceByColumns ? ($cols < $characters) : ($i < $characters)) { - if ($this->peek($i) === "\t") { - $cols += (4 - (($this->column + $cols) % 4)); + while ($characters > 0 && ($c = $this->getCharacter()) !== null) { + if ($c === "\t") { + $charsToTab = 4 - ($this->column % 4); + $this->partiallyConsumedTab = $advanceByColumns && $charsToTab > $characters; + $charsToAdvance = $charsToTab > $characters ? $characters : $charsToTab; + $this->column += $charsToAdvance; + $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1; + $characters -= ($advanceByColumns ? $charsToAdvance : 1); } else { - $cols++; + $this->partiallyConsumedTab = false; + $this->currentPosition++; + $this->column++; + $characters--; } - - $i++; } + } - $this->previousPosition = $this->currentPosition; - $newPosition = $this->currentPosition + $i; + /** + * Advances the cursor by a single space or tab, if present + * + * @return bool + */ + public function advanceBySpaceOrTab() + { + $character = $this->getCharacter(); - $this->column += $cols; + if ($character === ' ' || $character === "\t") { + $this->advanceBy(1, true); - if ($newPosition >= $this->length) { - $this->currentPosition = $this->length; - } else { - $this->currentPosition = $newPosition; + return true; } + + return false; } /** @@ -274,9 +291,17 @@ public function getRemainder() { if ($this->isAtEnd()) { return ''; - } else { - return mb_substr($this->line, $this->currentPosition, $this->length, 'utf-8'); } + + $prefix = ''; + $position = $this->currentPosition; + if ($this->partiallyConsumedTab) { + $position++; + $charsToTab = 4 - ($this->column % 4); + $prefix = str_repeat(' ', $charsToTab); + } + + return $prefix . mb_substr($this->line, $position, $this->length, 'utf-8'); } /** diff --git a/tests/unit/CursorTest.php b/tests/unit/CursorTest.php index 3551a40714..a005cb27f5 100644 --- a/tests/unit/CursorTest.php +++ b/tests/unit/CursorTest.php @@ -272,11 +272,16 @@ public function testAdvanceByColumnOffset() { $cursor = new Cursor("1. \t\tthere"); $cursor->advanceBy(3); + + $this->assertEquals(5, $cursor->getIndent()); + $this->assertEquals(3, $cursor->getPosition()); + $this->assertEquals(3, $cursor->getColumn()); + $cursor->advanceBy(4, true); - $this->assertEquals(0, $cursor->getIndent()); - $this->assertEquals(5, $cursor->getPosition()); - $this->assertEquals(8, $cursor->getColumn()); + $this->assertEquals(1, $cursor->getIndent()); + $this->assertEquals(4, $cursor->getPosition()); + $this->assertEquals(7, $cursor->getColumn()); } /**