-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Scrape Crystal's HTML for links, add them into highlighted code
- Loading branch information
Showing
6 changed files
with
196 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import collections
import html.parser
import io
from typing import Callable, List, Optional, Sequence, Tuple

from markupsafe import Markup, escape
|
||
# Link spans inside a piece of text: (start offset, end offset, link path).
LinkTokens = Sequence[Tuple[int, int, str]]


class TextWithLinks(collections.UserString):
    """A string that also carries the link spans that apply to it.

    Args:
        string: The plain text.
        tokens: ``(start, end, path)`` spans into ``string``. Defaults to no
            links so that the ``UserString`` operations which build a derived
            instance through ``self.__class__(new_text)`` (slicing, ``+``,
            ``upper()``, ...) keep working instead of raising ``TypeError``.
            Such derived strings carry no tokens, because the offsets of the
            original text no longer apply to them.
    """

    def __init__(self, string, tokens: LinkTokens = ()):
        super().__init__(string)
        self.tokens = tokens

    def __repr__(self) -> str:
        return f"TextWithLinks({self.data!r}, {self.tokens!r})"
|
||
|
||
def parse_crystal_html(crystal_html: str) -> TextWithLinks:
    """Strip the markup from an HTML fragment while remembering its links.

    Args:
        crystal_html: HTML fragment in which ``<a href=...>`` elements mark
            the linked parts of the text.

    Returns:
        The text content of the fragment, together with one
        ``(start, end, path)`` token for every link found in it.
    """
    parser = _CrystalHTMLHandler()
    parser.feed(crystal_html)
    # feed() may hold back trailing input (for instance text ending in a bare
    # "&") while it waits for more data; close() forces it to be processed.
    parser.close()
    return TextWithLinks(parser.text.getvalue(), parser.tokens)
|
||
|
||
def linkify_highlighted_html(
    pygments_html: str, html_tokens: LinkTokens, make_link: Callable[[str, str], str]
) -> str:
    """Insert links into already highlighted HTML.

    Args:
        pygments_html: The highlighted HTML to rewrite.
        html_tokens: ``(start, end, path)`` spans, with offsets counted in the
            text content of ``pygments_html``.
        make_link: Called as ``make_link(path, inner_html)``; its result
            replaces the ``<span>`` elements that cover the token.

    Returns:
        The rewritten HTML, wrapped in ``Markup``.
    """
    pygments_parser = _PygmentsHTMLHandler(html_tokens, make_link)
    pygments_parser.feed(pygments_html)
    # Flush anything feed() is still buffering so no trailing text is lost.
    pygments_parser.close()
    return Markup(pygments_parser.html.getvalue())
|
||
|
||
class _CrystalHTMLHandler(html.parser.HTMLParser):
    """Collect the text of an HTML fragment and the spans covered by links.

    After feeding, ``text`` holds the concatenated character data and
    ``tokens`` holds one ``(start, end, path)`` entry for every closed
    ``<a href=...>`` element, with offsets measured in ``text``.
    """

    def __init__(self):
        super().__init__()
        self.text = io.StringIO()
        self.tokens: LinkTokens = []
        # Stack of the <a> elements that are currently open: the text offset
        # at which each one started and its link path (None when the element
        # has no usable href).
        self._link_starts: List[Tuple[int, Optional[str]]] = []

    def handle_starttag(self, tag, attrs):
        """Remember where the text of an ``<a>`` element begins."""
        if tag != "a":
            return
        # An <a> without href (or with a valueless one) is not a link, but it
        # is still pushed so that its closing tag pairs up with it rather than
        # with an enclosing link.
        href = next((value for name, value in attrs if name == "href"), None)
        path = None if href is None else self.link_to_path(href)
        self._link_starts.append((self.text.tell(), path))

    def handle_endtag(self, tag):
        """Emit a token for the ``<a>`` element that has just been closed."""
        # A stray </a> with nothing open is ignored instead of crashing.
        if tag != "a" or not self._link_starts:
            return
        start, path = self._link_starts.pop()
        if path is not None:
            self.tokens.append((start, self.text.tell(), path))

    def handle_data(self, data):
        """Append character data to the collected text."""
        self.text.write(data)

    @classmethod
    def link_to_path(cls, href):
        """Turn a relative doc link into a ``::``-separated path.

        A trailing ``.html`` and any leading ``../`` segments are dropped and
        the remaining ``/`` separators become ``::``; for example
        ``"../Foo/Bar.html"`` becomes ``"Foo::Bar"``.
        """
        if href.endswith(".html"):
            href = href[:-5]
        while href.startswith("../"):
            href = href[3:]
        return href.replace("/", "::")
|
||
|
||
class _PygmentsHTMLHandler(html.parser.HTMLParser):
    """Re-emit HTML while replacing the spans covered by link tokens.

    The input is written back out to ``html`` tag by tag. ``pos`` counts the
    characters of text seen so far. When a ``<span>`` opens at or after the
    start offset of the next token, a link begins: from then on tags are
    dropped and only the escaped text is kept. When a ``</span>`` is reached
    at or after the token's end offset, everything written since the link
    began is replaced by ``make_link(path, inner_html)``.
    """

    def __init__(self, tokens: "LinkTokens", make_link: Callable[[str, str], str]):
        super().__init__()
        # Tokens are consumed as links are emitted, so work on a private queue
        # and leave the caller's sequence untouched (it may also be a tuple).
        self.tokens = collections.deque(tokens)
        self.make_link = make_link

        self.pos = 0
        self.html = io.StringIO()
        # Offset in ``html`` where the current link's content starts, or None
        # while not inside a link.
        self.inlink: Optional[int] = None

    def handle_starttag(self, tag, attrs):
        """Start a link if one is due, otherwise copy the tag to the output."""
        if tag == "span" and self.inlink is None:
            if self.tokens and self.tokens[0][0] <= self.pos:
                self.inlink = self.html.tell()

        if self.inlink is None:
            # A valueless attribute is reported with value None; emit just its
            # name rather than the string "None".
            attrs = "".join(
                f" {k}" if v is None else f' {k}="{escape(v)}"' for k, v in attrs
            )
            self.html.write(f"<{tag}{attrs}>")

    def handle_endtag(self, tag):
        """Copy the closing tag, or finish the current link once it is covered."""
        if self.inlink is None:
            self.html.write(f"</{tag}>")

        if tag == "span" and self.inlink is not None:
            if self.tokens and self.tokens[0][1] <= self.pos:
                # Cut the link's content out of the buffer and write the
                # wrapped version in its place.
                self.html.seek(self.inlink)
                subhtml = Markup(self.html.read())
                subhtml = self.make_link(self.tokens.popleft()[2], subhtml)
                self.html.seek(self.inlink)
                self.html.truncate()
                self.html.write(subhtml)
                self.inlink = None

    def handle_data(self, data):
        """Write escaped text and advance the text position."""
        self.html.write(escape(data))
        self.pos += len(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
crystal_code_html: |2- | ||
: <a href="../Foo/Bar.html">Foo::Bar</a> | ||
out_code: |2- | ||
: Foo::Bar | ||
out_tokens: | ||
- - 3 | ||
- 11 | ||
- Foo::Bar | ||
pygments_code_html: |2- | ||
<div class="highlight"><pre><span></span><span class="p">:</span> <span class="n">Foo</span><span class="o">::</span><span class="n">Bar</span> | ||
</pre></div> | ||
out_linkified_code_html: |2- | ||
<div class="highlight"><pre><span></span><span class="p">:</span> <a id="Foo::Bar">Foo::Bar</a> | ||
</pre></div> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
crystal_code_html: |- | ||
(timeout : <a href="Time/Span.html">Time::Span</a>?, select_action : Channel::TimeoutAction? = <span class="n">nil</span>) : <a href="Nil.html">Nil</a> | ||
out_code: |- | ||
(timeout : Time::Span?, select_action : Channel::TimeoutAction? = nil) : Nil | ||
out_tokens: | ||
- - 11 | ||
- 21 | ||
- Time::Span | ||
- - 73 | ||
- 76 | ||
- Nil | ||
pygments_code_html: | | ||
<div class="highlight"><pre><span></span><span class="p">(</span><span class="n">timeout</span> <span class="p">:</span> <span class="n">Time</span><span class="o">::</span><span class="n">Span?</span><span class="p">,</span> <span class="n">select_action</span> <span class="p">:</span> <span class="n">Channel</span><span class="o">::</span><span class="n">TimeoutAction?</span> <span class="o">=</span> <span class="kc">nil</span><span class="p">)</span> <span class="p">:</span> <span class="n">Nil</span> | ||
</pre></div> | ||
out_linkified_code_html: | | ||
<div class="highlight"><pre><span></span><span class="p">(</span><span class="n">timeout</span> <span class="p">:</span> <a id="Time::Span">Time::Span?</a><span class="p">,</span> <span class="n">select_action</span> <span class="p">:</span> <span class="n">Channel</span><span class="o">::</span><span class="n">TimeoutAction?</span> <span class="o">=</span> <span class="kc">nil</span><span class="p">)</span> <span class="p">:</span> <a id="Nil">Nil</a> | ||
</pre></div> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
crystal_code_html: |- | ||
(blend_mode : <a href="../SF/BlendMode.html">BlendMode</a>, transform : <a href="../SF/Transform.html">Transform</a>, texture : <a href="../SF/Texture.html">Texture</a>?, shader : <a href="../SF/Transform.html">Transform</a>, texture : <a href="../SF/Texture.html">Texture</a>?, shader : <a href="../SF/Shader.html">Shader</a>?) | ||
out_code: |- | ||
(blend_mode : BlendMode, transform : Transform, texture : Texture?, shader : Transform, texture : Texture?, shader : Shader?) | ||
out_tokens: | ||
- - 14 | ||
- 23 | ||
- SF::BlendMode | ||
- - 37 | ||
- 46 | ||
- SF::Transform | ||
- - 58 | ||
- 65 | ||
- SF::Texture | ||
- - 77 | ||
- 86 | ||
- SF::Transform | ||
- - 98 | ||
- 105 | ||
- SF::Texture | ||
- - 117 | ||
- 123 | ||
- SF::Shader | ||
pygments_code_html: | | ||
<div class="highlight"><pre><span></span><span class="p">(</span><span class="n">blend_mode</span> <span class="p">:</span> <span class="n">BlendMode</span><span class="p">,</span> <span class="n">transform</span> <span class="p">:</span> <span class="n">Transform</span><span class="p">,</span> <span class="n">texture</span> <span class="p">:</span> <span class="n">Texture?</span><span class="p">,</span> <span class="n">shader</span> <span class="p">:</span> <span class="n">Shader?</span><span class="p">)</span> | ||
</pre></div> | ||
out_linkified_code_html: |+ | ||
<div class="highlight"><pre><span></span><span class="p">(</span><span class="n">blend_mode</span> <span class="p">:</span> <a id="SF::BlendMode">BlendMode</a><span class="p">,</span> <span class="n">transform</span> <span class="p">:</span> <a id="SF::Transform">Transform</a><span class="p">,</span> <span class="n">texture</span> <span class="p">:</span> <a id="SF::Texture">Texture?</a><span class="p">,</span> <span class="n">shader</span> <span class="p">:</span> Shader?) | ||
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import markupsafe | ||
import pytest | ||
|
||
from mkdocstrings.handlers.crystal import crystal_html | ||
|
||
|
||
@pytest.mark.golden_test("crystal_html/**/*.yml")
def test_crystal_html(golden):
    """Check link extraction and re-linking against each golden file."""
    parsed = crystal_html.parse_crystal_html(golden["crystal_code_html"])
    assert str(parsed) == golden.out["out_code"]
    token_rows = [list(token) for token in parsed.tokens]
    assert token_rows == golden.out["out_tokens"]

    # NOTE: the `pygments_code_html` inputs were presumably produced by
    # highlighting the plain code with Pygments' "crystal" lexer and its
    # HtmlFormatter -- confirm before regenerating them.
    highlighted = golden["pygments_code_html"]
    link_template = markupsafe.Markup('<a id="{}">{}</a>')
    linkified = crystal_html.linkify_highlighted_html(
        highlighted, parsed.tokens, link_template.format
    )
    assert str(linkified) == golden.out["out_linkified_code_html"]