From 81e7b8c7fc53d32c8a8730ae68614d8925388782 Mon Sep 17 00:00:00 2001 From: squidfunk Date: Sun, 8 Jan 2023 09:39:05 +0100 Subject: [PATCH] Fixed search plugin crashing on nested headlines --- material/plugins/search/plugin.py | 52 ++++++++++++++++++++++--------- src/plugins/search/plugin.py | 52 ++++++++++++++++++++++--------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/material/plugins/search/plugin.py b/material/plugins/search/plugin.py index b3413f3ee68..710746becd7 100644 --- a/material/plugins/search/plugin.py +++ b/material/plugins/search/plugin.py @@ -266,6 +266,10 @@ def __init__(self, tag, attrs = dict()): self.tag = tag self.attrs = attrs + # String representation + def __repr__(self): + return self.tag + # Support comparison (compare by tag only) def __eq__(self, other): if other is Element: @@ -291,12 +295,22 @@ class Section: """ # Initialize HTML section - def __init__(self, el): - self.el = el + def __init__(self, el, depth = 0): + self.el = el + self.depth = depth + + # Initialize section data self.text = [] self.title = [] self.id = None + # String representation + def __repr__(self): + if self.id: + return "#".join([self.el.tag, self.id]) + else: + return self.el.tag + # Check whether the section should be excluded def is_excluded(self): return self.el.is_excluded() @@ -350,15 +364,16 @@ def handle_starttag(self, tag, attrs): # Handle headings if tag in ([f"h{x}" for x in range(1, 7)]): + depth = len(self.context) if "id" in attrs: # Ensure top-level section if tag != "h1" and not self.data: - self.section = Section(Element("hx")) + self.section = Section(Element("hx"), depth) self.data.append(self.section) # Set identifier, if not first section - self.section = Section(el) + self.section = Section(el, depth) if self.data: self.section.id = attrs["id"] @@ -398,6 +413,20 @@ def handle_endtag(self, tag): if not self.context or self.context[-1] != tag: return + # Check whether we're exiting the current context, which happens 
when + # a headline is nested in another element. In that case, we close the + # current section, continuing to append data to the previous section, + # which could also be a nested section – see https://bit.ly/3IxxIJZ + if self.section.depth > len(self.context): + for section in reversed(self.data): + if section.depth and section.depth <= len(self.context): + + # Set depth to 0 in order to denote that the current section + # is exited and must not be considered again. + self.section.depth = 0 + self.section = section + break + # Remove element from skip list el = self.context.pop() if el in self.skip: @@ -407,19 +436,14 @@ def handle_endtag(self, tag): # Render closing tag if kept if not self.skip.intersection(self.context): if tag in self.keep: + + # Check whether we're inside the section title data = self.section.text - if self.section.el in reversed(self.context): + if self.section.el in self.context: data = self.section.title - # Remove element if empty (or only whitespace) - if data[-1] == f"<{tag}>": - del data[-1:] - elif data[-1].isspace() and data[-2] == f"<{tag}>": - del data[-2:] - # Append to section title or text - else: - data.append(f"</{tag}>") + data.append(f"</{tag}>") # Called for the text contents of each tag def handle_data(self, data): @@ -439,7 +463,7 @@ def handle_data(self, data): self.data.append(self.section) # Handle section headline - if self.section.el in reversed(self.context): + if self.section.el in self.context: permalink = False for el in self.context: if el.tag == "a" and el.attrs.get("class") == "headerlink": diff --git a/src/plugins/search/plugin.py b/src/plugins/search/plugin.py index b3413f3ee68..710746becd7 100644 --- a/src/plugins/search/plugin.py +++ b/src/plugins/search/plugin.py @@ -266,6 +266,10 @@ def __init__(self, tag, attrs = dict()): self.tag = tag self.attrs = attrs + # String representation + def __repr__(self): + return self.tag + # Support comparison (compare by tag only) def __eq__(self, other): if other is Element: @@ 
-291,12 +295,22 @@ class Section: """ # Initialize HTML section - def __init__(self, el): - self.el = el + def __init__(self, el, depth = 0): + self.el = el + self.depth = depth + + # Initialize section data self.text = [] self.title = [] self.id = None + # String representation + def __repr__(self): + if self.id: + return "#".join([self.el.tag, self.id]) + else: + return self.el.tag + # Check whether the section should be excluded def is_excluded(self): return self.el.is_excluded() @@ -350,15 +364,16 @@ def handle_starttag(self, tag, attrs): # Handle headings if tag in ([f"h{x}" for x in range(1, 7)]): + depth = len(self.context) if "id" in attrs: # Ensure top-level section if tag != "h1" and not self.data: - self.section = Section(Element("hx")) + self.section = Section(Element("hx"), depth) self.data.append(self.section) # Set identifier, if not first section - self.section = Section(el) + self.section = Section(el, depth) if self.data: self.section.id = attrs["id"] @@ -398,6 +413,20 @@ def handle_endtag(self, tag): if not self.context or self.context[-1] != tag: return + # Check whether we're exiting the current context, which happens when + # a headline is nested in another element. In that case, we close the + # current section, continuing to append data to the previous section, + # which could also be a nested section – see https://bit.ly/3IxxIJZ + if self.section.depth > len(self.context): + for section in reversed(self.data): + if section.depth and section.depth <= len(self.context): + + # Set depth to 0 in order to denote that the current section + # is exited and must not be considered again. 
+ self.section.depth = 0 + self.section = section + break + # Remove element from skip list el = self.context.pop() if el in self.skip: @@ -407,19 +436,14 @@ def handle_endtag(self, tag): # Render closing tag if kept if not self.skip.intersection(self.context): if tag in self.keep: + + # Check whether we're inside the section title data = self.section.text - if self.section.el in reversed(self.context): + if self.section.el in self.context: data = self.section.title - # Remove element if empty (or only whitespace) - if data[-1] == f"<{tag}>": - del data[-1:] - elif data[-1].isspace() and data[-2] == f"<{tag}>": - del data[-2:] - # Append to section title or text - else: - data.append(f"</{tag}>") + data.append(f"</{tag}>") # Called for the text contents of each tag def handle_data(self, data): @@ -439,7 +463,7 @@ def handle_data(self, data): self.data.append(self.section) # Handle section headline - if self.section.el in reversed(self.context): + if self.section.el in self.context: permalink = False for el in self.context: if el.tag == "a" and el.attrs.get("class") == "headerlink":