Fixed search plugin crashing on nested headlines

squidfunk · Jan 8, 2023 · 81e7b8c
1 parent c4d61cd
commit 81e7b8c
Show file tree

Hide file tree

Showing 2 changed files with 76 additions and 28 deletions.
diff --git a/material/plugins/search/plugin.py b/material/plugins/search/plugin.py
@@ -266,6 +266,10 @@ def __init__(self, tag, attrs = dict()):
         self.tag   = tag
         self.attrs = attrs
 
+    # String representation
+    def __repr__(self):
+        return self.tag
+
     # Support comparison (compare by tag only)
     def __eq__(self, other):
         if other is Element:
@@ -291,12 +295,22 @@ class Section:
     """
 
     # Initialize HTML section
-    def __init__(self, el):
-        self.el    = el
+    def __init__(self, el, depth = 0):
+        self.el = el
+        self.depth = depth
+
+        # Initialize section data
         self.text  = []
         self.title = []
         self.id = None
 
+    # String representation
+    def __repr__(self):
+        if self.id:
+            return "#".join([self.el.tag, self.id])
+        else:
+            return self.el.tag
+
     # Check whether the section should be excluded
     def is_excluded(self):
         return self.el.is_excluded()
@@ -350,15 +364,16 @@ def handle_starttag(self, tag, attrs):
 
         # Handle headings
         if tag in ([f"h{x}" for x in range(1, 7)]):
+            depth = len(self.context)
             if "id" in attrs:
 
                 # Ensure top-level section
                 if tag != "h1" and not self.data:
-                    self.section = Section(Element("hx"))
+                    self.section = Section(Element("hx"), depth)
                     self.data.append(self.section)
 
                 # Set identifier, if not first section
-                self.section = Section(el)
+                self.section = Section(el, depth)
                 if self.data:
                     self.section.id = attrs["id"]
 
@@ -398,6 +413,20 @@ def handle_endtag(self, tag):
         if not self.context or self.context[-1] != tag:
             return
 
+        # Check whether we're exiting the current context, which happens when
+        # a headline is nested in another element. In that case, we close the
+        # current section, continuing to append data to the previous section,
+        # which could also be a nested section – see https://bit.ly/3IxxIJZ
+        if self.section.depth > len(self.context):
+            for section in reversed(self.data):
+                if section.depth and section.depth <= len(self.context):
+
+                    # Set depth to 0 in order to denote that the current section
+                    # is exited and must not be considered again.
+                    self.section.depth = 0
+                    self.section = section
+                    break
+
         # Remove element from skip list
         el = self.context.pop()
         if el in self.skip:
@@ -407,19 +436,14 @@ def handle_endtag(self, tag):
         # Render closing tag if kept
         if not self.skip.intersection(self.context):
             if tag in self.keep:
+
+                # Check whether we're inside the section title
                 data = self.section.text
-                if self.section.el in reversed(self.context):
+                if self.section.el in self.context:
                     data = self.section.title
 
-                # Remove element if empty (or only whitespace)
-                if data[-1] == f"<{tag}>":
-                    del data[-1:]
-                elif data[-1].isspace() and data[-2] == f"<{tag}>":
-                    del data[-2:]
-
                 # Append to section title or text
-                else:
-                    data.append(f"</{tag}>")
+                data.append(f"</{tag}>")
 
     # Called for the text contents of each tag
     def handle_data(self, data):
@@ -439,7 +463,7 @@ def handle_data(self, data):
             self.data.append(self.section)
 
         # Handle section headline
-        if self.section.el in reversed(self.context):
+        if self.section.el in self.context:
             permalink = False
             for el in self.context:
                 if el.tag == "a" and el.attrs.get("class") == "headerlink":

diff --git a/src/plugins/search/plugin.py b/src/plugins/search/plugin.py
@@ -266,6 +266,10 @@ def __init__(self, tag, attrs = dict()):
         self.tag   = tag
         self.attrs = attrs
 
+    # String representation
+    def __repr__(self):
+        return self.tag
+
     # Support comparison (compare by tag only)
     def __eq__(self, other):
         if other is Element:
@@ -291,12 +295,22 @@ class Section:
     """
 
     # Initialize HTML section
-    def __init__(self, el):
-        self.el    = el
+    def __init__(self, el, depth = 0):
+        self.el = el
+        self.depth = depth
+
+        # Initialize section data
         self.text  = []
         self.title = []
         self.id = None
 
+    # String representation
+    def __repr__(self):
+        if self.id:
+            return "#".join([self.el.tag, self.id])
+        else:
+            return self.el.tag
+
     # Check whether the section should be excluded
     def is_excluded(self):
         return self.el.is_excluded()
@@ -350,15 +364,16 @@ def handle_starttag(self, tag, attrs):
 
         # Handle headings
         if tag in ([f"h{x}" for x in range(1, 7)]):
+            depth = len(self.context)
             if "id" in attrs:
 
                 # Ensure top-level section
                 if tag != "h1" and not self.data:
-                    self.section = Section(Element("hx"))
+                    self.section = Section(Element("hx"), depth)
                     self.data.append(self.section)
 
                 # Set identifier, if not first section
-                self.section = Section(el)
+                self.section = Section(el, depth)
                 if self.data:
                     self.section.id = attrs["id"]
 
@@ -398,6 +413,20 @@ def handle_endtag(self, tag):
         if not self.context or self.context[-1] != tag:
             return
 
+        # Check whether we're exiting the current context, which happens when
+        # a headline is nested in another element. In that case, we close the
+        # current section, continuing to append data to the previous section,
+        # which could also be a nested section – see https://bit.ly/3IxxIJZ
+        if self.section.depth > len(self.context):
+            for section in reversed(self.data):
+                if section.depth and section.depth <= len(self.context):
+
+                    # Set depth to 0 in order to denote that the current section
+                    # is exited and must not be considered again.
+                    self.section.depth = 0
+                    self.section = section
+                    break
+
         # Remove element from skip list
         el = self.context.pop()
         if el in self.skip:
@@ -407,19 +436,14 @@ def handle_endtag(self, tag):
         # Render closing tag if kept
         if not self.skip.intersection(self.context):
             if tag in self.keep:
+
+                # Check whether we're inside the section title
                 data = self.section.text
-                if self.section.el in reversed(self.context):
+                if self.section.el in self.context:
                     data = self.section.title
 
-                # Remove element if empty (or only whitespace)
-                if data[-1] == f"<{tag}>":
-                    del data[-1:]
-                elif data[-1].isspace() and data[-2] == f"<{tag}>":
-                    del data[-2:]
-
                 # Append to section title or text
-                else:
-                    data.append(f"</{tag}>")
+                data.append(f"</{tag}>")
 
     # Called for the text contents of each tag
     def handle_data(self, data):
@@ -439,7 +463,7 @@ def handle_data(self, data):
             self.data.append(self.section)
 
         # Handle section headline
-        if self.section.el in reversed(self.context):
+        if self.section.el in self.context:
             permalink = False
             for el in self.context:
                 if el.tag == "a" and el.attrs.get("class") == "headerlink":