From cb4c7dd484dad8934ddfe726a255f9ee4d106d3f Mon Sep 17 00:00:00 2001 From: Ellis Michael Date: Tue, 27 Sep 2022 13:20:51 -0700 Subject: [PATCH] Process subtractive_selectors first Currently, if a filter rule is more selective than the subtractive selector, the subtractive selector has no effect, because the content it needs to match against has already been discarded by the filter. This fixes that by removing elements first and then applying the filter rule to the remaining content. --- changedetectionio/fetch_site_status.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 79e282b59a7..c95e0b447fe 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -157,17 +157,17 @@ def run(self, uuid): stripped_text_from_html = html_content else: # Then we assume HTML + if has_subtractive_selectors: + html_content = html_tools.element_removal(subtractive_selectors, html_content) + if has_filter_rule: # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'): html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''), - html_content=fetcher.content) + html_content=html_content) else: # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text - html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content) - - if has_subtractive_selectors: - html_content = html_tools.element_removal(subtractive_selectors, html_content) + html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=html_content) if not is_source: # extract text