From 9662c41f5dfa23ebb8e188f84c276a6489401d25 Mon Sep 17 00:00:00 2001 From: Walnut <39544927+Walnut356@users.noreply.github.com> Date: Mon, 5 Aug 2024 20:00:40 -0500 Subject: [PATCH 1/3] update docs --- docs/content/documentation/getting-started/configuration.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/content/documentation/getting-started/configuration.md b/docs/content/documentation/getting-started/configuration.md index 2565e0913..9d67b9377 100644 --- a/docs/content/documentation/getting-started/configuration.md +++ b/docs/content/documentation/getting-started/configuration.md @@ -83,7 +83,7 @@ feed_filenames = ["atom.xml"] hard_link_static = false # The default author for pages -author = +author = # The taxonomies to be rendered for the site and their configuration of the default languages # Example: @@ -144,6 +144,9 @@ lazy_async_image = false # Whether footnotes are rendered in the GitHub-style (at the bottom, with back references) or plain (in the place, where they are defined) bottom_footnotes = false +# Whether text in code blocks counts towards the word count of the page/section +count_code_block_words = true + # Configuration of the link checker. 
[link_checker] # Skip link checking for external URLs that start with these prefixes From 140872965bb3cb905fde80b181c8f460fe74c75d Mon Sep 17 00:00:00 2001 From: Walnut <39544927+Walnut356@users.noreply.github.com> Date: Mon, 5 Aug 2024 20:01:05 -0500 Subject: [PATCH 2/3] add config option, logic, and tests --- components/config/src/config/markup.rs | 3 +++ components/content/src/page.rs | 2 +- components/content/src/section.rs | 2 +- components/content/src/utils.rs | 36 ++++++++++++++++++++++---- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/components/config/src/config/markup.rs b/components/config/src/config/markup.rs index 580a665ae..fd7ac1f28 100644 --- a/components/config/src/config/markup.rs +++ b/components/config/src/config/markup.rs @@ -55,6 +55,8 @@ pub struct Markdown { pub extra_theme_set: Arc>, /// Add loading="lazy" decoding="async" to img tags. When turned on, the alt text must be plain text. Defaults to false pub lazy_async_image: bool, + /// Whether or not text in code blocks counts towards the word count. 
Defaults to true + pub count_code_block_words: bool, } impl Markdown { @@ -210,6 +212,7 @@ impl Default for Markdown { extra_syntax_set: None, extra_theme_set: Arc::new(None), lazy_async_image: false, + count_code_block_words: true, } } } diff --git a/components/content/src/page.rs b/components/content/src/page.rs index 94672b2bd..c14a7d4d3 100644 --- a/components/content/src/page.rs +++ b/components/content/src/page.rs @@ -111,7 +111,7 @@ impl Page { page.file.find_language(&config.default_language, &config.other_languages_codes())?; page.raw_content = content.to_string(); - let (word_count, reading_time) = get_reading_analytics(&page.raw_content); + let (word_count, reading_time) = get_reading_analytics(&page.raw_content, config); page.word_count = Some(word_count); page.reading_time = Some(reading_time); diff --git a/components/content/src/section.rs b/components/content/src/section.rs index d4c84a9bd..ba3de7fb5 100644 --- a/components/content/src/section.rs +++ b/components/content/src/section.rs @@ -87,7 +87,7 @@ impl Section { .file .find_language(&config.default_language, &config.other_languages_codes())?; section.raw_content = content.to_string(); - let (word_count, reading_time) = get_reading_analytics(&section.raw_content); + let (word_count, reading_time) = get_reading_analytics(&section.raw_content, config); section.word_count = Some(word_count); section.reading_time = Some(reading_time); diff --git a/components/content/src/utils.rs b/components/content/src/utils.rs index 8349c2b9f..3bf85098e 100644 --- a/components/content/src/utils.rs +++ b/components/content/src/utils.rs @@ -59,8 +59,15 @@ pub fn find_related_assets(path: &Path, config: &Config, recursive: bool) -> Vec } /// Get word count and estimated reading time -pub fn get_reading_analytics(content: &str) -> (usize, usize) { - let word_count: usize = content.unicode_words().count(); +pub fn get_reading_analytics(content: &str, config: &Config) -> (usize, usize) { + let word_count = if 
config.markdown.count_code_block_words { + content.unicode_words().count() + } else { + // code fences "toggle" the state from non-code to code, so anything inbetween the first + // fence and the next can be ignored + let split = content.split("```"); + split.step_by(2).map(|section| section.unicode_words().count()).sum() + }; // https://help.medium.com/hc/en-us/articles/214991667-Read-time // 275 seems a bit too high though @@ -219,14 +226,15 @@ mod tests { #[test] fn reading_analytics_empty_text() { - let (word_count, reading_time) = get_reading_analytics(" "); + let (word_count, reading_time) = get_reading_analytics(" ", &Config::default()); assert_eq!(word_count, 0); assert_eq!(reading_time, 0); } #[test] fn reading_analytics_short_text() { - let (word_count, reading_time) = get_reading_analytics("Hello World"); + let (word_count, reading_time) = + get_reading_analytics("Hello World", &Config::default_for_test()); assert_eq!(word_count, 2); assert_eq!(reading_time, 1); } @@ -237,8 +245,26 @@ mod tests { for _ in 0..1000 { content.push_str(" Hello world"); } - let (word_count, reading_time) = get_reading_analytics(&content); + let (word_count, reading_time) = + get_reading_analytics(&content, &Config::default_for_test()); assert_eq!(word_count, 2000); assert_eq!(reading_time, 10); } + + #[test] + fn reading_analytics_no_code() { + let mut config = Config::default_for_test(); + config.markdown.count_code_block_words = false; + let (word_count, reading_time) = + get_reading_analytics("hello world ``` code goes here ``` goodbye world", &config); + assert_eq!(word_count, 4); + assert_eq!(reading_time, 1); + + let (word_count, reading_time) = get_reading_analytics( + "hello world ``` code goes here ``` goodbye world ``` dangling fence", + &config, + ); + assert_eq!(word_count, 4); + assert_eq!(reading_time, 1); + } } From 63a86a0d2c24bc6f8c3266f13e99f80d379867d5 Mon Sep 17 00:00:00 2001 From: Walnut <39544927+Walnut356@users.noreply.github.com> Date: Wed, 7 Aug 2024 
05:38:38 -0500 Subject: [PATCH 3/3] remove config option --- components/config/src/config/markup.rs | 3 --- components/content/src/page.rs | 2 +- components/content/src/section.rs | 2 +- components/content/src/utils.rs | 27 +++++++------------ .../getting-started/configuration.md | 5 +--- 5 files changed, 12 insertions(+), 27 deletions(-) diff --git a/components/config/src/config/markup.rs b/components/config/src/config/markup.rs index fd7ac1f28..580a665ae 100644 --- a/components/config/src/config/markup.rs +++ b/components/config/src/config/markup.rs @@ -55,8 +55,6 @@ pub struct Markdown { pub extra_theme_set: Arc>, /// Add loading="lazy" decoding="async" to img tags. When turned on, the alt text must be plain text. Defaults to false pub lazy_async_image: bool, - /// Whether or not text in code blocks counts towards the word count. Defaults to true - pub count_code_block_words: bool, } impl Markdown { @@ -212,7 +210,6 @@ impl Default for Markdown { extra_syntax_set: None, extra_theme_set: Arc::new(None), lazy_async_image: false, - count_code_block_words: true, } } } diff --git a/components/content/src/page.rs b/components/content/src/page.rs index c14a7d4d3..94672b2bd 100644 --- a/components/content/src/page.rs +++ b/components/content/src/page.rs @@ -111,7 +111,7 @@ impl Page { page.file.find_language(&config.default_language, &config.other_languages_codes())?; page.raw_content = content.to_string(); - let (word_count, reading_time) = get_reading_analytics(&page.raw_content, config); + let (word_count, reading_time) = get_reading_analytics(&page.raw_content); page.word_count = Some(word_count); page.reading_time = Some(reading_time); diff --git a/components/content/src/section.rs b/components/content/src/section.rs index ba3de7fb5..d4c84a9bd 100644 --- a/components/content/src/section.rs +++ b/components/content/src/section.rs @@ -87,7 +87,7 @@ impl Section { .file .find_language(&config.default_language, &config.other_languages_codes())?; section.raw_content = 
content.to_string(); - let (word_count, reading_time) = get_reading_analytics(&section.raw_content, config); + let (word_count, reading_time) = get_reading_analytics(&section.raw_content); section.word_count = Some(word_count); section.reading_time = Some(reading_time); diff --git a/components/content/src/utils.rs b/components/content/src/utils.rs index 3bf85098e..e26998862 100644 --- a/components/content/src/utils.rs +++ b/components/content/src/utils.rs @@ -59,15 +59,11 @@ pub fn find_related_assets(path: &Path, config: &Config, recursive: bool) -> Vec } /// Get word count and estimated reading time -pub fn get_reading_analytics(content: &str, config: &Config) -> (usize, usize) { - let word_count = if config.markdown.count_code_block_words { - content.unicode_words().count() - } else { - // code fences "toggle" the state from non-code to code, so anything inbetween the first - // fence and the next can be ignored - let split = content.split("```"); - split.step_by(2).map(|section| section.unicode_words().count()).sum() - }; +pub fn get_reading_analytics(content: &str) -> (usize, usize) { + // code fences "toggle" the state from non-code to code and back, so anything inbetween the + // first fence and the next can be ignored + let split = content.split("```"); + let word_count = split.step_by(2).map(|section| section.unicode_words().count()).sum(); // https://help.medium.com/hc/en-us/articles/214991667-Read-time // 275 seems a bit too high though @@ -226,15 +222,14 @@ mod tests { #[test] fn reading_analytics_empty_text() { - let (word_count, reading_time) = get_reading_analytics(" ", &Config::default()); + let (word_count, reading_time) = get_reading_analytics(" "); assert_eq!(word_count, 0); assert_eq!(reading_time, 0); } #[test] fn reading_analytics_short_text() { - let (word_count, reading_time) = - get_reading_analytics("Hello World", &Config::default_for_test()); + let (word_count, reading_time) = get_reading_analytics("Hello World"); assert_eq!(word_count, 2); 
assert_eq!(reading_time, 1); } @@ -245,24 +240,20 @@ mod tests { for _ in 0..1000 { content.push_str(" Hello world"); } - let (word_count, reading_time) = - get_reading_analytics(&content, &Config::default_for_test()); + let (word_count, reading_time) = get_reading_analytics(&content); assert_eq!(word_count, 2000); assert_eq!(reading_time, 10); } #[test] fn reading_analytics_no_code() { - let mut config = Config::default_for_test(); - config.markdown.count_code_block_words = false; let (word_count, reading_time) = - get_reading_analytics("hello world ``` code goes here ``` goodbye world", &config); + get_reading_analytics("hello world ``` code goes here ``` goodbye world"); assert_eq!(word_count, 4); assert_eq!(reading_time, 1); let (word_count, reading_time) = get_reading_analytics( "hello world ``` code goes here ``` goodbye world ``` dangling fence", - &config, ); assert_eq!(word_count, 4); assert_eq!(reading_time, 1); diff --git a/docs/content/documentation/getting-started/configuration.md b/docs/content/documentation/getting-started/configuration.md index 9d67b9377..2565e0913 100644 --- a/docs/content/documentation/getting-started/configuration.md +++ b/docs/content/documentation/getting-started/configuration.md @@ -83,7 +83,7 @@ feed_filenames = ["atom.xml"] hard_link_static = false # The default author for pages -author = +author = # The taxonomies to be rendered for the site and their configuration of the default languages # Example: @@ -144,9 +144,6 @@ lazy_async_image = false # Whether footnotes are rendered in the GitHub-style (at the bottom, with back references) or plain (in the place, where they are defined) bottom_footnotes = false -# Whether text in code blocks counts towards the word count of the page/section -count_code_block_words = true - # Configuration of the link checker. [link_checker] # Skip link checking for external URLs that start with these prefixes