From 94bdba1227c1950c7ae0f3e5588b90597e832ba2 Mon Sep 17 00:00:00 2001 From: George Dietrich Date: Thu, 8 Aug 2024 04:31:53 -0400 Subject: [PATCH] Add `underscore_to_space` option to `String#titleize` (#14822) --- spec/std/string_spec.cr | 10 ++++++++++ src/string.cr | 35 ++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 00310bfcbc47..2ea13d52010d 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -724,6 +724,10 @@ describe "String" do it { assert_prints " spáçes before".titleize, " Spáçes Before" } it { assert_prints "testá-se múitô".titleize, "Testá-se Múitô" } it { assert_prints "iO iO".titleize(Unicode::CaseOptions::Turkic), "İo İo" } + it { assert_prints "foo_Bar".titleize, "Foo_bar" } + it { assert_prints "foo_bar".titleize, "Foo_bar" } + it { assert_prints "testá_se múitô".titleize(underscore_to_space: true), "Testá Se Múitô" } + it { assert_prints "foo_bar".titleize(underscore_to_space: true), "Foo Bar" } it "handles multi-character mappings correctly (#13533)" do assert_prints "fflİ İffl dz DZ".titleize, "Ffli̇ İffl Dz Dz" @@ -735,6 +739,12 @@ describe "String" do String.build { |io| "\xB5!\xE0\xC1\xB5?".titleize(io) }.should eq("\xB5!\xE0\xC1\xB5?".scrub) String.build { |io| "a\xA0b".titleize(io) }.should eq("A\xA0b".scrub) end + + describe "with IO" do + it { String.build { |io| "foo_Bar".titleize io }.should eq "Foo_bar" } + it { String.build { |io| "foo_bar".titleize io }.should eq "Foo_bar" } + it { String.build { |io| "foo_bar".titleize(io, underscore_to_space: true) }.should eq "Foo Bar" } + end end describe "chomp" do diff --git a/src/string.cr b/src/string.cr index d3bc7d6998b2..08bbb87fc505 100644 --- a/src/string.cr +++ b/src/string.cr @@ -1506,15 +1506,17 @@ class String end end - # Returns a new `String` with the first letter after any space converted to uppercase and every - # other letter converted to lowercase. + # Returns a new `String` with the first letter after any space converted to uppercase and every other letter converted to lowercase. + # Optionally, if *underscore_to_space* is `true`, underscores (`_`) will be converted to a space and the following letter converted to uppercase. # # ``` - # "hEllO tAb\tworld".titleize # => "Hello Tab\tWorld" - # " spaces before".titleize # => " Spaces Before" - # "x-men: the last stand".titleize # => "X-men: The Last Stand" + # "hEllO tAb\tworld".titleize # => "Hello Tab\tWorld" + # " spaces before".titleize # => " Spaces Before" + # "x-men: the last stand".titleize # => "X-men: The Last Stand" + # "foo_bar".titleize # => "Foo_bar" + # "foo_bar".titleize(underscore_to_space: true) # => "Foo Bar" # ``` - def titleize(options : Unicode::CaseOptions = :none) : String + def titleize(options : Unicode::CaseOptions = :none, *, underscore_to_space : Bool = false) : String return self if empty? if single_byte_optimizable? && (options.none? || options.ascii?) @@ -1525,9 +1527,15 @@ class String byte = to_unsafe[i] if byte < 0x80 char = byte.unsafe_chr - replaced_char = upcase_next ? char.upcase : char.downcase + replaced_char, upcase_next = if upcase_next + {char.upcase, false} + elsif underscore_to_space && '_' == char + {' ', true} + else + {char.downcase, char.ascii_whitespace?} + end + buffer[i] = replaced_char.ord.to_u8! - upcase_next = char.ascii_whitespace? else buffer[i] = byte upcase_next = false @@ -1537,26 +1545,31 @@ class String end end - String.build(bytesize) { |io| titleize io, options } + String.build(bytesize) { |io| titleize io, options, underscore_to_space: underscore_to_space } end # Writes a titleized version of `self` to the given *io*. + # Optionally, if *underscore_to_space* is `true`, underscores (`_`) will be converted to a space and the following letter converted to uppercase. # # ``` # io = IO::Memory.new # "x-men: the last stand".titleize io # io.to_s # => "X-men: The Last Stand" # ``` - def titleize(io : IO, options : Unicode::CaseOptions = :none) : Nil + def titleize(io : IO, options : Unicode::CaseOptions = :none, *, underscore_to_space : Bool = false) : Nil upcase_next = true each_char_with_index do |char, i| if upcase_next + upcase_next = false char.titlecase(options) { |c| io << c } + elsif underscore_to_space && '_' == char + upcase_next = true + io << ' ' else + upcase_next = char.whitespace? char.downcase(options) { |c| io << c } end - upcase_next = char.whitespace? end end