Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add overload to String.from_utf16 with pointer #5583

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions spec/std/string/utf16_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,11 @@ describe "String UTF16" do
input = Slice[0xdc00_u16, 0xd800_u16]
String.from_utf16(input).should eq("\u{fffd}\u{fffd}")
end

it "handles null bytes" do
  # Code units for "hi", a zero unit, then the surrogate pair 0xD800 0xDCA5.
  units = Slice[104_u16, 105_u16, 0_u16, 55296_u16, 56485_u16]

  # A slice knows its own length, so the zero unit is decoded like any other
  # and the surrogate pair after it is still reached.
  from_slice = String.from_utf16(units)
  from_slice.should eq("hi\0000𐂥")

  # A raw pointer carries no length: decoding stops at the first zero unit.
  from_pointer = String.from_utf16(units.to_unsafe)
  from_pointer.should eq("hi")
end
end
end
35 changes: 34 additions & 1 deletion src/string/utf16.cr
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,15 @@ class String
# slice = Slice[104_u16, 105_u16, 32_u16, 55296_u16, 56485_u16]
# String.from_utf16(slice) # => "hi 𐂥"
# ```
def self.from_utf16(slice : Slice(UInt16)) : String
#
# If *slice* is a `Pointer(UInt16)` instead of a `Slice(UInt16)`, decoding stops at the first zero code unit, which is not included in the resulting string.
#
# ```
# slice = Slice[104_u16, 105_u16, 0_u16, 55296_u16, 56485_u16]
# String.from_utf16(slice) # => "hi\0000𐂥"
# String.from_utf16(slice.to_unsafe) # => "hi"
# ```
def self.from_utf16(slice : Slice(UInt16) | Pointer(UInt16)) : String
bytesize = 0
size = 0

Expand Down Expand Up @@ -97,4 +105,29 @@ class String
i += 1
end
end

# Decodes the UTF-16 code units starting at *pointer* and yields each
# resulting `Char`. Decoding stops at the first zero code unit, which is
# not yielded.
private def self.each_utf16_char(pointer : Pointer(UInt16))
  cursor = pointer

  while (unit = cursor.value.to_i) != 0
    codepoint =
      if unit < 0xd800 || unit >= 0xe000
        # Outside the surrogate range: the code unit is the code point itself.
        unit
      elsif 0xd800 <= unit < 0xdc00 && 0xdc00 <= cursor[1] <= 0xdfff
        # High surrogate followed by a low surrogate: consume the low half
        # and combine both into a single supplementary code point.
        cursor += 1
        ((unit - 0xd800) << 10) + (cursor.value - 0xdc00) + 0x10000
      else
        # Low surrogate first, or a high surrogate without a matching low
        # one: emit the replacement character instead.
        0xfffd
      end

    yield codepoint.chr

    cursor += 1
  end
end
end