From 63c30cf8fe234a13640aebe2e2588e94bc6a878e Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Sun, 23 Jul 2023 21:02:40 -0600 Subject: [PATCH 1/2] der: add `BmpString` Adds initial support for a Basic Multilingual Plane (a.k.a. UCS-2) string type. --- der/src/asn1.rs | 3 + der/src/asn1/bmp_string.rs | 160 +++++++++++++++++++++++++++++++++++++ der/src/bytes_owned.rs | 6 ++ 3 files changed, 169 insertions(+) create mode 100644 der/src/asn1/bmp_string.rs diff --git a/der/src/asn1.rs b/der/src/asn1.rs index dead9ae02..b04b1b58f 100644 --- a/der/src/asn1.rs +++ b/der/src/asn1.rs @@ -6,6 +6,8 @@ mod internal_macros; mod any; mod bit_string; +#[cfg(feature = "alloc")] +mod bmp_string; mod boolean; mod choice; mod context_specific; @@ -52,6 +54,7 @@ pub use self::{ pub use self::{ any::Any, bit_string::BitString, + bmp_string::BmpString, ia5_string::Ia5String, integer::{int::Int, uint::Uint}, octet_string::OctetString, diff --git a/der/src/asn1/bmp_string.rs b/der/src/asn1/bmp_string.rs new file mode 100644 index 000000000..c2dc2d85d --- /dev/null +++ b/der/src/asn1/bmp_string.rs @@ -0,0 +1,160 @@ +//! ASN.1 `BMPString` support. + +use crate::{ + BytesOwned, DecodeValue, EncodeValue, Error, FixedTag, Header, Length, Reader, Result, Tag, + Writer, +}; +use alloc::{boxed::Box, vec::Vec}; +use core::{fmt, str::FromStr}; + +/// ASN.1 `BMPString` type. +/// +/// Encodes Basic Multilingual Plane (BMP) subset of Unicode (ISO 10646), +/// a.k.a. UCS-2. +#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)] +pub struct BmpString { + bytes: BytesOwned, +} + +impl BmpString { + /// Create a new [`BmpString`] from its UCS-2 encoding. 
+ pub fn from_ucs2(bytes: impl Into<Box<[u8]>>) -> Result<Self> { + let bytes = bytes.into(); + + if bytes.len() % 2 != 0 { + return Err(Tag::BmpString.length_error()); + } + + let ret = Self { + bytes: bytes.try_into()?, + }; + + // Ensure resulting string is free of unpaired surrogates + for maybe_char in char::decode_utf16(ret.codepoints()) { + if maybe_char.is_err() { + return Err(Tag::BmpString.value_error()); + } + } + + Ok(ret) + } + + /// Create a new [`BmpString`] from a UTF-8 string. + pub fn from_utf8(utf8: &str) -> Result<Self> { + #[allow(clippy::integer_arithmetic)] + let mut bytes = Vec::with_capacity(utf8.len() * 2); + + for code_point in utf8.encode_utf16() { + bytes.extend(code_point.to_be_bytes()); + } + + Ok(Self { + bytes: bytes.try_into()?, + }) + } + + /// Borrow the encoded UCS-2 as bytes. + pub fn as_bytes(&self) -> &[u8] { + self.bytes.as_ref() + } + + /// Obtain the inner bytes. + #[inline] + pub fn into_bytes(self) -> Box<[u8]> { + self.bytes.into() + } + + /// Get an iterator over characters in the string. + pub fn chars(&self) -> impl Iterator<Item = char> + '_ { + char::decode_utf16(self.codepoints()) + .map(|maybe_char| maybe_char.expect("unpaired surrogates checked in constructor")) + } + + /// Get an iterator over the `u16` codepoints. + pub fn codepoints(&self) -> impl Iterator<Item = u16> + '_ { + // TODO(tarcieri): use `array_chunks` + self.as_bytes() + .chunks_exact(2) + .map(|chunk| u16::from_be_bytes(chunk.try_into().expect("two bytes"))) + } +} + +impl AsRef<[u8]> for BmpString { + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl<'a> DecodeValue<'a> for BmpString { + fn decode_value<R: Reader<'a>>(reader: &mut R, header: Header) -> Result<Self> { + Self::from_ucs2(reader.read_vec(header.length)?) 
+ } +} + +impl EncodeValue for BmpString { + fn value_len(&self) -> Result<Length> { + Ok(self.bytes.len()) + } + + fn encode_value(&self, writer: &mut impl Writer) -> Result<()> { + writer.write(self.as_bytes()) + } +} + +impl FixedTag for BmpString { + const TAG: Tag = Tag::BmpString; +} + +impl FromStr for BmpString { + type Err = Error; + + fn from_str(s: &str) -> Result<Self> { + Self::from_utf8(s) + } +} + +impl fmt::Debug for BmpString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BmpString(\"{}\")", self) + } +} + +impl fmt::Display for BmpString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for c in self.chars() { + write!(f, "{}", c)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::BmpString; + use crate::{Decode, Encode}; + use alloc::string::ToString; + use hex_literal::hex; + + const EXAMPLE_BYTES: &[u8] = &hex!( + "1e 26 00 43 00 65 00 72 00 74" + " 00 69 00 66 00 69 00 63" + " 00 61 00 74 00 65 00 54" + " 00 65 00 6d 00 70 00 6c" + " 00 61 00 74 00 65" + ); + + const EXAMPLE_UTF8: &str = "CertificateTemplate"; + + #[test] + fn decode() { + let bmp_string = BmpString::from_der(EXAMPLE_BYTES).unwrap(); + assert_eq!(bmp_string.to_string(), EXAMPLE_UTF8); + } + + #[test] + fn encode() { + let bmp_string = BmpString::from_utf8(EXAMPLE_UTF8).unwrap(); + let encoded = bmp_string.to_der().unwrap(); + assert_eq!(encoded, EXAMPLE_BYTES); + } +} diff --git a/der/src/bytes_owned.rs b/der/src/bytes_owned.rs index a9f67b21a..b5e928e3b 100644 --- a/der/src/bytes_owned.rs +++ b/der/src/bytes_owned.rs @@ -83,6 +83,12 @@ impl DerOrd for BytesOwned { } } +impl From<BytesOwned> for Box<[u8]> { + fn from(bytes: BytesOwned) -> Box<[u8]> { + bytes.inner + } +} + impl From<StrRef<'_>> for BytesOwned { fn from(s: StrRef<'_>) -> BytesOwned { let bytes = s.as_bytes(); From e87a10b7422250ef08989ba14b8553eac550fb0b Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Mon, 24 Jul 2023 10:08:37 -0600 Subject: [PATCH 2/2] Attempt to error on characters 
outside Basic Multilingual Plane Ensure all codepoints fit in `u16`. --- der/src/asn1/bmp_string.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/der/src/asn1/bmp_string.rs b/der/src/asn1/bmp_string.rs index c2dc2d85d..3ae313931 100644 --- a/der/src/asn1/bmp_string.rs +++ b/der/src/asn1/bmp_string.rs @@ -29,10 +29,12 @@ impl BmpString { bytes: bytes.try_into()?, }; - // Ensure resulting string is free of unpaired surrogates for maybe_char in char::decode_utf16(ret.codepoints()) { - if maybe_char.is_err() { - return Err(Tag::BmpString.value_error()); + match maybe_char { + // All surrogates paired and character is in the Basic Multilingual Plane + Ok(c) if (c as u64) < u64::from(u16::MAX) => (), + // Unpaired surrogates or characters outside Basic Multilingual Plane + _ => return Err(Tag::BmpString.value_error()), } } @@ -41,16 +43,18 @@ impl BmpString { /// Create a new [`BmpString`] from a UTF-8 string. pub fn from_utf8(utf8: &str) -> Result<Self> { - #[allow(clippy::integer_arithmetic)] - let mut bytes = Vec::with_capacity(utf8.len() * 2); + let capacity = utf8 + .len() + .checked_mul(2) + .ok_or_else(|| Tag::BmpString.length_error())?; + + let mut bytes = Vec::with_capacity(capacity); for code_point in utf8.encode_utf16() { bytes.extend(code_point.to_be_bytes()); } - Ok(Self { - bytes: bytes.try_into()?, - }) + Self::from_ucs2(bytes) } /// Borrow the encoded UCS-2 as bytes.