From d9016f55fa97a928d11a1ae48a7df1fa3e61b2ff Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Fri, 22 Jun 2018 17:38:58 +0200 Subject: [PATCH 1/5] rewrite from_str_lenient to fix #22 --- src/lib.rs | 54 ++++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1db25aa2a..60ae6cada 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -225,7 +225,7 @@ impl Bech32 { for b in raw_hrp.bytes() { // Valid subset of ASCII if b < 33 || b > 126 { - return Err(Error::InvalidChar(b)) + return Err(Error::InvalidChar(b as char)) } let mut c = b; // Lowercase @@ -242,34 +242,28 @@ impl Bech32 { } // Check data payload - let mut data_bytes: Vec<u5> = Vec::new(); - for b in raw_data.bytes() { - // Alphanumeric only - if !((b >= b'0' && b <= b'9') || (b >= b'A' && b <= b'Z') || (b >= b'a' && b <= b'z')) { - return Err(Error::InvalidChar(b)) + let mut data_bytes = raw_data.chars().map(|c| { + // Only check if c is in the ASCII range, all invalid ASCII characters have the value -1 + // in CHARSET_REV (which covers the whole ASCII range) and will be filtered out later. 
+ if !c.is_ascii() { + return Err(Error::InvalidChar(c)) } - // Excludes these characters: [1,b,i,o] - if b == b'1' || b == b'b' || b == b'i' || b == b'o' { - return Err(Error::InvalidChar(b)) - } - // Lowercase - if b >= b'a' && b <= b'z' { + + if c.is_ascii_lowercase() { has_lower = true; + } else if c.is_ascii_uppercase() { + has_upper = true; } - // Uppercase - let c = if b >= b'A' && b <= b'Z' { - has_upper = true; - // Convert to lowercase - b + (b'a'-b'A') - } else { - b - }; - - data_bytes.push(u5::try_from_u8(CHARSET_REV[c as usize] as u8).expect( - "range was already checked above" - )); - } + // c should be <128 since it is in the ASCII range, CHARSET_REV.len() == 128 + let num_value = CHARSET_REV[c as usize]; + + if num_value > 31 || num_value < 0 { + return Err(Error::InvalidChar(c)); + } + + Ok(u5::try_from_u8(num_value as u8).expect("range checked above, num_value <= 31")) + }).collect::<Result<Vec<u5>, Error>>()?; // Ensure no mixed case if has_lower && has_upper { @@ -402,7 +396,7 @@ pub enum Error { /// The data or human-readable part is too long or too short InvalidLength, /// Some part of the string contains an invalid character - InvalidChar(u8), + InvalidChar(char), /// Some part of the data has an invalid value InvalidData(u8), /// The bit conversion failed due to a padding issue @@ -545,9 +539,9 @@ mod tests { fn invalid_strings() { let pairs: Vec<(&str, Error)> = vec!( (" 1nwldj5", - Error::InvalidChar(b' ')), + Error::InvalidChar(' ')), ("\x7f1axkwrx", - Error::InvalidChar(0x7f)), + Error::InvalidChar(0x7f as char)), ("an84characterslonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1569pvx", Error::InvalidLength), ("pzry9x0s0muk", @@ -555,11 +549,11 @@ mod tests { ("1pzry9x0s0muk", Error::InvalidLength), ("x1b4n0q5v", - Error::InvalidChar(b'b')), + Error::InvalidChar('b')), ("li1dgmt3", Error::InvalidLength), ("de1lg7wt\u{ff}", - Error::InvalidChar(0xc3)), // ASCII 0xff -> \uC3BF in UTF-8 + Error::InvalidChar(0xc3 as char)), // ASCII 
0xff -> \uC3BF in UTF-8 ); for p in pairs { let (s, expected_error) = p; From 866e15ff676b893692b324f7b18753263284c526 Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Fri, 22 Jun 2018 23:15:46 +0200 Subject: [PATCH 2/5] Adapt invalid_string tests to chars instead of bytes Input strings were formerly processed byte-wise. Since they are now processed character-wise, the tests that expect an Error::InvalidChar(c) had to be changed. --- src/lib.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 60ae6cada..1158c7c21 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,8 +63,9 @@ #![deny(unused_mut)] use std::{error, fmt}; -use std::str::FromStr; +use std::ascii::AsciiExt; use std::fmt::{Display, Formatter}; +use std::str::FromStr; /// Integer in the range `0..32` #[derive(PartialEq, Eq, Debug, Copy, Clone, Default, PartialOrd, Ord, Hash)] @@ -249,9 +250,9 @@ impl Bech32 { return Err(Error::InvalidChar(c)) } - if c.is_ascii_lowercase() { + if c.is_lowercase() { has_lower = true; - } else if c.is_ascii_uppercase() { + } else if c.is_uppercase() { has_upper = true; } @@ -540,8 +541,8 @@ mod tests { let pairs: Vec<(&str, Error)> = vec!( (" 1nwldj5", Error::InvalidChar(' ')), - ("\x7f1axkwrx", - Error::InvalidChar(0x7f as char)), + ("abc1\u{2192}axkwrx", + Error::InvalidChar('\u{2192}')), ("an84characterslonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1569pvx", Error::InvalidLength), ("pzry9x0s0muk", @@ -553,7 +554,7 @@ mod tests { ("li1dgmt3", Error::InvalidLength), ("de1lg7wt\u{ff}", - Error::InvalidChar(0xc3 as char)), // ASCII 0xff -> \uC3BF in UTF-8 + Error::InvalidChar('\u{ff}')), ); for p in pairs { let (s, expected_error) = p; @@ -562,7 +563,7 @@ mod tests { println!("{:?}", dec_result.unwrap()); panic!("Should be invalid: {:?}", s); } - assert_eq!(dec_result.unwrap_err(), expected_error); + assert_eq!(dec_result.unwrap_err(), expected_error, "testing input '{}'", s); } } From 
16cb139cc801da15194f1561a0ab65860de65fbf Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Mon, 25 Jun 2018 00:41:26 +0200 Subject: [PATCH 3/5] Add CHARSET_REV test --- src/lib.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 1158c7c21..e5273288d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -650,4 +650,24 @@ mod tests { use ToBase32; assert_eq!([0xffu8].to_base32(), [0x1f, 0x1c].check_base32().unwrap()); } + + #[test] + fn reverse_charset() { + use std::ascii::AsciiExt; + use ::CHARSET_REV; + + fn get_char_value(c: char) -> i8 { + let charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; + match charset.find(c.to_ascii_lowercase()) { + Some(x) => x as i8, + None => -1, + } + } + + let expected_rev_charset = (0u8..128).map(|i| { + get_char_value(i as char) + }).collect::<Vec<_>>(); + + assert_eq!(&(CHARSET_REV[..]), expected_rev_charset.as_slice()); + } } From 4929ff4a7b4c1c05b3a212589d68d317e7db96f3 Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Mon, 25 Jun 2018 00:46:55 +0200 Subject: [PATCH 4/5] Add bug test for #22 --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index e5273288d..1d754fbf8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -551,6 +551,8 @@ mod tests { Error::InvalidLength), ("x1b4n0q5v", Error::InvalidChar('b')), + ("ABC1DEFGOH", + Error::InvalidChar('O')), ("li1dgmt3", Error::InvalidLength), ("de1lg7wt\u{ff}", From afa37d1f40191308d7a75c1d2a90897d1cc57fa1 Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Tue, 26 Jun 2018 01:03:09 +0200 Subject: [PATCH 5/5] bump version to 0.5.0. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 45caf3c4b..bccc290ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bech32" -version = "0.4.1" +version = "0.5.0" authors = ["Clark Moody"] repository = "https://github.com/rust-bitcoin/rust-bech32" description = "Encodes and decodes the Bech32 format"