From f3dfe51805c831001ea82dcbbdfac53d35ef8b36 Mon Sep 17 00:00:00 2001 From: Artyom Pavlov Date: Sun, 2 Apr 2023 02:21:12 +0000 Subject: [PATCH] hex-literal: remove support for comments inside literals and migrate to CTFE (#816) --- .github/workflows/hex-literal.yml | 4 +- hex-literal/CHANGELOG.md | 8 ++ hex-literal/Cargo.toml | 10 +- hex-literal/README.md | 84 ++++++++++++ hex-literal/src/comments.rs | 216 ------------------------------ hex-literal/src/lib.rs | 213 +++++++++-------------------- hex-literal/tests/comments.rs | 41 ------ 7 files changed, 163 insertions(+), 413 deletions(-) create mode 100644 hex-literal/README.md delete mode 100644 hex-literal/src/comments.rs delete mode 100644 hex-literal/tests/comments.rs diff --git a/.github/workflows/hex-literal.yml b/.github/workflows/hex-literal.yml index 133e36ef..dc77aeb2 100644 --- a/.github/workflows/hex-literal.yml +++ b/.github/workflows/hex-literal.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: rust: - - 1.45.0 # MSRV + - 1.57.0 # MSRV - stable target: - thumbv7em-none-eabi @@ -48,7 +48,7 @@ jobs: strategy: matrix: rust: - - 1.45.0 # MSRV + - 1.57.0 # MSRV - stable steps: - uses: actions/checkout@v3 diff --git a/hex-literal/CHANGELOG.md b/hex-literal/CHANGELOG.md index 1c72f15b..338be64b 100644 --- a/hex-literal/CHANGELOG.md +++ b/hex-literal/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## UNRELEASED +### Changed +- Disallow comments inside hex strings ([#816]) +- Migrate to 2021 edition and bump MSRV to 1.57 ([#816]) +- Use CTFE instead of proc macro ([#816]) + +[#816]: https://github.com/RustCrypto/utils/pull/816 + ## 0.3.4 (2021-11-11) ### Changed - Provide more info in `panic!` messages ([#664]) diff --git a/hex-literal/Cargo.toml b/hex-literal/Cargo.toml index e3d36833..b704b6b9 100644 --- a/hex-literal/Cargo.toml +++ b/hex-literal/Cargo.toml @@ -3,11 +3,9 @@ name = "hex-literal" version = "0.3.4" authors = ["RustCrypto Developers"] license = "MIT OR Apache-2.0" -description = "Procedural macro for converting hexadecimal string to byte array at compile time." +description = "Macro for converting hexadecimal string to a byte array at compile time" documentation = "https://docs.rs/hex-literal" repository = "https://github.com/RustCrypto/utils" -keywords = ["hex", "proc-macro", "literals"] -edition = "2018" - -[lib] -proc-macro = true +keywords = ["hex", "literals"] +edition = "2021" +rust-version = "1.57" diff --git a/hex-literal/README.md b/hex-literal/README.md new file mode 100644 index 00000000..72d527e4 --- /dev/null +++ b/hex-literal/README.md @@ -0,0 +1,84 @@ +# [RustCrypto]: hex-literal + +[![Crate][crate-image]][crate-link] +[![Docs][docs-image]][docs-link] +![Apache 2.0/MIT Licensed][license-image] +![MSRV][rustc-image] +[![Build Status][build-image]][build-link] + +This crate provides the `hex!` macro for converting hexadecimal string literals to a byte array at compile time. 
+ +It accepts the following characters in the input string: + +- `'0'...'9'`, `'a'...'f'`, `'A'...'F'` — hex characters which will be used in construction of the output byte array +- `' '`, `'\r'`, `'\n'`, `'\t'` — formatting characters which will be ignored + +# Examples +```rust +use hex_literal::hex; + +// The macro can be used in const contexts +const DATA: [u8; 4] = hex!("01020304"); +assert_eq!(DATA, [1, 2, 3, 4]); + +// Both upper and lower hex values are supported +assert_eq!(hex!("a1 b2 c3 d4"), [0xA1, 0xB2, 0xC3, 0xD4]); +assert_eq!(hex!("E5 E6 90 92"), [0xE5, 0xE6, 0x90, 0x92]); +assert_eq!(hex!("0a0B 0C0d"), [10, 11, 12, 13]); + +// Multi-line literals +let bytes1 = hex!(" + 00010203 04050607 + 08090a0b 0c0d0e0f +"); +assert_eq!(bytes1, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + +// It's possible to use several literals (results will be concatenated) +let bytes2 = hex!( + "00010203 04050607" // first half + "08090a0b" /* block comment */ "0c0d0e0f" // second half +); +assert_eq!(bytes1, bytes2); +``` + +Using an unsupported character inside literals will result in a compilation error: +```rust,compile_fail +# use hex_literal::hex; +hex!("АА"); // Cyrillic "А" +hex!("11 22"); // Japanese space +hex!("0123 // Сomments inside literals are not supported"); +``` + +## Minimum Supported Rust Version + +Rust **1.57** or newer. + +In the future, we reserve the right to change MSRV (i.e. MSRV is out-of-scope for this crate's SemVer guarantees), however when we do it will be accompanied by a minor version bump. + +## License + +Licensed under either of: + +* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) +* [MIT license](http://opensource.org/licenses/MIT) + +at your option. 
+ +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. + +[//]: # (badges) + +[crate-image]: https://img.shields.io/crates/v/hex-literal.svg +[crate-link]: https://crates.io/crates/hex-literal +[docs-image]: https://docs.rs/hex-literal/badge.svg +[docs-link]: https://docs.rs/hex-literal/ +[license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg +[rustc-image]: https://img.shields.io/badge/rustc-1.57+-blue.svg +[build-image]: https://github.com/RustCrypto/utils/actions/workflows/hex-literal.yml/badge.svg +[build-link]: https://github.com/RustCrypto/utils/actions/workflows/hex-literal.yml + +[//]: # (general links) + +[RustCrypto]: https://github.com/RustCrypto diff --git a/hex-literal/src/comments.rs b/hex-literal/src/comments.rs deleted file mode 100644 index 6f8799d2..00000000 --- a/hex-literal/src/comments.rs +++ /dev/null @@ -1,216 +0,0 @@ -//! Provides an Iterator decorator that uses a finite state machine to exclude comments -//! from a string in linear time and constant space. - -use std::iter::Peekable; - -pub(crate) trait Exclude: Sized + Iterator { - fn exclude_comments(self) -> ExcludingComments; -} - -impl> Exclude for T { - fn exclude_comments(self) -> ExcludingComments { - ExcludingComments::new_from_iter(self) - } -} - -pub(crate) struct ExcludingComments> { - state: State, - iter: Peekable, -} - -impl> Iterator for ExcludingComments { - type Item = u8; - - fn next(&mut self) -> Option { - let next_byte = self.next_byte(); - if next_byte.is_none() { - match self.state { - State::BlockComment | State::PotentialBlockCommentEnd => { - panic!("block comment not terminated with */") - } - State::PotentialComment { .. } => panic!("encountered isolated `/`"), - _ => {} - } - } - next_byte - } -} - -/// States of the comment removal machine: -///
-///           Normal
-///            '/'                   
-///      PotentialComment
-///     '/'            '*'
-/// LineComment     BlockComment
-///    '\n'            '*'
-///   Normal      PotentialBlockCommentEnd
-///                    '/'           '_'
-///                   Normal     BlockComment
-/// 
-#[derive(Copy, Clone)] -enum State { - Normal, - PotentialComment, - LineComment, - BlockComment, - PotentialBlockCommentEnd, -} - -impl> ExcludingComments { - fn new_from_iter(iter: I) -> Self { - Self { - state: State::Normal, - iter: iter.peekable(), - } - } - - fn next_byte(&mut self) -> Option { - loop { - let next = self.iter.next()?; - self.state = match (self.state, next) { - (State::Normal, b'/') => State::PotentialComment, - (State::Normal, _) => return Some(next), - (State::PotentialComment, b'/') => State::LineComment, - (State::PotentialComment, b'*') => State::BlockComment, - (State::PotentialComment, _) => panic!("encountered isolated `/`"), - (State::LineComment, b'\n') => { - self.state = State::Normal; - return Some(b'\n'); - } - (State::LineComment, _) => continue, - (State::BlockComment, b'*') => State::PotentialBlockCommentEnd, - (State::BlockComment, _) => continue, - (State::PotentialBlockCommentEnd, b'/') => State::Normal, - (State::PotentialBlockCommentEnd, _) => State::BlockComment, - }; - } - } -} - -#[cfg(test)] -mod tests { - use std::vec::IntoIter; - - use super::*; - - /// Converts the input to an iterator of u8, excludes comments, maps back to char and collects - /// the results. 
- fn exclude_comments(input: &str) -> String { - let excluding_comments: ExcludingComments> = input - .to_string() - .into_bytes() - .into_iter() - .exclude_comments(); - excluding_comments.map(|b| b as char).collect() - } - - #[test] - fn empty() { - assert!(exclude_comments("").is_empty()); - } - - #[test] - fn single_char() { - assert_eq!(exclude_comments("0"), "0"); - } - - #[test] - fn two_chars() { - assert_eq!(exclude_comments("ab"), "ab"); - } - - #[test] - fn comment() { - assert_eq!(exclude_comments("ab//cd"), "ab"); - } - - #[test] - fn comments_are_ended_by_new_line() { - assert_eq!(exclude_comments("ab//comment\nde"), "ab\nde"); - } - - #[test] - fn new_lines_without_comments() { - assert_eq!(exclude_comments("ab\nde"), "ab\nde"); - } - - #[test] - #[should_panic] - fn panic_on_single_slash() { - exclude_comments("ab/cd"); - } - - #[test] - fn line_comments_on_multiple_lines() { - assert_eq!( - exclude_comments( - " -line 1 //comment 1 -line 2 // comment 2 // comment 3 -line 3 -line 4 // comment 4" - ), - " -line 1 -line 2 -line 3 -line 4 " - ); - } - - #[test] - fn block_comment() { - assert_eq!(exclude_comments("ab/*comment*/12"), "ab12"); - } - - #[test] - fn empty_block_comment() { - assert_eq!(exclude_comments("ab/**/12"), "ab12"); - } - - #[test] - fn block_comment_with_asterisk_and_slash_inside() { - assert_eq!(exclude_comments("ab/*false * asterisk and / */12"), "ab12"); - } - - #[test] - fn block_comment_within_line_comment() { - assert_eq!(exclude_comments("ab// /*comment*/12"), "ab"); - } - - #[test] - #[should_panic(expected = "block comment not terminated with */")] - fn block_comment_not_terminated() { - exclude_comments("ab /*comment"); - } - - #[test] - #[should_panic(expected = "block comment not terminated with */")] - fn block_comment_not_completely_terminated() { - exclude_comments("ab /*comment*"); - } - - #[test] - fn block_and_line_comments_on_multiple_lines() { - assert_eq!( - exclude_comments( - " -line 1 /* comment 1 */ -line 
/* comment 2 */2 // line comment 1 -line 3 /* some comments -over multiple lines -*/ -line 4 /* more multiline comments -* with leading -* asterisks -*/end// line comment 2" - ), - " -line 1 -line 2 -line 3 -line 4 end" - ); - } -} diff --git a/hex-literal/src/lib.rs b/hex-literal/src/lib.rs index 5399d3f3..4b31b071 100644 --- a/hex-literal/src/lib.rs +++ b/hex-literal/src/lib.rs @@ -1,169 +1,86 @@ -//! This crate provides the `hex!` macro for converting hexadecimal string literals -//! to a byte array at compile time. -//! -//! It accepts the following characters in the input string: -//! -//! - `'0'...'9'`, `'a'...'f'`, `'A'...'F'` — hex characters which will be used -//! in construction of the output byte array -//! - `' '`, `'\r'`, `'\n'`, `'\t'` — formatting characters which will be -//! ignored -//! -//! Additionally it accepts line (`//`) and block (`/* .. */`) comments. Characters -//! inside of those are ignored. -//! -//! # Examples -//! ``` -//! # #[macro_use] extern crate hex_literal; -//! // the macro can be used in const context -//! const DATA: [u8; 4] = hex!("01020304"); -//! # fn main() { -//! assert_eq!(DATA, [1, 2, 3, 4]); -//! -//! // it understands both upper and lower hex values -//! assert_eq!(hex!("a1 b2 c3 d4"), [0xA1, 0xB2, 0xC3, 0xD4]); -//! assert_eq!(hex!("E5 E6 90 92"), [0xE5, 0xE6, 0x90, 0x92]); -//! assert_eq!(hex!("0a0B 0C0d"), [10, 11, 12, 13]); -//! let bytes = hex!(" -//! 00010203 04050607 -//! 08090a0b 0c0d0e0f -//! "); -//! assert_eq!(bytes, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); -//! -//! // it's possible to use several literals (results will be concatenated) -//! let bytes2 = hex!( -//! "00010203 04050607" // first half -//! "08090a0b 0c0d0e0f" // second half -//! ); -//! assert_eq!(bytes2, bytes); -//! -//! // comments can be also included inside literals -//! assert_eq!(hex!("0a0B // 0c0d line comments"), [10, 11]); -//! assert_eq!(hex!("0a0B // line comments -//! 0c0d"), [10, 11, 12, 13]); -//! 
assert_eq!(hex!("0a0B /* block comments */ 0c0d"), [10, 11, 12, 13]); -//! assert_eq!(hex!("0a0B /* multi-line -//! block comments -//! */ 0c0d"), [10, 11, 12, 13]); -//! # } -//! ``` +#![doc = include_str!("../README.md")] +#![no_std] #![doc( html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg", html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg" )] -mod comments; -extern crate proc_macro; - -use std::vec::IntoIter; - -use proc_macro::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree}; - -use crate::comments::{Exclude, ExcludingComments}; - -/// Strips any outer `Delimiter::None` groups from the input, -/// returning a `TokenStream` consisting of the innermost -/// non-empty-group `TokenTree`. -/// This is used to handle a proc macro being invoked -/// by a `macro_rules!` expansion. -/// See https://github.com/rust-lang/rust/issues/72545 for background -fn ignore_groups(mut input: TokenStream) -> TokenStream { - let mut tokens = input.clone().into_iter(); - loop { - if let Some(TokenTree::Group(group)) = tokens.next() { - if group.delimiter() == Delimiter::None { - input = group.stream(); - continue; - } - } - return input; +const fn next_hex_char(string: &[u8], mut pos: usize) -> Option<(u8, usize)> { + while pos < string.len() { + let raw_val = string[pos]; + pos += 1; + let val = match raw_val { + b'0'..=b'9' => raw_val - 48, + b'A'..=b'F' => raw_val - 55, + b'a'..=b'f' => raw_val - 87, + b' ' | b'\r' | b'\n' | b'\t' => continue, + 0..=127 => panic!("Encountered invalid ASCII character"), + _ => panic!("Encountered non-ASCII character"), + }; + return Some((val, pos)); } + None } -struct TokenTreeIter { - buf: ExcludingComments>, - is_punct: bool, +const fn next_byte(string: &[u8], pos: usize) -> Option<(u8, usize)> { + let (half1, pos) = match next_hex_char(string, pos) { + Some(v) => v, + None => return None, + }; + let (half2, pos) = match next_hex_char(string, pos) { + 
Some(v) => v, + None => panic!("Odd number of hex characters"), + }; + Some(((half1 << 4) + half2, pos)) } -impl TokenTreeIter { - /// Constructs a new `TokenTreeIter` from a given `proc_macro::Literal`. - /// - /// # Panics - /// This panics if the given `Literal` is not a string literal. - fn new(input: Literal) -> Self { - let mut buf: Vec = input.to_string().into(); - - match buf.as_slice() { - [b'"', .., b'"'] => (), - _ => panic!("expected string literal, got `{}`", input), - }; - buf.pop(); - let mut iter = buf.into_iter().exclude_comments(); - iter.next(); - Self { - buf: iter, - is_punct: false, +/// Compute length of a byte array which will be decoded from the strings. +/// +/// This function is an implementation detail and SHOULD NOT be called directly! +#[doc(hidden)] +pub const fn len(strings: &[&[u8]]) -> usize { + let mut i = 0; + let mut len = 0; + while i < strings.len() { + let mut pos = 0; + while let Some((_, new_pos)) = next_byte(strings[i], pos) { + len += 1; + pos = new_pos; } + i += 1; } + len +} - /// Parses a single hex character (a-f/A-F/0-9) as a `u8` from the `TokenTreeIter`'s - /// internal buffer, ignoring whitespace. - /// - /// # Panics - /// This panics if a non-hex, non-whitespace character is encountered. - fn next_hex_val(&mut self) -> Option { - loop { - let v = self.buf.next()?; - let n = match v { - b'0'..=b'9' => v - 48, - b'A'..=b'F' => v - 55, - b'a'..=b'f' => v - 87, - b' ' | b'\r' | b'\n' | b'\t' => continue, - 0..=127 => panic!("encountered invalid character: `{}`", v as char), - _ => panic!("encountered invalid non-ASCII character"), - }; - return Some(n); +/// Decode hex strings into a byte array of pre-computed length. +/// +/// This function is an implementation detail and SHOULD NOT be called directly! 
+#[doc(hidden)] +pub const fn decode<const LEN: usize>(strings: &[&[u8]]) -> [u8; LEN] { + let mut i = 0; + let mut buf = [0u8; LEN]; + let mut buf_pos = 0; + while i < strings.len() { + let mut pos = 0; + while let Some((byte, new_pos)) = next_byte(strings[i], pos) { + buf[buf_pos] = byte; + buf_pos += 1; + pos = new_pos; } + i += 1; } -} - -impl Iterator for TokenTreeIter { - type Item = TokenTree; - - /// Produces hex values (as `u8` literals) parsed from the `TokenTreeIter`'s - /// internal buffer, alternating with commas to separate the elements of the - /// generated array of bytes. - /// - /// # Panics - /// This panics if the internal buffer contains an odd number of hex - /// characters. - fn next(&mut self) -> Option { - let v = if self.is_punct { - TokenTree::Punct(Punct::new(',', Spacing::Alone)) - } else { - let p1 = self.next_hex_val()?; - let p2 = match self.next_hex_val() { - Some(v) => v, - None => panic!("expected even number of hex characters"), - }; - let val = (p1 << 4) + p2; - TokenTree::Literal(Literal::u8_suffixed(val)) - }; - self.is_punct = !self.is_punct; - Some(v) + if LEN != buf_pos { + panic!("Length mismatch. Please report this bug."); } + buf } /// Macro for converting sequence of string literals containing hex-encoded data /// into an array of bytes. -#[proc_macro] -pub fn hex(input: TokenStream) -> TokenStream { - let mut out_ts = TokenStream::new(); - for tt in ignore_groups(input) { - let iter = match tt { - TokenTree::Literal(literal) => TokenTreeIter::new(literal), - unexpected => panic!("expected string literal, got `{}`", unexpected), - }; - out_ts.extend(iter); - } - TokenStream::from(TokenTree::Group(Group::new(Delimiter::Bracket, out_ts))) +#[macro_export] +macro_rules! 
hex { + ($($s:literal)*) => {{ + const STRINGS: &[&'static [u8]] = &[$($s.as_bytes(),)*]; + $crate::decode::<{ $crate::len(STRINGS) }>(STRINGS) + }}; } diff --git a/hex-literal/tests/comments.rs b/hex-literal/tests/comments.rs deleted file mode 100644 index 7ebb7eb7..00000000 --- a/hex-literal/tests/comments.rs +++ /dev/null @@ -1,41 +0,0 @@ -use hex_literal::hex; - -#[test] -fn single_line_comments() { - assert_eq!(hex!("dd 03 // comment"), [0xdd, 0x03]); - assert_eq!( - hex!( - "00 04 f0 // a comment here - 54 fe // another comment" - ), - [0x00, 0x04, 0xf0, 0x54, 0xfe] - ); - assert_eq!( - hex!( - "// initial comment - 01 02" - ), - [0x01, 0x02] - ); -} - -#[test] -fn block_comments() { - assert_eq!( - hex!("00 01 02 /* intervening comment */ 03 04"), - [0x00, 0x01, 0x02, 0x03, 0x04] - ); - assert_eq!(hex!("/* initial comment */ ff df dd"), [0xff, 0xdf, 0xdd]); - assert_eq!( - hex!( - "8f ff 7d /* - comment - on - several - lines - */ - d0 a3" - ), - [0x8f, 0xff, 0x7d, 0xd0, 0xa3] - ); -}