From f3dfe51805c831001ea82dcbbdfac53d35ef8b36 Mon Sep 17 00:00:00 2001
From: Artyom Pavlov <newpavlov@gmail.com>
Date: Sun, 2 Apr 2023 02:21:12 +0000
Subject: [PATCH] hex-literal: remove support for comments inside literals and
 migrate to CTFE (#816)

---
 .github/workflows/hex-literal.yml |   4 +-
 hex-literal/CHANGELOG.md          |   8 ++
 hex-literal/Cargo.toml            |  10 +-
 hex-literal/README.md             |  84 ++++++++++++
 hex-literal/src/comments.rs       | 216 ------------------------------
 hex-literal/src/lib.rs            | 213 +++++++++--------------------
 hex-literal/tests/comments.rs     |  41 ------
 7 files changed, 163 insertions(+), 413 deletions(-)
 create mode 100644 hex-literal/README.md
 delete mode 100644 hex-literal/src/comments.rs
 delete mode 100644 hex-literal/tests/comments.rs

diff --git a/.github/workflows/hex-literal.yml b/.github/workflows/hex-literal.yml
index 133e36ef..dc77aeb2 100644
--- a/.github/workflows/hex-literal.yml
+++ b/.github/workflows/hex-literal.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       matrix:
         rust:
-          - 1.45.0 # MSRV
+          - 1.57.0 # MSRV
           - stable
         target:
           - thumbv7em-none-eabi
@@ -48,7 +48,7 @@ jobs:
     strategy:
       matrix:
         rust:
-          - 1.45.0 # MSRV
+          - 1.57.0 # MSRV
           - stable
     steps:
       - uses: actions/checkout@v3
diff --git a/hex-literal/CHANGELOG.md b/hex-literal/CHANGELOG.md
index 1c72f15b..338be64b 100644
--- a/hex-literal/CHANGELOG.md
+++ b/hex-literal/CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## UNRELEASED
+### Changed
+- Disallow comments inside hex strings ([#816])
+- Migrate to 2021 edition and bump MSRV to 1.57 ([#816])
+- Use CTFE instead of proc macro ([#816])
+
+[#816]: https://github.com/RustCrypto/utils/pull/816
+
 ## 0.3.4 (2021-11-11)
 ### Changed
 - Provide more info in `panic!` messages ([#664])
diff --git a/hex-literal/Cargo.toml b/hex-literal/Cargo.toml
index e3d36833..b704b6b9 100644
--- a/hex-literal/Cargo.toml
+++ b/hex-literal/Cargo.toml
@@ -3,11 +3,9 @@ name = "hex-literal"
 version = "0.3.4"
 authors = ["RustCrypto Developers"]
 license = "MIT OR Apache-2.0"
-description = "Procedural macro for converting hexadecimal string to byte array at compile time."
+description = "Macro for converting hexadecimal string literals to a byte array at compile time"
 documentation = "https://docs.rs/hex-literal"
 repository = "https://github.com/RustCrypto/utils"
-keywords = ["hex", "proc-macro", "literals"]
-edition = "2018"
-
-[lib]
-proc-macro = true
+keywords = ["hex", "literals"]
+edition = "2021"
+rust-version = "1.57"
diff --git a/hex-literal/README.md b/hex-literal/README.md
new file mode 100644
index 00000000..72d527e4
--- /dev/null
+++ b/hex-literal/README.md
@@ -0,0 +1,84 @@
+# [RustCrypto]: hex-literal
+
+[![Crate][crate-image]][crate-link]
+[![Docs][docs-image]][docs-link]
+![Apache 2.0/MIT Licensed][license-image]
+![MSRV][rustc-image]
+[![Build Status][build-image]][build-link]
+
+This crate provides the `hex!` macro for converting hexadecimal string literals to a byte array at compile time.
+
+It accepts the following characters in the input string:
+
+- `'0'...'9'`, `'a'...'f'`, `'A'...'F'` — hex characters which will be used in construction of the output byte array
+- `' '`, `'\r'`, `'\n'`, `'\t'` — formatting characters which will be ignored
+
+# Examples
+```rust
+use hex_literal::hex;
+
+// The macro can be used in const contexts
+const DATA: [u8; 4] = hex!("01020304");
+assert_eq!(DATA, [1, 2, 3, 4]);
+
+// Both upper and lower hex values are supported
+assert_eq!(hex!("a1 b2 c3 d4"), [0xA1, 0xB2, 0xC3, 0xD4]);
+assert_eq!(hex!("E5 E6 90 92"), [0xE5, 0xE6, 0x90, 0x92]);
+assert_eq!(hex!("0a0B 0C0d"), [10, 11, 12, 13]);
+
+// Multi-line literals
+let bytes1 = hex!("
+    00010203 04050607
+    08090a0b 0c0d0e0f
+");
+assert_eq!(bytes1, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+
+// It's possible to use several literals (results will be concatenated)
+let bytes2 = hex!(
+    "00010203 04050607" // first half
+    "08090a0b" /* block comment */ "0c0d0e0f" // second half
+);
+assert_eq!(bytes1, bytes2);
+```
+
+Using an unsupported character inside literals will result in a compilation error:
+```rust,compile_fail
+# use hex_literal::hex;
+hex!("АА"); // Cyrillic "А"
+hex!("11　22"); // Japanese space
+hex!("0123 // Comments inside literals are not supported");
+```
+
+## Minimum Supported Rust Version
+
+Rust **1.57** or newer.
+
+In the future, we reserve the right to change the MSRV (i.e. the MSRV is out-of-scope for this crate's SemVer guarantees); however, when we do, it will be accompanied by a minor version bump.
+
+## License
+
+Licensed under either of:
+
+* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+* [MIT license](http://opensource.org/licenses/MIT)
+
+at your option.
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
+
+[//]: # (badges)
+
+[crate-image]: https://img.shields.io/crates/v/hex-literal.svg
+[crate-link]: https://crates.io/crates/hex-literal
+[docs-image]: https://docs.rs/hex-literal/badge.svg
+[docs-link]: https://docs.rs/hex-literal/
+[license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg
+[rustc-image]: https://img.shields.io/badge/rustc-1.57+-blue.svg
+[build-image]: https://github.com/RustCrypto/utils/actions/workflows/hex-literal.yml/badge.svg
+[build-link]: https://github.com/RustCrypto/utils/actions/workflows/hex-literal.yml
+
+[//]: # (general links)
+
+[RustCrypto]: https://github.com/RustCrypto
diff --git a/hex-literal/src/comments.rs b/hex-literal/src/comments.rs
deleted file mode 100644
index 6f8799d2..00000000
--- a/hex-literal/src/comments.rs
+++ /dev/null
@@ -1,216 +0,0 @@
-//! Provides an Iterator<Item=u8> decorator that uses a finite state machine to exclude comments
-//! from a string in linear time and constant space.
-
-use std::iter::Peekable;
-
-pub(crate) trait Exclude: Sized + Iterator<Item = u8> {
-    fn exclude_comments(self) -> ExcludingComments<Self>;
-}
-
-impl<T: Iterator<Item = u8>> Exclude for T {
-    fn exclude_comments(self) -> ExcludingComments<T> {
-        ExcludingComments::new_from_iter(self)
-    }
-}
-
-pub(crate) struct ExcludingComments<I: Iterator<Item = u8>> {
-    state: State,
-    iter: Peekable<I>,
-}
-
-impl<I: Iterator<Item = u8>> Iterator for ExcludingComments<I> {
-    type Item = u8;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let next_byte = self.next_byte();
-        if next_byte.is_none() {
-            match self.state {
-                State::BlockComment | State::PotentialBlockCommentEnd => {
-                    panic!("block comment not terminated with */")
-                }
-                State::PotentialComment { .. } => panic!("encountered isolated `/`"),
-                _ => {}
-            }
-        }
-        next_byte
-    }
-}
-
-/// States of the comment removal machine:
-/// <pre>
-///           Normal
-///            '/'                   
-///      PotentialComment
-///     '/'            '*'
-/// LineComment     BlockComment
-///    '\n'            '*'
-///   Normal      PotentialBlockCommentEnd
-///                    '/'           '_'
-///                   Normal     BlockComment
-/// </pre>  
-#[derive(Copy, Clone)]
-enum State {
-    Normal,
-    PotentialComment,
-    LineComment,
-    BlockComment,
-    PotentialBlockCommentEnd,
-}
-
-impl<I: Iterator<Item = u8>> ExcludingComments<I> {
-    fn new_from_iter(iter: I) -> Self {
-        Self {
-            state: State::Normal,
-            iter: iter.peekable(),
-        }
-    }
-
-    fn next_byte(&mut self) -> Option<u8> {
-        loop {
-            let next = self.iter.next()?;
-            self.state = match (self.state, next) {
-                (State::Normal, b'/') => State::PotentialComment,
-                (State::Normal, _) => return Some(next),
-                (State::PotentialComment, b'/') => State::LineComment,
-                (State::PotentialComment, b'*') => State::BlockComment,
-                (State::PotentialComment, _) => panic!("encountered isolated `/`"),
-                (State::LineComment, b'\n') => {
-                    self.state = State::Normal;
-                    return Some(b'\n');
-                }
-                (State::LineComment, _) => continue,
-                (State::BlockComment, b'*') => State::PotentialBlockCommentEnd,
-                (State::BlockComment, _) => continue,
-                (State::PotentialBlockCommentEnd, b'/') => State::Normal,
-                (State::PotentialBlockCommentEnd, _) => State::BlockComment,
-            };
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::vec::IntoIter;
-
-    use super::*;
-
-    /// Converts the input to an iterator of u8, excludes comments, maps back to char and collects
-    /// the results.
-    fn exclude_comments(input: &str) -> String {
-        let excluding_comments: ExcludingComments<IntoIter<u8>> = input
-            .to_string()
-            .into_bytes()
-            .into_iter()
-            .exclude_comments();
-        excluding_comments.map(|b| b as char).collect()
-    }
-
-    #[test]
-    fn empty() {
-        assert!(exclude_comments("").is_empty());
-    }
-
-    #[test]
-    fn single_char() {
-        assert_eq!(exclude_comments("0"), "0");
-    }
-
-    #[test]
-    fn two_chars() {
-        assert_eq!(exclude_comments("ab"), "ab");
-    }
-
-    #[test]
-    fn comment() {
-        assert_eq!(exclude_comments("ab//cd"), "ab");
-    }
-
-    #[test]
-    fn comments_are_ended_by_new_line() {
-        assert_eq!(exclude_comments("ab//comment\nde"), "ab\nde");
-    }
-
-    #[test]
-    fn new_lines_without_comments() {
-        assert_eq!(exclude_comments("ab\nde"), "ab\nde");
-    }
-
-    #[test]
-    #[should_panic]
-    fn panic_on_single_slash() {
-        exclude_comments("ab/cd");
-    }
-
-    #[test]
-    fn line_comments_on_multiple_lines() {
-        assert_eq!(
-            exclude_comments(
-                "
-line 1 //comment 1
-line 2 // comment 2 // comment 3
-line 3
-line 4 // comment 4"
-            ),
-            "
-line 1 
-line 2 
-line 3
-line 4 "
-        );
-    }
-
-    #[test]
-    fn block_comment() {
-        assert_eq!(exclude_comments("ab/*comment*/12"), "ab12");
-    }
-
-    #[test]
-    fn empty_block_comment() {
-        assert_eq!(exclude_comments("ab/**/12"), "ab12");
-    }
-
-    #[test]
-    fn block_comment_with_asterisk_and_slash_inside() {
-        assert_eq!(exclude_comments("ab/*false * asterisk and / */12"), "ab12");
-    }
-
-    #[test]
-    fn block_comment_within_line_comment() {
-        assert_eq!(exclude_comments("ab// /*comment*/12"), "ab");
-    }
-
-    #[test]
-    #[should_panic(expected = "block comment not terminated with */")]
-    fn block_comment_not_terminated() {
-        exclude_comments("ab /*comment");
-    }
-
-    #[test]
-    #[should_panic(expected = "block comment not terminated with */")]
-    fn block_comment_not_completely_terminated() {
-        exclude_comments("ab /*comment*");
-    }
-
-    #[test]
-    fn block_and_line_comments_on_multiple_lines() {
-        assert_eq!(
-            exclude_comments(
-                "
-line 1 /* comment 1 */
-line /* comment 2 */2 // line comment 1
-line 3 /* some comments
-over multiple lines
-*/
-line 4 /* more multiline comments
-* with leading
-* asterisks
-*/end// line comment 2"
-            ),
-            "
-line 1 
-line 2 
-line 3 
-line 4 end"
-        );
-    }
-}
diff --git a/hex-literal/src/lib.rs b/hex-literal/src/lib.rs
index 5399d3f3..4b31b071 100644
--- a/hex-literal/src/lib.rs
+++ b/hex-literal/src/lib.rs
@@ -1,169 +1,86 @@
-//! This crate provides the `hex!` macro for converting hexadecimal string literals
-//! to a byte array at compile time.
-//!
-//! It accepts the following characters in the input string:
-//!
-//! - `'0'...'9'`, `'a'...'f'`, `'A'...'F'` — hex characters which will be used
-//!     in construction of the output byte array
-//! - `' '`, `'\r'`, `'\n'`, `'\t'` — formatting characters which will be
-//!     ignored
-//!
-//! Additionally it accepts line (`//`) and block (`/* .. */`) comments. Characters
-//! inside of those are ignored.
-//!
-//! # Examples
-//! ```
-//! # #[macro_use] extern crate hex_literal;
-//! // the macro can be used in const context
-//! const DATA: [u8; 4] = hex!("01020304");
-//! # fn main() {
-//! assert_eq!(DATA, [1, 2, 3, 4]);
-//!
-//! // it understands both upper and lower hex values
-//! assert_eq!(hex!("a1 b2 c3 d4"), [0xA1, 0xB2, 0xC3, 0xD4]);
-//! assert_eq!(hex!("E5 E6 90 92"), [0xE5, 0xE6, 0x90, 0x92]);
-//! assert_eq!(hex!("0a0B 0C0d"), [10, 11, 12, 13]);
-//! let bytes = hex!("
-//!     00010203 04050607
-//!     08090a0b 0c0d0e0f
-//! ");
-//! assert_eq!(bytes, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-//!
-//! // it's possible to use several literals (results will be concatenated)
-//! let bytes2 = hex!(
-//!     "00010203 04050607" // first half
-//!     "08090a0b 0c0d0e0f" // second half
-//! );
-//! assert_eq!(bytes2, bytes);
-//!
-//! // comments can be also included inside literals
-//! assert_eq!(hex!("0a0B // 0c0d line comments"), [10, 11]);
-//! assert_eq!(hex!("0a0B // line comments
-//!                  0c0d"), [10, 11, 12, 13]);
-//! assert_eq!(hex!("0a0B /* block comments */ 0c0d"), [10, 11, 12, 13]);
-//! assert_eq!(hex!("0a0B /* multi-line
-//!                          block comments
-//!                       */ 0c0d"), [10, 11, 12, 13]);
-//! # }
-//! ```
+#![doc = include_str!("../README.md")]
+#![no_std]
 #![doc(
     html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
     html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg"
 )]
 
-mod comments;
-extern crate proc_macro;
-
-use std::vec::IntoIter;
-
-use proc_macro::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree};
-
-use crate::comments::{Exclude, ExcludingComments};
-
-/// Strips any outer `Delimiter::None` groups from the input,
-/// returning a `TokenStream` consisting of the innermost
-/// non-empty-group `TokenTree`.
-/// This is used to handle a proc macro being invoked
-/// by a `macro_rules!` expansion.
-/// See https://github.com/rust-lang/rust/issues/72545 for background
-fn ignore_groups(mut input: TokenStream) -> TokenStream {
-    let mut tokens = input.clone().into_iter();
-    loop {
-        if let Some(TokenTree::Group(group)) = tokens.next() {
-            if group.delimiter() == Delimiter::None {
-                input = group.stream();
-                continue;
-            }
-        }
-        return input;
+const fn next_hex_char(string: &[u8], mut pos: usize) -> Option<(u8, usize)> {
+    while pos < string.len() { // manual index loop: `for` loops are not allowed in a `const fn`
+        let raw_val = string[pos];
+        pos += 1; // advance now so the returned position is one past the byte just read
+        let val = match raw_val {
+            b'0'..=b'9' => raw_val - 48, // b'0' == 48, so digits map to 0..=9
+            b'A'..=b'F' => raw_val - 55, // b'A' == 65 and 65 - 55 == 10, so A..=F map to 10..=15
+            b'a'..=b'f' => raw_val - 87, // b'a' == 97 and 97 - 87 == 10, so a..=f map to 10..=15
+            b' ' | b'\r' | b'\n' | b'\t' => continue, // formatting characters are skipped
+            0..=127 => panic!("Encountered invalid ASCII character"), // any other ASCII byte
+            _ => panic!("Encountered non-ASCII character"), // bytes 128..=255
+        };
+        return Some((val, pos)); // (value of the hex digit, position to resume scanning from)
     }
+    None // reached the end of `string` without finding another hex digit
 }
 
-struct TokenTreeIter {
-    buf: ExcludingComments<IntoIter<u8>>,
-    is_punct: bool,
+const fn next_byte(string: &[u8], pos: usize) -> Option<(u8, usize)> {
+    let (half1, pos) = match next_hex_char(string, pos) { // high nibble
+        Some(v) => v,
+        None => return None, // no hex digits left: clean end of this string
+    };
+    let (half2, pos) = match next_hex_char(string, pos) { // low nibble
+        Some(v) => v,
+        None => panic!("Odd number of hex characters"), // a high nibble with no partner
+    };
+    Some(((half1 << 4) + half2, pos)) // both halves are < 16, so the sum fits in a `u8`
 }
 
-impl TokenTreeIter {
-    /// Constructs a new `TokenTreeIter` from a given `proc_macro::Literal`.
-    ///
-    /// # Panics
-    /// This panics if the given `Literal` is not a string literal.
-    fn new(input: Literal) -> Self {
-        let mut buf: Vec<u8> = input.to_string().into();
-
-        match buf.as_slice() {
-            [b'"', .., b'"'] => (),
-            _ => panic!("expected string literal, got `{}`", input),
-        };
-        buf.pop();
-        let mut iter = buf.into_iter().exclude_comments();
-        iter.next();
-        Self {
-            buf: iter,
-            is_punct: false,
+/// Compute the length of the byte array that will be decoded from the strings.
+///
+/// This function is an implementation detail and SHOULD NOT be called directly!
+#[doc(hidden)]
+pub const fn len(strings: &[&[u8]]) -> usize {
+    let mut i = 0; // index of the string currently being scanned
+    let mut len = 0; // number of complete bytes seen so far, across all strings
+    while i < strings.len() {
+        let mut pos = 0; // every string is scanned from its own start
+        while let Some((_, new_pos)) = next_byte(strings[i], pos) { // value unused, only counted
+            len += 1;
+            pos = new_pos;
         }
+        i += 1;
     }
+    len
+}
 
-    /// Parses a single hex character (a-f/A-F/0-9) as a `u8` from the `TokenTreeIter`'s
-    /// internal buffer, ignoring whitespace.
-    ///
-    /// # Panics
-    /// This panics if a non-hex, non-whitespace character is encountered.
-    fn next_hex_val(&mut self) -> Option<u8> {
-        loop {
-            let v = self.buf.next()?;
-            let n = match v {
-                b'0'..=b'9' => v - 48,
-                b'A'..=b'F' => v - 55,
-                b'a'..=b'f' => v - 87,
-                b' ' | b'\r' | b'\n' | b'\t' => continue,
-                0..=127 => panic!("encountered invalid character: `{}`", v as char),
-                _ => panic!("encountered invalid non-ASCII character"),
-            };
-            return Some(n);
+/// Decode hex strings into a byte array of pre-computed length.
+///
+/// This function is an implementation detail and SHOULD NOT be called directly!
+#[doc(hidden)]
+pub const fn decode<const LEN: usize>(strings: &[&[u8]]) -> [u8; LEN] {
+    let mut i = 0; // index of the string currently being decoded
+    let mut buf = [0u8; LEN];
+    let mut buf_pos = 0; // next free slot in `buf`, shared across all strings
+    while i < strings.len() {
+        let mut pos = 0; // every string is decoded from its own start
+        while let Some((byte, new_pos)) = next_byte(strings[i], pos) {
+            buf[buf_pos] = byte; // indexing panics if more than LEN bytes are decoded
+            buf_pos += 1;
+            pos = new_pos;
         }
+        i += 1;
     }
-}
-
-impl Iterator for TokenTreeIter {
-    type Item = TokenTree;
-
-    /// Produces hex values (as `u8` literals) parsed from the `TokenTreeIter`'s
-    /// internal buffer, alternating with commas to separate the elements of the
-    /// generated array of bytes.
-    ///
-    /// # Panics
-    /// This panics if the internal buffer contains an odd number of hex
-    /// characters.
-    fn next(&mut self) -> Option<TokenTree> {
-        let v = if self.is_punct {
-            TokenTree::Punct(Punct::new(',', Spacing::Alone))
-        } else {
-            let p1 = self.next_hex_val()?;
-            let p2 = match self.next_hex_val() {
-                Some(v) => v,
-                None => panic!("expected even number of hex characters"),
-            };
-            let val = (p1 << 4) + p2;
-            TokenTree::Literal(Literal::u8_suffixed(val))
-        };
-        self.is_punct = !self.is_punct;
-        Some(v)
+    if LEN != buf_pos { // fewer bytes decoded than LEN; the caller passed a wrong length
+        panic!("Length mismatch. Please report this bug.");
     }
+    buf
 }
 
 /// Macro for converting sequence of string literals containing hex-encoded data
 /// into an array of bytes.
-#[proc_macro]
-pub fn hex(input: TokenStream) -> TokenStream {
-    let mut out_ts = TokenStream::new();
-    for tt in ignore_groups(input) {
-        let iter = match tt {
-            TokenTree::Literal(literal) => TokenTreeIter::new(literal),
-            unexpected => panic!("expected string literal, got `{}`", unexpected),
-        };
-        out_ts.extend(iter);
-    }
-    TokenStream::from(TokenTree::Group(Group::new(Delimiter::Bracket, out_ts)))
+#[macro_export]
+macro_rules! hex {
+    ($($s:literal)*) => {{ // zero or more literals, written back to back without separators
+        const STRINGS: &[&'static [u8]] = &[$($s.as_bytes(),)*];
+        const RES: [u8; $crate::len(STRINGS)] = $crate::decode(STRINGS); RES // `const` forces CTFE
+    }};
 }
diff --git a/hex-literal/tests/comments.rs b/hex-literal/tests/comments.rs
deleted file mode 100644
index 7ebb7eb7..00000000
--- a/hex-literal/tests/comments.rs
+++ /dev/null
@@ -1,41 +0,0 @@
-use hex_literal::hex;
-
-#[test]
-fn single_line_comments() {
-    assert_eq!(hex!("dd 03 // comment"), [0xdd, 0x03]);
-    assert_eq!(
-        hex!(
-            "00 04 f0 // a comment here
-            54 fe // another comment"
-        ),
-        [0x00, 0x04, 0xf0, 0x54, 0xfe]
-    );
-    assert_eq!(
-        hex!(
-            "// initial comment
-            01 02"
-        ),
-        [0x01, 0x02]
-    );
-}
-
-#[test]
-fn block_comments() {
-    assert_eq!(
-        hex!("00 01 02 /* intervening comment */ 03 04"),
-        [0x00, 0x01, 0x02, 0x03, 0x04]
-    );
-    assert_eq!(hex!("/* initial comment */ ff df dd"), [0xff, 0xdf, 0xdd]);
-    assert_eq!(
-        hex!(
-            "8f ff 7d /*
-            comment
-            on
-            several
-            lines
-            */
-            d0 a3"
-        ),
-        [0x8f, 0xff, 0x7d, 0xd0, 0xa3]
-    );
-}