From 2fe93efdaf41efc74d63c3f9f3c97eb62d6e3ed3 Mon Sep 17 00:00:00 2001
From: Navneet Aman
Date: Fri, 4 Oct 2024 10:54:05 +0530
Subject: [PATCH] Expose the substitute method of pcre as Regex::substitute and Regex::substitute_all.

---
 src/bytes.rs | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/ffi.rs   | 54 +++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/src/bytes.rs b/src/bytes.rs
index 2129119..02755fa 100644
--- a/src/bytes.rs
+++ b/src/bytes.rs
@@ -588,6 +588,58 @@ impl Regex {
     ) -> CaptureMatches<'r, 's> {
         CaptureMatches { re: self, subject, last_end: 0, last_match: None }
     }
+
+    /// Replaces the first match in `subject` with `replacement` and writes
+    /// the result to `output`, discarding whatever `output` held before.
+    /// Returns the number of substitutions performed.
+    ///
+    /// ```rust
+    /// # fn example() -> Result<(), ::pcre2::Error> {
+    /// use pcre2::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"mike")?;
+    /// let text = b"Hi mike, wait you are not mike.";
+    /// let mut output = Vec::new();
+    /// assert_eq!(re.substitute(text, b"john", &mut output)?, 1);
+    /// assert_eq!(&output, b"Hi john, wait you are not mike.");
+    /// # Ok(()) }; example().unwrap()
+    /// ```
+    pub fn substitute(
+        &self,
+        subject: &[u8],
+        replacement: &[u8],
+        output: &mut Vec<u8>,
+    ) -> Result<usize, Error> {
+        self.code.substitute(subject, replacement, output, 0)
+    }
+    /// Replaces every match in `subject` with `replacement` and writes
+    /// the result to `output`, discarding whatever `output` held before.
+    /// Returns the number of substitutions performed.
+    ///
+    /// ```rust
+    /// # fn example() -> Result<(), ::pcre2::Error> {
+    /// use pcre2::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"mike")?;
+    /// let text = b"Hi mike, wait you are not mike.";
+    /// let mut output = Vec::new();
+    /// assert_eq!(re.substitute_all(text, b"john", &mut output)?, 2);
+    /// assert_eq!(&output, b"Hi john, wait you are not john.");
+    /// # Ok(()) }; example().unwrap()
+    /// ```
+    pub fn substitute_all(
+        &self,
+        subject: &[u8],
+        replacement: &[u8],
+        output: &mut Vec<u8>,
+    ) -> Result<usize, Error> {
+        self.code.substitute(
+            subject,
+            replacement,
+            output,
+            pcre2_sys::PCRE2_SUBSTITUTE_GLOBAL,
+        )
+    }
 }
 
 /// Advanced or "lower level" search methods.
@@ -1370,4 +1422,28 @@ mod tests {
         let matched = re.find(hay.as_bytes()).unwrap().unwrap();
         assert_eq!(matched.as_bytes(), "😀👍🏼🎉".as_bytes());
     }
+    #[test]
+    fn test_substitute() {
+        let hay = "0123456789abcdefghijklmnopqrstuvwxyzABCDKLMNOPQRSTUVWXYZ";
+        let pattern = r"(?i)abcd";
+        let re = Regex::new(pattern).unwrap();
+        let mut output = Vec::new();
+        re.substitute(hay.as_bytes(), b"42", &mut output).unwrap();
+        assert_eq!(
+            &output,
+            b"012345678942efghijklmnopqrstuvwxyzABCDKLMNOPQRSTUVWXYZ"
+        );
+    }
+    #[test]
+    fn test_substitute_all() {
+        let hay = "0123456789abcdefghijklmnopqrstuvwxyzABCDKLMNOPQRSTUVWXYZ";
+        let pattern = r"(?i)abcd";
+        let re = Regex::new(pattern).unwrap();
+        let mut output = Vec::new();
+        re.substitute_all(hay.as_bytes(), b"42", &mut output).unwrap();
+        assert_eq!(
+            &output,
+            b"012345678942efghijklmnopqrstuvwxyz42KLMNOPQRSTUVWXYZ"
+        );
+    }
 }
diff --git a/src/ffi.rs b/src/ffi.rs
index aaabf74..03482d6 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -258,6 +258,60 @@ impl Code {
             Ok(1 + count as usize)
         }
     }
+
+    /// Runs `pcre2_substitute` on `subject` with `replacement`, storing the
+    /// result in `output` (cleared first). Returns the substitution count.
+    pub(crate) fn substitute(
+        &self,
+        subject: &[u8],
+        replacement: &[u8],
+        output: &mut Vec<u8>,
+        options: u32,
+    ) -> Result<usize, Error> {
+        output.clear();
+        let mut output_length = output.capacity();
+        let mut rc = unsafe {
+            pcre2_substitute_8(
+                self.as_ptr(),
+                subject.as_ptr(),
+                subject.len(),
+                0, //startoffset
+                PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | options,
+                ptr::null_mut(), //match_data
+                ptr::null_mut(), //match context
+                replacement.as_ptr(),
+                replacement.len(),
+                output.as_mut_ptr(),
+                &mut output_length,
+            )
+        };
+        if rc == PCRE2_ERROR_NOMEMORY {
+            output.reserve_exact(output_length + 1);
+            rc = unsafe {
+                pcre2_substitute_8(
+                    self.as_ptr(),
+                    subject.as_ptr(),
+                    subject.len(),
+                    0, //startoffset
+                    options,
+                    ptr::null_mut(), //match_data
+                    ptr::null_mut(), //match context
+                    replacement.as_ptr(),
+                    replacement.len(),
+                    output.as_mut_ptr(),
+                    &mut output_length,
+                )
+            }
+        }
+        if rc < 0 {
+            Err(Error::info(rc))
+        } else {
+            // SAFETY: on success `pcre2_substitute_8` set `output_length` to
+            // the number of bytes it wrote, which fits the reserved capacity.
+            unsafe { output.set_len(output_length) };
+            Ok(rc as usize)
+        }
+    }
 }
 
 /// A low level representation of PCRE2's compilation context.