Skip to content

Commit

Permalink
Expose the substitute method of pcre as Regex::substitute and
Browse files Browse the repository at this point in the history
Regex::substitute_all.
  • Loading branch information
navneetankur committed Oct 4, 2024
1 parent df0aff5 commit 2f9da67
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 0 deletions.
76 changes: 76 additions & 0 deletions src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,44 @@ impl Regex {
) -> CaptureMatches<'r, 's> {
CaptureMatches { re: self, subject, last_end: 0, last_match: None }
}

/// Replaces the first match in `subject` with the `replacement`,
/// and puts the replaced string in `output`.
/// ```rust
/// # fn example() -> Result<(), ::pcre2::Error> {
/// use std::str;
///
/// use pcre2::bytes::Regex;
///
/// let re = Regex::new(r"mike")?;
/// let text = b"Hi mike, wait you are not mike.";
/// let mut output = Vec::new();
/// re.substitute(text, b"john", &mut output).unwrap();
/// assert_eq!(&output, b"Hi john, wait you are not mike.");
/// # Ok(()) }; example().unwrap()
/// ```
pub fn substitute(&self, subject: &[u8], replacement: &[u8], output: &mut Vec<u8>) -> Result<usize, Error>{
self.code.substitute(subject, replacement, output, 0)
}
/// Replaces all the matches in `subject` with the `replacement`,
/// and puts the replaced string in `output`.
/// ```rust
/// # fn example() -> Result<(), ::pcre2::Error> {
/// use std::str;
///
/// use pcre2::bytes::Regex;
///
/// let re = Regex::new(r"mike")?;
/// let text = b"Hi mike, wait you are not mike.";
/// let mut output = Vec::new();
/// re.substitute_all(text, b"john", &mut output).unwrap();
/// assert_eq!(&output, b"Hi john, wait you are not john.");
/// # Ok(()) }; example().unwrap()
/// ```
pub fn substitute_all(&self, subject: &[u8], replacement: &[u8], output: &mut Vec<u8>) -> Result<usize, Error>{
self.code.substitute(subject, replacement, output, pcre2_sys::PCRE2_SUBSTITUTE_GLOBAL)
}

}

/// Advanced or "lower level" search methods.
Expand Down Expand Up @@ -1370,4 +1408,42 @@ mod tests {
let matched = re.find(hay.as_bytes()).unwrap().unwrap();
assert_eq!(matched.as_bytes(), "😀👍🏼🎉".as_bytes());
}
/// `substitute` must replace only the first run of non-ASCII code points
/// and leave the second run untouched.
#[test]
fn test_substitute() {
    let haystack = "0123456789😀👍🏼🎉abcdefghijklmnopqrst😀👍🏼🎉auvwxyzABCKLMNOPQRSTUVWXYZ";
    let expected = "012345678942abcdefghijklmnopqrst😀👍🏼🎉auvwxyzABCKLMNOPQRSTUVWXYZ".as_bytes();
    // The raw string is kept flush-left on purpose: its bytes are the pattern.
    let non_ascii_run = r"(*UTF)
(?x) (?#: Allow comments and whitespace.)
[^\N{U+0000}-\N{U+007F}] (?#: Non-ascii code points.)
+ (?#: One or more times.)
";
    let regex = RegexBuilder::new()
        .extended(true)
        .utf(true)
        .jit_if_available(true)
        .build(non_ascii_run)
        .unwrap();

    let mut replaced = Vec::new();
    regex.substitute(haystack.as_bytes(), b"42", &mut replaced).unwrap();

    assert_eq!(replaced.as_slice(), expected);
}
/// `substitute_all` must replace both runs of non-ASCII code points.
#[test]
fn test_substitute_all() {
    let haystack = "0123456789😀👍🏼🎉abcdefghijklmnopqrst😀👍🏼🎉auvwxyzABCKLMNOPQRSTUVWXYZ";
    let expected = "012345678942abcdefghijklmnopqrst42auvwxyzABCKLMNOPQRSTUVWXYZ".as_bytes();
    // The raw string is kept flush-left on purpose: its bytes are the pattern.
    let non_ascii_run = r"(*UTF)
(?x) (?#: Allow comments and whitespace.)
[^\N{U+0000}-\N{U+007F}] (?#: Non-ascii code points.)
+ (?#: One or more times.)
";
    let regex = RegexBuilder::new()
        .extended(true)
        .utf(true)
        .jit_if_available(true)
        .build(non_ascii_run)
        .unwrap();

    let mut replaced = Vec::new();
    regex.substitute_all(haystack.as_bytes(), b"42", &mut replaced).unwrap();

    assert_eq!(replaced.as_slice(), expected);
}
}
48 changes: 48 additions & 0 deletions src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,54 @@ impl Code {
Ok(1 + count as usize)
}
}

/// Substitute the replacement pattern in subject and put the output in
/// output vec. Output vec is will be cleared before use.
pub(crate) fn substitute(&self, subject: &[u8], replacement: &[u8], output: &mut Vec<u8>, options: u32) -> Result<usize, Error>{
output.clear();
let mut output_length = output.capacity();
let mut rc = unsafe {
pcre2_substitute_8(
self.as_ptr(),
subject.as_ptr(),
subject.len(),
0, //startoffset
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | options,
ptr::null_mut(), //match_data
ptr::null_mut(), //match context
replacement.as_ptr(),
replacement.len(),
output.as_mut_ptr(),
&mut output_length,
)
};
if rc == PCRE2_ERROR_NOMEMORY {
output.reserve_exact(output_length + 1);
rc = unsafe {
pcre2_substitute_8(
self.as_ptr(),
subject.as_ptr(),
subject.len(),
0, //startoffset
options,
ptr::null_mut(), //match_data
ptr::null_mut(), //match context
replacement.as_ptr(),
replacement.len(),
output.as_mut_ptr(),
&mut output_length,
)
}
}
if rc < 0 {
Err(Error::info(rc))
} else {
// Safety: pcre2_substitute_8 method above would set this
// field correctly.
unsafe { output.set_len(output_length) };
Ok(rc as usize)
}
}
}

/// A low level representation of PCRE2's compilation context.
Expand Down

0 comments on commit 2f9da67

Please sign in to comment.