From d250387cabf8056502f82a6cfb6d084c9749796d Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sat, 5 Oct 2024 19:47:36 +0100 Subject: [PATCH 1/2] [parser] Add configurable strategy for handling odd length data elements - Add OddLengthStrategy as a data set reader option - Add DataSetReader::new_with_ts_options shortcut function - [object] extend option to file reading --- object/src/file.rs | 14 +++ object/src/mem.rs | 40 ++++++- parser/src/dataset/read.rs | 224 +++++++++++++++++++++++++++++++++---- 3 files changed, 254 insertions(+), 24 deletions(-) diff --git a/object/src/file.rs b/object/src/file.rs index 90bf339be..9748d653c 100644 --- a/object/src/file.rs +++ b/object/src/file.rs @@ -3,6 +3,9 @@ use dicom_dictionary_std::StandardDataDictionary; use dicom_encoding::transfer_syntax::TransferSyntaxIndex; use dicom_transfer_syntax_registry::TransferSyntaxRegistry; +// re-export from dicom_parser +pub use dicom_parser::dataset::read::OddLengthStrategy; + use crate::{DefaultDicomObject, ReadError}; use std::io::Read; use std::path::Path; @@ -58,6 +61,7 @@ pub struct OpenFileOptions, read_preamble: ReadPreamble, + odd_length: OddLengthStrategy, } impl OpenFileOptions { @@ -92,6 +96,12 @@ impl OpenFileOptions { self } + /// Set how data elements with an odd length should be handled. + pub fn odd_length_strategy(mut self, option: OddLengthStrategy) -> Self { + self.odd_length = option; + self + } + /// Set the transfer syntax index to use when reading the file. pub fn tranfer_syntax_index(self, ts_index: Tr) -> OpenFileOptions where @@ -102,6 +112,7 @@ impl OpenFileOptions { read_until: self.read_until, read_preamble: self.read_preamble, ts_index, + odd_length: self.odd_length, } } @@ -116,6 +127,7 @@ impl OpenFileOptions { read_until: self.read_until, read_preamble: self.read_preamble, ts_index: self.ts_index, + odd_length: self.odd_length, } } @@ -133,6 +145,7 @@ impl OpenFileOptions { self.ts_index, self.read_until, self.read_preamble, + self.odd_length, ) } @@ -154,6 +167,7 @@ impl OpenFileOptions { self.ts_index, self.read_until, self.read_preamble, + self.odd_length, ) } } diff --git a/object/src/mem.rs b/object/src/mem.rs index 2992b8b09..90b37b4dd 100644 --- a/object/src/mem.rs +++ b/object/src/mem.rs @@ -39,6 +39,7 @@ use dicom_core::ops::{ ApplyOp, AttributeAction, AttributeOp, AttributeSelector, AttributeSelectorStep, }; +use dicom_parser::dataset::read::{DataSetReaderOptions, OddLengthStrategy}; use itertools::Itertools; use smallvec::SmallVec; use snafu::{ensure, OptionExt, ResultExt}; @@ -309,7 +310,14 @@ where P: AsRef, R: TransferSyntaxIndex, { - Self::open_file_with_all_options(path, dict, ts_index, None, ReadPreamble::Auto) + Self::open_file_with_all_options( + path, + dict, + ts_index, + None, + ReadPreamble::Auto, + Default::default(), + ) } // detect the presence of a preamble @@ -343,6 +351,7 @@ where ts_index: R, read_until: Option, mut read_preamble: ReadPreamble, + odd_length: OddLengthStrategy, ) -> Result where P: AsRef, @@ -369,7 +378,15 @@ where // read rest of data according to metadata, feed it to object if let Some(ts) = ts_index.get(&meta.transfer_syntax) { - let mut dataset = DataSetReader::new_with_ts(file, ts).context(CreateParserSnafu)?; + let mut options = DataSetReaderOptions::default(); + options.odd_length = odd_length; + let mut dataset = DataSetReader::new_with_ts_cs_options( + file, + ts, + SpecificCharacterSet::default(), + options, + ) + .context(CreateParserSnafu)?; let obj = InMemDicomObject::build_object( &mut dataset, dict, @@ -441,7 +458,14 @@ where S: Read + 's, R: TransferSyntaxIndex, { - Self::from_reader_with_all_options(src, dict, ts_index, None, ReadPreamble::Auto) + Self::from_reader_with_all_options( + src, + dict, + ts_index, + None, + ReadPreamble::Auto, + Default::default(), + ) } pub(crate) fn from_reader_with_all_options<'s, S, R>( @@ -450,6 +474,7 @@ where ts_index: R, read_until: Option, mut read_preamble: ReadPreamble, + odd_length: OddLengthStrategy, ) -> Result where S: Read + 's, @@ -473,7 +498,14 @@ where // read rest of data according to metadata, feed it to object if let Some(ts) = ts_index.get(&meta.transfer_syntax) { - let mut dataset = DataSetReader::new_with_ts(file, ts).context(CreateParserSnafu)?; + let mut options = DataSetReaderOptions::default(); + options.odd_length = odd_length; + let mut dataset = DataSetReader::new_with_ts_options( + file, + ts, + options, + ) + .context(CreateParserSnafu)?; let obj = InMemDicomObject::build_object( &mut dataset, dict, diff --git a/parser/src/dataset/read.rs b/parser/src/dataset/read.rs index 8c00692a4..1185f596f 100644 --- a/parser/src/dataset/read.rs +++ b/parser/src/dataset/read.rs @@ -65,14 +65,18 @@ pub enum Error { #[snafu(display("Unexpected item tag {} while reading element header", tag))] UnexpectedItemTag { tag: Tag, backtrace: Backtrace }, #[snafu(display( - "Unexpected item header outside a dataset sequence at {} bytes", + "Unexpected item header outside a dataset sequence at {:#x}", bytes_read ))] UnexpectedItemHeader { bytes_read: u64, backtrace: Backtrace, }, - /// Undefined pixel item length + /// Invalid data element length {len:04X} of {tag} at {bytes_read:#x} + InvalidElementLength { tag: Tag, len: u32, bytes_read: u64 }, + /// Invalid sequence item length {len:04X} at {bytes_read:#x} + InvalidItemLength { len: u32, bytes_read: u64 }, + /// Undefined pixel data item length UndefinedItemLength, } @@ -130,13 +134,32 @@ pub enum ValueReadStrategy { Raw, } +/// A strategy for when the parser finds a data element with an odd number +/// in the _length_ header field. +#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)] +#[non_exhaustive] +pub enum OddLengthStrategy { + /// Accept elements with an odd length as is, + /// continuing data set reading normally. + #[default] + Accept, + /// Assume that the real length is `length + 1`, + /// as in the next even number. + NextEven, + /// Raise an error instead + Fail, +} + /// The set of options for the data set reader. #[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)] #[non_exhaustive] pub struct DataSetReaderOptions { - /// the value reading strategy + /// The value reading strategy pub value_read: ValueReadStrategy, - /// the position of the reader as received at building time + /// The strategy for handling odd length data elements + pub odd_length: OddLengthStrategy, + /// The position of the reader as received at building time in bytes. + /// Defaults to 0. pub base_offset: u64, } @@ -189,6 +212,19 @@ impl DataSetReader> { Self::new_with_ts_cs_options(source, ts, Default::default(), Default::default()) } + /// Create a new iterator with the given transfer syntax and options. + #[inline] + pub fn new_with_ts_options( + source: R, + ts: &TransferSyntax, + options: DataSetReaderOptions, + ) -> Result + where + R: Read, + { + Self::new_with_ts_cs_options(source, ts, SpecificCharacterSet::default(), options) + } + /// Create a new data set token reader with the given byte source, /// while considering the given transfer syntax specifier /// and the specific character set to assume by default. @@ -283,6 +319,18 @@ where Ok(header) => { match header { SequenceItemHeader::Item { len } => { + let len = match self.sanitize_length(len) { + Some(len) => len, + None => { + return Some( + InvalidItemLengthSnafu { + bytes_read: self.parser.position(), + len: len.0, + } + .fail(), + ) + } + }; // entered a new item self.in_sequence = false; @@ -380,6 +428,19 @@ where match self.parser.decode_item_header() { Ok(header) => match header { SequenceItemHeader::Item { len } => { + let len = match self.sanitize_length(len) { + Some(len) => len, + None => { + return Some( + InvalidItemLengthSnafu { + bytes_read: self.parser.position(), + len: len.0, + } + .fail(), + ) + } + }; + // entered a new item self.in_sequence = false; self.push_sequence_token(SeqTokenType::Item, len, true); @@ -433,6 +494,20 @@ where vr: VR::SQ, len, }) => { + let len = match self.sanitize_length(len) { + Some(len) => len, + None => { + return Some( + InvalidElementLengthSnafu { + tag, + len: len.0, + bytes_read: self.parser.position(), + } + .fail(), + ) + } + }; + self.in_sequence = true; self.push_sequence_token(SeqTokenType::Sequence, len, false); @@ -485,7 +560,21 @@ where Some(Ok(DataToken::SequenceStart { tag, len })) } - Ok(header) => { + Ok(mut header) => { + match self.sanitize_length(header.len) { + Some(len) => header.len = len, + None => { + return Some( + InvalidElementLengthSnafu { + tag: header.tag, + len: header.len.0, + bytes_read: self.parser.position(), + } + .fail(), + ) + } + }; + // save it for the next step self.last_header = Some(header); Some(Ok(DataToken::ElementHeader(header))) @@ -592,11 +681,27 @@ where tag: header.tag, }) } + + /// Check for a non-compliant length + /// and handle it according to the current strategy. + /// Returns `None` if the length cannot or should not be resolved. + fn sanitize_length(&self, length: Length) -> Option { + if length.is_defined() && length.0 & 1 != 0 { + match self.options.odd_length { + OddLengthStrategy::Accept => Some(length), + OddLengthStrategy::NextEven => Some(length + 1), + OddLengthStrategy::Fail => None, + } + } else { + Some(length) + } + } } #[cfg(test)] mod tests { use super::{DataSetReader, DataToken, StatefulDecode}; + use crate::dataset::read::{DataSetReaderOptions, OddLengthStrategy}; use crate::stateful::decode::StatefulDecoder; use dicom_core::header::{DataElementHeader, Length}; use dicom_core::value::PrimitiveValue; @@ -607,7 +712,7 @@ mod tests { }; use dicom_encoding::text::SpecificCharacterSet; - fn validate_dataset_reader_implicit_vr(data: &[u8], ground_truth: I) + fn validate_read_data_implicit_vr(data: &[u8], ground_truth: I) where I: IntoIterator, { @@ -619,10 +724,10 @@ mod tests { SpecificCharacterSet::default(), ); - validate_dataset_reader(data, parser, ground_truth) + validate_read_data(data, parser, ground_truth) } - fn validate_dataset_reader_explicit_vr(data: &[u8], ground_truth: I) + fn validate_read_data_explicit_vr(data: &[u8], ground_truth: I) where I: IntoIterator, { @@ -634,16 +739,26 @@ mod tests { SpecificCharacterSet::default(), ); - validate_dataset_reader(&data, parser, ground_truth) + validate_read_data(&data, parser, ground_truth) } - fn validate_dataset_reader(data: &[u8], parser: D, ground_truth: I) + fn validate_read_data(data: &[u8], parser: D, ground_truth: I) where I: IntoIterator, D: StatefulDecode, { - let mut dset_reader = DataSetReader::new(parser, Default::default()); + let dset_reader = DataSetReader::new(parser, Default::default()); + validate_data_set_reader(data, dset_reader, ground_truth); + } + fn validate_data_set_reader( + data: &[u8], + mut dset_reader: DataSetReader, + ground_truth: I, + ) where + S: StatefulDecode, + I: IntoIterator, + { let iter = (&mut dset_reader).into_iter(); let mut ground_truth = ground_truth.into_iter(); @@ -735,7 +850,7 @@ mod tests { DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())), ]; - validate_dataset_reader_explicit_vr(DATA, ground_truth); + validate_read_data_explicit_vr(DATA, ground_truth); } #[test] @@ -811,7 +926,7 @@ mod tests { )), ]; - validate_dataset_reader_explicit_vr(DATA, ground_truth); + validate_read_data_explicit_vr(DATA, ground_truth); } #[test] @@ -832,7 +947,7 @@ mod tests { DataToken::SequenceEnd, ]; - validate_dataset_reader_explicit_vr(DATA, ground_truth); + validate_read_data_explicit_vr(DATA, ground_truth); } /// Gracefully ignore a stray item end tag in the data set. @@ -855,7 +970,7 @@ mod tests { // no item end ]; - validate_dataset_reader_explicit_vr(DATA, ground_truth); + validate_read_data_explicit_vr(DATA, ground_truth); } #[test] @@ -929,7 +1044,7 @@ mod tests { DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())), ]; - validate_dataset_reader_explicit_vr(DATA, ground_truth); + validate_read_data_explicit_vr(DATA, ground_truth); } #[test] @@ -962,7 +1077,7 @@ mod tests { DataToken::SequenceEnd, ]; - validate_dataset_reader_implicit_vr(DATA, ground_truth); + validate_read_data_implicit_vr(DATA, ground_truth); } #[test] @@ -1011,7 +1126,7 @@ mod tests { DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())), ]; - validate_dataset_reader_explicit_vr(DATA, ground_truth); + validate_read_data_explicit_vr(DATA, ground_truth); } #[test] @@ -1063,7 +1178,7 @@ mod tests { DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())), ]; - validate_dataset_reader_explicit_vr(DATA, ground_truth); + validate_read_data_explicit_vr(DATA, ground_truth); } #[test] @@ -1163,7 +1278,7 @@ mod tests { DataToken::SequenceEnd, ]; - validate_dataset_reader_implicit_vr(DATA, ground_truth); + validate_read_data_implicit_vr(DATA, ground_truth); } #[test] @@ -1281,4 +1396,73 @@ mod tests { dbg!(&token_res); assert!(token_res.is_err()); } + + #[test] + fn read_odd_length_element() { + #[rustfmt::skip] + static DATA: &[u8] = &[ + 0x08, 0x00, 0x16, 0x00, // (0008,0016) SOPClassUID + b'U', b'I', // VR + 0x0b, 0x00, // len = 11 + b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', + 0x00, // padding + ]; + + let ground_truth = vec![ + DataToken::ElementHeader(DataElementHeader { + tag: Tag(0x0008, 0x0016), + vr: VR::UI, + len: Length(12), + }), + DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.100\0")), + ]; + + // strategy: assume next even + + let mut cursor = DATA; + let parser = StatefulDecoder::new( + &mut cursor, + ExplicitVRLittleEndianDecoder::default(), + LittleEndianBasicDecoder::default(), + SpecificCharacterSet::default(), + ); + let dset_reader = DataSetReader::new( + parser, + DataSetReaderOptions { + odd_length: OddLengthStrategy::NextEven, + ..Default::default() + }, + ); + + validate_data_set_reader(DATA, dset_reader, ground_truth); + + // strategy: fail + + let mut cursor = DATA; + let parser = StatefulDecoder::new( + &mut cursor, + ExplicitVRLittleEndianDecoder::default(), + LittleEndianBasicDecoder::default(), + SpecificCharacterSet::default(), + ); + let dset_reader = DataSetReader::new( + parser, + DataSetReaderOptions { + odd_length: OddLengthStrategy::Fail, + ..Default::default() + }, + ); + + let mut tokens = dset_reader.into_iter(); + let token = tokens.next(); + + assert!(matches!( + token, + Some(Err(super::Error::InvalidElementLength { + tag: Tag(0x0008, 0x0016), + len: 11, + bytes_read: 8, + })), + ), "got: {:?}", token); + } } From e79199c6b415913a73c81d00c6162dc75c7ac494 Mon Sep 17 00:00:00 2001 From: Eduardo Pinho Date: Sat, 5 Oct 2024 20:00:13 +0100 Subject: [PATCH 2/2] [object/fuzz] Update fuzz target open_file - remove group length elements before writing - assume that writing back should not fail - tweak target to keep corpus when the fuzz does not fail --- object/fuzz/fuzz_targets/open_file.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/object/fuzz/fuzz_targets/open_file.rs b/object/fuzz/fuzz_targets/open_file.rs index 4a01d43a3..45ea92ee9 100644 --- a/object/fuzz/fuzz_targets/open_file.rs +++ b/object/fuzz/fuzz_targets/open_file.rs @@ -1,18 +1,29 @@ #![no_main] -use libfuzzer_sys::fuzz_target; +use libfuzzer_sys::{fuzz_target, Corpus}; use std::error::Error; -fuzz_target!(|data: &[u8]| { - let _ = fuzz(data); +fuzz_target!(|data: &[u8]| -> Corpus { + match fuzz(data) { + Ok(_) => Corpus::Keep, + Err(_) => Corpus::Reject, + } }); fn fuzz(data: &[u8]) -> Result<(), Box> { // deserialize random bytes - let obj = dicom_object::from_reader(data)?; + let mut obj = dicom_object::OpenFileOptions::new() + .read_preamble(dicom_object::file::ReadPreamble::Auto) + .odd_length_strategy(dicom_object::file::OddLengthStrategy::Fail) + .from_reader(data)?; + // remove group length elements + for g in 0..=0x07FF { + obj.remove_element(dicom_object::Tag(g, 0x0000)); + } // serialize object back to bytes let mut bytes = Vec::new(); - obj.write_all(&mut bytes)?; + obj.write_all(&mut bytes) + .expect("writing DICOM file should always be successful"); // deserialize back to object let obj2 = dicom_object::from_reader(bytes.as_slice())