Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mark buffers created from stdin as modified #7431

Merged
merged 1 commit into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 79 additions & 26 deletions helix-view/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,33 +397,11 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
let mut buf_out = [0u8; BUF_SIZE];
let mut builder = RopeBuilder::new();

// By default, the encoding of the text is auto-detected by
// `encoding_rs` for_bom, and if it fails, from `chardetng`
// crate which requires sample data from the reader.
// As a manual override to this auto-detection is possible, the
// same data is read into `buf` to ensure symmetry in the upcoming
// loop.
let (encoding, has_bom, mut decoder, mut slice, mut is_empty) = {
let read = reader.read(&mut buf)?;
let is_empty = read == 0;
let (encoding, has_bom) = encoding
.map(|encoding| (encoding, false))
.or_else(|| {
encoding::Encoding::for_bom(&buf).map(|(encoding, _bom_size)| (encoding, true))
})
.unwrap_or_else(|| {
let mut encoding_detector = chardetng::EncodingDetector::new();
encoding_detector.feed(&buf, is_empty);
(encoding_detector.guess(None, true), false)
});

let decoder = encoding.new_decoder();
let (encoding, has_bom, mut decoder, read) =
read_and_detect_encoding(reader, encoding, &mut buf)?;

// If the amount of bytes read from the reader is less than
// `buf.len()`, it is undesirable to read the bytes afterwards.
let slice = &buf[..read];
(encoding, has_bom, decoder, slice, is_empty)
};
let mut slice = &buf[..read];
let mut is_empty = read == 0;

// `RopeBuilder::append()` expects a `&str`, so this is the "real"
// output buffer. When decoding, the number of bytes in the output
Expand Down Expand Up @@ -493,6 +471,81 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
Ok((rope, encoding, has_bom))
}

pub fn read_to_string<R: std::io::Read + ?Sized>(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would also be useful for #3966

// Byte source that is read until it reports end of input (a read of 0 bytes).
reader: &mut R,
// Optional manual encoding override, forwarded unchanged to
// `read_and_detect_encoding`.
encoding: Option<&'static Encoding>,
// On success yields the decoded text, the encoding used, and whether a BOM
// was detected.
) -> Result<(String, &'static Encoding, bool), Error> {
// Scratch buffer that every read from `reader` lands in.
let mut buf = [0u8; BUF_SIZE];

// Performs the first read and selects the encoding/decoder; `read` is the
// number of bytes that first read placed in `buf`.
let (encoding, has_bom, mut decoder, read) =
read_and_detect_encoding(reader, encoding, &mut buf)?;

// `slice` is the part of `buf` holding the bytes of the current chunk.
let mut slice = &buf[..read];
// A zero-byte read marks the end of input; the flag is handed to the decoder
// as its final argument so it can finish the stream.
let mut is_empty = read == 0;
let mut buf_string = String::with_capacity(buf.len());

// Outer loop: one iteration per chunk read from `reader`.
loop {
// Number of bytes of `slice` the decoder has consumed so far.
let mut total_read = 0usize;

// Inner loop: keep decoding the current chunk until all of it is consumed.
loop {
let (result, read, ..) =
decoder.decode_to_string(&slice[total_read..], &mut buf_string, is_empty);

total_read += read;

match result {
// The whole chunk has been consumed; move on to the next read.
encoding::CoderResult::InputEmpty => {
debug_assert_eq!(slice.len(), total_read);
break;
}
// The output string ran out of room: grow it and decode the rest of
// the chunk on the next pass.
encoding::CoderResult::OutputFull => {
debug_assert!(slice.len() > total_read);
buf_string.reserve(buf.len())
}
}
}

if is_empty {
// The decoder has just been called with the end-of-input flag set, so
// decoding is done. The extra read is a sanity check only: `debug_assert_eq!`
// does not evaluate its arguments in builds without debug assertions.
debug_assert_eq!(reader.read(&mut buf)?, 0);
break;
}

// Fetch the next chunk; a zero-length read triggers one last decoder call
// with `is_empty == true` before the loop exits.
let read = reader.read(&mut buf)?;
slice = &buf[..read];
is_empty = read == 0;
}
Ok((buf_string, encoding, has_bom))
}

/// Reads the first chunk from `reader` into `buf` and selects the encoding
/// used to decode it.
///
/// The encoding is chosen in this order:
///
/// 1. `encoding`, when the caller supplies a manual override (`has_bom` is
///    reported as `false` in that case);
/// 2. the byte-order mark at the start of the chunk, via `encoding_rs`'s
///    `Encoding::for_bom` (`has_bom` is `true`);
/// 3. a guess from the `chardetng` crate based on the bytes of the chunk
///    (`has_bom` is `false`).
///
/// The chunk is read into `buf` even when an override is given, so callers
/// can run the same decode loop however the encoding was chosen.
///
/// Only `buf[..read]` — the bytes the reader actually produced — is used for
/// BOM sniffing and detection. Anything past that point is whatever the
/// caller's buffer already contained and must not influence the result.
///
/// # Returns
///
/// `(encoding, has_bom, decoder, read)`, where `decoder` is a fresh decoder
/// for `encoding` and `read` is the number of bytes now valid at the start of
/// `buf`.
///
/// # Errors
///
/// Propagates any error returned by `reader.read`.
fn read_and_detect_encoding<R: std::io::Read + ?Sized>(
    reader: &mut R,
    encoding: Option<&'static Encoding>,
    buf: &mut [u8],
) -> Result<(&'static Encoding, bool, encoding::Decoder, usize), Error> {
    let read = reader.read(buf)?;
    let is_empty = read == 0;
    // Restrict detection to the bytes that were really read; the tail of `buf`
    // is padding or stale data and could otherwise skew the guess or complete
    // a BOM that the input does not contain.
    let sample = &buf[..read];
    let (encoding, has_bom) = encoding
        .map(|encoding| (encoding, false))
        .or_else(|| {
            encoding::Encoding::for_bom(sample).map(|(encoding, _bom_size)| (encoding, true))
        })
        .unwrap_or_else(|| {
            let mut encoding_detector = chardetng::EncodingDetector::new();
            encoding_detector.feed(sample, is_empty);
            (encoding_detector.guess(None, true), false)
        });
    let decoder = encoding.new_decoder();

    Ok((encoding, has_bom, decoder, read))
}

// The documentation and implementation of this function should be kept in sync with
// its sibling function, `from_reader()`.
//
Expand Down
21 changes: 16 additions & 5 deletions helix-view/src/editor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1386,11 +1386,22 @@ impl Editor {
}

// Creates a new document from the contents of stdin and returns its id.
//
// NOTE(review): this hunk is rendered without +/- markers. The hunk header
// reports 16 additions & 5 deletions, so the first five body lines below look
// like the removed implementation and the remaining sixteen like the added
// one — confirm against the actual diff.
pub fn new_file_from_stdin(&mut self, action: Action) -> Result<DocumentId, Error> {
// Presumably the removed implementation: stdin is decoded straight into a rope
// and the document is constructed from that rope.
let (rope, encoding, has_bom) = crate::document::from_reader(&mut stdin(), None)?;
Ok(self.new_file_from_document(
action,
Document::from(rope, Some((encoding, has_bom)), self.config.clone()),
))
// Presumably the added implementation: read all of stdin into a `String`
// (passing `None`, i.e. no manual encoding override).
let (stdin, encoding, has_bom) = crate::document::read_to_string(&mut stdin(), None)?;
// Start from an empty rope; the stdin text is inserted afterwards as an edit.
let doc = Document::from(
helix_core::Rope::default(),
Some((encoding, has_bom)),
self.config.clone(),
);
let doc_id = self.new_file_from_document(action, doc);
let doc = doc_mut!(self, &doc_id);
let view = view_mut!(self);
doc.ensure_view_init(view.id);
// Insert the stdin text through a transaction and set the resulting selection
// to a single point at position 0.
// NOTE(review): going through a transaction is presumably what makes the
// buffer count as modified (per the PR title) — confirm.
let transaction =
helix_core::Transaction::insert(doc.text(), doc.selection(view.id), stdin.into())
.with_selection(Selection::point(0));
doc.apply(&transaction, view.id);
doc.append_changes_to_history(view);
Ok(doc_id)
}

// ??? possible use for integration tests
Expand Down