Skip to content

Commit

Permalink
api: polish things up
Browse files Browse the repository at this point in the history
This polishes up the public API. In particular, the big change here is
to de-emphasize the raw Decoder and Encoder types, since they are very
rarely what one wants. They are now put in their own 'raw' sub-module.

The top-level API now only consists of the Error type and the Result
type alias.

We also make sure every public API type has a std::fmt::Debug impl.
  • Loading branch information
BurntSushi committed Feb 14, 2020
1 parent 0dd721e commit 21d2ec4
Show file tree
Hide file tree
Showing 10 changed files with 184 additions and 91 deletions.
6 changes: 3 additions & 3 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ macro_rules! compress {
src.to_owned()
};
};
let mut dst = vec![0; snap::max_compress_len(SRC.len())];
let mut dst = vec![0; snap::raw::max_compress_len(SRC.len())];
b.bytes = SRC.len() as u64;
b.iter(|| $comp(SRC.as_slice(), &mut dst).unwrap());
}
Expand All @@ -43,7 +43,7 @@ macro_rules! decompress {
src.to_owned()
};
static ref COMPRESSED: Vec<u8> =
{ snap::Encoder::new().compress_vec(&*SRC).unwrap() };
{ snap::raw::Encoder::new().compress_vec(&*SRC).unwrap() };
};

let mut dst = vec![0; SRC.len()];
Expand All @@ -55,7 +55,7 @@ macro_rules! decompress {

mod rust {
use lazy_static::lazy_static;
use snap::{Decoder, Encoder, Result};
use snap::{raw::Decoder, raw::Encoder, Result};

#[inline(always)]
fn compress(input: &[u8], output: &mut [u8]) -> Result<usize> {
Expand Down
14 changes: 12 additions & 2 deletions src/compress.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::fmt;
use std::ops::{Deref, DerefMut};
use std::ptr;

Expand Down Expand Up @@ -58,15 +59,24 @@ pub fn max_compress_len(input_len: usize) -> usize {
/// This encoder does not use the Snappy frame format and simply compresses the
/// given bytes in one big Snappy block (that is, it has a single header).
///
/// Unless you explicitly need the low-level control, you should use `Writer`
/// Unless you explicitly need the low-level control, you should use
/// [`read::FrameEncoder`](../read/struct.FrameEncoder.html)
/// or
/// [`write::FrameEncoder`](../write/struct.FrameEncoder.html)
/// instead, which compresses to the Snappy frame format.
///
/// It is beneficial to reuse an Encoder.
/// It is beneficial to reuse an Encoder when possible.
pub struct Encoder {
small: [u16; SMALL_TABLE_SIZE],
big: Vec<u16>,
}

impl fmt::Debug for Encoder {
    /// Writes the fixed placeholder `Encoder(...)`; the `small` and `big`
    /// fields are deliberately not printed.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("Encoder(...)")
    }
}

impl Encoder {
/// Return a new encoder that can be used for compressing bytes.
pub fn new() -> Encoder {
Expand Down
4 changes: 2 additions & 2 deletions src/decompress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ pub fn decompress_len(input: &[u8]) -> Result<usize> {
/// the given bytes as if it were returned from `Encoder`.
///
/// Unless you explicitly need the low-level control, you should use
/// `snap::read::FrameDecoder` instead, which decompresses the Snappy frame
/// format.
/// [`read::FrameDecoder`](../read/struct.FrameDecoder.html)
/// instead, which decompresses the Snappy frame format.
#[derive(Clone, Debug, Default)]
pub struct Decoder {
// Place holder for potential future fields.
Expand Down
12 changes: 4 additions & 8 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,11 @@ pub struct IntoInnerError<W> {
err: io::Error,
}

/// Creates a new `IntoInnerError`.
///
/// (This is a visibility hack. It's public in this module, but not in the
/// crate.)
pub fn new_into_inner_error<W>(wtr: W, err: io::Error) -> IntoInnerError<W> {
IntoInnerError { wtr: wtr, err: err }
}

impl<W> IntoInnerError<W> {
/// Bundles `wtr` and `err` into a new `IntoInnerError`.
///
/// Only visible inside this crate (`pub(crate)`).
pub(crate) fn new(wtr: W, err: io::Error) -> IntoInnerError<W> {
IntoInnerError { wtr, err }
}

/// Returns the error which caused the call to `into_inner` to fail.
///
/// This error was returned when attempting to flush the internal buffer.
Expand Down
32 changes: 18 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,21 @@ snap = "0.2"
# Overview
This crate provides two ways to use Snappy. The first way is through the
`snap::read::FrameDecoder` and `snap::write::FrameEncoder` types, which
implement the `std::io::Read` and `std::io::Write` traits with the Snappy frame
format. Unless you have a specific reason to the contrary, you should only
use the Snappy frame format. Specifically, the Snappy frame format permits
streaming compression or decompression.
The second way is through the `Decoder` and `Encoder` types. These types
provide lower level control to the raw Snappy format, and don't support a
streaming interface directly. You should only use these types if you know you
specifically need the Snappy raw format.
[`read::FrameDecoder`](read/struct.FrameDecoder.html)
and
[`write::FrameEncoder`](write/struct.FrameEncoder.html)
types, which implement the `std::io::Read` and `std::io::Write` traits with the
Snappy frame format. Unless you have a specific reason to the contrary, you
should only use the Snappy frame format. Specifically, the Snappy frame format
permits streaming compression or decompression.
The second way is through the
[`raw::Decoder`](raw/struct.Decoder.html)
and
[`raw::Encoder`](raw/struct.Encoder.html)
types. These types provide lower level control to the raw Snappy format, and
don't support a streaming interface directly. You should only use these types
if you know you specifically need the Snappy raw format.
Finally, the `Error` type in this crate provides an exhaustive list of error
conditions that are probably useless in most circumstances. Therefore,
Expand Down Expand Up @@ -79,12 +84,10 @@ fn main() {

#![deny(missing_docs)]

pub use crate::compress::{max_compress_len, Encoder};
pub use crate::decompress::{decompress_len, Decoder};
pub use crate::error::{Error, IntoInnerError, Result};
pub use crate::error::{Error, Result};

/// We don't permit compressing a block bigger than what can fit in a u32.
const MAX_INPUT_SIZE: u64 = ::std::u32::MAX as u64;
const MAX_INPUT_SIZE: u64 = std::u32::MAX as u64;

/// The maximum number of bytes that we process at once. A block is the unit
/// at which we scan for candidates for compression.
Expand All @@ -95,6 +98,7 @@ mod crc32;
mod decompress;
mod error;
mod frame;
pub mod raw;
pub mod read;
mod tag;
#[cfg(test)]
Expand Down
14 changes: 14 additions & 0 deletions src/raw.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*!
This module provides a raw Snappy encoder and decoder.
A raw Snappy encoder/decoder can only compress/decompress a fixed amount of
data at a time. For this reason, this module is lower level and more difficult
to use than the higher level streaming readers and writers exposed as part of
the [`read`](../read/index.html) and [`write`](../write/index.html) modules.
Generally, one only needs to use the raw format if some other source is
generating raw Snappy compressed data and you have no choice but to do the
same. Otherwise, the Snappy frame format should probably always be preferred.
*/
pub use crate::compress::{max_compress_len, Encoder};
pub use crate::decompress::{decompress_len, Decoder};
139 changes: 94 additions & 45 deletions src/read.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
/*!
This module provides two `std::io::Read` implementations:
- `read::FrameDecoder` wraps another `std::io::Read` implementation, and
decompresses data encoded using the Snappy frame format. Use this
if you have a compressed data source and wish to read it as uncompressed data.
- `read::FrameEncoder` wraps another `std::io::Read` implementation, and
compresses data encoded using the Snappy frame format. Use this if you have
an uncompressed data source and wish to read it as compressed data.
* [`read::FrameDecoder`](struct.FrameDecoder.html)
wraps another `std::io::Read` implementation, and decompresses data encoded
using the Snappy frame format. Use this if you have a compressed data source
and wish to read it as uncompressed data.
* [`read::FrameEncoder`](struct.FrameEncoder.html)
wraps another `std::io::Read` implementation, and compresses data encoded
using the Snappy frame format. Use this if you have an uncompressed data source
and wish to read it as compressed data.
Typically, `read::FrameDecoder` is the version that you'll want.
*/

use std::cmp;
use std::io::{self, Read};
use std::fmt;
use std::io;

use byteorder::{ByteOrder, LittleEndian as LE, ReadBytesExt};
use lazy_static::lazy_static;
Expand All @@ -26,17 +29,26 @@ use crate::frame::{
};
use crate::MAX_BLOCK_SIZE;

lazy_static! {
/// The maximum block that `FrameEncoder` can output in a single read
/// operation.
static ref MAX_READ_FRAME_ENCODER_BLOCK_SIZE: usize = STREAM_IDENTIFIER
.len()
+ CHUNK_HEADER_AND_CRC_SIZE
+ *MAX_COMPRESS_BLOCK_SIZE;
}

/// A reader for decompressing a Snappy stream.
///
/// This `FrameDecoder` wraps any other reader that implements `io::Read`. Bytes
/// read from this reader are decompressed using the
/// This `FrameDecoder` wraps any other reader that implements `std::io::Read`.
/// Bytes read from this reader are decompressed using the
/// [Snappy frame format](https://github.com/google/snappy/blob/master/framing_format.txt)
/// (file extension `sz`, MIME type `application/x-snappy-framed`).
///
/// This reader can potentially make many small reads from the underlying
/// stream depending on its format, therefore, passing in a buffered reader
/// may be beneficial.
pub struct FrameDecoder<R: Read> {
pub struct FrameDecoder<R: io::Read> {
/// The underlying reader.
r: R,
/// A Snappy decoder that we reuse that does the actual block based
Expand All @@ -55,7 +67,7 @@ pub struct FrameDecoder<R: Read> {
read_stream_ident: bool,
}

impl<R: Read> FrameDecoder<R> {
impl<R: io::Read> FrameDecoder<R> {
/// Create a new reader for streaming Snappy decompression.
pub fn new(rdr: R) -> FrameDecoder<R> {
FrameDecoder {
Expand All @@ -75,7 +87,7 @@ impl<R: Read> FrameDecoder<R> {
}
}

impl<R: Read> Read for FrameDecoder<R> {
impl<R: io::Read> io::Read for FrameDecoder<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
macro_rules! fail {
($err:expr) => {
Expand Down Expand Up @@ -198,62 +210,61 @@ impl<R: Read> Read for FrameDecoder<R> {
}
}

// read_exact_eof is like Read::read_exact, except it converts an UnexpectedEof
// error to a bool of false.
//
// If no error occurred, then this returns true.
fn read_exact_eof<R: Read>(rdr: &mut R, buf: &mut [u8]) -> io::Result<bool> {
use std::io::ErrorKind::UnexpectedEof;
match rdr.read_exact(buf) {
Ok(()) => Ok(true),
Err(ref err) if err.kind() == UnexpectedEof => Ok(false),
Err(err) => Err(err),
impl<R: fmt::Debug + io::Read> fmt::Debug for FrameDecoder<R> {
    /// Formats the decoder as a struct. The `src` and `dst` fields are
    /// shown as the placeholder `"[...]"` instead of their contents.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut builder = f.debug_struct("FrameDecoder");
        builder.field("r", &self.r);
        builder.field("dec", &self.dec);
        // Contents elided: only a marker string is emitted for these two.
        builder.field("src", &"[...]");
        builder.field("dst", &"[...]");
        builder.field("dsts", &self.dsts);
        builder.field("dste", &self.dste);
        builder.field("read_stream_ident", &self.read_stream_ident);
        builder.finish()
    }
}

lazy_static! {
/// The maximum block that `FrameEncoder` can output in a single read
/// operation.
static ref MAX_READ_FRAME_ENCODER_BLOCK_SIZE: usize = STREAM_IDENTIFIER
.len()
+ CHUNK_HEADER_AND_CRC_SIZE
+ *MAX_COMPRESS_BLOCK_SIZE;
}

/// A reader for compressing data using snappy as it is read. Usually you'll
/// want `snap::read::FrameDecoder` (for decompressing while reading) or
/// `snap::write::FrameEncoder` (for compressing while writing) instead.
pub struct FrameEncoder<R: Read> {
/// A reader for compressing data using snappy as it is read.
///
/// This `FrameEncoder` wraps any other reader that implements `std::io::Read`.
/// Bytes read from this reader are compressed using the
/// [Snappy frame format](https://github.com/google/snappy/blob/master/framing_format.txt)
/// (file extension `sz`, MIME type `application/x-snappy-framed`).
///
/// Usually you'll want
/// [`read::FrameDecoder`](struct.FrameDecoder.html)
/// (for decompressing while reading) or
/// [`write::FrameEncoder`](../write/struct.FrameEncoder.html)
/// (for compressing while writing) instead.
///
/// Unlike `FrameDecoder`, this will attempt to make large reads roughly
/// equivalent to the size of a single Snappy block. Therefore, callers may not
/// benefit from using a buffered reader.
pub struct FrameEncoder<R: io::Read> {
/// Internally, we split `FrameEncoder` in two to keep the borrow checker
/// happy. The `inner` member contains everything that `read_frame` needs
/// to fetch a frame's worth of data and compress it.
inner: Inner<R>,

/// Data that we've encoded and are ready to return to our caller.
dst: Vec<u8>,

/// Starting point of bytes in `dst` not yet given back to the caller.
dsts: usize,

/// Ending point of bytes in `dst` that we want to give to our caller.
dste: usize,
}

struct Inner<R: Read> {
struct Inner<R: io::Read> {
/// The underlying data source.
r: R,

/// An encoder that we reuse that does the actual block based compression.
enc: Encoder,

/// Data taken from the underlying `r`, and not yet compressed.
src: Vec<u8>,

/// Have we written the standard snappy header to `dst` yet?
wrote_stream_ident: bool,
}

impl<R: Read> FrameEncoder<R> {
impl<R: io::Read> FrameEncoder<R> {
/// Create a new reader for streaming Snappy compression.
pub fn new(rdr: R) -> FrameEncoder<R> {
FrameEncoder {
Expand Down Expand Up @@ -285,7 +296,7 @@ impl<R: Read> FrameEncoder<R> {
}
}

impl<R: Read> Read for FrameEncoder<R> {
impl<R: io::Read> io::Read for FrameEncoder<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
// Try reading previously compressed bytes from our `dst` buffer, if
// any.
Expand All @@ -309,7 +320,7 @@ impl<R: Read> Read for FrameEncoder<R> {
}
}

impl<R: Read> Inner<R> {
impl<R: io::Read> Inner<R> {
/// Read from `self.r`, and create a new frame, writing it to `dst`, which
/// must be at least `*MAX_READ_FRAME_ENCODER_BLOCK_SIZE` bytes in size.
fn read_frame(&mut self, dst: &mut [u8]) -> io::Result<usize> {
Expand Down Expand Up @@ -354,3 +365,41 @@ impl<R: Read> Inner<R> {
Ok(dst_write_start + frame_data.len())
}
}

impl<R: fmt::Debug + io::Read> fmt::Debug for FrameEncoder<R> {
    /// Formats the encoder as a struct. The `dst` field is shown as the
    /// placeholder `"[...]"` instead of its contents.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut builder = f.debug_struct("FrameEncoder");
        builder.field("inner", &self.inner);
        // Contents elided: only a marker string is emitted for `dst`.
        builder.field("dst", &"[...]");
        builder.field("dsts", &self.dsts);
        builder.field("dste", &self.dste);
        builder.finish()
    }
}

impl<R: fmt::Debug + io::Read> fmt::Debug for Inner<R> {
    /// Formats the inner state as a struct. The `src` field is shown as the
    /// placeholder `"[...]"` instead of its contents.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut builder = f.debug_struct("Inner");
        builder.field("r", &self.r);
        builder.field("enc", &self.enc);
        // Contents elided: only a marker string is emitted for `src`.
        builder.field("src", &"[...]");
        builder.field("wrote_stream_ident", &self.wrote_stream_ident);
        builder.finish()
    }
}

// read_exact_eof fills `buf` completely from `rdr` via Read::read_exact, but
// reports an UnexpectedEof error as Ok(false) instead of as an error.
//
// Returns Ok(true) when `buf` was filled; every other error kind is passed
// through to the caller unchanged.
fn read_exact_eof<R: io::Read>(
    rdr: &mut R,
    buf: &mut [u8],
) -> io::Result<bool> {
    if let Err(e) = rdr.read_exact(buf) {
        // Running out of input is an expected outcome here, not a failure.
        if e.kind() == io::ErrorKind::UnexpectedEof {
            return Ok(false);
        }
        return Err(e);
    }
    Ok(true)
}
Loading

0 comments on commit 21d2ec4

Please sign in to comment.