1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
//! Binary encoding for Rust values which preserves lexicographic sort order. Order-preserving
//! encoding is useful for creating keys for sorted key-value stores with byte string typed keys,
//! such as [leveldb](https://github.com/google/leveldb). `bytekey` attempts to encode values into
//! the fewest number of bytes possible while preserving ordering. Type information is *not*
//! serialized alongside values, and thus the type of serialized data must be known in order to
//! perform decoding (`bytekey` does not implement a self-describing format).
//!
//! #### Supported Data Types
//!
//! `bytekey` encoding currently supports all Rust primitives, strings, options, structs, enums, and
//! tuples. `isize` and `usize` types are variable-length encoded. Sequence (`Vec`) and map types are
//! not currently supported (but could be in the future). See `Encoder` for details on the
//! serialization format.
//!
//! #### Usage
//!
//! ```
//! extern crate rustc_serialize;
//! extern crate bytekey;
//! use bytekey::{encode, decode};
//!
//! #[derive(RustcEncodable, RustcDecodable, Debug, PartialEq)]
//! struct MyKey { a: u32, b: String }
//!
//! # fn main() {
//! let a = MyKey { a: 1, b: "foo".to_string() };
//! let b = MyKey { a: 2, b: "foo".to_string() };
//! let c = MyKey { a: 2, b: "fooz".to_string() };
//!
//! assert!(encode(&a).unwrap() < encode(&b).unwrap());
//! assert!(encode(&b).unwrap() < encode(&c).unwrap());
//! assert_eq!(a, decode(encode(&a).unwrap()).unwrap());
//! # }
//! ```
//!
//! #### Type Evolution
//!
//! In general, the exact type of a serialized value must be known in order to correctly deserialize
//! it. For structs and enums, the type is effectively frozen once any values of the type have been
//! serialized: changes to the struct or enum will cause deserialization of already encoded values
//! to fail or return incorrect values. The only exception is adding new variants to the end
//! of an existing enum. Enum variants may *not* change type, be removed, or be reordered. All
//! changes to structs, including adding, removing, reordering, or changing the type of a field are
//! forbidden.
//!
//! These restrictions lead to a few best-practices when using `bytekey` encoding:
//!
//! * Don't use `bytekey` unless you need lexicographic ordering of encoded values! A more
//! general encoding library such as [Cap'n Proto](https://github.com/dwrensha/capnproto-rust) or
//! [binary-encode](https://github.com/TyOverby/binary-encode) will serve you better if this
//! feature is not necessary.
//! * If you persist encoded values for longer than the life of a process (i.e. you write the
//! encoded values to a file or a database), consider using an enum as a top-level wrapper type.
//! This will allow you to seamlessly add a new variant when you need to change the key format in a
//! backwards-compatible manner (the different key types will sort separately). If your enum has
//! fewer than 16 variants, then the overhead is just a single byte in encoded output.

#![feature(core, custom_attribute, io, plugin, unicode)]
#![cfg_attr(test, feature(std_misc))]
#![cfg_attr(test, plugin(quickcheck_macros))]

extern crate byteorder;
extern crate rustc_serialize;

#[cfg(test)] extern crate quickcheck;
#[cfg(test)] extern crate rand;

pub use encoder::Encoder;
pub use decoder::Decoder;

mod encoder;
mod decoder;

use rustc_serialize::{Encodable, Decodable};
use std::{error, fmt, io, result};
use std::error::Error as StdError;

/// Encode data into a byte vector.
///
/// #### Usage
///
/// ```
/// # use bytekey::encode;
/// assert_eq!(vec!(0x00, 0x00, 0x00, 0x2A), encode(&42u32).unwrap());
/// assert_eq!(vec!(0x66, 0x69, 0x7A, 0x7A, 0x62, 0x75, 0x7A, 0x7A, 0x00), encode(&"fizzbuzz").unwrap());
/// assert_eq!(vec!(0x2A, 0x66, 0x69, 0x7A, 0x7A, 0x00), encode(&(42u8, "fizz")).unwrap());
/// ```
pub fn encode<T>(value: &T) -> Result<Vec<u8>>
where T: Encodable {
    let mut writer = Vec::new();
    {
        let mut encoder = Encoder::new(&mut writer);
        try!(value.encode(&mut encoder));
    }
    Ok(writer)
}

/// Reconstruct a value of type `T` from its encoded bytes.
///
/// The byte vector is consumed, wrapped in an `io::Cursor`, and read back through a `Decoder`.
/// The encoding carries no type information, so the caller must name the same type `T` that
/// was originally encoded.
///
/// #### Usage
///
/// ```
/// # use bytekey::{encode, decode};
/// assert_eq!(42usize, decode::<usize>(encode(&42usize).unwrap()).unwrap());
/// ```
pub fn decode<T>(bytes: Vec<u8>) -> Result<T>
where T: Decodable {
    let cursor = io::Cursor::new(bytes);
    let mut decoder = Decoder::new(cursor);
    Decodable::decode(&mut decoder)
}

/// A short-hand for `result::Result<T, bytekey::Error>`.
///
/// Both `encode` and `decode` report failures through this alias.
pub type Result<T> = result::Result<T, Error>;

/// An error type for bytekey decoding and encoding.
///
/// This is a thin wrapper over the standard `io::Error` type. Namely, it
/// adds two additional error cases: an unexpected EOF, and invalid utf8.
///
/// `From` conversions are provided in this module for `io::Error`, `io::CharsError` and
/// `byteorder::Error`, so those errors can be propagated directly with `try!`.
#[derive(Debug)]
pub enum Error {

    /// The underlying stream was read successfully, but it did not contain valid utf8 data.
    NotUtf8,

    /// The underlying stream yielded fewer bytes than are required to decode a meaningful
    /// value.
    UnexpectedEof,

    /// An I/O error occurred; the original `io::Error` is carried as the payload.
    Io(io::Error),
}

impl From<io::Error> for Error {
    fn from(error: io::Error) -> Error { Error::Io(error) }
}

impl From<io::CharsError> for Error {
    fn from(error: io::CharsError) -> Error {
        match error {
            io::CharsError::NotUtf8 => Error::NotUtf8,
            io::CharsError::Other(error) => Error::Io(error),
        }
    }
}

impl From<byteorder::Error> for Error {
    fn from(error: byteorder::Error) -> Error {
        match error {
            byteorder::Error::UnexpectedEOF => Error::UnexpectedEof,
            byteorder::Error::Io(error) => Error::Io(error),
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::NotUtf8 => write!(f, "byte stream did not contain valid utf8"),
            Error::UnexpectedEof => write!(f, "unexpected end of file"),
            Error::Io(ref err) => err.fmt(f),
        }
    }
}

impl error::Error for Error {
    fn description(&self) -> &str {
        match *self {
            Error::NotUtf8 => "invalid utf8 encoding",
            Error::UnexpectedEof => "unexpected end of file",
            Error::Io(ref err) => err.description(),
        }
    }

    fn cause(&self) -> Option<&error::Error> {
        match *self {
            Error::NotUtf8 => None,
            Error::UnexpectedEof => None,
            Error::Io(ref err) => err.cause(),
        }
    }
}