1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
//! Binary encoding for Rust values which preserves lexicographic sort order. Order-preserving
//! encoding is useful for creating keys for sorted key-value stores with byte string typed keys,
//! such as [leveldb](https://github.com/google/leveldb). `bytekey` attempts to encode values into
//! the fewest number of bytes possible while preserving ordering. Type information is *not*
//! serialized alongside values, and thus the type of serialized data must be known in order to
//! perform decoding (`bytekey` does not implement a self-describing format).
//!
//! #### Supported Data Types
//!
//! `bytekey` encoding currently supports all Rust primitives, strings, options, structs, enums, and
//! tuples. `isize` and `usize` types are variable-length encoded. Sequence (`Vec`) and map types are
//! not currently supported (but could be in the future). See `Encoder` for details on the
//! serialization format.
//!
//! #### Usage
//!
//! ```
//! extern crate rustc_serialize;
//! extern crate bytekey;
//! use bytekey::{encode, decode};
//!
//! #[derive(RustcEncodable, RustcDecodable, Debug, PartialEq)]
//! struct MyKey { a: u32, b: String }
//!
//! # fn main() {
//! let a = MyKey { a: 1, b: "foo".to_string() };
//! let b = MyKey { a: 2, b: "foo".to_string() };
//! let c = MyKey { a: 2, b: "fooz".to_string() };
//!
//! assert!(encode(&a).unwrap() < encode(&b).unwrap());
//! assert!(encode(&b).unwrap() < encode(&c).unwrap());
//! assert_eq!(a, decode(encode(&a).unwrap()).unwrap());
//! # }
//! ```
//!
//! #### Type Evolution
//!
//! In general, the exact type of a serialized value must be known in order to correctly deserialize
//! it. For structs and enums, the type is effectively frozen once any values of the type have been
//! serialized: changes to the struct or enum will cause deserialization of already encoded values
//! to fail or return incorrect values. The only exception is adding new variants to the end
//! of an existing enum.
//! Enum variants may *not* change type, be removed, or be reordered. All
//! changes to structs, including adding, removing, reordering, or changing the type of a field are
//! forbidden.
//!
//! These restrictions lead to a few best-practices when using `bytekey` encoding:
//!
//! * Don't use `bytekey` unless you need lexicographic ordering of encoded values! A more
//! general encoding library such as [Cap'n Proto](https://github.com/dwrensha/capnproto-rust) or
//! [binary-encode](https://github.com/TyOverby/binary-encode) will serve you better if this
//! feature is not necessary.
//! * If you persist encoded values for longer than the life of a process (i.e. you write the
//! encoded values to a file or a database), consider using an enum as a top-level wrapper type.
//! This will allow you to seamlessly add a new variant when you need to change the key format in a
//! backwards-compatible manner (the different key types will sort separately). If your enum has
//! fewer than 16 variants, then the overhead is just a single byte in encoded output.

#![feature(core, custom_attribute, io, plugin, unicode)]
#![cfg_attr(test, feature(std_misc))]
#![cfg_attr(test, plugin(quickcheck_macros))]

extern crate byteorder;
extern crate rustc_serialize;
#[cfg(test)]
extern crate quickcheck;
#[cfg(test)]
extern crate rand;

pub use encoder::Encoder;
pub use decoder::Decoder;

mod encoder;
mod decoder;

use rustc_serialize::{Encodable, Decodable};
use std::{error, fmt, io, result};
use std::error::Error as StdError;

/// Encode data into a byte vector.
/// /// #### Usage /// /// ``` /// # use bytekey::encode; /// assert_eq!(vec!(0x00, 0x00, 0x00, 0x2A), encode(&42u32).unwrap()); /// assert_eq!(vec!(0x66, 0x69, 0x7A, 0x7A, 0x62, 0x75, 0x7A, 0x7A, 0x00), encode(&"fizzbuzz").unwrap()); /// assert_eq!(vec!(0x2A, 0x66, 0x69, 0x7A, 0x7A, 0x00), encode(&(42u8, "fizz")).unwrap()); /// ``` pub fn encode<T>(value: &T) -> Result<Vec<u8>> where T: Encodable { let mut writer = Vec::new(); { let mut encoder = Encoder::new(&mut writer); try!(value.encode(&mut encoder)); } Ok(writer) } /// Decode data from a byte vector. /// /// #### Usage /// /// ``` /// # use bytekey::{encode, decode}; /// assert_eq!(42usize, decode::<usize>(encode(&42usize).unwrap()).unwrap()); /// ``` pub fn decode<T>(bytes: Vec<u8>) -> Result<T> where T: Decodable { Decodable::decode(&mut Decoder::new(io::Cursor::new(bytes))) } /// A short-hand for `result::Result<T, bytekey::decoder::Error>`. pub type Result<T> = result::Result<T, Error>; /// An error type for bytekey decoding and encoding. /// /// This is a thin wrapper over the standard `io::Error` type. Namely, it /// adds two additional error cases: an unexpected EOF, and invalid utf8. #[derive(Debug)] pub enum Error { /// Variant representing that the underlying stream was read successfully but it did not contain /// valid utf8 data. NotUtf8, /// Variant representing that the underlying stream returns less bytes, than are required to /// decode a meaningful value. UnexpectedEof, /// Variant representing that an I/O error occurred. 
Io(io::Error), } impl From<io::Error> for Error { fn from(error: io::Error) -> Error { Error::Io(error) } } impl From<io::CharsError> for Error { fn from(error: io::CharsError) -> Error { match error { io::CharsError::NotUtf8 => Error::NotUtf8, io::CharsError::Other(error) => Error::Io(error), } } } impl From<byteorder::Error> for Error { fn from(error: byteorder::Error) -> Error { match error { byteorder::Error::UnexpectedEOF => Error::UnexpectedEof, byteorder::Error::Io(error) => Error::Io(error), } } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { Error::NotUtf8 => write!(f, "byte stream did not contain valid utf8"), Error::UnexpectedEof => write!(f, "unexpected end of file"), Error::Io(ref err) => err.fmt(f), } } } impl error::Error for Error { fn description(&self) -> &str { match *self { Error::NotUtf8 => "invalid utf8 encoding", Error::UnexpectedEof => "unexpected end of file", Error::Io(ref err) => err.description(), } } fn cause(&self) -> Option<&error::Error> { match *self { Error::NotUtf8 => None, Error::UnexpectedEof => None, Error::Io(ref err) => err.cause(), } } }