rust/src/libserialize/hex.rs

229 lines
6.5 KiB
Rust
Raw Normal View History

2014-01-30 12:29:35 -06:00
// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// ignore-lexer-test FIXME #15679
//! Hex binary-to-text encoding
pub use self::FromHexError::*;
use std::fmt;
use std::error;
/// A trait for converting a value to hexadecimal encoding
2015-01-04 20:39:02 -06:00
pub trait ToHex {
/// Converts the value of `self` to a hex value, returning the owned
/// string.
fn to_hex(&self) -> String;
}
const CHARS: &'static [u8] = b"0123456789abcdef";
impl ToHex for [u8] {
/// Turn a vector of `u8` bytes into a hexadecimal string.
///
/// # Examples
///
/// ```
2015-03-13 17:28:35 -05:00
/// # #![feature(rustc_private)]
/// extern crate serialize;
/// use serialize::hex::ToHex;
///
/// fn main () {
/// let str = [52,32].to_hex();
/// println!("{}", str);
/// }
/// ```
fn to_hex(&self) -> String {
let mut v = Vec::with_capacity(self.len() * 2);
2015-01-31 11:20:46 -06:00
for &byte in self {
v.push(CHARS[(byte >> 4) as usize]);
v.push(CHARS[(byte & 0xf) as usize]);
}
unsafe {
String::from_utf8_unchecked(v)
}
}
}
/// A trait for converting hexadecimal encoded values
2015-01-04 20:39:02 -06:00
pub trait FromHex {
2013-08-04 15:09:04 -05:00
/// Converts the value of `self`, interpreted as hexadecimal encoded data,
/// into an owned vector of bytes, returning the vector.
fn from_hex(&self) -> Result<Vec<u8>, FromHexError>;
}
/// Errors that can occur when decoding a hex encoded string
2015-01-28 07:34:18 -06:00
#[derive(Copy, Debug)]
pub enum FromHexError {
/// The input contained a character not part of the hex format
InvalidHexCharacter(char, usize),
2014-01-30 12:29:35 -06:00
/// The input had an invalid length
InvalidHexLength,
}
std: Rename Show/String to Debug/Display This commit is an implementation of [RFC 565][rfc] which is a stabilization of the `std::fmt` module and the implementations of various formatting traits. Specifically, the following changes were performed: [rfc]: https://github.com/rust-lang/rfcs/blob/master/text/0565-show-string-guidelines.md * The `Show` trait is now deprecated, it was renamed to `Debug` * The `String` trait is now deprecated, it was renamed to `Display` * Many `Debug` and `Display` implementations were audited in accordance with the RFC and audited implementations now have the `#[stable]` attribute * Integers and floats no longer print a suffix * Smart pointers no longer print details that they are a smart pointer * Paths with `Debug` are now quoted and escape characters * The `unwrap` methods on `Result` now require `Display` instead of `Debug` * The `Error` trait no longer has a `detail` method and now requires that `Display` must be implemented. With the loss of `String`, this has moved into libcore. * `impl<E: Error> FromError<E> for Box<Error>` now exists * `derive(Show)` has been renamed to `derive(Debug)`. This is not currently warned about due to warnings being emitted on stage1+ While backwards compatibility is attempted to be maintained with a blanket implementation of `Display` for the old `String` trait (and the same for `Show`/`Debug`) this is still a breaking change due to primitives no longer implementing `String` as well as modifications such as `unwrap` and the `Error` trait. Most code is fairly straightforward to update with a rename or tweaks of method calls. [breaking-change] Closes #21436
2015-01-20 17:45:07 -06:00
impl fmt::Display for FromHexError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
InvalidHexCharacter(ch, idx) =>
write!(f, "Invalid character '{}' at position {}", ch, idx),
InvalidHexLength => write!(f, "Invalid input length"),
}
}
}
impl error::Error for FromHexError {
fn description(&self) -> &str {
match *self {
InvalidHexCharacter(_, _) => "invalid character",
InvalidHexLength => "invalid length",
}
}
}
impl FromHex for str {
/// Convert any hexadecimal encoded string (literal, `@`, `&`, or `~`)
/// to the byte values it encodes.
///
/// You can use the `String::from_utf8` function to turn a
/// `Vec<u8>` into a string with characters corresponding to those values.
///
/// # Examples
///
/// This converts a string literal to hexadecimal and back.
///
/// ```
2015-03-13 17:28:35 -05:00
/// # #![feature(rustc_private)]
/// extern crate serialize;
/// use serialize::hex::{FromHex, ToHex};
///
/// fn main () {
/// let hello_str = "Hello, World".as_bytes().to_hex();
/// println!("{}", hello_str);
/// let bytes = hello_str.from_hex().unwrap();
/// println!("{:?}", bytes);
/// let result_str = String::from_utf8(bytes).unwrap();
/// println!("{}", result_str);
/// }
/// ```
fn from_hex(&self) -> Result<Vec<u8>, FromHexError> {
// This may be an overestimate if there is any whitespace
let mut b = Vec::with_capacity(self.len() / 2);
2015-01-25 15:05:03 -06:00
let mut modulus = 0;
let mut buf = 0;
for (idx, byte) in self.bytes().enumerate() {
buf <<= 4;
match byte {
b'A'...b'F' => buf |= byte - b'A' + 10,
b'a'...b'f' => buf |= byte - b'a' + 10,
b'0'...b'9' => buf |= byte - b'0',
b' '|b'\r'|b'\n'|b'\t' => {
buf >>= 4;
continue
}
_ => return Err(InvalidHexCharacter(self.char_at(idx), idx)),
}
modulus += 1;
if modulus == 2 {
modulus = 0;
b.push(buf);
}
}
match modulus {
2014-09-14 22:27:36 -05:00
0 => Ok(b.into_iter().collect()),
_ => Err(InvalidHexLength),
}
}
}
#[cfg(test)]
mod tests {
2014-02-13 19:49:11 -06:00
extern crate test;
use self::test::Bencher;
use hex::{FromHex, ToHex};
#[test]
pub fn test_to_hex() {
assert_eq!("foobar".as_bytes().to_hex(), "666f6f626172");
}
#[test]
pub fn test_from_hex_okay() {
assert_eq!("666f6f626172".from_hex().unwrap(),
b"foobar");
assert_eq!("666F6F626172".from_hex().unwrap(),
b"foobar");
}
#[test]
pub fn test_from_hex_odd_len() {
assert!("666".from_hex().is_err());
assert!("66 6".from_hex().is_err());
}
#[test]
pub fn test_from_hex_invalid_char() {
assert!("66y6".from_hex().is_err());
}
#[test]
pub fn test_from_hex_ignores_whitespace() {
assert_eq!("666f 6f6\r\n26172 ".from_hex().unwrap(),
b"foobar");
}
#[test]
pub fn test_to_hex_all_bytes() {
2015-01-24 08:39:32 -06:00
for i in 0..256 {
assert_eq!([i as u8].to_hex(), format!("{:02x}", i as usize));
}
}
#[test]
pub fn test_from_hex_all_bytes() {
2015-01-24 08:39:32 -06:00
for i in 0..256 {
let ii: &[u8] = &[i as u8];
assert_eq!(format!("{:02x}", i as usize).from_hex()
.unwrap(),
ii);
assert_eq!(format!("{:02X}", i as usize).from_hex()
.unwrap(),
ii);
}
}
#[bench]
pub fn bench_to_hex(b: &mut Bencher) {
let s = "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム \
";
b.iter(|| {
s.as_bytes().to_hex();
});
b.bytes = s.len() as u64;
}
#[bench]
pub fn bench_from_hex(b: &mut Bencher) {
let s = "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム \
";
let sb = s.as_bytes().to_hex();
b.iter(|| {
sb.from_hex().unwrap();
});
b.bytes = sb.len() as u64;
}
}