Auto merge of #92604 - nnethercote:optimize-impl_read_unsigned_leb128, r=michaelwoerister

Optimize `impl_read_unsigned_leb128`

I see instruction count improvements of up to 3.5% locally with these changes, mostly on the smaller benchmarks.

r? `@michaelwoerister`
This commit is contained in:
bors 2022-01-15 07:27:30 +00:00
commit 38c22af015
3 changed files with 22 additions and 21 deletions

View File

@ -53,16 +53,24 @@ pub fn $fn_name(
macro_rules! impl_read_unsigned_leb128 { macro_rules! impl_read_unsigned_leb128 {
($fn_name:ident, $int_ty:ty) => { ($fn_name:ident, $int_ty:ty) => {
#[inline] #[inline]
pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) { pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
let mut result = 0; // The first iteration of this loop is peeled. This is a
let mut shift = 0; // performance win because this code is hot and integer values less
let mut position = 0; // than 128 are very common, typically occurring 50-80% or more of
// the time, even for u64 and u128.
let byte = slice[*position];
*position += 1;
if (byte & 0x80) == 0 {
return byte as $int_ty;
}
let mut result = (byte & 0x7F) as $int_ty;
let mut shift = 7;
loop { loop {
let byte = slice[position]; let byte = slice[*position];
position += 1; *position += 1;
if (byte & 0x80) == 0 { if (byte & 0x80) == 0 {
result |= (byte as $int_ty) << shift; result |= (byte as $int_ty) << shift;
return (result, position); return result;
} else { } else {
result |= ((byte & 0x7F) as $int_ty) << shift; result |= ((byte & 0x7F) as $int_ty) << shift;
} }
@ -122,15 +130,14 @@ pub fn $fn_name(
macro_rules! impl_read_signed_leb128 { macro_rules! impl_read_signed_leb128 {
($fn_name:ident, $int_ty:ty) => { ($fn_name:ident, $int_ty:ty) => {
#[inline] #[inline]
pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) { pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
let mut result = 0; let mut result = 0;
let mut shift = 0; let mut shift = 0;
let mut position = 0;
let mut byte; let mut byte;
loop { loop {
byte = slice[position]; byte = slice[*position];
position += 1; *position += 1;
result |= <$int_ty>::from(byte & 0x7F) << shift; result |= <$int_ty>::from(byte & 0x7F) << shift;
shift += 7; shift += 7;
@ -144,7 +151,7 @@ pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) {
result |= (!0 << shift); result |= (!0 << shift);
} }
(result, position) result
} }
}; };
} }

View File

@ -560,11 +560,7 @@ pub fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] {
} }
macro_rules! read_leb128 { macro_rules! read_leb128 {
($dec:expr, $fun:ident) => {{ ($dec:expr, $fun:ident) => {{ Ok(leb128::$fun($dec.data, &mut $dec.position)) }};
let (value, bytes_read) = leb128::$fun(&$dec.data[$dec.position..]);
$dec.position += bytes_read;
Ok(value)
}};
} }
impl<'a> serialize::Decoder for Decoder<'a> { impl<'a> serialize::Decoder for Decoder<'a> {

View File

@ -30,9 +30,8 @@ fn $test_name() {
let mut position = 0; let mut position = 0;
for &expected in &values { for &expected in &values {
let (actual, bytes_read) = $read_fn_name(&stream[position..]); let actual = $read_fn_name(&stream, &mut position);
assert_eq!(expected, actual); assert_eq!(expected, actual);
position += bytes_read;
} }
assert_eq!(stream.len(), position); assert_eq!(stream.len(), position);
} }
@ -77,9 +76,8 @@ fn $test_name() {
let mut position = 0; let mut position = 0;
for &expected in &values { for &expected in &values {
let (actual, bytes_read) = $read_fn_name(&stream[position..]); let actual = $read_fn_name(&stream, &mut position);
assert_eq!(expected, actual); assert_eq!(expected, actual);
position += bytes_read;
} }
assert_eq!(stream.len(), position); assert_eq!(stream.len(), position);
} }