Merge pull request #257 from arpankapoor/master
Optimize bitreverse codegen
This commit is contained in:
commit
ade3739c07
@ -168,6 +168,9 @@ fn main() {
|
||||
world as Box<dyn SomeTrait>;
|
||||
|
||||
assert_eq!(intrinsics::bitreverse(0b10101000u8), 0b00010101u8);
|
||||
assert_eq!(intrinsics::bitreverse(0xddccu16), 0x33bbu16);
|
||||
assert_eq!(intrinsics::bitreverse(0xffee_ddccu32), 0x33bb77ffu32);
|
||||
assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddccu64), 0x33bb77ff1e6a2c48u64);
|
||||
|
||||
assert_eq!(intrinsics::bswap(0xabu8), 0xabu8);
|
||||
assert_eq!(intrinsics::bswap(0xddccu16), 0xccddu16);
|
||||
|
@ -58,6 +58,7 @@ fn main() {
|
||||
|
||||
assert_eq!(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128.leading_zeros(), 26);
|
||||
assert_eq!(0b0000000000000000000000000010000000000000000000000000000000000000_0000000000000000000000000000000000001000000000000000000010000000u128.trailing_zeros(), 7);
|
||||
assert_eq!(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128.reverse_bits(), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128);
|
||||
|
||||
let _d = 0i128.checked_div(2i128);
|
||||
let _d = 0u128.checked_div(2u128);
|
||||
|
@ -549,141 +549,52 @@ fn bit_reverse(&mut self, width: u64, value: RValue<'gcc>) -> RValue<'gcc> {
|
||||
let context = &self.cx.context;
|
||||
let result =
|
||||
match width {
|
||||
8 => {
|
||||
8 | 16 | 32 | 64 => {
|
||||
let mask = ((1u128 << width) - 1) as u64;
|
||||
let (m0, m1, m2) = if width > 16 {
|
||||
(
|
||||
context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64),
|
||||
context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64),
|
||||
context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32),
|
||||
context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32),
|
||||
context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32),
|
||||
)
|
||||
};
|
||||
let one = context.new_rvalue_from_int(typ, 1);
|
||||
let two = context.new_rvalue_from_int(typ, 2);
|
||||
let four = context.new_rvalue_from_int(typ, 4);
|
||||
|
||||
// First step.
|
||||
let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
|
||||
let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
|
||||
let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
|
||||
let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
|
||||
let left = self.lshr(value, one);
|
||||
let left = self.and(left, m0);
|
||||
let right = self.and(value, m0);
|
||||
let right = self.shl(right, one);
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
|
||||
let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
|
||||
let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
|
||||
let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
|
||||
let left = self.lshr(step1, two);
|
||||
let left = self.and(left, m1);
|
||||
let right = self.and(step1, m1);
|
||||
let right = self.shl(right, two);
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
|
||||
let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
|
||||
let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
|
||||
let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
|
||||
let step3 = self.or(left, right);
|
||||
|
||||
step3
|
||||
},
|
||||
16 => {
|
||||
// First step.
|
||||
let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
|
||||
let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
|
||||
let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
|
||||
let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
|
||||
let left = self.lshr(step2, four);
|
||||
let left = self.and(left, m2);
|
||||
let right = self.and(step2, m2);
|
||||
let right = self.shl(right, four);
|
||||
let step3 = self.or(left, right);
|
||||
|
||||
// Fourth step.
|
||||
let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
|
||||
let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
|
||||
let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
|
||||
let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
|
||||
let step4 = self.or(left, right);
|
||||
|
||||
step4
|
||||
},
|
||||
32 => {
|
||||
// TODO(antoyo): Refactor with other implementations.
|
||||
// First step.
|
||||
let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
|
||||
let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
|
||||
let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
|
||||
let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
|
||||
let step3 = self.or(left, right);
|
||||
|
||||
// Fourth step.
|
||||
let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
|
||||
let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
|
||||
let step4 = self.or(left, right);
|
||||
|
||||
// Fifth step.
|
||||
let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
|
||||
let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
|
||||
let step5 = self.or(left, right);
|
||||
|
||||
step5
|
||||
},
|
||||
64 => {
|
||||
// First step.
|
||||
let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
|
||||
let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
|
||||
let step1 = self.or(left, right);
|
||||
|
||||
// Second step.
|
||||
let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
|
||||
let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
|
||||
let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
|
||||
let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
|
||||
let step2 = self.or(left, right);
|
||||
|
||||
// Third step.
|
||||
let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
|
||||
let left = self.xor(step2, left);
|
||||
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
|
||||
|
||||
let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
|
||||
let left = self.or(temp, left);
|
||||
let step3 = self.xor(left, step2);
|
||||
|
||||
// Fourth step.
|
||||
let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
|
||||
let left = self.xor(step3, left);
|
||||
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
|
||||
|
||||
let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
|
||||
let left = self.or(temp, left);
|
||||
let step4 = self.xor(left, step3);
|
||||
|
||||
// Fifth step.
|
||||
let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
|
||||
let left = self.xor(step4, left);
|
||||
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
|
||||
|
||||
let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
|
||||
let left = self.or(temp, left);
|
||||
let step5 = self.xor(left, step4);
|
||||
|
||||
step5
|
||||
if width == 8 {
|
||||
step3
|
||||
} else {
|
||||
self.gcc_bswap(step3, width)
|
||||
}
|
||||
},
|
||||
128 => {
|
||||
// TODO(antoyo): find a more efficient implementation?
|
||||
|
Loading…
Reference in New Issue
Block a user