Auto merge of #121428 - okaneco:ipaddr_parse, r=cuviper

net: Don't use checked arithmetic when parsing numbers with known max digits

Add a branch to `Parser::read_number` that determines whether checked or regular arithmetic is used.

- If `max_digits.is_some()`, then we know we are parsing a `u8` or `u16` because `read_number` is only called with `Some(3)` or `Some(4)`. Both types fit within a `u32` without risk of overflow. Thus, we can use plain arithmetic to avoid extra instructions from `checked_mul` and `checked_add`.

Add benches for `IpAddr`, `Ipv4Addr`, `Ipv6Addr`, `SocketAddr`, `SocketAddrV4`, and `SocketAddrV6` parsing
This commit is contained in:
bors 2024-03-05 15:29:19 +00:00
commit 96561a8fd1
4 changed files with 130 additions and 20 deletions

View File

@ -16,6 +16,7 @@
mod fmt; mod fmt;
mod hash; mod hash;
mod iter; mod iter;
mod net;
mod num; mod num;
mod ops; mod ops;
mod pattern; mod pattern;

View File

@ -0,0 +1,78 @@
use core::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6};
use core::str::FromStr;
use test::{black_box, Bencher};
const IPV4_STR: &str = "192.168.0.1";
const IPV4_STR_PORT: &str = "192.168.0.1:8080";
const IPV6_STR_FULL: &str = "2001:db8:0:0:0:0:c0a8:1";
const IPV6_STR_COMPRESS: &str = "2001:db8::c0a8:1";
const IPV6_STR_V4: &str = "2001:db8::192.168.0.1";
const IPV6_STR_PORT: &str = "[2001:db8::c0a8:1]:8080";
const IPV6_STR_PORT_SCOPE_ID: &str = "[2001:db8::c0a8:1%1337]:8080";
/// Benchmarks `Ipv4Addr::from_str` on the dotted-decimal literal `IPV4_STR`.
#[bench]
fn bench_parse_ipv4(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV4_STR);
        Ipv4Addr::from_str(input)
    });
}
/// Benchmarks `Ipv6Addr::from_str` on `IPV6_STR_FULL`, an address written
/// with all eight groups and no `::` shorthand.
#[bench]
fn bench_parse_ipv6_full(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_FULL);
        Ipv6Addr::from_str(input)
    });
}
/// Benchmarks `Ipv6Addr::from_str` on `IPV6_STR_COMPRESS`, an address that
/// uses the `::` shorthand.
#[bench]
fn bench_parse_ipv6_compress(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_COMPRESS);
        Ipv6Addr::from_str(input)
    });
}
/// Benchmarks `Ipv6Addr::from_str` on `IPV6_STR_V4`, an IPv6 literal whose
/// tail is written in dotted-decimal IPv4 form.
#[bench]
fn bench_parse_ipv6_v4(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_V4);
        Ipv6Addr::from_str(input)
    });
}
/// Benchmarks the generic `IpAddr::from_str` entry point on the IPv4
/// literal `IPV4_STR`.
#[bench]
fn bench_parse_ipaddr_v4(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV4_STR);
        IpAddr::from_str(input)
    });
}
/// Benchmarks the generic `IpAddr::from_str` entry point on the
/// uncompressed IPv6 literal `IPV6_STR_FULL`.
#[bench]
fn bench_parse_ipaddr_v6_full(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_FULL);
        IpAddr::from_str(input)
    });
}
/// Benchmarks the generic `IpAddr::from_str` entry point on the
/// `::`-compressed IPv6 literal `IPV6_STR_COMPRESS`.
#[bench]
fn bench_parse_ipaddr_v6_compress(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_COMPRESS);
        IpAddr::from_str(input)
    });
}
/// Benchmarks the generic `IpAddr::from_str` entry point on `IPV6_STR_V4`,
/// an IPv6 literal whose tail is written in dotted-decimal IPv4 form.
#[bench]
fn bench_parse_ipaddr_v6_v4(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_V4);
        IpAddr::from_str(input)
    });
}
/// Benchmarks `SocketAddrV4::from_str` on `IPV4_STR_PORT`, an
/// `address:port` literal.
#[bench]
fn bench_parse_socket_v4(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV4_STR_PORT);
        SocketAddrV4::from_str(input)
    });
}
/// Benchmarks `SocketAddrV6::from_str` on `IPV6_STR_PORT`, a bracketed
/// `[address]:port` literal.
#[bench]
fn bench_parse_socket_v6(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_PORT);
        SocketAddrV6::from_str(input)
    });
}
/// Benchmarks `SocketAddrV6::from_str` on `IPV6_STR_PORT_SCOPE_ID`, a
/// bracketed literal that carries a `%1337` suffix before the port.
#[bench]
fn bench_parse_socket_v6_scope_id(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_PORT_SCOPE_ID);
        SocketAddrV6::from_str(input)
    });
}
/// Benchmarks the generic `SocketAddr::from_str` entry point on the IPv4
/// `address:port` literal `IPV4_STR_PORT`.
#[bench]
fn bench_parse_socketaddr_v4(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV4_STR_PORT);
        SocketAddr::from_str(input)
    });
}
/// Benchmarks the generic `SocketAddr::from_str` entry point on the
/// bracketed IPv6 `[address]:port` literal `IPV6_STR_PORT`.
#[bench]
fn bench_parse_socketaddr_v6(b: &mut Bencher) {
    b.iter(|| {
        // Route the constant through `black_box` before handing it to the parser.
        let input = black_box(IPV6_STR_PORT);
        SocketAddr::from_str(input)
    });
}

View File

@ -0,0 +1 @@
mod addr_parser;

View File

@ -3,7 +3,7 @@
//! This module is "publicly exported" through the `FromStr` implementations //! This module is "publicly exported" through the `FromStr` implementations
//! below. //! below.
use crate::convert::TryInto; use crate::convert::{TryFrom, TryInto};
use crate::error::Error; use crate::error::Error;
use crate::fmt; use crate::fmt;
use crate::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}; use crate::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6};
@ -104,12 +104,46 @@ fn read_separator<T, F>(&mut self, sep: char, index: usize, inner: F) -> Option<
// Read a number off the front of the input in the given radix, stopping // Read a number off the front of the input in the given radix, stopping
// at the first non-digit character or eof. Fails if the number has more // at the first non-digit character or eof. Fails if the number has more
// digits than max_digits or if there is no number. // digits than max_digits or if there is no number.
fn read_number<T: ReadNumberHelper>( //
// INVARIANT: `max_digits` must be less than the number of digits that `u32`
// can represent.
fn read_number<T: ReadNumberHelper + TryFrom<u32>>(
&mut self, &mut self,
radix: u32, radix: u32,
max_digits: Option<usize>, max_digits: Option<usize>,
allow_zero_prefix: bool, allow_zero_prefix: bool,
) -> Option<T> { ) -> Option<T> {
// If max_digits.is_some(), then we are parsing a `u8` or `u16` and
// don't need to use checked arithmetic since it fits within a `u32`.
if let Some(max_digits) = max_digits {
// u32::MAX = 4_294_967_295u32, which is 10 digits long.
// `max_digits` must be less than 10 to not overflow a `u32`.
debug_assert!(max_digits < 10);
self.read_atomically(move |p| {
let mut result = 0_u32;
let mut digit_count = 0;
let has_leading_zero = p.peek_char() == Some('0');
while let Some(digit) = p.read_atomically(|p| p.read_char()?.to_digit(radix)) {
result *= radix;
result += digit;
digit_count += 1;
if digit_count > max_digits {
return None;
}
}
if digit_count == 0 {
None
} else if !allow_zero_prefix && has_leading_zero && digit_count > 1 {
None
} else {
result.try_into().ok()
}
})
} else {
self.read_atomically(move |p| { self.read_atomically(move |p| {
let mut result = T::ZERO; let mut result = T::ZERO;
let mut digit_count = 0; let mut digit_count = 0;
@ -119,11 +153,6 @@ fn read_number<T: ReadNumberHelper>(
result = result.checked_mul(radix)?; result = result.checked_mul(radix)?;
result = result.checked_add(digit)?; result = result.checked_add(digit)?;
digit_count += 1; digit_count += 1;
if let Some(max_digits) = max_digits {
if digit_count > max_digits {
return None;
}
}
} }
if digit_count == 0 { if digit_count == 0 {
@ -135,6 +164,7 @@ fn read_number<T: ReadNumberHelper>(
} }
}) })
} }
}
/// Read an IPv4 address. /// Read an IPv4 address.
fn read_ipv4_addr(&mut self) -> Option<Ipv4Addr> { fn read_ipv4_addr(&mut self) -> Option<Ipv4Addr> {