bors 9fb91aa2e7 Auto merge of #122059 - nyurik:with-as-const-str, r=cuviper
Optimize write with as_const_str for shorter code

Following up on #121001

Apparently this code generates significant code block for each call to `write()` with non-simple formatting string - approx 100 lines of assembly code, possibly due to `dyn` (?).  See generated assembly code [here](https://github.com/nyurik/rust-optimize-format-str/compare/before-changes..with-my-change#diff-6b404e954c692d8cdc8c452d819a216aa5dcf40522b5944639e9ad947279a477):

<details><summary>Details</summary>
<p>

This is the inlining of `write!(buffer, "Iteration {value} was written")`

```asm
core::fmt::Write::write_fmt:
		// /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 194
		fn write_fmt(&mut self, args: Arguments<'_>) -> Result {
	push r15
	push r14
	push r13
	push r12
	push rbx
	mov rdx, rsi
		// /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 427
		match (self.pieces, self.args) {
	mov rcx, qword ptr [rsi + 8]
	mov rax, qword ptr [rsi + 24]
		// /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 428
		([], []) => Some(""),
	cmp rcx, 1
	je .LBB0_8
	test rcx, rcx
	jne .LBB0_9
	test rax, rax
	jne .LBB0_9
		// /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 911
		self.buf.reserve(self.len, additional);
	lea r12, [rdi + 16]
	lea rsi, [rip + .L__unnamed_2]
	xor ebx, ebx
.LBB0_6:
	mov r14, qword ptr [r12]
	jmp .LBB0_7
.LBB0_8:
		// /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 429
		([s], []) => Some(s),
	test rax, rax
	je .LBB0_4
.LBB0_9:
		// /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 1108
		if let Some(s) = args.as_str() { output.write_str(s) } else { write_internal(output, args) }
	lea rsi, [rip + .L__unnamed_1]
	pop rbx
	pop r12
	pop r13
	pop r14
	pop r15
	jmp qword ptr [rip + core::fmt::write_internal@GOTPCREL]
.LBB0_4:
	mov rax, qword ptr [rdx]
		// /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 429
		([s], []) => Some(s),
	mov rsi, qword ptr [rax]
	mov rbx, qword ptr [rax + 8]
		// /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 248
		if T::IS_ZST { usize::MAX } else { self.cap.0 }
	mov rax, qword ptr [rdi]
		// /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 911
		self.buf.reserve(self.len, additional);
	mov r14, qword ptr [rdi + 16]
		// /home/nyurik/dev/rust/rust/library/core/src/num/mod.rs : 1281
		uint_impl! {
	sub rax, r14
		// /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 392
		additional > self.capacity().wrapping_sub(len)
	cmp rax, rbx
		// /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 309
		if self.needs_to_grow(len, additional) {
	jb .LBB0_5
.LBB0_7:
	mov rax, qword ptr [rdi + 8]
		// /home/nyurik/dev/rust/rust/library/core/src/ptr/mut_ptr.rs : 1046
		unsafe { intrinsics::offset(self, count) }
	add rax, r14
	mov r15, rdi
		// /home/nyurik/dev/rust/rust/library/core/src/intrinsics.rs : 2922
		copy_nonoverlapping(src, dst, count)
	mov rdi, rax
	mov rdx, rbx
	call qword ptr [rip + memcpy@GOTPCREL]
		// /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 2040
		self.len += count;
	add r14, rbx
	mov qword ptr [r15 + 16], r14
		// /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 216
		}
	xor eax, eax
	pop rbx
	pop r12
	pop r13
	pop r14
	pop r15
	ret
.LBB0_5:
		// /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 911
		self.buf.reserve(self.len, additional);
	lea r12, [rdi + 16]
	mov r15, rdi
	mov r13, rsi
		// /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 310
		do_reserve_and_handle(self, len, additional);
	mov rsi, r14
	mov rdx, rbx
	call alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle
	mov rsi, r13
	mov rdi, r15
	jmp .LBB0_6
```

</p>
</details>

```rust
#[inline]
pub fn write(output: &mut dyn Write, args: Arguments<'_>) -> Result {
    if let Some(s) = args.as_str() { output.write_str(s) } else { write_internal(output, args) }
}
```

So, this brings back the older experiment - where I used `if core::intrinsics::is_val_statically_known(s.is_some()) { s } else { None }` helper function, and called it in multiple places that used `write`.  This is not as optimal because now every user of `write` must do this logic, but at least it results in significantly smaller assembly code for the formatting case, and results in identical code as now for the "simple" (no formatting) case. See [assembly comparison](https://github.com/nyurik/rust-optimize-format-str/compare/with-my-change..with-as-const-str#diff-6b404e954c692d8cdc8c452d819a216aa5dcf40522b5944639e9ad947279a477) of what is now with what this change brings (focus only on `fmt/intel-lib.txt` and `str/intel-lib.txt` files).

```rust
               if let Some(s) = args.as_const_str() {
                    self.write_str(s)
                } else {
                    write(self, args)
                }
```
2024-03-08 04:15:17 +00:00
2024-03-06 17:49:37 -08:00
x

The Rust Programming Language

Rust Community

This is the main source code repository for Rust. It contains the compiler, standard library, and documentation.

Note: this README is for users rather than contributors. If you wish to contribute to the compiler, you should read CONTRIBUTING.md instead.

Table of Contents

Quick Start

Read "Installation" from The Book.

Installing from Source

If you really want to install from source (though this is not recommended), see INSTALL.md.

Getting Help

See https://www.rust-lang.org/community for a list of chat platforms and forums.

Contributing

See CONTRIBUTING.md.

License

Rust is primarily distributed under the terms of both the MIT license and the Apache License (Version 2.0), with portions covered by various BSD-like licenses.

See LICENSE-APACHE, LICENSE-MIT, and COPYRIGHT for details.

Trademark

The Rust Foundation owns and protects the Rust and Cargo trademarks and logos (the "Rust Trademarks").

If you want to use these names or brands, please read the media guide.

Third-party logos may be subject to third-party copyrights and trademarks. See Licenses for details.

Description
No description provided
Readme 1.5 GiB
Languages
Rust 95.7%
Shell 1%
RenderScript 0.7%
JavaScript 0.6%
Fluent 0.4%
Other 1.4%