From cc33ce6fd07467bca1006823ae7336e84054726c Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Sat, 20 Dec 2014 17:17:58 +0000
Subject: [PATCH 1/2] Add String::push_with_ascii_fast_path, bench it against
 String::push

`String::push(&mut self, ch: char)` currently has a single code path
that calls `Char::encode_utf8`.
Perhaps it could be faster for ASCII `char`s, which are represented as
a single byte in UTF-8.

This commit leaves the method unchanged,
adds a copy of it with the fast path,
and adds benchmarks to compare them.

Results show that the fast path makes repeatedly pushing an ASCII `char`
roughly nine times faster (59552 vs. 6563 ns/iter in the output below),
but does not significantly affect the performance for a non-ASCII `char`
(where the fast path is not taken).

Output of `make check-stage1-collections NO_REBUILD=1 PLEASE_BENCH=1 TESTNAME=string::tests::bench_push`:

```
test string::tests::bench_push_char_one_byte                 ... bench:     59552 ns/iter (+/- 2132) = 167 MB/s
test string::tests::bench_push_char_one_byte_with_fast_path  ... bench:      6563 ns/iter (+/- 658) = 1523 MB/s
test string::tests::bench_push_char_two_bytes                ... bench:     71520 ns/iter (+/- 3541) = 279 MB/s
test string::tests::bench_push_char_two_bytes_with_slow_path ... bench:     71452 ns/iter (+/- 4202) = 279 MB/s
test string::tests::bench_push_str                           ... bench:        24 ns/iter (+/- 2)
test string::tests::bench_push_str_one_byte                  ... bench:     38910 ns/iter (+/- 2477) = 257 MB/s
```

A benchmark of pushing a one-byte-long `&str` is added for comparison,
but its performance [has varied a lot lately](
https://github.com/rust-lang/rust/pull/19640#issuecomment-67741561).
(When the input is fixed, `s.push_str("x")` could be used
instead of `s.push('x')`.)
---
 src/libcollections/string.rs | 80 ++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index bcd1e3b3680..d894f0b58d9 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -528,6 +528,29 @@ impl String {
         }
     }
 
+    #[inline]
+    fn push_with_ascii_fast_path(&mut self, ch: char) {
+        if (ch as u32) < 0x80 {
+            self.vec.push(ch as u8);
+            return;
+        }
+
+        let cur_len = self.len();
+        // This may use up to 4 bytes.
+        self.vec.reserve(4);
+
+        unsafe {
+            // Attempt to not use an intermediate buffer by just pushing bytes
+            // directly onto this string.
+            let slice = RawSlice {
+                data: self.vec.as_ptr().offset(cur_len as int),
+                len: 4,
+            };
+            let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0);
+            self.vec.set_len(cur_len + used);
+        }
+    }
+
     /// Works with the underlying buffer as a byte slice.
     ///
     /// # Examples
@@ -1408,6 +1431,63 @@ mod tests {
         });
     }
 
+    const REPETITIONS: u64 = 10_000;
+
+    #[bench]
+    fn bench_push_str_one_byte(b: &mut Bencher) {
+        b.bytes = REPETITIONS;
+        b.iter(|| {
+            let mut r = String::new();
+            for _ in range(0, REPETITIONS) {
+                r.push_str("a")
+            }
+        });
+    }
+
+    #[bench]
+    fn bench_push_char_one_byte(b: &mut Bencher) {
+        b.bytes = REPETITIONS;
+        b.iter(|| {
+            let mut r = String::new();
+            for _ in range(0, REPETITIONS) {
+                r.push('a')
+            }
+        });
+    }
+
+    #[bench]
+    fn bench_push_char_one_byte_with_fast_path(b: &mut Bencher) {
+        b.bytes = REPETITIONS;
+        b.iter(|| {
+            let mut r = String::new();
+            for _ in range(0, REPETITIONS) {
+                r.push_with_ascii_fast_path('a')
+            }
+        });
+    }
+
+    #[bench]
+    fn bench_push_char_two_bytes(b: &mut Bencher) {
+        b.bytes = REPETITIONS * 2;
+        b.iter(|| {
+            let mut r = String::new();
+            for _ in range(0, REPETITIONS) {
+                r.push('â')
+            }
+        });
+    }
+
+    #[bench]
+    fn bench_push_char_two_bytes_with_slow_path(b: &mut Bencher) {
+        b.bytes = REPETITIONS * 2;
+        b.iter(|| {
+            let mut r = String::new();
+            for _ in range(0, REPETITIONS) {
+                r.push_with_ascii_fast_path('â')
+            }
+        });
+    }
+
     #[bench]
     fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
         let s = b"Hello there, the quick brown fox jumped over the lazy dog! \

From e40a81b37bf11ae6f7fc3294ac17230eabaaab03 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Sat, 20 Dec 2014 17:29:02 +0000
Subject: [PATCH 2/2] Merge String::push_with_ascii_fast_path into
 String::push.

---
 src/libcollections/string.rs | 40 ------------------------------------
 1 file changed, 40 deletions(-)

diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index d894f0b58d9..678e81d40b4 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -512,24 +512,6 @@ impl String {
     #[inline]
     #[stable = "function just renamed from push_char"]
     pub fn push(&mut self, ch: char) {
-        let cur_len = self.len();
-        // This may use up to 4 bytes.
-        self.vec.reserve(4);
-
-        unsafe {
-            // Attempt to not use an intermediate buffer by just pushing bytes
-            // directly onto this string.
-            let slice = RawSlice {
-                data: self.vec.as_ptr().offset(cur_len as int),
-                len: 4,
-            };
-            let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0);
-            self.vec.set_len(cur_len + used);
-        }
-    }
-
-    #[inline]
-    fn push_with_ascii_fast_path(&mut self, ch: char) {
         if (ch as u32) < 0x80 {
             self.vec.push(ch as u8);
             return;
@@ -1455,17 +1437,6 @@ mod tests {
         });
     }
 
-    #[bench]
-    fn bench_push_char_one_byte_with_fast_path(b: &mut Bencher) {
-        b.bytes = REPETITIONS;
-        b.iter(|| {
-            let mut r = String::new();
-            for _ in range(0, REPETITIONS) {
-                r.push_with_ascii_fast_path('a')
-            }
-        });
-    }
-
     #[bench]
     fn bench_push_char_two_bytes(b: &mut Bencher) {
         b.bytes = REPETITIONS * 2;
@@ -1477,17 +1448,6 @@ mod tests {
         });
     }
 
-    #[bench]
-    fn bench_push_char_two_bytes_with_slow_path(b: &mut Bencher) {
-        b.bytes = REPETITIONS * 2;
-        b.iter(|| {
-            let mut r = String::new();
-            for _ in range(0, REPETITIONS) {
-                r.push_with_ascii_fast_path('â')
-            }
-        });
-    }
-
     #[bench]
     fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
         let s = b"Hello there, the quick brown fox jumped over the lazy dog! \