diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index 708f62f476e..117a3e23044 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -480,7 +480,7 @@ pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf {
     #[inline]
     pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
         self.bytes.extend_from_slice(other);
-        self.is_known_utf8 = self.is_known_utf8 || self.next_surrogate(0).is_none();
+        self.is_known_utf8 = false;
     }
 }
 
diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs
index 6a1cc41a8fb..b57c99a8452 100644
--- a/library/std/src/sys_common/wtf8/tests.rs
+++ b/library/std/src/sys_common/wtf8/tests.rs
@@ -725,3 +725,27 @@ fn wtf8_utf8_boundary_between_surrogates() {
     string.push(CodePoint::from_u32(0xD800).unwrap());
     check_utf8_boundary(&string, 3);
 }
+
+#[test]
+fn wobbled_wtf8_plus_bytes_isnt_utf8() {
+    let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+    assert!(!string.is_known_utf8);
+    string.extend_from_slice(b"some utf-8");
+    assert!(!string.is_known_utf8);
+}
+
+#[test]
+fn wobbled_wtf8_plus_str_isnt_utf8() {
+    let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+    assert!(!string.is_known_utf8);
+    string.push_str("some utf-8");
+    assert!(!string.is_known_utf8);
+}
+
+#[test]
+fn unwobbly_wtf8_plus_utf8_is_utf8() {
+    let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world");
+    assert!(string.is_known_utf8);
+    string.push_str("some utf-8");
+    assert!(string.is_known_utf8);
+}