From f3661dce09bc715a46c01f7ea57694e06b587f29 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Mon, 10 Jun 2024 15:01:31 -0700 Subject: [PATCH] rustdoc: word wrap CamelCase in the item list table This is an alternative to ee6459d6521cf6a4c2e08b6e13ce3c6ce5d55ed0. That is, it fixes the issue that affects the very long type names in https://docs.rs/async-stripe/0.31.0/stripe/index.html#structs. This is, necessarily, a pile of nasty heuristics. We need to balance a few issues: - Sometimes, there's no real word break. For example, `BTreeMap` should be `BTreeMap`, not `BTreeMap`. - Sometimes, there's a legit word break, but the name is tiny and the HTML overhead isn't worth it. For example, if we're typesetting `TyCtx`, writing `TyCtx` would have an HTML overhead of 50%. Line breaking inside it makes no sense. --- Cargo.lock | 1 + src/librustdoc/Cargo.toml | 1 + src/librustdoc/html/escape.rs | 44 ++++++++++++++ src/librustdoc/html/escape/tests.rs | 57 +++++++++++++++++++ src/librustdoc/html/format.rs | 3 +- src/librustdoc/html/render/print_item.rs | 6 +- ...long_typename.extremely_long_typename.html | 1 + tests/rustdoc/extremely_long_typename.rs | 7 +++ .../item-desc-list-at-start.item-table.html | 2 +- 9 files changed, 117 insertions(+), 5 deletions(-) create mode 100644 src/librustdoc/html/escape/tests.rs create mode 100644 tests/rustdoc/extremely_long_typename.extremely_long_typename.html create mode 100644 tests/rustdoc/extremely_long_typename.rs diff --git a/Cargo.lock b/Cargo.lock index 281599a21fc..1a7d7e3f5d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4826,6 +4826,7 @@ dependencies = [ "tracing", "tracing-subscriber", "tracing-tree", + "unicode-segmentation", ] [[package]] diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index fe531f0ff59..dfd7414652f 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -23,6 +23,7 @@ tempfile = "3" tracing = "0.1" tracing-tree = "0.3.0" threadpool = "1.8.1" +unicode-segmentation = "1.9" [dependencies.tracing-subscriber] version = "0.3.3" diff --git a/src/librustdoc/html/escape.rs b/src/librustdoc/html/escape.rs index ea4b573aeb9..94414913163 100644 --- a/src/librustdoc/html/escape.rs +++ b/src/librustdoc/html/escape.rs @@ -5,6 +5,8 @@ use std::fmt; +use unicode_segmentation::UnicodeSegmentation; + /// Wrapper struct which will emit the HTML-escaped version of the contained /// string when passed to a format string. pub(crate) struct Escape<'a>(pub &'a str); @@ -74,3 +76,45 @@ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { Ok(()) } } + +/// Wrapper struct which will emit the HTML-escaped version of the contained +/// string when passed to a format string. This function also word-breaks +/// CamelCase and snake_case word names. +/// +/// This is only safe to use for text nodes. If you need your output to be +/// safely contained in an attribute, use [`Escape`]. If you don't know the +/// difference, use [`Escape`]. +pub(crate) struct EscapeBodyTextWithWbr<'a>(pub &'a str); + +impl<'a> fmt::Display for EscapeBodyTextWithWbr<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let EscapeBodyTextWithWbr(text) = *self; + if text.len() < 8 { + return EscapeBodyText(text).fmt(fmt); + } + let mut last = 0; + let mut it = text.grapheme_indices(true).peekable(); + let _ = it.next(); // don't insert wbr before first char + while let Some((i, s)) = it.next() { + let pk = it.peek(); + let is_uppercase = || s.chars().any(|c| c.is_uppercase()); + let next_is_uppercase = + || pk.map_or(true, |(_, t)| t.chars().any(|c| c.is_uppercase())); + let next_is_underscore = || pk.map_or(true, |(_, t)| t.contains('_')); + if (i - last > 3 && is_uppercase() && !next_is_uppercase()) + || (s.contains('_') && !next_is_underscore()) + { + EscapeBodyText(&text[last..i]).fmt(fmt)?; + fmt.write_str("")?; + last = i; + } + } + if last < text.len() { + EscapeBodyText(&text[last..]).fmt(fmt)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests; diff --git a/src/librustdoc/html/escape/tests.rs b/src/librustdoc/html/escape/tests.rs new file mode 100644 index 00000000000..f99a2a693a6 --- /dev/null +++ b/src/librustdoc/html/escape/tests.rs @@ -0,0 +1,57 @@ +// basic examples +#[test] +fn escape_body_text_with_wbr() { + use super::EscapeBodyTextWithWbr as E; + // extreme corner cases + assert_eq!(&E("").to_string(), ""); + assert_eq!(&E("a").to_string(), "a"); + assert_eq!(&E("A").to_string(), "A"); + // real(istic) examples + assert_eq!(&E("FirstSecond").to_string(), "FirstSecond"); + assert_eq!(&E("First_Second").to_string(), "First_Second"); + assert_eq!(&E("First_Second").to_string(), "First<T>_Second"); + assert_eq!(&E("first_second").to_string(), "first_second"); + assert_eq!(&E("MY_CONSTANT").to_string(), "MY_CONSTANT"); + assert_eq!(&E("___________").to_string(), "___________"); + // a string won't get wrapped if it's less than 8 bytes + assert_eq!(&E("HashSet").to_string(), "HashSet"); + // an individual word won't get wrapped if it's less than 4 bytes + assert_eq!(&E("VecDequeue").to_string(), "VecDequeue"); + assert_eq!(&E("VecDequeueSet").to_string(), "VecDequeueSet"); + // how to handle acronyms + assert_eq!(&E("BTreeMap").to_string(), "BTreeMap"); + assert_eq!(&E("HTTPSProxy").to_string(), "HTTPSProxy"); + // more corners + assert_eq!(&E("ṼẽçÑñéå").to_string(), "ṼẽçÑñéå"); + assert_eq!(&E("V\u{0300}e\u{0300}c\u{0300}D\u{0300}e\u{0300}q\u{0300}u\u{0300}e\u{0300}u\u{0300}e\u{0300}").to_string(), "V\u{0300}e\u{0300}c\u{0300}D\u{0300}e\u{0300}q\u{0300}u\u{0300}e\u{0300}u\u{0300}e\u{0300}"); + assert_eq!(&E("LPFNACCESSIBLEOBJECTFROMWINDOW").to_string(), "LPFNACCESSIBLEOBJECTFROMWINDOW"); +} +// property test +#[test] +fn escape_body_text_with_wbr_makes_sense() { + use itertools::Itertools as _; + + use super::EscapeBodyTextWithWbr as E; + const C: [u8; 3] = [b'a', b'A', b'_']; + for chars in [ + C.into_iter(), + C.into_iter(), + C.into_iter(), + C.into_iter(), + C.into_iter(), + C.into_iter(), + C.into_iter(), + C.into_iter(), + ] + .into_iter() + .multi_cartesian_product() + { + let s = String::from_utf8(chars).unwrap(); + assert_eq!(s.len(), 8); + let esc = E(&s).to_string(); + assert!(!esc.contains("")); + assert!(!esc.ends_with("")); + assert!(!esc.starts_with("")); + assert_eq!(&esc.replace("", ""), &s); + } +} diff --git a/src/librustdoc/html/format.rs b/src/librustdoc/html/format.rs index d6aed75103d..bb5ac303ffd 100644 --- a/src/librustdoc/html/format.rs +++ b/src/librustdoc/html/format.rs @@ -32,7 +32,7 @@ use crate::clean::{self, ExternalCrate, PrimitiveType}; use crate::formats::cache::Cache; use crate::formats::item_type::ItemType; -use crate::html::escape::Escape; +use crate::html::escape::{Escape, EscapeBodyText}; use crate::html::render::Context; use crate::passes::collect_intra_doc_links::UrlFragment; @@ -988,6 +988,7 @@ pub(crate) fn anchor<'a, 'cx: 'a>( f, r#"{text}"#, path = join_with_double_colon(&fqp), + text = EscapeBodyText(text.as_str()), ) } else { f.write_str(text.as_str()) diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs index 24476e80778..3f01c082ba9 100644 --- a/src/librustdoc/html/render/print_item.rs +++ b/src/librustdoc/html/render/print_item.rs @@ -29,7 +29,7 @@ use crate::config::ModuleSorting; use crate::formats::item_type::ItemType; use crate::formats::Impl; -use crate::html::escape::Escape; +use crate::html::escape::{Escape, EscapeBodyTextWithWbr}; use crate::html::format::{ display_fn, join_with_double_colon, print_abi_with_space, print_constness_with_space, print_where_clause, visibility_print_with_space, Buffer, Ending, PrintWithSpace, @@ -423,7 +423,7 @@ fn cmp(i1: &clean::Item, i2: &clean::Item, tcx: TyCtxt<'_>) -> Ordering { "
{}extern crate {} as {};", visibility_print_with_space(myitem, cx), anchor(myitem.item_id.expect_def_id(), src, cx), - myitem.name.unwrap(), + EscapeBodyTextWithWbr(myitem.name.unwrap().as_str()), ), None => write!( w, @@ -520,7 +520,7 @@ fn cmp(i1: &clean::Item, i2: &clean::Item, tcx: TyCtxt<'_>) -> Ordering { {stab_tags}\
\ {docs_before}{docs}{docs_after}", - name = myitem.name.unwrap(), + name = EscapeBodyTextWithWbr(myitem.name.unwrap().as_str()), visibility_and_hidden = visibility_and_hidden, stab_tags = extra_info_tags(myitem, item, tcx), class = myitem.type_(), diff --git a/tests/rustdoc/extremely_long_typename.extremely_long_typename.html b/tests/rustdoc/extremely_long_typename.extremely_long_typename.html new file mode 100644 index 00000000000..b20e59866da --- /dev/null +++ b/tests/rustdoc/extremely_long_typename.extremely_long_typename.html @@ -0,0 +1 @@ +
  • \ No newline at end of file diff --git a/tests/rustdoc/extremely_long_typename.rs b/tests/rustdoc/extremely_long_typename.rs new file mode 100644 index 00000000000..212afe2d110 --- /dev/null +++ b/tests/rustdoc/extremely_long_typename.rs @@ -0,0 +1,7 @@ +// ignore-tidy-linelength +// Make sure that, if an extremely long type name is named, +// the item table has it line wrapped. +// There should be some reasonably-placed `` tags in the snapshot file. + +// @snapshot extremely_long_typename "extremely_long_typename/index.html" '//ul[@class="item-table"]/li' +pub struct CreateSubscriptionPaymentSettingsPaymentMethodOptionsCustomerBalanceBankTransferEuBankTransfer; diff --git a/tests/rustdoc/item-desc-list-at-start.item-table.html b/tests/rustdoc/item-desc-list-at-start.item-table.html index 72bde573cea..ab8b1508b55 100644 --- a/tests/rustdoc/item-desc-list-at-start.item-table.html +++ b/tests/rustdoc/item-desc-list-at-start.item-table.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file