From 179fc6dbfd97eb59e92df84e80fa9354ce5eeea0 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 3 Jun 2014 23:45:54 -0400 Subject: [PATCH] Some minor documentation touchups for libregex. Fixes #13800. --- src/libregex/lib.rs | 15 ++++++++------- src/libregex/re.rs | 28 ++++++++++++++++------------ 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/libregex/lib.rs b/src/libregex/lib.rs index 3e100b7889c..4b9d76a9e5d 100644 --- a/src/libregex/lib.rs +++ b/src/libregex/lib.rs @@ -155,15 +155,16 @@ //! # Unicode //! //! This implementation executes regular expressions **only** on sequences of -//! UTF8 codepoints while exposing match locations as byte indices. +//! Unicode code points while exposing match locations as byte indices into the +//! search string. //! //! Currently, only naive case folding is supported. Namely, when matching //! case insensitively, the characters are first converted to their uppercase //! forms and then compared. //! //! Regular expressions themselves are also **only** interpreted as a sequence -//! of UTF8 codepoints. This means you can embed Unicode characters directly -//! into your expression: +//! of Unicode code points. This means you can use Unicode characters +//! directly in your expression: //! //! ```rust //! # #![feature(phase)] @@ -229,10 +230,10 @@ //! x*? zero or more of x (ungreedy) //! x+? one or more of x (ungreedy) //! x?? zero or one of x (ungreedy) -//! x{n,m} at least n and at most x (greedy) +//! x{n,m} at least n x and at most m x (greedy) //! x{n,} at least n x (greedy) //! x{n} exactly n x -//! x{n,m}? at least n and at most x (ungreedy) +//! x{n,m}? at least n x and at most m x (ungreedy) //! x{n,}? at least n x (ungreedy) //! x{n}? exactly n x //! @@ -300,7 +301,7 @@ //! \v vertical tab (\x0B) //! \123 octal character code (up to three digits) //! \x7F hex character code (exactly two digits) -//! \x{10FFFF} any hex character code corresponding to a valid UTF8 codepoint +//! 
\x{10FFFF} any hex character code corresponding to a Unicode code point //! //! //! ## Perl character classes (Unicode friendly) @@ -390,7 +391,7 @@ #[cfg(test, not(windows))] mod test; -/// The `program` module exists to support the `regex!` macro. Do not use. +/// The `native` module exists to support the `regex!` macro. Do not use. #[doc(hidden)] pub mod native { // Exporting this stuff is bad form, but it's necessary for two reasons. diff --git a/src/libregex/re.rs b/src/libregex/re.rs index 61cf1604cd5..3f62f16e0b1 100644 --- a/src/libregex/re.rs +++ b/src/libregex/re.rs @@ -18,8 +18,10 @@ use vm; use vm::{CaptureLocs, MatchKind, Exists, Location, Submatches}; -/// Escapes all regular expression meta characters in `text` so that it may be -/// safely used in a regular expression as a literal string. +/// Escapes all regular expression meta characters in `text`. +/// +/// The string returned may be safely used as a literal in a regular +/// expression. pub fn quote(text: &str) -> String { let mut quoted = String::with_capacity(text.len()); for c in text.chars() { @@ -45,9 +47,10 @@ pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> { Regex::new(regex).map(|r| r.is_match(text)) } -/// Regex is a compiled regular expression, represented as either a sequence -/// of bytecode instructions (dynamic) or as a specialized Rust function -/// (native). It can be used to search, split +/// A compiled regular expression. +/// +/// It is represented as either a sequence of bytecode instructions (dynamic) +/// or as a specialized Rust function (native). It can be used to search, split /// or replace text. All searching is done with an implicit `.*?` at the /// beginning and end of an expression. 
To force an expression to match the /// whole string (or a prefix or a suffix), you must use an anchor like `^` or @@ -55,7 +58,7 @@ pub fn is_match(regex: &str, text: &str) -> Result { /// /// While this crate will handle Unicode strings (whether in the regular /// expression or in the search text), all positions returned are **byte -/// indices**. Every byte index is guaranteed to be at a UTF8 codepoint +/// indices**. Every byte index is guaranteed to be at a Unicode code point /// boundary. /// /// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a @@ -189,7 +192,7 @@ pub fn is_match(&self, text: &str) -> bool { /// /// # Example /// - /// Find the start and end location of every word with exactly 13 + /// Find the start and end location of the first word with exactly 13 /// characters: /// /// ```rust @@ -216,7 +219,7 @@ pub fn find(&self, text: &str) -> Option<(uint, uint)> { /// /// # Example /// - /// Find the start and end location of the first word with exactly 13 + /// Find the start and end location of every word with exactly 13 /// characters: /// /// ```rust @@ -577,8 +580,8 @@ fn reg_replace<'a>(&'a mut self, caps: &Captures) -> MaybeOwned<'a> { } } -impl<'a> Replacer for |&Captures|: 'a -> String { - fn reg_replace<'r>(&'r mut self, caps: &Captures) -> MaybeOwned<'r> { +impl<'t> Replacer for |&Captures|: 't -> String { + fn reg_replace<'a>(&'a mut self, caps: &Captures) -> MaybeOwned<'a> { Owned((*self)(caps)) } } @@ -823,8 +826,9 @@ fn next(&mut self) -> Option> { } /// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. The iterator stops when no more matches can -/// be found. +/// particular regular expression. +/// +/// The iterator stops when no more matches can be found. /// /// `'r` is the lifetime of the compiled expression and `'t` is the lifetime /// of the matched string.