diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index d28cdcc3f4b..0fe40081a46 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -630,7 +630,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_default(|c| out.push(c)); + for c in c.escape_default() { + out.push(c); + } } out } @@ -640,7 +642,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_unicode(|c| out.push(c)); + for c in c.escape_unicode() { + out.push(c); + } } out } diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 55d2424eba6..1210465098a 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -17,7 +17,7 @@ use mem::transmute; use option::{None, Option, Some}; -use iter::range_step; +use iter::{range_step, Iterator, RangeStep}; use slice::SlicePrelude; // UTF-8 ranges and tags for encoding characters @@ -165,7 +165,9 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> { /// #[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { - c.escape_unicode(f) + for char in c.escape_unicode() { + f(char); + } } /// @@ -182,7 +184,9 @@ pub fn escape_unicode(c: char, f: |char|) { /// #[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { - c.escape_default(f) + for c in c.escape_default() { + f(c); + } } /// Returns the amount of bytes this `char` would need if encoded in UTF-8 @@ -266,7 +270,7 @@ pub trait Char { /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(self, f: |char|); + fn escape_unicode(self) -> UnicodeEscapedChars; /// Returns a 'default' ASCII and C++11-like literal escape of a /// character. @@ -281,7 +285,7 @@ pub trait Char { /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. #[unstable = "pending error conventions, trait organization"] - fn escape_default(self, f: |char|); + fn escape_default(self) -> DefaultEscapedChars; /// Returns the amount of bytes this character would need if encoded in /// UTF-8. @@ -351,38 +355,23 @@ impl Char for char { fn from_u32(i: u32) -> Option<char> { from_u32(i) } #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(self, f: |char|) { - // avoid calling str::to_str_radix because we don't really need to allocate - // here. - f('\\'); - let pad = match () { - _ if self <= '\xff' => { f('x'); 2 } - _ if self <= '\uffff' => { f('u'); 4 } - _ => { f('U'); 8 } - }; - for offset in range_step::<i32>(4 * (pad - 1), -1, -4) { - let offset = offset as uint; - unsafe { - match ((self as i32) >> offset) & 0xf { - i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } - i => { f(transmute('a' as i32 + (i - 10))); } - } - } - } + fn escape_unicode(self) -> UnicodeEscapedChars { + UnicodeEscapedChars { c: self, state: UnicodeEscapedCharsState::Backslash } } #[unstable = "pending error conventions, trait organization"] - fn escape_default(self, f: |char|) { - match self { - '\t' => { f('\\'); f('t'); } - '\r' => { f('\\'); f('r'); } - '\n' => { f('\\'); f('n'); } - '\\' => { f('\\'); f('\\'); } - '\'' => { f('\\'); f('\''); } - '"' => { f('\\'); f('"'); } - '\x20' ... '\x7e' => { f(self); } - _ => self.escape_unicode(f), - } + fn escape_default(self) -> DefaultEscapedChars { + let init_state = match self { + '\t' => DefaultEscapedCharsState::Backslash('t'), + '\r' => DefaultEscapedCharsState::Backslash('r'), + '\n' => DefaultEscapedCharsState::Backslash('n'), + '\\' => DefaultEscapedCharsState::Backslash('\\'), + '\'' => DefaultEscapedCharsState::Backslash('\''), + '"' => DefaultEscapedCharsState::Backslash('"'), + '\x20' ... '\x7e' => DefaultEscapedCharsState::Char(self), + _ => DefaultEscapedCharsState::Unicode(self.escape_unicode()) + }; + DefaultEscapedChars { state: init_state } } #[inline] @@ -456,3 +445,75 @@ impl Char for char { } } } + +/// An iterator over the characters that represent a `char`, as escaped by +/// Rust's unicode escaping rules. +pub struct UnicodeEscapedChars { + c: char, + state: UnicodeEscapedCharsState +} + +enum UnicodeEscapedCharsState { + Backslash, + Type, + Value(RangeStep<i32>), +} + +impl Iterator<char> for UnicodeEscapedChars { + fn next(&mut self) -> Option<char> { + match self.state { + UnicodeEscapedCharsState::Backslash => { + self.state = UnicodeEscapedCharsState::Type; + Some('\\') + } + UnicodeEscapedCharsState::Type => { + let (typechar, pad) = if self.c <= '\x7f' { ('x', 2) } + else if self.c <= '\uffff' { ('u', 4) } + else { ('U', 8) }; + self.state = UnicodeEscapedCharsState::Value(range_step(4 * (pad - 1), -1, -4i32)); + Some(typechar) + } + UnicodeEscapedCharsState::Value(ref mut range_step) => match range_step.next() { + Some(offset) => { + let offset = offset as uint; + let v = match ((self.c as i32) >> offset) & 0xf { + i @ 0 ... 9 => '0' as i32 + i, + i => 'a' as i32 + (i - 10) + }; + Some(unsafe { transmute(v) }) + } + None => None + } + } + } +} + +/// An iterator over the characters that represent a `char`, escaped +/// for maximum portability. +pub struct DefaultEscapedChars { + state: DefaultEscapedCharsState +} + +enum DefaultEscapedCharsState { + Backslash(char), + Char(char), + Done, + Unicode(UnicodeEscapedChars), +} + +impl Iterator<char> for DefaultEscapedChars { + fn next(&mut self) -> Option<char> { + match self.state { + DefaultEscapedCharsState::Backslash(c) => { + self.state = DefaultEscapedCharsState::Char(c); + Some('\\') + } + DefaultEscapedCharsState::Char(c) => { + self.state = DefaultEscapedCharsState::Done; + Some(c) + } + DefaultEscapedCharsState::Done => None, + DefaultEscapedCharsState::Unicode(ref mut iter) => iter.next() + } + } +} diff --git a/src/libgraphviz/lib.rs b/src/libgraphviz/lib.rs index df8cdabbcaa..3ad546edf8d 100644 --- a/src/libgraphviz/lib.rs +++ b/src/libgraphviz/lib.rs @@ -431,7 +431,7 @@ impl<'a> LabelText<'a> { // not escaping \\, since Graphviz escString needs to // interpret backslashes; see EscStr above. '\\' => f(c), - _ => c.escape_default(f) + _ => for c in c.escape_default() { f(c) } } } fn escape_str(s: &str) -> String { diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index d27a338b308..6a8074b9958 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -262,7 +262,7 @@ pub fn sanitize(s: &str) -> String { _ => { let mut tstr = String::new(); - char::escape_unicode(c, |c| tstr.push(c)); + for c in c.escape_unicode() { tstr.push(c) } result.push('$'); result.push_str(tstr.as_slice().slice_from(1)); } diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index 209d8c7ca0f..52aab752c57 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -2033,9 +2033,9 @@ fn lit_to_string(lit: &ast::Lit) -> String { ast::LitBinary(ref data) => format!("{}", data), ast::LitByte(b) => { let mut res = String::from_str("b'"); - (b as char).escape_default(|c| { + for c in (b as char).escape_default() { res.push(c); - }); + } res.push('\''); res }, diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index e19e38e2977..4c759cfc4fd 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -193,7 +193,7 @@ impl<'a> StringReader<'a> { fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! { let mut m = m.to_string(); m.push_str(": "); - char::escape_default(c, |c| m.push(c)); + for c in c.escape_default() { m.push(c) } self.fatal_span_(from_pos, to_pos, m.as_slice()); } @@ -202,7 +202,7 @@ impl<'a> StringReader<'a> { fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) { let mut m = m.to_string(); m.push_str(": "); - char::escape_default(c, |c| m.push(c)); + for c in c.escape_default() { m.push(c) } self.err_span_(from_pos, to_pos, m.as_slice()); } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 5652a9a9d3a..4ce0d74bd37 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -2756,7 +2756,9 @@ impl<'a> State<'a> { } ast::LitChar(ch) => { let mut res = String::from_str("'"); - ch.escape_default(|c| res.push(c)); + for c in ch.escape_default() { + res.push(c); + } res.push('\''); word(&mut self.s, res.as_slice()) }