From 2194fd7569b227ba083c4f27e156af939e061c1b Mon Sep 17 00:00:00 2001 From: Kevin Butler Date: Fri, 20 Jun 2014 01:11:32 +0100 Subject: [PATCH 1/7] liburl: rename and move from_str to Url::parse_str. url::from_str => url::Url::parse_str The FromStr trait still works, but it's confusing to have a from_str free function that returns a Result, while the regular from_str returns an Option, hence the rename. [breaking-change] --- src/liburl/lib.rs | 133 +++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 72 deletions(-) diff --git a/src/liburl/lib.rs b/src/liburl/lib.rs index 577a1e8ea9e..33688412f5f 100644 --- a/src/liburl/lib.rs +++ b/src/liburl/lib.rs @@ -36,16 +36,13 @@ use std::uint; /// # Example /// /// ```rust -/// use url::{Url, UserInfo}; +/// use url::Url; /// -/// let url = Url { scheme: "https".to_string(), -/// user: Some(UserInfo { user: "username".to_string(), pass: None }), -/// host: "example.com".to_string(), -/// port: Some("8080".to_string()), -/// path: "/foo/bar".to_string(), -/// query: vec!(("baz".to_string(), "qux".to_string())), -/// fragment: Some("quz".to_string()) }; -/// // https://username@example.com:8080/foo/bar?baz=qux#quz +/// let raw = "https://username@example.com:8080/foo/bar?baz=qux#quz"; +/// match Url::parse(raw) { +/// Ok(u) => println!("Parsed '{}'", u), +/// Err(e) => println!("Couldn't parse '{}': {}", raw, e), +/// } /// ``` #[deriving(Clone, PartialEq, Eq)] pub struct Url { @@ -110,6 +107,38 @@ impl Url { fragment: fragment, } } + + /// Parses a URL, converting it from a string to a `Url` representation. + /// + /// # Arguments + /// * rawurl - a string representing the full URL, including scheme. + /// + /// # Return value + /// + /// `Err(e)` if the string did not represent a valid URL, where `e` is a + /// `String` error message. Otherwise, `Ok(u)` where `u` is a `Url` struct + /// representing the URL.
+ pub fn parse(rawurl: &str) -> Result { + // scheme + let (scheme, rest) = try!(get_scheme(rawurl)); + + // authority + let (userinfo, host, port, rest) = try!(get_authority(rest.as_slice())); + + // path + let has_authority = host.len() > 0; + let (path, rest) = try!(get_path(rest.as_slice(), has_authority)); + + // query and fragment + let (query, fragment) = try!(get_query_fragment(rest.as_slice())); + + Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) + } +} + +#[deprecated="use `Url::parse`"] +pub fn from_str(s: &str) -> Result { + Url::parse(s) } impl Path { @@ -734,46 +763,6 @@ fn get_query_fragment(rawurl: &str) -> return Ok((query_from_str(q.as_slice()), f)); } -/** - * Parses a URL, converting it from a string to `Url` representation. - * - * # Arguments - * - * `rawurl` - a string representing the full URL, including scheme. - * - * # Returns - * - * A `Url` struct type representing the URL. - */ -pub fn from_str(rawurl: &str) -> Result { - // scheme - let (scheme, rest) = match get_scheme(rawurl) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - // authority - let (userinfo, host, port, rest) = match get_authority(rest.as_slice()) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - // path - let has_authority = host.len() > 0; - let (path, rest) = match get_path(rest.as_slice(), has_authority) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - // query and fragment - let (query, fragment) = match get_query_fragment(rest.as_slice()) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) -} - pub fn path_from_str(rawpath: &str) -> Result { let (path, rest) = match get_path(rawpath, false) { Ok(val) => val, @@ -791,7 +780,7 @@ pub fn path_from_str(rawpath: &str) -> Result { impl FromStr for Url { fn from_str(s: &str) -> Option { - match from_str(s) { + match Url::parse(s) { Ok(url) => Some(url), Err(_) => None } @@ -969,8 +958,8 @@ fn test_get_path() { 
#[cfg(test)] mod tests { use {encode_form_urlencoded, decode_form_urlencoded, - decode, encode, from_str, encode_component, decode_component, - path_from_str, UserInfo, get_scheme}; + decode, encode, encode_component, decode_component, + path_from_str, UserInfo, get_scheme, Url}; use std::collections::HashMap; @@ -978,7 +967,7 @@ mod tests { fn test_url_parse() { let url = "http://user:pass@rust-lang.org:8080/doc/~u?s=v#something"; - let up = from_str(url); + let up = from_str::(url); let u = up.unwrap(); assert_eq!(&u.scheme, &"http".to_string()); assert_eq!(&u.user, &Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); @@ -1003,7 +992,7 @@ mod tests { #[test] fn test_url_parse_host_slash() { let urlstr = "http://0.42.42.42/"; - let url = from_str(urlstr).unwrap(); + let url = from_str::(urlstr).unwrap(); assert!(url.host == "0.42.42.42".to_string()); assert!(url.path == "/".to_string()); } @@ -1018,14 +1007,14 @@ mod tests { #[test] fn test_url_host_with_port() { let urlstr = "scheme://host:1234"; - let url = from_str(urlstr).unwrap(); + let url = from_str::(urlstr).unwrap(); assert_eq!(&url.scheme, &"scheme".to_string()); assert_eq!(&url.host, &"host".to_string()); assert_eq!(&url.port, &Some("1234".to_string())); // is empty path really correct? 
Other tests think so assert_eq!(&url.path, &"".to_string()); let urlstr = "scheme://host:1234/"; - let url = from_str(urlstr).unwrap(); + let url = from_str::(urlstr).unwrap(); assert_eq!(&url.scheme, &"scheme".to_string()); assert_eq!(&url.host, &"host".to_string()); assert_eq!(&url.port, &Some("1234".to_string())); @@ -1035,7 +1024,7 @@ mod tests { #[test] fn test_url_with_underscores() { let urlstr = "http://dotcom.com/file_name.html"; - let url = from_str(urlstr).unwrap(); + let url = from_str::(urlstr).unwrap(); assert!(url.path == "/file_name.html".to_string()); } @@ -1049,7 +1038,7 @@ mod tests { #[test] fn test_url_with_dashes() { let urlstr = "http://dotcom.com/file-name.html"; - let url = from_str(urlstr).unwrap(); + let url = from_str::(urlstr).unwrap(); assert!(url.path == "/file-name.html".to_string()); } @@ -1067,75 +1056,75 @@ mod tests { #[test] fn test_invalid_scheme_errors() { - assert!(from_str("99://something").is_err()); - assert!(from_str("://something").is_err()); + assert!(Url::parse("99://something").is_err()); + assert!(Url::parse("://something").is_err()); } #[test] fn test_full_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc?s=v#something"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_userless_url_parse_and_format() { let url = "http://rust-lang.org/doc?s=v#something"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_queryless_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc#something"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_empty_query_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc?#something"; let should_be = "http://user:pass@rust-lang.org/doc#something"; - 
assert_eq!(from_str(url).unwrap().to_str().as_slice(), should_be); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), should_be); } #[test] fn test_fragmentless_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc?q=v"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_minimal_url_parse_and_format() { let url = "http://rust-lang.org/doc"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_url_with_port_parse_and_format() { let url = "http://rust-lang.org:80/doc"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_scheme_host_only_url_parse_and_format() { let url = "http://rust-lang.org"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_pathless_url_parse_and_format() { let url = "http://user:pass@rust-lang.org?q=v#something"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_scheme_host_fragment_only_url_parse_and_format() { let url = "http://rust-lang.org#something"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] fn test_url_component_encoding() { let url = "http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B"; - let u = from_str(url).unwrap(); + let u = from_str::(url).unwrap(); assert!(u.path == "/doc uments".to_string()); assert!(u.query == vec!(("ba%d ".to_string(), "#&+".to_string()))); } @@ -1151,7 +1140,7 @@ mod tests { #[test] fn test_url_without_authority() { let url = "mailto:test@email.com"; - assert_eq!(from_str(url).unwrap().to_str().as_slice(), url); + 
assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); } #[test] From a9e82e145e61f4b9825b4622f78ea9364387ae70 Mon Sep 17 00:00:00 2001 From: Kevin Butler Date: Fri, 20 Jun 2014 01:30:12 +0100 Subject: [PATCH 2/7] liburl: rename and move path_from_str to Path::parse_str. url::path_from_str => url::Path::parse_str The FromStr trait still works, but it's confusing to have a path_from_str free function that returns a Result, while the regular from_str style functions return an Option, hence the rename to indicate a Result. [breaking-change] --- src/liburl/lib.rs | 60 +++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/liburl/lib.rs b/src/liburl/lib.rs index 33688412f5f..dbdd7a6aa17 100644 --- a/src/liburl/lib.rs +++ b/src/liburl/lib.rs @@ -60,7 +60,7 @@ pub struct Url { /// `vec!(("baz".to_string(), "qux".to_string()))` represents the fragment /// `baz=qux` in the above example. pub query: Query, - /// The fragment component, such as `quz`. Doesn't include the leading `#` character. + /// The fragment component, such as `quz`. Not including the leading `#` character. pub fragment: Option } @@ -72,7 +72,7 @@ pub struct Path { /// `vec!(("baz".to_string(), "qux".to_string()))` represents the fragment /// `baz=qux` in the above example. pub query: Query, - /// The fragment component, such as `quz`. Doesn't include the leading `#` character. + /// The fragment component, such as `quz`. Not including the leading `#` character. pub fragment: Option } @@ -152,6 +152,30 @@ impl Path { fragment: fragment, } } + + /// Parses a URL path, converting it from a string to a `Path` representation. + /// + /// # Arguments + /// * rawpath - a string representing the path component of a URL. + /// + /// # Return value + /// + /// `Err(e)` if the string did not represent a valid URL path, where `e` is a + /// `String` error message. Otherwise, `Ok(p)` where `p` is a `Path` struct + /// representing the URL path.
+ pub fn parse(rawpath: &str) -> Result { + let (path, rest) = try!(get_path(rawpath, false)); + + // query and fragment + let (query, fragment) = try!(get_query_fragment(rest.as_slice())); + + Ok(Path{ path: path, query: query, fragment: fragment }) + } +} + +#[deprecated="use `Path::parse`"] +pub fn path_from_str(s: &str) -> Result { + Path::parse(s) } impl UserInfo { @@ -763,21 +787,6 @@ fn get_query_fragment(rawurl: &str) -> return Ok((query_from_str(q.as_slice()), f)); } -pub fn path_from_str(rawpath: &str) -> Result { - let (path, rest) = match get_path(rawpath, false) { - Ok(val) => val, - Err(e) => return Err(e) - }; - - // query and fragment - let (query, fragment) = match get_query_fragment(rest.as_slice()) { - Ok(val) => val, - Err(e) => return Err(e), - }; - - Ok(Path{ path: path, query: query, fragment: fragment }) -} - impl FromStr for Url { fn from_str(s: &str) -> Option { match Url::parse(s) { @@ -789,7 +798,7 @@ impl FromStr for Url { impl FromStr for Path { fn from_str(s: &str) -> Option { - match path_from_str(s) { + match Path::parse(s) { Ok(path) => Some(path), Err(_) => None } @@ -957,9 +966,8 @@ fn test_get_path() { #[cfg(test)] mod tests { - use {encode_form_urlencoded, decode_form_urlencoded, - decode, encode, encode_component, decode_component, - path_from_str, UserInfo, get_scheme, Url}; + use {encode_form_urlencoded, decode_form_urlencoded, decode, encode, + encode_component, decode_component, UserInfo, get_scheme, Url, Path}; use std::collections::HashMap; @@ -982,7 +990,7 @@ mod tests { fn test_path_parse() { let path = "/doc/~u?s=v#something"; - let up = path_from_str(path); + let up = from_str::(path); let u = up.unwrap(); assert_eq!(&u.path, &"/doc/~u".to_string()); assert_eq!(&u.query, &vec!(("s".to_string(), "v".to_string()))); @@ -1000,7 +1008,7 @@ mod tests { #[test] fn test_path_parse_host_slash() { let pathstr = "/"; - let path = path_from_str(pathstr).unwrap(); + let path = from_str::(pathstr).unwrap(); assert!(path.path == 
"/".to_string()); } @@ -1031,7 +1039,7 @@ mod tests { #[test] fn test_path_with_underscores() { let pathstr = "/file_name.html"; - let path = path_from_str(pathstr).unwrap(); + let path = from_str::(pathstr).unwrap(); assert!(path.path == "/file_name.html".to_string()); } @@ -1045,7 +1053,7 @@ mod tests { #[test] fn test_path_with_dashes() { let pathstr = "/file-name.html"; - let path = path_from_str(pathstr).unwrap(); + let path = from_str::(pathstr).unwrap(); assert!(path.path == "/file-name.html".to_string()); } @@ -1132,7 +1140,7 @@ mod tests { #[test] fn test_path_component_encoding() { let path = "/doc%20uments?ba%25d%20=%23%26%2B"; - let p = path_from_str(path).unwrap(); + let p = from_str::(path).unwrap(); assert!(p.path == "/doc uments".to_string()); assert!(p.query == vec!(("ba%d ".to_string(), "#&+".to_string()))); } From ed47c479d73fd7b57d3b493e03c74e2932733163 Mon Sep 17 00:00:00 2001 From: Kevin Butler Date: Sat, 21 Jun 2014 00:05:06 +0100 Subject: [PATCH 3/7] liburl: remove redundant fields in Url. url.path - Now a Path instead of a String. To fix old code: url.path => url.path.path url.query => url.path.query url.fragment => url.path.fragment Not much point having the Path struct if it's not going to be used. [breaking-change] --- src/liburl/lib.rs | 53 +++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/src/liburl/lib.rs b/src/liburl/lib.rs index dbdd7a6aa17..a5031e11d4c 100644 --- a/src/liburl/lib.rs +++ b/src/liburl/lib.rs @@ -54,22 +54,16 @@ pub struct Url { pub host: String, /// A TCP port number, for example `8080`. pub port: Option, - /// The path component of a URL, for example `/foo/bar`. - pub path: String, - /// The query component of a URL. - /// `vec!(("baz".to_string(), "qux".to_string()))` represents the fragment - /// `baz=qux` in the above example. - pub query: Query, - /// The fragment component, such as `quz`. Not including the leading `#` character. 
- pub fragment: Option + /// The path component of a URL, for example `/foo/bar?baz=qux#quz`. + pub path: Path, } -#[deriving(Clone, PartialEq)] +#[deriving(Clone, PartialEq, Eq)] pub struct Path { /// The path component of a URL, for example `/foo/bar`. pub path: String, /// The query component of a URL. - /// `vec!(("baz".to_string(), "qux".to_string()))` represents the fragment + /// `vec![("baz".to_string(), "qux".to_string())]` represents the fragment /// `baz=qux` in the above example. pub query: Query, /// The fragment component, such as `quz`. Not including the leading `#` character. @@ -102,9 +96,7 @@ impl Url { user: user, host: host, port: port, - path: path, - query: query, - fragment: fragment, + path: Path::new(path, query, fragment) } } @@ -836,18 +828,7 @@ impl fmt::Show for Url { } } - try!(write!(f, "{}", self.path)); - - if !self.query.is_empty() { - try!(write!(f, "?{}", query_to_str(&self.query))); - } - - match self.fragment { - Some(ref fragment) => { - write!(f, "#{}", encode_component(fragment.as_slice())) - } - None => Ok(()), - } + write!(f, "{}", self.path) } } @@ -855,7 +836,7 @@ impl fmt::Show for Path { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { try!(write!(f, "{}", self.path)); if !self.query.is_empty() { - try!(write!(f, "?{}", self.query)) + try!(write!(f, "?{}", query_to_str(&self.query))) } match self.fragment { @@ -981,9 +962,9 @@ mod tests { assert_eq!(&u.user, &Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); assert_eq!(&u.host, &"rust-lang.org".to_string()); assert_eq!(&u.port, &Some("8080".to_string())); - assert_eq!(&u.path, &"/doc/~u".to_string()); - assert_eq!(&u.query, &vec!(("s".to_string(), "v".to_string()))); - assert_eq!(&u.fragment, &Some("something".to_string())); + assert_eq!(&u.path.path, &"/doc/~u".to_string()); + assert_eq!(&u.path.query, &vec!(("s".to_string(), "v".to_string()))); + assert_eq!(&u.path.fragment, &Some("something".to_string())); } #[test] @@ -1002,7 +983,7 @@ mod 
tests { let urlstr = "http://0.42.42.42/"; let url = from_str::(urlstr).unwrap(); assert!(url.host == "0.42.42.42".to_string()); - assert!(url.path == "/".to_string()); + assert!(url.path.path == "/".to_string()); } #[test] @@ -1020,20 +1001,20 @@ mod tests { assert_eq!(&url.host, &"host".to_string()); assert_eq!(&url.port, &Some("1234".to_string())); // is empty path really correct? Other tests think so - assert_eq!(&url.path, &"".to_string()); + assert_eq!(&url.path.path, &"".to_string()); let urlstr = "scheme://host:1234/"; let url = from_str::(urlstr).unwrap(); assert_eq!(&url.scheme, &"scheme".to_string()); assert_eq!(&url.host, &"host".to_string()); assert_eq!(&url.port, &Some("1234".to_string())); - assert_eq!(&url.path, &"/".to_string()); + assert_eq!(&url.path.path, &"/".to_string()); } #[test] fn test_url_with_underscores() { let urlstr = "http://dotcom.com/file_name.html"; let url = from_str::(urlstr).unwrap(); - assert!(url.path == "/file_name.html".to_string()); + assert!(url.path.path == "/file_name.html".to_string()); } #[test] @@ -1047,7 +1028,7 @@ mod tests { fn test_url_with_dashes() { let urlstr = "http://dotcom.com/file-name.html"; let url = from_str::(urlstr).unwrap(); - assert!(url.path == "/file-name.html".to_string()); + assert!(url.path.path == "/file-name.html".to_string()); } #[test] @@ -1133,8 +1114,8 @@ mod tests { fn test_url_component_encoding() { let url = "http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B"; let u = from_str::(url).unwrap(); - assert!(u.path == "/doc uments".to_string()); - assert!(u.query == vec!(("ba%d ".to_string(), "#&+".to_string()))); + assert!(u.path.path == "/doc uments".to_string()); + assert!(u.path.query == vec!(("ba%d ".to_string(), "#&+".to_string()))); } #[test] From 11b093425d44b9e14c4c3cace4d7ee691c56e25b Mon Sep 17 00:00:00 2001 From: Kevin Butler Date: Sat, 21 Jun 2014 00:42:21 +0100 Subject: [PATCH 4/7] liburl: Simplify encoding/decoding using iterators. 
--- src/liburl/lib.rs | 598 ++++++++++++++++++++-------------------------- 1 file changed, 264 insertions(+), 334 deletions(-) diff --git a/src/liburl/lib.rs b/src/liburl/lib.rs index a5031e11d4c..29df376a28f 100644 --- a/src/liburl/lib.rs +++ b/src/liburl/lib.rs @@ -25,8 +25,6 @@ use std::collections::HashMap; use std::fmt; use std::from_str::FromStr; use std::hash; -use std::io::BufReader; -use std::string::String; use std::uint; /// A Uniform Resource Locator (URL). A URL is a form of URI (Uniform Resource @@ -110,7 +108,7 @@ impl Url { /// `Err(e)` if the string did not represent a valid URL, where `e` is a /// `String` error message. Otherwise, `Ok(u)` where `u` is a `Url` struct /// representing the URL. - pub fn parse(rawurl: &str) -> Result { + pub fn parse(rawurl: &str) -> DecodeResult { // scheme let (scheme, rest) = try!(get_scheme(rawurl)); @@ -155,7 +153,7 @@ impl Path { /// `Err(e)` if the string did not represent a valid URL path, where `e` is a /// `String` error message. Otherwise, `Ok(p)` where `p` is a `Path` struct /// representing the URL path. - pub fn parse(rawpath: &str) -> Result { + pub fn parse(rawpath: &str) -> DecodeResult { let (path, rest) = try!(get_path(rawpath, false)); // query and fragment @@ -178,293 +176,220 @@ impl UserInfo { } fn encode_inner(s: &str, full_url: bool) -> String { - let mut rdr = BufReader::new(s.as_bytes()); - let mut out = String::new(); + s.bytes().fold(String::new(), |mut out, b| { + match b as char { + // unreserved: + 'A' .. 'Z' + | 'a' .. 'z' + | '0' .. '9' + | '-' | '.' | '_' | '~' => out.push_char(b as char), - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Err(..) => break, - Ok(..) => buf[0] as char, + // gen-delims: + ':' | '/' | '?' | '#' | '[' | ']' | '@' | + // sub-delims: + '!' 
| '$' | '&' | '"' | '(' | ')' | '*' | + '+' | ',' | ';' | '=' + if full_url => out.push_char(b as char), + + ch => out.push_str(format!("%{:02X}", ch as uint).as_slice()), }; - match ch { - // unreserved: - 'A' .. 'Z' | - 'a' .. 'z' | - '0' .. '9' | - '-' | '.' | '_' | '~' => { - out.push_char(ch); - } - _ => { - if full_url { - match ch { - // gen-delims: - ':' | '/' | '?' | '#' | '[' | ']' | '@' | - - // sub-delims: - '!' | '$' | '&' | '"' | '(' | ')' | '*' | - '+' | ',' | ';' | '=' => { - out.push_char(ch); - } - - _ => out.push_str(format!("%{:02X}", ch as uint).as_slice()) - } - } else { - out.push_str(format!("%{:02X}", ch as uint).as_slice()); - } - } - } - } - - out + out + }) } -/** - * Encodes a URI by replacing reserved characters with percent-encoded - * character sequences. - * - * This function is compliant with RFC 3986. - * - * # Example - * - * ```rust - * use url::encode; - * - * let url = encode("https://example.com/Rust (programming language)"); - * println!("{}", url); // https://example.com/Rust%20(programming%20language) - * ``` - */ +/// Encodes a URI by replacing reserved characters with percent-encoded +/// character sequences. +/// +/// This function is compliant with RFC 3986. +/// +/// # Example +/// +/// ```rust +/// use url::encode; +/// +/// let url = encode("https://example.com/Rust (programming language)"); +/// println!("{}", url); // https://example.com/Rust%20(programming%20language) +/// ``` pub fn encode(s: &str) -> String { encode_inner(s, true) } -/** - * Encodes a URI component by replacing reserved characters with percent- - * encoded character sequences. - * - * This function is compliant with RFC 3986. - */ +/// Encodes a URI component by replacing reserved characters with percent- +/// encoded character sequences. +/// +/// This function is compliant with RFC 3986. 
pub fn encode_component(s: &str) -> String { encode_inner(s, false) } -fn decode_inner(s: &str, full_url: bool) -> String { - let mut rdr = BufReader::new(s.as_bytes()); - let mut out = String::new(); +pub type DecodeResult = Result; - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Err(..) => break, - Ok(..) => buf[0] as char - }; - match ch { - '%' => { - let mut bytes = [0, 0]; - match rdr.read(bytes) { - Ok(2) => {} - _ => fail!() // FIXME: malformed url? - } - let ch = uint::parse_bytes(bytes, 16u).unwrap() as u8 as char; - - if full_url { - // Only decode some characters: - match ch { - // gen-delims: - ':' | '/' | '?' | '#' | '[' | ']' | '@' | - - // sub-delims: - '!' | '$' | '&' | '"' | '(' | ')' | '*' | - '+' | ',' | ';' | '=' => { - out.push_char('%'); - out.push_char(bytes[0u] as char); - out.push_char(bytes[1u] as char); - } - - ch => out.push_char(ch) - } - } else { - out.push_char(ch); - } - } - ch => out.push_char(ch) - } - } - - out -} - -/** - * Decodes a percent-encoded string representing a URI. - * - * This will only decode escape sequences generated by `encode`. - * - * # Example - * - * ```rust - * use url::decode; - * - * let url = decode("https://example.com/Rust%20(programming%20language)"); - * println!("{}", url); // https://example.com/Rust (programming language) - * ``` - */ -pub fn decode(s: &str) -> String { +/// Decodes a percent-encoded string representing a URI. +/// +/// This will only decode escape sequences generated by `encode`. +/// +/// # Example +/// +/// ```rust +/// use url::decode; +/// +/// let url = decode("https://example.com/Rust%20(programming%20language)"); +/// println!("{}", url); // https://example.com/Rust (programming language) +/// ``` +pub fn decode(s: &str) -> DecodeResult { decode_inner(s, true) } -/** - * Decode a string encoded with percent encoding. - */ -pub fn decode_component(s: &str) -> String { +/// Decode a string encoded with percent encoding. 
+pub fn decode_component(s: &str) -> DecodeResult { decode_inner(s, false) } -fn encode_plus(s: &str) -> String { - let mut rdr = BufReader::new(s.as_bytes()); +fn decode_inner(s: &str, full_url: bool) -> DecodeResult { let mut out = String::new(); + let mut iter = s.bytes(); loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Ok(..) => buf[0] as char, - Err(..) => break, - }; - match ch { - 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '.' | '-' => { - out.push_char(ch); - } - ' ' => out.push_char('+'), - _ => out.push_str(format!("%{:X}", ch as uint).as_slice()) + match iter.next() { + Some(b) => match b as char { + '%' => { + let bytes = match (iter.next(), iter.next()) { + (Some(one), Some(two)) => [one as u8, two as u8], + _ => return Err(format!("Malformed input: found '%' \ + without two trailing bytes")), + }; + + // Only decode some characters if full_url: + match uint::parse_bytes(bytes, 16u).unwrap() as u8 as char { + // gen-delims: + ':' | '/' | '?' | '#' | '[' | ']' | '@' | + + // sub-delims: + '!' | '$' | '&' | '"' | '(' | ')' | '*' | + '+' | ',' | ';' | '=' + if full_url => { + out.push_char('%'); + out.push_char(bytes[0u] as char); + out.push_char(bytes[1u] as char); + } + + ch => out.push_char(ch) + } + } + ch => out.push_char(ch) + }, + None => return Ok(out), } } - - out } -/** - * Encode a hashmap to the 'application/x-www-form-urlencoded' media type. - */ +/// Encode a hashmap to the 'application/x-www-form-urlencoded' media type. pub fn encode_form_urlencoded(m: &HashMap>) -> String { - let mut out = String::new(); - let mut first = true; + fn encode_plus(s: &T) -> String { + s.as_slice().bytes().fold(String::new(), |mut out, b| { + match b as char { + 'A' .. 'Z' + | 'a' .. 'z' + | '0' .. '9' + | '_' | '.' 
| '-' => out.push_char(b as char), + ' ' => out.push_char('+'), + ch => out.push_str(format!("%{:X}", ch as uint).as_slice()) + } - for (key, values) in m.iter() { - let key = encode_plus(key.as_slice()); + out + }) + } + + let mut first = true; + m.iter().fold(String::new(), |mut out, (key, values)| { + let key = encode_plus(key); for value in values.iter() { if first { first = false; } else { out.push_char('&'); - first = false; } - out.push_str(format!("{}={}", - key, - encode_plus(value.as_slice())).as_slice()); + out.push_str(key.as_slice()); + out.push_char('='); + out.push_str(encode_plus(value).as_slice()); + } + + out + }) +} + +/// Decode a string encoded with the 'application/x-www-form-urlencoded' media +/// type into a hashmap. +pub fn decode_form_urlencoded(s: &[u8]) + -> DecodeResult>> { + fn maybe_push_value(map: &mut HashMap>, + key: String, + value: String) { + if key.len() > 0 && value.len() > 0 { + let values = map.find_or_insert_with(key, |_| vec!()); + values.push(value); } } - out -} + let mut out = HashMap::new(); + let mut iter = s.iter().map(|&x| x); -/** - * Decode a string encoded with the 'application/x-www-form-urlencoded' media - * type into a hashmap. - */ -#[allow(experimental)] -pub fn decode_form_urlencoded(s: &[u8]) -> HashMap> { - let mut rdr = BufReader::new(s); - let mut m: HashMap> = HashMap::new(); let mut key = String::new(); let mut value = String::new(); let mut parsing_key = true; loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Ok(..) => buf[0] as char, - Err(..) 
=> break, - }; - match ch { - '&' | ';' => { - if key.len() > 0 && value.len() > 0 { - let mut values = match m.pop_equiv(&key.as_slice()) { - Some(values) => values, - None => vec!(), + match iter.next() { + Some(b) => match b as char { + '&' | ';' => { + maybe_push_value(&mut out, key, value); + + parsing_key = true; + key = String::new(); + value = String::new(); + } + '=' => parsing_key = false, + ch => { + let ch = match ch { + '%' => { + let bytes = match (iter.next(), iter.next()) { + (Some(one), Some(two)) => [one as u8, two as u8], + _ => return Err(format!("Malformed input: found \ + '%' without two trailing bytes")) + }; + + uint::parse_bytes(bytes, 16u).unwrap() as u8 as char + } + '+' => ' ', + ch => ch }; - values.push(value); - m.insert(key, values); - } - - parsing_key = true; - key = String::new(); - value = String::new(); - } - '=' => parsing_key = false, - ch => { - let ch = match ch { - '%' => { - let mut bytes = [0, 0]; - match rdr.read(bytes) { - Ok(2) => {} - _ => fail!() // FIXME: malformed? - } - uint::parse_bytes(bytes, 16u).unwrap() as u8 as char + if parsing_key { + key.push_char(ch) + } else { + value.push_char(ch) } - '+' => ' ', - ch => ch - }; - - if parsing_key { - key.push_char(ch) - } else { - value.push_char(ch) } + }, + None => { + maybe_push_value(&mut out, key, value); + return Ok(out) } } } - - if key.len() > 0 && value.len() > 0 { - let mut values = match m.pop_equiv(&key.as_slice()) { - Some(values) => values, - None => vec!(), - }; - - values.push(value); - m.insert(key, values); - } - - m } +fn split_char_first<'a>(s: &'a str, c: char) -> (&'a str, &'a str) { + let mut iter = s.splitn(c, 1); -fn split_char_first(s: &str, c: char) -> (String, String) { - let len = s.len(); - let mut index = len; - let mut mat = 0; - let mut rdr = BufReader::new(s.as_bytes()); - loop { - let mut buf = [0]; - let ch = match rdr.read(buf) { - Ok(..) => buf[0] as char, - Err(..) 
=> break, - }; - if ch == c { - // found a match, adjust markers - index = (rdr.tell().unwrap() as uint) - 1; - mat = 1; - break; - } - } - if index+mat == len { - return (s.slice(0, index).to_string(), "".to_string()); - } else { - return (s.slice(0, index).to_string(), - s.slice(index + mat, s.len()).to_string()); + match (iter.next(), iter.next()) { + (Some(a), Some(b)) => (a, b), + (Some(a), None) => (a, ""), + (None, _) => unreachable!(), } } @@ -477,42 +402,40 @@ impl fmt::Show for UserInfo { } } -fn query_from_str(rawquery: &str) -> Query { +fn query_from_str(rawquery: &str) -> DecodeResult { let mut query: Query = vec!(); if !rawquery.is_empty() { for p in rawquery.split('&') { let (k, v) = split_char_first(p, '='); - query.push((decode_component(k.as_slice()), - decode_component(v.as_slice()))); - }; + query.push((try!(decode_component(k)), + try!(decode_component(v)))); + } } - return query; + + Ok(query) } -/** - * Converts an instance of a URI `Query` type to a string. - * - * # Example - * - * ```rust - * let query = vec!(("title".to_string(), "The Village".to_string()), - * ("north".to_string(), "52.91".to_string()), - * ("west".to_string(), "4.10".to_string())); - * println!("{}", url::query_to_str(&query)); // title=The%20Village&north=52.91&west=4.10 - * ``` - */ -#[allow(unused_must_use)] +/// Converts an instance of a URI `Query` type to a string. 
+/// +/// # Example +/// +/// ```rust +/// let query = vec!(("title".to_string(), "The Village".to_string()), +/// ("north".to_string(), "52.91".to_string()), +/// ("west".to_string(), "4.10".to_string())); +/// println!("{}", url::query_to_str(&query)); // title=The%20Village&north=52.91&west=4.10 +/// ``` pub fn query_to_str(query: &Query) -> String { - use std::io::MemWriter; - use std::str; + query.iter().enumerate().fold(String::new(), |mut out, (i, &(ref k, ref v))| { + if i != 0 { + out.push_char('&'); + } - let mut writer = MemWriter::new(); - for (i, &(ref k, ref v)) in query.iter().enumerate() { - if i != 0 { write!(&mut writer, "&"); } - write!(&mut writer, "{}={}", encode_component(k.as_slice()), - encode_component(v.as_slice())); - } - str::from_utf8_lossy(writer.unwrap().as_slice()).to_string() + out.push_str(encode_component(k.as_slice()).as_slice()); + out.push_char('='); + out.push_str(encode_component(v.as_slice()).as_slice()); + out + }) } /** @@ -532,7 +455,7 @@ pub fn query_to_str(query: &Query) -> String { * println!("Scheme in use: {}.", scheme); // Scheme in use: https. * ``` */ -pub fn get_scheme(rawurl: &str) -> Result<(String, String), String> { +pub fn get_scheme(rawurl: &str) -> DecodeResult<(String, String)> { for (i,c) in rawurl.chars().enumerate() { match c { 'A' .. 'Z' | 'a' .. 'z' => continue, @@ -568,7 +491,7 @@ enum Input { // returns userinfo, host, port, and unparsed part, or an error fn get_authority(rawurl: &str) -> - Result<(Option, String, Option, String), String> { + DecodeResult<(Option, String, Option, String)> { if !rawurl.starts_with("//") { // there is no authority. 
return Ok((None, "".to_string(), None, rawurl.to_str())); @@ -727,8 +650,7 @@ fn get_authority(rawurl: &str) -> // returns the path and unparsed part of url, or an error -fn get_path(rawurl: &str, authority: bool) -> - Result<(String, String), String> { +fn get_path(rawurl: &str, authority: bool) -> DecodeResult<(String, String)> { let len = rawurl.len(); let mut end = len; for (i,c) in rawurl.chars().enumerate() { @@ -746,25 +668,20 @@ fn get_path(rawurl: &str, authority: bool) -> } } - if authority { - if end != 0 && !rawurl.starts_with("/") { - return Err("Non-empty path must begin with\ - '/' in presence of authority.".to_string()); - } + if authority && end != 0 && !rawurl.starts_with("/") { + Err("Non-empty path must begin with \ + '/' in presence of authority.".to_string()) + } else { + Ok((try!(decode_component(rawurl.slice(0, end))), + rawurl.slice(end, len).to_string())) } - - return Ok((decode_component(rawurl.slice(0, end)), - rawurl.slice(end, len).to_string())); } // returns the parsed query and the fragment, if present -fn get_query_fragment(rawurl: &str) -> - Result<(Query, Option), String> { +fn get_query_fragment(rawurl: &str) -> DecodeResult<(Query, Option)> { if !rawurl.starts_with("?") { if rawurl.starts_with("#") { - let f = decode_component(rawurl.slice( - 1, - rawurl.len())); + let f = try!(decode_component(rawurl.slice(1, rawurl.len()))); return Ok((vec!(), Some(f))); } else { return Ok((vec!(), None)); @@ -772,11 +689,12 @@ fn get_query_fragment(rawurl: &str) -> } let (q, r) = split_char_first(rawurl.slice(1, rawurl.len()), '#'); let f = if r.len() != 0 { - Some(decode_component(r.as_slice())) + Some(try!(decode_component(r))) } else { None }; - return Ok((query_from_str(q.as_slice()), f)); + + Ok((try!(query_from_str(q)), f)) } impl FromStr for Url { @@ -866,12 +784,12 @@ impl hash::Hash for Path { #[test] fn test_split_char_first() { let (u,v) = split_char_first("hello, sweet world", ','); - assert_eq!(u, "hello".to_string()); - 
assert_eq!(v, " sweet world".to_string()); + assert_eq!(u, "hello"); + assert_eq!(v, " sweet world"); let (u,v) = split_char_first("hello sweet world", ','); - assert_eq!(u, "hello sweet world".to_string()); - assert_eq!(v, "".to_string()); + assert_eq!(u, "hello sweet world"); + assert_eq!(v, ""); } #[test] @@ -1195,58 +1113,70 @@ mod tests { #[test] fn test_decode() { - assert_eq!(decode(""), "".to_string()); - assert_eq!(decode("abc/def 123"), "abc/def 123".to_string()); - assert_eq!(decode("abc%2Fdef%20123"), "abc%2Fdef 123".to_string()); - assert_eq!(decode("%20"), " ".to_string()); - assert_eq!(decode("%21"), "%21".to_string()); - assert_eq!(decode("%22"), "%22".to_string()); - assert_eq!(decode("%23"), "%23".to_string()); - assert_eq!(decode("%24"), "%24".to_string()); - assert_eq!(decode("%25"), "%".to_string()); - assert_eq!(decode("%26"), "%26".to_string()); - assert_eq!(decode("%27"), "'".to_string()); - assert_eq!(decode("%28"), "%28".to_string()); - assert_eq!(decode("%29"), "%29".to_string()); - assert_eq!(decode("%2A"), "%2A".to_string()); - assert_eq!(decode("%2B"), "%2B".to_string()); - assert_eq!(decode("%2C"), "%2C".to_string()); - assert_eq!(decode("%2F"), "%2F".to_string()); - assert_eq!(decode("%3A"), "%3A".to_string()); - assert_eq!(decode("%3B"), "%3B".to_string()); - assert_eq!(decode("%3D"), "%3D".to_string()); - assert_eq!(decode("%3F"), "%3F".to_string()); - assert_eq!(decode("%40"), "%40".to_string()); - assert_eq!(decode("%5B"), "%5B".to_string()); - assert_eq!(decode("%5D"), "%5D".to_string()); + fn t(input: &str, expected: &str) { + assert_eq!(decode(input), Ok(expected.to_string())) + } + + assert!(decode("sadsadsda%").is_err()); + assert!(decode("waeasd%4").is_err()); + t("", ""); + t("abc/def 123", "abc/def 123"); + t("abc%2Fdef%20123", "abc%2Fdef 123"); + t("%20", " "); + t("%21", "%21"); + t("%22", "%22"); + t("%23", "%23"); + t("%24", "%24"); + t("%25", "%"); + t("%26", "%26"); + t("%27", "'"); + t("%28", "%28"); + t("%29", 
"%29"); + t("%2A", "%2A"); + t("%2B", "%2B"); + t("%2C", "%2C"); + t("%2F", "%2F"); + t("%3A", "%3A"); + t("%3B", "%3B"); + t("%3D", "%3D"); + t("%3F", "%3F"); + t("%40", "%40"); + t("%5B", "%5B"); + t("%5D", "%5D"); } #[test] fn test_decode_component() { - assert_eq!(decode_component(""), "".to_string()); - assert_eq!(decode_component("abc/def 123"), "abc/def 123".to_string()); - assert_eq!(decode_component("abc%2Fdef%20123"), "abc/def 123".to_string()); - assert_eq!(decode_component("%20"), " ".to_string()); - assert_eq!(decode_component("%21"), "!".to_string()); - assert_eq!(decode_component("%22"), "\"".to_string()); - assert_eq!(decode_component("%23"), "#".to_string()); - assert_eq!(decode_component("%24"), "$".to_string()); - assert_eq!(decode_component("%25"), "%".to_string()); - assert_eq!(decode_component("%26"), "&".to_string()); - assert_eq!(decode_component("%27"), "'".to_string()); - assert_eq!(decode_component("%28"), "(".to_string()); - assert_eq!(decode_component("%29"), ")".to_string()); - assert_eq!(decode_component("%2A"), "*".to_string()); - assert_eq!(decode_component("%2B"), "+".to_string()); - assert_eq!(decode_component("%2C"), ",".to_string()); - assert_eq!(decode_component("%2F"), "/".to_string()); - assert_eq!(decode_component("%3A"), ":".to_string()); - assert_eq!(decode_component("%3B"), ";".to_string()); - assert_eq!(decode_component("%3D"), "=".to_string()); - assert_eq!(decode_component("%3F"), "?".to_string()); - assert_eq!(decode_component("%40"), "@".to_string()); - assert_eq!(decode_component("%5B"), "[".to_string()); - assert_eq!(decode_component("%5D"), "]".to_string()); + fn t(input: &str, expected: &str) { + assert_eq!(decode_component(input), Ok(expected.to_string())) + } + + assert!(decode_component("asacsa%").is_err()); + assert!(decode_component("acsas%4").is_err()); + t("", ""); + t("abc/def 123", "abc/def 123"); + t("abc%2Fdef%20123", "abc/def 123"); + t("%20", " "); + t("%21", "!"); + t("%22", "\""); + t("%23", "#"); 
+ t("%24", "$"); + t("%25", "%"); + t("%26", "&"); + t("%27", "'"); + t("%28", "("); + t("%29", ")"); + t("%2A", "*"); + t("%2B", "+"); + t("%2C", ","); + t("%2F", "/"); + t("%3A", ":"); + t("%3B", ";"); + t("%3D", "="); + t("%3F", "?"); + t("%40", "@"); + t("%5B", "["); + t("%5D", "]"); } #[test] @@ -1270,10 +1200,10 @@ mod tests { #[test] fn test_decode_form_urlencoded() { - assert_eq!(decode_form_urlencoded([]).len(), 0); + assert_eq!(decode_form_urlencoded([]).unwrap().len(), 0); let s = "a=1&foo+bar=abc&foo+bar=12+%3D+34".as_bytes(); - let form = decode_form_urlencoded(s); + let form = decode_form_urlencoded(s).unwrap(); assert_eq!(form.len(), 2); assert_eq!(form.get(&"a".to_string()), &vec!("1".to_string())); assert_eq!(form.get(&"foo bar".to_string()), From feaad623a12aeb1ce4ee3dabf251e36ccdfe7107 Mon Sep 17 00:00:00 2001 From: Kevin Butler Date: Mon, 23 Jun 2014 23:13:11 +0100 Subject: [PATCH 5/7] liburl: Remove some unnecessary allocations. Some signatures have changed from String to &str returns. To fix, call to_string() on the returned value. [breaking-change] --- src/liburl/lib.rs | 329 +++++++++++++++++++++++----------------------- 1 file changed, 162 insertions(+), 167 deletions(-) diff --git a/src/liburl/lib.rs b/src/liburl/lib.rs index 29df376a28f..55c78672080 100644 --- a/src/liburl/lib.rs +++ b/src/liburl/lib.rs @@ -51,7 +51,7 @@ pub struct Url { /// A domain name or IP address. For example, `example.com`. pub host: String, /// A TCP port number, for example `8080`. - pub port: Option, + pub port: Option, /// The path component of a URL, for example `/foo/bar?baz=qux#quz`. 
pub path: Path, } @@ -84,7 +84,7 @@ impl Url { pub fn new(scheme: String, user: Option, host: String, - port: Option, + port: Option, path: String, query: Query, fragment: Option) @@ -113,16 +113,23 @@ impl Url { let (scheme, rest) = try!(get_scheme(rawurl)); // authority - let (userinfo, host, port, rest) = try!(get_authority(rest.as_slice())); + let (userinfo, host, port, rest) = try!(get_authority(rest)); // path let has_authority = host.len() > 0; - let (path, rest) = try!(get_path(rest.as_slice(), has_authority)); + let (path, rest) = try!(get_path(rest, has_authority)); // query and fragment - let (query, fragment) = try!(get_query_fragment(rest.as_slice())); + let (query, fragment) = try!(get_query_fragment(rest)); - Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) + let url = Url::new(scheme.to_string(), + userinfo, + host.to_string(), + port, + path, + query, + fragment); + Ok(url) } } @@ -438,65 +445,50 @@ pub fn query_to_str(query: &Query) -> String { }) } -/** - * Returns a tuple of the URI scheme and the rest of the URI, or a parsing error. - * - * Does not include the separating `:` character. - * - * # Example - * - * ```rust - * use url::get_scheme; - * - * let scheme = match get_scheme("https://example.com/") { - * Ok((sch, _)) => sch, - * Err(_) => "(None)".to_string(), - * }; - * println!("Scheme in use: {}.", scheme); // Scheme in use: https. - * ``` - */ -pub fn get_scheme(rawurl: &str) -> DecodeResult<(String, String)> { +/// Returns a tuple of the URI scheme and the rest of the URI, or a parsing error. +/// +/// Does not include the separating `:` character. +/// +/// # Example +/// +/// ```rust +/// use url::get_scheme; +/// +/// let scheme = match get_scheme("https://example.com/") { +/// Ok((sch, _)) => sch, +/// Err(_) => "(None)", +/// }; +/// println!("Scheme in use: {}.", scheme); // Scheme in use: https. 
+/// ``` +pub fn get_scheme<'a>(rawurl: &'a str) -> DecodeResult<(&'a str, &'a str)> { for (i,c) in rawurl.chars().enumerate() { - match c { - 'A' .. 'Z' | 'a' .. 'z' => continue, - '0' .. '9' | '+' | '-' | '.' => { - if i == 0 { - return Err("url: Scheme must begin with a \ - letter.".to_string()); - } - continue; - } - ':' => { - if i == 0 { - return Err("url: Scheme cannot be empty.".to_string()); - } else { - return Ok((rawurl.slice(0,i).to_string(), - rawurl.slice(i+1,rawurl.len()).to_string())); - } - } - _ => { - return Err("url: Invalid character in scheme.".to_string()); - } - } - }; - return Err("url: Scheme must be terminated with a colon.".to_string()); -} + let result = match c { + 'A' .. 'Z' + | 'a' .. 'z' => continue, + '0' .. '9' | '+' | '-' | '.' => { + if i != 0 { continue } -#[deriving(Clone, PartialEq)] -enum Input { - Digit, // all digits - Hex, // digits and letters a-f - Unreserved // all other legal characters + Err("url: Scheme must begin with a letter.".to_string()) + } + ':' => { + if i == 0 { + Err("url: Scheme cannot be empty.".to_string()) + } else { + Ok((rawurl.slice(0,i), rawurl.slice(i+1,rawurl.len()))) + } + } + _ => Err("url: Invalid character in scheme.".to_string()), + }; + + return result; + } + + Err("url: Scheme must be terminated with a colon.".to_string()) } // returns userinfo, host, port, and unparsed part, or an error -fn get_authority(rawurl: &str) -> - DecodeResult<(Option, String, Option, String)> { - if !rawurl.starts_with("//") { - // there is no authority. 
- return Ok((None, "".to_string(), None, rawurl.to_str())); - } - +fn get_authority<'a>(rawurl: &'a str) -> + DecodeResult<(Option, &'a str, Option, &'a str)> { enum State { Start, // starting state PassHostPort, // could be in user or port @@ -506,12 +498,24 @@ fn get_authority(rawurl: &str) -> InPort // are in port } + #[deriving(Clone, PartialEq)] + enum Input { + Digit, // all digits + Hex, // digits and letters a-f + Unreserved // all other legal characters + } + + if !rawurl.starts_with("//") { + // there is no authority. + return Ok((None, "", None, rawurl)); + } + let len = rawurl.len(); let mut st = Start; let mut input = Digit; // most restricted, start here. let mut userinfo = None; - let mut host = "".to_string(); + let mut host = ""; let mut port = None; let mut colon_count = 0u; @@ -519,27 +523,27 @@ fn get_authority(rawurl: &str) -> let mut begin = 2; let mut end = len; - for (i,c) in rawurl.chars().enumerate() { - if i < 2 { continue; } // ignore the leading // - + for (i,c) in rawurl.chars().enumerate() + // ignore the leading '//' handled by early return + .skip(2) { // deal with input class first match c { - '0' .. '9' => (), - 'A' .. 'F' | 'a' .. 'f' => { - if input == Digit { - input = Hex; + '0' .. '9' => (), + 'A' .. 'F' + | 'a' .. 'f' => { + if input == Digit { + input = Hex; + } } - } - 'G' .. 'Z' | 'g' .. 'z' | '-' | '.' | '_' | '~' | '%' | - '&' |'\'' | '(' | ')' | '+' | '!' | '*' | ',' | ';' | '=' => { - input = Unreserved; - } - ':' | '@' | '?' | '#' | '/' => { - // separators, don't change anything - } - _ => { - return Err("Illegal character in authority".to_string()); - } + 'G' .. 'Z' + | 'g' .. 'z' + | '-' | '.' | '_' | '~' | '%' + | '&' |'\'' | '(' | ')' | '+' + | '!' | '*' | ',' | ';' | '=' => input = Unreserved, + ':' | '@' | '?' 
| '#' | '/' => { + // separators, don't change anything + } + _ => return Err("Illegal character in authority".to_string()), } // now process states @@ -563,7 +567,7 @@ fn get_authority(rawurl: &str) -> pos = i; if input == Unreserved { // must be port - host = rawurl.slice(begin, i).to_string(); + host = rawurl.slice(begin, i); st = InPort; } else { // can't be sure whether this is an ipv6 address or a port @@ -572,21 +576,18 @@ fn get_authority(rawurl: &str) -> } Ip6Port => { if input == Unreserved { - return Err("Illegal characters in \ - authority.".to_string()); + return Err("Illegal characters in authority.".to_string()); } st = Ip6Host; } Ip6Host => { if colon_count > 7 { - host = rawurl.slice(begin, i).to_string(); + host = rawurl.slice(begin, i); pos = i; st = InPort; } } - _ => { - return Err("Invalid ':' in authority.".to_string()); - } + _ => return Err("Invalid ':' in authority.".to_string()), } input = Digit; // reset input class } @@ -606,9 +607,7 @@ fn get_authority(rawurl: &str) -> userinfo = Some(UserInfo::new(user, Some(pass))); st = InHost; } - _ => { - return Err("Invalid '@' in authority.".to_string()); - } + _ => return Err("Invalid '@' in authority.".to_string()), } begin = i+1; } @@ -623,43 +622,53 @@ fn get_authority(rawurl: &str) -> // finish up match st { - Start => { - host = rawurl.slice(begin, end).to_string(); - } - PassHostPort | Ip6Port => { + Start => host = rawurl.slice(begin, end), + PassHostPort + | Ip6Port => { if input != Digit { return Err("Non-digit characters in port.".to_string()); } - host = rawurl.slice(begin, pos).to_string(); - port = Some(rawurl.slice(pos+1, end).to_string()); - } - Ip6Host | InHost => { - host = rawurl.slice(begin, end).to_string(); + host = rawurl.slice(begin, pos); + port = Some(rawurl.slice(pos+1, end)); } + Ip6Host + | InHost => host = rawurl.slice(begin, end), InPort => { if input != Digit { return Err("Non-digit characters in port.".to_string()); } - port = Some(rawurl.slice(pos+1, 
end).to_string()); + port = Some(rawurl.slice(pos+1, end)); } } - let rest = rawurl.slice(end, len).to_string(); - return Ok((userinfo, host, port, rest)); + let rest = rawurl.slice(end, len); + // If we have a port string, ensure it parses to u16. + let port = match port { + None => None, + opt => match opt.and_then(|p| FromStr::from_str(p)) { + None => return Err(format!("Failed to parse port: {}", port)), + opt => opt + } + }; + + Ok((userinfo, host, port, rest)) } // returns the path and unparsed part of url, or an error -fn get_path(rawurl: &str, authority: bool) -> DecodeResult<(String, String)> { +fn get_path<'a>(rawurl: &'a str, is_authority: bool) + -> DecodeResult<(String, &'a str)> { let len = rawurl.len(); let mut end = len; for (i,c) in rawurl.chars().enumerate() { match c { - 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '&' |'\'' | '(' | ')' | '.' - | '@' | ':' | '%' | '/' | '+' | '!' | '*' | ',' | ';' | '=' - | '_' | '-' | '~' => { - continue; - } + 'A' .. 'Z' + | 'a' .. 'z' + | '0' .. '9' + | '&' |'\'' | '(' | ')' | '.' + | '@' | ':' | '%' | '/' | '+' + | '!' | '*' | ',' | ';' | '=' + | '_' | '-' | '~' => continue, '?' 
| '#' => { end = i; break; @@ -668,68 +677,53 @@ fn get_path(rawurl: &str, authority: bool) -> DecodeResult<(String, String)> { } } - if authority && end != 0 && !rawurl.starts_with("/") { + if is_authority && end != 0 && !rawurl.starts_with("/") { Err("Non-empty path must begin with \ '/' in presence of authority.".to_string()) } else { Ok((try!(decode_component(rawurl.slice(0, end))), - rawurl.slice(end, len).to_string())) + rawurl.slice(end, len))) } } // returns the parsed query and the fragment, if present fn get_query_fragment(rawurl: &str) -> DecodeResult<(Query, Option)> { - if !rawurl.starts_with("?") { - if rawurl.starts_with("#") { - let f = try!(decode_component(rawurl.slice(1, rawurl.len()))); - return Ok((vec!(), Some(f))); - } else { - return Ok((vec!(), None)); - } - } - let (q, r) = split_char_first(rawurl.slice(1, rawurl.len()), '#'); - let f = if r.len() != 0 { - Some(try!(decode_component(r))) - } else { - None + let (before_fragment, raw_fragment) = split_char_first(rawurl, '#'); + + // Parse the fragment if available + let fragment = match raw_fragment { + "" => None, + raw => Some(try!(decode_component(raw))) }; - Ok((try!(query_from_str(q)), f)) + match before_fragment.slice_shift_char() { + (Some('?'), rest) => Ok((try!(query_from_str(rest)), fragment)), + (None, "") => Ok((vec!(), fragment)), + _ => Err(format!("Query didn't start with '?': '{}..'", before_fragment)), + } } impl FromStr for Url { fn from_str(s: &str) -> Option { - match Url::parse(s) { - Ok(url) => Some(url), - Err(_) => None - } + Url::parse(s).ok() } } impl FromStr for Path { fn from_str(s: &str) -> Option { - match Path::parse(s) { - Ok(path) => Some(path), - Err(_) => None - } + Path::parse(s).ok() } } impl fmt::Show for Url { - /** - * Converts a URL from `Url` to string representation. - * - * # Arguments - * - * `url` - a URL. - * - * # Returns - * - * A string that contains the formatted URL. 
Note that this will usually - * be an inverse of `from_str` but might strip out unneeded separators; - * for example, "http://somehost.com?", when parsed and formatted, will - * result in just "http://somehost.com". - */ + /// Converts a URL from `Url` to string representation. + /// + /// # Returns + /// + /// A string that contains the formatted URL. Note that this will usually + /// be an inverse of `from_str` but might strip out unneeded separators; + /// for example, "http://somehost.com?", when parsed and formatted, will + /// result in just "http://somehost.com". fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { try!(write!(f, "{}:", self.scheme)); @@ -797,40 +791,39 @@ fn test_get_authority() { let (u, h, p, r) = get_authority( "//user:pass@rust-lang.org/something").unwrap(); assert_eq!(u, Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); - assert_eq!(h, "rust-lang.org".to_string()); + assert_eq!(h, "rust-lang.org"); assert!(p.is_none()); - assert_eq!(r, "/something".to_string()); + assert_eq!(r, "/something"); let (u, h, p, r) = get_authority( "//rust-lang.org:8000?something").unwrap(); assert!(u.is_none()); - assert_eq!(h, "rust-lang.org".to_string()); - assert_eq!(p, Some("8000".to_string())); - assert_eq!(r, "?something".to_string()); + assert_eq!(h, "rust-lang.org"); + assert_eq!(p, Some(8000)); + assert_eq!(r, "?something"); - let (u, h, p, r) = get_authority( - "//rust-lang.org#blah").unwrap(); + let (u, h, p, r) = get_authority("//rust-lang.org#blah").unwrap(); assert!(u.is_none()); - assert_eq!(h, "rust-lang.org".to_string()); + assert_eq!(h, "rust-lang.org"); assert!(p.is_none()); - assert_eq!(r, "#blah".to_string()); + assert_eq!(r, "#blah"); // ipv6 tests let (_, h, _, _) = get_authority( "//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah").unwrap(); - assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334".to_string()); + assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334"); let (_, h, p, _) = get_authority( 
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah").unwrap(); - assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334".to_string()); - assert_eq!(p, Some("8000".to_string())); + assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334"); + assert_eq!(p, Some(8000)); let (u, h, p, _) = get_authority( "//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah" ).unwrap(); assert_eq!(u, Some(UserInfo::new("us".to_string(), Some("p".to_string())))); - assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334".to_string()); - assert_eq!(p, Some("8000".to_string())); + assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334"); + assert_eq!(p, Some(8000)); // invalid authorities; assert!(get_authority("//user:pass@rust-lang:something").is_err()); @@ -839,25 +832,27 @@ fn test_get_authority() { "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a").is_err()); assert!(get_authority( "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00").is_err()); + // outside u16 range + assert!(get_authority("//user:pass@rust-lang:65536").is_err()); // these parse as empty, because they don't start with '//' let (_, h, _, _) = get_authority("user:pass@rust-lang").unwrap(); - assert_eq!(h, "".to_string()); + assert_eq!(h, ""); let (_, h, _, _) = get_authority("rust-lang.org").unwrap(); - assert_eq!(h, "".to_string()); + assert_eq!(h, ""); } #[test] fn test_get_path() { let (p, r) = get_path("/something+%20orother", true).unwrap(); assert_eq!(p, "/something+ orother".to_string()); - assert_eq!(r, "".to_string()); + assert_eq!(r, ""); let (p, r) = get_path("test@email.com#fragment", false).unwrap(); assert_eq!(p, "test@email.com".to_string()); - assert_eq!(r, "#fragment".to_string()); + assert_eq!(r, "#fragment"); let (p, r) = get_path("/gen/:addr=?q=v", false).unwrap(); assert_eq!(p, "/gen/:addr=".to_string()); - assert_eq!(r, "?q=v".to_string()); + assert_eq!(r, "?q=v"); //failure cases assert!(get_path("something?q", true).is_err()); @@ -879,7 +874,7 @@ mod tests { assert_eq!(&u.scheme, 
&"http".to_string()); assert_eq!(&u.user, &Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); assert_eq!(&u.host, &"rust-lang.org".to_string()); - assert_eq!(&u.port, &Some("8080".to_string())); + assert_eq!(&u.port, &Some(8080)); assert_eq!(&u.path.path, &"/doc/~u".to_string()); assert_eq!(&u.path.query, &vec!(("s".to_string(), "v".to_string()))); assert_eq!(&u.path.fragment, &Some("something".to_string())); @@ -917,14 +912,14 @@ mod tests { let url = from_str::(urlstr).unwrap(); assert_eq!(&url.scheme, &"scheme".to_string()); assert_eq!(&url.host, &"host".to_string()); - assert_eq!(&url.port, &Some("1234".to_string())); + assert_eq!(&url.port, &Some(1234)); // is empty path really correct? Other tests think so assert_eq!(&url.path.path, &"".to_string()); let urlstr = "scheme://host:1234/"; let url = from_str::(urlstr).unwrap(); assert_eq!(&url.scheme, &"scheme".to_string()); assert_eq!(&url.host, &"host".to_string()); - assert_eq!(&url.port, &Some("1234".to_string())); + assert_eq!(&url.port, &Some(1234)); assert_eq!(&url.path.path, &"/".to_string()); } From 465ec239184ea3aa24fb6108d6d78901e3f84b94 Mon Sep 17 00:00:00 2001 From: Kevin Butler Date: Wed, 25 Jun 2014 23:51:36 +0100 Subject: [PATCH 6/7] liburl: cosmetic test changes. 
--- src/liburl/lib.rs | 210 +++++++++++++++++++++++++--------------------- 1 file changed, 113 insertions(+), 97 deletions(-) diff --git a/src/liburl/lib.rs b/src/liburl/lib.rs index 55c78672080..a024be898a5 100644 --- a/src/liburl/lib.rs +++ b/src/liburl/lib.rs @@ -427,9 +427,9 @@ fn query_from_str(rawquery: &str) -> DecodeResult { /// # Example /// /// ```rust -/// let query = vec!(("title".to_string(), "The Village".to_string()), +/// let query = vec![("title".to_string(), "The Village".to_string()), /// ("north".to_string(), "52.91".to_string()), -/// ("west".to_string(), "4.10".to_string())); +/// ("west".to_string(), "4.10".to_string())]; /// println!("{}", url::query_to_str(&query)); // title=The%20Village&north=52.91&west=4.10 /// ``` pub fn query_to_str(query: &Query) -> String { @@ -868,87 +868,86 @@ mod tests { #[test] fn test_url_parse() { let url = "http://user:pass@rust-lang.org:8080/doc/~u?s=v#something"; + let u = from_str::(url).unwrap(); - let up = from_str::(url); - let u = up.unwrap(); - assert_eq!(&u.scheme, &"http".to_string()); - assert_eq!(&u.user, &Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); - assert_eq!(&u.host, &"rust-lang.org".to_string()); - assert_eq!(&u.port, &Some(8080)); - assert_eq!(&u.path.path, &"/doc/~u".to_string()); - assert_eq!(&u.path.query, &vec!(("s".to_string(), "v".to_string()))); - assert_eq!(&u.path.fragment, &Some("something".to_string())); + assert_eq!(u.scheme, "http".to_string()); + assert_eq!(u.user, Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); + assert_eq!(u.host, "rust-lang.org".to_string()); + assert_eq!(u.port, Some(8080)); + assert_eq!(u.path.path, "/doc/~u".to_string()); + assert_eq!(u.path.query, vec!(("s".to_string(), "v".to_string()))); + assert_eq!(u.path.fragment, Some("something".to_string())); } #[test] fn test_path_parse() { let path = "/doc/~u?s=v#something"; + let u = from_str::(path).unwrap(); - let up = from_str::(path); - let u = up.unwrap(); - 
assert_eq!(&u.path, &"/doc/~u".to_string()); - assert_eq!(&u.query, &vec!(("s".to_string(), "v".to_string()))); - assert_eq!(&u.fragment, &Some("something".to_string())); + assert_eq!(u.path, "/doc/~u".to_string()); + assert_eq!(u.query, vec!(("s".to_string(), "v".to_string()))); + assert_eq!(u.fragment, Some("something".to_string())); } #[test] fn test_url_parse_host_slash() { let urlstr = "http://0.42.42.42/"; let url = from_str::(urlstr).unwrap(); - assert!(url.host == "0.42.42.42".to_string()); - assert!(url.path.path == "/".to_string()); + assert_eq!(url.host, "0.42.42.42".to_string()); + assert_eq!(url.path.path, "/".to_string()); } #[test] fn test_path_parse_host_slash() { let pathstr = "/"; let path = from_str::(pathstr).unwrap(); - assert!(path.path == "/".to_string()); + assert_eq!(path.path, "/".to_string()); } #[test] fn test_url_host_with_port() { let urlstr = "scheme://host:1234"; let url = from_str::(urlstr).unwrap(); - assert_eq!(&url.scheme, &"scheme".to_string()); - assert_eq!(&url.host, &"host".to_string()); - assert_eq!(&url.port, &Some(1234)); + assert_eq!(url.scheme, "scheme".to_string()); + assert_eq!(url.host, "host".to_string()); + assert_eq!(url.port, Some(1234)); // is empty path really correct? 
Other tests think so - assert_eq!(&url.path.path, &"".to_string()); + assert_eq!(url.path.path, "".to_string()); + let urlstr = "scheme://host:1234/"; let url = from_str::(urlstr).unwrap(); - assert_eq!(&url.scheme, &"scheme".to_string()); - assert_eq!(&url.host, &"host".to_string()); - assert_eq!(&url.port, &Some(1234)); - assert_eq!(&url.path.path, &"/".to_string()); + assert_eq!(url.scheme, "scheme".to_string()); + assert_eq!(url.host, "host".to_string()); + assert_eq!(url.port, Some(1234)); + assert_eq!(url.path.path, "/".to_string()); } #[test] fn test_url_with_underscores() { let urlstr = "http://dotcom.com/file_name.html"; let url = from_str::(urlstr).unwrap(); - assert!(url.path.path == "/file_name.html".to_string()); + assert_eq!(url.path.path, "/file_name.html".to_string()); } #[test] fn test_path_with_underscores() { let pathstr = "/file_name.html"; let path = from_str::(pathstr).unwrap(); - assert!(path.path == "/file_name.html".to_string()); + assert_eq!(path.path, "/file_name.html".to_string()); } #[test] fn test_url_with_dashes() { let urlstr = "http://dotcom.com/file-name.html"; let url = from_str::(urlstr).unwrap(); - assert!(url.path.path == "/file-name.html".to_string()); + assert_eq!(url.path.path, "/file-name.html".to_string()); } #[test] fn test_path_with_dashes() { let pathstr = "/file-name.html"; let path = from_str::(pathstr).unwrap(); - assert!(path.path == "/file-name.html".to_string()); + assert_eq!(path.path, "/file-name.html".to_string()); } #[test] @@ -965,62 +964,72 @@ mod tests { #[test] fn test_full_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc?s=v#something"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_userless_url_parse_and_format() { let url = "http://rust-lang.org/doc?s=v#something"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); 
+ assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_queryless_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc#something"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_empty_query_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc?#something"; let should_be = "http://user:pass@rust-lang.org/doc#something"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), should_be); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), should_be); } #[test] fn test_fragmentless_url_parse_and_format() { let url = "http://user:pass@rust-lang.org/doc?q=v"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_minimal_url_parse_and_format() { let url = "http://rust-lang.org/doc"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_url_with_port_parse_and_format() { let url = "http://rust-lang.org:80/doc"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_scheme_host_only_url_parse_and_format() { let url = "http://rust-lang.org"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_pathless_url_parse_and_format() { let url = "http://user:pass@rust-lang.org?q=v#something"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_scheme_host_fragment_only_url_parse_and_format() { let url = "http://rust-lang.org#something"; 
- assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] @@ -1042,68 +1051,75 @@ mod tests { #[test] fn test_url_without_authority() { let url = "mailto:test@email.com"; - assert_eq!(from_str::(url).unwrap().to_str().as_slice(), url); + let u = from_str::(url).unwrap(); + assert_eq!(format!("{}", u).as_slice(), url); } #[test] fn test_encode() { - assert_eq!(encode(""), "".to_string()); - assert_eq!(encode("http://example.com"), "http://example.com".to_string()); - assert_eq!(encode("foo bar% baz"), "foo%20bar%25%20baz".to_string()); - assert_eq!(encode(" "), "%20".to_string()); - assert_eq!(encode("!"), "!".to_string()); - assert_eq!(encode("\""), "\"".to_string()); - assert_eq!(encode("#"), "#".to_string()); - assert_eq!(encode("$"), "$".to_string()); - assert_eq!(encode("%"), "%25".to_string()); - assert_eq!(encode("&"), "&".to_string()); - assert_eq!(encode("'"), "%27".to_string()); - assert_eq!(encode("("), "(".to_string()); - assert_eq!(encode(")"), ")".to_string()); - assert_eq!(encode("*"), "*".to_string()); - assert_eq!(encode("+"), "+".to_string()); - assert_eq!(encode(","), ",".to_string()); - assert_eq!(encode("/"), "/".to_string()); - assert_eq!(encode(":"), ":".to_string()); - assert_eq!(encode(";"), ";".to_string()); - assert_eq!(encode("="), "=".to_string()); - assert_eq!(encode("?"), "?".to_string()); - assert_eq!(encode("@"), "@".to_string()); - assert_eq!(encode("["), "[".to_string()); - assert_eq!(encode("]"), "]".to_string()); - assert_eq!(encode("\0"), "%00".to_string()); - assert_eq!(encode("\n"), "%0A".to_string()); + fn t(input: &str, expected: &str) { + assert_eq!(encode(input), expected.to_string()) + } + + t("", ""); + t("http://example.com", "http://example.com"); + t("foo bar% baz", "foo%20bar%25%20baz"); + t(" ", "%20"); + t("!", "!"); + t("\"", "\""); + t("#", "#"); + t("$", "$"); + t("%", "%25"); + t("&", "&"); + t("'", "%27"); + 
t("(", "("); + t(")", ")"); + t("*", "*"); + t("+", "+"); + t(",", ","); + t("/", "/"); + t(":", ":"); + t(";", ";"); + t("=", "="); + t("?", "?"); + t("@", "@"); + t("[", "["); + t("]", "]"); + t("\0", "%00"); + t("\n", "%0A"); } #[test] fn test_encode_component() { - assert_eq!(encode_component(""), "".to_string()); - assert!(encode_component("http://example.com") == - "http%3A%2F%2Fexample.com".to_string()); - assert!(encode_component("foo bar% baz") == - "foo%20bar%25%20baz".to_string()); - assert_eq!(encode_component(" "), "%20".to_string()); - assert_eq!(encode_component("!"), "%21".to_string()); - assert_eq!(encode_component("#"), "%23".to_string()); - assert_eq!(encode_component("$"), "%24".to_string()); - assert_eq!(encode_component("%"), "%25".to_string()); - assert_eq!(encode_component("&"), "%26".to_string()); - assert_eq!(encode_component("'"), "%27".to_string()); - assert_eq!(encode_component("("), "%28".to_string()); - assert_eq!(encode_component(")"), "%29".to_string()); - assert_eq!(encode_component("*"), "%2A".to_string()); - assert_eq!(encode_component("+"), "%2B".to_string()); - assert_eq!(encode_component(","), "%2C".to_string()); - assert_eq!(encode_component("/"), "%2F".to_string()); - assert_eq!(encode_component(":"), "%3A".to_string()); - assert_eq!(encode_component(";"), "%3B".to_string()); - assert_eq!(encode_component("="), "%3D".to_string()); - assert_eq!(encode_component("?"), "%3F".to_string()); - assert_eq!(encode_component("@"), "%40".to_string()); - assert_eq!(encode_component("["), "%5B".to_string()); - assert_eq!(encode_component("]"), "%5D".to_string()); - assert_eq!(encode_component("\0"), "%00".to_string()); - assert_eq!(encode_component("\n"), "%0A".to_string()); + fn t(input: &str, expected: &str) { + assert_eq!(encode_component(input), expected.to_string()) + } + + t("", ""); + t("http://example.com", "http%3A%2F%2Fexample.com"); + t("foo bar% baz", "foo%20bar%25%20baz"); + t(" ", "%20"); + t("!", "%21"); + t("#", "%23"); + 
t("$", "%24"); + t("%", "%25"); + t("&", "%26"); + t("'", "%27"); + t("(", "%28"); + t(")", "%29"); + t("*", "%2A"); + t("+", "%2B"); + t(",", "%2C"); + t("/", "%2F"); + t(":", "%3A"); + t(";", "%3B"); + t("=", "%3D"); + t("?", "%3F"); + t("@", "%40"); + t("[", "%5B"); + t("]", "%5D"); + t("\0", "%00"); + t("\n", "%0A"); } #[test] @@ -1189,8 +1205,8 @@ mod tests { let mut m = HashMap::new(); m.insert("foo bar".to_string(), vec!("abc".to_string(), "12 = 34".to_string())); - assert!(encode_form_urlencoded(&m) == - "foo+bar=abc&foo+bar=12+%3D+34".to_string()); + assert_eq!(encode_form_urlencoded(&m), + "foo+bar=abc&foo+bar=12+%3D+34".to_string()); } #[test] From 4703bb4eaa157ede1885bbfdb8839d749d2d8108 Mon Sep 17 00:00:00 2001 From: Kevin Butler Date: Thu, 26 Jun 2014 16:43:32 +0100 Subject: [PATCH 7/7] liburl: Generic input for {en,de}code. --- src/liburl/lib.rs | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/liburl/lib.rs b/src/liburl/lib.rs index a024be898a5..585d96e9307 100644 --- a/src/liburl/lib.rs +++ b/src/liburl/lib.rs @@ -26,6 +26,7 @@ use std::fmt; use std::from_str::FromStr; use std::hash; use std::uint; +use std::path::BytesContainer; /// A Uniform Resource Locator (URL). A URL is a form of URI (Uniform Resource /// Identifier) that includes network location information, such as hostname or @@ -182,8 +183,8 @@ impl UserInfo { } } -fn encode_inner(s: &str, full_url: bool) -> String { - s.bytes().fold(String::new(), |mut out, b| { +fn encode_inner(c: T, full_url: bool) -> String { + c.container_as_bytes().iter().fold(String::new(), |mut out, &b| { match b as char { // unreserved: 'A' .. 
'Z' @@ -218,8 +219,8 @@ fn encode_inner(s: &str, full_url: bool) -> String { /// let url = encode("https://example.com/Rust (programming language)"); /// println!("{}", url); // https://example.com/Rust%20(programming%20language) /// ``` -pub fn encode(s: &str) -> String { - encode_inner(s, true) +pub fn encode(container: T) -> String { + encode_inner(container, true) } @@ -227,8 +228,8 @@ pub fn encode(s: &str) -> String { /// encoded character sequences. /// /// This function is compliant with RFC 3986. -pub fn encode_component(s: &str) -> String { - encode_inner(s, false) +pub fn encode_component(container: T) -> String { + encode_inner(container, false) } pub type DecodeResult = Result; @@ -245,18 +246,18 @@ pub type DecodeResult = Result; /// let url = decode("https://example.com/Rust%20(programming%20language)"); /// println!("{}", url); // https://example.com/Rust (programming language) /// ``` -pub fn decode(s: &str) -> DecodeResult { - decode_inner(s, true) +pub fn decode(container: T) -> DecodeResult { + decode_inner(container, true) } /// Decode a string encoded with percent encoding. 
-pub fn decode_component(s: &str) -> DecodeResult { - decode_inner(s, false) +pub fn decode_component(container: T) -> DecodeResult { + decode_inner(container, false) } -fn decode_inner(s: &str, full_url: bool) -> DecodeResult { +fn decode_inner(c: T, full_url: bool) -> DecodeResult { let mut out = String::new(); - let mut iter = s.bytes(); + let mut iter = c.container_as_bytes().iter().map(|&b| b); loop { match iter.next() { @@ -864,6 +865,7 @@ mod tests { encode_component, decode_component, UserInfo, get_scheme, Url, Path}; use std::collections::HashMap; + use std::path::BytesContainer; #[test] fn test_url_parse() { @@ -1057,7 +1059,7 @@ mod tests { #[test] fn test_encode() { - fn t(input: &str, expected: &str) { + fn t(input: T, expected: &str) { assert_eq!(encode(input), expected.to_string()) } @@ -1087,11 +1089,13 @@ mod tests { t("]", "]"); t("\0", "%00"); t("\n", "%0A"); + + t(&[0u8, 10, 37], "%00%0A%25"); } #[test] fn test_encode_component() { - fn t(input: &str, expected: &str) { + fn t(input: T, expected: &str) { assert_eq!(encode_component(input), expected.to_string()) } @@ -1120,11 +1124,13 @@ mod tests { t("]", "%5D"); t("\0", "%00"); t("\n", "%0A"); + + t(&[0u8, 10, 37], "%00%0A%25"); } #[test] fn test_decode() { - fn t(input: &str, expected: &str) { + fn t(input: T, expected: &str) { assert_eq!(decode(input), Ok(expected.to_string())) } @@ -1154,11 +1160,13 @@ mod tests { t("%40", "%40"); t("%5B", "%5B"); t("%5D", "%5D"); + + t("%00%0A%25".as_bytes(), "\0\n%"); } #[test] fn test_decode_component() { - fn t(input: &str, expected: &str) { + fn t(input: T, expected: &str) { assert_eq!(decode_component(input), Ok(expected.to_string())) } @@ -1188,6 +1196,8 @@ mod tests { t("%40", "@"); t("%5B", "["); t("%5D", "]"); + + t("%00%0A%25".as_bytes(), "\0\n%"); } #[test]