From 4667c492b539e3abd046007f904c9395ef22310e Mon Sep 17 00:00:00 2001 From: Yehuda Katz Date: Wed, 12 Feb 2014 21:28:58 -0800 Subject: [PATCH] Adds support for working with URL Paths It is sometimes useful to parse just the path portion of a URL (path, query string and fragment) rather than the entire URL. --- src/libextra/url.rs | 115 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/src/libextra/url.rs b/src/libextra/url.rs index 6138c5416f2..580e11b1158 100644 --- a/src/libextra/url.rs +++ b/src/libextra/url.rs @@ -55,6 +55,17 @@ pub struct Url { fragment: Option<~str> } +#[deriving(Clone, Eq)] +pub struct Path { + /// The path component of a URL, for example `/foo/bar`. + path: ~str, + /// The query component of a URL. `~[(~"baz", ~"qux")]` represents the + /// query string `baz=qux`, as in `/foo/bar?baz=qux#quz`. + query: Query, + /// The fragment component, such as `quz`. Doesn't include the leading `#` character. + fragment: Option<~str> +} + /// An optional subcomponent of a URI authority component. 
#[deriving(Clone, Eq)] pub struct UserInfo { @@ -88,6 +99,19 @@ impl Url { } } +impl Path { + pub fn new(path: ~str, + query: Query, + fragment: Option<~str>) + -> Path { + Path { + path: path, + query: query, + fragment: fragment, + } + } +} + impl UserInfo { #[inline] pub fn new(user: ~str, pass: Option<~str>) -> UserInfo { @@ -695,6 +719,21 @@ pub fn from_str(rawurl: &str) -> Result<Url, ~str> { Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) } +pub fn path_from_str(rawpath: &str) -> Result<Path, ~str> { + let (path, rest) = match get_path(rawpath, false) { + Ok(val) => val, + Err(e) => return Err(e) + }; + + // query and fragment + let (query, fragment) = match get_query_fragment(rest) { + Ok(val) => val, + Err(e) => return Err(e), + }; + + Ok(Path{ path: path, query: query, fragment: fragment }) +} + impl FromStr for Url { fn from_str(s: &str) -> Option<Url> { match from_str(s) { @@ -704,6 +743,15 @@ impl FromStr for Url { } } +impl FromStr for Path { + fn from_str(s: &str) -> Option<Path> { + match path_from_str(s) { + Ok(path) => Some(path), + Err(_) => None + } + } +} + /** * Format a `url` as a string * @@ -749,18 +797,45 @@ pub fn to_str(url: &Url) -> ~str { format!("{}:{}{}{}{}", url.scheme, authority, url.path, query, fragment) } +pub fn path_to_str(path: &Path) -> ~str { + let query = if path.query.is_empty() { + ~"" + } else { + format!("?{}", query_to_str(&path.query)) + }; + + let fragment = match path.fragment { + Some(ref fragment) => format!("\\#{}", encode_component(*fragment)), + None => ~"", + }; + + format!("{}{}{}", path.path, query, fragment) +} + impl ToStr for Url { fn to_str(&self) -> ~str { to_str(self) } } +impl ToStr for Path { + fn to_str(&self) -> ~str { + path_to_str(self) + } +} + impl IterBytes for Url { fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool { self.to_str().iter_bytes(lsb0, f) } } +impl IterBytes for Path { + fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool { + self.to_str().iter_bytes(lsb0, f) + } +} + // Put a few 
tests outside of the 'test' module so they can test the internal // functions and those functions don't need 'pub' @@ -868,6 +943,17 @@ mod tests { assert_eq!(&u.fragment, &Some(~"something")); } + #[test] + fn test_path_parse() { + let path = ~"/doc/~u?s=v#something"; + + let up = path_from_str(path); + let u = up.unwrap(); + assert_eq!(&u.path, &~"/doc/~u"); + assert_eq!(&u.query, &~[(~"s", ~"v")]); + assert_eq!(&u.fragment, &Some(~"something")); + } + #[test] fn test_url_parse_host_slash() { let urlstr = ~"http://0.42.42.42/"; @@ -876,6 +962,13 @@ mod tests { assert!(url.path == ~"/"); } + #[test] + fn test_path_parse_host_slash() { + let pathstr = ~"/"; + let path = path_from_str(pathstr).unwrap(); + assert!(path.path == ~"/"); + } + #[test] fn test_url_host_with_port() { let urlstr = ~"scheme://host:1234"; @@ -899,6 +992,13 @@ mod tests { assert!(url.path == ~"/file_name.html"); } + #[test] + fn test_path_with_underscores() { + let pathstr = ~"/file_name.html"; + let path = path_from_str(pathstr).unwrap(); + assert!(path.path == ~"/file_name.html"); + } + #[test] fn test_url_with_dashes() { let urlstr = ~"http://dotcom.com/file-name.html"; @@ -906,6 +1006,13 @@ mod tests { assert!(url.path == ~"/file-name.html"); } + #[test] + fn test_path_with_dashes() { + let pathstr = ~"/file-name.html"; + let path = path_from_str(pathstr).unwrap(); + assert!(path.path == ~"/file-name.html"); + } + #[test] fn test_no_scheme() { assert!(get_scheme("noschemehere.html").is_err()); @@ -986,6 +1093,14 @@ mod tests { assert!(u.query == ~[(~"ba%d ", ~"#&+")]); } + #[test] + fn test_path_component_encoding() { + let path = ~"/doc%20uments?ba%25d%20=%23%26%2B"; + let p = path_from_str(path).unwrap(); + assert!(p.path == ~"/doc uments"); + assert!(p.query == ~[(~"ba%d ", ~"#&+")]); + } + #[test] fn test_url_without_authority() { let url = ~"mailto:test@email.com";