Adds support for working with URL Paths

It is sometimes useful to parse just the path portion of a URL (path,
query string and fragment) rather than the entire URL.
This commit is contained in:
Yehuda Katz 2014-02-12 21:28:58 -08:00
parent 58eeb07c2a
commit 4667c492b5

View File

@ -55,6 +55,17 @@ pub struct Url {
fragment: Option<~str> fragment: Option<~str>
} }
/// The path portion of a URL: the path itself together with its query
/// string and fragment, without a scheme or authority.
#[deriving(Clone, Eq)]
pub struct Path {
/// The path component of a URL, for example `/foo/bar`.
path: ~str,
/// The query component of a URL, held as key/value pairs. For example,
/// `~[(~"baz", ~"qux")]` represents the query string `baz=qux`.
query: Query,
/// The fragment component, such as `quz`. Doesn't include the leading `#` character.
fragment: Option<~str>
}
/// An optional subcomponent of a URI authority component. /// An optional subcomponent of a URI authority component.
#[deriving(Clone, Eq)] #[deriving(Clone, Eq)]
pub struct UserInfo { pub struct UserInfo {
@ -88,6 +99,19 @@ impl Url {
} }
} }
impl Path {
    /// Builds a `Path` from components that have already been split apart.
    ///
    /// The arguments are stored exactly as given; nothing is parsed or
    /// validated here.
    pub fn new(path: ~str, query: Query, fragment: Option<~str>) -> Path {
        Path { path: path, query: query, fragment: fragment }
    }
}
impl UserInfo { impl UserInfo {
#[inline] #[inline]
pub fn new(user: ~str, pass: Option<~str>) -> UserInfo { pub fn new(user: ~str, pass: Option<~str>) -> UserInfo {
@ -695,6 +719,21 @@ pub fn from_str(rawurl: &str) -> Result<Url, ~str> {
Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) Ok(Url::new(scheme, userinfo, host, port, path, query, fragment))
} }
pub fn path_from_str(rawpath: &str) -> Result<Path, ~str> {
let (path, rest) = match get_path(rawpath, false) {
Ok(val) => val,
Err(e) => return Err(e)
};
// query and fragment
let (query, fragment) = match get_query_fragment(rest) {
Ok(val) => val,
Err(e) => return Err(e),
};
Ok(Path{ path: path, query: query, fragment: fragment })
}
impl FromStr for Url { impl FromStr for Url {
fn from_str(s: &str) -> Option<Url> { fn from_str(s: &str) -> Option<Url> {
match from_str(s) { match from_str(s) {
@ -704,6 +743,15 @@ impl FromStr for Url {
} }
} }
impl FromStr for Path {
    /// Parses `s` with `path_from_str`, yielding `Some(path)` on success.
    ///
    /// A parse failure becomes `None`; the error message is discarded.
    fn from_str(s: &str) -> Option<Path> {
        match path_from_str(s) {
            Err(_) => None,
            Ok(parsed) => Some(parsed),
        }
    }
}
/** /**
* Format a `url` as a string * Format a `url` as a string
* *
@ -749,18 +797,45 @@ pub fn to_str(url: &Url) -> ~str {
format!("{}:{}{}{}{}", url.scheme, authority, url.path, query, fragment) format!("{}:{}{}{}{}", url.scheme, authority, url.path, query, fragment)
} }
/// Format a `path` as a string.
///
/// # Arguments
///
/// * `path` - the `Path` to render.
///
/// # Returns
///
/// The path text, followed by `?` and the output of `query_to_str` when the
/// query is non-empty, followed by the fragment (passed through
/// `encode_component`) when one is present.
pub fn path_to_str(path: &Path) -> ~str {
    let fragment = match path.fragment {
        None => ~"",
        // NOTE(review): `\\#` looks like the escape needed to emit a literal
        // `#` with this compiler's `format!` syntax -- confirm.
        Some(ref frag) => format!("\\#{}", encode_component(*frag)),
    };

    let query = if !path.query.is_empty() {
        format!("?{}", query_to_str(&path.query))
    } else {
        ~""
    };

    format!("{}{}{}", path.path, query, fragment)
}
impl ToStr for Url { impl ToStr for Url {
fn to_str(&self) -> ~str { fn to_str(&self) -> ~str {
to_str(self) to_str(self)
} }
} }
impl ToStr for Path {
/// Renders this path as a string by delegating to `path_to_str`.
fn to_str(&self) -> ~str {
path_to_str(self)
}
}
impl IterBytes for Url { impl IterBytes for Url {
fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool { fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
self.to_str().iter_bytes(lsb0, f) self.to_str().iter_bytes(lsb0, f)
} }
} }
impl IterBytes for Path {
    /// Iterates over the bytes of this path's string form (as produced by
    /// `to_str`), forwarding `lsb0` and `f` unchanged.
    fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
        let rendered = self.to_str();
        rendered.iter_bytes(lsb0, f)
    }
}
// Put a few tests outside of the 'test' module so they can test the internal // Put a few tests outside of the 'test' module so they can test the internal
// functions and those functions don't need 'pub' // functions and those functions don't need 'pub'
@ -868,6 +943,17 @@ mod tests {
assert_eq!(&u.fragment, &Some(~"something")); assert_eq!(&u.fragment, &Some(~"something"));
} }
/// A path with a query string and a fragment is split into all three parts.
#[test]
fn test_path_parse() {
    let parsed = path_from_str("/doc/~u?s=v#something").unwrap();

    assert_eq!(&parsed.path, &~"/doc/~u");
    assert_eq!(&parsed.query, &~[(~"s", ~"v")]);
    assert_eq!(&parsed.fragment, &Some(~"something"));
}
#[test] #[test]
fn test_url_parse_host_slash() { fn test_url_parse_host_slash() {
let urlstr = ~"http://0.42.42.42/"; let urlstr = ~"http://0.42.42.42/";
@ -876,6 +962,13 @@ mod tests {
assert!(url.path == ~"/"); assert!(url.path == ~"/");
} }
/// A lone `/` parses to a path of `/` (the path-only counterpart of
/// `test_url_parse_host_slash`).
#[test]
fn test_path_parse_host_slash() {
    let parsed = path_from_str("/").unwrap();
    assert!(parsed.path == ~"/");
}
#[test] #[test]
fn test_url_host_with_port() { fn test_url_host_with_port() {
let urlstr = ~"scheme://host:1234"; let urlstr = ~"scheme://host:1234";
@ -899,6 +992,13 @@ mod tests {
assert!(url.path == ~"/file_name.html"); assert!(url.path == ~"/file_name.html");
} }
/// Underscores in a path are preserved as-is.
#[test]
fn test_path_with_underscores() {
    let parsed = path_from_str("/file_name.html").unwrap();
    assert!(parsed.path == ~"/file_name.html");
}
#[test] #[test]
fn test_url_with_dashes() { fn test_url_with_dashes() {
let urlstr = ~"http://dotcom.com/file-name.html"; let urlstr = ~"http://dotcom.com/file-name.html";
@ -906,6 +1006,13 @@ mod tests {
assert!(url.path == ~"/file-name.html"); assert!(url.path == ~"/file-name.html");
} }
/// Dashes in a path are preserved as-is.
#[test]
fn test_path_with_dashes() {
    let parsed = path_from_str("/file-name.html").unwrap();
    assert!(parsed.path == ~"/file-name.html");
}
#[test] #[test]
fn test_no_scheme() { fn test_no_scheme() {
assert!(get_scheme("noschemehere.html").is_err()); assert!(get_scheme("noschemehere.html").is_err());
@ -986,6 +1093,14 @@ mod tests {
assert!(u.query == ~[(~"ba%d ", ~"#&+")]); assert!(u.query == ~[(~"ba%d ", ~"#&+")]);
} }
/// Percent-encoded bytes in the path and in the query key/value are decoded
/// by `path_from_str`.
#[test]
fn test_path_component_encoding() {
    let parsed = path_from_str("/doc%20uments?ba%25d%20=%23%26%2B").unwrap();

    assert!(parsed.path == ~"/doc uments");
    assert!(parsed.query == ~[(~"ba%d ", ~"#&+")]);
}
#[test] #[test]
fn test_url_without_authority() { fn test_url_without_authority() {
let url = ~"mailto:test@email.com"; let url = ~"mailto:test@email.com";