liburl: Remove some unnecessary allocations.

Some signatures have changed from String to &str returns.

To fix, call to_string() on the returned value.

[breaking-change]
This commit is contained in:
Kevin Butler 2014-06-23 23:13:11 +01:00
parent 11b093425d
commit feaad623a1

View File

@ -51,7 +51,7 @@ pub struct Url {
/// A domain name or IP address. For example, `example.com`. /// A domain name or IP address. For example, `example.com`.
pub host: String, pub host: String,
/// A TCP port number, for example `8080`. /// A TCP port number, for example `8080`.
pub port: Option<String>, pub port: Option<u16>,
/// The path component of a URL, for example `/foo/bar?baz=qux#quz`. /// The path component of a URL, for example `/foo/bar?baz=qux#quz`.
pub path: Path, pub path: Path,
} }
@ -84,7 +84,7 @@ impl Url {
pub fn new(scheme: String, pub fn new(scheme: String,
user: Option<UserInfo>, user: Option<UserInfo>,
host: String, host: String,
port: Option<String>, port: Option<u16>,
path: String, path: String,
query: Query, query: Query,
fragment: Option<String>) fragment: Option<String>)
@ -113,16 +113,23 @@ impl Url {
let (scheme, rest) = try!(get_scheme(rawurl)); let (scheme, rest) = try!(get_scheme(rawurl));
// authority // authority
let (userinfo, host, port, rest) = try!(get_authority(rest.as_slice())); let (userinfo, host, port, rest) = try!(get_authority(rest));
// path // path
let has_authority = host.len() > 0; let has_authority = host.len() > 0;
let (path, rest) = try!(get_path(rest.as_slice(), has_authority)); let (path, rest) = try!(get_path(rest, has_authority));
// query and fragment // query and fragment
let (query, fragment) = try!(get_query_fragment(rest.as_slice())); let (query, fragment) = try!(get_query_fragment(rest));
Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) let url = Url::new(scheme.to_string(),
userinfo,
host.to_string(),
port,
path,
query,
fragment);
Ok(url)
} }
} }
@ -438,65 +445,50 @@ pub fn query_to_str(query: &Query) -> String {
}) })
} }
/** /// Returns a tuple of the URI scheme and the rest of the URI, or a parsing error.
* Returns a tuple of the URI scheme and the rest of the URI, or a parsing error. ///
* /// Does not include the separating `:` character.
* Does not include the separating `:` character. ///
* /// # Example
* # Example ///
* /// ```rust
* ```rust /// use url::get_scheme;
* use url::get_scheme; ///
* /// let scheme = match get_scheme("https://example.com/") {
* let scheme = match get_scheme("https://example.com/") { /// Ok((sch, _)) => sch,
* Ok((sch, _)) => sch, /// Err(_) => "(None)",
* Err(_) => "(None)".to_string(), /// };
* }; /// println!("Scheme in use: {}.", scheme); // Scheme in use: https.
* println!("Scheme in use: {}.", scheme); // Scheme in use: https. /// ```
* ``` pub fn get_scheme<'a>(rawurl: &'a str) -> DecodeResult<(&'a str, &'a str)> {
*/
pub fn get_scheme(rawurl: &str) -> DecodeResult<(String, String)> {
for (i,c) in rawurl.chars().enumerate() { for (i,c) in rawurl.chars().enumerate() {
match c { let result = match c {
'A' .. 'Z' | 'a' .. 'z' => continue, 'A' .. 'Z'
'0' .. '9' | '+' | '-' | '.' => { | 'a' .. 'z' => continue,
if i == 0 { '0' .. '9' | '+' | '-' | '.' => {
return Err("url: Scheme must begin with a \ if i != 0 { continue }
letter.".to_string());
}
continue;
}
':' => {
if i == 0 {
return Err("url: Scheme cannot be empty.".to_string());
} else {
return Ok((rawurl.slice(0,i).to_string(),
rawurl.slice(i+1,rawurl.len()).to_string()));
}
}
_ => {
return Err("url: Invalid character in scheme.".to_string());
}
}
};
return Err("url: Scheme must be terminated with a colon.".to_string());
}
#[deriving(Clone, PartialEq)] Err("url: Scheme must begin with a letter.".to_string())
enum Input { }
Digit, // all digits ':' => {
Hex, // digits and letters a-f if i == 0 {
Unreserved // all other legal characters Err("url: Scheme cannot be empty.".to_string())
} else {
Ok((rawurl.slice(0,i), rawurl.slice(i+1,rawurl.len())))
}
}
_ => Err("url: Invalid character in scheme.".to_string()),
};
return result;
}
Err("url: Scheme must be terminated with a colon.".to_string())
} }
// returns userinfo, host, port, and unparsed part, or an error // returns userinfo, host, port, and unparsed part, or an error
fn get_authority(rawurl: &str) -> fn get_authority<'a>(rawurl: &'a str) ->
DecodeResult<(Option<UserInfo>, String, Option<String>, String)> { DecodeResult<(Option<UserInfo>, &'a str, Option<u16>, &'a str)> {
if !rawurl.starts_with("//") {
// there is no authority.
return Ok((None, "".to_string(), None, rawurl.to_str()));
}
enum State { enum State {
Start, // starting state Start, // starting state
PassHostPort, // could be in user or port PassHostPort, // could be in user or port
@ -506,12 +498,24 @@ fn get_authority(rawurl: &str) ->
InPort // are in port InPort // are in port
} }
#[deriving(Clone, PartialEq)]
enum Input {
Digit, // all digits
Hex, // digits and letters a-f
Unreserved // all other legal characters
}
if !rawurl.starts_with("//") {
// there is no authority.
return Ok((None, "", None, rawurl));
}
let len = rawurl.len(); let len = rawurl.len();
let mut st = Start; let mut st = Start;
let mut input = Digit; // most restricted, start here. let mut input = Digit; // most restricted, start here.
let mut userinfo = None; let mut userinfo = None;
let mut host = "".to_string(); let mut host = "";
let mut port = None; let mut port = None;
let mut colon_count = 0u; let mut colon_count = 0u;
@ -519,27 +523,27 @@ fn get_authority(rawurl: &str) ->
let mut begin = 2; let mut begin = 2;
let mut end = len; let mut end = len;
for (i,c) in rawurl.chars().enumerate() { for (i,c) in rawurl.chars().enumerate()
if i < 2 { continue; } // ignore the leading // // ignore the leading '//' handled by early return
.skip(2) {
// deal with input class first // deal with input class first
match c { match c {
'0' .. '9' => (), '0' .. '9' => (),
'A' .. 'F' | 'a' .. 'f' => { 'A' .. 'F'
if input == Digit { | 'a' .. 'f' => {
input = Hex; if input == Digit {
input = Hex;
}
} }
} 'G' .. 'Z'
'G' .. 'Z' | 'g' .. 'z' | '-' | '.' | '_' | '~' | '%' | | 'g' .. 'z'
'&' |'\'' | '(' | ')' | '+' | '!' | '*' | ',' | ';' | '=' => { | '-' | '.' | '_' | '~' | '%'
input = Unreserved; | '&' |'\'' | '(' | ')' | '+'
} | '!' | '*' | ',' | ';' | '=' => input = Unreserved,
':' | '@' | '?' | '#' | '/' => { ':' | '@' | '?' | '#' | '/' => {
// separators, don't change anything // separators, don't change anything
} }
_ => { _ => return Err("Illegal character in authority".to_string()),
return Err("Illegal character in authority".to_string());
}
} }
// now process states // now process states
@ -563,7 +567,7 @@ fn get_authority(rawurl: &str) ->
pos = i; pos = i;
if input == Unreserved { if input == Unreserved {
// must be port // must be port
host = rawurl.slice(begin, i).to_string(); host = rawurl.slice(begin, i);
st = InPort; st = InPort;
} else { } else {
// can't be sure whether this is an ipv6 address or a port // can't be sure whether this is an ipv6 address or a port
@ -572,21 +576,18 @@ fn get_authority(rawurl: &str) ->
} }
Ip6Port => { Ip6Port => {
if input == Unreserved { if input == Unreserved {
return Err("Illegal characters in \ return Err("Illegal characters in authority.".to_string());
authority.".to_string());
} }
st = Ip6Host; st = Ip6Host;
} }
Ip6Host => { Ip6Host => {
if colon_count > 7 { if colon_count > 7 {
host = rawurl.slice(begin, i).to_string(); host = rawurl.slice(begin, i);
pos = i; pos = i;
st = InPort; st = InPort;
} }
} }
_ => { _ => return Err("Invalid ':' in authority.".to_string()),
return Err("Invalid ':' in authority.".to_string());
}
} }
input = Digit; // reset input class input = Digit; // reset input class
} }
@ -606,9 +607,7 @@ fn get_authority(rawurl: &str) ->
userinfo = Some(UserInfo::new(user, Some(pass))); userinfo = Some(UserInfo::new(user, Some(pass)));
st = InHost; st = InHost;
} }
_ => { _ => return Err("Invalid '@' in authority.".to_string()),
return Err("Invalid '@' in authority.".to_string());
}
} }
begin = i+1; begin = i+1;
} }
@ -623,43 +622,53 @@ fn get_authority(rawurl: &str) ->
// finish up // finish up
match st { match st {
Start => { Start => host = rawurl.slice(begin, end),
host = rawurl.slice(begin, end).to_string(); PassHostPort
} | Ip6Port => {
PassHostPort | Ip6Port => {
if input != Digit { if input != Digit {
return Err("Non-digit characters in port.".to_string()); return Err("Non-digit characters in port.".to_string());
} }
host = rawurl.slice(begin, pos).to_string(); host = rawurl.slice(begin, pos);
port = Some(rawurl.slice(pos+1, end).to_string()); port = Some(rawurl.slice(pos+1, end));
}
Ip6Host | InHost => {
host = rawurl.slice(begin, end).to_string();
} }
Ip6Host
| InHost => host = rawurl.slice(begin, end),
InPort => { InPort => {
if input != Digit { if input != Digit {
return Err("Non-digit characters in port.".to_string()); return Err("Non-digit characters in port.".to_string());
} }
port = Some(rawurl.slice(pos+1, end).to_string()); port = Some(rawurl.slice(pos+1, end));
} }
} }
let rest = rawurl.slice(end, len).to_string(); let rest = rawurl.slice(end, len);
return Ok((userinfo, host, port, rest)); // If we have a port string, ensure it parses to u16.
let port = match port {
None => None,
opt => match opt.and_then(|p| FromStr::from_str(p)) {
None => return Err(format!("Failed to parse port: {}", port)),
opt => opt
}
};
Ok((userinfo, host, port, rest))
} }
// returns the path and unparsed part of url, or an error // returns the path and unparsed part of url, or an error
fn get_path(rawurl: &str, authority: bool) -> DecodeResult<(String, String)> { fn get_path<'a>(rawurl: &'a str, is_authority: bool)
-> DecodeResult<(String, &'a str)> {
let len = rawurl.len(); let len = rawurl.len();
let mut end = len; let mut end = len;
for (i,c) in rawurl.chars().enumerate() { for (i,c) in rawurl.chars().enumerate() {
match c { match c {
'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '&' |'\'' | '(' | ')' | '.' 'A' .. 'Z'
| '@' | ':' | '%' | '/' | '+' | '!' | '*' | ',' | ';' | '=' | 'a' .. 'z'
| '_' | '-' | '~' => { | '0' .. '9'
continue; | '&' |'\'' | '(' | ')' | '.'
} | '@' | ':' | '%' | '/' | '+'
| '!' | '*' | ',' | ';' | '='
| '_' | '-' | '~' => continue,
'?' | '#' => { '?' | '#' => {
end = i; end = i;
break; break;
@ -668,68 +677,53 @@ fn get_path(rawurl: &str, authority: bool) -> DecodeResult<(String, String)> {
} }
} }
if authority && end != 0 && !rawurl.starts_with("/") { if is_authority && end != 0 && !rawurl.starts_with("/") {
Err("Non-empty path must begin with \ Err("Non-empty path must begin with \
'/' in presence of authority.".to_string()) '/' in presence of authority.".to_string())
} else { } else {
Ok((try!(decode_component(rawurl.slice(0, end))), Ok((try!(decode_component(rawurl.slice(0, end))),
rawurl.slice(end, len).to_string())) rawurl.slice(end, len)))
} }
} }
// returns the parsed query and the fragment, if present // returns the parsed query and the fragment, if present
fn get_query_fragment(rawurl: &str) -> DecodeResult<(Query, Option<String>)> { fn get_query_fragment(rawurl: &str) -> DecodeResult<(Query, Option<String>)> {
if !rawurl.starts_with("?") { let (before_fragment, raw_fragment) = split_char_first(rawurl, '#');
if rawurl.starts_with("#") {
let f = try!(decode_component(rawurl.slice(1, rawurl.len()))); // Parse the fragment if available
return Ok((vec!(), Some(f))); let fragment = match raw_fragment {
} else { "" => None,
return Ok((vec!(), None)); raw => Some(try!(decode_component(raw)))
}
}
let (q, r) = split_char_first(rawurl.slice(1, rawurl.len()), '#');
let f = if r.len() != 0 {
Some(try!(decode_component(r)))
} else {
None
}; };
Ok((try!(query_from_str(q)), f)) match before_fragment.slice_shift_char() {
(Some('?'), rest) => Ok((try!(query_from_str(rest)), fragment)),
(None, "") => Ok((vec!(), fragment)),
_ => Err(format!("Query didn't start with '?': '{}..'", before_fragment)),
}
} }
impl FromStr for Url { impl FromStr for Url {
fn from_str(s: &str) -> Option<Url> { fn from_str(s: &str) -> Option<Url> {
match Url::parse(s) { Url::parse(s).ok()
Ok(url) => Some(url),
Err(_) => None
}
} }
} }
impl FromStr for Path { impl FromStr for Path {
fn from_str(s: &str) -> Option<Path> { fn from_str(s: &str) -> Option<Path> {
match Path::parse(s) { Path::parse(s).ok()
Ok(path) => Some(path),
Err(_) => None
}
} }
} }
impl fmt::Show for Url { impl fmt::Show for Url {
/** /// Converts a URL from `Url` to string representation.
* Converts a URL from `Url` to string representation. ///
* /// # Returns
* # Arguments ///
* /// A string that contains the formatted URL. Note that this will usually
* `url` - a URL. /// be an inverse of `from_str` but might strip out unneeded separators;
* /// for example, "http://somehost.com?", when parsed and formatted, will
* # Returns /// result in just "http://somehost.com".
*
* A string that contains the formatted URL. Note that this will usually
* be an inverse of `from_str` but might strip out unneeded separators;
* for example, "http://somehost.com?", when parsed and formatted, will
* result in just "http://somehost.com".
*/
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
try!(write!(f, "{}:", self.scheme)); try!(write!(f, "{}:", self.scheme));
@ -797,40 +791,39 @@ fn test_get_authority() {
let (u, h, p, r) = get_authority( let (u, h, p, r) = get_authority(
"//user:pass@rust-lang.org/something").unwrap(); "//user:pass@rust-lang.org/something").unwrap();
assert_eq!(u, Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); assert_eq!(u, Some(UserInfo::new("user".to_string(), Some("pass".to_string()))));
assert_eq!(h, "rust-lang.org".to_string()); assert_eq!(h, "rust-lang.org");
assert!(p.is_none()); assert!(p.is_none());
assert_eq!(r, "/something".to_string()); assert_eq!(r, "/something");
let (u, h, p, r) = get_authority( let (u, h, p, r) = get_authority(
"//rust-lang.org:8000?something").unwrap(); "//rust-lang.org:8000?something").unwrap();
assert!(u.is_none()); assert!(u.is_none());
assert_eq!(h, "rust-lang.org".to_string()); assert_eq!(h, "rust-lang.org");
assert_eq!(p, Some("8000".to_string())); assert_eq!(p, Some(8000));
assert_eq!(r, "?something".to_string()); assert_eq!(r, "?something");
let (u, h, p, r) = get_authority( let (u, h, p, r) = get_authority("//rust-lang.org#blah").unwrap();
"//rust-lang.org#blah").unwrap();
assert!(u.is_none()); assert!(u.is_none());
assert_eq!(h, "rust-lang.org".to_string()); assert_eq!(h, "rust-lang.org");
assert!(p.is_none()); assert!(p.is_none());
assert_eq!(r, "#blah".to_string()); assert_eq!(r, "#blah");
// ipv6 tests // ipv6 tests
let (_, h, _, _) = get_authority( let (_, h, _, _) = get_authority(
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah").unwrap(); "//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah").unwrap();
assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334".to_string()); assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334");
let (_, h, p, _) = get_authority( let (_, h, p, _) = get_authority(
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah").unwrap(); "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah").unwrap();
assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334".to_string()); assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334");
assert_eq!(p, Some("8000".to_string())); assert_eq!(p, Some(8000));
let (u, h, p, _) = get_authority( let (u, h, p, _) = get_authority(
"//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah" "//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah"
).unwrap(); ).unwrap();
assert_eq!(u, Some(UserInfo::new("us".to_string(), Some("p".to_string())))); assert_eq!(u, Some(UserInfo::new("us".to_string(), Some("p".to_string()))));
assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334".to_string()); assert_eq!(h, "2001:0db8:85a3:0042:0000:8a2e:0370:7334");
assert_eq!(p, Some("8000".to_string())); assert_eq!(p, Some(8000));
// invalid authorities; // invalid authorities;
assert!(get_authority("//user:pass@rust-lang:something").is_err()); assert!(get_authority("//user:pass@rust-lang:something").is_err());
@ -839,25 +832,27 @@ fn test_get_authority() {
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a").is_err()); "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a").is_err());
assert!(get_authority( assert!(get_authority(
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00").is_err()); "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00").is_err());
// outside u16 range
assert!(get_authority("//user:pass@rust-lang:65536").is_err());
// these parse as empty, because they don't start with '//' // these parse as empty, because they don't start with '//'
let (_, h, _, _) = get_authority("user:pass@rust-lang").unwrap(); let (_, h, _, _) = get_authority("user:pass@rust-lang").unwrap();
assert_eq!(h, "".to_string()); assert_eq!(h, "");
let (_, h, _, _) = get_authority("rust-lang.org").unwrap(); let (_, h, _, _) = get_authority("rust-lang.org").unwrap();
assert_eq!(h, "".to_string()); assert_eq!(h, "");
} }
#[test] #[test]
fn test_get_path() { fn test_get_path() {
let (p, r) = get_path("/something+%20orother", true).unwrap(); let (p, r) = get_path("/something+%20orother", true).unwrap();
assert_eq!(p, "/something+ orother".to_string()); assert_eq!(p, "/something+ orother".to_string());
assert_eq!(r, "".to_string()); assert_eq!(r, "");
let (p, r) = get_path("test@email.com#fragment", false).unwrap(); let (p, r) = get_path("test@email.com#fragment", false).unwrap();
assert_eq!(p, "test@email.com".to_string()); assert_eq!(p, "test@email.com".to_string());
assert_eq!(r, "#fragment".to_string()); assert_eq!(r, "#fragment");
let (p, r) = get_path("/gen/:addr=?q=v", false).unwrap(); let (p, r) = get_path("/gen/:addr=?q=v", false).unwrap();
assert_eq!(p, "/gen/:addr=".to_string()); assert_eq!(p, "/gen/:addr=".to_string());
assert_eq!(r, "?q=v".to_string()); assert_eq!(r, "?q=v");
//failure cases //failure cases
assert!(get_path("something?q", true).is_err()); assert!(get_path("something?q", true).is_err());
@ -879,7 +874,7 @@ mod tests {
assert_eq!(&u.scheme, &"http".to_string()); assert_eq!(&u.scheme, &"http".to_string());
assert_eq!(&u.user, &Some(UserInfo::new("user".to_string(), Some("pass".to_string())))); assert_eq!(&u.user, &Some(UserInfo::new("user".to_string(), Some("pass".to_string()))));
assert_eq!(&u.host, &"rust-lang.org".to_string()); assert_eq!(&u.host, &"rust-lang.org".to_string());
assert_eq!(&u.port, &Some("8080".to_string())); assert_eq!(&u.port, &Some(8080));
assert_eq!(&u.path.path, &"/doc/~u".to_string()); assert_eq!(&u.path.path, &"/doc/~u".to_string());
assert_eq!(&u.path.query, &vec!(("s".to_string(), "v".to_string()))); assert_eq!(&u.path.query, &vec!(("s".to_string(), "v".to_string())));
assert_eq!(&u.path.fragment, &Some("something".to_string())); assert_eq!(&u.path.fragment, &Some("something".to_string()));
@ -917,14 +912,14 @@ mod tests {
let url = from_str::<Url>(urlstr).unwrap(); let url = from_str::<Url>(urlstr).unwrap();
assert_eq!(&url.scheme, &"scheme".to_string()); assert_eq!(&url.scheme, &"scheme".to_string());
assert_eq!(&url.host, &"host".to_string()); assert_eq!(&url.host, &"host".to_string());
assert_eq!(&url.port, &Some("1234".to_string())); assert_eq!(&url.port, &Some(1234));
// is empty path really correct? Other tests think so // is empty path really correct? Other tests think so
assert_eq!(&url.path.path, &"".to_string()); assert_eq!(&url.path.path, &"".to_string());
let urlstr = "scheme://host:1234/"; let urlstr = "scheme://host:1234/";
let url = from_str::<Url>(urlstr).unwrap(); let url = from_str::<Url>(urlstr).unwrap();
assert_eq!(&url.scheme, &"scheme".to_string()); assert_eq!(&url.scheme, &"scheme".to_string());
assert_eq!(&url.host, &"host".to_string()); assert_eq!(&url.host, &"host".to_string());
assert_eq!(&url.port, &Some("1234".to_string())); assert_eq!(&url.port, &Some(1234));
assert_eq!(&url.path.path, &"/".to_string()); assert_eq!(&url.path.path, &"/".to_string());
} }