rust/src/libextra/net_url.rs

1072 lines
31 KiB
Rust
Raw Normal View History

// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Types/fns concerning URLs (see RFC 3986)
#[allow(missing_doc)];
use core::prelude::*;
use core::iterator::IteratorUtil;
use core::cmp::Eq;
use core::io::{Reader, ReaderUtil};
use core::io;
use core::hashmap::HashMap;
use core::str;
use core::to_bytes;
use core::uint;
// A URL split into its components. `from_str` fills these in from a raw
// string and `to_str` turns them back into text.
#[deriving(Clone, Eq)]
2012-09-02 22:59:22 -05:00
struct Url {
// Scheme name: the text before the first ':' of the raw URL.
scheme: ~str,
2012-08-30 13:01:39 -05:00
// User name / password taken from the authority, if any.
user: Option<UserInfo>,
// Host part of the authority; empty when the URL has no authority.
host: ~str,
2012-08-20 14:23:37 -05:00
// Port, kept as the text that followed the host's ':' (not a number).
port: Option<~str>,
// Path; `from_str` stores it percent-decoded.
path: ~str,
2012-08-30 13:01:39 -05:00
// Key/value pairs of the query string; `from_str` stores them decoded.
query: Query,
2012-08-20 14:23:37 -05:00
// Text after '#', if any; `from_str` stores it percent-decoded.
fragment: Option<~str>
2012-09-02 22:59:22 -05:00
}
// The `user[:pass]` part that may precede '@' in a URL authority.
#[deriving(Clone, Eq)]
struct UserInfo {
// User name (text before the first ':').
user: ~str,
2012-08-20 14:23:37 -05:00
// Password (text after the first ':'), if one was given.
pass: Option<~str>
}
2012-10-03 18:43:56 -05:00
// A query string as an ordered list of (key, value) pairs.
pub type Query = ~[(~str, ~str)];
impl Url {
// Builds a `Url` from already-separated components. The arguments are
// stored as given; no validation, encoding or decoding is performed here.
pub fn new(scheme: ~str,
user: Option<UserInfo>,
host: ~str,
port: Option<~str>,
path: ~str,
query: Query,
fragment: Option<~str>)
-> Url {
Url {
scheme: scheme,
user: user,
host: host,
port: port,
path: path,
query: query,
fragment: fragment,
}
}
}
impl UserInfo {
// Builds a `UserInfo` from a user name and an optional password; both are
// stored as given.
pub fn new(user: ~str, pass: Option<~str>) -> UserInfo {
UserInfo { user: user, pass: pass }
}
}
// Percent-encodes `s`, one byte at a time.
//
// Unreserved characters (letters, digits, '-', '.', '_', '~') are always
// copied through. When `full_url` is true the delimiter characters listed
// below are copied through as well, so that the structure of a complete URL
// survives; when it is false they are escaped like any other byte.
//
// Every escaped byte is written as '%' followed by exactly two upper-case
// hex digits, as RFC 3986 section 2.1 requires (a newline becomes "%0A",
// not "%A").
fn encode_inner(s: &str, full_url: bool) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            // The input is consumed as raw bytes, so a multi-byte UTF-8
            // character turns into one escape per byte.
            let ch = rdr.read_byte() as char;
            match ch {
              // unreserved:
              'A' .. 'Z' |
              'a' .. 'z' |
              '0' .. '9' |
              '-' | '.' | '_' | '~' => {
                str::push_char(&mut out, ch);
              }
              _ => {
                  if full_url {
                    match ch {
                      // gen-delims:
                      ':' | '/' | '?' | '#' | '[' | ']' | '@' |

                      // sub-delims:
                      // NOTE(review): RFC 3986 lists '\'' here rather than
                      // '"'. The set is kept unchanged because
                      // `decode_inner` mirrors it.
                      '!' | '$' | '&' | '"' | '(' | ')' | '*' |
                      '+' | ',' | ';' | '=' => {
                        str::push_char(&mut out, ch);
                      }

                      // zero-padded so the escape is always two digits
                      _ => out += fmt!("%%%02X", ch as uint)
                    }
                } else {
                    // zero-padded so the escape is always two digits
                    out += fmt!("%%%02X", ch as uint);
                }
              }
            }
        }

        out
    }
}
2012-07-31 22:14:50 -05:00
/**
* Percent-encodes a complete URI string.
*
* Unreserved characters and the URI delimiter characters (the gen-delims and
* sub-delims sets listed in `encode_inner`) are passed through unchanged, so
* the structure of the URI is preserved; every other byte is replaced by a
* percent escape.
*
* This function is intended to follow RFC 3986.
*/
pub fn encode(s: &str) -> ~str {
encode_inner(s, true)
}
2012-07-31 22:14:50 -05:00
/**
* Percent-encodes a single URI component.
*
* Only unreserved characters (letters, digits, '-', '.', '_', '~') are
* passed through unchanged; every other byte, including delimiters such as
* '/', '?' and '&', is replaced by a percent escape.
*
* This function is intended to follow RFC 3986.
*/
pub fn encode_component(s: &str) -> ~str {
encode_inner(s, false)
}
2012-09-02 23:43:20 -05:00
// Reverses percent-encoding in `s`.
//
// Each '%' is expected to be followed by two hex digits. When `full_url` is
// false every escape is replaced by the character with that code. When
// `full_url` is true, escapes that stand for one of the delimiter characters
// listed below are copied through untouched (still escaped), so decoding a
// whole URL does not change its structure.
//
// NOTE(review): the parsed value is unwrapped with `.get()`, so input where
// '%' is not followed by hex digits presumably fails rather than returning
// an error - confirm before feeding untrusted text.
// NOTE(review): each escape is turned into a char on its own, so an escaped
// multi-byte UTF-8 sequence is not reassembled into one character.
fn decode_inner(s: &str, full_url: bool) -> ~str {
do io::with_str_reader(s) |rdr| {
let mut out = ~"";
while !rdr.eof() {
match rdr.read_char() {
2012-08-03 21:59:04 -05:00
'%' => {
// the two bytes after '%' are the hex digits of the escape
let bytes = rdr.read_bytes(2u);
let ch = uint::parse_bytes(bytes, 16u).get() as char;
if full_url {
// Only decode some characters:
match ch {
// gen-delims:
':' | '/' | '?' | '#' | '[' | ']' | '@' |
// sub-delims:
'!' | '$' | '&' | '"' | '(' | ')' | '*' |
2012-08-03 21:59:04 -05:00
'+' | ',' | ';' | '=' => {
2012-09-21 20:36:32 -05:00
// re-emit the escape exactly as it was read
str::push_char(&mut out, '%');
str::push_char(&mut out, bytes[0u] as char);
str::push_char(&mut out, bytes[1u] as char);
}
2012-09-21 20:36:32 -05:00
ch => str::push_char(&mut out, ch)
}
} else {
2012-09-21 20:36:32 -05:00
str::push_char(&mut out, ch);
}
}
2012-09-21 20:36:32 -05:00
// anything that is not an escape is copied through
ch => str::push_char(&mut out, ch)
}
}
out
}
}
/**
* Decode a string encoded with percent encoding.
*
* This will only decode escape sequences generated by encode: escapes that
* stand for one of the URI delimiter characters are left in their escaped
* form.
*/
pub fn decode(s: &str) -> ~str {
decode_inner(s, true)
}
2012-07-31 22:14:50 -05:00
/**
* Decode a string encoded with percent encoding.
*
* Unlike `decode`, every escape sequence is replaced, including those that
* stand for URI delimiter characters.
*/
pub fn decode_component(s: &str) -> ~str {
decode_inner(s, false)
}
2012-09-02 23:43:20 -05:00
// Encodes `s` for use as a key or value in the
// 'application/x-www-form-urlencoded' format, one byte at a time.
//
// Letters, digits, '_', '.' and '-' are copied through, a space becomes
// '+', and every other byte becomes '%' followed by exactly two upper-case
// hex digits (zero-padded, so a newline is "%0A" rather than "%A").
fn encode_plus(s: &str) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            let ch = rdr.read_byte() as char;
            match ch {
              'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '.' | '-' => {
                str::push_char(&mut out, ch);
              }
              ' ' => str::push_char(&mut out, '+'),
              // zero-padded so the escape is always two digits
              _ => out += fmt!("%%%02X", ch as uint)
            }
        }

        out
    }
}
2012-07-31 22:14:50 -05:00
/**
 * Encode a hashmap to the 'application/x-www-form-urlencoded' media type.
 *
 * Every value of every key produces one `key=value` pair; pairs are joined
 * with '&'. Keys and values are encoded with `encode_plus`. A key whose
 * value list is empty contributes nothing to the output.
 */
pub fn encode_form_urlencoded(m: &HashMap<~str, ~[~str]>) -> ~str {
    let mut out = ~"";
    // true until the first pair has been written; later pairs get a '&'
    let mut first = true;

    for m.each |key, values| {
        let key = encode_plus(*key);

        for values.each |value| {
            if !first {
                str::push_char(&mut out, '&');
            }
            first = false;

            out += fmt!("%s=%s", key, encode_plus(*value));
        }
    }

    out
}
2012-07-31 22:14:50 -05:00
/**
* Decode a string encoded with the 'application/x-www-form-urlencoded' media
* type into a hashmap.
*
* Pairs are separated by '&' or ';', and the key is separated from the value
* by '='. In both, '+' is read as a space and '%' followed by two hex digits
* is replaced by the character with that code. Values of a repeated key are
* collected, in order, into that key's vector.
*
* A pair is only recorded when both its key and its value are non-empty.
*/
pub fn decode_form_urlencoded(s: &[u8]) -> HashMap<~str, ~[~str]> {
do io::with_bytes_reader(s) |rdr| {
let mut m = HashMap::new();
// text accumulated so far for the pair being read
let mut key = ~"";
let mut value = ~"";
// true while reading the key, false once '=' has been seen
let mut parsing_key = true;
while !rdr.eof() {
match rdr.read_char() {
'&' | ';' => {
// end of a pair: store it, then start a fresh one
if key != ~"" && value != ~"" {
// take the existing vector out so it can be extended and put back
let mut values = match m.pop(&key) {
2013-02-15 01:30:30 -06:00
Some(values) => values,
None => ~[],
};
values.push(value);
m.insert(key, values);
}
parsing_key = true;
key = ~"";
value = ~"";
}
'=' => parsing_key = false,
ch => {
let ch = match ch {
'%' => {
// NOTE(review): `.get()` presumably fails when the two bytes
// are not hex digits - confirm.
let bytes = rdr.read_bytes(2u);
uint::parse_bytes(bytes, 16u).get() as char
}
'+' => ' ',
ch => ch
};
if parsing_key {
str::push_char(&mut key, ch)
} else {
str::push_char(&mut value, ch)
}
}
}
}
// the last pair has no trailing separator, so store it here
if key != ~"" && value != ~"" {
let mut values = match m.pop(&key) {
2013-02-15 01:30:30 -06:00
Some(values) => values,
None => ~[],
};
values.push(value);
m.insert(key, values);
}
m
}
}
// Splits `s` at the first occurrence of `c`.
//
// Returns the text before the separator and the text after it; the
// separator itself is dropped. When `c` does not occur, the first element
// is all of `s` and the second is empty. The search compares one byte at a
// time.
fn split_char_first(s: &str, c: char) -> (~str, ~str) {
    let total = str::len(s);
    let mut cut = total; // byte offset of the separator, or `total` if none
    let mut skip = 0;    // 1 once a separator was found, so it gets skipped

    do io::with_str_reader(s) |rdr| {
        while !rdr.eof() {
            if rdr.read_byte() as char == c {
                // the reader is now one past the separator
                cut = rdr.tell() - 1;
                skip = 1;
                break;
            }
        }
    }

    let head = str::slice(s, 0, cut).to_owned();
    if cut + skip == total {
        // nothing follows the separator (or there was none)
        (head, ~"")
    } else {
        (head, str::slice(s, cut + skip, total).to_owned())
    }
}
// Parses "user" or "user:pass" into a `UserInfo`. The split happens at the
// first ':'; an empty password (or no ':' at all) is stored as `None`.
fn userinfo_from_str(uinfo: &str) -> UserInfo {
    let (name, secret) = split_char_first(uinfo, ':');
    let pass = if secret.is_empty() { None } else { Some(secret) };
    UserInfo::new(name, pass)
}
// Formats `userinfo` the way it appears in front of a host: "user:pass@"
// when a password is present, "user@" otherwise. The trailing '@' is part
// of the result. The fields are written as stored, without encoding.
fn userinfo_to_str(userinfo: &UserInfo) -> ~str {
match userinfo.pass {
Some(ref pass) => fmt!("%s:%s@", userinfo.user, *pass),
None => fmt!("%s@", userinfo.user),
}
}
// Parses a raw query string (without the leading '?') into key/value pairs.
//
// The string is split on '&', each piece is split at its first '=', and
// both halves are percent-decoded with `decode_component`. A piece without
// '=' yields an empty value. An empty input yields an empty list.
fn query_from_str(rawquery: &str) -> Query {
2012-08-30 13:01:39 -05:00
let mut query: Query = ~[];
if str::len(rawquery) != 0 {
for str::each_split_char(rawquery, '&') |p| {
let (k, v) = split_char_first(p, '=');
query.push((decode_component(k), decode_component(v)));
};
}
2012-08-01 19:30:05 -05:00
return query;
}
// Formats key/value pairs as a query string (without a leading '?').
//
// Keys and values are percent-encoded with `encode_component`, each pair is
// written as "key=value", and the pairs are joined with '&'.
pub fn query_to_str(query: &Query) -> ~str {
let mut strvec = ~[];
for query.each |kv| {
match kv {
&(ref k, ref v) => {
strvec.push(fmt!("%s=%s",
encode_component(*k),
encode_component(*v))
);
}
}
}
return str::connect(strvec, "&");
}
// returns the scheme and the rest of the url, or a parsing error
//
// The scheme is everything before the first ':'. It must start with a
// letter and may continue with letters, digits, '+', '-' and '.'. On success
// the ':' itself is dropped and the second element is the text after it.
pub fn get_scheme(rawurl: &str) -> Result<(~str, ~str), ~str> {
for rawurl.iter().enumerate().advance |(i,c)| {
match c {
'A' .. 'Z' | 'a' .. 'z' => loop,
2012-09-01 20:38:05 -05:00
'0' .. '9' | '+' | '-' | '.' => {
// these are legal in a scheme, but not as its first character
if i == 0 {
return Err(~"url: Scheme must begin with a letter.");
}
loop;
}
2012-08-03 21:59:04 -05:00
':' => {
// end of the scheme
if i == 0 {
return Err(~"url: Scheme cannot be empty.");
} else {
return Ok((rawurl.slice(0,i).to_owned(),
rawurl.slice(i+1,str::len(rawurl)).to_owned()));
}
}
2012-08-03 21:59:04 -05:00
_ => {
return Err(~"url: Invalid character in scheme.");
}
}
};
// ran off the end of the input without seeing a ':'
return Err(~"url: Scheme must be terminated with a colon.");
}
// Class of the characters seen since the last separator while scanning an
// authority in `get_authority`. The scan starts at `Digit` and only ever
// moves towards `Unreserved` until it is reset at a ':' or '@'.
#[deriving(Clone, Eq)]
2012-08-30 13:01:39 -05:00
enum Input {
Digit, // all digits
Hex, // digits and letters a-f
Unreserved // all other legal characters
2012-08-27 18:26:35 -05:00
}
// returns userinfo, host, port, and unparsed part, or an error
fn get_authority(rawurl: &str) ->
Result<(Option<UserInfo>, ~str, Option<~str>, ~str), ~str> {
if !str::starts_with(rawurl, "//") {
// there is no authority.
return Ok((None, ~"", None, rawurl.to_str()));
}
2012-08-30 13:01:39 -05:00
enum State {
Start, // starting state
PassHostPort, // could be in user or port
Ip6Port, // either in ipv6 host or port
Ip6Host, // are in an ipv6 host
InHost, // are in a host - may be ipv6, but don't know yet
InPort // are in port
}
2012-08-27 18:26:35 -05:00
let len = rawurl.len();
let mut st = Start;
let mut in = Digit; // most restricted, start here.
let mut userinfo = None;
let mut host = ~"";
let mut port = None;
let mut colon_count = 0;
let mut (pos, begin, end) = (0, 2, len);
for rawurl.iter().enumerate().advance |(i,c)| {
if i < 2 { loop; } // ignore the leading //
// deal with input class first
match c {
2012-09-01 20:38:05 -05:00
'0' .. '9' => (),
'A' .. 'F' | 'a' .. 'f' => {
2012-08-30 13:01:39 -05:00
if in == Digit {
in = Hex;
}
}
2012-09-01 20:38:05 -05:00
'G' .. 'Z' | 'g' .. 'z' | '-' | '.' | '_' | '~' | '%' |
2012-08-03 21:59:04 -05:00
'&' |'\'' | '(' | ')' | '+' | '!' | '*' | ',' | ';' | '=' => {
2012-08-30 13:01:39 -05:00
in = Unreserved;
}
2012-08-03 21:59:04 -05:00
':' | '@' | '?' | '#' | '/' => {
// separators, don't change anything
}
2012-08-03 21:59:04 -05:00
_ => {
return Err(~"Illegal character in authority");
}
}
2012-07-31 22:14:50 -05:00
// now process states
match c {
2012-08-03 21:59:04 -05:00
':' => {
colon_count += 1;
match st {
2012-08-30 13:01:39 -05:00
Start => {
pos = i;
2012-08-30 13:01:39 -05:00
st = PassHostPort;
}
2012-08-30 13:01:39 -05:00
PassHostPort => {
// multiple colons means ipv6 address.
2012-08-30 13:01:39 -05:00
if in == Unreserved {
return Err(
~"Illegal characters in IPv6 address.");
}
2012-08-30 13:01:39 -05:00
st = Ip6Host;
}
2012-08-30 13:01:39 -05:00
InHost => {
pos = i;
// can't be sure whether this is an ipv6 address or a port
2012-08-30 13:01:39 -05:00
if in == Unreserved {
return Err(~"Illegal characters in authority.");
}
2012-08-30 13:01:39 -05:00
st = Ip6Port;
}
2012-08-30 13:01:39 -05:00
Ip6Port => {
if in == Unreserved {
return Err(~"Illegal characters in authority.");
}
2012-08-30 13:01:39 -05:00
st = Ip6Host;
}
2012-08-30 13:01:39 -05:00
Ip6Host => {
if colon_count > 7 {
host = str::slice(rawurl, begin, i).to_owned();
pos = i;
2012-08-30 13:01:39 -05:00
st = InPort;
}
}
2012-08-03 21:59:04 -05:00
_ => {
return Err(~"Invalid ':' in authority.");
}
}
2012-08-30 13:01:39 -05:00
in = Digit; // reset input class
}
2012-08-03 21:59:04 -05:00
'@' => {
2012-08-30 13:01:39 -05:00
in = Digit; // reset input class
colon_count = 0; // reset count
match st {
2012-08-30 13:01:39 -05:00
Start => {
let user = str::slice(rawurl, begin, i).to_owned();
userinfo = Some(UserInfo::new(user, None));
2012-08-30 13:01:39 -05:00
st = InHost;
}
2012-08-30 13:01:39 -05:00
PassHostPort => {
let user = str::slice(rawurl, begin, pos).to_owned();
let pass = str::slice(rawurl, pos+1, i).to_owned();
userinfo = Some(UserInfo::new(user, Some(pass)));
2012-08-30 13:01:39 -05:00
st = InHost;
}
2012-08-03 21:59:04 -05:00
_ => {
return Err(~"Invalid '@' in authority.");
}
}
begin = i+1;
}
2012-07-31 22:14:50 -05:00
2012-08-03 21:59:04 -05:00
'?' | '#' | '/' => {
end = i;
break;
}
2012-08-03 21:59:04 -05:00
_ => ()
}
end = i;
}
2012-08-05 18:33:28 -05:00
let end = end; // make end immutable so it can be captured
let host_is_end_plus_one: &fn() -> bool = || {
2012-08-05 18:33:28 -05:00
end+1 == len
2012-09-25 19:39:22 -05:00
&& !['?', '#', '/'].contains(&(rawurl[end] as char))
2012-08-05 18:33:28 -05:00
};
// finish up
match st {
2012-08-30 13:01:39 -05:00
Start => {
2012-08-05 18:33:28 -05:00
if host_is_end_plus_one() {
host = str::slice(rawurl, begin, end+1).to_owned();
} else {
host = str::slice(rawurl, begin, end).to_owned();
}
}
2012-08-30 13:01:39 -05:00
PassHostPort | Ip6Port => {
if in != Digit {
return Err(~"Non-digit characters in port.");
}
host = str::slice(rawurl, begin, pos).to_owned();
port = Some(str::slice(rawurl, pos+1, end).to_owned());
}
2012-08-30 13:01:39 -05:00
Ip6Host | InHost => {
host = str::slice(rawurl, begin, end).to_owned();
}
2012-08-30 13:01:39 -05:00
InPort => {
if in != Digit {
return Err(~"Non-digit characters in port.");
}
port = Some(str::slice(rawurl, pos+1, end).to_owned());
}
}
2012-08-05 18:33:28 -05:00
let rest = if host_is_end_plus_one() { ~"" }
else { str::slice(rawurl, end, len).to_owned() };
return Ok((userinfo, host, port, rest));
}
// returns the path and unparsed part of url, or an error
//
// The path runs up to the first '?' or '#' (or to the end of the input).
// Only characters RFC 3986 allows in a path are accepted: unreserved
// characters, '%' (for escapes), the sub-delims, ':', '@' and '/'. Anything
// else is an error.
//
// `authority` tells whether the URL had an authority; in that case a
// non-empty path must start with '/'.
//
// On success the path is returned percent-decoded, and the unparsed part
// starts at the terminating '?' or '#' (empty if there was none).
fn get_path(rawurl: &str, authority: bool) ->
    Result<(~str, ~str), ~str> {
    let len = str::len(rawurl);
    let mut end = len;
    for rawurl.iter().enumerate().advance |(i,c)| {
        match c {
          'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '&' |'\'' | '(' | ')' | '.'
          | '@' | ':' | '%' | '/' | '+' | '!' | '*' | ',' | ';' | '='
          | '_' | '-' | '~' | '$' => {
            loop;
          }
          '?' | '#' => {
            end = i;
            break;
          }
          _ => return Err(~"Invalid character in path.")
        }
    }

    if authority {
        if end != 0 && !str::starts_with(rawurl, "/") {
            // the space before the line continuation is needed: the
            // continuation swallows the next line's leading whitespace
            return Err(~"Non-empty path must begin with \
                               '/' in presence of authority.");
        }
    }

    return Ok((decode_component(str::slice(rawurl, 0, end)),
                    str::slice(rawurl, end, len).to_owned()));
}
// returns the parsed query and the fragment, if present
//
// `rawurl` is what is left after the path: empty, "?query", "#fragment" or
// "?query#fragment". The fragment is percent-decoded; the query is parsed
// with `query_from_str`. An empty fragment after a query is reported as
// `None`. Every path through this function returns `Ok`.
fn get_query_fragment(rawurl: &str) ->
Result<(Query, Option<~str>), ~str> {
if !str::starts_with(rawurl, "?") {
// no query: the rest is either a fragment or nothing
if str::starts_with(rawurl, "#") {
let f = decode_component(str::slice(rawurl,
2012-07-31 22:14:50 -05:00
1,
str::len(rawurl)));
return Ok((~[], Some(f)));
} else {
return Ok((~[], None));
}
}
// skip the leading '?', then split query from fragment at the first '#'
let (q, r) = split_char_first(str::slice(rawurl, 1, rawurl.len()), '#');
2012-07-31 22:14:50 -05:00
let f = if str::len(r) != 0 {
Some(decode_component(r)) } else { None };
return Ok((query_from_str(q), f));
}
/**
* Parse a `str` to a `url`
*
* The input is consumed left to right: scheme, then authority (user info,
* host, port), then path, then query and fragment. Each step hands the
* unparsed remainder to the next one.
*
* # Arguments
*
* `rawurl` - a string representing a full url, including scheme.
*
* # Returns
*
* a `url` that contains the parsed representation of the url, or the error
* message of the first step that failed.
*
*/
pub fn from_str(rawurl: &str) -> Result<Url, ~str> {
// scheme
let (scheme, rest) = match get_scheme(rawurl) {
Ok(val) => val,
Err(e) => return Err(e),
};
// authority
let (userinfo, host, port, rest) = match get_authority(rest) {
Ok(val) => val,
Err(e) => return Err(e),
};
// path
// an authority is considered present only when the host is non-empty
let has_authority = if host == ~"" { false } else { true };
let (path, rest) = match get_path(rest, has_authority) {
Ok(val) => val,
Err(e) => return Err(e),
};
// query and fragment
let (query, fragment) = match get_query_fragment(rest) {
Ok(val) => val,
Err(e) => return Err(e),
};
Ok(Url::new(scheme, userinfo, host, port, path, query, fragment))
}
// Lets a `Url` be produced through the generic `FromStr` interface.
impl FromStr for Url {
// Parses `s` with the free function `from_str`; a parse error becomes
// `None` and its message is discarded.
fn from_str(s: &str) -> Option<Url> {
match from_str(s) {
2013-02-15 01:30:30 -06:00
Ok(url) => Some(url),
Err(_) => None
}
}
}
/**
 * Format a `url` as a string
 *
 * The result is `scheme:` followed by the authority (`//user:pass@host:port`,
 * written only when the host is non-empty), the path, `?query` (only when
 * the query is non-empty) and `#fragment` (only when a fragment is present).
 * Query keys, query values and the fragment are percent-encoded; the path is
 * written as stored.
 *
 * # Arguments
 *
 * `url` - a url.
 *
 * # Returns
 *
 * a `str` that contains the formatted url. Note that this will usually
 * be an inverse of `from_str` but might strip out unneeded separators.
 * for example, "http://somehost.com?", when parsed and formatted, will
 * result in just "http://somehost.com".
 *
 */
pub fn to_str(url: &Url) -> ~str {
    let user = match url.user {
        Some(ref user) => userinfo_to_str(user),
        None => ~"",
    };

    // the port belongs to the authority, so a parsed port must be written
    // back for the URL to round-trip
    let port = match url.port {
        Some(ref port) => fmt!(":%s", *port),
        None => ~"",
    };

    let authority = if url.host.is_empty() {
        ~""
    } else {
        fmt!("//%s%s%s", user, url.host, port)
    };

    let query = if url.query.is_empty() {
        ~""
    } else {
        fmt!("?%s", query_to_str(&url.query))
    };

    let fragment = match url.fragment {
        Some(ref fragment) => fmt!("#%s", encode_component(*fragment)),
        None => ~"",
    };

    fmt!("%s:%s%s%s%s", url.scheme, authority, url.path, query, fragment)
}
// Lets a `Url` be formatted through the generic `ToStr` interface.
impl ToStr for Url {
// Delegates to the free function `to_str`.
pub fn to_str(&self) -> ~str {
to_str(self)
}
}
2013-05-02 17:33:27 -05:00
// Feeds a `Url` to byte-oriented consumers by way of its string form.
impl IterBytes for Url {
// Formats the url with `to_str` and forwards to the string's own
// `iter_bytes`, passing `lsb0` and `f` through unchanged.
fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
self.to_str().iter_bytes(lsb0, f)
}
}
// Put a few tests outside of the 'test' module so they can test the internal
// functions and those functions don't need 'pub'
// Checks `split_char_first` with and without the separator present.
#[test]
fn test_split_char_first() {
2013-05-23 11:39:00 -05:00
// separator present: it is dropped and the text is split around it
let (u,v) = split_char_first("hello, sweet world", ',');
assert_eq!(u, ~"hello");
assert_eq!(v, ~" sweet world");
2013-05-23 11:39:00 -05:00
// separator absent: everything ends up in the first half
let (u,v) = split_char_first("hello sweet world", ',');
assert_eq!(u, ~"hello sweet world");
assert_eq!(v, ~"");
}
// Checks `get_authority` on user info, ports, IPv6 hosts, malformed
// authorities, and inputs that have no authority at all.
#[test]
fn test_get_authority() {
// user and password, authority terminated by the path
let (u, h, p, r) = get_authority(
"//user:pass@rust-lang.org/something").unwrap();
assert_eq!(u, Some(UserInfo::new(~"user", Some(~"pass"))));
assert_eq!(h, ~"rust-lang.org");
2013-03-28 20:39:09 -05:00
assert!(p.is_none());
assert_eq!(r, ~"/something");
// host and port, authority terminated by the query
let (u, h, p, r) = get_authority(
"//rust-lang.org:8000?something").unwrap();
2013-03-28 20:39:09 -05:00
assert!(u.is_none());
assert_eq!(h, ~"rust-lang.org");
assert_eq!(p, Some(~"8000"));
assert_eq!(r, ~"?something");
// host only, authority terminated by the fragment
let (u, h, p, r) = get_authority(
"//rust-lang.org#blah").unwrap();
2013-03-28 20:39:09 -05:00
assert!(u.is_none());
assert_eq!(h, ~"rust-lang.org");
2013-03-28 20:39:09 -05:00
assert!(p.is_none());
assert_eq!(r, ~"#blah");
// ipv6 tests
let (_, h, _, _) = get_authority(
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah").unwrap();
assert_eq!(h, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");
// an eighth ':' introduces the port
let (_, h, p, _) = get_authority(
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah").unwrap();
assert_eq!(h, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");
assert_eq!(p, Some(~"8000"));
let (u, h, p, _) = get_authority(
"//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah"
).unwrap();
assert_eq!(u, Some(UserInfo::new(~"us", Some(~"p"))));
assert_eq!(h, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");
assert_eq!(p, Some(~"8000"));
// invalid authorities;
2013-03-28 20:39:09 -05:00
assert!(get_authority("//user:pass@rust-lang:something").is_err());
assert!(get_authority("//user@rust-lang:something:/path").is_err());
assert!(get_authority(
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a").is_err());
2013-03-28 20:39:09 -05:00
assert!(get_authority(
"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00").is_err());
// these parse as empty, because they don't start with '//'
2013-05-23 11:39:00 -05:00
let (_, h, _, _) = get_authority("user:pass@rust-lang").unwrap();
assert_eq!(h, ~"");
2013-05-23 11:39:00 -05:00
let (_, h, _, _) = get_authority("rust-lang.org").unwrap();
assert_eq!(h, ~"");
}
// Checks `get_path`: decoding of the path, where the path stops, and the
// leading-'/' requirement when an authority is present.
#[test]
fn test_get_path() {
// the escape is decoded, the literal '+' is kept
let (p, r) = get_path("/something+%20orother", true).unwrap();
assert_eq!(p, ~"/something+ orother");
assert_eq!(r, ~"");
// without an authority the path need not start with '/'
let (p, r) = get_path("test@email.com#fragment", false).unwrap();
assert_eq!(p, ~"test@email.com");
assert_eq!(r, ~"#fragment");
2013-05-23 11:39:00 -05:00
// the path stops at '?', which stays in the unparsed part
let (p, r) = get_path("/gen/:addr=?q=v", false).unwrap();
assert_eq!(p, ~"/gen/:addr=");
assert_eq!(r, ~"?q=v");
//failure cases
2013-05-23 11:39:00 -05:00
// with an authority, a non-empty path must start with '/'
assert!(get_path("something?q", true).is_err());
}
// Tests that only need the public interface of this module: parsing,
// parse/format round trips, and the encode/decode helpers.
#[cfg(test)]
mod tests {
use net_url::*;
2012-12-28 14:46:08 -06:00
use core::hashmap::HashMap;
2012-12-28 14:46:08 -06:00
// A URL with every component present is split into the expected fields.
#[test]
fn test_url_parse() {
let url = ~"http://user:pass@rust-lang.org/doc?s=v#something";
2012-07-31 22:14:50 -05:00
let up = from_str(url);
let u = up.unwrap();
2013-03-28 20:39:09 -05:00
assert!(u.scheme == ~"http");
let userinfo = u.user.get_ref();
2013-03-28 20:39:09 -05:00
assert!(userinfo.user == ~"user");
assert!(userinfo.pass.get_ref() == &~"pass");
assert!(u.host == ~"rust-lang.org");
assert!(u.path == ~"/doc");
assert!(u.query == ~[(~"s", ~"v")]);
assert!(u.fragment.get_ref() == &~"something");
}
2012-08-05 18:33:28 -05:00
// A host followed only by "/" yields that host and the path "/".
#[test]
fn test_url_parse_host_slash() {
2012-08-05 18:33:28 -05:00
let urlstr = ~"http://0.42.42.42/";
let url = from_str(urlstr).unwrap();
2013-03-28 20:39:09 -05:00
assert!(url.host == ~"0.42.42.42");
assert!(url.path == ~"/");
2012-08-05 18:33:28 -05:00
}
2012-08-06 15:12:49 -05:00
// '_' is accepted in a path.
#[test]
fn test_url_with_underscores() {
2012-08-06 15:12:49 -05:00
let urlstr = ~"http://dotcom.com/file_name.html";
let url = from_str(urlstr).unwrap();
2013-03-28 20:39:09 -05:00
assert!(url.path == ~"/file_name.html");
2012-08-06 15:12:49 -05:00
}
2012-08-06 17:17:08 -05:00
// '-' is accepted in a path.
#[test]
fn test_url_with_dashes() {
2012-08-06 17:17:08 -05:00
let urlstr = ~"http://dotcom.com/file-name.html";
let url = from_str(urlstr).unwrap();
2013-03-28 20:39:09 -05:00
assert!(url.path == ~"/file-name.html");
2012-08-06 17:17:08 -05:00
}
// Input without a ':' has no scheme and is rejected.
#[test]
fn test_no_scheme() {
2013-03-28 20:39:09 -05:00
assert!(get_scheme("noschemehere.html").is_err());
}
// A scheme starting with a digit, and an empty scheme, are rejected.
#[test]
fn test_invalid_scheme_errors() {
2013-03-28 20:39:09 -05:00
assert!(from_str("99://something").is_err());
assert!(from_str("://something").is_err());
}
// The tests below parse a URL and format it again, expecting the input
// back unless stated otherwise.
#[test]
fn test_full_url_parse_and_format() {
let url = ~"http://user:pass@rust-lang.org/doc?s=v#something";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
#[test]
fn test_userless_url_parse_and_format() {
let url = ~"http://rust-lang.org/doc?s=v#something";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
#[test]
fn test_queryless_url_parse_and_format() {
let url = ~"http://user:pass@rust-lang.org/doc#something";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
// An empty query ("?" with nothing after it) is dropped when formatting.
#[test]
fn test_empty_query_url_parse_and_format() {
let url = ~"http://user:pass@rust-lang.org/doc?#something";
let should_be = ~"http://user:pass@rust-lang.org/doc#something";
assert_eq!(from_str(url).unwrap().to_str(), should_be);
}
#[test]
fn test_fragmentless_url_parse_and_format() {
let url = ~"http://user:pass@rust-lang.org/doc?q=v";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
#[test]
fn test_minimal_url_parse_and_format() {
let url = ~"http://rust-lang.org/doc";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
#[test]
fn test_scheme_host_only_url_parse_and_format() {
let url = ~"http://rust-lang.org";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
#[test]
fn test_pathless_url_parse_and_format() {
let url = ~"http://user:pass@rust-lang.org?q=v#something";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
#[test]
fn test_scheme_host_fragment_only_url_parse_and_format() {
let url = ~"http://rust-lang.org#something";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
// Escapes in the path and in the query are decoded while parsing.
#[test]
fn test_url_component_encoding() {
let url = ~"http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B";
let u = from_str(url).unwrap();
2013-03-28 20:39:09 -05:00
assert!(u.path == ~"/doc uments");
assert!(u.query == ~[(~"ba%d ", ~"#&+")]);
}
// A URL with no "//" authority round-trips.
#[test]
fn test_url_without_authority() {
let url = ~"mailto:test@email.com";
assert_eq!(from_str(url).unwrap().to_str(), url);
}
// `encode` leaves the URI delimiter characters alone and escapes the rest.
#[test]
fn test_encode() {
assert_eq!(encode(""), ~"");
assert_eq!(encode("http://example.com"), ~"http://example.com");
assert_eq!(encode("foo bar% baz"), ~"foo%20bar%25%20baz");
assert_eq!(encode(" "), ~"%20");
assert_eq!(encode("!"), ~"!");
assert_eq!(encode("\""), ~"\"");
assert_eq!(encode("#"), ~"#");
assert_eq!(encode("$"), ~"$");
assert_eq!(encode("%"), ~"%25");
assert_eq!(encode("&"), ~"&");
assert_eq!(encode("'"), ~"%27");
assert_eq!(encode("("), ~"(");
assert_eq!(encode(")"), ~")");
assert_eq!(encode("*"), ~"*");
assert_eq!(encode("+"), ~"+");
assert_eq!(encode(","), ~",");
assert_eq!(encode("/"), ~"/");
assert_eq!(encode(":"), ~":");
assert_eq!(encode(";"), ~";");
assert_eq!(encode("="), ~"=");
assert_eq!(encode("?"), ~"?");
assert_eq!(encode("@"), ~"@");
assert_eq!(encode("["), ~"[");
assert_eq!(encode("]"), ~"]");
}
// `encode_component` escapes the delimiter characters as well.
#[test]
fn test_encode_component() {
assert_eq!(encode_component(""), ~"");
2013-03-28 20:39:09 -05:00
assert!(encode_component("http://example.com") ==
~"http%3A%2F%2Fexample.com");
2013-03-28 20:39:09 -05:00
assert!(encode_component("foo bar% baz") ==
2013-03-06 21:09:17 -06:00
~"foo%20bar%25%20baz");
assert_eq!(encode_component(" "), ~"%20");
assert_eq!(encode_component("!"), ~"%21");
assert_eq!(encode_component("#"), ~"%23");
assert_eq!(encode_component("$"), ~"%24");
assert_eq!(encode_component("%"), ~"%25");
assert_eq!(encode_component("&"), ~"%26");
assert_eq!(encode_component("'"), ~"%27");
assert_eq!(encode_component("("), ~"%28");
assert_eq!(encode_component(")"), ~"%29");
assert_eq!(encode_component("*"), ~"%2A");
assert_eq!(encode_component("+"), ~"%2B");
assert_eq!(encode_component(","), ~"%2C");
assert_eq!(encode_component("/"), ~"%2F");
assert_eq!(encode_component(":"), ~"%3A");
assert_eq!(encode_component(";"), ~"%3B");
assert_eq!(encode_component("="), ~"%3D");
assert_eq!(encode_component("?"), ~"%3F");
assert_eq!(encode_component("@"), ~"%40");
assert_eq!(encode_component("["), ~"%5B");
assert_eq!(encode_component("]"), ~"%5D");
}
// `decode` leaves escaped delimiter characters in their escaped form.
#[test]
fn test_decode() {
assert_eq!(decode(""), ~"");
assert_eq!(decode("abc/def 123"), ~"abc/def 123");
assert_eq!(decode("abc%2Fdef%20123"), ~"abc%2Fdef 123");
assert_eq!(decode("%20"), ~" ");
assert_eq!(decode("%21"), ~"%21");
assert_eq!(decode("%22"), ~"%22");
assert_eq!(decode("%23"), ~"%23");
assert_eq!(decode("%24"), ~"%24");
assert_eq!(decode("%25"), ~"%");
assert_eq!(decode("%26"), ~"%26");
assert_eq!(decode("%27"), ~"'");
assert_eq!(decode("%28"), ~"%28");
assert_eq!(decode("%29"), ~"%29");
assert_eq!(decode("%2A"), ~"%2A");
assert_eq!(decode("%2B"), ~"%2B");
assert_eq!(decode("%2C"), ~"%2C");
assert_eq!(decode("%2F"), ~"%2F");
assert_eq!(decode("%3A"), ~"%3A");
assert_eq!(decode("%3B"), ~"%3B");
assert_eq!(decode("%3D"), ~"%3D");
assert_eq!(decode("%3F"), ~"%3F");
assert_eq!(decode("%40"), ~"%40");
assert_eq!(decode("%5B"), ~"%5B");
assert_eq!(decode("%5D"), ~"%5D");
}
// `decode_component` replaces every escape.
#[test]
fn test_decode_component() {
assert_eq!(decode_component(""), ~"");
assert_eq!(decode_component("abc/def 123"), ~"abc/def 123");
assert_eq!(decode_component("abc%2Fdef%20123"), ~"abc/def 123");
assert_eq!(decode_component("%20"), ~" ");
assert_eq!(decode_component("%21"), ~"!");
assert_eq!(decode_component("%22"), ~"\"");
assert_eq!(decode_component("%23"), ~"#");
assert_eq!(decode_component("%24"), ~"$");
assert_eq!(decode_component("%25"), ~"%");
assert_eq!(decode_component("%26"), ~"&");
assert_eq!(decode_component("%27"), ~"'");
assert_eq!(decode_component("%28"), ~"(");
assert_eq!(decode_component("%29"), ~")");
assert_eq!(decode_component("%2A"), ~"*");
assert_eq!(decode_component("%2B"), ~"+");
assert_eq!(decode_component("%2C"), ~",");
assert_eq!(decode_component("%2F"), ~"/");
assert_eq!(decode_component("%3A"), ~":");
assert_eq!(decode_component("%3B"), ~";");
assert_eq!(decode_component("%3D"), ~"=");
assert_eq!(decode_component("%3F"), ~"?");
assert_eq!(decode_component("%40"), ~"@");
assert_eq!(decode_component("%5B"), ~"[");
assert_eq!(decode_component("%5D"), ~"]");
}
// Form encoding: keys with no values produce nothing, each value produces
// its own "key=value" pair, and spaces become '+'.
#[test]
fn test_encode_form_urlencoded() {
let mut m = HashMap::new();
assert_eq!(encode_form_urlencoded(&m), ~"");
m.insert(~"", ~[]);
m.insert(~"foo", ~[]);
assert_eq!(encode_form_urlencoded(&m), ~"");
let mut m = HashMap::new();
m.insert(~"foo", ~[~"bar", ~"123"]);
assert_eq!(encode_form_urlencoded(&m), ~"foo=bar&foo=123");
let mut m = HashMap::new();
m.insert(~"foo bar", ~[~"abc", ~"12 = 34"]);
2013-03-28 20:39:09 -05:00
assert!(encode_form_urlencoded(&m) ==
2013-03-06 21:09:17 -06:00
~"foo+bar=abc&foo+bar=12+%3D+34");
}
// Form decoding; the body is currently disabled (see the FIXME below).
#[test]
fn test_decode_form_urlencoded() {
// FIXME #4449: Commented out because this causes an ICE, but only
// on FreeBSD
/*
2013-05-23 11:39:00 -05:00
assert_eq!(decode_form_urlencoded([]).len(), 0);
let s = str::to_bytes("a=1&foo+bar=abc&foo+bar=12+%3D+34");
let form = decode_form_urlencoded(s);
assert_eq!(form.len(), 2);
assert_eq!(form.get_ref(&~"a"), &~[~"1"]);
assert_eq!(form.get_ref(&~"foo bar"), &~[~"abc", ~"12 = 34"]);
*/
}
}