2012-07-09 13:08:07 -05:00
|
|
|
//! Types/fns concerning URLs (see RFC 3986)
|
2012-09-02 23:43:20 -05:00
|
|
|
#[forbid(deprecated_mode)];
|
|
|
|
#[forbid(deprecated_pattern)];
|
2012-07-09 13:08:07 -05:00
|
|
|
|
2012-09-04 13:23:53 -05:00
|
|
|
use core::cmp::Eq;
|
|
|
|
use map::{hashmap, str_hash};
|
|
|
|
use io::{Reader, ReaderUtil};
|
|
|
|
use dvec::DVec;
|
|
|
|
use from_str::FromStr;
|
|
|
|
use result::{Err, Ok};
|
|
|
|
use to_str::ToStr;
|
2012-07-24 22:21:32 -05:00
|
|
|
|
2012-09-04 13:24:44 -05:00
|
|
|
export Url, Query;
|
2012-07-24 22:21:32 -05:00
|
|
|
export from_str, to_str;
|
|
|
|
export get_scheme;
|
2012-09-03 18:09:37 -05:00
|
|
|
export query_to_str;
|
2012-07-24 22:21:32 -05:00
|
|
|
|
|
|
|
export encode, decode;
|
|
|
|
export encode_component, decode_component;
|
|
|
|
export encode_form_urlencoded, decode_form_urlencoded;
|
2012-07-09 13:08:07 -05:00
|
|
|
|
2012-09-02 22:59:22 -05:00
|
|
|
// A URL split into the components produced by `from_str` in this module.
struct Url {
|
2012-07-09 13:08:07 -05:00
|
|
|
// Scheme name: the text before the first ':' (see `get_scheme`).
scheme: ~str,
|
2012-08-30 13:01:39 -05:00
|
|
|
// Optional user name / password taken from the authority section.
user: Option<UserInfo>,
|
2012-07-09 13:08:07 -05:00
|
|
|
// Host name; the empty string when the URL has no authority section.
host: ~str,
|
2012-08-20 14:23:37 -05:00
|
|
|
// Port, kept as the text that followed the host (not converted to a number).
port: Option<~str>,
|
2012-07-09 13:08:07 -05:00
|
|
|
// Path, stored percent-decoded (see `get_path`).
path: ~str,
|
2012-08-30 13:01:39 -05:00
|
|
|
// Decoded (key, value) pairs of the query string, in source order.
query: Query,
|
2012-08-20 14:23:37 -05:00
|
|
|
// Decoded fragment (the text after '#'), if any.
fragment: Option<~str>
|
2012-09-02 22:59:22 -05:00
|
|
|
}
|
2012-07-09 13:08:07 -05:00
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
// The "user[:pass]" part of an authority section, as a record.
type UserInfo = {
|
2012-07-09 13:08:07 -05:00
|
|
|
// User name (the text before the first ':').
user: ~str,
|
2012-08-20 14:23:37 -05:00
|
|
|
// Password (the text after the first ':'), when one was given.
pass: Option<~str>
|
2012-07-09 13:08:07 -05:00
|
|
|
};
|
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
// A query string represented as an ordered vector of (key, value) pairs.
type Query = ~[(~str, ~str)];
|
2012-07-09 13:08:07 -05:00
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
fn Url(+scheme: ~str, +user: Option<UserInfo>, +host: ~str,
|
|
|
|
+port: Option<~str>, +path: ~str, +query: Query,
|
|
|
|
+fragment: Option<~str>) -> Url {
|
|
|
|
Url { scheme: move scheme, user: move user, host: move host,
|
|
|
|
port: move port, path: move path, query: move query,
|
|
|
|
fragment: move fragment }
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
/**
 * Builds a `UserInfo` record from a user name and an optional password.
 * Both arguments are moved into the record unchanged.
 */
fn UserInfo(+user: ~str, +pass: Option<~str>) -> UserInfo {
    {
        user: move user,
        pass: move pass
    }
}
|
|
|
|
|
2012-09-03 14:33:51 -05:00
|
|
|
/**
 * Percent-encodes `s`, reading it one byte at a time.
 *
 * # Arguments
 *
 * `s` - the text to encode.
 * `full_url` - when true, the gen-delims and sub-delims listed in the body
 *              are copied through untouched (the input is treated as a
 *              whole URL); when false they are escaped like any other
 *              non-unreserved byte.
 *
 * # Returns
 *
 * The encoded string. ASCII letters, digits and '-', '.', '_', '~' are
 * always copied through; every escaped byte is written as '%' followed by
 * exactly two uppercase hex digits, as RFC 3986 section 2.1 requires and
 * as `decode_inner` (which always consumes two digits) expects.
 */
fn encode_inner(s: &str, full_url: bool) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            // Work on raw bytes so multi-byte characters are escaped
            // byte by byte.
            let ch = rdr.read_byte() as char;
            match ch {
              // unreserved:
              'A' .. 'Z' |
              'a' .. 'z' |
              '0' .. '9' |
              '-' | '.' | '_' | '~' => {
                str::push_char(out, ch);
              }
              _ => {
                if full_url {
                    match ch {
                      // gen-delims:
                      ':' | '/' | '?' | '#' | '[' | ']' | '@' |

                      // sub-delims:
                      // NOTE(review): RFC 3986 lists '\'' here, not '"';
                      // kept as-is because callers/tests may rely on the
                      // current output - confirm before changing.
                      '!' | '$' | '&' | '"' | '(' | ')' | '*' |
                      '+' | ',' | ';' | '=' => {
                        str::push_char(out, ch);
                      }

                      // Zero-pad to two digits: "%0A", never "%A".
                      _ => out += #fmt("%%%02X", ch as uint)
                    }
                } else {
                    // Zero-pad to two digits: "%0A", never "%A".
                    out += #fmt("%%%02X", ch as uint);
                }
              }
            }
        }

        out
    }
}
|
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
/**
|
|
|
|
* Encodes a URI by replacing reserved characters with percent encoded
|
|
|
|
* character sequences.
|
2012-07-24 22:21:32 -05:00
|
|
|
*
|
|
|
|
* This function is compliant with RFC 3986.
|
|
|
|
*/
|
2012-09-03 14:33:51 -05:00
|
|
|
fn encode(s: &str) -> ~str {
|
2012-07-24 22:21:32 -05:00
|
|
|
encode_inner(s, true)
|
|
|
|
}
|
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
/**
|
|
|
|
* Encodes a URI component by replacing reserved characters with percent
|
|
|
|
* encoded character sequences.
|
2012-07-24 22:21:32 -05:00
|
|
|
*
|
|
|
|
* This function is compliant with RFC 3986.
|
|
|
|
*/
|
2012-09-03 14:33:51 -05:00
|
|
|
fn encode_component(s: &str) -> ~str {
|
2012-07-24 22:21:32 -05:00
|
|
|
encode_inner(s, false)
|
|
|
|
}
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
/**
 * Reverses percent-encoding in `s`.
 *
 * Each '%' must be followed by two hex digits. When `full_url` is false
 * every escape is replaced by the character it denotes. When `full_url` is
 * true, escapes that denote one of the delimiter characters listed in the
 * body are copied through still encoded, so the structure of the URL is
 * not changed by decoding.
 *
 * NOTE(review): a '%' that is not followed by two valid hex digits makes
 * the `.get()` below fail; and the decoded value is cast straight to a
 * `char`, so escaped multi-byte sequences are not reassembled - confirm
 * whether callers can pass such input.
 */
fn decode_inner(s: &str, full_url: bool) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut decoded = ~"";

        while !rdr.eof() {
            let next = rdr.read_char();
            if next == '%' {
                // The two bytes after '%' are the hex value of the
                // original byte.
                let hex = rdr.read_bytes(2u);
                let unescaped = uint::parse_buf(hex, 16u).get() as char;

                if !full_url {
                    str::push_char(decoded, unescaped);
                } else {
                    // Only decode some characters:
                    match unescaped {
                      // gen-delims:
                      ':' | '/' | '?' | '#' | '[' | ']' | '@' |

                      // sub-delims:
                      '!' | '$' | '&' | '"' | '(' | ')' | '*' |
                      '+' | ',' | ';' | '=' => {
                        // Keep the escape exactly as it was written.
                        str::push_char(decoded, '%');
                        str::push_char(decoded, hex[0u] as char);
                        str::push_char(decoded, hex[1u] as char);
                      }

                      plain => str::push_char(decoded, plain)
                    }
                }
            } else {
                str::push_char(decoded, next);
            }
        }

        decoded
    }
}
|
|
|
|
|
2012-07-25 18:22:59 -05:00
|
|
|
/**
|
|
|
|
* Decode a string encoded with percent encoding.
|
2012-07-31 22:14:50 -05:00
|
|
|
*
|
2012-07-24 22:21:32 -05:00
|
|
|
* This will only decode escape sequences generated by encode_uri.
|
|
|
|
*/
|
2012-09-02 23:43:20 -05:00
|
|
|
fn decode(s: &str) -> ~str {
|
2012-07-24 22:21:32 -05:00
|
|
|
decode_inner(s, true)
|
|
|
|
}
|
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
/**
|
2012-07-25 18:22:59 -05:00
|
|
|
* Decode a string encoded with percent encoding.
|
2012-07-24 22:21:32 -05:00
|
|
|
*/
|
2012-09-02 23:43:20 -05:00
|
|
|
fn decode_component(s: &str) -> ~str {
|
2012-07-24 22:21:32 -05:00
|
|
|
decode_inner(s, false)
|
|
|
|
}
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
/**
 * Encodes `s` for use in 'application/x-www-form-urlencoded' data.
 *
 * ASCII letters, digits and '_', '.', '-' are copied through, a space
 * becomes '+', and every other byte is written as '%' followed by exactly
 * two uppercase hex digits (zero-padded, so that `decode_form_urlencoded`,
 * which always reads two digits, can reverse it).
 */
fn encode_plus(s: &str) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            // Byte-wise, so multi-byte characters are escaped per byte.
            let ch = rdr.read_byte() as char;
            match ch {
              'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '.' | '-' => {
                str::push_char(out, ch);
              }
              ' ' => str::push_char(out, '+'),
              // Zero-pad to two digits: "%0A", never "%A".
              _ => out += #fmt("%%%02X", ch as uint)
            }
        }

        out
    }
}
|
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
/**
|
2012-07-25 18:22:59 -05:00
|
|
|
* Encode a hashmap to the 'application/x-www-form-urlencoded' media type.
|
2012-07-24 22:21:32 -05:00
|
|
|
*/
|
2012-08-14 18:54:13 -05:00
|
|
|
fn encode_form_urlencoded(m: hashmap<~str, @DVec<@~str>>) -> ~str {
|
2012-07-24 22:21:32 -05:00
|
|
|
let mut out = ~"";
|
|
|
|
let mut first = true;
|
|
|
|
|
|
|
|
for m.each |key, values| {
|
|
|
|
let key = encode_plus(key);
|
|
|
|
|
|
|
|
for (*values).each |value| {
|
|
|
|
if first {
|
|
|
|
first = false;
|
|
|
|
} else {
|
|
|
|
str::push_char(out, '&');
|
|
|
|
first = false;
|
|
|
|
}
|
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
out += #fmt("%s=%s", key, encode_plus(*value));
|
2012-07-24 22:21:32 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
out
|
|
|
|
}
|
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
/**
|
2012-07-25 18:22:59 -05:00
|
|
|
* Decode a string encoded with the 'application/x-www-form-urlencoded' media
|
2012-07-24 22:21:32 -05:00
|
|
|
* type into a hashmap.
|
|
|
|
*/
|
2012-07-31 22:14:50 -05:00
|
|
|
fn decode_form_urlencoded(s: ~[u8]) ->
|
2012-08-14 18:54:13 -05:00
|
|
|
map::hashmap<~str, @dvec::DVec<@~str>> {
|
2012-07-24 22:21:32 -05:00
|
|
|
do io::with_bytes_reader(s) |rdr| {
|
|
|
|
let m = str_hash();
|
|
|
|
let mut key = ~"";
|
|
|
|
let mut value = ~"";
|
|
|
|
let mut parsing_key = true;
|
|
|
|
|
|
|
|
while !rdr.eof() {
|
2012-08-01 17:04:33 -05:00
|
|
|
match rdr.read_char() {
|
2012-08-03 21:59:04 -05:00
|
|
|
'&' | ';' => {
|
2012-07-24 22:21:32 -05:00
|
|
|
if key != ~"" && value != ~"" {
|
2012-08-01 17:04:33 -05:00
|
|
|
let values = match m.find(key) {
|
2012-08-20 14:23:37 -05:00
|
|
|
Some(values) => values,
|
|
|
|
None => {
|
2012-08-27 16:22:25 -05:00
|
|
|
let values = @DVec();
|
2012-07-24 22:21:32 -05:00
|
|
|
m.insert(key, values);
|
|
|
|
values
|
|
|
|
}
|
|
|
|
};
|
|
|
|
(*values).push(@value)
|
|
|
|
}
|
|
|
|
|
|
|
|
parsing_key = true;
|
|
|
|
key = ~"";
|
|
|
|
value = ~"";
|
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
'=' => parsing_key = false,
|
|
|
|
ch => {
|
2012-08-01 17:04:33 -05:00
|
|
|
let ch = match ch {
|
2012-08-03 21:59:04 -05:00
|
|
|
'%' => {
|
2012-07-24 22:21:32 -05:00
|
|
|
uint::parse_buf(rdr.read_bytes(2u), 16u).get() as char
|
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
'+' => ' ',
|
|
|
|
ch => ch
|
2012-07-24 22:21:32 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
if parsing_key {
|
|
|
|
str::push_char(key, ch)
|
|
|
|
} else {
|
|
|
|
str::push_char(value, ch)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if key != ~"" && value != ~"" {
|
2012-08-01 17:04:33 -05:00
|
|
|
let values = match m.find(key) {
|
2012-08-20 14:23:37 -05:00
|
|
|
Some(values) => values,
|
|
|
|
None => {
|
2012-08-27 16:22:25 -05:00
|
|
|
let values = @DVec();
|
2012-07-24 22:21:32 -05:00
|
|
|
m.insert(key, values);
|
|
|
|
values
|
|
|
|
}
|
|
|
|
};
|
|
|
|
(*values).push(@value)
|
|
|
|
}
|
|
|
|
|
|
|
|
m
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
/**
 * Splits `s` at the first occurrence of `c`.
 *
 * Returns the text before the separator and the text after it; the
 * separator itself is dropped. When `c` does not occur, the first element
 * is all of `s` and the second is empty.
 *
 * The search compares individual bytes of `s` with `c`.
 */
fn split_char_first(s: &str, c: char) -> (~str, ~str) {
    let total = str::len(s);
    // Byte offset of the separator, or `total` when there is none.
    let mut split_at = total;
    // 1 once a separator has been found, so it can be skipped over.
    let mut sep_width = 0;
    do io::with_str_reader(s) |rdr| {
        while !rdr.eof() {
            if (rdr.read_byte() as char) == c {
                // found a match; the reader is already one past it
                split_at = rdr.tell() - 1;
                sep_width = 1;
                break;
            }
        }
    }

    let head = str::slice(s, 0, split_at);
    if split_at + sep_width == total {
        // Nothing follows the split point.
        return (head, ~"");
    }
    return (head, str::slice(s, split_at + sep_width, str::len(s)));
}
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
fn userinfo_from_str(uinfo: &str) -> UserInfo {
|
2012-07-09 13:08:07 -05:00
|
|
|
let (user, p) = split_char_first(uinfo, ':');
|
|
|
|
let pass = if str::len(p) == 0 {
|
2012-08-20 14:23:37 -05:00
|
|
|
option::None
|
2012-07-09 13:08:07 -05:00
|
|
|
} else {
|
2012-08-20 14:23:37 -05:00
|
|
|
option::Some(p)
|
2012-07-09 13:08:07 -05:00
|
|
|
};
|
2012-09-04 13:24:44 -05:00
|
|
|
return UserInfo(user, pass);
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
/**
 * Formats a `UserInfo` as it appears in front of a host: "user@" or
 * "user:pass@". The trailing '@' is always included.
 */
fn userinfo_to_str(+userinfo: UserInfo) -> ~str {
    let mut pieces = ~[copy userinfo.user];
    if option::is_some(userinfo.pass) {
        pieces += ~[~":", option::unwrap(copy userinfo.pass)];
    }
    pieces += ~[~"@"];
    return str::concat(pieces);
}
|
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
// Two `UserInfo` records are equal when both the user name and the
// (optional) password are equal.
impl UserInfo : Eq {
    pure fn eq(&&other: UserInfo) -> bool {
        if self.user != other.user {
            return false;
        }
        self.pass == other.pass
    }
}
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
fn query_from_str(rawquery: &str) -> Query {
|
2012-08-30 13:01:39 -05:00
|
|
|
let mut query: Query = ~[];
|
2012-07-09 13:08:07 -05:00
|
|
|
if str::len(rawquery) != 0 {
|
|
|
|
for str::split_char(rawquery, '&').each |p| {
|
|
|
|
let (k, v) = split_char_first(p, '=');
|
2012-07-24 22:21:32 -05:00
|
|
|
vec::push(query, (decode_component(k), decode_component(v)));
|
2012-07-09 13:08:07 -05:00
|
|
|
};
|
|
|
|
}
|
2012-08-01 19:30:05 -05:00
|
|
|
return query;
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
|
|
|
|
2012-09-02 23:43:20 -05:00
|
|
|
/**
 * Formats a `Query` as "k1=v1&k2=v2...", percent-encoding every key and
 * value with `encode_component`. No leading '?' is added.
 */
fn query_to_str(+query: Query) -> ~str {
    let mut parts = ~[];
    for query.each |pair| {
        let (name, val) = copy pair;
        let encoded = #fmt("%s=%s",
                           encode_component(name),
                           encode_component(val));
        parts += ~[encoded];
    };
    return str::connect(parts, ~"&");
}
|
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
// returns the scheme and the rest of the url, or a parsing error
|
2012-09-03 19:01:34 -05:00
|
|
|
fn get_scheme(rawurl: &str) -> result::Result<(~str, ~str), @~str> {
|
2012-07-09 13:08:07 -05:00
|
|
|
for str::each_chari(rawurl) |i,c| {
|
2012-08-01 17:04:33 -05:00
|
|
|
match c {
|
2012-09-01 20:38:05 -05:00
|
|
|
'A' .. 'Z' | 'a' .. 'z' => again,
|
|
|
|
'0' .. '9' | '+' | '-' | '.' => {
|
2012-07-25 21:40:31 -05:00
|
|
|
if i == 0 {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"url: Scheme must begin with a letter.");
|
2012-07-25 21:40:31 -05:00
|
|
|
}
|
2012-07-09 13:08:07 -05:00
|
|
|
again;
|
2012-07-25 21:40:31 -05:00
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
':' => {
|
2012-07-25 21:40:31 -05:00
|
|
|
if i == 0 {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"url: Scheme cannot be empty.");
|
2012-07-25 21:40:31 -05:00
|
|
|
} else {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Ok((rawurl.slice(0,i),
|
2012-07-31 21:44:37 -05:00
|
|
|
rawurl.slice(i+1,str::len(rawurl))));
|
2012-07-25 21:40:31 -05:00
|
|
|
}
|
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
_ => {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"url: Invalid character in scheme.");
|
2012-07-25 21:40:31 -05:00
|
|
|
}
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
|
|
|
};
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"url: Scheme must be terminated with a colon.");
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
// Classifies the characters seen so far in the current piece of an
// authority section. `get_authority` starts each piece at `Digit` and
// widens the class as less restricted characters appear.
enum Input {
|
|
|
|
Digit, // all digits
|
|
|
|
Hex, // digits and letters a-f
|
|
|
|
Unreserved // all other legal characters
|
2012-08-27 18:26:35 -05:00
|
|
|
}
|
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
// Two `Input` values are equal exactly when they are the same variant.
impl Input: Eq {
    pure fn eq(&&other: Input) -> bool {
        match (self, other) {
          (Digit, Digit) | (Hex, Hex) | (Unreserved, Unreserved) => true,
          _ => false
        }
    }
}
|
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
// returns userinfo, host, port, and unparsed part, or an error
|
2012-09-03 19:01:34 -05:00
|
|
|
fn get_authority(rawurl: &str) ->
|
2012-08-30 13:01:39 -05:00
|
|
|
result::Result<(Option<UserInfo>, ~str, Option<~str>, ~str), @~str> {
|
2012-07-31 21:44:37 -05:00
|
|
|
if !str::starts_with(rawurl, ~"//") {
|
|
|
|
// there is no authority.
|
2012-09-03 19:01:34 -05:00
|
|
|
return result::Ok((option::None, ~"", option::None, rawurl.to_str()));
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
enum State {
|
|
|
|
Start, // starting state
|
|
|
|
PassHostPort, // could be in user or port
|
|
|
|
Ip6Port, // either in ipv6 host or port
|
|
|
|
Ip6Host, // are in an ipv6 host
|
|
|
|
InHost, // are in a host - may be ipv6, but don't know yet
|
|
|
|
InPort // are in port
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-27 18:26:35 -05:00
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
let len = str::len(rawurl);
|
2012-08-30 13:01:39 -05:00
|
|
|
let mut st : State = Start;
|
|
|
|
let mut in : Input = Digit; // most restricted, start here.
|
2012-07-31 21:44:37 -05:00
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
let mut userinfo : Option<UserInfo> = option::None;
|
2012-07-31 21:44:37 -05:00
|
|
|
let mut host : ~str = ~"";
|
2012-08-20 14:23:37 -05:00
|
|
|
let mut port : option::Option<~str> = option::None;
|
2012-07-31 21:44:37 -05:00
|
|
|
|
|
|
|
let mut colon_count = 0;
|
2012-08-01 16:19:43 -05:00
|
|
|
let mut pos : uint = 0, begin : uint = 2, end : uint = len;
|
2012-07-31 21:44:37 -05:00
|
|
|
|
2012-08-01 16:19:43 -05:00
|
|
|
for str::each_chari(rawurl) |i,c| {
|
2012-07-31 21:44:37 -05:00
|
|
|
if i < 2 { again; } // ignore the leading //
|
|
|
|
|
|
|
|
// deal with input class first
|
2012-08-01 17:04:33 -05:00
|
|
|
match c {
|
2012-09-01 20:38:05 -05:00
|
|
|
'0' .. '9' => (),
|
|
|
|
'A' .. 'F' | 'a' .. 'f' => {
|
2012-08-30 13:01:39 -05:00
|
|
|
if in == Digit {
|
|
|
|
in = Hex;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
2012-09-01 20:38:05 -05:00
|
|
|
'G' .. 'Z' | 'g' .. 'z' | '-' | '.' | '_' | '~' | '%' |
|
2012-08-03 21:59:04 -05:00
|
|
|
'&' |'\'' | '(' | ')' | '+' | '!' | '*' | ',' | ';' | '=' => {
|
2012-08-30 13:01:39 -05:00
|
|
|
in = Unreserved;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
':' | '@' | '?' | '#' | '/' => {
|
2012-07-31 21:44:37 -05:00
|
|
|
// separators, don't change anything
|
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
_ => {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Illegal character in authority");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
// now process states
|
2012-08-01 17:04:33 -05:00
|
|
|
match c {
|
2012-08-03 21:59:04 -05:00
|
|
|
':' => {
|
2012-07-31 21:44:37 -05:00
|
|
|
colon_count += 1;
|
2012-08-01 17:04:33 -05:00
|
|
|
match st {
|
2012-08-30 13:01:39 -05:00
|
|
|
Start => {
|
2012-07-31 21:44:37 -05:00
|
|
|
pos = i;
|
2012-08-30 13:01:39 -05:00
|
|
|
st = PassHostPort;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
PassHostPort => {
|
2012-07-31 21:44:37 -05:00
|
|
|
// multiple colons means ipv6 address.
|
2012-08-30 13:01:39 -05:00
|
|
|
if in == Unreserved {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(
|
2012-08-03 13:45:52 -05:00
|
|
|
@~"Illegal characters in IPv6 address.");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
st = Ip6Host;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
InHost => {
|
2012-07-31 21:44:37 -05:00
|
|
|
pos = i;
|
|
|
|
// can't be sure whether this is an ipv6 address or a port
|
2012-08-30 13:01:39 -05:00
|
|
|
if in == Unreserved {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Illegal characters in authority.");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
st = Ip6Port;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
Ip6Port => {
|
|
|
|
if in == Unreserved {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Illegal characters in authority.");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
st = Ip6Host;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
Ip6Host => {
|
2012-07-31 21:44:37 -05:00
|
|
|
if colon_count > 7 {
|
|
|
|
host = str::slice(rawurl, begin, i);
|
|
|
|
pos = i;
|
2012-08-30 13:01:39 -05:00
|
|
|
st = InPort;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
_ => {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Invalid ':' in authority.");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
in = Digit; // reset input class
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
|
2012-08-03 21:59:04 -05:00
|
|
|
'@' => {
|
2012-08-30 13:01:39 -05:00
|
|
|
in = Digit; // reset input class
|
2012-07-31 21:44:37 -05:00
|
|
|
colon_count = 0; // reset count
|
2012-08-01 17:04:33 -05:00
|
|
|
match st {
|
2012-08-30 13:01:39 -05:00
|
|
|
Start => {
|
2012-07-31 21:44:37 -05:00
|
|
|
let user = str::slice(rawurl, begin, i);
|
2012-08-20 14:23:37 -05:00
|
|
|
userinfo = option::Some({user : user,
|
|
|
|
pass: option::None});
|
2012-08-30 13:01:39 -05:00
|
|
|
st = InHost;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
PassHostPort => {
|
2012-07-31 21:44:37 -05:00
|
|
|
let user = str::slice(rawurl, begin, pos);
|
|
|
|
let pass = str::slice(rawurl, pos+1, i);
|
2012-08-20 14:23:37 -05:00
|
|
|
userinfo = option::Some({user: user,
|
|
|
|
pass: option::Some(pass)});
|
2012-08-30 13:01:39 -05:00
|
|
|
st = InHost;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
_ => {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Invalid '@' in authority.");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
begin = i+1;
|
|
|
|
}
|
2012-07-31 22:14:50 -05:00
|
|
|
|
2012-08-03 21:59:04 -05:00
|
|
|
'?' | '#' | '/' => {
|
2012-08-01 16:19:43 -05:00
|
|
|
end = i;
|
2012-07-31 21:44:37 -05:00
|
|
|
break;
|
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
_ => ()
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-01 16:19:43 -05:00
|
|
|
end = i;
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
|
2012-08-05 18:33:28 -05:00
|
|
|
let end = end; // make end immutable so it can be captured
|
|
|
|
|
2012-09-03 19:01:34 -05:00
|
|
|
let host_is_end_plus_one: &fn() -> bool = || {
|
2012-08-05 18:33:28 -05:00
|
|
|
end+1 == len
|
|
|
|
&& !['?', '#', '/'].contains(rawurl[end] as char)
|
|
|
|
};
|
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
// finish up
|
2012-08-01 17:04:33 -05:00
|
|
|
match st {
|
2012-08-30 13:01:39 -05:00
|
|
|
Start => {
|
2012-08-05 18:33:28 -05:00
|
|
|
if host_is_end_plus_one() {
|
2012-08-01 16:19:43 -05:00
|
|
|
host = str::slice(rawurl, begin, end+1);
|
2012-07-31 21:44:37 -05:00
|
|
|
} else {
|
2012-08-01 16:19:43 -05:00
|
|
|
host = str::slice(rawurl, begin, end);
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
PassHostPort | Ip6Port => {
|
|
|
|
if in != Digit {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Non-digit characters in port.");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
host = str::slice(rawurl, begin, pos);
|
2012-08-20 14:23:37 -05:00
|
|
|
port = option::Some(str::slice(rawurl, pos+1, end));
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
Ip6Host | InHost => {
|
2012-08-01 16:19:43 -05:00
|
|
|
host = str::slice(rawurl, begin, end);
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-30 13:01:39 -05:00
|
|
|
InPort => {
|
|
|
|
if in != Digit {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Non-digit characters in port.");
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
2012-08-20 14:23:37 -05:00
|
|
|
port = option::Some(str::slice(rawurl, pos+1, end));
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-05 18:33:28 -05:00
|
|
|
let rest = if host_is_end_plus_one() { ~"" }
|
2012-08-01 16:19:43 -05:00
|
|
|
else { str::slice(rawurl, end, len) };
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Ok((userinfo, host, port, rest));
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// returns the path and unparsed part of url, or an error
|
2012-09-03 19:01:34 -05:00
|
|
|
fn get_path(rawurl: &str, authority : bool) ->
|
2012-08-26 18:54:31 -05:00
|
|
|
result::Result<(~str, ~str), @~str> {
|
2012-07-31 21:44:37 -05:00
|
|
|
let len = str::len(rawurl);
|
|
|
|
let mut end = len;
|
|
|
|
for str::each_chari(rawurl) |i,c| {
|
2012-08-01 17:04:33 -05:00
|
|
|
match c {
|
2012-09-01 20:38:05 -05:00
|
|
|
'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '&' |'\'' | '(' | ')' | '.'
|
2012-08-06 15:12:49 -05:00
|
|
|
| '@' | ':' | '%' | '/' | '+' | '!' | '*' | ',' | ';' | '='
|
2012-08-06 17:17:08 -05:00
|
|
|
| '_' | '-' => {
|
2012-07-31 21:44:37 -05:00
|
|
|
again;
|
|
|
|
}
|
2012-08-03 21:59:04 -05:00
|
|
|
'?' | '#' => {
|
2012-07-31 21:44:37 -05:00
|
|
|
end = i;
|
|
|
|
break;
|
|
|
|
}
|
2012-08-26 18:54:31 -05:00
|
|
|
_ => return result::Err(@~"Invalid character in path.")
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if authority {
|
|
|
|
if end != 0 && !str::starts_with(rawurl, ~"/") {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(@~"Non-empty path must begin with\
|
2012-07-31 21:44:37 -05:00
|
|
|
'/' in presence of authority.");
|
|
|
|
}
|
|
|
|
}
|
2012-07-31 22:14:50 -05:00
|
|
|
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Ok((decode_component(str::slice(rawurl, 0, end)),
|
2012-07-31 21:44:37 -05:00
|
|
|
str::slice(rawurl, end, len)));
|
|
|
|
}
|
|
|
|
|
|
|
|
// returns the parsed query and the fragment, if present
|
2012-09-03 19:01:34 -05:00
|
|
|
fn get_query_fragment(rawurl: &str) ->
|
2012-08-30 13:01:39 -05:00
|
|
|
result::Result<(Query, Option<~str>), @~str> {
|
2012-07-31 21:44:37 -05:00
|
|
|
if !str::starts_with(rawurl, ~"?") {
|
|
|
|
if str::starts_with(rawurl, ~"#") {
|
2012-07-31 22:14:50 -05:00
|
|
|
let f = decode_component(str::slice(rawurl,
|
|
|
|
1,
|
2012-07-31 21:44:37 -05:00
|
|
|
str::len(rawurl)));
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Ok((~[], option::Some(f)));
|
2012-07-31 21:44:37 -05:00
|
|
|
} else {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Ok((~[], option::None));
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
}
|
2012-07-31 22:14:50 -05:00
|
|
|
let (q, r) = split_char_first(str::slice(rawurl, 1,
|
2012-07-31 21:44:37 -05:00
|
|
|
str::len(rawurl)), '#');
|
2012-07-31 22:14:50 -05:00
|
|
|
let f = if str::len(r) != 0 {
|
2012-08-20 14:23:37 -05:00
|
|
|
option::Some(decode_component(r)) } else { option::None };
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Ok((query_from_str(q), f));
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
|
2012-07-09 13:08:07 -05:00
|
|
|
/**
|
|
|
|
* Parse a `str` to a `url`
|
|
|
|
*
|
|
|
|
* # Arguments
|
|
|
|
*
|
|
|
|
* `rawurl` - a string representing a full url, including scheme.
|
|
|
|
*
|
|
|
|
* # Returns
|
|
|
|
*
|
|
|
|
* a `url` that contains the parsed representation of the url.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2012-09-03 19:01:34 -05:00
|
|
|
fn from_str(rawurl: &str) -> result::Result<Url, ~str> {
|
2012-07-31 21:44:37 -05:00
|
|
|
// scheme
|
2012-07-09 13:08:07 -05:00
|
|
|
let mut schm = get_scheme(rawurl);
|
2012-07-25 21:40:31 -05:00
|
|
|
if result::is_err(schm) {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(copy *result::get_err(schm));
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
2012-07-25 21:40:31 -05:00
|
|
|
let (scheme, rest) = result::unwrap(schm);
|
2012-07-31 21:44:37 -05:00
|
|
|
|
|
|
|
// authority
|
|
|
|
let mut auth = get_authority(rest);
|
|
|
|
if result::is_err(auth) {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(copy *result::get_err(auth));
|
2012-07-31 21:44:37 -05:00
|
|
|
}
|
|
|
|
let (userinfo, host, port, rest) = result::unwrap(auth);
|
|
|
|
|
|
|
|
// path
|
|
|
|
let has_authority = if host == ~"" { false } else { true };
|
|
|
|
let mut pth = get_path(rest, has_authority);
|
|
|
|
if result::is_err(pth) {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(copy *result::get_err(pth));
|
2012-07-31 22:14:50 -05:00
|
|
|
}
|
2012-07-31 21:44:37 -05:00
|
|
|
let (path, rest) = result::unwrap(pth);
|
|
|
|
|
|
|
|
// query and fragment
|
|
|
|
let mut qry = get_query_fragment(rest);
|
|
|
|
if result::is_err(qry) {
|
2012-08-26 18:54:31 -05:00
|
|
|
return result::Err(copy *result::get_err(qry));
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
2012-07-31 21:44:37 -05:00
|
|
|
let (query, fragment) = result::unwrap(qry);
|
2012-07-09 13:08:07 -05:00
|
|
|
|
2012-09-04 13:24:44 -05:00
|
|
|
return result::Ok(Url(scheme, userinfo, host,
|
2012-07-31 21:44:37 -05:00
|
|
|
port, path, query, fragment));
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
|
|
|
|
2012-09-03 19:01:34 -05:00
|
|
|
// `FromStr` for `Url`: delegates to the module-level `from_str` and
// discards the error message, returning `None` on any parse failure.
impl Url : FromStr {
    static fn from_str(s: &str) -> Option<Url> {
        let mut parsed = from_str(s);
        if result::is_err(parsed) {
            None
        } else {
            Some(result::unwrap(parsed))
        }
    }
}
|
|
|
|
|
2012-07-09 13:08:07 -05:00
|
|
|
/**
|
|
|
|
* Format a `url` as a string
|
|
|
|
*
|
|
|
|
* # Arguments
|
|
|
|
*
|
|
|
|
* `url` - a url.
|
|
|
|
*
|
|
|
|
* # Returns
|
|
|
|
*
|
|
|
|
* a `str` that contains the formatted url. Note that this will usually
|
|
|
|
* be an inverse of `from_str` but might strip out unneeded separators.
|
|
|
|
* for example, "http://somehost.com?", when parsed and formatted, will
|
|
|
|
* result in just "http://somehost.com".
|
|
|
|
*
|
|
|
|
*/
|
2012-09-02 23:43:20 -05:00
|
|
|
fn to_str(+url: Url) -> ~str {
|
2012-07-09 13:08:07 -05:00
|
|
|
let user = if option::is_some(url.user) {
|
|
|
|
userinfo_to_str(option::unwrap(copy url.user))
|
|
|
|
} else {
|
|
|
|
~""
|
|
|
|
};
|
2012-07-31 23:40:38 -05:00
|
|
|
let authority = if str::len(url.host) != 0 {
|
|
|
|
str::concat(~[~"//", user, copy url.host])
|
|
|
|
} else {
|
|
|
|
~""
|
|
|
|
};
|
2012-07-27 14:34:55 -05:00
|
|
|
let query = if url.query.len() == 0 {
|
2012-07-09 13:08:07 -05:00
|
|
|
~""
|
|
|
|
} else {
|
|
|
|
str::concat(~[~"?", query_to_str(url.query)])
|
|
|
|
};
|
|
|
|
let fragment = if option::is_some(url.fragment) {
|
2012-07-31 22:14:50 -05:00
|
|
|
str::concat(~[~"#", encode_component(
|
|
|
|
option::unwrap(copy url.fragment))])
|
2012-07-09 13:08:07 -05:00
|
|
|
} else {
|
|
|
|
~""
|
|
|
|
};
|
|
|
|
|
2012-08-01 19:30:05 -05:00
|
|
|
return str::concat(~[copy url.scheme,
|
2012-08-01 17:04:33 -05:00
|
|
|
~":",
|
|
|
|
authority,
|
|
|
|
copy url.path,
|
|
|
|
query,
|
|
|
|
fragment]);
|
2012-07-09 13:08:07 -05:00
|
|
|
}
|
|
|
|
|
2012-08-30 13:01:39 -05:00
|
|
|
// `ToStr` for `Url`: formats the url with the module-level `to_str`.
impl Url: to_str::ToStr {
    fn to_str() -> ~str {
        return to_str(self);
    }
}
|
|
|
|
|
2012-07-09 13:08:07 -05:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
2012-07-25 21:40:31 -05:00
|
|
|
#[test]
|
|
|
|
fn test_split_char_first() {
|
|
|
|
let (u,v) = split_char_first(~"hello, sweet world", ',');
|
|
|
|
assert u == ~"hello";
|
|
|
|
assert v == ~" sweet world";
|
2012-07-31 22:14:50 -05:00
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
let (u,v) = split_char_first(~"hello sweet world", ',');
|
|
|
|
assert u == ~"hello sweet world";
|
|
|
|
assert v == ~"";
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_get_authority() {
|
|
|
|
let (u, h, p, r) = result::unwrap(get_authority(
|
|
|
|
~"//user:pass@rust-lang.org/something"));
|
2012-08-20 14:23:37 -05:00
|
|
|
assert u == option::Some({user: ~"user",
|
|
|
|
pass: option::Some(~"pass")});
|
2012-07-31 21:44:37 -05:00
|
|
|
assert h == ~"rust-lang.org";
|
|
|
|
assert option::is_none(p);
|
|
|
|
assert r == ~"/something";
|
|
|
|
|
|
|
|
let (u, h, p, r) = result::unwrap(get_authority(
|
|
|
|
~"//rust-lang.org:8000?something"));
|
|
|
|
assert option::is_none(u);
|
|
|
|
assert h == ~"rust-lang.org";
|
2012-08-20 14:23:37 -05:00
|
|
|
assert p == option::Some(~"8000");
|
2012-07-31 21:44:37 -05:00
|
|
|
assert r == ~"?something";
|
2012-07-31 22:14:50 -05:00
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
let (u, h, p, r) = result::unwrap(get_authority(
|
|
|
|
~"//rust-lang.org#blah"));
|
|
|
|
assert option::is_none(u);
|
|
|
|
assert h == ~"rust-lang.org";
|
|
|
|
assert option::is_none(p);
|
|
|
|
assert r == ~"#blah";
|
|
|
|
|
|
|
|
// ipv6 tests
|
|
|
|
let (_, h, _, _) = result::unwrap(get_authority(
|
|
|
|
~"//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah"));
|
|
|
|
assert h == ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334";
|
|
|
|
|
|
|
|
let (_, h, p, _) = result::unwrap(get_authority(
|
|
|
|
~"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah"));
|
|
|
|
assert h == ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334";
|
2012-08-20 14:23:37 -05:00
|
|
|
assert p == option::Some(~"8000");
|
2012-07-31 21:44:37 -05:00
|
|
|
|
|
|
|
let (u, h, p, _) = result::unwrap(get_authority(
|
|
|
|
~"//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah"));
|
2012-08-20 14:23:37 -05:00
|
|
|
assert u == option::Some({user: ~"us", pass : option::Some(~"p")});
|
2012-07-31 21:44:37 -05:00
|
|
|
assert h == ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334";
|
2012-08-20 14:23:37 -05:00
|
|
|
assert p == option::Some(~"8000");
|
2012-07-31 21:44:37 -05:00
|
|
|
|
2012-07-31 22:14:50 -05:00
|
|
|
// invalid authorities;
|
|
|
|
assert result::is_err(get_authority(
|
|
|
|
~"//user:pass@rust-lang:something"));
|
|
|
|
assert result::is_err(get_authority(
|
|
|
|
~"//user@rust-lang:something:/path"));
|
2012-07-31 21:44:37 -05:00
|
|
|
assert result::is_err(get_authority(
|
|
|
|
~"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a"));
|
|
|
|
assert result::is_err(get_authority(
|
|
|
|
~"//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00"));
|
|
|
|
|
|
|
|
// these parse as empty, because they don't start with '//'
|
2012-07-31 22:14:50 -05:00
|
|
|
let (_, h, _, _) = result::unwrap(
|
|
|
|
get_authority(~"user:pass@rust-lang"));
|
2012-07-31 21:44:37 -05:00
|
|
|
assert h == ~"";
|
2012-07-31 22:14:50 -05:00
|
|
|
let (_, h, _, _) = result::unwrap(
|
|
|
|
get_authority(~"rust-lang.org"));
|
2012-07-31 21:44:37 -05:00
|
|
|
assert h == ~"";
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_get_path() {
|
2012-07-31 22:14:50 -05:00
|
|
|
let (p, r) = result::unwrap(get_path(
|
|
|
|
~"/something+%20orother", true));
|
2012-07-31 21:44:37 -05:00
|
|
|
assert p == ~"/something+ orother";
|
|
|
|
assert r == ~"";
|
2012-07-31 22:14:50 -05:00
|
|
|
let (p, r) = result::unwrap(get_path(
|
|
|
|
~"test@email.com#fragment", false));
|
2012-07-31 21:44:37 -05:00
|
|
|
assert p == ~"test@email.com";
|
|
|
|
assert r == ~"#fragment";
|
|
|
|
let (p, r) = result::unwrap(get_path(~"/gen/:addr=?q=v", false));
|
|
|
|
assert p == ~"/gen/:addr=";
|
|
|
|
assert r == ~"?q=v";
|
2012-07-31 22:14:50 -05:00
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
//failure cases
|
|
|
|
assert result::is_err(get_path(~"something?q", true));
|
2012-07-31 22:14:50 -05:00
|
|
|
|
2012-07-25 21:40:31 -05:00
|
|
|
}
|
|
|
|
|
2012-07-25 18:22:59 -05:00
|
|
|
#[test]
|
|
|
|
fn test_url_parse() {
|
|
|
|
let url = ~"http://user:pass@rust-lang.org/doc?s=v#something";
|
2012-07-31 22:14:50 -05:00
|
|
|
|
2012-07-31 21:44:37 -05:00
|
|
|
let up = from_str(url);
|
|
|
|
let u = result::unwrap(up);
|
2012-07-25 18:22:59 -05:00
|
|
|
assert u.scheme == ~"http";
|
2012-07-25 21:40:31 -05:00
|
|
|
assert option::unwrap(copy u.user).user == ~"user";
|
2012-07-31 22:14:50 -05:00
|
|
|
assert option::unwrap(copy option::unwrap(copy u.user).pass)
|
|
|
|
== ~"pass";
|
2012-07-25 18:22:59 -05:00
|
|
|
assert u.host == ~"rust-lang.org";
|
|
|
|
assert u.path == ~"/doc";
|
2012-07-25 21:40:31 -05:00
|
|
|
assert u.query.find(|kv| kv.first() == ~"s").get().second() == ~"v";
|
|
|
|
assert option::unwrap(copy u.fragment) == ~"something";
|
2012-07-25 18:22:59 -05:00
|
|
|
}
|
2012-07-25 21:40:31 -05:00
|
|
|
|
2012-08-05 18:33:28 -05:00
|
|
|
#[test]
|
|
|
|
fn test_url_parse_host_slash() {
|
|
|
|
let urlstr = ~"http://0.42.42.42/";
|
|
|
|
let url = from_str(urlstr).get();
|
|
|
|
#debug("url: %?", url);
|
|
|
|
assert url.host == ~"0.42.42.42";
|
|
|
|
assert url.path == ~"/";
|
|
|
|
}
|
|
|
|
|
2012-08-06 15:12:49 -05:00
|
|
|
#[test]
|
|
|
|
fn test_url_with_underscores() {
|
|
|
|
let urlstr = ~"http://dotcom.com/file_name.html";
|
|
|
|
let url = from_str(urlstr).get();
|
|
|
|
#debug("url: %?", url);
|
|
|
|
assert url.path == ~"/file_name.html";
|
|
|
|
}
|
|
|
|
|
2012-08-06 17:17:08 -05:00
|
|
|
#[test]
|
|
|
|
fn test_url_with_dashes() {
|
|
|
|
let urlstr = ~"http://dotcom.com/file-name.html";
|
|
|
|
let url = from_str(urlstr).get();
|
|
|
|
#debug("url: %?", url);
|
|
|
|
assert url.path == ~"/file-name.html";
|
|
|
|
}
|
|
|
|
|
2012-08-05 15:48:26 -05:00
|
|
|
#[test]
|
|
|
|
fn test_no_scheme() {
|
|
|
|
assert result::is_err(get_scheme(~"noschemehere.html"));
|
|
|
|
}
|
|
|
|
|
2012-07-25 21:40:31 -05:00
|
|
|
#[test]
|
|
|
|
fn test_invalid_scheme_errors() {
|
|
|
|
assert result::is_err(from_str(~"99://something"));
|
|
|
|
assert result::is_err(from_str(~"://something"));
|
|
|
|
}
|
|
|
|
|
2012-07-09 13:08:07 -05:00
|
|
|
#[test]
|
|
|
|
fn test_full_url_parse_and_format() {
|
|
|
|
let url = ~"http://user:pass@rust-lang.org/doc?s=v#something";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_userless_url_parse_and_format() {
|
|
|
|
let url = ~"http://rust-lang.org/doc?s=v#something";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_queryless_url_parse_and_format() {
|
|
|
|
let url = ~"http://user:pass@rust-lang.org/doc#something";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_empty_query_url_parse_and_format() {
|
|
|
|
let url = ~"http://user:pass@rust-lang.org/doc?#something";
|
|
|
|
let should_be = ~"http://user:pass@rust-lang.org/doc#something";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == should_be;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_fragmentless_url_parse_and_format() {
|
|
|
|
let url = ~"http://user:pass@rust-lang.org/doc?q=v";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_minimal_url_parse_and_format() {
|
|
|
|
let url = ~"http://rust-lang.org/doc";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_scheme_host_only_url_parse_and_format() {
|
|
|
|
let url = ~"http://rust-lang.org";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_pathless_url_parse_and_format() {
|
|
|
|
let url = ~"http://user:pass@rust-lang.org?q=v#something";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_scheme_host_fragment_only_url_parse_and_format() {
|
|
|
|
let url = ~"http://rust-lang.org#something";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
2012-07-24 22:21:32 -05:00
|
|
|
#[test]
|
|
|
|
fn test_url_component_encoding() {
|
|
|
|
let url = ~"http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B";
|
|
|
|
let u = result::unwrap(from_str(url));
|
|
|
|
assert u.path == ~"/doc uments";
|
|
|
|
assert u.query.find(|kv| kv.first() == ~"ba%d ")
|
|
|
|
.get().second() == ~"#&+";
|
|
|
|
}
|
|
|
|
|
2012-07-31 23:40:38 -05:00
|
|
|
#[test]
|
|
|
|
fn test_url_without_authority() {
|
|
|
|
let url = ~"mailto:test@email.com";
|
|
|
|
assert to_str(result::unwrap(from_str(url))) == url;
|
|
|
|
}
|
|
|
|
|
2012-07-24 22:21:32 -05:00
|
|
|
#[test]
|
|
|
|
fn test_encode() {
|
2012-09-03 14:33:51 -05:00
|
|
|
assert encode("") == ~"";
|
|
|
|
assert encode("http://example.com") == ~"http://example.com";
|
|
|
|
assert encode("foo bar% baz") == ~"foo%20bar%25%20baz";
|
|
|
|
assert encode(" ") == ~"%20";
|
|
|
|
assert encode("!") == ~"!";
|
|
|
|
assert encode("\"") == ~"\"";
|
|
|
|
assert encode("#") == ~"#";
|
|
|
|
assert encode("$") == ~"$";
|
|
|
|
assert encode("%") == ~"%25";
|
|
|
|
assert encode("&") == ~"&";
|
|
|
|
assert encode("'") == ~"%27";
|
|
|
|
assert encode("(") == ~"(";
|
|
|
|
assert encode(")") == ~")";
|
|
|
|
assert encode("*") == ~"*";
|
|
|
|
assert encode("+") == ~"+";
|
|
|
|
assert encode(",") == ~",";
|
|
|
|
assert encode("/") == ~"/";
|
|
|
|
assert encode(":") == ~":";
|
|
|
|
assert encode(";") == ~";";
|
|
|
|
assert encode("=") == ~"=";
|
|
|
|
assert encode("?") == ~"?";
|
|
|
|
assert encode("@") == ~"@";
|
|
|
|
assert encode("[") == ~"[";
|
|
|
|
assert encode("]") == ~"]";
|
2012-07-24 22:21:32 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_encode_component() {
|
|
|
|
assert encode_component(~"") == ~"";
|
|
|
|
assert encode_component(~"http://example.com") ==
|
|
|
|
~"http%3A%2F%2Fexample.com";
|
|
|
|
assert encode_component(~"foo bar% baz") == ~"foo%20bar%25%20baz";
|
|
|
|
assert encode_component(~" ") == ~"%20";
|
|
|
|
assert encode_component(~"!") == ~"%21";
|
|
|
|
assert encode_component(~"#") == ~"%23";
|
|
|
|
assert encode_component(~"$") == ~"%24";
|
|
|
|
assert encode_component(~"%") == ~"%25";
|
|
|
|
assert encode_component(~"&") == ~"%26";
|
|
|
|
assert encode_component(~"'") == ~"%27";
|
|
|
|
assert encode_component(~"(") == ~"%28";
|
|
|
|
assert encode_component(~")") == ~"%29";
|
|
|
|
assert encode_component(~"*") == ~"%2A";
|
|
|
|
assert encode_component(~"+") == ~"%2B";
|
|
|
|
assert encode_component(~",") == ~"%2C";
|
|
|
|
assert encode_component(~"/") == ~"%2F";
|
|
|
|
assert encode_component(~":") == ~"%3A";
|
|
|
|
assert encode_component(~";") == ~"%3B";
|
|
|
|
assert encode_component(~"=") == ~"%3D";
|
|
|
|
assert encode_component(~"?") == ~"%3F";
|
|
|
|
assert encode_component(~"@") == ~"%40";
|
|
|
|
assert encode_component(~"[") == ~"%5B";
|
|
|
|
assert encode_component(~"]") == ~"%5D";
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_decode() {
|
|
|
|
assert decode(~"") == ~"";
|
|
|
|
assert decode(~"abc/def 123") == ~"abc/def 123";
|
|
|
|
assert decode(~"abc%2Fdef%20123") == ~"abc%2Fdef 123";
|
|
|
|
assert decode(~"%20") == ~" ";
|
|
|
|
assert decode(~"%21") == ~"%21";
|
|
|
|
assert decode(~"%22") == ~"%22";
|
|
|
|
assert decode(~"%23") == ~"%23";
|
|
|
|
assert decode(~"%24") == ~"%24";
|
|
|
|
assert decode(~"%25") == ~"%";
|
|
|
|
assert decode(~"%26") == ~"%26";
|
|
|
|
assert decode(~"%27") == ~"'";
|
|
|
|
assert decode(~"%28") == ~"%28";
|
|
|
|
assert decode(~"%29") == ~"%29";
|
|
|
|
assert decode(~"%2A") == ~"%2A";
|
|
|
|
assert decode(~"%2B") == ~"%2B";
|
|
|
|
assert decode(~"%2C") == ~"%2C";
|
|
|
|
assert decode(~"%2F") == ~"%2F";
|
|
|
|
assert decode(~"%3A") == ~"%3A";
|
|
|
|
assert decode(~"%3B") == ~"%3B";
|
|
|
|
assert decode(~"%3D") == ~"%3D";
|
|
|
|
assert decode(~"%3F") == ~"%3F";
|
|
|
|
assert decode(~"%40") == ~"%40";
|
|
|
|
assert decode(~"%5B") == ~"%5B";
|
|
|
|
assert decode(~"%5D") == ~"%5D";
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_decode_component() {
|
|
|
|
assert decode_component(~"") == ~"";
|
|
|
|
assert decode_component(~"abc/def 123") == ~"abc/def 123";
|
|
|
|
assert decode_component(~"abc%2Fdef%20123") == ~"abc/def 123";
|
|
|
|
assert decode_component(~"%20") == ~" ";
|
|
|
|
assert decode_component(~"%21") == ~"!";
|
|
|
|
assert decode_component(~"%22") == ~"\"";
|
|
|
|
assert decode_component(~"%23") == ~"#";
|
|
|
|
assert decode_component(~"%24") == ~"$";
|
|
|
|
assert decode_component(~"%25") == ~"%";
|
|
|
|
assert decode_component(~"%26") == ~"&";
|
|
|
|
assert decode_component(~"%27") == ~"'";
|
|
|
|
assert decode_component(~"%28") == ~"(";
|
|
|
|
assert decode_component(~"%29") == ~")";
|
|
|
|
assert decode_component(~"%2A") == ~"*";
|
|
|
|
assert decode_component(~"%2B") == ~"+";
|
|
|
|
assert decode_component(~"%2C") == ~",";
|
|
|
|
assert decode_component(~"%2F") == ~"/";
|
|
|
|
assert decode_component(~"%3A") == ~":";
|
|
|
|
assert decode_component(~"%3B") == ~";";
|
|
|
|
assert decode_component(~"%3D") == ~"=";
|
|
|
|
assert decode_component(~"%3F") == ~"?";
|
|
|
|
assert decode_component(~"%40") == ~"@";
|
|
|
|
assert decode_component(~"%5B") == ~"[";
|
|
|
|
assert decode_component(~"%5D") == ~"]";
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_encode_form_urlencoded() {
|
|
|
|
let m = str_hash();
|
|
|
|
assert encode_form_urlencoded(m) == ~"";
|
|
|
|
|
2012-08-27 16:22:25 -05:00
|
|
|
m.insert(~"", @DVec());
|
|
|
|
m.insert(~"foo", @DVec());
|
2012-07-24 22:21:32 -05:00
|
|
|
assert encode_form_urlencoded(m) == ~"";
|
|
|
|
|
|
|
|
let m = str_hash();
|
|
|
|
m.insert(~"foo", @dvec::from_vec(~[mut @~"bar", @~"123"]));
|
|
|
|
assert encode_form_urlencoded(m) == ~"foo=bar&foo=123";
|
|
|
|
|
|
|
|
let m = str_hash();
|
|
|
|
m.insert(~"foo bar", @dvec::from_vec(~[mut @~"abc", @~"12 = 34"]));
|
|
|
|
assert encode_form_urlencoded(m) == ~"foo+bar=abc&foo+bar=12+%3D+34";
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_decode_form_urlencoded() {
|
|
|
|
import map::hash_from_strs;
|
|
|
|
|
2012-08-27 18:26:35 -05:00
|
|
|
assert decode_form_urlencoded(~[]).size() == 0;
|
2012-07-24 22:21:32 -05:00
|
|
|
|
2012-08-23 17:44:57 -05:00
|
|
|
let s = str::to_bytes(~"a=1&foo+bar=abc&foo+bar=12+%3D+34");
|
2012-08-27 18:26:35 -05:00
|
|
|
assert decode_form_urlencoded(s).size() == 2;
|
2012-07-24 22:21:32 -05:00
|
|
|
}
|
|
|
|
|
2012-07-28 00:23:36 -05:00
|
|
|
}
|
2012-07-24 22:21:32 -05:00
|
|
|
|