// Copyright 2013 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. /*! Cross-platform path support This module implements support for two flavors of paths. `PosixPath` represents a path on any unix-like system, whereas `WindowsPath` represents a path on Windows. This module also exposes a typedef `Path` which is equal to the appropriate platform-specific path variant. Both `PosixPath` and `WindowsPath` implement a trait `GenericPath`, which contains the set of methods that behave the same for both paths. They each also implement some methods that could not be expressed in `GenericPath`, yet behave identically for both path flavors, such as `.components()`. The three main design goals of this module are 1) to avoid unnecessary allocation, 2) to behave the same regardless of which flavor of path is being used, and 3) to support paths that cannot be represented in UTF-8 (as Linux has no restriction on paths beyond disallowing NUL). ## Usage Usage of this module is fairly straightforward. Unless writing platform-specific code, `Path` should be used to refer to the platform-native path. Creation of a path is typically done with either `Path::init(some_str)` or `Path::init(some_vec)`. This path can be modified with `.push()` and `.pop()` (and other setters). The resulting Path can either be passed to another API that expects a path, or can be turned into a &[u8] with `.as_vec()` or a Option<&str> with `.as_str()`. Similarly, attributes of the path can be queried with methods such as `.filename()`. There are also methods that return a new path instead of modifying the receiver, such as `.join()` or `.dir_path()`. Paths are always kept in normalized form. This means that creating the path `Path::init("a/b/../c")` will return the path `a/c`. Similarly any attempt to mutate the path will always leave it in normalized form. When rendering a path to some form of output, there is a method `.display()` which is compatible with the `format!()` parameter `{}`. This will render the path as a string, replacing all non-utf8 sequences with the Replacement Character (U+FFFD). As such it is not suitable for passing to any API that actually operates on the path; it is only intended for display. ## Example ```rust let mut path = Path::from_str("/tmp/path"); debug!("path: {}", path.display()); path.set_filename("foo"); path.push("bar"); debug!("new path: {}", path.display()); let b = std::os::path_exists(&path); debug!("path exists: {}", b); ``` */ use container::Container; use c_str::CString; use clone::Clone; use fmt; use iter::Iterator; use option::{Option, None, Some}; use str; use str::{OwnedStr, Str, StrSlice}; use to_str::ToStr; use vec; use vec::{CopyableVector, OwnedCopyableVector, OwnedVector, Vector}; use vec::{ImmutableEqVector, ImmutableVector}; /// Typedef for POSIX file paths. /// See `posix::Path` for more info. pub use PosixPath = self::posix::Path; /// Typedef for Windows file paths. /// See `windows::Path` for more info. pub use WindowsPath = self::windows::Path; /// Typedef for the platform-native path type #[cfg(unix)] pub use Path = self::posix::Path; /// Typedef for the platform-native path type #[cfg(windows)] pub use Path = self::windows::Path; /// Typedef for the platform-native component iterator #[cfg(unix)] pub use ComponentIter = self::posix::ComponentIter; /// Typedef for the platform-native reverse component iterator #[cfg(unix)] pub use RevComponentIter = self::posix::RevComponentIter; /// Typedef for the platform-native component iterator #[cfg(windows)] pub use ComponentIter = self::windows::ComponentIter; /// Typedef for the platform-native reverse component iterator #[cfg(windows)] pub use RevComponentIter = self::windows::RevComponentIter; /// Typedef for the platform-native str component iterator #[cfg(unix)] pub use StrComponentIter = self::posix::StrComponentIter; /// Typedef for the platform-native reverse str component iterator #[cfg(unix)] pub use RevStrComponentIter = self::posix::RevStrComponentIter; /// Typedef for the platform-native str component iterator #[cfg(windows)] pub use StrComponentIter = self::windows::StrComponentIter; /// Typedef for the platform-native reverse str component iterator #[cfg(windows)] pub use RevStrComponentIter = self::windows::RevStrComponentIter; /// Typedef for the platform-native separator char func #[cfg(unix)] pub use is_sep = self::posix::is_sep; /// Typedef for the platform-native separator char func #[cfg(windows)] pub use is_sep = self::windows::is_sep; /// Typedef for the platform-native separator byte func #[cfg(unix)] pub use is_sep_byte = self::posix::is_sep_byte; /// Typedef for the platform-native separator byte func #[cfg(windows)] pub use is_sep_byte = self::windows::is_sep_byte; pub mod posix; pub mod windows; // Condition that is raised when a NUL is found in a byte vector given to a Path function condition! { // this should be a &[u8] but there's a lifetime issue null_byte: ~[u8] -> ~[u8]; } /// A trait that represents the generic operations available on paths pub trait GenericPath: Clone + GenericPathUnsafe { /// Creates a new Path from a byte vector or string. /// The resulting Path will always be normalized. /// /// # Failure /// /// Raises the `null_byte` condition if the path contains a NUL. /// /// See individual Path impls for additional restrictions. #[inline] fn init(path: T) -> Self { if contains_nul(path.container_as_bytes()) { let path = self::null_byte::cond.raise(path.container_into_owned_bytes()); assert!(!contains_nul(path)); unsafe { GenericPathUnsafe::init_unchecked(path) } } else { unsafe { GenericPathUnsafe::init_unchecked(path) } } } /// Creates a new Path from a byte vector or string, if possible. /// The resulting Path will always be normalized. #[inline] fn init_opt(path: T) -> Option { if contains_nul(path.container_as_bytes()) { None } else { Some(unsafe { GenericPathUnsafe::init_unchecked(path) }) } } /// Returns the path as a string, if possible. /// If the path is not representable in utf-8, this returns None. #[inline] fn as_str<'a>(&'a self) -> Option<&'a str> { str::from_utf8_slice_opt(self.as_vec()) } /// Returns the path as a byte vector fn as_vec<'a>(&'a self) -> &'a [u8]; /// Converts the Path into an owned byte vector fn into_vec(self) -> ~[u8]; /// Returns an object that implements `fmt::Default` for printing paths /// /// This will print the equivalent of `to_display_str()` when used with a {} format parameter. fn display<'a>(&'a self) -> Display<'a, Self> { Display{ path: self, filename: false } } /// Returns an object that implements `fmt::Default` for printing filenames /// /// This will print the equivalent of `to_filename_display_str()` when used with a {} /// format parameter. If there is no filename, nothing will be printed. fn filename_display<'a>(&'a self) -> Display<'a, Self> { Display{ path: self, filename: true } } /// Returns the directory component of `self`, as a byte vector (with no trailing separator). /// If `self` has no directory component, returns ['.']. fn dirname<'a>(&'a self) -> &'a [u8]; /// Returns the directory component of `self`, as a string, if possible. /// See `dirname` for details. #[inline] fn dirname_str<'a>(&'a self) -> Option<&'a str> { str::from_utf8_slice_opt(self.dirname()) } /// Returns the file component of `self`, as a byte vector. /// If `self` represents the root of the file hierarchy, returns None. /// If `self` is "." or "..", returns None. fn filename<'a>(&'a self) -> Option<&'a [u8]>; /// Returns the file component of `self`, as a string, if possible. /// See `filename` for details. #[inline] fn filename_str<'a>(&'a self) -> Option<&'a str> { self.filename().and_then(str::from_utf8_slice_opt) } /// Returns the stem of the filename of `self`, as a byte vector. /// The stem is the portion of the filename just before the last '.'. /// If there is no '.', the entire filename is returned. fn filestem<'a>(&'a self) -> Option<&'a [u8]> { match self.filename() { None => None, Some(name) => Some({ let dot = '.' as u8; match name.rposition_elem(&dot) { None | Some(0) => name, Some(1) if name == bytes!("..") => name, Some(pos) => name.slice_to(pos) } }) } } /// Returns the stem of the filename of `self`, as a string, if possible. /// See `filestem` for details. #[inline] fn filestem_str<'a>(&'a self) -> Option<&'a str> { self.filestem().and_then(str::from_utf8_slice_opt) } /// Returns the extension of the filename of `self`, as an optional byte vector. /// The extension is the portion of the filename just after the last '.'. /// If there is no extension, None is returned. /// If the filename ends in '.', the empty vector is returned. fn extension<'a>(&'a self) -> Option<&'a [u8]> { match self.filename() { None => None, Some(name) => { let dot = '.' as u8; match name.rposition_elem(&dot) { None | Some(0) => None, Some(1) if name == bytes!("..") => None, Some(pos) => Some(name.slice_from(pos+1)) } } } } /// Returns the extension of the filename of `self`, as a string, if possible. /// See `extension` for details. #[inline] fn extension_str<'a>(&'a self) -> Option<&'a str> { self.extension().and_then(str::from_utf8_slice_opt) } /// Replaces the filename portion of the path with the given byte vector or string. /// If the replacement name is [], this is equivalent to popping the path. /// /// # Failure /// /// Raises the `null_byte` condition if the filename contains a NUL. #[inline] fn set_filename(&mut self, filename: T) { if contains_nul(filename.container_as_bytes()) { let filename = self::null_byte::cond.raise(filename.container_into_owned_bytes()); assert!(!contains_nul(filename)); unsafe { self.set_filename_unchecked(filename) } } else { unsafe { self.set_filename_unchecked(filename) } } } /// Replaces the extension with the given byte vector or string. /// If there is no extension in `self`, this adds one. /// If the argument is [] or "", this removes the extension. /// If `self` has no filename, this is a no-op. /// /// # Failure /// /// Raises the `null_byte` condition if the extension contains a NUL. fn set_extension(&mut self, extension: T) { // borrowck causes problems here too let val = { match self.filename() { None => None, Some(name) => { let dot = '.' as u8; match name.rposition_elem(&dot) { None | Some(0) => { if extension.container_as_bytes().is_empty() { None } else { let mut v; if contains_nul(extension.container_as_bytes()) { let ext = extension.container_into_owned_bytes(); let extension = self::null_byte::cond.raise(ext); assert!(!contains_nul(extension)); v = vec::with_capacity(name.len() + extension.len() + 1); v.push_all(name); v.push(dot); v.push_all(extension); } else { let extension = extension.container_as_bytes(); v = vec::with_capacity(name.len() + extension.len() + 1); v.push_all(name); v.push(dot); v.push_all(extension); } Some(v) } } Some(idx) => { if extension.container_as_bytes().is_empty() { Some(name.slice_to(idx).to_owned()) } else { let mut v; if contains_nul(extension.container_as_bytes()) { let ext = extension.container_into_owned_bytes(); let extension = self::null_byte::cond.raise(ext); assert!(!contains_nul(extension)); v = vec::with_capacity(idx + extension.len() + 1); v.push_all(name.slice_to(idx+1)); v.push_all(extension); } else { let extension = extension.container_as_bytes(); v = vec::with_capacity(idx + extension.len() + 1); v.push_all(name.slice_to(idx+1)); v.push_all(extension); } Some(v) } } } } } }; match val { None => (), Some(v) => unsafe { self.set_filename_unchecked(v) } } } /// Returns a new Path constructed by replacing the filename with the given /// byte vector or string. /// See `set_filename` for details. /// /// # Failure /// /// Raises the `null_byte` condition if the filename contains a NUL. #[inline] fn with_filename(&self, filename: T) -> Self { let mut p = self.clone(); p.set_filename(filename); p } /// Returns a new Path constructed by setting the extension to the given /// byte vector or string. /// See `set_extension` for details. /// /// # Failure /// /// Raises the `null_byte` condition if the extension contains a NUL. #[inline] fn with_extension(&self, extension: T) -> Self { let mut p = self.clone(); p.set_extension(extension); p } /// Returns the directory component of `self`, as a Path. /// If `self` represents the root of the filesystem hierarchy, returns `self`. fn dir_path(&self) -> Self { // self.dirname() returns a NUL-free vector unsafe { GenericPathUnsafe::init_unchecked(self.dirname()) } } /// Returns a Path that represents the filesystem root that `self` is rooted in. /// /// If `self` is not absolute, or vol-relative in the case of Windows, this returns None. fn root_path(&self) -> Option; /// Pushes a path (as a byte vector or string) onto `self`. /// If the argument represents an absolute path, it replaces `self`. /// /// # Failure /// /// Raises the `null_byte` condition if the path contains a NUL. #[inline] fn push(&mut self, path: T) { if contains_nul(path.container_as_bytes()) { let path = self::null_byte::cond.raise(path.container_into_owned_bytes()); assert!(!contains_nul(path)); unsafe { self.push_unchecked(path) } } else { unsafe { self.push_unchecked(path) } } } /// Pushes multiple paths (as byte vectors or strings) onto `self`. /// See `push` for details. #[inline] fn push_many(&mut self, paths: &[T]) { let t: Option = None; if BytesContainer::is_str(t) { for p in paths.iter() { self.push(p.container_as_str()) } } else { for p in paths.iter() { self.push(p.container_as_bytes()) } } } /// Removes the last path component from the receiver. /// Returns `true` if the receiver was modified, or `false` if it already /// represented the root of the file hierarchy. fn pop(&mut self) -> bool; /// Returns a new Path constructed by joining `self` with the given path /// (as a byte vector or string). /// If the given path is absolute, the new Path will represent just that. /// /// # Failure /// /// Raises the `null_byte` condition if the path contains a NUL. #[inline] fn join(&self, path: T) -> Self { let mut p = self.clone(); p.push(path); p } /// Returns a new Path constructed by joining `self` with the given paths /// (as byte vectors or strings). /// See `join` for details. #[inline] fn join_many(&self, paths: &[T]) -> Self { let mut p = self.clone(); p.push_many(paths); p } /// Returns whether `self` represents an absolute path. /// An absolute path is defined as one that, when joined to another path, will /// yield back the same absolute path. fn is_absolute(&self) -> bool; /// Returns whether `self` represents a relative path. /// Typically this is the inverse of `is_absolute`. /// But for Windows paths, it also means the path is not volume-relative or /// relative to the current working directory. fn is_relative(&self) -> bool { !self.is_absolute() } /// Returns whether `self` is equal to, or is an ancestor of, the given path. /// If both paths are relative, they are compared as though they are relative /// to the same parent path. fn is_ancestor_of(&self, other: &Self) -> bool; /// Returns the Path that, were it joined to `base`, would yield `self`. /// If no such path exists, None is returned. /// If `self` is absolute and `base` is relative, or on Windows if both /// paths refer to separate drives, an absolute path is returned. fn path_relative_from(&self, base: &Self) -> Option; /// Returns whether the relative path `child` is a suffix of `self`. fn ends_with_path(&self, child: &Self) -> bool; } /// A trait that represents something bytes-like (e.g. a &[u8] or a &str) pub trait BytesContainer { /// Returns a &[u8] representing the receiver fn container_as_bytes<'a>(&'a self) -> &'a [u8]; /// Consumes the receiver and converts it into ~[u8] #[inline] fn container_into_owned_bytes(self) -> ~[u8] { self.container_as_bytes().to_owned() } /// Returns the receiver interpreted as a utf-8 string /// /// # Failure /// /// Raises `str::null_byte` if not utf-8 #[inline] fn container_as_str<'a>(&'a self) -> &'a str { str::from_utf8_slice(self.container_as_bytes()) } /// Returns the receiver interpreted as a utf-8 string, if possible #[inline] fn container_as_str_opt<'a>(&'a self) -> Option<&'a str> { str::from_utf8_slice_opt(self.container_as_bytes()) } /// Returns whether .container_as_str() is guaranteed to not fail // FIXME (#8888): Remove unused arg once :: works #[inline] fn is_str(_: Option) -> bool { false } } /// A trait that represents the unsafe operations on GenericPaths pub trait GenericPathUnsafe { /// Creates a new Path without checking for null bytes. /// The resulting Path will always be normalized. unsafe fn init_unchecked(path: T) -> Self; /// Replaces the filename portion of the path without checking for null /// bytes. /// See `set_filename` for details. unsafe fn set_filename_unchecked(&mut self, filename: T); /// Pushes a path onto `self` without checking for null bytes. /// See `push` for details. unsafe fn push_unchecked(&mut self, path: T); } /// Helper struct for printing paths with format!() pub struct Display<'self, P> { priv path: &'self P, priv filename: bool } impl<'self, P: GenericPath> fmt::Default for Display<'self, P> { fn fmt(d: &Display

, f: &mut fmt::Formatter) { d.with_str(|s| f.pad(s)) } } impl<'self, P: GenericPath> ToStr for Display<'self, P> { /// Returns the path as a string /// /// If the path is not UTF-8, invalid sequences with be replaced with the /// unicode replacement char. This involves allocation. fn to_str(&self) -> ~str { if self.filename { match self.path.filename() { None => ~"", Some(v) => from_utf8_with_replacement(v) } } else { from_utf8_with_replacement(self.path.as_vec()) } } } impl<'self, P: GenericPath> Display<'self, P> { /// Provides the path as a string to a closure /// /// If the path is not UTF-8, invalid sequences will be replaced with the /// unicode replacement char. This involves allocation. #[inline] pub fn with_str(&self, f: |&str| -> T) -> T { let opt = if self.filename { self.path.filename_str() } else { self.path.as_str() }; match opt { Some(s) => f(s), None => { let s = self.to_str(); f(s.as_slice()) } } } } impl<'self> BytesContainer for &'self str { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { self.as_bytes() } #[inline] fn container_as_str<'a>(&'a self) -> &'a str { *self } #[inline] fn container_as_str_opt<'a>(&'a self) -> Option<&'a str> { Some(*self) } #[inline] fn is_str(_: Option<&'self str>) -> bool { true } } impl BytesContainer for ~str { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { self.as_bytes() } #[inline] fn container_into_owned_bytes(self) -> ~[u8] { self.into_bytes() } #[inline] fn container_as_str<'a>(&'a self) -> &'a str { self.as_slice() } #[inline] fn container_as_str_opt<'a>(&'a self) -> Option<&'a str> { Some(self.as_slice()) } #[inline] fn is_str(_: Option<~str>) -> bool { true } } impl BytesContainer for @str { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { self.as_bytes() } #[inline] fn container_as_str<'a>(&'a self) -> &'a str { self.as_slice() } #[inline] fn container_as_str_opt<'a>(&'a self) -> Option<&'a str> { Some(self.as_slice()) } #[inline] fn is_str(_: Option<@str>) -> bool { true } } impl<'self> BytesContainer for &'self [u8] { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { *self } } impl BytesContainer for ~[u8] { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { self.as_slice() } #[inline] fn container_into_owned_bytes(self) -> ~[u8] { self } } impl BytesContainer for @[u8] { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { self.as_slice() } } impl BytesContainer for CString { #[inline] fn container_as_bytes<'a>(&'a self) -> &'a [u8] { let s = self.as_bytes(); s.slice_to(s.len()-1) } } #[inline(always)] fn contains_nul(v: &[u8]) -> bool { v.iter().any(|&x| x == 0) } #[inline(always)] fn from_utf8_with_replacement(mut v: &[u8]) -> ~str { // FIXME (#9516): Don't decode utf-8 manually here once we have a good way to do it in str // This is a truly horrifically bad implementation, done as a functionality stopgap until // we have a proper utf-8 decoder. I don't really want to write one here. static REPLACEMENT_CHAR: char = '\uFFFD'; let mut s = str::with_capacity(v.len()); while !v.is_empty() { let w = str::utf8_char_width(v[0]); if w == 0u { s.push_char(REPLACEMENT_CHAR); v = v.slice_from(1); } else if v.len() < w || !str::is_utf8(v.slice_to(w)) { s.push_char(REPLACEMENT_CHAR); v = v.slice_from(1); } else { s.push_str(unsafe { ::cast::transmute(v.slice_to(w)) }); v = v.slice_from(w); } } s } #[cfg(test)] mod tests { use super::{GenericPath, PosixPath, WindowsPath}; use c_str::ToCStr; #[test] fn test_cstring() { let input = "/foo/bar/baz"; let path: PosixPath = PosixPath::init(input.to_c_str()); assert_eq!(path.as_vec(), input.as_bytes()); let input = r"\foo\bar\baz"; let path: WindowsPath = WindowsPath::init(input.to_c_str()); assert_eq!(path.as_str().unwrap(), input.as_slice()); } }