From 03e8cc56f89a53abd4c78a8e93dd18e04a7efa55 Mon Sep 17 00:00:00 2001 From: pjht Date: Sun, 9 Jun 2024 18:32:21 -0500 Subject: [PATCH] Initial commit --- .cargo/config.toml | 5 + .gitignore | 1 + Cargo.lock | 288 +++++ Cargo.toml | 11 + rust-toolchain.toml | 2 + src/main.rs | 79 ++ tar-0.4.41/.cargo_vcs_info.json | 6 + tar-0.4.41/.github/dependabot.yml | 8 + tar-0.4.41/.github/workflows/main.yml | 63 + tar-0.4.41/.gitignore | 2 + tar-0.4.41/Cargo.toml | 46 + tar-0.4.41/Cargo.toml.orig | 32 + tar-0.4.41/LICENSE-APACHE | 201 +++ tar-0.4.41/LICENSE-MIT | 25 + tar-0.4.41/README.md | 76 ++ tar-0.4.41/examples/extract_file.rs | 25 + tar-0.4.41/examples/list.rs | 17 + tar-0.4.41/examples/raw_list.rs | 48 + tar-0.4.41/examples/write.rs | 13 + tar-0.4.41/src/archive.rs | 559 +++++++++ tar-0.4.41/src/builder.rs | 663 ++++++++++ tar-0.4.41/src/entry.rs | 388 ++++++ tar-0.4.41/src/entry_type.rs | 199 +++ tar-0.4.41/src/error.rs | 41 + tar-0.4.41/src/header.rs | 1647 +++++++++++++++++++++++++ tar-0.4.41/src/lib.rs | 44 + tar-0.4.41/src/pax.rs | 147 +++ tar-0.4.41/tests/all.rs | 1514 +++++++++++++++++++++++ tar-0.4.41/tests/entry.rs | 410 ++++++ tar-0.4.41/tests/header/mod.rs | 247 ++++ 30 files changed, 6807 insertions(+) create mode 100644 .cargo/config.toml create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 rust-toolchain.toml create mode 100644 src/main.rs create mode 100644 tar-0.4.41/.cargo_vcs_info.json create mode 100644 tar-0.4.41/.github/dependabot.yml create mode 100644 tar-0.4.41/.github/workflows/main.yml create mode 100644 tar-0.4.41/.gitignore create mode 100644 tar-0.4.41/Cargo.toml create mode 100644 tar-0.4.41/Cargo.toml.orig create mode 100644 tar-0.4.41/LICENSE-APACHE create mode 100644 tar-0.4.41/LICENSE-MIT create mode 100644 tar-0.4.41/README.md create mode 100644 tar-0.4.41/examples/extract_file.rs create mode 100644 tar-0.4.41/examples/list.rs create mode 100644 tar-0.4.41/examples/raw_list.rs 
create mode 100644 tar-0.4.41/examples/write.rs create mode 100644 tar-0.4.41/src/archive.rs create mode 100644 tar-0.4.41/src/builder.rs create mode 100644 tar-0.4.41/src/entry.rs create mode 100644 tar-0.4.41/src/entry_type.rs create mode 100644 tar-0.4.41/src/error.rs create mode 100644 tar-0.4.41/src/header.rs create mode 100644 tar-0.4.41/src/lib.rs create mode 100644 tar-0.4.41/src/pax.rs create mode 100644 tar-0.4.41/tests/all.rs create mode 100644 tar-0.4.41/tests/entry.rs create mode 100644 tar-0.4.41/tests/header/mod.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..bec4b7a --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,5 @@ +[build] +target = "x86_64-unknown-mikros" + +[install] +root = "../os_build/sysroot" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..38e493f --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,288 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "file_rpc" +version = "0.1.0" +dependencies = [ + "parking_lot", + "rmp-serde", + "serde", +] + +[[package]] +name = "fs_rpc" +version = "0.1.0" +dependencies = [ + "parking_lot", + "rmp-serde", + "serde", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = 
"parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rmp" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +dependencies = [ + "byteorder", + "rmp", + "serde", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.203" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.41" +dependencies = [ + "libc", +] + +[[package]] +name = "tarfs" +version = "0.1.0" +dependencies = [ + "file_rpc", + "fs_rpc", + "parking_lot", + "tar", + "vfs_rpc", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "vfs_rpc" +version = "0.1.0" +dependencies = [ + "parking_lot", + "rmp-serde", + "serde", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f1e5a28 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "tarfs" +version = "0.1.0" +edition = "2021" + +[dependencies] +file_rpc = { version = "0.1.0", path = "../file_rpc" } +fs_rpc = { version = "0.1.0", path = "../fs_rpc" } +parking_lot = "0.12.3" +tar = { version = "0.4.41", default-features = false, path = "tar-0.4.41" } +vfs_rpc = { version = "0.1.0", 
path = "../vfs/vfs_rpc" } diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..a5535e3 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "dev-x86_64-unknown-mikros" diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..bc66975 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,79 @@ +use std::{fs::File, io::{Read, Seek}, os::mikros::{ipc, syscalls}, path::PathBuf, sync::Arc, usize}; + +use parking_lot::RwLock; +use tar::Archive; + +#[derive(Clone)] +struct Serv { + mounts: Arc)>>>, + files: Arc>>, +} + +impl fs_rpc::Server for Serv { + fn mount(&self, dev: &std::path::Path, _path: &std::path::Path) -> Result { + let archive = File::open(dev).map_err(|_| ())?; + let mut archive = Archive::new(archive); + let entries = archive.entries_with_seek().unwrap().map(|entry| { + let entry = entry.unwrap(); + let path = entry.path().unwrap().into_owned(); + let file_offset = entry.raw_file_position(); + (path, file_offset, entry.size() as usize) + }).collect(); + self.mounts.write().push((archive.into_inner(), entries)); + Ok((self.mounts.read().len() - 1) as u64) + } + + fn open(&self, path: &std::path::Path, mount_id: u64) -> Result<(Option, u64), ()> { + let mounts = self.mounts.read(); + let mount = &mounts[mount_id as usize]; + let (&file_offset, &file_size) = mount.1.iter().find(|(entry_path, _offset, _size)| entry_path == path).map(|(_x, y, z)| (y, z)).ok_or(())?; + self.files.write().push((mount_id as usize, file_offset, file_size)); + Ok((None, (self.files.read().len() - 1) as u64)) + } +} + +impl file_rpc::Server for Serv { + fn read(&self, fd: u64, pos: u64, len: usize) -> Result, ()> { + let (mount_id, file_offset, size) = self.files.read()[fd as usize]; + let read_len = usize::min(len, size - (pos as usize)); + let mounts = self.mounts.read(); + let mount = &mounts[mount_id as usize]; + (&mount.0).seek(std::io::SeekFrom::Start(file_offset + pos)).unwrap(); + let mut buf = vec![0; 
read_len]; + let read_bytes = (&mount.0).read(&mut buf).map_err(|_| ()) ?; + buf.truncate(read_bytes); + Ok(buf) + } + + fn write(&self, _fd: u64, _pos: u64, _data: &[u8]) -> Result<(), ()> { + Err(()) + } + + fn close(&self, _fd: u64) {} + + fn size(&self, fd: u64) -> Option { + let (_mount_id, _file_offset, size) = self.files.read()[fd as usize]; + Some(size as u64) + } +} + +fn main() { + let serv = Serv { + mounts: Arc::new(RwLock::new(Vec::new())), + files: Arc::new(RwLock::new(Vec::new())), + }; + fs_rpc::register_server(Box::new(serv.clone())); + file_rpc::register_server(Box::new(serv)); + + let vfs_pid; + loop { + if let Some(pid) = syscalls::try_get_registered(0) { + vfs_pid = pid; + break; + } + } + vfs_rpc::Client::new(vfs_pid).register_fs("tarfs").unwrap(); + loop { + ipc::process_messages() + } +} diff --git a/tar-0.4.41/.cargo_vcs_info.json b/tar-0.4.41/.cargo_vcs_info.json new file mode 100644 index 0000000..33f9676 --- /dev/null +++ b/tar-0.4.41/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "2cb0c7b53f5748d84f83d0bc74abe8669f2d2187" + }, + "path_in_vcs": "" +} \ No newline at end of file diff --git a/tar-0.4.41/.github/dependabot.yml b/tar-0.4.41/.github/dependabot.yml new file mode 100644 index 0000000..7377d37 --- /dev/null +++ b/tar-0.4.41/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: +- package-ecosystem: cargo + directory: "/" + schedule: + interval: daily + time: "08:00" + open-pull-requests-limit: 10 diff --git a/tar-0.4.41/.github/workflows/main.yml b/tar-0.4.41/.github/workflows/main.yml new file mode 100644 index 0000000..269110e --- /dev/null +++ b/tar-0.4.41/.github/workflows/main.yml @@ -0,0 +1,63 @@ +name: CI +on: [push, pull_request] + +jobs: + test: + name: Test + runs-on: ${{ matrix.os }} + strategy: + matrix: + build: [stable, beta, nightly, macos, windows] + include: + - build: stable + os: ubuntu-latest + rust: stable + - build: beta + os: ubuntu-latest + rust: beta + - build: nightly + os: 
ubuntu-latest + rust: nightly + - build: macos + os: macos-latest + rust: stable + - build: windows + os: windows-latest + rust: stable + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} + shell: bash + - run: cargo test + - run: cargo test --no-default-features + - name: Run cargo test with root + run: sudo -E $(which cargo) test + if: ${{ matrix.os == 'ubuntu-latest' }} + + rustfmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update stable && rustup default stable && rustup component add rustfmt + - run: cargo fmt -- --check + + publish_docs: + name: Publish Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update stable && rustup default stable + - name: Build documentation + run: cargo doc --no-deps --all-features + - name: Publish documentation + run: | + cd target/doc + git init + git add . + git -c user.name='ci' -c user.email='ci' commit -m init + git push -f -q https://git:${{ secrets.github_token }}@github.com/${{ github.repository }} HEAD:gh-pages + if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' diff --git a/tar-0.4.41/.gitignore b/tar-0.4.41/.gitignore new file mode 100644 index 0000000..4fffb2f --- /dev/null +++ b/tar-0.4.41/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/tar-0.4.41/Cargo.toml b/tar-0.4.41/Cargo.toml new file mode 100644 index 0000000..31e4f07 --- /dev/null +++ b/tar-0.4.41/Cargo.toml @@ -0,0 +1,46 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. 
+# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "tar" +version = "0.4.41" +authors = ["Alex Crichton "] +exclude = ["tests/archives/*"] +description = """ +A Rust implementation of a TAR file reader and writer. This library does not +currently handle compression, but it is abstract over all I/O readers and +writers. Additionally, great lengths are taken to ensure that the entire +contents are never required to be entirely resident in memory all at once. +""" +homepage = "https://github.com/alexcrichton/tar-rs" +documentation = "https://docs.rs/tar" +readme = "README.md" +keywords = [ + "tar", + "tarfile", + "encoding", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/alexcrichton/tar-rs" + +[dev-dependencies.tempfile] +version = "3" + +[features] +default = ["xattr"] + +[target."cfg(unix)".dependencies.libc] +version = "0.2" + +[target."cfg(unix)".dependencies.xattr] +version = "1.1.3" +optional = true diff --git a/tar-0.4.41/Cargo.toml.orig b/tar-0.4.41/Cargo.toml.orig new file mode 100644 index 0000000..eac8207 --- /dev/null +++ b/tar-0.4.41/Cargo.toml.orig @@ -0,0 +1,32 @@ +[package] +name = "tar" +version = "0.4.41" +authors = ["Alex Crichton "] +homepage = "https://github.com/alexcrichton/tar-rs" +repository = "https://github.com/alexcrichton/tar-rs" +documentation = "https://docs.rs/tar" +license = "MIT OR Apache-2.0" +keywords = ["tar", "tarfile", "encoding"] +readme = "README.md" +edition = "2021" +exclude = ["tests/archives/*"] + +description = """ +A Rust implementation of a TAR file reader and writer. This library does not +currently handle compression, but it is abstract over all I/O readers and +writers. Additionally, great lengths are taken to ensure that the entire +contents are never required to be entirely resident in memory all at once. 
+""" + +[dependencies] +filetime = "0.2.8" + +[dev-dependencies] +tempfile = "3" + +[target."cfg(unix)".dependencies] +xattr = { version = "1.1.3", optional = true } +libc = "0.2" + +[features] +default = ["xattr"] diff --git a/tar-0.4.41/LICENSE-APACHE b/tar-0.4.41/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/tar-0.4.41/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/tar-0.4.41/LICENSE-MIT b/tar-0.4.41/LICENSE-MIT new file mode 100644 index 0000000..39e0ed6 --- /dev/null +++ b/tar-0.4.41/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 Alex Crichton + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/tar-0.4.41/README.md b/tar-0.4.41/README.md new file mode 100644 index 0000000..25291e8 --- /dev/null +++ b/tar-0.4.41/README.md @@ -0,0 +1,76 @@ +# tar-rs + +[Documentation](https://docs.rs/tar) + +A tar archive reading/writing library for Rust. + +```toml +# Cargo.toml +[dependencies] +tar = "0.4" +``` + +## Reading an archive + +```rust,no_run +extern crate tar; + +use std::io::prelude::*; +use std::fs::File; +use tar::Archive; + +fn main() { + let file = File::open("foo.tar").unwrap(); + let mut a = Archive::new(file); + + for file in a.entries().unwrap() { + // Make sure there wasn't an I/O error + let mut file = file.unwrap(); + + // Inspect metadata about the file + println!("{:?}", file.header().path().unwrap()); + println!("{}", file.header().size().unwrap()); + + // files implement the Read trait + let mut s = String::new(); + file.read_to_string(&mut s).unwrap(); + println!("{}", s); + } +} + +``` + +## Writing an archive + +```rust,no_run +extern crate tar; + +use std::io::prelude::*; +use std::fs::File; +use tar::Builder; + +fn main() { + let file = File::create("foo.tar").unwrap(); + let mut a = Builder::new(file); + + a.append_path("file1.txt").unwrap(); + a.append_file("file2.txt", &mut File::open("file3.txt").unwrap()).unwrap(); +} +``` + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this project by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. 
diff --git a/tar-0.4.41/examples/extract_file.rs b/tar-0.4.41/examples/extract_file.rs new file mode 100644 index 0000000..425e9a3 --- /dev/null +++ b/tar-0.4.41/examples/extract_file.rs @@ -0,0 +1,25 @@ +//! An example of extracting a file in an archive. +//! +//! Takes a tarball on standard input, looks for an entry with a listed file +//! name as the first argument provided, and then prints the contents of that +//! file to stdout. + +extern crate tar; + +use std::env::args_os; +use std::io::{copy, stdin, stdout}; +use std::path::Path; + +use tar::Archive; + +fn main() { + let first_arg = args_os().skip(1).next().unwrap(); + let filename = Path::new(&first_arg); + let mut ar = Archive::new(stdin()); + for file in ar.entries().unwrap() { + let mut f = file.unwrap(); + if f.path().unwrap() == filename { + copy(&mut f, &mut stdout()).unwrap(); + } + } +} diff --git a/tar-0.4.41/examples/list.rs b/tar-0.4.41/examples/list.rs new file mode 100644 index 0000000..e2d58da --- /dev/null +++ b/tar-0.4.41/examples/list.rs @@ -0,0 +1,17 @@ +//! An example of listing the file names of entries in an archive. +//! +//! Takes a tarball on stdin and prints out all of the entries inside. + +extern crate tar; + +use std::io::stdin; + +use tar::Archive; + +fn main() { + let mut ar = Archive::new(stdin()); + for file in ar.entries().unwrap() { + let f = file.unwrap(); + println!("{}", f.path().unwrap().display()); + } +} diff --git a/tar-0.4.41/examples/raw_list.rs b/tar-0.4.41/examples/raw_list.rs new file mode 100644 index 0000000..9b86da2 --- /dev/null +++ b/tar-0.4.41/examples/raw_list.rs @@ -0,0 +1,48 @@ +//! An example of listing raw entries in an archive. +//! +//! Takes a tarball on stdin and prints out all of the entries inside. 
+ +extern crate tar; + +use std::io::stdin; + +use tar::Archive; + +fn main() { + let mut ar = Archive::new(stdin()); + for (i, file) in ar.entries().unwrap().raw(true).enumerate() { + println!("-------------------------- Entry {}", i); + let mut f = file.unwrap(); + println!("path: {}", f.path().unwrap().display()); + println!("size: {}", f.header().size().unwrap()); + println!("entry size: {}", f.header().entry_size().unwrap()); + println!("link name: {:?}", f.link_name().unwrap()); + println!("file type: {:#x}", f.header().entry_type().as_byte()); + println!("mode: {:#o}", f.header().mode().unwrap()); + println!("uid: {}", f.header().uid().unwrap()); + println!("gid: {}", f.header().gid().unwrap()); + println!("mtime: {}", f.header().mtime().unwrap()); + println!("username: {:?}", f.header().username().unwrap()); + println!("groupname: {:?}", f.header().groupname().unwrap()); + + if f.header().as_ustar().is_some() { + println!("kind: UStar"); + } else if f.header().as_gnu().is_some() { + println!("kind: GNU"); + } else { + println!("kind: normal"); + } + + if let Ok(Some(extensions)) = f.pax_extensions() { + println!("pax extensions:"); + for e in extensions { + let e = e.unwrap(); + println!( + "\t{:?} = {:?}", + String::from_utf8_lossy(e.key_bytes()), + String::from_utf8_lossy(e.value_bytes()) + ); + } + } + } +} diff --git a/tar-0.4.41/examples/write.rs b/tar-0.4.41/examples/write.rs new file mode 100644 index 0000000..167263d --- /dev/null +++ b/tar-0.4.41/examples/write.rs @@ -0,0 +1,13 @@ +extern crate tar; + +use std::fs::File; +use tar::Builder; + +fn main() { + let file = File::create("foo.tar").unwrap(); + let mut a = Builder::new(file); + + a.append_path("README.md").unwrap(); + a.append_file("lib.rs", &mut File::open("src/lib.rs").unwrap()) + .unwrap(); +} diff --git a/tar-0.4.41/src/archive.rs b/tar-0.4.41/src/archive.rs new file mode 100644 index 0000000..0bf677c --- /dev/null +++ b/tar-0.4.41/src/archive.rs @@ -0,0 +1,559 @@ +use std::cell::{Cell, 
RefCell}; +use std::cmp; +use std::convert::TryFrom; +use std::fs; +use std::io::prelude::*; +use std::io::{self, SeekFrom}; +use std::marker; +use std::path::Path; + +use crate::entry::{EntryFields, EntryIo}; +use crate::error::TarError; +use crate::other; +use crate::pax::*; +use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; + +/// A top-level representation of an archive file. +/// +/// This archive can have an entry added to it and it can be iterated over. +pub struct Archive { + inner: ArchiveInner, +} + +pub struct ArchiveInner { + pos: Cell, + mask: u32, + unpack_xattrs: bool, + preserve_permissions: bool, + preserve_ownerships: bool, + preserve_mtime: bool, + overwrite: bool, + ignore_zeros: bool, + obj: RefCell, +} + +/// An iterator over the entries of an archive. +pub struct Entries<'a, R: 'a + Read> { + fields: EntriesFields<'a>, + _ignored: marker::PhantomData<&'a Archive>, +} + +trait SeekRead: Read + Seek {} +impl SeekRead for R {} + +struct EntriesFields<'a> { + archive: &'a Archive, + seekable_archive: Option<&'a Archive>, + next: u64, + done: bool, + raw: bool, +} + +impl Archive { + /// Create a new archive with the underlying object as the reader. + pub fn new(obj: R) -> Archive { + Archive { + inner: ArchiveInner { + mask: u32::MIN, + unpack_xattrs: false, + preserve_permissions: false, + preserve_ownerships: false, + preserve_mtime: true, + overwrite: true, + ignore_zeros: false, + obj: RefCell::new(obj), + pos: Cell::new(0), + }, + } + } + + /// Unwrap this archive, returning the underlying object. + pub fn into_inner(self) -> R { + self.inner.obj.into_inner() + } + + /// Construct an iterator over the entries in this archive. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// iterator returns), then the contents read for each entry may be + /// corrupted. 
+ pub fn entries(&mut self) -> io::Result> { + let me: &mut Archive = self; + me._entries(None).map(|fields| Entries { + fields: fields, + _ignored: marker::PhantomData, + }) + } + + /// Set the mask of the permission bits when unpacking this entry. + /// + /// The mask will be inverted when applying against a mode, similar to how + /// `umask` works on Unix. In logical notation it looks like: + /// + /// ```text + /// new_mode = old_mode & (~mask) + /// ``` + /// + /// The mask is 0 by default and is currently only implemented on Unix. + pub fn set_mask(&mut self, mask: u32) { + self.inner.mask = mask; + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this archive. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. + pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { + self.inner.unpack_xattrs = unpack_xattrs; + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. + pub fn set_preserve_permissions(&mut self, preserve: bool) { + self.inner.preserve_permissions = preserve; + } + + /// Indicate whether numeric ownership ids (like uid and gid on Unix) + /// are preserved when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. + pub fn set_preserve_ownerships(&mut self, preserve: bool) { + self.inner.preserve_ownerships = preserve; + } + + /// Indicate whether files and symlinks should be overwritten on extraction. 
+ pub fn set_overwrite(&mut self, overwrite: bool) { + self.inner.overwrite = overwrite; + } + + /// Indicate whether access time information is preserved when unpacking + /// this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(&mut self, preserve: bool) { + self.inner.preserve_mtime = preserve; + } + + /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more + /// entries. + /// + /// This can be used in case multiple tar archives have been concatenated together. + pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) { + self.inner.ignore_zeros = ignore_zeros; + } +} + +impl Archive { + /// Construct an iterator over the entries in this archive for a seekable + /// reader. Seek will be used to efficiently skip over file contents. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// iterator returns), then the contents read for each entry may be + /// corrupted. + pub fn entries_with_seek(&mut self) -> io::Result> { + let me: &Archive = self; + let me_seekable: &Archive = self; + me._entries(Some(me_seekable)).map(|fields| Entries { + fields: fields, + _ignored: marker::PhantomData, + }) + } +} + +impl Archive { + fn _entries<'a>( + &'a self, + seekable_archive: Option<&'a Archive>, + ) -> io::Result> { + if self.inner.pos.get() != 0 { + return Err(other( + "cannot call entries unless archive is at \ + position 0", + )); + } + Ok(EntriesFields { + archive: self, + seekable_archive, + done: false, + next: 0, + raw: false, + }) + } +} + +impl<'a, R: Read> Entries<'a, R> { + /// Indicates whether this iterator will return raw entries or not. + /// + /// If the raw list of entries are returned, then no preprocessing happens + /// on account of this library, for example taking into account GNU long name + /// or long link archive members. Raw iteration is disabled by default. 
+ pub fn raw(self, raw: bool) -> Entries<'a, R> { + Entries { + fields: EntriesFields { + raw: raw, + ..self.fields + }, + _ignored: marker::PhantomData, + } + } +} +impl<'a, R: Read> Iterator for Entries<'a, R> { + type Item = io::Result>; + + fn next(&mut self) -> Option>> { + self.fields + .next() + .map(|result| result.map(|e| EntryFields::from(e).into_entry())) + } +} + +impl<'a> EntriesFields<'a> { + fn next_entry_raw( + &mut self, + pax_extensions: Option<&[u8]>, + ) -> io::Result>> { + let mut header = Header::new_old(); + let mut header_pos = self.next; + loop { + // Seek to the start of the next header in the archive + let delta = self.next - self.archive.inner.pos.get(); + self.skip(delta)?; + + // EOF is an indicator that we are at the end of the archive. + if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? { + return Ok(None); + } + + // If a header is not all zeros, we have another valid header. + // Otherwise, check if we are ignoring zeros and continue, or break as if this is the + // end of the archive. 
+ if !header.as_bytes().iter().all(|i| *i == 0) { + self.next += 512; + break; + } + + if !self.archive.inner.ignore_zeros { + return Ok(None); + } + self.next += 512; + header_pos = self.next; + } + + // Make sure the checksum is ok + let sum = header.as_bytes()[..148] + .iter() + .chain(&header.as_bytes()[156..]) + .fold(0, |a, b| a + (*b as u32)) + + 8 * 32; + let cksum = header.cksum()?; + if sum != cksum { + return Err(other("archive header checksum mismatch")); + } + + let mut pax_size: Option = None; + if let Some(pax_extensions_ref) = &pax_extensions { + pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE); + + if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) { + header.set_uid(pax_uid); + } + + if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) { + header.set_gid(pax_gid); + } + } + + let file_pos = self.next; + let mut size = header.entry_size()?; + if size == 0 { + if let Some(pax_size) = pax_size { + size = pax_size; + } + } + let ret = EntryFields { + size: size, + header_pos: header_pos, + file_pos: file_pos, + data: vec![EntryIo::Data((&self.archive.inner).take(size))], + header: header, + long_pathname: None, + long_linkname: None, + pax_extensions: None, + mask: self.archive.inner.mask, + unpack_xattrs: self.archive.inner.unpack_xattrs, + preserve_permissions: self.archive.inner.preserve_permissions, + preserve_mtime: self.archive.inner.preserve_mtime, + overwrite: self.archive.inner.overwrite, + preserve_ownerships: self.archive.inner.preserve_ownerships, + }; + + // Store where the next entry is, rounding up by 512 bytes (the size of + // a header); + let size = size + .checked_add(511) + .ok_or_else(|| other("size overflow"))?; + self.next = self + .next + .checked_add(size & !(512 - 1)) + .ok_or_else(|| other("size overflow"))?; + + Ok(Some(ret.into_entry())) + } + + fn next_entry(&mut self) -> io::Result>> { + if self.raw { + return self.next_entry_raw(None); + } + + let mut gnu_longname = 
None; + let mut gnu_longlink = None; + let mut pax_extensions = None; + let mut processed = 0; + loop { + processed += 1; + let entry = match self.next_entry_raw(pax_extensions.as_deref())? { + Some(entry) => entry, + None if processed > 1 => { + return Err(other( + "members found describing a future member \ + but no future member found", + )); + } + None => return Ok(None), + }; + + let is_recognized_header = + entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some(); + + if is_recognized_header && entry.header().entry_type().is_gnu_longname() { + if gnu_longname.is_some() { + return Err(other( + "two long name entries describing \ + the same member", + )); + } + gnu_longname = Some(EntryFields::from(entry).read_all()?); + continue; + } + + if is_recognized_header && entry.header().entry_type().is_gnu_longlink() { + if gnu_longlink.is_some() { + return Err(other( + "two long name entries describing \ + the same member", + )); + } + gnu_longlink = Some(EntryFields::from(entry).read_all()?); + continue; + } + + if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() { + if pax_extensions.is_some() { + return Err(other( + "two pax extensions entries describing \ + the same member", + )); + } + pax_extensions = Some(EntryFields::from(entry).read_all()?); + continue; + } + + let mut fields = EntryFields::from(entry); + fields.long_pathname = gnu_longname; + fields.long_linkname = gnu_longlink; + fields.pax_extensions = pax_extensions; + self.parse_sparse_header(&mut fields)?; + return Ok(Some(fields.into_entry())); + } + } + + fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { + if !entry.header.entry_type().is_gnu_sparse() { + return Ok(()); + } + let gnu = match entry.header.as_gnu() { + Some(gnu) => gnu, + None => return Err(other("sparse entry type listed but not GNU header")), + }; + + // Sparse files are represented internally as a list of blocks that are + // read. 
Blocks are either a bunch of 0's or they're data from the + // underlying archive. + // + // Blocks of a sparse file are described by the `GnuSparseHeader` + // structure, some of which are contained in `GnuHeader` but some of + // which may also be contained after the first header in further + // headers. + // + // We read off all the blocks here and use the `add_block` function to + // incrementally add them to the list of I/O block (in `entry.data`). + // The `add_block` function also validates that each chunk comes after + // the previous, we don't overrun the end of the file, and each block is + // aligned to a 512-byte boundary in the archive itself. + // + // At the end we verify that the sparse file size (`Header::size`) is + // the same as the current offset (described by the list of blocks) as + // well as the amount of data read equals the size of the entry + // (`Header::entry_size`). + entry.data.truncate(0); + + let mut cur = 0; + let mut remaining = entry.size; + { + let data = &mut entry.data; + let reader = &self.archive.inner; + let size = entry.size; + let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { + if block.is_empty() { + return Ok(()); + } + let off = block.offset()?; + let len = block.length()?; + if len != 0 && (size - remaining) % 512 != 0 { + return Err(other( + "previous block in sparse file was not \ + aligned to 512-byte boundary", + )); + } else if off < cur { + return Err(other( + "out of order or overlapping sparse \ + blocks", + )); + } else if cur < off { + let block = io::repeat(0).take(off - cur); + data.push(EntryIo::Pad(block)); + } + cur = off + .checked_add(len) + .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?; + remaining = remaining.checked_sub(len).ok_or_else(|| { + other( + "sparse file consumed more data than the header \ + listed", + ) + })?; + data.push(EntryIo::Data(reader.take(len))); + Ok(()) + }; + for block in gnu.sparse.iter() { + add_block(block)? 
+ } + if gnu.is_extended() { + let mut ext = GnuExtSparseHeader::new(); + ext.isextended[0] = 1; + while ext.is_extended() { + if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? { + return Err(other("failed to read extension")); + } + + self.next += 512; + for block in ext.sparse.iter() { + add_block(block)?; + } + } + } + } + if cur != gnu.real_size()? { + return Err(other( + "mismatch in sparse file chunks and \ + size in header", + )); + } + entry.size = cur; + if remaining > 0 { + return Err(other( + "mismatch in sparse file chunks and \ + entry size in header", + )); + } + Ok(()) + } + + fn skip(&mut self, mut amt: u64) -> io::Result<()> { + if let Some(seekable_archive) = self.seekable_archive { + let pos = io::SeekFrom::Current( + i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?, + ); + (&seekable_archive.inner).seek(pos)?; + } else { + let mut buf = [0u8; 4096 * 8]; + while amt > 0 { + let n = cmp::min(amt, buf.len() as u64); + let n = (&self.archive.inner).read(&mut buf[..n as usize])?; + if n == 0 { + return Err(other("unexpected EOF during skip")); + } + amt -= n as u64; + } + } + Ok(()) + } +} + +impl<'a> Iterator for EntriesFields<'a> { + type Item = io::Result>; + + fn next(&mut self) -> Option>> { + if self.done { + None + } else { + match self.next_entry() { + Ok(Some(e)) => Some(Ok(e)), + Ok(None) => { + self.done = true; + None + } + Err(e) => { + self.done = true; + Some(Err(e)) + } + } + } + } +} + +impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner { + fn read(&mut self, into: &mut [u8]) -> io::Result { + let i = self.obj.borrow_mut().read(into)?; + self.pos.set(self.pos.get() + i as u64); + Ok(i) + } +} + +impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner { + fn seek(&mut self, pos: SeekFrom) -> io::Result { + let pos = self.obj.borrow_mut().seek(pos)?; + self.pos.set(pos); + Ok(pos) + } +} + +/// Try to fill the buffer from the reader. 
+/// +/// If the reader reaches its end before filling the buffer at all, returns `false`. +/// Otherwise returns `true`. +fn try_read_all(r: &mut R, buf: &mut [u8]) -> io::Result { + let mut read = 0; + while read < buf.len() { + match r.read(&mut buf[read..])? { + 0 => { + if read == 0 { + return Ok(false); + } + + return Err(other("failed to read entire block")); + } + n => read += n, + } + } + Ok(true) +} diff --git a/tar-0.4.41/src/builder.rs b/tar-0.4.41/src/builder.rs new file mode 100644 index 0000000..eaae556 --- /dev/null +++ b/tar-0.4.41/src/builder.rs @@ -0,0 +1,663 @@ +use std::fs; +use std::io; +use std::io::prelude::*; +use std::path::Path; +use std::str; + +use crate::header::{path2bytes, HeaderMode}; +use crate::{other, EntryType, Header}; + +/// A structure for building archives +/// +/// This structure has methods for building up an archive from scratch into any +/// arbitrary writer. +pub struct Builder { + mode: HeaderMode, + follow: bool, + finished: bool, + obj: Option, +} + +impl Builder { + /// Create a new archive builder with the underlying object as the + /// destination of all data written. The builder will use + /// `HeaderMode::Complete` by default. + pub fn new(obj: W) -> Builder { + Builder { + mode: HeaderMode::Complete, + follow: true, + finished: false, + obj: Some(obj), + } + } + + /// Changes the HeaderMode that will be used when reading fs Metadata for + /// methods that implicitly read metadata for an input Path. Notably, this + /// does _not_ apply to `append(Header)`. + pub fn mode(&mut self, mode: HeaderMode) { + self.mode = mode; + } + + /// Follow symlinks, archiving the contents of the file they point to rather + /// than adding a symlink to the archive. Defaults to true. + pub fn follow_symlinks(&mut self, follow: bool) { + self.follow = follow; + } + + /// Gets shared reference to the underlying object. 
+ pub fn get_ref(&self) -> &W { + self.obj.as_ref().unwrap() + } + + /// Gets mutable reference to the underlying object. + /// + /// Note that care must be taken while writing to the underlying + /// object. But, e.g. `get_mut().flush()` is claimed to be safe and + /// useful in the situations when one needs to be ensured that + /// tar entry was flushed to the disk. + pub fn get_mut(&mut self) -> &mut W { + self.obj.as_mut().unwrap() + } + + /// Unwrap this archive, returning the underlying object. + /// + /// This function will finish writing the archive if the `finish` function + /// hasn't yet been called, returning any I/O error which happens during + /// that operation. + pub fn into_inner(mut self) -> io::Result { + if !self.finished { + self.finish()?; + } + Ok(self.obj.take().unwrap()) + } + + /// Adds a new entry to this archive. + /// + /// This function will append the header specified, followed by contents of + /// the stream specified by `data`. To produce a valid archive the `size` + /// field of `header` must be the same as the length of the stream that's + /// being written. Additionally the checksum for the header should have been + /// set via the `set_cksum` method. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. 
+ /// + /// # Examples + /// + /// ``` + /// use tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_path("foo").unwrap(); + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append(&header, data).unwrap(); + /// let data = ar.into_inner().unwrap(); + /// ``` + pub fn append(&mut self, header: &Header, mut data: R) -> io::Result<()> { + append(self.get_mut(), header, &mut data) + } + + /// Adds a new entry to this archive with the specified path. + /// + /// This function will set the specified path in the given header, which may + /// require appending a GNU long-name extension entry to the archive first. + /// The checksum for the header will be automatically updated via the + /// `set_cksum` method after setting the path. No other metadata in the + /// header will be modified. + /// + /// Then it will append the header, followed by contents of the stream + /// specified by `data`. To produce a valid archive the `size` field of + /// `header` must be the same as the length of the stream that's being + /// written. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. 
+ /// + /// # Examples + /// + /// ``` + /// use tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap(); + /// let data = ar.into_inner().unwrap(); + /// ``` + pub fn append_data, R: Read>( + &mut self, + header: &mut Header, + path: P, + data: R, + ) -> io::Result<()> { + prepare_header_path(self.get_mut(), header, path.as_ref())?; + header.set_cksum(); + self.append(&header, data) + } + + /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target. + /// + /// This function is similar to [`Self::append_data`] which supports long filenames, + /// but also supports long link targets using GNU extensions if necessary. + /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`]. + /// The `set_cksum` method will be invoked after setting the path. No other metadata in the + /// header will be modified. + /// + /// If you are intending to use GNU extensions, you must use this method over calling + /// [`Header::set_link_name`] because that function will fail on long links. + /// + /// Similar constraints around the position of the archive and completion + /// apply as with [`Self::append_data`]. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. 
+ /// + /// # Examples + /// + /// ``` + /// use tar::{Builder, Header, EntryType}; + /// + /// let mut ar = Builder::new(Vec::new()); + /// let mut header = Header::new_gnu(); + /// header.set_username("foo"); + /// header.set_entry_type(EntryType::Symlink); + /// header.set_size(0); + /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap(); + /// let data = ar.into_inner().unwrap(); + /// ``` + pub fn append_link, T: AsRef>( + &mut self, + header: &mut Header, + path: P, + target: T, + ) -> io::Result<()> { + self._append_link(header, path.as_ref(), target.as_ref()) + } + + fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> { + prepare_header_path(self.get_mut(), header, path)?; + prepare_header_link(self.get_mut(), header, target)?; + header.set_cksum(); + self.append(&header, std::io::empty()) + } + + // /// Adds a file on the local filesystem to this archive. + // /// + // /// This function will open the file specified by `path` and insert the file + // /// into the archive with the appropriate metadata set, returning any I/O + // /// error which occurs while writing. The path name for the file inside of + // /// this archive will be the same as `path`, and it is required that the + // /// path is a relative path. + // /// + // /// Note that this will not attempt to seek the archive to a valid position, + // /// so if the archive is in the middle of a read or some other similar + // /// operation then this may corrupt the archive. + // /// + // /// Also note that after all files have been written to an archive the + // /// `finish` function needs to be called to finish writing the archive. 
+ // /// + // /// # Examples + // /// + // /// ```no_run + // /// use tar::Builder; + // /// + // /// let mut ar = Builder::new(Vec::new()); + // /// + // /// ar.append_path("foo/bar.txt").unwrap(); + // /// ``` + // pub fn append_path>(&mut self, path: P) -> io::Result<()> { + // let mode = self.mode.clone(); + // let follow = self.follow; + // append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow) + // } + // + // /// Adds a file on the local filesystem to this archive under another name. + // /// + // /// This function will open the file specified by `path` and insert the file + // /// into the archive as `name` with appropriate metadata set, returning any + // /// I/O error which occurs while writing. The path name for the file inside + // /// of this archive will be `name` is required to be a relative path. + // /// + // /// Note that this will not attempt to seek the archive to a valid position, + // /// so if the archive is in the middle of a read or some other similar + // /// operation then this may corrupt the archive. + // /// + // /// Note if the `path` is a directory. This will just add an entry to the archive, + // /// rather than contents of the directory. + // /// + // /// Also note that after all files have been written to an archive the + // /// `finish` function needs to be called to finish writing the archive. + // /// + // /// # Examples + // /// + // /// ```no_run + // /// use tar::Builder; + // /// + // /// let mut ar = Builder::new(Vec::new()); + // /// + // /// // Insert the local file "foo/bar.txt" in the archive but with the name + // /// // "bar/foo.txt". 
+ // /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap(); + // /// ``` + // pub fn append_path_with_name, N: AsRef>( + // &mut self, + // path: P, + // name: N, + // ) -> io::Result<()> { + // let mode = self.mode.clone(); + // let follow = self.follow; + // append_path_with_name( + // self.get_mut(), + // path.as_ref(), + // Some(name.as_ref()), + // mode, + // follow, + // ) + // } + // + // /// Adds a file to this archive with the given path as the name of the file + // /// in the archive. + // /// + // /// This will use the metadata of `file` to populate a `Header`, and it will + // /// then append the file to the archive with the name `path`. + // /// + // /// Note that this will not attempt to seek the archive to a valid position, + // /// so if the archive is in the middle of a read or some other similar + // /// operation then this may corrupt the archive. + // /// + // /// Also note that after all files have been written to an archive the + // /// `finish` function needs to be called to finish writing the archive. + // /// + // /// # Examples + // /// + // /// ```no_run + // /// use std::fs::File; + // /// use tar::Builder; + // /// + // /// let mut ar = Builder::new(Vec::new()); + // /// + // /// // Open the file at one location, but insert it into the archive with a + // /// // different name. + // /// let mut f = File::open("foo/bar/baz.txt").unwrap(); + // /// ar.append_file("bar/baz.txt", &mut f).unwrap(); + // /// ``` + // pub fn append_file>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> { + // let mode = self.mode.clone(); + // append_file(self.get_mut(), path.as_ref(), file, mode) + // } + + // /// Adds a directory to this archive with the given path as the name of the + // /// directory in the archive. + // /// + // /// This will use `stat` to populate a `Header`, and it will then append the + // /// directory to the archive with the name `path`. 
+ // /// + // /// Note that this will not attempt to seek the archive to a valid position, + // /// so if the archive is in the middle of a read or some other similar + // /// operation then this may corrupt the archive. + // /// + // /// Note this will not add the contents of the directory to the archive. + // /// See `append_dir_all` for recusively adding the contents of the directory. + // /// + // /// Also note that after all files have been written to an archive the + // /// `finish` function needs to be called to finish writing the archive. + // /// + // /// # Examples + // /// + // /// ``` + // /// use std::fs; + // /// use tar::Builder; + // /// + // /// let mut ar = Builder::new(Vec::new()); + // /// + // /// // Use the directory at one location, but insert it into the archive + // /// // with a different name. + // /// ar.append_dir("bardir", ".").unwrap(); + // /// ``` + // pub fn append_dir(&mut self, path: P, src_path: Q) -> io::Result<()> + // where + // P: AsRef, + // Q: AsRef, + // { + // let mode = self.mode.clone(); + // append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode) + // } + + // /// Adds a directory and all of its contents (recursively) to this archive + // /// with the given path as the name of the directory in the archive. + // /// + // /// Note that this will not attempt to seek the archive to a valid position, + // /// so if the archive is in the middle of a read or some other similar + // /// operation then this may corrupt the archive. + // /// + // /// Also note that after all files have been written to an archive the + // /// `finish` function needs to be called to finish writing the archive. + // /// + // /// # Examples + // /// + // /// ``` + // /// use std::fs; + // /// use tar::Builder; + // /// + // /// let mut ar = Builder::new(Vec::new()); + // /// + // /// // Use the directory at one location, but insert it into the archive + // /// // with a different name. 
+ // /// ar.append_dir_all("bardir", ".").unwrap(); + // /// ``` + // pub fn append_dir_all(&mut self, path: P, src_path: Q) -> io::Result<()> + // where + // P: AsRef, + // Q: AsRef, + // { + // let mode = self.mode.clone(); + // let follow = self.follow; + // append_dir_all( + // self.get_mut(), + // path.as_ref(), + // src_path.as_ref(), + // mode, + // follow, + // ) + // } + + /// Finish writing this archive, emitting the termination sections. + /// + /// This function should only be called when the archive has been written + /// entirely and if an I/O error happens the underlying object still needs + /// to be acquired. + /// + /// In most situations the `into_inner` method should be preferred. + pub fn finish(&mut self) -> io::Result<()> { + if self.finished { + return Ok(()); + } + self.finished = true; + self.get_mut().write_all(&[0; 1024]) + } +} + +fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> { + dst.write_all(header.as_bytes())?; + let len = io::copy(&mut data, &mut dst)?; + + // Pad with zeros if necessary. + let buf = [0; 512]; + let remaining = 512 - (len % 512); + if remaining < 512 { + dst.write_all(&buf[..remaining as usize])?; + } + + Ok(()) +} + +// fn append_path_with_name( +// dst: &mut dyn Write, +// path: &Path, +// name: Option<&Path>, +// mode: HeaderMode, +// follow: bool, +// ) -> io::Result<()> { +// let stat = if follow { +// fs::metadata(path).map_err(|err| { +// io::Error::new( +// err.kind(), +// format!("{} when getting metadata for {}", err, path.display()), +// ) +// })? +// } else { +// fs::symlink_metadata(path).map_err(|err| { +// io::Error::new( +// err.kind(), +// format!("{} when getting metadata for {}", err, path.display()), +// ) +// })? 
+// }; +// let ar_name = name.unwrap_or(path); +// if stat.is_file() { +// append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None) +// } else if stat.is_dir() { +// append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None) +// } else if stat.file_type().is_symlink() { +// let link_name = fs::read_link(path)?; +// append_fs( +// dst, +// ar_name, +// &stat, +// &mut io::empty(), +// mode, +// Some(&link_name), +// ) +// } else { +// #[cfg(unix)] +// { +// append_special(dst, path, &stat, mode) +// } +// #[cfg(not(unix))] +// { +// Err(other(&format!("{} has unknown file type", path.display()))) +// } +// } +// } + +#[cfg(unix)] +fn append_special( + dst: &mut dyn Write, + path: &Path, + stat: &fs::Metadata, + mode: HeaderMode, +) -> io::Result<()> { + use ::std::os::unix::fs::{FileTypeExt, MetadataExt}; + + let file_type = stat.file_type(); + let entry_type; + if file_type.is_socket() { + // sockets can't be archived + return Err(other(&format!( + "{}: socket can not be archived", + path.display() + ))); + } else if file_type.is_fifo() { + entry_type = EntryType::Fifo; + } else if file_type.is_char_device() { + entry_type = EntryType::Char; + } else if file_type.is_block_device() { + entry_type = EntryType::Block; + } else { + return Err(other(&format!("{} has unknown file type", path.display()))); + } + + let mut header = Header::new_gnu(); + header.set_metadata_in_mode(stat, mode); + prepare_header_path(dst, &mut header, path)?; + + header.set_entry_type(entry_type); + let dev_id = stat.rdev(); + let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff); + let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff); + header.set_device_major(dev_major as u32)?; + header.set_device_minor(dev_minor as u32)?; + + header.set_cksum(); + dst.write_all(header.as_bytes())?; + + Ok(()) +} + +// fn append_file( +// dst: &mut dyn Write, +// path: &Path, +// file: &mut fs::File, +// mode: HeaderMode, +// ) -> io::Result<()> 
{ +// let stat = file.metadata()?; +// append_fs(dst, path, &stat, file, mode, None) +// } +// +// fn append_dir( +// dst: &mut dyn Write, +// path: &Path, +// src_path: &Path, +// mode: HeaderMode, +// ) -> io::Result<()> { +// let stat = fs::metadata(src_path)?; +// append_fs(dst, path, &stat, &mut io::empty(), mode, None) +// } + +fn prepare_header(size: u64, entry_type: u8) -> Header { + let mut header = Header::new_gnu(); + let name = b"././@LongLink"; + header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]); + header.set_mode(0o644); + header.set_uid(0); + header.set_gid(0); + header.set_mtime(0); + // + 1 to be compliant with GNU tar + header.set_size(size + 1); + header.set_entry_type(EntryType::new(entry_type)); + header.set_cksum(); + header +} + +fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> { + // Try to encode the path directly in the header, but if it ends up not + // working (probably because it's too long) then try to use the GNU-specific + // long name extension by emitting an entry which indicates that it's the + // filename. + if let Err(e) = header.set_path(path) { + let data = path2bytes(&path)?; + let max = header.as_old().name.len(); + // Since `e` isn't specific enough to let us know the path is indeed too + // long, verify it first before using the extension. + if data.len() < max { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, b'L'); + // null-terminated string + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2)?; + + // Truncate the path to store in the header we're about to emit to + // ensure we've got something at least mentioned. Note that we use + // `str`-encoding to be compatible with Windows, but in general the + // entry in the header itself shouldn't matter too much since extraction + // doesn't look at it. 
+ let truncated = match str::from_utf8(&data[..max]) { + Ok(s) => s, + Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(), + }; + header.set_path(truncated)?; + } + Ok(()) +} + +fn prepare_header_link( + dst: &mut dyn Write, + header: &mut Header, + link_name: &Path, +) -> io::Result<()> { + // Same as previous function but for linkname + if let Err(e) = header.set_link_name(&link_name) { + let data = path2bytes(&link_name)?; + if data.len() < header.as_old().linkname.len() { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, b'K'); + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2)?; + } + Ok(()) +} + +// fn append_fs( +// dst: &mut dyn Write, +// path: &Path, +// meta: &fs::Metadata, +// read: &mut dyn Read, +// mode: HeaderMode, +// link_name: Option<&Path>, +// ) -> io::Result<()> { +// let mut header = Header::new_gnu(); +// +// prepare_header_path(dst, &mut header, path)?; +// header.set_metadata_in_mode(meta, mode); +// if let Some(link_name) = link_name { +// prepare_header_link(dst, &mut header, link_name)?; +// } +// header.set_cksum(); +// append(dst, &header, read) +// } + +// fn append_dir_all( +// dst: &mut dyn Write, +// path: &Path, +// src_path: &Path, +// mode: HeaderMode, +// follow: bool, +// ) -> io::Result<()> { +// let mut stack = vec![(src_path.to_path_buf(), true, false)]; +// while let Some((src, is_dir, is_symlink)) = stack.pop() { +// let dest = path.join(src.strip_prefix(&src_path).unwrap()); +// // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true +// if is_dir || (is_symlink && follow && src.is_dir()) { +// for entry in fs::read_dir(&src)? 
{
+//                 let entry = entry?;
+//                 let file_type = entry.file_type()?;
+//                 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
+//             }
+//             if dest != Path::new("") {
+//                 append_dir(dst, &dest, &src, mode)?;
+//             }
+//         } else if !follow && is_symlink {
+//             let stat = fs::symlink_metadata(&src)?;
+//             let link_name = fs::read_link(&src)?;
+//             append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?;
+//         } else {
+//             #[cfg(unix)]
+//             {
+//                 let stat = fs::metadata(&src)?;
+//                 if !stat.is_file() {
+//                     append_special(dst, &dest, &stat, mode)?;
+//                     continue;
+//                 }
+//             }
+//             append_file(dst, &dest, &mut fs::File::open(src)?, mode)?;
+//         }
+//     }
+//     Ok(())
+// }
+
+impl<W: Write> Drop for Builder<W> {
+    fn drop(&mut self) {
+        let _ = self.finish();
+    }
+}
diff --git a/tar-0.4.41/src/entry.rs b/tar-0.4.41/src/entry.rs
new file mode 100644
index 0000000..8eacb4d
--- /dev/null
+++ b/tar-0.4.41/src/entry.rs
@@ -0,0 +1,388 @@
+use std::borrow::Cow;
+use std::cmp;
+use std::fs;
+use std::fs::OpenOptions;
+use std::io::prelude::*;
+use std::io::{self, Error, ErrorKind, SeekFrom};
+use std::marker;
+use std::path::{Component, Path, PathBuf};
+
+use crate::archive::ArchiveInner;
+use crate::error::TarError;
+use crate::header::bytes2path;
+use crate::other;
+use crate::{Archive, Header, PaxExtensions};
+
+/// A read-only view into an entry of an archive.
+///
+/// This structure is a window into a portion of a borrowed archive which can
+/// be inspected. It acts as a file handle by implementing the Reader trait. An
+/// entry cannot be rewritten once inserted into an archive.
+pub struct Entry<'a, R: 'a + Read> {
+    fields: EntryFields<'a>,
+    _ignored: marker::PhantomData<&'a Archive<R>>,
+}
+
+// private implementation detail of `Entry`, but concrete (no type parameters)
+// and also all-public to be constructed from other modules.
+pub struct EntryFields<'a> {
+    pub long_pathname: Option<Vec<u8>>,
+    pub long_linkname: Option<Vec<u8>>,
+    pub pax_extensions: Option<Vec<u8>>,
+    pub mask: u32,
+    pub header: Header,
+    pub size: u64,
+    pub header_pos: u64,
+    pub file_pos: u64,
+    pub data: Vec<EntryIo<'a>>,
+    pub unpack_xattrs: bool,
+    pub preserve_permissions: bool,
+    pub preserve_ownerships: bool,
+    pub preserve_mtime: bool,
+    pub overwrite: bool,
+}
+
+pub enum EntryIo<'a> {
+    Pad(io::Take<io::Repeat>),
+    Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>),
+}
+
+/// When unpacking items the unpacked thing is returned to allow custom
+/// additional handling by users. Today the File is returned, in future
+/// the enum may be extended with kinds for links, directories etc.
+#[derive(Debug)]
+pub enum Unpacked {
+    /// A file was unpacked.
+    File(std::fs::File),
+    /// A directory, hardlink, symlink, or other node was unpacked.
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl<'a, R: Read> Entry<'a, R> {
+    /// Returns the path name for this entry.
+    ///
+    /// This method may fail if the pathname is not valid Unicode and this is
+    /// called on a Windows platform.
+    ///
+    /// Note that this function will convert any `\` characters to directory
+    /// separators, and it will not always return the same value as
+    /// `self.header().path()` as some archive formats have support for longer
+    /// path names described in separate entries.
+    ///
+    /// It is recommended to use this method instead of inspecting the `header`
+    /// directly to ensure that various archive formats are handled correctly.
+    pub fn path(&self) -> io::Result<Cow<Path>> {
+        self.fields.path()
+    }
+
+    /// Returns the raw bytes listed for this entry.
+    ///
+    /// Note that this function will convert any `\` characters to directory
+    /// separators, and it will not always return the same value as
+    /// `self.header().path_bytes()` as some archive formats have support for
+    /// longer path names described in separate entries.
+    pub fn path_bytes(&self) -> Cow<[u8]> {
+        self.fields.path_bytes()
+    }
+
+    /// Returns the link name for this entry, if any is found.
+    ///
+    /// This method may fail if the pathname is not valid Unicode and this is
+    /// called on a Windows platform. `Ok(None)` being returned, however,
+    /// indicates that the link name was not present.
+    ///
+    /// Note that this function will convert any `\` characters to directory
+    /// separators, and it will not always return the same value as
+    /// `self.header().link_name()` as some archive formats have support for
+    /// longer path names described in separate entries.
+    ///
+    /// It is recommended to use this method instead of inspecting the `header`
+    /// directly to ensure that various archive formats are handled correctly.
+    pub fn link_name(&self) -> io::Result<Option<Cow<Path>>> {
+        self.fields.link_name()
+    }
+
+    /// Returns the link name for this entry, in bytes, if listed.
+    ///
+    /// Note that this will not always return the same value as
+    /// `self.header().link_name_bytes()` as some archive formats have support for
+    /// longer path names described in separate entries.
+    pub fn link_name_bytes(&self) -> Option<Cow<[u8]>> {
+        self.fields.link_name_bytes()
+    }
+
+    /// Returns an iterator over the pax extensions contained in this entry.
+    ///
+    /// Pax extensions are a form of archive where extra metadata is stored in
+    /// key/value pairs in entries before the entry they're intended to
+    /// describe. For example this can be used to describe long file name or
+    /// other metadata like atime/ctime/mtime in more precision.
+    ///
+    /// The returned iterator will yield key/value pairs for each extension.
+    ///
+    /// `None` will be returned if this entry does not indicate that it itself
+    /// contains extensions, or if there were no previous extensions describing
+    /// it.
+    ///
+    /// Note that global pax extensions are intended to be applied to all
+    /// archive entries.
+    ///
+    /// Also note that this function will read the entire entry if the entry
+    /// itself is a list of extensions.
+    pub fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> {
+        self.fields.pax_extensions()
+    }
+
+    /// Returns access to the header of this entry in the archive.
+    ///
+    /// This provides access to the metadata for this entry in the archive.
+    pub fn header(&self) -> &Header {
+        &self.fields.header
+    }
+
+    /// Returns access to the size of this entry in the archive.
+    ///
+    /// In the event the size is stored in a pax extension, that size value
+    /// will be referenced. Otherwise, the entry size will be stored in the header.
+    pub fn size(&self) -> u64 {
+        self.fields.size
+    }
+
+    /// Returns the starting position, in bytes, of the header of this entry in
+    /// the archive.
+    ///
+    /// The header is always a contiguous section of 512 bytes, so if the
+    /// underlying reader implements `Seek`, then the slice from `header_pos` to
+    /// `header_pos + 512` contains the raw header bytes.
+    pub fn raw_header_position(&self) -> u64 {
+        self.fields.header_pos
+    }
+
+    /// Returns the starting position, in bytes, of the file of this entry in
+    /// the archive.
+    ///
+    /// If the file of this entry is continuous (e.g. not a sparse file), and
+    /// if the underlying reader implements `Seek`, then the slice from
+    /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
+    pub fn raw_file_position(&self) -> u64 {
+        self.fields.file_pos
+    }
+
+    /// Set the mask of the permission bits when unpacking this entry.
+    ///
+    /// The mask will be inverted when applying against a mode, similar to how
+    /// `umask` works on Unix. In logical notation it looks like:
+    ///
+    /// ```text
+    /// new_mode = old_mode & (~mask)
+    /// ```
+    ///
+    /// The mask is 0 by default and is currently only implemented on Unix.
+    pub fn set_mask(&mut self, mask: u32) {
+        self.fields.mask = mask;
+    }
+
+    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
+    /// when unpacking this entry.
+    ///
+    /// This flag is disabled by default and is currently only implemented on
+    /// Unix using xattr support. This may eventually be implemented for
+    /// Windows, however, if other archive implementations are found which do
+    /// this as well.
+    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
+        self.fields.unpack_xattrs = unpack_xattrs;
+    }
+
+    /// Indicate whether extended permissions (like suid on Unix) are preserved
+    /// when unpacking this entry.
+    ///
+    /// This flag is disabled by default and is currently only implemented on
+    /// Unix.
+    pub fn set_preserve_permissions(&mut self, preserve: bool) {
+        self.fields.preserve_permissions = preserve;
+    }
+
+    /// Indicate whether access time information is preserved when unpacking
+    /// this entry.
+    ///
+    /// This flag is enabled by default.
+    pub fn set_preserve_mtime(&mut self, preserve: bool) {
+        self.fields.preserve_mtime = preserve;
+    }
+}
+
+impl<'a, R: Read> Read for Entry<'a, R> {
+    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
+        self.fields.read(into)
+    }
+}
+
+impl<'a> EntryFields<'a> {
+    pub fn from<R: Read>(entry: Entry<R>) -> EntryFields {
+        entry.fields
+    }
+
+    pub fn into_entry<R: Read>(self) -> Entry<'a, R> {
+        Entry {
+            fields: self,
+            _ignored: marker::PhantomData,
+        }
+    }
+
+    pub fn read_all(&mut self) -> io::Result<Vec<u8>> {
+        // Preallocate some data but don't let ourselves get too crazy now.
+        let cap = cmp::min(self.size, 128 * 1024);
+        let mut v = Vec::with_capacity(cap as usize);
+        self.read_to_end(&mut v).map(|_| v)
+    }
+
+    fn path(&self) -> io::Result<Cow<Path>> {
+        bytes2path(self.path_bytes())
+    }
+
+    fn path_bytes(&self) -> Cow<[u8]> {
+        match self.long_pathname {
+            Some(ref bytes) => {
+                if let Some(&0) = bytes.last() {
+                    Cow::Borrowed(&bytes[..bytes.len() - 1])
+                } else {
+                    Cow::Borrowed(bytes)
+                }
+            }
+            None => {
+                if let Some(ref pax) = self.pax_extensions {
+                    let pax = PaxExtensions::new(pax)
+                        .filter_map(|f| f.ok())
+                        .find(|f| f.key_bytes() == b"path")
+                        .map(|f| f.value_bytes());
+                    if let Some(field) = pax {
+                        return Cow::Borrowed(field);
+                    }
+                }
+                self.header.path_bytes()
+            }
+        }
+    }
+
+    /// Gets the path in a "lossy" way, used for error reporting ONLY.
+    fn path_lossy(&self) -> String {
+        String::from_utf8_lossy(&self.path_bytes()).to_string()
+    }
+
+    fn link_name(&self) -> io::Result<Option<Cow<Path>>> {
+        match self.link_name_bytes() {
+            Some(bytes) => bytes2path(bytes).map(Some),
+            None => Ok(None),
+        }
+    }
+
+    fn link_name_bytes(&self) -> Option<Cow<[u8]>> {
+        match self.long_linkname {
+            Some(ref bytes) => {
+                if let Some(&0) = bytes.last() {
+                    Some(Cow::Borrowed(&bytes[..bytes.len() - 1]))
+                } else {
+                    Some(Cow::Borrowed(bytes))
+                }
+            }
+            None => {
+                if let Some(ref pax) = self.pax_extensions {
+                    let pax = PaxExtensions::new(pax)
+                        .filter_map(|f| f.ok())
+                        .find(|f| f.key_bytes() == b"linkpath")
+                        .map(|f| f.value_bytes());
+                    if let Some(field) = pax {
+                        return Some(Cow::Borrowed(field));
+                    }
+                }
+                self.header.link_name_bytes()
+            }
+        }
+    }
+
+    fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> {
+        if self.pax_extensions.is_none() {
+            if !self.header.entry_type().is_pax_global_extensions()
+                && !self.header.entry_type().is_pax_local_extensions()
+            {
+                return Ok(None);
+            }
+            self.pax_extensions = Some(self.read_all()?);
+        }
+        Ok(Some(PaxExtensions::new(
+            self.pax_extensions.as_ref().unwrap(),
+        )))
+    }
+
+    fn ensure_dir_created(&self, dst: &Path, dir: &Path) ->
io::Result<()> {
+        let mut ancestor = dir;
+        let mut dirs_to_create = Vec::new();
+        while ancestor.symlink_metadata().is_err() {
+            dirs_to_create.push(ancestor);
+            if let Some(parent) = ancestor.parent() {
+                ancestor = parent;
+            } else {
+                break;
+            }
+        }
+        for ancestor in dirs_to_create.into_iter().rev() {
+            if let Some(parent) = ancestor.parent() {
+                self.validate_inside_dst(dst, parent)?;
+            }
+            fs::create_dir_all(ancestor)?;
+        }
+        Ok(())
+    }
+
+    fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> {
+        // Abort if target (canonical) parent is outside of `dst`
+        let canon_parent = file_dst.canonicalize().map_err(|err| {
+            Error::new(
+                err.kind(),
+                format!("{} while canonicalizing {}", err, file_dst.display()),
+            )
+        })?;
+        let canon_target = dst.canonicalize().map_err(|err| {
+            Error::new(
+                err.kind(),
+                format!("{} while canonicalizing {}", err, dst.display()),
+            )
+        })?;
+        if !canon_parent.starts_with(&canon_target) {
+            let err = TarError::new(
+                format!(
+                    "trying to unpack outside of destination path: {}",
+                    canon_target.display()
+                ),
+                // TODO: use ErrorKind::InvalidInput here?
(minor breaking change)
+                Error::new(ErrorKind::Other, "Invalid argument"),
+            );
+            return Err(err.into());
+        }
+        Ok(canon_target)
+    }
+}
+
+impl<'a> Read for EntryFields<'a> {
+    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
+        loop {
+            match self.data.get_mut(0).map(|io| io.read(into)) {
+                Some(Ok(0)) => {
+                    self.data.remove(0);
+                }
+                Some(r) => return r,
+                None => return Ok(0),
+            }
+        }
+    }
+}
+
+impl<'a> Read for EntryIo<'a> {
+    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
+        match *self {
+            EntryIo::Pad(ref mut io) => io.read(into),
+            EntryIo::Data(ref mut io) => io.read(into),
+        }
+    }
+}
diff --git a/tar-0.4.41/src/entry_type.rs b/tar-0.4.41/src/entry_type.rs
new file mode 100644
index 0000000..7f2494a
--- /dev/null
+++ b/tar-0.4.41/src/entry_type.rs
@@ -0,0 +1,199 @@
+// See https://en.wikipedia.org/wiki/Tar_%28computing%29#UStar_format
+/// Indicate for the type of file described by a header.
+///
+/// Each `Header` has an `entry_type` method returning an instance of this type
+/// which can be used to inspect what the header is describing.
+
+/// A non-exhaustive enum representing the possible entry types
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum EntryType {
+    /// Regular file
+    Regular,
+    /// Hard link
+    Link,
+    /// Symbolic link
+    Symlink,
+    /// Character device
+    Char,
+    /// Block device
+    Block,
+    /// Directory
+    Directory,
+    /// Named pipe (fifo)
+    Fifo,
+    /// Implementation-defined 'high-performance' type, treated as regular file
+    Continuous,
+    /// GNU extension - long file name
+    GNULongName,
+    /// GNU extension - long link name (link target)
+    GNULongLink,
+    /// GNU extension - sparse file
+    GNUSparse,
+    /// Global extended header
+    XGlobalHeader,
+    /// Extended Header
+    XHeader,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+ #[doc(hidden)] + __Nonexhaustive(u8), +} + +impl EntryType { + /// Creates a new entry type from a raw byte. + /// + /// Note that the other named constructors of entry type may be more + /// appropriate to create a file type from. + pub fn new(byte: u8) -> EntryType { + match byte { + b'\x00' | b'0' => EntryType::Regular, + b'1' => EntryType::Link, + b'2' => EntryType::Symlink, + b'3' => EntryType::Char, + b'4' => EntryType::Block, + b'5' => EntryType::Directory, + b'6' => EntryType::Fifo, + b'7' => EntryType::Continuous, + b'x' => EntryType::XHeader, + b'g' => EntryType::XGlobalHeader, + b'L' => EntryType::GNULongName, + b'K' => EntryType::GNULongLink, + b'S' => EntryType::GNUSparse, + b => EntryType::__Nonexhaustive(b), + } + } + + /// Returns the raw underlying byte that this entry type represents. + pub fn as_byte(&self) -> u8 { + match *self { + EntryType::Regular => b'0', + EntryType::Link => b'1', + EntryType::Symlink => b'2', + EntryType::Char => b'3', + EntryType::Block => b'4', + EntryType::Directory => b'5', + EntryType::Fifo => b'6', + EntryType::Continuous => b'7', + EntryType::XHeader => b'x', + EntryType::XGlobalHeader => b'g', + EntryType::GNULongName => b'L', + EntryType::GNULongLink => b'K', + EntryType::GNUSparse => b'S', + EntryType::__Nonexhaustive(b) => b, + } + } + + /// Creates a new entry type representing a regular file. + pub fn file() -> EntryType { + EntryType::Regular + } + + /// Creates a new entry type representing a hard link. + pub fn hard_link() -> EntryType { + EntryType::Link + } + + /// Creates a new entry type representing a symlink. + pub fn symlink() -> EntryType { + EntryType::Symlink + } + + /// Creates a new entry type representing a character special device. + pub fn character_special() -> EntryType { + EntryType::Char + } + + /// Creates a new entry type representing a block special device. + pub fn block_special() -> EntryType { + EntryType::Block + } + + /// Creates a new entry type representing a directory. 
+ pub fn dir() -> EntryType { + EntryType::Directory + } + + /// Creates a new entry type representing a FIFO. + pub fn fifo() -> EntryType { + EntryType::Fifo + } + + /// Creates a new entry type representing a contiguous file. + pub fn contiguous() -> EntryType { + EntryType::Continuous + } + + /// Returns whether this type represents a regular file. + pub fn is_file(&self) -> bool { + self == &EntryType::Regular + } + + /// Returns whether this type represents a hard link. + pub fn is_hard_link(&self) -> bool { + self == &EntryType::Link + } + + /// Returns whether this type represents a symlink. + pub fn is_symlink(&self) -> bool { + self == &EntryType::Symlink + } + + /// Returns whether this type represents a character special device. + pub fn is_character_special(&self) -> bool { + self == &EntryType::Char + } + + /// Returns whether this type represents a block special device. + pub fn is_block_special(&self) -> bool { + self == &EntryType::Block + } + + /// Returns whether this type represents a directory. + pub fn is_dir(&self) -> bool { + self == &EntryType::Directory + } + + /// Returns whether this type represents a FIFO. + pub fn is_fifo(&self) -> bool { + self == &EntryType::Fifo + } + + /// Returns whether this type represents a contiguous file. + pub fn is_contiguous(&self) -> bool { + self == &EntryType::Continuous + } + + /// Returns whether this type represents a GNU long name header. + pub fn is_gnu_longname(&self) -> bool { + self == &EntryType::GNULongName + } + + /// Returns whether this type represents a GNU sparse header. + pub fn is_gnu_sparse(&self) -> bool { + self == &EntryType::GNUSparse + } + + /// Returns whether this type represents a GNU long link header. + pub fn is_gnu_longlink(&self) -> bool { + self == &EntryType::GNULongLink + } + + /// Returns whether this type represents PAX global extensions, that + /// should affect all following entries. For more, see [PAX]. 
+    ///
+    /// [PAX]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
+    pub fn is_pax_global_extensions(&self) -> bool {
+        self == &EntryType::XGlobalHeader
+    }
+
+    /// Returns whether this type represents PAX local extensions; these
+    /// only affect the current entry. For more, see [PAX].
+    ///
+    /// [PAX]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
+    pub fn is_pax_local_extensions(&self) -> bool {
+        self == &EntryType::XHeader
+    }
+}
diff --git a/tar-0.4.41/src/error.rs b/tar-0.4.41/src/error.rs
new file mode 100644
index 0000000..0d3877f
--- /dev/null
+++ b/tar-0.4.41/src/error.rs
@@ -0,0 +1,41 @@
+use std::borrow::Cow;
+use std::error;
+use std::fmt;
+use std::io::{self, Error};
+
+#[derive(Debug)]
+pub struct TarError {
+    desc: Cow<'static, str>,
+    io: io::Error,
+}
+
+impl TarError {
+    pub fn new(desc: impl Into<Cow<'static, str>>, err: Error) -> TarError {
+        TarError {
+            desc: desc.into(),
+            io: err,
+        }
+    }
+}
+
+impl error::Error for TarError {
+    fn description(&self) -> &str {
+        &self.desc
+    }
+
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        Some(&self.io)
+    }
+}
+
+impl fmt::Display for TarError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.desc.fmt(f)
+    }
+}
+
+impl From<TarError> for Error {
+    fn from(t: TarError) -> Error {
+        Error::new(t.io.kind(), t)
+    }
+}
diff --git a/tar-0.4.41/src/header.rs b/tar-0.4.41/src/header.rs
new file mode 100644
index 0000000..fd6cfab
--- /dev/null
+++ b/tar-0.4.41/src/header.rs
@@ -0,0 +1,1647 @@
+#[cfg(unix)]
+use std::os::unix::prelude::*;
+#[cfg(windows)]
+use std::os::windows::prelude::*;
+#[cfg(target_os = "mikros")]
+use std::os::mikros::prelude::*;
+
+use std::borrow::Cow;
+use std::fmt;
+use std::fs;
+use std::io;
+use std::iter;
+use std::iter::repeat;
+use std::mem;
+use std::path::{Component, Path, PathBuf};
+use std::str;
+
+use crate::other;
+use crate::EntryType;
+
+/// A deterministic, arbitrary, non-zero timestamp that use used as `mtime`
+/// of
headers when [`HeaderMode::Deterministic`] is used. +/// +/// This value, chosen after careful deliberation, corresponds to _Jul 23, 2006_, +/// which is the date of the first commit for what would become Rust. +#[cfg(any(unix, windows))] +const DETERMINISTIC_TIMESTAMP: u64 = 1153704088; + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct Header { + bytes: [u8; 512], +} + +/// Declares the information that should be included when filling a Header +/// from filesystem metadata. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[non_exhaustive] +pub enum HeaderMode { + /// All supported metadata, including mod/access times and ownership will + /// be included. + Complete, + + /// Only metadata that is directly relevant to the identity of a file will + /// be included. In particular, ownership and mod/access times are excluded. + Deterministic, +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct OldHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub linkflag: [u8; 1], + pub linkname: [u8; 100], + pub pad: [u8; 255], +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct UstarHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // UStar format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub prefix: [u8; 155], + pub pad: [u8; 12], +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: 
[u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // GNU format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub atime: [u8; 12], + pub ctime: [u8; 12], + pub offset: [u8; 12], + pub longnames: [u8; 4], + pub unused: [u8; 1], + pub sparse: [GnuSparseHeader; 4], + pub isextended: [u8; 1], + pub realsize: [u8; 12], + pub pad: [u8; 17], +} + +/// Description of the header of a spare entry. +/// +/// Specifies the offset/number of bytes of a chunk of data in octal. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuSparseHeader { + pub offset: [u8; 12], + pub numbytes: [u8; 12], +} + +/// Representation of the entry found to represent extended GNU sparse files. +/// +/// When a `GnuHeader` has the `isextended` flag set to `1` then the contents of +/// the next entry will be one of these headers. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuExtSparseHeader { + pub sparse: [GnuSparseHeader; 21], + pub isextended: [u8; 1], + pub padding: [u8; 7], +} + +impl Header { + /// Creates a new blank GNU header. + /// + /// The GNU style header is the default for this library and allows various + /// extensions such as long path names, long link names, and setting the + /// atime/ctime metadata attributes of files. + pub fn new_gnu() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, GnuHeader>(&mut header); + gnu.magic = *b"ustar "; + gnu.version = *b" \0"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank UStar header. + /// + /// The UStar style header is an extension of the original archive header + /// which enables some extra metadata along with storing a longer (but not + /// too long) path name. + /// + /// UStar is also the basis used for pax archives. 
+ pub fn new_ustar() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, UstarHeader>(&mut header); + gnu.magic = *b"ustar\0"; + gnu.version = *b"00"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank old header. + /// + /// This header format is the original archive header format which all other + /// versions are compatible with (e.g. they are a superset). This header + /// format limits the path name limit and isn't able to contain extra + /// metadata like atime/ctime. + pub fn new_old() -> Header { + let mut header = Header { bytes: [0; 512] }; + header.set_mtime(0); + header + } + + fn is_ustar(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] == b"ustar\0"[..] && ustar.version[..] == b"00"[..] + } + + fn is_gnu(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] == b"ustar "[..] && ustar.version[..] == b" \0"[..] + } + + /// View this archive header as a raw "old" archive header. + /// + /// This view will always succeed as all archive header formats will fill + /// out at least the fields specified in the old header format. + pub fn as_old(&self) -> &OldHeader { + unsafe { cast(self) } + } + + /// Same as `as_old`, but the mutable version. + pub fn as_old_mut(&mut self) -> &mut OldHeader { + unsafe { cast_mut(self) } + } + + /// View this archive header as a raw UStar archive header. + /// + /// The UStar format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. + /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the UStar format have the appropriate values, + /// returning `None` if they aren't correct. 
+ pub fn as_ustar(&self) -> Option<&UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_ustar_mut`, but the mutable version. + pub fn as_ustar_mut(&mut self) -> Option<&mut UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// View this archive header as a raw GNU archive header. + /// + /// The GNU format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. + /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the GNU format have the appropriate values, + /// returning `None` if they aren't correct. + pub fn as_gnu(&self) -> Option<&GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_gnu`, but the mutable version. + pub fn as_gnu_mut(&mut self) -> Option<&mut GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// Treats the given byte slice as a header. + /// + /// Panics if the length of the passed slice is not equal to 512. + pub fn from_byte_slice(bytes: &[u8]) -> &Header { + assert_eq!(bytes.len(), mem::size_of::
()); + assert_eq!(mem::align_of_val(bytes), mem::align_of::
()); + unsafe { &*(bytes.as_ptr() as *const Header) } + } + + /// Returns a view into this header as a byte array. + pub fn as_bytes(&self) -> &[u8; 512] { + &self.bytes + } + + /// Returns a view into this header as a byte array. + pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] { + &mut self.bytes + } + + // /// Blanket sets the metadata in this header from the metadata argument + // /// provided. + // /// + // /// This is useful for initializing a `Header` from the OS's metadata from a + // /// file. By default, this will use `HeaderMode::Complete` to include all + // /// metadata. + // pub fn set_metadata(&mut self, meta: &fs::Metadata) { + // self.fill_from(meta, HeaderMode::Complete); + // } + // + // /// Sets only the metadata relevant to the given HeaderMode in this header + // /// from the metadata argument provided. + // pub fn set_metadata_in_mode(&mut self, meta: &fs::Metadata, mode: HeaderMode) { + // self.fill_from(meta, mode); + // } + + /// Returns the size of entry's data this header represents. + /// + /// This is different from `Header::size` for sparse files, which have + /// some longer `size()` but shorter `entry_size()`. The `entry_size()` + /// listed here should be the number of bytes in the archive this header + /// describes. + /// + /// May return an error if the field is corrupted. + pub fn entry_size(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().size).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting size for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the file size this header represents. + /// + /// May return an error if the field is corrupted. + pub fn size(&self) -> io::Result { + if self.entry_type().is_gnu_sparse() { + self.as_gnu() + .ok_or_else(|| other("sparse header was not a gnu header")) + .and_then(|h| h.real_size()) + } else { + self.entry_size() + } + } + + /// Encodes the `size` argument into the size field of this header. 
+ pub fn set_size(&mut self, size: u64) { + num_field_wrapper_into(&mut self.as_old_mut().size, size); + } + + /// Returns the raw path name stored in this header. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path(&self) -> io::Result> { + bytes2path(self.path_bytes()) + } + + /// Returns the pathname stored in this header as a byte array. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `path` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path_bytes(&self) -> Cow<[u8]> { + if let Some(ustar) = self.as_ustar() { + ustar.path_bytes() + } else { + let name = truncate(&self.as_old().name); + Cow::Borrowed(name) + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. + fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// Sets the path name for this header. + /// + /// This function will set the pathname listed in this header, encoding it + /// in the appropriate format. May fail if the path is too long or if the + /// path specified is not Unicode and this is a Windows platform. Will + /// strip out any "." path component, which signifies the current directory. + /// + /// Note: This function does not support names over 100 bytes, or paths + /// over 255 bytes, even for formats that support longer names. Instead, + /// use `Builder` methods to insert a long-name extension at the same time + /// as the file content. 
+ pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_path(path); + } + copy_path_into(&mut self.as_old_mut().name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the link name stored in this header, if any is found. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. `Ok(None)` being returned, however, + /// indicates that the link name was not present. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name(&self) -> io::Result>> { + match self.link_name_bytes() { + Some(bytes) => bytes2path(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the link name stored in this header as a byte array, if any. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `link_name` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name_bytes(&self) -> Option> { + let old = self.as_old(); + if old.linkname[0] != 0 { + Some(Cow::Borrowed(truncate(&old.linkname))) + } else { + None + } + } + + /// Sets the link name for this header. + /// + /// This function will set the linkname listed in this header, encoding it + /// in the appropriate format. May fail if the link name is too long or if + /// the path specified is not Unicode and this is a Windows platform. Will + /// strip out any "." path component, which signifies the current directory. + /// + /// To use GNU long link names, prefer instead [`crate::Builder::append_link`]. 
+ pub fn set_link_name>(&mut self, p: P) -> io::Result<()> { + self._set_link_name(p.as_ref()) + } + + fn _set_link_name(&mut self, path: &Path) -> io::Result<()> { + copy_path_into(&mut self.as_old_mut().linkname, path, true).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting link name for {}", err, self.path_lossy()), + ) + }) + } + + /// Sets the link name for this header without any transformation. + /// + /// This function is like [`Self::set_link_name`] but accepts an arbitrary byte array. + /// Hence it will not perform any canonicalization, such as replacing duplicate `//` with `/`. + pub fn set_link_name_literal>(&mut self, p: P) -> io::Result<()> { + self._set_link_name_literal(p.as_ref()) + } + + fn _set_link_name_literal(&mut self, bytes: &[u8]) -> io::Result<()> { + copy_into(&mut self.as_old_mut().linkname, bytes) + } + + /// Returns the mode bits for this file + /// + /// May return an error if the field is corrupted. + pub fn mode(&self) -> io::Result { + octal_from(&self.as_old().mode) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mode for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mode` provided into this header. + pub fn set_mode(&mut self, mode: u32) { + octal_into(&mut self.as_old_mut().mode, mode); + } + + /// Returns the value of the owner's user ID field + /// + /// May return an error if the field is corrupted. + pub fn uid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().uid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting uid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `uid` provided into this header. 
+ pub fn set_uid(&mut self, uid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().uid, uid); + } + + /// Returns the value of the group's user ID field + pub fn gid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().gid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting gid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `gid` provided into this header. + pub fn set_gid(&mut self, gid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().gid, gid); + } + + /// Returns the last modification time in Unix time format + pub fn mtime(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().mtime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mtime for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mtime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_mtime(&mut self, mtime: u64) { + num_field_wrapper_into(&mut self.as_old_mut().mtime, mtime); + } + + /// Return the user name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the user name was + /// present and was valid utf-8, `Ok(None)` indicates that the user name is + /// not present in this archive format, and `Err` indicates that the user + /// name was present but was not valid utf-8. + pub fn username(&self) -> Result, str::Utf8Error> { + match self.username_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the user name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the user name is not present in + /// this header format. 
+ pub fn username_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.username_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.username_bytes()) + } else { + None + } + } + + /// Sets the username inside this header. + /// + /// This function will return an error if this header format cannot encode a + /// user name or the name is too long. + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_username(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_username(name) + } else { + Err(other("not a ustar or gnu archive, cannot set username")) + } + } + + /// Return the group name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the group name was + /// present and was valid utf-8, `Ok(None)` indicates that the group name is + /// not present in this archive format, and `Err` indicates that the group + /// name was present but was not valid utf-8. + pub fn groupname(&self) -> Result, str::Utf8Error> { + match self.groupname_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the group name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the group name is not present in + /// this header format. + pub fn groupname_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.groupname_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.groupname_bytes()) + } else { + None + } + } + + /// Sets the group name inside this header. + /// + /// This function will return an error if this header format cannot encode a + /// group name or the name is too long. 
+ pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_groupname(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_groupname(name) + } else { + Err(other("not a ustar or gnu archive, cannot set groupname")) + } + } + + /// Returns the device major number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device major number, and `Err` indicates that it was + /// present and failed to decode. + pub fn device_major(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_major().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_major().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `major` into the dev_major field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// major device number. + pub fn set_device_major(&mut self, major: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_major(major); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_major(major); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_major")) + } + } + + /// Returns the device minor number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device minor number, and `Err` indicates that it was + /// present and failed to decode. 
+ pub fn device_minor(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_minor().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_minor().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `minor` into the dev_minor field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// minor device number. + pub fn set_device_minor(&mut self, minor: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_minor(minor); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_minor(minor); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_minor")) + } + } + + /// Returns the type of file described by this header. + pub fn entry_type(&self) -> EntryType { + EntryType::new(self.as_old().linkflag[0]) + } + + /// Sets the type of file that will be described by this header. + pub fn set_entry_type(&mut self, ty: EntryType) { + self.as_old_mut().linkflag = [ty.as_byte()]; + } + + /// Returns the checksum field of this header. + /// + /// May return an error if the field is corrupted. + pub fn cksum(&self) -> io::Result { + octal_from(&self.as_old().cksum) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting cksum for {}", err, self.path_lossy()), + ) + }) + } + + /// Sets the checksum field of this header based on the current fields in + /// this header. 
    pub fn set_cksum(&mut self) {
        let cksum = self.calculate_cksum();
        octal_into(&mut self.as_old_mut().cksum, cksum);
    }

    // Computes the tar checksum: the sum of all 512 header bytes, with the
    // 8-byte cksum field itself counted as if it were filled with spaces.
    fn calculate_cksum(&self) -> u32 {
        let old = self.as_old();
        // Locate the cksum field's byte offset within the 512-byte header by
        // pointer arithmetic on the overlapping OldHeader view.
        let start = old as *const _ as usize;
        let cksum_start = old.cksum.as_ptr() as *const _ as usize;
        let offset = cksum_start - start;
        let len = old.cksum.len();
        // Sum bytes before the field, `len` substituted spaces, then the rest.
        // Max possible sum is 512 * 255, well within u32 — no overflow.
        self.bytes[0..offset]
            .iter()
            .chain(iter::repeat(&b' ').take(len))
            .chain(&self.bytes[offset + len..])
            .fold(0, |a, b| a + (*b as u32))
    }

    // Disabled in this vendored copy along with `set_metadata` above
    // (presumably `fs::Metadata` is unavailable on the target OS — verify).
    // fn fill_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) {
    //     self.fill_platform_from(meta, mode);
    //     // Set size of directories to zero
    //     self.set_size(if meta.is_dir() || meta.file_type().is_symlink() {
    //         0
    //     } else {
    //         meta.len()
    //     });
    //     if let Some(ustar) = self.as_ustar_mut() {
    //         ustar.set_device_major(0);
    //         ustar.set_device_minor(0);
    //     }
    //     if let Some(gnu) = self.as_gnu_mut() {
    //         gnu.set_device_major(0);
    //         gnu.set_device_minor(0);
    //     }
    // }

    // No filesystem metadata on wasm32; callers must fill fields manually.
    #[cfg(target_arch = "wasm32")]
    #[allow(unused_variables)]
    fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) {
        unimplemented!();
    }

    // Fills uid/gid/mode/mtime and the entry type from Unix metadata.
    // `HeaderMode` and `DETERMINISTIC_TIMESTAMP` are defined elsewhere in
    // this crate.
    #[cfg(unix)]
    fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) {
        match mode {
            HeaderMode::Complete => {
                self.set_mtime(meta.mtime() as u64);
                self.set_uid(meta.uid() as u64);
                self.set_gid(meta.gid() as u64);
                self.set_mode(meta.mode() as u32);
            }
            HeaderMode::Deterministic => {
                // We could in theory set the mtime to zero here, but not all tools seem to behave
                // well when ingesting files with a 0 timestamp.
                // For example, rust-lang/cargo#9512 shows that lldb doesn't ingest files with a
                // zero timestamp correctly.
                self.set_mtime(DETERMINISTIC_TIMESTAMP);

                self.set_uid(0);
                self.set_gid(0);

                // Use a default umask value, but propagate the (user) execute bit.
                let fs_mode = if meta.is_dir() || (0o100 & meta.mode() == 0o100) {
                    0o755
                } else {
                    0o644
                };
                self.set_mode(fs_mode);
            }
        }

        // Note that if we are a GNU header we *could* set atime/ctime, except
        // the `tar` utility doesn't do that by default and it causes problems
        // with 7-zip [1].
        //
        // It's always possible to fill them out manually, so we just don't fill
        // it out automatically here.
        //
        // [1]: https://github.com/alexcrichton/tar-rs/issues/70

        // TODO: need to bind more file types
        self.set_entry_type(entry_type(meta.mode()));

        // Maps the S_IFMT bits of a Unix mode to a tar entry type; unknown
        // file kinds become the (invalid) b' ' type flag.
        fn entry_type(mode: u32) -> EntryType {
            match mode as libc::mode_t & libc::S_IFMT {
                libc::S_IFREG => EntryType::file(),
                libc::S_IFLNK => EntryType::symlink(),
                libc::S_IFCHR => EntryType::character_special(),
                libc::S_IFBLK => EntryType::block_special(),
                libc::S_IFDIR => EntryType::dir(),
                libc::S_IFIFO => EntryType::fifo(),
                _ => EntryType::new(b' '),
            }
        }
    }

    #[cfg(windows)]
    fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) {
        // There's no concept of a file mode on Windows, so do a best approximation here.
        match mode {
            HeaderMode::Complete => {
                self.set_uid(0);
                self.set_gid(0);
                // The dates listed in tarballs are always seconds relative to
                // January 1, 1970. On Windows, however, the timestamps are returned as
                // dates relative to January 1, 1601 (in 100ns intervals), so we need to
                // add in some offset for those dates.
                let mtime = (meta.last_write_time() / (1_000_000_000 / 100)) - 11644473600;
                self.set_mtime(mtime);
                // Synthesize a Unix mode from the readonly attribute.
                let fs_mode = {
                    const FILE_ATTRIBUTE_READONLY: u32 = 0x00000001;
                    let readonly = meta.file_attributes() & FILE_ATTRIBUTE_READONLY;
                    match (meta.is_dir(), readonly != 0) {
                        (true, false) => 0o755,
                        (true, true) => 0o555,
                        (false, false) => 0o644,
                        (false, true) => 0o444,
                    }
                };
                self.set_mode(fs_mode);
            }
            HeaderMode::Deterministic => {
                self.set_uid(0);
                self.set_gid(0);
                self.set_mtime(DETERMINISTIC_TIMESTAMP); // see above in unix
                let fs_mode = if meta.is_dir() { 0o755 } else { 0o644 };
                self.set_mode(fs_mode);
            }
        }

        let ft = meta.file_type();
        self.set_entry_type(if ft.is_dir() {
            EntryType::dir()
        } else if ft.is_file() {
            EntryType::file()
        } else if ft.is_symlink() {
            EntryType::symlink()
        } else {
            EntryType::new(b' ')
        });
    }

    // Shared Debug helper: emits only the fields that decode successfully,
    // so a corrupted header still formats without panicking.
    fn debug_fields(&self, b: &mut fmt::DebugStruct) {
        if let Ok(entry_size) = self.entry_size() {
            b.field("entry_size", &entry_size);
        }
        if let Ok(size) = self.size() {
            b.field("size", &size);
        }
        if let Ok(path) = self.path() {
            b.field("path", &path);
        }
        if let Ok(link_name) = self.link_name() {
            b.field("link_name", &link_name);
        }
        if let Ok(mode) = self.mode() {
            b.field("mode", &DebugAsOctal(mode));
        }
        if let Ok(uid) = self.uid() {
            b.field("uid", &uid);
        }
        if let Ok(gid) = self.gid() {
            b.field("gid", &gid);
        }
        if let Ok(mtime) = self.mtime() {
            b.field("mtime", &mtime);
        }
        if let Ok(username) = self.username() {
            b.field("username", &username);
        }
        if let Ok(groupname) = self.groupname() {
            b.field("groupname", &groupname);
        }
        if let Ok(device_major) = self.device_major() {
            b.field("device_major", &device_major);
        }
        if let Ok(device_minor) = self.device_minor() {
            b.field("device_minor", &device_minor);
        }
        if let Ok(cksum) = self.cksum() {
            b.field("cksum", &cksum);
            b.field("cksum_valid", &(cksum == self.calculate_cksum()));
        }
    }
}

+struct DebugAsOctal(T); + +impl fmt::Debug for DebugAsOctal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Octal::fmt(&self.0, f) + } +} + +unsafe fn cast(a: &T) -> &U { + assert_eq!(mem::size_of_val(a), mem::size_of::()); + assert_eq!(mem::align_of_val(a), mem::align_of::()); + &*(a as *const T as *const U) +} + +unsafe fn cast_mut(a: &mut T) -> &mut U { + assert_eq!(mem::size_of_val(a), mem::size_of::()); + assert_eq!(mem::align_of_val(a), mem::align_of::()); + &mut *(a as *mut T as *mut U) +} + +impl Clone for Header { + fn clone(&self) -> Header { + Header { bytes: self.bytes } + } +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(me) = self.as_ustar() { + me.fmt(f) + } else if let Some(me) = self.as_gnu() { + me.fmt(f) + } else { + self.as_old().fmt(f) + } + } +} + +impl OldHeader { + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for OldHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("OldHeader"); + self.as_header().debug_fields(&mut f); + f.finish() + } +} + +impl UstarHeader { + /// See `Header::path_bytes` + pub fn path_bytes(&self) -> Cow<[u8]> { + if self.prefix[0] == 0 && !self.name.contains(&b'\\') { + Cow::Borrowed(truncate(&self.name)) + } else { + let mut bytes = Vec::new(); + let prefix = truncate(&self.prefix); + if !prefix.is_empty() { + bytes.extend_from_slice(prefix); + bytes.push(b'/'); + } + bytes.extend_from_slice(truncate(&self.name)); + Cow::Owned(bytes) + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. 
+ fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// See `Header::set_path` + pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + // This can probably be optimized quite a bit more, but for now just do + // something that's relatively easy and readable. + // + // First up, if the path fits within `self.name` then we just shove it + // in there. If not then we try to split it between some existing path + // components where it can fit in name/prefix. To do that we peel off + // enough until the path fits in `prefix`, then we try to put both + // halves into their destination. + let bytes = path2bytes(path)?; + let (maxnamelen, maxprefixlen) = (self.name.len(), self.prefix.len()); + if bytes.len() <= maxnamelen { + copy_path_into(&mut self.name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } else { + let mut prefix = path; + let mut prefixlen; + loop { + match prefix.parent() { + Some(parent) => prefix = parent, + None => { + return Err(other(&format!( + "path cannot be split to be inserted into archive: {}", + path.display() + ))); + } + } + prefixlen = path2bytes(prefix)?.len(); + if prefixlen <= maxprefixlen { + break; + } + } + copy_path_into(&mut self.prefix, prefix, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + let path = bytes2path(Cow::Borrowed(&bytes[prefixlen + 1..]))?; + copy_path_into(&mut self.name, &path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } + Ok(()) + } + + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, 
name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting username for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting groupname for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for UstarHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("UstarHeader"); + self.as_header().debug_fields(&mut f); + f.finish() + } +} + +impl GnuHeader { + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// Gets 
the fullname (group:user) in a "lossy" way, used for error reporting ONLY. + fn fullname_lossy(&self) -> String { + format!( + "{}:{}", + String::from_utf8_lossy(self.groupname_bytes()), + String::from_utf8_lossy(self.username_bytes()), + ) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting username for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting groupname for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Returns the last modification time in Unix time format + pub fn atime(&self) -> io::Result { + num_field_wrapper_from(&self.atime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting atime 
for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `atime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_atime(&mut self, atime: u64) { + num_field_wrapper_into(&mut self.atime, atime); + } + + /// Returns the last modification time in Unix time format + pub fn ctime(&self) -> io::Result { + num_field_wrapper_from(&self.ctime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting ctime for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `ctime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_ctime(&mut self, ctime: u64) { + num_field_wrapper_into(&mut self.ctime, ctime); + } + + /// Returns the "real size" of the file this header represents. + /// + /// This is applicable for sparse files where the returned size here is the + /// size of the entire file after the sparse regions have been filled in. + pub fn real_size(&self) -> io::Result { + octal_from(&self.realsize).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting real_size for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// Indicates whether this header will be followed by additional + /// sparse-header records. + /// + /// Note that this is handled internally by this library, and is likely only + /// interesting if a `raw` iterator is being used. 
+ pub fn is_extended(&self) -> bool { + self.isextended[0] == 1 + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for GnuHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("GnuHeader"); + self.as_header().debug_fields(&mut f); + if let Ok(atime) = self.atime() { + f.field("atime", &atime); + } + if let Ok(ctime) = self.ctime() { + f.field("ctime", &ctime); + } + f.field("is_extended", &self.is_extended()) + .field("sparse", &DebugSparseHeaders(&self.sparse)) + .finish() + } +} + +struct DebugSparseHeaders<'a>(&'a [GnuSparseHeader]); + +impl<'a> fmt::Debug for DebugSparseHeaders<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_list(); + for header in self.0 { + if !header.is_empty() { + f.entry(header); + } + } + f.finish() + } +} + +impl GnuSparseHeader { + /// Returns true if block is empty + pub fn is_empty(&self) -> bool { + self.offset[0] == 0 || self.numbytes[0] == 0 + } + + /// Offset of the block from the start of the file + /// + /// Returns `Err` for a malformed `offset` field. + pub fn offset(&self) -> io::Result { + octal_from(&self.offset).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting offset from sparse header", err), + ) + }) + } + + /// Length of the block + /// + /// Returns `Err` for a malformed `numbytes` field. 
+ pub fn length(&self) -> io::Result { + octal_from(&self.numbytes).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting length from sparse header", err), + ) + }) + } +} + +impl fmt::Debug for GnuSparseHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("GnuSparseHeader"); + if let Ok(offset) = self.offset() { + f.field("offset", &offset); + } + if let Ok(length) = self.length() { + f.field("length", &length); + } + f.finish() + } +} + +impl GnuExtSparseHeader { + /// Crates a new zero'd out sparse header entry. + pub fn new() -> GnuExtSparseHeader { + unsafe { mem::zeroed() } + } + + /// Returns a view into this header as a byte array. + pub fn as_bytes(&self) -> &[u8; 512] { + debug_assert_eq!(mem::size_of_val(self), 512); + unsafe { mem::transmute(self) } + } + + /// Returns a view into this header as a byte array. + pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] { + debug_assert_eq!(mem::size_of_val(self), 512); + unsafe { mem::transmute(self) } + } + + /// Returns a slice of the underlying sparse headers. + /// + /// Some headers may represent empty chunks of both the offset and numbytes + /// fields are 0. + pub fn sparse(&self) -> &[GnuSparseHeader; 21] { + &self.sparse + } + + /// Indicates if another sparse header should be following this one. 
+ pub fn is_extended(&self) -> bool { + self.isextended[0] == 1 + } +} + +impl Default for GnuExtSparseHeader { + fn default() -> Self { + Self::new() + } +} + +fn octal_from(slice: &[u8]) -> io::Result { + let trun = truncate(slice); + let num = match str::from_utf8(trun) { + Ok(n) => n, + Err(_) => { + return Err(other(&format!( + "numeric field did not have utf-8 text: {}", + String::from_utf8_lossy(trun) + ))); + } + }; + match u64::from_str_radix(num.trim(), 8) { + Ok(n) => Ok(n), + Err(_) => Err(other(&format!("numeric field was not a number: {}", num))), + } +} + +fn octal_into(dst: &mut [u8], val: T) { + let o = format!("{:o}", val); + let value = o.bytes().rev().chain(repeat(b'0')); + for (slot, value) in dst.iter_mut().rev().skip(1).zip(value) { + *slot = value; + } +} + +// Wrapper to figure out if we should fill the header field using tar's numeric +// extension (binary) or not (octal). +fn num_field_wrapper_into(dst: &mut [u8], src: u64) { + if src >= 8589934592 || (src >= 2097152 && dst.len() == 8) { + numeric_extended_into(dst, src); + } else { + octal_into(dst, src); + } +} + +// Wrapper to figure out if we should read the header field in binary (numeric +// extension) or octal (standard encoding). +fn num_field_wrapper_from(src: &[u8]) -> io::Result { + if src[0] & 0x80 != 0 { + Ok(numeric_extended_from(src)) + } else { + octal_from(src) + } +} + +// When writing numeric fields with is the extended form, the high bit of the +// first byte is set to 1 and the remainder of the field is treated as binary +// instead of octal ascii. +// This handles writing u64 to 8 (uid, gid) or 12 (size, *time) bytes array. 
+fn numeric_extended_into(dst: &mut [u8], src: u64) { + let len: usize = dst.len(); + for (slot, val) in dst.iter_mut().zip( + repeat(0) + .take(len - 8) // to zero init extra bytes + .chain((0..8).rev().map(|x| ((src >> (8 * x)) & 0xff) as u8)), + ) { + *slot = val; + } + dst[0] |= 0x80; +} + +fn numeric_extended_from(src: &[u8]) -> u64 { + let mut dst: u64 = 0; + let mut b_to_skip = 1; + if src.len() == 8 { + // read first byte without extension flag bit + dst = (src[0] ^ 0x80) as u64; + } else { + // only read last 8 bytes + b_to_skip = src.len() - 8; + } + for byte in src.iter().skip(b_to_skip) { + dst <<= 8; + dst |= *byte as u64; + } + dst +} + +fn truncate(slice: &[u8]) -> &[u8] { + match slice.iter().position(|i| *i == 0) { + Some(i) => &slice[..i], + None => slice, + } +} + +/// Copies `bytes` into the `slot` provided, returning an error if the `bytes` +/// array is too long or if it contains any nul bytes. +fn copy_into(slot: &mut [u8], bytes: &[u8]) -> io::Result<()> { + if bytes.len() > slot.len() { + Err(other("provided value is too long")) + } else if bytes.iter().any(|b| *b == 0) { + Err(other("provided value contains a nul byte")) + } else { + for (slot, val) in slot.iter_mut().zip(bytes.iter().chain(Some(&0))) { + *slot = *val; + } + Ok(()) + } +} + +/// Copies `path` into the `slot` provided +/// +/// Returns an error if: +/// +/// * the path is too long to fit +/// * a nul byte was found +/// * an invalid path component is encountered (e.g. 
a root path or parent dir) +/// * the path itself is empty +fn copy_path_into(mut slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> { + let mut emitted = false; + let mut needs_slash = false; + for component in path.components() { + let bytes = path2bytes(Path::new(component.as_os_str()))?; + match (component, is_link_name) { + (Component::Prefix(..), false) | (Component::RootDir, false) => { + return Err(other("paths in archives must be relative")); + } + (Component::ParentDir, false) => { + return Err(other("paths in archives must not have `..`")); + } + // Allow "./" as the path + (Component::CurDir, false) if path.components().count() == 1 => {} + (Component::CurDir, false) => continue, + (Component::Normal(_), _) | (_, true) => {} + }; + if needs_slash { + copy(&mut slot, b"/")?; + } + if bytes.contains(&b'/') { + if let Component::Normal(..) = component { + return Err(other("path component in archive cannot contain `/`")); + } + } + copy(&mut slot, &*bytes)?; + if &*bytes != b"/" { + needs_slash = true; + } + emitted = true; + } + if !emitted { + return Err(other("paths in archives must have at least one component")); + } + if ends_with_slash(path) { + copy(&mut slot, &[b'/'])?; + } + return Ok(()); + + fn copy(slot: &mut &mut [u8], bytes: &[u8]) -> io::Result<()> { + copy_into(*slot, bytes)?; + let tmp = mem::replace(slot, &mut []); + *slot = &mut tmp[bytes.len()..]; + Ok(()) + } +} + +#[cfg(target_arch = "wasm32")] +fn ends_with_slash(p: &Path) -> bool { + p.to_string_lossy().ends_with('/') +} + +#[cfg(windows)] +fn ends_with_slash(p: &Path) -> bool { + let last = p.as_os_str().encode_wide().last(); + last == Some(b'/' as u16) || last == Some(b'\\' as u16) +} + +#[cfg(any(unix, target_os = "mikros"))] +fn ends_with_slash(p: &Path) -> bool { + p.as_os_str().as_bytes().ends_with(&[b'/']) +} + +#[cfg(any(windows, target_arch = "wasm32"))] +pub fn path2bytes(p: &Path) -> io::Result> { + p.as_os_str() + .to_str() + .map(|s| s.as_bytes()) + 
.ok_or_else(|| other(&format!("path {} was not valid Unicode", p.display()))) + .map(|bytes| { + if bytes.contains(&b'\\') { + // Normalize to Unix-style path separators + let mut bytes = bytes.to_owned(); + for b in &mut bytes { + if *b == b'\\' { + *b = b'/'; + } + } + Cow::Owned(bytes) + } else { + Cow::Borrowed(bytes) + } + }) +} + +#[cfg(any(unix, target_os = "mikros"))] +/// On unix this will never fail +pub fn path2bytes(p: &Path) -> io::Result> { + Ok(p.as_os_str().as_bytes()).map(Cow::Borrowed) +} + +#[cfg(windows)] +/// On windows we cannot accept non-Unicode bytes because it +/// is impossible to convert it to UTF-16. +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + return match bytes { + Cow::Borrowed(bytes) => { + let s = str::from_utf8(bytes).map_err(|_| not_unicode(bytes))?; + Ok(Cow::Borrowed(Path::new(s))) + } + Cow::Owned(bytes) => { + let s = String::from_utf8(bytes).map_err(|uerr| not_unicode(&uerr.into_bytes()))?; + Ok(Cow::Owned(PathBuf::from(s))) + } + }; + + fn not_unicode(v: &[u8]) -> io::Error { + other(&format!( + "only Unicode paths are supported on Windows: {}", + String::from_utf8_lossy(v) + )) + } +} + +#[cfg(any(unix, target_os = "mikros"))] +/// On unix this operation can never fail. +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + use std::ffi::{OsStr, OsString}; + + Ok(match bytes { + Cow::Borrowed(bytes) => Cow::Borrowed(Path::new(OsStr::from_bytes(bytes))), + Cow::Owned(bytes) => Cow::Owned(PathBuf::from(OsString::from_vec(bytes))), + }) +} + +#[cfg(target_arch = "wasm32")] +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + Ok(match bytes { + Cow::Borrowed(bytes) => { + Cow::Borrowed({ Path::new(str::from_utf8(bytes).map_err(invalid_utf8)?) }) + } + Cow::Owned(bytes) => { + Cow::Owned({ PathBuf::from(String::from_utf8(bytes).map_err(invalid_utf8)?) 
}) + } + }) +} + +#[cfg(target_arch = "wasm32")] +fn invalid_utf8(_: T) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, "Invalid utf-8") +} diff --git a/tar-0.4.41/src/lib.rs b/tar-0.4.41/src/lib.rs new file mode 100644 index 0000000..52251cd --- /dev/null +++ b/tar-0.4.41/src/lib.rs @@ -0,0 +1,44 @@ +//! A library for reading and writing TAR archives +//! +//! This library provides utilities necessary to manage [TAR archives][1] +//! abstracted over a reader or writer. Great strides are taken to ensure that +//! an archive is never required to be fully resident in memory, and all objects +//! provide largely a streaming interface to read bytes from. +//! +//! [1]: http://en.wikipedia.org/wiki/Tar_%28computing%29 + +// More docs about the detailed tar format can also be found here: +// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current + +// NB: some of the coding patterns and idioms here may seem a little strange. +// This is currently attempting to expose a super generic interface while +// also not forcing clients to codegen the entire crate each time they use +// it. To that end lots of work is done to ensure that concrete +// implementations are all found in this crate and the generic functions are +// all just super thin wrappers (e.g. easy to codegen). 
+ +#![doc(html_root_url = "https://docs.rs/tar/0.4")] +#![deny(missing_docs)] +#![cfg_attr(test, deny(warnings))] + +use std::io::{Error, ErrorKind}; + +pub use crate::archive::{Archive, Entries}; +pub use crate::builder::Builder; +pub use crate::entry::{Entry, Unpacked}; +pub use crate::entry_type::EntryType; +pub use crate::header::GnuExtSparseHeader; +pub use crate::header::{GnuHeader, GnuSparseHeader, Header, HeaderMode, OldHeader, UstarHeader}; +pub use crate::pax::{PaxExtension, PaxExtensions}; + +mod archive; +mod builder; +mod entry; +mod entry_type; +mod error; +mod header; +mod pax; + +fn other(msg: &str) -> Error { + Error::new(ErrorKind::Other, msg) +} diff --git a/tar-0.4.41/src/pax.rs b/tar-0.4.41/src/pax.rs new file mode 100644 index 0000000..6e83edc --- /dev/null +++ b/tar-0.4.41/src/pax.rs @@ -0,0 +1,147 @@ +#![allow(dead_code)] +use std::io; +use std::slice; +use std::str; + +use crate::other; + +// Keywords for PAX extended header records. +pub const PAX_NONE: &str = ""; // Indicates that no PAX key is suitable +pub const PAX_PATH: &str = "path"; +pub const PAX_LINKPATH: &str = "linkpath"; +pub const PAX_SIZE: &str = "size"; +pub const PAX_UID: &str = "uid"; +pub const PAX_GID: &str = "gid"; +pub const PAX_UNAME: &str = "uname"; +pub const PAX_GNAME: &str = "gname"; +pub const PAX_MTIME: &str = "mtime"; +pub const PAX_ATIME: &str = "atime"; +pub const PAX_CTIME: &str = "ctime"; // Removed from later revision of PAX spec, but was valid +pub const PAX_CHARSET: &str = "charset"; // Currently unused +pub const PAX_COMMENT: &str = "comment"; // Currently unused + +pub const PAX_SCHILYXATTR: &str = "SCHILY.xattr."; + +// Keywords for GNU sparse files in a PAX extended header. 
pub const PAX_GNUSPARSE: &str = "GNU.sparse.";
pub const PAX_GNUSPARSENUMBLOCKS: &str = "GNU.sparse.numblocks";
pub const PAX_GNUSPARSEOFFSET: &str = "GNU.sparse.offset";
pub const PAX_GNUSPARSENUMBYTES: &str = "GNU.sparse.numbytes";
pub const PAX_GNUSPARSEMAP: &str = "GNU.sparse.map";
pub const PAX_GNUSPARSENAME: &str = "GNU.sparse.name";
pub const PAX_GNUSPARSEMAJOR: &str = "GNU.sparse.major";
pub const PAX_GNUSPARSEMINOR: &str = "GNU.sparse.minor";
pub const PAX_GNUSPARSESIZE: &str = "GNU.sparse.size";
pub const PAX_GNUSPARSEREALSIZE: &str = "GNU.sparse.realsize";

/// An iterator over the pax extensions in an archive entry.
///
/// This iterator yields structures which can themselves be parsed into
/// key/value pairs.
pub struct PaxExtensions<'entry> {
    data: slice::Split<'entry, u8, fn(&u8) -> bool>,
}

impl<'entry> PaxExtensions<'entry> {
    /// Create new pax extensions iterator from the given entry data.
    pub fn new(a: &'entry [u8]) -> Self {
        fn is_newline(a: &u8) -> bool {
            *a == b'\n'
        }
        PaxExtensions {
            data: a.split(is_newline),
        }
    }
}

/// A key/value pair corresponding to a pax extension.
pub struct PaxExtension<'entry> {
    key: &'entry [u8],
    value: &'entry [u8],
}

// Scans the raw PAX data for `key` and parses its value as a u64,
// returning None on any parse or encoding failure.
pub fn pax_extensions_value(a: &[u8], key: &str) -> Option<u64> {
    for extension in PaxExtensions::new(a) {
        let current_extension = match extension {
            Ok(ext) => ext,
            Err(_) => return None,
        };
        if current_extension.key() != Ok(key) {
            continue;
        }

        let value = match current_extension.value() {
            Ok(value) => value,
            Err(_) => return None,
        };
        let result = match value.parse::<u64>() {
            Ok(result) => result,
            Err(_) => return None,
        };
        return Some(result);
    }
    None
}

impl<'entry> Iterator for PaxExtensions<'entry> {
    type Item = io::Result<PaxExtension<'entry>>;

    fn next(&mut self) -> Option<io::Result<PaxExtension<'entry>>> {
        // Each record is "<len> <key>=<value>\n"; an empty line ends the data.
        let line = match self.data.next() {
            Some(line) if line.is_empty() => return None,
            Some(line) => line,
            None => return None,
        };

        Some(
            line.iter()
                .position(|b| *b == b' ')
                .and_then(|i| {
                    str::from_utf8(&line[..i])
                        .ok()
                        .and_then(|len| len.parse::<usize>().ok().map(|j| (i + 1, j)))
                })
                .and_then(|(kvstart, reported_len)| {
                    // +1 accounts for the trailing newline consumed by split.
                    if line.len() + 1 == reported_len {
                        line[kvstart..]
                            .iter()
                            .position(|b| *b == b'=')
                            .map(|equals| (kvstart, equals))
                    } else {
                        None
                    }
                })
                .map(|(kvstart, equals)| PaxExtension {
                    key: &line[kvstart..kvstart + equals],
                    value: &line[kvstart + equals + 1..],
                })
                .ok_or_else(|| other("malformed pax extension")),
        )
    }
}

impl<'entry> PaxExtension<'entry> {
    /// Returns the key for this key/value pair parsed as a string.
    ///
    /// May fail if the key isn't actually utf-8.
    pub fn key(&self) -> Result<&'entry str, str::Utf8Error> {
        str::from_utf8(self.key)
    }

    /// Returns the underlying raw bytes for the key of this key/value pair.
    pub fn key_bytes(&self) -> &'entry [u8] {
        self.key
    }

    /// Returns the value for this key/value pair parsed as a string.
    ///
    /// May fail if the value isn't actually utf-8.
+ pub fn value(&self) -> Result<&'entry str, str::Utf8Error> { + str::from_utf8(self.value) + } + + /// Returns the underlying raw bytes for this value of this key/value pair. + pub fn value_bytes(&self) -> &'entry [u8] { + self.value + } +} diff --git a/tar-0.4.41/tests/all.rs b/tar-0.4.41/tests/all.rs new file mode 100644 index 0000000..1e7b264 --- /dev/null +++ b/tar-0.4.41/tests/all.rs @@ -0,0 +1,1514 @@ +extern crate filetime; +extern crate tar; +extern crate tempfile; +#[cfg(all(unix, feature = "xattr"))] +extern crate xattr; + +use std::fs::{self, File}; +use std::io::prelude::*; +use std::io::{self, Cursor}; +use std::iter::repeat; +use std::path::{Path, PathBuf}; + +use filetime::FileTime; +use tar::{Archive, Builder, Entries, EntryType, Header, HeaderMode}; +use tempfile::{Builder as TempBuilder, TempDir}; + +macro_rules! t { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => panic!("{} returned {}", stringify!($e), e), + } + }; +} + +macro_rules! tar { + ($e:expr) => { + &include_bytes!(concat!("archives/", $e))[..] + }; +} + +mod header; + +/// test that we can concatenate the simple.tar archive and extract the same entries twice when we +/// use the ignore_zeros option. +#[test] +fn simple_concat() { + let bytes = tar!("simple.tar"); + let mut archive_bytes = Vec::new(); + archive_bytes.extend(bytes); + + let original_names: Vec = decode_names(&mut Archive::new(Cursor::new(&archive_bytes))); + let expected: Vec<&str> = original_names.iter().map(|n| n.as_str()).collect(); + + // concat two archives (with null in-between); + archive_bytes.extend(bytes); + + // test now that when we read the archive, it stops processing at the first zero header. + let actual = decode_names(&mut Archive::new(Cursor::new(&archive_bytes))); + assert_eq!(expected, actual); + + // extend expected by itself. 
+ let expected: Vec<&str> = { + let mut o = Vec::new(); + o.extend(&expected); + o.extend(&expected); + o + }; + + let mut ar = Archive::new(Cursor::new(&archive_bytes)); + ar.set_ignore_zeros(true); + + let actual = decode_names(&mut ar); + assert_eq!(expected, actual); + + fn decode_names(ar: &mut Archive) -> Vec + where + R: Read, + { + let mut names = Vec::new(); + + for entry in t!(ar.entries()) { + let e = t!(entry); + names.push(t!(::std::str::from_utf8(&e.path_bytes())).to_string()); + } + + names + } +} + +#[test] +fn header_impls() { + let mut ar = Archive::new(Cursor::new(tar!("simple.tar"))); + let hn = Header::new_old(); + let hnb = hn.as_bytes(); + for file in t!(ar.entries()) { + let file = t!(file); + let h1 = file.header(); + let h1b = h1.as_bytes(); + let h2 = h1.clone(); + let h2b = h2.as_bytes(); + assert!(h1b[..] == h2b[..] && h2b[..] != hnb[..]) + } +} + +#[test] +fn header_impls_missing_last_header() { + let mut ar = Archive::new(Cursor::new(tar!("simple_missing_last_header.tar"))); + let hn = Header::new_old(); + let hnb = hn.as_bytes(); + for file in t!(ar.entries()) { + let file = t!(file); + let h1 = file.header(); + let h1b = h1.as_bytes(); + let h2 = h1.clone(); + let h2b = h2.as_bytes(); + assert!(h1b[..] == h2b[..] && h2b[..] 
!= hnb[..]) + } +} + +#[test] +fn reading_files() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + + let mut a = t!(entries.next().unwrap()); + assert_eq!(&*a.header().path_bytes(), b"a"); + let mut s = String::new(); + t!(a.read_to_string(&mut s)); + assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\na\n"); + + let mut b = t!(entries.next().unwrap()); + assert_eq!(&*b.header().path_bytes(), b"b"); + s.truncate(0); + t!(b.read_to_string(&mut s)); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + + assert!(entries.next().is_none()); +} + +#[test] +fn writing_files() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path)).write_all(b"test")); + + t!(ar.append_file("test2", &mut t!(File::open(&path)))); + + let data = t!(ar.into_inner()); + let mut ar = Archive::new(Cursor::new(data)); + let mut entries = t!(ar.entries()); + let mut f = t!(entries.next().unwrap()); + + assert_eq!(&*f.header().path_bytes(), b"test2"); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s)); + assert_eq!(s, "test"); + + assert!(entries.next().is_none()); +} + +#[test] +fn large_filename() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path)).write_all(b"test")); + + let filename = repeat("abcd/").take(50).collect::(); + let mut header = Header::new_ustar(); + header.set_path(&filename).unwrap(); + header.set_metadata(&t!(fs::metadata(&path))); + header.set_cksum(); + t!(ar.append(&header, &b"test"[..])); + let too_long = repeat("abcd").take(200).collect::(); + t!(ar.append_file(&too_long, &mut t!(File::open(&path)))); + t!(ar.append_data(&mut header, &too_long, &b"test"[..])); + + let rd = Cursor::new(t!(ar.into_inner())); + let 
mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + // The short entry added with `append` + let mut f = entries.next().unwrap().unwrap(); + assert_eq!(&*f.header().path_bytes(), filename.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s)); + assert_eq!(s, "test"); + + // The long entry added with `append_file` + let mut f = entries.next().unwrap().unwrap(); + assert_eq!(&*f.path_bytes(), too_long.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s)); + assert_eq!(s, "test"); + + // The long entry added with `append_data` + let mut f = entries.next().unwrap().unwrap(); + assert!(f.header().path_bytes().len() < too_long.len()); + assert_eq!(&*f.path_bytes(), too_long.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s)); + assert_eq!(s, "test"); + + assert!(entries.next().is_none()); +} + +fn reading_entries_common(mut entries: Entries) { + let mut a = t!(entries.next().unwrap()); + assert_eq!(&*a.header().path_bytes(), b"a"); + let mut s = String::new(); + t!(a.read_to_string(&mut s)); + assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\na\n"); + s.truncate(0); + t!(a.read_to_string(&mut s)); + assert_eq!(s, ""); + + let mut b = t!(entries.next().unwrap()); + assert_eq!(&*b.header().path_bytes(), b"b"); + s.truncate(0); + t!(b.read_to_string(&mut s)); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + assert!(entries.next().is_none()); +} + +#[test] +fn reading_entries() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + reading_entries_common(t!(ar.entries())); +} + +#[test] +fn reading_entries_with_seek() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + reading_entries_common(t!(ar.entries_with_seek())); +} + +struct LoggingReader { + inner: R, + read_bytes: u64, +} + +impl 
LoggingReader { + fn new(reader: R) -> LoggingReader { + LoggingReader { + inner: reader, + read_bytes: 0, + } + } +} + +impl Read for LoggingReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.inner.read(buf).map(|i| { + self.read_bytes += i as u64; + i + }) + } +} + +impl Seek for LoggingReader { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + self.inner.seek(pos) + } +} + +#[test] +fn skipping_entries_with_seek() { + let mut reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar"))); + let mut ar_reader = Archive::new(&mut reader); + let files: Vec<_> = t!(ar_reader.entries()) + .map(|entry| entry.unwrap().path().unwrap().to_path_buf()) + .collect(); + + let mut seekable_reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar"))); + let mut ar_seekable_reader = Archive::new(&mut seekable_reader); + let files_seekable: Vec<_> = t!(ar_seekable_reader.entries_with_seek()) + .map(|entry| entry.unwrap().path().unwrap().to_path_buf()) + .collect(); + + assert!(files == files_seekable); + assert!(seekable_reader.read_bytes < reader.read_bytes); +} + +fn check_dirtree(td: &TempDir) { + let dir_a = td.path().join("a"); + let dir_b = td.path().join("a/b"); + let file_c = td.path().join("a/c"); + assert!(fs::metadata(&dir_a).map(|m| m.is_dir()).unwrap_or(false)); + assert!(fs::metadata(&dir_b).map(|m| m.is_dir()).unwrap_or(false)); + assert!(fs::metadata(&file_c).map(|m| m.is_file()).unwrap_or(false)); +} + +#[test] +fn extracting_directories() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let rdr = Cursor::new(tar!("directory.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path())); + check_dirtree(&td); +} + +#[test] +fn extracting_duplicate_file_fail() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let path_present = td.path().join("a"); + t!(File::create(path_present)); + + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + 
ar.set_overwrite(false); + if let Err(err) = ar.unpack(td.path()) { + if err.kind() == std::io::ErrorKind::AlreadyExists { + // as expected with overwrite false + return; + } + panic!("unexpected error: {:?}", err); + } + panic!( + "unpack() should have returned an error of kind {:?}, returned Ok", + std::io::ErrorKind::AlreadyExists + ) +} + +#[test] +fn extracting_duplicate_file_succeed() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let path_present = td.path().join("a"); + t!(File::create(path_present)); + + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + ar.set_overwrite(true); + t!(ar.unpack(td.path())); +} + +#[test] +#[cfg(unix)] +fn extracting_duplicate_link_fail() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let path_present = td.path().join("lnk"); + t!(std::os::unix::fs::symlink("file", path_present)); + + let rdr = Cursor::new(tar!("link.tar")); + let mut ar = Archive::new(rdr); + ar.set_overwrite(false); + if let Err(err) = ar.unpack(td.path()) { + if err.kind() == std::io::ErrorKind::AlreadyExists { + // as expected with overwrite false + return; + } + panic!("unexpected error: {:?}", err); + } + panic!( + "unpack() should have returned an error of kind {:?}, returned Ok", + std::io::ErrorKind::AlreadyExists + ) +} + +#[test] +#[cfg(unix)] +fn extracting_duplicate_link_succeed() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let path_present = td.path().join("lnk"); + t!(std::os::unix::fs::symlink("file", path_present)); + + let rdr = Cursor::new(tar!("link.tar")); + let mut ar = Archive::new(rdr); + ar.set_overwrite(true); + t!(ar.unpack(td.path())); +} + +#[test] +#[cfg(all(unix, feature = "xattr"))] +fn xattrs() { + // If /tmp is a tmpfs, xattr will fail + // The xattr crate's unit tests also use /var/tmp for this reason + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir_in("/var/tmp")); + let rdr = Cursor::new(tar!("xattrs.tar")); + let mut ar 
= Archive::new(rdr); + ar.set_unpack_xattrs(true); + t!(ar.unpack(td.path())); + + let val = xattr::get(td.path().join("a/b"), "user.pax.flags").unwrap(); + assert_eq!(val.unwrap(), "epm".as_bytes()); +} + +#[test] +#[cfg(all(unix, feature = "xattr"))] +fn no_xattrs() { + // If /tmp is a tmpfs, xattr will fail + // The xattr crate's unit tests also use /var/tmp for this reason + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir_in("/var/tmp")); + let rdr = Cursor::new(tar!("xattrs.tar")); + let mut ar = Archive::new(rdr); + ar.set_unpack_xattrs(false); + t!(ar.unpack(td.path())); + + assert_eq!( + xattr::get(td.path().join("a/b"), "user.pax.flags").unwrap(), + None + ); +} + +#[test] +fn writing_and_extracting_directories() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let mut ar = Builder::new(Vec::new()); + let tmppath = td.path().join("tmpfile"); + t!(t!(File::create(&tmppath)).write_all(b"c")); + t!(ar.append_dir("a", ".")); + t!(ar.append_dir("a/b", ".")); + t!(ar.append_file("a/c", &mut t!(File::open(&tmppath)))); + t!(ar.finish()); + + let rdr = Cursor::new(t!(ar.into_inner())); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path())); + check_dirtree(&td); +} + +#[test] +fn writing_directories_recursively() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let base_dir = td.path().join("base"); + t!(fs::create_dir(&base_dir)); + t!(t!(File::create(base_dir.join("file1"))).write_all(b"file1")); + let sub_dir = base_dir.join("sub"); + t!(fs::create_dir(&sub_dir)); + t!(t!(File::create(sub_dir.join("file2"))).write_all(b"file2")); + + let mut ar = Builder::new(Vec::new()); + t!(ar.append_dir_all("foobar", base_dir)); + let data = t!(ar.into_inner()); + + let mut ar = Archive::new(Cursor::new(data)); + t!(ar.unpack(td.path())); + let base_dir = td.path().join("foobar"); + assert!(fs::metadata(&base_dir).map(|m| m.is_dir()).unwrap_or(false)); + let file1_path = base_dir.join("file1"); + 
assert!(fs::metadata(&file1_path) + .map(|m| m.is_file()) + .unwrap_or(false)); + let sub_dir = base_dir.join("sub"); + assert!(fs::metadata(&sub_dir).map(|m| m.is_dir()).unwrap_or(false)); + let file2_path = sub_dir.join("file2"); + assert!(fs::metadata(&file2_path) + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[test] +fn append_dir_all_blank_dest() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let base_dir = td.path().join("base"); + t!(fs::create_dir(&base_dir)); + t!(t!(File::create(base_dir.join("file1"))).write_all(b"file1")); + let sub_dir = base_dir.join("sub"); + t!(fs::create_dir(&sub_dir)); + t!(t!(File::create(sub_dir.join("file2"))).write_all(b"file2")); + + let mut ar = Builder::new(Vec::new()); + t!(ar.append_dir_all("", base_dir)); + let data = t!(ar.into_inner()); + + let mut ar = Archive::new(Cursor::new(data)); + t!(ar.unpack(td.path())); + let base_dir = td.path(); + assert!(fs::metadata(&base_dir).map(|m| m.is_dir()).unwrap_or(false)); + let file1_path = base_dir.join("file1"); + assert!(fs::metadata(&file1_path) + .map(|m| m.is_file()) + .unwrap_or(false)); + let sub_dir = base_dir.join("sub"); + assert!(fs::metadata(&sub_dir).map(|m| m.is_dir()).unwrap_or(false)); + let file2_path = sub_dir.join("file2"); + assert!(fs::metadata(&file2_path) + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[test] +fn append_dir_all_does_not_work_on_non_directory() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let path = td.path().join("test"); + t!(t!(File::create(&path)).write_all(b"test")); + + let mut ar = Builder::new(Vec::new()); + let result = ar.append_dir_all("test", path); + assert!(result.is_err()); +} + +#[test] +fn extracting_duplicate_dirs() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let rdr = Cursor::new(tar!("duplicate_dirs.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path())); + + let some_dir = td.path().join("some_dir"); + 
assert!(fs::metadata(&some_dir).map(|m| m.is_dir()).unwrap_or(false)); +} + +#[test] +fn unpack_old_style_bsd_dir() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let mut header = Header::new_old(); + header.set_entry_type(EntryType::Regular); + t!(header.set_path("testdir/")); + header.set_size(0); + header.set_cksum(); + t!(ar.append(&header, &mut io::empty())); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner())); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path())); + + // Iterating + let rdr = Cursor::new(ar.into_inner().into_inner()); + let mut ar = Archive::new(rdr); + assert!(t!(ar.entries()).all(|fr| fr.is_ok())); + + assert!(td.path().join("testdir").is_dir()); +} + +#[test] +fn handling_incorrect_file_size() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let path = td.path().join("tmpfile"); + t!(File::create(&path)); + let mut file = t!(File::open(&path)); + let mut header = Header::new_old(); + t!(header.set_path("somepath")); + header.set_metadata(&t!(file.metadata())); + header.set_size(2048); // past the end of file null blocks + header.set_cksum(); + t!(ar.append(&header, &mut file)); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner())); + let mut ar = Archive::new(rdr); + assert!(ar.unpack(td.path()).is_err()); + + // Iterating + let rdr = Cursor::new(ar.into_inner().into_inner()); + let mut ar = Archive::new(rdr); + assert!(t!(ar.entries()).any(|fr| fr.is_err())); +} + +#[test] +fn extracting_malicious_tarball() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let mut evil_tar = Vec::new(); + + { + let mut a = Builder::new(&mut evil_tar); + let mut append = |path: &str| { + let mut header = Header::new_gnu(); + assert!(header.set_path(path).is_err(), "was ok: {:?}", path); + { + let h = header.as_gnu_mut().unwrap(); + for (a, b) in h.name.iter_mut().zip(path.as_bytes()) { + 
*a = *b; + } + } + header.set_size(1); + header.set_cksum(); + t!(a.append(&header, io::repeat(1).take(1))); + }; + append("/tmp/abs_evil.txt"); + // std parse `//` as UNC path, see rust-lang/rust#100833 + append( + #[cfg(not(windows))] + "//tmp/abs_evil2.txt", + #[cfg(windows)] + "C://tmp/abs_evil2.txt", + ); + append("///tmp/abs_evil3.txt"); + append("/./tmp/abs_evil4.txt"); + append( + #[cfg(not(windows))] + "//./tmp/abs_evil5.txt", + #[cfg(windows)] + "C://./tmp/abs_evil5.txt", + ); + append("///./tmp/abs_evil6.txt"); + append("/../tmp/rel_evil.txt"); + append("../rel_evil2.txt"); + append("./../rel_evil3.txt"); + append("some/../../rel_evil4.txt"); + append(""); + append("././//./.."); + append(".."); + append("/////////.."); + append("/////////"); + } + + let mut ar = Archive::new(&evil_tar[..]); + t!(ar.unpack(td.path())); + + assert!(fs::metadata("/tmp/abs_evil.txt").is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt2").is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt3").is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt4").is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt5").is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt6").is_err()); + assert!(fs::metadata("/tmp/rel_evil.txt").is_err()); + assert!(fs::metadata("/tmp/rel_evil.txt").is_err()); + assert!(fs::metadata(td.path().join("../tmp/rel_evil.txt")).is_err()); + assert!(fs::metadata(td.path().join("../rel_evil2.txt")).is_err()); + assert!(fs::metadata(td.path().join("../rel_evil3.txt")).is_err()); + assert!(fs::metadata(td.path().join("../rel_evil4.txt")).is_err()); + + // The `some` subdirectory should not be created because the only + // filename that references this has '..'. + assert!(fs::metadata(td.path().join("some")).is_err()); + + // The `tmp` subdirectory should be created and within this + // subdirectory, there should be files named `abs_evil.txt` through + // `abs_evil6.txt`. 
+ assert!(fs::metadata(td.path().join("tmp")) + .map(|m| m.is_dir()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil.txt")) + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil2.txt")) + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil3.txt")) + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil4.txt")) + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil5.txt")) + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil6.txt")) + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[test] +fn octal_spaces() { + let rdr = Cursor::new(tar!("spaces.tar")); + let mut ar = Archive::new(rdr); + + let entry = ar.entries().unwrap().next().unwrap().unwrap(); + assert_eq!(entry.header().mode().unwrap() & 0o777, 0o777); + assert_eq!(entry.header().uid().unwrap(), 0); + assert_eq!(entry.header().gid().unwrap(), 0); + assert_eq!(entry.header().size().unwrap(), 2); + assert_eq!(entry.header().mtime().unwrap(), 0o12440016664); + assert_eq!(entry.header().cksum().unwrap(), 0o4253); +} + +#[test] +fn extracting_malformed_tar_null_blocks() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let path1 = td.path().join("tmpfile1"); + let path2 = td.path().join("tmpfile2"); + t!(File::create(&path1)); + t!(File::create(&path2)); + t!(ar.append_file("tmpfile1", &mut t!(File::open(&path1)))); + let mut data = t!(ar.into_inner()); + let amt = data.len(); + data.truncate(amt - 512); + let mut ar = Builder::new(data); + t!(ar.append_file("tmpfile2", &mut t!(File::open(&path2)))); + t!(ar.finish()); + + let data = t!(ar.into_inner()); + let mut ar = Archive::new(&data[..]); + assert!(ar.unpack(td.path()).is_ok()); +} + +#[test] +fn empty_filename() { + let td = 
t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let rdr = Cursor::new(tar!("empty_filename.tar"));
+    let mut ar = Archive::new(rdr);
+    assert!(ar.unpack(td.path()).is_ok());
+}
+
+#[test]
+fn file_times() {
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let rdr = Cursor::new(tar!("file_times.tar"));
+    let mut ar = Archive::new(rdr);
+    t!(ar.unpack(td.path()));
+
+    let meta = fs::metadata(td.path().join("a")).unwrap();
+    let mtime = FileTime::from_last_modification_time(&meta);
+    let atime = FileTime::from_last_access_time(&meta);
+    assert_eq!(mtime.unix_seconds(), 1000000000);
+    assert_eq!(mtime.nanoseconds(), 0);
+    assert_eq!(atime.unix_seconds(), 1000000000);
+    assert_eq!(atime.nanoseconds(), 0);
+}
+
+#[test]
+fn zero_file_times() {
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+
+    let mut ar = Builder::new(Vec::new());
+    ar.mode(HeaderMode::Deterministic);
+    let path = td.path().join("tmpfile");
+    t!(File::create(&path));
+    t!(ar.append_path_with_name(&path, "a"));
+
+    let data = t!(ar.into_inner());
+    let mut ar = Archive::new(&data[..]);
+    assert!(ar.unpack(td.path()).is_ok());
+
+    let meta = fs::metadata(td.path().join("a")).unwrap();
+    let mtime = FileTime::from_last_modification_time(&meta);
+    let atime = FileTime::from_last_access_time(&meta);
+    assert!(mtime.unix_seconds() != 0);
+    assert!(atime.unix_seconds() != 0);
+}
+
+#[test]
+fn backslash_treated_well() {
+    // Insert a file into an archive with a backslash
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let mut ar = Builder::new(Vec::<u8>::new());
+    t!(ar.append_dir("foo\\bar", td.path()));
+    let mut ar = Archive::new(Cursor::new(t!(ar.into_inner())));
+    let f = t!(t!(ar.entries()).next().unwrap());
+    if cfg!(unix) {
+        assert_eq!(t!(f.header().path()).to_str(), Some("foo\\bar"));
+    } else {
+        assert_eq!(t!(f.header().path()).to_str(), Some("foo/bar"));
+    }
+
+    // Unpack an archive with a backslash in the name
+    let mut ar =
Builder::new(Vec::<u8>::new());
+    let mut header = Header::new_gnu();
+    header.set_metadata(&t!(fs::metadata(td.path())));
+    header.set_size(0);
+    for (a, b) in header.as_old_mut().name.iter_mut().zip(b"foo\\bar\x00") {
+        *a = *b;
+    }
+    header.set_cksum();
+    t!(ar.append(&header, &mut io::empty()));
+    let data = t!(ar.into_inner());
+    let mut ar = Archive::new(&data[..]);
+    let f = t!(t!(ar.entries()).next().unwrap());
+    assert_eq!(t!(f.header().path()).to_str(), Some("foo\\bar"));
+
+    let mut ar = Archive::new(&data[..]);
+    t!(ar.unpack(td.path()));
+    assert!(fs::metadata(td.path().join("foo\\bar")).is_ok());
+}
+
+#[test]
+#[cfg(unix)]
+fn set_mask() {
+    use ::std::os::unix::fs::PermissionsExt;
+    let mut ar = tar::Builder::new(Vec::new());
+
+    let mut header = tar::Header::new_gnu();
+    header.set_size(0);
+    header.set_entry_type(tar::EntryType::Regular);
+    t!(header.set_path("foo"));
+    header.set_mode(0o777);
+    header.set_cksum();
+    t!(ar.append(&header, &[][..]));
+
+    let mut header = tar::Header::new_gnu();
+    header.set_size(0);
+    header.set_entry_type(tar::EntryType::Regular);
+    t!(header.set_path("bar"));
+    header.set_mode(0o421);
+    header.set_cksum();
+    t!(ar.append(&header, &[][..]));
+
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let bytes = t!(ar.into_inner());
+    let mut ar = tar::Archive::new(&bytes[..]);
+    ar.set_mask(0o211);
+    t!(ar.unpack(td.path()));
+
+    let md = t!(fs::metadata(td.path().join("foo")));
+    assert_eq!(md.permissions().mode(), 0o100566);
+    let md = t!(fs::metadata(td.path().join("bar")));
+    assert_eq!(md.permissions().mode(), 0o100420);
+}
+
+#[cfg(unix)]
+#[test]
+fn nul_bytes_in_path() {
+    use std::ffi::OsStr;
+    use std::os::unix::prelude::*;
+
+    let nul_path = OsStr::from_bytes(b"foo\0");
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let mut ar = Builder::new(Vec::<u8>::new());
+    let err = ar.append_dir(nul_path, td.path()).unwrap_err();
+    assert!(err.to_string().contains("contains a nul byte"));
+}
+
+#[test] +fn links() { + let mut ar = Archive::new(Cursor::new(tar!("link.tar"))); + let mut entries = t!(ar.entries()); + let link = t!(entries.next().unwrap()); + assert_eq!( + t!(link.header().link_name()).as_ref().map(|p| &**p), + Some(Path::new("file")) + ); + let other = t!(entries.next().unwrap()); + assert!(t!(other.header().link_name()).is_none()); +} + +#[test] +#[cfg(unix)] // making symlinks on windows is hard +fn unpack_links() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let mut ar = Archive::new(Cursor::new(tar!("link.tar"))); + t!(ar.unpack(td.path())); + + let md = t!(fs::symlink_metadata(td.path().join("lnk"))); + assert!(md.file_type().is_symlink()); + + let mtime = FileTime::from_last_modification_time(&md); + assert_eq!(mtime.unix_seconds(), 1448291033); + + assert_eq!( + &*t!(fs::read_link(td.path().join("lnk"))), + Path::new("file") + ); + t!(File::open(td.path().join("lnk"))); +} + +#[test] +fn pax_size() { + let mut ar = Archive::new(tar!("pax_size.tar")); + let mut entries = t!(ar.entries()); + let mut entry = t!(entries.next().unwrap()); + let mut attributes = t!(entry.pax_extensions()).unwrap(); + + let _first = t!(attributes.next().unwrap()); + let _second = t!(attributes.next().unwrap()); + let _third = t!(attributes.next().unwrap()); + let fourth = t!(attributes.next().unwrap()); + assert!(attributes.next().is_none()); + + assert_eq!(fourth.key(), Ok("size")); + assert_eq!(fourth.value(), Ok("4")); + + assert_eq!(entry.header().size().unwrap(), 0); + assert_eq!(entry.size(), 4); +} + +#[test] +fn pax_simple() { + let mut ar = Archive::new(tar!("pax.tar")); + let mut entries = t!(ar.entries()); + + let mut first = t!(entries.next().unwrap()); + let mut attributes = t!(first.pax_extensions()).unwrap(); + let first = t!(attributes.next().unwrap()); + let second = t!(attributes.next().unwrap()); + let third = t!(attributes.next().unwrap()); + assert!(attributes.next().is_none()); + + assert_eq!(first.key(), 
Ok("mtime"));
+    assert_eq!(first.value(), Ok("1453146164.953123768"));
+    assert_eq!(second.key(), Ok("atime"));
+    assert_eq!(second.value(), Ok("1453251915.24892486"));
+    assert_eq!(third.key(), Ok("ctime"));
+    assert_eq!(third.value(), Ok("1453146164.953123768"));
+}
+
+#[test]
+fn pax_path() {
+    let mut ar = Archive::new(tar!("pax2.tar"));
+    let mut entries = t!(ar.entries());
+
+    let first = t!(entries.next().unwrap());
+    assert!(first.path().unwrap().ends_with("aaaaaaaaaaaaaaa"));
+}
+
+#[test]
+fn pax_linkpath() {
+    let mut ar = Archive::new(tar!("pax2.tar"));
+    let mut links = t!(ar.entries()).skip(3).take(2);
+
+    let long_symlink = t!(links.next().unwrap());
+    let link_name = long_symlink.link_name().unwrap().unwrap();
+    assert!(link_name.to_str().unwrap().len() > 99);
+    assert!(link_name.ends_with("bbbbbbbbbbbbbbb"));
+
+    let long_hardlink = t!(links.next().unwrap());
+    let link_name = long_hardlink.link_name().unwrap().unwrap();
+    assert!(link_name.to_str().unwrap().len() > 99);
+    assert!(link_name.ends_with("ccccccccccccccc"));
+}
+
+#[test]
+fn long_name_trailing_nul() {
+    let mut b = Builder::new(Vec::<u8>::new());
+
+    let mut h = Header::new_gnu();
+    t!(h.set_path("././@LongLink"));
+    h.set_size(4);
+    h.set_entry_type(EntryType::new(b'L'));
+    h.set_cksum();
+    t!(b.append(&h, "foo\0".as_bytes()));
+    let mut h = Header::new_gnu();
+
+    t!(h.set_path("bar"));
+    h.set_size(6);
+    h.set_entry_type(EntryType::file());
+    h.set_cksum();
+    t!(b.append(&h, "foobar".as_bytes()));
+
+    let contents = t!(b.into_inner());
+    let mut a = Archive::new(&contents[..]);
+
+    let e = t!(t!(a.entries()).next().unwrap());
+    assert_eq!(&*e.path_bytes(), b"foo");
+}
+
+#[test]
+fn long_linkname_trailing_nul() {
+    let mut b = Builder::new(Vec::<u8>::new());
+
+    let mut h = Header::new_gnu();
+    t!(h.set_path("././@LongLink"));
+    h.set_size(4);
+    h.set_entry_type(EntryType::new(b'K'));
+    h.set_cksum();
+    t!(b.append(&h, "foo\0".as_bytes()));
+    let mut h = Header::new_gnu();
+
+
t!(h.set_path("bar"));
+    h.set_size(6);
+    h.set_entry_type(EntryType::file());
+    h.set_cksum();
+    t!(b.append(&h, "foobar".as_bytes()));
+
+    let contents = t!(b.into_inner());
+    let mut a = Archive::new(&contents[..]);
+
+    let e = t!(t!(a.entries()).next().unwrap());
+    assert_eq!(&*e.link_name_bytes().unwrap(), b"foo");
+}
+
+#[test]
+fn long_linkname_gnu() {
+    for t in [tar::EntryType::Symlink, tar::EntryType::Link] {
+        let mut b = Builder::new(Vec::<u8>::new());
+        let mut h = Header::new_gnu();
+        h.set_entry_type(t);
+        h.set_size(0);
+        let path = "usr/lib/.build-id/05/159ed904e45ff5100f7acd3d3b99fa7e27e34f";
+        let target = "../../../../usr/lib64/qt5/plugins/wayland-graphics-integration-server/libqt-wayland-compositor-xcomposite-egl.so";
+        t!(b.append_link(&mut h, path, target));
+
+        let contents = t!(b.into_inner());
+        let mut a = Archive::new(&contents[..]);
+
+        let e = &t!(t!(a.entries()).next().unwrap());
+        assert_eq!(e.header().entry_type(), t);
+        assert_eq!(e.path().unwrap().to_str().unwrap(), path);
+        assert_eq!(e.link_name().unwrap().unwrap().to_str().unwrap(), target);
+    }
+}
+
+#[test]
+fn linkname_literal() {
+    for t in [tar::EntryType::Symlink, tar::EntryType::Link] {
+        let mut b = Builder::new(Vec::<u8>::new());
+        let mut h = Header::new_gnu();
+        h.set_entry_type(t);
+        h.set_size(0);
+        let path = "usr/lib/systemd/systemd-sysv-install";
+        let target = "../../..//sbin/chkconfig";
+        h.set_link_name_literal(target).unwrap();
+        t!(b.append_data(&mut h, path, std::io::empty()));
+
+        let contents = t!(b.into_inner());
+        let mut a = Archive::new(&contents[..]);
+
+        let e = &t!(t!(a.entries()).next().unwrap());
+        assert_eq!(e.header().entry_type(), t);
+        assert_eq!(e.path().unwrap().to_str().unwrap(), path);
+        assert_eq!(e.link_name().unwrap().unwrap().to_str().unwrap(), target);
+    }
+}
+
+#[test]
+fn encoded_long_name_has_trailing_nul() {
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let path = td.path().join("foo");
+
t!(t!(File::create(&path)).write_all(b"test"));
+
+    let mut b = Builder::new(Vec::<u8>::new());
+    let long = repeat("abcd").take(200).collect::<String>();
+
+    t!(b.append_file(&long, &mut t!(File::open(&path))));
+
+    let contents = t!(b.into_inner());
+    let mut a = Archive::new(&contents[..]);
+
+    let mut e = t!(t!(a.entries()).raw(true).next().unwrap());
+    let mut name = Vec::new();
+    t!(e.read_to_end(&mut name));
+    assert_eq!(name[name.len() - 1], 0);
+
+    let header_name = &e.header().as_gnu().unwrap().name;
+    assert!(header_name.starts_with(b"././@LongLink\x00"));
+}
+
+#[test]
+fn reading_sparse() {
+    let rdr = Cursor::new(tar!("sparse.tar"));
+    let mut ar = Archive::new(rdr);
+    let mut entries = t!(ar.entries());
+
+    let mut a = t!(entries.next().unwrap());
+    let mut s = String::new();
+    assert_eq!(&*a.header().path_bytes(), b"sparse_begin.txt");
+    t!(a.read_to_string(&mut s));
+    assert_eq!(&s[..5], "test\n");
+    assert!(s[5..].chars().all(|x| x == '\u{0}'));
+
+    let mut a = t!(entries.next().unwrap());
+    let mut s = String::new();
+    assert_eq!(&*a.header().path_bytes(), b"sparse_end.txt");
+    t!(a.read_to_string(&mut s));
+    assert!(s[..s.len() - 9].chars().all(|x| x == '\u{0}'));
+    assert_eq!(&s[s.len() - 9..], "test_end\n");
+
+    let mut a = t!(entries.next().unwrap());
+    let mut s = String::new();
+    assert_eq!(&*a.header().path_bytes(), b"sparse_ext.txt");
+    t!(a.read_to_string(&mut s));
+    assert!(s[..0x1000].chars().all(|x| x == '\u{0}'));
+    assert_eq!(&s[0x1000..0x1000 + 5], "text\n");
+    assert!(s[0x1000 + 5..0x3000].chars().all(|x| x == '\u{0}'));
+    assert_eq!(&s[0x3000..0x3000 + 5], "text\n");
+    assert!(s[0x3000 + 5..0x5000].chars().all(|x| x == '\u{0}'));
+    assert_eq!(&s[0x5000..0x5000 + 5], "text\n");
+    assert!(s[0x5000 + 5..0x7000].chars().all(|x| x == '\u{0}'));
+    assert_eq!(&s[0x7000..0x7000 + 5], "text\n");
+    assert!(s[0x7000 + 5..0x9000].chars().all(|x| x == '\u{0}'));
+    assert_eq!(&s[0x9000..0x9000 + 5], "text\n");
+    assert!(s[0x9000 +
5..0xb000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0xb000..0xb000 + 5], "text\n"); + + let mut a = t!(entries.next().unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse.txt"); + t!(a.read_to_string(&mut s)); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 6], "hello\n"); + assert!(s[0x1000 + 6..0x2fa0].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x2fa0..0x2fa0 + 6], "world\n"); + assert!(s[0x2fa0 + 6..0x4000].chars().all(|x| x == '\u{0}')); + + assert!(entries.next().is_none()); +} + +#[test] +fn extract_sparse() { + let rdr = Cursor::new(tar!("sparse.tar")); + let mut ar = Archive::new(rdr); + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + t!(ar.unpack(td.path())); + + let mut s = String::new(); + t!(t!(File::open(td.path().join("sparse_begin.txt"))).read_to_string(&mut s)); + assert_eq!(&s[..5], "test\n"); + assert!(s[5..].chars().all(|x| x == '\u{0}')); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse_end.txt"))).read_to_string(&mut s)); + assert!(s[..s.len() - 9].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[s.len() - 9..], "test_end\n"); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse_ext.txt"))).read_to_string(&mut s)); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 5], "text\n"); + assert!(s[0x1000 + 5..0x3000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x3000..0x3000 + 5], "text\n"); + assert!(s[0x3000 + 5..0x5000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x5000..0x5000 + 5], "text\n"); + assert!(s[0x5000 + 5..0x7000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x7000..0x7000 + 5], "text\n"); + assert!(s[0x7000 + 5..0x9000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x9000..0x9000 + 5], "text\n"); + assert!(s[0x9000 + 5..0xb000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0xb000..0xb000 + 5], "text\n"); + + s.truncate(0); + 
t!(t!(File::open(td.path().join("sparse.txt"))).read_to_string(&mut s)); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 6], "hello\n"); + assert!(s[0x1000 + 6..0x2fa0].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x2fa0..0x2fa0 + 6], "world\n"); + assert!(s[0x2fa0 + 6..0x4000].chars().all(|x| x == '\u{0}')); +} + +#[test] +fn sparse_with_trailing() { + let rdr = Cursor::new(tar!("sparse-1.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + let mut a = t!(entries.next().unwrap()); + let mut s = String::new(); + t!(a.read_to_string(&mut s)); + assert_eq!(0x100_00c, s.len()); + assert_eq!(&s[..0xc], "0MB through\n"); + assert!(s[0xc..0x100_000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x100_000..], "1MB through\n"); +} + +#[test] +fn path_separators() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path)).write_all(b"test")); + + let short_path: PathBuf = repeat("abcd").take(2).collect(); + let long_path: PathBuf = repeat("abcd").take(50).collect(); + + // Make sure UStar headers normalize to Unix path separators + let mut header = Header::new_ustar(); + + t!(header.set_path(&short_path)); + assert_eq!(t!(header.path()), short_path); + assert!(!header.path_bytes().contains(&b'\\')); + + t!(header.set_path(&long_path)); + assert_eq!(t!(header.path()), long_path); + assert!(!header.path_bytes().contains(&b'\\')); + + // Make sure GNU headers normalize to Unix path separators, + // including the `@LongLink` fallback used by `append_file`. 
+    t!(ar.append_file(&short_path, &mut t!(File::open(&path))));
+    t!(ar.append_file(&long_path, &mut t!(File::open(&path))));
+
+    let rd = Cursor::new(t!(ar.into_inner()));
+    let mut ar = Archive::new(rd);
+    let mut entries = t!(ar.entries());
+
+    let entry = t!(entries.next().unwrap());
+    assert_eq!(t!(entry.path()), short_path);
+    assert!(!entry.path_bytes().contains(&b'\\'));
+
+    let entry = t!(entries.next().unwrap());
+    assert_eq!(t!(entry.path()), long_path);
+    assert!(!entry.path_bytes().contains(&b'\\'));
+
+    assert!(entries.next().is_none());
+}
+
+#[test]
+#[cfg(unix)]
+fn append_path_symlink() {
+    use std::borrow::Cow;
+    use std::env;
+    use std::os::unix::fs::symlink;
+
+    let mut ar = Builder::new(Vec::new());
+    ar.follow_symlinks(false);
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+
+    let long_linkname = repeat("abcd").take(30).collect::<String>();
+    let long_pathname = repeat("dcba").take(30).collect::<String>();
+    t!(env::set_current_dir(td.path()));
+    // "short" path name / short link name
+    t!(symlink("testdest", "test"));
+    t!(ar.append_path("test"));
+    // short path name / long link name
+    t!(symlink(&long_linkname, "test2"));
+    t!(ar.append_path("test2"));
+    // long path name / long link name
+    t!(symlink(&long_linkname, &long_pathname));
+    t!(ar.append_path(&long_pathname));
+
+    let rd = Cursor::new(t!(ar.into_inner()));
+    let mut ar = Archive::new(rd);
+    let mut entries = t!(ar.entries());
+
+    let entry = t!(entries.next().unwrap());
+    assert_eq!(t!(entry.path()), Path::new("test"));
+    assert_eq!(
+        t!(entry.link_name()),
+        Some(Cow::from(Path::new("testdest")))
+    );
+    assert_eq!(t!(entry.header().size()), 0);
+
+    let entry = t!(entries.next().unwrap());
+    assert_eq!(t!(entry.path()), Path::new("test2"));
+    assert_eq!(
+        t!(entry.link_name()),
+        Some(Cow::from(Path::new(&long_linkname)))
+    );
+    assert_eq!(t!(entry.header().size()), 0);
+
+    let entry = t!(entries.next().unwrap());
+    assert_eq!(t!(entry.path()), Path::new(&long_pathname));
+
assert_eq!( + t!(entry.link_name()), + Some(Cow::from(Path::new(&long_linkname))) + ); + assert_eq!(t!(entry.header().size()), 0); + + assert!(entries.next().is_none()); +} + +#[test] +fn name_with_slash_doesnt_fool_long_link_and_bsd_compat() { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let mut h = Header::new_gnu(); + t!(h.set_path("././@LongLink")); + h.set_size(4); + h.set_entry_type(EntryType::new(b'L')); + h.set_cksum(); + t!(ar.append(&h, "foo\0".as_bytes())); + + let mut header = Header::new_gnu(); + header.set_entry_type(EntryType::Regular); + t!(header.set_path("testdir/")); + header.set_size(0); + header.set_cksum(); + t!(ar.append(&header, &mut io::empty())); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner())); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path())); + + // Iterating + let rdr = Cursor::new(ar.into_inner().into_inner()); + let mut ar = Archive::new(rdr); + assert!(t!(ar.entries()).all(|fr| fr.is_ok())); + + assert!(td.path().join("foo").is_file()); +} + +#[test] +fn insert_local_file_different_name() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let path = td.path().join("directory"); + t!(fs::create_dir(&path)); + ar.append_path_with_name(&path, "archive/dir").unwrap(); + let path = td.path().join("file"); + t!(t!(File::create(&path)).write_all(b"test")); + ar.append_path_with_name(&path, "archive/dir/f").unwrap(); + + let rd = Cursor::new(t!(ar.into_inner())); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + let entry = t!(entries.next().unwrap()); + assert_eq!(t!(entry.path()), Path::new("archive/dir")); + let entry = t!(entries.next().unwrap()); + assert_eq!(t!(entry.path()), Path::new("archive/dir/f")); + assert!(entries.next().is_none()); +} + +#[test] +#[cfg(unix)] +fn tar_directory_containing_symlink_to_directory() { + use std::os::unix::fs::symlink; + + let td = 
t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let dummy_src = t!(TempBuilder::new().prefix("dummy_src").tempdir());
+    let dummy_dst = td.path().join("dummy_dst");
+    let mut ar = Builder::new(Vec::new());
+    t!(symlink(dummy_src.path().display().to_string(), &dummy_dst));
+
+    assert!(dummy_dst.read_link().is_ok());
+    assert!(dummy_dst.read_link().unwrap().is_dir());
+    ar.append_dir_all("symlinks", td.path()).unwrap();
+    ar.finish().unwrap();
+}
+
+#[test]
+fn long_path() {
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+    let rdr = Cursor::new(tar!("7z_long_path.tar"));
+    let mut ar = Archive::new(rdr);
+    assert!(ar.unpack(td.path()).is_ok());
+}
+
+#[test]
+fn unpack_path_larger_than_windows_max_path() {
+    let dir_name = "iamaprettylongnameandtobepreciseiam91characterslongwhichsomethinkisreallylongandothersdonot";
+    // 183 character directory name
+    let really_long_path = format!("{}{}", dir_name, dir_name);
+    let td = t!(TempBuilder::new().prefix(&really_long_path).tempdir());
+    // directory in 7z_long_path.tar is over 100 chars
+    let rdr = Cursor::new(tar!("7z_long_path.tar"));
+    let mut ar = Archive::new(rdr);
+    // should unpack path greater than windows MAX_PATH length of 260 characters
+    assert!(ar.unpack(td.path()).is_ok());
+}
+
+#[test]
+fn append_long_multibyte() {
+    let mut x = tar::Builder::new(Vec::new());
+    let mut name = String::new();
+    let data: &[u8] = &[];
+    for _ in 0..512 {
+        name.push('a');
+        name.push('𑢮');
+        x.append_data(&mut Header::new_gnu(), &name, data).unwrap();
+        name.pop();
+    }
+}
+
+#[test]
+fn read_only_directory_containing_files() {
+    let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
+
+    let mut b = Builder::new(Vec::<u8>::new());
+
+    let mut h = Header::new_gnu();
+    t!(h.set_path("dir/"));
+    h.set_size(0);
+    h.set_entry_type(EntryType::dir());
+    h.set_mode(0o444);
+    h.set_cksum();
+    t!(b.append(&h, "".as_bytes()));
+
+    let mut h = Header::new_gnu();
+    t!(h.set_path("dir/file"));
+    h.set_size(2);
+
h.set_entry_type(EntryType::file()); + h.set_cksum(); + t!(b.append(&h, "hi".as_bytes())); + + let contents = t!(b.into_inner()); + let mut ar = Archive::new(&contents[..]); + assert!(ar.unpack(td.path()).is_ok()); +} + +// This test was marked linux only due to macOS CI can't handle `set_current_dir` correctly +#[test] +#[cfg(target_os = "linux")] +fn tar_directory_containing_special_files() { + use std::env; + use std::ffi::CString; + + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let fifo = td.path().join("fifo"); + + unsafe { + let fifo_path = t!(CString::new(fifo.to_str().unwrap())); + let ret = libc::mknod(fifo_path.as_ptr(), libc::S_IFIFO | 0o644, 0); + if ret != 0 { + libc::perror(fifo_path.as_ptr()); + panic!("Failed to create a FIFO file"); + } + } + + t!(env::set_current_dir(td.path())); + let mut ar = Builder::new(Vec::new()); + // append_path has a different logic for processing files, so we need to test it as well + t!(ar.append_path("fifo")); + t!(ar.append_dir_all("special", td.path())); + t!(env::set_current_dir("/dev/")); + // CI systems seem to have issues with creating a chr device + t!(ar.append_path("null")); + t!(ar.finish()); +} + +#[test] +fn header_size_overflow() { + // maximal file size doesn't overflow anything + let mut ar = Builder::new(Vec::new()); + let mut header = Header::new_gnu(); + header.set_size(u64::MAX); + header.set_cksum(); + ar.append(&mut header, "x".as_bytes()).unwrap(); + let result = t!(ar.into_inner()); + let mut ar = Archive::new(&result[..]); + let mut e = ar.entries().unwrap(); + let err = e.next().unwrap().err().unwrap(); + assert!( + err.to_string().contains("size overflow"), + "bad error: {}", + err + ); + + // back-to-back entries that would overflow also don't panic + let mut ar = Builder::new(Vec::new()); + let mut header = Header::new_gnu(); + header.set_size(1_000); + header.set_cksum(); + ar.append(&mut header, &[0u8; 1_000][..]).unwrap(); + let mut header = Header::new_gnu(); + 
header.set_size(u64::MAX - 513); + header.set_cksum(); + ar.append(&mut header, "x".as_bytes()).unwrap(); + let result = t!(ar.into_inner()); + let mut ar = Archive::new(&result[..]); + let mut e = ar.entries().unwrap(); + e.next().unwrap().unwrap(); + let err = e.next().unwrap().err().unwrap(); + assert!( + err.to_string().contains("size overflow"), + "bad error: {}", + err + ); +} + +#[test] +#[cfg(unix)] +fn ownership_preserving() { + use std::os::unix::prelude::*; + + let mut rdr = Vec::new(); + let mut ar = Builder::new(&mut rdr); + let data: &[u8] = &[]; + let mut header = Header::new_gnu(); + // file 1 with uid = 580800000, gid = 580800000 + header.set_gid(580800000); + header.set_uid(580800000); + t!(header.set_path("iamuid580800000")); + header.set_size(0); + header.set_cksum(); + t!(ar.append(&header, data)); + // file 2 with uid = 580800001, gid = 580800000 + header.set_uid(580800001); + t!(header.set_path("iamuid580800001")); + header.set_cksum(); + t!(ar.append(&header, data)); + // file 3 with uid = 580800002, gid = 580800002 + header.set_gid(580800002); + header.set_uid(580800002); + t!(header.set_path("iamuid580800002")); + header.set_cksum(); + t!(ar.append(&header, data)); + t!(ar.finish()); + + let rdr = Cursor::new(t!(ar.into_inner())); + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let mut ar = Archive::new(rdr); + ar.set_preserve_ownerships(true); + + if unsafe { libc::getuid() } == 0 { + assert!(ar.unpack(td.path()).is_ok()); + // validate against premade files + // iamuid580800001 has this ownership: 580800001:580800000 + let meta = std::fs::metadata(td.path().join("iamuid580800000")).unwrap(); + assert_eq!(meta.uid(), 580800000); + assert_eq!(meta.gid(), 580800000); + let meta = std::fs::metadata(td.path().join("iamuid580800001")).unwrap(); + assert_eq!(meta.uid(), 580800001); + assert_eq!(meta.gid(), 580800000); + let meta = std::fs::metadata(td.path().join("iamuid580800002")).unwrap(); + assert_eq!(meta.uid(), 580800002); 
+ assert_eq!(meta.gid(), 580800002); + } else { + // it's not possible to unpack tar while preserving ownership + // without root permissions + assert!(ar.unpack(td.path()).is_err()); + } +} + +#[test] +#[cfg(unix)] +fn pax_and_gnu_uid_gid() { + let tarlist = [tar!("biguid_gnu.tar"), tar!("biguid_pax.tar")]; + + for file in &tarlist { + let td = t!(TempBuilder::new().prefix("tar-rs").tempdir()); + let rdr = Cursor::new(file); + let mut ar = Archive::new(rdr); + ar.set_preserve_ownerships(true); + + if unsafe { libc::getuid() } == 0 { + t!(ar.unpack(td.path())); + let meta = fs::metadata(td.path().join("test.txt")).unwrap(); + let uid = std::os::unix::prelude::MetadataExt::uid(&meta); + let gid = std::os::unix::prelude::MetadataExt::gid(&meta); + // 4294967294 = u32::MAX - 1 + assert_eq!(uid, 4294967294); + assert_eq!(gid, 4294967294); + } else { + // it's not possible to unpack tar while preserving ownership + // without root permissions + assert!(ar.unpack(td.path()).is_err()); + } + } +} diff --git a/tar-0.4.41/tests/entry.rs b/tar-0.4.41/tests/entry.rs new file mode 100644 index 0000000..62df663 --- /dev/null +++ b/tar-0.4.41/tests/entry.rs @@ -0,0 +1,410 @@ +extern crate tar; +extern crate tempfile; + +use std::fs::{create_dir, File}; +use std::io::Read; + +use tempfile::Builder; + +macro_rules! 
t { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => panic!("{} returned {}", stringify!($e), e), + } + }; +} + +#[test] +fn absolute_symlink() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path())); + + t!(td.path().join("foo").symlink_metadata()); + + let mut ar = tar::Archive::new(&bytes[..]); + let mut entries = t!(ar.entries()); + let entry = t!(entries.next().unwrap()); + assert_eq!(&*entry.link_name_bytes().unwrap(), b"/bar"); +} + +#[test] +fn absolute_hardlink() { + let td = t!(Builder::new().prefix("tar").tempdir()); + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Link); + t!(header.set_path("bar")); + // This absolute path under tempdir will be created at unpack time + t!(header.set_link_name(td.path().join("foo"))); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + t!(ar.unpack(td.path())); + t!(td.path().join("foo").metadata()); + t!(td.path().join("bar").metadata()); +} + +#[test] +fn relative_hardlink() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + 
let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Link); + t!(header.set_path("bar")); + t!(header.set_link_name("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path())); + t!(td.path().join("foo").metadata()); + t!(td.path().join("bar").metadata()); +} + +#[test] +fn absolute_link_deref_error() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("/")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).is_err()); +} + +#[test] +fn relative_link_deref_error() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("../../../../")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = 
t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).is_err()); +} + +#[test] +#[cfg(unix)] +fn directory_maintains_permissions() { + use ::std::os::unix::fs::PermissionsExt; + + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Directory); + t!(header.set_path("foo")); + header.set_mode(0o777); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path())); + let f = t!(File::open(td.path().join("foo"))); + let md = t!(f.metadata()); + assert!(md.is_dir()); + assert_eq!(md.permissions().mode(), 0o40777); +} + +#[test] +#[cfg(unix)] +fn set_entry_mask() { + use ::std::os::unix::fs::PermissionsExt; + + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_mode(0o777); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + let td = t!(Builder::new().prefix("tar").tempdir()); + let foo_path = td.path().join("foo"); + + let mut entries = t!(ar.entries()); + let mut foo = t!(entries.next().unwrap()); + foo.set_mask(0o027); + t!(foo.unpack(&foo_path)); + + let f = t!(File::open(foo_path)); + let md = t!(f.metadata()); + assert!(md.is_file()); + assert_eq!(md.permissions().mode(), 0o100750); +} + +#[test] +#[cfg(not(windows))] // dangling symlinks have weird permissions +fn modify_link_just_created() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + 
header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("bar/foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path())); + + t!(File::open(td.path().join("bar/foo"))); + t!(File::open(td.path().join("bar/bar"))); + t!(File::open(td.path().join("foo/foo"))); + t!(File::open(td.path().join("foo/bar"))); +} + +#[test] +#[cfg(not(windows))] // dangling symlinks have weird permissions +fn modify_outside_with_relative_symlink() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path("symlink")); + t!(header.set_link_name("..")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("symlink/foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + let tar_dir = td.path().join("tar"); + create_dir(&tar_dir).unwrap(); + assert!(ar.unpack(tar_dir).is_err()); + assert!(!td.path().join("foo").exists()); +} + +#[test] +fn parent_paths_error() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + 
header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("..")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).is_err()); +} + +#[test] +#[cfg(unix)] +fn good_parent_paths_ok() { + use std::path::PathBuf; + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path(PathBuf::from("foo").join("bar"))); + t!(header.set_link_name(PathBuf::from("..").join("bar"))); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path())); + t!(td.path().join("foo").join("bar").read_link()); + let dst = t!(td.path().join("foo").join("bar").canonicalize()); + t!(File::open(dst)); +} + +#[test] +fn modify_hard_link_just_created() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Link); + t!(header.set_path("foo")); + t!(header.set_link_name("../test")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = 
tar::Header::new_gnu(); + header.set_size(1); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &b"x"[..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + + let test = td.path().join("test"); + t!(File::create(&test)); + + let dir = td.path().join("dir"); + assert!(ar.unpack(&dir).is_err()); + + let mut contents = Vec::new(); + t!(t!(File::open(&test)).read_to_end(&mut contents)); + assert_eq!(contents.len(), 0); +} + +#[test] +fn modify_symlink_just_created() { + let mut ar = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("../test")); + header.set_cksum(); + t!(ar.append(&header, &[][..])); + + let mut header = tar::Header::new_gnu(); + header.set_size(1); + header.set_entry_type(tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &b"x"[..])); + + let bytes = t!(ar.into_inner()); + let mut ar = tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + + let test = td.path().join("test"); + t!(File::create(&test)); + + let dir = td.path().join("dir"); + t!(ar.unpack(&dir)); + + let mut contents = Vec::new(); + t!(t!(File::open(&test)).read_to_end(&mut contents)); + assert_eq!(contents.len(), 0); +} diff --git a/tar-0.4.41/tests/header/mod.rs b/tar-0.4.41/tests/header/mod.rs new file mode 100644 index 0000000..14865b8 --- /dev/null +++ b/tar-0.4.41/tests/header/mod.rs @@ -0,0 +1,247 @@ +use std::fs::{self, File}; +use std::io::{self, Write}; +use std::path::Path; +use std::{iter, mem, thread, time}; + +use tempfile::Builder; + +use tar::{GnuHeader, Header, HeaderMode}; + +#[test] +fn default_gnu() { + let mut h = Header::new_gnu(); + 
assert!(h.as_gnu().is_some()); + assert!(h.as_gnu_mut().is_some()); + assert!(h.as_ustar().is_none()); + assert!(h.as_ustar_mut().is_none()); +} + +#[test] +fn goto_old() { + let mut h = Header::new_old(); + assert!(h.as_gnu().is_none()); + assert!(h.as_gnu_mut().is_none()); + assert!(h.as_ustar().is_none()); + assert!(h.as_ustar_mut().is_none()); +} + +#[test] +fn goto_ustar() { + let mut h = Header::new_ustar(); + assert!(h.as_gnu().is_none()); + assert!(h.as_gnu_mut().is_none()); + assert!(h.as_ustar().is_some()); + assert!(h.as_ustar_mut().is_some()); +} + +#[test] +fn link_name() { + let mut h = Header::new_gnu(); + t!(h.set_link_name("foo")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo")); + t!(h.set_link_name("../foo")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("../foo")); + t!(h.set_link_name("foo/bar")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/bar")); + t!(h.set_link_name("foo\\ba")); + if cfg!(windows) { + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/ba")); + } else { + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\ba")); + } + + let name = "foo\\bar\0"; + for (slot, val) in h.as_old_mut().linkname.iter_mut().zip(name.as_bytes()) { + *slot = *val; + } + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\bar")); + + assert!(h.set_link_name("\0").is_err()); +} + +#[test] +fn mtime() { + let h = Header::new_gnu(); + assert_eq!(t!(h.mtime()), 0); + + let h = Header::new_ustar(); + assert_eq!(t!(h.mtime()), 0); + + let h = Header::new_old(); + assert_eq!(t!(h.mtime()), 0); +} + +#[test] +fn user_and_group_name() { + let mut h = Header::new_gnu(); + t!(h.set_username("foo")); + t!(h.set_groupname("bar")); + assert_eq!(t!(h.username()), Some("foo")); + assert_eq!(t!(h.groupname()), Some("bar")); + + h = Header::new_ustar(); + t!(h.set_username("foo")); + t!(h.set_groupname("bar")); + assert_eq!(t!(h.username()), Some("foo")); + assert_eq!(t!(h.groupname()), Some("bar")); + + h = 
Header::new_old(); + assert_eq!(t!(h.username()), None); + assert_eq!(t!(h.groupname()), None); + assert!(h.set_username("foo").is_err()); + assert!(h.set_groupname("foo").is_err()); +} + +#[test] +fn dev_major_minor() { + let mut h = Header::new_gnu(); + t!(h.set_device_major(1)); + t!(h.set_device_minor(2)); + assert_eq!(t!(h.device_major()), Some(1)); + assert_eq!(t!(h.device_minor()), Some(2)); + + h = Header::new_ustar(); + t!(h.set_device_major(1)); + t!(h.set_device_minor(2)); + assert_eq!(t!(h.device_major()), Some(1)); + assert_eq!(t!(h.device_minor()), Some(2)); + + h.as_ustar_mut().unwrap().dev_minor[0] = 0x7f; + h.as_ustar_mut().unwrap().dev_major[0] = 0x7f; + assert!(h.device_major().is_err()); + assert!(h.device_minor().is_err()); + + h.as_ustar_mut().unwrap().dev_minor[0] = b'g'; + h.as_ustar_mut().unwrap().dev_major[0] = b'h'; + assert!(h.device_major().is_err()); + assert!(h.device_minor().is_err()); + + h = Header::new_old(); + assert_eq!(t!(h.device_major()), None); + assert_eq!(t!(h.device_minor()), None); + assert!(h.set_device_major(1).is_err()); + assert!(h.set_device_minor(1).is_err()); +} + +#[test] +fn set_path() { + let mut h = Header::new_gnu(); + t!(h.set_path("foo")); + assert_eq!(t!(h.path()).to_str(), Some("foo")); + t!(h.set_path("foo/")); + assert_eq!(t!(h.path()).to_str(), Some("foo/")); + t!(h.set_path("foo/bar")); + assert_eq!(t!(h.path()).to_str(), Some("foo/bar")); + t!(h.set_path("foo\\bar")); + if cfg!(windows) { + assert_eq!(t!(h.path()).to_str(), Some("foo/bar")); + } else { + assert_eq!(t!(h.path()).to_str(), Some("foo\\bar")); + } + + // set_path documentation explictly states it removes any ".", signfying the + // current directory, from the path. 
This test ensures that documented + // beavhior occurs + t!(h.set_path("./control")); + assert_eq!(t!(h.path()).to_str(), Some("control")); + + let long_name = iter::repeat("foo").take(100).collect::(); + let medium1 = iter::repeat("foo").take(52).collect::(); + let medium2 = iter::repeat("fo/").take(52).collect::(); + + assert!(h.set_path(&long_name).is_err()); + assert!(h.set_path(&medium1).is_err()); + assert!(h.set_path(&medium2).is_err()); + assert!(h.set_path("\0").is_err()); + + assert!(h.set_path("..").is_err()); + assert!(h.set_path("foo/..").is_err()); + assert!(h.set_path("foo/../bar").is_err()); + + h = Header::new_ustar(); + t!(h.set_path("foo")); + assert_eq!(t!(h.path()).to_str(), Some("foo")); + + assert!(h.set_path(&long_name).is_err()); + assert!(h.set_path(&medium1).is_err()); + t!(h.set_path(&medium2)); + assert_eq!(t!(h.path()).to_str(), Some(&medium2[..])); +} + +#[test] +fn set_ustar_path_hard() { + let mut h = Header::new_ustar(); + let p = Path::new("a").join(&vec!["a"; 100].join("")); + t!(h.set_path(&p)); + assert_eq!(t!(h.path()), p); +} + +#[test] +fn set_metadata_deterministic() { + let td = t!(Builder::new().prefix("tar-rs").tempdir()); + let tmppath = td.path().join("tmpfile"); + + fn mk_header(path: &Path, readonly: bool) -> Result { + let mut file = t!(File::create(path)); + t!(file.write_all(b"c")); + let mut perms = t!(file.metadata()).permissions(); + perms.set_readonly(readonly); + t!(fs::set_permissions(path, perms)); + let mut h = Header::new_ustar(); + h.set_metadata_in_mode(&t!(path.metadata()), HeaderMode::Deterministic); + Ok(h) + } + + // Create "the same" File twice in a row, one second apart, with differing readonly values. + let one = t!(mk_header(tmppath.as_path(), false)); + thread::sleep(time::Duration::from_millis(1050)); + let two = t!(mk_header(tmppath.as_path(), true)); + + // Always expected to match. 
+ assert_eq!(t!(one.size()), t!(two.size())); + assert_eq!(t!(one.path()), t!(two.path())); + assert_eq!(t!(one.mode()), t!(two.mode())); + + // Would not match without `Deterministic`. + assert_eq!(t!(one.mtime()), t!(two.mtime())); + assert_eq!(t!(one.mtime()), 1153704088); + // TODO: No great way to validate that these would not be filled, but + // check them anyway. + assert_eq!(t!(one.uid()), t!(two.uid())); + assert_eq!(t!(one.gid()), t!(two.gid())); +} + +#[test] +fn extended_numeric_format() { + let mut h: GnuHeader = unsafe { mem::zeroed() }; + h.as_header_mut().set_size(42); + assert_eq!(h.size, [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 50, 0]); + h.as_header_mut().set_size(8589934593); + assert_eq!(h.size, [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 1]); + h.size = [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 0]; + assert_eq!(h.as_header().entry_size().unwrap(), 0x0200000000); + h.size = [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 51, 0]; + assert_eq!(h.as_header().entry_size().unwrap(), 43); + + h.as_header_mut().set_gid(42); + assert_eq!(h.gid, [48, 48, 48, 48, 48, 53, 50, 0]); + assert_eq!(h.as_header().gid().unwrap(), 42); + h.as_header_mut().set_gid(0x7fffffffffffffff); + assert_eq!(h.gid, [0xff; 8]); + assert_eq!(h.as_header().gid().unwrap(), 0x7fffffffffffffff); + h.uid = [0x80, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78]; + assert_eq!(h.as_header().uid().unwrap(), 0x12345678); + + h.mtime = [ + 0x80, 0, 0, 0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + ]; + assert_eq!(h.as_header().mtime().unwrap(), 0x0123456789abcdef); +} + +#[test] +fn byte_slice_conversion() { + let h = Header::new_gnu(); + let b: &[u8] = h.as_bytes(); + let b_conv: &[u8] = Header::from_byte_slice(h.as_bytes()).as_bytes(); + assert_eq!(b, b_conv); +}