From fec26926a83de45ff0ec6870608b9828d20fdc8d Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Thu, 25 Feb 2021 11:20:47 +1100 Subject: [PATCH 1/2] Squashed 'tokio-tar/' content from commit 43dd166 git-subtree-dir: tokio-tar git-subtree-split: 43dd166d0f3aff67891cd1c1bf4d6bfb984bb789 --- .github/workflows/main.yml | 49 + .gitignore | 2 + Cargo.toml | 44 + LICENSE-APACHE | 201 ++ LICENSE-MIT | 25 + README.md | 97 + examples/extract_file.rs | 28 + examples/list.rs | 21 + examples/raw_list.rs | 54 + examples/write.rs | 16 + src/archive.rs | 610 +++++++ src/builder.rs | 633 +++++++ src/entry.rs | 955 ++++++++++ src/entry_type.rs | 189 ++ src/error.rs | 40 + src/header.rs | 1620 +++++++++++++++++ src/lib.rs | 45 + src/pax.rs | 88 + tests/all.rs | 1117 ++++++++++++ tests/archives/directory.tar | Bin 0 -> 10240 bytes tests/archives/duplicate_dirs.tar | Bin 0 -> 2048 bytes tests/archives/empty_filename.tar | Bin 0 -> 512 bytes tests/archives/file_times.tar | Bin 0 -> 1536 bytes tests/archives/link.tar | Bin 0 -> 10240 bytes tests/archives/pax.tar | Bin 0 -> 10240 bytes tests/archives/pax2.tar | Bin 0 -> 10240 bytes tests/archives/reading_files.tar | Bin 0 -> 10240 bytes tests/archives/simple.tar | Bin 0 -> 10240 bytes tests/archives/simple_missing_last_header.tar | Bin 0 -> 9728 bytes tests/archives/spaces.tar | Bin 0 -> 2048 bytes tests/archives/sparse.tar | Bin 0 -> 10240 bytes tests/archives/xattrs.tar | Bin 0 -> 10240 bytes tests/entry.rs | 350 ++++ tests/header/mod.rs | 243 +++ 34 files changed, 6427 insertions(+) create mode 100644 .github/workflows/main.yml create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 examples/extract_file.rs create mode 100644 examples/list.rs create mode 100644 examples/raw_list.rs create mode 100644 examples/write.rs create mode 100644 src/archive.rs create mode 100644 src/builder.rs create mode 100644 src/entry.rs 
create mode 100644 src/entry_type.rs create mode 100644 src/error.rs create mode 100644 src/header.rs create mode 100644 src/lib.rs create mode 100644 src/pax.rs create mode 100644 tests/all.rs create mode 100644 tests/archives/directory.tar create mode 100644 tests/archives/duplicate_dirs.tar create mode 100644 tests/archives/empty_filename.tar create mode 100644 tests/archives/file_times.tar create mode 100644 tests/archives/link.tar create mode 100644 tests/archives/pax.tar create mode 100644 tests/archives/pax2.tar create mode 100644 tests/archives/reading_files.tar create mode 100644 tests/archives/simple.tar create mode 100644 tests/archives/simple_missing_last_header.tar create mode 100644 tests/archives/spaces.tar create mode 100644 tests/archives/sparse.tar create mode 100644 tests/archives/xattrs.tar create mode 100644 tests/entry.rs create mode 100644 tests/header/mod.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..e546e533 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,49 @@ +on: [push, pull_request] + +name: Continuous integration + +jobs: + ci: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + + steps: + - uses: actions/checkout@v2 + + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + components: rustfmt, clippy + + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + + - uses: actions-rs/cargo@v1 + with: + command: install + args: cargo-hack + + - uses: actions-rs/cargo@v1 + with: + command: hack + args: check --all --ignore-private --each-feature --no-dev-deps + + - uses: actions-rs/cargo@v1 + with: + command: check + args: --all --all-targets --all-features + + - uses: actions-rs/cargo@v1 + with: + command: test + + - uses: actions-rs/cargo@v1 + with: + command: clippy + args: -- -D warnings diff --git a/.gitignore b/.gitignore new file mode 100644 
index 00000000..4fffb2f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..4c6b935f --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "tokio-tar" +version = "0.2.0" +authors = [ + "Alex Crichton ", + "dignifiedquire ", + "Artem Vorotnikov ", + "Aiden McClelland ", +] +homepage = "https://github.com/vorot93/tokio-tar" +repository = "https://github.com/vorot93/tokio-tar" +documentation = "https://docs.rs/tokio-tar" +license = "MIT/Apache-2.0" +keywords = ["tar", "tarfile", "encoding"] +readme = "README.md" +edition = "2018" +exclude = ["tests/archives/*"] + +description = """ +A Rust implementation of an async TAR file reader and writer. This library does not +currently handle compression, but it is abstract over all I/O readers and +writers. Additionally, great lengths are taken to ensure that the entire +contents are never required to be entirely resident in memory all at once. +""" + +[dependencies] +filetime = "0.2.13" +futures-core = "0.3" +tokio = { version = "1.0.1", features = ["fs", "io-util", "rt"] } +tokio-stream = "0.1.1" + +[dev-dependencies] +tempfile = "3" +tokio = { version = "1.0.1", features = ["full"] } + +[target."cfg(unix)".dependencies] +xattr = { version = "0.2", optional = true } +libc = "0.2" + +[target.'cfg(target_os = "redox")'.dependencies] +redox_syscall = "0.2" + +[features] +default = ["xattr"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 00000000..16fe87b0 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 00000000..39e0ed66 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 Alex Crichton + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..dc8c2436 --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +

tokio-tar

+
+ + A tar archive reading/writing library for async Rust. + +
+ +
+ +
+ + + Crates.io version + + + + Download + + + + docs.rs docs + +
+ + +
+ +> Based on the great [tar-rs](https://github.com/alexcrichton/tar-rs). + +## Reading an archive + +```rust,no_run +use tokio::io::stdin; +use tokio::prelude::*; + +use tokio_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut ar = Archive::new(stdin()); + let mut entries = ar.entries().unwrap(); + while let Some(file) = entries.next().await { + let f = file.unwrap(); + println!("{}", f.path().unwrap().display()); + } + }); +} +``` + +## Writing an archive + +```rust,no_run +use tokio::fs::File; +use tokio_tar::Builder; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let file = File::create("foo.tar").await.unwrap(); + let mut a = Builder::new(file); + + a.append_path("README.md").await.unwrap(); + a.append_file("lib.rs", &mut File::open("src/lib.rs").await.unwrap()) + .await + .unwrap(); + }); +} +``` + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this project by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/examples/extract_file.rs b/examples/extract_file.rs new file mode 100644 index 00000000..ba414bb1 --- /dev/null +++ b/examples/extract_file.rs @@ -0,0 +1,28 @@ +//! An example of extracting a file in an archive. +//! +//! Takes a tarball on standard input, looks for an entry with a listed file +//! name as the first argument provided, and then prints the contents of that +//! file to stdout. 
+ +extern crate tokio_tar as async_tar; + +use std::{env::args_os, path::Path}; +use tokio::io::{copy, stdin, stdout}; +use tokio_stream::*; + +use async_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let first_arg = args_os().nth(1).unwrap(); + let filename = Path::new(&first_arg); + let mut ar = Archive::new(stdin()); + let mut entries = ar.entries().unwrap(); + while let Some(file) = entries.next().await { + let mut f = file.unwrap(); + if f.path().unwrap() == filename { + copy(&mut f, &mut stdout()).await.unwrap(); + } + } + }); +} diff --git a/examples/list.rs b/examples/list.rs new file mode 100644 index 00000000..b05e29ec --- /dev/null +++ b/examples/list.rs @@ -0,0 +1,21 @@ +//! An example of listing the file names of entries in an archive. +//! +//! Takes a tarball on stdin and prints out all of the entries inside. + +extern crate tokio_tar as async_tar; + +use tokio::io::stdin; +use tokio_stream::*; + +use async_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut ar = Archive::new(stdin()); + let mut entries = ar.entries().unwrap(); + while let Some(file) = entries.next().await { + let f = file.unwrap(); + println!("{}", f.path().unwrap().display()); + } + }); +} diff --git a/examples/raw_list.rs b/examples/raw_list.rs new file mode 100644 index 00000000..27e06152 --- /dev/null +++ b/examples/raw_list.rs @@ -0,0 +1,54 @@ +//! An example of listing raw entries in an archive. +//! +//! Takes a tarball on stdin and prints out all of the entries inside. 
+ +extern crate tokio_tar as async_tar; + +use tokio::io::stdin; +use tokio_stream::*; + +use async_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut ar = Archive::new(stdin()); + let mut i = 0; + let mut entries = ar.entries_raw().unwrap(); + while let Some(file) = entries.next().await { + println!("-------------------------- Entry {}", i); + let mut f = file.unwrap(); + println!("path: {}", f.path().unwrap().display()); + println!("size: {}", f.header().size().unwrap()); + println!("entry size: {}", f.header().entry_size().unwrap()); + println!("link name: {:?}", f.link_name().unwrap()); + println!("file type: {:#x}", f.header().entry_type().as_byte()); + println!("mode: {:#o}", f.header().mode().unwrap()); + println!("uid: {}", f.header().uid().unwrap()); + println!("gid: {}", f.header().gid().unwrap()); + println!("mtime: {}", f.header().mtime().unwrap()); + println!("username: {:?}", f.header().username().unwrap()); + println!("groupname: {:?}", f.header().groupname().unwrap()); + + if f.header().as_ustar().is_some() { + println!("kind: UStar"); + } else if f.header().as_gnu().is_some() { + println!("kind: GNU"); + } else { + println!("kind: normal"); + } + + if let Ok(Some(extensions)) = f.pax_extensions().await { + println!("pax extensions:"); + for e in extensions { + let e = e.unwrap(); + println!( + "\t{:?} = {:?}", + String::from_utf8_lossy(e.key_bytes()), + String::from_utf8_lossy(e.value_bytes()) + ); + } + } + i += 1; + } + }); +} diff --git a/examples/write.rs b/examples/write.rs new file mode 100644 index 00000000..1fcc50ea --- /dev/null +++ b/examples/write.rs @@ -0,0 +1,16 @@ +extern crate tokio_tar as async_tar; + +use async_tar::Builder; +use tokio::fs::File; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let file = File::create("foo.tar").await.unwrap(); + let mut a = Builder::new(file); + + a.append_path("README.md").await.unwrap(); + a.append_file("lib.rs", &mut 
File::open("src/lib.rs").await.unwrap()) + .await + .unwrap(); + }); +} diff --git a/src/archive.rs b/src/archive.rs new file mode 100644 index 00000000..1e4d3b3b --- /dev/null +++ b/src/archive.rs @@ -0,0 +1,610 @@ +use std::{ + cmp, + path::Path, + pin::Pin, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + task::{Context, Poll}, +}; +use tokio::{ + io::{self, AsyncRead as Read, AsyncReadExt}, + sync::Mutex, +}; +use tokio_stream::*; + +use crate::{ + entry::{EntryFields, EntryIo}, + error::TarError, + other, Entry, GnuExtSparseHeader, GnuSparseHeader, Header, +}; + +/// A top-level representation of an archive file. +/// +/// This archive can have an entry added to it and it can be iterated over. +#[derive(Debug)] +pub struct Archive { + inner: Arc>, +} + +impl Clone for Archive { + fn clone(&self) -> Self { + Archive { + inner: self.inner.clone(), + } + } +} + +#[derive(Debug)] +pub struct ArchiveInner { + pos: AtomicU64, + unpack_xattrs: bool, + preserve_permissions: bool, + preserve_mtime: bool, + ignore_zeros: bool, + obj: Mutex, +} + +/// Configure the archive. +pub struct ArchiveBuilder { + obj: R, + unpack_xattrs: bool, + preserve_permissions: bool, + preserve_mtime: bool, + ignore_zeros: bool, +} + +impl ArchiveBuilder { + /// Create a new builder. + pub fn new(obj: R) -> Self { + ArchiveBuilder { + unpack_xattrs: false, + preserve_permissions: false, + preserve_mtime: true, + ignore_zeros: false, + obj, + } + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this archive. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. 
+ pub fn set_unpack_xattrs(mut self, unpack_xattrs: bool) -> Self { + self.unpack_xattrs = unpack_xattrs; + self + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. + pub fn set_preserve_permissions(mut self, preserve: bool) -> Self { + self.preserve_permissions = preserve; + self + } + + /// Indicate whether access time information is preserved when unpacking + /// this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(mut self, preserve: bool) -> Self { + self.preserve_mtime = preserve; + self + } + + /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more + /// entries. + /// + /// This can be used in case multiple tar archives have been concatenated together. + pub fn set_ignore_zeros(mut self, ignore_zeros: bool) -> Self { + self.ignore_zeros = ignore_zeros; + self + } + + /// Construct the archive, ready to accept inputs. + pub fn build(self) -> Archive { + let Self { + unpack_xattrs, + preserve_permissions, + preserve_mtime, + ignore_zeros, + obj, + } = self; + + Archive { + inner: Arc::new(ArchiveInner { + unpack_xattrs, + preserve_permissions, + preserve_mtime, + ignore_zeros, + obj: Mutex::new(obj), + pos: 0.into(), + }), + } + } +} + +impl Archive { + /// Create a new archive with the underlying object as the reader. + pub fn new(obj: R) -> Archive { + Archive { + inner: Arc::new(ArchiveInner { + unpack_xattrs: false, + preserve_permissions: false, + preserve_mtime: true, + ignore_zeros: false, + obj: Mutex::new(obj), + pos: 0.into(), + }), + } + } + + /// Unwrap this archive, returning the underlying object. 
+ pub fn into_inner(self) -> Result { + let Self { inner } = self; + + match Arc::try_unwrap(inner) { + Ok(inner) => Ok(inner.obj.into_inner()), + Err(inner) => Err(Self { inner }), + } + } + + /// Construct an stream over the entries in this archive. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// stream returns), then the contents read for each entry may be + /// corrupted. + pub fn entries(&mut self) -> io::Result> { + if self.inner.pos.load(Ordering::SeqCst) != 0 { + return Err(other( + "cannot call entries unless archive is at \ + position 0", + )); + } + + Ok(Entries { + archive: self.clone(), + next: 0, + gnu_longlink: None, + gnu_longname: None, + pax_extensions: None, + }) + } + + /// Construct an stream over the raw entries in this archive. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// stream returns), then the contents read for each entry may be + /// corrupted. + pub fn entries_raw(&mut self) -> io::Result> { + if self.inner.pos.load(Ordering::SeqCst) != 0 { + return Err(other( + "cannot call entries_raw unless archive is at \ + position 0", + )); + } + + Ok(RawEntries { + archive: self.clone(), + next: 0, + }) + } + + /// Unpacks the contents tarball into the specified `dst`. + /// + /// This function will iterate over the entire contents of this tarball, + /// extracting each file in turn to the location specified by the entry's + /// path name. + /// + /// This operation is relatively sensitive in that it will not write files + /// outside of the path specified by `dst`. Files in the archive which have + /// a '..' in their path are skipped during the unpacking process. 
+ /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs::File; + /// use tokio_tar::Archive; + /// + /// let mut ar = Archive::new(File::open("foo.tar").await?); + /// ar.unpack("foo").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn unpack>(&mut self, dst: P) -> io::Result<()> { + let mut entries = self.entries()?; + let mut pinned = Pin::new(&mut entries); + while let Some(entry) = pinned.next().await { + let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?; + file.unpack_in(dst.as_ref()).await?; + } + Ok(()) + } +} + +/// Stream of `Entry`s. +pub struct Entries { + archive: Archive, + next: u64, + gnu_longname: Option>, + gnu_longlink: Option>, + pax_extensions: Option>, +} + +macro_rules! ready_opt_err { + ($val:expr) => { + match futures_core::ready!($val) { + Some(Ok(val)) => val, + Some(Err(err)) => return Poll::Ready(Some(Err(err))), + None => return Poll::Ready(None), + } + }; +} + +macro_rules! 
ready_err { + ($val:expr) => { + match futures_core::ready!($val) { + Ok(val) => val, + Err(err) => return Poll::Ready(Some(Err(err))), + } + }; +} + +impl Stream for Entries { + type Item = io::Result>>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + loop { + let entry = ready_opt_err!(poll_next_raw(self.archive.clone(), &mut self.next, cx)); + + if entry.header().as_gnu().is_some() && entry.header().entry_type().is_gnu_longname() { + if self.gnu_longname.is_some() { + return Poll::Ready(Some(Err(other( + "two long name entries describing \ + the same member", + )))); + } + + let mut ef = EntryFields::from(entry); + let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx)); + self.gnu_longname = Some(val); + continue; + } + + if entry.header().as_gnu().is_some() && entry.header().entry_type().is_gnu_longlink() { + if self.gnu_longlink.is_some() { + return Poll::Ready(Some(Err(other( + "two long name entries describing \ + the same member", + )))); + } + let mut ef = EntryFields::from(entry); + let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx)); + self.gnu_longlink = Some(val); + continue; + } + + if entry.header().as_ustar().is_some() + && entry.header().entry_type().is_pax_local_extensions() + { + if self.pax_extensions.is_some() { + return Poll::Ready(Some(Err(other( + "two pax extensions entries describing \ + the same member", + )))); + } + let mut ef = EntryFields::from(entry); + let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx)); + self.pax_extensions = Some(val); + continue; + } + + let mut fields = EntryFields::from(entry); + fields.long_pathname = self.gnu_longname.take(); + fields.long_linkname = self.gnu_longlink.take(); + fields.pax_extensions = self.pax_extensions.take(); + + ready_err!(poll_parse_sparse_header( + self.archive.clone(), + &mut self.next, + &mut fields, + cx + )); + + return Poll::Ready(Some(Ok(fields.into_entry()))); + } + } +} + +/// Stream of raw `Entry`s. 
+pub struct RawEntries { + archive: Archive, + next: u64, +} + +impl Stream for RawEntries { + type Item = io::Result>>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + poll_next_raw(self.archive.clone(), &mut self.next, cx) + } +} + +fn poll_next_raw( + mut archive: Archive, + next: &mut u64, + cx: &mut Context<'_>, +) -> Poll>>>> { + let mut header = Header::new_old(); + let mut header_pos = *next; + + loop { + // Seek to the start of the next header in the archive + let delta = *next - archive.inner.pos.load(Ordering::SeqCst); + + match futures_core::ready!(poll_skip(&mut archive, cx, delta)) { + Ok(_) => {} + Err(err) => return Poll::Ready(Some(Err(err))), + } + + // EOF is an indicator that we are at the end of the archive. + match futures_core::ready!(poll_try_read_all(&mut archive, cx, header.as_mut_bytes())) { + Ok(true) => {} + Ok(false) => return Poll::Ready(None), + Err(err) => return Poll::Ready(Some(Err(err))), + } + + // If a header is not all zeros, we have another valid header. + // Otherwise, check if we are ignoring zeros and continue, or break as if this is the + // end of the archive. 
+ if !header.as_bytes().iter().all(|i| *i == 0) { + *next += 512; + break; + } + + if !archive.inner.ignore_zeros { + return Poll::Ready(None); + } + + *next += 512; + header_pos = *next; + } + + // Make sure the checksum is ok + let sum = header.as_bytes()[..148] + .iter() + .chain(&header.as_bytes()[156..]) + .fold(0, |a, b| a + (*b as u32)) + + 8 * 32; + let cksum = header.cksum()?; + if sum != cksum { + return Poll::Ready(Some(Err(other("archive header checksum mismatch")))); + } + + let file_pos = *next; + let size = header.entry_size()?; + + let data = EntryIo::Data(archive.clone().take(size)); + + let ret = EntryFields { + size, + header_pos, + file_pos, + data: vec![data], + header, + long_pathname: None, + long_linkname: None, + pax_extensions: None, + unpack_xattrs: archive.inner.unpack_xattrs, + preserve_permissions: archive.inner.preserve_permissions, + preserve_mtime: archive.inner.preserve_mtime, + read_state: None, + }; + + // Store where the next entry is, rounding up by 512 bytes (the size of + // a header); + let size = (size + 511) & !(512 - 1); + *next += size; + + Poll::Ready(Some(Ok(ret.into_entry()))) +} + +fn poll_parse_sparse_header( + mut archive: Archive, + next: &mut u64, + entry: &mut EntryFields>, + cx: &mut Context<'_>, +) -> Poll> { + if !entry.header.entry_type().is_gnu_sparse() { + return Poll::Ready(Ok(())); + } + + let gnu = match entry.header.as_gnu() { + Some(gnu) => gnu, + None => return Poll::Ready(Err(other("sparse entry type listed but not GNU header"))), + }; + + // Sparse files are represented internally as a list of blocks that are + // read. Blocks are either a bunch of 0's or they're data from the + // underlying archive. + // + // Blocks of a sparse file are described by the `GnuSparseHeader` + // structure, some of which are contained in `GnuHeader` but some of + // which may also be contained after the first header in further + // headers. 
+ // + // We read off all the blocks here and use the `add_block` function to + // incrementally add them to the list of I/O block (in `entry.data`). + // The `add_block` function also validates that each chunk comes after + // the previous, we don't overrun the end of the file, and each block is + // aligned to a 512-byte boundary in the archive itself. + // + // At the end we verify that the sparse file size (`Header::size`) is + // the same as the current offset (described by the list of blocks) as + // well as the amount of data read equals the size of the entry + // (`Header::entry_size`). + entry.data.truncate(0); + + let mut cur = 0; + let mut remaining = entry.size; + { + let data = &mut entry.data; + let reader = archive.clone(); + let size = entry.size; + let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { + if block.is_empty() { + return Ok(()); + } + let off = block.offset()?; + let len = block.length()?; + + if (size - remaining) % 512 != 0 { + return Err(other( + "previous block in sparse file was not \ + aligned to 512-byte boundary", + )); + } else if off < cur { + return Err(other( + "out of order or overlapping sparse \ + blocks", + )); + } else if cur < off { + let block = io::repeat(0).take(off - cur); + data.push(EntryIo::Pad(block)); + } + cur = off + .checked_add(len) + .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?; + remaining = remaining.checked_sub(len).ok_or_else(|| { + other( + "sparse file consumed more data than the header \ + listed", + ) + })?; + data.push(EntryIo::Data(reader.clone().take(len))); + Ok(()) + }; + for block in gnu.sparse.iter() { + add_block(block)? 
+ } + if gnu.is_extended() { + let mut ext = GnuExtSparseHeader::new(); + ext.isextended[0] = 1; + while ext.is_extended() { + match futures_core::ready!(poll_try_read_all(&mut archive, cx, ext.as_mut_bytes())) + { + Ok(true) => {} + Ok(false) => return Poll::Ready(Err(other("failed to read extension"))), + Err(err) => return Poll::Ready(Err(err)), + } + + *next += 512; + for block in ext.sparse.iter() { + add_block(block)?; + } + } + } + } + if cur != gnu.real_size()? { + return Poll::Ready(Err(other( + "mismatch in sparse file chunks and \ + size in header", + ))); + } + entry.size = cur; + if remaining > 0 { + return Poll::Ready(Err(other( + "mismatch in sparse file chunks and \ + entry size in header", + ))); + } + + Poll::Ready(Ok(())) +} + +impl Read for Archive { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + let mut r = if let Ok(v) = self.inner.obj.try_lock() { + v + } else { + return Poll::Pending; + }; + + let res = futures_core::ready!(Pin::new(&mut *r).poll_read(cx, into)); + match res { + Ok(()) => { + self.inner + .pos + .fetch_add(into.filled().len() as u64, Ordering::SeqCst); + Poll::Ready(Ok(())) + } + Err(err) => Poll::Ready(Err(err)), + } + } +} + +/// Try to fill the buffer from the reader. +/// +/// If the reader reaches its end before filling the buffer at all, returns `false`. +/// Otherwise returns `true`. 
+fn poll_try_read_all( + mut source: R, + cx: &mut Context<'_>, + buf: &mut [u8], +) -> Poll> { + let mut read = 0; + while read < buf.len() { + let mut read_buf = io::ReadBuf::new(&mut buf[read..]); + match futures_core::ready!(Pin::new(&mut source).poll_read(cx, &mut read_buf)) { + Ok(()) if read_buf.filled().is_empty() => { + if read == 0 { + return Poll::Ready(Ok(false)); + } + + return Poll::Ready(Err(other("failed to read entire block"))); + } + Ok(()) => read += read_buf.filled().len(), + Err(err) => return Poll::Ready(Err(err)), + } + } + + Poll::Ready(Ok(true)) +} + +/// Skip n bytes on the given source. +fn poll_skip( + mut source: R, + cx: &mut Context<'_>, + mut amt: u64, +) -> Poll> { + let mut buf = [0u8; 4096 * 8]; + while amt > 0 { + let n = cmp::min(amt, buf.len() as u64); + let mut read_buf = io::ReadBuf::new(&mut buf[..n as usize]); + match futures_core::ready!(Pin::new(&mut source).poll_read(cx, &mut read_buf)) { + Ok(()) if read_buf.filled().is_empty() => { + return Poll::Ready(Err(other("unexpected EOF during skip"))); + } + Ok(()) => { + amt -= read_buf.filled().len() as u64; + } + Err(err) => return Poll::Ready(Err(err)), + } + } + + Poll::Ready(Ok(())) +} diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 00000000..08c46ba0 --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,633 @@ +use crate::{ + header::{bytes2path, path2bytes, HeaderMode}, + other, EntryType, Header, +}; +use std::{borrow::Cow, fs::Metadata, path::Path}; +use tokio::{ + fs, + io::{self, AsyncRead as Read, AsyncReadExt, AsyncWrite as Write, AsyncWriteExt}, +}; + +/// A structure for building archives +/// +/// This structure has methods for building up an archive from scratch into any +/// arbitrary writer. +pub struct Builder { + mode: HeaderMode, + follow: bool, + finished: bool, + obj: Option, + cancellation: Option>, +} + +impl Builder { + /// Create a new archive builder with the underlying object as the + /// destination of all data written. 
The builder will use + /// `HeaderMode::Complete` by default. + pub fn new(obj: W) -> Builder { + let (tx, rx) = tokio::sync::oneshot::channel::(); + tokio::spawn(async move { + if let Ok(mut w) = rx.await { + let _ = w.write_all(&[0; 1024]).await; + } + }); + Builder { + mode: HeaderMode::Complete, + follow: true, + finished: false, + obj: Some(obj), + cancellation: Some(tx), + } + } + + /// Changes the HeaderMode that will be used when reading fs Metadata for + /// methods that implicitly read metadata for an input Path. Notably, this + /// does _not_ apply to `append(Header)`. + pub fn mode(&mut self, mode: HeaderMode) { + self.mode = mode; + } + + /// Follow symlinks, archiving the contents of the file they point to rather + /// than adding a symlink to the archive. Defaults to true. + pub fn follow_symlinks(&mut self, follow: bool) { + self.follow = follow; + } + + /// Gets shared reference to the underlying object. + pub fn get_ref(&self) -> &W { + self.obj.as_ref().unwrap() + } + + /// Gets mutable reference to the underlying object. + /// + /// Note that care must be taken while writing to the underlying + /// object. But, e.g. `get_mut().flush()` is claimed to be safe and + /// useful in the situations when one needs to be ensured that + /// tar entry was flushed to the disk. + pub fn get_mut(&mut self) -> &mut W { + self.obj.as_mut().unwrap() + } + + /// Unwrap this archive, returning the underlying object. + /// + /// This function will finish writing the archive if the `finish` function + /// hasn't yet been called, returning any I/O error which happens during + /// that operation. + pub async fn into_inner(mut self) -> io::Result { + if !self.finished { + self.finish().await?; + } + Ok(self.obj.take().unwrap()) + } + + /// Adds a new entry to this archive. + /// + /// This function will append the header specified, followed by contents of + /// the stream specified by `data`. 
To produce a valid archive the `size` + /// field of `header` must be the same as the length of the stream that's + /// being written. Additionally the checksum for the header should have been + /// set via the `set_cksum` method. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_path("foo")?; + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append(&header, data).await?; + /// let data = ar.into_inner().await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append( + &mut self, + header: &Header, + mut data: R, + ) -> io::Result<()> { + append(self.get_mut(), header, &mut data).await?; + + Ok(()) + } + + /// Adds a new entry to this archive with the specified path. + /// + /// This function will set the specified path in the given header, which may + /// require appending a GNU long-name extension entry to the archive first. + /// The checksum for the header will be automatically updated via the + /// `set_cksum` method after setting the path. No other metadata in the + /// header will be modified. + /// + /// Then it will append the header, followed by contents of the stream + /// specified by `data`. 
To produce a valid archive the `size` field of + /// `header` must be the same as the length of the stream that's being + /// written. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?; + /// let data = ar.into_inner().await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_data, R: Read + Unpin>( + &mut self, + header: &mut Header, + path: P, + data: R, + ) -> io::Result<()> { + prepare_header_path(self.get_mut(), header, path.as_ref()).await?; + header.set_cksum(); + self.append(&header, data).await?; + + Ok(()) + } + + /// Adds a file on the local filesystem to this archive. + /// + /// This function will open the file specified by `path` and insert the file + /// into the archive with the appropriate metadata set, returning any I/O + /// error which occurs while writing. The path name for the file inside of + /// this archive will be the same as `path`, and it is required that the + /// path is a relative path. 
+ /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// ar.append_path("foo/bar.txt").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_path>(&mut self, path: P) -> io::Result<()> { + let mode = self.mode; + let follow = self.follow; + append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?; + Ok(()) + } + + /// Adds a file on the local filesystem to this archive under another name. + /// + /// This function will open the file specified by `path` and insert the file + /// into the archive as `name` with appropriate metadata set, returning any + /// I/O error which occurs while writing. The path name for the file inside + /// of this archive will be `name` is required to be a relative path. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Insert the local file "foo/bar.txt" in the archive but with the name + /// // "bar/foo.txt". 
+ /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_path_with_name, N: AsRef>( + &mut self, + path: P, + name: N, + ) -> io::Result<()> { + let mode = self.mode; + let follow = self.follow; + append_path_with_name( + self.get_mut(), + path.as_ref(), + Some(name.as_ref()), + mode, + follow, + ) + .await?; + Ok(()) + } + + /// Adds a file to this archive with the given path as the name of the file + /// in the archive. + /// + /// This will use the metadata of `file` to populate a `Header`, and it will + /// then append the file to the archive with the name `path`. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs::File; + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Open the file at one location, but insert it into the archive with a + /// // different name. + /// let mut f = File::open("foo/bar/baz.txt").await?; + /// ar.append_file("bar/baz.txt", &mut f).await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_file>( + &mut self, + path: P, + file: &mut fs::File, + ) -> io::Result<()> { + let mode = self.mode; + append_file(self.get_mut(), path.as_ref(), file, mode).await?; + Ok(()) + } + + /// Adds a directory to this archive with the given path as the name of the + /// directory in the archive. + /// + /// This will use `stat` to populate a `Header`, and it will then append the + /// directory to the archive with the name `path`. 
+ /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs; + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Use the directory at one location, but insert it into the archive + /// // with a different name. + /// ar.append_dir("bardir", ".").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_dir(&mut self, path: P, src_path: Q) -> io::Result<()> + where + P: AsRef, + Q: AsRef, + { + let mode = self.mode; + append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?; + Ok(()) + } + + /// Adds a directory and all of its contents (recursively) to this archive + /// with the given path as the name of the directory in the archive. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs; + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Use the directory at one location, but insert it into the archive + /// // with a different name. 
+ /// ar.append_dir_all("bardir", ".").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_dir_all(&mut self, path: P, src_path: Q) -> io::Result<()> + where + P: AsRef, + Q: AsRef, + { + let mode = self.mode; + let follow = self.follow; + append_dir_all( + self.get_mut(), + path.as_ref(), + src_path.as_ref(), + mode, + follow, + ) + .await?; + Ok(()) + } + + /// Finish writing this archive, emitting the termination sections. + /// + /// This function should only be called when the archive has been written + /// entirely and if an I/O error happens the underlying object still needs + /// to be acquired. + /// + /// In most situations the `into_inner` method should be preferred. + pub async fn finish(&mut self) -> io::Result<()> { + if self.finished { + return Ok(()); + } + self.finished = true; + self.get_mut().write_all(&[0; 1024]).await?; + Ok(()) + } +} + +async fn append( + mut dst: &mut Dst, + header: &Header, + mut data: &mut Data, +) -> io::Result<()> { + dst.write_all(header.as_bytes()).await?; + let len = io::copy(&mut data, &mut dst).await?; + + // Pad with zeros if necessary. + let buf = [0; 512]; + let remaining = 512 - (len % 512); + if remaining < 512 { + dst.write_all(&buf[..remaining as usize]).await?; + } + + Ok(()) +} + +async fn append_path_with_name( + dst: &mut Dst, + path: &Path, + name: Option<&Path>, + mode: HeaderMode, + follow: bool, +) -> io::Result<()> { + let stat = if follow { + fs::metadata(path).await.map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting metadata for {}", err, path.display()), + ) + })? + } else { + fs::symlink_metadata(path).await.map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting metadata for {}", err, path.display()), + ) + })? 
+ }; + let ar_name = name.unwrap_or(path); + if stat.is_file() { + append_fs( + dst, + ar_name, + &stat, + &mut fs::File::open(path).await?, + mode, + None, + ) + .await?; + Ok(()) + } else if stat.is_dir() { + append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?; + Ok(()) + } else if stat.file_type().is_symlink() { + let link_name = fs::read_link(path).await?; + append_fs( + dst, + ar_name, + &stat, + &mut io::empty(), + mode, + Some(&link_name), + ) + .await?; + Ok(()) + } else { + Err(other(&format!("{} has unknown file type", path.display()))) + } +} + +async fn append_file( + dst: &mut Dst, + path: &Path, + file: &mut fs::File, + mode: HeaderMode, +) -> io::Result<()> { + let stat = file.metadata().await?; + append_fs(dst, path, &stat, file, mode, None).await?; + Ok(()) +} + +async fn append_dir( + dst: &mut Dst, + path: &Path, + src_path: &Path, + mode: HeaderMode, +) -> io::Result<()> { + let stat = fs::metadata(src_path).await?; + append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?; + Ok(()) +} + +fn prepare_header(size: u64, entry_type: EntryType) -> Header { + let mut header = Header::new_gnu(); + let name = b"././@LongLink"; + header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]); + header.set_mode(0o644); + header.set_uid(0); + header.set_gid(0); + header.set_mtime(0); + // + 1 to be compliant with GNU tar + header.set_size(size + 1); + header.set_entry_type(entry_type); + header.set_cksum(); + header +} + +async fn prepare_header_path( + dst: &mut Dst, + header: &mut Header, + path: &Path, +) -> io::Result<()> { + // Try to encode the path directly in the header, but if it ends up not + // working (probably because it's too long) then try to use the GNU-specific + // long name extension by emitting an entry which indicates that it's the + // filename. 
+ if let Err(e) = header.set_path(path) { + let data = path2bytes(&path)?; + let max = header.as_old().name.len(); + // Since e isn't specific enough to let us know the path is indeed too + // long, verify it first before using the extension. + if data.len() < max { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, EntryType::GNULongName); + // null-terminated string + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2).await?; + // Truncate the path to store in the header we're about to emit to + // ensure we've got something at least mentioned. + let path = bytes2path(Cow::Borrowed(&data[..max]))?; + header.set_path(&path)?; + } + Ok(()) +} + +async fn prepare_header_link( + dst: &mut Dst, + header: &mut Header, + link_name: &Path, +) -> io::Result<()> { + // Same as previous function but for linkname + if let Err(e) = header.set_link_name(&link_name) { + let data = path2bytes(&link_name)?; + if data.len() < header.as_old().linkname.len() { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink); + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2).await?; + } + Ok(()) +} + +async fn append_fs( + dst: &mut Dst, + path: &Path, + meta: &Metadata, + read: &mut R, + mode: HeaderMode, + link_name: Option<&Path>, +) -> io::Result<()> { + let mut header = Header::new_gnu(); + + prepare_header_path(dst, &mut header, path).await?; + header.set_metadata_in_mode(meta, mode); + if let Some(link_name) = link_name { + prepare_header_link(dst, &mut header, link_name).await?; + } + header.set_cksum(); + append(dst, &header, read).await?; + + Ok(()) +} + +async fn append_dir_all( + dst: &mut Dst, + path: &Path, + src_path: &Path, + mode: HeaderMode, + follow: bool, +) -> io::Result<()> { + let mut stack = vec![(src_path.to_path_buf(), true, false)]; + while let Some((src, is_dir, is_symlink)) = stack.pop() { + let dest = 
path.join(src.strip_prefix(&src_path).unwrap()); + + // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true + if is_dir || (is_symlink && follow && src.is_dir()) { + let mut entries = fs::read_dir(&src).await?; + while let Some(entry) = entries.next_entry().await.transpose() { + let entry = entry?; + let file_type = entry.file_type().await?; + stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink())); + } + if dest != Path::new("") { + append_dir(dst, &dest, &src, mode).await?; + } + } else if !follow && is_symlink { + let stat = fs::symlink_metadata(&src).await?; + let link_name = fs::read_link(&src).await?; + append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?; + } else { + append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?; + } + } + Ok(()) +} + +impl Drop for Builder { + fn drop(&mut self) { + // TODO: proper async cancellation + if !self.finished { + let _ = self + .cancellation + .take() + .unwrap() + .send(self.obj.take().unwrap()); + } + } +} diff --git a/src/entry.rs b/src/entry.rs new file mode 100644 index 00000000..e239799b --- /dev/null +++ b/src/entry.rs @@ -0,0 +1,955 @@ +use crate::{ + error::TarError, header::bytes2path, other, pax::pax_extensions, Archive, Header, PaxExtensions, +}; +use filetime::{self, FileTime}; +use std::{ + borrow::Cow, + cmp, fmt, + io::{Error, ErrorKind, SeekFrom}, + marker, + path::{Component, Path, PathBuf}, + pin::Pin, + task::{Context, Poll}, +}; +use tokio::{ + fs, + fs::OpenOptions, + io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt}, +}; + +/// A read-only view into an entry of an archive. +/// +/// This structure is a window into a portion of a borrowed archive which can +/// be inspected. It acts as a file handle by implementing the Reader trait. An +/// entry cannot be rewritten once inserted into an archive. 
+pub struct Entry { + fields: EntryFields, + _ignored: marker::PhantomData>, +} + +impl fmt::Debug for Entry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Entry") + .field("fields", &self.fields) + .finish() + } +} + +// private implementation detail of `Entry`, but concrete (no type parameters) +// and also all-public to be constructed from other modules. +pub struct EntryFields { + pub long_pathname: Option>, + pub long_linkname: Option>, + pub pax_extensions: Option>, + pub header: Header, + pub size: u64, + pub header_pos: u64, + pub file_pos: u64, + pub data: Vec>, + pub unpack_xattrs: bool, + pub preserve_permissions: bool, + pub preserve_mtime: bool, + pub(crate) read_state: Option>, +} + +impl fmt::Debug for EntryFields { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("EntryFields") + .field("long_pathname", &self.long_pathname) + .field("long_linkname", &self.long_linkname) + .field("pax_extensions", &self.pax_extensions) + .field("header", &self.header) + .field("size", &self.size) + .field("header_pos", &self.header_pos) + .field("file_pos", &self.file_pos) + .field("data", &self.data) + .field("unpack_xattrs", &self.unpack_xattrs) + .field("preserve_permissions", &self.preserve_permissions) + .field("preserve_mtime", &self.preserve_mtime) + .field("read_state", &self.read_state) + .finish() + } +} + +pub enum EntryIo { + Pad(io::Take), + Data(io::Take), +} + +impl fmt::Debug for EntryIo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + EntryIo::Pad(_) => write!(f, "EntryIo::Pad"), + EntryIo::Data(_) => write!(f, "EntryIo::Data"), + } + } +} + +/// When unpacking items the unpacked thing is returned to allow custom +/// additional handling by users. Today the File is returned, in future +/// the enum may be extended with kinds for links, directories etc. +#[derive(Debug)] +#[non_exhaustive] +pub enum Unpacked { + /// A file was unpacked. 
+ File(fs::File), + /// A directory, hardlink, symlink, or other node was unpacked. + Other, +} + +impl Entry { + /// Returns the path name for this entry. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().path()` as some archive formats have support for longer + /// path names described in separate entries. + /// + /// It is recommended to use this method instead of inspecting the `header` + /// directly to ensure that various archive formats are handled correctly. + pub fn path(&self) -> io::Result> { + self.fields.path() + } + + /// Returns the raw bytes listed for this entry. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().path_bytes()` as some archive formats have support for + /// longer path names described in separate entries. + pub fn path_bytes(&self) -> Cow<[u8]> { + self.fields.path_bytes() + } + + /// Returns the link name for this entry, if any is found. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. `Ok(None)` being returned, however, + /// indicates that the link name was not present. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().link_name()` as some archive formats have support for + /// longer path names described in separate entries. + /// + /// It is recommended to use this method instead of inspecting the `header` + /// directly to ensure that various archive formats are handled correctly. 
+ pub fn link_name(&self) -> io::Result>> { + self.fields.link_name() + } + + /// Returns the link name for this entry, in bytes, if listed. + /// + /// Note that this will not always return the same value as + /// `self.header().link_name_bytes()` as some archive formats have support for + /// longer path names described in separate entries. + pub fn link_name_bytes(&self) -> Option> { + self.fields.link_name_bytes() + } + + /// Returns an iterator over the pax extensions contained in this entry. + /// + /// Pax extensions are a form of archive where extra metadata is stored in + /// key/value pairs in entries before the entry they're intended to + /// describe. For example this can be used to describe long file name or + /// other metadata like atime/ctime/mtime in more precision. + /// + /// The returned iterator will yield key/value pairs for each extension. + /// + /// `None` will be returned if this entry does not indicate that it itself + /// contains extensions, or if there were no previous extensions describing + /// it. + /// + /// Note that global pax extensions are intended to be applied to all + /// archive entries. + /// + /// Also note that this function will read the entire entry if the entry + /// itself is a list of extensions. + pub async fn pax_extensions(&mut self) -> io::Result>> { + self.fields.pax_extensions().await + } + + /// Returns access to the header of this entry in the archive. + /// + /// This provides access to the metadata for this entry in the archive. + pub fn header(&self) -> &Header { + &self.fields.header + } + + /// Returns the starting position, in bytes, of the header of this entry in + /// the archive. + /// + /// The header is always a contiguous section of 512 bytes, so if the + /// underlying reader implements `Seek`, then the slice from `header_pos` to + /// `header_pos + 512` contains the raw header bytes. 
+ pub fn raw_header_position(&self) -> u64 { + self.fields.header_pos + } + + /// Returns the starting position, in bytes, of the file of this entry in + /// the archive. + /// + /// If the file of this entry is continuous (e.g. not a sparse file), and + /// if the underlying reader implements `Seek`, then the slice from + /// `file_pos` to `file_pos + entry_size` contains the raw file bytes. + pub fn raw_file_position(&self) -> u64 { + self.fields.file_pos + } + + /// Writes this file to the specified location. + /// + /// This function will write the entire contents of this file into the + /// location specified by `dst`. Metadata will also be propagated to the + /// path `dst`. + /// + /// This function will create a file at the path `dst`, and it is required + /// that the intermediate directories are created. Any existing file at the + /// location `dst` will be overwritten. + /// + /// > **Note**: This function does not have as many sanity checks as + /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're + /// > thinking of unpacking untrusted tarballs you may want to review the + /// > implementations of the previous two functions and perhaps implement + /// > similar logic yourself. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs::File; + /// use tokio_tar::Archive; + /// use tokio_stream::*; + /// + /// let mut ar = Archive::new(File::open("foo.tar").await?); + /// let mut entries = ar.entries()?; + /// let mut i = 0; + /// while let Some(file) = entries.next().await { + /// let mut file = file?; + /// file.unpack(format!("file-{}", i)).await?; + /// i += 1; + /// } + /// # + /// # Ok(()) }) } + /// ``` + pub async fn unpack>(&mut self, dst: P) -> io::Result { + self.fields.unpack(None, dst.as_ref()).await + } + + /// Extracts this file under the specified path, avoiding security issues. 
+ /// + /// This function will write the entire contents of this file into the + /// location obtained by appending the path of this file in the archive to + /// `dst`, creating any intermediate directories if needed. Metadata will + /// also be propagated to the path `dst`. Any existing file at the location + /// `dst` will be overwritten. + /// + /// This function carefully avoids writing outside of `dst`. If the file has + /// a '..' in its path, this function will skip it and return false. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::{fs::File, stream::*}; + /// use tokio_tar::Archive; + /// use tokio_stream::*; + /// + /// let mut ar = Archive::new(File::open("foo.tar").await?); + /// let mut entries = ar.entries()?; + /// let mut i = 0; + /// while let Some(file) = entries.next().await { + /// let mut file = file.unwrap(); + /// file.unpack_in("target").await?; + /// i += 1; + /// } + /// # + /// # Ok(()) }) } + /// ``` + pub async fn unpack_in>(&mut self, dst: P) -> io::Result { + self.fields.unpack_in(dst.as_ref()).await + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. + pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { + self.fields.unpack_xattrs = unpack_xattrs; + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. 
+ pub fn set_preserve_permissions(&mut self, preserve: bool) { + self.fields.preserve_permissions = preserve; + } + + /// Indicate whether access time information is preserved when unpacking + /// this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(&mut self, preserve: bool) { + self.fields.preserve_mtime = preserve; + } +} + +impl Read for Entry { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + Pin::new(&mut self.as_mut().fields).poll_read(cx, into) + } +} + +impl EntryFields { + pub fn from(entry: Entry) -> Self { + entry.fields + } + + pub fn into_entry(self) -> Entry { + Entry { + fields: self, + _ignored: marker::PhantomData, + } + } + + pub(crate) fn poll_read_all( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll>> { + // Preallocate some data but don't let ourselves get too crazy now. + let cap = cmp::min(self.size, 128 * 1024); + let mut buf = Vec::with_capacity(cap as usize); + + // Copied from futures::ReadToEnd + match futures_core::ready!(poll_read_all_internal(self, cx, &mut buf)) { + Ok(_) => Poll::Ready(Ok(buf)), + Err(err) => Poll::Ready(Err(err)), + } + } + + pub async fn read_all(&mut self) -> io::Result> { + // Preallocate some data but don't let ourselves get too crazy now. 
+ let cap = cmp::min(self.size, 128 * 1024); + let mut v = Vec::with_capacity(cap as usize); + self.read_to_end(&mut v).await.map(|_| v) + } + + fn path(&self) -> io::Result> { + bytes2path(self.path_bytes()) + } + + fn path_bytes(&self) -> Cow<[u8]> { + match self.long_pathname { + Some(ref bytes) => { + if let Some(&0) = bytes.last() { + Cow::Borrowed(&bytes[..bytes.len() - 1]) + } else { + Cow::Borrowed(bytes) + } + } + None => { + if let Some(ref pax) = self.pax_extensions { + let pax = pax_extensions(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"path") + .map(|f| f.value_bytes()); + if let Some(field) = pax { + return Cow::Borrowed(field); + } + } + self.header.path_bytes() + } + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. + fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + fn link_name(&self) -> io::Result>> { + match self.link_name_bytes() { + Some(bytes) => bytes2path(bytes).map(Some), + None => Ok(None), + } + } + + fn link_name_bytes(&self) -> Option> { + match self.long_linkname { + Some(ref bytes) => { + if let Some(&0) = bytes.last() { + Some(Cow::Borrowed(&bytes[..bytes.len() - 1])) + } else { + Some(Cow::Borrowed(bytes)) + } + } + None => self.header.link_name_bytes(), + } + } + + async fn pax_extensions(&mut self) -> io::Result>> { + if self.pax_extensions.is_none() { + if !self.header.entry_type().is_pax_global_extensions() + && !self.header.entry_type().is_pax_local_extensions() + { + return Ok(None); + } + self.pax_extensions = Some(self.read_all().await?); + } + Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap()))) + } + + async fn unpack_in(&mut self, dst: &Path) -> io::Result { + // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: + // * Leading '/'s are trimmed. For example, `///test` is treated as + // `test`. + // * If the filename contains '..', then the file is skipped when + // extracting the tarball. 
+ // * '//' within a filename is effectively skipped. An error is + // logged, but otherwise the effect is as if any two or more + // adjacent '/'s within the filename were consolidated into one + // '/'. + // + // Most of this is handled by the `path` module of the standard + // library, but we specially handle a few cases here as well. + + let mut file_dst = dst.to_path_buf(); + { + let path = self.path().map_err(|e| { + TarError::new( + &format!("invalid path in entry header: {}", self.path_lossy()), + e, + ) + })?; + for part in path.components() { + match part { + // Leading '/' characters, root paths, and '.' + // components are just ignored and treated as "empty + // components" + Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, + + // If any part of the filename is '..', then skip over + // unpacking the file to prevent directory traversal + // security issues. See, e.g.: CVE-2001-1267, + // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 + Component::ParentDir => return Ok(false), + + Component::Normal(part) => file_dst.push(part), + } + } + } + + // Skip cases where only slashes or '.' parts were seen, because + // this is effectively an empty filename. + if *dst == *file_dst { + return Ok(true); + } + + // Skip entries without a parent (i.e. outside of FS root) + let parent = match file_dst.parent() { + Some(p) => p, + None => return Ok(false), + }; + + if parent.symlink_metadata().is_err() { + println!("create_dir_all {:?}", parent); + fs::create_dir_all(&parent).await.map_err(|e| { + TarError::new(&format!("failed to create `{}`", parent.display()), e) + })?; + } + + let canon_target = self.validate_inside_dst(&dst, parent).await?; + + self.unpack(Some(&canon_target), &file_dst) + .await + .map_err(|e| TarError::new(&format!("failed to unpack `{}`", file_dst.display()), e))?; + + Ok(true) + } + + /// Unpack as destination directory `dst`. 
+ async fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> { + // If the directory already exists just let it slide + match fs::create_dir(dst).await { + Ok(()) => Ok(()), + Err(err) => { + if err.kind() == ErrorKind::AlreadyExists { + let prev = fs::metadata(dst).await; + if prev.map(|m| m.is_dir()).unwrap_or(false) { + return Ok(()); + } + } + Err(Error::new( + err.kind(), + format!("{} when creating dir {}", err, dst.display()), + )) + } + } + } + + /// Returns access to the header of this entry in the archive. + async fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result { + let kind = self.header.entry_type(); + + if kind.is_dir() { + self.unpack_dir(dst).await?; + if let Ok(mode) = self.header.mode() { + set_perms(dst, None, mode, self.preserve_permissions).await?; + } + return Ok(Unpacked::Other); + } else if kind.is_hard_link() || kind.is_symlink() { + let src = match self.link_name()? { + Some(name) => name, + None => { + return Err(other(&format!( + "hard link listed for {} but no link name found", + String::from_utf8_lossy(self.header.as_bytes()) + ))); + } + }; + + if src.iter().count() == 0 { + return Err(other(&format!( + "symlink destination for {} is empty", + String::from_utf8_lossy(self.header.as_bytes()) + ))); + } + + if kind.is_hard_link() { + let link_src = match target_base { + // If we're unpacking within a directory then ensure that + // the destination of this hard link is both present and + // inside our own directory. This is needed because we want + // to make sure to not overwrite anything outside the root. + // + // Note that this logic is only needed for hard links + // currently. With symlinks the `validate_inside_dst` which + // happens before this method as part of `unpack_in` will + // use canonicalization to ensure this guarantee. For hard + // links though they're canonicalized to their existing path + // so we need to validate at this time. 
+ Some(ref p) => { + let link_src = p.join(src); + self.validate_inside_dst(p, &link_src).await?; + link_src + } + None => src.into_owned(), + }; + fs::hard_link(&link_src, dst).await.map_err(|err| { + Error::new( + err.kind(), + format!( + "{} when hard linking {} to {}", + err, + link_src.display(), + dst.display() + ), + ) + })?; + } else { + symlink(&src, dst).await.map_err(|err| { + Error::new( + err.kind(), + format!( + "{} when symlinking {} to {}", + err, + src.display(), + dst.display() + ), + ) + })?; + }; + return Ok(Unpacked::Other); + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + async fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) + } + + #[cfg(windows)] + async fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + tokio::fs::os::windows::symlink_file(src, dst).await + } + + #[cfg(any(unix, target_os = "redox"))] + async fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + tokio::fs::symlink(src, dst).await + } + } else if kind.is_pax_global_extensions() + || kind.is_pax_local_extensions() + || kind.is_gnu_longname() + || kind.is_gnu_longlink() + { + return Ok(Unpacked::Other); + }; + + // Old BSD-tar compatibility. + // Names that have a trailing slash should be treated as a directory. + // Only applies to old headers. + if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") { + self.unpack_dir(dst).await?; + if let Ok(mode) = self.header.mode() { + set_perms(dst, None, mode, self.preserve_permissions).await?; + } + return Ok(Unpacked::Other); + } + + // Note the lack of `else` clause above. According to the FreeBSD + // documentation: + // + // > A POSIX-compliant implementation must treat any unrecognized + // > typeflag value as a regular file. + // + // As a result if we don't recognize the kind we just write out the file + // as we would normally. 
+ + // Ensure we write a new file rather than overwriting in-place which + // is attackable; if an existing file is found unlink it. + async fn open(dst: &Path) -> io::Result { + OpenOptions::new() + .write(true) + .create_new(true) + .open(dst) + .await + } + + let mut f = async { + let mut f = match open(dst).await { + Ok(f) => Ok(f), + Err(err) => { + if err.kind() != ErrorKind::AlreadyExists { + Err(err) + } else { + match fs::remove_file(dst).await { + Ok(()) => open(dst).await, + Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst).await, + Err(e) => Err(e), + } + } + } + }?; + for io in self.data.drain(..) { + match io { + EntryIo::Data(mut d) => { + let expected = d.limit(); + if io::copy(&mut d, &mut f).await? != expected { + return Err(other("failed to write entire file")); + } + } + EntryIo::Pad(d) => { + // TODO: checked cast to i64 + let to = SeekFrom::Current(d.limit() as i64); + let size = f.seek(to).await?; + f.set_len(size).await?; + } + } + } + Ok::(f) + } + .await + .map_err(|e| { + let header = self.header.path_bytes(); + TarError::new( + &format!( + "failed to unpack `{}` into `{}`", + String::from_utf8_lossy(&header), + dst.display() + ), + e, + ) + })?; + + if self.preserve_mtime { + if let Ok(mtime) = self.header.mtime() { + let mtime = FileTime::from_unix_time(mtime as i64, 0); + filetime::set_file_times(&dst, mtime, mtime).map_err(|e| { + TarError::new(&format!("failed to set mtime for `{}`", dst.display()), e) + })?; + } + } + if let Ok(mode) = self.header.mode() { + set_perms(dst, Some(&mut f), mode, self.preserve_permissions).await?; + } + if self.unpack_xattrs { + set_xattrs(self, dst).await?; + } + return Ok(Unpacked::File(f)); + + async fn set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + preserve: bool, + ) -> Result<(), TarError> { + _set_perms(dst, f, mode, preserve).await.map_err(|e| { + TarError::new( + &format!( + "failed to set permissions to {:o} \ + for `{}`", + mode, + dst.display() + ), + e, + ) 
+ }) + } + + #[cfg(any(unix, target_os = "redox"))] + async fn _set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + preserve: bool, + ) -> io::Result<()> { + use std::os::unix::prelude::*; + + let mode = if preserve { mode } else { mode & 0o777 }; + let perm = std::fs::Permissions::from_mode(mode as _); + match f { + Some(f) => f.set_permissions(perm).await, + None => fs::set_permissions(dst, perm).await, + } + } + + #[cfg(windows)] + async fn _set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + _preserve: bool, + ) -> io::Result<()> { + if mode & 0o200 == 0o200 { + return Ok(()); + } + match f { + Some(f) => { + let mut perm = f.metadata().await?.permissions(); + perm.set_readonly(true); + f.set_permissions(perm).await + } + None => { + let mut perm = fs::metadata(dst).await?.permissions(); + perm.set_readonly(true); + fs::set_permissions(dst, perm).await + } + } + } + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + async fn _set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + _preserve: bool, + ) -> io::Result<()> { + Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) + } + + #[cfg(all(unix, feature = "xattr"))] + async fn set_xattrs( + me: &mut EntryFields, + dst: &Path, + ) -> io::Result<()> { + use std::{ffi::OsStr, os::unix::prelude::*}; + + let exts = match me.pax_extensions().await { + Ok(Some(e)) => e, + _ => return Ok(()), + }; + let exts = exts + .filter_map(|e| e.ok()) + .filter_map(|e| { + let key = e.key_bytes(); + let prefix = b"SCHILY.xattr."; + if key.starts_with(prefix) { + Some((&key[prefix.len()..], e)) + } else { + None + } + }) + .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes())); + + for (key, value) in exts { + xattr::set(dst, key, value).map_err(|e| { + TarError::new( + &format!( + "failed to set extended \ + attributes to {}. 
\ + Xattrs: key={:?}, value={:?}.", + dst.display(), + key, + String::from_utf8_lossy(value) + ), + e, + ) + })?; + } + + Ok(()) + } + // Windows does not completely support posix xattrs + // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT + #[cfg(any( + windows, + target_os = "redox", + not(feature = "xattr"), + target_arch = "wasm32" + ))] + async fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> { + Ok(()) + } + } + + async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result { + // Abort if target (canonical) parent is outside of `dst` + let canon_parent = file_dst.canonicalize().map_err(|err| { + Error::new( + err.kind(), + format!("{} while canonicalizing {}", err, file_dst.display()), + ) + })?; + let canon_target = dst.canonicalize().map_err(|err| { + Error::new( + err.kind(), + format!("{} while canonicalizing {}", err, dst.display()), + ) + })?; + if !canon_parent.starts_with(&canon_target) { + let err = TarError::new( + &format!( + "trying to unpack outside of destination path: {}", + canon_target.display() + ), + // TODO: use ErrorKind::InvalidInput here? 
(minor breaking change) + Error::new(ErrorKind::Other, "Invalid argument"), + ); + return Err(err.into()); + } + Ok(canon_target) + } +} + +impl Read for EntryFields { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + let mut this = self.get_mut(); + loop { + if this.read_state.is_none() { + if this.data.is_empty() { + this.read_state = None; + } else { + let data = &mut this.data; + this.read_state = Some(data.remove(0)); + } + } + + if let Some(ref mut io) = &mut this.read_state { + let ret = Pin::new(io).poll_read(cx, into); + match ret { + Poll::Ready(Ok(())) if into.filled().is_empty() => { + this.read_state = None; + if this.data.is_empty() { + return Poll::Ready(Ok(())); + } + continue; + } + Poll::Ready(Ok(())) => { + return Poll::Ready(Ok(())); + } + Poll::Ready(Err(err)) => { + return Poll::Ready(Err(err)); + } + Poll::Pending => { + return Poll::Pending; + } + } + } else { + // Unable to pull another value from `data`, so we are done. 
+ return Poll::Ready(Ok(())); + } + } + } +} + +impl Read for EntryIo { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + match self.get_mut() { + EntryIo::Pad(ref mut io) => Pin::new(io).poll_read(cx, into), + EntryIo::Data(ref mut io) => Pin::new(io).poll_read(cx, into), + } + } +} + +struct Guard<'a> { + buf: &'a mut Vec, + len: usize, +} + +impl Drop for Guard<'_> { + fn drop(&mut self) { + unsafe { + self.buf.set_len(self.len); + } + } +} + +fn poll_read_all_internal( + mut rd: Pin<&mut R>, + cx: &mut Context<'_>, + buf: &mut Vec, +) -> Poll> { + let mut g = Guard { + len: buf.len(), + buf, + }; + let ret; + loop { + if g.len == g.buf.len() { + unsafe { + g.buf.reserve(32); + let capacity = g.buf.capacity(); + g.buf.set_len(capacity); + + let buf = &mut g.buf[g.len..]; + std::ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len()); + } + } + + let mut read_buf = io::ReadBuf::new(&mut g.buf[g.len..]); + match futures_core::ready!(rd.as_mut().poll_read(cx, &mut read_buf)) { + Ok(()) if read_buf.filled().is_empty() => { + ret = Poll::Ready(Ok(g.len)); + break; + } + Ok(()) => g.len += read_buf.filled().len(), + Err(e) => { + ret = Poll::Ready(Err(e)); + break; + } + } + } + + ret +} diff --git a/src/entry_type.rs b/src/entry_type.rs new file mode 100644 index 00000000..8c1106a5 --- /dev/null +++ b/src/entry_type.rs @@ -0,0 +1,189 @@ +// See https://en.wikipedia.org/wiki/Tar_%28computing%29#UStar_format +/// Indicate for the type of file described by a header. +/// +/// Each `Header` has an `entry_type` method returning an instance of this type +/// which can be used to inspect what the header is describing. 
+ +/// A non-exhaustive enum representing the possible entry types +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[non_exhaustive] +pub enum EntryType { + /// Regular file + Regular, + /// Hard link + Link, + /// Symbolic link + Symlink, + /// Character device + Char, + /// Block device + Block, + /// Directory + Directory, + /// Named pipe (fifo) + Fifo, + /// Implementation-defined 'high-performance' type, treated as regular file + Continuous, + /// GNU extension - long file name + GNULongName, + /// GNU extension - long link name (link target) + GNULongLink, + /// GNU extension - sparse file + GNUSparse, + /// Global extended header + XGlobalHeader, + /// Extended Header + XHeader, + /// Unknown header, + Other(u8), +} + +impl EntryType { + /// Creates a new entry type from a raw byte. + /// + /// Note that the other named constructors of entry type may be more + /// appropriate to create a file type from. + pub fn new(byte: u8) -> EntryType { + match byte { + b'\x00' | b'0' => EntryType::Regular, + b'1' => EntryType::Link, + b'2' => EntryType::Symlink, + b'3' => EntryType::Char, + b'4' => EntryType::Block, + b'5' => EntryType::Directory, + b'6' => EntryType::Fifo, + b'7' => EntryType::Continuous, + b'x' => EntryType::XHeader, + b'g' => EntryType::XGlobalHeader, + b'L' => EntryType::GNULongName, + b'K' => EntryType::GNULongLink, + b'S' => EntryType::GNUSparse, + other => EntryType::Other(other), + } + } + + /// Returns the raw underlying byte that this entry type represents. 
+ pub fn as_byte(self) -> u8 { + match self { + EntryType::Regular => b'0', + EntryType::Link => b'1', + EntryType::Symlink => b'2', + EntryType::Char => b'3', + EntryType::Block => b'4', + EntryType::Directory => b'5', + EntryType::Fifo => b'6', + EntryType::Continuous => b'7', + EntryType::XHeader => b'x', + EntryType::XGlobalHeader => b'g', + EntryType::GNULongName => b'L', + EntryType::GNULongLink => b'K', + EntryType::GNUSparse => b'S', + EntryType::Other(other) => other, + } + } + + /// Creates a new entry type representing a regular file. + pub fn file() -> EntryType { + EntryType::Regular + } + + /// Creates a new entry type representing a hard link. + pub fn hard_link() -> EntryType { + EntryType::Link + } + + /// Creates a new entry type representing a symlink. + pub fn symlink() -> EntryType { + EntryType::Symlink + } + + /// Creates a new entry type representing a character special device. + pub fn character_special() -> EntryType { + EntryType::Char + } + + /// Creates a new entry type representing a block special device. + pub fn block_special() -> EntryType { + EntryType::Block + } + + /// Creates a new entry type representing a directory. + pub fn dir() -> EntryType { + EntryType::Directory + } + + /// Creates a new entry type representing a FIFO. + pub fn fifo() -> EntryType { + EntryType::Fifo + } + + /// Creates a new entry type representing a contiguous file. + pub fn contiguous() -> EntryType { + EntryType::Continuous + } + + /// Returns whether this type represents a regular file. + pub fn is_file(self) -> bool { + self == EntryType::Regular + } + + /// Returns whether this type represents a hard link. + pub fn is_hard_link(self) -> bool { + self == EntryType::Link + } + + /// Returns whether this type represents a symlink. + pub fn is_symlink(self) -> bool { + self == EntryType::Symlink + } + + /// Returns whether this type represents a character special device. 
+ pub fn is_character_special(self) -> bool { + self == EntryType::Char + } + + /// Returns whether this type represents a block special device. + pub fn is_block_special(self) -> bool { + self == EntryType::Block + } + + /// Returns whether this type represents a directory. + pub fn is_dir(self) -> bool { + self == EntryType::Directory + } + + /// Returns whether this type represents a FIFO. + pub fn is_fifo(self) -> bool { + self == EntryType::Fifo + } + + /// Returns whether this type represents a contiguous file. + pub fn is_contiguous(self) -> bool { + self == EntryType::Continuous + } + + /// Returns whether this type represents a GNU long name header. + pub fn is_gnu_longname(self) -> bool { + self == EntryType::GNULongName + } + + /// Returns whether this type represents a GNU sparse header. + pub fn is_gnu_sparse(self) -> bool { + self == EntryType::GNUSparse + } + + /// Returns whether this type represents a GNU long link header. + pub fn is_gnu_longlink(self) -> bool { + self == EntryType::GNULongLink + } + + /// Returns whether this type represents a pax global extended header. + pub fn is_pax_global_extensions(self) -> bool { + self == EntryType::XGlobalHeader + } + + /// Returns whether this type represents a pax local extended header.
+ pub fn is_pax_local_extensions(self) -> bool { + self == EntryType::XHeader + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 00000000..ab24583b --- /dev/null +++ b/src/error.rs @@ -0,0 +1,40 @@ +use std::{error, fmt}; + +use tokio::io::{self, Error}; + +#[derive(Debug)] +pub struct TarError { + desc: String, + io: io::Error, +} + +impl TarError { + pub fn new(desc: &str, err: Error) -> TarError { + TarError { + desc: desc.to_string(), + io: err, + } + } +} + +impl error::Error for TarError { + fn description(&self) -> &str { + &self.desc + } + + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + Some(&self.io) + } +} + +impl fmt::Display for TarError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.desc.fmt(f) + } +} + +impl From for Error { + fn from(t: TarError) -> Error { + Error::new(t.io.kind(), t) + } +} diff --git a/src/header.rs b/src/header.rs new file mode 100644 index 00000000..71f0deed --- /dev/null +++ b/src/header.rs @@ -0,0 +1,1620 @@ +#[cfg(any(unix, target_os = "redox"))] +use std::os::unix::prelude::*; +#[cfg(windows)] +use std::os::windows::prelude::*; + +use std::{borrow::Cow, fmt, iter, iter::repeat, mem, str}; + +use std::{ + fs::Metadata, + path::{Component, Path, PathBuf}, +}; +use tokio::io; + +use crate::{other, EntryType}; + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct Header { + bytes: [u8; 512], +} + +/// Declares the information that should be included when filling a Header +/// from filesystem metadata. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[non_exhaustive] +pub enum HeaderMode { + /// All supported metadata, including mod/access times and ownership will + /// be included. + Complete, + + /// Only metadata that is directly relevant to the identity of a file will + /// be included. In particular, ownership and mod/access times are excluded. 
+ Deterministic, +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct OldHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub linkflag: [u8; 1], + pub linkname: [u8; 100], + pub pad: [u8; 255], +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct UstarHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // UStar format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub prefix: [u8; 155], + pub pad: [u8; 12], +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // GNU format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub atime: [u8; 12], + pub ctime: [u8; 12], + pub offset: [u8; 12], + pub longnames: [u8; 4], + pub unused: [u8; 1], + pub sparse: [GnuSparseHeader; 4], + pub isextended: [u8; 1], + pub realsize: [u8; 12], + pub pad: [u8; 17], +} + +/// Description of the header of a sparse entry. +/// +/// Specifies the offset/number of bytes of a chunk of data in octal. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuSparseHeader { + pub offset: [u8; 12], + pub numbytes: [u8; 12], +} + +/// Representation of the entry found to represent extended GNU sparse files.
+/// +/// When a `GnuHeader` has the `isextended` flag set to `1` then the contents of +/// the next entry will be one of these headers. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuExtSparseHeader { + pub sparse: [GnuSparseHeader; 21], + pub isextended: [u8; 1], + pub padding: [u8; 7], +} + +impl Header { + /// Creates a new blank GNU header. + /// + /// The GNU style header is the default for this library and allows various + /// extensions such as long path names, long link names, and setting the + /// atime/ctime metadata attributes of files. + pub fn new_gnu() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, GnuHeader>(&mut header); + gnu.magic = *b"ustar "; + gnu.version = *b" \0"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank UStar header. + /// + /// The UStar style header is an extension of the original archive header + /// which enables some extra metadata along with storing a longer (but not + /// too long) path name. + /// + /// UStar is also the basis used for pax archives. + pub fn new_ustar() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, UstarHeader>(&mut header); + gnu.magic = *b"ustar\0"; + gnu.version = *b"00"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank old header. + /// + /// This header format is the original archive header format which all other + /// versions are compatible with (e.g. they are a superset). This header + /// format limits the path name limit and isn't able to contain extra + /// metadata like atime/ctime. + pub fn new_old() -> Header { + let mut header = Header { bytes: [0; 512] }; + header.set_mtime(0); + header + } + + fn is_ustar(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] == b"ustar\0"[..] && ustar.version[..] == b"00"[..] + } + + fn is_gnu(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] 
== b"ustar "[..] && ustar.version[..] == b" \0"[..] + } + + /// View this archive header as a raw "old" archive header. + /// + /// This view will always succeed as all archive header formats will fill + /// out at least the fields specified in the old header format. + pub fn as_old(&self) -> &OldHeader { + unsafe { cast(self) } + } + + /// Same as `as_old`, but the mutable version. + pub fn as_old_mut(&mut self) -> &mut OldHeader { + unsafe { cast_mut(self) } + } + + /// View this archive header as a raw UStar archive header. + /// + /// The UStar format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. + /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the UStar format have the appropriate values, + /// returning `None` if they aren't correct. + pub fn as_ustar(&self) -> Option<&UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_ustar`, but the mutable version. + pub fn as_ustar_mut(&mut self) -> Option<&mut UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// View this archive header as a raw GNU archive header. + /// + /// The GNU format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. + /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the GNU format have the appropriate values, + /// returning `None` if they aren't correct. + pub fn as_gnu(&self) -> Option<&GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_gnu`, but the mutable version.
+ pub fn as_gnu_mut(&mut self) -> Option<&mut GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// Treats the given byte slice as a header. + /// + /// Panics if the length of the passed slice is not equal to 512. + pub fn from_byte_slice(bytes: &[u8]) -> &Header { + assert_eq!(bytes.len(), mem::size_of::
()); + assert_eq!(mem::align_of_val(bytes), mem::align_of::
()); + unsafe { &*(bytes.as_ptr() as *const Header) } + } + + /// Returns a view into this header as a byte array. + pub fn as_bytes(&self) -> &[u8; 512] { + &self.bytes + } + + /// Returns a view into this header as a byte array. + pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] { + &mut self.bytes + } + + /// Blanket sets the metadata in this header from the metadata argument + /// provided. + /// + /// This is useful for initializing a `Header` from the OS's metadata from a + /// file. By default, this will use `HeaderMode::Complete` to include all + /// metadata. + pub fn set_metadata(&mut self, meta: &Metadata) { + self.fill_from(meta, HeaderMode::Complete); + } + + /// Sets only the metadata relevant to the given HeaderMode in this header + /// from the metadata argument provided. + pub fn set_metadata_in_mode(&mut self, meta: &Metadata, mode: HeaderMode) { + self.fill_from(meta, mode); + } + + /// Returns the size of entry's data this header represents. + /// + /// This is different from `Header::size` for sparse files, which have + /// some longer `size()` but shorter `entry_size()`. The `entry_size()` + /// listed here should be the number of bytes in the archive this header + /// describes. + /// + /// May return an error if the field is corrupted. + pub fn entry_size(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().size).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting size for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the file size this header represents. + /// + /// May return an error if the field is corrupted. + pub fn size(&self) -> io::Result { + if self.entry_type().is_gnu_sparse() { + self.as_gnu() + .ok_or_else(|| other("sparse header was not a gnu header")) + .and_then(|h| h.real_size()) + } else { + self.entry_size() + } + } + + /// Encodes the `size` argument into the size field of this header. 
+ pub fn set_size(&mut self, size: u64) { + num_field_wrapper_into(&mut self.as_old_mut().size, size); + } + + /// Returns the raw path name stored in this header. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path(&self) -> io::Result> { + bytes2path(self.path_bytes()) + } + + /// Returns the pathname stored in this header as a byte array. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `path` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path_bytes(&self) -> Cow<[u8]> { + if let Some(ustar) = self.as_ustar() { + ustar.path_bytes() + } else { + let name = truncate(&self.as_old().name); + Cow::Borrowed(name) + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. + fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// Sets the path name for this header. + /// + /// This function will set the pathname listed in this header, encoding it + /// in the appropriate format. May fail if the path is too long or if the + /// path specified is not Unicode and this is a Windows platform. + pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_path(path); + } + copy_path_into(&mut self.as_old_mut().name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the link name stored in this header, if any is found. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. 
`Ok(None)` being returned, however, + /// indicates that the link name was not present. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name(&self) -> io::Result>> { + match self.link_name_bytes() { + Some(bytes) => bytes2path(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the link name stored in this header as a byte array, if any. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `link_name` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name_bytes(&self) -> Option> { + let old = self.as_old(); + if old.linkname[0] != 0 { + Some(Cow::Borrowed(truncate(&old.linkname))) + } else { + None + } + } + + /// Sets the link name for this header. + /// + /// This function will set the linkname listed in this header, encoding it + /// in the appropriate format. May fail if the link name is too long or if + /// the path specified is not Unicode and this is a Windows platform. + pub fn set_link_name>(&mut self, p: P) -> io::Result<()> { + self._set_link_name(p.as_ref()) + } + + fn _set_link_name(&mut self, path: &Path) -> io::Result<()> { + copy_path_into(&mut self.as_old_mut().linkname, path, true).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting link name for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the mode bits for this file + /// + /// May return an error if the field is corrupted. + pub fn mode(&self) -> io::Result { + octal_from(&self.as_old().mode) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mode for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mode` provided into this header. 
+ pub fn set_mode(&mut self, mode: u32) { + octal_into(&mut self.as_old_mut().mode, mode); + } + + /// Returns the value of the owner's user ID field + /// + /// May return an error if the field is corrupted. + pub fn uid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().uid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting uid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `uid` provided into this header. + pub fn set_uid(&mut self, uid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().uid, uid); + } + + /// Returns the value of the group's user ID field + pub fn gid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().gid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting gid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `gid` provided into this header. + pub fn set_gid(&mut self, gid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().gid, gid); + } + + /// Returns the last modification time in Unix time format + pub fn mtime(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().mtime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mtime for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mtime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_mtime(&mut self, mtime: u64) { + num_field_wrapper_into(&mut self.as_old_mut().mtime, mtime); + } + + /// Return the user name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the user name was + /// present and was valid utf-8, `Ok(None)` indicates that the user name is + /// not present in this archive format, and `Err` indicates that the user + /// name was present but was not valid utf-8. 
+ pub fn username(&self) -> Result, str::Utf8Error> { + match self.username_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the user name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the user name is not present in + /// this header format. + pub fn username_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.username_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.username_bytes()) + } else { + None + } + } + + /// Sets the username inside this header. + /// + /// This function will return an error if this header format cannot encode a + /// user name or the name is too long. + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_username(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_username(name) + } else { + Err(other("not a ustar or gnu archive, cannot set username")) + } + } + + /// Return the group name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the group name was + /// present and was valid utf-8, `Ok(None)` indicates that the group name is + /// not present in this archive format, and `Err` indicates that the group + /// name was present but was not valid utf-8. + pub fn groupname(&self) -> Result, str::Utf8Error> { + match self.groupname_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the group name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the group name is not present in + /// this header format. + pub fn groupname_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.groupname_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.groupname_bytes()) + } else { + None + } + } + + /// Sets the group name inside this header. 
+ /// + /// This function will return an error if this header format cannot encode a + /// group name or the name is too long. + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_groupname(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_groupname(name) + } else { + Err(other("not a ustar or gnu archive, cannot set groupname")) + } + } + + /// Returns the device major number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device major number, and `Err` indicates that it was + /// present and failed to decode. + pub fn device_major(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_major().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_major().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `major` into the dev_major field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// major device number. + pub fn set_device_major(&mut self, major: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_major(major); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_major(major); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_major")) + } + } + + /// Returns the device minor number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device minor number, and `Err` indicates that it was + /// present and failed to decode. 
+ pub fn device_minor(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_minor().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_minor().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `minor` into the dev_minor field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// minor device number. + pub fn set_device_minor(&mut self, minor: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_minor(minor); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_minor(minor); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_minor")) + } + } + + /// Returns the type of file described by this header. + pub fn entry_type(&self) -> EntryType { + EntryType::new(self.as_old().linkflag[0]) + } + + /// Sets the type of file that will be described by this header. + pub fn set_entry_type(&mut self, ty: EntryType) { + self.as_old_mut().linkflag = [ty.as_byte()]; + } + + /// Returns the checksum field of this header. + /// + /// May return an error if the field is corrupted. + pub fn cksum(&self) -> io::Result { + octal_from(&self.as_old().cksum) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting cksum for {}", err, self.path_lossy()), + ) + }) + } + + /// Sets the checksum field of this header based on the current fields in + /// this header. 
+ pub fn set_cksum(&mut self) { + let cksum = self.calculate_cksum(); + octal_into(&mut self.as_old_mut().cksum, cksum); + } + + fn calculate_cksum(&self) -> u32 { + let old = self.as_old(); + let start = old as *const _ as usize; + let cksum_start = old.cksum.as_ptr() as *const _ as usize; + let offset = cksum_start - start; + let len = old.cksum.len(); + self.bytes[0..offset] + .iter() + .chain(iter::repeat(&b' ').take(len)) + .chain(&self.bytes[offset + len..]) + .fold(0, |a, b| a + (*b as u32)) + } + + fn fill_from(&mut self, meta: &Metadata, mode: HeaderMode) { + self.fill_platform_from(meta, mode); + // Set size of directories to zero + self.set_size(if meta.is_dir() || meta.file_type().is_symlink() { + 0 + } else { + meta.len() + }); + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_major(0); + ustar.set_device_minor(0); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_major(0); + gnu.set_device_minor(0); + } + } + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) { + unimplemented!(); + } + + #[cfg(any(unix, target_os = "redox"))] + fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) { + match mode { + HeaderMode::Complete => { + self.set_mtime(meta.mtime() as u64); + self.set_uid(meta.uid() as u64); + self.set_gid(meta.gid() as u64); + self.set_mode(meta.mode() as u32); + } + HeaderMode::Deterministic => { + self.set_mtime(0); + self.set_uid(0); + self.set_gid(0); + + // Use a default umask value, but propagate the (user) execute bit. + let fs_mode = if meta.is_dir() || (0o100 & meta.mode() == 0o100) { + 0o755 + } else { + 0o644 + }; + self.set_mode(fs_mode); + } + } + + // Note that if we are a GNU header we *could* set atime/ctime, except + // the `tar` utility doesn't do that by default and it causes problems + // with 7-zip [1]. 
+ // + // It's always possible to fill them out manually, so we just don't fill + // it out automatically here. + // + // [1]: https://github.com/alexcrichton/tar-rs/issues/70 + + // TODO: need to bind more file types + self.set_entry_type(entry_type(meta.mode())); + + #[cfg(not(target_os = "redox"))] + fn entry_type(mode: u32) -> EntryType { + match mode as libc::mode_t & libc::S_IFMT { + libc::S_IFREG => EntryType::file(), + libc::S_IFLNK => EntryType::symlink(), + libc::S_IFCHR => EntryType::character_special(), + libc::S_IFBLK => EntryType::block_special(), + libc::S_IFDIR => EntryType::dir(), + libc::S_IFIFO => EntryType::fifo(), + _ => EntryType::new(b' '), + } + } + + #[cfg(target_os = "redox")] + fn entry_type(mode: u32) -> EntryType { + use syscall; + match mode as u16 & syscall::MODE_TYPE { + syscall::MODE_FILE => EntryType::file(), + syscall::MODE_SYMLINK => EntryType::symlink(), + syscall::MODE_DIR => EntryType::dir(), + _ => EntryType::new(b' '), + } + } + } + + #[cfg(windows)] + fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) { + // There's no concept of a file mode on Windows, so do a best approximation here. + match mode { + HeaderMode::Complete => { + self.set_uid(0); + self.set_gid(0); + // The dates listed in tarballs are always seconds relative to + // January 1, 1970. On Windows, however, the timestamps are returned as + // dates relative to January 1, 1601 (in 100ns intervals), so we need to + // add in some offset for those dates. 
+ let mtime = (meta.last_write_time() / (1_000_000_000 / 100)) - 11644473600; + self.set_mtime(mtime); + let fs_mode = { + const FILE_ATTRIBUTE_READONLY: u32 = 0x00000001; + let readonly = meta.file_attributes() & FILE_ATTRIBUTE_READONLY; + match (meta.is_dir(), readonly != 0) { + (true, false) => 0o755, + (true, true) => 0o555, + (false, false) => 0o644, + (false, true) => 0o444, + } + }; + self.set_mode(fs_mode); + } + HeaderMode::Deterministic => { + self.set_uid(0); + self.set_gid(0); + self.set_mtime(0); + let fs_mode = if meta.is_dir() { 0o755 } else { 0o644 }; + self.set_mode(fs_mode); + } + } + + let ft = meta.file_type(); + self.set_entry_type(if ft.is_dir() { + EntryType::dir() + } else if ft.is_file() { + EntryType::file() + } else if ft.is_symlink() { + EntryType::symlink() + } else { + EntryType::new(b' ') + }); + } + + fn debug_fields(&self, b: &mut fmt::DebugStruct) { + if let Ok(entry_size) = self.entry_size() { + b.field("entry_size", &entry_size); + } + if let Ok(size) = self.size() { + b.field("size", &size); + } + if let Ok(path) = self.path() { + b.field("path", &path); + } + if let Ok(link_name) = self.link_name() { + b.field("link_name", &link_name); + } + if let Ok(mode) = self.mode() { + b.field("mode", &DebugAsOctal(mode)); + } + if let Ok(uid) = self.uid() { + b.field("uid", &uid); + } + if let Ok(gid) = self.gid() { + b.field("gid", &gid); + } + if let Ok(mtime) = self.mtime() { + b.field("mtime", &mtime); + } + if let Ok(username) = self.username() { + b.field("username", &username); + } + if let Ok(groupname) = self.groupname() { + b.field("groupname", &groupname); + } + if let Ok(device_major) = self.device_major() { + b.field("device_major", &device_major); + } + if let Ok(device_minor) = self.device_minor() { + b.field("device_minor", &device_minor); + } + if let Ok(cksum) = self.cksum() { + b.field("cksum", &cksum); + b.field("cksum_valid", &(cksum == self.calculate_cksum())); + } + } +} + +struct DebugAsOctal(T); + +impl 
fmt::Debug for DebugAsOctal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Octal::fmt(&self.0, f) + } +} + +unsafe fn cast(a: &T) -> &U { + assert_eq!(mem::size_of_val(a), mem::size_of::()); + assert_eq!(mem::align_of_val(a), mem::align_of::()); + &*(a as *const T as *const U) +} + +unsafe fn cast_mut(a: &mut T) -> &mut U { + assert_eq!(mem::size_of_val(a), mem::size_of::()); + assert_eq!(mem::align_of_val(a), mem::align_of::()); + &mut *(a as *mut T as *mut U) +} + +impl Clone for Header { + fn clone(&self) -> Header { + Header { bytes: self.bytes } + } +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(me) = self.as_ustar() { + me.fmt(f) + } else if let Some(me) = self.as_gnu() { + me.fmt(f) + } else { + self.as_old().fmt(f) + } + } +} + +impl OldHeader { + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for OldHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("OldHeader"); + self.as_header().debug_fields(&mut f); + f.finish() + } +} + +impl UstarHeader { + /// See `Header::path_bytes` + pub fn path_bytes(&self) -> Cow<[u8]> { + if self.prefix[0] == 0 && !self.name.contains(&b'\\') { + Cow::Borrowed(truncate(&self.name)) + } else { + let mut bytes = Vec::new(); + let prefix = truncate(&self.prefix); + if !prefix.is_empty() { + bytes.extend_from_slice(prefix); + bytes.push(b'/'); + } + bytes.extend_from_slice(truncate(&self.name)); + Cow::Owned(bytes) + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. 
+ fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// See `Header::set_path` + pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + // This can probably be optimized quite a bit more, but for now just do + // something that's relatively easy and readable. + // + // First up, if the path fits within `self.name` then we just shove it + // in there. If not then we try to split it between some existing path + // components where it can fit in name/prefix. To do that we peel off + // enough until the path fits in `prefix`, then we try to put both + // halves into their destination. + let bytes = path2bytes(path)?; + let (maxnamelen, maxprefixlen) = (self.name.len(), self.prefix.len()); + if bytes.len() <= maxnamelen { + copy_path_into(&mut self.name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } else { + let mut prefix = path; + let mut prefixlen; + loop { + match prefix.parent() { + Some(parent) => prefix = parent, + None => { + return Err(other(&format!( + "path cannot be split to be inserted into archive: {}", + path.display() + ))); + } + } + prefixlen = path2bytes(prefix)?.len(); + if prefixlen <= maxprefixlen { + break; + } + } + copy_path_into(&mut self.prefix, prefix, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + let path = bytes2path(Cow::Borrowed(&bytes[prefixlen + 1..]))?; + copy_path_into(&mut self.name, &path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } + Ok(()) + } + + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, 
name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting username for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting groupname for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for UstarHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("UstarHeader"); + self.as_header().debug_fields(&mut f); + f.finish() + } +} + +impl GnuHeader { + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// Gets 
the fullname (group:user) in a "lossy" way, used for error reporting ONLY. + fn fullname_lossy(&self) -> String { + format!( + "{}:{}", + String::from_utf8_lossy(&self.groupname_bytes()), + String::from_utf8_lossy(&self.username_bytes()), + ) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting username for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting groupname for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Returns the last modification time in Unix time format + pub fn atime(&self) -> io::Result { + num_field_wrapper_from(&self.atime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting atime 
for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `atime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_atime(&mut self, atime: u64) { + num_field_wrapper_into(&mut self.atime, atime); + } + + /// Returns the last modification time in Unix time format + pub fn ctime(&self) -> io::Result { + num_field_wrapper_from(&self.ctime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting ctime for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `ctime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_ctime(&mut self, ctime: u64) { + num_field_wrapper_into(&mut self.ctime, ctime); + } + + /// Returns the "real size" of the file this header represents. + /// + /// This is applicable for sparse files where the returned size here is the + /// size of the entire file after the sparse regions have been filled in. + pub fn real_size(&self) -> io::Result { + octal_from(&self.realsize).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting real_size for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// Indicates whether this header will be followed by additional + /// sparse-header records. + /// + /// Note that this is handled internally by this library, and is likely only + /// interesting if a `raw` iterator is being used. 
+ pub fn is_extended(&self) -> bool { + self.isextended[0] == 1 + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for GnuHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("GnuHeader"); + self.as_header().debug_fields(&mut f); + if let Ok(atime) = self.atime() { + f.field("atime", &atime); + } + if let Ok(ctime) = self.ctime() { + f.field("ctime", &ctime); + } + f.field("is_extended", &self.is_extended()) + .field("sparse", &DebugSparseHeaders(&self.sparse)) + .finish() + } +} + +struct DebugSparseHeaders<'a>(&'a [GnuSparseHeader]); + +impl<'a> fmt::Debug for DebugSparseHeaders<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_list(); + for header in self.0 { + if !header.is_empty() { + f.entry(header); + } + } + f.finish() + } +} + +impl GnuSparseHeader { + /// Returns true if block is empty + pub fn is_empty(&self) -> bool { + self.offset[0] == 0 || self.numbytes[0] == 0 + } + + /// Offset of the block from the start of the file + /// + /// Returns `Err` for a malformed `offset` field. + pub fn offset(&self) -> io::Result { + octal_from(&self.offset).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting offset from sparse header", err), + ) + }) + } + + /// Length of the block + /// + /// Returns `Err` for a malformed `numbytes` field. 
+ pub fn length(&self) -> io::Result { + octal_from(&self.numbytes).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting length from sparse header", err), + ) + }) + } +} + +impl fmt::Debug for GnuSparseHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("GnuSparseHeader"); + if let Ok(offset) = self.offset() { + f.field("offset", &offset); + } + if let Ok(length) = self.length() { + f.field("length", &length); + } + f.finish() + } +} + +impl GnuExtSparseHeader { + /// Crates a new zero'd out sparse header entry. + pub fn new() -> GnuExtSparseHeader { + unsafe { mem::zeroed() } + } + + /// Returns a view into this header as a byte array. + pub fn as_bytes(&self) -> &[u8; 512] { + debug_assert_eq!(mem::size_of_val(self), 512); + unsafe { &*(self as *const GnuExtSparseHeader as *const [u8; 512]) } + } + + /// Returns a view into this header as a byte array. + pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] { + debug_assert_eq!(mem::size_of_val(self), 512); + unsafe { &mut *(self as *mut GnuExtSparseHeader as *mut [u8; 512]) } + } + + /// Returns a slice of the underlying sparse headers. + /// + /// Some headers may represent empty chunks of both the offset and numbytes + /// fields are 0. + pub fn sparse(&self) -> &[GnuSparseHeader; 21] { + &self.sparse + } + + /// Indicates if another sparse header should be following this one. 
+ pub fn is_extended(&self) -> bool { + self.isextended[0] == 1 + } +} + +impl Default for GnuExtSparseHeader { + fn default() -> Self { + Self::new() + } +} + +fn octal_from(slice: &[u8]) -> io::Result { + let trun = truncate(slice); + let num = match str::from_utf8(trun) { + Ok(n) => n, + Err(_) => { + return Err(other(&format!( + "numeric field did not have utf-8 text: {}", + String::from_utf8_lossy(trun) + ))); + } + }; + match u64::from_str_radix(num.trim(), 8) { + Ok(n) => Ok(n), + Err(_) => Err(other(&format!("numeric field was not a number: {}", num))), + } +} + +fn octal_into(dst: &mut [u8], val: T) { + let o = format!("{:o}", val); + let value = o.bytes().rev().chain(repeat(b'0')); + for (slot, value) in dst.iter_mut().rev().skip(1).zip(value) { + *slot = value; + } +} + +// Wrapper to figure out if we should fill the header field using tar's numeric +// extension (binary) or not (octal). +fn num_field_wrapper_into(dst: &mut [u8], src: u64) { + if src >= 8_589_934_592 || (src >= 2_097_152 && dst.len() == 8) { + numeric_extended_into(dst, src); + } else { + octal_into(dst, src); + } +} + +// Wrapper to figure out if we should read the header field in binary (numeric +// extension) or octal (standard encoding). +fn num_field_wrapper_from(src: &[u8]) -> io::Result { + if src[0] & 0x80 != 0 { + Ok(numeric_extended_from(src)) + } else { + octal_from(src) + } +} + +// When writing numeric fields with is the extended form, the high bit of the +// first byte is set to 1 and the remainder of the field is treated as binary +// instead of octal ascii. +// This handles writing u64 to 8 (uid, gid) or 12 (size, *time) bytes array. 
+fn numeric_extended_into(dst: &mut [u8], src: u64) { + let len: usize = dst.len(); + for (slot, val) in dst.iter_mut().zip( + repeat(0) + .take(len - 8) // to zero init extra bytes + .chain((0..8).rev().map(|x| ((src >> (8 * x)) & 0xff) as u8)), + ) { + *slot = val; + } + dst[0] |= 0x80; +} + +fn numeric_extended_from(src: &[u8]) -> u64 { + let mut dst: u64 = 0; + let mut b_to_skip = 1; + if src.len() == 8 { + // read first byte without extension flag bit + dst = (src[0] ^ 0x80) as u64; + } else { + // only read last 8 bytes + b_to_skip = src.len() - 8; + } + for byte in src.iter().skip(b_to_skip) { + dst <<= 8; + dst |= *byte as u64; + } + dst +} + +fn truncate(slice: &[u8]) -> &[u8] { + match slice.iter().position(|i| *i == 0) { + Some(i) => &slice[..i], + None => slice, + } +} + +/// Copies `bytes` into the `slot` provided, returning an error if the `bytes` +/// array is too long or if it contains any nul bytes. +fn copy_into(slot: &mut [u8], bytes: &[u8]) -> io::Result<()> { + if bytes.len() > slot.len() { + Err(other("provided value is too long")) + } else if bytes.iter().any(|b| *b == 0) { + Err(other("provided value contains a nul byte")) + } else { + for (slot, val) in slot.iter_mut().zip(bytes.iter().chain(Some(&0))) { + *slot = *val; + } + Ok(()) + } +} + +/// Copies `path` into the `slot` provided +/// +/// Returns an error if: +/// +/// * the path is too long to fit +/// * a nul byte was found +/// * an invalid path component is encountered (e.g. 
a root path or parent dir) +/// * the path itself is empty +fn copy_path_into(mut slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> { + let mut emitted = false; + let mut needs_slash = false; + for component in path.components() { + let bytes = path2bytes(Path::new(component.as_os_str()))?; + match (component, is_link_name) { + (Component::Prefix(..), false) | (Component::RootDir, false) => { + return Err(other("paths in archives must be relative")); + } + (Component::ParentDir, false) => { + return Err(other("paths in archives must not have `..`")); + } + // Allow "./" as the path + (Component::CurDir, false) if path.components().count() == 1 => {} + (Component::CurDir, false) => continue, + (Component::Normal(_), _) | (_, true) => {} + }; + if needs_slash { + copy(&mut slot, b"/")?; + } + if bytes.contains(&b'/') { + if let Component::Normal(..) = component { + return Err(other("path component in archive cannot contain `/`")); + } + } + copy(&mut slot, &*bytes)?; + if &*bytes != b"/" { + needs_slash = true; + } + emitted = true; + } + if !emitted { + return Err(other("paths in archives must have at least one component")); + } + if ends_with_slash(path) { + copy(&mut slot, &[b'/'])?; + } + return Ok(()); + + fn copy(slot: &mut &mut [u8], bytes: &[u8]) -> io::Result<()> { + copy_into(*slot, bytes)?; + let tmp = mem::replace(slot, &mut []); + *slot = &mut tmp[bytes.len()..]; + Ok(()) + } +} + +#[cfg(target_arch = "wasm32")] +fn ends_with_slash(p: &Path) -> bool { + p.to_string_lossy().ends_with('/') +} + +#[cfg(windows)] +fn ends_with_slash(p: &Path) -> bool { + let last = p.as_os_str().encode_wide().last(); + last == Some(b'/' as u16) || last == Some(b'\\' as u16) +} + +#[cfg(any(unix, target_os = "redox"))] +fn ends_with_slash(p: &Path) -> bool { + p.as_os_str().as_bytes().ends_with(&[b'/']) +} + +#[cfg(any(windows, target_arch = "wasm32"))] +pub fn path2bytes(p: &Path) -> io::Result> { + p.as_os_str() + .to_str() + .map(|s| s.as_bytes()) + 
.ok_or_else(|| other(&format!("path {} was not valid Unicode", p.display()))) + .map(|bytes| { + if bytes.contains(&b'\\') { + // Normalize to Unix-style path separators + let mut bytes = bytes.to_owned(); + for b in &mut bytes { + if *b == b'\\' { + *b = b'/'; + } + } + Cow::Owned(bytes) + } else { + Cow::Borrowed(bytes) + } + }) +} + +#[cfg(any(unix, target_os = "redox"))] +/// On unix this will never fail +pub fn path2bytes(p: &Path) -> io::Result> { + Ok(p.as_os_str().as_bytes()).map(Cow::Borrowed) +} + +#[cfg(windows)] +/// On windows we cannot accept non-Unicode bytes because it +/// is impossible to convert it to UTF-16. +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + return match bytes { + Cow::Borrowed(bytes) => { + let s = str::from_utf8(bytes).map_err(|_| not_unicode(bytes))?; + Ok(Cow::Borrowed(Path::new(s))) + } + Cow::Owned(bytes) => { + let s = String::from_utf8(bytes).map_err(|uerr| not_unicode(&uerr.into_bytes()))?; + Ok(Cow::Owned(PathBuf::from(s))) + } + }; + + fn not_unicode(v: &[u8]) -> io::Error { + other(&format!( + "only Unicode paths are supported on Windows: {}", + String::from_utf8_lossy(v) + )) + } +} + +#[cfg(any(unix, target_os = "redox"))] +/// On unix this operation can never fail. +pub fn bytes2path(bytes: Cow<'_, [u8]>) -> io::Result> { + use std::ffi::{OsStr, OsString}; + + Ok(match bytes { + Cow::Borrowed(bytes) => Cow::Borrowed(Path::new(OsStr::from_bytes(bytes))), + Cow::Owned(bytes) => Cow::Owned(PathBuf::from(OsString::from_vec(bytes))), + }) +} + +#[cfg(target_arch = "wasm32")] +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + Ok(match bytes { + Cow::Borrowed(bytes) => { + Cow::Borrowed({ Path::new(str::from_utf8(bytes).map_err(invalid_utf8)?) }) + } + Cow::Owned(bytes) => { + Cow::Owned({ PathBuf::from(String::from_utf8(bytes).map_err(invalid_utf8)?) 
}) + } + }) +} + +#[cfg(target_arch = "wasm32")] +fn invalid_utf8(_: T) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, "Invalid utf-8") +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..b22607d5 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,45 @@ +//! A library for reading and writing TAR archives in an async fashion. +//! +//! This library provides utilities necessary to manage [TAR archives][1] +//! abstracted over a reader or writer. Great strides are taken to ensure that +//! an archive is never required to be fully resident in memory, and all objects +//! provide largely a streaming interface to read bytes from. +//! +//! [1]: http://en.wikipedia.org/wiki/Tar_%28computing%29 + +// More docs about the detailed tar format can also be found here: +// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current + +// NB: some of the coding patterns and idioms here may seem a little strange. +// This is currently attempting to expose a super generic interface while +// also not forcing clients to codegen the entire crate each time they use +// it. To that end lots of work is done to ensure that concrete +// implementations are all found in this crate and the generic functions are +// all just super thin wrappers (e.g. easy to codegen). 
+ +#![deny(missing_docs)] + +use std::io::{Error, ErrorKind}; + +pub use crate::{ + archive::{Archive, ArchiveBuilder, Entries}, + builder::Builder, + entry::{Entry, Unpacked}, + entry_type::EntryType, + header::{ + GnuExtSparseHeader, GnuHeader, GnuSparseHeader, Header, HeaderMode, OldHeader, UstarHeader, + }, + pax::{PaxExtension, PaxExtensions}, +}; + +mod archive; +mod builder; +mod entry; +mod entry_type; +mod error; +mod header; +mod pax; + +fn other(msg: &str) -> Error { + Error::new(ErrorKind::Other, msg) +} diff --git a/src/pax.rs b/src/pax.rs new file mode 100644 index 00000000..0405899a --- /dev/null +++ b/src/pax.rs @@ -0,0 +1,88 @@ +use std::{slice, str}; + +use tokio::io; + +use crate::other; + +/// An iterator over the pax extensions in an archive entry. +/// +/// This iterator yields structures which can themselves be parsed into +/// key/value pairs. +pub struct PaxExtensions<'entry> { + data: slice::Split<'entry, u8, fn(&u8) -> bool>, +} + +/// A key/value pair corresponding to a pax extension. +pub struct PaxExtension<'entry> { + key: &'entry [u8], + value: &'entry [u8], +} + +pub fn pax_extensions(a: &[u8]) -> PaxExtensions { + PaxExtensions { + data: a.split(|a| *a == b'\n'), + } +} + +impl<'entry> Iterator for PaxExtensions<'entry> { + type Item = io::Result>; + + fn next(&mut self) -> Option>> { + let line = match self.data.next() { + Some(line) if line.is_empty() => return None, + Some(line) => line, + None => return None, + }; + + Some( + line.iter() + .position(|b| *b == b' ') + .and_then(|i| { + str::from_utf8(&line[..i]) + .ok() + .and_then(|len| len.parse::().ok().map(|j| (i + 1, j))) + }) + .and_then(|(kvstart, reported_len)| { + if line.len() + 1 == reported_len { + line[kvstart..] 
+ .iter() + .position(|b| *b == b'=') + .map(|equals| (kvstart, equals)) + } else { + None + } + }) + .map(|(kvstart, equals)| PaxExtension { + key: &line[kvstart..kvstart + equals], + value: &line[kvstart + equals + 1..], + }) + .ok_or_else(|| other("malformed pax extension")), + ) + } +} + +impl<'entry> PaxExtension<'entry> { + /// Returns the key for this key/value pair parsed as a string. + /// + /// May fail if the key isn't actually utf-8. + pub fn key(&self) -> Result<&'entry str, str::Utf8Error> { + str::from_utf8(self.key) + } + + /// Returns the underlying raw bytes for the key of this key/value pair. + pub fn key_bytes(&self) -> &'entry [u8] { + self.key + } + + /// Returns the value for this key/value pair parsed as a string. + /// + /// May fail if the value isn't actually utf-8. + pub fn value(&self) -> Result<&'entry str, str::Utf8Error> { + str::from_utf8(self.value) + } + + /// Returns the underlying raw bytes for this value of this key/value pair. + pub fn value_bytes(&self) -> &'entry [u8] { + self.value + } +} diff --git a/tests/all.rs b/tests/all.rs new file mode 100644 index 00000000..7fefb10a --- /dev/null +++ b/tests/all.rs @@ -0,0 +1,1117 @@ +extern crate tokio_tar as async_tar; + +extern crate filetime; +extern crate tempfile; +#[cfg(all(unix, feature = "xattr"))] +extern crate xattr; + +use std::{ + io::Cursor, + iter::repeat, + path::{Path, PathBuf}, +}; +use tokio::{ + fs::{self, File}, + io::{self, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}, +}; +use tokio_stream::*; + +use async_tar::{Archive, ArchiveBuilder, Builder, EntryType, Header}; +use filetime::FileTime; +use tempfile::{Builder as TempBuilder, TempDir}; + +macro_rules! t { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => panic!("{} returned {}", stringify!($e), e), + } + }; +} + +macro_rules! tar { + ($e:expr) => { + &include_bytes!(concat!("archives/", $e))[..] 
+ }; +} + +mod header; + +/// test that we can concatenate the simple.tar archive and extract the same entries twice when we +/// use the ignore_zeros option. +#[tokio::test] +async fn simple_concat() { + let bytes = tar!("simple.tar"); + let mut archive_bytes = Vec::new(); + archive_bytes.extend(bytes); + + let original_names: Vec = + decode_names(&mut Archive::new(Cursor::new(&archive_bytes))).await; + let expected: Vec<&str> = original_names.iter().map(|n| n.as_str()).collect(); + + // concat two archives (with null in-between); + archive_bytes.extend(bytes); + + // test now that when we read the archive, it stops processing at the first zero header. + let actual = decode_names(&mut Archive::new(Cursor::new(&archive_bytes))).await; + assert_eq!(expected, actual); + + // extend expected by itself. + let expected: Vec<&str> = { + let mut o = Vec::new(); + o.extend(&expected); + o.extend(&expected); + o + }; + + let builder = ArchiveBuilder::new(Cursor::new(&archive_bytes)).set_ignore_zeros(true); + let mut ar = builder.build(); + + let actual = decode_names(&mut ar).await; + assert_eq!(expected, actual); + + async fn decode_names(ar: &mut Archive) -> Vec + where + R: AsyncRead + Unpin + Sync + Send, + { + let mut names = Vec::new(); + let mut entries = t!(ar.entries()); + + while let Some(entry) = entries.next().await { + let e = t!(entry); + names.push(t!(::std::str::from_utf8(&e.path_bytes())).to_string()); + } + + names + } +} + +#[tokio::test] +async fn header_impls() { + let mut ar = Archive::new(Cursor::new(tar!("simple.tar"))); + let hn = Header::new_old(); + let hnb = hn.as_bytes(); + let mut entries = t!(ar.entries()); + while let Some(file) = entries.next().await { + let file = t!(file); + let h1 = file.header(); + let h1b = h1.as_bytes(); + let h2 = h1.clone(); + let h2b = h2.as_bytes(); + assert!(h1b[..] == h2b[..] && h2b[..] 
!= hnb[..]) + } +} + +#[tokio::test] +async fn header_impls_missing_last_header() { + let mut ar = Archive::new(Cursor::new(tar!("simple_missing_last_header.tar"))); + let hn = Header::new_old(); + let hnb = hn.as_bytes(); + let mut entries = t!(ar.entries()); + + while let Some(file) = entries.next().await { + let file = t!(file); + let h1 = file.header(); + let h1b = h1.as_bytes(); + let h2 = h1.clone(); + let h2b = h2.as_bytes(); + assert!(h1b[..] == h2b[..] && h2b[..] != hnb[..]) + } +} + +#[tokio::test] +async fn reading_files() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + + let mut a = t!(entries.next().await.unwrap()); + assert_eq!(&*a.header().path_bytes(), b"a"); + let mut s = String::new(); + t!(a.read_to_string(&mut s).await); + assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\na\n"); + + let mut b = t!(entries.next().await.unwrap()); + assert_eq!(&*b.header().path_bytes(), b"b"); + s.truncate(0); + t!(b.read_to_string(&mut s).await); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn writing_files() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + t!(ar + .append_file("test2", &mut t!(File::open(&path).await)) + .await); + + let data = t!(ar.into_inner().await); + let mut ar = Archive::new(Cursor::new(data)); + let mut entries = t!(ar.entries()); + let mut f = t!(entries.next().await.unwrap()); + + assert_eq!(&*f.header().path_bytes(), b"test2"); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn large_filename() { + let mut ar = Builder::new(Vec::new()); + let td = 
t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let filename = repeat("abcd/").take(50).collect::(); + let mut header = Header::new_ustar(); + header.set_path(&filename).unwrap(); + header.set_metadata(&t!(fs::metadata(&path).await)); + header.set_cksum(); + t!(ar.append(&header, &b"test"[..]).await); + let too_long = repeat("abcd").take(200).collect::(); + t!(ar + .append_file(&too_long, &mut t!(File::open(&path).await)) + .await); + t!(ar.append_data(&mut header, &too_long, &b"test"[..]).await); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + // The short entry added with `append` + let mut f = entries.next().await.unwrap().unwrap(); + assert_eq!(&*f.header().path_bytes(), filename.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + // The long entry added with `append_file` + let mut f = entries.next().await.unwrap().unwrap(); + assert_eq!(&*f.path_bytes(), too_long.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + // The long entry added with `append_data` + let mut f = entries.next().await.unwrap().unwrap(); + assert!(f.header().path_bytes().len() < too_long.len()); + assert_eq!(&*f.path_bytes(), too_long.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn reading_entries() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + let mut a = t!(entries.next().await.unwrap()); + assert_eq!(&*a.header().path_bytes(), b"a"); + let 
mut s = String::new(); + t!(a.read_to_string(&mut s).await); + assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\na\n"); + s.truncate(0); + t!(a.read_to_string(&mut s).await); + assert_eq!(s, ""); + let mut b = t!(entries.next().await.unwrap()); + + assert_eq!(&*b.header().path_bytes(), b"b"); + s.truncate(0); + t!(b.read_to_string(&mut s).await); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + assert!(entries.next().await.is_none()); +} + +async fn check_dirtree(td: &TempDir) { + let dir_a = td.path().join("a"); + let dir_b = td.path().join("a/b"); + let file_c = td.path().join("a/c"); + assert!(fs::metadata(&dir_a) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + assert!(fs::metadata(&dir_b) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + assert!(fs::metadata(&file_c) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn extracting_directories() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("directory.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + check_dirtree(&td).await; +} + +#[tokio::test] +#[cfg(all(unix, feature = "xattr"))] +async fn xattrs() { + // If /tmp is a tmpfs, xattr will fail + // The xattr crate's unit tests also use /var/tmp for this reason + let td = t!(TempBuilder::new() + .prefix("async-tar") + .tempdir_in("/var/tmp")); + let rdr = Cursor::new(tar!("xattrs.tar")); + let builder = ArchiveBuilder::new(rdr).set_unpack_xattrs(true); + let mut ar = builder.build(); + t!(ar.unpack(td.path()).await); + + let val = xattr::get(td.path().join("a/b"), "user.pax.flags").unwrap(); + assert_eq!(val.unwrap(), b"epm"); +} + +#[tokio::test] +#[cfg(all(unix, feature = "xattr"))] +async fn no_xattrs() { + // If /tmp is a tmpfs, xattr will fail + // The xattr crate's unit tests also use /var/tmp for this reason + let td = t!(TempBuilder::new() + .prefix("async-tar") + .tempdir_in("/var/tmp")); + let rdr = 
Cursor::new(tar!("xattrs.tar")); + let builder = ArchiveBuilder::new(rdr).set_unpack_xattrs(false); + let mut ar = builder.build(); + t!(ar.unpack(td.path()).await); + + assert_eq!( + xattr::get(td.path().join("a/b"), "user.pax.flags").unwrap(), + None + ); +} + +#[tokio::test] +async fn writing_and_extracting_directories() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + let tmppath = td.path().join("tmpfile"); + t!(t!(File::create(&tmppath).await).write_all(b"c").await); + t!(ar.append_dir("a", ".").await); + t!(ar.append_dir("a/b", ".").await); + t!(ar + .append_file("a/c", &mut t!(File::open(&tmppath).await)) + .await); + t!(ar.finish().await); + + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + check_dirtree(&td).await; +} + +#[tokio::test] +async fn writing_directories_recursively() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let base_dir = td.path().join("base"); + t!(fs::create_dir(&base_dir).await); + t!(t!(File::create(base_dir.join("file1")).await) + .write_all(b"file1") + .await); + let sub_dir = base_dir.join("sub"); + t!(fs::create_dir(&sub_dir).await); + t!(t!(File::create(sub_dir.join("file2")).await) + .write_all(b"file2") + .await); + + let mut ar = Builder::new(Vec::new()); + t!(ar.append_dir_all("foobar", base_dir).await); + let data = t!(ar.into_inner().await); + + let mut ar = Archive::new(Cursor::new(data)); + t!(ar.unpack(td.path()).await); + let base_dir = td.path().join("foobar"); + assert!(fs::metadata(&base_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file1_path = base_dir.join("file1"); + assert!(fs::metadata(&file1_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + let sub_dir = base_dir.join("sub"); + assert!(fs::metadata(&sub_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file2_path = sub_dir.join("file2"); + 
assert!(fs::metadata(&file2_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn append_dir_all_blank_dest() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let base_dir = td.path().join("base"); + t!(fs::create_dir(&base_dir).await); + t!(t!(File::create(base_dir.join("file1")).await) + .write_all(b"file1") + .await); + let sub_dir = base_dir.join("sub"); + t!(fs::create_dir(&sub_dir).await); + t!(t!(File::create(sub_dir.join("file2")).await) + .write_all(b"file2") + .await); + + let mut ar = Builder::new(Vec::new()); + t!(ar.append_dir_all("", base_dir).await); + let data = t!(ar.into_inner().await); + + let mut ar = Archive::new(Cursor::new(data)); + t!(ar.unpack(td.path()).await); + let base_dir = td.path(); + assert!(fs::metadata(&base_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file1_path = base_dir.join("file1"); + assert!(fs::metadata(&file1_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + let sub_dir = base_dir.join("sub"); + assert!(fs::metadata(&sub_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file2_path = sub_dir.join("file2"); + assert!(fs::metadata(&file2_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn append_dir_all_does_not_work_on_non_directory() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let mut ar = Builder::new(Vec::new()); + let result = ar.append_dir_all("test", path).await; + assert!(result.is_err()); +} + +#[tokio::test] +async fn extracting_duplicate_dirs() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("duplicate_dirs.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + let some_dir = td.path().join("some_dir"); + assert!(fs::metadata(&some_dir) + .await + .map(|m| m.is_dir()) + 
.unwrap_or(false)); +} + +#[tokio::test] +async fn unpack_old_style_bsd_dir() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let mut header = Header::new_old(); + header.set_entry_type(EntryType::Regular); + t!(header.set_path("testdir/")); + header.set_size(0); + header.set_cksum(); + t!(ar.append(&header, &mut io::empty()).await); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + // Iterating + let rdr = Cursor::new(ar.into_inner().map_err(|_| ()).unwrap().into_inner()); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + + while let Some(e) = entries.next().await { + assert!(e.is_ok()); + } + + assert!(td.path().join("testdir").is_dir()); +} + +#[tokio::test] +async fn handling_incorrect_file_size() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let path = td.path().join("tmpfile"); + t!(File::create(&path).await); + let mut file = t!(File::open(&path).await); + let mut header = Header::new_old(); + t!(header.set_path("somepath")); + header.set_metadata(&t!(file.metadata().await)); + header.set_size(2048); // past the end of file null blocks + header.set_cksum(); + t!(ar.append(&header, &mut file).await); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + assert!(ar.unpack(td.path()).await.is_err()); + + // Iterating + let rdr = Cursor::new(ar.into_inner().map_err(|_| ()).unwrap().into_inner()); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + while let Some(fr) = entries.next().await { + if fr.is_err() { + return; + } + } + panic!("Should have errorred"); +} + +#[tokio::test] +async fn extracting_malicious_tarball() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut evil_tar = Vec::new(); + + evil_tar = { + let 
mut a = Builder::new(evil_tar); + async fn append(a: &mut Builder, path: &'static str) { + let mut header = Header::new_gnu(); + assert!(header.set_path(path).is_err(), "was ok: {:?}", path); + { + let h = header.as_gnu_mut().unwrap(); + for (a, b) in h.name.iter_mut().zip(path.as_bytes()) { + *a = *b; + } + } + header.set_size(1); + header.set_cksum(); + t!(a.append(&header, io::repeat(1).take(1)).await); + } + + append(&mut a, "/tmp/abs_evil.txt").await; + append(&mut a, "//tmp/abs_evil2.txt").await; + append(&mut a, "///tmp/abs_evil3.txt").await; + append(&mut a, "/./tmp/abs_evil4.txt").await; + append(&mut a, "//./tmp/abs_evil5.txt").await; + append(&mut a, "///./tmp/abs_evil6.txt").await; + append(&mut a, "/../tmp/rel_evil.txt").await; + append(&mut a, "../rel_evil2.txt").await; + append(&mut a, "./../rel_evil3.txt").await; + append(&mut a, "some/../../rel_evil4.txt").await; + append(&mut a, "").await; + append(&mut a, "././//./..").await; + append(&mut a, "..").await; + append(&mut a, "/////////..").await; + append(&mut a, "/////////").await; + a.into_inner().await.unwrap() + }; + + let mut ar = Archive::new(&evil_tar[..]); + t!(ar.unpack(td.path()).await); + + assert!(fs::metadata("/tmp/abs_evil.txt").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt2").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt3").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt4").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt5").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt6").await.is_err()); + assert!(fs::metadata("/tmp/rel_evil.txt").await.is_err()); + assert!(fs::metadata("/tmp/rel_evil.txt").await.is_err()); + assert!(fs::metadata(td.path().join("../tmp/rel_evil.txt")) + .await + .is_err()); + assert!(fs::metadata(td.path().join("../rel_evil2.txt")) + .await + .is_err()); + assert!(fs::metadata(td.path().join("../rel_evil3.txt")) + .await + .is_err()); + assert!(fs::metadata(td.path().join("../rel_evil4.txt")) + .await + 
.is_err()); + + // The `some` subdirectory should not be created because the only + // filename that references this has '..'. + assert!(fs::metadata(td.path().join("some")).await.is_err()); + + // The `tmp` subdirectory should be created and within this + // subdirectory, there should be files named `abs_evil.txt` through + // `abs_evil6.txt`. + assert!(fs::metadata(td.path().join("tmp")) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil2.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil3.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil4.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil5.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil6.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn octal_spaces() { + let rdr = Cursor::new(tar!("spaces.tar")); + let mut ar = Archive::new(rdr); + + let entry = ar.entries().unwrap().next().await.unwrap().unwrap(); + assert_eq!(entry.header().mode().unwrap() & 0o777, 0o777); + assert_eq!(entry.header().uid().unwrap(), 0); + assert_eq!(entry.header().gid().unwrap(), 0); + assert_eq!(entry.header().size().unwrap(), 2); + assert_eq!(entry.header().mtime().unwrap(), 0o12_440_016_664); + assert_eq!(entry.header().cksum().unwrap(), 0o4253); +} + +#[tokio::test] +async fn extracting_malformed_tar_null_blocks() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let path1 = td.path().join("tmpfile1"); + let path2 = td.path().join("tmpfile2"); + t!(File::create(&path1).await); + t!(File::create(&path2).await); + 
t!(ar + .append_file("tmpfile1", &mut t!(File::open(&path1).await)) + .await); + let mut data = t!(ar.into_inner().await); + let amt = data.len(); + data.truncate(amt - 512); + let mut ar = Builder::new(data); + t!(ar + .append_file("tmpfile2", &mut t!(File::open(&path2).await)) + .await); + t!(ar.finish().await); + + let data = t!(ar.into_inner().await); + let mut ar = Archive::new(&data[..]); + assert!(ar.unpack(td.path()).await.is_ok()); +} + +#[tokio::test] +async fn empty_filename() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("empty_filename.tar")); + let mut ar = Archive::new(rdr); + assert!(ar.unpack(td.path()).await.is_ok()); +} + +#[tokio::test] +async fn file_times() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("file_times.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + let meta = fs::metadata(td.path().join("a")).await.unwrap(); + let mtime = FileTime::from_last_modification_time(&meta); + let atime = FileTime::from_last_access_time(&meta); + assert_eq!(mtime.unix_seconds(), 1_000_000_000); + assert_eq!(mtime.nanoseconds(), 0); + assert_eq!(atime.unix_seconds(), 1_000_000_000); + assert_eq!(atime.nanoseconds(), 0); +} + +#[tokio::test] +async fn backslash_treated_well() { + // Insert a file into an archive with a backslash + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let mut ar = Builder::new(Vec::::new()); + t!(ar.append_dir("foo\\bar", td.path()).await); + let mut ar = Archive::new(Cursor::new(t!(ar.into_inner().await))); + let f = t!(t!(ar.entries()).next().await.unwrap()); + if cfg!(unix) { + assert_eq!(t!(f.header().path()).to_str(), Some("foo\\bar")); + } else { + assert_eq!(t!(f.header().path()).to_str(), Some("foo/bar")); + } + + // Unpack an archive with a backslash in the name + let mut ar = Builder::new(Vec::::new()); + let mut header = Header::new_gnu(); + 
header.set_metadata(&t!(fs::metadata(td.path()).await)); + header.set_size(0); + for (a, b) in header.as_old_mut().name.iter_mut().zip(b"foo\\bar\x00") { + *a = *b; + } + header.set_cksum(); + t!(ar.append(&header, &mut io::empty()).await); + let data = t!(ar.into_inner().await); + let mut ar = Archive::new(&data[..]); + let f = t!(t!(ar.entries()).next().await.unwrap()); + assert_eq!(t!(f.header().path()).to_str(), Some("foo\\bar")); + + let mut ar = Archive::new(&data[..]); + t!(ar.unpack(td.path()).await); + assert!(fs::metadata(td.path().join("foo\\bar")).await.is_ok()); +} + +#[cfg(unix)] +#[tokio::test] +async fn nul_bytes_in_path() { + use std::{ffi::OsStr, os::unix::prelude::*}; + + let nul_path = OsStr::from_bytes(b"foo\0"); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let mut ar = Builder::new(Vec::::new()); + let err = ar.append_dir(nul_path, td.path()).await.unwrap_err(); + assert!(err.to_string().contains("contains a nul byte")); +} + +#[tokio::test] +async fn links() { + let mut ar = Archive::new(Cursor::new(tar!("link.tar"))); + let mut entries = t!(ar.entries()); + let link = t!(entries.next().await.unwrap()); + assert_eq!( + t!(link.header().link_name()).as_ref().map(|p| &**p), + Some(Path::new("file")) + ); + let other = t!(entries.next().await.unwrap()); + assert!(t!(other.header().link_name()).is_none()); +} + +#[tokio::test] +#[cfg(unix)] // making symlinks on windows is hard +async fn unpack_links() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let mut ar = Archive::new(Cursor::new(tar!("link.tar"))); + t!(ar.unpack(td.path()).await); + + let md = t!(fs::symlink_metadata(td.path().join("lnk")).await); + assert!(md.file_type().is_symlink()); + assert_eq!( + &*t!(fs::read_link(td.path().join("lnk")).await), + Path::new("file") + ); + t!(File::open(td.path().join("lnk")).await); +} + +#[tokio::test] +async fn pax_simple() { + let mut ar = Archive::new(tar!("pax.tar")); + let mut entries = 
t!(ar.entries()); + + let mut first = t!(entries.next().await.unwrap()); + let mut attributes = t!(first.pax_extensions().await).unwrap(); + let first = t!(attributes.next().unwrap()); + let second = t!(attributes.next().unwrap()); + let third = t!(attributes.next().unwrap()); + assert!(attributes.next().is_none()); + + assert_eq!(first.key(), Ok("mtime")); + assert_eq!(first.value(), Ok("1453146164.953123768")); + assert_eq!(second.key(), Ok("atime")); + assert_eq!(second.value(), Ok("1453251915.24892486")); + assert_eq!(third.key(), Ok("ctime")); + assert_eq!(third.value(), Ok("1453146164.953123768")); +} + +#[tokio::test] +async fn pax_path() { + let mut ar = Archive::new(tar!("pax2.tar")); + let mut entries = t!(ar.entries()); + + let first = t!(entries.next().await.unwrap()); + assert!(first.path().unwrap().ends_with("aaaaaaaaaaaaaaa")); +} + +#[tokio::test] +async fn long_name_trailing_nul() { + let mut b = Builder::new(Vec::::new()); + + let mut h = Header::new_gnu(); + t!(h.set_path("././@LongLink")); + h.set_size(4); + h.set_entry_type(EntryType::new(b'L')); + h.set_cksum(); + t!(b.append(&h, b"foo\0" as &[u8]).await); + let mut h = Header::new_gnu(); + + t!(h.set_path("bar")); + h.set_size(6); + h.set_entry_type(EntryType::file()); + h.set_cksum(); + t!(b.append(&h, b"foobar" as &[u8]).await); + + let contents = t!(b.into_inner().await); + let mut a = Archive::new(&contents[..]); + + let e = t!(t!(a.entries()).next().await.unwrap()); + assert_eq!(&*e.path_bytes(), b"foo"); +} + +#[tokio::test] +async fn long_linkname_trailing_nul() { + let mut b = Builder::new(Vec::::new()); + + let mut h = Header::new_gnu(); + t!(h.set_path("././@LongLink")); + h.set_size(4); + h.set_entry_type(EntryType::new(b'K')); + h.set_cksum(); + t!(b.append(&h, b"foo\0" as &[u8]).await); + let mut h = Header::new_gnu(); + + t!(h.set_path("bar")); + h.set_size(6); + h.set_entry_type(EntryType::file()); + h.set_cksum(); + t!(b.append(&h, b"foobar" as &[u8]).await); + + let contents 
= t!(b.into_inner().await); + let mut a = Archive::new(&contents[..]); + + let e = t!(t!(a.entries()).next().await.unwrap()); + assert_eq!(&*e.link_name_bytes().unwrap(), b"foo"); +} + +#[tokio::test] +async fn encoded_long_name_has_trailing_nul() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let path = td.path().join("foo"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let mut b = Builder::new(Vec::::new()); + let long = repeat("abcd").take(200).collect::(); + + t!(b.append_file(&long, &mut t!(File::open(&path).await)).await); + + let contents = t!(b.into_inner().await); + let mut a = Archive::new(&contents[..]); + + let mut e = t!(t!(a.entries_raw()).next().await.unwrap()); + let mut name = Vec::new(); + t!(e.read_to_end(&mut name).await); + assert_eq!(name[name.len() - 1], 0); + + let header_name = &e.header().as_gnu().unwrap().name; + assert!(header_name.starts_with(b"././@LongLink\x00")); +} + +#[tokio::test] +async fn reading_sparse() { + let rdr = Cursor::new(tar!("sparse.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse_begin.txt"); + t!(a.read_to_string(&mut s).await); + assert_eq!(&s[..5], "test\n"); + assert!(s[5..].chars().all(|x| x == '\u{0}')); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse_end.txt"); + t!(a.read_to_string(&mut s).await); + assert!(s[..s.len() - 9].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[s.len() - 9..], "test_end\n"); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse_ext.txt"); + t!(a.read_to_string(&mut s).await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 5], "text\n"); + assert!(s[0x1000 + 5..0x3000].chars().all(|x| x == 
'\u{0}')); + assert_eq!(&s[0x3000..0x3000 + 5], "text\n"); + assert!(s[0x3000 + 5..0x5000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x5000..0x5000 + 5], "text\n"); + assert!(s[0x5000 + 5..0x7000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x7000..0x7000 + 5], "text\n"); + assert!(s[0x7000 + 5..0x9000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x9000..0x9000 + 5], "text\n"); + assert!(s[0x9000 + 5..0xb000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0xb000..0xb000 + 5], "text\n"); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse.txt"); + t!(a.read_to_string(&mut s).await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 6], "hello\n"); + assert!(s[0x1000 + 6..0x2fa0].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x2fa0..0x2fa0 + 6], "world\n"); + assert!(s[0x2fa0 + 6..0x4000].chars().all(|x| x == '\u{0}')); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn extract_sparse() { + let rdr = Cursor::new(tar!("sparse.tar")); + let mut ar = Archive::new(rdr); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + t!(ar.unpack(td.path()).await); + + let mut s = String::new(); + t!(t!(File::open(td.path().join("sparse_begin.txt")).await) + .read_to_string(&mut s) + .await); + assert_eq!(&s[..5], "test\n"); + assert!(s[5..].chars().all(|x| x == '\u{0}')); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse_end.txt")).await) + .read_to_string(&mut s) + .await); + assert!(s[..s.len() - 9].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[s.len() - 9..], "test_end\n"); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse_ext.txt")).await) + .read_to_string(&mut s) + .await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 5], "text\n"); + assert!(s[0x1000 + 5..0x3000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x3000..0x3000 + 5], "text\n"); + 
assert!(s[0x3000 + 5..0x5000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x5000..0x5000 + 5], "text\n"); + assert!(s[0x5000 + 5..0x7000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x7000..0x7000 + 5], "text\n"); + assert!(s[0x7000 + 5..0x9000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x9000..0x9000 + 5], "text\n"); + assert!(s[0x9000 + 5..0xb000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0xb000..0xb000 + 5], "text\n"); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse.txt")).await) + .read_to_string(&mut s) + .await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 6], "hello\n"); + assert!(s[0x1000 + 6..0x2fa0].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x2fa0..0x2fa0 + 6], "world\n"); + assert!(s[0x2fa0 + 6..0x4000].chars().all(|x| x == '\u{0}')); +} + +#[tokio::test] +async fn path_separators() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let short_path: PathBuf = repeat("abcd").take(2).collect(); + let long_path: PathBuf = repeat("abcd").take(50).collect(); + + // Make sure UStar headers normalize to Unix path separators + let mut header = Header::new_ustar(); + + t!(header.set_path(&short_path)); + assert_eq!(t!(header.path()), short_path); + assert!(!header.path_bytes().contains(&b'\\')); + + t!(header.set_path(&long_path)); + assert_eq!(t!(header.path()), long_path); + assert!(!header.path_bytes().contains(&b'\\')); + + // Make sure GNU headers normalize to Unix path separators, + // including the `@LongLink` fallback used by `append_file`. 
+ t!(ar + .append_file(&short_path, &mut t!(File::open(&path).await)) + .await); + t!(ar + .append_file(&long_path, &mut t!(File::open(&path).await)) + .await); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), short_path); + assert!(!entry.path_bytes().contains(&b'\\')); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), long_path); + assert!(!entry.path_bytes().contains(&b'\\')); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +#[cfg(unix)] +async fn append_path_symlink() { + use std::{borrow::Cow, env, os::unix::fs::symlink}; + + let mut ar = Builder::new(Vec::new()); + ar.follow_symlinks(false); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let long_linkname = repeat("abcd").take(30).collect::(); + let long_pathname = repeat("dcba").take(30).collect::(); + t!(env::set_current_dir(td.path())); + // "short" path name / short link name + t!(symlink("testdest", "test")); + t!(ar.append_path("test").await); + // short path name / long link name + t!(symlink(&long_linkname, "test2")); + t!(ar.append_path("test2").await); + // long path name / long link name + t!(symlink(&long_linkname, &long_pathname)); + t!(ar.append_path(&long_pathname).await); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new("test")); + assert_eq!( + t!(entry.link_name()), + Some(Cow::from(Path::new("testdest"))) + ); + assert_eq!(t!(entry.header().size()), 0); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new("test2")); + assert_eq!( + t!(entry.link_name()), + Some(Cow::from(Path::new(&long_linkname))) + ); + assert_eq!(t!(entry.header().size()), 0); + + let entry = 
t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new(&long_pathname)); + assert_eq!( + t!(entry.link_name()), + Some(Cow::from(Path::new(&long_linkname))) + ); + assert_eq!(t!(entry.header().size()), 0); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn name_with_slash_doesnt_fool_long_link_and_bsd_compat() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let mut h = Header::new_gnu(); + t!(h.set_path("././@LongLink")); + h.set_size(4); + h.set_entry_type(EntryType::new(b'L')); + h.set_cksum(); + t!(ar.append(&h, b"foo\0" as &[u8]).await); + + let mut header = Header::new_gnu(); + header.set_entry_type(EntryType::Regular); + t!(header.set_path("testdir/")); + header.set_size(0); + header.set_cksum(); + t!(ar.append(&header, &mut io::empty()).await); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + // Iterating + let rdr = Cursor::new(ar.into_inner().map_err(|_| ()).unwrap().into_inner()); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + while let Some(entry) = entries.next().await { + assert!(entry.is_ok()); + } + + assert!(td.path().join("foo").is_file()); +} + +#[tokio::test] +async fn insert_local_file_different_name() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let path = td.path().join("directory"); + t!(fs::create_dir(&path).await); + ar.append_path_with_name(&path, "archive/dir") + .await + .unwrap(); + let path = td.path().join("file"); + t!(t!(File::create(&path).await).write_all(b"test").await); + ar.append_path_with_name(&path, "archive/dir/f") + .await + .unwrap(); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + let entry = t!(entries.next().await.unwrap()); + 
assert_eq!(t!(entry.path()), Path::new("archive/dir")); + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new("archive/dir/f")); + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +#[cfg(unix)] +async fn tar_directory_containing_symlink_to_directory() { + use std::os::unix::fs::symlink; + + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let dummy_src = t!(TempBuilder::new().prefix("dummy_src").tempdir()); + let dummy_dst = td.path().join("dummy_dst"); + let mut ar = Builder::new(Vec::new()); + t!(symlink(dummy_src.path().display().to_string(), &dummy_dst)); + + assert!(dummy_dst.read_link().is_ok()); + assert!(dummy_dst.read_link().unwrap().is_dir()); + ar.append_dir_all("symlinks", td.path()).await.unwrap(); + ar.finish().await.unwrap(); +} diff --git a/tests/archives/directory.tar b/tests/archives/directory.tar new file mode 100644 index 0000000000000000000000000000000000000000..ec6867a4be3a84766e2b4d9dc8903abc66187359 GIT binary patch literal 10240 zcmeIxJq|)448ZZuo`M_DA|L0$r=t!&_58wOT!=2bnE3C~p&=!|w$;n@&=w^nX~{~` z_uNZ8#6pa!!BUB$ra2aIY5LDvgZp}`SLfvXI9^WcxZ|h#_~lPmsy|HMfIi@_=O5ah zSYH6*4)}*Zt?c_4_X+i__^pYYqPV{+Hbr$n-z}0R#|0009IL lKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q3`d@C27|H#z_S literal 0 HcmV?d00001 diff --git a/tests/archives/duplicate_dirs.tar b/tests/archives/duplicate_dirs.tar new file mode 100644 index 0000000000000000000000000000000000000000..fc19b9d01a7cd6a3134b649faceb634e605ec22c GIT binary patch literal 2048 zcmeHFZ3=)e2;EVyl_{~&1Z&vPo{`P46yG~M6|l@5qlGlPn3 z%)zLulS3{7O3k_^B1Qr%nb0WUbdv6$H@{jJq_wt_xs}N}FT*&>-El^tt?%E-$NQQi b4jBNC%BMr0_y6ntfBHDHxa>f7AgKd8zT`G5 literal 0 HcmV?d00001 diff --git a/tests/archives/empty_filename.tar b/tests/archives/empty_filename.tar new file mode 100644 index 0000000000000000000000000000000000000000..dd1c8a9fd5611c88405a307af267a15b302b8970 GIT binary patch literal 512 zcmZQzU|=XME=ep>Pyn)M0}Oz`+|-l-NEw=&8X(gkDG*>VG%_(UFfcMSH8)`}Ff=eT 
iF=tRPrHyk?O@(EtkZ4_aCRVo00IagfB*srAbeA&w`R literal 0 HcmV?d00001 diff --git a/tests/archives/pax.tar b/tests/archives/pax.tar new file mode 100644 index 0000000000000000000000000000000000000000..6de2d3bd35786d6cc4319404f6825c9baa874dbc GIT binary patch literal 10240 zcmeHLZExH*682~P3U-U8YusA<{^~mIrg7Wg;OoKh0S6d{!IiiYYl&1z%Im!#|NWk! zR_nDBr?`WA7X-UVEWXWf=Hblfp~Cjx`Q0yq$HLffFz8L%KXQ}k(CIuoc&1+mztLcD zKu`Q0KRexi_n_Mw4aU6zEj;L;(;FVJyJy<}lWI!qxPeKZT=eBm_B$+hG8ZS^!LZ*Q zjJl&iI7O@1ACD$MZ_4;~wm0lfyThu5Wx=Bxo`|a; zP&^mxgf&nP8^JB!2U08Qcfx+yZv?z_sWuiPmyJ2VVm}%gr4DPpBW!-4Khb6=jCB68 zaTTOG7X`GFn$)?%&f4uny0l!-iuSHZ8`~Zk+X##(w3SYq^?xE2>!{3yawu1o)wCUU z+ug$NS#OicNGR(qJ9~4{o&$$e9QDFZBe)Ukl{RtZDxuNxpe$u3s3}yWW2qAOXYk^z zKs!I3zdkz+^SBWNv9JK5s4ZzUf;l@!oH3afnYg!7FBxYS^K(X;8R-&+aTT+bkxrP9 zU8K^oOfC!u7_k7u+H$vF^$7yJj>Xd z_OF|5FstIA+Q)`$9>}k+HqZ!Ko=g#-X?)m(Gn*R-kz0u?R9}h-*{`VNi zKcnAnf5qloj~Dj*9LRoRdf25V|6^t0zAAT)lHr#MNaPSv&&3;HMG&qeg&HxW7SDBJkOGJF*gkl_qA2%Zxv5~Rm0c?3SBy~%8`c5r-YbRKSDf|oP4 zVWUEYD|sUe5lbFwleDSVzToEio8E+I9MDtAH@#^9adxIbwLol%%4yI#B3!X>90Ulo ziW`RG%0ZkW4v#bIdDc>wAdsmrVn!-9`>s~2aA|~C*w}9yC9;hZ3Gw{?_fnYk3Dmu? 
z;)Y1z$?yf{3hvUApCI$z>+@t{fB^Fca;(!<|47_+c?URh+g13?AQ|P~4S-;+y5hdp)fdke`v-mft2;lda;N#W;Kqt-06JZ!|!S5i754ER&Qqeunw%Bf$e9U!R*S%xk+32e} z-t3*G_h8c}M5|`%@u}L_6j~8(KfU?q*=f@t*p4z}ZJSzO<8y7Nf9PwB&){>Jqz>29 z6@{)5-y&FX6;0SGMY3`B`t1cJeHl5-WE^q3lp;f+;+EkKxD9L6F_|d3B2&IqX^I;o zE{Akit~B5R+Ct4MDGpHrZXpu0Li=iLtLtW+B?vlPWl=4?8Im@SusH^}@r4p8ZA#o> zbK!H`r}SpZXXHxwB64q^F3pZJw3}2=$jOyks!+b>Ea-9$MEt$GcIZ}e#27tR4Yq($B_Y4J>hP)wg9>Ukr{nm=d-C1+jD7!>?0wmU!7X-vi1?sS8ap#p))5E~ zMPIUa0mTXB4a55`5e{|d{MZjJ6{-euwuv`Twmf7=D=_ut#8zz#f4;0(%7Z2>ka5{2L#DtDyh@ literal 0 HcmV?d00001 diff --git a/tests/archives/pax2.tar b/tests/archives/pax2.tar new file mode 100644 index 0000000000000000000000000000000000000000..c0c8ed314ce8236a40a841950f0b7fbbfe8d7b44 GIT binary patch literal 10240 zcmeIyOA5j;5P;#VJw;ETGnuBP3l9@SP*4!9;PFkAV(X%AqPX~HGYv!Y_|j>NlUPkg zF%}t1il(kz&MDrr)TyRs8br*iIx(>}xa#J95Z6l|&uL3PSn3~_@Zzge?04<>NOx zA|KM+d<&E6*zb?qwFVQl#)~ec>%4hsHCN-b+AXa%3ylvxS2xq@_}zcJw^Dl=k>^}|y!F4O8+27Db;!AShW?L-?d<>NQ~i04*UxQkO=<`rfB*sr qAbYZ( literal 0 HcmV?d00001 diff --git a/tests/archives/reading_files.tar b/tests/archives/reading_files.tar new file mode 100644 index 0000000000000000000000000000000000000000..67e79ffdfe2d85519305dff62a70a9950058a1d1 GIT binary patch literal 10240 zcmeIxJqpAi5QgEYa|(|zX3flb45Zs?e_)Tl+NF>rg+hu~3&9V^;ep9AerZ`|Ny-s@ z(fwH?)!8J@4=Z6x9!u`#E+Ff9N8 literal 0 HcmV?d00001 diff --git a/tests/archives/spaces.tar b/tests/archives/spaces.tar new file mode 100644 index 0000000000000000000000000000000000000000..68e2a519eebd0d751f17106db3a5722cfc76f223 GIT binary patch literal 2048 zcmYdfpgk}&FfcbaS71;80RuD|ENG-)Xk=nyU|?uwW@Z8uF)=bVR$wroz0*d#4T)SM UJ_tqwU^E0qLtr!nMq~&801;dXMF0Q* literal 0 HcmV?d00001 diff --git a/tests/archives/sparse.tar b/tests/archives/sparse.tar new file mode 100644 index 0000000000000000000000000000000000000000..216aed1d780cf5ec5e1b2143e284bdbc362124aa GIT binary patch literal 10240 zcmeHM;cCMm5O#m}6n%nmMok`M4^X;Iwn1o1w5*T6yV$f1rh$MmTux|XPW^Ix=cn=F 
zj=Rp;?VJ7l-oN;>7a0m*skIh(k&OYmf(-Y475{{+8VQYRB(+xnl@goR48BB=C+BzF zW+RU8QPaq9a<+3LOQIX1;8J>KurBd;smrF;L%r{{{HXyv07av{b^bZBvSBR@xS)T# z|H!3&(a+=={X=c!RsRZ%0BLk>)}?>h+%%7?B{Gt%T=kz4G?{k{G|K?3&s$Vyt~Gkl z|JnbU{tck!!~U-;trYJ6s@8I4`X`l6?EmV49Z8$)Ps_*{=ZP#J3CP4#Ei}>#$9Pn* zkDiXE34j=qEd?>)?U~>I3W`^1S2_Nv87&Ks<6l<1QhUqsPt9mqcpU$-;+5K4j(=)K z%fbui|9oeEVuQ|-Gxras8s|DOFy7L-vm*+*-|`o7q##7kT#Q~ zGf$W|;l-^V_qBhj+m1V7sqk!V6Tys9EVjYYR*)n{r45g<@m&xJMJylJ4rFe0g_AN6KicgmV>_m8vSzrOC=F_s3qvsdUnj)LEGNc01w?JFS0ZEz9-!Lu2f-{+pq%m;N_&K_M3g#4Lh+iQB@^-{kTq z`v0(cc#+GZ{#%{yf6y1`|LPktv*5D+2XCVPQ;9!004m2zB5iayK{BcHyQ}^6%{?D| z-?uy#5I^{lKfn2xZdV_gY$hIbU6z0^*01+8gGv4WRR4_(_rHtW$u_S4>0aR*1V8`; qKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1VG^L5_ku0yMiD9 literal 0 HcmV?d00001 diff --git a/tests/entry.rs b/tests/entry.rs new file mode 100644 index 00000000..f93df8ae --- /dev/null +++ b/tests/entry.rs @@ -0,0 +1,350 @@ +extern crate tokio_tar as async_tar; + +extern crate tempfile; + +use tokio::{fs::File, io::AsyncReadExt}; +use tokio_stream::*; + +use tempfile::Builder; + +macro_rules! 
t { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => panic!("{} returned {}", stringify!($e), e), + } + }; +} + +#[tokio::test] +async fn absolute_symlink() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + + t!(td.path().join("foo").symlink_metadata()); + + let mut ar = async_tar::Archive::new(&bytes[..]); + let mut entries = t!(ar.entries()); + let entry = t!(entries.next().await.unwrap()); + assert_eq!(&*entry.link_name_bytes().unwrap(), b"/bar"); +} + +#[tokio::test] +async fn absolute_hardlink() { + let td = t!(Builder::new().prefix("tar").tempdir()); + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Link); + t!(header.set_path("bar")); + // This absolute path under tempdir will be created at unpack time + t!(header.set_link_name(td.path().join("foo"))); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + t!(ar.unpack(td.path()).await); + t!(td.path().join("foo").metadata()); + t!(td.path().join("bar").metadata()); +} + +#[tokio::test] +async fn relative_hardlink() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); 
+ header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Link); + t!(header.set_path("bar")); + t!(header.set_link_name("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + t!(td.path().join("foo").metadata()); + t!(td.path().join("bar").metadata()); +} + +#[tokio::test] +async fn absolute_link_deref_error() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("/")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).await.is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).await.is_err()); +} + +#[tokio::test] +async fn relative_link_deref_error() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("../../../../")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut 
header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).await.is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).await.is_err()); +} + +#[tokio::test] +#[cfg(unix)] +async fn directory_maintains_permissions() { + use ::std::os::unix::fs::PermissionsExt; + + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Directory); + t!(header.set_path("foo")); + header.set_mode(0o777); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + let f = t!(File::open(td.path().join("foo")).await); + let md = t!(f.metadata().await); + assert!(md.is_dir()); + assert_eq!(md.permissions().mode(), 0o40777); +} + +#[tokio::test] +#[cfg(not(windows))] // dangling symlinks have weird permissions +async fn modify_link_just_created() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("bar/foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = 
async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + + t!(File::open(td.path().join("bar/foo")).await); + t!(File::open(td.path().join("bar/bar")).await); + t!(File::open(td.path().join("foo/foo")).await); + t!(File::open(td.path().join("foo/bar")).await); +} + +#[tokio::test] +async fn parent_paths_error() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("..")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).await.is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).await.is_err()); +} + +#[tokio::test] +#[cfg(unix)] +async fn good_parent_paths_ok() { + use std::path::PathBuf; + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path(PathBuf::from("foo").join("bar"))); + t!(header.set_link_name(PathBuf::from("..").join("bar"))); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = 
async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + t!(td.path().join("foo").join("bar").read_link()); + let dst = t!(td.path().join("foo").join("bar").canonicalize()); + t!(File::open(dst).await); +} + +#[tokio::test] +async fn modify_hard_link_just_created() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Link); + t!(header.set_path("foo")); + t!(header.set_link_name("../test")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(1); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &b"x"[..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + + let test = td.path().join("test"); + t!(File::create(&test).await); + + let dir = td.path().join("dir"); + assert!(ar.unpack(&dir).await.is_err()); + + let mut contents = Vec::new(); + t!(t!(File::open(&test).await).read_to_end(&mut contents).await); + assert_eq!(contents.len(), 0); +} + +#[tokio::test] +async fn modify_symlink_just_created() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("../test")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = 
async_tar::Header::new_gnu(); + header.set_size(1); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &b"x"[..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + + let test = td.path().join("test"); + t!(File::create(&test).await); + + let dir = td.path().join("dir"); + t!(ar.unpack(&dir).await); + + let mut contents = Vec::new(); + t!(t!(File::open(&test).await).read_to_end(&mut contents).await); + assert_eq!(contents.len(), 0); +} diff --git a/tests/header/mod.rs b/tests/header/mod.rs new file mode 100644 index 00000000..33f479d4 --- /dev/null +++ b/tests/header/mod.rs @@ -0,0 +1,243 @@ +#![allow(clippy::cognitive_complexity)] + +use std::{ + fs::{self, File}, + io::Write, + iter, mem, + path::Path, + thread, time, +}; + +use tempfile::Builder; + +use async_tar::{GnuHeader, Header, HeaderMode}; + +#[test] +fn default_gnu() { + let mut h = Header::new_gnu(); + assert!(h.as_gnu().is_some()); + assert!(h.as_gnu_mut().is_some()); + assert!(h.as_ustar().is_none()); + assert!(h.as_ustar_mut().is_none()); +} + +#[test] +fn goto_old() { + let mut h = Header::new_old(); + assert!(h.as_gnu().is_none()); + assert!(h.as_gnu_mut().is_none()); + assert!(h.as_ustar().is_none()); + assert!(h.as_ustar_mut().is_none()); +} + +#[test] +fn goto_ustar() { + let mut h = Header::new_ustar(); + assert!(h.as_gnu().is_none()); + assert!(h.as_gnu_mut().is_none()); + assert!(h.as_ustar().is_some()); + assert!(h.as_ustar_mut().is_some()); +} + +#[test] +fn link_name() { + let mut h = Header::new_gnu(); + t!(h.set_link_name("foo")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo")); + t!(h.set_link_name("../foo")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("../foo")); + t!(h.set_link_name("foo/bar")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/bar")); + 
t!(h.set_link_name("foo\\ba")); + if cfg!(windows) { + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/ba")); + } else { + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\ba")); + } + + let name = "foo\\bar\0"; + for (slot, val) in h.as_old_mut().linkname.iter_mut().zip(name.as_bytes()) { + *slot = *val; + } + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\bar")); + + assert!(h.set_link_name("\0").is_err()); +} + +#[test] +fn mtime() { + let h = Header::new_gnu(); + assert_eq!(t!(h.mtime()), 0); + + let h = Header::new_ustar(); + assert_eq!(t!(h.mtime()), 0); + + let h = Header::new_old(); + assert_eq!(t!(h.mtime()), 0); +} + +#[test] +fn user_and_group_name() { + let mut h = Header::new_gnu(); + t!(h.set_username("foo")); + t!(h.set_groupname("bar")); + assert_eq!(t!(h.username()), Some("foo")); + assert_eq!(t!(h.groupname()), Some("bar")); + + h = Header::new_ustar(); + t!(h.set_username("foo")); + t!(h.set_groupname("bar")); + assert_eq!(t!(h.username()), Some("foo")); + assert_eq!(t!(h.groupname()), Some("bar")); + + h = Header::new_old(); + assert_eq!(t!(h.username()), None); + assert_eq!(t!(h.groupname()), None); + assert!(h.set_username("foo").is_err()); + assert!(h.set_groupname("foo").is_err()); +} + +#[test] +fn dev_major_minor() { + let mut h = Header::new_gnu(); + t!(h.set_device_major(1)); + t!(h.set_device_minor(2)); + assert_eq!(t!(h.device_major()), Some(1)); + assert_eq!(t!(h.device_minor()), Some(2)); + + h = Header::new_ustar(); + t!(h.set_device_major(1)); + t!(h.set_device_minor(2)); + assert_eq!(t!(h.device_major()), Some(1)); + assert_eq!(t!(h.device_minor()), Some(2)); + + h.as_ustar_mut().unwrap().dev_minor[0] = 0x7f; + h.as_ustar_mut().unwrap().dev_major[0] = 0x7f; + assert!(h.device_major().is_err()); + assert!(h.device_minor().is_err()); + + h.as_ustar_mut().unwrap().dev_minor[0] = b'g'; + h.as_ustar_mut().unwrap().dev_major[0] = b'h'; + assert!(h.device_major().is_err()); + 
assert!(h.device_minor().is_err()); + + h = Header::new_old(); + assert_eq!(t!(h.device_major()), None); + assert_eq!(t!(h.device_minor()), None); + assert!(h.set_device_major(1).is_err()); + assert!(h.set_device_minor(1).is_err()); +} + +#[test] +fn set_path() { + let mut h = Header::new_gnu(); + t!(h.set_path("foo")); + assert_eq!(t!(h.path()).to_str(), Some("foo")); + t!(h.set_path("foo/")); + assert_eq!(t!(h.path()).to_str(), Some("foo/")); + t!(h.set_path("foo/bar")); + assert_eq!(t!(h.path()).to_str(), Some("foo/bar")); + t!(h.set_path("foo\\bar")); + if cfg!(windows) { + assert_eq!(t!(h.path()).to_str(), Some("foo/bar")); + } else { + assert_eq!(t!(h.path()).to_str(), Some("foo\\bar")); + } + + let long_name = iter::repeat("foo").take(100).collect::(); + let medium1 = iter::repeat("foo").take(52).collect::(); + let medium2 = iter::repeat("fo/").take(52).collect::(); + + assert!(h.set_path(&long_name).is_err()); + assert!(h.set_path(&medium1).is_err()); + assert!(h.set_path(&medium2).is_err()); + assert!(h.set_path("\0").is_err()); + + h = Header::new_ustar(); + t!(h.set_path("foo")); + assert_eq!(t!(h.path()).to_str(), Some("foo")); + + assert!(h.set_path(&long_name).is_err()); + assert!(h.set_path(&medium1).is_err()); + t!(h.set_path(&medium2)); + assert_eq!(t!(h.path()).to_str(), Some(&medium2[..])); +} + +#[test] +fn set_ustar_path_hard() { + let mut h = Header::new_ustar(); + let p = Path::new("a").join(&vec!["a"; 100].join("")); + t!(h.set_path(&p)); + let path = t!(h.path()); + let actual: &Path = path.as_ref(); + assert_eq!(actual, p); +} + +#[test] +fn set_metadata_deterministic() { + let td = t!(Builder::new().prefix("async-tar").tempdir()); + let tmppath = td.path().join("tmpfile"); + + fn mk_header(path: &Path, readonly: bool) -> Header { + let mut file = t!(File::create(path)); + t!(file.write_all(b"c")); + let mut perms = t!(file.metadata()).permissions(); + perms.set_readonly(readonly); + t!(fs::set_permissions(path, perms)); + let mut h = 
Header::new_ustar(); + h.set_metadata_in_mode(&t!(path.metadata()), HeaderMode::Deterministic); + h + } + + // Create "the same" File twice in a row, one second apart, with differing readonly values. + let one = mk_header(tmppath.as_path(), false); + thread::sleep(time::Duration::from_millis(1050)); + let two = mk_header(tmppath.as_path(), true); + + // Always expected to match. + assert_eq!(t!(one.size()), t!(two.size())); + assert_eq!(t!(one.path()), t!(two.path())); + assert_eq!(t!(one.mode()), t!(two.mode())); + + // Would not match without `Deterministic`. + assert_eq!(t!(one.mtime()), t!(two.mtime())); + // TODO: No great way to validate that these would not be filled, but + // check them anyway. + assert_eq!(t!(one.uid()), t!(two.uid())); + assert_eq!(t!(one.gid()), t!(two.gid())); +} + +#[test] +fn extended_numeric_format() { + let mut h: GnuHeader = unsafe { mem::zeroed() }; + h.as_header_mut().set_size(42); + assert_eq!(h.size, [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 50, 0]); + h.as_header_mut().set_size(8_589_934_593); + assert_eq!(h.size, [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 1]); + h.size = [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 0]; + assert_eq!(h.as_header().entry_size().unwrap(), 0x0002_0000_0000); + h.size = [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 51, 0]; + assert_eq!(h.as_header().entry_size().unwrap(), 43); + + h.as_header_mut().set_gid(42); + assert_eq!(h.gid, [48, 48, 48, 48, 48, 53, 50, 0]); + assert_eq!(h.as_header().gid().unwrap(), 42); + h.as_header_mut().set_gid(0x7fff_ffff_ffff_ffff); + assert_eq!(h.gid, [0xff; 8]); + assert_eq!(h.as_header().gid().unwrap(), 0x7fff_ffff_ffff_ffff); + h.uid = [0x80, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78]; + assert_eq!(h.as_header().uid().unwrap(), 0x1234_5678); + + h.mtime = [ + 0x80, 0, 0, 0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + ]; + assert_eq!(h.as_header().mtime().unwrap(), 0x0123_4567_89ab_cdef); +} + +#[test] +fn byte_slice_conversion() { + let h = Header::new_gnu(); + let b: &[u8] 
= h.as_bytes(); + let b_conv: &[u8] = Header::from_byte_slice(h.as_bytes()).as_bytes(); + assert_eq!(b, b_conv); +} From 51c16f23d89543a94f059d9f9fa3a4e2f151dec5 Mon Sep 17 00:00:00 2001 From: rishflab Date: Wed, 24 Feb 2021 18:08:25 +1100 Subject: [PATCH 2/2] Download and run monero wallet rpc on swap cli startup If the monero wallet rpc has not already been downloaded we download the monero cli package and extract the wallet rpc. The unneeded files are cleaned up. The monero wallet rpc is started on a random port which is provided to the swap cli. We added a fork of tokio-tar via a git subtree because we needed a tokio-tar version that was compatible with tokio 1.0. Remove this subtree in favor of a regular cargo dependency when this PR merges: https://github.com/vorot93/tokio-tar/pull/3. --- Cargo.lock | 146 ++++++++++++++++++++++++++++-- swap/Cargo.toml | 7 +- swap/src/bin/swap_cli.rs | 16 +++- swap/src/cli/config.rs | 19 ---- swap/src/monero.rs | 2 + swap/src/monero/wallet_rpc.rs | 164 ++++++++++++++++++++++++++++++++++ tokio-tar/src/archive.rs | 15 ++-- tokio-tar/src/builder.rs | 3 +- tokio-tar/src/entry.rs | 18 ++-- tokio-tar/src/header.rs | 6 +- tokio-tar/tests/all.rs | 7 +- tokio-tar/tests/header/mod.rs | 3 +- 12 files changed, 354 insertions(+), 52 deletions(-) create mode 100644 swap/src/monero/wallet_rpc.rs diff --git a/Cargo.lock b/Cargo.lock index bd5667d0..7c7da167 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -115,6 +115,19 @@ dependencies = [ "syn", ] +[[package]] +name = "async-compression" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b72c1f1154e234325b50864a349b9c8e56939e266a4c307c0f159812df2f9537" +dependencies = [ + "bzip2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-io" version = "1.3.1" @@ -479,6 +492,27 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" +[[package]] +name = "bzip2" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abf8012c8a15d5df745fcf258d93e6149dcf102882c8d8702d9cff778eab43a8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.10+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17fa3d1ac1ca21c5c4e36a97f3c3eb25084576f6fc47bf0139c1123434216c6c" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cache-padded" version = "1.1.1" @@ -888,12 +922,12 @@ checksum = "21453800c95bb1aaa57490458c42d60c6277cb8a3e386030ec2381d5c2d4fa77" dependencies = [ "bitcoin", "log", - "rustls", + "rustls 0.16.0", "serde", "serde_json", "socks", "webpki", - "webpki-roots", + "webpki-roots 0.19.0", ] [[package]] @@ -962,6 +996,18 @@ dependencies = [ "instant", ] +[[package]] +name = "filetime" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall 0.2.4", + "winapi 0.3.9", +] + [[package]] name = "fixed-hash" version = "0.2.5" @@ -1409,6 +1455,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f9f7a97316d44c0af9b0301e65010573a853a9fc97046d7331d7f6bc0fd5a64" +dependencies = [ + "futures-util", + "hyper", + "log", + "rustls 0.19.0", + "tokio", + "tokio-rustls", + "webpki", +] + [[package]] name = "idna" version = "0.2.0" @@ -2851,6 +2912,7 @@ dependencies = [ "http", "http-body", "hyper", + "hyper-rustls", "ipnet", "js-sys", "lazy_static", @@ -2858,14 +2920,17 @@ dependencies = [ "mime", "percent-encoding", "pin-project-lite", + "rustls 0.19.0", "serde", "serde_json", "serde_urlencoded", "tokio", + "tokio-rustls", "url", 
"wasm-bindgen", "wasm-bindgen-futures", "web-sys", + "webpki-roots 0.21.0", "winreg", ] @@ -2941,6 +3006,19 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustls" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "064fd21ff87c6e87ed4506e68beb42459caa4a0e2eb144932e6776768556980b" +dependencies = [ + "base64 0.13.0", + "log", + "ring", + "sct", + "webpki", +] + [[package]] name = "rw-stream-sink" version = "0.2.1" @@ -3193,6 +3271,15 @@ dependencies = [ "serde", ] +[[package]] +name = "signal-hook-registry" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" +dependencies = [ + "libc", +] + [[package]] name = "signature" version = "1.3.0" @@ -3431,6 +3518,7 @@ name = "swap" version = "0.1.0" dependencies = [ "anyhow", + "async-compression", "async-recursion", "async-trait", "atty", @@ -3475,7 +3563,9 @@ dependencies = [ "thiserror", "time", "tokio", + "tokio-tar", "tokio-tungstenite", + "tokio-util", "toml", "tracing", "tracing-futures", @@ -3687,8 +3777,12 @@ dependencies = [ "memchr", "mio", "num_cpus", + "once_cell", + "parking_lot", "pin-project-lite", + "signal-hook-registry", "tokio-macros", + "winapi 0.3.9", ] [[package]] @@ -3712,6 +3806,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6" +dependencies = [ + "rustls 0.19.0", + "tokio", + "webpki", +] + [[package]] name = "tokio-stream" version = "0.1.2" @@ -3723,6 +3828,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-tar" +version = "0.2.0" +dependencies = [ + "filetime", + "futures-core", + "libc", + "redox_syscall 0.2.4", + "tempfile", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = "tokio-tungstenite" version = "0.13.0" @@ -3740,9 
+3859,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ae4751faa60b9f96dd8344d74592e5a17c0c9a220413dbc6942d14139bbfcc" +checksum = "ebb7cb2f00c5ae8df755b252306272cd1790d39728363936e01827e11f0b017b" dependencies = [ "bytes", "futures-core", @@ -3750,7 +3869,6 @@ dependencies = [ "log", "pin-project-lite", "tokio", - "tokio-stream", ] [[package]] @@ -4164,6 +4282,15 @@ dependencies = [ "webpki", ] +[[package]] +name = "webpki-roots" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82015b7e0b8bad8185994674a13a93306bea76cf5a16c5a181382fd3a5ec2376" +dependencies = [ + "webpki", +] + [[package]] name = "wepoll-sys" version = "3.0.1" @@ -4256,6 +4383,15 @@ dependencies = [ "zeroize 1.2.0", ] +[[package]] +name = "xattr" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c" +dependencies = [ + "libc", +] + [[package]] name = "yamux" version = "0.8.0" diff --git a/swap/Cargo.toml b/swap/Cargo.toml index d9d15118..73f926d3 100644 --- a/swap/Cargo.toml +++ b/swap/Cargo.toml @@ -13,6 +13,7 @@ name = "swap" [dependencies] anyhow = "1" +async-compression = { version = "0.3", features = ["bzip2", "tokio"] } async-recursion = "0.3.1" async-trait = "0.1" atty = "0.2" @@ -36,7 +37,7 @@ pem = "0.8" prettytable-rs = "0.8" rand = "0.7" rand_chacha = "0.2.0" -reqwest = { version = "0.11", default-features = false } +reqwest = { version = "0.11", features = ["rustls-tls", "stream"], default-features = false } rust_decimal = "1.10" serde = { version = "1", features = ["derive"] } serde_cbor = "0.11" @@ -48,8 +49,10 @@ structopt = "0.3" strum = { version = "0.20", features = ["derive"] } thiserror = "1" time = "0.2" -tokio = { version = "1.0", features = ["rt-multi-thread", "time", "macros", "sync"] } +tokio = { 
version = "1.0", features = ["rt-multi-thread", "time", "macros", "sync", "process", "fs"] } +tokio-tar = { path = "../tokio-tar" } tokio-tungstenite = { version = "0.13", features = [ "tls" ] } +tokio-util = { version = "0.6.3", features = ["io"] } toml = "0.5" tracing = { version = "0.1", features = ["attributes"] } tracing-futures = { version = "0.2", features = ["std-future", "futures-03"] } diff --git a/swap/src/bin/swap_cli.rs b/swap/src/bin/swap_cli.rs index 4b13e6ba..c4fed8c1 100644 --- a/swap/src/bin/swap_cli.rs +++ b/swap/src/bin/swap_cli.rs @@ -14,6 +14,7 @@ use anyhow::{Context, Result}; use prettytable::{row, Table}; +use reqwest::Url; use std::{path::Path, sync::Arc}; use structopt::StructOpt; use swap::{ @@ -84,6 +85,12 @@ async fn main() -> Result<()> { let monero_network = monero::Network::Stagenet; let execution_params = execution_params::Testnet::get_execution_params(); + let monero_wallet_rpc = monero::WalletRpc::new(config.data.dir.join("monero")).await?; + + let monero_wallet_rpc_process = monero_wallet_rpc + .run(monero_network, "stagenet.community.xmr.to") + .await?; + match opt.cmd { Command::BuyXmr { alice_peer_id, @@ -96,6 +103,7 @@ async fn main() -> Result<()> { &wallet_data_dir, monero_network, seed, + monero_wallet_rpc_process.endpoint(), ) .await?; @@ -149,6 +157,7 @@ async fn main() -> Result<()> { &wallet_data_dir, monero_network, seed, + monero_wallet_rpc_process.endpoint(), ) .await?; @@ -180,6 +189,7 @@ async fn main() -> Result<()> { &wallet_data_dir, monero_network, seed, + monero_wallet_rpc_process.endpoint(), ) .await?; @@ -230,6 +240,7 @@ async fn main() -> Result<()> { &wallet_data_dir, monero_network, seed, + monero_wallet_rpc_process.endpoint(), ) .await?; @@ -268,6 +279,7 @@ async fn init_wallets( bitcoin_wallet_data_dir: &Path, monero_network: monero::Network, seed: Seed, + monero_wallet_rpc_url: Url, ) -> Result<(bitcoin::Wallet, monero::Wallet)> { let bitcoin_wallet = bitcoin::Wallet::new( 
config.bitcoin.electrum_rpc_url, @@ -290,7 +302,7 @@ async fn init_wallets( ); let monero_wallet = monero::Wallet::new( - config.monero.wallet_rpc_url.clone(), + monero_wallet_rpc_url.clone(), monero_network, MONERO_BLOCKCHAIN_MONITORING_WALLET_NAME.to_string(), ); @@ -306,7 +318,7 @@ async fn init_wallets( .context(format!( "Unable to create Monero wallet for blockchain monitoring.\ Please ensure that the monero-wallet-rpc is available at {}", - config.monero.wallet_rpc_url + monero_wallet_rpc_url ))?; info!( diff --git a/swap/src/cli/config.rs b/swap/src/cli/config.rs index 2252f181..156fe269 100644 --- a/swap/src/cli/config.rs +++ b/swap/src/cli/config.rs @@ -13,13 +13,11 @@ use url::Url; pub const DEFAULT_ELECTRUM_HTTP_URL: &str = "https://blockstream.info/testnet/api/"; const DEFAULT_ELECTRUM_RPC_URL: &str = "ssl://electrum.blockstream.info:60002"; -const DEFAULT_MONERO_WALLET_RPC_TESTNET_URL: &str = "http://127.0.0.1:38083/json_rpc"; #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)] pub struct Config { pub data: Data, pub bitcoin: Bitcoin, - pub monero: Monero, } impl Config { @@ -48,12 +46,6 @@ pub struct Bitcoin { pub electrum_rpc_url: Url, } -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -#[serde(deny_unknown_fields)] -pub struct Monero { - pub wallet_rpc_url: Url, -} - #[derive(thiserror::Error, Debug, Clone, Copy)] #[error("config not initialized")] pub struct ConfigNotInitialized {} @@ -118,11 +110,6 @@ pub fn query_user_for_initial_testnet_config() -> Result { .interact_text()?; let electrum_rpc_url = Url::parse(electrum_rpc_url.as_str())?; - let monero_wallet_rpc_url = Input::with_theme(&ColorfulTheme::default()) - .with_prompt("Enter Monero Wallet RPC URL or hit enter to use default") - .default(DEFAULT_MONERO_WALLET_RPC_TESTNET_URL.to_owned()) - .interact_text()?; - let monero_wallet_rpc_url = monero_wallet_rpc_url.as_str().parse()?; println!(); Ok(Config { @@ -131,9 +118,6 @@ pub fn 
query_user_for_initial_testnet_config() -> Result { electrum_http_url, electrum_rpc_url, }, - monero: Monero { - wallet_rpc_url: monero_wallet_rpc_url, - }, }) } @@ -156,9 +140,6 @@ mod tests { electrum_http_url: Url::from_str(DEFAULT_ELECTRUM_HTTP_URL).unwrap(), electrum_rpc_url: Url::from_str(DEFAULT_ELECTRUM_RPC_URL).unwrap(), }, - monero: Monero { - wallet_rpc_url: Url::from_str("http://127.0.0.1:38083/json_rpc").unwrap(), - }, }; initial_setup(config_path.clone(), || Ok(expected.clone())).unwrap(); diff --git a/swap/src/monero.rs b/swap/src/monero.rs index e17e137b..38b091b0 100644 --- a/swap/src/monero.rs +++ b/swap/src/monero.rs @@ -1,8 +1,10 @@ pub mod wallet; +mod wallet_rpc; pub use ::monero::{Network, PrivateKey, PublicKey}; pub use curve25519_dalek::scalar::Scalar; pub use wallet::Wallet; +pub use wallet_rpc::{WalletRpc, WalletRpcProcess}; use crate::bitcoin; use ::bitcoin::hashes::core::fmt::Formatter; diff --git a/swap/src/monero/wallet_rpc.rs b/swap/src/monero/wallet_rpc.rs new file mode 100644 index 00000000..82328210 --- /dev/null +++ b/swap/src/monero/wallet_rpc.rs @@ -0,0 +1,164 @@ +use ::monero::Network; +use anyhow::{Context, Result}; +use async_compression::tokio::bufread::BzDecoder; +use futures::{StreamExt, TryStreamExt}; +use reqwest::Url; +use std::{ + io::ErrorKind, + path::{Path, PathBuf}, + process::Stdio, +}; +use tokio::{ + fs::{remove_file, OpenOptions}, + io::{AsyncBufReadExt, AsyncWriteExt, BufReader}, + process::{Child, Command}, +}; +use tokio_tar::Archive; +use tokio_util::{ + codec::{BytesCodec, FramedRead}, + io::StreamReader, +}; + +#[cfg(target_os = "macos")] +const DOWNLOAD_URL: &str = "https://downloads.getmonero.org/cli/monero-mac-x64-v0.17.1.9.tar.bz2"; + +#[cfg(target_os = "linux")] +const DOWNLOAD_URL: &str = "https://downloads.getmonero.org/cli/monero-linux-x64-v0.17.1.9.tar.bz2"; + +#[cfg(not(any(target_os = "macos", target_os = "linux")))] +compile_error!("unsupported operating system"); + +const PACKED_FILE: &str = 
"monero-wallet-rpc"; + +pub struct WalletRpcProcess { + _child: Child, + port: u16, +} + +impl WalletRpcProcess { + pub fn endpoint(&self) -> Url { + Url::parse(&format!("http://127.0.0.1:{}/json_rpc", self.port)) + .expect("Static url template is always valid") + } +} + +pub struct WalletRpc { + working_dir: PathBuf, +} + +impl WalletRpc { + pub async fn new(working_dir: impl AsRef<Path>) -> Result<Self> { + let working_dir = working_dir.as_ref(); + + if !working_dir.exists() { + tokio::fs::create_dir_all(working_dir).await?; + } + + let monero_wallet_rpc = WalletRpc { + working_dir: working_dir.to_path_buf(), + }; + + if monero_wallet_rpc.tar_path().exists() { + remove_file(monero_wallet_rpc.tar_path()).await?; + } + + if !monero_wallet_rpc.exec_path().exists() { + let mut options = OpenOptions::new(); + let mut file = options + .read(true) + .write(true) + .create_new(true) + .open(monero_wallet_rpc.tar_path()) + .await?; + + let byte_stream = reqwest::get(DOWNLOAD_URL) + .await? + .bytes_stream() + .map_err(|err| std::io::Error::new(ErrorKind::Other, err)); + + let mut stream = FramedRead::new( + BzDecoder::new(StreamReader::new(byte_stream)), + BytesCodec::new(), + ) + .map_ok(|bytes| bytes.freeze()); + + while let Some(chunk) = stream.next().await { + file.write_all(&chunk?).await?; + } + + file.flush().await?; + + let mut options = OpenOptions::new(); + let file = options + .read(true) + .open(monero_wallet_rpc.tar_path()) + .await?; + + let mut ar = Archive::new(file); + let mut entries = ar.entries()?; + + while let Some(file) = entries.next().await { + let mut f = file?; + if f.path()? + .to_str() + .context("Could not convert path to str in tar ball")? + .contains(PACKED_FILE) + { + f.unpack(monero_wallet_rpc.exec_path()).await?; + } + } + + remove_file(monero_wallet_rpc.tar_path()).await?; + } + + Ok(monero_wallet_rpc) + } + pub async fn run(&self, network: Network, daemon_host: &str) -> Result<WalletRpcProcess> { + let port = tokio::net::TcpListener::bind("127.0.0.1:0") + .await? 
+ .local_addr()? + .port(); + + let mut child = Command::new(self.exec_path()) + .stdout(Stdio::piped()) + .kill_on_drop(true) + .arg(match network { + Network::Mainnet => "--mainnet", + Network::Stagenet => "--stagenet", + Network::Testnet => "--testnet", + }) + .arg("--daemon-host") + .arg(daemon_host) + .arg("--rpc-bind-port") + .arg(format!("{}", port)) + .arg("--disable-rpc-login") + .arg("--wallet-dir") + .arg(self.working_dir.join("monero-data")) + .spawn()?; + + let stdout = child + .stdout + .take() + .expect("monero wallet rpc stdout was not piped parent process"); + + let mut reader = BufReader::new(stdout).lines(); + + while let Some(line) = reader.next_line().await? { + if line.contains("Starting wallet RPC server") { + break; + } + } + Ok(WalletRpcProcess { + _child: child, + port, + }) + } + + fn tar_path(&self) -> PathBuf { + self.working_dir.join("monero-cli-wallet.tar") + } + + fn exec_path(&self) -> PathBuf { + self.working_dir.join(PACKED_FILE) + } +} diff --git a/tokio-tar/src/archive.rs b/tokio-tar/src/archive.rs index 1e4d3b3b..6bccf558 100644 --- a/tokio-tar/src/archive.rs +++ b/tokio-tar/src/archive.rs @@ -98,10 +98,11 @@ impl ArchiveBuilder { self } - /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more - /// entries. + /// Ignore zeroed headers, which would otherwise indicate to the archive + /// that it has no more entries. /// - /// This can be used in case multiple tar archives have been concatenated together. + /// This can be used in case multiple tar archives have been concatenated + /// together. pub fn set_ignore_zeros(mut self, ignore_zeros: bool) -> Self { self.ignore_zeros = ignore_zeros; self @@ -365,8 +366,8 @@ fn poll_next_raw( } // If a header is not all zeros, we have another valid header. - // Otherwise, check if we are ignoring zeros and continue, or break as if this is the - // end of the archive. 
+ // Otherwise, check if we are ignoring zeros and continue, or break as if this + // is the end of the archive. if !header.as_bytes().iter().all(|i| *i == 0) { *next += 512; break; @@ -559,8 +560,8 @@ impl Read for Archive { /// Try to fill the buffer from the reader. /// -/// If the reader reaches its end before filling the buffer at all, returns `false`. -/// Otherwise returns `true`. +/// If the reader reaches its end before filling the buffer at all, returns +/// `false`. Otherwise returns `true`. fn poll_try_read_all( mut source: R, cx: &mut Context<'_>, diff --git a/tokio-tar/src/builder.rs b/tokio-tar/src/builder.rs index 08c46ba0..15e008b1 100644 --- a/tokio-tar/src/builder.rs +++ b/tokio-tar/src/builder.rs @@ -597,7 +597,8 @@ async fn append_dir_all( while let Some((src, is_dir, is_symlink)) = stack.pop() { let dest = path.join(src.strip_prefix(&src_path).unwrap()); - // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true + // In case of a symlink pointing to a directory, is_dir is false, but + // src.is_dir() will return true if is_dir || (is_symlink && follow && src.is_dir()) { let mut entries = fs::read_dir(&src).await?; while let Some(entry) = entries.next_entry().await.transpose() { diff --git a/tokio-tar/src/entry.rs b/tokio-tar/src/entry.rs index e239799b..d6cec73d 100644 --- a/tokio-tar/src/entry.rs +++ b/tokio-tar/src/entry.rs @@ -144,8 +144,8 @@ impl Entry { /// Returns the link name for this entry, in bytes, if listed. /// /// Note that this will not always return the same value as - /// `self.header().link_name_bytes()` as some archive formats have support for - /// longer path names described in separate entries. + /// `self.header().link_name_bytes()` as some archive formats have support + /// for longer path names described in separate entries. 
pub fn link_name_bytes(&self) -> Option> { self.fields.link_name_bytes() } @@ -414,14 +414,12 @@ impl EntryFields { async fn unpack_in(&mut self, dst: &Path) -> io::Result { // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: - // * Leading '/'s are trimmed. For example, `///test` is treated as - // `test`. - // * If the filename contains '..', then the file is skipped when - // extracting the tarball. - // * '//' within a filename is effectively skipped. An error is - // logged, but otherwise the effect is as if any two or more - // adjacent '/'s within the filename were consolidated into one - // '/'. + // * Leading '/'s are trimmed. For example, `///test` is treated as `test`. + // * If the filename contains '..', then the file is skipped when extracting the + // tarball. + // * '//' within a filename is effectively skipped. An error is logged, but + // otherwise the effect is as if any two or more adjacent '/'s within the + // filename were consolidated into one '/'. // // Most of this is handled by the `path` module of the standard // library, but we specially handle a few cases here as well. diff --git a/tokio-tar/src/header.rs b/tokio-tar/src/header.rs index 71f0deed..34769c6e 100644 --- a/tokio-tar/src/header.rs +++ b/tokio-tar/src/header.rs @@ -777,7 +777,8 @@ impl Header { #[cfg(windows)] fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) { - // There's no concept of a file mode on Windows, so do a best approximation here. + // There's no concept of a file mode on Windows, so do a best approximation + // here. match mode { HeaderMode::Complete => { self.set_uid(0); @@ -1100,7 +1101,8 @@ impl GnuHeader { truncate(&self.uname) } - /// Gets the fullname (group:user) in a "lossy" way, used for error reporting ONLY. + /// Gets the fullname (group:user) in a "lossy" way, used for error + /// reporting ONLY. 
fn fullname_lossy(&self) -> String { format!( "{}:{}", diff --git a/tokio-tar/tests/all.rs b/tokio-tar/tests/all.rs index 7fefb10a..46c8841f 100644 --- a/tokio-tar/tests/all.rs +++ b/tokio-tar/tests/all.rs @@ -37,8 +37,8 @@ macro_rules! tar { mod header; -/// test that we can concatenate the simple.tar archive and extract the same entries twice when we -/// use the ignore_zeros option. +/// test that we can concatenate the simple.tar archive and extract the same +/// entries twice when we use the ignore_zeros option. #[tokio::test] async fn simple_concat() { let bytes = tar!("simple.tar"); @@ -52,7 +52,8 @@ async fn simple_concat() { // concat two archives (with null in-between); archive_bytes.extend(bytes); - // test now that when we read the archive, it stops processing at the first zero header. + // test now that when we read the archive, it stops processing at the first zero + // header. let actual = decode_names(&mut Archive::new(Cursor::new(&archive_bytes))).await; assert_eq!(expected, actual); diff --git a/tokio-tar/tests/header/mod.rs b/tokio-tar/tests/header/mod.rs index 33f479d4..d4812655 100644 --- a/tokio-tar/tests/header/mod.rs +++ b/tokio-tar/tests/header/mod.rs @@ -189,7 +189,8 @@ fn set_metadata_deterministic() { h } - // Create "the same" File twice in a row, one second apart, with differing readonly values. + // Create "the same" File twice in a row, one second apart, with differing + // readonly values. let one = mk_header(tmppath.as_path(), false); thread::sleep(time::Duration::from_millis(1050)); let two = mk_header(tmppath.as_path(), true);