Squashed 'tokio-tar/' content from commit 43dd166

git-subtree-dir: tokio-tar
git-subtree-split: 43dd166d0f3aff67891cd1c1bf4d6bfb984bb789
This commit is contained in:
Thomas Eizinger 2021-02-25 11:20:47 +11:00
commit fec26926a8
34 changed files with 6427 additions and 0 deletions

49
.github/workflows/main.yml vendored Normal file
View File

@ -0,0 +1,49 @@
on: [push, pull_request]
name: Continuous integration
jobs:
ci:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
components: rustfmt, clippy
- uses: actions-rs/cargo@v1
with:
command: fmt
args: --all -- --check
- uses: actions-rs/cargo@v1
with:
command: install
args: cargo-hack
- uses: actions-rs/cargo@v1
with:
command: hack
args: check --all --ignore-private --each-feature --no-dev-deps
- uses: actions-rs/cargo@v1
with:
command: check
args: --all --all-targets --all-features
- uses: actions-rs/cargo@v1
with:
command: test
- uses: actions-rs/cargo@v1
with:
command: clippy
args: -- -D warnings

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
/Cargo.lock

44
Cargo.toml Normal file
View File

@ -0,0 +1,44 @@
[package]
name = "tokio-tar"
version = "0.2.0"
authors = [
"Alex Crichton <alex@alexcrichton.com>",
"dignifiedquire <me@dignifiequire.com>",
"Artem Vorotnikov <artem@vorotnikov.me>",
"Aiden McClelland <me@drbonez.dev>",
]
homepage = "https://github.com/vorot93/tokio-tar"
repository = "https://github.com/vorot93/tokio-tar"
documentation = "https://docs.rs/tokio-tar"
license = "MIT/Apache-2.0"
keywords = ["tar", "tarfile", "encoding"]
readme = "README.md"
edition = "2018"
exclude = ["tests/archives/*"]
description = """
A Rust implementation of an async TAR file reader and writer. This library does not
currently handle compression, but it is abstract over all I/O readers and
writers. Additionally, great lengths are taken to ensure that the entire
contents are never required to be entirely resident in memory all at once.
"""
[dependencies]
filetime = "0.2.13"
futures-core = "0.3"
tokio = { version = "1.0.1", features = ["fs", "io-util", "rt"] }
tokio-stream = "0.1.1"
[dev-dependencies]
tempfile = "3"
tokio = { version = "1.0.1", features = ["full"] }
[target."cfg(unix)".dependencies]
xattr = { version = "0.2", optional = true }
libc = "0.2"
[target.'cfg(target_os = "redox")'.dependencies]
redox_syscall = "0.2"
[features]
default = ["xattr"]

201
LICENSE-APACHE Normal file
View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
LICENSE-MIT Normal file
View File

@ -0,0 +1,25 @@
Copyright (c) 2014 Alex Crichton
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

97
README.md Normal file
View File

@ -0,0 +1,97 @@
<h1 align="center">tokio-tar</h1>
<div align="center">
<strong>
A tar archive reading/writing library for async Rust.
</strong>
</div>
<br />
<div align="center">
<!-- Crates version -->
<a href="https://crates.io/crates/tokio-tar">
<img src="https://img.shields.io/crates/v/tokio-tar.svg?style=flat-square"
alt="Crates.io version" />
</a>
<!-- Downloads -->
<a href="https://crates.io/crates/tokio-tar">
<img src="https://img.shields.io/crates/d/tokio-tar.svg?style=flat-square"
alt="Download" />
</a>
<!-- docs.rs docs -->
<a href="https://docs.rs/tokio-tar">
<img src="https://img.shields.io/badge/docs-latest-blue.svg?style=flat-square"
alt="docs.rs docs" />
</a>
</div>
<div align="center">
<h3>
<a href="https://docs.rs/tokio-tar">
API Docs
</a>
<span> | </span>
<a href="https://github.com/vorot93/tokio-tar/releases">
Releases
</a>
</h3>
</div>
<br/>
> Based on the great [tar-rs](https://github.com/alexcrichton/tar-rs).
## Reading an archive
```rust,no_run
use tokio::io::stdin;
use tokio::prelude::*;
use tokio_tar::Archive;
fn main() {
tokio::runtime::Runtime::new().unwrap().block_on(async {
let mut ar = Archive::new(stdin());
let mut entries = ar.entries().unwrap();
while let Some(file) = entries.next().await {
let f = file.unwrap();
println!("{}", f.path().unwrap().display());
}
});
}
```
## Writing an archive
```rust,no_run
use tokio::fs::File;
use tokio_tar::Builder;
fn main() {
tokio::runtime::Runtime::new().unwrap().block_on(async {
let file = File::create("foo.tar").await.unwrap();
let mut a = Builder::new(file);
a.append_path("README.md").await.unwrap();
a.append_file("lib.rs", &mut File::open("src/lib.rs").await.unwrap())
.await
.unwrap();
});
}
```
# License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or
http://opensource.org/licenses/MIT)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in this project by you, as defined in the Apache-2.0 license,
shall be dual licensed as above, without any additional terms or conditions.

28
examples/extract_file.rs Normal file
View File

@ -0,0 +1,28 @@
//! An example of extracting a file in an archive.
//!
//! Takes a tarball on standard input, looks for an entry with a listed file
//! name as the first argument provided, and then prints the contents of that
//! file to stdout.
extern crate tokio_tar as async_tar;
use std::{env::args_os, path::Path};
use tokio::io::{copy, stdin, stdout};
use tokio_stream::*;
use async_tar::Archive;
fn main() {
tokio::runtime::Runtime::new().unwrap().block_on(async {
let first_arg = args_os().nth(1).unwrap();
let filename = Path::new(&first_arg);
let mut ar = Archive::new(stdin());
let mut entries = ar.entries().unwrap();
while let Some(file) = entries.next().await {
let mut f = file.unwrap();
if f.path().unwrap() == filename {
copy(&mut f, &mut stdout()).await.unwrap();
}
}
});
}

21
examples/list.rs Normal file
View File

@ -0,0 +1,21 @@
//! An example of listing the file names of entries in an archive.
//!
//! Takes a tarball on stdin and prints out all of the entries inside.
extern crate tokio_tar as async_tar;
use tokio::io::stdin;
use tokio_stream::*;
use async_tar::Archive;
fn main() {
tokio::runtime::Runtime::new().unwrap().block_on(async {
let mut ar = Archive::new(stdin());
let mut entries = ar.entries().unwrap();
while let Some(file) = entries.next().await {
let f = file.unwrap();
println!("{}", f.path().unwrap().display());
}
});
}

54
examples/raw_list.rs Normal file
View File

@ -0,0 +1,54 @@
//! An example of listing raw entries in an archive.
//!
//! Takes a tarball on stdin and prints out all of the entries inside.
extern crate tokio_tar as async_tar;
use tokio::io::stdin;
use tokio_stream::*;
use async_tar::Archive;
fn main() {
tokio::runtime::Runtime::new().unwrap().block_on(async {
let mut ar = Archive::new(stdin());
let mut i = 0;
let mut entries = ar.entries_raw().unwrap();
while let Some(file) = entries.next().await {
println!("-------------------------- Entry {}", i);
let mut f = file.unwrap();
println!("path: {}", f.path().unwrap().display());
println!("size: {}", f.header().size().unwrap());
println!("entry size: {}", f.header().entry_size().unwrap());
println!("link name: {:?}", f.link_name().unwrap());
println!("file type: {:#x}", f.header().entry_type().as_byte());
println!("mode: {:#o}", f.header().mode().unwrap());
println!("uid: {}", f.header().uid().unwrap());
println!("gid: {}", f.header().gid().unwrap());
println!("mtime: {}", f.header().mtime().unwrap());
println!("username: {:?}", f.header().username().unwrap());
println!("groupname: {:?}", f.header().groupname().unwrap());
if f.header().as_ustar().is_some() {
println!("kind: UStar");
} else if f.header().as_gnu().is_some() {
println!("kind: GNU");
} else {
println!("kind: normal");
}
if let Ok(Some(extensions)) = f.pax_extensions().await {
println!("pax extensions:");
for e in extensions {
let e = e.unwrap();
println!(
"\t{:?} = {:?}",
String::from_utf8_lossy(e.key_bytes()),
String::from_utf8_lossy(e.value_bytes())
);
}
}
i += 1;
}
});
}

16
examples/write.rs Normal file
View File

@ -0,0 +1,16 @@
extern crate tokio_tar as async_tar;
use async_tar::Builder;
use tokio::fs::File;
fn main() {
tokio::runtime::Runtime::new().unwrap().block_on(async {
let file = File::create("foo.tar").await.unwrap();
let mut a = Builder::new(file);
a.append_path("README.md").await.unwrap();
a.append_file("lib.rs", &mut File::open("src/lib.rs").await.unwrap())
.await
.unwrap();
});
}

610
src/archive.rs Normal file
View File

@ -0,0 +1,610 @@
use std::{
cmp,
path::Path,
pin::Pin,
sync::{
atomic::{AtomicU64, Ordering},
Arc,
},
task::{Context, Poll},
};
use tokio::{
io::{self, AsyncRead as Read, AsyncReadExt},
sync::Mutex,
};
use tokio_stream::*;
use crate::{
entry::{EntryFields, EntryIo},
error::TarError,
other, Entry, GnuExtSparseHeader, GnuSparseHeader, Header,
};
/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
#[derive(Debug)]
pub struct Archive<R: Read + Unpin> {
inner: Arc<ArchiveInner<R>>,
}
impl<R: Read + Unpin> Clone for Archive<R> {
fn clone(&self) -> Self {
Archive {
inner: self.inner.clone(),
}
}
}
#[derive(Debug)]
pub struct ArchiveInner<R> {
pos: AtomicU64,
unpack_xattrs: bool,
preserve_permissions: bool,
preserve_mtime: bool,
ignore_zeros: bool,
obj: Mutex<R>,
}
/// Configure the archive.
pub struct ArchiveBuilder<R: Read + Unpin> {
obj: R,
unpack_xattrs: bool,
preserve_permissions: bool,
preserve_mtime: bool,
ignore_zeros: bool,
}
impl<R: Read + Unpin> ArchiveBuilder<R> {
/// Create a new builder.
pub fn new(obj: R) -> Self {
ArchiveBuilder {
unpack_xattrs: false,
preserve_permissions: false,
preserve_mtime: true,
ignore_zeros: false,
obj,
}
}
/// Indicate whether extended file attributes (xattrs on Unix) are preserved
/// when unpacking this archive.
///
/// This flag is disabled by default and is currently only implemented on
/// Unix using xattr support. This may eventually be implemented for
/// Windows, however, if other archive implementations are found which do
/// this as well.
pub fn set_unpack_xattrs(mut self, unpack_xattrs: bool) -> Self {
self.unpack_xattrs = unpack_xattrs;
self
}
/// Indicate whether extended permissions (like suid on Unix) are preserved
/// when unpacking this entry.
///
/// This flag is disabled by default and is currently only implemented on
/// Unix.
pub fn set_preserve_permissions(mut self, preserve: bool) -> Self {
self.preserve_permissions = preserve;
self
}
/// Indicate whether access time information is preserved when unpacking
/// this entry.
///
/// This flag is enabled by default.
pub fn set_preserve_mtime(mut self, preserve: bool) -> Self {
self.preserve_mtime = preserve;
self
}
/// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
/// entries.
///
/// This can be used in case multiple tar archives have been concatenated together.
pub fn set_ignore_zeros(mut self, ignore_zeros: bool) -> Self {
self.ignore_zeros = ignore_zeros;
self
}
/// Construct the archive, ready to accept inputs.
pub fn build(self) -> Archive<R> {
let Self {
unpack_xattrs,
preserve_permissions,
preserve_mtime,
ignore_zeros,
obj,
} = self;
Archive {
inner: Arc::new(ArchiveInner {
unpack_xattrs,
preserve_permissions,
preserve_mtime,
ignore_zeros,
obj: Mutex::new(obj),
pos: 0.into(),
}),
}
}
}
impl<R: Read + Unpin + Sync + Send> Archive<R> {
/// Create a new archive with the underlying object as the reader.
pub fn new(obj: R) -> Archive<R> {
Archive {
inner: Arc::new(ArchiveInner {
unpack_xattrs: false,
preserve_permissions: false,
preserve_mtime: true,
ignore_zeros: false,
obj: Mutex::new(obj),
pos: 0.into(),
}),
}
}
/// Unwrap this archive, returning the underlying object.
pub fn into_inner(self) -> Result<R, Self> {
let Self { inner } = self;
match Arc::try_unwrap(inner) {
Ok(inner) => Ok(inner.obj.into_inner()),
Err(inner) => Err(Self { inner }),
}
}
/// Construct an stream over the entries in this archive.
///
/// Note that care must be taken to consider each entry within an archive in
/// sequence. If entries are processed out of sequence (from what the
/// stream returns), then the contents read for each entry may be
/// corrupted.
pub fn entries(&mut self) -> io::Result<Entries<R>> {
if self.inner.pos.load(Ordering::SeqCst) != 0 {
return Err(other(
"cannot call entries unless archive is at \
position 0",
));
}
Ok(Entries {
archive: self.clone(),
next: 0,
gnu_longlink: None,
gnu_longname: None,
pax_extensions: None,
})
}
/// Construct an stream over the raw entries in this archive.
///
/// Note that care must be taken to consider each entry within an archive in
/// sequence. If entries are processed out of sequence (from what the
/// stream returns), then the contents read for each entry may be
/// corrupted.
pub fn entries_raw(&mut self) -> io::Result<RawEntries<R>> {
if self.inner.pos.load(Ordering::SeqCst) != 0 {
return Err(other(
"cannot call entries_raw unless archive is at \
position 0",
));
}
Ok(RawEntries {
archive: self.clone(),
next: 0,
})
}
/// Unpacks the contents tarball into the specified `dst`.
///
/// This function will iterate over the entire contents of this tarball,
/// extracting each file in turn to the location specified by the entry's
/// path name.
///
/// This operation is relatively sensitive in that it will not write files
/// outside of the path specified by `dst`. Files in the archive which have
/// a '..' in their path are skipped during the unpacking process.
///
/// # Examples
///
/// ```no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio::fs::File;
/// use tokio_tar::Archive;
///
/// let mut ar = Archive::new(File::open("foo.tar").await?);
/// ar.unpack("foo").await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
let mut entries = self.entries()?;
let mut pinned = Pin::new(&mut entries);
while let Some(entry) = pinned.next().await {
let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
file.unpack_in(dst.as_ref()).await?;
}
Ok(())
}
}
/// Stream of `Entry`s.
pub struct Entries<R: Read + Unpin> {
archive: Archive<R>,
next: u64,
gnu_longname: Option<Vec<u8>>,
gnu_longlink: Option<Vec<u8>>,
pax_extensions: Option<Vec<u8>>,
}
macro_rules! ready_opt_err {
($val:expr) => {
match futures_core::ready!($val) {
Some(Ok(val)) => val,
Some(Err(err)) => return Poll::Ready(Some(Err(err))),
None => return Poll::Ready(None),
}
};
}
macro_rules! ready_err {
($val:expr) => {
match futures_core::ready!($val) {
Ok(val) => val,
Err(err) => return Poll::Ready(Some(Err(err))),
}
};
}
impl<R: Read + Unpin> Stream for Entries<R> {
type Item = io::Result<Entry<Archive<R>>>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
loop {
let entry = ready_opt_err!(poll_next_raw(self.archive.clone(), &mut self.next, cx));
if entry.header().as_gnu().is_some() && entry.header().entry_type().is_gnu_longname() {
if self.gnu_longname.is_some() {
return Poll::Ready(Some(Err(other(
"two long name entries describing \
the same member",
))));
}
let mut ef = EntryFields::from(entry);
let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx));
self.gnu_longname = Some(val);
continue;
}
if entry.header().as_gnu().is_some() && entry.header().entry_type().is_gnu_longlink() {
if self.gnu_longlink.is_some() {
return Poll::Ready(Some(Err(other(
"two long name entries describing \
the same member",
))));
}
let mut ef = EntryFields::from(entry);
let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx));
self.gnu_longlink = Some(val);
continue;
}
if entry.header().as_ustar().is_some()
&& entry.header().entry_type().is_pax_local_extensions()
{
if self.pax_extensions.is_some() {
return Poll::Ready(Some(Err(other(
"two pax extensions entries describing \
the same member",
))));
}
let mut ef = EntryFields::from(entry);
let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx));
self.pax_extensions = Some(val);
continue;
}
let mut fields = EntryFields::from(entry);
fields.long_pathname = self.gnu_longname.take();
fields.long_linkname = self.gnu_longlink.take();
fields.pax_extensions = self.pax_extensions.take();
ready_err!(poll_parse_sparse_header(
self.archive.clone(),
&mut self.next,
&mut fields,
cx
));
return Poll::Ready(Some(Ok(fields.into_entry())));
}
}
}
/// Stream of raw `Entry`s.
pub struct RawEntries<R: Read + Unpin> {
archive: Archive<R>,
next: u64,
}
impl<R: Read + Unpin> Stream for RawEntries<R> {
type Item = io::Result<Entry<Archive<R>>>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
poll_next_raw(self.archive.clone(), &mut self.next, cx)
}
}
fn poll_next_raw<R: Read + Unpin>(
mut archive: Archive<R>,
next: &mut u64,
cx: &mut Context<'_>,
) -> Poll<Option<io::Result<Entry<Archive<R>>>>> {
let mut header = Header::new_old();
let mut header_pos = *next;
loop {
// Seek to the start of the next header in the archive
let delta = *next - archive.inner.pos.load(Ordering::SeqCst);
match futures_core::ready!(poll_skip(&mut archive, cx, delta)) {
Ok(_) => {}
Err(err) => return Poll::Ready(Some(Err(err))),
}
// EOF is an indicator that we are at the end of the archive.
match futures_core::ready!(poll_try_read_all(&mut archive, cx, header.as_mut_bytes())) {
Ok(true) => {}
Ok(false) => return Poll::Ready(None),
Err(err) => return Poll::Ready(Some(Err(err))),
}
// If a header is not all zeros, we have another valid header.
// Otherwise, check if we are ignoring zeros and continue, or break as if this is the
// end of the archive.
if !header.as_bytes().iter().all(|i| *i == 0) {
*next += 512;
break;
}
if !archive.inner.ignore_zeros {
return Poll::Ready(None);
}
*next += 512;
header_pos = *next;
}
// Make sure the checksum is ok
let sum = header.as_bytes()[..148]
.iter()
.chain(&header.as_bytes()[156..])
.fold(0, |a, b| a + (*b as u32))
+ 8 * 32;
let cksum = header.cksum()?;
if sum != cksum {
return Poll::Ready(Some(Err(other("archive header checksum mismatch"))));
}
let file_pos = *next;
let size = header.entry_size()?;
let data = EntryIo::Data(archive.clone().take(size));
let ret = EntryFields {
size,
header_pos,
file_pos,
data: vec![data],
header,
long_pathname: None,
long_linkname: None,
pax_extensions: None,
unpack_xattrs: archive.inner.unpack_xattrs,
preserve_permissions: archive.inner.preserve_permissions,
preserve_mtime: archive.inner.preserve_mtime,
read_state: None,
};
// Store where the next entry is, rounding up by 512 bytes (the size of
// a header);
let size = (size + 511) & !(512 - 1);
*next += size;
Poll::Ready(Some(Ok(ret.into_entry())))
}
fn poll_parse_sparse_header<R: Read + Unpin>(
mut archive: Archive<R>,
next: &mut u64,
entry: &mut EntryFields<Archive<R>>,
cx: &mut Context<'_>,
) -> Poll<io::Result<()>> {
if !entry.header.entry_type().is_gnu_sparse() {
return Poll::Ready(Ok(()));
}
let gnu = match entry.header.as_gnu() {
Some(gnu) => gnu,
None => return Poll::Ready(Err(other("sparse entry type listed but not GNU header"))),
};
// Sparse files are represented internally as a list of blocks that are
// read. Blocks are either a bunch of 0's or they're data from the
// underlying archive.
//
// Blocks of a sparse file are described by the `GnuSparseHeader`
// structure, some of which are contained in `GnuHeader` but some of
// which may also be contained after the first header in further
// headers.
//
// We read off all the blocks here and use the `add_block` function to
// incrementally add them to the list of I/O block (in `entry.data`).
// The `add_block` function also validates that each chunk comes after
// the previous, we don't overrun the end of the file, and each block is
// aligned to a 512-byte boundary in the archive itself.
//
// At the end we verify that the sparse file size (`Header::size`) is
// the same as the current offset (described by the list of blocks) as
// well as the amount of data read equals the size of the entry
// (`Header::entry_size`).
entry.data.truncate(0);
let mut cur = 0;
let mut remaining = entry.size;
{
let data = &mut entry.data;
let reader = archive.clone();
let size = entry.size;
let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
if block.is_empty() {
return Ok(());
}
let off = block.offset()?;
let len = block.length()?;
if (size - remaining) % 512 != 0 {
return Err(other(
"previous block in sparse file was not \
aligned to 512-byte boundary",
));
} else if off < cur {
return Err(other(
"out of order or overlapping sparse \
blocks",
));
} else if cur < off {
let block = io::repeat(0).take(off - cur);
data.push(EntryIo::Pad(block));
}
cur = off
.checked_add(len)
.ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
remaining = remaining.checked_sub(len).ok_or_else(|| {
other(
"sparse file consumed more data than the header \
listed",
)
})?;
data.push(EntryIo::Data(reader.clone().take(len)));
Ok(())
};
for block in gnu.sparse.iter() {
add_block(block)?
}
if gnu.is_extended() {
let mut ext = GnuExtSparseHeader::new();
ext.isextended[0] = 1;
while ext.is_extended() {
match futures_core::ready!(poll_try_read_all(&mut archive, cx, ext.as_mut_bytes()))
{
Ok(true) => {}
Ok(false) => return Poll::Ready(Err(other("failed to read extension"))),
Err(err) => return Poll::Ready(Err(err)),
}
*next += 512;
for block in ext.sparse.iter() {
add_block(block)?;
}
}
}
}
if cur != gnu.real_size()? {
return Poll::Ready(Err(other(
"mismatch in sparse file chunks and \
size in header",
)));
}
entry.size = cur;
if remaining > 0 {
return Poll::Ready(Err(other(
"mismatch in sparse file chunks and \
entry size in header",
)));
}
Poll::Ready(Ok(()))
}
impl<R: Read + Unpin> Read for Archive<R> {
fn poll_read(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
into: &mut io::ReadBuf<'_>,
) -> Poll<io::Result<()>> {
let mut r = if let Ok(v) = self.inner.obj.try_lock() {
v
} else {
return Poll::Pending;
};
let res = futures_core::ready!(Pin::new(&mut *r).poll_read(cx, into));
match res {
Ok(()) => {
self.inner
.pos
.fetch_add(into.filled().len() as u64, Ordering::SeqCst);
Poll::Ready(Ok(()))
}
Err(err) => Poll::Ready(Err(err)),
}
}
}
/// Try to fill the buffer from the reader.
///
/// If the reader reaches its end before filling the buffer at all, returns `false`.
/// Otherwise returns `true`.
fn poll_try_read_all<R: Read + Unpin>(
mut source: R,
cx: &mut Context<'_>,
buf: &mut [u8],
) -> Poll<io::Result<bool>> {
let mut read = 0;
while read < buf.len() {
let mut read_buf = io::ReadBuf::new(&mut buf[read..]);
match futures_core::ready!(Pin::new(&mut source).poll_read(cx, &mut read_buf)) {
Ok(()) if read_buf.filled().is_empty() => {
if read == 0 {
return Poll::Ready(Ok(false));
}
return Poll::Ready(Err(other("failed to read entire block")));
}
Ok(()) => read += read_buf.filled().len(),
Err(err) => return Poll::Ready(Err(err)),
}
}
Poll::Ready(Ok(true))
}
/// Skip n bytes on the given source.
fn poll_skip<R: Read + Unpin>(
mut source: R,
cx: &mut Context<'_>,
mut amt: u64,
) -> Poll<io::Result<()>> {
let mut buf = [0u8; 4096 * 8];
while amt > 0 {
let n = cmp::min(amt, buf.len() as u64);
let mut read_buf = io::ReadBuf::new(&mut buf[..n as usize]);
match futures_core::ready!(Pin::new(&mut source).poll_read(cx, &mut read_buf)) {
Ok(()) if read_buf.filled().is_empty() => {
return Poll::Ready(Err(other("unexpected EOF during skip")));
}
Ok(()) => {
amt -= read_buf.filled().len() as u64;
}
Err(err) => return Poll::Ready(Err(err)),
}
}
Poll::Ready(Ok(()))
}

633
src/builder.rs Normal file
View File

@ -0,0 +1,633 @@
use crate::{
header::{bytes2path, path2bytes, HeaderMode},
other, EntryType, Header,
};
use std::{borrow::Cow, fs::Metadata, path::Path};
use tokio::{
fs,
io::{self, AsyncRead as Read, AsyncReadExt, AsyncWrite as Write, AsyncWriteExt},
};
/// A structure for building archives
///
/// This structure has methods for building up an archive from scratch into any
/// arbitrary writer.
pub struct Builder<W: Write + Unpin + Send + 'static> {
mode: HeaderMode,
follow: bool,
finished: bool,
obj: Option<W>,
cancellation: Option<tokio::sync::oneshot::Sender<W>>,
}
impl<W: Write + Unpin + Send + 'static> Builder<W> {
/// Create a new archive builder with the underlying object as the
/// destination of all data written. The builder will use
/// `HeaderMode::Complete` by default.
pub fn new(obj: W) -> Builder<W> {
let (tx, rx) = tokio::sync::oneshot::channel::<W>();
tokio::spawn(async move {
if let Ok(mut w) = rx.await {
let _ = w.write_all(&[0; 1024]).await;
}
});
Builder {
mode: HeaderMode::Complete,
follow: true,
finished: false,
obj: Some(obj),
cancellation: Some(tx),
}
}
/// Changes the HeaderMode that will be used when reading fs Metadata for
/// methods that implicitly read metadata for an input Path. Notably, this
/// does _not_ apply to `append(Header)`.
pub fn mode(&mut self, mode: HeaderMode) {
self.mode = mode;
}
/// Follow symlinks, archiving the contents of the file they point to rather
/// than adding a symlink to the archive. Defaults to true.
pub fn follow_symlinks(&mut self, follow: bool) {
self.follow = follow;
}
/// Gets shared reference to the underlying object.
pub fn get_ref(&self) -> &W {
self.obj.as_ref().unwrap()
}
/// Gets mutable reference to the underlying object.
///
/// Note that care must be taken while writing to the underlying
/// object. But, e.g. `get_mut().flush()` is claimed to be safe and
/// useful in the situations when one needs to be ensured that
/// tar entry was flushed to the disk.
pub fn get_mut(&mut self) -> &mut W {
self.obj.as_mut().unwrap()
}
/// Unwrap this archive, returning the underlying object.
///
/// This function will finish writing the archive if the `finish` function
/// hasn't yet been called, returning any I/O error which happens during
/// that operation.
pub async fn into_inner(mut self) -> io::Result<W> {
if !self.finished {
self.finish().await?;
}
Ok(self.obj.take().unwrap())
}
/// Adds a new entry to this archive.
///
/// This function will append the header specified, followed by contents of
/// the stream specified by `data`. To produce a valid archive the `size`
/// field of `header` must be the same as the length of the stream that's
/// being written. Additionally the checksum for the header should have been
/// set via the `set_cksum` method.
///
/// Note that this will not attempt to seek the archive to a valid position,
/// so if the archive is in the middle of a read or some other similar
/// operation then this may corrupt the archive.
///
/// Also note that after all entries have been written to an archive the
/// `finish` function needs to be called to finish writing the archive.
///
/// # Errors
///
/// This function will return an error for any intermittent I/O error which
/// occurs when either reading or writing.
///
/// # Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio_tar::{Builder, Header};
///
/// let mut header = Header::new_gnu();
/// header.set_path("foo")?;
/// header.set_size(4);
/// header.set_cksum();
///
/// let mut data: &[u8] = &[1, 2, 3, 4];
///
/// let mut ar = Builder::new(Vec::new());
/// ar.append(&header, data).await?;
/// let data = ar.into_inner().await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn append<R: Read + Unpin>(
&mut self,
header: &Header,
mut data: R,
) -> io::Result<()> {
append(self.get_mut(), header, &mut data).await?;
Ok(())
}
/// Adds a new entry to this archive with the specified path.
///
/// This function will set the specified path in the given header, which may
/// require appending a GNU long-name extension entry to the archive first.
/// The checksum for the header will be automatically updated via the
/// `set_cksum` method after setting the path. No other metadata in the
/// header will be modified.
///
/// Then it will append the header, followed by contents of the stream
/// specified by `data`. To produce a valid archive the `size` field of
/// `header` must be the same as the length of the stream that's being
/// written.
///
/// Note that this will not attempt to seek the archive to a valid position,
/// so if the archive is in the middle of a read or some other similar
/// operation then this may corrupt the archive.
///
/// Also note that after all entries have been written to an archive the
/// `finish` function needs to be called to finish writing the archive.
///
/// # Errors
///
/// This function will return an error for any intermittent I/O error which
/// occurs when either reading or writing.
///
/// # Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio_tar::{Builder, Header};
///
/// let mut header = Header::new_gnu();
/// header.set_size(4);
/// header.set_cksum();
///
/// let mut data: &[u8] = &[1, 2, 3, 4];
///
/// let mut ar = Builder::new(Vec::new());
/// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
/// let data = ar.into_inner().await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn append_data<P: AsRef<Path>, R: Read + Unpin>(
&mut self,
header: &mut Header,
path: P,
data: R,
) -> io::Result<()> {
prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
header.set_cksum();
self.append(&header, data).await?;
Ok(())
}
/// Adds a file on the local filesystem to this archive.
///
/// This function will open the file specified by `path` and insert the file
/// into the archive with the appropriate metadata set, returning any I/O
/// error which occurs while writing. The path name for the file inside of
/// this archive will be the same as `path`, and it is required that the
/// path is a relative path.
///
/// Note that this will not attempt to seek the archive to a valid position,
/// so if the archive is in the middle of a read or some other similar
/// operation then this may corrupt the archive.
///
/// Also note that after all files have been written to an archive the
/// `finish` function needs to be called to finish writing the archive.
///
/// # Examples
///
/// ```no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio_tar::Builder;
///
/// let mut ar = Builder::new(Vec::new());
///
/// ar.append_path("foo/bar.txt").await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
let mode = self.mode;
let follow = self.follow;
append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
Ok(())
}
/// Adds a file on the local filesystem to this archive under another name.
///
/// This function will open the file specified by `path` and insert the file
/// into the archive as `name` with appropriate metadata set, returning any
/// I/O error which occurs while writing. The path name for the file inside
/// of this archive will be `name` is required to be a relative path.
///
/// Note that this will not attempt to seek the archive to a valid position,
/// so if the archive is in the middle of a read or some other similar
/// operation then this may corrupt the archive.
///
/// Also note that after all files have been written to an archive the
/// `finish` function needs to be called to finish writing the archive.
///
/// # Examples
///
/// ```no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio_tar::Builder;
///
/// let mut ar = Builder::new(Vec::new());
///
/// // Insert the local file "foo/bar.txt" in the archive but with the name
/// // "bar/foo.txt".
/// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
&mut self,
path: P,
name: N,
) -> io::Result<()> {
let mode = self.mode;
let follow = self.follow;
append_path_with_name(
self.get_mut(),
path.as_ref(),
Some(name.as_ref()),
mode,
follow,
)
.await?;
Ok(())
}
/// Adds a file to this archive with the given path as the name of the file
/// in the archive.
///
/// This will use the metadata of `file` to populate a `Header`, and it will
/// then append the file to the archive with the name `path`.
///
/// Note that this will not attempt to seek the archive to a valid position,
/// so if the archive is in the middle of a read or some other similar
/// operation then this may corrupt the archive.
///
/// Also note that after all files have been written to an archive the
/// `finish` function needs to be called to finish writing the archive.
///
/// # Examples
///
/// ```no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio::fs::File;
/// use tokio_tar::Builder;
///
/// let mut ar = Builder::new(Vec::new());
///
/// // Open the file at one location, but insert it into the archive with a
/// // different name.
/// let mut f = File::open("foo/bar/baz.txt").await?;
/// ar.append_file("bar/baz.txt", &mut f).await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn append_file<P: AsRef<Path>>(
&mut self,
path: P,
file: &mut fs::File,
) -> io::Result<()> {
let mode = self.mode;
append_file(self.get_mut(), path.as_ref(), file, mode).await?;
Ok(())
}
/// Adds a directory to this archive with the given path as the name of the
/// directory in the archive.
///
/// This will use `stat` to populate a `Header`, and it will then append the
/// directory to the archive with the name `path`.
///
/// Note that this will not attempt to seek the archive to a valid position,
/// so if the archive is in the middle of a read or some other similar
/// operation then this may corrupt the archive.
///
/// Also note that after all files have been written to an archive the
/// `finish` function needs to be called to finish writing the archive.
///
/// # Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio::fs;
/// use tokio_tar::Builder;
///
/// let mut ar = Builder::new(Vec::new());
///
/// // Use the directory at one location, but insert it into the archive
/// // with a different name.
/// ar.append_dir("bardir", ".").await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
where
P: AsRef<Path>,
Q: AsRef<Path>,
{
let mode = self.mode;
append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
Ok(())
}
/// Adds a directory and all of its contents (recursively) to this archive
/// with the given path as the name of the directory in the archive.
///
/// Note that this will not attempt to seek the archive to a valid position,
/// so if the archive is in the middle of a read or some other similar
/// operation then this may corrupt the archive.
///
/// Also note that after all files have been written to an archive the
/// `finish` function needs to be called to finish writing the archive.
///
/// # Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio::fs;
/// use tokio_tar::Builder;
///
/// let mut ar = Builder::new(Vec::new());
///
/// // Use the directory at one location, but insert it into the archive
/// // with a different name.
/// ar.append_dir_all("bardir", ".").await?;
/// #
/// # Ok(()) }) }
/// ```
pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
where
P: AsRef<Path>,
Q: AsRef<Path>,
{
let mode = self.mode;
let follow = self.follow;
append_dir_all(
self.get_mut(),
path.as_ref(),
src_path.as_ref(),
mode,
follow,
)
.await?;
Ok(())
}
/// Finish writing this archive, emitting the termination sections.
///
/// This function should only be called when the archive has been written
/// entirely and if an I/O error happens the underlying object still needs
/// to be acquired.
///
/// In most situations the `into_inner` method should be preferred.
pub async fn finish(&mut self) -> io::Result<()> {
if self.finished {
return Ok(());
}
self.finished = true;
self.get_mut().write_all(&[0; 1024]).await?;
Ok(())
}
}
async fn append<Dst: Write + Unpin + ?Sized, Data: Read + Unpin + ?Sized>(
mut dst: &mut Dst,
header: &Header,
mut data: &mut Data,
) -> io::Result<()> {
dst.write_all(header.as_bytes()).await?;
let len = io::copy(&mut data, &mut dst).await?;
// Pad with zeros if necessary.
let buf = [0; 512];
let remaining = 512 - (len % 512);
if remaining < 512 {
dst.write_all(&buf[..remaining as usize]).await?;
}
Ok(())
}
async fn append_path_with_name<Dst: Write + Unpin + ?Sized>(
dst: &mut Dst,
path: &Path,
name: Option<&Path>,
mode: HeaderMode,
follow: bool,
) -> io::Result<()> {
let stat = if follow {
fs::metadata(path).await.map_err(|err| {
io::Error::new(
err.kind(),
format!("{} when getting metadata for {}", err, path.display()),
)
})?
} else {
fs::symlink_metadata(path).await.map_err(|err| {
io::Error::new(
err.kind(),
format!("{} when getting metadata for {}", err, path.display()),
)
})?
};
let ar_name = name.unwrap_or(path);
if stat.is_file() {
append_fs(
dst,
ar_name,
&stat,
&mut fs::File::open(path).await?,
mode,
None,
)
.await?;
Ok(())
} else if stat.is_dir() {
append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?;
Ok(())
} else if stat.file_type().is_symlink() {
let link_name = fs::read_link(path).await?;
append_fs(
dst,
ar_name,
&stat,
&mut io::empty(),
mode,
Some(&link_name),
)
.await?;
Ok(())
} else {
Err(other(&format!("{} has unknown file type", path.display())))
}
}
async fn append_file<Dst: Write + Unpin + ?Sized>(
dst: &mut Dst,
path: &Path,
file: &mut fs::File,
mode: HeaderMode,
) -> io::Result<()> {
let stat = file.metadata().await?;
append_fs(dst, path, &stat, file, mode, None).await?;
Ok(())
}
async fn append_dir<Dst: Write + Unpin + ?Sized>(
dst: &mut Dst,
path: &Path,
src_path: &Path,
mode: HeaderMode,
) -> io::Result<()> {
let stat = fs::metadata(src_path).await?;
append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?;
Ok(())
}
fn prepare_header(size: u64, entry_type: EntryType) -> Header {
let mut header = Header::new_gnu();
let name = b"././@LongLink";
header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
header.set_mode(0o644);
header.set_uid(0);
header.set_gid(0);
header.set_mtime(0);
// + 1 to be compliant with GNU tar
header.set_size(size + 1);
header.set_entry_type(entry_type);
header.set_cksum();
header
}
async fn prepare_header_path<Dst: Write + Unpin + ?Sized>(
dst: &mut Dst,
header: &mut Header,
path: &Path,
) -> io::Result<()> {
// Try to encode the path directly in the header, but if it ends up not
// working (probably because it's too long) then try to use the GNU-specific
// long name extension by emitting an entry which indicates that it's the
// filename.
if let Err(e) = header.set_path(path) {
let data = path2bytes(&path)?;
let max = header.as_old().name.len();
// Since e isn't specific enough to let us know the path is indeed too
// long, verify it first before using the extension.
if data.len() < max {
return Err(e);
}
let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
// null-terminated string
let mut data2 = data.chain(io::repeat(0).take(1));
append(dst, &header2, &mut data2).await?;
// Truncate the path to store in the header we're about to emit to
// ensure we've got something at least mentioned.
let path = bytes2path(Cow::Borrowed(&data[..max]))?;
header.set_path(&path)?;
}
Ok(())
}
async fn prepare_header_link<Dst: Write + Unpin + ?Sized>(
dst: &mut Dst,
header: &mut Header,
link_name: &Path,
) -> io::Result<()> {
// Same as previous function but for linkname
if let Err(e) = header.set_link_name(&link_name) {
let data = path2bytes(&link_name)?;
if data.len() < header.as_old().linkname.len() {
return Err(e);
}
let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink);
let mut data2 = data.chain(io::repeat(0).take(1));
append(dst, &header2, &mut data2).await?;
}
Ok(())
}
async fn append_fs<Dst: Write + Unpin + ?Sized, R: Read + Unpin + ?Sized>(
dst: &mut Dst,
path: &Path,
meta: &Metadata,
read: &mut R,
mode: HeaderMode,
link_name: Option<&Path>,
) -> io::Result<()> {
let mut header = Header::new_gnu();
prepare_header_path(dst, &mut header, path).await?;
header.set_metadata_in_mode(meta, mode);
if let Some(link_name) = link_name {
prepare_header_link(dst, &mut header, link_name).await?;
}
header.set_cksum();
append(dst, &header, read).await?;
Ok(())
}
async fn append_dir_all<Dst: Write + Unpin + ?Sized>(
dst: &mut Dst,
path: &Path,
src_path: &Path,
mode: HeaderMode,
follow: bool,
) -> io::Result<()> {
let mut stack = vec![(src_path.to_path_buf(), true, false)];
while let Some((src, is_dir, is_symlink)) = stack.pop() {
let dest = path.join(src.strip_prefix(&src_path).unwrap());
// In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
if is_dir || (is_symlink && follow && src.is_dir()) {
let mut entries = fs::read_dir(&src).await?;
while let Some(entry) = entries.next_entry().await.transpose() {
let entry = entry?;
let file_type = entry.file_type().await?;
stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
}
if dest != Path::new("") {
append_dir(dst, &dest, &src, mode).await?;
}
} else if !follow && is_symlink {
let stat = fs::symlink_metadata(&src).await?;
let link_name = fs::read_link(&src).await?;
append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
} else {
append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?;
}
}
Ok(())
}
impl<W: Write + Unpin + Send + 'static> Drop for Builder<W> {
fn drop(&mut self) {
// TODO: proper async cancellation
if !self.finished {
let _ = self
.cancellation
.take()
.unwrap()
.send(self.obj.take().unwrap());
}
}
}

955
src/entry.rs Normal file
View File

@ -0,0 +1,955 @@
use crate::{
error::TarError, header::bytes2path, other, pax::pax_extensions, Archive, Header, PaxExtensions,
};
use filetime::{self, FileTime};
use std::{
borrow::Cow,
cmp, fmt,
io::{Error, ErrorKind, SeekFrom},
marker,
path::{Component, Path, PathBuf},
pin::Pin,
task::{Context, Poll},
};
use tokio::{
fs,
fs::OpenOptions,
io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt},
};
/// A read-only view into an entry of an archive.
///
/// This structure is a window into a portion of a borrowed archive which can
/// be inspected. It acts as a file handle by implementing the Reader trait. An
/// entry cannot be rewritten once inserted into an archive.
pub struct Entry<R: Read + Unpin> {
fields: EntryFields<R>,
_ignored: marker::PhantomData<Archive<R>>,
}
impl<R: Read + Unpin> fmt::Debug for Entry<R> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Entry")
.field("fields", &self.fields)
.finish()
}
}
// private implementation detail of `Entry`, but concrete (no type parameters)
// and also all-public to be constructed from other modules.
pub struct EntryFields<R: Read + Unpin> {
pub long_pathname: Option<Vec<u8>>,
pub long_linkname: Option<Vec<u8>>,
pub pax_extensions: Option<Vec<u8>>,
pub header: Header,
pub size: u64,
pub header_pos: u64,
pub file_pos: u64,
pub data: Vec<EntryIo<R>>,
pub unpack_xattrs: bool,
pub preserve_permissions: bool,
pub preserve_mtime: bool,
pub(crate) read_state: Option<EntryIo<R>>,
}
impl<R: Read + Unpin> fmt::Debug for EntryFields<R> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("EntryFields")
.field("long_pathname", &self.long_pathname)
.field("long_linkname", &self.long_linkname)
.field("pax_extensions", &self.pax_extensions)
.field("header", &self.header)
.field("size", &self.size)
.field("header_pos", &self.header_pos)
.field("file_pos", &self.file_pos)
.field("data", &self.data)
.field("unpack_xattrs", &self.unpack_xattrs)
.field("preserve_permissions", &self.preserve_permissions)
.field("preserve_mtime", &self.preserve_mtime)
.field("read_state", &self.read_state)
.finish()
}
}
pub enum EntryIo<R: Read + Unpin> {
Pad(io::Take<io::Repeat>),
Data(io::Take<R>),
}
impl<R: Read + Unpin> fmt::Debug for EntryIo<R> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
EntryIo::Pad(_) => write!(f, "EntryIo::Pad"),
EntryIo::Data(_) => write!(f, "EntryIo::Data"),
}
}
}
/// When unpacking items the unpacked thing is returned to allow custom
/// additional handling by users. Today the File is returned, in future
/// the enum may be extended with kinds for links, directories etc.
#[derive(Debug)]
#[non_exhaustive]
pub enum Unpacked {
/// A file was unpacked.
File(fs::File),
/// A directory, hardlink, symlink, or other node was unpacked.
Other,
}
impl<R: Read + Unpin> Entry<R> {
/// Returns the path name for this entry.
///
/// This method may fail if the pathname is not valid Unicode and this is
/// called on a Windows platform.
///
/// Note that this function will convert any `\` characters to directory
/// separators, and it will not always return the same value as
/// `self.header().path()` as some archive formats have support for longer
/// path names described in separate entries.
///
/// It is recommended to use this method instead of inspecting the `header`
/// directly to ensure that various archive formats are handled correctly.
pub fn path(&self) -> io::Result<Cow<Path>> {
self.fields.path()
}
/// Returns the raw bytes listed for this entry.
///
/// Note that this function will convert any `\` characters to directory
/// separators, and it will not always return the same value as
/// `self.header().path_bytes()` as some archive formats have support for
/// longer path names described in separate entries.
pub fn path_bytes(&self) -> Cow<[u8]> {
self.fields.path_bytes()
}
/// Returns the link name for this entry, if any is found.
///
/// This method may fail if the pathname is not valid Unicode and this is
/// called on a Windows platform. `Ok(None)` being returned, however,
/// indicates that the link name was not present.
///
/// Note that this function will convert any `\` characters to directory
/// separators, and it will not always return the same value as
/// `self.header().link_name()` as some archive formats have support for
/// longer path names described in separate entries.
///
/// It is recommended to use this method instead of inspecting the `header`
/// directly to ensure that various archive formats are handled correctly.
pub fn link_name(&self) -> io::Result<Option<Cow<Path>>> {
self.fields.link_name()
}
/// Returns the link name for this entry, in bytes, if listed.
///
/// Note that this will not always return the same value as
/// `self.header().link_name_bytes()` as some archive formats have support for
/// longer path names described in separate entries.
pub fn link_name_bytes(&self) -> Option<Cow<[u8]>> {
self.fields.link_name_bytes()
}
/// Returns an iterator over the pax extensions contained in this entry.
///
/// Pax extensions are a form of archive where extra metadata is stored in
/// key/value pairs in entries before the entry they're intended to
/// describe. For example this can be used to describe long file name or
/// other metadata like atime/ctime/mtime in more precision.
///
/// The returned iterator will yield key/value pairs for each extension.
///
/// `None` will be returned if this entry does not indicate that it itself
/// contains extensions, or if there were no previous extensions describing
/// it.
///
/// Note that global pax extensions are intended to be applied to all
/// archive entries.
///
/// Also note that this function will read the entire entry if the entry
/// itself is a list of extensions.
pub async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
self.fields.pax_extensions().await
}
/// Returns access to the header of this entry in the archive.
///
/// This provides access to the metadata for this entry in the archive.
pub fn header(&self) -> &Header {
&self.fields.header
}
/// Returns the starting position, in bytes, of the header of this entry in
/// the archive.
///
/// The header is always a contiguous section of 512 bytes, so if the
/// underlying reader implements `Seek`, then the slice from `header_pos` to
/// `header_pos + 512` contains the raw header bytes.
pub fn raw_header_position(&self) -> u64 {
self.fields.header_pos
}
/// Returns the starting position, in bytes, of the file of this entry in
/// the archive.
///
/// If the file of this entry is continuous (e.g. not a sparse file), and
/// if the underlying reader implements `Seek`, then the slice from
/// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
pub fn raw_file_position(&self) -> u64 {
self.fields.file_pos
}
/// Writes this file to the specified location.
///
/// This function will write the entire contents of this file into the
/// location specified by `dst`. Metadata will also be propagated to the
/// path `dst`.
///
/// This function will create a file at the path `dst`, and it is required
/// that the intermediate directories are created. Any existing file at the
/// location `dst` will be overwritten.
///
/// > **Note**: This function does not have as many sanity checks as
/// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
/// > thinking of unpacking untrusted tarballs you may want to review the
/// > implementations of the previous two functions and perhaps implement
/// > similar logic yourself.
///
/// # Examples
///
/// ```no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio::fs::File;
/// use tokio_tar::Archive;
/// use tokio_stream::*;
///
/// let mut ar = Archive::new(File::open("foo.tar").await?);
/// let mut entries = ar.entries()?;
/// let mut i = 0;
/// while let Some(file) = entries.next().await {
/// let mut file = file?;
/// file.unpack(format!("file-{}", i)).await?;
/// i += 1;
/// }
/// #
/// # Ok(()) }) }
/// ```
pub async fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> {
self.fields.unpack(None, dst.as_ref()).await
}
/// Extracts this file under the specified path, avoiding security issues.
///
/// This function will write the entire contents of this file into the
/// location obtained by appending the path of this file in the archive to
/// `dst`, creating any intermediate directories if needed. Metadata will
/// also be propagated to the path `dst`. Any existing file at the location
/// `dst` will be overwritten.
///
/// This function carefully avoids writing outside of `dst`. If the file has
/// a '..' in its path, this function will skip it and return false.
///
/// # Examples
///
/// ```no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #
/// use tokio::{fs::File, stream::*};
/// use tokio_tar::Archive;
/// use tokio_stream::*;
///
/// let mut ar = Archive::new(File::open("foo.tar").await?);
/// let mut entries = ar.entries()?;
/// let mut i = 0;
/// while let Some(file) = entries.next().await {
/// let mut file = file.unwrap();
/// file.unpack_in("target").await?;
/// i += 1;
/// }
/// #
/// # Ok(()) }) }
/// ```
pub async fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> {
self.fields.unpack_in(dst.as_ref()).await
}
/// Indicate whether extended file attributes (xattrs on Unix) are preserved
/// when unpacking this entry.
///
/// This flag is disabled by default and is currently only implemented on
/// Unix using xattr support. This may eventually be implemented for
/// Windows, however, if other archive implementations are found which do
/// this as well.
pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
self.fields.unpack_xattrs = unpack_xattrs;
}
/// Indicate whether extended permissions (like suid on Unix) are preserved
/// when unpacking this entry.
///
/// This flag is disabled by default and is currently only implemented on
/// Unix.
pub fn set_preserve_permissions(&mut self, preserve: bool) {
self.fields.preserve_permissions = preserve;
}
/// Indicate whether access time information is preserved when unpacking
/// this entry.
///
/// This flag is enabled by default.
pub fn set_preserve_mtime(&mut self, preserve: bool) {
self.fields.preserve_mtime = preserve;
}
}
impl<R: Read + Unpin> Read for Entry<R> {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
into: &mut io::ReadBuf<'_>,
) -> Poll<io::Result<()>> {
Pin::new(&mut self.as_mut().fields).poll_read(cx, into)
}
}
impl<R: Read + Unpin> EntryFields<R> {
pub fn from(entry: Entry<R>) -> Self {
entry.fields
}
pub fn into_entry(self) -> Entry<R> {
Entry {
fields: self,
_ignored: marker::PhantomData,
}
}
pub(crate) fn poll_read_all(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<io::Result<Vec<u8>>> {
// Preallocate some data but don't let ourselves get too crazy now.
let cap = cmp::min(self.size, 128 * 1024);
let mut buf = Vec::with_capacity(cap as usize);
// Copied from futures::ReadToEnd
match futures_core::ready!(poll_read_all_internal(self, cx, &mut buf)) {
Ok(_) => Poll::Ready(Ok(buf)),
Err(err) => Poll::Ready(Err(err)),
}
}
pub async fn read_all(&mut self) -> io::Result<Vec<u8>> {
// Preallocate some data but don't let ourselves get too crazy now.
let cap = cmp::min(self.size, 128 * 1024);
let mut v = Vec::with_capacity(cap as usize);
self.read_to_end(&mut v).await.map(|_| v)
}
fn path(&self) -> io::Result<Cow<'_, Path>> {
bytes2path(self.path_bytes())
}
fn path_bytes(&self) -> Cow<[u8]> {
match self.long_pathname {
Some(ref bytes) => {
if let Some(&0) = bytes.last() {
Cow::Borrowed(&bytes[..bytes.len() - 1])
} else {
Cow::Borrowed(bytes)
}
}
None => {
if let Some(ref pax) = self.pax_extensions {
let pax = pax_extensions(pax)
.filter_map(|f| f.ok())
.find(|f| f.key_bytes() == b"path")
.map(|f| f.value_bytes());
if let Some(field) = pax {
return Cow::Borrowed(field);
}
}
self.header.path_bytes()
}
}
}
/// Gets the path in a "lossy" way, used for error reporting ONLY.
fn path_lossy(&self) -> String {
String::from_utf8_lossy(&self.path_bytes()).to_string()
}
fn link_name(&self) -> io::Result<Option<Cow<Path>>> {
match self.link_name_bytes() {
Some(bytes) => bytes2path(bytes).map(Some),
None => Ok(None),
}
}
fn link_name_bytes(&self) -> Option<Cow<[u8]>> {
match self.long_linkname {
Some(ref bytes) => {
if let Some(&0) = bytes.last() {
Some(Cow::Borrowed(&bytes[..bytes.len() - 1]))
} else {
Some(Cow::Borrowed(bytes))
}
}
None => self.header.link_name_bytes(),
}
}
async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
if self.pax_extensions.is_none() {
if !self.header.entry_type().is_pax_global_extensions()
&& !self.header.entry_type().is_pax_local_extensions()
{
return Ok(None);
}
self.pax_extensions = Some(self.read_all().await?);
}
Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap())))
}
async fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> {
// Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
// * Leading '/'s are trimmed. For example, `///test` is treated as
// `test`.
// * If the filename contains '..', then the file is skipped when
// extracting the tarball.
// * '//' within a filename is effectively skipped. An error is
// logged, but otherwise the effect is as if any two or more
// adjacent '/'s within the filename were consolidated into one
// '/'.
//
// Most of this is handled by the `path` module of the standard
// library, but we specially handle a few cases here as well.
let mut file_dst = dst.to_path_buf();
{
let path = self.path().map_err(|e| {
TarError::new(
&format!("invalid path in entry header: {}", self.path_lossy()),
e,
)
})?;
for part in path.components() {
match part {
// Leading '/' characters, root paths, and '.'
// components are just ignored and treated as "empty
// components"
Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,
// If any part of the filename is '..', then skip over
// unpacking the file to prevent directory traversal
// security issues. See, e.g.: CVE-2001-1267,
// CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
Component::ParentDir => return Ok(false),
Component::Normal(part) => file_dst.push(part),
}
}
}
// Skip cases where only slashes or '.' parts were seen, because
// this is effectively an empty filename.
if *dst == *file_dst {
return Ok(true);
}
// Skip entries without a parent (i.e. outside of FS root)
let parent = match file_dst.parent() {
Some(p) => p,
None => return Ok(false),
};
if parent.symlink_metadata().is_err() {
println!("create_dir_all {:?}", parent);
fs::create_dir_all(&parent).await.map_err(|e| {
TarError::new(&format!("failed to create `{}`", parent.display()), e)
})?;
}
let canon_target = self.validate_inside_dst(&dst, parent).await?;
self.unpack(Some(&canon_target), &file_dst)
.await
.map_err(|e| TarError::new(&format!("failed to unpack `{}`", file_dst.display()), e))?;
Ok(true)
}
/// Unpack as destination directory `dst`.
async fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> {
// If the directory already exists just let it slide
match fs::create_dir(dst).await {
Ok(()) => Ok(()),
Err(err) => {
if err.kind() == ErrorKind::AlreadyExists {
let prev = fs::metadata(dst).await;
if prev.map(|m| m.is_dir()).unwrap_or(false) {
return Ok(());
}
}
Err(Error::new(
err.kind(),
format!("{} when creating dir {}", err, dst.display()),
))
}
}
}
/// Returns access to the header of this entry in the archive.
async fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> {
let kind = self.header.entry_type();
if kind.is_dir() {
self.unpack_dir(dst).await?;
if let Ok(mode) = self.header.mode() {
set_perms(dst, None, mode, self.preserve_permissions).await?;
}
return Ok(Unpacked::Other);
} else if kind.is_hard_link() || kind.is_symlink() {
let src = match self.link_name()? {
Some(name) => name,
None => {
return Err(other(&format!(
"hard link listed for {} but no link name found",
String::from_utf8_lossy(self.header.as_bytes())
)));
}
};
if src.iter().count() == 0 {
return Err(other(&format!(
"symlink destination for {} is empty",
String::from_utf8_lossy(self.header.as_bytes())
)));
}
if kind.is_hard_link() {
let link_src = match target_base {
// If we're unpacking within a directory then ensure that
// the destination of this hard link is both present and
// inside our own directory. This is needed because we want
// to make sure to not overwrite anything outside the root.
//
// Note that this logic is only needed for hard links
// currently. With symlinks the `validate_inside_dst` which
// happens before this method as part of `unpack_in` will
// use canonicalization to ensure this guarantee. For hard
// links though they're canonicalized to their existing path
// so we need to validate at this time.
Some(ref p) => {
let link_src = p.join(src);
self.validate_inside_dst(p, &link_src).await?;
link_src
}
None => src.into_owned(),
};
fs::hard_link(&link_src, dst).await.map_err(|err| {
Error::new(
err.kind(),
format!(
"{} when hard linking {} to {}",
err,
link_src.display(),
dst.display()
),
)
})?;
} else {
symlink(&src, dst).await.map_err(|err| {
Error::new(
err.kind(),
format!(
"{} when symlinking {} to {}",
err,
src.display(),
dst.display()
),
)
})?;
};
return Ok(Unpacked::Other);
#[cfg(target_arch = "wasm32")]
#[allow(unused_variables)]
async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
}
#[cfg(windows)]
async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
tokio::fs::os::windows::symlink_file(src, dst).await
}
#[cfg(any(unix, target_os = "redox"))]
async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
tokio::fs::symlink(src, dst).await
}
} else if kind.is_pax_global_extensions()
|| kind.is_pax_local_extensions()
|| kind.is_gnu_longname()
|| kind.is_gnu_longlink()
{
return Ok(Unpacked::Other);
};
// Old BSD-tar compatibility.
// Names that have a trailing slash should be treated as a directory.
// Only applies to old headers.
if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") {
self.unpack_dir(dst).await?;
if let Ok(mode) = self.header.mode() {
set_perms(dst, None, mode, self.preserve_permissions).await?;
}
return Ok(Unpacked::Other);
}
// Note the lack of `else` clause above. According to the FreeBSD
// documentation:
//
// > A POSIX-compliant implementation must treat any unrecognized
// > typeflag value as a regular file.
//
// As a result if we don't recognize the kind we just write out the file
// as we would normally.
// Ensure we write a new file rather than overwriting in-place which
// is attackable; if an existing file is found unlink it.
async fn open(dst: &Path) -> io::Result<fs::File> {
OpenOptions::new()
.write(true)
.create_new(true)
.open(dst)
.await
}
let mut f = async {
let mut f = match open(dst).await {
Ok(f) => Ok(f),
Err(err) => {
if err.kind() != ErrorKind::AlreadyExists {
Err(err)
} else {
match fs::remove_file(dst).await {
Ok(()) => open(dst).await,
Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst).await,
Err(e) => Err(e),
}
}
}
}?;
for io in self.data.drain(..) {
match io {
EntryIo::Data(mut d) => {
let expected = d.limit();
if io::copy(&mut d, &mut f).await? != expected {
return Err(other("failed to write entire file"));
}
}
EntryIo::Pad(d) => {
// TODO: checked cast to i64
let to = SeekFrom::Current(d.limit() as i64);
let size = f.seek(to).await?;
f.set_len(size).await?;
}
}
}
Ok::<fs::File, io::Error>(f)
}
.await
.map_err(|e| {
let header = self.header.path_bytes();
TarError::new(
&format!(
"failed to unpack `{}` into `{}`",
String::from_utf8_lossy(&header),
dst.display()
),
e,
)
})?;
if self.preserve_mtime {
if let Ok(mtime) = self.header.mtime() {
let mtime = FileTime::from_unix_time(mtime as i64, 0);
filetime::set_file_times(&dst, mtime, mtime).map_err(|e| {
TarError::new(&format!("failed to set mtime for `{}`", dst.display()), e)
})?;
}
}
if let Ok(mode) = self.header.mode() {
set_perms(dst, Some(&mut f), mode, self.preserve_permissions).await?;
}
if self.unpack_xattrs {
set_xattrs(self, dst).await?;
}
return Ok(Unpacked::File(f));
async fn set_perms(
dst: &Path,
f: Option<&mut fs::File>,
mode: u32,
preserve: bool,
) -> Result<(), TarError> {
_set_perms(dst, f, mode, preserve).await.map_err(|e| {
TarError::new(
&format!(
"failed to set permissions to {:o} \
for `{}`",
mode,
dst.display()
),
e,
)
})
}
#[cfg(any(unix, target_os = "redox"))]
async fn _set_perms(
dst: &Path,
f: Option<&mut fs::File>,
mode: u32,
preserve: bool,
) -> io::Result<()> {
use std::os::unix::prelude::*;
let mode = if preserve { mode } else { mode & 0o777 };
let perm = std::fs::Permissions::from_mode(mode as _);
match f {
Some(f) => f.set_permissions(perm).await,
None => fs::set_permissions(dst, perm).await,
}
}
#[cfg(windows)]
async fn _set_perms(
dst: &Path,
f: Option<&mut fs::File>,
mode: u32,
_preserve: bool,
) -> io::Result<()> {
if mode & 0o200 == 0o200 {
return Ok(());
}
match f {
Some(f) => {
let mut perm = f.metadata().await?.permissions();
perm.set_readonly(true);
f.set_permissions(perm).await
}
None => {
let mut perm = fs::metadata(dst).await?.permissions();
perm.set_readonly(true);
fs::set_permissions(dst, perm).await
}
}
}
#[cfg(target_arch = "wasm32")]
#[allow(unused_variables)]
async fn _set_perms(
dst: &Path,
f: Option<&mut fs::File>,
mode: u32,
_preserve: bool,
) -> io::Result<()> {
Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
}
#[cfg(all(unix, feature = "xattr"))]
async fn set_xattrs<R: Read + Unpin>(
me: &mut EntryFields<R>,
dst: &Path,
) -> io::Result<()> {
use std::{ffi::OsStr, os::unix::prelude::*};
let exts = match me.pax_extensions().await {
Ok(Some(e)) => e,
_ => return Ok(()),
};
let exts = exts
.filter_map(|e| e.ok())
.filter_map(|e| {
let key = e.key_bytes();
let prefix = b"SCHILY.xattr.";
if key.starts_with(prefix) {
Some((&key[prefix.len()..], e))
} else {
None
}
})
.map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes()));
for (key, value) in exts {
xattr::set(dst, key, value).map_err(|e| {
TarError::new(
&format!(
"failed to set extended \
attributes to {}. \
Xattrs: key={:?}, value={:?}.",
dst.display(),
key,
String::from_utf8_lossy(value)
),
e,
)
})?;
}
Ok(())
}
// Windows does not completely support posix xattrs
// https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT
#[cfg(any(
windows,
target_os = "redox",
not(feature = "xattr"),
target_arch = "wasm32"
))]
async fn set_xattrs<R: Read + Unpin>(_: &mut EntryFields<R>, _: &Path) -> io::Result<()> {
Ok(())
}
}
async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> {
// Abort if target (canonical) parent is outside of `dst`
let canon_parent = file_dst.canonicalize().map_err(|err| {
Error::new(
err.kind(),
format!("{} while canonicalizing {}", err, file_dst.display()),
)
})?;
let canon_target = dst.canonicalize().map_err(|err| {
Error::new(
err.kind(),
format!("{} while canonicalizing {}", err, dst.display()),
)
})?;
if !canon_parent.starts_with(&canon_target) {
let err = TarError::new(
&format!(
"trying to unpack outside of destination path: {}",
canon_target.display()
),
// TODO: use ErrorKind::InvalidInput here? (minor breaking change)
Error::new(ErrorKind::Other, "Invalid argument"),
);
return Err(err.into());
}
Ok(canon_target)
}
}
impl<R: Read + Unpin> Read for EntryFields<R> {
fn poll_read(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
into: &mut io::ReadBuf<'_>,
) -> Poll<io::Result<()>> {
let mut this = self.get_mut();
loop {
if this.read_state.is_none() {
if this.data.is_empty() {
this.read_state = None;
} else {
let data = &mut this.data;
this.read_state = Some(data.remove(0));
}
}
if let Some(ref mut io) = &mut this.read_state {
let ret = Pin::new(io).poll_read(cx, into);
match ret {
Poll::Ready(Ok(())) if into.filled().is_empty() => {
this.read_state = None;
if this.data.is_empty() {
return Poll::Ready(Ok(()));
}
continue;
}
Poll::Ready(Ok(())) => {
return Poll::Ready(Ok(()));
}
Poll::Ready(Err(err)) => {
return Poll::Ready(Err(err));
}
Poll::Pending => {
return Poll::Pending;
}
}
} else {
// Unable to pull another value from `data`, so we are done.
return Poll::Ready(Ok(()));
}
}
}
}
impl<R: Read + Unpin> Read for EntryIo<R> {
fn poll_read(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
into: &mut io::ReadBuf<'_>,
) -> Poll<io::Result<()>> {
match self.get_mut() {
EntryIo::Pad(ref mut io) => Pin::new(io).poll_read(cx, into),
EntryIo::Data(ref mut io) => Pin::new(io).poll_read(cx, into),
}
}
}
struct Guard<'a> {
buf: &'a mut Vec<u8>,
len: usize,
}
impl Drop for Guard<'_> {
fn drop(&mut self) {
unsafe {
self.buf.set_len(self.len);
}
}
}
fn poll_read_all_internal<R: Read + ?Sized>(
mut rd: Pin<&mut R>,
cx: &mut Context<'_>,
buf: &mut Vec<u8>,
) -> Poll<io::Result<usize>> {
let mut g = Guard {
len: buf.len(),
buf,
};
let ret;
loop {
if g.len == g.buf.len() {
unsafe {
g.buf.reserve(32);
let capacity = g.buf.capacity();
g.buf.set_len(capacity);
let buf = &mut g.buf[g.len..];
std::ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len());
}
}
let mut read_buf = io::ReadBuf::new(&mut g.buf[g.len..]);
match futures_core::ready!(rd.as_mut().poll_read(cx, &mut read_buf)) {
Ok(()) if read_buf.filled().is_empty() => {
ret = Poll::Ready(Ok(g.len));
break;
}
Ok(()) => g.len += read_buf.filled().len(),
Err(e) => {
ret = Poll::Ready(Err(e));
break;
}
}
}
ret
}

189
src/entry_type.rs Normal file
View File

@ -0,0 +1,189 @@
// See https://en.wikipedia.org/wiki/Tar_%28computing%29#UStar_format
/// Indicate for the type of file described by a header.
///
/// Each `Header` has an `entry_type` method returning an instance of this type
/// which can be used to inspect what the header is describing.
/// A non-exhaustive enum representing the possible entry types
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum EntryType {
/// Regular file
Regular,
/// Hard link
Link,
/// Symbolic link
Symlink,
/// Character device
Char,
/// Block device
Block,
/// Directory
Directory,
/// Named pipe (fifo)
Fifo,
/// Implementation-defined 'high-performance' type, treated as regular file
Continuous,
/// GNU extension - long file name
GNULongName,
/// GNU extension - long link name (link target)
GNULongLink,
/// GNU extension - sparse file
GNUSparse,
/// Global extended header
XGlobalHeader,
/// Extended Header
XHeader,
/// Unknown header,
Other(u8),
}
impl EntryType {
/// Creates a new entry type from a raw byte.
///
/// Note that the other named constructors of entry type may be more
/// appropriate to create a file type from.
pub fn new(byte: u8) -> EntryType {
match byte {
b'\x00' | b'0' => EntryType::Regular,
b'1' => EntryType::Link,
b'2' => EntryType::Symlink,
b'3' => EntryType::Char,
b'4' => EntryType::Block,
b'5' => EntryType::Directory,
b'6' => EntryType::Fifo,
b'7' => EntryType::Continuous,
b'x' => EntryType::XHeader,
b'g' => EntryType::XGlobalHeader,
b'L' => EntryType::GNULongName,
b'K' => EntryType::GNULongLink,
b'S' => EntryType::GNUSparse,
other => EntryType::Other(other),
}
}
/// Returns the raw underlying byte that this entry type represents.
pub fn as_byte(self) -> u8 {
match self {
EntryType::Regular => b'0',
EntryType::Link => b'1',
EntryType::Symlink => b'2',
EntryType::Char => b'3',
EntryType::Block => b'4',
EntryType::Directory => b'5',
EntryType::Fifo => b'6',
EntryType::Continuous => b'7',
EntryType::XHeader => b'x',
EntryType::XGlobalHeader => b'g',
EntryType::GNULongName => b'L',
EntryType::GNULongLink => b'K',
EntryType::GNUSparse => b'S',
EntryType::Other(other) => other,
}
}
/// Creates a new entry type representing a regular file.
pub fn file() -> EntryType {
EntryType::Regular
}
/// Creates a new entry type representing a hard link.
pub fn hard_link() -> EntryType {
EntryType::Link
}
/// Creates a new entry type representing a symlink.
pub fn symlink() -> EntryType {
EntryType::Symlink
}
/// Creates a new entry type representing a character special device.
pub fn character_special() -> EntryType {
EntryType::Char
}
/// Creates a new entry type representing a block special device.
pub fn block_special() -> EntryType {
EntryType::Block
}
/// Creates a new entry type representing a directory.
pub fn dir() -> EntryType {
EntryType::Directory
}
/// Creates a new entry type representing a FIFO.
pub fn fifo() -> EntryType {
EntryType::Fifo
}
/// Creates a new entry type representing a contiguous file.
pub fn contiguous() -> EntryType {
EntryType::Continuous
}
/// Returns whether this type represents a regular file.
pub fn is_file(self) -> bool {
self == EntryType::Regular
}
/// Returns whether this type represents a hard link.
pub fn is_hard_link(self) -> bool {
self == EntryType::Link
}
/// Returns whether this type represents a symlink.
pub fn is_symlink(self) -> bool {
self == EntryType::Symlink
}
/// Returns whether this type represents a character special device.
pub fn is_character_special(self) -> bool {
self == EntryType::Char
}
/// Returns whether this type represents a block special device.
pub fn is_block_special(self) -> bool {
self == EntryType::Block
}
/// Returns whether this type represents a directory.
pub fn is_dir(self) -> bool {
self == EntryType::Directory
}
/// Returns whether this type represents a FIFO.
pub fn is_fifo(self) -> bool {
self == EntryType::Fifo
}
/// Returns whether this type represents a contiguous file.
pub fn is_contiguous(self) -> bool {
self == EntryType::Continuous
}
/// Returns whether this type represents a GNU long name header.
pub fn is_gnu_longname(self) -> bool {
self == EntryType::GNULongName
}
/// Returns whether this type represents a GNU sparse header.
pub fn is_gnu_sparse(self) -> bool {
self == EntryType::GNUSparse
}
/// Returns whether this type represents a GNU long link header.
pub fn is_gnu_longlink(self) -> bool {
self == EntryType::GNULongLink
}
/// Returns whether this type represents a GNU long name header.
pub fn is_pax_global_extensions(self) -> bool {
self == EntryType::XGlobalHeader
}
/// Returns whether this type represents a GNU long link header.
pub fn is_pax_local_extensions(self) -> bool {
self == EntryType::XHeader
}
}

40
src/error.rs Normal file
View File

@ -0,0 +1,40 @@
use std::{error, fmt};
use tokio::io::{self, Error};
#[derive(Debug)]
pub struct TarError {
desc: String,
io: io::Error,
}
impl TarError {
pub fn new(desc: &str, err: Error) -> TarError {
TarError {
desc: desc.to_string(),
io: err,
}
}
}
impl error::Error for TarError {
fn description(&self) -> &str {
&self.desc
}
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
Some(&self.io)
}
}
impl fmt::Display for TarError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.desc.fmt(f)
}
}
impl From<TarError> for Error {
fn from(t: TarError) -> Error {
Error::new(t.io.kind(), t)
}
}

1620
src/header.rs Normal file

File diff suppressed because it is too large Load Diff

45
src/lib.rs Normal file
View File

@ -0,0 +1,45 @@
//! A library for reading and writing TAR archives in an async fashion.
//!
//! This library provides utilities necessary to manage [TAR archives][1]
//! abstracted over a reader or writer. Great strides are taken to ensure that
//! an archive is never required to be fully resident in memory, and all objects
//! provide largely a streaming interface to read bytes from.
//!
//! [1]: http://en.wikipedia.org/wiki/Tar_%28computing%29
// More docs about the detailed tar format can also be found here:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
// NB: some of the coding patterns and idioms here may seem a little strange.
// This is currently attempting to expose a super generic interface while
// also not forcing clients to codegen the entire crate each time they use
// it. To that end lots of work is done to ensure that concrete
// implementations are all found in this crate and the generic functions are
// all just super thin wrappers (e.g. easy to codegen).
#![deny(missing_docs)]
use std::io::{Error, ErrorKind};
pub use crate::{
archive::{Archive, ArchiveBuilder, Entries},
builder::Builder,
entry::{Entry, Unpacked},
entry_type::EntryType,
header::{
GnuExtSparseHeader, GnuHeader, GnuSparseHeader, Header, HeaderMode, OldHeader, UstarHeader,
},
pax::{PaxExtension, PaxExtensions},
};
mod archive;
mod builder;
mod entry;
mod entry_type;
mod error;
mod header;
mod pax;
fn other(msg: &str) -> Error {
Error::new(ErrorKind::Other, msg)
}

88
src/pax.rs Normal file
View File

@ -0,0 +1,88 @@
use std::{slice, str};
use tokio::io;
use crate::other;
/// An iterator over the pax extensions in an archive entry.
///
/// This iterator yields structures which can themselves be parsed into
/// key/value pairs.
pub struct PaxExtensions<'entry> {
data: slice::Split<'entry, u8, fn(&u8) -> bool>,
}
/// A key/value pair corresponding to a pax extension.
pub struct PaxExtension<'entry> {
key: &'entry [u8],
value: &'entry [u8],
}
pub fn pax_extensions(a: &[u8]) -> PaxExtensions {
PaxExtensions {
data: a.split(|a| *a == b'\n'),
}
}
impl<'entry> Iterator for PaxExtensions<'entry> {
type Item = io::Result<PaxExtension<'entry>>;
fn next(&mut self) -> Option<io::Result<PaxExtension<'entry>>> {
let line = match self.data.next() {
Some(line) if line.is_empty() => return None,
Some(line) => line,
None => return None,
};
Some(
line.iter()
.position(|b| *b == b' ')
.and_then(|i| {
str::from_utf8(&line[..i])
.ok()
.and_then(|len| len.parse::<usize>().ok().map(|j| (i + 1, j)))
})
.and_then(|(kvstart, reported_len)| {
if line.len() + 1 == reported_len {
line[kvstart..]
.iter()
.position(|b| *b == b'=')
.map(|equals| (kvstart, equals))
} else {
None
}
})
.map(|(kvstart, equals)| PaxExtension {
key: &line[kvstart..kvstart + equals],
value: &line[kvstart + equals + 1..],
})
.ok_or_else(|| other("malformed pax extension")),
)
}
}
impl<'entry> PaxExtension<'entry> {
/// Returns the key for this key/value pair parsed as a string.
///
/// May fail if the key isn't actually utf-8.
pub fn key(&self) -> Result<&'entry str, str::Utf8Error> {
str::from_utf8(self.key)
}
/// Returns the underlying raw bytes for the key of this key/value pair.
pub fn key_bytes(&self) -> &'entry [u8] {
self.key
}
/// Returns the value for this key/value pair parsed as a string.
///
/// May fail if the value isn't actually utf-8.
pub fn value(&self) -> Result<&'entry str, str::Utf8Error> {
str::from_utf8(self.value)
}
/// Returns the underlying raw bytes for this value of this key/value pair.
pub fn value_bytes(&self) -> &'entry [u8] {
self.value
}
}

1117
tests/all.rs Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
tests/archives/link.tar Normal file

Binary file not shown.

BIN
tests/archives/pax.tar Normal file

Binary file not shown.

BIN
tests/archives/pax2.tar Normal file

Binary file not shown.

Binary file not shown.

BIN
tests/archives/simple.tar Normal file

Binary file not shown.

Binary file not shown.

BIN
tests/archives/spaces.tar Normal file

Binary file not shown.

BIN
tests/archives/sparse.tar Normal file

Binary file not shown.

BIN
tests/archives/xattrs.tar Normal file

Binary file not shown.

350
tests/entry.rs Normal file
View File

@ -0,0 +1,350 @@
extern crate tokio_tar as async_tar;
extern crate tempfile;
use tokio::{fs::File, io::AsyncReadExt};
use tokio_stream::*;
use tempfile::Builder;
macro_rules! t {
($e:expr) => {
match $e {
Ok(v) => v,
Err(e) => panic!("{} returned {}", stringify!($e), e),
}
};
}
#[tokio::test]
async fn absolute_symlink() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Symlink);
t!(header.set_path("foo"));
t!(header.set_link_name("/bar"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
t!(ar.unpack(td.path()).await);
t!(td.path().join("foo").symlink_metadata());
let mut ar = async_tar::Archive::new(&bytes[..]);
let mut entries = t!(ar.entries());
let entry = t!(entries.next().await.unwrap());
assert_eq!(&*entry.link_name_bytes().unwrap(), b"/bar");
}
#[tokio::test]
async fn absolute_hardlink() {
let td = t!(Builder::new().prefix("tar").tempdir());
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Link);
t!(header.set_path("bar"));
// This absolute path under tempdir will be created at unpack time
t!(header.set_link_name(td.path().join("foo")));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
t!(ar.unpack(td.path()).await);
t!(td.path().join("foo").metadata());
t!(td.path().join("bar").metadata());
}
#[tokio::test]
async fn relative_hardlink() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Link);
t!(header.set_path("bar"));
t!(header.set_link_name("foo"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
t!(ar.unpack(td.path()).await);
t!(td.path().join("foo").metadata());
t!(td.path().join("bar").metadata());
}
#[tokio::test]
async fn absolute_link_deref_error() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Symlink);
t!(header.set_path("foo"));
t!(header.set_link_name("/"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo/bar"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
assert!(ar.unpack(td.path()).await.is_err());
t!(td.path().join("foo").symlink_metadata());
assert!(File::open(td.path().join("foo").join("bar")).await.is_err());
}
#[tokio::test]
async fn relative_link_deref_error() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Symlink);
t!(header.set_path("foo"));
t!(header.set_link_name("../../../../"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo/bar"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
assert!(ar.unpack(td.path()).await.is_err());
t!(td.path().join("foo").symlink_metadata());
assert!(File::open(td.path().join("foo").join("bar")).await.is_err());
}
#[tokio::test]
#[cfg(unix)]
async fn directory_maintains_permissions() {
use ::std::os::unix::fs::PermissionsExt;
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Directory);
t!(header.set_path("foo"));
header.set_mode(0o777);
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
t!(ar.unpack(td.path()).await);
let f = t!(File::open(td.path().join("foo")).await);
let md = t!(f.metadata().await);
assert!(md.is_dir());
assert_eq!(md.permissions().mode(), 0o40777);
}
#[tokio::test]
#[cfg(not(windows))] // dangling symlinks have weird permissions
async fn modify_link_just_created() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Symlink);
t!(header.set_path("foo"));
t!(header.set_link_name("bar"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("bar/foo"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo/bar"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
t!(ar.unpack(td.path()).await);
t!(File::open(td.path().join("bar/foo")).await);
t!(File::open(td.path().join("bar/bar")).await);
t!(File::open(td.path().join("foo/foo")).await);
t!(File::open(td.path().join("foo/bar")).await);
}
#[tokio::test]
async fn parent_paths_error() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Symlink);
t!(header.set_path("foo"));
t!(header.set_link_name(".."));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo/bar"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
assert!(ar.unpack(td.path()).await.is_err());
t!(td.path().join("foo").symlink_metadata());
assert!(File::open(td.path().join("foo").join("bar")).await.is_err());
}
#[tokio::test]
#[cfg(unix)]
async fn good_parent_paths_ok() {
use std::path::PathBuf;
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Symlink);
t!(header.set_path(PathBuf::from("foo").join("bar")));
t!(header.set_link_name(PathBuf::from("..").join("bar")));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("bar"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
t!(ar.unpack(td.path()).await);
t!(td.path().join("foo").join("bar").read_link());
let dst = t!(td.path().join("foo").join("bar").canonicalize());
t!(File::open(dst).await);
}
#[tokio::test]
async fn modify_hard_link_just_created() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Link);
t!(header.set_path("foo"));
t!(header.set_link_name("../test"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(1);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo"));
header.set_cksum();
t!(ar.append(&header, &b"x"[..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
let test = td.path().join("test");
t!(File::create(&test).await);
let dir = td.path().join("dir");
assert!(ar.unpack(&dir).await.is_err());
let mut contents = Vec::new();
t!(t!(File::open(&test).await).read_to_end(&mut contents).await);
assert_eq!(contents.len(), 0);
}
#[tokio::test]
async fn modify_symlink_just_created() {
let mut ar = async_tar::Builder::new(Vec::new());
let mut header = async_tar::Header::new_gnu();
header.set_size(0);
header.set_entry_type(async_tar::EntryType::Symlink);
t!(header.set_path("foo"));
t!(header.set_link_name("../test"));
header.set_cksum();
t!(ar.append(&header, &[][..]).await);
let mut header = async_tar::Header::new_gnu();
header.set_size(1);
header.set_entry_type(async_tar::EntryType::Regular);
t!(header.set_path("foo"));
header.set_cksum();
t!(ar.append(&header, &b"x"[..]).await);
let bytes = t!(ar.into_inner().await);
let mut ar = async_tar::Archive::new(&bytes[..]);
let td = t!(Builder::new().prefix("tar").tempdir());
let test = td.path().join("test");
t!(File::create(&test).await);
let dir = td.path().join("dir");
t!(ar.unpack(&dir).await);
let mut contents = Vec::new();
t!(t!(File::open(&test).await).read_to_end(&mut contents).await);
assert_eq!(contents.len(), 0);
}

243
tests/header/mod.rs Normal file
View File

@ -0,0 +1,243 @@
#![allow(clippy::cognitive_complexity)]
use std::{
fs::{self, File},
io::Write,
iter, mem,
path::Path,
thread, time,
};
use tempfile::Builder;
use async_tar::{GnuHeader, Header, HeaderMode};
#[test]
fn default_gnu() {
let mut h = Header::new_gnu();
assert!(h.as_gnu().is_some());
assert!(h.as_gnu_mut().is_some());
assert!(h.as_ustar().is_none());
assert!(h.as_ustar_mut().is_none());
}
#[test]
fn goto_old() {
let mut h = Header::new_old();
assert!(h.as_gnu().is_none());
assert!(h.as_gnu_mut().is_none());
assert!(h.as_ustar().is_none());
assert!(h.as_ustar_mut().is_none());
}
#[test]
fn goto_ustar() {
let mut h = Header::new_ustar();
assert!(h.as_gnu().is_none());
assert!(h.as_gnu_mut().is_none());
assert!(h.as_ustar().is_some());
assert!(h.as_ustar_mut().is_some());
}
#[test]
fn link_name() {
let mut h = Header::new_gnu();
t!(h.set_link_name("foo"));
assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo"));
t!(h.set_link_name("../foo"));
assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("../foo"));
t!(h.set_link_name("foo/bar"));
assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/bar"));
t!(h.set_link_name("foo\\ba"));
if cfg!(windows) {
assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/ba"));
} else {
assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\ba"));
}
let name = "foo\\bar\0";
for (slot, val) in h.as_old_mut().linkname.iter_mut().zip(name.as_bytes()) {
*slot = *val;
}
assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\bar"));
assert!(h.set_link_name("\0").is_err());
}
#[test]
fn mtime() {
let h = Header::new_gnu();
assert_eq!(t!(h.mtime()), 0);
let h = Header::new_ustar();
assert_eq!(t!(h.mtime()), 0);
let h = Header::new_old();
assert_eq!(t!(h.mtime()), 0);
}
#[test]
fn user_and_group_name() {
let mut h = Header::new_gnu();
t!(h.set_username("foo"));
t!(h.set_groupname("bar"));
assert_eq!(t!(h.username()), Some("foo"));
assert_eq!(t!(h.groupname()), Some("bar"));
h = Header::new_ustar();
t!(h.set_username("foo"));
t!(h.set_groupname("bar"));
assert_eq!(t!(h.username()), Some("foo"));
assert_eq!(t!(h.groupname()), Some("bar"));
h = Header::new_old();
assert_eq!(t!(h.username()), None);
assert_eq!(t!(h.groupname()), None);
assert!(h.set_username("foo").is_err());
assert!(h.set_groupname("foo").is_err());
}
#[test]
fn dev_major_minor() {
let mut h = Header::new_gnu();
t!(h.set_device_major(1));
t!(h.set_device_minor(2));
assert_eq!(t!(h.device_major()), Some(1));
assert_eq!(t!(h.device_minor()), Some(2));
h = Header::new_ustar();
t!(h.set_device_major(1));
t!(h.set_device_minor(2));
assert_eq!(t!(h.device_major()), Some(1));
assert_eq!(t!(h.device_minor()), Some(2));
h.as_ustar_mut().unwrap().dev_minor[0] = 0x7f;
h.as_ustar_mut().unwrap().dev_major[0] = 0x7f;
assert!(h.device_major().is_err());
assert!(h.device_minor().is_err());
h.as_ustar_mut().unwrap().dev_minor[0] = b'g';
h.as_ustar_mut().unwrap().dev_major[0] = b'h';
assert!(h.device_major().is_err());
assert!(h.device_minor().is_err());
h = Header::new_old();
assert_eq!(t!(h.device_major()), None);
assert_eq!(t!(h.device_minor()), None);
assert!(h.set_device_major(1).is_err());
assert!(h.set_device_minor(1).is_err());
}
#[test]
fn set_path() {
let mut h = Header::new_gnu();
t!(h.set_path("foo"));
assert_eq!(t!(h.path()).to_str(), Some("foo"));
t!(h.set_path("foo/"));
assert_eq!(t!(h.path()).to_str(), Some("foo/"));
t!(h.set_path("foo/bar"));
assert_eq!(t!(h.path()).to_str(), Some("foo/bar"));
t!(h.set_path("foo\\bar"));
if cfg!(windows) {
assert_eq!(t!(h.path()).to_str(), Some("foo/bar"));
} else {
assert_eq!(t!(h.path()).to_str(), Some("foo\\bar"));
}
let long_name = iter::repeat("foo").take(100).collect::<String>();
let medium1 = iter::repeat("foo").take(52).collect::<String>();
let medium2 = iter::repeat("fo/").take(52).collect::<String>();
assert!(h.set_path(&long_name).is_err());
assert!(h.set_path(&medium1).is_err());
assert!(h.set_path(&medium2).is_err());
assert!(h.set_path("\0").is_err());
h = Header::new_ustar();
t!(h.set_path("foo"));
assert_eq!(t!(h.path()).to_str(), Some("foo"));
assert!(h.set_path(&long_name).is_err());
assert!(h.set_path(&medium1).is_err());
t!(h.set_path(&medium2));
assert_eq!(t!(h.path()).to_str(), Some(&medium2[..]));
}
#[test]
fn set_ustar_path_hard() {
let mut h = Header::new_ustar();
let p = Path::new("a").join(&vec!["a"; 100].join(""));
t!(h.set_path(&p));
let path = t!(h.path());
let actual: &Path = path.as_ref();
assert_eq!(actual, p);
}
#[test]
fn set_metadata_deterministic() {
let td = t!(Builder::new().prefix("async-tar").tempdir());
let tmppath = td.path().join("tmpfile");
fn mk_header(path: &Path, readonly: bool) -> Header {
let mut file = t!(File::create(path));
t!(file.write_all(b"c"));
let mut perms = t!(file.metadata()).permissions();
perms.set_readonly(readonly);
t!(fs::set_permissions(path, perms));
let mut h = Header::new_ustar();
h.set_metadata_in_mode(&t!(path.metadata()), HeaderMode::Deterministic);
h
}
// Create "the same" File twice in a row, one second apart, with differing readonly values.
let one = mk_header(tmppath.as_path(), false);
thread::sleep(time::Duration::from_millis(1050));
let two = mk_header(tmppath.as_path(), true);
// Always expected to match.
assert_eq!(t!(one.size()), t!(two.size()));
assert_eq!(t!(one.path()), t!(two.path()));
assert_eq!(t!(one.mode()), t!(two.mode()));
// Would not match without `Deterministic`.
assert_eq!(t!(one.mtime()), t!(two.mtime()));
// TODO: No great way to validate that these would not be filled, but
// check them anyway.
assert_eq!(t!(one.uid()), t!(two.uid()));
assert_eq!(t!(one.gid()), t!(two.gid()));
}
#[test]
fn extended_numeric_format() {
let mut h: GnuHeader = unsafe { mem::zeroed() };
h.as_header_mut().set_size(42);
assert_eq!(h.size, [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 50, 0]);
h.as_header_mut().set_size(8_589_934_593);
assert_eq!(h.size, [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 1]);
h.size = [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 0];
assert_eq!(h.as_header().entry_size().unwrap(), 0x0002_0000_0000);
h.size = [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 51, 0];
assert_eq!(h.as_header().entry_size().unwrap(), 43);
h.as_header_mut().set_gid(42);
assert_eq!(h.gid, [48, 48, 48, 48, 48, 53, 50, 0]);
assert_eq!(h.as_header().gid().unwrap(), 42);
h.as_header_mut().set_gid(0x7fff_ffff_ffff_ffff);
assert_eq!(h.gid, [0xff; 8]);
assert_eq!(h.as_header().gid().unwrap(), 0x7fff_ffff_ffff_ffff);
h.uid = [0x80, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78];
assert_eq!(h.as_header().uid().unwrap(), 0x1234_5678);
h.mtime = [
0x80, 0, 0, 0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
];
assert_eq!(h.as_header().mtime().unwrap(), 0x0123_4567_89ab_cdef);
}
#[test]
fn byte_slice_conversion() {
let h = Header::new_gnu();
let b: &[u8] = h.as_bytes();
let b_conv: &[u8] = Header::from_byte_slice(h.as_bytes()).as_bytes();
assert_eq!(b, b_conv);
}