Skip to content

feat: implement Entry.asBytes() method for single file extraction #89

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,38 +67,52 @@ jobs:
build: TARGET_CC=clang yarn build --target x86_64-unknown-linux-gnu --use-napi-cross
- host: ubuntu-latest
target: x86_64-unknown-linux-musl
build: yarn build --target x86_64-unknown-linux-musl -x
build: |
yarn build --target x86_64-unknown-linux-musl -x
rm tar.linux-x64-gnu.node
- host: macos-latest
target: aarch64-apple-darwin
build: yarn build --target aarch64-apple-darwin
- host: ubuntu-latest
target: aarch64-unknown-linux-gnu
build: TARGET_CC=clang yarn build --target aarch64-unknown-linux-gnu --use-napi-cross
build: |
TARGET_CC=clang yarn build --target aarch64-unknown-linux-gnu --use-napi-cross
rm tar.linux-x64-gnu.node
- host: ubuntu-latest
target: armv7-unknown-linux-gnueabihf
build: TARGET_CC=clang yarn build --target armv7-unknown-linux-gnueabihf --use-napi-cross
build: |
TARGET_CC=clang yarn build --target armv7-unknown-linux-gnueabihf --use-napi-cross
rm tar.linux-x64-gnu.node
- host: ubuntu-latest
target: aarch64-linux-android
build: yarn build --target aarch64-linux-android
build: |
yarn build --target aarch64-linux-android
rm tar.linux-x64-gnu.node
- host: ubuntu-latest
target: armv7-linux-androideabi
build: yarn build --target armv7-linux-androideabi
build: |
yarn build --target armv7-linux-androideabi
rm tar.linux-x64-gnu.node
- host: ubuntu-latest
target: aarch64-unknown-linux-musl
build: yarn build --target aarch64-unknown-linux-musl -x
build: |
yarn build --target aarch64-unknown-linux-musl -x
rm tar.linux-x64-gnu.node
- host: ubuntu-latest
target: powerpc64le-unknown-linux-gnu
build: |
export CC=clang
export TARGET_CC=clang
yarn build --target powerpc64le-unknown-linux-gnu --use-napi-cross
rm tar.linux-x64-gnu.node
- host: ubuntu-latest
target: s390x-unknown-linux-gnu
build: |
export CC=clang
export TARGET_CC=clang
export CFLAGS="-fuse-ld=lld"
yarn build --target s390x-unknown-linux-gnu --use-napi-cross
rm tar.linux-x64-gnu.node
- host: windows-latest
target: aarch64-pc-windows-msvc
build: yarn build --target aarch64-pc-windows-msvc
Expand All @@ -109,6 +123,7 @@ jobs:
tar -xvf wasi-sdk-27.0-x86_64-linux.tar.gz
export WASI_SDK_PATH="$(pwd)/wasi-sdk-27.0-x86_64-linux"
yarn build --target wasm32-wasip1-threads
rm tar.linux-x64-gnu.node
name: stable - ${{ matrix.settings.target }} - node@22
runs-on: ${{ matrix.settings.host }}
steps:
Expand Down
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export class Entries {
}
export class Entry {
path(): string | null
asBytes(): Buffer
}
export class Archive {
/** Create a new archive with the underlying path. */
Expand All @@ -34,6 +35,32 @@ export class Archive {
}
```

## Extract Single File

You can extract a specific file from a tar archive without extracting the entire archive. This is useful for inspecting Docker OCI images or extracting specific configuration files:

```ts
import { Archive } from '@napi-rs/tar'

// Extract a single file (similar to: tar -x -O -f archive.tar filename)
/**
 * Returns the contents of the first entry whose path equals `targetPath`,
 * or `null` when the archive has no such entry.
 */
function extractFile(archivePath: string, targetPath: string): Buffer | null {
  const tarball = new Archive(archivePath)
  for (const item of tarball.entries()) {
    if (item.path() !== targetPath) {
      continue
    }
    // Read the bytes as soon as the matching entry is reached.
    return item.asBytes()
  }
  return null
}

// Usage example
const indexContent = extractFile('./docker-image.tar', 'index.json')
if (indexContent !== null) {
  // Decode the bytes as UTF-8 text before parsing them as JSON.
  const manifestText = indexContent.toString('utf-8')
  const manifest = JSON.parse(manifestText)
  console.log(manifest)
}
```

## Install this test package

```
Expand Down
123 changes: 122 additions & 1 deletion __test__/index.spec.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { readFile } from 'node:fs/promises'
import { join } from 'node:path'
import { fileURLToPath } from 'node:url'

import test from 'ava'

import { Archive } from '../index'
import { fileURLToPath } from 'node:url'

const __dirname = join(fileURLToPath(import.meta.url), '..')

Expand All @@ -16,6 +16,10 @@ test('should be able to read archive', (t) => {
})

test('should be able to unpack archive', (t) => {
if (process.env.NAPI_RS_FORCE_WASI) {
t.pass('Skipping unpack test on WASI')
return
}
const archive = new Archive(join(__dirname, 'src.tar'))
archive.unpack(__dirname)
t.pass()
Expand Down Expand Up @@ -49,3 +53,120 @@ test('should be able to handle tar.xz', (t) => {
t.is(typeof entry.path(), 'string')
}
})

// Walks src.tar until src/lib.rs is reached, then checks that asBytes()
// returns a non-empty Buffer whose text looks like Rust source.
test('should be able to extract single file with asBytes', (t) => {
  const tarball = new Archive(join(__dirname, 'src.tar'))
  for (const item of tarball.entries()) {
    if (item.path() !== 'src/lib.rs') {
      continue
    }
    const bytes = item.asBytes()
    t.true(bytes instanceof Buffer, 'asBytes should return a Buffer')
    t.true(bytes.length > 0, 'Content should not be empty')

    // Decode once and look for tokens expected in the Rust source file.
    const text = bytes.toString('utf-8')
    t.true(text.includes('use'), 'Should contain Rust use statements')
    t.true(text.includes('napi'), 'Should contain napi imports')
    return
  }
  // Reaching this point means the loop never saw the target path.
  t.fail('Could not find src/lib.rs in the archive')
})

// Collects every `.rs` entry of src.tar into a path -> bytes map while
// iterating, then verifies the expected files are present and non-empty.
test('should be able to extract multiple files with asBytes', (t) => {
  const tarball = new Archive(join(__dirname, 'src.tar'))
  const extractedFiles = new Map<string, Buffer>()

  for (const item of tarball.entries()) {
    const entryPath = item.path()
    if (!entryPath || !entryPath.endsWith('.rs')) {
      continue
    }
    // Read the bytes while the iterator is still positioned on this entry.
    extractedFiles.set(entryPath, item.asBytes())
  }

  t.true(extractedFiles.size >= 2, 'Should extract at least 2 .rs files')
  t.true(extractedFiles.has('src/lib.rs'), 'Should have extracted src/lib.rs')
  t.true(extractedFiles.has('src/entry.rs'), 'Should have extracted src/entry.rs')

  // Every collected file must be a non-empty Buffer containing `use`.
  for (const [path, bytes] of extractedFiles.entries()) {
    t.true(bytes instanceof Buffer, `Content of ${path} should be a Buffer`)
    t.true(bytes.length > 0, `Content of ${path} should not be empty`)
    const text = bytes.toString('utf-8')
    t.true(text.includes('use'), `${path} should contain Rust use statements`)
  }
})

// Repeats the single-file extraction against the gzip, bzip2 and xz
// variants of the fixture archive.
test('should work with asBytes on compressed archives', async (t) => {
  for (const format of ['src.tar.gz', 'src.tar.bz2', 'src.tar.xz']) {
    const tarball = new Archive(join(__dirname, format))
    let foundFile = false

    for (const item of tarball.entries()) {
      if (item.path() !== 'src/lib.rs') {
        continue
      }
      const bytes = item.asBytes()
      t.true(bytes instanceof Buffer, `asBytes should return Buffer for ${format}`)
      t.true(bytes.length > 0, `Content should not be empty for ${format}`)
      foundFile = true
      // Only the first match is needed; stop iterating this archive.
      break
    }

    t.true(foundFile, `Should find src/lib.rs in ${format}`)
  }
})

// Builds the Archive from an in-memory copy of src.tar instead of a path
// and checks that asBytes() still returns the contents of src/lib.rs.
test('should work with asBytes from buffer-based archive', async (t) => {
  const tarBytes = await readFile(join(__dirname, 'src.tar'))
  const tarball = new Archive(tarBytes)

  for (const item of tarball.entries()) {
    if (item.path() !== 'src/lib.rs') {
      continue
    }
    const bytes = item.asBytes()
    t.true(bytes instanceof Buffer, 'asBytes should return a Buffer')
    t.true(bytes.length > 0, 'Content should not be empty')

    const text = bytes.toString('utf-8')
    t.true(text.includes('napi'), 'Should contain napi imports')
    return
  }
  // Reaching this point means the loop never saw the target path.
  t.fail('Could not find src/lib.rs in buffer-based archive')
})

test('Docker OCI use case - extract specific file like index.json', (t) => {
  // This test demonstrates the exact use case mentioned in issue #58
  // where you want to extract a specific file from a tarball (like Docker OCI images)

  /**
   * Extracts a specific file by name, similar to:
   * `tar -x -O -f something.tar index.json`
   *
   * @returns the bytes of the first entry whose path equals `targetPath`,
   *          or `null` when no entry matches.
   */
  function extractFile(archivePath: string, targetPath: string): Buffer | null {
    const archive = new Archive(archivePath)
    for (const entry of archive.entries()) {
      if (entry.path() === targetPath) {
        return entry.asBytes()
      }
    }
    return null
  }

  const archivePath = join(__dirname, 'src.tar')

  // Extract src/lib.rs (simulating extracting index.json from a Docker image)
  const libRsContent = extractFile(archivePath, 'src/lib.rs')
  if (libRsContent === null) {
    // Fail with a clear message and stop, instead of dereferencing null
    // through non-null assertions further down.
    t.fail('Should be able to extract src/lib.rs')
    return
  }
  // From here on the compiler knows libRsContent is a Buffer.
  t.true(libRsContent instanceof Buffer, 'Extracted content should be a Buffer')
  t.true(libRsContent.length > 0, 'Extracted content should not be empty')

  // Verify the content is correct
  const contentStr = libRsContent.toString('utf-8')
  t.true(contentStr.includes('#![deny(clippy::all)]'), 'Should contain expected Rust code')

  // Try to extract a non-existent file
  const nonExistentContent = extractFile(archivePath, 'non-existent.json')
  t.is(nonExistentContent, null, 'Should return null for non-existent files')
})
10 changes: 10 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ export declare class Entry {
*/
path(): string | null
header(): ReadonlyHeader
/**
* Read the entirety of this entry into memory and return it as a `Buffer`.
*
* This is equivalent to the functionality provided by `tar -x -O -f archive.tar filename`
* which extracts a single file and outputs its contents to stdout.
*
* This method will read the entire contents of this entry into memory.
* For large files, consider using streaming methods if memory usage is a concern.
*
* @returns A `Buffer` containing the contents of this entry.
*/
asBytes(): Buffer
}

export declare class Header {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
"devDependencies": {
"@napi-rs/cli": "^3.1.3",
"@napi-rs/lzma": "^1.4.4",
"@napi-rs/wasm-runtime": "^1.0.3",
"@oxc-node/core": "^0.0.32",
"@taplo/cli": "^0.7.0",
"@types/node": "^24.2.1",
Expand Down
16 changes: 16 additions & 0 deletions src/entry.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::io::Read;

use napi::{
bindgen_prelude::{Env, Reference, SharedReference},
iterator::Generator,
Expand Down Expand Up @@ -58,4 +60,18 @@ impl Entry {
this.share_with(env, |e| Ok(e.inner.header()))?,
))
}

#[napi]
/// Reads the full contents of this entry and returns them as a `Buffer`.
///
/// Behaves like `tar -x -O -f archive.tar filename`, which extracts a single
/// file and writes its contents to stdout.
///
/// The whole entry is held in memory at once. For large files, consider
/// using streaming methods if memory usage is a concern.
pub fn as_bytes(&mut self) -> napi::Result<napi::bindgen_prelude::Buffer> {
  let mut contents: Vec<u8> = Vec::new();
  // Any I/O error from the underlying reader is propagated to the caller.
  self.inner.read_to_end(&mut contents)?;
  Ok(napi::bindgen_prelude::Buffer::from(contents))
}
}
1 change: 1 addition & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ __metadata:
dependencies:
"@napi-rs/cli": "npm:^3.1.3"
"@napi-rs/lzma": "npm:^1.4.4"
"@napi-rs/wasm-runtime": "npm:^1.0.3"
"@oxc-node/core": "npm:^0.0.32"
"@taplo/cli": "npm:^0.7.0"
"@types/node": "npm:^24.2.1"
Expand Down
Loading