Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/boxlite/src/images/archive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ mod time;

#[allow(unused_imports)]
pub use tar::extract_layer_tarball_streaming;
pub use tar::verify_diff_id;
163 changes: 163 additions & 0 deletions src/boxlite/src/images/archive/tar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,87 @@ fn to_cstring(path: &Path) -> io::Result<CString> {
})
}

/// Verify a layer's DiffID by decompressing and hashing the uncompressed content.
///
/// DiffID is the SHA256 of the uncompressed tar stream, as specified in the
/// OCI image config's `rootfs.diff_ids` array. This verifies that the actual
/// filesystem content matches what the image author intended.
///
/// # Arguments
/// * `tarball_path` - Path to the compressed layer tarball
/// * `expected_diff_id` - Expected DiffID (e.g., "sha256:abc123...")
///
/// # Returns
/// `Ok(true)` if the DiffID matches, `Ok(false)` if it doesn't.
pub fn verify_diff_id(tarball_path: &Path, expected_diff_id: &str) -> BoxliteResult<bool> {
use sha2::{Digest, Sha256};

let expected_hash = expected_diff_id
.strip_prefix("sha256:")
.ok_or_else(|| BoxliteError::Storage("Invalid diff_id format, expected sha256:".into()))?;

let file = fs::File::open(tarball_path).map_err(|e| {
BoxliteError::Storage(format!(
"Failed to open layer tarball {}: {}",
tarball_path.display(),
e
))
})?;

// Detect compression format
let mut header = [0u8; 2];
{
let file_ref = &file;
file_ref
.take(2)
.read_exact(&mut header)
.map_err(|e| BoxliteError::Storage(format!("Failed to read layer header: {}", e)))?;
}

// Re-open to read from beginning
let file = fs::File::open(tarball_path).map_err(|e| {
BoxliteError::Storage(format!(
"Failed to reopen layer tarball {}: {}",
tarball_path.display(),
e
))
})?;

// Create decompressing reader (same logic as extract_layer_tarball_streaming)
let mut reader: Box<dyn Read> = if header == [0x1f, 0x8b] {
Box::new(GzDecoder::new(BufReader::new(file)))
} else {
Box::new(BufReader::new(file))
};

// Hash the entire decompressed stream
let mut hasher = Sha256::new();
let mut buffer = vec![0u8; 64 * 1024];
loop {
let n = reader.read(&mut buffer).map_err(|e| {
BoxliteError::Storage(format!("Failed to read decompressed layer: {}", e))
})?;
if n == 0 {
break;
}
hasher.update(&buffer[..n]);
}

let computed_hash = format!("{:x}", hasher.finalize());

if computed_hash != expected_hash {
tracing::error!(
"DiffID mismatch for {}:\n Expected: {}\n Computed: sha256:{}",
tarball_path.display(),
expected_diff_id,
computed_hash
);
return Ok(false);
}

Ok(true)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1607,4 +1688,86 @@ mod tests {
let target = std::fs::read_link(&link_path).unwrap();
assert_eq!(target, PathBuf::from("target.txt"));
}

// ========================================================================
// DiffID Verification Tests
// ========================================================================

#[test]
fn test_verify_diff_id_correct_hash() {
    use sha2::Digest;

    // Single-file tar whose bytes are the "uncompressed layer".
    let tar_bytes = create_test_tar(vec![TestEntry {
        path: "hello.txt".to_string(),
        entry_type: TestEntryType::File {
            content: b"hello".to_vec(),
        },
    }]);

    // The DiffID is defined over the uncompressed tar, so hash it before gzipping.
    let diff_id = format!("sha256:{:x}", sha2::Sha256::digest(&tar_bytes));

    // Produce the gzip-compressed form that is stored on disk.
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&tar_bytes).unwrap();
    let compressed = encoder.finish().unwrap();

    let workdir = tempfile::tempdir().unwrap();
    let layer_path = workdir.path().join("layer.tar.gz");
    std::fs::write(&layer_path, &compressed).unwrap();

    let verified = verify_diff_id(&layer_path, &diff_id).unwrap();
    assert!(verified);
}

#[test]
fn test_verify_diff_id_wrong_hash() {
    // An all-zero digest that cannot match the real content hash.
    const BOGUS_DIFF_ID: &str =
        "sha256:0000000000000000000000000000000000000000000000000000000000000000";

    let tar_bytes = create_test_tar(vec![TestEntry {
        path: "hello.txt".to_string(),
        entry_type: TestEntryType::File {
            content: b"hello".to_vec(),
        },
    }]);

    // Store the layer gzip-compressed, as a pulled layer would be.
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&tar_bytes).unwrap();
    let compressed = encoder.finish().unwrap();

    let workdir = tempfile::tempdir().unwrap();
    let layer_path = workdir.path().join("layer.tar.gz");
    std::fs::write(&layer_path, &compressed).unwrap();

    // A mismatch is reported as `Ok(false)`, not as an error.
    let verified = verify_diff_id(&layer_path, BOGUS_DIFF_ID).unwrap();
    assert!(!verified);
}

#[test]
fn test_verify_diff_id_uncompressed_tarball() {
    use sha2::Digest;

    let tar_bytes = create_test_tar(vec![TestEntry {
        path: "test.txt".to_string(),
        entry_type: TestEntryType::File {
            content: b"uncompressed test".to_vec(),
        },
    }]);

    // With no compression layer, the DiffID is simply the hash of the file on disk.
    let diff_id = format!("sha256:{:x}", sha2::Sha256::digest(&tar_bytes));

    // Persist the raw tar without any encoding.
    let workdir = tempfile::tempdir().unwrap();
    let layer_path = workdir.path().join("layer.tar");
    std::fs::write(&layer_path, &tar_bytes).unwrap();

    let verified = verify_diff_id(&layer_path, &diff_id).unwrap();
    assert!(verified);
}
}
6 changes: 6 additions & 0 deletions src/boxlite/src/images/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,18 @@ pub(super) struct ImageManifest {
pub(super) manifest_digest: String,
pub(super) layers: Vec<LayerInfo>,
pub(super) config_digest: String,
/// DiffIDs from image config's `rootfs.diff_ids` (SHA256 of uncompressed layers).
/// Empty if not available (e.g., config not yet downloaded, or empty in config).
pub(super) diff_ids: Vec<String>,
}

/// Metadata for a single image layer, stored in `ImageManifest::layers`.
#[derive(Debug, Clone)]
pub(super) struct LayerInfo {
/// Digest string identifying the layer blob.
pub(super) digest: String,
/// Media type string of the layer.
/// NOTE(review): presumably copied from the manifest descriptor — confirm
/// against the code that populates this struct.
pub(super) media_type: String,
/// Expected size from manifest descriptor (bytes).
/// Values <= 0 mean "unknown" and skip size validation.
pub(super) size: i64,
}

// ============================================================================
Expand Down
53 changes: 52 additions & 1 deletion src/boxlite/src/images/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,58 @@ impl ImageObject {
.map(|l| l.digest.clone())
.collect();

self.blob_source.extract_layers(&digests).await
let extracted = self.blob_source.extract_layers(&digests).await?;

// Verify DiffIDs if available
self.verify_diff_ids()?;

Ok(extracted)
}

/// Verify layer DiffIDs against the image config's rootfs.diff_ids.
///
/// DiffIDs are SHA256 hashes of the uncompressed layer tar content.
/// This ensures the decompressed filesystem content matches what the
/// image author intended.
fn verify_diff_ids(&self) -> BoxliteResult<()> {
use crate::images::archive::verify_diff_id;

let diff_ids = &self.manifest.diff_ids;
if diff_ids.is_empty() {
return Ok(());
}

let layers = &self.manifest.layers;
if diff_ids.len() != layers.len() {
tracing::warn!(
"DiffID count ({}) doesn't match layer count ({}), skipping verification",
diff_ids.len(),
layers.len()
);
return Ok(());
}

for (i, (layer, diff_id)) in layers.iter().zip(diff_ids.iter()).enumerate() {
let tarball_path = self.blob_source.layer_tarball_path(&layer.digest);
match verify_diff_id(&tarball_path, diff_id) {
Ok(true) => {
tracing::debug!("DiffID verified for layer {}: {}", i, layer.digest);
}
Ok(false) => {
return Err(BoxliteError::Image(format!(
"DiffID verification failed for layer {} ({}): \
uncompressed content does not match expected diff_id {}",
i, layer.digest, diff_id
)));
}
Err(e) => {
tracing::warn!("DiffID verification error for layer {}: {}", i, e);
// Don't fail the pull on verification errors (e.g., unsupported format)
}
}
}

Ok(())
}

/// Compute a stable digest for this image based on its layers.
Expand Down
Loading
Loading