Skip to content

Commit c389c2e

Browse files
feat: add bulk dataset support with environment variables (#16)
1 parent 3a94be6 commit c389c2e

File tree

6 files changed

+528
-235
lines changed

6 files changed

+528
-235
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pre-compute/src/compute.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod app_runner;
2+
pub mod dataset;
23
pub mod errors;
34
pub mod pre_compute_app;
45
pub mod pre_compute_args;

pre-compute/src/compute/dataset.rs

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
use crate::compute::errors::ReplicateStatusCause;
2+
use crate::compute::utils::file_utils::download_from_url;
3+
use crate::compute::utils::hash_utils::sha256_from_bytes;
4+
use aes::Aes256;
5+
use base64::{Engine as _, engine::general_purpose};
6+
use cbc::{
7+
Decryptor,
8+
cipher::{BlockDecryptMut, KeyIvInit, block_padding::Pkcs7},
9+
};
10+
use log::{error, info};
11+
use multiaddr::Multiaddr;
12+
use std::str::FromStr;
13+
14+
type Aes256CbcDec = Decryptor<Aes256>;
15+
const IPFS_GATEWAYS: &[&str] = &[
16+
"https://ipfs-gateway.v8-bellecour.iex.ec",
17+
"https://gateway.ipfs.io",
18+
"https://gateway.pinata.cloud",
19+
];
20+
const AES_KEY_LENGTH: usize = 32;
21+
const AES_IV_LENGTH: usize = 16;
22+
23+
/// A single dataset handled inside a Trusted Execution Environment (TEE).
///
/// Bundles everything required to fetch the encrypted dataset, check its
/// integrity, and decrypt it.
///
/// `Debug` is only derived for test builds.
#[cfg_attr(test, derive(Debug))]
#[derive(Clone, Default)]
pub struct Dataset {
    /// Location of the encrypted dataset: either a plain URL or an IPFS multi-address.
    pub url: String,
    /// Expected checksum of the encrypted content, compared against its SHA-256 digest.
    pub checksum: String,
    /// File name associated with the dataset.
    // NOTE(review): not read by the visible methods of this type; presumably the name
    // the decrypted file is saved under — confirm against callers.
    pub filename: String,
    /// Base64-encoded AES-256 key used to decrypt the dataset.
    pub key: String,
}
35+
36+
impl Dataset {
37+
pub fn new(url: String, checksum: String, filename: String, key: String) -> Self {
38+
Dataset {
39+
url,
40+
checksum,
41+
filename,
42+
key,
43+
}
44+
}
45+
46+
/// Downloads the encrypted dataset file from a URL or IPFS multi-address, and verifies its checksum.
47+
///
48+
/// # Arguments
49+
///
50+
/// * `chain_task_id` - The chain task ID for logging
51+
///
52+
/// # Returns
53+
///
54+
/// * `Ok(Vec<u8>)` containing the dataset's encrypted content if download and verification succeed.
55+
/// * `Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)` if the download fails.
56+
/// * `Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum)` if checksum validation fails.
57+
pub fn download_encrypted_dataset(
58+
&self,
59+
chain_task_id: &str,
60+
) -> Result<Vec<u8>, ReplicateStatusCause> {
61+
info!(
62+
"Downloading encrypted dataset file [chainTaskId:{chain_task_id}, url:{}]",
63+
self.url
64+
);
65+
66+
let encrypted_content = if is_multi_address(&self.url) {
67+
IPFS_GATEWAYS.iter().find_map(|gateway| {
68+
let full_url = format!("{gateway}{}", self.url);
69+
info!("Attempting to download dataset from {full_url}");
70+
71+
if let Some(content) = download_from_url(&full_url) {
72+
info!("Successfully downloaded from {full_url}");
73+
Some(content)
74+
} else {
75+
error!("Failed to download from {full_url}");
76+
None
77+
}
78+
})
79+
} else {
80+
download_from_url(&self.url)
81+
}
82+
.ok_or(ReplicateStatusCause::PreComputeDatasetDownloadFailed)?;
83+
84+
info!("Checking encrypted dataset checksum [chainTaskId:{chain_task_id}]");
85+
let actual_checksum = sha256_from_bytes(&encrypted_content);
86+
87+
if actual_checksum != self.checksum {
88+
error!(
89+
"Invalid dataset checksum [chainTaskId:{chain_task_id}, expected:{}, actual:{actual_checksum}]",
90+
self.checksum
91+
);
92+
return Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum);
93+
}
94+
95+
info!("Dataset downloaded and verified successfully.");
96+
Ok(encrypted_content)
97+
}
98+
99+
/// Decrypts the provided encrypted dataset bytes using AES-CBC.
100+
///
101+
/// The first 16 bytes of `encrypted_content` are treated as the IV.
102+
/// The rest is the ciphertext. The decryption key is decoded from a Base64 string.
103+
///
104+
/// # Arguments
105+
///
106+
/// * `encrypted_content` - Full encrypted dataset, including the IV prefix.
107+
///
108+
/// # Returns
109+
///
110+
/// * `Ok(Vec<u8>)` containing the plaintext dataset if decryption succeeds.
111+
/// * `Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)` if the key is missing, decoding fails, or decryption fails.
112+
pub fn decrypt_dataset(
113+
&self,
114+
encrypted_content: &[u8],
115+
) -> Result<Vec<u8>, ReplicateStatusCause> {
116+
let key = general_purpose::STANDARD
117+
.decode(&self.key)
118+
.map_err(|_| ReplicateStatusCause::PreComputeDatasetDecryptionFailed)?;
119+
120+
if encrypted_content.len() < AES_IV_LENGTH || key.len() != AES_KEY_LENGTH {
121+
return Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed);
122+
}
123+
124+
let key_slice = &key[..AES_KEY_LENGTH];
125+
let iv_slice = &encrypted_content[..AES_IV_LENGTH];
126+
let ciphertext = &encrypted_content[AES_IV_LENGTH..];
127+
128+
Aes256CbcDec::new(key_slice.into(), iv_slice.into())
129+
.decrypt_padded_vec_mut::<Pkcs7>(ciphertext)
130+
.map_err(|_| ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
131+
}
132+
}
133+
134+
fn is_multi_address(uri: &str) -> bool {
135+
!uri.trim().is_empty() && Multiaddr::from_str(uri).is_ok()
136+
}
137+
138+
// Unit tests for `Dataset`. The download tests fetch real resources over the network.
#[cfg(test)]
mod tests {
    use super::*;

    const CHAIN_TASK_ID: &str = "0x123456789abcdef";
    const DATASET_CHECKSUM: &str =
        "0x02a12ef127dcfbdb294a090c8f0b69a0ca30b7940fc36cabf971f488efd374d7";
    const ENCRYPTED_DATASET_KEY: &str = "ubA6H9emVPJT91/flYAmnKHC0phSV3cfuqsLxQfgow0=";
    const HTTP_DATASET_URL: &str = "https://raw.githubusercontent.com/iExecBlockchainComputing/tee-worker-pre-compute-rust/main/src/tests_resources/encrypted-data.bin";
    const PLAIN_DATA_FILE: &str = "plain-data.txt";
    const IPFS_DATASET_URL: &str = "/ipfs/QmUVhChbLFiuzNK1g2GsWyWEiad7SXPqARnWzGumgziwEp";

    /// Reference dataset served over HTTP, with matching checksum and key.
    fn get_test_dataset() -> Dataset {
        Dataset {
            url: HTTP_DATASET_URL.to_string(),
            checksum: DATASET_CHECKSUM.to_string(),
            filename: PLAIN_DATA_FILE.to_string(),
            key: ENCRYPTED_DATASET_KEY.to_string(),
        }
    }

    // region download_encrypted_dataset
    #[test]
    fn download_encrypted_dataset_success() {
        let outcome = get_test_dataset().download_encrypted_dataset(CHAIN_TASK_ID);
        assert!(outcome.is_ok());
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_dataset_url() {
        let dataset = Dataset {
            url: "http://bad-url".to_string(),
            ..get_test_dataset()
        };
        assert_eq!(
            dataset.download_encrypted_dataset(CHAIN_TASK_ID),
            Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)
        );
    }

    #[test]
    fn download_encrypted_dataset_success_with_valid_iexec_gateway() {
        let dataset = Dataset {
            url: IPFS_DATASET_URL.to_string(),
            checksum: "0x323b1637c7999942fbebfe5d42fe15dbfe93737577663afa0181938d7ad4a2ac"
                .to_string(),
            ..get_test_dataset()
        };
        assert_eq!(
            dataset.download_encrypted_dataset(CHAIN_TASK_ID),
            Ok(b"hello world !\n".to_vec())
        );
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_gateway() {
        let dataset = Dataset {
            url: "/ipfs/INVALID_IPFS_DATASET_URL".to_string(),
            ..get_test_dataset()
        };
        assert_eq!(
            dataset.download_encrypted_dataset(CHAIN_TASK_ID),
            Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)
        );
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_dataset_checksum() {
        let dataset = Dataset {
            checksum: "invalid_dataset_checksum".to_string(),
            ..get_test_dataset()
        };
        assert_eq!(
            dataset.download_encrypted_dataset(CHAIN_TASK_ID),
            Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum)
        );
    }
    // endregion

    // region decrypt_dataset
    #[test]
    fn decrypt_dataset_success_with_valid_dataset() {
        let dataset = get_test_dataset();
        let ciphertext = dataset.download_encrypted_dataset(CHAIN_TASK_ID).unwrap();

        assert_eq!(
            dataset.decrypt_dataset(&ciphertext),
            Ok(b"Some very useful data.".to_vec())
        );
    }

    #[test]
    fn decrypt_dataset_failure_with_bad_key() {
        let dataset = Dataset {
            key: "bad_key".to_string(),
            ..get_test_dataset()
        };
        let ciphertext = dataset.download_encrypted_dataset(CHAIN_TASK_ID).unwrap();

        assert_eq!(
            dataset.decrypt_dataset(&ciphertext),
            Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
        );
    }
    // endregion
}

0 commit comments

Comments
 (0)