Skip to content

Commit

Permalink
feat: snapshot tests during CI (#137)
Browse files Browse the repository at this point in the history
* feat: make snapshot tests capable of running in parallel.

  This is the default behaviour when running 'cargo test', and it's been slightly annoying to have to run them one by one every time. Now, they happily all run at once by sharing and reusing connections to the on-disk snapshots.

Signed-off-by: KtorZ <[email protected]>

* feat: introduce a basic Makefile

Signed-off-by: jeluard <[email protected]>

* feat: make sure snapshot tests are executed during CI

Signed-off-by: jeluard <[email protected]>

* feat: use local cardano-node in CI snapshot tests.

Signed-off-by: KtorZ <[email protected]>

* chore: use actions/cache@v4, same as network sync workflow.

Signed-off-by: KtorZ <[email protected]>

* fix: make sure the cache path also matches for re-use between workflows.

Signed-off-by: KtorZ <[email protected]>

* fix: fix cache-hit strategy for cardano-node-db.

  Interestingly enough, a 'cache-hit' value of 'false' does not indicate
  a miss. It indicates a hit on a restore key (and not a _direct_ hit).

  A miss is denoted by an empty string. :|

Signed-off-by: KtorZ <[email protected]>

* chore: use same host for Haskell cardano node container.

Signed-off-by: KtorZ <[email protected]>

* chore: build Amaru in a separate step

Signed-off-by: KtorZ <[email protected]>

* fix: add missing topology config file to Haskell's node configuration.

Signed-off-by: KtorZ <[email protected]>

* fix: wrong PID access

Signed-off-by: jeluard <[email protected]>

* fix: fix amaru e2e watch script

Signed-off-by: KtorZ <[email protected]>

* chore: update epoch used as stop gap

Signed-off-by: jeluard <[email protected]>

* test

Signed-off-by: jeluard <[email protected]>

* tmp: comment out build part to allow build to finish and cache to be pushed.

Signed-off-by: KtorZ <[email protected]>

* chore: only restore cardano-db cache, to save time and avoid weird cache conflict

Signed-off-by: KtorZ <[email protected]>

* tmp

Signed-off-by: KtorZ <[email protected]>

* fix: fix ledger db snapshots paths + add version key.

Signed-off-by: KtorZ <[email protected]>

* remove intermediate log lines.

Signed-off-by: KtorZ <[email protected]>

* send INT signal, not TERM, to ensure a clean stop.

Signed-off-by: KtorZ <[email protected]>

* .

Signed-off-by: KtorZ <[email protected]>

* fix: end sync right after snapshot exit.

Signed-off-by: KtorZ <[email protected]>

* split ledger cache restore/save

Signed-off-by: KtorZ <[email protected]>

* prune node db.

Signed-off-by: KtorZ <[email protected]>

---------

Signed-off-by: KtorZ <[email protected]>
Signed-off-by: jeluard <[email protected]>
Co-authored-by: KtorZ <[email protected]>
  • Loading branch information
jeluard and KtorZ authored Mar 7, 2025
1 parent 59ae386 commit 543e03e
Show file tree
Hide file tree
Showing 5 changed files with 247 additions and 36 deletions.
115 changes: 115 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,121 @@ jobs:
fi
shell: bash

snapshots:
name: Snapshots
runs-on: ubuntu-latest
if: ${{ !github.event.pull_request.draft }}
env:
AMARU_PEER_ADDRESS: 127.0.0.1:3001
strategy:
matrix:
network: [ preprod ]
cardano_node_version: [ 10.1.4 ]
steps:
- uses: actions/checkout@v4

- name: Restore cardano-node DB
id: cache-cardano-node-db
uses: actions/cache/restore@v4
with:
# The path should match the one used for the 'Nightly Sync' workflow.
path: ${{ runner.temp }}/db-${{ matrix.network }}
# The key should also match
key: cardano-node-ogmios-${{ matrix.network }}
restore-keys: |
cardano-node-ogmios-${{ matrix.network }}
- name: Check if cardano-node-db is available
if: steps.cache-cardano-node-db.outputs.cache-hit == ''
run: |
echo "Haskell node db not available, aborting job."
exit 1
- name: Spawn Haskell Node
id: spawn-cardano-node
shell: bash
run: |
docker pull ghcr.io/intersectmbo/cardano-node:${{ matrix.cardano_node_version }}
make HASKELL_NODE_CONFIG_DIR=cardano-node-config NETWORK=${{ matrix.network }} download-haskell-config
docker run -d --name cardano-node \
-v ${{ runner.temp }}/db-${{ matrix.network }}:/db \
-v ${{ runner.temp }}/ipc:/ipc \
-v ./cardano-node-config:/config \
-v ./cardano-node-config:/genesis \
-p 3001:3001 \
ghcr.io/intersectmbo/cardano-node:${{ matrix.cardano_node_version }} run \
--config /config/config.json \
--database-path /db \
--socket-path /ipc/node.socket \
--topology /config/topology.json
- uses: Swatinem/rust-cache@v2

- name: Build Amaru
run: |
cargo build --release --locked
- name: Cache Amaru's ledger.db
id: cache-ledger-db
uses: actions/cache/restore@v4
with:
path: ./ledger.db
# If the ledger store serialisation format changes and becomes
# incompatible, it is necessary to bump the index below to invalidate
# the cached ledger snapshots, and recompute them from the CBOR ones
# (i.e. Full bootstrap below)
key: ${{ runner.OS }}-ledger-cache-v1
restore-keys: |
${{ runner.OS }}-ledger-cache-v1
- name: Full bootstrap amaru
if: steps.cache-ledger-db.outputs.cache-hit != 'true'
run: |
make bootstrap
- if: steps.cache-ledger-db.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: ./ledger.db
key: ${{ runner.OS }}-ledger-cache-v1

- name: Light bootstrap amaru
if: steps.cache-ledger-db.outputs.cache-hit == 'true'
run: |
make import-headers
make import-nonces
- name: Run node
timeout-minutes: 30
shell: bash
run: |
set -eo pipefail
# Make sure amaru runs long enough so that snapshot tests can be executed
AMARU_TRACE="amaru=debug" cargo run --release -- --with-json-traces daemon --peer-address=$AMARU_PEER_ADDRESS --network=${{ matrix.network }} | while read line; do
TARGET_EPOCH=173
EVENT=$(echo $line | jq -r '.fields.message' 2>/dev/null)
SPAN=$(echo $line | jq -r '.spans[0].name' 2>/dev/null)
if [ "$EVENT" == "exit" ] && [ "$SPAN" == "snapshot" ]; then
EPOCH=$(echo $line | jq -r '.spans[0].epoch' 2>/dev/null)
if [ "$EPOCH" == "$TARGET_EPOCH" ]; then
echo "Target epoch reached, stopping the process."
pkill -INT -P $$
break
fi
fi
done
- name: Run tests
run: cargo test --release --locked -p amaru -- --ignored

- name: Teardown haskell node
shell: bash
run: |
docker stop cardano-node
docker rm cardano-node
examples:
name: Examples
strategy:
Expand Down
17 changes: 10 additions & 7 deletions .github/workflows/NightlySynchronization.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,25 @@ jobs:

- uses: actions/checkout@v4

- id: date-time
shell: bash
run: |
echo "timestamp=$(/bin/date -u '+%Y%m%d-%H%M%S')" >> $GITHUB_OUTPUT
- id: cache
uses: actions/cache@v4
with:
path: ${{ runner.temp }}/db-${{ matrix.network }}
key: cardano-node-ogmios-${{ matrix.network }}-${{ steps.date-time.outputs.timestamp }}
key: cardano-node-ogmios-${{ matrix.network }}
restore-keys: |
cardano-node-ogmios-${{ matrix.network }}-
cardano-node-ogmios-${{ matrix.network }}
- uses: CardanoSolutions/[email protected]
with:
db-dir: ${{ runner.temp }}/db-${{ matrix.network }}
network: ${{ matrix.network }}
version: ${{ matrix.ogmios_version }}_${{ matrix.cardano_node_version }}
synchronization-level: ${{ inputs.synchronization-level || 1 }}

# Remove old immutable chunks from the database, we don't need them and they make cache & on-disk system larger.
- name: prune node db
shell: bash
working-directory: ${{ runner.temp }}/db-${{ matrix.network }}
run: |
touch clean
rm immutable/00*.chunk immutable/01*.chunk immutable/02*.chunk immutable/030*.chunk
63 changes: 63 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Convenience targets for building, bootstrapping and running the Amaru node,
# and for fetching the Haskell cardano-node configuration used in CI.

# Target Cardano network. NOTE(review): the chain points / hashes hard-coded in
# the import-* targets below look preprod-specific — confirm before using
# another NETWORK value.
NETWORK ?= preprod
HASKELL_NODE_CONFIG_DIR ?= cardano-node-config
HASKELL_NODE_CONFIG_SOURCE := https://book.world.dev.cardano.org/environments

# 'snapshots' is intentionally NOT phony: it is a real directory target.
.PHONY: all help bootstrap run import-snapshots import-headers import-nonces download-haskell-config enforce-peer-address
all: help

help: ## Show this help
	@echo "Amaru build, bootstrap and run targets"
	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n \033[36m\033[0m\n"} /^[0-9a-zA-Z_-]+:.*?##/ { printf " \033[36m%-25s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

snapshots: ## Download snapshots
	mkdir -p $@
	curl -s -o - "https://raw.githubusercontent.com/pragma-org/amaru/refs/heads/main/data/snapshots.json" \
	  | jq -r '.[] | "\(.point) \(.url)"' \
	  | while read p u; do \
	      echo "Fetching $$p.cbor"; \
	      curl --progress-bar -o - $$u | gunzip > $@/$$p.cbor; \
	    done

download-haskell-config: ## Download Cardano Haskell configuration for ${NETWORK}
	mkdir -p $(HASKELL_NODE_CONFIG_DIR)
	curl -O --output-dir $(HASKELL_NODE_CONFIG_DIR) $(HASKELL_NODE_CONFIG_SOURCE)/$(NETWORK)/config.json
	curl -O --output-dir $(HASKELL_NODE_CONFIG_DIR) $(HASKELL_NODE_CONFIG_SOURCE)/$(NETWORK)/topology.json
	curl -O --output-dir $(HASKELL_NODE_CONFIG_DIR) $(HASKELL_NODE_CONFIG_SOURCE)/$(NETWORK)/byron-genesis.json
	curl -O --output-dir $(HASKELL_NODE_CONFIG_DIR) $(HASKELL_NODE_CONFIG_SOURCE)/$(NETWORK)/shelley-genesis.json
	curl -O --output-dir $(HASKELL_NODE_CONFIG_DIR) $(HASKELL_NODE_CONFIG_SOURCE)/$(NETWORK)/alonzo-genesis.json
	curl -O --output-dir $(HASKELL_NODE_CONFIG_DIR) $(HASKELL_NODE_CONFIG_SOURCE)/$(NETWORK)/conway-genesis.json

# '$^' expands to the 'snapshots' prerequisite (the download directory).
import-snapshots: snapshots ## Import snapshots
	cargo run --release -- import-ledger-state \
	  --snapshot $^/69206375.6f99b5f3deaeae8dc43fce3db2f3cd36ad8ed174ca3400b5b1bed76fdf248912.cbor \
	  --snapshot $^/69638382.5da6ba37a4a07df015c4ea92c880e3600d7f098b97e73816f8df04bbb5fad3b7.cbor \
	  --snapshot $^/70070379.d6fe6439aed8bddc10eec22c1575bf0648e4a76125387d9e985e9a3f8342870d.cbor

import-headers: enforce-peer-address ## Import headers from ${AMARU_PEER_ADDRESS}
	cargo run --release -- import-headers \
	  --peer-address ${AMARU_PEER_ADDRESS} \
	  --starting-point 69638365.4ec0f5a78431fdcc594eab7db91aff7dfd91c13cc93e9fbfe70cd15a86fadfb2 \
	  --count 2
	cargo run --release -- import-headers \
	  --peer-address ${AMARU_PEER_ADDRESS} \
	  --starting-point 70070331.076218aa483344e34620d3277542ecc9e7b382ae2407a60e177bc3700548364c \
	  --count 2

import-nonces: ## Import nonces
	cargo run --release -- import-nonces \
	  --at 70070379.d6fe6439aed8bddc10eec22c1575bf0648e4a76125387d9e985e9a3f8342870d \
	  --active a7c4477e9fcfd519bf7dcba0d4ffe35a399125534bc8c60fa89ff6b50a060a7a \
	  --candidate 74fe03b10c4f52dd41105a16b5f6a11015ec890a001a5253db78a779fe43f6b6 \
	  --evolving 24bb737ee28652cd99ca41f1f7be568353b4103d769c6e1ddb531fc874dd6718 \
	  --tail 5da6ba37a4a07df015c4ea92c880e3600d7f098b97e73816f8df04bbb5fad3b7

bootstrap: import-headers import-nonces import-snapshots ## Bootstrap the node

# Internal guard: fail fast when AMARU_PEER_ADDRESS is not set.
enforce-peer-address:
	@if [ -z ${AMARU_PEER_ADDRESS} ]; then \
	  echo "Error: AMARU_PEER_ADDRESS environment variable is not set."; \
	  exit 1; \
	fi

run: enforce-peer-address ## Run the node
	AMARU_TRACE="amaru=debug" cargo run --release -- --with-json-traces daemon --peer-address=${AMARU_PEER_ADDRESS} ${AMARU_RUN_EXTRA}
25 changes: 13 additions & 12 deletions crates/amaru-stores/src/rocksdb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ impl RocksDB {
})
}

/// Open a store handle scoped to the single snapshot for `epoch`, stored in a
/// sub-directory of `dir` named after the epoch (via its `Debug` rendering).
///
/// The returned store tracks only `epoch` in its `snapshots` list.
///
/// # Errors
///
/// Returns `StoreError::Internal` when the underlying database cannot be
/// opened at that location.
pub fn for_epoch_with(dir: &Path, epoch: Epoch) -> Result<Self, StoreError> {
    let mut opts = Options::default();
    opts.set_prefix_extractor(SliceTransform::create_fixed_prefix(PREFIX_LEN));

    let snapshot_dir = dir.join(PathBuf::from(format!("{epoch:?}")));
    let db = OptimisticTransactionDB::open(&opts, snapshot_dir)
        .map_err(|err| StoreError::Internal(err.into()))?;

    Ok(RocksDB {
        snapshots: vec![epoch],
        dir: dir.to_path_buf(),
        db,
    })
}

pub fn unsafe_transaction(&self) -> rocksdb::Transaction<'_, OptimisticTransactionDB> {
self.db.transaction()
}
Expand Down Expand Up @@ -229,18 +241,7 @@ fn with_prefix_iterator<

impl Store for RocksDB {
fn for_epoch(&self, epoch: Epoch) -> Result<impl Snapshot, StoreError> {
let mut opts = Options::default();
opts.set_prefix_extractor(SliceTransform::create_fixed_prefix(PREFIX_LEN));

Ok(RocksDB {
snapshots: vec![epoch],
dir: self.dir.to_path_buf(),
db: OptimisticTransactionDB::open(
&opts,
self.dir.join(PathBuf::from(format!("{epoch:?}"))),
)
.map_err(|err| StoreError::Internal(err.into()))?,
})
Self::for_epoch_with(self.dir.as_path(), epoch)
}

fn tip(&self) -> Result<Point, StoreError> {
Expand Down
63 changes: 46 additions & 17 deletions crates/amaru/tests/rewards.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,47 @@

use amaru_ledger::{
rewards::StakeDistribution,
store::{RewardsSummary, Store},
store::{RewardsSummary, Snapshot},
};
use amaru_stores::rocksdb::RocksDB;
use pallas_primitives::Epoch;
use std::{path::PathBuf, sync::LazyLock};
use std::{
collections::BTreeMap,
path::PathBuf,
sync::{Arc, LazyLock, Mutex},
};
use test_case::test_case;

/// Relative path to the shared on-disk ledger snapshots used by these tests.
pub static LEDGER_DB: LazyLock<PathBuf> = LazyLock::new(|| PathBuf::from("../../ledger.db"));

/// One shared connection per epoch snapshot, behind a mutex so that tests
/// running in parallel re-use a handle instead of opening the same snapshot
/// more than once.
pub static CONNECTIONS: LazyLock<Mutex<BTreeMap<Epoch, Arc<RocksDB>>>> =
LazyLock::new(|| Mutex::new(BTreeMap::new()));

#[allow(clippy::panic)]
fn open_db(epoch: Epoch) -> RocksDB {
RocksDB::new(&LEDGER_DB)
.unwrap_or_else(|_| panic!("Failed to open ledger snapshot for epoch {}", epoch))
/// Obtain a shared, read-only handle on the snapshot for `epoch`.
///
/// Test cases run in parallel and a single scenario typically needs two
/// snapshots, so two tests may want the same snapshot at the same time. To
/// keep that safe, at most one connection is opened per epoch; it is memoised
/// in the global `CONNECTIONS` map and handed out behind an `Arc`.
#[allow(clippy::unwrap_used)]
fn db(epoch: Epoch) -> Arc<impl Snapshot + Send + Sync> {
    CONNECTIONS
        .lock()
        .unwrap()
        .entry(epoch)
        .or_insert_with(|| {
            let store = RocksDB::for_epoch_with(&LEDGER_DB, epoch).unwrap_or_else(|_| {
                panic!("Failed to open ledger snapshot for epoch {}", epoch)
            });
            Arc::new(store)
        })
        .clone()
}

#[test_case(163)]
Expand All @@ -38,22 +66,23 @@ fn open_db(epoch: Epoch) -> RocksDB {
#[test_case(169)]
#[test_case(170)]
#[test_case(171)]
#[test_case(172)]
#[test_case(173)]
#[test_case(174)]
#[test_case(175)]
#[test_case(176)]
#[test_case(177)]
#[test_case(178)]
#[test_case(179)]
// FIXME: re-enable once governance is implemented, we must be able to track proposal refunds in
// order to get those snapshots right.
//
// #[test_case(172)]
// #[test_case(173)]
// #[test_case(174)]
// #[test_case(175)]
// #[test_case(176)]
// #[test_case(177)]
// #[test_case(178)]
// #[test_case(179)]
#[ignore]
#[allow(clippy::unwrap_used)]
fn compare_preprod_snapshot(epoch: Epoch) {
let db = open_db(epoch);

let snapshot = StakeDistribution::new(&db.for_epoch(epoch).unwrap()).unwrap();
let snapshot = StakeDistribution::new(db(epoch).as_ref()).unwrap();
insta::assert_json_snapshot!(format!("stake_distribution_{}", epoch), snapshot);

let rewards_summary = RewardsSummary::new(&db.for_epoch(epoch + 2).unwrap(), snapshot).unwrap();
let rewards_summary = RewardsSummary::new(db(epoch + 2).as_ref(), snapshot).unwrap();
insta::assert_json_snapshot!(format!("rewards_summary_{}", epoch), rewards_summary);
}

0 comments on commit 543e03e

Please sign in to comment.