From f52806f39bf296efdab6ab73ca0d2ab78f2acd33 Mon Sep 17 00:00:00 2001 From: n00m4d Date: Thu, 14 Nov 2024 13:48:49 +0100 Subject: [PATCH 1/8] feat: add env feature for clap --- Cargo.toml | 2 +- docker-compose.yaml | 53 ++------------------- ingester.Dockerfile | 6 +-- nft_ingester/src/bin/slot_persister/main.rs | 18 +++---- 4 files changed, 15 insertions(+), 64 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ecb94d9a7..8ea950cb3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,7 @@ num-traits = "0.2.17" # Configuration, env-vars and cli parsing figment = { version = "0.10.6", features = ["env", "toml", "yaml"] } -clap = { version = "4.2.2", features = ["derive", "cargo"] } +clap = { version = "4.2.2", features = ["derive", "cargo", "env"] } dotenvy = "0.15.7" indicatif = "0.17" diff --git a/docker-compose.yaml b/docker-compose.yaml index e94ad889b..5f8b7fe68 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -97,50 +97,21 @@ services: options: max-size: "2048m" - raw-backfiller: - container_name: raw-backfiller - restart: always - entrypoint: sh -c "if [ -z '$$MALLOC_CONF' ]; then exec ./raw_backfiller; else exec ./profiling_raw_backfiller; fi" - env_file: - - .env - network_mode: host - volumes: - - ${INGESTER_ROCKS_DB_PATH}:${INGESTER_ROCKS_DB_PATH_CONTAINER}:rw - - ${INGESTER_PROFILING_FILE_PATH}:${INGESTER_PROFILING_FILE_PATH_CONTAINER}:rw - - ./creds.json:/usr/src/app/creds.json - - ./heaps:/usr/src/app/heaps:rw - stop_grace_period: 5m - build: - context: . 
- dockerfile: ingester.Dockerfile - logging: - options: - max-size: "2048m" - slot-persister: container_name: slot-persister restart: always entrypoint: | sh -c " - ARGS=\"--target-db-path $target_db_path\" - ARGS=\"$$ARGS --rpc-host $rpc_host\" - [ -n \"$start_slot\" ] && ARGS=\"$$ARGS --start-slot $start_slot\" - [ -n \"$big_table_credentials\" ] && ARGS=\"$$ARGS --big-table-credentials $big_table_credentials\" - [ -n \"$big_table_timeout\" ] && ARGS=\"$$ARGS --big-table-timeout $big_table_timeout\" - [ -n \"$metrics_port\" ] && ARGS=\"$$ARGS --metrics-port $metrics_port\" - [ -n \"$chunk_size\" ] && ARGS=\"$$ARGS --chunk-size $chunk_size\" - [ -n \"$max_concurrency\" ] && ARGS=\"$$ARGS --max-concurrency $max_concurrency\" - if [ -z \"$MALLOC_CONF\" ]; then - exec ./slot_persister $$ARGS + exec ./slot_persister else - exec ./profiling_slot_persister $$ARGS + exec ./profiling_slot_persister fi" env_file: - .env network_mode: host volumes: - - ${target_db_path}:${target_db_path}:rw + - ${TARGET_DB_PATH}:${TARGET_DB_PATH}:rw - ${INGESTER_PROFILING_FILE_PATH}:${INGESTER_PROFILING_FILE_PATH_CONTAINER}:rw - ${big_table_credentials:-/tmp/creds.json}:${big_table_credentials:-/tmp/creds.json} - ./heaps:/usr/src/app/heaps:rw @@ -152,24 +123,6 @@ services: options: max-size: "2048m" - core-indexing: - container_name: core-indexing - restart: always - entrypoint: sh -c "if [ -z '$$MALLOC_CONF' ]; then exec ./core_indexing; else exec ./profiling_core_indexing; fi" - env_file: - - .env - network_mode: host - volumes: - - ${INGESTER_PROFILING_FILE_PATH}:${INGESTER_PROFILING_FILE_PATH_CONTAINER}:rw - - ./heaps:/usr/src/app/heaps:rw - stop_grace_period: 5m - build: - context: . 
- dockerfile: ingester.Dockerfile - logging: - options: - max-size: "2048m" - db: container_name: db image: 'postgres:14' diff --git a/ingester.Dockerfile b/ingester.Dockerfile index 249ee2da7..bcc996086 100644 --- a/ingester.Dockerfile +++ b/ingester.Dockerfile @@ -36,12 +36,12 @@ RUN cargo chef cook --release --recipe-path recipe.json # Building the services FROM cacher AS builder COPY . . -RUN cargo build --release --bin ingester --bin api --bin raw_backfiller --bin synchronizer --bin slot_persister +RUN cargo build --release --bin ingester --bin api --bin synchronizer --bin slot_persister # Building the profiling feature services FROM cacher AS builder-with-profiling COPY . . -RUN cargo build --release --features profiling --bin ingester --bin api --bin raw_backfiller --bin synchronizer --bin slot_persister +RUN cargo build --release --features profiling --bin ingester --bin api --bin synchronizer --bin slot_persister # Final image FROM rust:1.76-slim-bullseye AS runtime @@ -52,12 +52,10 @@ ENV TZ=Etc/UTC APP_USER=appuser LD_PRELOAD="/usr/local/lib/libjemalloc.so.2" RUN groupadd $APP_USER && useradd -g $APP_USER $APP_USER && mkdir -p ${APP} COPY --from=builder /rust/target/release/ingester ${APP}/ingester -COPY --from=builder /rust/target/release/raw_backfiller ${APP}/raw_backfiller COPY --from=builder /rust/target/release/api ${APP}/api COPY --from=builder /rust/target/release/synchronizer ${APP}/synchronizer COPY --from=builder /rust/target/release/slot_persister ${APP}/slot_persister COPY --from=builder-with-profiling /rust/target/release/ingester ${APP}/profiling_ingester -COPY --from=builder-with-profiling /rust/target/release/raw_backfiller ${APP}/profiling_raw_backfiller COPY --from=builder-with-profiling /rust/target/release/api ${APP}/profiling_api COPY --from=builder-with-profiling /rust/target/release/synchronizer ${APP}/profiling_synchronizer COPY --from=builder-with-profiling /rust/target/release/slot_persister ${APP}/profiling_slot_persister diff 
--git a/nft_ingester/src/bin/slot_persister/main.rs b/nft_ingester/src/bin/slot_persister/main.rs index a0913b913..6f5ddb6db 100644 --- a/nft_ingester/src/bin/slot_persister/main.rs +++ b/nft_ingester/src/bin/slot_persister/main.rs @@ -41,40 +41,40 @@ const SLOT_COLLECTION_OFFSET: u64 = 300; )] struct Args { /// Path to the target RocksDB instance with slots - #[arg(short, long)] + #[arg(short, long, env)] target_db_path: PathBuf, /// RPC host - #[arg(short, long)] + #[arg(short, long, env)] rpc_host: String, /// Optional starting slot number, this will override the last saved slot in the RocksDB - #[arg(short, long)] + #[arg(short, long, env)] start_slot: Option, /// Big table credentials file path - #[arg(short, long)] + #[arg(short, long, env)] big_table_credentials: Option, /// Optional big table timeout (default: 1000) - #[arg(short = 'B', long, default_value_t = 1000)] + #[arg(short = 'B', long, env, default_value_t = 1000)] big_table_timeout: u32, /// Metrics port /// Default: 9090 - #[arg(short, long, default_value = "9090")] + #[arg(short, long, env, default_value = "9090")] metrics_port: u16, /// Number of slots to process in each batch - #[arg(short, long, default_value_t = 200)] + #[arg(short, long, env, default_value_t = 200)] chunk_size: usize, /// Maximum number of concurrent requests - #[arg(short = 'M', long, default_value_t = 20)] + #[arg(short = 'M', long, env, default_value_t = 20)] max_concurrency: usize, /// Optional comma-separated list of slot numbers to check - #[arg(long)] + #[arg(long, env)] slots: Option, } pub struct InMemorySlotsDumper { From 509247098d8c672f593374cb069ef4e694a46218 Mon Sep 17 00:00:00 2001 From: n00m4d Date: Thu, 14 Nov 2024 16:06:24 +0100 Subject: [PATCH 2/8] feat: drop profiling for slot persister --- docker-compose.yaml | 8 +------- ingester.Dockerfile | 3 +-- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 5f8b7fe68..3cd922376 100644 --- 
a/docker-compose.yaml +++ b/docker-compose.yaml @@ -100,13 +100,7 @@ services: slot-persister: container_name: slot-persister restart: always - entrypoint: | - sh -c " - if [ -z \"$MALLOC_CONF\" ]; then - exec ./slot_persister - else - exec ./profiling_slot_persister - fi" + entrypoint: "./slot_persister" env_file: - .env network_mode: host diff --git a/ingester.Dockerfile b/ingester.Dockerfile index bcc996086..53482b301 100644 --- a/ingester.Dockerfile +++ b/ingester.Dockerfile @@ -41,7 +41,7 @@ RUN cargo build --release --bin ingester --bin api --bin synchronizer --bin slot # Building the profiling feature services FROM cacher AS builder-with-profiling COPY . . -RUN cargo build --release --features profiling --bin ingester --bin api --bin synchronizer --bin slot_persister +RUN cargo build --release --features profiling --bin ingester --bin api --bin synchronizer # Final image FROM rust:1.76-slim-bullseye AS runtime @@ -58,7 +58,6 @@ COPY --from=builder /rust/target/release/slot_persister ${APP}/slot_persister COPY --from=builder-with-profiling /rust/target/release/ingester ${APP}/profiling_ingester COPY --from=builder-with-profiling /rust/target/release/api ${APP}/profiling_api COPY --from=builder-with-profiling /rust/target/release/synchronizer ${APP}/profiling_synchronizer -COPY --from=builder-with-profiling /rust/target/release/slot_persister ${APP}/profiling_slot_persister WORKDIR ${APP} STOPSIGNAL SIGINT \ No newline at end of file From 0725a9a3da362d41806ada2fb8ec4342723b4b16 Mon Sep 17 00:00:00 2001 From: n00m4d Date: Thu, 14 Nov 2024 17:01:50 +0100 Subject: [PATCH 3/8] feat: change the way workers take envs --- .env.example | 130 +++----------------- nft_ingester/src/bin/slot_persister/main.rs | 18 +-- 2 files changed, 25 insertions(+), 123 deletions(-) diff --git a/.env.example b/.env.example index 8c751a648..8a508b7c5 100644 --- a/.env.example +++ b/.env.example @@ -1,120 +1,22 @@ -# Required by Postgre container -POSTGRE_DB_PATH="postgre/db/path" 
+ASSETS_ROCKS_DB_PATH="path/to/assets/db" +ASSETS_ROCKS_DB_SECONDARY_PATH="path/to/assets_secondary/db" -RUST_BACKTRACE=1 -# Ingester instance config -INGESTER_LOG_LEVEL=info +SLOTS_ROCKS_DB_PATH="path/to/slots/db" +SLOTS_ROCKS_DB_SECONDARY_PATH="path/to/slots_secondary/db" -INGESTER_DATABASE_CONFIG='{max_postgres_connections=10, url="postgres://user:pass@0.0.0.0:5432/database"}' -INGESTER_TCP_CONFIG='{receiver_addr="localhost:2000", receiver_reconnect_interval=5, snapshot_receiver_addr="localhost:5000"}' -INGESTER_REDIS_MESSENGER_CONFIG='{messenger_type="Redis", connection_config={redis_connection_str="redis://:pass@localhost:6379"}}' -INGESTER_MESSAGE_SOURCE=Redis #TCP or Redis +SOLANA_RPC="http://localhost:8080" -INGESTER_ACCOUNTS_BUFFER_SIZE=250 -INGESTER_ACCOUNTS_PARSING_WORKERS=20 -INGESTER_TRANSACTIONS_PARSING_WORKERS=20 +BIG_TABLE_CREDENTIALS="path/to/creds" +BIG_TABLE_TIMEOUT=10 -INGESTER_SNAPSHOT_PARSING_WORKERS=1 -INGESTER_SNAPSHOT_PARSING_BATCH_SIZE=250 +SLOT_PERSISTER_START_SLOT=10 +SLOT_PERSISTER_SLOTS=[1,2,3] +SLOT_PERSISTER_CHUNK_SIZE=1000 +SLOT_PERSISTER_MAX_CONCURRENCY=100 +SLOT_PERSISTER_METRICS_PORT=6090 -INGESTER_GAPFILLER_PEER_ADDR="0.0.0.0" -INGESTER_METRICS_PORT=9091 -INGESTER_SERVER_PORT=9092 -INGESTER_PEER_GRPC_PORT=9099 +SLOT_CHECKER_SLOTS=[1,2,3] -INGESTER_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data" -INGESTER_ROCKS_DB_PATH="path/to/rocks/on/disk" - -INGESTER_ARCHIVES_DIR="path/to/rocks/backup/archives" -INGESTER_ROCKS_BACKUP_ARCHIVES_DIR="path/to/rocks/backup/archives" -INGESTER_ROCKS_BACKUP_DIR="path/to/rocks/backup/" - -INGESTER_BACKFILL_RPC_ADDRESS='https://rpc:port' -INGESTER_RPC_HOST='https://rpc:port' - -INGESTER_BACKFILLER_SOURCE_MODE=RPC #RPC or Bigtable -INGESTER_BIG_TABLE_CONFIG='{creds="/usr/src/app/creds.json", timeout=1000}' - -INGESTER_RUN_SEQUENCE_CONSISTENT_CHECKER=true -# Optional, required only if it needs to run fork cleaner, default is false. Unstable as it removes forked items, but also removes some valid leafs. 
Recommended to use only! for testing purposes. -INGESTER_RUN_FORK_CLEANER=false -INGESTER_RUN_BUBBLEGUM_BACKFILLER=true - -INGESTER_BACKFILLER_MODE=PersistAndIngest -INGESTER_SLOT_UNTIL=0 -INGESTER_SLOT_START_FROM=0 -INGESTER_WORKERS_COUNT=100 -INGESTER_CHUNK_SIZE=20 -INGESTER_PERMITTED_TASKS=1 -INGESTER_WAIT_PERIOD_SEC=30 -INGESTER_SHOULD_REINGEST=false - -INGESTER_PEER_GRPC_MAX_GAP_SLOTS=1000000 - -INGESTER_RUN_PROFILING=false -INGESTER_PROFILING_FILE_PATH_CONTAINER="/usr/src/profiling" -INGESTER_PROFILING_FILE_PATH="/path/to/profiling" - -INGESTER_FILE_STORAGE_PATH_CONTAINER="/usr/src/app/file_storage" -INGESTER_FILE_STORAGE_PATH="path/to/file/storage" -INGESTER_MIGRATION_STORAGE_PATH=/path/to/migration_storage - -INGESTER_ROCKS_FLUSH_BEFORE_BACKUP=false -INGESTER_ROCKS_INTERVAL_IN_SECONDS=3600 -INGESTER_ROCKS_SYNC_INTERVAL_SECONDS=2 - -INGESTER_SYNCHRONIZER_DUMP_PATH="/path/to/dump" - -# API instance config -API_LOG_LEVEL=info - -API_DATABASE_CONFIG='{max_postgres_connections=250, url="postgres://user:pass@0.0.0.0:5432/database"}' - -API_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data" -API_ROCKS_DB_SECONDARY_PATH_CONTAINER="path/to/rocks/secondary/db" -API_ARCHIVES_DIR="path/to/rocks/backup/archives" - -API_PEER_GRPC_PORT=8991 -API_METRICS_PORT=8985 -API_SERVER_PORT=8990 - -API_RPC_HOST='https://rpc:port' - -API_ROCKS_SYNC_INTERVAL_SECONDS=2 -API_FILE_STORAGE_PATH_CONTAINER="/usr/src/app/file_storage" -API_FILE_STORAGE_PATH="path/to/file/storage" - -API_PEER_GRPC_MAX_GAP_SLOTS=1000000 -API_JSON_MIDDLEWARE_CONFIG='{is_enabled=true, max_urls_to_parse=10}' - -API_CONSISTENCE_SYNCHRONIZATION_API_THRESHOLD=1000000 -API_CONSISTENCE_BACKFILLING_SLOTS_THRESHOLD=500 - -# if set to true API will not check if tree where user requests assets from has any gaps -API_SKIP_CHECK_TREE_GAPS=false - -# Synchronizer instance config -SYNCHRONIZER_LOG_LEVEL=info - -SYNCHRONIZER_DATABASE_CONFIG='{max_postgres_connections=100, url="postgres://user:pass@0.0.0.0:5432/database"}' 
-SYNCHRONIZER_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data" -SYNCHRONIZER_ROCKS_DB_SECONDARY_PATH_CONTAINER="path/to/rocks/secondary/db" - -SYNCHRONIZER_METRICS_PORT=6091 - -SYNCHRONIZER_DUMP_PATH="/path/to/migration_data" - -SYNCHRONIZER_DUMP_SYNCHRONIZER_BATCH_SIZE=10000 -SYNCHRONIZER_DUMP_SYNC_THRESHOLD=50000000 - -SYNCHRONIZER_PARALLEL_TASKS=30 - -# Profiling config -# Optional, required only if it needs to run memory profiling -MALLOC_CONF="prof:true,prof_leak:true,prof_final:true,prof_active:true,prof_prefix:/usr/src/app/heaps/,lg_prof_interval:32,lg_prof_sample:19" - -# Integrity verification -INTEGRITY_VERIFICATION_TEST_FILE_PATH="./test_keys/test_keys.txt" -INTEGRITY_VERIFICATION_TEST_FILE_PATH_CONTAINER="/test_keys/test_keys.txt" -INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH="./slots_collect" -INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH_CONTAINER="/slots_collect" \ No newline at end of file +BACKFILL_START_SLOT=10 +BACKFILL_WORKERS=50 +BACKFILL_SLOTS=[1,2,3] diff --git a/nft_ingester/src/bin/slot_persister/main.rs b/nft_ingester/src/bin/slot_persister/main.rs index 6f5ddb6db..44b3afb1d 100644 --- a/nft_ingester/src/bin/slot_persister/main.rs +++ b/nft_ingester/src/bin/slot_persister/main.rs @@ -41,40 +41,40 @@ const SLOT_COLLECTION_OFFSET: u64 = 300; )] struct Args { /// Path to the target RocksDB instance with slots - #[arg(short, long, env)] + #[arg(short, long, env="ASSETS_ROCKS_DB_PATH")] target_db_path: PathBuf, /// RPC host - #[arg(short, long, env)] + #[arg(short, long, env="SOLANA_RPC")] rpc_host: String, /// Optional starting slot number, this will override the last saved slot in the RocksDB - #[arg(short, long, env)] + #[arg(short, long, env="SLOT_PERSISTER_START_SLOT")] start_slot: Option, /// Big table credentials file path - #[arg(short, long, env)] + #[arg(short, long, env="BIG_TABLE_CREDENTIALS")] big_table_credentials: Option, /// Optional big table timeout (default: 1000) - #[arg(short = 'B', long, env, default_value_t = 1000)] + #[arg(short 
= 'B', long, env="BIG_TABLE_TIMEOUT", default_value_t = 1000)] big_table_timeout: u32, /// Metrics port /// Default: 9090 - #[arg(short, long, env, default_value = "9090")] + #[arg(short, long, env="SLOT_PERSISTER_METRICS_PORT", default_value = "9090")] metrics_port: u16, /// Number of slots to process in each batch - #[arg(short, long, env, default_value_t = 200)] + #[arg(short, long, env="SLOT_PERSISTER_CHUNK_SIZE", default_value_t = 200)] chunk_size: usize, /// Maximum number of concurrent requests - #[arg(short = 'M', long, env, default_value_t = 20)] + #[arg(short = 'M', long, env="SLOT_PERSISTER_MAX_CONCURRENCY", default_value_t = 20)] max_concurrency: usize, /// Optional comma-separated list of slot numbers to check - #[arg(long, env)] + #[arg(long, env="SLOT_PERSISTER_SLOTS")] slots: Option, } pub struct InMemorySlotsDumper { From 73debce2016c8a8e108c245c19226377657244b9 Mon Sep 17 00:00:00 2001 From: n00m4d Date: Fri, 15 Nov 2024 08:44:20 +0100 Subject: [PATCH 4/8] feat: change env slightly for secondary DB --- .env.example | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 8a508b7c5..297ff0d30 100644 --- a/.env.example +++ b/.env.example @@ -1,8 +1,9 @@ ASSETS_ROCKS_DB_PATH="path/to/assets/db" -ASSETS_ROCKS_DB_SECONDARY_PATH="path/to/assets_secondary/db" SLOTS_ROCKS_DB_PATH="path/to/slots/db" -SLOTS_ROCKS_DB_SECONDARY_PATH="path/to/slots_secondary/db" + +INGESTER_SECONDARY_SLOTS_ROCKS_DB_PATH="path/to/slots_secondary/db" +# each worker will have it's own secondary DB directory SOLANA_RPC="http://localhost:8080" From 728af3645166199884d34ebecb13938ac55c632b Mon Sep 17 00:00:00 2001 From: n00m4d Date: Fri, 15 Nov 2024 09:45:35 +0100 Subject: [PATCH 5/8] feat: drop profiling from docker compose for slot persister --- docker-compose.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 3cd922376..dae7d5155 100644 --- a/docker-compose.yaml +++ 
b/docker-compose.yaml @@ -106,7 +106,6 @@ services: network_mode: host volumes: - ${TARGET_DB_PATH}:${TARGET_DB_PATH}:rw - - ${INGESTER_PROFILING_FILE_PATH}:${INGESTER_PROFILING_FILE_PATH_CONTAINER}:rw - ${big_table_credentials:-/tmp/creds.json}:${big_table_credentials:-/tmp/creds.json} - ./heaps:/usr/src/app/heaps:rw stop_grace_period: 5m From 2d8f4d077bd948aa6bef8e7e5a97c6179512f163 Mon Sep 17 00:00:00 2001 From: n00m4d Date: Mon, 13 Jan 2025 15:22:26 +0100 Subject: [PATCH 6/8] feat: revert changes --- Cargo.toml | 2 +- ingester.Dockerfile | 1 + nft_ingester/src/bin/slot_persister/main.rs | 10 +++++----- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a0ef3f63b..aedc5dc8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,7 +84,7 @@ num-traits = "0.2.17" # Configuration, env-vars and cli parsing figment = { version = "0.10.6", features = ["env", "toml", "yaml"] } -clap = { version = "4.2.2", features = ["derive", "cargo", "env"] } +clap = { version = "4.2.2", features = ["derive", "cargo"] } dotenvy = "0.15.7" indicatif = "0.17" diff --git a/ingester.Dockerfile b/ingester.Dockerfile index d0f9cc3cb..bd1277704 100644 --- a/ingester.Dockerfile +++ b/ingester.Dockerfile @@ -61,6 +61,7 @@ COPY --from=builder-with-profiling /rust/target/release/ingester ${APP}/profilin COPY --from=builder-with-profiling /rust/target/release/backfill ${APP}/profiling_backfill COPY --from=builder-with-profiling /rust/target/release/api ${APP}/profiling_api COPY --from=builder-with-profiling /rust/target/release/synchronizer ${APP}/profiling_synchronizer +COPY --from=builder-with-profiling /rust/target/release/slot_persister ${APP}/profiling_slot_persister WORKDIR ${APP} STOPSIGNAL SIGINT \ No newline at end of file diff --git a/nft_ingester/src/bin/slot_persister/main.rs b/nft_ingester/src/bin/slot_persister/main.rs index 46a1f1ab2..f37e8fca8 100644 --- a/nft_ingester/src/bin/slot_persister/main.rs +++ 
b/nft_ingester/src/bin/slot_persister/main.rs @@ -49,7 +49,7 @@ struct Args { rpc_host: String, /// Optional starting slot number, this will override the last saved slot in the RocksDB - #[arg(short, long, env="SLOT_PERSISTER_START_SLOT")] + #[arg(short, long)] start_slot: Option, /// Big table credentials file path @@ -57,7 +57,7 @@ struct Args { big_table_credentials: Option, /// Optional big table timeout (default: 1000) - #[arg(short = 'B', long, env="BIG_TABLE_TIMEOUT", default_value_t = 1000)] + #[arg(short = 'B', long, default_value_t = 1000)] big_table_timeout: u32, /// Metrics port @@ -66,15 +66,15 @@ struct Args { metrics_port: u16, /// Number of slots to process in each batch - #[arg(short, long, env="SLOT_PERSISTER_CHUNK_SIZE", default_value_t = 200)] + #[arg(short, long, default_value_t = 200)] chunk_size: usize, /// Maximum number of concurrent requests - #[arg(short = 'M', long, env="SLOT_PERSISTER_MAX_CONCURRENCY", default_value_t = 20)] + #[arg(short = 'M', long, default_value_t = 20)] max_concurrency: usize, /// Optional comma-separated list of slot numbers to check - #[arg(long, env="SLOT_PERSISTER_SLOTS")] + #[arg(long)] slots: Option, } From 5fb21b8f822fde53691d3fc51ebd225ad1c2d5c9 Mon Sep 17 00:00:00 2001 From: n00m4d Date: Mon, 13 Jan 2025 17:40:25 +0100 Subject: [PATCH 7/8] feat: update env.example --- .env.example | 109 +++++++++++++++++++++++++++++++++++++------- docker-compose.yaml | 18 -------- 2 files changed, 93 insertions(+), 34 deletions(-) diff --git a/.env.example b/.env.example index 297ff0d30..40d3eb041 100644 --- a/.env.example +++ b/.env.example @@ -1,23 +1,100 @@ -ASSETS_ROCKS_DB_PATH="path/to/assets/db" +# required for all containers +MALLOC_CONF="prof:true,prof_leak:true,prof_final:true,prof_active:true,prof_prefix:/usr/src/app/heaps/,lg_prof_interval:32,lg_prof_sample:19" +PG_DATABASE_URL=postgres://solana:solana@localhost:5432/aura_db +ROCKS_DB_PATH="/path/to/rocksdb" 
+ROCKS_DB_PATH_CONTAINER="/path/to/rocksdb/in/container" +ROCKS_SLOTS_DB_PATH="/path/to/rocksdb/with/slots" -SLOTS_ROCKS_DB_PATH="path/to/slots/db" +# required for ingester +REDIS_CONNECTION_CONFIG={'redis_connection_str'='redis://127.0.0.1:6379'} +ROCKS_BACKUP_ARCHIVES_DIR="/path/to/archives/backup" +ROCKS_BACKUP_DIR="/path/to/rocks/backup" +PROFILING_FILE_PATH="/path/to/prifiling/file/" +PROFILING_FILE_PATH_CONTAINER="/path/to/prifiling/file/in/container" +ROCKS_MIGRATION_STORAGE_PATH="/path/to/migration/data/storage" +ROCKS_SECONDARY_SLOTS_DB_PATH="./tmp/file_storage/rocks/secondary/ingester-slots" -INGESTER_SECONDARY_SLOTS_ROCKS_DB_PATH="path/to/slots_secondary/db" -# each worker will have it's own secondary DB directory +# required for ingester and api +RPC_HOST="https://mainnet-aura.metaplex.com" +FILE_STORAGE_PATH="/path/to/dir/to/store/files" +FILE_STORAGE_PATH_CONTAINER="/path/to/dir/to/store/files" -SOLANA_RPC="http://localhost:8080" +# required for migrator +ROCKS_JSON_SOURCE_DB="/path/to/json/source/db" +ROCKS_JSON_TARGET_DB="/path/to/target/db" -BIG_TABLE_CREDENTIALS="path/to/creds" -BIG_TABLE_TIMEOUT=10 +# required for api +ROCKS_ARCHIVES_DIR="/rocksdb/_rocks_backup_archives" -SLOT_PERSISTER_START_SLOT=10 -SLOT_PERSISTER_SLOTS=[1,2,3] -SLOT_PERSISTER_CHUNK_SIZE=1000 -SLOT_PERSISTER_MAX_CONCURRENCY=100 -SLOT_PERSISTER_METRICS_PORT=6090 +# required for api and synchronizer +ROCKS_DB_SECONDARY_PATH_CONTAINER="/path/to/rocksdb/secondary" -SLOT_CHECKER_SLOTS=[1,2,3] +# variables with default values +PG_MIN_DB_CONNECTIONS=10 +PG_MAX_DB_CONNECTIONS=100 +ROCKS_DB_SECONDARY_PATH="./my_rocksdb_secondary" +HEAP_PATH="/usr/src/app/heaps" +SKIP_CHECK_TREE_GAPS=false +IS_RUN_PROFILING=false +CHECK_PROOFS=false +CHECK_PROOFS_PROBABILITY=0.1 +CHECK_PROOFS_COMMITMENT=finalized +MAX_PAGE_LIMIT=50 +NATIVE_MINT_PUBKEY="So11111111111111111111111111111111111111112" +PARALLEL_JSON_DOWNLOADERS=100 +LOG_LEVEL="info" -BACKFILL_START_SLOT=10 -BACKFILL_WORKERS=50 
-BACKFILL_SLOTS=[1,2,3] +# ingester specific variables with default values +REDIS_ACCOUNTS_PARSING_WORKERS=20 +REDIS_TRANSACTIONS_PARSING_WORKERS=20 +SEQUENCE_CONSISTENT_CHECKER_WAIT_PERIOD_SEC=60 +ACCOUNT_PROCESSOR_BUFFER_SIZE=250 +ACCOUNT_PROCESSOR_MPL_FEES_BUFFER_SIZE=50 +IS_RUN_API=true +IS_RUN_GAPFILLER=false +IS_RESTORE_ROCKS_DB=false +ROCKS_ENABLE_MIGRATION=true +RUN_SEQUENCE_CONSISTENT_CHECKER=false +ARCHIVES_DIR="/rocksdb/_rocks_backup_archives" +SERVER_PORT=9092 +IS_RUN_BACKFILLER=true +BACKFILLER_SOURCE_MODE=rpc +IS_RUN_BUBBLEGUM_BACKFILLER=true +SHOULD_REINGEST=false +PEER_GRPC_PORT=9099 +PEER_GRPC_MAX_GAP_SLOTS=1000000 +RPC_RETRY_INTERVAL_MILLIS=500 + +# synchronizer specific variables with default values +ROCKS_DUMP_PATH="./tmp/rocks_dump" +DUMP_SYNCHRONIZER_BATCH_SIZE=200000 +DUMP_SYNC_THRESHOLD=150000000 +SYNCHRONIZER_PARALLEL_TASKS=30 +TIMEOUT_BETWEEN_SYNCS_SEC=0 + +# migrator specific variables with default values +MIGRATOR_MODE=full + +# api specific variables with default values +ROCKS_SYNC_INTERVAL_SECONDS=2 + +# optional variables +GAPFILLER_PEER_ADDR="0.0.0.0" +JSON_MIDDLEWARE_CONFIG={'is_enabled':true, 'max_urls_to_parse':10} +ROCKS_BACKUP_URL="http://storage.url" +CONSISTENCE_SYNCHRONIZATION_API_THRESHOLD=10 +CONSISTENCE_BACKFILLING_SLOTS_THRESHOLD=10 +BATCH_MINT_SERVICE_PORT=8080 +STORAGE_SERVICE_BASE_URL"http://localhost" +BACKFILL_RPC_ADDRESS="http://backfill/rpc.url" +BIG_TABLE_CONFIG={'creds': '/path/to/file.json' 'timeout': 10} +INGESTER_METRICS_PORT=8080 +SYNCHRONIZER_METRICS_PORT=8080 +MIGRATOR_METRICS_PORT=8080 +API_METRICS_PORT=8080 + +# required for integrity verification tool +INTEGRITY_VERIFICATION_TEST_FILE_PATH="/path/to/text/result/files" +INTEGRITY_VERIFICATION_TEST_FILE_PATH_CONTAINER="/path/to/text/result/files" +INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH="/path/to/text/result/files" +INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH_CONTAINER="/path/to/text/result/files" \ No newline at end of file diff --git 
a/docker-compose.yaml b/docker-compose.yaml index b27c4aa5f..16293df17 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -144,24 +144,6 @@ services: options: max-size: "2048m" - core-indexing: - container_name: core-indexing - restart: always - entrypoint: sh -c "if [ -z '$$MALLOC_CONF' ]; then exec ./core_indexing; else exec ./profiling_core_indexing; fi" - env_file: - - .env - network_mode: host - volumes: - - ${PROFILING_FILE_PATH}:${PROFILING_FILE_PATH_CONTAINER}:rw - - ./heaps:/usr/src/app/heaps:rw - stop_grace_period: 5m - build: - context: . - dockerfile: ingester.Dockerfile - logging: - options: - max-size: "2048m" - db: container_name: db image: 'postgres:14' From a96eec8734b5192326299302e1e6da18b9e9039e Mon Sep 17 00:00:00 2001 From: n00m4d Date: Tue, 14 Jan 2025 15:53:57 +0100 Subject: [PATCH 8/8] chore: return back old env example because we have example_new --- .env.example | 227 +++++++++++++++++++++++++++++---------------------- 1 file changed, 128 insertions(+), 99 deletions(-) diff --git a/.env.example b/.env.example index 40d3eb041..b1b5dcb95 100644 --- a/.env.example +++ b/.env.example @@ -1,100 +1,129 @@ -# required for all containers -MALLOC_CONF="prof:true,prof_leak:true,prof_final:true,prof_active:true,prof_prefix:/usr/src/app/heaps/,lg_prof_interval:32,lg_prof_sample:19" -PG_DATABASE_URL=postgres://solana:solana@localhost:5432/aura_db -ROCKS_DB_PATH="/path/to/rocksdb" -ROCKS_DB_PATH_CONTAINER="/path/to/rocksdb/in/container" -ROCKS_SLOTS_DB_PATH="/path/to/rocksdb/with/slots" - -# required for ingester -REDIS_CONNECTION_CONFIG={'redis_connection_str'='redis://127.0.0.1:6379'} -ROCKS_BACKUP_ARCHIVES_DIR="/path/to/archives/backup" -ROCKS_BACKUP_DIR="/path/to/rocks/backup" -PROFILING_FILE_PATH="/path/to/prifiling/file/" -PROFILING_FILE_PATH_CONTAINER="/path/to/prifiling/file/in/container" -ROCKS_MIGRATION_STORAGE_PATH="/path/to/migration/data/storage" -ROCKS_SECONDARY_SLOTS_DB_PATH="./tmp/file_storage/rocks/secondary/ingester-slots" - 
-# required for ingester and api -RPC_HOST="https://mainnet-aura.metaplex.com" -FILE_STORAGE_PATH="/path/to/dir/to/store/files" -FILE_STORAGE_PATH_CONTAINER="/path/to/dir/to/store/files" - -# required for migrator -ROCKS_JSON_SOURCE_DB="/path/to/json/source/db" -ROCKS_JSON_TARGET_DB="/path/to/target/db" - -# required for api -ROCKS_ARCHIVES_DIR="/rocksdb/_rocks_backup_archives" - -# required for api and synchronizer -ROCKS_DB_SECONDARY_PATH_CONTAINER="/path/to/rocksdb/secondary" - -# variables with default values -PG_MIN_DB_CONNECTIONS=10 -PG_MAX_DB_CONNECTIONS=100 -ROCKS_DB_SECONDARY_PATH="./my_rocksdb_secondary" -HEAP_PATH="/usr/src/app/heaps" -SKIP_CHECK_TREE_GAPS=false -IS_RUN_PROFILING=false -CHECK_PROOFS=false -CHECK_PROOFS_PROBABILITY=0.1 -CHECK_PROOFS_COMMITMENT=finalized -MAX_PAGE_LIMIT=50 -NATIVE_MINT_PUBKEY="So11111111111111111111111111111111111111112" -PARALLEL_JSON_DOWNLOADERS=100 -LOG_LEVEL="info" - -# ingester specific variables with default values -REDIS_ACCOUNTS_PARSING_WORKERS=20 -REDIS_TRANSACTIONS_PARSING_WORKERS=20 -SEQUENCE_CONSISTENT_CHECKER_WAIT_PERIOD_SEC=60 -ACCOUNT_PROCESSOR_BUFFER_SIZE=250 -ACCOUNT_PROCESSOR_MPL_FEES_BUFFER_SIZE=50 -IS_RUN_API=true -IS_RUN_GAPFILLER=false -IS_RESTORE_ROCKS_DB=false -ROCKS_ENABLE_MIGRATION=true -RUN_SEQUENCE_CONSISTENT_CHECKER=false -ARCHIVES_DIR="/rocksdb/_rocks_backup_archives" -SERVER_PORT=9092 -IS_RUN_BACKFILLER=true -BACKFILLER_SOURCE_MODE=rpc -IS_RUN_BUBBLEGUM_BACKFILLER=true -SHOULD_REINGEST=false -PEER_GRPC_PORT=9099 -PEER_GRPC_MAX_GAP_SLOTS=1000000 -RPC_RETRY_INTERVAL_MILLIS=500 - -# synchronizer specific variables with default values -ROCKS_DUMP_PATH="./tmp/rocks_dump" -DUMP_SYNCHRONIZER_BATCH_SIZE=200000 -DUMP_SYNC_THRESHOLD=150000000 +# Required by Postgre container +POSTGRE_DB_PATH="postgre/db/path" + +RUST_BACKTRACE=1 +# Ingester instance config +INGESTER_LOG_LEVEL=info + +INGESTER_DATABASE_CONFIG='{max_postgres_connections=10, url="postgres://user:pass@0.0.0.0:5432/database"}' 
+INGESTER_TCP_CONFIG='{receiver_addr="localhost:2000", receiver_reconnect_interval=5, snapshot_receiver_addr="localhost:5000"}' +INGESTER_REDIS_MESSENGER_CONFIG='{messenger_type="Redis", connection_config={redis_connection_str="redis://:pass@localhost:6379"}}' +INGESTER_MESSAGE_SOURCE=Redis #TCP or Redis + +INGESTER_ACCOUNTS_BUFFER_SIZE=250 +INGESTER_ACCOUNTS_PARSING_WORKERS=20 +INGESTER_TRANSACTIONS_PARSING_WORKERS=20 + +INGESTER_SNAPSHOT_PARSING_WORKERS=1 +INGESTER_SNAPSHOT_PARSING_BATCH_SIZE=250 + +INGESTER_GAPFILLER_PEER_ADDR="0.0.0.0" +INGESTER_METRICS_PORT=9091 +INGESTER_SERVER_PORT=9092 +INGESTER_PEER_GRPC_PORT=9099 + +INGESTER_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data" +INGESTER_ROCKS_DB_PATH="path/to/rocks/on/disk" + +INGESTER_ARCHIVES_DIR="path/to/rocks/backup/archives" +INGESTER_ROCKS_BACKUP_ARCHIVES_DIR="path/to/rocks/backup/archives" +INGESTER_ROCKS_BACKUP_DIR="path/to/rocks/backup/" + +INGESTER_BACKFILL_RPC_ADDRESS='https://rpc:port' +INGESTER_RPC_HOST='https://rpc:port' + +INGESTER_BACKFILLER_SOURCE_MODE=RPC #RPC or Bigtable +INGESTER_BIG_TABLE_CONFIG='{creds="/usr/src/app/creds.json", timeout=1000}' + +INGESTER_RUN_SEQUENCE_CONSISTENT_CHECKER=false # experimental, enable only for testing purposes +# Optional, required only if it needs to run fork cleaner, default is false. Unstable as it removes forked items, but also removes some valid leafs. Recommended to use only! for testing purposes. 
+INGESTER_RUN_FORK_CLEANER=false +INGESTER_RUN_BUBBLEGUM_BACKFILLER=true + +INGESTER_BACKFILLER_MODE=PersistAndIngest # The only available option, the variable will be removed +INGESTER_SLOT_UNTIL=0 +INGESTER_SLOT_START_FROM=0 +INGESTER_WORKERS_COUNT=100 +INGESTER_CHUNK_SIZE=20 +INGESTER_PERMITTED_TASKS=1 +INGESTER_WAIT_PERIOD_SEC=30 +INGESTER_SHOULD_REINGEST=false + +INGESTER_PEER_GRPC_MAX_GAP_SLOTS=1000000 + +INGESTER_RUN_PROFILING=false +INGESTER_PROFILING_FILE_PATH_CONTAINER="/usr/src/profiling" +INGESTER_PROFILING_FILE_PATH="/path/to/profiling" + +INGESTER_FILE_STORAGE_PATH_CONTAINER="/usr/src/app/file_storage" +INGESTER_FILE_STORAGE_PATH="path/to/file/storage" +INGESTER_MIGRATION_STORAGE_PATH=/path/to/migration_storage # requires explanation + +INGESTER_ROCKS_FLUSH_BEFORE_BACKUP=false +INGESTER_ROCKS_INTERVAL_IN_SECONDS=3600 +INGESTER_ROCKS_SYNC_INTERVAL_SECONDS=2 + +INGESTER_SYNCHRONIZER_DUMP_PATH="/path/to/dump" +INGESTER_DISABLE_SYNCHRONIZER=true +INGESTER_SKIP_CHECK_TREE_GAPS=true +# path to the slots data, required for the backfiller to work +INGESTER_SLOTS_DB_PATH=/path/to/slots-data +INGESTER_SECONDARY_SLOTS_DB_PATH=/path/to/secondary/ingester-slots # should be removed + +# a common log level for all instances, will be overridden by specific log levels, requires refactoring +RUST_LOG=info +# API instance config +API_LOG_LEVEL=info + +API_DATABASE_CONFIG='{max_postgres_connections=250, url="postgres://user:pass@0.0.0.0:5432/database"}' + +API_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data" +API_ROCKS_DB_SECONDARY_PATH_CONTAINER="path/to/rocks/secondary/db" +API_ARCHIVES_DIR="path/to/rocks/backup/archives" + +API_PEER_GRPC_PORT=8991 +API_METRICS_PORT=8985 +API_SERVER_PORT=8990 + +API_RPC_HOST='https://rpc:port' + +API_ROCKS_SYNC_INTERVAL_SECONDS=2 +API_FILE_STORAGE_PATH_CONTAINER="/usr/src/app/file_storage" +API_FILE_STORAGE_PATH="path/to/file/storage" + +API_PEER_GRPC_MAX_GAP_SLOTS=1000000 +API_JSON_MIDDLEWARE_CONFIG='{is_enabled=true, 
max_urls_to_parse=10}' + +API_CONSISTENCE_SYNCHRONIZATION_API_THRESHOLD=1000000 +API_CONSISTENCE_BACKFILLING_SLOTS_THRESHOLD=500 + +# if set to true API will not check if tree where user requests assets from has any gaps +API_SKIP_CHECK_TREE_GAPS=true + +# Synchronizer instance config +SYNCHRONIZER_LOG_LEVEL=info + +SYNCHRONIZER_DATABASE_CONFIG='{max_postgres_connections=100, url="postgres://user:pass@0.0.0.0:5432/database"}' +SYNCHRONIZER_ROCKS_DB_PATH_CONTAINER="/usr/src/rocksdb-data" +SYNCHRONIZER_ROCKS_DB_SECONDARY_PATH_CONTAINER="path/to/rocks/secondary/db" # should be removed + +SYNCHRONIZER_METRICS_PORT=6091 + +SYNCHRONIZER_DUMP_PATH="/path/to/migration_data" + +SYNCHRONIZER_DUMP_SYNCHRONIZER_BATCH_SIZE=10000 +# threshold on the number of updates not being synchronized for the synchronizer to dump-load on start +# 150M - that's a rough threshold after which the synchronizer will likely complete a full dump-load cycle faster than doing an incremental sync +SYNCHRONIZER_DUMP_SYNC_THRESHOLD=150000000 + SYNCHRONIZER_PARALLEL_TASKS=30 -TIMEOUT_BETWEEN_SYNCS_SEC=0 - -# migrator specific variables with default values -MIGRATOR_MODE=full - -# api specific variables with default values -ROCKS_SYNC_INTERVAL_SECONDS=2 - -# optional variables -GAPFILLER_PEER_ADDR="0.0.0.0" -JSON_MIDDLEWARE_CONFIG={'is_enabled':true, 'max_urls_to_parse':10} -ROCKS_BACKUP_URL="http://storage.url" -CONSISTENCE_SYNCHRONIZATION_API_THRESHOLD=10 -CONSISTENCE_BACKFILLING_SLOTS_THRESHOLD=10 -BATCH_MINT_SERVICE_PORT=8080 -STORAGE_SERVICE_BASE_URL"http://localhost" -BACKFILL_RPC_ADDRESS="http://backfill/rpc.url" -BIG_TABLE_CONFIG={'creds': '/path/to/file.json' 'timeout': 10} -INGESTER_METRICS_PORT=8080 -SYNCHRONIZER_METRICS_PORT=8080 -MIGRATOR_METRICS_PORT=8080 -API_METRICS_PORT=8080 - -# required for integrity verification tool -INTEGRITY_VERIFICATION_TEST_FILE_PATH="/path/to/text/result/files" -INTEGRITY_VERIFICATION_TEST_FILE_PATH_CONTAINER="/path/to/text/result/files"
-INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH="/path/to/text/result/files" -INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH_CONTAINER="/path/to/text/result/files" \ No newline at end of file + +# Profiling config +# Optional, required only if it needs to run memory profiling +MALLOC_CONF="prof:true,prof_leak:true,prof_final:true,prof_active:true,prof_prefix:/usr/src/app/heaps/,lg_prof_interval:32,lg_prof_sample:19" + +# Integrity verification +INTEGRITY_VERIFICATION_TEST_FILE_PATH="./test_keys/test_keys.txt" +INTEGRITY_VERIFICATION_TEST_FILE_PATH_CONTAINER="/test_keys/test_keys.txt" +INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH="./slots_collect" +INTEGRITY_VERIFICATION_SLOTS_COLLECT_PATH_CONTAINER="/slots_collect" \ No newline at end of file