diff --git a/.gitignore b/.gitignore
index 4df39b6..51b567d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,10 @@ workspace
 *.orig
 dependency-reduced-pom.xml
+
+.cos.yaml
+
+recovery/sequencefiles/
+recovery/empty_files.log
+recovery/logs.log
+recovery/hbase/setup/hbase_container_id.txt
\ No newline at end of file
diff --git a/recovery/Readme.md b/recovery/Readme.md
new file mode 100644
index 0000000..b3d7980
--- /dev/null
+++ b/recovery/Readme.md
@@ -0,0 +1,24 @@
+TODO
+
+maybe we should document how the keys must be supplied (in hex? in decimal?) and check whether that is possible
+
+
+when starting the HBase instance, you must mount the path where you want to export the sequencefiles, otherwise you will lose the data from HBase because you can't add the volume later (use docker commit to save the day in case you missed this note :D)
+
+ideally, the export_from_hbase.sh script should be mounted in the hbase container too
+
+TODO: when creating tables for hbase, make them have just one region and increase the file size:
+hbase shell
+hbase> create 'blocks_copy', {NAME => 'x'}, CONFIGURATION => {'hbase.hregion.max.filesize' => '1099511627776', 'hbase.hregion.split.overallfiles' => 'true'}
+
+check if there are multiple regions after populating the table
+
+steps:
+- create /data/recovery folder
+- create a sequencefile folder in solana-syncer/recovery/tencent-upload and mount it to the hbase docker container (this is where the files will be exported)
+- start the docker HBase container and set the HBASE_HOST in ./hbase-import/Dockerfile with docker's ip
+- start the ./start.sh script
+
+
+-- make a /data/recovery folder
+-- make a /data/hbase folder and make ubuntu the owner so that hbase can write in it
\ No newline at end of file
diff --git a/recovery/hbase/export/export_sequencefiles.sh b/recovery/hbase/export/export_sequencefiles.sh
new file mode 100644
index 0000000..fb77e00
--- /dev/null
+++ b/recovery/hbase/export/export_sequencefiles.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+# Check if 
required parameters are provided
+if [ "$#" -ne 4 ]; then
+    # Placeholders restored: they were stripped from the original usage string.
+    echo "Usage: $0 <TABLE_NAME> <START_KEY> <END_KEY> <OUTPUT_PATH>"
+    exit 1
+fi
+
+TABLE_NAME=$1
+START_KEY=$2
+END_KEY=$3
+OUTPUT_PATH=$4
+ROWS_PER_EXPORT=1000
+
+if ! [[ $START_KEY =~ ^[0-9]+$ ]]; then
+    echo "Error: START_KEY must be a decimal number."
+    exit 1
+fi
+
+if ! [[ $END_KEY =~ ^[0-9]+$ ]]; then
+    echo "Error: END_KEY must be a decimal number."
+    exit 1
+fi
+
+echo "All inputs are valid."
+
+# Returns 0 only when the table has exactly one region (a requirement for the
+# single-file sequencefile export below); non-zero otherwise.
+check_multiple_regions() {
+    # Table name passed as an argument
+    local TABLE_NAME=$1
+
+    if [[ -z "$TABLE_NAME" ]]; then
+        echo "Error: Table name is required."
+        return 1
+    fi
+
+    echo "Checking regions for table: $TABLE_NAME"
+
+    # Fetch the region count directly from the "x rows" line
+    echo "Fetching region count for table: $TABLE_NAME"
+    REGION_OUTPUT=$(echo "list_regions '$TABLE_NAME'" | hbase shell)
+
+    # Extract the number of rows (regions)
+    NUM_REGIONS=$(echo "$REGION_OUTPUT" | grep -oP '\d+(?= rows)' | head -n 1)
+
+    if [[ -z "$NUM_REGIONS" ]]; then
+        echo "Error: Could not determine the number of regions for table $TABLE_NAME."
+        return 1
+    fi
+
+    # Debug: Print the number of regions detected
+    echo "Number of regions detected: $NUM_REGIONS"
+
+    if [[ $NUM_REGIONS -eq 1 ]]; then
+        echo "Table $TABLE_NAME has exactly one region. Exporting..."
+        return 0
+    else
+        echo "Error: Table $TABLE_NAME does not have exactly one region ($NUM_REGIONS detected)."
+ return 1 + fi +} + +export_sequencefile () { + # Table name passed as an argument + local TABLE_NAME=$1 + local START_KEY=$2 + local END_KEY=$3 + local OUTPUT_PATH=$4 + + # Calculate the nearest multiple of 1000 greater than START_KEY + NEAREST_MULTIPLE=$(( (START_KEY + ROWS_PER_EXPORT - 1) / ROWS_PER_EXPORT * ROWS_PER_EXPORT )) + + # Export skipped range if START_KEY is not already a multiple of 1000 + if [ "$START_KEY" -lt "$NEAREST_MULTIPLE" ]; then + SKIPPED_RANGE_DIR="${OUTPUT_PATH}/${TABLE_NAME}/range_${START_KEY}_${NEAREST_MULTIPLE}" + echo "Exporting skipped range from $START_KEY to $NEAREST_MULTIPLE" + hbase org.apache.hadoop.hbase.mapreduce.Export \ + -D hbase.mapreduce.scan.row.start=$(printf "%016X" "$START_KEY" | tr 'A-F' 'a-f') \ + -D hbase.mapreduce.scan.row.stop=$(printf "%016X" "$NEAREST_MULTIPLE" | tr 'A-F' 'a-f') \ + -D mapreduce.input.fileinputformat.split.minsize=536870912000 \ + "$TABLE_NAME" "$SKIPPED_RANGE_DIR" + + # Perform file checks and renaming for the skipped range + PART_FILES=("$SKIPPED_RANGE_DIR"/part-m-0000*) + if [ ${#PART_FILES[@]} -gt 1 ]; then + echo "Error: Multiple part-m-0000x files found in $SKIPPED_RANGE_DIR. Stopping script." + exit 1 + fi + + if [[ -f "$SKIPPED_RANGE_DIR/part-m-00000" && -f "$SKIPPED_RANGE_DIR/.part-m-00000.crc" && -f "$SKIPPED_RANGE_DIR/_SUCCESS" && -f "$SKIPPED_RANGE_DIR/._SUCCESS.crc" ]]; then + mv "$SKIPPED_RANGE_DIR/part-m-00000" "$SKIPPED_RANGE_DIR/$TABLE_NAME.seq" + rm "$SKIPPED_RANGE_DIR/.part-m-00000.crc" "$SKIPPED_RANGE_DIR/_SUCCESS" "$SKIPPED_RANGE_DIR/._SUCCESS.crc" + else + echo "Error: Required files not found in $SKIPPED_RANGE_DIR. Stopping script." 
+ exit 1 + fi + + # Update START_KEY to the nearest multiple + START_KEY=$NEAREST_MULTIPLE + fi + + # Loop until START_KEY reaches END_KEY + CURRENT_START=$START_KEY + while [ "$CURRENT_START" -lt "$END_KEY" ]; do + # Calculate the next stop key in decimal + CURRENT_STOP=$((CURRENT_START + ROWS_PER_EXPORT)) + if [ "$CURRENT_STOP" -gt "$END_KEY" ]; then + CURRENT_STOP=$END_KEY + fi + + # Convert start and stop keys to hex and ensure they are lowercase + CURRENT_START_HEX=$(printf "%016X" "$CURRENT_START" | tr 'A-F' 'a-f') + CURRENT_STOP_HEX=$(printf "%016X" "$CURRENT_STOP" | tr 'A-F' 'a-f') + + # Define output directory for this export + EXPORT_DIR="${OUTPUT_PATH}/${TABLE_NAME}/range_${CURRENT_START_HEX}_${CURRENT_STOP_HEX}" + + # Export range using HBase Export tool + echo "Exporting rows from $CURRENT_START_HEX to $CURRENT_STOP_HEX" + hbase org.apache.hadoop.hbase.mapreduce.Export \ + -D hbase.mapreduce.scan.row.start="$CURRENT_START_HEX" \ + -D hbase.mapreduce.scan.row.stop="$CURRENT_STOP_HEX" \ + -D mapreduce.input.fileinputformat.split.minsize=536870912000 \ + "$TABLE_NAME" "$EXPORT_DIR" + + # Perform file checks and renaming + PART_FILES=("$EXPORT_DIR"/part-m-0000*) + if [ ${#PART_FILES[@]} -gt 1 ]; then + echo "Error: Multiple part-m-0000x files found in $EXPORT_DIR. Stopping script." + exit 1 + fi + + if [[ -f "$EXPORT_DIR/part-m-00000" && -f "$EXPORT_DIR/.part-m-00000.crc" && -f "$EXPORT_DIR/_SUCCESS" && -f "$EXPORT_DIR/._SUCCESS.crc" ]]; then + mv "$EXPORT_DIR/part-m-00000" "$EXPORT_DIR/$TABLE_NAME.seq" + rm "$EXPORT_DIR/.part-m-00000.crc" "$EXPORT_DIR/_SUCCESS" "$EXPORT_DIR/._SUCCESS.crc" + else + echo "Error: Required files not found in $EXPORT_DIR. Stopping script." + exit 1 + fi + + # Update CURRENT_START for the next range + CURRENT_START=$CURRENT_STOP + + done +} + + +# Check if the table has multiple regions +check_multiple_regions $TABLE_NAME + +if [ $? -ne 0 ]; then + echo "Error: Table $TABLE_NAME has multiple regions. 
Exporting is not supported." + exit 1 +fi + +# Export sequence files for the specified range +export_sequencefile $TABLE_NAME $START_KEY $END_KEY $OUTPUT_PATH +echo "Export completed for all ranges from $START_KEY to $END_KEY in table $TABLE_NAME." \ No newline at end of file diff --git a/recovery/hbase/export/main.sh b/recovery/hbase/export/main.sh new file mode 100755 index 0000000..4436c61 --- /dev/null +++ b/recovery/hbase/export/main.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +if [ "$#" -ne 4 ]; then + echo "Usage: $0 " + exit 1 +fi + +HBASE_CONTAINER_ID=$1 +START_BLOCK=$2 +END_BLOCK=$3 +OUTPUT_PATH=$4 + +# Validate START_BLOCK and END_BLOCK +if ! [[ $START_BLOCK =~ ^[0-9]+$ ]]; then + echo "Error: START_BLOCK must be a decimal number." + exit 1 +fi + +if ! [[ $END_BLOCK =~ ^[0-9]+$ ]]; then + echo "Error: END_BLOCK must be a decimal number." + exit 1 +fi + +echo "Triggering export script inside HBase container ID: $HBASE_CONTAINER_ID" + +# Execute the export script for 'blocks' table +echo "Exporting data for table 'blocks'..." +docker exec "$HBASE_CONTAINER_ID" /bin/bash /export_sequencefiles.sh "blocks" "$START_BLOCK" "$END_BLOCK" "$OUTPUT_PATH" + +if [ $? -ne 0 ]; then + echo "Error: Export failed for table 'blocks'." + exit 1 +fi + +# Execute the export script for 'entries' table +echo "Exporting data for table 'entries'..." +docker exec "$HBASE_CONTAINER_ID" /bin/bash /export_sequencefiles.sh "entries" "$START_BLOCK" "$END_BLOCK" "$OUTPUT_PATH" + +if [ $? -ne 0 ]; then + echo "Error: Export failed for table 'entries'." + exit 1 +fi + +echo "Data export completed successfully for both tables." 
diff --git a/recovery/hbase/import/Dockerfile b/recovery/hbase/import/Dockerfile new file mode 100644 index 0000000..76e9684 --- /dev/null +++ b/recovery/hbase/import/Dockerfile @@ -0,0 +1,65 @@ +# Stage 1: Build the Rust binaries +FROM rust:latest AS builder + +# Install dependencies needed for building +RUN apt-get update && apt-get install -y \ + cmake \ + protobuf-compiler \ + clang \ + pkg-config \ + libssl-dev \ + libudev-dev \ + build-essential \ + git + +# Clone and build agave/ledger-tool +WORKDIR /usr/src/agave +RUN git clone https://github.com/anza-xyz/agave.git . && \ + cd ledger-tool && \ + cargo build --release + +# Clone and build solana-bigtable-hbase-adapter +WORKDIR /usr/src/solana-bigtable-hbase-adapter +RUN git clone https://github.com/bwarelabs/solana-bigtable-hbase-adapter.git . && \ + cargo build --release + +# Stage 2: Prepare runtime image with Ubuntu +FROM ubuntu:22.04 + +# Install runtime dependencies +RUN apt-get update && apt-get install -y \ + apt-transport-https \ + ca-certificates \ + gnupg \ + curl \ + protobuf-compiler \ + libssl-dev \ + netcat \ + wget \ + pv \ + libzstd-dev \ + zstd \ + && rm -rf /var/lib/apt/lists/* + +# Add Google Cloud SDK for gsutil +RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \ + | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ + curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \ + apt-get update -y && \ + apt-get install google-cloud-cli -y + +# Copy the built binaries from the builder stage +COPY --from=builder /usr/src/agave/target/release/agave-ledger-tool /usr/recovery/agave-ledger-tool +COPY --from=builder /usr/src/solana-bigtable-hbase-adapter/target/release/server /usr/recovery/solana-bigtable-hbase-adapter + +# Copy scripts and make them executable +WORKDIR /usr/recovery +COPY download_missing_blocks.sh /usr/recovery/download_missing_blocks.sh +COPY 
entrypoint.sh /usr/recovery/entrypoint.sh +RUN chmod +x /usr/recovery/download_missing_blocks.sh /usr/recovery/entrypoint.sh + +ENV BIGTABLE_EMULATOR_HOST="localhost:50051" +# ENV HBASE_HOST="localhost:9090" + +# Define entrypoint +ENTRYPOINT ["/usr/recovery/entrypoint.sh"] diff --git a/recovery/hbase/import/download_missing_blocks.sh b/recovery/hbase/import/download_missing_blocks.sh new file mode 100644 index 0000000..9704006 --- /dev/null +++ b/recovery/hbase/import/download_missing_blocks.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +# Input: Start and end block numbers +START_BLOCK=$1 +END_BLOCK=$2 + +# Verify input +if [[ -z "$START_BLOCK" || -z "$END_BLOCK" ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Define the GCS bucket locations +BUCKETS=( + "gs://mainnet-beta-ledger-us-ny5" + "gs://mainnet-beta-ledger-europe-fr2" + "gs://mainnet-beta-ledger-asia-sg1" +) + +# Function to list, filter, and sort folders by numeric structure +list_and_sort_buckets() { + local bucket=$1 + gsutil ls "$bucket" | grep -E "^${bucket}/[0-9]+/$" | sed "s|${bucket}/||" | sed 's|/||' | sort -n +} + +# Function to find the closest bucket for a given block +find_closest_bucket() { + local bucket_list=("$@") + local target_block=$1 + local closest_bucket="" + + for slot in "${bucket_list[@]:1}"; do + if (( slot < target_block )); then + closest_bucket=$slot + else + break + fi + done + + echo "$closest_bucket" +} + +# Main logic to find required buckets and keep track of the specific bucket +declare -A required_buckets # Associative array to store slot:bucket pairs +found_required_buckets=false + +for bucket in "${BUCKETS[@]}"; do + sorted_buckets=($(list_and_sort_buckets "$bucket")) + + closest_start=$(find_closest_bucket "$START_BLOCK" "${sorted_buckets[@]}") + closest_end=$(find_closest_bucket "$END_BLOCK" "${sorted_buckets[@]}") + + if [[ -n "$closest_start" && -n "$closest_end" ]]; then + if (( closest_start == closest_end )); then + required_buckets["$closest_start"]=$bucket + else + 
for slot in "${sorted_buckets[@]}"; do + if (( slot >= closest_start && slot <= closest_end )); then + required_buckets["$slot"]=$bucket + fi + done + fi + found_required_buckets=true + fi + + if [[ "$found_required_buckets" == true ]]; then + break + fi +done + +echo "Required buckets for slots $START_BLOCK to $END_BLOCK:" +for slot in "${!required_buckets[@]}"; do + echo "Slot $slot: ${required_buckets[$slot]}/$slot" +done + +# Function to download and extract the required archive with progress +download_and_extract_archive() { + local bucket=$1 + local slot=$2 + local download_dir="/data/recovery/rocksdb/$slot" + mkdir -p "$download_dir" + + local url_base="https://storage.googleapis.com/${bucket#gs://}/$slot" + local zst_file="$download_dir/rocksdb.tar.zst" + local bz2_file="$download_dir/rocksdb.tar.bz2" + + Check if file already exists + if [[ -f "$zst_file" || -f "$bz2_file" || -d "$download_dir" ]]; then + echo "File or directory for slot $slot already exists, skipping download." + return 0 + fi + + echo "Downloading and extracting archive for slot $slot from $bucket" + # Try downloading zst first with progress indicator + if wget --show-progress "$url_base/rocksdb.tar.zst" -P "$download_dir"; then + echo "Downloaded rocksdb.tar.zst from $bucket for slot $slot" + echo "Extracting rocksdb.tar.zst..." + pv -f -s $(du -sb "$zst_file" | awk '{print $1}') "$zst_file" | tar --use-compress-program=unzstd -xf - -C "$download_dir" + elif wget --show-progress "$url_base/rocksdb.tar.bz2" -P "$download_dir"; then + echo "Downloaded rocksdb.tar.bz2 from $bucket for slot $slot" + echo "Extracting rocksdb.tar.bz2..." 
+ pv -f -s $(du -sb "$bz2_file" | awk '{print $1}') "$bz2_file" | tar -I lbzip2 -xf - -C "$download_dir" + else + echo "Failed to download rocksdb archive from $bucket for slot $slot" + return 1 + fi + + if curl -# -o "$download_dir/version.txt" "$url_base/version.txt"; then + echo "Downloaded version.txt for slot $slot" + cat "$download_dir/version.txt" + else + echo "Failed to download version.txt for slot $slot" + fi + + if [[ -d "$download_dir/rocksdb" ]]; then + echo "Successfully extracted ledger data for slot $slot" + else + echo "Extraction failed for slot $slot" + return 1 + fi +} + +# Download and extract archives for each required slot from the specific bucket +for slot in "${!required_buckets[@]}"; do + bucket="${required_buckets[$slot]}" + download_and_extract_archive "$bucket" "$slot" +done diff --git a/recovery/hbase/import/entrypoint.sh b/recovery/hbase/import/entrypoint.sh new file mode 100644 index 0000000..38d858a --- /dev/null +++ b/recovery/hbase/import/entrypoint.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# Input parameters for agave-ledger-tool +START_BLOCK=$1 +END_BLOCK=$2 + +# Check if START_BLOCK and END_BLOCK are provided +if [[ -z "$START_BLOCK" || -z "$END_BLOCK" ]]; then + echo "Usage: " + exit 1 +fi + +# Run download_missing_blocks.sh and wait for it to complete +echo "Starting download_missing_blocks.sh..." +/usr/recovery/download_missing_blocks.sh "$START_BLOCK" "$END_BLOCK" + +# Check if download_missing_blocks.sh completed successfully +if [[ $? -ne 0 ]]; then + echo "download_missing_blocks.sh failed. Exiting..." + exit 1 +fi +echo "download_missing_blocks.sh completed successfully." + +# Run solana-bigtable-hbase-adapter +echo "Running solana-bigtable-hbase-adapter..." +/usr/recovery/solana-bigtable-hbase-adapter > ./solana-bigtable-hbase-adapter.log 2>&1 & +ADAPTER_PID=$! + +# Wait until solana-bigtable-hbase-adapter is ready +until nc -z localhost 50051; do + sleep 1 +done +echo "solana-bigtable-hbase-adapter started." 
+ +# Iterate over each RocksDB folder and run agave-ledger-tool sequentially +for slot in /data/recovery/rocksdb/*; do + if [[ -d "$slot" ]]; then + echo "Processing RocksDB folder: $slot" + /usr/recovery/agave-ledger-tool bigtable upload "$START_BLOCK" "$END_BLOCK" -l "$slot" + + # Check if agave-ledger-tool succeeded + if [[ $? -ne 0 ]]; then + echo "agave-ledger-tool failed for $slot. Stopping solana-bigtable-hbase-adapter..." + kill $ADAPTER_PID + exit 1 + fi + else + echo "$slot is not a directory. Skipping..." + fi +done + +echo "All RocksDB folders processed successfully." + +# Stop the solana-bigtable-hbase-adapter gracefully +echo "Stopping solana-bigtable-hbase-adapter..." +kill $ADAPTER_PID +wait $ADAPTER_PID + +echo "solana-bigtable-hbase-adapter stopped. Exiting..." diff --git a/recovery/hbase/import/main.sh b/recovery/hbase/import/main.sh new file mode 100755 index 0000000..ddbd08d --- /dev/null +++ b/recovery/hbase/import/main.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +if [ "$#" -ne 4 ]; then + echo "Usage: $0 " + exit 1 +fi + +HBASE_IP=$1 +START_BLOCK=$2 +END_BLOCK=$3 +MOUNT_PATH=$4 + +IMAGE_NAME="solana-import-data-image" +CONTAINER_NAME="solana-import-data-container" + +echo "Building the Docker image: $IMAGE_NAME" +docker build -t "$IMAGE_NAME" ./hbase/import + +if [ $? -ne 0 ]; then + echo "Failed to build the Docker image. Please check the Dockerfile and try again." + exit 1 +fi + +# Check if the container is already running and stop it if necessary +if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then + echo "Stopping existing container..." + docker stop $CONTAINER_NAME +fi + +# Remove the existing container if it exists +if [ "$(docker ps -aq -f name=$CONTAINER_NAME)" ]; then + echo "Removing existing container..." 
+ docker rm $CONTAINER_NAME +fi + +echo "Running the download and import container: $IMAGE_NAME in detached mode" +docker run -d \ + --name "$CONTAINER_NAME" \ + -v "$MOUNT_PATH:$MOUNT_PATH" \ + -e HBASE_HOST="$HBASE_IP:9090" \ + -p 50051:50051 \ + "$IMAGE_NAME" "$START_BLOCK" "$END_BLOCK" + +# Wait for the container to finish +echo "Waiting for the container $CONTAINER_NAME to exit..." +docker wait "$CONTAINER_NAME" + +# Check the exit status of the container +EXIT_CODE=$(docker inspect "$CONTAINER_NAME" --format='{{.State.ExitCode}}') + +if [ "$EXIT_CODE" -ne 0 ]; then + echo "The download and import container exited with errors (exit code: $EXIT_CODE)." +else + echo "The download and import container exited successfully." +fi diff --git a/recovery/hbase/setup/Dockerfile b/recovery/hbase/setup/Dockerfile new file mode 100644 index 0000000..4a24148 --- /dev/null +++ b/recovery/hbase/setup/Dockerfile @@ -0,0 +1,87 @@ +FROM ubuntu:22.04 + +# Install OS packages +RUN apt-get update \ + && apt-get -y upgrade \ + && apt-get install -y wget sudo locales xmlstarlet + +# OpenJDK setup +RUN apt-get install -y openjdk-17-jdk openjdk-17-jre +ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 + +RUN locale-gen en_US.UTF-8 +ENV LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8 + +RUN useradd -G sudo -U -m -s /bin/bash ubuntu \ + && echo "ubuntu ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +WORKDIR /home/ubuntu + +USER ubuntu + +# HBase setup +ARG HBASE_VERSION=2.5.10 +RUN wget https://dlcdn.apache.org/hbase/${HBASE_VERSION}/hbase-${HBASE_VERSION}-bin.tar.gz \ + && tar -xzvf hbase-${HBASE_VERSION}-bin.tar.gz \ + && rm hbase-${HBASE_VERSION}-bin.tar.gz +ENV PATH=/home/ubuntu/hbase-${HBASE_VERSION}/bin:$PATH + +# Configure Zookeeper to listen on all interfaces +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.zookeeper.property.clientPortAddress" \ + -s "/configuration/property[last()]" -t 
elem -n "value" -v "0.0.0.0" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.server.keyvalue.maxsize" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "268435456" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.client.keyvalue.maxsize" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "268435456" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.hregion.max.filesize" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "1610612736" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.hregion.memstore.block.multiplier" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "4" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.hregion.memstore.flush.size" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "134217728" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.zookeeper.property.maxClientCnxns" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "300" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s 
"/configuration/property[last()]" -t elem -n "name" -v "zookeeper.session.timeout" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "1200" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.client.operation.timeout" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "1200" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +RUN xmlstarlet ed -L \ + -s "/configuration" -t elem -n "property" -v "" \ + -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.hstore.blockingStoreFiles" \ + -s "/configuration/property[last()]" -t elem -n "value" -v "200" \ + /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml +# RUN xmlstarlet ed -L \ +# -s "/configuration" -t elem -n "property" -v "" \ +# -s "/configuration/property[last()]" -t elem -n "name" -v "hbase.rootdir" \ +# -s "/configuration/property[last()]" -t elem -n "value" -v "file:///hbase-data" \ +# /home/ubuntu/hbase-${HBASE_VERSION}/conf/hbase-site.xml + +COPY init.sh /home/ubuntu/init.sh +CMD ["/bin/bash", "init.sh"] \ No newline at end of file diff --git a/recovery/hbase/setup/init.sh b/recovery/hbase/setup/init.sh new file mode 100755 index 0000000..a28c0ac --- /dev/null +++ b/recovery/hbase/setup/init.sh @@ -0,0 +1,58 @@ +echo 'Starting HBase...' +start-hbase.sh & +hbase thrift start -p 9090 & + +echo 'Waiting for HBase to be up...' +while ! echo 'status' | hbase shell &>/dev/null; do sleep 5; done + +sleep 10 + +echo 'Creating tables...' +# if [ ! -f table.blocks ]; then +# echo "create 'blocks', 'x'" | hbase shell +# touch table.blocks +# fi + +# if [ ! -f table.entries ]; then +# echo "create 'entries', 'x'" | hbase shell +# touch table.entries +# fi + +function create_table_with_disabled_split_policy() { + TABLE_NAME=$1 + if ! 
echo "list" | hbase shell | grep -q "$TABLE_NAME"; then + echo "create '$TABLE_NAME', {NAME => 'x'}, {SPLIT_POLICY => 'org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy'}" | hbase shell + touch "table.$TABLE_NAME" + echo "Table '$TABLE_NAME' created." + else + echo "Table '$TABLE_NAME' already exists." + fi +} + +# Create tables with disabled region split policy +echo 'Creating tables with disabled region split policy...' +create_table_with_disabled_split_policy "blocks" +create_table_with_disabled_split_policy "entries" + +echo 'Create tables with normal region split policy...' +if [ ! -f table.tx ]; then + echo "create 'tx', 'x'" | hbase shell + touch table.tx +fi + +if [ ! -f table.tx-by-addr ]; then + echo "create 'tx-by-addr', 'x'" | hbase shell + touch table.tx-by-addr +fi + +if [ ! -f table.tx_full ]; then + echo "create 'tx_full', 'x'" | hbase shell + touch table.tx_full +fi + + + +echo 'Tables created successfully...' + +touch /tmp/hbase_ready +wait \ No newline at end of file diff --git a/recovery/hbase/setup/main.sh b/recovery/hbase/setup/main.sh new file mode 100755 index 0000000..98b1b81 --- /dev/null +++ b/recovery/hbase/setup/main.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +IMAGE_NAME="hbase-image" +CONTAINER_NAME="hbase-container" + +echo "Building HBase Docker image..." +docker build -t "$IMAGE_NAME" ./hbase/setup + +if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then + echo "There is already a running HBase container. This is a safety measure to prevent data loss." + echo "If you are sure you want rebuild and recreate a new HBase container, please remove this check." + echo "The check is located in:" + pwd + echi "Exiting..." + exit 1 +fi + +if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then + echo "Stopping running HBase container..." + docker stop "$CONTAINER_NAME" +fi + +if [ "$(docker ps -aq -f name=$CONTAINER_NAME)" ]; then + echo "Removing existing HBase container..." 
+ docker rm "$CONTAINER_NAME" +fi + +echo "HBase Docker image built successfully." +echo "Starting HBase container..." +mkdir -p ./sequencefiles +# mkdir -p /data/hbase +# -v /data/hbase:/hbase-data \ + +HBASE_CONTAINER_ID=$( + docker run -d \ + --name "$CONTAINER_NAME" \ + -v "./hbase/export/export_sequencefiles.sh:/export_sequencefiles.sh" \ + -v "./sequencefiles:/sequencefiles" \ + -v "/tmp:/tmp" \ + -p 16010:16010 \ + -p 16020:16020 \ + -p 16030:16030 \ + -p 9090:9090 \ + "$IMAGE_NAME" +) + +echo "Waiting for HBase readiness..." +while [ ! -f /tmp/hbase_ready ]; do sleep 5; done + +echo "HBase is up and running." +rm /tmp/hbase_ready + +# Output the container ID for use by other scripts +echo "HBase container ID: $HBASE_CONTAINER_ID" +echo "$HBASE_CONTAINER_ID" > ./hbase/setup/hbase_container_id.txt \ No newline at end of file diff --git a/recovery/integrity_check/Readme.md b/recovery/integrity_check/Readme.md new file mode 100644 index 0000000..d187b6a --- /dev/null +++ b/recovery/integrity_check/Readme.md @@ -0,0 +1,2 @@ +coscli ls > live_sync_blocks and entries e.g.: +coscli ls cos://solana-prod/live_sync/entries/ -r > live_sync_entries.tx \ No newline at end of file diff --git a/recovery/integrity_check/live_sync_blocks.txt b/recovery/integrity_check/live_sync_blocks.txt new file mode 100644 index 0000000..bb0fd71 --- /dev/null +++ b/recovery/integrity_check/live_sync_blocks.txt @@ -0,0 +1 @@ +live_sync/blocks/range_00000000110b5f70_00000000110b6358/blocks.seq | STANDARD | 2024-08-26T14:43:24.000Z | 229.56 MB diff --git a/recovery/integrity_check/live_sync_entries.txt b/recovery/integrity_check/live_sync_entries.txt new file mode 100644 index 0000000..7c1f685 --- /dev/null +++ b/recovery/integrity_check/live_sync_entries.txt @@ -0,0 +1 @@ +live_sync/entries/range_00000000110b5f70_00000000110b6358/entries.seq | STANDARD | 2024-08-26T14:43:24.000Z | 15.89 MB diff --git a/recovery/integrity_check/live_sync_integrity_check.py 
b/recovery/integrity_check/live_sync_integrity_check.py new file mode 100644 index 0000000..73ac91a --- /dev/null +++ b/recovery/integrity_check/live_sync_integrity_check.py @@ -0,0 +1,59 @@ +def hex_to_decimal(hex_str): + """Convert hex string to decimal.""" + return int(hex_str, 16) + +def decimal_to_formatted_hex(decimal, length=16): + """Convert decimal number to a formatted hex string with leading zeros.""" + return f"{decimal:0{length}X}".lower() + +def process_file(file_path): + """Read file, process each line, and check for consecutive hex differences.""" + with open(file_path, 'r') as file: + previous_hex = None + missing_ranges = [] # List to store all missing ranges + + for line in file: + # Split the line by '|' and extract the first part (which contains the hex numbers) + parts = line.split('|') + if len(parts) < 4: + continue # Skip any malformed lines + + # Extract the first hex number from the range in the first part of the split line + try: + hex_range = parts[0].strip().split('_')[2] # Get the correct hex number + except IndexError: + print(f"Could not extract hex number from line: {line.strip()}") + continue # Skip if the format is incorrect + + # Convert the hex number to decimal + try: + current_decimal = hex_to_decimal(hex_range) + except ValueError: + print(f"Invalid hex number found in line: {line.strip()}") + continue # Skip lines with invalid hex numbers + + # If there is a previous number, compare the difference + if previous_hex is not None: + difference = current_decimal - previous_hex + if difference > 1000: + # starting from previous_hex to current_decimal, there are missing slots, + # print each 1000 incremental rage until current_decimal + for i in range(previous_hex, current_decimal + 1000, 1000): + missing_ranges.append(f"{decimal_to_formatted_hex(i)}_{decimal_to_formatted_hex(i + 1000)}") + missing_ranges.append("---------------------") + + # Store the current decimal for the next comparison + previous_hex = current_decimal + + # 
Print all missing ranges + if missing_ranges: + print("Missing ranges:") + for range_str in missing_ranges: + print(range_str) + else: + print("No missing slots found.") + +# Example usage +file_path = 'live_sync_entries.txt' # Replace with your actual file path +process_file(file_path) + diff --git a/recovery/start_recovery.sh b/recovery/start_recovery.sh new file mode 100755 index 0000000..304ea0f --- /dev/null +++ b/recovery/start_recovery.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +if ! command -v docker &> /dev/null; then + echo "Docker is not installed. Please install Docker and try again." + exit 1 +fi + +function display_help { + echo "Usage: $0 " + echo + echo "Arguments:" + echo " START_BLOCK The starting block number (must be a decimal number)." + echo " END_BLOCK The ending block number (must be a decimal number)." + echo " MOUNT_PATH The path where the snapshot archive will be downloaded (must exist)." + echo " BUCKET_ALIAS The bucket alias defined in the .cos.yaml configuration file." + echo " BUCKET_PATH The path in the bucket to upload the files. Example: /test_recovery" + echo + echo "Options:" + echo " --help Display this help message and exit." + exit 0 +} + +if [ "$1" == "--help" ]; then + display_help +fi + +if [ "$#" -ne 5 ]; then + echo "Error: Invalid number of arguments." + echo + display_help + exit 1 +fi + +START_BLOCK=$1 +END_BLOCK=$2 +MOUNT_PATH=$3 +BUCKET_ALIAS=$4 +BUCKET_PATH=$5 + +if ! [[ $START_BLOCK =~ ^[0-9]+$ ]]; then + echo "Error: START_BLOCK must be a decimal number." + exit 1 +fi + +if ! [[ $END_BLOCK =~ ^[0-9]+$ ]]; then + echo "Error: END_BLOCK must be a decimal number." + exit 1 +fi + +if [ ! -d "$MOUNT_PATH" ]; then + echo "Error: MOUNT_PATH must be a valid directory." + exit 1 +fi + +if [ -z "$BUCKET_ALIAS" ]; then + echo "Error: BUCKET_ALIAS must be a non-empty string." + exit 1 +fi + +echo "Starting HBase setup..." +./hbase/setup/main.sh + +# Retrieve the HBase container ID +if [ ! 
-f "./hbase/setup/hbase_container_id.txt" ]; then + echo "Error: HBase container ID file not found. Ensure HBase is running." + exit 1 +fi + +HBASE_CONTAINER_ID=$(cat ./hbase/setup/hbase_container_id.txt) +if [ -z "$HBASE_CONTAINER_ID" ]; then + echo "Error: HBase container ID is empty. Ensure HBase is running." + exit 1 +fi + +# Retrieve the HBase container's IP address +HBASE_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$HBASE_CONTAINER_ID") +if [ -z "$HBASE_IP" ]; then + echo "Error: Could not retrieve HBase container IP address." + exit 1 +fi + +echo "HBase is running with IP: $HBASE_IP" + +echo "Calling HBase import script with container ID..." +./hbase/import/main.sh "$HBASE_IP" "$START_BLOCK" "$END_BLOCK" "$MOUNT_PATH" + +if [ $? -ne 0 ]; then + echo "Error: HBase import script failed." + exit 1 +fi + +echo "Exporting data from HBase..." +./hbase/export/main.sh "$HBASE_CONTAINER_ID" "$START_BLOCK" "$END_BLOCK" "/sequencefiles" + +if [ $? -eq 0 ]; then + echo "Export process completed successfully." +else + echo "Export process failed." + exit 1 +fi + +echo "Uploading data to COS..." +./tencent-upload/main.sh "./sequencefiles" "./tencent-upload/.cos.yaml" "$BUCKET_ALIAS" "$BUCKET_PATH" + +if [ $? -eq 0 ]; then + echo "Upload process completed successfully." +else + echo "Upload process failed." + exit 1 +fi + +echo "Recovery process completed successfully." 
diff --git a/recovery/tencent-upload/.cos.example.yaml b/recovery/tencent-upload/.cos.example.yaml new file mode 100644 index 0000000..804070a --- /dev/null +++ b/recovery/tencent-upload/.cos.example.yaml @@ -0,0 +1,15 @@ +cos: + base: + secretid: secretid + secretkey: secretkey + sessiontoken: sessiontoken + protocol: https + mode: "" + cvmrolename: "" + closeautoswitchhost: "" + buckets: + - name: name + alias: alias + region: "" + endpoint: endpoint + ofs: false \ No newline at end of file diff --git a/recovery/tencent-upload/Dockerfile b/recovery/tencent-upload/Dockerfile new file mode 100644 index 0000000..17b0b73 --- /dev/null +++ b/recovery/tencent-upload/Dockerfile @@ -0,0 +1,30 @@ +# Use a lightweight Linux base image +FROM ubuntu:22.04 + +# Install dependencies +RUN apt-get update && apt-get install -y \ + wget \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Download and install COSCLI +RUN wget -O /usr/local/bin/coscli https://cosbrowser.cloud.tencent.com/software/coscli/coscli-linux-amd64 \ + && chmod +x /usr/local/bin/coscli + +# Verify installation +RUN coscli --version + +# Set the working directory +WORKDIR /app + +# Copy the .cos.yaml configuration file into the container (placeholder if testing) +# COPY .cos.yaml /root/.cos.yaml + +# Copy the script that uploads files to COS +COPY upload_to_cos.sh /app/upload_to_cos.sh +RUN chmod +x /app/upload_to_cos.sh + +# Set the entrypoint to the upload script +ENTRYPOINT ["/app/upload_to_cos.sh"] +# CMD ["sleep", "infinity"] + diff --git a/recovery/tencent-upload/main.sh b/recovery/tencent-upload/main.sh new file mode 100755 index 0000000..395cb91 --- /dev/null +++ b/recovery/tencent-upload/main.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Check if required parameters are provided +if [ "$#" -ne 4 ]; then + echo "Usage: $0 " + echo "Example: $0 /path/to/exported_files /path/to/.cos.yaml mybucket /test_recovery" + exit 1 +fi + +# Parameters +SOURCE_DIRECTORY=$(realpath "$1") # Absolute path to directory 
with exported sequence files
COS_CONFIG_PATH=$(realpath "$2")   # Absolute path to the .cos.yaml configuration file
BUCKET_ALIAS=$3                    # Bucket alias defined in .cos.yaml
BUCKET_PATH=$4                     # Path in the bucket to upload the files

# Docker image / container names and the in-container mount point.
IMAGE_NAME="coscli-uploader"
CONTAINER_NAME="coscli-uploader-container"
MOUNT_PATH="/app/sequencefiles"

# Build the Docker image
echo "Building the Docker image: $IMAGE_NAME"
docker build -t "$IMAGE_NAME" ./tencent-upload

if [ $? -ne 0 ]; then
    echo "Failed to build the Docker image. Please check the Dockerfile and try again."
    exit 1
fi

# Check if the container is already running and stop it if necessary
if [ "$(docker ps -q -f name=$CONTAINER_NAME)" ]; then
    echo "Stopping existing container..."
    docker stop $CONTAINER_NAME
fi

# Remove the existing container if it exists
if [ "$(docker ps -aq -f name=$CONTAINER_NAME)" ]; then
    echo "Removing existing container..."
    docker rm $CONTAINER_NAME
fi

# Pre-create the skipped-files log so the bind mount below maps a file,
# not an auto-created directory.
touch empty_files.log

# Run the Docker container with mounted volumes for files and config
echo "Running the Docker container to upload files to COS..."

# BUG FIX: `docker run -v` requires an absolute host path; the previous
# relative "./empty_files.log" is rejected by docker (relative bind-mount
# sources are invalid). Anchor it to the current working directory.
docker run \
    --name "$CONTAINER_NAME" \
    -v "$SOURCE_DIRECTORY:$MOUNT_PATH" \
    -v "$COS_CONFIG_PATH:/root/.cos.yaml" \
    -v "$(pwd)/empty_files.log:/app/empty_files.log" \
    "$IMAGE_NAME" "$MOUNT_PATH" "$BUCKET_ALIAS" "$BUCKET_PATH"

diff --git a/recovery/tencent-upload/upload_to_cos.sh b/recovery/tencent-upload/upload_to_cos.sh
new file mode 100644
index 0000000..d0b6f7f
--- /dev/null
+++ b/recovery/tencent-upload/upload_to_cos.sh
@@ -0,0 +1,54 @@
#!/bin/bash
# upload_to_cos.sh — runs inside the uploader container. Uploads every *.seq
# file larger than 500 KB under SOURCE_DIRECTORY to COS, preserving the
# relative directory layout; smaller files are skipped and logged.

# Check if required parameters are provided
if [ "$#" -ne 3 ]; then
    echo "Usage: $0 SOURCE_DIRECTORY BUCKET_ALIAS BUCKET_PATH"
    exit 1
fi

# Normalize SOURCE_DIRECTORY to remove any trailing slash
SOURCE_DIRECTORY=$(realpath -m "$1")   # Local source directory
BUCKET_ALIAS=$2                        # Bucket alias for COS
BUCKET_PATH=$3                         # Path in the bucket to upload the files

# Verify the COSCLI configuration
if ! coscli config show > /dev/null 2>&1; then
    echo "Error: COSCLI configuration not found or invalid."
    exit 1
fi

# Log file for skipped empty files
LOG_FILE="/app/empty_files.log"
echo "Logging skipped files to $LOG_FILE"
> "$LOG_FILE" # Clear the log file if it exists

# BUG FIX: the original piped `find` into `while`, so the loop body ran in a
# pipeline subshell and `exit 1` on a failed upload only terminated that
# subshell — the script then printed the success message and exited 0 even
# after an upload failure. Process substitution keeps the loop in the main
# shell so a failure really aborts the script.
while IFS= read -r file; do
    # Check file size (in bytes) and skip if <= 500 KB (500 * 1024 bytes)
    FILE_SIZE=$(stat --printf="%s" "$file")
    if [ "$FILE_SIZE" -le $((500 * 1024)) ]; then
        echo "Skipping $file (size: ${FILE_SIZE} bytes, <= 500 KB)"
        echo "$file" >> "$LOG_FILE"
        continue
    fi

    # Remove the SOURCE_DIRECTORY prefix to maintain relative path in COS
    RELATIVE_PATH="${file#$SOURCE_DIRECTORY/}"

    # Set the full destination path in COS, including the bucket alias and path
    DEST_PATH="cos://$BUCKET_ALIAS/$BUCKET_PATH/$RELATIVE_PATH"

    echo "Uploading $file to COS at $DEST_PATH"

    # Upload file to the specified COS path; abort on the first failure.
    if ! coscli cp "$file" "$DEST_PATH"; then
        echo "Error: Failed to upload $file to COS."
        exit 1
    fi
done < <(find "$SOURCE_DIRECTORY" -type f -name "*.seq")

echo "All valid files uploaded successfully to COS."
echo "Empty files logged to $LOG_FILE."