Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,10 @@ workspace

*.orig
dependency-reduced-pom.xml

.cos.yaml

recovery/sequencefiles/
recovery/empty_files.log
recovery/logs.log
recovery/hbase/setup/hbase_container_id.txt
24 changes: 24 additions & 0 deletions recovery/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
TODO

TODO: document how the row keys must be supplied — in hex or in decimal? — and check whether both are possible.


When starting the HBase instance, you must mount the path where you want to export the sequencefiles; otherwise you will lose the data from HBase, because you can't add the volume later (use `docker commit` to save the day in case you missed this note :D).

Ideally, the export_from_hbase.sh script should also be mounted into the HBase container.

TODO: when creating tables for hbase, make them have just one region and increase the file size:
hbase shell
hbase> create 'blocks_copy', {NAME => 'x'}, CONFIGURATION => {'hbase.hregion.max.filesize' => '1099511627776', 'hbase.hregion.split.overallfiles' => 'true'}

check if there are multiple regions after populating the table

steps:
- create /data/recovery folder
- create a sequencefile folder in solana-syncer/recovery/tencent-upload and mount it to the hbase docker container (this is where the files will be exported)
- start the docker HBase container and set the HBASE_HOST in ./hbase-import/Dockerfile with docker's ip
- start the ./start.sh script


-- make a /data/recovery folder
-- make a /data/hbase folder and make ubuntu owner so that hbase can write in it
157 changes: 157 additions & 0 deletions recovery/hbase/export/export_sequencefiles.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#!/bin/bash

# Export rows of an HBase table as sequencefiles, split into fixed-size
# key ranges.
#
# Usage: export_sequencefiles.sh <TABLE_NAME> <START_KEY> <END_KEY> <OUTPUT_PATH>
#   START_KEY / END_KEY are decimal row keys (converted to hex internally).

# Exactly four positional arguments are required.
if [[ "$#" -ne 4 ]]; then
  echo "Usage: $0 <TABLE_NAME> <START_KEY> <END_KEY> <OUTPUT_PATH>"
  exit 1
fi

TABLE_NAME=$1
START_KEY=$2
END_KEY=$3
OUTPUT_PATH=$4
ROWS_PER_EXPORT=1000   # rows per exported chunk / chunk alignment

# Both keys must be plain decimal integers.
decimal_re='^[0-9]+$'

if ! [[ $START_KEY =~ $decimal_re ]]; then
  echo "Error: START_KEY must be a decimal number."
  exit 1
fi

if ! [[ $END_KEY =~ $decimal_re ]]; then
  echo "Error: END_KEY must be a decimal number."
  exit 1
fi

echo "All inputs are valid."

#######################################
# Verify that an HBase table consists of exactly one region.
# The export logic assumes a single contiguous region, so anything else
# is treated as an error.
# Arguments: $1 - table name
# Outputs:   progress / diagnostics to stdout
# Returns:   0 if exactly one region is found, 1 otherwise
#######################################
check_multiple_regions() {
  local table=$1
  local region_listing region_count

  if [[ -z "$table" ]]; then
    echo "Error: Table name is required."
    return 1
  fi

  echo "Checking regions for table: $table"

  # "list_regions" ends with an "N rows" summary line; N is the region count.
  echo "Fetching region count for table: $table"
  region_listing=$(echo "list_regions '$table'" | hbase shell)
  region_count=$(echo "$region_listing" | grep -oP '\d+(?= rows)' | head -n 1)

  if [[ -z "$region_count" ]]; then
    echo "Error: Could not determine the number of regions for table $table."
    return 1
  fi

  echo "Number of regions detected: $region_count"

  if [[ $region_count -eq 1 ]]; then
    echo "Table $table has exactly one region. Exporting..."
    return 0
  fi

  echo "Error: Table $table does not have exactly one region ($region_count detected)."
  return 1
}

#######################################
# Run one HBase Export job for a half-open decimal key range [start, stop).
# Keys are passed to HBase as 16-digit lowercase hex row keys.
# Globals:   reads nothing; writes nothing
# Arguments: $1 table, $2 start key (decimal), $3 stop key (decimal),
#            $4 output directory for this range
# Returns:   the Export job's exit status
#######################################
_run_hbase_export() {
  local table=$1 start_dec=$2 stop_dec=$3 out_dir=$4
  local start_hex stop_hex

  # %016x emits zero-padded lowercase hex directly (no tr step needed).
  start_hex=$(printf '%016x' "$start_dec")
  stop_hex=$(printf '%016x' "$stop_dec")

  echo "Exporting rows from $start_hex to $stop_hex"
  hbase org.apache.hadoop.hbase.mapreduce.Export \
    -D hbase.mapreduce.scan.row.start="$start_hex" \
    -D hbase.mapreduce.scan.row.stop="$stop_hex" \
    -D mapreduce.input.fileinputformat.split.minsize=536870912000 \
    "$table" "$out_dir"
}

#######################################
# Validate a completed export directory and rename its single part file
# to <table>.seq, dropping the CRC/_SUCCESS marker files.
# Aborts the whole script (exit 1) on any unexpected layout.
# Arguments: $1 table, $2 export directory
#######################################
_finalize_export_dir() {
  local table=$1 dir=$2
  local part_files=("$dir"/part-m-0000*)

  # A single-region table must produce exactly one mapper output file.
  if [ ${#part_files[@]} -gt 1 ]; then
    echo "Error: Multiple part-m-0000x files found in $dir. Stopping script."
    exit 1
  fi

  if [[ -f "$dir/part-m-00000" && -f "$dir/.part-m-00000.crc" && -f "$dir/_SUCCESS" && -f "$dir/._SUCCESS.crc" ]]; then
    mv "$dir/part-m-00000" "$dir/$table.seq"
    rm "$dir/.part-m-00000.crc" "$dir/_SUCCESS" "$dir/._SUCCESS.crc"
  else
    echo "Error: Required files not found in $dir. Stopping script."
    exit 1
  fi
}

#######################################
# Export [START_KEY, END_KEY) from TABLE_NAME in ROWS_PER_EXPORT-sized
# chunks, one sequencefile per chunk, under OUTPUT_PATH/TABLE_NAME/.
# Globals:   ROWS_PER_EXPORT (read) - chunk size / alignment
# Arguments: $1 table, $2 start key (decimal), $3 end key (decimal),
#            $4 base output path
#######################################
export_sequencefile () {
  local TABLE_NAME=$1
  local START_KEY=$2
  local END_KEY=$3
  local OUTPUT_PATH=$4
  local nearest_multiple current_start current_stop range_dir

  # Align START_KEY up to the next multiple of ROWS_PER_EXPORT.
  nearest_multiple=$(( (START_KEY + ROWS_PER_EXPORT - 1) / ROWS_PER_EXPORT * ROWS_PER_EXPORT ))

  # Export the leading partial ("skipped") range first, if any.
  if [ "$START_KEY" -lt "$nearest_multiple" ]; then
    # NOTE(review): this directory is named with DECIMAL keys while the
    # per-chunk directories below use hex — kept as-is for compatibility;
    # confirm downstream consumers expect the mixed naming.
    range_dir="${OUTPUT_PATH}/${TABLE_NAME}/range_${START_KEY}_${nearest_multiple}"
    echo "Exporting skipped range from $START_KEY to $nearest_multiple"
    if ! _run_hbase_export "$TABLE_NAME" "$START_KEY" "$nearest_multiple" "$range_dir"; then
      echo "Error: HBase export failed for $range_dir. Stopping script."
      exit 1
    fi
    _finalize_export_dir "$TABLE_NAME" "$range_dir"

    # Continue from the aligned key.
    START_KEY=$nearest_multiple
  fi

  # Walk the remaining keyspace in ROWS_PER_EXPORT-sized steps.
  current_start=$START_KEY
  while [ "$current_start" -lt "$END_KEY" ]; do
    current_stop=$((current_start + ROWS_PER_EXPORT))
    if [ "$current_stop" -gt "$END_KEY" ]; then
      current_stop=$END_KEY
    fi

    # Per-chunk directories are named with zero-padded lowercase hex keys.
    range_dir="${OUTPUT_PATH}/${TABLE_NAME}/range_$(printf '%016x' "$current_start")_$(printf '%016x' "$current_stop")"

    if ! _run_hbase_export "$TABLE_NAME" "$current_start" "$current_stop" "$range_dir"; then
      echo "Error: HBase export failed for $range_dir. Stopping script."
      exit 1
    fi
    _finalize_export_dir "$TABLE_NAME" "$range_dir"

    current_start=$current_stop
  done
}


# Refuse to export unless the table is a single region; the chunked export
# assumes one contiguous keyspace. (Quoted args + direct `if !` test instead
# of the unquoted-call + `$?` pattern.)
if ! check_multiple_regions "$TABLE_NAME"; then
  echo "Error: Table $TABLE_NAME has multiple regions. Exporting is not supported."
  exit 1
fi

# Export sequence files for the specified range.
export_sequencefile "$TABLE_NAME" "$START_KEY" "$END_KEY" "$OUTPUT_PATH"
echo "Export completed for all ranges from $START_KEY to $END_KEY in table $TABLE_NAME."
44 changes: 44 additions & 0 deletions recovery/hbase/export/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash

# Trigger the in-container export script for both tables ('blocks' and
# 'entries') over the given decimal block range.
#
# Usage: main.sh <HBASE_CONTAINER_ID> <START_BLOCK> <END_BLOCK> <OUTPUT_PATH>

if [ "$#" -ne 4 ]; then
  echo "Usage: $0 <HBASE_CONTAINER_ID> <START_BLOCK> <END_BLOCK> <OUTPUT_PATH>"
  exit 1
fi

HBASE_CONTAINER_ID=$1
START_BLOCK=$2
END_BLOCK=$3
OUTPUT_PATH=$4

# Validate START_BLOCK and END_BLOCK: plain decimal integers only.
if ! [[ $START_BLOCK =~ ^[0-9]+$ ]]; then
  echo "Error: START_BLOCK must be a decimal number."
  exit 1
fi

if ! [[ $END_BLOCK =~ ^[0-9]+$ ]]; then
  echo "Error: END_BLOCK must be a decimal number."
  exit 1
fi

echo "Triggering export script inside HBase container ID: $HBASE_CONTAINER_ID"

# The identical export runs for each table; a failure on either aborts the
# run (previously two copy-pasted stanzas with `$?` checks).
for table in blocks entries; do
  echo "Exporting data for table '$table'..."
  if ! docker exec "$HBASE_CONTAINER_ID" /bin/bash /export_sequencefiles.sh "$table" "$START_BLOCK" "$END_BLOCK" "$OUTPUT_PATH"; then
    echo "Error: Export failed for table '$table'."
    exit 1
  fi
done

echo "Data export completed successfully for both tables."
65 changes: 65 additions & 0 deletions recovery/hbase/import/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Stage 1: Build the Rust binaries
FROM rust:latest AS builder

# Toolchain and native libraries needed to compile agave and the adapter.
RUN apt-get update && apt-get install -y \
    cmake \
    protobuf-compiler \
    clang \
    pkg-config \
    libssl-dev \
    libudev-dev \
    build-essential \
    git

# Clone and build agave/ledger-tool
# NOTE(review): builds whatever HEAD is at image-build time; pin a tag or
# commit for reproducible images.
WORKDIR /usr/src/agave
RUN git clone https://github.com/anza-xyz/agave.git . && \
    cd ledger-tool && \
    cargo build --release

# Clone and build solana-bigtable-hbase-adapter
WORKDIR /usr/src/solana-bigtable-hbase-adapter
RUN git clone https://github.com/bwarelabs/solana-bigtable-hbase-adapter.git . && \
    cargo build --release

# Stage 2: Prepare runtime image with Ubuntu
FROM ubuntu:22.04

# Install runtime dependencies.
# 'netcat-openbsd' is requested explicitly: the bare 'netcat' name is a
# virtual/transitional package with multiple providers and is not reliably
# resolvable on recent Ubuntu releases.
RUN apt-get update && apt-get install -y \
    apt-transport-https \
    ca-certificates \
    gnupg \
    curl \
    protobuf-compiler \
    libssl-dev \
    netcat-openbsd \
    wget \
    pv \
    libzstd-dev \
    zstd \
    && rm -rf /var/lib/apt/lists/*

# Add the Google Cloud SDK apt repository for gsutil / google-cloud-cli.
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
    | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    apt-get update -y && \
    apt-get install google-cloud-cli -y

# Copy the built binaries from the builder stage
COPY --from=builder /usr/src/agave/target/release/agave-ledger-tool /usr/recovery/agave-ledger-tool
COPY --from=builder /usr/src/solana-bigtable-hbase-adapter/target/release/server /usr/recovery/solana-bigtable-hbase-adapter

# Copy scripts and make them executable
WORKDIR /usr/recovery
COPY download_missing_blocks.sh /usr/recovery/download_missing_blocks.sh
COPY entrypoint.sh /usr/recovery/entrypoint.sh
RUN chmod +x /usr/recovery/download_missing_blocks.sh /usr/recovery/entrypoint.sh

# The adapter presents a Bigtable-emulator endpoint for agave-ledger-tool.
ENV BIGTABLE_EMULATOR_HOST="localhost:50051"
# ENV HBASE_HOST="localhost:9090"

# Define entrypoint
ENTRYPOINT ["/usr/recovery/entrypoint.sh"]
Loading