Skip to content

Multi-staged sync & import-db (aka "node versions manager") #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d51d17b
Sketch multi-version scripts.
andreibancioiu Apr 3, 2025
1320df3
Actually, add only Linux support.
andreibancioiu Apr 3, 2025
1b3e7e0
Adjust / finalize build flow.
andreibancioiu Apr 3, 2025
2b60f56
Update readme.
andreibancioiu Apr 3, 2025
c9a2a5b
Minor update - readme, config.
andreibancioiu Jun 30, 2025
ff4caf1
Build wrt. desired Go version.
andreibancioiu Jul 1, 2025
54d9a63
Logs refactoring.
andreibancioiu Jul 1, 2025
0bcc3bb
Config, refactoring etc.
andreibancioiu Jul 2, 2025
3ad3c65
Refactor / rename.
andreibancioiu Jul 2, 2025
fbd50f2
Multiversion -> multi-stage. Lanes etc. (sketch).
andreibancioiu Jul 2, 2025
02897fd
Adjust config. Sketch coroutines logic / lanes.
andreibancioiu Jul 2, 2025
00b3c9c
Continue working on driver etc.
andreibancioiu Jul 3, 2025
da41127
Lanes, node controller, work in progress.
andreibancioiu Jul 3, 2025
1b1b0d3
Handle node's stages, work in progress.
andreibancioiu Jul 3, 2025
89d37da
Lanes, starting phases etc.
andreibancioiu Jul 4, 2025
ff87207
Lanes, stages, work in progress.
andreibancioiu Jul 4, 2025
7b51131
Configure node, work in progress.
andreibancioiu Jul 4, 2025
618c8a5
Refactor download steps.
andreibancioiu Jul 4, 2025
7f9b163
Sketch get_current_epoch().
andreibancioiu Jul 4, 2025
a33d482
Handle prefs file etc.
andreibancioiu Jul 4, 2025
5fcd39d
Remove spica (and older).
andreibancioiu Jul 4, 2025
3266ae5
Fix config etc.
andreibancioiu Jul 4, 2025
b87c2c7
Transition to next stage.
andreibancioiu Jul 4, 2025
81ca8f6
Prefs not needed - node args are sufficient.
andreibancioiu Jul 4, 2025
d05e7cf
Better logging.
andreibancioiu Jul 4, 2025
6af0cf6
Partial fix after review.
andreibancioiu Jul 4, 2025
72a64fc
Fix after review.
andreibancioiu Jul 7, 2025
c77197a
Fix after review.
andreibancioiu Jul 7, 2025
a02f48f
Additional config, more explanations.
andreibancioiu Jul 7, 2025
c8394b9
Handle ~ in args.
andreibancioiu Jul 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
ignore = E501
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
#Don't track files:
.DS_Store
venv/
__pycache__
44 changes: 44 additions & 0 deletions multistage/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# MultiversX multi-version node scripts

These scripts allow one to use multiple versions of the MultiversX node, in sequence, to _sync_ (from the deep past) or run _import-db_ flows.

**Important:** these scripts are only suitable for observers, not for validators. Furthermore, the MultiversX proxy isn't handled.

## Python virtual environment

Create a virtual environment and install the dependencies:

```
python3 -m venv ./venv
source ./venv/bin/activate
pip install -r ./requirements.txt --upgrade
```

## Building the artifacts

Skip this flow if you choose to download the pre-built Node artifacts, instead of building them.

```
PYTHONPATH=. python3 ./multistage/build.py --workspace=~/mvx-workspace --config=./multistage/samples/build.json
```

## Set up an observer (or a squad)

```
PYTHONPATH=. python3 ./multistage/driver.py --config=./multistage/samples/testnet_sync.json --lane=shard_0 --stage=andromeda

PYTHONPATH=. python3 ./multistage/driver.py --config=./multistage/samples/testnet_sync.json --lane=shard_1 --stage=andromeda
...
```

Once nodes are ready (synchronized to the network), switch to the regular node management scripts.

## Run import-db

```
PYTHONPATH=. python3 ./multistage/driver.py --config=./multistage/samples/testnet_import_db.json --lane=shard_0 --stage=andromeda

PYTHONPATH=. python3 ./multistage/driver.py --config=./multistage/samples/testnet_import_db.json --lane=shard_1 --stage=andromeda

...
```
116 changes: 116 additions & 0 deletions multistage/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import json
import os
import shutil
import sys
import traceback
from argparse import ArgumentParser
from pathlib import Path
from typing import Any

from rich import print
from rich.panel import Panel
from rich.rule import Rule

from multistage import errors, golang
from multistage.config import BuildConfigEntry
from multistage.constants import FILE_MODE_NICE
from multistage.shared import fetch_archive


def main(cli_args: list[str] = sys.argv[1:]):
    """Entry point of the build script.

    Delegates to ``_do_main``. If an ``errors.KnownError`` is raised, the
    traceback and the error's pretty message are printed in red panels and
    ``1`` is returned. Otherwise ``None`` is returned.
    """
    try:
        _do_main(cli_args)
    except errors.KnownError as known_error:
        trace_panel = Panel(f"[red]{traceback.format_exc()}")
        print(trace_panel)
        pretty_panel = Panel(f"[red]{known_error.get_pretty()}")
        print(pretty_panel)
        return 1

    return None


def _do_main(cli_args: list[str]):
    """Parse the CLI arguments and process every entry of the 'build' configuration.

    The workspace folder is created if missing. For each configuration entry,
    in order: ``golang.install_go``, ``golang.acquire_environment``,
    ``do_download``, ``do_build`` and ``copy_artifacts`` are called.
    """
    parser = ArgumentParser()
    parser.add_argument("--workspace", required=True, help="path of the build workspace")
    parser.add_argument("--config", required=True, help="path of the 'build' configuration file")
    parsed = parser.parse_args(cli_args)

    workspace = Path(parsed.workspace).expanduser().resolve()
    workspace.mkdir(parents=True, exist_ok=True)

    config_file = Path(parsed.config).expanduser().resolve()
    records = json.loads(config_file.read_text())
    entries = [BuildConfigEntry.new_from_dictionary(record) for record in records]

    for build_entry in entries:
        print(Rule(f"[bold yellow]{build_entry.name}"))

        # Each entry gets its own Go installation / environment, labelled by the entry's name.
        golang.install_go(workspace, build_entry.go_url, environment_label=build_entry.name)
        environment = golang.acquire_environment(workspace, label=build_entry.name)

        sources_folder = do_download(workspace, build_entry)
        node_folder = do_build(sources_folder, environment)
        copy_artifacts(node_folder, build_entry)


def do_download(workspace: Path, entry: BuildConfigEntry) -> Path:
    """Fetch the entry's source archive into ``workspace / entry.name`` and return that folder."""
    target_folder = workspace / entry.name
    fetch_archive(entry.source_url, target_folder)

    return target_folder


def do_build(source_parent_folder: Path, environment: golang.BuildEnvironment) -> Path:
    """Build ``cmd/node`` from the downloaded sources and return the ``cmd/node`` folder.

    After the build, the wasmer shared libraries are copied next to the built node.
    """
    directories = [Path(child.path) for child in os.scandir(source_parent_folder) if child.is_dir()]

    # A single subfolder means the archive was extracted with a top-level
    # directory; in that case, that directory holds the source code.
    if len(directories) == 1:
        source_root = directories[0]
    else:
        source_root = source_parent_folder

    node_folder = source_root / "cmd" / "node"

    golang.build(node_folder, environment)
    copy_wasmer_libraries(environment, source_root / "go.mod", node_folder)

    return node_folder


def copy_wasmer_libraries(build_environment: golang.BuildEnvironment, go_mod: Path, destination: Path):
    """Copy the ``*.so`` files of the mx-chain-vm-go module into ``destination``.

    The module is looked up under ``<go_path>/pkg/mod``, in its ``wasmer`` and
    ``wasmer2`` subfolders. Each copied file gets the ``FILE_MODE_NICE`` mode.
    """
    modules_folder = Path(build_environment.go_path).expanduser().resolve() / "pkg" / "mod"
    vm_go_folder = modules_folder / get_chain_vm_go_folder_name(go_mod)

    # Collect everything first, then copy.
    shared_objects: list[Path] = []
    for subfolder_name in ("wasmer", "wasmer2"):
        shared_objects.extend((vm_go_folder / subfolder_name).glob("*.so"))

    for shared_object in shared_objects:
        shutil.copy(shared_object, destination)
        os.chmod(destination / shared_object.name, FILE_MODE_NICE)


def get_chain_vm_go_folder_name(go_mod: Path) -> str:
    """Return the ``<module path>@<version>`` folder name of mx-chain-vm-go, as required by ``go.mod``.

    Both forms of the ``require`` directive are understood: an entry inside a
    ``require ( ... )`` block, and the single-line ``require <module> <version>``.
    Comments (``// ...``) and replacement lines (``<module> => ...``) are ignored.

    Raises:
        errors.KnownError: if no usable requirement on mx-chain-vm-go is found.
    """
    module_path = "github.com/multiversx/mx-chain-vm-go"

    for line in go_mod.read_text().splitlines():
        # Strip trailing comments such as "// indirect" (and whole-line comments).
        tokens = line.split("//", 1)[0].split()

        # Single-line directive: "require <module> <version>".
        if tokens[:1] == ["require"]:
            tokens = tokens[1:]

        # A usable requirement has at least "<module> <version>".
        if len(tokens) < 2:
            continue
        if not tokens[0].startswith(module_path):
            continue
        # "<module> => <replacement>" (inside a replace block) carries no version.
        if tokens[1] == "=>":
            continue

        return f"{tokens[0]}@{tokens[1]}"

    raise errors.KnownError("cannot detect location of mx-chain-vm-go")


def copy_artifacts(cmd_node_folder: Path, entry: BuildConfigEntry):
    """Copy the built ``node`` executable and the ``*.so`` files into the entry's destination folder.

    The destination folder is removed (errors ignored) and re-created before copying.
    """
    print(f"Copying artifacts to {entry.destination_folder} ...")

    # The list of artifacts is materialized before the destination is wiped.
    artifacts = [*cmd_node_folder.glob("*.so"), cmd_node_folder / "node"]

    target_folder = Path(entry.destination_folder).expanduser().resolve()
    shutil.rmtree(target_folder, ignore_errors=True)
    target_folder.mkdir(parents=True, exist_ok=True)

    for artifact in artifacts:
        shutil.copy(artifact, target_folder)


if __name__ == "__main__":
    # The value returned by main() becomes the process exit code.
    sys.exit(main(sys.argv[1:]))
158 changes: 158 additions & 0 deletions multistage/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@


from pathlib import Path
from typing import Any

from multistage import errors


class BuildConfigEntry:
    """One entry of the 'build' configuration file. All four fields are required."""

    def __init__(self, name: str, go_url: str, source_url: str, destination_folder: str) -> None:
        # Checked in this order; the first empty field is the one reported.
        required_fields = (
            (name, "build 'name' is required"),
            (go_url, "build 'go url' is required"),
            (source_url, "build 'source' is required"),
            (destination_folder, "build 'destination' is required"),
        )

        for value, message in required_fields:
            if not value:
                raise errors.KnownError(message)

        self.name = name
        self.go_url = go_url
        self.source_url = source_url
        self.destination_folder = destination_folder

    @classmethod
    def new_from_dictionary(cls, data: dict[str, Any]):
        """Create an entry from a record holding the keys ``name``, ``goUrl``, ``sourceUrl``, ``destinationFolder``."""
        def read(key: str):
            # Missing (or falsy) values become empty strings, which the constructor then rejects.
            return data.get(key) or ""

        return cls(
            name=read("name"),
            go_url=read("goUrl"),
            source_url=read("sourceUrl"),
            destination_folder=read("destinationFolder"),
        )


class DriverConfig:
    """The 'driver' configuration: a non-empty list of uniquely named lanes."""

    def __init__(self, lanes: list["LaneConfig"]) -> None:
        names = [lane.name for lane in lanes]

        if not lanes:
            raise errors.BadConfigurationError("'lanes' are required")
        # A set drops duplicates, so a smaller set means at least one repeated name.
        if len(set(names)) < len(names):
            raise errors.BadConfigurationError("lanes names must be unique")

        self.lanes = lanes
        self.lanes_by_name = dict(zip(names, lanes))

    @classmethod
    def new_from_dictionary(cls, data: dict[str, Any]):
        """Create the configuration from a record holding the key ``lanes`` (a list of lane records)."""
        records = data.get("lanes") or []
        return cls(lanes=[LaneConfig.new_from_dictionary(item) for item in records])

    def get_lanes_names(self) -> list[str]:
        """Return the names of the lanes, in configuration order."""
        return [item.name for item in self.lanes]

    def get_lane(self, name: str) -> "LaneConfig":
        """Return the lane having the given name (``KeyError`` if there is none)."""
        return self.lanes_by_name[name]


class LaneConfig:
    """A lane: a name, a working directory and a non-empty, ordered list of uniquely named stages."""

    def __init__(self, name: str, working_directory: str, stages: list["StageConfig"]) -> None:
        stages_names = [stage.name for stage in stages]

        # Checked in this order; the first failing check is the one reported.
        checks = (
            (bool(name), "for all lanes, 'name' is required"),
            (bool(working_directory), f"for lane {name}, 'working directory' is required"),
            (bool(stages), f"for lane {name}, 'stages' are required"),
            (len(set(stages_names)) >= len(stages), "stages names must be unique"),
        )

        for is_satisfied, message in checks:
            if not is_satisfied:
                raise errors.BadConfigurationError(message)

        self.name = name
        # "~" is expanded and the path is made absolute.
        self.working_directory = Path(working_directory).expanduser().resolve()
        self.stages = stages
        self.stages_by_name = dict(zip(stages_names, stages))

    @classmethod
    def new_from_dictionary(cls, data: dict[str, Any]):
        """Create a lane from a record holding the keys ``name``, ``workingDirectory`` and ``stages``."""
        records = data.get("stages") or []

        return cls(
            name=data.get("name") or "",
            working_directory=data.get("workingDirectory") or "",
            stages=[StageConfig.new_from_dictionary(item) for item in records],
        )

    def get_stages_names(self) -> list[str]:
        """Return the names of the stages, in configuration order."""
        return [item.name for item in self.stages]

    def get_stages_including_and_after(self, initial_stage_name: str) -> list["StageConfig"]:
        """Return the stage having the given name, followed by all subsequent stages.

        Raises ``ValueError`` if no stage has that name.
        """
        start = self.get_stages_names().index(initial_stage_name)
        return self.stages[start:]


class StageConfig:
    """A stage of a lane: which node binaries and configuration to use, and until which epoch."""

    def __init__(self,
                 name: str,
                 until_epoch: int,
                 node_status_url: str,
                 configuration_archive: str,
                 bin: str,
                 node_arguments: list[str],
                 with_db_lookup_extensions: bool,
                 with_indexing: bool) -> None:
        # Checked in this order; the first empty / zero field is the one reported.
        required_fields = (
            (name, "for all stages, 'name' is required"),
            (until_epoch, f"for stage {name}, 'until epoch' is required"),
            (node_status_url, f"for stage {name}, 'node status url' is required"),
            (configuration_archive, f"for stage {name}, 'configuration archive' is required"),
            (bin, f"for stage {name}, 'bin' is required"),
        )

        for value, message in required_fields:
            if not value:
                raise errors.BadConfigurationError(message)

        self.name = name
        self.until_epoch = until_epoch
        self.node_status_url = node_status_url
        self.configuration_archive = configuration_archive
        # "~" is expanded and the path is made absolute.
        self.bin = Path(bin).expanduser().resolve()
        self.node_arguments = node_arguments
        self.with_db_lookup_extensions = with_db_lookup_extensions
        self.with_indexing = with_indexing

    @classmethod
    def new_from_dictionary(cls, data: dict[str, Any]):
        """Create a stage from a record.

        Keys read: ``name``, ``untilEpoch``, ``nodeStatusUrl``, ``configurationArchive``, ``bin``,
        ``nodeArguments``, ``withDbLookupExtensions``, ``withIndexing``. Missing (or falsy) values
        fall back to an empty string, ``0``, an empty list or ``False``, depending on the field.
        """
        def read(key: str, fallback: Any) -> Any:
            return data.get(key) or fallback

        return cls(
            name=read("name", ""),
            until_epoch=read("untilEpoch", 0),
            node_status_url=read("nodeStatusUrl", ""),
            configuration_archive=read("configurationArchive", ""),
            bin=read("bin", ""),
            node_arguments=read("nodeArguments", []),
            with_db_lookup_extensions=read("withDbLookupExtensions", False),
            with_indexing=read("withIndexing", False),
        )
11 changes: 11 additions & 0 deletions multistage/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import stat

# Largest unsigned 32-bit value
METACHAIN_ID = 0xFFFFFFFF
NODE_PROCESS_ULIMIT = 512 * 1024
# NOTE(review): presumably expressed in seconds - confirm against the monitoring loop
NODE_MONITORING_PERIOD = 5
NODE_RETURN_CODE_SUCCESS = 0
NODE_RETURN_CODE_SIGKILL = -9
TEMPORARY_DIRECTORIES_PREFIX = "mx_chain_scripts_multistage_"

# Read, write and execute by owner, read and execute by group and others (0o755)
FILE_MODE_NICE = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
57 changes: 57 additions & 0 deletions multistage/driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json
import sys
import traceback
from argparse import ArgumentParser
from pathlib import Path
from typing import Any

from rich import print
from rich.panel import Panel
from rich.prompt import Prompt
from rich.rule import Rule

from multistage import errors
from multistage.config import DriverConfig
from multistage.lane_controller import LaneController


def main(cli_args: list[str] = sys.argv[1:]):
    """Entry point of the driver script.

    Delegates to ``_do_main``. If an ``errors.KnownError`` is raised, the
    traceback and the error's pretty message are printed in red panels and
    ``1`` is returned. Otherwise ``None`` is returned.
    """
    try:
        _do_main(cli_args)
    except errors.KnownError as known_error:
        trace_panel = Panel(f"[red]{traceback.format_exc()}")
        print(trace_panel)
        pretty_panel = Panel(f"[red]{known_error.get_pretty()}")
        print(pretty_panel)
        return 1

    return None


def _do_main(cli_args: list[str]):
    """Parse the CLI arguments, validate the requested lane and stage, then start the lane controller.

    Raises:
        errors.BadConfigurationError: if the lane or the initial stage is not in the configuration.
    """
    parser = ArgumentParser()
    parser.add_argument("--config", required=True, help="path of the 'driver' configuration file")
    parser.add_argument("--lane", required=True, help="which lane to handle")
    parser.add_argument("--stage", required=True, help="initial stage on the lane")
    parsed = parser.parse_args(cli_args)

    config_file = Path(parsed.config).expanduser().resolve()
    driver_config = DriverConfig.new_from_dictionary(json.loads(config_file.read_text()))
    lane_name, initial_stage_name = parsed.lane, parsed.stage

    # Validate the lane before fetching it, so that an unknown lane is reported as a configuration error.
    known_lanes = driver_config.get_lanes_names()
    if lane_name not in known_lanes:
        raise errors.BadConfigurationError(f"unknown lane: {lane_name}")

    lane_config = driver_config.get_lane(lane_name)

    known_stages = lane_config.get_stages_names()
    if initial_stage_name not in known_stages:
        raise errors.BadConfigurationError(f"unknown stage: {initial_stage_name}")

    print(f"[bold yellow]Lane: {lane_name}")
    print(f"[bold yellow]Initial stage: {initial_stage_name}")

    controller = LaneController(lane_config, initial_stage_name)
    controller.start()


if __name__ == "__main__":
    # The value returned by main() becomes the process exit code.
    sys.exit(main(sys.argv[1:]))
Loading