Move container framework to algorithm run call and fix hash lengths
This commit moves container framework selection up a level so that each algorithm's run
function is called with the configured framework instead of determining the framework
internally. The reasoning is that this makes testing much easier: instead of manipulating
global configs in the tests, a test can simply call the PRM's run function with a different
framework specified.

This also moves prepare_volume into `containers.py`, which fixes the issue of importing the
configured hash length inside that function. Arguably, prepare_volume should have been moved
into containers.py in the first place.
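
For example, a test can now exercise a pathway reconstruction method under Singularity without
touching the global config. A minimal sketch based on the updated AllPairs test below (the
src.allpairs import path and the input/output paths are illustrative):

from src.allpairs import AllPairs

# container_framework is now an explicit keyword argument of run(), defaulting to "docker"
AllPairs.run(
    nodetypes='test/AllPairs/input/sample-in-nodetypes.txt',
    network='test/AllPairs/input/sample-in-net.txt',
    output_file='output/sample-out.txt',
    container_framework='singularity')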
jhiemstrawisc committed Oct 10, 2023
1 parent 10923d1 commit 75b746c
Showing 21 changed files with 86 additions and 110 deletions.
1 change: 1 addition & 0 deletions Snakefile
@@ -212,6 +212,7 @@ rule reconstruct:
params.pop('spras_placeholder')
# TODO consider the best way to pass global configuration information to the run functions
# This approach requires that all run functions support a singularity option
params['container_framework'] = FRAMEWORK
runner.run(wildcards.algorithm, params)

# Original pathway reconstruction output to universal output
7 changes: 4 additions & 3 deletions config/config.yaml
@@ -2,9 +2,10 @@

# The length of the hash used to identify a parameter combination
hash_length: 7
-# If true, use Singularity instead of Docker
-# Singularity support is only available on Unix
-singularity: false

+# Specify the container framework used to run the algorithms. Currently supported frameworks are
+# 'docker' and 'singularity'. If container_framework is not specified, SPRAS will default to docker.
+container_framework: docker

# Allow the user to configure which container registry containers should be pulled from
# Note that this assumes container names are consistent across registries, and that the
6 changes: 2 additions & 4 deletions src/allpairs.py
@@ -4,9 +4,8 @@
import pandas as pd

import src.config as config
-from src.containers import run_container
+from src.containers import prepare_volume, run_container
from src.prm import PRM
-from src.util import prepare_volume

__all__ = ['AllPairs']

@@ -50,7 +49,7 @@ def generate_inputs(data, filename_map):
header=["#Interactor1", "Interactor2", "Weight"])

@staticmethod
-def run(nodetypes=None, network=None, output_file=None):
+def run(nodetypes=None, network=None, output_file=None, container_framework="docker"):
"""
Run All Pairs Shortest Paths with Docker
@param nodetypes: input node types with sources and targets (required)
@@ -85,7 +84,6 @@ def run(nodetypes=None, network=None, output_file=None):

print('Running All Pairs Shortest Paths with arguments: {}'.format(' '.join(command)), flush=True)

-container_framework = config.config.container_framework
container_suffix = "allpairs"

out = run_container(
3 changes: 1 addition & 2 deletions src/analysis/cytoscape.py
@@ -3,8 +3,7 @@
from typing import List, Union

import src.config as config
-from src.containers import run_container
-from src.util import prepare_volume
+from src.containers import prepare_volume, run_container


def run_cytoscape(pathways: List[Union[str, PurePath]], output_file: str) -> None:
10 changes: 8 additions & 2 deletions src/config.py
@@ -64,8 +64,14 @@ def process_config(self, raw_config):
# Set up a few top-level config variables
self.out_dir = raw_config["reconstruction_settings"]["locations"]["reconstruction_dir"]

-if "singularity" in raw_config and raw_config["singularity"]:
-self.container_framework = "singularity"
+# The container framework does not need to be defined in the config; if it is missing, default to docker.
+# However, an unrecognized value raises an exception.
+if "container_framework" in raw_config:
+container_framework = raw_config["container_framework"].lower()
+if container_framework not in ("docker", "singularity"):
+msg = "SPRAS was configured to run with an unknown container framework: '" + raw_config["container_framework"] + "'. Accepted values are 'docker' or 'singularity'."
+raise ValueError(msg)
+self.container_framework = container_framework
else:
self.container_framework = "docker"
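
In effect, the new parsing reduces to the following standalone sketch (select_framework is a
hypothetical helper for illustration only, not part of the SPRAS API):

def select_framework(raw_config: dict) -> str:
    # Default to Docker when the key is absent; reject anything other than docker/singularity
    framework = raw_config.get("container_framework", "docker").lower()
    if framework not in ("docker", "singularity"):
        raise ValueError(f"Unknown container framework: '{framework}'")
    return framework

assert select_framework({}) == "docker"
assert select_framework({"container_framework": "Singularity"}) == "singularity"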

40 changes: 40 additions & 0 deletions src/containers.py
@@ -7,6 +7,7 @@
import docker

import src.config as config
from src.util import hash_filename


def prepare_path_docker(orig_path: PurePath) -> str:
@@ -194,3 +195,42 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
command,
options=singularity_options,
bind=bind_paths)

# Because this is called independently for each file, the same local path can be mounted to multiple volumes
def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, PurePath], str]:
"""
Makes a file on the local file system accessible within a container by mapping the local (source) path to a new
container (destination) path and renaming the file to be relative to the destination path.
The destination path will be a new path relative to the volume_base that includes a hash identifier derived from the
original filename.
An example mapped filename looks like '/spras/MG4YPNK/oi1-edges.txt'.
@param filename: The file on the local file system to map
@param volume_base: The base directory in the container, which must be an absolute directory
@return: first returned object is a tuple (source path, destination path) and the second returned object is the
updated filename relative to the destination path
"""
base_path = PurePosixPath(volume_base)
if not base_path.is_absolute():
raise ValueError(f'Volume base must be an absolute path: {volume_base}')

if isinstance(filename, PurePath):
filename = str(filename)

# The hash length now comes from the global config. Moving this function into containers.py
# avoids the circular import that previously forced util.py to fall back on the hardcoded
# DEFAULT_HASH_LENGTH.
filename_hash = hash_filename(filename, config.config.hash_length)
dest = PurePosixPath(base_path, filename_hash)

abs_filename = Path(filename).resolve()
container_filename = str(PurePosixPath(dest, abs_filename.name))
if abs_filename.is_dir():
dest = PurePosixPath(dest, abs_filename.name)
src = abs_filename
else:
parent = abs_filename.parent
if parent.as_posix() == '.':
parent = Path.cwd()
src = parent

return (src, dest), container_filename
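
A quick usage sketch of the relocated helper (the input path is illustrative, and this assumes
the global config has been initialized so that config.config.hash_length is available):

from src.containers import prepare_volume

# Map a local input file into the container under /spras
(src, dest), container_filename = prepare_volume('input/network.txt', '/spras')
# src  -> absolute local directory that contains network.txt (the volume source to mount)
# dest -> PurePosixPath like '/spras/<hash>', where <hash> is derived from the filename
# container_filename -> '/spras/<hash>/network.txt', the path the tool sees inside the container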
8 changes: 3 additions & 5 deletions src/domino.py
@@ -4,9 +4,8 @@
import pandas as pd

import src.config as config
-from src.containers import run_container
+from src.containers import prepare_volume, run_container
from src.prm import PRM
-from src.util import prepare_volume

__all__ = ['DOMINO', 'pre_domino_id_transform', 'post_domino_id_transform']

@@ -55,7 +54,7 @@ def generate_inputs(data, filename_map):
header=['ID_interactor_A', 'ppi', 'ID_interactor_B'])

@staticmethod
-def run(network=None, active_genes=None, output_file=None, slice_threshold=None, module_threshold=None):
+def run(network=None, active_genes=None, output_file=None, slice_threshold=None, module_threshold=None, container_framework="docker"):
"""
Run DOMINO with Docker.
Let visualization be always true, parallelization be always 1 thread, and use_cache be always false.
@@ -98,7 +97,6 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,

print('Running slicer with arguments: {}'.format(' '.join(slicer_command)), flush=True)

-container_framework = config.config.container_framework
container_suffix = "domino"
slicer_out = run_container(container_framework,
container_suffix,
@@ -127,7 +125,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,
print('Running DOMINO with arguments: {}'.format(' '.join(domino_command)), flush=True)

domino_out = run_container(container_framework,
-'reedcompbio/domino',
+container_suffix,
domino_command,
volumes,
work_dir)
7 changes: 2 additions & 5 deletions src/meo.py
@@ -3,9 +3,8 @@
import pandas as pd

import src.config as config
-from src.containers import run_container
+from src.containers import prepare_volume, run_container
from src.prm import PRM
-from src.util import prepare_volume

__all__ = ['MEO', 'write_properties']

@@ -87,7 +86,7 @@ def generate_inputs(data, filename_map):
# TODO document required arguments
@staticmethod
def run(edges=None, sources=None, targets=None, output_file=None, max_path_length=None, local_search=None,
-rand_restarts=None):
+rand_restarts=None, container_framework="docker"):
"""
Run Maximum Edge Orientation in the Docker image with the provided parameters.
The properties file is generated from the provided arguments.
@@ -139,8 +138,6 @@ def run(edges=None, sources=None, targets=None, output_file=None, max_path_lengt

print('Running Maximum Edge Orientation with arguments: {}'.format(' '.join(command)), flush=True)

-# TODO consider making this a string in the config file instead of a Boolean
-container_framework = config.config.container_framework
container_suffix = "meo"
out = run_container(container_framework,
container_suffix,
6 changes: 2 additions & 4 deletions src/mincostflow.py
@@ -3,9 +3,8 @@
import pandas as pd

import src.config as config
-from src.containers import run_container
+from src.containers import prepare_volume, run_container
from src.prm import PRM
-from src.util import prepare_volume

__all__ = ['MinCostFlow']

@@ -43,7 +42,7 @@ def generate_inputs(data, filename_map):
edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=["Interactor1","Interactor2","Weight"], header=False)

@staticmethod
-def run(sources=None, targets=None, edges=None, output_file=None, flow=None, capacity=None, singularity=False):
+def run(sources=None, targets=None, edges=None, output_file=None, flow=None, capacity=None, container_framework="docker"):
"""
Run min cost flow with Docker (or singularity)
@param sources: input sources (required)
@@ -96,7 +95,6 @@ def run(sources=None, targets=None, edges=None, output_file=None, flow=None, cap
command.extend(['--capacity', str(capacity)])

# choosing to run in docker or singularity container
-container_framework = config.config.container_framework
container_suffix = "mincostflow"

# constructs a docker run call
8 changes: 2 additions & 6 deletions src/omicsintegrator1.py
@@ -3,9 +3,8 @@
import pandas as pd

import src.config as config
-from src.containers import run_container
+from src.containers import prepare_volume, run_container
from src.prm import PRM
-from src.util import prepare_volume

__all__ = ['OmicsIntegrator1', 'write_conf']

@@ -79,7 +78,7 @@ def generate_inputs(data, filename_map):
@staticmethod
def run(edges=None, prizes=None, dummy_mode=None, mu_squared=None, exclude_terms=None,
output_file=None, noisy_edges=None, shuffled_prizes=None, random_terminals=None,
-seed=None, w=None, b=None, d=None, mu=None, noise=None, g=None, r=None):
+seed=None, w=None, b=None, d=None, mu=None, noise=None, g=None, r=None, container_framework="docker"):
"""
Run Omics Integrator 1 in the Docker image with the provided parameters.
Does not support the garnet, cyto30, knockout, cv, or cv-reps arguments.
@@ -145,10 +144,7 @@ def run(edges=None, prizes=None, dummy_mode=None, mu_squared=None, exclude_terms

print('Running Omics Integrator 1 with arguments: {}'.format(' '.join(command)), flush=True)

-# TODO consider making this a string in the config file instead of a Boolean
-container_framework = config.config.container_framework
container_suffix = "omics-integrator-1:no-conda" # no-conda version is the default

out = run_container(container_framework,
container_suffix, # no-conda version is the default
command,
7 changes: 3 additions & 4 deletions src/omicsintegrator2.py
@@ -3,10 +3,10 @@
import pandas as pd

import src.config as config
-from src.containers import run_container
+from src.containers import prepare_volume, run_container
from src.dataset import Dataset
from src.prm import PRM
-from src.util import add_rank_column, prepare_volume
+from src.util import add_rank_column

__all__ = ['OmicsIntegrator2']

@@ -52,7 +52,7 @@ def generate_inputs(data: Dataset, filename_map):
# TODO document required arguments
@staticmethod
def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise=None, noisy_edges=None,
-random_terminals=None, dummy_mode=None, seed=None):
+random_terminals=None, dummy_mode=None, seed=None, container_framework="docker"):
"""
Run Omics Integrator 2 in the Docker image with the provided parameters.
Only the .tsv output file is retained and then renamed.
@@ -103,7 +102,6 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise

print('Running Omics Integrator 2 with arguments: {}'.format(' '.join(command)), flush=True)

-container_framework = config.config.container_framework
container_suffix = "omics-integrator-2:v2"
out = run_container(container_framework,
container_suffix,
6 changes: 2 additions & 4 deletions src/pathlinker.py
@@ -4,9 +4,8 @@
import pandas as pd

import src.config as config
-from src.containers import run_container
+from src.containers import prepare_volume, run_container
from src.prm import PRM
-from src.util import prepare_volume

__all__ = ['PathLinker']

@@ -49,7 +48,7 @@ def generate_inputs(data, filename_map):

# Skips parameter validation step
@staticmethod
-def run(nodetypes=None, network=None, output_file=None, k=None):
+def run(nodetypes=None, network=None, output_file=None, k=None, container_framework="docker"):
"""
Run PathLinker with Docker
@param nodetypes: input node types with sources and targets (required)
@@ -97,7 +96,6 @@ def run(nodetypes=None, network=None, output_file=None, k=None):

print('Running PathLinker with arguments: {}'.format(' '.join(command)), flush=True)

-container_framework = config.config.container_framework
container_suffix = "pathlinker"
out = run_container(container_framework,
container_suffix,
41 changes: 0 additions & 41 deletions src/util.py
@@ -42,47 +42,6 @@ def hash_filename(filename: str, length: Optional[int] = None) -> str:
"""
return hash_params_sha1_base32({'filename': filename}, length)


# Because this is called independently for each file, the same local path can be mounted to multiple volumes
def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, PurePath], str]:
"""
Makes a file on the local file system accessible within a container by mapping the local (source) path to a new
container (destination) path and renaming the file to be relative to the destination path.
The destination path will be a new path relative to the volume_base that includes a hash identifier derived from the
original filename.
An example mapped filename looks like '/spras/MG4YPNK/oi1-edges.txt'.
@param filename: The file on the local file system to map
@param volume_base: The base directory in the container, which must be an absolute directory
@return: first returned object is a tuple (source path, destination path) and the second returned object is the
updated filename relative to the destination path
"""
base_path = PurePosixPath(volume_base)
if not base_path.is_absolute():
raise ValueError(f'Volume base must be an absolute path: {volume_base}')

if isinstance(filename, PurePath):
filename = str(filename)

# There's no clear way to get DEFAULT_HASH_LENGTH from config without a circular import...
# For now, hardcoding the value to 7, since it appeared the value wasn't updated by
# config.yaml before anyway.
from src.config import DEFAULT_HASH_LENGTH
filename_hash = hash_filename(filename, DEFAULT_HASH_LENGTH)
dest = PurePosixPath(base_path, filename_hash)

abs_filename = Path(filename).resolve()
container_filename = str(PurePosixPath(dest, abs_filename.name))
if abs_filename.is_dir():
dest = PurePosixPath(dest, abs_filename.name)
src = abs_filename
else:
parent = abs_filename.parent
if parent.as_posix() == '.':
parent = Path.cwd()
src = parent

return (src, dest), container_filename

def make_required_dirs(path: str):
"""
Create the directory and parent directories required before an output file can be written to the specified path.
6 changes: 2 additions & 4 deletions test/AllPairs/test_ap.py
@@ -44,13 +44,11 @@ def test_allpairs_singularity(self):
out_path = Path(OUT_DIR+'sample-out.txt')
out_path.unlink(missing_ok=True)
# Only include required arguments and run with Singularity
-config.config.container_framework = "singularity"
AllPairs.run(
nodetypes=TEST_DIR+'input/sample-in-nodetypes.txt',
network=TEST_DIR+'input/sample-in-net.txt',
-output_file=str(out_path)
-)
-config.config.container_framework = "docker"
+output_file=str(out_path),
+container_framework="singularity")
assert out_path.exists()

def test_allpairs_correctness(self):
6 changes: 3 additions & 3 deletions test/DOMINO/test_domino.py
@@ -85,13 +85,13 @@ def test_domino_singularity(self):
out_path = Path(OUT_FILE_DEFAULT)
out_path.unlink(missing_ok=True)
# Only include required arguments and run with Singularity
-config.config.container_framework = "singularity"
DOMINO.run(
network=TEST_DIR+'input/domino-network.txt',
active_genes=TEST_DIR+'input/domino-active-genes.txt',
-output_file=OUT_FILE_DEFAULT)
+output_file=OUT_FILE_DEFAULT,
+container_framework="singularity")
assert out_path.exists()
-config.config.container_framework = "docker"

def test_pre_id_transform(self):
"""
Test the node ID transformation run before DOMINO executes