Updating for last run #2
base: agnosticAgent
@@ -0,0 +1,46 @@
# AlphaFold Pipeline Designs

This document explains the two pipeline designs for running AlphaFold tasks with support for multiple structures and GPU binding.

---

## 1. Single Pipeline with Parallel Structures (Current)

A single pipeline manages multiple structures. AlphaFold tasks run concurrently within the same pipeline, each bound to a GPU.

```
Single Pipeline with Parallel Structures
----------------------------------------
[Pipeline]
  |
  +--> [AlphaFold A] --> [Output A]  (GPU0)
  +--> [AlphaFold B] --> [Output B]  (GPU1)
  +--> [AlphaFold C] --> [Output C]  (GPU2)
  +--> [AlphaFold D] --> [Output D]  (GPU3)
```

- One pipeline orchestrates all structures.
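As a hedged illustration of this fan-out (all names here are hypothetical, not this project's actual API), one pipeline can run one task per structure concurrently with `asyncio`:

```python
import asyncio

# Sketch of design 1: one pipeline launches one AlphaFold task per
# structure, each bound to its own GPU index.
async def run_alphafold(structure: str, gpu_id: int) -> str:
    # A real task would pin the GPU (e.g. via CUDA_VISIBLE_DEVICES)
    # before launching the AlphaFold process.
    await asyncio.sleep(0)  # stand-in for the actual GPU work
    return f"{structure} -> output (GPU{gpu_id})"

async def single_pipeline(structures: list[str]) -> list[str]:
    # All structures run concurrently inside the one pipeline.
    tasks = [run_alphafold(s, gpu) for gpu, s in enumerate(structures)]
    return await asyncio.gather(*tasks)

results = asyncio.run(single_pipeline(["A", "B", "C", "D"]))
print(results)
```

The single orchestrator owns the GPU assignment, which is why one failure mode (a crashed pipeline) affects all structures at once.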
## 2. Separate Pipelines Design (Supported and can be enabled)

Each structure is processed by its own pipeline.

```
Separate Pipelines Design
-------------------------
[Pipeline 1] --> [AlphaFold A] --> [Output A]  (GPU0)

[Pipeline 2] --> [AlphaFold B] --> [Output B]  (GPU1)

[Pipeline 3] --> [AlphaFold C] --> [Output C]  (GPU2)
```

- Each pipeline launches independently.
- GPU allocation can be set per pipeline.
- Suitable if users prefer keeping pipelines isolated.
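A minimal sketch of this design, assuming per-pipeline GPU binding is done with the common `CUDA_VISIBLE_DEVICES` convention (the launcher command and helper below are hypothetical, not the project's real entry point):

```python
import os
import subprocess

# Sketch of design 2: each structure gets its own pipeline process,
# with the GPU pinned per process via the environment.
def launch_pipeline(structure: str, gpu_id: int) -> subprocess.Popen:
    env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(gpu_id))
    # Stand-in command; a real launcher would start the pipeline script.
    cmd = [
        "python3", "-c",
        f"import os; print('{structure}', os.environ['CUDA_VISIBLE_DEVICES'])",
    ]
    return subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE, text=True)

procs = [launch_pipeline(s, gpu) for gpu, s in enumerate(["A", "B", "C"])]
outputs = [p.communicate()[0].strip() for p in procs]
print(outputs)  # each pipeline reports its own structure and GPU id
```

Because each pipeline is a separate process, a failure in one cannot take down the others, at the cost of duplicating any shared setup.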
---

@@ -19,7 +19,10 @@ def __init__(self, name, flow, configs=None, **kwargs):
        # Execution metadata
        if configs is None:
            configs = {}

        self.is_child: bool = kwargs.get("is_child", False)
        self.passes = kwargs.get("passes", 1)
        self.start_pass: int = kwargs.get("start_pass", 1)
        self.step_id = kwargs.get("step_id", 1)
        self.seq_rank = kwargs.get("seq_rank", 0)
        self.num_seqs = kwargs.get("num_seqs", 10)
@@ -44,7 +47,7 @@ def __init__(self, name, flow, configs=None, **kwargs):
        )
        self.output_path_mpnn = os.path.join(self.output_path, "mpnn")
        self.output_path_af = os.path.join(
-           self.output_path, "/af/prediction/best_models"
+           self.output_path, "af/prediction/best_models"
        )

        # might have to do outside of initialization, so new pipelines
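The one-character change above is a real bug fix, not a style tweak: `os.path.join` discards all preceding components when a later component is absolute, so the old leading slash silently threw away `self.output_path`. A quick standalone check (the paths are illustrative):

```python
import os

# os.path.join drops earlier components when a later one is absolute.
print(os.path.join("/data/run1", "/af/prediction/best_models"))
# -> /af/prediction/best_models   (output_path lost)

print(os.path.join("/data/run1", "af/prediction/best_models"))
# -> /data/run1/af/prediction/best_models
```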
@@ -64,6 +67,7 @@ def set_up_new_pipeline_dirs(self, new_pipeline_name):
        # all directories to create
        subdirs = [
            "af/fasta",
+           "af/prediction",
            "af/prediction/best_models",
            "af/prediction/best_ptm",
            "af/prediction/dimer_models",
@@ -83,9 +87,7 @@ def register_pipeline_tasks(self):
        """Register all pipeline tasks"""

        @self.auto_register_task()  # MPNN
-       async def s1(task_description=None):
-           if task_description is None:
-               task_description = {"ranks": 1}
+       async def s1(task_description={"gpus_per_rank": 1}):  # noqa: B006

Review comment: Using a mutable default argument like a dictionary is generally discouraged, as it can lead to unexpected behavior if the object is modified. While it is not modified here, the previous pattern of defaulting to `None` was safer. A safer implementation would be:

    async def s1(task_description=None):
        if task_description is None:
            task_description = {"gpus_per_rank": 1}
        # ... rest of function
            mpnn_script = os.path.join(self.base_path, "mpnn_wrapper.py")
            output_dir = os.path.join(self.output_path_mpnn, f"job_{self.passes}")
@@ -142,9 +144,7 @@ async def s3():

        # alphafold, must be run separately for each structure one at a time!
        @self.auto_register_task()
-       async def s4(target_fasta, task_description=None):
-           if task_description is None:
-               task_description = {"gpus_per_rank": 1}
+       async def s4(target_fasta, task_description={"gpus_per_rank": 1}):  # noqa: B006

Review comment: Similar to the comment on `s1`, a safer implementation would be:

    async def s4(target_fasta, task_description=None):
        if task_description is None:
            task_description = {"gpus_per_rank": 1}
        # ... rest of function

            cmd = (
                f"/bin/bash {self.base_path}/af2_multimer_reduced.sh "
                f"{self.output_path}/af/fasta/ "
@@ -154,10 +154,8 @@ async def s4(target_fasta, task_description=None):

            return cmd

-       @self.auto_register_task()  # plddt_extract
-       async def s5(task_description=None):
-           if task_description is None:
-               task_description = {}
+       @self.auto_register_task()  # pLDTT_extract
+       async def s5(task_description={}):  # noqa: B006

Review comment: As with `s1` and `s4`, a safer implementation would be:

    async def s5(task_description=None):
        if task_description is None:
            task_description = {}
        # ... rest of function

            return (
                f"python3 {self.base_path}/plddt_extract_pipeline.py "
                f"--path={self.base_path} "
@@ -185,13 +183,22 @@ async def run(self):
        while self.passes <= self.max_passes:
            self.logger.pipeline_log(f"Starting pass {self.passes}")

-           self.logger.pipeline_log("Submitting MPNN task")
-           await self.s1(task_description={"pre_exec": TASK_PRE_EXEC})
-           self.logger.pipeline_log("MPNN task finished")
+           if self.is_child and self.passes == self.start_pass:
+               self.logger.pipeline_log(
+                   "Skipping MPNN and Ranking steps for this child pipeline "
+                   "in the current pass only."
+               )
+               pass
-           self.logger.pipeline_log("Submitting sequence ranking task")
-           await self.s2()
-           self.logger.pipeline_log("Sequence ranking task finished")
+           else:
+               self.logger.pipeline_log("Submitting MPNN task")
+               await self.s1(task_description={"pre_exec": TASK_PRE_EXEC})
+               self.logger.pipeline_log("MPNN task finished")
+
+               self.logger.pipeline_log("Submitting sequence ranking task")
+               await self.s2()
+               self.logger.pipeline_log("Sequence ranking task finished")

            self.logger.pipeline_log("Submitting scoring task")
            fasta_files = await self.s3()
@@ -245,7 +252,7 @@ async def run(self):
            await asyncio.gather(*alphafold_tasks, return_exceptions=True)
            self.logger.pipeline_log(f"{len(alphafold_tasks)} Alphafold tasks finished")

-           self.logger.pipeline_log("Submitting plddt extract")
+           self.logger.pipeline_log("Submitting pLDTT extraction task")

            staged_file = f"af_stats_{self.name}_pass_{self.passes}.csv"

@@ -260,8 +267,11 @@ async def run(self):
                ],
            }
            )
-           self.logger.pipeline_log("Plddt extract finished")
+           self.logger.pipeline_log("pLDTT extract finished")

+           await self.run_adaptive_step(wait=True)
+
+           if self.kill_parent:
+               break

            self.passes += 1
Review comment: There is a potential data loss bug here. If `sub_iter_seqs` is not empty but a child pipeline cannot be created (e.g., `pipeline.sub_order >= MAX_SUB_PIPELINES`), this `else` block is executed. The proteins in `sub_iter_seqs` have already been removed from `pipeline.iter_seqs` on line 76. Since this block only updates `previous_scores`, those proteins are effectively dropped from any further processing. They should be added back to the current pipeline's `iter_seqs` if they are not moved to a child.

Consider this implementation:
sub_iter_seqsis not empty but a child pipeline cannot be created (e.g.,pipeline.sub_order >= MAX_SUB_PIPELINES), thiselseblock is executed. The proteins insub_iter_seqshave already been removed frompipeline.iter_seqson line 76. Since this block only updatesprevious_scores, those proteins are effectively dropped from any further processing. They should be added back to the current pipeline'siter_seqsif they are not moved to a child.Consider this implementation: