jennyzzt · sidpan1 · Jun 3, 2025
diff --git a/DGM_outer.py b/DGM_outer.py
@@ -47,13 +47,12 @@ def any_exceeding_context_length(output_dir, commit_id, instance_ids):
             return True
     return False
 
-def choose_selfimproves(output_dir, archive, selfimprove_size, method='random', run_baseline=None, polyglot=False):
+def choose_selfimproves(output_dir, archive, selfimprove_size, fusion_probability, method='random', run_baseline=None, polyglot=False):
     """
     Choose self-improve attempts for the current generation.
+    May include single parent entries or two-parent fusion entries.
     """
     selfimprove_entries = []
-
-    # Get parent candidates
     candidates = {}
     for commit in archive:
         try:
@@ -75,77 +74,101 @@ def choose_selfimproves(output_dir, archive, selfimprove_size, method='random',
             print(f"{commit} not eligible for being a parent: {e}")
             continue
 
-    # Choose parents based on method and baseline
-    if run_baseline == 'no_darwin':
-        # Always take the last commit
-        commits = list(candidates.keys())
-        parent_commits = commits[-1:]
-    elif method == 'score_prop':
-        # Choose parents based on score
-        commits = list(candidates.keys())
-        scores = [candidates[commit]['accuracy_score'] for commit in commits]
-        scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
-        probabilities = [score / sum(scores) for score in scores]
-        print(commits)
-        parent_commits = random.choices(commits, probabilities, k=selfimprove_size)
-    elif method == 'score_child_prop':
-        # Choose parents based on score and the number of children
-        commits = list(candidates.keys())
-        scores = [candidates[commit]['accuracy_score'] for commit in commits]
-        scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
-        children_counts = [candidates[commit]['children_count'] for commit in commits]
-        children_counts = [1 / (1 + count) for count in children_counts]
-        probabilities = [score * count for score, count in zip(scores, children_counts)]
-        probabilities = [prob / sum(probabilities) for prob in probabilities]
-        parent_commits = random.choices(commits, probabilities, k=selfimprove_size)
-    elif method == 'best':
-        # Choose parents with the best score
-        sorted_commits = sorted(candidates, key=lambda x: candidates[x]['accuracy_score'])
-        parent_commits = sorted_commits[:min(selfimprove_size, len(sorted_commits))]
-        if len(parent_commits) < selfimprove_size:
-            parent_commits.extend(random.choices(parent_commits, k=selfimprove_size - len(parent_commits)))
-    else:
-        # Choose parents randomly
-        parent_commits = random.choices(list(candidates.keys()), k=selfimprove_size)
-
-    # Choose entries for each parent
-    for parent_commit in parent_commits:
-        empty_ids = candidates[parent_commit]['total_emptypatch_ids']
-        resolved_ids = candidates[parent_commit]['total_resolved_ids']
-        unresolved_ids = candidates[parent_commit]['total_unresolved_ids']
+    candidate_commits_list = list(candidates.keys())
+    if not candidate_commits_list:
+        # No eligible parents, return empty list
+        return []
+
+    for _ in range(selfimprove_size):
+        attempt_fusion = random.random() < fusion_probability
 
-        if polyglot:
-            entry_ids = empty_ids + unresolved_ids
-            if not entry_ids:
-                entry_ids = resolved_ids + empty_ids + unresolved_ids
+        if attempt_fusion and len(candidate_commits_list) >= 2:
+            parent1, parent2 = random.sample(candidate_commits_list, 2)
+            selfimprove_entries.append(((parent1, parent2), "fuse_parents"))
+            continue # Move to next attempt
         else:
-            num_total_ids = len(empty_ids) + len(resolved_ids) + len(unresolved_ids)
-
-            # Solve empty patches
-            if len(empty_ids) >= 0.1 * num_total_ids and random.random() < 0.25:
-                entry = 'solve_empty_patches'
-                selfimprove_entries.append((parent_commit, entry))
+            # Single parent selection (or fallback if len(candidate_commits_list) < 2)
+            if not candidate_commits_list:
                 continue
 
-            # Solve stochasticity
-            if random.random() < 0.25:
-                entry = 'solve_stochasticity'
-                selfimprove_entries.append((parent_commit, entry))
+            parent_commit = None
+            if run_baseline == 'no_darwin':
+                parent_commit = candidate_commits_list[-1] # Always take the last commit
+            elif method == 'score_prop':
+                scores = [candidates[c]['accuracy_score'] for c in candidate_commits_list]
+                # Sigmoid scaling for scores
+                scaled_scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
+                if sum(scaled_scores) == 0: # Avoid division by zero if all scaled scores are 0
+                    parent_commit = random.choice(candidate_commits_list)
+                else:
+                    probabilities = [s / sum(scaled_scores) for s in scaled_scores]
+                    parent_commit = random.choices(candidate_commits_list, probabilities, k=1)[0]
+            elif method == 'score_child_prop':
+                scores = [candidates[c]['accuracy_score'] for c in candidate_commits_list]
+                scaled_scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
+                children_counts = [candidates[c]['children_count'] for c in candidate_commits_list]
+                # Inverse of children count (add 1 to avoid division by zero and to give less weight to more children)
+                child_weights = [1 / (1 + count) for count in children_counts]
+
+                combined_weights = [s * cw for s, cw in zip(scaled_scores, child_weights)]
+                if sum(combined_weights) == 0:
+                    parent_commit = random.choice(candidate_commits_list)
+                else:
+                    probabilities = [w / sum(combined_weights) for w in combined_weights]
+                    parent_commit = random.choices(candidate_commits_list, probabilities, k=1)[0]
+            elif method == 'best':
+                # Sort by accuracy_score descending and pick the top one.
+                # This means if multiple single-parent entries are made, they might all use the same best parent.
+                parent_commit = sorted(candidate_commits_list, key=lambda c: candidates[c]['accuracy_score'], reverse=True)[0]
+            else: # 'random' or default
+                parent_commit = random.choice(candidate_commits_list)
+
+            if parent_commit is None: # Should ideally not happen if candidate_commits_list is not empty
                 continue
 
-            # Solve context length
-            if any_exceeding_context_length(output_dir, parent_commit, empty_ids + unresolved_ids) and \
-                random.random() < 0.25:
-                entry = 'solve_contextlength'
-                selfimprove_entries.append((parent_commit, entry))
+            empty_ids = candidates[parent_commit]['total_emptypatch_ids']
+            resolved_ids = candidates[parent_commit]['total_resolved_ids']
+            unresolved_ids = candidates[parent_commit]['total_unresolved_ids']
+
+            if polyglot:
+                entry_ids = empty_ids + unresolved_ids
+                if not entry_ids:
+                    entry_ids = resolved_ids + empty_ids + unresolved_ids
+            else:
+                num_total_ids = len(empty_ids) + len(resolved_ids) + len(unresolved_ids)
+                if len(empty_ids) >= 0.1 * num_total_ids and random.random() < 0.25:
+                    entry = 'solve_empty_patches'
+                    selfimprove_entries.append((parent_commit, entry))
+                    continue
+                if random.random() < 0.25:
+                    entry = 'solve_stochasticity'
+                    selfimprove_entries.append((parent_commit, entry))
+                    continue
+                if any_exceeding_context_length(output_dir, parent_commit, empty_ids + unresolved_ids) and \
+                    random.random() < 0.25:
+                    entry = 'solve_contextlength'
+                    selfimprove_entries.append((parent_commit, entry))
+                    continue
+                # Fall through to picking one concrete task for this parent.
+                # entry_ids must be bound on every path: when it was only set
+                # in the "no unresolved tasks" case, a parent that still had
+                # unresolved tasks hit the `if not entry_ids` check below with
+                # the name unbound (UnboundLocalError) or with a stale list
+                # left over from an earlier loop iteration.
+                if unresolved_ids:
+                    entry_ids = unresolved_ids
+                else:
+                    # Nothing unresolved: fall back to the parent's other tasks.
+                    entry_ids = empty_ids + resolved_ids
+
+
+            if not entry_ids: # If after all logic, entry_ids is still empty (e.g. polyglot case with no tasks)
+                # This case should ideally be prevented by ensuring 'candidates' only includes those with tasks,
+                # or by having a default task. For now, skip adding an entry.
                 continue
 
-            # Choose a random unresolved entry
-            if unresolved_ids == 0:
-                continue
-            entry_ids = unresolved_ids
-        entry = random.choice(entry_ids)
-        selfimprove_entries.append((parent_commit, entry))
+            entry = random.choice(entry_ids)
+            selfimprove_entries.append((parent_commit, entry))
 
     return selfimprove_entries
 
@@ -237,8 +260,9 @@ def main():
     parser.add_argument("--polyglot", default=False, action='store_true', help="Run single shallow evaluation for self-improvement on swe.")
     parser.add_argument("--eval_noise", type=float, default=0.1, help="Noise leeway for evaluation.")
     parser.add_argument("--no_full_eval", default=False, action='store_true', help="Do not run full evaluation on swe if a node is the top N highest performing.")
-    # baselines
+    # baselines and parent-fusion settings
     parser.add_argument("--run_baseline", type=str, default=None, choices=['no_selfimprove', 'no_darwin'], help="Baseline to run.")
+    parser.add_argument("--fusion_probability", type=float, default=0.25, help="Probability of attempting a two-parent fusion.")
     args = parser.parse_args()
 
     # Variables for this DGM run
@@ -271,6 +295,7 @@ def main():
         # Choose self-improve attempts
         selfimprove_entries = choose_selfimproves(
             output_dir, archive, args.selfimprove_size,
+            args.fusion_probability, # Pass the new argument
             method=args.choose_selfimproves_method,
             run_baseline=args.run_baseline,
             polyglot=args.polyglot,

diff --git a/coding_agent.py b/coding_agent.py
@@ -74,14 +74,27 @@ def __init__(
             test_description=None,
             self_improve=False,
             instance_id=None,
+            # Fusion task specific arguments
+            is_fusion_task=False,
+            parent1_patch_file=None,
+            parent2_patch_file=None,
+            parent1_commit_id=None,
+            parent2_commit_id=None
         ):
         self.problem_statement = problem_statement
         self.git_tempdir = git_tempdir
-        self.base_commit = base_commit
+        self.base_commit = base_commit # This is the SHA of the common ancestor for fusion
         self.chat_history_file = chat_history_file
         self.test_description = test_description
         self.self_improve = self_improve
         self.instance_id = instance_id if not self_improve else 'dgm'
+
+        self.is_fusion_task = is_fusion_task
+        self.parent1_patch_file = parent1_patch_file
+        self.parent2_patch_file = parent2_patch_file
+        self.parent1_commit_id = parent1_commit_id
+        self.parent2_commit_id = parent2_commit_id
+
         self.code_model = CLAUDE_MODEL
 
         # Initialize logger and store it in thread-local storage
@@ -154,7 +167,34 @@ def forward(self):
         """
         The forward function for the AgenticSystem.
         """
-        instruction = f"""I have uploaded a Python code repository in the directory {self.git_tempdir}. Help solve the following problem.
+        if self.is_fusion_task:
+            self.logger.info("Fusion task detected.")
+            try:
+                with open(self.parent1_patch_file, 'r') as f:
+                    parent1_patch_content = f.read()
+                with open(self.parent2_patch_file, 'r') as f:
+                    parent2_patch_content = f.read()
+            except FileNotFoundError as e:
+                self.logger.error(f"Error: Parent patch file not found: {e}. This will result in an empty patch.")
+                # Skip chat_with_agent entirely; with no agent run, the resulting patch will be empty.
+                return # Exit early, no instruction to run
+
+            # Dynamically import here to avoid issues if this file is imported elsewhere
+            # where prompts.fusion_prompt might not be immediately available or needed.
+            from prompts.fusion_prompt import get_fusion_prompt
+
+            instruction = get_fusion_prompt(
+                base_commit_id=self.base_commit, # base_commit is the SHA of the common ancestor
+                parent1_commit_id=self.parent1_commit_id,
+                parent1_patch_content=parent1_patch_content,
+                parent2_commit_id=self.parent2_commit_id,
+                parent2_patch_content=parent2_patch_content,
+                existing_problem_statement=self.problem_statement # Original problem statement for context
+            )
+            self.logger.info(f"Fusion instruction generated for base {self.base_commit}, P1 {self.parent1_commit_id}, P2 {self.parent2_commit_id}")
+        else:
+            self.logger.info("Standard task detected.")
+            instruction = f"""I have uploaded a Python code repository in the directory {self.git_tempdir}. Help solve the following problem.
 
 <problem_description>
 {self.problem_statement}
@@ -166,6 +206,7 @@ def forward(self):
 
 Your task is to make changes to the files in the {self.git_tempdir} directory to address the <problem_description>. I have already taken care of the required dependencies.
 """
+        self.logger.info(f"Instruction for chat_with_agent (first 200 chars):\n{instruction[:200]}...")
         new_msg_history = chat_with_agent(instruction, model=self.code_model, msg_history=[], logging=safe_log)
 
 def main():
@@ -178,8 +219,19 @@ def main():
     parser.add_argument('--test_description', default=None, required=False, help='Description of how to test the repository')
     parser.add_argument('--self_improve', default=False, action='store_true', help='Whether to self-improve the repository or solving swe')
     parser.add_argument('--instance_id', default=None, help='Instance ID for SWE issue')
+
+    # Arguments for fusion task
+    parser.add_argument("--is_fusion_task", default=False, action="store_true", help="Indicates if the task is a fusion of two parents.")
+    parser.add_argument("--parent1_patch_file", type=str, default=None, help="Path to the diff file for Parent 1 (changes from base to P1). Required if is_fusion_task is True.")
+    parser.add_argument("--parent2_patch_file", type=str, default=None, help="Path to the diff file for Parent 2 (changes from base to P2). Required if is_fusion_task is True.")
+    parser.add_argument("--parent1_commit_id", type=str, default="Parent1", help="Commit ID for Parent 1 (for prompt context).")
+    parser.add_argument("--parent2_commit_id", type=str, default="Parent2", help="Commit ID for Parent 2 (for prompt context).")
+
     args = parser.parse_args()
 
+    if args.is_fusion_task and (not args.parent1_patch_file or not args.parent2_patch_file):
+        parser.error("--parent1_patch_file and --parent2_patch_file are required when --is_fusion_task is True.")
+
     # Process the repository
     agentic_system = AgenticSystem(
         problem_statement=args.problem_statement,
@@ -189,6 +241,12 @@ def main():
         test_description=args.test_description,
         self_improve=args.self_improve,
         instance_id=args.instance_id,
+        # Fusion arguments
+        is_fusion_task=args.is_fusion_task,
+        parent1_patch_file=args.parent1_patch_file,
+        parent2_patch_file=args.parent2_patch_file,
+        parent1_commit_id=args.parent1_commit_id,
+        parent2_commit_id=args.parent2_commit_id
     )
 
     # Run the agentic system to try to solve the problem