Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 92 additions & 67 deletions DGM_outer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,12 @@ def any_exceeding_context_length(output_dir, commit_id, instance_ids):
return True
return False

def choose_selfimproves(output_dir, archive, selfimprove_size, method='random', run_baseline=None, polyglot=False):
def choose_selfimproves(output_dir, archive, selfimprove_size, fusion_probability, method='random', run_baseline=None, polyglot=False):
"""
Choose self-improve attempts for the current generation.
May include single parent entries or two-parent fusion entries.
"""
selfimprove_entries = []

# Get parent candidates
candidates = {}
for commit in archive:
try:
Expand All @@ -75,77 +74,101 @@ def choose_selfimproves(output_dir, archive, selfimprove_size, method='random',
print(f"{commit} not eligible for being a parent: {e}")
continue

# Choose parents based on method and baseline
if run_baseline == 'no_darwin':
# Always take the last commit
commits = list(candidates.keys())
parent_commits = commits[-1:]
elif method == 'score_prop':
# Choose parents based on score
commits = list(candidates.keys())
scores = [candidates[commit]['accuracy_score'] for commit in commits]
scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
probabilities = [score / sum(scores) for score in scores]
print(commits)
parent_commits = random.choices(commits, probabilities, k=selfimprove_size)
elif method == 'score_child_prop':
# Choose parents based on score and the number of children
commits = list(candidates.keys())
scores = [candidates[commit]['accuracy_score'] for commit in commits]
scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
children_counts = [candidates[commit]['children_count'] for commit in commits]
children_counts = [1 / (1 + count) for count in children_counts]
probabilities = [score * count for score, count in zip(scores, children_counts)]
probabilities = [prob / sum(probabilities) for prob in probabilities]
parent_commits = random.choices(commits, probabilities, k=selfimprove_size)
elif method == 'best':
# Choose parents with the best score
sorted_commits = sorted(candidates, key=lambda x: candidates[x]['accuracy_score'])
parent_commits = sorted_commits[:min(selfimprove_size, len(sorted_commits))]
if len(parent_commits) < selfimprove_size:
parent_commits.extend(random.choices(parent_commits, k=selfimprove_size - len(parent_commits)))
else:
# Choose parents randomly
parent_commits = random.choices(list(candidates.keys()), k=selfimprove_size)

# Choose entries for each parent
for parent_commit in parent_commits:
empty_ids = candidates[parent_commit]['total_emptypatch_ids']
resolved_ids = candidates[parent_commit]['total_resolved_ids']
unresolved_ids = candidates[parent_commit]['total_unresolved_ids']
candidate_commits_list = list(candidates.keys())
if not candidate_commits_list:
# No eligible parents, return empty list
return []

for _ in range(selfimprove_size):
attempt_fusion = random.random() < fusion_probability

if polyglot:
entry_ids = empty_ids + unresolved_ids
if not entry_ids:
entry_ids = resolved_ids + empty_ids + unresolved_ids
if attempt_fusion and len(candidate_commits_list) >= 2:
parent1, parent2 = random.sample(candidate_commits_list, 2)
selfimprove_entries.append(((parent1, parent2), "fuse_parents"))
continue # Move to next attempt
else:
num_total_ids = len(empty_ids) + len(resolved_ids) + len(unresolved_ids)

# Solve empty patches
if len(empty_ids) >= 0.1 * num_total_ids and random.random() < 0.25:
entry = 'solve_empty_patches'
selfimprove_entries.append((parent_commit, entry))
# Single parent selection (or fallback if len(candidate_commits_list) < 2)
if not candidate_commits_list:
continue

# Solve stochasticity
if random.random() < 0.25:
entry = 'solve_stochasticity'
selfimprove_entries.append((parent_commit, entry))
parent_commit = None
if run_baseline == 'no_darwin':
parent_commit = candidate_commits_list[-1] # Always take the last commit
elif method == 'score_prop':
scores = [candidates[c]['accuracy_score'] for c in candidate_commits_list]
# Sigmoid scaling for scores
scaled_scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
if sum(scaled_scores) == 0: # Avoid division by zero if all scaled scores are 0
parent_commit = random.choice(candidate_commits_list)
else:
probabilities = [s / sum(scaled_scores) for s in scaled_scores]
parent_commit = random.choices(candidate_commits_list, probabilities, k=1)[0]
elif method == 'score_child_prop':
scores = [candidates[c]['accuracy_score'] for c in candidate_commits_list]
scaled_scores = [1 / (1 + math.exp(-10*(score-0.5))) for score in scores]
children_counts = [candidates[c]['children_count'] for c in candidate_commits_list]
# Inverse of children count (add 1 to avoid division by zero and to give less weight to more children)
child_weights = [1 / (1 + count) for count in children_counts]

combined_weights = [s * cw for s, cw in zip(scaled_scores, child_weights)]
if sum(combined_weights) == 0:
parent_commit = random.choice(candidate_commits_list)
else:
probabilities = [w / sum(combined_weights) for w in combined_weights]
parent_commit = random.choices(candidate_commits_list, probabilities, k=1)[0]
elif method == 'best':
# Pick the candidate with the highest accuracy_score.
# The candidate set does not change inside this loop, so every single-parent
# entry produced with method='best' uses the same parent.
parent_commit = sorted(candidate_commits_list, key=lambda c: candidates[c]['accuracy_score'], reverse=True)[0]
else: # 'random' or default
parent_commit = random.choice(candidate_commits_list)

if parent_commit is None: # Should ideally not happen if candidate_commits_list is not empty
continue

# Solve context length
if any_exceeding_context_length(output_dir, parent_commit, empty_ids + unresolved_ids) and \
random.random() < 0.25:
entry = 'solve_contextlength'
selfimprove_entries.append((parent_commit, entry))
empty_ids = candidates[parent_commit]['total_emptypatch_ids']
resolved_ids = candidates[parent_commit]['total_resolved_ids']
unresolved_ids = candidates[parent_commit]['total_unresolved_ids']

if polyglot:
entry_ids = empty_ids + unresolved_ids
if not entry_ids:
entry_ids = resolved_ids + empty_ids + unresolved_ids
else:
num_total_ids = len(empty_ids) + len(resolved_ids) + len(unresolved_ids)
if len(empty_ids) >= 0.1 * num_total_ids and random.random() < 0.25:
entry = 'solve_empty_patches'
selfimprove_entries.append((parent_commit, entry))
continue
if random.random() < 0.25:
entry = 'solve_stochasticity'
selfimprove_entries.append((parent_commit, entry))
continue
if any_exceeding_context_length(output_dir, parent_commit, empty_ids + unresolved_ids) and \
random.random() < 0.25:
entry = 'solve_contextlength'
selfimprove_entries.append((parent_commit, entry))
continue
if not unresolved_ids: # Renamed from unresolved_ids == 0 for clarity
# No targeted entry type was chosen above and this parent has no unresolved tasks.
# A parent with no tasks at all is skipped by the check that follows, so this call
# can return fewer than selfimprove_size entries; otherwise the fallback below
# draws the entry from empty_ids + resolved_ids.
# TODO: decide whether to guarantee one entry per iteration or adjust the loop.
# NOTE(review): the previous code tested `unresolved_ids == 0`, which appears to
# compare a list with an int and so would never have skipped — confirm that this
# emptiness check is an intended behavior change rather than a pure rename.
if not (empty_ids + resolved_ids + unresolved_ids): # if truly no tasks at all
continue # skip this parent
entry_ids = unresolved_ids if unresolved_ids else (empty_ids + resolved_ids)


if not entry_ids: # If after all logic, entry_ids is still empty (e.g. polyglot case with no tasks)
# This case should ideally be prevented by ensuring 'candidates' only includes those with tasks,
# or by having a default task. For now, skip adding an entry.
continue

# Choose a random unresolved entry
if unresolved_ids == 0:
continue
entry_ids = unresolved_ids
entry = random.choice(entry_ids)
selfimprove_entries.append((parent_commit, entry))
entry = random.choice(entry_ids)
selfimprove_entries.append((parent_commit, entry))

return selfimprove_entries

Expand Down Expand Up @@ -237,8 +260,9 @@ def main():
parser.add_argument("--polyglot", default=False, action='store_true', help="Run single shallow evaluation for self-improvement on swe.")
parser.add_argument("--eval_noise", type=float, default=0.1, help="Noise leeway for evaluation.")
parser.add_argument("--no_full_eval", default=False, action='store_true', help="Do not run full evaluation on swe if a node is the top N highest performing.")
# baselines
# Baselines. TODO: consider a more descriptive name for the --run_baseline argument.
parser.add_argument("--run_baseline", type=str, default=None, choices=['no_selfimprove', 'no_darwin'], help="Baseline to run.")
parser.add_argument("--fusion_probability", type=float, default=0.25, help="Probability of attempting a two-parent fusion.")
args = parser.parse_args()

# Variables for this DGM run
Expand Down Expand Up @@ -271,6 +295,7 @@ def main():
# Choose self-improve attempts
selfimprove_entries = choose_selfimproves(
output_dir, archive, args.selfimprove_size,
args.fusion_probability, # Pass the new argument
method=args.choose_selfimproves_method,
run_baseline=args.run_baseline,
polyglot=args.polyglot,
Expand Down
62 changes: 60 additions & 2 deletions coding_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,27 @@ def __init__(
test_description=None,
self_improve=False,
instance_id=None,
# Fusion task specific arguments
is_fusion_task=False,
parent1_patch_file=None,
parent2_patch_file=None,
parent1_commit_id=None,
parent2_commit_id=None
):
self.problem_statement = problem_statement
self.git_tempdir = git_tempdir
self.base_commit = base_commit
self.base_commit = base_commit # This is the SHA of the common ancestor for fusion
self.chat_history_file = chat_history_file
self.test_description = test_description
self.self_improve = self_improve
self.instance_id = instance_id if not self_improve else 'dgm'

self.is_fusion_task = is_fusion_task
self.parent1_patch_file = parent1_patch_file
self.parent2_patch_file = parent2_patch_file
self.parent1_commit_id = parent1_commit_id
self.parent2_commit_id = parent2_commit_id

self.code_model = CLAUDE_MODEL

# Initialize logger and store it in thread-local storage
Expand Down Expand Up @@ -154,7 +167,34 @@ def forward(self):
"""
The forward function for the AgenticSystem.
"""
instruction = f"""I have uploaded a Python code repository in the directory {self.git_tempdir}. Help solve the following problem.
if self.is_fusion_task:
self.logger.info("Fusion task detected.")
try:
with open(self.parent1_patch_file, 'r') as f:
parent1_patch_content = f.read()
with open(self.parent2_patch_file, 'r') as f:
parent2_patch_content = f.read()
except FileNotFoundError as e:
self.logger.error(f"Error: Parent patch file not found: {e}. This will result in an empty patch.")
# Do not raise: returning here skips the chat_with_agent call, so the agent makes no changes and the resulting diff is expected to be empty.
return # Exit early, no instruction to run

# Import lazily, inside the fusion branch, so that importing this module does not
# require prompts.fusion_prompt unless a fusion task is actually run.
from prompts.fusion_prompt import get_fusion_prompt

instruction = get_fusion_prompt(
base_commit_id=self.base_commit, # base_commit is the SHA of the common ancestor
parent1_commit_id=self.parent1_commit_id,
parent1_patch_content=parent1_patch_content,
parent2_commit_id=self.parent2_commit_id,
parent2_patch_content=parent2_patch_content,
existing_problem_statement=self.problem_statement # Original problem statement for context
)
self.logger.info(f"Fusion instruction generated for base {self.base_commit}, P1 {self.parent1_commit_id}, P2 {self.parent2_commit_id}")
else:
self.logger.info("Standard task detected.")
instruction = f"""I have uploaded a Python code repository in the directory {self.git_tempdir}. Help solve the following problem.

<problem_description>
{self.problem_statement}
Expand All @@ -166,6 +206,7 @@ def forward(self):

Your task is to make changes to the files in the {self.git_tempdir} directory to address the <problem_description>. I have already taken care of the required dependencies.
"""
self.logger.info(f"Instruction for chat_with_agent (first 200 chars):\n{instruction[:200]}...")
new_msg_history = chat_with_agent(instruction, model=self.code_model, msg_history=[], logging=safe_log)

def main():
Expand All @@ -178,8 +219,19 @@ def main():
parser.add_argument('--test_description', default=None, required=False, help='Description of how to test the repository')
parser.add_argument('--self_improve', default=False, action='store_true', help='Whether to self-improve the repository or solving swe')
parser.add_argument('--instance_id', default=None, help='Instance ID for SWE issue')

# Arguments for fusion task
parser.add_argument("--is_fusion_task", default=False, action="store_true", help="Indicates if the task is a fusion of two parents.")
parser.add_argument("--parent1_patch_file", type=str, default=None, help="Path to the diff file for Parent 1 (changes from base to P1). Required if is_fusion_task is True.")
parser.add_argument("--parent2_patch_file", type=str, default=None, help="Path to the diff file for Parent 2 (changes from base to P2). Required if is_fusion_task is True.")
parser.add_argument("--parent1_commit_id", type=str, default="Parent1", help="Commit ID for Parent 1 (for prompt context).")
parser.add_argument("--parent2_commit_id", type=str, default="Parent2", help="Commit ID for Parent 2 (for prompt context).")

args = parser.parse_args()

if args.is_fusion_task and (not args.parent1_patch_file or not args.parent2_patch_file):
parser.error("--parent1_patch_file and --parent2_patch_file are required when --is_fusion_task is True.")

# Process the repository
agentic_system = AgenticSystem(
problem_statement=args.problem_statement,
Expand All @@ -189,6 +241,12 @@ def main():
test_description=args.test_description,
self_improve=args.self_improve,
instance_id=args.instance_id,
# Fusion arguments
is_fusion_task=args.is_fusion_task,
parent1_patch_file=args.parent1_patch_file,
parent2_patch_file=args.parent2_patch_file,
parent1_commit_id=args.parent1_commit_id,
parent2_commit_id=args.parent2_commit_id
)

# Run the agentic system to try to solve the problem
Expand Down
Loading