Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c2eae92
feat(llm): add new gemini model and switch to openrouter api
hflyzju Jul 11, 2025
cec54ac
docs(perform_experiments): add comments to clarify code logic
hflyzju Jul 11, 2025
1502844
Add AI-Researcher as a Git submodule
hflyzju Jul 11, 2025
a5a2d9a
docs: update experiment name in README example
hflyzju Jul 11, 2025
9608e62
feat(generate_ideas): add literature review integration for idea gene…
hflyzju Jul 11, 2025
f814203
feat: add nova index support and improve literature review process
hflyzju Jul 12, 2025
ce31e25
feat: add skip-run-experiment flag and enable idea saving
hflyzju Jul 12, 2025
26e7cf9
Merge pull request #1 from aixiv-org/dev_hx
hflyzju Jul 12, 2025
2e0d008
update code and paer
Oliverhuang123 Jul 12, 2025
c9ed739
Merge pull request #2 from Oliverhuang123/main
hflyzju Jul 14, 2025
1e0d6bc
feat: add debug mode and update ideas file handling
hflyzju Jul 15, 2025
ae46faf
feat(llm): add new claude model and api key validation
hflyzju Jul 15, 2025
6ee1b41
add new idea result to template(50 ideas / 3 topic)
hflyzju Jul 15, 2025
a6565d4
perf(experiments): optimize idea selection and deduplication (exp0717…
hflyzju Jul 18, 2025
ba303e7
feat: add experiment2 script and related files for AI scientist paper…
hflyzju Jul 19, 2025
d006730
feat: add experiment(exp2) results for nanoGPT, grokking, and 2d_diff…
hflyzju Jul 21, 2025
1376b2a
feat(experiment1): add experiment1 scripts and documentation
hflyzju Jul 22, 2025
c2e9a9b
refactor(ai_scientist): improve proposal generation prompt and output…
hflyzju Jul 23, 2025
64c6431
feat(experiment2-0723): add support for paper improvement workflow
hflyzju Jul 24, 2025
2a58544
feat: add rating analysis script with visualization and csv output
hflyzju Jul 25, 2025
ee2b769
feat: add experiment v4 scripts and update gitignore
hflyzju Jul 28, 2025
79f223d
update
hflyzju Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*.json
*$py.class

# C extensions
Expand Down Expand Up @@ -172,3 +173,7 @@ ICLR2022-OpenReviewData/
templates/*/run_0/
templates/*/*.png
results/*
example_papers_for_exp2/
results_exp2_v2/
results_exp2_v3/
results_exp2_v4/
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "AI-Researcher"]
path = AI-Researcher
url = ./AI-Researcher
1 change: 1 addition & 0 deletions AI-Researcher
Submodule AI-Researcher added at 0600ed
1 change: 1 addition & 0 deletions NPEET
Submodule NPEET added at 8b0d94
55 changes: 53 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ We provide all runs and data from our paper [here](https://drive.google.com/driv
1. [DualScale Diffusion: Adaptive Feature Balancing for Low-Dimensional Generative Models](https://github.com/SakanaAI/AI-Scientist/blob/main/example_papers/adaptive_dual_scale_denoising.pdf)
2. [Multi-scale Grid Noise Adaptation: Enhancing Diffusion Models For Low-dimensional Data](https://github.com/SakanaAI/AI-Scientist/blob/main/example_papers/grid_based_noise_adaptation.pdf)
3. [GAN-Enhanced Diffusion: Boosting Sample Quality and Diversity](https://github.com/SakanaAI/AI-Scientist/blob/main/example_papers/gan_diffusion.pdf)
4. [DualDiff: Enhancing Mode Capture in Low-dimensional Diffusion Models via Dual-expert Denoising](https://github.com/SakanaAI/AI-Scientist/tree/main/example_papers/dual_expert_denoiser.pdf)
4. [DualDiff: Enhancing Mode Capture in Low-dimensional Diffusion Models via Dual-expert Denoising](https://github.com/SakanaAI/AI-Scientist/tree/main/example_papers/dual_expert_denoiser.pdf)
5. [StyleFusion: Adaptive Multi-style Generation in Character-Level Language Models](https://github.com/SakanaAI/AI-Scientist/blob/main/example_papers/multi_style_adapter.pdf)
6. [Adaptive Learning Rates for Transformers via Q-Learning](https://github.com/SakanaAI/AI-Scientist/tree/main/example_papers/rl_lr_adaptation.pdf)
7. [Unlocking Grokking: A Comparative Study of Weight Initialization Strategies in Transformer Models](https://github.com/SakanaAI/AI-Scientist/tree/main/example_papers/weight_initialization_grokking.pdf)
8. [Grokking Accelerated: Layer-wise Learning Rates for Transformer Generalization](https://github.com/SakanaAI/AI-Scientist/tree/main/example_papers/layerwise_lr_grokking.pdf)
9. [Grokking Through Compression: Unveiling Sudden Generalization via Minimal Description Length](https://github.com/SakanaAI/AI-Scientist/tree/main/example_papers/mdl_grokking_correlation.pdf)
10. [Accelerating Mathematical Insight: Boosting Grokking Through Strategic Data Augmentation](https://github.com/SakanaAI/AI-Scientist/tree/main/example_papers/data_augmentation_grokking.pdf)

> **Note:**
> **Note:**
> **Caution!** This codebase will execute LLM-written code. There are various risks and challenges associated with this autonomy, including the use of potentially dangerous packages, web access, and potential spawning of processes. Use at your own discretion. Please make sure to [containerize](#containerization) and restrict web access appropriately.

<p align="center">
Expand Down Expand Up @@ -232,13 +232,64 @@ This section provides instructions for setting up each of the three templates us
python plot.py
```

## Experiment1
```
python launch_scientist_exp1.py --model "deepseek/deepseek-chat" --experiment nanoGPT --use-literature --review-by pengsong
python launch_scientist_exp1.py --model "deepseek/deepseek-chat" --experiment nanoGPT --use-literature --review-by guowei
python launch_scientist_exp1.py --model "deepseek/deepseek-chat" --experiment grokking --use-literature --review-by pengsong
python launch_scientist_exp1.py --model "deepseek/deepseek-chat" --experiment grokking --use-literature --review-by guowei
python launch_scientist_exp1.py --model "deepseek/deepseek-chat" --experiment 2d_diffusion --use-literature --review-by pengsong
python launch_scientist_exp1.py --model "deepseek/deepseek-chat" --experiment 2d_diffusion --use-literature --review-by guowei

```
## Experiment2_v2
```
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title adaptive_dual_scale_denoising --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title data_augmentation_grokking --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title dual_expert_denoiser --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title gan_diffusion --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title grid_based_noise_adaptation --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title layerwise_lr_grokking --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title mdl_grokking_correlation --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title multi_style_adapter --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title rl_lr_adaptation --improvement --example-papers-dir example_papers_for_exp2
python launch_scientist_exp2_v2.py --model "deepseek/deepseek-chat" --old-paper-title weight_initialization_grokking --improvement --example-papers-dir example_papers_for_exp2

```

## Experiment2
```
python launch_scientist_exp2.py --model "deepseek/deepseek-chat" --experiment nanoGPT --num-ideas 10 --use-literature --run-idea-dedup
python launch_scientist_exp2.py --model "deepseek/deepseek-chat" --experiment grokking --num-ideas 10 --use-literature --run-idea-dedup
python launch_scientist_exp2.py --model "deepseek/deepseek-chat" --experiment 2d_diffusion --num-ideas 10 --use-literature --run-idea-dedup
```


## Run AI Scientist Paper Generation Experiments

**Note:** Please ensure the setup steps above are completed before running these experiments.

```bash
conda activate ai_scientist
# Run the paper generation.
python launch_scientist.py --model "deepseek/deepseek-chat" --experiment nanoGPT --num-ideas 50 --use-literature --skip-idea-generation --exist-idea-file templates/nanoGPT/final_dedup_proposals.json --skip-novelty-check
python launch_scientist.py --model "deepseek/deepseek-chat" --experiment grokking --num-ideas 50 --use-literature --skip-idea-generation --exist-idea-file templates/grokking/final_dedup_proposals.json --skip-novelty-check
python launch_scientist.py --model "deepseek/deepseek-chat" --experiment 2d_diffusion --num-ideas 50 --use-literature --skip-idea-generation --exist-idea-file templates/2d_diffusion/final_dedup_proposals.json --skip-novelty-check




python launch_scientist.py --model "deepseek/deepseek-chat" --experiment nanoGPT --num-ideas 50 --use-literature --skip-run-experiment

python launch_scientist.py --model "deepseek/deepseek-chat" --experiment nanoGPT --num-ideas 50 --use-literature --skip-idea-generation --target-exp-idea-file templates/nanoGPT/exp_idea_0.json --skip-novelty-check


python launch_scientist.py --model "deepseek/deepseek-chat" --experiment nanoGPT --num-ideas 50 --use-literature --skip-idea-generation --exist-idea-file templates/nanoGPT/final_dedup_proposals.json --skip-novelty-check

python launch_scientist.py --model "deepseek/deepseek-chat" --experiment nanoGPT --num-ideas 50 --use-literature --skip-idea-generation --skip-novelty-check --debug

python launch_scientist.py --model "google/gemini-2.5-flash-preview-05-20" --experiment nanoGPT --num-ideas 2 --use-literature
python launch_scientist.py --model "azure/gpt-4o" --experiment nanoGPT --num-ideas 2 --use-literature
python launch_scientist.py --model "gpt-4o-2024-05-13" --experiment nanoGPT_lite --num-ideas 2
python launch_scientist.py --model "claude-3-5-sonnet-20241022" --experiment nanoGPT_lite --num-ideas 2
```
Expand Down
170 changes: 143 additions & 27 deletions ai_scientist/generate_ideas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import requests

from ai_scientist.llm import get_response_from_llm, extract_json_between_markers, create_client, AVAILABLE_LLMS
from utils_tool import load_json_from_file, save_json_data_to_file

S2_API_KEY = os.getenv("S2_API_KEY")

Expand Down Expand Up @@ -51,6 +52,55 @@
You will have {num_reflections} rounds to iterate on the idea, but do not need to use them all.
"""

# First-round idea-generation prompt for the literature-aware path: it is the
# template sent to the LLM when literature-review results are available.
# Filled via str.format() with these placeholders: task_description, code,
# prev_ideas_string, lit_review_results and num_reflections.
# The JSON it requests is proposal-shaped: Name, Title, Problem Statement,
# Motivation, Proposed Method and Experiment, plus three 1-10 self-ratings
# (Interestingness, Feasibility, Novelty).
idea_first_with_lit_review_and_convert_to_proposal_prompt = """{task_description}
<experiment.py>
{code}
</experiment.py>

Here are the ideas that you have already generated:

'''
{prev_ideas_string}
'''

Here are the lit review results(NOT TO COPY THE IDEA FROM LIT REVIEW):

'''
{lit_review_results}
'''

Come up with the next impactful and creative idea for research experiments and directions you can feasibly investigate with the code provided.
Note that you will not have access to any additional resources or datasets.
Make sure any idea is not overfit the specific training dataset or model, and has wider significance.

Respond in the following format:

THOUGHT:
<THOUGHT>

NEW IDEA JSON:
```json
<JSON>
```

In <THOUGHT>, first briefly discuss your intuitions and motivations for the idea. Detail your high-level plan, necessary design choices and ideal outcomes of the experiments. Justify how the idea is different from the existing ones.

In <JSON>, provide the new idea in JSON format with the following fields:
- "Name": A shortened descriptor of the idea. Lowercase, no spaces, underscores allowed.
- "Title": A title for the idea, will be used for the report writing.
- "Problem Statement": A clear and concise description of the problem you are trying to solve.
- "Motivation": A clear and concise description of the motivation behind the problem.
- "Proposed Method": A clear and concise description of the proposed method to solve the problem.
- "Experiment": An outline of the implementation. E.g. which functions need to be added or modified, how results will be obtained, ...
- "Interestingness": A rating from 1 to 10 (lowest to highest).
- "Feasibility": A rating from 1 to 10 (lowest to highest).
- "Novelty": A rating from 1 to 10 (lowest to highest).

Be cautious and realistic on your ratings.
This JSON will be automatically parsed, so ensure the format is precise.
You will have {num_reflections} rounds to iterate on the idea, but do not need to use them all.
"""

idea_reflection_prompt = """Round {current_round}/{num_reflections}.
In your thoughts, first carefully consider the quality, novelty, and feasibility of the idea you just created.
Include any other factors that you think are important in evaluating the idea.
Expand All @@ -71,31 +121,41 @@
If there is nothing to improve, simply repeat the previous JSON EXACTLY after the thought and include "I am done" at the end of the thoughts but before the JSON.
ONLY INCLUDE "I am done" IF YOU ARE MAKING NO MORE CHANGES."""


# GENERATE IDEAS
def generate_ideas(
base_dir,
client,
model,
skip_generation=False,
exist_idea_file=None,
max_num_generations=20,
num_reflections=5,
use_literature=True,
lit_review_size=5,
use_semantic_index=False,
use_nova_index=True
):
print(f"skip_generation: {skip_generation}, exist_idea_file:{exist_idea_file}")
if skip_generation:
# Load existing ideas from file
try:
with open(osp.join(base_dir, "ideas.json"), "r") as f:
if 1:
if exist_idea_file:
ideas = load_json_from_file(exist_idea_file)
print(f"Loaded existing ideas from {exist_idea_file}")
return ideas
with open(osp.join(base_dir, "new_ideas.json"), "r") as f:
ideas = json.load(f)
print("Loaded existing ideas:")
for idea in ideas:
print(idea)
print("Loaded existing ideas from new_ideas.json")
return ideas
except FileNotFoundError:
print("No existing ideas found. Generating new ideas.")
except json.JSONDecodeError:
print("Error decoding existing ideas. Generating new ideas.")
else:
print(1)
# except FileNotFoundError:
# print("No existing ideas found. Generating new ideas.")
# except json.JSONDecodeError:
# print("Error decoding existing ideas. Generating new ideas.")

idea_str_archive = []
new_idea_str_archive = []
with open(osp.join(base_dir, "seed_ideas.json"), "r") as f:
seed_ideas = json.load(f)
for seed_idea in seed_ideas:
Expand All @@ -117,18 +177,61 @@ def generate_ideas(

msg_history = []
print(f"Iteration 1/{num_reflections}")
text, msg_history = get_response_from_llm(
idea_first_prompt.format(
task_description=prompt["task_description"],
code=code,
prev_ideas_string=prev_ideas_string,
num_reflections=num_reflections,
),
client=client,
model=model,
system_message=idea_system_prompt,
msg_history=msg_history,
)
lit_review_results = None
if not use_literature:
text, msg_history = get_response_from_llm(
idea_first_prompt.format(
task_description=prompt["task_description"],
code=code,
prev_ideas_string=prev_ideas_string,
num_reflections=num_reflections,
),
client=client,
model=model,
system_message=idea_system_prompt,
msg_history=msg_history,
)
else:
if use_semantic_index:
import sys
sys.path.append("AI-Researcher/ai_researcher/src")
from lit_review import collect_papers
from lit_review_tools import format_papers_for_printing
paper_bank, total_cost, all_queries = collect_papers(
topic_description=prompt["task_description"],
openai_client=client,
model=model,
seed=2025
)
elif use_nova_index:
import sys
sys.path.append("AI-Researcher/ai_researcher/src")
from lit_review_tools import format_papers_for_printing
from search_paper_from_nova_index import search_papers
paper_bank = search_papers(
query=prompt["task_description"],
topk=lit_review_size,
)
total_cost = 0.0
all_queries = []
else:
raise ValueError("use_literature must be True if use_semantic_paper_search or use_nova_index is True")
print(f"literature view done! paper_bank size:{len(paper_bank)}, total cost: {total_cost}, all queries: {all_queries}")
lit_review_results = format_papers_for_printing(paper_bank[:lit_review_size])
# print(f"===================\n\nlit_review_results:\n{lit_review_results}\n\n===================\n\n")
text, msg_history = get_response_from_llm(
idea_first_with_lit_review_and_convert_to_proposal_prompt.format(
task_description=prompt["task_description"],
code=code,
prev_ideas_string=prev_ideas_string,
num_reflections=num_reflections,
lit_review_results=lit_review_results,
),
client=client,
model=model,
system_message=idea_system_prompt,
msg_history=msg_history,
)
## PARSE OUTPUT
json_output = extract_json_between_markers(text)
assert json_output is not None, "Failed to extract JSON from LLM output"
Expand Down Expand Up @@ -158,7 +261,9 @@ def generate_ideas(
print(f"Idea generation converged after {j + 2} iterations.")
break

json_output['lit_review_results'] = lit_review_results
idea_str_archive.append(json.dumps(json_output))
new_idea_str_archive.append(json.dumps(json_output))
except Exception as e:
print(f"Failed to generate idea: {e}")
continue
Expand All @@ -168,10 +273,17 @@ def generate_ideas(
for idea_str in idea_str_archive:
ideas.append(json.loads(idea_str))

with open(osp.join(base_dir, "ideas.json"), "w") as f:
json.dump(ideas, f, indent=4)
# with open(osp.join(base_dir, "ideas.json"), "w") as f:
# json.dump(ideas, f, indent=4)

return ideas
new_ideas = []
for idea_str in new_idea_str_archive:
new_ideas.append(json.loads(idea_str))
with open(osp.join(base_dir, "new_ideas.json"), "w") as f:
json.dump(new_ideas, f, indent=4)

# return ideas
return new_ideas


# GENERATE IDEAS OPEN-ENDED
Expand Down Expand Up @@ -285,6 +397,10 @@ def on_backoff(details):
def search_for_papers(query, result_limit=10, engine="semanticscholar") -> Union[None, List[Dict]]:
if not query:
return None
if engine == 'nova':
from search_paper_from_nova_index import search_papers
papers = search_papers(query, topk=result_limit)
return papers
if engine == "semanticscholar":
rsp = requests.get(
"https://api.semanticscholar.org/graph/v1/paper/search",
Expand Down Expand Up @@ -408,7 +524,7 @@ def check_idea_novelty(
client,
model,
max_num_iterations=10,
engine="semanticscholar",
engine="nova", # nova, semanticscholar, openalex
):
with open(osp.join(base_dir, "experiment.py"), "r") as f:
code = f.read()
Expand Down Expand Up @@ -485,7 +601,7 @@ def check_idea_novelty(
idea["novel"] = novel

# Save results to JSON file
results_file = osp.join(base_dir, "ideas.json")
results_file = osp.join(base_dir, "new_ideas.json")
with open(results_file, "w") as f:
json.dump(ideas, f, indent=4)

Expand Down
Loading