@@ -9,16 +9,19 @@ comparison_benchmark:
99 - claude_code
1010 - gh_cli
1111 - amplifier_foundation
12+ - openai_codex
1213 - task_name : ppt-2
1314 agent_ids :
1415 - claude_code
1516 - gh_cli
1617 - amplifier_foundation
18+ - openai_codex
1719 - task_name : ppt-3
1820 agent_ids :
1921 - claude_code
2022 - gh_cli
2123 - amplifier_foundation
24+ - openai_codex
2225
2326score_benchmark :
2427 benchmark_type : score
@@ -29,14 +32,19 @@ score_benchmark:
2932 task_names :
3033 - arxiv_conclusion_extraction
3134 - arxiv_paper_summarizer
35+ - chiptune_generator
3236 - code-discrepancy-docs-knack
3337 - code-discrepancy-docstrings-grasp
3438 - code-discrepancy-tutorials-grasp
3539 - cpsc_recall_monitor
3640 - cross_repo_improvement_tool
3741 - email_drafting
42+ - energy_forecast_new_england
3843 - gdpval_extraction
44+ - git_changelog_generator
3945 - github_docs_extractor
46+ - image_tagging
47+ - ipo_tracker
4048 - linkedin_drafting
4149 - markdown_deck_converter
4250 - news_research_tool
@@ -45,6 +53,7 @@ score_benchmark:
4553 - pdf-hr-q3
4654 - pdf-hr-q4
4755 - pdf-hr-q5
56+ - pixel_art_generator
4857 - product_review_finder
4958 - repo_embedding_server
5059 - sec_10q_extractor
@@ -54,14 +63,19 @@ score_benchmark:
5463 task_names :
5564 - arxiv_conclusion_extraction
5665 - arxiv_paper_summarizer
66+ - chiptune_generator
5767 - code-discrepancy-docs-knack
5868 - code-discrepancy-docstrings-grasp
5969 - code-discrepancy-tutorials-grasp
6070 - cpsc_recall_monitor
6171 - cross_repo_improvement_tool
6272 - email_drafting
73+ - energy_forecast_new_england
6374 - gdpval_extraction
75+ - git_changelog_generator
6476 - github_docs_extractor
77+ - image_tagging
78+ - ipo_tracker
6579 - linkedin_drafting
6680 - markdown_deck_converter
6781 - news_research_tool
@@ -70,6 +84,7 @@ score_benchmark:
7084 - pdf-hr-q3
7185 - pdf-hr-q4
7286 - pdf-hr-q5
87+ - pixel_art_generator
7388 - product_review_finder
7489 - repo_embedding_server
7590 - sec_10q_extractor
@@ -79,14 +94,19 @@ score_benchmark:
7994 task_names :
8095 - arxiv_conclusion_extraction
8196 - arxiv_paper_summarizer
97+ - chiptune_generator
8298 - code-discrepancy-docs-knack
8399 - code-discrepancy-docstrings-grasp
84100 - code-discrepancy-tutorials-grasp
85101 - cpsc_recall_monitor
86102 - cross_repo_improvement_tool
87103 - email_drafting
104+ - energy_forecast_new_england
88105 - gdpval_extraction
106+ - git_changelog_generator
89107 - github_docs_extractor
108+ - image_tagging
109+ - ipo_tracker
90110 - linkedin_drafting
91111 - markdown_deck_converter
92112 - news_research_tool
@@ -95,6 +115,38 @@ score_benchmark:
95115 - pdf-hr-q3
96116 - pdf-hr-q4
97117 - pdf-hr-q5
118+ - pixel_art_generator
119+ - product_review_finder
120+ - repo_embedding_server
121+ - sec_10q_extractor
122+ - style_blender
123+ trials : 1
124+ - agent_id : openai_codex
125+ task_names :
126+ - arxiv_conclusion_extraction
127+ - arxiv_paper_summarizer
128+ - chiptune_generator
129+ - code-discrepancy-docs-knack
130+ - code-discrepancy-docstrings-grasp
131+ - code-discrepancy-tutorials-grasp
132+ - cpsc_recall_monitor
133+ - cross_repo_improvement_tool
134+ - email_drafting
135+ - energy_forecast_new_england
136+ - gdpval_extraction
137+ - git_changelog_generator
138+ - github_docs_extractor
139+ - image_tagging
140+ - ipo_tracker
141+ - linkedin_drafting
142+ - markdown_deck_converter
143+ - news_research_tool
144+ - pdf-hr-q1
145+ - pdf-hr-q2
146+ - pdf-hr-q3
147+ - pdf-hr-q4
148+ - pdf-hr-q5
149+ - pixel_art_generator
98150 - product_review_finder
99151 - repo_embedding_server
100152 - sec_10q_extractor
0 commit comments