-
Notifications
You must be signed in to change notification settings - Fork 10
/
autobrowse.py
249 lines (216 loc) · 10.4 KB
/
autobrowse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
from typing import Any, Dict, List
import autogen
from autogen.agentchat.assistant_agent import AssistantAgent
from autogen.code_utils import extract_code
from browser_proxy_agent import BrowserProxyAgent
from retrieve_html_proxy_agent import RetrieveHTMLProxyAgent
import agent_config
def get_code_blocks(message: str) -> List[str]:
    """
    Collect the code found in *message*.

    ``extract_code`` is iterated as a sequence of 2-item pairs; the first item
    of each pair is discarded and the second (the code block) is kept, in order.
    """
    blocks: List[str] = []
    for _, source in extract_code(message):
        blocks.append(source)
    return blocks
def is_termination_message_for_code_generator(message):
    """
    Tell whether *message* ends the conversation with the code_generator agent.

    The message terminates the chat when its "content", ignoring trailing
    whitespace, ends with "TERMINATE". A missing, empty or None content never
    terminates.
    """
    text = message.get("content", "")
    # bool(text) short-circuits to False for "" / None before any str method runs
    return bool(text) and text.rstrip().endswith("TERMINATE")
def is_termination_message_for_planner(message):
    """
    Tell whether *message* ends the conversation with the planner agent.

    The message terminates the chat when its "content" contains the marker
    "FINISHED" anywhere. A missing, empty or None content never terminates.
    """
    text = message.get("content", "")
    if text:
        return "FINISHED" in text.rstrip()
    return False
class AutoBrowse:
    """
    Coordinate three LLM agents to fulfill a web browsing task.

    * ``planner`` produces the plan and may call two functions,
      ``ask_html_assistant`` and ``ask_code_generator``, which are mapped to the
      methods of the same name on this class.
    * ``html_assistant`` answers questions about the current page.
    * ``code_generator`` writes code that its proxy agent runs against the
      browser console.

    Code blocks from successful code_generator runs are accumulated in
    ``code_executed_so_far`` and handed back by ``ask_planner``.
    """

    def __init__(self, config: Dict[str, Any], browser_console_uri: str = "ws://localhost:3000"):
        """
        Build all agents from *config*.

        Args:
            config: mapping with the sections "html_assistant", "code_generator",
                "code_generator_user_proxy", "planner" and "planner_user_proxy".
                The assistant sections are read for "model" and "system_message";
                the proxy sections for "max_consecutive_auto_reply" (default 0).
            browser_console_uri: URI of the browser console, forwarded to the
                code_generator's proxy agent.

        Raises:
            KeyError: if one of the sections listed above is missing.
        """
        # code blocks executed thus far for the task in progress
        self.code_executed_so_far = []

        # browser console uri to send puppeteer.js code to and fetch HTML from
        self.browser_console_uri = browser_console_uri

        # initialize agents
        html_cfg = config["html_assistant"]
        self.init_html_assistant(html_cfg.get("model"), html_cfg.get("system_message"))

        code_cfg = config["code_generator"]
        self.init_code_generator(
            code_cfg.get("model"),
            code_cfg.get("system_message"),
            config["code_generator_user_proxy"].get("max_consecutive_auto_reply", 0),
            browser_console_uri=self.browser_console_uri,
        )

        planner_cfg = config["planner"]
        self.init_planner(
            planner_cfg.get("model"),
            planner_cfg.get("system_message"),
            config["planner_user_proxy"].get("max_consecutive_auto_reply", 0),
        )

    @staticmethod
    def _load_config_list(model_name):
        """Load the entries of ./OAI_CONFIG_LIST whose "model" matches *model_name*."""
        return autogen.config_list_from_json(
            "OAI_CONFIG_LIST",
            file_location=".",
            filter_dict={"model": {model_name}},
        )

    @staticmethod
    def _content_at(history, index):
        """Return the "content" of ``history[index]``, or "" if that entry is absent or has no content."""
        try:
            return history[index].get("content") or ""
        except IndexError:
            return ""

    def init_planner(self, model_name="gpt-4", system_message="", max_consecutive_auto_reply=0):
        """
        Initialize the planner agent, which generates a plan to fulfill a web browsing task.

        Creates ``self.planner`` (the assistant) and ``self.planner_user_proxy``
        (the proxy that executes the planner's function calls).

        Args:
            model_name: model used to filter OAI_CONFIG_LIST.
            system_message: replaces the AssistantAgent default system message.
            max_consecutive_auto_reply: auto-reply limit for the proxy agent.
        """
        llm_config_planner = {
            "config_list": self._load_config_list(model_name),
            # function schemas advertised to the planner model
            "functions": [
                {
                    "name": "ask_html_assistant",
                    "description": "ask a question to the html_assistant",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "message": {
                                "type": "string",
                                "description": "the question to ask the html_assistant about the current page",
                            },
                        },
                        "required": ["message"],
                    },
                },
                {
                    "name": "ask_code_generator",
                    "description": "ask a question to the code_generator",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "message": {
                                "type": "string",
                                "description": "the question to ask the code_generator",
                            },
                            "context_html": {
                                "type": "string",
                                "description": "the HTML from the current page",
                            },
                        },
                        "required": ["message"],
                    },
                },
            ],
        }

        # an AssistantAgent named planner that generates a plan to fulfill a web browsing task;
        # it can propose the usage of two functions:
        #   1. ask_html_assistant - ask questions about the current page in the browser
        #   2. ask_code_generator - generate and execute puppeteer.js code in the browser
        self.planner = autogen.AssistantAgent(
            name="planner",
            llm_config=llm_config_planner,
            # the default system message of the AssistantAgent is overwritten here
            system_message=system_message,
        )

        # the proxy resolves the planner's function calls to the methods on this instance
        self.planner_user_proxy = autogen.UserProxyAgent(
            name="planner_user_proxy",
            human_input_mode="NEVER",
            max_consecutive_auto_reply=max_consecutive_auto_reply,
            is_termination_msg=is_termination_message_for_planner,
            function_map={
                "ask_html_assistant": self.ask_html_assistant,
                "ask_code_generator": self.ask_code_generator,
            },
        )

    def init_html_assistant(self, model_name="gpt-3.5-turbo-16k", system_message=""):
        """
        Initialize the html_assistant agent and the proxy used to talk to it.

        Creates ``self.html_assistant`` and ``self.html_proxy``.

        Args:
            model_name: model used to filter OAI_CONFIG_LIST.
            system_message: replaces the AssistantAgent default system message.
        """
        self.html_assistant = autogen.AssistantAgent(
            name="html_assistant",
            llm_config={"config_list": self._load_config_list(model_name)},
            # the default system message of the AssistantAgent is overwritten here
            system_message=system_message,
        )

        # proxy that interacts with html_assistant; it never auto-replies (limit 0)
        self.html_proxy = RetrieveHTMLProxyAgent(
            name="html_user_proxy",
            human_input_mode="NEVER",
            max_consecutive_auto_reply=0,
        )

    def init_code_generator(self, model_name="gpt-4", system_message="", max_consecutive_auto_reply=3, browser_console_uri="ws://localhost:3000"):
        """
        Initialize the code_generator agent and the proxy that runs its code.

        Creates ``self.code_generator`` and ``self.code_generator_user_proxy``.

        Args:
            model_name: model used to filter OAI_CONFIG_LIST.
            system_message: replaces the AssistantAgent default system message.
            max_consecutive_auto_reply: auto-reply limit for the proxy agent.
            browser_console_uri: browser console URI handed to the proxy agent.
        """
        # an AssistantAgent named code_generator that generates puppeteer.js code to interact with the browser
        self.code_generator = autogen.AssistantAgent(
            name="code_generator",
            llm_config={"config_list": self._load_config_list(model_name)},
            # the default system message of the AssistantAgent is overwritten here
            system_message=system_message,
        )

        # proxy agent that interacts with the code_generator
        self.code_generator_user_proxy = BrowserProxyAgent(
            name="code_generator_user_proxy",
            human_input_mode="NEVER",
            max_consecutive_auto_reply=max_consecutive_auto_reply,
            is_termination_msg=is_termination_message_for_code_generator,
            code_execution_config={"work_dir": "code_execution"},
            browser_console_uri=browser_console_uri,
        )

    def ask_planner(self, question: str) -> str:
        """
        Entrypoint function to Autobrowse to fulfill a web browsing task.

        Args:
            question: the task, sent to the planner as the opening message.

        Returns:
            All code blocks recorded as executed during this task, joined with
            newlines. The record is cleared before returning.
        """
        self.planner_user_proxy.initiate_chat(
            self.planner,
            message=question,
        )
        final_code_executed = "\n".join(self.code_executed_so_far)
        # reset so the next task starts from a clean slate
        self.code_executed_so_far = []
        return final_code_executed

    def ask_html_assistant(self, message: str) -> str:
        """
        Ask the html assistant a question about the HTML content of the current page.

        Args:
            message: the question to ask.

        Returns:
            The content of the last message of the resulting chat.
        """
        self.html_proxy.initiate_chat(self.html_assistant, message=message)
        return self.html_proxy.last_message()["content"]

    def augment_message_to_code_gen(self, message: str, context_html: str):
        """
        Augment the question to code_generator with the relevant HTML and the code executed so far.

        Each section is appended only when it is non-empty. The HTML is now
        included even when no code has been executed yet; previously it was
        dropped in that case, so the first request of a task lost its page context.

        Args:
            message: the question for the code_generator.
            context_html: HTML from the current page; may be empty.

        Returns:
            The message followed by the applicable sections.
        """
        code_executed_so_far_str = "\n".join(self.code_executed_so_far)

        if context_html and code_executed_so_far_str:
            return f"""{message}\n\nThis is the relevant HTML from the current page:\n\n{context_html}\n\nThis is the code already executed so far:\n{code_executed_so_far_str}"""
        if context_html:
            return f"""{message}\n\nThis is the relevant HTML from the current page:\n\n{context_html}"""
        if code_executed_so_far_str:
            return f"""{message}\n\nThis is the code already executed so far:\n\n{code_executed_so_far_str}"""
        return message

    def ask_code_generator(self, message: str, context_html="") -> str:
        """
        Ask code_generator a question and report whether its code ran successfully.

        The chat counts as successful when its last message is a termination
        message for the code_generator; only then are the code blocks appended
        to ``code_executed_so_far``.

        Args:
            message (str): the question to ask code_generator
            context_html (str): the relevant HTML for code_generator to complete the task

        Returns:
            A success report listing the executed code, or a failure report with
            the code and the error message.
        """
        self.code_generator_user_proxy.initiate_chat(
            self.code_generator,
            message=self.augment_message_to_code_gen(message, context_html),
        )
        history = self.code_generator_user_proxy.chat_messages[self.code_generator]

        # -2 is the execution result,
        # -3 is the last message with a code block.
        # A short conversation (e.g. an immediate TERMINATE) has no such entries,
        # so fall back to "no code" instead of raising IndexError.
        last_code_block_message = self._content_at(history, -3)
        code_blocks = get_code_blocks(last_code_block_message) if last_code_block_message else []
        code_blocks_str = "\n".join(code_blocks)

        if is_termination_message_for_code_generator(self.code_generator_user_proxy.last_message()):
            # add code blocks to code_executed_so_far
            self.code_executed_so_far.extend(code_blocks)
            return f""" Code execution successful. The following code was executed:\n{code_blocks_str}"""

        last_error_message = self._content_at(history, -2)
        return f"Code execution failed. Code execution:\n{code_blocks_str}\nError message:\n{last_error_message}"
if __name__ == "__main__":
    # Interactive loop: read a task, run it through the planner, print the code that was executed.
    autobrowse = AutoBrowse(config=agent_config.config)
    separator = "====================================="
    while True:
        task = input("Enter a question to ask Autobrowse: \n")
        executed = autobrowse.ask_planner(task)
        print("Final code executed: ")
        print(executed)
        # five separator lines mark the end of one task's output
        for _ in range(5):
            print(separator)