diff --git a/README.md b/README.md index 210f6f4b..6951d0f8 100644 --- a/README.md +++ b/README.md @@ -176,3 +176,92 @@ We'd like to acknowledge the excellent work of the open-source community, especi - [uv](https://github.com/astral-sh/uv) and [ruff](https://github.com/astral-sh/ruff) We're committed to continuing to build the Agents SDK as an open source framework so others in the community can expand on our approach. + +# Firecrawl Integration for OpenAI + +This integration combines the power of Firecrawl for web scraping with OpenAI for information extraction. + +## Features + +- Extract any type of information from any website +- Simple, easy-to-use interface +- Handles natural language prompts + +## Installation + +1. Install the required packages: + +```bash +pip install openai firecrawl-py +``` + +2. Set your API keys as environment variables: + +```bash +export OPENAI_API_KEY=your_openai_api_key +export FIRECRAWL_API_KEY=your_firecrawl_api_key +``` + +## Usage + +Run the script: + +```bash +python firecrawl_agent.py +``` + +Enter any prompt like: + +- "Extract pricing information from mendable.ai" +- "Find the features of Anthropic's Claude model from anthropic.com" +- "Get the latest news from techcrunch.com" + +The script will: + +1. Extract the website URL from your prompt +2. Use Firecrawl to scrape the website +3. Use OpenAI to analyze the content based on your specific request +4. Display the results + +## Example + +``` +Enter your prompt (e.g., 'Extract pricing information from mendable.ai'): Extract pricing information from mendable.ai + +Scraping https://mendable.ai... +Extracting information... + +--- Result --- + +Mendable.ai offers the following pricing plans: + +1. Free Plan + - $0/month + - 100 queries/month + - 1 project + - Basic features + +2. Pro Plan + - $49/month + - 1,000 queries/month + - 3 projects + - All features including API access + +3. 
"""Scrape a website with Firecrawl and extract information from it with OpenAI.

The script asks for a natural-language prompt, has an OpenAI model pull the
target URL out of that prompt, scrapes the page as markdown through Firecrawl,
and then asks a second OpenAI model to answer the prompt from the page content.
"""

import os

from firecrawl import FirecrawlApp
from openai import OpenAI

# Placeholder values double as "key not configured" sentinels checked in main().
_OPENAI_KEY_PLACEHOLDER = "your_openai_api_key"
_FIRECRAWL_KEY_PLACEHOLDER = "your_firecrawl_api_key"

# Models used for the two OpenAI calls (URL extraction, then content analysis).
_URL_MODEL = "gpt-3.5-turbo"
_ANSWER_MODEL = "gpt-4o"

# Set your API keys here or use environment variables.
# `or` (rather than a .get() default) makes an empty environment variable count
# as unset, so main() prints the setup hint instead of forwarding a blank key.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or _OPENAI_KEY_PLACEHOLDER
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY") or _FIRECRAWL_KEY_PLACEHOLDER

# Initialize clients
openai_client = OpenAI(api_key=OPENAI_API_KEY)
firecrawl_client = FirecrawlApp(api_key=FIRECRAWL_API_KEY)


def _normalize_url(raw):
    """Clean a model-supplied URL string and ensure it has an http(s) scheme.

    Args:
        raw: Text returned by the model; may be ``None`` or empty.

    Returns:
        The trimmed URL, prefixed with ``https://`` when no scheme was present.

    Raises:
        ValueError: If ``raw`` is empty or contains whitespace (i.e. the model
            answered with a sentence rather than a URL).
    """
    # Models sometimes wrap the answer in quotes, backticks or angle brackets.
    url = (raw or "").strip().strip("\"'`<>").strip()
    if not url or any(ch.isspace() for ch in url):
        raise ValueError(f"could not find a URL in the model reply: {raw!r}")

    # Compare case-insensitively so "HTTP://..." is not double-prefixed.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    return url


def _get_markdown(scrape_result):
    """Return the markdown payload of a scrape result, or ``None`` if absent.

    Accepts either a mapping with a ``"markdown"`` key or an object exposing a
    ``markdown`` attribute, so an unexpected result (including ``None``) is
    reported as "no content" instead of raising.
    """
    if isinstance(scrape_result, dict):
        return scrape_result.get("markdown")
    return getattr(scrape_result, "markdown", None)


def extract_url_from_prompt(prompt):
    """Extract a URL from the user prompt.

    Sends the prompt to the OpenAI chat completions API and asks the model to
    reply with only the website URL it mentions.

    Args:
        prompt: The user's natural-language request.

    Returns:
        The extracted URL, with ``https://`` prepended if the model's reply
        had no scheme.

    Raises:
        ValueError: If the model's reply is empty or is not a single URL-like
            token.
    """
    response = openai_client.chat.completions.create(
        model=_URL_MODEL,
        messages=[
            {
                "role": "system",
                "content": "Extract the website URL from the user's prompt. Return only the URL, nothing else.",
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.1,
    )
    # message.content may be None; _normalize_url turns that into a ValueError.
    return _normalize_url(response.choices[0].message.content)


def main():
    """Run the interactive prompt -> scrape -> extract flow once.

    Every failure (missing key, empty prompt, API or scraping error, empty
    page) is reported with a printed message followed by an early return.
    """
    # Check if API keys are set
    if OPENAI_API_KEY == _OPENAI_KEY_PLACEHOLDER:
        print("Please set your OpenAI API key as an environment variable or in the script.")
        return

    if FIRECRAWL_API_KEY == _FIRECRAWL_KEY_PLACEHOLDER:
        print("Please set your Firecrawl API key as an environment variable or in the script.")
        return

    # Get user prompt
    user_prompt = input(
        "Enter your prompt (e.g., 'Extract pricing information from mendable.ai'): "
    ).strip()
    if not user_prompt:
        print("No prompt entered.")
        return

    # Extract URL from prompt. This is a top-level CLI boundary, so any API or
    # parsing failure is reported the same way scraping failures are.
    try:
        url = extract_url_from_prompt(user_prompt)
    except Exception as e:
        print(f"Error extracting URL from prompt: {e}")
        return
    print(f"\nScraping {url}...")

    # Scrape the website
    try:
        scrape_result = firecrawl_client.scrape_url(
            url,
            params={
                "formats": ["markdown"],
                "onlyMainContent": True,
            },
        )
    except Exception as e:
        print(f"Error scraping website: {e}")
        return

    # Extract content; an empty page is treated the same as a missing one.
    content = _get_markdown(scrape_result)
    if not content:
        print("No content found in scrape result.")
        return

    # Process with OpenAI
    print("Extracting information...")
    try:
        response = openai_client.chat.completions.create(
            model=_ANSWER_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that extracts specific information from website content.",
                },
                {
                    "role": "user",
                    "content": f"Based on the following website content, {user_prompt}\n\nContent:\n{content}",
                },
            ],
            temperature=0.2,
        )
    except Exception as e:
        print(f"Error extracting information: {e}")
        return

    # Print result
    answer = response.choices[0].message.content
    print("\n--- Result ---\n")
    print(answer if answer else "The model returned no text.")


if __name__ == "__main__":
    main()