Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Firecrawl Integration for Webpage Scraping #31

Open
wants to merge 8 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ SOLANA_PRIVATE_KEY=
DISCORD_TOKEN=
XAI_API_KEY=
TOGETHER_API_KEY=

FIRECRAWL_API_KEY=
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ similar core functionality as Zerebro. For creative outputs, you'll need to fine
- AI/ML Tools:
- GOAT (Onchain Agent Toolkit)
- Allora (Network inference)
- Others:
- Perplexity
- Firecrawl

### Language Model Support

Expand Down Expand Up @@ -135,6 +138,8 @@ poetry run python main.py
configure-connection hyperbolic # For Hyperbolic
configure-connection groq # For GROQ
configure-connection together # For Together AI
configure-connection perplexity # For Perplexity
configure-connection firecrawl # For Firecrawl
```

2. Use `list-connections` to see all available connections and their status
Expand Down
3 changes: 3 additions & 0 deletions agents/example.json
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@
"message_read_count": 10,
"message_emoji_name": "❤️",
"server_id": "1234567890"
},
{
"name": "firecrawl"
}
],
"tasks": [
Expand Down
34 changes: 31 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@ requests-oauthlib = "^1.3.1"
together = "^1.3.14"
fastapi = { version = "^0.109.0", optional = true }
uvicorn = { version = "^0.27.0", optional = true }
firecrawl-py = "^1.7.0"


[tool.poetry.extras]
server = ["fastapi", "uvicorn", "requests"]


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
6 changes: 5 additions & 1 deletion src/connection_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from src.connections.together_connection import TogetherAIConnection
from src.connections.evm_connection import EVMConnection
from src.connections.perplexity_connection import PerplexityConnection
from src.connections.firecrawl_connection import FirecrawlConnection

logger = logging.getLogger("connection_manager")

Expand Down Expand Up @@ -72,7 +73,10 @@ def _class_name_to_type(class_name: str) -> Type[BaseConnection]:
elif class_name == "evm":
return EVMConnection
elif class_name == "perplexity":
return PerplexityConnection
return PerplexityConnection
elif class_name == "firecrawl":
return FirecrawlConnection

return None

def _register_connection(self, config_dic: Dict[str, Any]) -> None:
Expand Down
138 changes: 138 additions & 0 deletions src/connections/firecrawl_connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import logging
import os
from typing import Dict, Any
from dotenv import load_dotenv, set_key
from src.connections.base_connection import BaseConnection, Action, ActionParameter
from firecrawl import FirecrawlApp


# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class FirecrawlConnectionError(Exception):
    """Base exception for Firecrawl connection errors.

    Catch this type to handle every error raised by the Firecrawl
    connection in one place.
    """


class FirecrawlConfigurationError(FirecrawlConnectionError):
    """Raised when there are configuration or credential issues."""

class FirecrawlAPIError(FirecrawlConnectionError):
    """Raised when a Firecrawl API request fails."""

class FirecrawlConnection(BaseConnection):
    """Connection that scrapes web pages through the Firecrawl API.

    The API key is read from the ``FIRECRAWL_API_KEY`` environment variable
    (``is_configured`` loads it from a local ``.env`` file when present).
    The ``FirecrawlApp`` client is created lazily on first use and cached
    on the instance.
    """

    def __init__(self, config: Dict[str, Any]):
        """Initialise the connection from its JSON configuration."""
        super().__init__(config)
        # Created on demand by _get_client(); reset by configure().
        self._client = None

    @property
    def is_llm_provider(self) -> bool:
        """Firecrawl is a scraping tool, not a language-model provider."""
        return False

    def validate_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Validate Firecrawl configuration from JSON.

        Args:
            config: The connection's configuration dictionary.

        Returns:
            The same dictionary, unchanged.

        Raises:
            ValueError: If a required field is missing. No fields are
                required at present, so this currently never triggers.
        """
        required_fields = []
        missing_fields = [field for field in required_fields if field not in config]

        if missing_fields:
            raise ValueError(
                f"Missing required configuration fields: {', '.join(missing_fields)}"
            )

        return config

    def register_actions(self) -> None:
        """Register available Firecrawl actions."""
        self.actions = {
            "scrape-page": Action(
                name="scrape-page",
                parameters=[
                    ActionParameter("url", True, str, "The URL of the page to scrape"),
                ],
                description="Scrape a page for text data",
            )
        }

    def _get_client(self) -> FirecrawlApp:
        """Get or create the Firecrawl app client.

        Raises:
            FirecrawlConfigurationError: If ``FIRECRAWL_API_KEY`` is not set
                in the environment.
        """
        if self._client is None:
            api_key = os.getenv("FIRECRAWL_API_KEY")
            if not api_key:
                raise FirecrawlConfigurationError(
                    "Firecrawl API key not found in environment"
                )
            self._client = FirecrawlApp(api_key=api_key)
        return self._client

    def configure(self) -> bool:
        """Interactively set up Firecrawl authentication.

        Prompts for an API key and stores it in the local ``.env`` file
        under ``FIRECRAWL_API_KEY``.

        Returns:
            True if the key was saved, or if an existing configuration was
            kept; False if no key was entered or saving failed.
        """
        print("\n🌍 Firecrawl API SETUP")

        if self.is_configured():
            # The original literal was "\Firecrawl ...": an invalid escape
            # that printed a stray backslash instead of a leading newline.
            print("\nFirecrawl API is already configured.")
            response = input("Do you want to reconfigure? (y/n): ")
            if response.strip().lower() != 'y':
                return True

        print("\n📝 To get your Firecrawl API credentials:")
        print("1. Go to https://www.firecrawl.dev/app/api-keys")
        print("2. Navigate to the API keys section and create a new API key")

        api_key = input("\nEnter your Firecrawl API key: ").strip()
        if not api_key:
            # Never overwrite an existing .env entry with an empty value.
            logger.error("Configuration failed: no API key was entered")
            return False

        try:
            # Build a client before persisting so that a key the constructor
            # rejects is not written to disk.
            FirecrawlApp(api_key=api_key)

            if not os.path.exists('.env'):
                with open('.env', 'w') as f:
                    f.write('')

            set_key('.env', 'FIRECRAWL_API_KEY', api_key)

        except Exception as e:
            logger.error(f"Configuration failed: {e}")
            return False

        # load_dotenv() does not override variables that are already set in
        # the process environment by default, so publish the new key here
        # and drop any client that was built with the previous one.
        os.environ['FIRECRAWL_API_KEY'] = api_key
        self._client = None

        print("\n✅ Firecrawl API configuration successfully saved!")
        print("Your API key has been stored in the .env file.")
        return True

    def is_configured(self, verbose=False) -> bool:
        """Check whether a Firecrawl API key is configured.

        Loads ``.env``, reads ``FIRECRAWL_API_KEY`` and tries to construct a
        client with it. No scrape request is made here.

        Args:
            verbose: When true, log the failure reason at debug level.

        Returns:
            True if a key is present and a client could be constructed.
        """
        try:
            load_dotenv()
            api_key = os.getenv('FIRECRAWL_API_KEY')
            if not api_key:
                return False

            # Constructed only to check that the key is accepted.
            FirecrawlApp(api_key=api_key)
            return True

        except Exception as e:
            if verbose:
                logger.debug(f"Configuration check failed: {e}")
            return False

    def scrape_page(self, url: str) -> str:
        """Scrape a page for text data.

        Args:
            url: The URL of the page to scrape.

        Returns:
            The ``markdown`` entry of the Firecrawl response.

        Raises:
            FirecrawlAPIError: If the client cannot be created, the request
                fails, or the response has no ``markdown`` entry. The
                original exception is chained as the cause.
        """
        try:
            client = self._get_client()
            response = client.scrape_url(url, params={'formats': ['markdown']})
            logger.info(f"Successfully scraped page : {url}")
            return response['markdown']
        except Exception as e:
            raise FirecrawlAPIError(f"Scraping {url} failed: {e}") from e

    def perform_action(self, action_name: str, kwargs) -> Any:
        """Execute a Firecrawl action with parameter validation.

        Args:
            action_name: Name of a registered action, e.g. ``"scrape-page"``.
            kwargs: Mapping of parameter names to values for the action.

        Returns:
            Whatever the method implementing the action returns.

        Raises:
            KeyError: If ``action_name`` is not a registered action.
            ValueError: If the action's parameter validation reports errors.
        """
        if action_name not in self.actions:
            raise KeyError(f"Unknown action: {action_name}")

        action = self.actions[action_name]
        errors = action.validate_params(kwargs)
        if errors:
            raise ValueError(f"Invalid parameters: {', '.join(errors)}")

        # Action names use dashes; the implementing methods use underscores.
        method_name = action_name.replace('-', '_')
        method = getattr(self, method_name)
        return method(**kwargs)