Skip to content

Commit abdb3a7

Browse files
committed
Initial commit
0 parents  commit abdb3a7

File tree

11 files changed

+2941
-0
lines changed

11 files changed

+2941
-0
lines changed

.eslintrc.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"parser": "@typescript-eslint/parser",
3+
"plugins": ["@typescript-eslint"],
4+
"extends": [
5+
"eslint:recommended",
6+
"plugin:@typescript-eslint/recommended",
7+
"prettier"
8+
],
9+
"env": {
10+
"node": true,
11+
"es2022": true
12+
},
13+
"parserOptions": {
14+
"ecmaVersion": 2022,
15+
"sourceType": "module",
16+
"project": "./tsconfig.json"
17+
},
18+
"rules": {
19+
"@typescript-eslint/explicit-function-return-type": "off",
20+
"@typescript-eslint/no-explicit-any": "off",
21+
"@typescript-eslint/no-unused-vars": [
22+
"error",
23+
{ "argsIgnorePattern": "^_" }
24+
]
25+
},
26+
"overrides": [
27+
{
28+
"files": ["**/*.test.ts"],
29+
"rules": {
30+
"@typescript-eslint/no-unused-vars": "off",
31+
"@typescript-eslint/no-explicit-any": "off"
32+
}
33+
}
34+
]
35+
}

.github/workflows/ci.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
branches: [main]
8+
9+
jobs:
10+
build:
11+
runs-on: ubuntu-latest
12+
13+
steps:
14+
- uses: actions/checkout@v3
15+
16+
- name: Use Node.js
17+
uses: actions/setup-node@v3
18+
with:
19+
node-version: '20.x'
20+
cache: 'npm'
21+
22+
- name: Install dependencies
23+
run: npm ci
24+
25+
- name: Build
26+
run: npm run build
27+
28+
- name: Lint
29+
run: npm run lint

.github/workflows/publish.yml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
name: Publish Package
2+
3+
on:
4+
push:
5+
tags:
6+
- "*"
7+
8+
jobs:
9+
publish:
10+
runs-on: ubuntu-latest
11+
12+
steps:
13+
- uses: actions/checkout@v3
14+
15+
- name: Use Node.js
16+
uses: actions/setup-node@v3
17+
with:
18+
node-version: '20.x'
19+
registry-url: 'https://registry.npmjs.org'
20+
21+
- name: Install dependencies
22+
run: npm ci
23+
24+
- name: Build
25+
run: npm run build
26+
27+
- name: Publish to NPM
28+
run: npm publish
29+
env:
30+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
31+
32+
- name: Create GitHub Release
33+
uses: ncipollo/release-action@v1
34+
with:
35+
token: ${{ secrets.GITHUB_TOKEN }}
36+
generateReleaseNotes: true
37+
prerelease: false

.gitignore

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Dependencies
2+
node_modules/
3+
4+
# Build
5+
dist/
6+
7+
# Logs
8+
logs
9+
*.log
10+
npm-debug.log*
11+
12+
# Environment
13+
.env
14+
.env.local
15+
.env.*.local
16+
claude_desktop_config.json
17+
18+
# IDE
19+
.idea/
20+
.vscode/
21+
*.swp
22+
*.swo
23+
.cursorrules.md
24+
IMPLEMENTATION.md
25+
v1.2.md
26+
27+
# OS
28+
.DS_Store
29+
Thumbs.db

.prettierrc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"semi": true,
3+
"trailingComma": "es5",
4+
"singleQuote": true,
5+
"printWidth": 80,
6+
"tabWidth": 2,
7+
"useTabs": false
8+
}

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 vrknetha
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
# Vectorize MCP Server
2+
3+
A Model Context Protocol (MCP) server implementation that integrates with [Vectorize](https://vectorize.io/) for advanced vector retrieval and text extraction.
4+
5+
## Features
6+
7+
## Installation
8+
9+
### Running with npx
10+
11+
```bash
12+
env VECTORIZE_ORG_ID=YOUR_ORG_ID VECTORIZE_API_KEY=YOUR_API_KEY npx -y @vectorize-io/vectorize-mcp-server
13+
```
14+
15+
### Manual Installation
16+
17+
```bash
18+
npm install -g @vectorize-io/vectorize-mcp-server
19+
```
20+
21+
## Configuration
22+
23+
### Environment Variables
24+
25+
#### Required for Cloud API
26+
27+
- `VECTORIZE_ORG_ID`: Your Vectorize Org ID
28+
- `VECTORIZE_API_KEY`: Your Vectorize API Key
29+
30+
31+
### System Configuration
32+
33+
The server includes several configurable parameters that can be set via environment variables. Here are the default values if not configured:
34+
35+
## Available Tools
36+
37+
### 1. Scrape Tool (`firecrawl_scrape`)
38+
39+
Scrape content from a single URL with advanced options.
40+
41+
```json
42+
{
43+
"name": "firecrawl_scrape",
44+
"arguments": {
45+
"url": "https://example.com",
46+
"formats": ["markdown"],
47+
"onlyMainContent": true,
48+
"waitFor": 1000,
49+
"timeout": 30000,
50+
"mobile": false,
51+
"includeTags": ["article", "main"],
52+
"excludeTags": ["nav", "footer"],
53+
"skipTlsVerification": false
54+
}
55+
}
56+
```
57+
58+
### 2. Batch Scrape Tool (`firecrawl_batch_scrape`)
59+
60+
Scrape multiple URLs efficiently with built-in rate limiting and parallel processing.
61+
62+
```json
63+
{
64+
"name": "firecrawl_batch_scrape",
65+
"arguments": {
66+
"urls": ["https://example1.com", "https://example2.com"],
67+
"options": {
68+
"formats": ["markdown"],
69+
"onlyMainContent": true
70+
}
71+
}
72+
}
73+
```
74+
75+
Response includes operation ID for status checking:
76+
77+
```json
78+
{
79+
"content": [
80+
{
81+
"type": "text",
82+
"text": "Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress."
83+
}
84+
],
85+
"isError": false
86+
}
87+
```
88+
89+
### 3. Check Batch Status (`firecrawl_check_batch_status`)
90+
91+
Check the status of a batch operation.
92+
93+
```json
94+
{
95+
"name": "firecrawl_check_batch_status",
96+
"arguments": {
97+
"id": "batch_1"
98+
}
99+
}
100+
```
101+
102+
### 4. Search Tool (`firecrawl_search`)
103+
104+
Search the web and optionally extract content from search results.
105+
106+
```json
107+
{
108+
"name": "firecrawl_search",
109+
"arguments": {
110+
"query": "your search query",
111+
"limit": 5,
112+
"lang": "en",
113+
"country": "us",
114+
"scrapeOptions": {
115+
"formats": ["markdown"],
116+
"onlyMainContent": true
117+
}
118+
}
119+
}
120+
```
121+
122+
### 5. Crawl Tool (`firecrawl_crawl`)
123+
124+
Start an asynchronous crawl with advanced options.
125+
126+
```json
127+
{
128+
"name": "firecrawl_crawl",
129+
"arguments": {
130+
"url": "https://example.com",
131+
"maxDepth": 2,
132+
"limit": 100,
133+
"allowExternalLinks": false,
134+
"deduplicateSimilarURLs": true
135+
}
136+
}
137+
```
138+
139+
### 6. Extract Tool (`firecrawl_extract`)
140+
141+
Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.
142+
143+
```json
144+
{
145+
"name": "firecrawl_extract",
146+
"arguments": {
147+
"urls": ["https://example.com/page1", "https://example.com/page2"],
148+
"prompt": "Extract product information including name, price, and description",
149+
"systemPrompt": "You are a helpful assistant that extracts product information",
150+
"schema": {
151+
"type": "object",
152+
"properties": {
153+
"name": { "type": "string" },
154+
"price": { "type": "number" },
155+
"description": { "type": "string" }
156+
},
157+
"required": ["name", "price"]
158+
},
159+
"allowExternalLinks": false,
160+
"enableWebSearch": false,
161+
"includeSubdomains": false
162+
}
163+
}
164+
```
165+
166+
Example response:
167+
168+
```json
169+
{
170+
"content": [
171+
{
172+
"type": "text",
173+
"text": {
174+
"name": "Example Product",
175+
"price": 99.99,
176+
"description": "This is an example product description"
177+
}
178+
}
179+
],
180+
"isError": false
181+
}
182+
```
183+
184+
#### Extract Tool Options:
185+
186+
- `urls`: Array of URLs to extract information from
187+
- `prompt`: Custom prompt for the LLM extraction
188+
- `systemPrompt`: System prompt to guide the LLM
189+
- `schema`: JSON schema for structured data extraction
190+
- `allowExternalLinks`: Allow extraction from external links
191+
- `enableWebSearch`: Enable web search for additional context
192+
- `includeSubdomains`: Include subdomains in extraction
193+
194+
When using a self-hosted instance, the extraction will use your configured LLM. For the cloud API, it uses Firecrawl's managed LLM service.
195+
196+
## Development
197+
198+
```bash
199+
# Install dependencies
200+
npm install
201+
202+
# Build
203+
npm run build
204+
205+
# Run tests
206+
npm test
207+
```
208+
209+
### Contributing
210+
211+
1. Fork the repository
212+
2. Create your feature branch
213+
3. Run tests: `npm test`
214+
4. Submit a pull request

0 commit comments

Comments
 (0)