-
-
Notifications
You must be signed in to change notification settings - Fork 9
Testing
Haveapp1 edited this page Aug 22, 2025
·
1 revision
Comprehensive testing strategies and tools for Agentwise development and deployment.
Testing in Agentwise involves multiple layers: agent behavior testing, orchestration testing, integration testing, and end-to-end project validation. This guide covers all testing strategies and tools.
╭─────────────────╮
╱ E2E Tests ╲ ← Few, high-value tests
╱ (Project Tests) ╲
╱_____________________╲
╱ ╲
╱ Integration Tests ╲ ← API, Agent coordination
╱ (Agent Orchestration) ╲
╱___________________________╲
╱ ╲
╱ Unit Tests ╲ ← Many, fast tests
╱ (Components, Utilities, Logic) ╲
╱_________________________________╲
{
"testing_layers": {
"unit": {
"scope": "Individual functions, classes, utilities",
"tools": ["Jest", "Vitest", "Mocha"],
"coverage_target": "90%+",
"execution_speed": "< 1ms per test"
},
"integration": {
"scope": "Agent interactions, API endpoints",
"tools": ["Supertest", "Playwright API", "Custom harnesses"],
"coverage_target": "80%+",
"execution_speed": "< 100ms per test"
},
"end_to_end": {
"scope": "Complete project workflows",
"tools": ["Playwright", "Cypress", "Custom validators"],
"coverage_target": "Critical paths",
"execution_speed": "< 30s per test"
}
}
}
Test individual agent capabilities:
// tests/agents/frontend-specialist.test.js
import { FrontendSpecialist } from '../../src/agents/FrontendSpecialist.js';
import { mockTask, mockContext } from '../helpers/mocks.js';
/**
 * Unit tests for the FrontendSpecialist agent.
 *
 * Covers two areas:
 *  - React component generation through `processTask`
 *  - capability scoring through `getCapabilityScore`
 */
describe('FrontendSpecialist', () => {
  let agent;
  let context;

  beforeEach(() => {
    // Fresh agent and context per test so no state leaks between cases.
    agent = new FrontendSpecialist();
    // `mockContext` is a factory function imported from the mocks helper; it has
    // to be invoked so the agent receives a context object, not the function.
    context = mockContext();
  });

  describe('React Component Generation', () => {
    test('should generate functional component', async () => {
      const task = mockTask({
        type: 'component_creation',
        requirements: {
          framework: 'react',
          componentType: 'functional',
          name: 'UserCard',
          props: ['user', 'onEdit']
        }
      });

      const result = await agent.processTask(task, context);

      expect(result.status).toBe('success');
      expect(result.artifacts).toHaveLength(1);
      // The generated source must declare the component and mention each prop.
      expect(result.artifacts[0].content).toContain('function UserCard');
      expect(result.artifacts[0].content).toContain('user');
      expect(result.artifacts[0].content).toContain('onEdit');
    });

    test('should handle TypeScript components', async () => {
      const task = mockTask({
        type: 'component_creation',
        requirements: {
          framework: 'react',
          language: 'typescript',
          name: 'ProductList',
          interface: {
            products: 'Product[]',
            onSelect: '(product: Product) => void'
          }
        }
      });

      const result = await agent.processTask(task, context);

      expect(result.artifacts[0].content).toContain('interface');
      expect(result.artifacts[0].content).toContain('Product[]');
      expect(result.artifacts[0].extension).toBe('.tsx');
    });
  });

  describe('Capability Assessment', () => {
    test('should score React tasks highly', () => {
      const task = mockTask({
        requirements: { framework: 'react' }
      });

      const score = agent.getCapabilityScore(task);

      expect(score).toBeGreaterThan(0.8);
    });

    test('should score backend tasks low', () => {
      const task = mockTask({
        type: 'api_creation',
        requirements: { framework: 'express' }
      });

      const score = agent.getCapabilityScore(task);

      expect(score).toBeLessThan(0.2);
    });
  });
});
// tests/orchestrator/task-analyzer.test.js
import { TaskAnalyzer } from '../../src/orchestrator/TaskAnalyzer.js';
/**
 * Unit tests for TaskAnalyzer: request analysis and task decomposition.
 */
describe('TaskAnalyzer', () => {
  let analyzer;

  beforeEach(() => {
    // New analyzer for every test case.
    analyzer = new TaskAnalyzer();
  });

  test('should analyze web application request', async () => {
    const analysis = await analyzer.analyze({
      description: 'Build e-commerce platform with React and Node.js',
      requirements: {
        frontend: 'react',
        backend: 'nodejs',
        database: 'postgresql'
      }
    });

    expect(analysis.projectType).toBe('web-application');
    expect(analysis.complexity).toBe('high');
    // One agent is expected per tier named in the request.
    for (const agentName of ['frontend', 'backend', 'database']) {
      expect(analysis.requiredAgents).toContain(agentName);
    }
    expect(analysis.estimatedTasks).toBeGreaterThan(5);
  });

  test('should decompose into specific tasks', async () => {
    const expectedTypes = [
      'project_setup',
      'frontend_structure',
      'backend_setup',
      'integration'
    ];

    const tasks = await analyzer.decompose({
      projectType: 'web-application',
      requirements: { frontend: 'react', backend: 'express' }
    });
    const producedTypes = tasks.map((task) => task.type);

    expect(tasks).toHaveLength(4);
    // Order of the produced tasks is not asserted, only membership.
    expect(producedTypes).toEqual(expect.arrayContaining(expectedTypes));
  });
});
// tests/optimization/token-optimizer.test.js
import { TokenOptimizer } from '../../src/optimization/TokenOptimizer.js';
// Unit tests for TokenOptimizer: context compression and template reuse.
describe('TokenOptimizer', () => {
let optimizer;
// Create a fresh optimizer before every test.
beforeEach(() => {
optimizer = new TokenOptimizer();
});
test('should compress context effectively', async () => {
// Placeholder fixture: three string fields standing in for a large context.
const largeContext = {
projectConfig: '...large config...',
codebase: '...entire codebase...',
documentation: '...full documentation...'
};
const compressed = await optimizer.compressContext(largeContext);
// NOTE(review): largeContext has no `tokenCount` property, so the threshold below
// evaluates to `undefined * 0.7` (NaN) and this comparison cannot pass as written.
// TODO: obtain the uncompressed token count from the optimizer (or add it to the
// fixture) before comparing - confirm which API TokenOptimizer exposes for this.
expect(compressed.tokenCount).toBeLessThan(largeContext.tokenCount * 0.7);
expect(compressed.essential_info).toBeDefined();
expect(compressed.preserved_quality).toBeGreaterThan(0.85);
});
test('should reuse templates appropriately', async () => {
const task = {
type: 'react_component',
requirements: { name: 'Button', props: ['onClick', 'children'] }
};
const optimized = await optimizer.optimizeTask(task);
// The result is expected to name the template it used and report the tokens saved.
expect(optimized.template_used).toBe('react_functional_component');
expect(optimized.token_savings).toBeGreaterThan(100);
});
});
// tests/integration/agent-coordination.test.js
import { OrchestrationEngine } from '../../src/orchestrator/OrchestrationEngine.js';
import { MockAgentPool } from '../helpers/MockAgentPool.js';
/**
 * Integration tests for OrchestrationEngine running against a MockAgentPool:
 * multi-agent coordination and recovery after a simulated agent failure.
 */
describe('Agent Coordination', () => {
  let engine;
  let pool;

  beforeEach(async () => {
    pool = new MockAgentPool();
    engine = new OrchestrationEngine({ agentPool: pool });
    await engine.initialize();
  });

  test('should coordinate frontend and backend agents', async () => {
    const outcome = await engine.processProject({
      name: 'test-app',
      type: 'web-application',
      requirements: {
        frontend: 'react',
        backend: 'express',
        features: ['auth', 'crud']
      }
    });

    expect(outcome.status).toBe('completed');
    for (const role of ['frontend', 'backend']) {
      expect(outcome.agents_used).toContain(role);
    }

    // The frontend agent must have been handed the shared API/schema context.
    const frontendAgent = pool.getAgent('frontend');
    expect(frontendAgent.receivedContext).toMatchObject({
      api_endpoints: expect.any(Array),
      data_schemas: expect.any(Object)
    });
  });

  test('should handle agent failures gracefully', async () => {
    // Put the backend agent into its failure mode before the run starts.
    const backendAgent = pool.getAgent('backend');
    backendAgent.simulateFailure();

    const outcome = await engine.processProject({
      type: 'web-application',
      requirements: { backend: 'express' }
    });

    // The run is still expected to finish, via retries and a fallback.
    expect(outcome.status).toBe('completed');
    expect(outcome.retries).toBeGreaterThan(0);
    expect(outcome.fallback_used).toBe(true);
  });
});
// tests/integration/api-endpoints.test.js
import request from 'supertest';
import { createApp } from '../../src/api/app.js';
import { testDatabase } from '../helpers/database.js';
/**
 * Integration tests for the HTTP API and its WebSocket update channel.
 *
 * HTTP requests go through supertest against the app returned by `createApp`,
 * configured with the test database.
 */
describe('API Endpoints', () => {
  let app;

  beforeAll(async () => {
    app = createApp({ database: testDatabase });
  });

  describe('POST /api/v1/tasks', () => {
    test('should create task successfully', async () => {
      const taskData = {
        description: 'Create React component',
        type: 'frontend',
        requirements: { framework: 'react' }
      };

      const response = await request(app)
        .post('/api/v1/tasks')
        .send(taskData)
        .expect(201);

      expect(response.body).toMatchObject({
        task_id: expect.any(String),
        status: 'queued',
        assigned_agent: 'frontend-specialist'
      });
    });

    test('should validate required fields', async () => {
      // `type` is omitted on purpose to trigger the validation error.
      const response = await request(app)
        .post('/api/v1/tasks')
        .send({ description: 'Incomplete task' })
        .expect(400);

      expect(response.body.error).toContain('type is required');
    });
  });

  describe('WebSocket /api/v1/ws', () => {
    test('should provide real-time task updates', async () => {
      // NOTE(review): this URL assumes a server is already listening on port 3001;
      // `request(app)` alone does not guarantee that - confirm how the server is started.
      const ws = new WebSocket('ws://localhost:3001/api/v1/ws');
      const updates = [];
      ws.onmessage = (event) => {
        updates.push(JSON.parse(event.data));
      };

      try {
        // Wait until the socket is open before creating the task; otherwise the
        // `task.created` event can be emitted before the client is connected.
        await new Promise((resolve, reject) => {
          ws.onopen = resolve;
          ws.onerror = reject;
        });

        // Create task via API
        const task = await request(app)
          .post('/api/v1/tasks')
          .send({ description: 'Test task', type: 'frontend' });

        // Wait for WebSocket updates
        await new Promise(resolve => setTimeout(resolve, 1000));

        expect(updates).toContainEqual(
          expect.objectContaining({
            event: 'task.created',
            task_id: task.body.task_id
          })
        );
      } finally {
        // Always release the connection so the test run is not kept alive by an
        // open socket, even when an assertion above fails.
        ws.close();
      }
    });
  });
});
// tests/e2e/project-workflows.test.js
import { test, expect } from '@playwright/test';
import { AgentWiseTestHarness } from '../helpers/TestHarness.js';
/**
 * End-to-end Playwright tests that drive AgentWiseTestHarness through full
 * project generation and then check the generated output.
 */
test.describe('Complete Project Workflows', () => {
  // Address used for both page navigation and the API health check.
  // NOTE(review): assumes the generated app serves its API on the same origin - confirm.
  const APP_URL = 'http://localhost:3000';

  let harness;

  test.beforeAll(async () => {
    harness = new AgentWiseTestHarness();
    await harness.initialize();
  });

  test('should build complete React + Express application', async () => {
    const projectSpec = {
      name: 'e2e-test-app',
      type: 'web-application',
      requirements: {
        frontend: {
          framework: 'react',
          styling: 'tailwindcss',
          state: 'zustand'
        },
        backend: {
          framework: 'express',
          database: 'sqlite',
          auth: 'jwt'
        },
        features: [
          'user_authentication',
          'todo_management',
          'responsive_design'
        ]
      }
    };

    // Start project creation
    const result = await harness.createProject(projectSpec);

    // Verify project structure
    expect(result.status).toBe('completed');
    expect(result.duration).toBeLessThan(600000); // < 10 minutes

    // Check generated files
    const files = await harness.getProjectFiles();
    expect(files).toContain('package.json');
    expect(files).toContain('src/App.jsx');
    expect(files).toContain('server/app.js');
    expect(files).toContain('database/schema.sql');

    // Verify application functionality
    await harness.startApplication();

    // Test frontend
    const page = await harness.getPage();
    await page.goto(APP_URL);
    await expect(page.locator('h1')).toContainText('Welcome');

    // Test authentication. The address is a placeholder test account; the
    // previous literal was an e-mail-obfuscation artifact, not a usable address.
    await page.fill('[data-testid=email]', 'test@example.com');
    await page.fill('[data-testid=password]', 'password123');
    await page.click('[data-testid=login]');
    await expect(page.locator('[data-testid=dashboard]')).toBeVisible();

    // Test API endpoints. An absolute URL is used because a relative path only
    // resolves when the browser context was created with a baseURL.
    const apiResponse = await page.request.get(`${APP_URL}/api/health`);
    expect(apiResponse.status()).toBe(200);
  });

  test('should handle mobile app generation', async () => {
    const mobileSpec = {
      name: 'mobile-test-app',
      type: 'mobile-application',
      requirements: {
        platform: 'react-native',
        navigation: 'react-navigation',
        state: 'redux',
        backend: 'firebase'
      }
    };

    const result = await harness.createProject(mobileSpec);
    expect(result.status).toBe('completed');

    const files = await harness.getProjectFiles();
    expect(files).toContain('App.js');
    expect(files).toContain('android/app/build.gradle');
    expect(files).toContain('ios/Podfile');

    // Verify it builds
    const buildResult = await harness.buildProject();
    expect(buildResult.success).toBe(true);
  });
});
// tests/performance/load-testing.test.js
import { performance } from 'perf_hooks';
import { OrchestrationEngine } from '../../src/orchestrator/OrchestrationEngine.js';
/**
 * Load tests for OrchestrationEngine: wall-clock time for concurrent project
 * creation and token usage across a batch of concurrently processed tasks.
 */
describe('Performance Tests', () => {
  let orchestrator;

  beforeAll(async () => {
    // A single engine instance is shared by every test in this suite.
    orchestrator = new OrchestrationEngine();
    await orchestrator.initialize();
  });

  test('should handle concurrent project creation', async () => {
    const projectCount = 10;
    const projects = [];
    for (let i = 0; i < projectCount; i += 1) {
      projects.push({
        name: `concurrent-project-${i}`,
        type: 'web-application',
        requirements: { frontend: 'react', backend: 'express' }
      });
    }

    const startedAt = performance.now();
    const results = await Promise.all(
      projects.map((project) => orchestrator.processProject(project))
    );
    const elapsedMs = performance.now() - startedAt;

    // All projects should complete successfully
    const allCompleted = results.every((r) => r.status === 'completed');
    expect(allCompleted).toBe(true);

    // Should complete within reasonable time (< 2min for 10 projects)
    expect(elapsedMs).toBeLessThan(120000);

    // Wall-clock time divided by project count should stay under 30s
    expect(elapsedMs / projectCount).toBeLessThan(30000);
  });

  test('should maintain token efficiency under load', async () => {
    const taskCount = 50;
    const tasks = [];
    for (let i = 0; i < taskCount; i += 1) {
      tasks.push({
        description: `Load test task ${i}`,
        type: 'frontend',
        requirements: { framework: 'react' }
      });
    }

    const results = await Promise.all(
      tasks.map((task) => orchestrator.processTask(task))
    );

    // Accumulate total token usage and summed efficiency in one pass each.
    let totalTokens = 0;
    for (const r of results) {
      totalTokens = totalTokens + r.tokens_used;
    }
    let efficiencySum = 0;
    for (const r of results) {
      efficiencySum = efficiencySum + r.token_efficiency;
    }

    // Should maintain optimization under load
    expect(totalTokens / tasks.length).toBeLessThan(5000);

    // Overall efficiency should be maintained
    expect(efficiencySum / results.length).toBeGreaterThan(0.8);
  });
});
// tests/agents/frontend/component-generation.test.js
import { FrontendSpecialist } from '../../../src/agents/FrontendSpecialist.js';
import { validateReactComponent, validateVueComponent } from '../../helpers/validators.js';
/**
 * Parameterised tests for FrontendSpecialist component generation, run once
 * per framework name.
 */
describe('Frontend Agent - Component Generation', () => {
  // Only these frameworks currently have a content validator; the remaining
  // ones are checked for artifact count only.
  // Add validators for other frameworks
  const validators = {
    react: validateReactComponent,
    vue: validateVueComponent
  };

  let agent;

  beforeEach(() => {
    agent = new FrontendSpecialist();
  });

  test.each([
    'react',
    'vue',
    'angular',
    'svelte'
  ])('should generate valid %s components', async (framework) => {
    const result = await agent.processTask({
      type: 'component_creation',
      requirements: {
        framework,
        name: 'TestComponent',
        props: ['title', 'onClick']
      }
    });

    expect(result.artifacts).toHaveLength(1);

    const [component] = result.artifacts;
    const validate = validators[framework];
    if (validate) {
      expect(validate(component.content)).toBe(true);
    }
  });
});
// tests/agents/backend/api-generation.test.js
import { BackendSpecialist } from '../../../src/agents/BackendSpecialist.js';
import { validateExpressRoute, validateAPIResponse } from '../../helpers/validators.js';
/**
 * Tests for BackendSpecialist: CRUD route generation and JWT authentication
 * middleware generation.
 */
describe('Backend Agent - API Generation', () => {
  let agent;

  beforeEach(() => {
    agent = new BackendSpecialist();
  });

  test('should generate CRUD endpoints', async () => {
    const result = await agent.processTask({
      type: 'api_creation',
      requirements: {
        framework: 'express',
        resource: 'users',
        operations: ['create', 'read', 'update', 'delete']
      }
    });

    // Keep only the artifacts tagged as routes.
    const routes = result.artifacts.filter((artifact) => artifact.type === 'route');

    expect(routes).toHaveLength(4); // One for each CRUD operation
    for (const route of routes) {
      expect(validateExpressRoute(route.content)).toBe(true);
    }
  });

  test('should generate authentication middleware', async () => {
    const result = await agent.processTask({
      type: 'middleware_creation',
      requirements: {
        type: 'authentication',
        strategy: 'jwt'
      }
    });

    // The generated middleware must verify the token and populate req.user.
    const middlewareSource = result.artifacts[0].content;
    expect(middlewareSource).toContain('jwt.verify');
    expect(middlewareSource).toContain('req.user');
  });
});
// jest.config.js
// Jest configuration for the project's test suite.
module.exports = {
// Run tests in Jest's Node environment.
testEnvironment: 'node',
// Setup module that runs after the test framework is installed in the environment.
setupFilesAfterEnv: ['<rootDir>/tests/setup.js'],
// Only *.test.js and *.spec.js files under tests/ are collected as tests.
testMatch: [
'<rootDir>/tests/**/*.test.js',
'<rootDir>/tests/**/*.spec.js'
],
// Measure coverage for source files, excluding co-located tests and the entry point.
collectCoverageFrom: [
'src/**/*.js',
'!src/**/*.test.js',
'!src/index.js'
],
// Minimum global coverage percentages; the run fails when coverage falls below them.
coverageThreshold: {
global: {
branches: 80,
functions: 85,
lines: 85,
statements: 85
}
},
// Default per-test timeout in milliseconds (30 seconds).
testTimeout: 30000
};
// tests/helpers/mocks.js
export function mockTask(overrides = {}) {
return {
id: 'test-task-123',
type: 'frontend',
description: 'Test task',
requirements: {},
context: {},
priority: 'medium',
token_budget: 5000,
...overrides
};
}
export function mockContext(overrides = {}) {
return {
project: {
name: 'test-project',
type: 'web-application'
},
shared: {
tech_stack: ['react', 'express'],
decisions: {}
},
...overrides
};
}
export class MockAgent {
constructor(id, capabilities = []) {
this.id = id;
this.capabilities = capabilities;
this.tasks_completed = [];
this.receivedContext = null;
}
async processTask(task, context) {
this.receivedContext = context;
this.tasks_completed.push(task);
return {
status: 'success',
artifacts: [{
name: `${task.type}-output.js`,
content: `// Generated ${task.type} code`,
type: 'code'
}],
tokens_used: 1000,
duration: 5000
};
}
simulateFailure() {
this.shouldFail = true;
}
}
# .github/workflows/test.yml
# CI pipeline: lint, unit, integration and E2E tests on a Node.js version
# matrix, followed by a separate performance benchmark job.
name: Test Suite

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        node-version: [18, 20]

    steps:
      - uses: actions/checkout@v4

      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'npm'

      - run: npm ci

      - name: Run linting
        run: npm run lint

      - name: Run unit tests
        run: npm run test:unit

      - name: Run integration tests
        run: npm run test:integration

      # The E2E suite uses @playwright/test, which needs its browser binaries
      # and their system dependencies; `npm ci` does not install them.
      - name: Install Playwright browsers
        run: npx playwright install --with-deps

      - name: Run E2E tests
        run: npm run test:e2e

      - name: Upload coverage reports
        uses: codecov/codecov-action@v3
        with:
          files: ./coverage/lcov.info

  # Runs only after the test job has succeeded.
  performance:
    runs-on: ubuntu-latest
    needs: test

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 18
          cache: 'npm'

      - run: npm ci

      - name: Run performance benchmarks
        run: npm run test:performance

      - name: Store benchmark results
        uses: benchmark-action/github-action-benchmark@v1
        with:
          tool: 'benchmarkjs'
          output-file-path: benchmarks/results.json
- Follow AAA Pattern: Arrange, Act, Assert
- Use Descriptive Names: Test names should explain the scenario
- One Assertion Per Test: Focus each test on a single behavior
- Independent Tests: Each test should run in isolation
- Fast Execution: Unit tests should complete quickly
- Mock External Dependencies: APIs, databases, file systems
- Preserve Business Logic: Don't over-mock core functionality
- Use Test Doubles: Apply stubs, mocks, and spies appropriately
- Verify Interactions: Ensure mocks are called correctly
- Aim for High Coverage: But prioritize meaningful tests
- Test Edge Cases: Handle error conditions and boundaries
- Cover Critical Paths: Focus on most important functionality
- Avoid Coverage Obsession: Quality over quantity
For more information, see Contributing, Performance Tuning, or Architecture.
Support
- Discord: @vibecodingwithphil
- GitHub: @VibeCodingWithPhil