Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions lefthook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ prepare-commit-msg:
skip:
- merge
- rebase
jobs:
- run: ./dist/cli.js --message-only > {1}
commands:
commitment:
# Only run for regular commits (not merge, squash, or when message specified)
# {1} is the commit message file, {2} is the commit source
run: '[ -z "{2}" ] && ./dist/cli.js --message-only > {1} || true'
interactive: true
17 changes: 9 additions & 8 deletions src/eval/evaluators/__tests__/chatgpt-agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import { beforeEach, describe, expect, it, mock } from 'bun:test';
import { z } from 'zod';
import { SilentLogger } from '../../../utils/logger.js';
import { ChatGPTAgent } from '../chatgpt-agent.js';

// Mock the OpenAI Agents SDK
Expand All @@ -30,7 +31,7 @@ describe('ChatGPTAgent', () => {
describe('evaluate()', () => {
it('should use gpt-5 model', async () => {
const schema = z.object({ result: z.string() });
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());

mockAgent.mockReturnValue({});
mockRun.mockResolvedValue({
Expand All @@ -49,7 +50,7 @@ describe('ChatGPTAgent', () => {

it('should use outputType pattern with Zod schema', async () => {
const schema = z.object({ score: z.number() });
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());

mockAgent.mockReturnValue({});
mockRun.mockResolvedValue({
Expand All @@ -68,7 +69,7 @@ describe('ChatGPTAgent', () => {

it('should access data via result.finalOutput', async () => {
const schema = z.object({ data: z.string() });
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());

mockAgent.mockReturnValue({});
mockRun.mockResolvedValue({
Expand All @@ -82,7 +83,7 @@ describe('ChatGPTAgent', () => {

it('should pass instructions to Agent', async () => {
const schema = z.object({ value: z.number() });
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());
const instructions = 'Evaluate on scale 0-10';

mockAgent.mockReturnValue({});
Expand All @@ -101,7 +102,7 @@ describe('ChatGPTAgent', () => {

it('should include agent name in configuration', async () => {
const schema = z.object({ result: z.boolean() });
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());

mockAgent.mockReturnValue({});
mockRun.mockResolvedValue({
Expand All @@ -119,7 +120,7 @@ describe('ChatGPTAgent', () => {

it('should throw EvaluationError on API failure', async () => {
const schema = z.object({ result: z.string() });
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());
const apiError = new Error('API timeout');

mockAgent.mockReturnValue({});
Expand All @@ -132,7 +133,7 @@ describe('ChatGPTAgent', () => {

it('should handle missing finalOutput', async () => {
const schema = z.object({ result: z.string() });
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());

mockAgent.mockReturnValue({});
mockRun.mockResolvedValue({
Expand All @@ -146,7 +147,7 @@ describe('ChatGPTAgent', () => {
const schema = z.object({
score: z.number().min(0).max(10),
});
const agent = new ChatGPTAgent();
const agent = new ChatGPTAgent(new SilentLogger());

mockAgent.mockReturnValue({});
// Simulate OpenAI returning invalid data that fails schema validation
Expand Down
25 changes: 14 additions & 11 deletions src/eval/evaluators/__tests__/meta-evaluator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/

import { beforeEach, describe, expect, it, mock } from 'bun:test';
import { SilentLogger } from '../../../utils/logger.js';
import type { AttemptOutcome } from '../../core/types.js';
import { MetaEvaluator } from '../meta-evaluator.js';

Expand All @@ -18,6 +19,8 @@ const mockEvaluate = mock();
// Swap the real '../chatgpt-agent.js' module for a stub so the evaluator under
// test calls `mockEvaluate` instead of the real agent implementation.
mock.module('../chatgpt-agent.js', () => ({
  // biome-ignore lint/style/useNamingConvention: Mock needs to match exported class name
  ChatGPTAgent: class MockChatGPTAgent {
    // biome-ignore lint/complexity/noUselessConstructor: Mock needs constructor for logger parameter
    constructor(_logger: unknown) {} // Accept (and ignore) the logger argument
    // Every stub instance exposes the same mock so tests can program and inspect it.
    evaluate = mockEvaluate;
  },
}));
Expand All @@ -28,7 +31,7 @@ describe('MetaEvaluator', () => {
});
describe('evaluate() - 3/3 success', () => {
it('should evaluate all 3 successful attempts', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -73,7 +76,7 @@ describe('MetaEvaluator', () => {
});

it('should have high consistency for similar scores', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -118,7 +121,7 @@ describe('MetaEvaluator', () => {

describe('evaluate() - 2/3 success', () => {
it('should penalize failures in finalScore', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -164,7 +167,7 @@ describe('MetaEvaluator', () => {
});

it('should identify best attempt among successes', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -208,7 +211,7 @@ describe('MetaEvaluator', () => {

describe('evaluate() - 1/3 success', () => {
it('should heavily penalize 2 failures', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -251,7 +254,7 @@ describe('MetaEvaluator', () => {
});

it('should set consistency to 0 with only 1 success', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -294,7 +297,7 @@ describe('MetaEvaluator', () => {

describe('evaluate() - 0/3 success', () => {
it('should provide reasoning even with all failures', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -338,7 +341,7 @@ describe('MetaEvaluator', () => {
});

it('should set bestAttempt to undefined', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -380,7 +383,7 @@ describe('MetaEvaluator', () => {

describe('validate inputs', () => {
it('should throw on invalid attempt count', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand All @@ -405,7 +408,7 @@ describe('MetaEvaluator', () => {
});

it('should handle ChatGPT API errors', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down Expand Up @@ -441,7 +444,7 @@ describe('MetaEvaluator', () => {

describe('build comprehensive prompt', () => {
it('should include all attempts in prompt', async () => {
const evaluator = new MetaEvaluator();
const evaluator = new MetaEvaluator(new SilentLogger());
const attempts: AttemptOutcome[] = [
{
attemptNumber: 1,
Expand Down
23 changes: 13 additions & 10 deletions src/eval/evaluators/__tests__/single-attempt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/

import { beforeEach, describe, expect, it, mock } from 'bun:test';
import { SilentLogger } from '../../../utils/logger.js';
import { SingleAttemptEvaluator } from '../single-attempt.js';

// Mock ChatGPTAgent
Expand All @@ -17,6 +18,8 @@ const mockEvaluate = mock();
// Swap the real '../chatgpt-agent.js' module for a stub so the evaluator under
// test calls `mockEvaluate` instead of the real agent implementation.
mock.module('../chatgpt-agent.js', () => ({
  // biome-ignore lint/style/useNamingConvention: Mock needs to match exported class name
  ChatGPTAgent: class MockChatGPTAgent {
    // biome-ignore lint/complexity/noUselessConstructor: Mock needs constructor for logger parameter
    constructor(_logger: unknown) {} // Accept (and ignore) the logger argument
    // Every stub instance exposes the same mock so tests can program and inspect it.
    evaluate = mockEvaluate;
  },
}));
Expand All @@ -27,7 +30,7 @@ describe('SingleAttemptEvaluator', () => {
});
describe('evaluate()', () => {
it('should evaluate commit message with 4 metrics', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());
const mockMetrics = {
clarity: 9,
conventionalFormat: 10,
Expand All @@ -47,7 +50,7 @@ describe('SingleAttemptEvaluator', () => {
});

it('should calculate overall score as average of metrics', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());
const mockMetrics = {
clarity: 8,
conventionalFormat: 9,
Expand All @@ -64,7 +67,7 @@ describe('SingleAttemptEvaluator', () => {
});

it('should pass commit message to ChatGPT', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());
const commitMessage = 'feat(api): add user endpoint';

mockEvaluate.mockResolvedValue({
Expand All @@ -83,7 +86,7 @@ describe('SingleAttemptEvaluator', () => {
});

it('should include diff in evaluation context', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());
const diff = 'diff --git a/src/api.ts b/src/api.ts\n+new code';

mockEvaluate.mockResolvedValue({
Expand All @@ -102,7 +105,7 @@ describe('SingleAttemptEvaluator', () => {
});

it('should include fixture name in context', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());
const fixtureName = 'complex-refactoring';

mockEvaluate.mockResolvedValue({
Expand All @@ -121,7 +124,7 @@ describe('SingleAttemptEvaluator', () => {
});

it('should validate metrics are in 0-10 range', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());

// Simulate ChatGPT returning invalid metrics that fail schema validation
mockEvaluate.mockRejectedValue(
Expand All @@ -132,15 +135,15 @@ describe('SingleAttemptEvaluator', () => {
});

it('should handle ChatGPT evaluation errors', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());

mockEvaluate.mockRejectedValue(new Error('API timeout'));

await expect(evaluator.evaluate('message', 'diff', 'fixture')).rejects.toThrow();
});

it('should handle edge case: all metrics are 10', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());

mockEvaluate.mockResolvedValue({
clarity: 10,
Expand All @@ -155,7 +158,7 @@ describe('SingleAttemptEvaluator', () => {
});

it('should handle edge case: all metrics are 0', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());

mockEvaluate.mockResolvedValue({
clarity: 0,
Expand All @@ -170,7 +173,7 @@ describe('SingleAttemptEvaluator', () => {
});

it('should round overall score to 1 decimal place', async () => {
const evaluator = new SingleAttemptEvaluator();
const evaluator = new SingleAttemptEvaluator(new SilentLogger());

mockEvaluate.mockResolvedValue({
clarity: 7,
Expand Down
10 changes: 10 additions & 0 deletions src/eval/evaluators/chatgpt-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

import type { AgentOutputType } from '@openai/agents';
import { Agent, run } from '@openai/agents';
import type { Logger } from '../../utils/logger.js';
import { EvaluationError } from '../core/errors.js';

/**
Expand All @@ -41,6 +42,15 @@ import { EvaluationError } from '../core/errors.js';
* and returns typed results via outputType pattern.
*/
export class ChatGPTAgent {
/**
 * Create a new ChatGPT agent.
 *
 * The logger is accepted but neither stored nor read: the constructor body
 * only discards it.
 *
 * @param _logger - Logger for progress messages (reserved for future use)
 */
constructor(_logger: Logger) {
// Logger reserved for future use; `void` evaluates and discards the
// argument so the parameter counts as referenced.
void _logger;
}
/**
* Evaluate using ChatGPT with structured output
*
Expand Down
7 changes: 5 additions & 2 deletions src/eval/evaluators/meta-evaluator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
* ```
*/

import type { Logger } from '../../utils/logger.js';
import { EvaluationError } from '../core/errors.js';
import { metaEvaluationOutputSchema } from '../core/schemas.js';
import type { AttemptOutcome, EvalResult } from '../core/types.js';
Expand All @@ -44,9 +45,11 @@ export class MetaEvaluator {

/**
* Create a new meta-evaluator
*
* @param logger - Logger passed to the internal ChatGPTAgent; not stored on this instance
*/
constructor() {
this.chatgpt = new ChatGPTAgent();
constructor(logger: Logger) {
this.chatgpt = new ChatGPTAgent(logger);
}

/**
Expand Down
7 changes: 5 additions & 2 deletions src/eval/evaluators/single-attempt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
* ```
*/

import type { Logger } from '../../utils/logger.js';
import { attemptMetricsSchema } from '../core/schemas.js';
import type { AttemptMetrics } from '../core/types.js';
import { ChatGPTAgent } from './chatgpt-agent.js';
Expand Down Expand Up @@ -54,9 +55,11 @@ export class SingleAttemptEvaluator {

/**
* Create a new single-attempt evaluator
*
* @param logger - Logger passed to the internal ChatGPTAgent; not stored on this instance
*/
constructor() {
this.chatgpt = new ChatGPTAgent();
constructor(logger: Logger) {
this.chatgpt = new ChatGPTAgent(logger);
}

/**
Expand Down
Loading