Skip to content

Commit a6d5332

Browse files
committed
Validate that services in the plan fulfill at least 95% of the capabilities required by the matched use case.
1 parent 6253ba3 commit a6d5332

File tree

2 files changed

+175
-38
lines changed

2 files changed

+175
-38
lines changed

controlplane/pddl.go

+25-8
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ func (g *PddlGenerator) addActions(domain *strings.Builder) error {
174174
return nil
175175
}
176176

177-
// validateServiceCapabilities checks if any service in the plan can fulfill each required capability
177+
// validateServiceCapabilities checks if services in the plan fulfill at least 95% of required capabilities
178178
func (g *PddlGenerator) validateServiceCapabilities(ctx context.Context, useCase GroundingUseCase) error {
179179
// Collect all service capabilities
180180
var allServiceCapabilities []string
@@ -185,8 +185,9 @@ func (g *PddlGenerator) validateServiceCapabilities(ctx context.Context, useCase
185185
allServiceCapabilities = append(allServiceCapabilities, task.Capabilities...)
186186
}
187187

188-
// Track matched capabilities to ensure all are covered
188+
// Track matched and unmatched capabilities
189189
matchedCapabilities := make(map[string]bool)
190+
unmatchedCapabilities := make([]string, 0)
190191

191192
// For each required capability, check if any service can fulfill it
192193
for _, requiredCap := range useCase.Capabilities {
@@ -214,15 +215,31 @@ func (g *PddlGenerator) validateServiceCapabilities(ctx context.Context, useCase
214215
}
215216

216217
if !capabilityMatched {
217-
return fmt.Errorf("no service found with required capability: %s", requiredCap)
218+
unmatchedCapabilities = append(unmatchedCapabilities, requiredCap)
218219
}
219220
}
220221

221-
// Ensure all required capabilities were matched
222-
for _, requiredCap := range useCase.Capabilities {
223-
if !matchedCapabilities[requiredCap] {
224-
return fmt.Errorf("missing required capability: %s", requiredCap)
225-
}
222+
// Calculate match percentage
223+
totalCapabilities := len(useCase.Capabilities)
224+
matchedCount := len(matchedCapabilities)
225+
matchPercentage := float64(matchedCount) / float64(totalCapabilities)
226+
227+
g.logger.Debug().
228+
Int("totalCapabilities", totalCapabilities).
229+
Int("matchedCount", matchedCount).
230+
Float64("matchPercentage", matchPercentage).
231+
Strs("unmatchedCapabilities", unmatchedCapabilities).
232+
Msg("Capability matching summary")
233+
234+
// Require at least 95% of capabilities to be matched
235+
if matchPercentage < 0.95 {
236+
return fmt.Errorf(
237+
"insufficient capability coverage: %.2f%% (%d/%d capabilities matched). Unmatched capabilities: %v",
238+
matchPercentage*100,
239+
matchedCount,
240+
totalCapabilities,
241+
unmatchedCapabilities,
242+
)
226243
}
227244

228245
return nil

controlplane/pddl_test.go

+150-30
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,41 @@ import (
1919
// FakeMatcher implements both Matcher and Embedder interfaces for testing
2020
type FakeMatcher struct {
2121
// Configured responses for testing
22-
matchResponse bool
23-
matchScore float64
24-
matchError error
25-
embeddings []float32
26-
embeddingsError error
22+
matchResponse bool
23+
matchScore float64
24+
matchCount int
25+
totalCount int
26+
matchError error
27+
embeddings []float32
28+
embeddingsError error
29+
capabilitiesToMatch map[string]any
2730
}
2831

2932
func NewFakeMatcher() *FakeMatcher {
3033
return &FakeMatcher{
31-
matchResponse: true,
32-
matchScore: 0.9,
33-
embeddings: []float32{0.1, 0.2, 0.3},
34+
matchResponse: true,
35+
matchScore: 0.9,
36+
embeddings: []float32{0.1, 0.2, 0.3},
37+
capabilitiesToMatch: make(map[string]any),
3438
}
3539
}
3640

41+
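// MatchTexts returns matchError when it is set; when totalCount is positive it counts calls and reports a match only while the count is at most int(totalCount*0.8824); otherwise it returns the configured matchResponse and matchScore.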
3742
func (f *FakeMatcher) MatchTexts(_ context.Context, _, _ string, _ float64) (bool, float64, error) {
38-
return f.matchResponse, f.matchScore, f.matchError
43+
if f.matchError != nil {
44+
return false, 0, f.matchError
45+
}
46+
47+
// For specific capability testing (like the 88.24% case)
48+
if f.matchCount >= 0 && f.totalCount > 0 {
49+
f.matchCount++
50+
// Only match the first 88.24% of capabilities
51+
match := f.matchCount <= int(float64(f.totalCount)*0.8824)
52+
return match, f.matchScore, nil
53+
}
54+
55+
// Default behavior for other test cases
56+
return f.matchResponse, f.matchScore, nil
3957
}
4058

4159
func (f *FakeMatcher) GenerateEmbeddingVector(_ context.Context, _ string) (*mat.VecDense, error) {
@@ -162,22 +180,25 @@ func TestPddlDomainGenerator_GenerateDomain(t *testing.T) {
162180
}
163181

164182
func TestPddlDomainGenerator_ValidateServiceCapabilities(t *testing.T) {
165-
useCase := GroundingUseCase{
166-
Capabilities: []string{
167-
"Verify refund eligibility",
168-
"Process payment refund",
169-
},
170-
}
171-
172183
tests := []struct {
173-
name string
174-
matchResult bool
175-
tasks []*SubTask
176-
wantErr bool
184+
name string
185+
useCase GroundingUseCase
186+
matchResult bool
187+
matchScore float64
188+
tasks []*SubTask
189+
expectedErr bool
190+
expectedErrMsg string
177191
}{
178192
{
179-
name: "matching capabilities",
193+
name: "all capabilities matched",
194+
useCase: GroundingUseCase{
195+
Capabilities: []string{
196+
"Verify refund eligibility",
197+
"Process payment refund",
198+
},
199+
},
180200
matchResult: true,
201+
matchScore: 0.9,
181202
tasks: []*SubTask{
182203
{
183204
ID: "task1",
@@ -187,38 +208,137 @@ func TestPddlDomainGenerator_ValidateServiceCapabilities(t *testing.T) {
187208
},
188209
},
189210
},
190-
wantErr: false,
211+
expectedErr: false,
191212
},
192213
{
193-
name: "missing capability",
194-
matchResult: false,
214+
name: "exactly 95% capabilities matched",
215+
useCase: GroundingUseCase{
216+
Capabilities: []string{
217+
"Verify refund eligibility",
218+
"Process payment refund",
219+
"Send confirmation email",
220+
"Update order status",
221+
"Notify customer service",
222+
"Update inventory",
223+
"Calculate tax refund",
224+
"Process loyalty points",
225+
"Record transaction",
226+
"Generate receipt",
227+
"Archive refund record",
228+
"Update customer history",
229+
"Check fraud indicators",
230+
"Validate shipping status",
231+
"Update payment gateway",
232+
"Check compliance rules",
233+
"Record audit trail",
234+
"Update financial records",
235+
"Process chargeback",
236+
"Update metrics",
237+
},
238+
},
239+
matchResult: true,
240+
matchScore: 0.8,
195241
tasks: []*SubTask{
196242
{
197243
ID: "task1",
198244
Service: "refund-service",
199245
Capabilities: []string{
200-
"A service that only processes payments",
246+
"Comprehensive refund processing service with validation",
201247
},
202248
},
203249
},
204-
wantErr: true,
250+
expectedErr: false,
251+
},
252+
{
253+
name: "88.24% capabilities matched (below threshold)",
254+
useCase: GroundingUseCase{
255+
Capabilities: []string{
256+
"Cap1", "Cap2", "Cap3", "Cap4", "Cap5",
257+
"Cap6", "Cap7", "Cap8", "Cap9", "Cap10",
258+
"Cap11", "Cap12", "Cap13", "Cap14", "Cap15",
259+
"Cap16", "Cap17",
260+
},
261+
},
262+
matchResult: true, // FakeMatcher matches only the first 15 of the 17 capabilities (int(17*0.8824) = 15)
263+
matchScore: 0.8,
264+
tasks: []*SubTask{
265+
{
266+
ID: "task1",
267+
Service: "test-service",
268+
Capabilities: []string{
269+
"Generic service capability",
270+
},
271+
},
272+
},
273+
expectedErr: true,
274+
expectedErrMsg: "insufficient capability coverage: 88.24%",
275+
},
276+
{
277+
name: "no capabilities matched",
278+
useCase: GroundingUseCase{
279+
Capabilities: []string{
280+
"Verify refund eligibility",
281+
"Process payment refund",
282+
},
283+
},
284+
matchResult: false,
285+
matchScore: 0.7,
286+
tasks: []*SubTask{
287+
{
288+
ID: "task1",
289+
Service: "unrelated-service",
290+
Capabilities: []string{
291+
"A completely different service capability",
292+
},
293+
},
294+
},
295+
expectedErr: true,
296+
expectedErrMsg: "insufficient capability coverage: 0.00%",
205297
},
206298
}
207299

208300
for _, tt := range tests {
209301
t.Run(tt.name, func(t *testing.T) {
302+
// Setup test logger to capture logs
303+
var logBuf strings.Builder
304+
testLogger := zerolog.New(&logBuf)
305+
306+
// Configure fake matcher for specific test case
210307
matcher := NewFakeMatcher()
211308
matcher.matchResponse = tt.matchResult
309+
matcher.matchScore = tt.matchScore
212310

213-
generator := NewPddlGenerator("test-action", &ExecutionPlan{Tasks: tt.tasks}, matcher, zerolog.Nop())
311+
// For the 88.24% test case, set up specific matches
312+
if tt.name == "88.24% capabilities matched (below threshold)" {
313+
// Reset the call counter and set totalCount so the fake matcher switches to count-based partial matching
314+
matcher.matchCount = 0
315+
matcher.totalCount = len(tt.useCase.Capabilities)
316+
}
317+
318+
// Create generator with test configuration
319+
generator := NewPddlGenerator("test-action", &ExecutionPlan{Tasks: tt.tasks}, matcher, testLogger)
320+
321+
// Execute validation
322+
err := generator.validateServiceCapabilities(context.Background(), tt.useCase)
214323

215-
err := generator.validateServiceCapabilities(context.Background(), useCase)
216-
if tt.wantErr {
324+
// Verify error cases
325+
if tt.expectedErr {
217326
assert.Error(t, err)
218-
assert.Contains(t, err.Error(), "no service found with required capability")
327+
if tt.expectedErrMsg != "" {
328+
assert.Contains(t, err.Error(), tt.expectedErrMsg)
329+
}
219330
} else {
220331
assert.NoError(t, err)
221332
}
333+
334+
// Verify logging
335+
logOutput := logBuf.String()
336+
assert.Contains(t, logOutput, "Matching capabilities")
337+
assert.Contains(t, logOutput, "Capability matching summary")
338+
339+
if !tt.expectedErr {
340+
assert.Contains(t, logOutput, `"matchPercentage":1`) // 100% for full match cases
341+
}
222342
})
223343
}
224344
}

0 commit comments

Comments
 (0)