|
| 1 | +package benches |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "runtime" |
| 7 | + "strings" |
| 8 | + "sync" |
| 9 | + "testing" |
| 10 | + |
| 11 | + "github.com/checkmarx/2ms/v3/engine" |
| 12 | + "github.com/checkmarx/2ms/v3/internal/workerpool" |
| 13 | + "github.com/checkmarx/2ms/v3/lib/reporting" |
| 14 | + "github.com/checkmarx/2ms/v3/lib/secrets" |
| 15 | + "github.com/checkmarx/2ms/v3/plugins" |
| 16 | +) |
| 17 | + |
// mockItem is an in-memory stand-in for a scanned source item. It carries
// just the data the accessors below hand back.
type mockItem struct {
	id, source string
	content    *string
}

// GetContent returns the pointer to the item's body text exactly as stored.
func (m *mockItem) GetContent() *string { return m.content }

// GetID returns the identifier assigned to the item.
func (m *mockItem) GetID() string { return m.id }

// GetSource returns the path-like origin string assigned to the item.
func (m *mockItem) GetSource() string { return m.source }
| 35 | + |
| 36 | +func (i *mockItem) GetGitInfo() *plugins.GitInfo { |
| 37 | + return nil |
| 38 | +} |
| 39 | + |
| 40 | +// BenchmarkProcessItems benchmarks ProcessItems with realistic content that includes actual secrets |
| 41 | +// |
| 42 | +// Note: This benchmark will produce logging output because the worker pool logs at Info level. |
| 43 | +// To run without log spam, put somewhere zerolog.SetGlobalLevel(zerolog.Disabled) |
| 44 | +func BenchmarkProcessItems(b *testing.B) { |
| 45 | + nCPU := runtime.GOMAXPROCS(0) |
| 46 | + fmt.Println("nCPU", nCPU) |
| 47 | + workerSizes := []int{nCPU / 2, nCPU, nCPU * 2, nCPU * 4, nCPU * 8, nCPU * 16, nCPU * 32} |
| 48 | + itemSizes := []int{50, 100, 500, 1000, 10000} |
| 49 | + |
| 50 | + // Secret patterns that will trigger detection |
| 51 | + secretPatterns := []string{ |
| 52 | + "github_pat_11ABCDEFG1234567890abcdefghijklmnopqrstuvwxyz123456", |
| 53 | + "sk-1234567890abcdefghijklmnopqrstuvwxyz", |
| 54 | + "ghp_abcdefghijklmnopqrstuvwxyz1234567890", |
| 55 | + "AIzaSyC1234567890abcdefghijklmnopqrstuv", |
| 56 | + "xoxb-123456789012-1234567890123-abcdefghijklmnopqrstuvwx", |
| 57 | + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c", |
| 58 | + } |
| 59 | + |
| 60 | + // Content templates simulating different file types |
| 61 | + contentTemplates := []string{ |
| 62 | + // JavaScript config file |
| 63 | + `const config = { |
| 64 | + apiKey: '%s', |
| 65 | + endpoint: 'https://api.example.com', |
| 66 | + timeout: 5000, |
| 67 | + retries: 3, |
| 68 | + debug: process.env.NODE_ENV === 'development' |
| 69 | +}; |
| 70 | +
|
| 71 | +module.exports = config;`, |
| 72 | + // Python script |
| 73 | + `import requests |
| 74 | +import os |
| 75 | +
|
| 76 | +API_KEY = '%s' |
| 77 | +BASE_URL = 'https://api.service.com/v1' |
| 78 | +
|
| 79 | +def make_request(endpoint): |
| 80 | + headers = { |
| 81 | + 'Authorization': f'Bearer {API_KEY}', |
| 82 | + 'Content-Type': 'application/json' |
| 83 | + } |
| 84 | + return requests.get(f'{BASE_URL}/{endpoint}', headers=headers) |
| 85 | +
|
| 86 | +if __name__ == '__main__': |
| 87 | + response = make_request('users') |
| 88 | + print(response.json())`, |
| 89 | + // Shell script |
| 90 | + `#!/bin/bash |
| 91 | +
|
| 92 | +# Configuration |
| 93 | +export API_TOKEN='%s' |
| 94 | +export SERVICE_URL="https://service.example.com" |
| 95 | +export ENVIRONMENT="production" |
| 96 | +
|
| 97 | +# Function to call API |
| 98 | +call_api() { |
| 99 | + curl -H "Authorization: Bearer $API_TOKEN" \ |
| 100 | + -H "Content-Type: application/json" \ |
| 101 | + "$SERVICE_URL/api/$1" |
| 102 | +} |
| 103 | +
|
| 104 | +# Main execution |
| 105 | +call_api "status"`, |
| 106 | + // YAML config |
| 107 | + `apiVersion: v1 |
| 108 | +kind: ConfigMap |
| 109 | +metadata: |
| 110 | + name: app-config |
| 111 | +data: |
| 112 | + database_url: postgresql://user:pass@localhost/db |
| 113 | + api_key: %s |
| 114 | + redis_url: redis://localhost:6379 |
| 115 | + log_level: info`, |
| 116 | + // JSON config |
| 117 | + `{ |
| 118 | + "name": "production-app", |
| 119 | + "version": "1.0.0", |
| 120 | + "config": { |
| 121 | + "api": { |
| 122 | + "key": "%s", |
| 123 | + "endpoint": "https://api.production.com", |
| 124 | + "timeout": 30000 |
| 125 | + }, |
| 126 | + "database": { |
| 127 | + "host": "db.production.com", |
| 128 | + "port": 5432 |
| 129 | + } |
| 130 | + } |
| 131 | +}`, |
| 132 | + // No secret - regular code |
| 133 | + `package utils |
| 134 | +
|
| 135 | +import ( |
| 136 | + "fmt" |
| 137 | + "strings" |
| 138 | + "time" |
| 139 | +) |
| 140 | +
|
| 141 | +func ProcessData(input string) (string, error) { |
| 142 | + if input == "" { |
| 143 | + return "", fmt.Errorf("input cannot be empty") |
| 144 | + } |
| 145 | + |
| 146 | + processed := strings.ToUpper(input) |
| 147 | + timestamp := time.Now().Format(time.RFC3339) |
| 148 | + |
| 149 | + return fmt.Sprintf("%s - %s", processed, timestamp), nil |
| 150 | +} |
| 151 | +
|
| 152 | +func ValidateInput(data []byte) bool { |
| 153 | + return len(data) > 0 && len(data) < 1048576 |
| 154 | +}`, |
| 155 | + } |
| 156 | + |
| 157 | + for _, workers := range workerSizes { |
| 158 | + for _, items := range itemSizes { |
| 159 | + b.Run(fmt.Sprintf("realistic_workers_%d_items_%d", workers, items), func(b *testing.B) { |
| 160 | + // Pre-create realistic mock items |
| 161 | + mockItems := make([]*mockItem, items) |
| 162 | + for j := 0; j < items; j++ { |
| 163 | + var content string |
| 164 | + |
| 165 | + // 60% of files contain secrets, 40% don't |
| 166 | + if j%10 < 6 { |
| 167 | + // Select a random template and secret |
| 168 | + template := contentTemplates[j%len(contentTemplates)] |
| 169 | + secret := secretPatterns[j%len(secretPatterns)] |
| 170 | + content = fmt.Sprintf(template, secret) |
| 171 | + } else { |
| 172 | + // Use non-secret content |
| 173 | + content = contentTemplates[len(contentTemplates)-1] |
| 174 | + } |
| 175 | + |
| 176 | + // Add some padding to simulate larger files |
| 177 | + padding := generateRealisticPadding(j) |
| 178 | + content += padding |
| 179 | + |
| 180 | + mockItems[j] = &mockItem{ |
| 181 | + content: &content, |
| 182 | + id: fmt.Sprintf("file_%d", j), |
| 183 | + source: fmt.Sprintf("/mock/path/file_%d.js", j), |
| 184 | + } |
| 185 | + } |
| 186 | + |
| 187 | + b.ResetTimer() |
| 188 | + for i := 0; i < b.N; i++ { |
| 189 | + // Create engine for each iteration |
| 190 | + engineTest, err := engine.Init(&engine.EngineConfig{ |
| 191 | + DetectorWorkerPoolSize: workers, |
| 192 | + }) |
| 193 | + if err != nil { |
| 194 | + b.Fatal(err) |
| 195 | + } |
| 196 | + |
| 197 | + // Create fresh channels |
| 198 | + itemsChan := make(chan plugins.ISourceItem, items) |
| 199 | + secretsChan := make(chan *secrets.Secret, items*2) // Larger buffer for found secrets |
| 200 | + report := reporting.Init() |
| 201 | + wg := &sync.WaitGroup{} |
| 202 | + wg.Add(1) |
| 203 | + |
| 204 | + // Process items |
| 205 | + go func() { |
| 206 | + defer wg.Done() |
| 207 | + processItemsLocal(engineTest, "mockPlugin", itemsChan, secretsChan, report) |
| 208 | + engineTest.GetFileWalkerWorkerPool().Wait() |
| 209 | + close(secretsChan) |
| 210 | + }() |
| 211 | + |
| 212 | + // Send items |
| 213 | + for _, item := range mockItems { |
| 214 | + itemsChan <- item |
| 215 | + } |
| 216 | + close(itemsChan) |
| 217 | + |
| 218 | + // Wait for processing |
| 219 | + wg.Wait() |
| 220 | + |
| 221 | + // Collect secrets (simulating what the real code does) |
| 222 | + secretsFound := 0 |
| 223 | + for range secretsChan { |
| 224 | + secretsFound++ |
| 225 | + } |
| 226 | + |
| 227 | + // Clean up |
| 228 | + _ = engineTest.Shutdown() |
| 229 | + } |
| 230 | + }) |
| 231 | + } |
| 232 | + } |
| 233 | +} |
| 234 | + |
// generateRealisticPadding returns filler source text used to bring a mock
// file up to a realistic size.
//
// seed selects one of three size categories by seed modulo 3: small (1KB),
// medium (10KB) or large (50KB). Negative seeds are wrapped into range
// instead of panicking on a negative index. The output is deterministic: the
// same fixed snippets are appended in order, cycling, until the target size
// is reached, so the result is at least the target size and overshoots it by
// less than the length of one snippet.
func generateRealisticPadding(seed int) string {
	// Size categories: small (1KB), medium (10KB), large (50KB)
	sizes := []int{1024, 10240, 51200}
	sizeIndex := seed % len(sizes)
	if sizeIndex < 0 {
		// Go's % keeps the sign of the dividend; shift back into [0, len).
		sizeIndex += len(sizes)
	}
	targetSize := sizes[sizeIndex]

	// Common code patterns for padding
	patterns := []string{
		"\n\n// Helper functions\n",
		"function helper() { return true; }\n",
		"const data = { id: 1, name: 'test' };\n",
		"if (condition) { console.log('debug'); }\n",
		"// TODO: refactor this later\n",
		"/* eslint-disable no-unused-vars */\n",
		"import { util } from './utils';\n",
		"export default class Component {}\n",
	}

	// The final write can overshoot the target by at most one pattern, so
	// reserving that much up front avoids any regrowth of the buffer.
	longest := 0
	for _, p := range patterns {
		if len(p) > longest {
			longest = len(p)
		}
	}

	var builder strings.Builder
	builder.Grow(targetSize + longest)
	for i := 0; builder.Len() < targetSize; i++ {
		builder.WriteString(patterns[i%len(patterns)])
	}

	return builder.String()
}
| 267 | + |
| 268 | +// Local version of processItems that doesn't use global variables |
| 269 | +func processItemsLocal(eng engine.IEngine, pluginName string, items chan plugins.ISourceItem, secrets chan *secrets.Secret, report *reporting.Report) { |
| 270 | + ctx := context.Background() |
| 271 | + pool := eng.GetFileWalkerWorkerPool() |
| 272 | + |
| 273 | + for item := range items { |
| 274 | + report.TotalItemsScanned++ |
| 275 | + item := item // capture loop variable |
| 276 | + |
| 277 | + var task workerpool.Task |
| 278 | + switch pluginName { |
| 279 | + case "filesystem": |
| 280 | + task = func(context.Context) error { |
| 281 | + return eng.DetectFile(ctx, item, secrets) |
| 282 | + } |
| 283 | + default: |
| 284 | + task = func(context.Context) error { |
| 285 | + return eng.DetectFragment(item, secrets, pluginName) |
| 286 | + } |
| 287 | + } |
| 288 | + |
| 289 | + if err := pool.Submit(task); err != nil { |
| 290 | + // Handle error appropriately |
| 291 | + break |
| 292 | + } |
| 293 | + } |
| 294 | + pool.CloseQueue() |
| 295 | +} |
0 commit comments