Skip to content

feat: restrict requirements for enhanced secret scan matches #6379

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions packages/build/src/plugins_core/secrets_scanning/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,19 +130,19 @@ const MIN_CHARS_AFTER_PREFIX = 12
const prefixMatchingRegex = LIKELY_SECRET_PREFIXES.map((p) => p.replace(/[$*+?.()|[\]{}]/g, '\\$&')).join('|')

// Build regex pattern for matching secrets with various delimiters and quotes:
// (?:["'`]|^|[=:,]) - match either quotes, start of line, or delimiters (=:,) at the start
// (?:["'\`]|[=]) - match either quotes, or = at the start
// Named capturing groups:
// - <token>: captures the entire secret value including its prefix
// - <prefix>: captures just the prefix part (e.g. 'aws_', 'github_pat_')
// (?:${prefixMatchingRegex}) - non-capturing group containing our escaped prefixes (e.g. aws_|github_pat_|etc)
// [^ "'`=:,]{${MIN_CHARS_AFTER_PREFIX}} - match exactly MIN_CHARS_AFTER_PREFIX chars after the prefix
// [^ "'`=:,]*? - lazily match any additional chars that aren't quotes/delimiters
// (?:["'`]|[ =:,]|$) - end with either quotes, delimiters, whitespace, or end of line
// [a-zA-Z0-9-]{${MIN_CHARS_AFTER_PREFIX}} - match exactly MIN_CHARS_AFTER_PREFIX chars (alphanumeric or dash) after the prefix
// [a-zA-Z0-9-]*? - lazily match any additional chars (alphanumeric or dash)
// (?:["'\`]|$) - end with either quotes or end of line
// gi - global and case insensitive flags
// Note: Using the global flag (g) means this regex object maintains state between executions.
// We would need to reset lastIndex to 0 if we wanted to reuse it on the same string multiple times.
const likelySecretRegex = new RegExp(
`(?:["'\`]|^|[=:,]) *(?<token>(?<prefix>${prefixMatchingRegex})[^ "'\`=:,]{${MIN_CHARS_AFTER_PREFIX}}[^ "'\`=:,]*?)(?:["'\`]|[ =:,]|$)`,
`(?:["'\`]|[=]) *(?<token>(?<prefix>${prefixMatchingRegex})[a-zA-Z0-9-]{${MIN_CHARS_AFTER_PREFIX}}[a-zA-Z0-9-]*?)(?:["'\`]|$)`,
'gi',
)

Expand Down
58 changes: 23 additions & 35 deletions packages/build/tests/utils_secretscanning/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ import test from 'ava'

import { findLikelySecrets } from '../../lib/plugins_core/secrets_scanning/utils.js'

const testFile = 'test.txt'

test('findLikelySecrets - should find secrets with common prefixes at the beginning of a line', async (t) => {
test('findLikelySecrets - should not find secrets without quotes or delimiters', async (t) => {
const lines = [
'aws_123456789012345678',
'ghp_1234567890123456789',
Expand All @@ -14,29 +12,22 @@ test('findLikelySecrets - should find secrets with common prefixes at the beginn

lines.forEach((text) => {
const matches = findLikelySecrets({ text })
t.is(matches.length, 1)
t.like(matches[0], {
// match found at the beginning of the line
index: 0,
})
t.is(matches.length, 0)
})
})

test('findLikelySecrets - should find secrets with various delimiters at the beginning', async (t) => {
test('findLikelySecrets - should find secrets with quotes or equals', async (t) => {
const matchingLines = [
'my_secret_key=aws_123456789012345678',
'awsKey: aws_123456789012345678',
'mySecretKey = aws_123456789012345678',
'secretKey="aws_123456789012345678"',
'secretKey = "aws_123456789012345678"',
"secretKey='aws_123456789012345678'",
'secretKey=`aws_123456789012345678`',
'someKey, aws_123456789012345678, otherKey',
]
matchingLines.forEach((text) => {
const matches = findLikelySecrets({ text })
t.is(matches.length, 1)

t.true(matches[0].index > 0, 'Match should not be at the start of the line')
})
})

Expand All @@ -47,12 +38,12 @@ test('findLikelySecrets - should not match values with spaces after prefix', asy
})

test('findLikelySecrets - should not match values that are too short', async (t) => {
const matches = findLikelySecrets({ text: 'aws_key=12345678901' })
const matches = findLikelySecrets({ text: 'aws_key="12345678901"' })
t.is(matches.length, 0)
})

test('findLikelySecrets - should return the matched prefix as the key', async (t) => {
const matches = findLikelySecrets({ text: 'github_pat_123456789012345678' })
const matches = findLikelySecrets({ text: 'mykey = "github_pat_123456789012345678"' })
t.is(matches.length, 1)
t.is(matches[0].prefix, 'github_pat_')
})
Expand All @@ -67,17 +58,13 @@ test('findLikelySecrets - should handle empty or invalid input', async (t) => {
})

test('findLikelySecrets - should match exactly minimum chars after prefix', async (t) => {
const exactMinChars = 'aws_123456789012' // Exactly 12 chars after prefix
const exactMinChars = 'value = "aws_123456789012"' // Exactly 12 chars after prefix
const matches = findLikelySecrets({ text: exactMinChars })
t.is(matches.length, 1)
})

test('findLikelySecrets - should match different prefixes from LIKELY_SECRET_PREFIXES', async (t) => {
const lines = [
'ghp_123456789012345678', // GitHub personal access token
'sk_live_123456789012345678', // Stripe key
'AKIAXXXXXXXXXXXXXXXX', // AWS access key
]
const lines = ['key="ghp_123456789012345678"', 'key="sk_123456789012345678"', 'key="aws_123456789012345678"']

lines.forEach((text) => {
const matches = findLikelySecrets({ text })
Expand All @@ -91,34 +78,35 @@ test('findLikelySecrets - should skip safe-listed values', async (t) => {
t.is(matches.length, 0)
})

test('findLikelySecrets - should match secrets with special characters', async (t) => {
const lines = [
'aws_abc123!@#$%^&*()_+', // Special chars
'ghp_abc-123_456.789', // Common separator chars
'sk_live_123-456_789.000', // Mix of numbers and separators
]
test('findLikelySecrets - should allow dashes and alphanumeric characters only', async (t) => {
const validLines = ['key="aws_abc123-456-789"', 'key="ghp_abc-123-def-456"']

lines.forEach((text) => {
const matches = findLikelySecrets({ text })
t.is(matches.length, 1)
validLines.forEach((line) => {
const matches = findLikelySecrets({ text: line })
t.is(matches.length, 1, `Should match line with dashes: ${line}`)
})

const invalidLines = ['key="aws_abc123!@#$%^&*()_+"', 'key="ghp_abc.123_456.789"', 'key="sk_live_123_456_789"']

invalidLines.forEach((line) => {
const matches = findLikelySecrets({ text: line })
t.is(matches.length, 0, `Should not match line with special characters: ${line}`)
})
})

test('findLikelySecrets - should match full secret value against omitValues', async (t) => {
// Test both partial and full matches to ensure proper behavior
const partialMatch = findLikelySecrets({
text: 'aws_123456789012_extra_chars_here',
text: 'key="aws_123456789012extracharshere"',
// The omitValue only partially matches the secret - we should still detect the secret
omitValuesFromEnhancedScan: ['aws_123456789012'],
})
t.is(partialMatch.length, 1)

const fullMatch = findLikelySecrets({
line: 'aws_123456789012_extra_chars_here',
file: testFile,
lineNumber: 1,
text: 'key="aws_123456789012extracharshere"',
// Omit the full secret value - we should not detect the secret
omitValuesFromEnhancedScan: ['aws_123456789012_extra_chars_here'],
omitValuesFromEnhancedScan: ['aws_123456789012extracharshere'],
})
t.is(fullMatch.length, 0)
})
Loading