From 99e62d66e57a752394b861f70fb413f84f42e422 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 9 May 2025 17:28:40 +0100 Subject: [PATCH 01/10] Rust: Add sensitive data patterns. --- .../internal/SensitiveDataHeuristics.qll | 23 +++++--- .../test/library-tests/sensitivedata/test.rs | 56 +++++++++---------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll index ede88ebf8149..f0b79fbbf3d7 100644 --- a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll +++ b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll @@ -54,7 +54,9 @@ module HeuristicNames { * Gets a regular expression that identifies strings that may indicate the presence of secret * or trusted data. */ - string maybeSecret() { result = "(?is).*((? Date: Fri, 9 May 2025 17:32:46 +0100 Subject: [PATCH 02/10] Rust: Combine regexs where possible (likely better performance). --- .../security/internal/SensitiveDataHeuristics.qll | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll index f0b79fbbf3d7..48681bdf89a5 100644 --- a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll +++ b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll @@ -63,8 +63,7 @@ module HeuristicNames { * user names or other account information. */ string maybeAccountInfo() { - result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or + result = "(?is).*(acc(ou)?nt|puid|user.?(name|id)|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -73,11 +72,9 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" or - result = "(?is).*([_-]|\\b)mfa([_-]|\\b).*" or - result = "(?is).*oauth.*" or - result = "(?is).*api.?(key|token).*" + result = + "(?is).*(pass(wd|word|code|.?phrase)(?!.*question)|(auth(entication|ori[sz]ation)?).?key|oauth|" + + "api.?(key|token)|([_-]|\\b)mfa([_-]|\\b)).*" } /** From a6b4a18d5102a8f9745d2bc7bf94df3c8ede1eb8 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 9 May 2025 17:50:27 +0100 Subject: [PATCH 03/10] Rust: Add negative patterns. --- .../internal/SensitiveDataHeuristics.qll | 9 +++++---- .../test/library-tests/sensitivedata/test.rs | 18 +++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll index 48681bdf89a5..ebc3e0b0e31a 100644 --- a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll +++ b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll @@ -55,7 +55,7 @@ module HeuristicNames { * or trusted data. */ string maybeSecret() { - result = "(?is).*((? Date: Thu, 10 Jul 2025 18:10:24 +0100 Subject: [PATCH 04/10] Sync identical files. --- .../internal/SensitiveDataHeuristics.qll | 29 +++++++++++-------- .../internal/SensitiveDataHeuristics.qll | 29 +++++++++++-------- .../internal/SensitiveDataHeuristics.qll | 29 +++++++++++-------- .../internal/SensitiveDataHeuristics.qll | 29 +++++++++++-------- 4 files changed, 68 insertions(+), 48 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll index ede88ebf8149..ebc3e0b0e31a 100644 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll @@ -54,15 +54,16 @@ module HeuristicNames { * Gets a regular expression that identifies strings that may indicate the presence of secret * or trusted data. */ - string maybeSecret() { result = "(?is).*((? Date: Fri, 11 Jul 2025 11:54:59 +0100 Subject: [PATCH 05/10] Add change notes. --- .../lib/change-notes/2025-07-11-sensitive-data-heuristics.md | 4 ++++ .../lib/change-notes/2025-07-11-sensitive-data-heuristics.md | 4 ++++ .../lib/change-notes/2025-07-11-sensitive-data-heuristics.md | 4 ++++ .../lib/change-notes/2025-07-11-sensitive-data-heuristics.md | 4 ++++ .../lib/change-notes/2025-07-11-sensitive-data-heuristics.md | 4 ++++ 5 files changed, 20 insertions(+) create mode 100644 javascript/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md create mode 100644 python/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md create mode 100644 ruby/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md create mode 100644 rust/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md create mode 100644 swift/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md diff --git a/javascript/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/javascript/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/python/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/python/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/python/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/ruby/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/ruby/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/ruby/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/rust/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/rust/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/rust/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. diff --git a/swift/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md b/swift/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md new file mode 100644 index 000000000000..22f06a998b71 --- /dev/null +++ b/swift/ql/lib/change-notes/2025-07-11-sensitive-data-heuristics.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expressions in `SensitiveDataHeuristics.qll` have been extended to find more instances of sensitive data such as secrets used in authentication, finance and health information, and device data. The heuristics have also been refined to find fewer false positive matches. This will improve results for queries related to sensitive information. From 4778ef616a7109bbcb79b657e29c2c912c743ab6 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 11 Jul 2025 15:43:31 +0100 Subject: [PATCH 06/10] Rust: Add a test case for password_confirmation. --- rust/ql/test/library-tests/sensitivedata/test.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index 87802002e8b8..dda48ea29272 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -23,7 +23,7 @@ impl MyStruct { fn get_password() -> String { get_string() } fn test_passwords( - password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, + password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, password_confirmation: &str, pass_phrase: &str, passphrase: &str, passPhrase: &str, backup_code: &str, auth_key: &str, authkey: &str, authKey: &str, authentication_key: &str, authenticationkey: &str, authenticationKey: &str, oauth: &str, one_time_code: &str, @@ -37,6 +37,7 @@ fn test_passwords( sink(passwd); // $ sensitive=password sink(my_password); // $ sensitive=password sink(password_str); // $ sensitive=password + sink(password_confirmation); // $ sensitive=password sink(pass_phrase); // $ sensitive=password sink(passphrase); // $ sensitive=password sink(passPhrase); // $ sensitive=password From 9f59a3501c32995c7005313e5aff62f26d3e664b Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:17:05 +0100 Subject: [PATCH 07/10] Rust: Revert ipaddr and fingerprint terms (too many FPs). --- .../codeql/rust/security/internal/SensitiveDataHeuristics.qll | 2 +- rust/ql/test/library-tests/sensitivedata/test.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll index ebc3e0b0e31a..910749a6c82b 100644 --- a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll +++ b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll @@ -114,7 +114,7 @@ module HeuristicNames { // Relationships - work and family "employ(er|ee)|spouse|maiden.?name|" + // Device information - "([_-]|\\b)ip.?addr|mac.?addr|finger.?print" + + "mac.?addr" + // --- ").*" } diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index dda48ea29272..74d0a0daa0a4 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -164,8 +164,8 @@ impl DeviceInfo { sink(&self.api_key); // $ sensitive=password sink(&other.api_key); // $ sensitive=password sink(&self.deviceApiToken); // $ sensitive=password - sink(&self.finger_print); // $ sensitive=private - sink(&self.ip_address); // $ sensitive=private + sink(&self.finger_print); // $ MISSING: sensitive=private + sink(&self.ip_address); // $ MISSING: sensitive=private sink(self.macaddr12); // $ sensitive=private sink(&self.mac_addr); // $ sensitive=private sink(self.mac_addr.values); // $ sensitive=private From e121579a857da542402e4362d43b0e82e08e21c7 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:19:31 +0100 Subject: [PATCH 08/10] Rust: Adjust the test labels slightly. --- rust/ql/test/library-tests/sensitivedata/test.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index 74d0a0daa0a4..0f4965ce2856 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -164,14 +164,17 @@ impl DeviceInfo { sink(&self.api_key); // $ sensitive=password sink(&other.api_key); // $ sensitive=password sink(&self.deviceApiToken); // $ sensitive=password - sink(&self.finger_print); // $ MISSING: sensitive=private - sink(&self.ip_address); // $ MISSING: sensitive=private sink(self.macaddr12); // $ sensitive=private sink(&self.mac_addr); // $ sensitive=private sink(self.mac_addr.values); // $ sensitive=private sink(self.mac_addr.values[0]); // $ sensitive=private sink(&self.networkMacAddress); // $ sensitive=private + // dubious (may or may not be private device info, depending on context) + + sink(&self.finger_print); + sink(&self.ip_address); + // not private device info sink(self.macro_value); From 30c6082b5d1e0c5e0630f45ccaafcf7e40cc7885 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:35:18 +0100 Subject: [PATCH 09/10] Sync identical files. --- .../javascript/security/internal/SensitiveDataHeuristics.qll | 2 +- .../semmle/python/security/internal/SensitiveDataHeuristics.qll | 2 +- .../codeql/ruby/security/internal/SensitiveDataHeuristics.qll | 2 +- .../codeql/swift/security/internal/SensitiveDataHeuristics.qll | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll index ebc3e0b0e31a..910749a6c82b 100644 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll @@ -114,7 +114,7 @@ module HeuristicNames { // Relationships - work and family "employ(er|ee)|spouse|maiden.?name|" + // Device information - "([_-]|\\b)ip.?addr|mac.?addr|finger.?print" + + "mac.?addr" + // --- ").*" } diff --git a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll index ebc3e0b0e31a..910749a6c82b 100644 --- a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll +++ b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll @@ -114,7 +114,7 @@ module HeuristicNames { // Relationships - work and family "employ(er|ee)|spouse|maiden.?name|" + // Device information - "([_-]|\\b)ip.?addr|mac.?addr|finger.?print" + + "mac.?addr" + // --- ").*" } diff --git a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll index ebc3e0b0e31a..910749a6c82b 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll +++ b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll @@ -114,7 +114,7 @@ module HeuristicNames { // Relationships - work and family "employ(er|ee)|spouse|maiden.?name|" + // Device information - "([_-]|\\b)ip.?addr|mac.?addr|finger.?print" + + "mac.?addr" + // --- ").*" } diff --git a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll index ebc3e0b0e31a..910749a6c82b 100644 --- a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll +++ b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll @@ -114,7 +114,7 @@ module HeuristicNames { // Relationships - work and family "employ(er|ee)|spouse|maiden.?name|" + // Device information - "([_-]|\\b)ip.?addr|mac.?addr|finger.?print" + + "mac.?addr" + // --- ").*" } From da0742f3ec1cd1d3390ab84af2caab2b7caff5f6 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:45:07 +0100 Subject: [PATCH 10/10] Rust: Update path resolution consistency .expected. --- .../PathResolutionConsistency.expected | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected b/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected index 0533774588cc..3d4929f5ac60 100644 --- a/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected +++ b/rust/ql/test/library-tests/sensitivedata/CONSISTENCY/PathResolutionConsistency.expected @@ -1,27 +1,27 @@ multipleCallTargets -| test.rs:55:7:55:26 | ... .as_str() | -| test.rs:56:7:56:21 | ... .as_str() | -| test.rs:72:7:72:26 | ... .as_str() | -| test.rs:73:7:73:36 | ... .as_str() | -| test.rs:74:7:74:34 | ... .as_str() | -| test.rs:75:7:75:27 | ... .as_str() | -| test.rs:258:7:258:36 | ... .as_str() | -| test.rs:260:7:260:33 | ... .as_str() | -| test.rs:261:7:261:36 | ... .as_str() | -| test.rs:262:7:262:26 | ... .as_str() | -| test.rs:266:7:266:28 | ... .as_str() | -| test.rs:267:7:267:37 | ... .as_str() | -| test.rs:268:7:268:36 | ... .as_str() | -| test.rs:271:7:271:32 | ... .as_str() | -| test.rs:281:7:281:34 | ... .as_str() | -| test.rs:284:7:284:36 | ... .as_str() | -| test.rs:288:7:288:39 | ... .as_str() | -| test.rs:295:7:295:53 | ... .as_str() | -| test.rs:296:7:296:45 | ... .as_str() | -| test.rs:298:7:298:39 | ... .as_str() | -| test.rs:299:7:299:34 | ... .as_str() | -| test.rs:300:7:300:42 | ... .as_str() | -| test.rs:302:7:302:48 | ... .as_str() | -| test.rs:303:7:303:35 | ... .as_str() | -| test.rs:304:7:304:35 | ... .as_str() | -| test.rs:343:7:343:39 | ... .as_str() | +| test.rs:56:7:56:26 | ... .as_str() | +| test.rs:57:7:57:21 | ... .as_str() | +| test.rs:73:7:73:26 | ... .as_str() | +| test.rs:74:7:74:36 | ... .as_str() | +| test.rs:75:7:75:34 | ... .as_str() | +| test.rs:76:7:76:27 | ... .as_str() | +| test.rs:262:7:262:36 | ... .as_str() | +| test.rs:264:7:264:33 | ... .as_str() | +| test.rs:265:7:265:36 | ... .as_str() | +| test.rs:266:7:266:26 | ... .as_str() | +| test.rs:270:7:270:28 | ... .as_str() | +| test.rs:271:7:271:37 | ... .as_str() | +| test.rs:272:7:272:36 | ... .as_str() | +| test.rs:275:7:275:32 | ... .as_str() | +| test.rs:285:7:285:34 | ... .as_str() | +| test.rs:288:7:288:36 | ... .as_str() | +| test.rs:292:7:292:39 | ... .as_str() | +| test.rs:299:7:299:53 | ... .as_str() | +| test.rs:300:7:300:45 | ... .as_str() | +| test.rs:302:7:302:39 | ... .as_str() | +| test.rs:303:7:303:34 | ... .as_str() | +| test.rs:304:7:304:42 | ... .as_str() | +| test.rs:306:7:306:48 | ... .as_str() | +| test.rs:307:7:307:35 | ... .as_str() | +| test.rs:308:7:308:35 | ... .as_str() | +| test.rs:347:7:347:39 | ... .as_str() |