diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected index dca1a33e73a3..7cb9e0151907 100644 --- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected +++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected @@ -10,6 +10,8 @@ edges | test.py:48:14:48:35 | ControlFlowNode for social_security_number | test.py:49:15:49:36 | ControlFlowNode for social_security_number | provenance | | | test.py:48:38:48:40 | ControlFlowNode for ssn | test.py:50:15:50:17 | ControlFlowNode for ssn | provenance | | | test.py:48:54:48:63 | ControlFlowNode for passportNo | test.py:52:15:52:24 | ControlFlowNode for passportNo | provenance | | +| test.py:54:14:54:22 | ControlFlowNode for post_code | test.py:55:15:55:23 | ControlFlowNode for post_code | provenance | | +| test.py:54:25:54:31 | ControlFlowNode for zipCode | test.py:56:15:56:21 | ControlFlowNode for zipCode | provenance | | | test.py:54:34:54:45 | ControlFlowNode for home_address | test.py:57:15:57:26 | ControlFlowNode for home_address | provenance | | | test.py:59:14:59:26 | ControlFlowNode for user_latitude | test.py:60:15:60:27 | ControlFlowNode for user_latitude | provenance | | | test.py:59:29:59:42 | ControlFlowNode for user_longitude | test.py:61:15:61:28 | ControlFlowNode for user_longitude | provenance | | @@ -42,7 +44,11 @@ nodes | test.py:49:15:49:36 | ControlFlowNode for social_security_number | semmle.label | ControlFlowNode for social_security_number | | test.py:50:15:50:17 | ControlFlowNode for ssn | semmle.label | ControlFlowNode for ssn | | test.py:52:15:52:24 | ControlFlowNode for passportNo | semmle.label | ControlFlowNode for passportNo | +| test.py:54:14:54:22 | ControlFlowNode for post_code | semmle.label | ControlFlowNode for post_code | +| test.py:54:25:54:31 | ControlFlowNode for zipCode | semmle.label | ControlFlowNode for zipCode | | test.py:54:34:54:45 | ControlFlowNode for home_address | semmle.label | ControlFlowNode for home_address | +| test.py:55:15:55:23 | ControlFlowNode for post_code | semmle.label | ControlFlowNode for post_code | +| test.py:56:15:56:21 | ControlFlowNode for zipCode | semmle.label | ControlFlowNode for zipCode | | test.py:57:15:57:26 | ControlFlowNode for home_address | semmle.label | ControlFlowNode for home_address | | test.py:59:14:59:26 | ControlFlowNode for user_latitude | semmle.label | ControlFlowNode for user_latitude | | test.py:59:29:59:42 | ControlFlowNode for user_longitude | semmle.label | ControlFlowNode for user_longitude | @@ -79,6 +85,8 @@ subpaths | test.py:49:15:49:36 | ControlFlowNode for social_security_number | test.py:48:14:48:35 | ControlFlowNode for social_security_number | test.py:49:15:49:36 | ControlFlowNode for social_security_number | This expression logs $@ as clear text. | test.py:48:14:48:35 | ControlFlowNode for social_security_number | sensitive data (private) | | test.py:50:15:50:17 | ControlFlowNode for ssn | test.py:48:38:48:40 | ControlFlowNode for ssn | test.py:50:15:50:17 | ControlFlowNode for ssn | This expression logs $@ as clear text. | test.py:48:38:48:40 | ControlFlowNode for ssn | sensitive data (private) | | test.py:52:15:52:24 | ControlFlowNode for passportNo | test.py:48:54:48:63 | ControlFlowNode for passportNo | test.py:52:15:52:24 | ControlFlowNode for passportNo | This expression logs $@ as clear text. | test.py:48:54:48:63 | ControlFlowNode for passportNo | sensitive data (private) | +| test.py:55:15:55:23 | ControlFlowNode for post_code | test.py:54:14:54:22 | ControlFlowNode for post_code | test.py:55:15:55:23 | ControlFlowNode for post_code | This expression logs $@ as clear text. | test.py:54:14:54:22 | ControlFlowNode for post_code | sensitive data (private) | +| test.py:56:15:56:21 | ControlFlowNode for zipCode | test.py:54:25:54:31 | ControlFlowNode for zipCode | test.py:56:15:56:21 | ControlFlowNode for zipCode | This expression logs $@ as clear text. | test.py:54:25:54:31 | ControlFlowNode for zipCode | sensitive data (private) | | test.py:57:15:57:26 | ControlFlowNode for home_address | test.py:54:34:54:45 | ControlFlowNode for home_address | test.py:57:15:57:26 | ControlFlowNode for home_address | This expression logs $@ as clear text. | test.py:54:34:54:45 | ControlFlowNode for home_address | sensitive data (private) | | test.py:60:15:60:27 | ControlFlowNode for user_latitude | test.py:59:14:59:26 | ControlFlowNode for user_latitude | test.py:60:15:60:27 | ControlFlowNode for user_latitude | This expression logs $@ as clear text. | test.py:59:14:59:26 | ControlFlowNode for user_latitude | sensitive data (private) | | test.py:61:15:61:28 | ControlFlowNode for user_longitude | test.py:59:29:59:42 | ControlFlowNode for user_longitude | test.py:61:15:61:28 | ControlFlowNode for user_longitude | This expression logs $@ as clear text. | test.py:59:29:59:42 | ControlFlowNode for user_longitude | sensitive data (private) | diff --git a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py index d8d70c56bd57..ff01680ed81c 100644 --- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py +++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py @@ -52,8 +52,8 @@ def log1(social_security_number, ssn, className, passportNo): print(passportNo) # NOT OK def log2(post_code, zipCode, home_address): - print(post_code) # NOT OK, but NOT FOUND - "code" is treated as encrypted and thus not sensitive - print(zipCode) # NOT OK, but NOT FOUND - "code" is treated as encrypted and thus not sensitive + print(post_code) # NOT OK + print(zipCode) # NOT OK print(home_address) # NOT OK def log3(user_latitude, user_longitude): diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index f8d850beeb80..e2bb5a5f595c 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -23,11 +23,14 @@ impl MyStruct { fn get_password() -> String { get_string() } fn test_passwords( - password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, password_confirmation: &str, + password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, password_confirmation: &str, profile_password: &str, pass_phrase: &str, passphrase: &str, passPhrase: &str, backup_code: &str, auth_key: &str, authkey: &str, authKey: &str, authentication_key: &str, authenticationkey: &str, authenticationKey: &str, oauth: &str, - one_time_code: &str, - harmless: &str, encrypted_password: &str, password_hash: &str, passwordFile: &str, + one_time_code: &str, api_token: &str, api_tok: &str, + harmless: &str, + encrypted_password: &str, unencrypted_password: &str, encoded_password: &str, unencoded_password: &str, + password_hash: &str, passwordFile: &str, coauthor: &str, + ms: &MyStruct ) { // passwords @@ -38,6 +41,9 @@ fn test_passwords( sink(my_password); // $ sensitive=password sink(password_str); // $ sensitive=password sink(password_confirmation); // $ sensitive=password + sink(profile_password); // $ sensitive=password + sink(unencrypted_password); // $ sensitive=password + sink(unencoded_password); // $ sensitive=password sink(pass_phrase); // $ sensitive=password sink(passphrase); // $ sensitive=password sink(passPhrase); // $ sensitive=password @@ -51,6 +57,8 @@ fn test_passwords( sink(authenticationKey); // $ sensitive=password sink(oauth); // $ sensitive=password sink(one_time_code); // $ MISSING: sensitive=password + sink(api_token); // $ sensitive=password + sink(api_tok); // $ sensitive=password sink(ms); // $ MISSING: sensitive=password sink(ms.password.as_str()); // $ sensitive=password @@ -67,8 +75,10 @@ fn test_passwords( sink(harmless); sink(encrypted_password); + sink(encoded_password); sink(password_hash); sink(passwordFile); + sink(coauthor); sink(ms.harmless.as_str()); sink(ms.password_file_path.as_str()); @@ -187,6 +197,10 @@ struct Financials { harmless: String, my_bank_account_number: String, credit_card_no: String, + card_no: String, + cardNumber: String, + card_security_code: String, + credit_rating: i32, user_ccn: String, cvv: String, @@ -201,6 +215,7 @@ struct Financials { accounting: i32, unaccounted: bool, multiband: bool, + wildcard_not_matched: bool, } enum Gender { @@ -298,6 +313,9 @@ fn test_private_info( sink(info.financials.my_bank_account_number.as_str()); // $ sensitive=private SPURIOUS: sensitive=id sink(info.financials.credit_card_no.as_str()); // $ sensitive=private + sink(info.financials.card_no.as_str()); // $ sensitive=private + sink(info.financials.cardNumber.as_str()); // $ sensitive=private + sink(info.financials.card_security_code.as_str()); // $ sensitive=private sink(info.financials.credit_rating); // $ sensitive=private sink(info.financials.user_ccn.as_str()); // $ sensitive=private sink(info.financials.cvv.as_str()); // $ sensitive=private @@ -350,6 +368,7 @@ fn test_private_info( sink(info.financials.accounting); sink(info.financials.unaccounted); sink(info.financials.multiband); + sink(info.financials.wildcard_not_matched); sink(ContactDetails::FavouriteColor("blue".to_string())); } diff --git a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll index 4271784577f0..b2bda909e3ba 100644 --- a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll +++ b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll @@ -76,7 +76,7 @@ module HeuristicNames { string maybePassword() { result = "(?is).*(pass(wd|word|code|.?phrase)(?!.*question)|(auth(entication|ori[sz]ation)?).?key|oauth|" - + "api.?(key|token)|([_-]|\\b)mfa([_-]|\\b)).*" + + "api.?(key|tok)|([_-]|\\b)mfa([_-]|\\b)).*" } /** @@ -104,8 +104,9 @@ module HeuristicNames { // Geographic location - where the user is (or was) "latitude|longitude|nationality|" + // Financial data - such as credit card numbers, salary, bank accounts, and debts - "(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|acc(ou)?nt.?(no|num|credit)|routing.?num|" + "(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|(card|acc(ou)?nt).?(no|num|credit)|routing.?num|" + "salary|billing|beneficiary|credit.?(rating|score)|([_-]|\\b)(ccn|cvv|iban)([_-]|\\b)|" + + "security.?code|" + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. // "e(mail|_mail)|" + // this seems too noisy // Health - medical conditions, insurance status, prescription records @@ -145,13 +146,13 @@ module HeuristicNames { * suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query). * * We also filter out common words like `certain` and `concert`, since otherwise these could - * be matched by the certificate regular expressions. Same for `accountable` (account), or - * `secretarial` (secret). + * be matched by the certificate regular expressions. Same for `accountable` (account), + * `secretarial` (secret), `wildcard` (card), `coauthor` (oauth). */ string notSensitiveRegexp() { result = - "(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?