From e5cb1d2dec2d21d609dd0056586a023d3340f7b8 Mon Sep 17 00:00:00 2001 From: Chad Schwenke Date: Wed, 9 Oct 2024 22:24:57 -0600 Subject: [PATCH 1/2] Added initial flags for passing optional to archived and forked. --- .../enterprise_keyword_search.py | 36 +++++++++++++++++++ .../public_keyword_search.py | 32 +++++++++++++++++ .../enterprise_cred_detections.py | 36 +++++++++++++++++++ .../enterprise_key_detections.py | 36 +++++++++++++++++++ .../github-public/public_cred_detections.py | 36 +++++++++++++++++++ .../github-public/public_key_detections.py | 36 +++++++++++++++++++ 6 files changed, 212 insertions(+) diff --git a/xgitguard/custom keyword search/enterprise_keyword_search.py b/xgitguard/custom keyword search/enterprise_keyword_search.py index 71ae27e..c186e35 100644 --- a/xgitguard/custom keyword search/enterprise_keyword_search.py +++ b/xgitguard/custom keyword search/enterprise_keyword_search.py @@ -379,6 +379,28 @@ def arg_parser(): help="Pass the repo name list as comma separated string", ) + argparser.add_argument( + "-a", + "--archived", + metavar="Archived", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Archived repos. Default is Yes", + ) + + argparser.add_argument( + "-f", + "--forked", + metavar="Forked", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Forked repos. Default is Yes", + ) + argparser.add_argument( "-l", "--log_level", @@ -421,6 +443,16 @@ def arg_parser(): else: repo = [] + if args.archived.lower() in flag_choices[:5]: + search_archived = True + else: + search_archived = False + + if args.forked.lower() in flag_choices[:5]: + search_forked = True + else: + search_forked = False + if args.log_level in log_level_choices: log_level = args.log_level else: @@ -434,6 +466,8 @@ def arg_parser(): enterprise_keywords, org, repo, + search_archived, + search_forked, log_level, console_logging, ) @@ -445,6 +479,8 @@ def arg_parser(): enterprise_keywords, org, repo, + search_archived, + search_forked, log_level, console_logging, ) = arg_parser() diff --git a/xgitguard/custom keyword search/public_keyword_search.py b/xgitguard/custom keyword search/public_keyword_search.py index ebcb905..b0b44ee 100644 --- a/xgitguard/custom keyword search/public_keyword_search.py +++ b/xgitguard/custom keyword search/public_keyword_search.py @@ -353,6 +353,26 @@ def arg_parser(): default="", help="Pass the repo name list as comma separated string", ) + argparser.add_argument( + "-a", + "--archived", + metavar="Archived", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Archived repos. Default is Yes", + ) + argparser.add_argument( + "-f", + "--forked", + metavar="Forked", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Forked repos. Default is Yes", + ) argparser.add_argument( "-l", "--log_level", @@ -389,6 +409,14 @@ def arg_parser(): repo = [] else: repo = [] + if args.archived.lower() in flag_choices[:5]: + search_archived = True + else: + search_archived = False + if args.forked.lower() in flag_choices[:5]: + search_forked = True + else: + search_forked = False if args.log_level in log_level_choices: log_level = args.log_level else: @@ -401,6 +429,8 @@ def arg_parser(): public_keywords, org, repo, + search_archived, + search_forked, log_level, console_logging, ) @@ -412,6 +442,8 @@ def arg_parser(): public_keywords, org, repo, + search_archived, + search_forked, log_level, console_logging, ) = arg_parser() diff --git a/xgitguard/github-enterprise/enterprise_cred_detections.py b/xgitguard/github-enterprise/enterprise_cred_detections.py index dfb2685..f90cf16 100644 --- a/xgitguard/github-enterprise/enterprise_cred_detections.py +++ b/xgitguard/github-enterprise/enterprise_cred_detections.py @@ -782,6 +782,28 @@ def arg_parser(): help="Pass the repo name list as comma separated string", ) + argparser.add_argument( + "-a", + "--archived", + metavar="Archived", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Archived repos. Default is Yes", + ) + + argparser.add_argument( + "-f", + "--forked", + metavar="Forked", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Forked repos. Default is Yes", + ) + argparser.add_argument( "-l", "--log_level", @@ -839,6 +861,16 @@ def arg_parser(): else: repo = [] + if args.archived.lower() in flag_choices[:5]: + search_archived = True + else: + search_archived = False + + if args.forked.lower() in flag_choices[:5]: + search_forked = True + else: + search_forked = False + if args.log_level in log_level_choices: log_level = args.log_level else: @@ -855,6 +887,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) @@ -869,6 +903,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) = arg_parser() diff --git a/xgitguard/github-enterprise/enterprise_key_detections.py b/xgitguard/github-enterprise/enterprise_key_detections.py index 95b27e4..3723541 100644 --- a/xgitguard/github-enterprise/enterprise_key_detections.py +++ b/xgitguard/github-enterprise/enterprise_key_detections.py @@ -762,6 +762,28 @@ def arg_parser(): help="Pass the repo name list as comma separated string", ) + argparser.add_argument( + "-a", + "--archived", + metavar="Archived", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Archived repos. Default is Yes", + ) + + argparser.add_argument( + "-f", + "--forked", + metavar="Forked", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Forked repos. Default is Yes", + ) + argparser.add_argument( "-l", "--log_level", @@ -819,6 +841,16 @@ def arg_parser(): else: repo = [] + if args.archived.lower() in flag_choices[:5]: + search_archived = True + else: + search_archived = False + + if args.forked.lower() in flag_choices[:5]: + search_forked = True + else: + search_forked = False + if args.log_level in log_level_choices: log_level = args.log_level else: @@ -835,6 +867,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) @@ -849,6 +883,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) = arg_parser() diff --git a/xgitguard/github-public/public_cred_detections.py b/xgitguard/github-public/public_cred_detections.py index a78e444..107005e 100644 --- a/xgitguard/github-public/public_cred_detections.py +++ b/xgitguard/github-public/public_cred_detections.py @@ -945,6 +945,28 @@ def arg_parser(): help="Pass the Repo name list as comma separated string", ) + argparser.add_argument( + "-a", + "--archived", + metavar="Archived", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Archived repos. Default is Yes", + ) + + argparser.add_argument( + "-f", + "--forked", + metavar="Forked", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Forked repos. Default is Yes", + ) + argparser.add_argument( "-l", "--log_level", @@ -1006,6 +1028,16 @@ def arg_parser(): else: repo = [] + if args.archived.lower() in flag_choices[:5]: + search_archived = True + else: + search_archived = False + + if args.forked.lower() in flag_choices[:5]: + search_forked = True + else: + search_forked = False + if args.log_level in log_level_choices: log_level = args.log_level else: @@ -1023,6 +1055,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) @@ -1038,6 +1072,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) = arg_parser() diff --git a/xgitguard/github-public/public_key_detections.py b/xgitguard/github-public/public_key_detections.py index 0829b6e..574686a 100644 --- a/xgitguard/github-public/public_key_detections.py +++ b/xgitguard/github-public/public_key_detections.py @@ -916,6 +916,28 @@ def arg_parser(): help="Pass the Repo name list as comma separated string", ) + argparser.add_argument( + "-a", + "--archived", + metavar="Archived", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Archived repos. Default is Yes", + ) + + argparser.add_argument( + "-f", + "--forked", + metavar="Forked", + action="store", + type=str, + default="Yes", + choices=flag_choices, + help="Pass Yes or No to search for Forked repos. Default is Yes", + ) + argparser.add_argument( "-l", "--log_level", @@ -976,6 +998,16 @@ def arg_parser(): else: repo = [] + if args.archived.lower() in flag_choices[:5]: + search_archived = True + else: + search_archived = False + + if args.forked.lower() in flag_choices[:5]: + search_forked = True + else: + search_forked = False + if args.log_level in log_level_choices: log_level = args.log_level else: @@ -993,6 +1025,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) @@ -1008,6 +1042,8 @@ def arg_parser(): unmask_secret, org, repo, + search_archived, + search_forked, log_level, console_logging, ) = arg_parser() From 9c6b0bc8a677882642ba208668f6849cb2d05062 Mon Sep 17 00:00:00 2001 From: Chad Schwenke Date: Tue, 29 Oct 2024 10:40:03 -0600 Subject: [PATCH 2/2] implementation of filtering --- xgitguard/common/github_calls.py | 19 ++++++++++++++----- .../enterprise_keyword_search.py | 6 ++++-- .../public_keyword_search.py | 6 ++++-- .../enterprise_cred_detections.py | 6 +++--- .../enterprise_key_detections.py | 6 +++--- .../github-public/public_cred_detections.py | 16 ++++++++++++---- .../github-public/public_key_detections.py | 14 +++++++++++--- 7 files changed, 51 insertions(+), 22 deletions(-) diff --git a/xgitguard/common/github_calls.py b/xgitguard/common/github_calls.py index df79d6f..59af4ec 100644 --- a/xgitguard/common/github_calls.py +++ b/xgitguard/common/github_calls.py @@ -38,7 +38,7 @@ def __init__(self, base_url, token_env, commits_api_url, throttle_time=2): self._commits_api_url = commits_api_url self._throttle_time = throttle_time - def run_github_search(self, search_query, extension, org=[], repo=[]): + def run_github_search(self, search_query, extension, org=[], repo=[], search_archived = True, search_forked = True): """ Run the GitHub API search with given search query Get the items from the response content and Return @@ -73,7 +73,7 @@ def run_github_search(self, search_query, extension, org=[], repo=[]): if not extension or extension == "others" or len(extension) == 0: response = self.__github_api_get_params( - search_query, org_qualifiers, repo_qualifiers + search_query, org_qualifiers, repo_qualifiers, search_archived, search_forked ) elif self._token_env == "public": @@ -81,12 +81,16 @@ def run_github_search(self, search_query, extension, org=[], repo=[]): (search_query + " extension:" + extension), org_qualifiers, repo_qualifiers, + search_archived, + search_forked ) else: response = self.__github_api_get_params( (search_query + " extension:" + extension), org_qualifiers, repo_qualifiers, + search_archived, + search_forked ) if response: @@ -95,7 +99,7 @@ def run_github_search(self, search_query, extension, org=[], repo=[]): return [] def __github_api_get_params( - self, search_query, org_qualifiers="", repo_qualifiers="" + self, search_query, org_qualifiers="", repo_qualifiers="", search_archived = True, search_forked = True ): """ For the given GITHUB API url and search query, call the api @@ -132,13 +136,17 @@ def __github_api_get_params( elif len(repo_qualifiers) > 0: additional_qualifiers = repo_qualifiers + archive_filter = "" if search_archived else "NOT is:archived" + forked_filter = "" if search_forked else "NOT is:fork" + search_response = [] if additional_qualifiers: try: + q_string = f"{search_query} {additional_qualifiers} {archive_filter} {forked_filter}" response = requests.get( self._base_url, params={ - "q": f"{search_query} {additional_qualifiers}", + "q": q_string, "order": "desc", "sort": "indexed", "per_page": 100, @@ -149,10 +157,11 @@ def __github_api_get_params( logger.error(f"Github API call Error: {e}") else: try: + q_string = f"{search_query} {archive_filter} {forked_filter}" response = requests.get( self._base_url, params={ - "q": f"{search_query}", + "q": q_string, "order": "desc", "sort": "indexed", "per_page": 100, diff --git a/xgitguard/custom keyword search/enterprise_keyword_search.py b/xgitguard/custom keyword search/enterprise_keyword_search.py index c186e35..340a589 100644 --- a/xgitguard/custom keyword search/enterprise_keyword_search.py +++ b/xgitguard/custom keyword search/enterprise_keyword_search.py @@ -239,7 +239,7 @@ def format_search_query_list(secondary_keywords): return search_query_list -def run_detection(enterprise_keywords=[], org=[], repo=[]): +def run_detection(enterprise_keywords=[], org=[], repo=[], search_archived = True, search_forked = True): """ Run GitHub search If a Enterprise keyword is provided, perform the search using the Enterprise keyword. @@ -286,6 +286,8 @@ def run_detection(enterprise_keywords=[], org=[], repo=[]): "", org, repo, + search_archived, + search_forked ) # If search has detections, process the result urls else continue next search if search_response_lines: @@ -506,5 +508,5 @@ def arg_parser(): ) sys.exit(1) - run_detection(enterprise_keywords, org, repo) + run_detection(enterprise_keywords, org, repo, search_archived, search_forked) logger.info("xGitGuard Custom keyword search Process Completed") diff --git a/xgitguard/custom keyword search/public_keyword_search.py b/xgitguard/custom keyword search/public_keyword_search.py index b0b44ee..f23465b 100644 --- a/xgitguard/custom keyword search/public_keyword_search.py +++ b/xgitguard/custom keyword search/public_keyword_search.py @@ -221,7 +221,7 @@ def process_search_results(search_response_lines, search_query): return detection_writes_per_query, new_results_per_query, detections_per_query -def run_detection(public_keywords=[], org=[], repo=[]): +def run_detection(public_keywords=[], org=[], repo=[], search_archived = True, search_forked = True): """ Run GitHub search If a primary keyword is provided, perform the search using the primary keyword. @@ -264,6 +264,8 @@ def run_detection(public_keywords=[], org=[], repo=[]): "", org, repo, + search_archived, + search_forked ) # If search has detections, process the result urls else continue next search if search_response_lines: @@ -468,5 +470,5 @@ def arg_parser(): f"GitHub API Token Environment variable '{token_var}' not set. API Search will fail/return no results. Please Setup and retry" ) sys.exit(1) - run_detection(public_keywords, org, repo) + run_detection(public_keywords, org, repo, search_archived, search_forked) logger.info("xGitGuard custom keyword search Process Completed") diff --git a/xgitguard/github-enterprise/enterprise_cred_detections.py b/xgitguard/github-enterprise/enterprise_cred_detections.py index f90cf16..23b29b6 100644 --- a/xgitguard/github-enterprise/enterprise_cred_detections.py +++ b/xgitguard/github-enterprise/enterprise_cred_detections.py @@ -540,7 +540,7 @@ def format_search_query_list(secondary_keywords): def run_detection( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], search_archived = True, search_forked = True ): """ Run GitHub detections @@ -646,7 +646,7 @@ def run_detection( # Search GitHub and return search response confidence_score total_processed_search += 1 search_response_lines = githubCalls.run_github_search( - search_query, extension, org, repo + search_query, extension, org, repo, search_archived, search_forked ) # If search has detections, process the result urls else continue next search if search_response_lines: @@ -932,6 +932,6 @@ def arg_parser(): ) sys.exit(1) - run_detection(secondary_keywords, extensions, ml_prediction, org, repo) + run_detection(secondary_keywords, extensions, ml_prediction, org, repo, search_archived, search_forked) logger.info("xGitGuard Credentials Detection Process Completed") diff --git a/xgitguard/github-enterprise/enterprise_key_detections.py b/xgitguard/github-enterprise/enterprise_key_detections.py index 3723541..ff765c5 100644 --- a/xgitguard/github-enterprise/enterprise_key_detections.py +++ b/xgitguard/github-enterprise/enterprise_key_detections.py @@ -519,7 +519,7 @@ def format_search_query_list(secondary_keywords): def run_detection( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], search_archived = True, search_forked = True ): """ Run GitHub detections @@ -625,7 +625,7 @@ def run_detection( # Search GitHub and return search response confidence_score total_processed_search += 1 search_response_lines = githubCalls.run_github_search( - search_query, extension, org, repo + search_query, extension, org, repo, search_archived, search_forked ) # If search has detections, process the result urls else continue next search if search_response_lines: @@ -912,6 +912,6 @@ def arg_parser(): ) sys.exit(1) - run_detection(secondary_keywords, extensions, ml_prediction, org, repo) + run_detection(secondary_keywords, extensions, ml_prediction, org, repo, search_archived, search_forked) logger.info("xGitGuard Enterprise Keys and Token Detection Process Completed") diff --git a/xgitguard/github-public/public_cred_detections.py b/xgitguard/github-public/public_cred_detections.py index 107005e..0dd298c 100644 --- a/xgitguard/github-public/public_cred_detections.py +++ b/xgitguard/github-public/public_cred_detections.py @@ -544,6 +544,8 @@ def run_detection( ml_prediction=False, org=[], repo=[], + search_archived = True, + search_forked = True ): """ Run GitHub detections @@ -661,7 +663,7 @@ def run_detection( # Search GitHub and return search response confidence_score total_processed_search += 1 search_response_lines = githubCalls.run_github_search( - search_query, extension, org, repo + search_query, extension, org, repo, search_archived, search_forked ) # If search has detections, process the result urls else continue next search @@ -705,7 +707,7 @@ def run_detection( def run_detections_from_file( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], search_archived = True, search_forked = True ): """ Run detection for Primary Keywords present in the default config file @@ -737,6 +739,8 @@ def run_detections_from_file( ml_prediction, org, repo, + search_archived, + search_forked ) status = True except Exception as e: @@ -767,6 +771,8 @@ def run_detections_from_list( ml_prediction=False, org=[], repo=[], + search_archived = True, + search_forked = True ): """ Run detection for Primary Keywords present in the given input list @@ -815,6 +821,8 @@ def run_detections_from_list( ml_prediction, org, repo, + search_archived, + search_forked ) except Exception as e: logger.error(f"Process Error: {e}") @@ -1105,11 +1113,11 @@ def arg_parser(): if primary_keywords: run_detections_from_list( - primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo + primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo, search_archived, search_forked ) else: run_detections_from_file( - secondary_keywords, extensions, ml_prediction, org, repo + secondary_keywords, extensions, ml_prediction, org, repo, search_archived, search_forked ) logger.info("xGitGuard Credentials Detection Process Completed") diff --git a/xgitguard/github-public/public_key_detections.py b/xgitguard/github-public/public_key_detections.py index 574686a..d7109c7 100644 --- a/xgitguard/github-public/public_key_detections.py +++ b/xgitguard/github-public/public_key_detections.py @@ -517,6 +517,8 @@ def run_detection( ml_prediction=False, org=[], repo=[], + search_archived = True, + search_forked = True ): """ Run GitHub detections @@ -677,7 +679,7 @@ def run_detection( def run_detections_from_file( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], search_archived = True, search_forked = True ): """ Run detection for Primary Keywords present in the default config file @@ -709,6 +711,8 @@ def run_detections_from_file( ml_prediction, org, repo, + search_archived, + search_forked ) status = True except Exception as e: @@ -739,6 +743,8 @@ def run_detections_from_list( ml_prediction=False, org=[], repo=[], + search_archived = True, + search_forked = True ): """ Run detection for Primary Keywords present in the given input list @@ -787,6 +793,8 @@ def run_detections_from_list( ml_prediction, org, repo, + search_archived, + search_forked ) except Exception as e: logger.error(f"Process Error: {e}") @@ -1075,11 +1083,11 @@ def arg_parser(): if primary_keywords: run_detections_from_list( - primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo + primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo, search_archived, search_forked ) else: run_detections_from_file( - secondary_keywords, extensions, ml_prediction, org, repo + secondary_keywords, extensions, ml_prediction, org, repo, search_archived, search_forked ) logger.info("xGitGuard Keys and Token Detection Process Completed")