From b240008af311ce5041a76847327aa590dc9790a3 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 16:03:17 -0500 Subject: [PATCH 01/47] stress test --- augur/application/cli/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 439b359abf..36386b2f53 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -170,7 +170,7 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #60% of estimate, Maximum value of 45 - core_num_processes = determine_worker_processes(.6, 80) + core_num_processes = determine_worker_processes(.6, 160) logger.info(f"Starting core worker processes with concurrency={core_num_processes}") core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) From 58cecd4176b1432663de16769fd8aff45ca2556b Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 17:14:07 -0500 Subject: [PATCH 02/47] test --- augur/application/cli/backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 36386b2f53..388782db51 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -170,7 +170,7 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #60% of estimate, Maximum value of 45 - core_num_processes = determine_worker_processes(.6, 160) + core_num_processes = determine_worker_processes(.8, 160) logger.info(f"Starting core worker processes with concurrency={core_num_processes}") core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) @@ -184,7 +184,7 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.2, 40) + facade_num_processes = determine_worker_processes(.2, 20) logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" From 570c09ad8dfa0ebcaa74a57caf1ea80da229c547 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 17:21:48 -0500 Subject: [PATCH 03/47] tweaking --- augur/application/cli/backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 388782db51..a63fe9bd39 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -177,14 +177,14 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.2, 26) + secondary_num_processes = determine_worker_processes(.3, 26) logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.2, 20) + facade_num_processes = determine_worker_processes(.1, 20) logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" From e6bf1f1a7f72110c6879fd7f5e0072586760337d Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 18:34:20 -0500 Subject: [PATCH 04/47] keys not rotating fast enough --- augur/tasks/github/util/github_api_key_handler.py | 2 +- augur/tasks/util/random_key_auth.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 2406ecef00..3373c44a0d 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -39,7 +39,7 @@ def __init__(self, session: DatabaseSession): self.keys = self.get_api_keys() - # self.logger.debug(f"Retrieved {len(self.keys)} github api keys for use") + self.logger.info(f"Retrieved {len(self.keys)} github api keys for use") def get_random_key(self): """Retrieves a random key from the list of keys diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index 345067ec18..1e6e2b04fd 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -43,6 +43,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: # set the headers of the request with the new key request.headers[self.header_name] = key_string + self.logger.info(f"List of Keys: {self.list_of_keys}") else: self.logger.error(f"There are no valid keys to make a request with: {self.list_of_keys}") From dc00428686a6a4639cd1c3421c3a7ba01f28c7d7 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 19:09:52 -0500 Subject: [PATCH 05/47] optimizing core --- augur/application/cli/backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index a63fe9bd39..1ca7d574f3 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -170,21 +170,21 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #60% of estimate, Maximum value of 45 - core_num_processes = determine_worker_processes(.8, 160) + core_num_processes = determine_worker_processes(.9, 160) logger.info(f"Starting core worker processes with concurrency={core_num_processes}") core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.3, 26) + secondary_num_processes = determine_worker_processes(.01, 26) logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.1, 20) + facade_num_processes = determine_worker_processes(.1, 10) logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" From 487f7f9750c08adcbeab098c4d60de2c4dc2db2c Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 20:16:36 -0500 Subject: [PATCH 06/47] keys freeze --- augur/tasks/github/issues/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index cb2c9787b6..548eea97d0 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -29,6 +29,8 @@ def collect_issues(repo_git : str) -> int: augur_db = manifest.augur_db + logger.info(f'this is the manifest.key_auth value: {manifest.key_auth}') + try: query = augur_db.session.query(Repo).filter(Repo.repo_git == repo_git) From 0c040f7ab08c48e477b5544d9357b9a9b83694db Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 20:42:10 -0500 Subject: [PATCH 07/47] test --- augur/tasks/github/issues/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 548eea97d0..819f0303a1 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -29,7 +29,7 @@ def collect_issues(repo_git : str) -> int: augur_db = manifest.augur_db - logger.info(f'this is the manifest.key_auth value: {manifest.key_auth}') + logger.info(f'this is the manifest.key_auth value: {str(manifest.key_auth)}') try: From 30f777c057068b15b0009a02ec6d25a4ea59cc3b Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 21:19:22 -0500 Subject: [PATCH 08/47] try --- augur/tasks/github/issues/tasks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 819f0303a1..bb3364820a 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -5,6 +5,7 @@ from sqlalchemy.exc import IntegrityError +from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask @@ -37,10 +38,13 @@ def collect_issues(repo_git : str) -> int: repo_obj = execute_session_query(query, 'one') repo_id = repo_obj.repo_id + #try this + the_key = GithubApiKeyHandler.get_random_key(manifest.key_auth) + owner, repo = get_owner_repo(repo_git) - issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) - + #issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) + issue_data = retrieve_all_issue_data(repo_git, logger, the_key) if issue_data: total_issues = len(issue_data) From 6d4b07ed10573e7264076f1411515f57eb20fe7c Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 21:21:26 -0500 Subject: [PATCH 09/47] more conservative test. --- augur/tasks/github/issues/tasks.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index bb3364820a..342ddeda37 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -39,12 +39,15 @@ def collect_issues(repo_git : str) -> int: repo_id = repo_obj.repo_id #try this - the_key = GithubApiKeyHandler.get_random_key(manifest.key_auth) + try: + the_key = GithubApiKeyHandler.get_random_key(manifest.key_auth) + except Exception as e: + self.logger.info(f'error: {e}') owner, repo = get_owner_repo(repo_git) - #issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) - issue_data = retrieve_all_issue_data(repo_git, logger, the_key) + issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) + #issue_data = retrieve_all_issue_data(repo_git, logger, the_key) if issue_data: total_issues = len(issue_data) From 05cb234d5c25d247fb5d5c52689bdcd9d44e3584 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 21:35:50 -0500 Subject: [PATCH 10/47] try this! --- augur/tasks/github/issues/tasks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 342ddeda37..a5fd07be45 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -40,9 +40,12 @@ def collect_issues(repo_git : str) -> int: #try this try: - the_key = GithubApiKeyHandler.get_random_key(manifest.key_auth) + randomon = GithubApiKeyHandler() + the_key = randomon.get_random_key() + logger.info(f'The Random Key {the_key}') except Exception as e: - self.logger.info(f'error: {e}') + logger.info(f'error: {e}') + pass owner, repo = get_owner_repo(repo_git) From c53fa880581e4f6ded02acf9951c534f11dc0f53 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 21:45:43 -0500 Subject: [PATCH 11/47] a little more. --- augur/tasks/github/issues/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index a5fd07be45..aeefa3d7ca 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -40,7 +40,7 @@ def collect_issues(repo_git : str) -> int: #try this try: - randomon = GithubApiKeyHandler() + randomon = GithubApiKeyHandler(augur_db.session) the_key = randomon.get_random_key() logger.info(f'The Random Key {the_key}') except Exception as e: From 85711dfaa8a2e446009160572bd1aeb89ff92a69 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 21:58:50 -0500 Subject: [PATCH 12/47] interesting --- augur/tasks/github/issues/tasks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index aeefa3d7ca..2f87eb225b 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -39,18 +39,20 @@ def collect_issues(repo_git : str) -> int: repo_id = repo_obj.repo_id #try this + the_key = manifest.key_auth try: randomon = GithubApiKeyHandler(augur_db.session) the_key = randomon.get_random_key() logger.info(f'The Random Key {the_key}') except Exception as e: logger.info(f'error: {e}') + the_key = manifest.key_auth pass owner, repo = get_owner_repo(repo_git) - issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) - #issue_data = retrieve_all_issue_data(repo_git, logger, the_key) + #issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) + issue_data = retrieve_all_issue_data(repo_git, logger, the_key) if issue_data: total_issues = len(issue_data) From 0a91df747ef0bd0e96062e22c5249ab1f8913eb7 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 22:04:06 -0500 Subject: [PATCH 13/47] failed experiment --- augur/tasks/github/issues/tasks.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/augur/tasks/github/issues/tasks.py b/augur/tasks/github/issues/tasks.py index 2f87eb225b..5380b8bf10 100644 --- a/augur/tasks/github/issues/tasks.py +++ b/augur/tasks/github/issues/tasks.py @@ -39,20 +39,20 @@ def collect_issues(repo_git : str) -> int: repo_id = repo_obj.repo_id #try this - the_key = manifest.key_auth - try: - randomon = GithubApiKeyHandler(augur_db.session) - the_key = randomon.get_random_key() - logger.info(f'The Random Key {the_key}') - except Exception as e: - logger.info(f'error: {e}') - the_key = manifest.key_auth - pass + # the_key = manifest.key_auth + # try: + # randomon = GithubApiKeyHandler(augur_db.session) + # the_key = randomon.get_random_key() + # logger.info(f'The Random Key {the_key}') + # except Exception as e: + # logger.info(f'error: {e}') + # the_key = manifest.key_auth + # pass owner, repo = get_owner_repo(repo_git) - #issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) - issue_data = retrieve_all_issue_data(repo_git, logger, the_key) + issue_data = retrieve_all_issue_data(repo_git, logger, manifest.key_auth) + #issue_data = retrieve_all_issue_data(repo_git, logger, the_key) if issue_data: total_issues = len(issue_data) From 57bd526d4f6d6c911934b34c17afa949a7fadea7 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 22:21:36 -0500 Subject: [PATCH 14/47] try this. --- augur/tasks/github/util/github_api_key_handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 3373c44a0d..b672a7e7e6 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -71,6 +71,8 @@ def get_api_keys_from_database(self) -> List[str]: from augur.application.db.models import WorkerOauth select = WorkerOauth.access_token + # randomizing the order at db time + select.order_by(func.random()) where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github'] return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()] From 4347415adb38f69faab2664006f980f58c3eae74 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 22:35:09 -0500 Subject: [PATCH 15/47] perhaps --- augur/tasks/github/util/github_api_key_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index b672a7e7e6..1d57f2eee5 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -72,10 +72,10 @@ def get_api_keys_from_database(self) -> List[str]: select = WorkerOauth.access_token # randomizing the order at db time - select.order_by(func.random()) + #select.order_by(func.random()) where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github'] - return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()] + return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()] def get_api_keys(self) -> List[str]: From 48ea4444fef916e4fe42851c69acbcfe8d86bf29 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 22:44:15 -0500 Subject: [PATCH 16/47] import --- augur/tasks/github/util/github_api_key_handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 1d57f2eee5..93bb85f7e8 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -7,6 +7,7 @@ from augur.tasks.util.redis_list import RedisList from augur.application.db.session import DatabaseSession from augur.application.config import AugurConfig +from sqlalchemy import func class NoValidKeysError(Exception): From 23da65bc7cd19f10291d393e8f18cf53aca650c1 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 22:59:28 -0500 Subject: [PATCH 17/47] lighten the logs --- augur/tasks/util/random_key_auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index 1e6e2b04fd..7a4e32fcc0 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -43,7 +43,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: # set the headers of the request with the new key request.headers[self.header_name] = key_string - self.logger.info(f"List of Keys: {self.list_of_keys}") + #self.logger.info(f"List of Keys: {self.list_of_keys}") else: self.logger.error(f"There are no valid keys to make a request with: {self.list_of_keys}") From 453bf8b58d620ae927e8d3a873c271fc0fe02ef0 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 23:22:33 -0500 Subject: [PATCH 18/47] spop --- augur/tasks/util/redis_list.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/augur/tasks/util/redis_list.py b/augur/tasks/util/redis_list.py index 0a3eaa79fa..0137273c1e 100644 --- a/augur/tasks/util/redis_list.py +++ b/augur/tasks/util/redis_list.py @@ -168,8 +168,10 @@ def pop(self, index: int = None): """ if index is None: - - redis.rpop(self.redis_list_key) + # This will get a random index from the list and remove it, + # decreasing the likelihood of everyone using the same key all the time + #redis.rpop(self.redis_list_key) + redis.spop(self.redis_list_key) else: # calls __delitem__ From 024a38986b886dea89c5d9a2337543b5cbe31d5c Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 27 Aug 2023 23:49:29 -0500 Subject: [PATCH 19/47] random shuffle --- augur/tasks/github/util/github_api_key_handler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 93bb85f7e8..a52d9a8414 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -133,6 +133,10 @@ def get_api_keys(self) -> List[str]: if not valid_keys: raise NoValidKeysError("No valid github api keys found in the config or worker oauth table") + + # shuffling the keys so not all processes get the same keys in the same order + valid_keys = random.shuffle(valid_keys) + return valid_keys def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool: From 060d4620cd9360be689981bffb98524d4aec3e0a Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 00:00:02 -0500 Subject: [PATCH 20/47] finding the error of my ways. --- augur/tasks/github/util/github_api_key_handler.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index a52d9a8414..a3d5c59440 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -135,7 +135,13 @@ def get_api_keys(self) -> List[str]: # shuffling the keys so not all processes get the same keys in the same order - valid_keys = random.shuffle(valid_keys) + try: + self.logger.info(f'valid keys before shuffle: {valid_keys}') + valid_keys = random.shuffle(valid_keys) + self.logger.info(f'valid keys AFTER shuffle: {valid_keys}') + except Exception as e: + self.logger.info(f'{e}') + pass return valid_keys From 833221b6d2822cc63040cd216bc0e1e82ed04895 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 00:02:13 -0500 Subject: [PATCH 21/47] more checking --- augur/tasks/github/util/github_api_key_handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index a3d5c59440..ec78d56729 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -135,12 +135,14 @@ def get_api_keys(self) -> List[str]: # shuffling the keys so not all processes get the same keys in the same order + valid_now = valid_keys try: self.logger.info(f'valid keys before shuffle: {valid_keys}') valid_keys = random.shuffle(valid_keys) self.logger.info(f'valid keys AFTER shuffle: {valid_keys}') except Exception as e: self.logger.info(f'{e}') + valid_keys = valid_now pass return valid_keys From 18d1c46e80ff49bdad3c70998a5d2d79b18b47d9 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 00:08:19 -0500 Subject: [PATCH 22/47] sample! --- augur/tasks/github/util/github_api_key_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index ec78d56729..073eb36c49 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -138,7 +138,7 @@ def get_api_keys(self) -> List[str]: valid_now = valid_keys try: self.logger.info(f'valid keys before shuffle: {valid_keys}') - valid_keys = random.shuffle(valid_keys) + valid_keys = random.sample(valid_keys, len(valid_keys)) self.logger.info(f'valid keys AFTER shuffle: {valid_keys}') except Exception as e: self.logger.info(f'{e}') From 437733f7fb649df26229d69f14c41f4b0d459a76 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 00:23:03 -0500 Subject: [PATCH 23/47] changed output to debug --- augur/tasks/github/util/github_api_key_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 073eb36c49..fa42f01807 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -137,11 +137,11 @@ def get_api_keys(self) -> List[str]: # shuffling the keys so not all processes get the same keys in the same order valid_now = valid_keys try: - self.logger.info(f'valid keys before shuffle: {valid_keys}') + self.logger.debug(f'valid keys before shuffle: {valid_keys}') valid_keys = random.sample(valid_keys, len(valid_keys)) - self.logger.info(f'valid keys AFTER shuffle: {valid_keys}') + self.logger.debug(f'valid keys AFTER shuffle: {valid_keys}') except Exception as e: - self.logger.info(f'{e}') + self.logger.debug(f'{e}') valid_keys = valid_now pass From 0651965a5f4513a748fd5e4035fab3e7167466ba Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 00:26:09 -0500 Subject: [PATCH 24/47] Renormalized task type share --- augur/application/cli/backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 1ca7d574f3..1f74d77bda 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -170,21 +170,21 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #60% of estimate, Maximum value of 45 - core_num_processes = determine_worker_processes(.9, 160) + core_num_processes = determine_worker_processes(.6, 80) logger.info(f"Starting core worker processes with concurrency={core_num_processes}") core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.01, 26) + secondary_num_processes = determine_worker_processes(.2, 26) logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.1, 10) + facade_num_processes = determine_worker_processes(.2, 35) logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" From 6a6214ccc1fffb81992d6244cb72540e63f01546 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 01:35:20 -0500 Subject: [PATCH 25/47] consistency in the maxes. --- augur/tasks/start_tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 1918efbf8a..9b643f51a8 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -261,16 +261,16 @@ def augur_collection_monitor(): enabled_phase_names = get_enabled_phase_names_from_config(session.logger, session) if primary_repo_collect_phase.__name__ in enabled_phase_names: - start_primary_collection(session, max_repo=40) + start_primary_collection(session, max_repo=80) if secondary_repo_collect_phase.__name__ in enabled_phase_names: - start_secondary_collection(session, max_repo=10) + start_secondary_collection(session, max_repo=26) if facade_phase.__name__ in enabled_phase_names: - start_facade_collection(session, max_repo=30) + start_facade_collection(session, max_repo=35) if machine_learning_phase.__name__ in enabled_phase_names: - start_ml_collection(session,max_repo=5) + start_ml_collection(session,max_repo=2) # have a pipe of 180 From 19949b824b098821dac658841cf99f1a7254d7ce Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 02:41:06 -0500 Subject: [PATCH 26/47] Another randomization to prevent keylock. --- augur/tasks/github/util/github_random_key_auth.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/tasks/github/util/github_random_key_auth.py b/augur/tasks/github/util/github_random_key_auth.py index 158d578a7c..d9f3481bae 100644 --- a/augur/tasks/github/util/github_random_key_auth.py +++ b/augur/tasks/github/util/github_random_key_auth.py @@ -3,6 +3,7 @@ from augur.tasks.util.random_key_auth import RandomKeyAuth from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from augur.application.db.session import DatabaseSession +import random class GithubRandomKeyAuth(RandomKeyAuth): @@ -16,6 +17,7 @@ def __init__(self, session: DatabaseSession, logger): # gets the github api keys from the database via the GithubApiKeyHandler github_api_keys = GithubApiKeyHandler(session).keys + github_api_keys = random.sample(github_api_keys, len(github_api_keys)) if not github_api_keys: print("Failed to find github api keys. This is usually because your key has expired") From be0dd4f177fce284606d718e5552341d42dda217 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 14:41:21 -0500 Subject: [PATCH 27/47] Dial it back on mortal hardware. --- augur/tasks/start_tasks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 9b643f51a8..6d52db8299 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -261,16 +261,16 @@ def augur_collection_monitor(): enabled_phase_names = get_enabled_phase_names_from_config(session.logger, session) if primary_repo_collect_phase.__name__ in enabled_phase_names: - start_primary_collection(session, max_repo=80) + start_primary_collection(session, max_repo=30) if secondary_repo_collect_phase.__name__ in enabled_phase_names: - start_secondary_collection(session, max_repo=26) + start_secondary_collection(session, max_repo=10) if facade_phase.__name__ in enabled_phase_names: - start_facade_collection(session, max_repo=35) + start_facade_collection(session, max_repo=20) if machine_learning_phase.__name__ in enabled_phase_names: - start_ml_collection(session,max_repo=2) + start_ml_collection(session,max_repo=1) # have a pipe of 180 From 537e46942cb1a01cba3e5c7d85edf948928bfab6 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 28 Aug 2023 14:47:04 -0500 Subject: [PATCH 28/47] Dialing it down. --- augur/application/cli/backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 1f74d77bda..548c1eeff4 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -170,21 +170,21 @@ def determine_worker_processes(ratio,maximum): sleep_time += 6 #60% of estimate, Maximum value of 45 - core_num_processes = determine_worker_processes(.6, 80) + core_num_processes = determine_worker_processes(.6, 45) logger.info(f"Starting core worker processes with concurrency={core_num_processes}") core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.2, 26) + secondary_num_processes = determine_worker_processes(.2, 25) logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.2, 35) + facade_num_processes = determine_worker_processes(.2, 20) logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" From c438717592fb096bbb20b4a48bda8fb48fcbc69f Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 30 Aug 2023 09:08:58 -0500 Subject: [PATCH 29/47] Fixing logic in the repo_info commit_count query to refer to the default branch --- augur/tasks/github/repo_info/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/tasks/github/repo_info/core.py b/augur/tasks/github/repo_info/core.py index 50fa88068e..aa97b9c9e3 100644 --- a/augur/tasks/github/repo_info/core.py +++ b/augur/tasks/github/repo_info/core.py @@ -150,10 +150,10 @@ def repo_info_model(augur_db, key_auth, repo_orm_obj, logger): pr_merged: pullRequests(states: MERGED) { totalCount } - ref(qualifiedName: "master") { + defaultBranchRef { target { ... on Commit { - history(first: 0){ + history { totalCount } } @@ -256,7 +256,7 @@ def repo_info_model(augur_db, key_auth, repo_orm_obj, logger): 'pull_requests_closed': data['pr_closed']['totalCount'] if data['pr_closed'] else None, 'pull_requests_merged': data['pr_merged']['totalCount'] if data['pr_merged'] else None, 'tool_source': 'Repo_info Model', - 'tool_version': '0.42', + 'tool_version': '0.50.0', 'data_source': "Github" } From 3dd65ff4fdb7807b02b8485df9beb61db2431424 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 30 Aug 2023 09:28:50 -0500 Subject: [PATCH 30/47] fixed insert logic on repo_info for commit_count to refer to new variable: defaultBranchRef --- augur/tasks/github/repo_info/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/repo_info/core.py b/augur/tasks/github/repo_info/core.py index aa97b9c9e3..50142f614e 100644 --- a/augur/tasks/github/repo_info/core.py +++ b/augur/tasks/github/repo_info/core.py @@ -248,7 +248,7 @@ def repo_info_model(augur_db, key_auth, repo_orm_obj, logger): 'security_audit_file': None, 'status': None, 'keywords': None, - 'commit_count': data['ref']['target']['history']['totalCount'] if data['ref'] else None, + 'commit_count': data['defaultBranchRef']['target']['history']['totalCount'] if data['defaultBranchRef'] else None, 'issues_count': data['issue_count']['totalCount'] if data['issue_count'] else None, 'issues_closed': data['issues_closed']['totalCount'] if data['issues_closed'] else None, 'pull_request_count': data['pr_count']['totalCount'] if data['pr_count'] else None, From cc2d62ea3f4971ce404957f954a56f49fff2dee1 Mon Sep 17 00:00:00 2001 From: sgoggins Date: Thu, 31 Aug 2023 12:17:47 -0500 Subject: [PATCH 31/47] view fixing. --- .../alembic/versions/25_unique_on_mataview.py | 210 ++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 augur/application/schema/alembic/versions/25_unique_on_mataview.py diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py new file mode 100644 index 0000000000..9bc79b29b7 --- /dev/null +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -0,0 +1,210 @@ +"""a unique index on a materialized view allows it to be refreshed concurrently, preventing blocking behavior + +Revision ID: 25 +Revises: 24 +Create Date: 2023-08-23 18:17:22.651191 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy import text + +# revision identifiers, used by Alembic. +revision = '22' +down_revision = '21' +branch_labels = None +depends_on = None + +# CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); +# CREATE UNIQUE INDEX ON augur_data.api_get_all_repos_commits(repo_id); + +# select cntrb_id, repo_id, month, year, rank, count(*) as counter from augur_new_contributors group by cntrb_id, repo_id, month, year, rank order by counter desc; + +def upgrade(): + + add_uniques_onMV() + +def downgrade(): + + upgrade=False + + add_uniques_onMV(upgrade) + +def add_fix_keys_22(upgrade=True): + + if upgrade: + + conn = op.get_bind() + conn.execute(text("""CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id);""")) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.api_get_all_repos_commits(repo_id); """)) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.api_get_all_repos_issues(repo_id); """)) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + + + conn = op.get_bind() + conn.execute(text(""" + create materialized view augur_data.explorer_contributor_actions as + SELECT + A.ID AS cntrb_id, + A.created_at, + A.repo_id, + A.ACTION, + repo.repo_name, + A.LOGIN, + DENSE_RANK() OVER(PARTITION BY A.ID, A.repo_id ORDER BY A.created_at) AS RANK + FROM ( + select + commits.cmt_ght_author_id AS ID, + commits.cmt_author_timestamp AS created_at, + commits.repo_id, + 'commit' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + ( augur_data.commits LEFT JOIN augur_data.contributors ON ( ( ( contributors.cntrb_id ) :: TEXT = ( commits.cmt_ght_author_id ) :: TEXT ) ) ) + GROUP BY + commits.cmt_commit_hash, + commits.cmt_ght_author_id, + commits.repo_id, + commits.cmt_author_timestamp, + 'commit' :: TEXT, + contributors.cntrb_login + UNION all + SELECT + issues.reporter_id AS ID, + issues.created_at, + issues.repo_id, + 'issue_opened' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + ( augur_data.issues LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = issues.reporter_id ) ) ) + WHERE + ( issues.pull_request IS NULL ) + UNION ALL + SELECT + pull_request_events.cntrb_id AS ID, + pull_request_events.created_at, + pull_requests.repo_id, + 'pull_request_closed' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + augur_data.pull_requests, + ( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) ) + WHERE + pull_requests.pull_request_id = pull_request_events.pull_request_id + AND pull_requests.pr_merged_at IS NULL + AND ( ( pull_request_events.ACTION ) :: TEXT = 'closed' :: TEXT ) + UNION ALL + SELECT + pull_request_events.cntrb_id AS ID, + pull_request_events.created_at, + pull_requests.repo_id, + 'pull_request_merged' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + augur_data.pull_requests, + ( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) ) + WHERE + pull_requests.pull_request_id = pull_request_events.pull_request_id + AND ( ( pull_request_events.ACTION ) :: TEXT = 'merged' :: TEXT ) + UNION ALL + SELECT + issue_events.cntrb_id AS ID, + issue_events.created_at, + issues.repo_id, + 'issue_closed' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + augur_data.issues, + augur_data.issue_events + LEFT JOIN augur_data.contributors ON contributors.cntrb_id = issue_events.cntrb_id + WHERE + issues.issue_id = issue_events.issue_id + AND issues.pull_request IS NULL + AND ( ( issue_events.ACTION ) :: TEXT = 'closed' :: TEXT ) + UNION ALL + SELECT + pull_request_reviews.cntrb_id AS ID, + pull_request_reviews.pr_review_submitted_at AS created_at, + pull_requests.repo_id, + ( 'pull_request_review_' :: TEXT || ( pull_request_reviews.pr_review_state ) :: TEXT ) AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + augur_data.pull_requests, + augur_data.pull_request_reviews + LEFT JOIN augur_data.contributors ON contributors.cntrb_id = pull_request_reviews.cntrb_id + WHERE + pull_requests.pull_request_id = pull_request_reviews.pull_request_id + UNION ALL + SELECT + pull_requests.pr_augur_contributor_id AS ID, + pull_requests.pr_created_at AS created_at, + pull_requests.repo_id, + 'pull_request_open' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + augur_data.pull_requests + LEFT JOIN augur_data.contributors ON pull_requests.pr_augur_contributor_id = contributors.cntrb_id + UNION ALL + SELECT + message.cntrb_id AS ID, + message.msg_timestamp AS created_at, + pull_requests.repo_id, + 'pull_request_comment' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + augur_data.pull_requests, + augur_data.pull_request_message_ref, + augur_data.message + LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id + WHERE + pull_request_message_ref.pull_request_id = pull_requests.pull_request_id + AND pull_request_message_ref.msg_id = message.msg_id + UNION ALL + SELECT + issues.reporter_id AS ID, + message.msg_timestamp AS created_at, + issues.repo_id, + 'issue_comment' :: TEXT AS ACTION, + contributors.cntrb_login AS LOGIN + FROM + augur_data.issues, + augur_data.issue_message_ref, + augur_data.message + LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id + WHERE + issue_message_ref.msg_id = message.msg_id + AND issues.issue_id = issue_message_ref.issue_id + AND issues.closed_at != message.msg_timestamp + ) A, + augur_data.repo + WHERE + A.repo_id = repo.repo_id + ORDER BY + A.created_at DESC""")) + From 9619b6e0f528b2f1e9d821e884d9e40dc51d927b Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 31 Aug 2023 13:25:18 -0500 Subject: [PATCH 32/47] materialized view maintenance. Adding unique keys to enable concurrent refreshes to end blocking during refresh; Changed all but one to do a concurrent refresh. Still looking for what may be unique. Got rid of some Libyear related views that aren't quite what we want. --- .../alembic/versions/25_unique_on_mataview.py | 165 ++---------------- augur/tasks/db/refresh_materialized_views.py | 12 +- 2 files changed, 18 insertions(+), 159 deletions(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 9bc79b29b7..2afd52f953 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -11,15 +11,11 @@ from sqlalchemy import text # revision identifiers, used by Alembic. -revision = '22' -down_revision = '21' +revision = '25' +down_revision = '24' branch_labels = None depends_on = None -# CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); -# CREATE UNIQUE INDEX ON augur_data.api_get_all_repos_commits(repo_id); - -# select cntrb_id, repo_id, month, year, rank, count(*) as counter from augur_new_contributors group by cntrb_id, repo_id, month, year, rank order by counter desc; def upgrade(): @@ -48,163 +44,26 @@ def add_fix_keys_22(upgrade=True): conn = op.get_bind() conn.execute(text(""" - CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + CREATE UNIQUE INDEX ON augur_data.augur_new_contributors( cntrb_id, repo_id, month, login, year, rank); """)) conn = op.get_bind() conn.execute(text(""" - CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + CREATE UNIQUE INDEX ON augur_data.explorer_commits_and_committers_daily_count( repo_id, cmt_committer_date); """)) conn = op.get_bind() conn.execute(text(""" - CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + CREATE UNIQUE INDEX ON augur_data.explorer_new_contributors(cntrb_id, created_at, month, year, repo_id, login, rank); """)) conn = op.get_bind() conn.execute(text(""" - CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) - - conn = op.get_bind() - conn.execute(text(""" - CREATE UNIQUE INDEX ON augur_data.api_get_all_repo_prs(repo_id); """)) + CREATE UNIQUE INDEX ON augur_data.explorer_entry_list(repo_id); """)) + # conn = op.get_bind() + # conn.execute(text(""" + # CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(); """)) conn = op.get_bind() conn.execute(text(""" - create materialized view augur_data.explorer_contributor_actions as - SELECT - A.ID AS cntrb_id, - A.created_at, - A.repo_id, - A.ACTION, - repo.repo_name, - A.LOGIN, - DENSE_RANK() OVER(PARTITION BY A.ID, A.repo_id ORDER BY A.created_at) AS RANK - FROM ( - select - commits.cmt_ght_author_id AS ID, - commits.cmt_author_timestamp AS created_at, - commits.repo_id, - 'commit' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - ( augur_data.commits LEFT JOIN augur_data.contributors ON ( ( ( contributors.cntrb_id ) :: TEXT = ( commits.cmt_ght_author_id ) :: TEXT ) ) ) - GROUP BY - commits.cmt_commit_hash, - commits.cmt_ght_author_id, - commits.repo_id, - commits.cmt_author_timestamp, - 'commit' :: TEXT, - contributors.cntrb_login - UNION all - SELECT - issues.reporter_id AS ID, - issues.created_at, - issues.repo_id, - 'issue_opened' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - ( augur_data.issues LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = issues.reporter_id ) ) ) - WHERE - ( issues.pull_request IS NULL ) - UNION ALL - SELECT - pull_request_events.cntrb_id AS ID, - pull_request_events.created_at, - pull_requests.repo_id, - 'pull_request_closed' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - augur_data.pull_requests, - ( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) ) - WHERE - pull_requests.pull_request_id = pull_request_events.pull_request_id - AND pull_requests.pr_merged_at IS NULL - AND ( ( pull_request_events.ACTION ) :: TEXT = 'closed' :: TEXT ) - UNION ALL - SELECT - pull_request_events.cntrb_id AS ID, - pull_request_events.created_at, - pull_requests.repo_id, - 'pull_request_merged' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - augur_data.pull_requests, - ( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) ) - WHERE - pull_requests.pull_request_id = pull_request_events.pull_request_id - AND ( ( pull_request_events.ACTION ) :: TEXT = 'merged' :: TEXT ) - UNION ALL - SELECT - issue_events.cntrb_id AS ID, - issue_events.created_at, - issues.repo_id, - 'issue_closed' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - augur_data.issues, - augur_data.issue_events - LEFT JOIN augur_data.contributors ON contributors.cntrb_id = issue_events.cntrb_id - WHERE - issues.issue_id = issue_events.issue_id - AND issues.pull_request IS NULL - AND ( ( issue_events.ACTION ) :: TEXT = 'closed' :: TEXT ) - UNION ALL - SELECT - pull_request_reviews.cntrb_id AS ID, - pull_request_reviews.pr_review_submitted_at AS created_at, - pull_requests.repo_id, - ( 'pull_request_review_' :: TEXT || ( pull_request_reviews.pr_review_state ) :: TEXT ) AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - augur_data.pull_requests, - augur_data.pull_request_reviews - LEFT JOIN augur_data.contributors ON contributors.cntrb_id = pull_request_reviews.cntrb_id - WHERE - pull_requests.pull_request_id = pull_request_reviews.pull_request_id - UNION ALL - SELECT - pull_requests.pr_augur_contributor_id AS ID, - pull_requests.pr_created_at AS created_at, - pull_requests.repo_id, - 'pull_request_open' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - augur_data.pull_requests - LEFT JOIN augur_data.contributors ON pull_requests.pr_augur_contributor_id = contributors.cntrb_id - UNION ALL - SELECT - message.cntrb_id AS ID, - message.msg_timestamp AS created_at, - pull_requests.repo_id, - 'pull_request_comment' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - augur_data.pull_requests, - augur_data.pull_request_message_ref, - augur_data.message - LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - WHERE - pull_request_message_ref.pull_request_id = pull_requests.pull_request_id - AND pull_request_message_ref.msg_id = message.msg_id - UNION ALL - SELECT - issues.reporter_id AS ID, - message.msg_timestamp AS created_at, - issues.repo_id, - 'issue_comment' :: TEXT AS ACTION, - contributors.cntrb_login AS LOGIN - FROM - augur_data.issues, - augur_data.issue_message_ref, - augur_data.message - LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - WHERE - issue_message_ref.msg_id = message.msg_id - AND issues.issue_id = issue_message_ref.issue_id - AND issues.closed_at != message.msg_timestamp - ) A, - augur_data.repo - WHERE - A.repo_id = repo.repo_id - ORDER BY - A.created_at DESC""")) - + drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_all; + drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_detail; + drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_summary; """)) diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py index 53b29ddbd2..bdf146116f 100644 --- a/augur/tasks/db/refresh_materialized_views.py +++ b/augur/tasks/db/refresh_materialized_views.py @@ -16,15 +16,15 @@ def refresh_materialized_views(): logger = logging.getLogger(refresh_materialized_views.__name__) refresh_view_query = s.sql.text(""" - REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_issues with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_commits_and_committers_daily_count with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repo_prs with data; REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_commits with data; + REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_issues with data; REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_actions with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_all with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_detail with data; + REFRESH MATERIALIZED VIEW augur_data.explorer_commits_and_committers_daily_count with data; + REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data; + REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data; REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_summary with data; + REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data; """) with DatabaseSession(logger, engine) as session: From aa59483bb36475ab2a67c05cbd9796c2692521b7 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 08:48:51 -0500 Subject: [PATCH 33/47] updated materialized view refresh Signed-off-by: Sean P. Goggins --- augur/tasks/db/refresh_materialized_views.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py index bdf146116f..9b3c03f243 100644 --- a/augur/tasks/db/refresh_materialized_views.py +++ b/augur/tasks/db/refresh_materialized_views.py @@ -17,14 +17,14 @@ def refresh_materialized_views(): refresh_view_query = s.sql.text(""" REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repo_prs with data; - REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_commits with data; - REFRESH MATERIALIZED VIEW augur_data.api_get_all_repos_issues with data; - REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_commits_and_committers_daily_count with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repos_commits with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repos_issues with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.augur_new_contributors with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_commits_and_committers_daily_count with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_new_contributors with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_entry_list with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_new_contributors with data; + REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_actions with data; """) with DatabaseSession(logger, engine) as session: From 8364aee2504c0d352c3448e40252a6079b6f8493 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 11:51:32 -0500 Subject: [PATCH 34/47] updated schema refresh to include the final materialized view Signed-off-by: Sean P. Goggins --- .../alembic/versions/25_unique_on_mataview.py | 115 +++++++++++++++++- 1 file changed, 109 insertions(+), 6 deletions(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 2afd52f953..120d631fc0 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -58,12 +58,115 @@ def add_fix_keys_22(upgrade=True): conn.execute(text(""" CREATE UNIQUE INDEX ON augur_data.explorer_entry_list(repo_id); """)) - # conn = op.get_bind() - # conn.execute(text(""" - # CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(); """)) + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); """)) conn = op.get_bind() conn.execute(text(""" - drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_all; - drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_detail; - drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_summary; """)) + drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_all; + drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_detail; + drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_summary; + drop MATERIALIZED VIEW if exists augur_data.explorer_contributor_actions; + + create materialized view augur_data.explorer_contributor_actions as + SELECT a.id AS cntrb_id, + a.created_at, + a.repo_id, + a.action, + repo.repo_name, + a.login, + row_number() OVER (PARTITION BY a.id, a.repo_id ORDER BY a.created_at desc) AS rank + FROM ( SELECT commits.cmt_ght_author_id AS id, + commits.cmt_author_timestamp AS created_at, + commits.repo_id, + 'commit'::text AS action, + contributors.cntrb_login AS login + FROM (augur_data.commits + LEFT JOIN augur_data.contributors ON (((contributors.cntrb_id)::text = (commits.cmt_ght_author_id)::text))) + GROUP BY commits.cmt_commit_hash, commits.cmt_ght_author_id, commits.repo_id, commits.cmt_author_timestamp, 'commit'::text, contributors.cntrb_login + UNION ALL + SELECT issues.reporter_id AS id, + issues.created_at, + issues.repo_id, + 'issue_opened'::text AS action, + contributors.cntrb_login AS login + FROM (augur_data.issues + LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = issues.reporter_id))) + WHERE (issues.pull_request IS NULL) + UNION ALL + SELECT pull_request_events.cntrb_id AS id, + pull_request_events.created_at, + pull_requests.repo_id, + 'pull_request_closed'::text AS action, + contributors.cntrb_login AS login + FROM augur_data.pull_requests, + (augur_data.pull_request_events + LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = pull_request_events.cntrb_id))) + WHERE ((pull_requests.pull_request_id = pull_request_events.pull_request_id) AND (pull_requests.pr_merged_at IS NULL) AND ((pull_request_events.action)::text = 'closed'::text)) + UNION ALL + SELECT pull_request_events.cntrb_id AS id, + pull_request_events.created_at, + pull_requests.repo_id, + 'pull_request_merged'::text AS action, + contributors.cntrb_login AS login + FROM augur_data.pull_requests, + (augur_data.pull_request_events + LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = pull_request_events.cntrb_id))) + WHERE ((pull_requests.pull_request_id = pull_request_events.pull_request_id) AND ((pull_request_events.action)::text = 'merged'::text)) + UNION ALL + SELECT issue_events.cntrb_id AS id, + issue_events.created_at, + issues.repo_id, + 'issue_closed'::text AS action, + contributors.cntrb_login AS login + FROM augur_data.issues, + (augur_data.issue_events + LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = issue_events.cntrb_id))) + WHERE ((issues.issue_id = issue_events.issue_id) AND (issues.pull_request IS NULL) AND ((issue_events.action)::text = 'closed'::text)) + UNION ALL + SELECT pull_request_reviews.cntrb_id AS id, + pull_request_reviews.pr_review_submitted_at AS created_at, + pull_requests.repo_id, + ('pull_request_review_'::text || (pull_request_reviews.pr_review_state)::text) AS action, + contributors.cntrb_login AS login + FROM augur_data.pull_requests, + (augur_data.pull_request_reviews + LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = pull_request_reviews.cntrb_id))) + WHERE (pull_requests.pull_request_id = pull_request_reviews.pull_request_id) + UNION ALL + SELECT pull_requests.pr_augur_contributor_id AS id, + pull_requests.pr_created_at AS created_at, + pull_requests.repo_id, + 'pull_request_open'::text AS action, + contributors.cntrb_login AS login + FROM (augur_data.pull_requests + LEFT JOIN augur_data.contributors ON ((pull_requests.pr_augur_contributor_id = contributors.cntrb_id))) + UNION ALL + SELECT message.cntrb_id AS id, + message.msg_timestamp AS created_at, + pull_requests.repo_id, + 'pull_request_comment'::text AS action, + contributors.cntrb_login AS login + FROM augur_data.pull_requests, + augur_data.pull_request_message_ref, + (augur_data.message + LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = message.cntrb_id))) + WHERE ((pull_request_message_ref.pull_request_id = pull_requests.pull_request_id) AND (pull_request_message_ref.msg_id = message.msg_id)) + UNION ALL + SELECT issues.reporter_id AS id, + message.msg_timestamp AS created_at, + issues.repo_id, + 'issue_comment'::text AS action, + contributors.cntrb_login AS login + FROM augur_data.issues, + augur_data.issue_message_ref, + (augur_data.message + LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = message.cntrb_id))) + WHERE ((issue_message_ref.msg_id = message.msg_id) AND (issues.issue_id = issue_message_ref.issue_id) AND (issues.closed_at <> message.msg_timestamp))) a, + augur_data.repo + WHERE (a.repo_id = repo.repo_id) + ORDER BY a.created_at DESC; + + CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); + """)) From 2bf0cd957cd5f09d980bc5d929a2e96c5c9e2f47 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 11:52:28 -0500 Subject: [PATCH 35/47] updating explorer_contributor_actions concurrently now that it has a materialized view index Signed-off-by: Sean P. Goggins --- augur/tasks/db/refresh_materialized_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py index 9b3c03f243..a5ea157c4d 100644 --- a/augur/tasks/db/refresh_materialized_views.py +++ b/augur/tasks/db/refresh_materialized_views.py @@ -24,7 +24,7 @@ def refresh_materialized_views(): REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_new_contributors with data; REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_entry_list with data; REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_new_contributors with data; - REFRESH MATERIALIZED VIEW augur_data.explorer_contributor_actions with data; + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_actions with data; """) with DatabaseSession(logger, engine) as session: From 4e7475ec4cadbb4da80b7067b8e23e31756b431f Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:07:31 -0500 Subject: [PATCH 36/47] config now defaults the refresh for materialized views back to one day, since they can now be refreshed concurrently Signed-off-by: Sean P. Goggins --- augur/application/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index c9aff085b1..7cf1eca3fb 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -68,7 +68,7 @@ def get_development_flag(): }, "Celery": { "worker_process_vmem_cap": 0.25, - "refresh_materialized_views_interval_in_days": 7 + "refresh_materialized_views_interval_in_days": 1 }, "Redis": { "cache_group": 0, From 69e0b426fe4ce78a44c6cafb4ed287b40db8858d Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:11:16 -0500 Subject: [PATCH 37/47] included an update to the refresh_materialized_views_interval_in_days setting now that the refreshes are concurrent and will not lock the underlying tables Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 120d631fc0..f33d0dfbcf 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -169,4 +169,6 @@ def add_fix_keys_22(upgrade=True): ORDER BY a.created_at DESC; CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); + + update augur_operations.config set value=str(1) where setting_name = 'refresh_materialized_views_interval_in_days'; """)) From d7778aef9c422d5d8868f7ab093996c2e26fcedb Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:12:14 -0500 Subject: [PATCH 38/47] syntax fix in sql Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index f33d0dfbcf..308e70f234 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -170,5 +170,5 @@ def add_fix_keys_22(upgrade=True): CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); - update augur_operations.config set value=str(1) where setting_name = 'refresh_materialized_views_interval_in_days'; + update augur_operations.config set value='1' where setting_name = 'refresh_materialized_views_interval_in_days'; """)) From fc69726b1c4595c7a7c2e7ca35555f5aa80e8303 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:13:51 -0500 Subject: [PATCH 39/47] updated methods names in alembic script Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 308e70f234..865b8f8588 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -19,15 +19,15 @@ def upgrade(): - add_uniques_onMV() + add_fix_keys_25() def downgrade(): upgrade=False - add_uniques_onMV(upgrade) + add_fix_keys_25(upgrade) -def add_fix_keys_22(upgrade=True): +def add_fix_keys_25(upgrade=True): if upgrade: From f9ec47cbcc0a5c26dd837392be3b4470bc722149 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:18:37 -0500 Subject: [PATCH 40/47] had an extra space in the indents in one of the changed db alembic scripts Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 865b8f8588..876fffe245 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -58,8 +58,8 @@ def add_fix_keys_25(upgrade=True): conn.execute(text(""" CREATE UNIQUE INDEX ON augur_data.explorer_entry_list(repo_id); """)) - conn = op.get_bind() - conn.execute(text(""" + conn = op.get_bind() + conn.execute(text(""" CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); """)) conn = op.get_bind() From 087ff339fd447fa09eddf08ce9c2bdc4bd5f098e Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:21:16 -0500 Subject: [PATCH 41/47] fix' Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 876fffe245..02b7a59276 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -68,7 +68,10 @@ def add_fix_keys_25(upgrade=True): drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_detail; drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_summary; drop MATERIALIZED VIEW if exists augur_data.explorer_contributor_actions; + """) + conn = op.get_bind() + conn.execute(text(""" create materialized view augur_data.explorer_contributor_actions as SELECT a.id AS cntrb_id, a.created_at, From fd2ef2fe53630bb5a4b0f34c40f8bb97c64d84e1 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:22:15 -0500 Subject: [PATCH 42/47] fix' Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 02b7a59276..90449192a5 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -67,8 +67,7 @@ def add_fix_keys_25(upgrade=True): drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_all; drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_detail; drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_summary; - drop MATERIALIZED VIEW if exists augur_data.explorer_contributor_actions; - """) + drop MATERIALIZED VIEW if exists augur_data.explorer_contributor_actions; """) conn = op.get_bind() conn.execute(text(""" @@ -173,5 +172,4 @@ def add_fix_keys_25(upgrade=True): CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); - update augur_operations.config set value='1' where setting_name = 'refresh_materialized_views_interval_in_days'; - """)) + update augur_operations.config set value='1' where setting_name = 'refresh_materialized_views_interval_in_days';""")) From dcc875eaec3be05c4c2a862922af3ced95d8910c Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:23:09 -0500 Subject: [PATCH 43/47] fix' Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index 90449192a5..af689130da 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -67,7 +67,7 @@ def add_fix_keys_25(upgrade=True): drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_all; drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_detail; drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_summary; - drop MATERIALIZED VIEW if exists augur_data.explorer_contributor_actions; """) + drop MATERIALIZED VIEW if exists augur_data.explorer_contributor_actions; """)) conn = op.get_bind() conn.execute(text(""" From bb56462defeb7f8d797104712a6be324e71758a8 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 1 Sep 2023 12:28:10 -0500 Subject: [PATCH 44/47] fix' Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/25_unique_on_mataview.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/augur/application/schema/alembic/versions/25_unique_on_mataview.py b/augur/application/schema/alembic/versions/25_unique_on_mataview.py index af689130da..b9bc11e787 100644 --- a/augur/application/schema/alembic/versions/25_unique_on_mataview.py +++ b/augur/application/schema/alembic/versions/25_unique_on_mataview.py @@ -58,10 +58,6 @@ def add_fix_keys_25(upgrade=True): conn.execute(text(""" CREATE UNIQUE INDEX ON augur_data.explorer_entry_list(repo_id); """)) - conn = op.get_bind() - conn.execute(text(""" - CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); """)) - conn = op.get_bind() conn.execute(text(""" drop MATERIALIZED VIEW if exists augur_data.explorer_libyear_all; @@ -170,6 +166,9 @@ def add_fix_keys_25(upgrade=True): WHERE (a.repo_id = repo.repo_id) ORDER BY a.created_at DESC; - CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); - update augur_operations.config set value='1' where setting_name = 'refresh_materialized_views_interval_in_days';""")) + + conn = op.get_bind() + conn.execute(text(""" + CREATE UNIQUE INDEX ON augur_data.explorer_contributor_actions(cntrb_id,created_at,repo_id, action, repo_name,login, rank); """)) + From e200b2885a8cba982dce6dea424892cea1009850 Mon Sep 17 00:00:00 2001 From: sgoggins Date: Fri, 1 Sep 2023 13:35:39 -0500 Subject: [PATCH 45/47] checking --- augur/tasks/util/random_key_auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index 7a4e32fcc0..866bc738de 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -33,7 +33,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: if self.list_of_keys: key_value = choice(self.list_of_keys) - + self.logger.info(f'Key value used: {key_value}') # formats the key string into a format GitHub will accept if self.key_format: From 24a48638248884ec0e02b901e3d38ee8e577ee0d Mon Sep 17 00:00:00 2001 From: sgoggins Date: Fri, 1 Sep 2023 13:50:51 -0500 Subject: [PATCH 46/47] retest Signed-off-by: sgoggins --- .../github/util/github_api_key_handler.py | 19 ++++++++++--------- .../github/util/github_random_key_auth.py | 2 +- augur/tasks/util/redis_list.py | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index fa42f01807..2a56e9c1c7 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -76,7 +76,8 @@ def get_api_keys_from_database(self) -> List[str]: #select.order_by(func.random()) where = [WorkerOauth.access_token != self.config_key, WorkerOauth.platform == 'github'] - return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()] + #return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).order_by(func.random()).all()] + return [key_tuple[0] for key_tuple in self.session.query(select).filter(*where).all()] def get_api_keys(self) -> List[str]: @@ -136,14 +137,14 @@ def get_api_keys(self) -> List[str]: # shuffling the keys so not all processes get the same keys in the same order valid_now = valid_keys - try: - self.logger.debug(f'valid keys before shuffle: {valid_keys}') - valid_keys = random.sample(valid_keys, len(valid_keys)) - self.logger.debug(f'valid keys AFTER shuffle: {valid_keys}') - except Exception as e: - self.logger.debug(f'{e}') - valid_keys = valid_now - pass + #try: + #self.logger.info(f'valid keys before shuffle: {valid_keys}') + #valid_keys = random.sample(valid_keys, len(valid_keys)) + #self.logger.info(f'valid keys AFTER shuffle: {valid_keys}') + #except Exception as e: + # self.logger.debug(f'{e}') + # valid_keys = valid_now + # pass return valid_keys diff --git a/augur/tasks/github/util/github_random_key_auth.py b/augur/tasks/github/util/github_random_key_auth.py index d9f3481bae..926ac04216 100644 --- a/augur/tasks/github/util/github_random_key_auth.py +++ b/augur/tasks/github/util/github_random_key_auth.py @@ -17,7 +17,7 @@ def __init__(self, session: DatabaseSession, logger): # gets the github api keys from the database via the GithubApiKeyHandler github_api_keys = GithubApiKeyHandler(session).keys - github_api_keys = random.sample(github_api_keys, len(github_api_keys)) + #github_api_keys = random.sample(github_api_keys, len(github_api_keys)) if not github_api_keys: print("Failed to find github api keys. This is usually because your key has expired") diff --git a/augur/tasks/util/redis_list.py b/augur/tasks/util/redis_list.py index 0137273c1e..0380cc6267 100644 --- a/augur/tasks/util/redis_list.py +++ b/augur/tasks/util/redis_list.py @@ -170,8 +170,8 @@ def pop(self, index: int = None): if index is None: # This will get a random index from the list and remove it, # decreasing the likelihood of everyone using the same key all the time - #redis.rpop(self.redis_list_key) - redis.spop(self.redis_list_key) + redis.rpop(self.redis_list_key) + #redis.spop(self.redis_list_key) else: # calls __delitem__ From 98ba73e010681cce40f8ee2a95b4156e41dd382a Mon Sep 17 00:00:00 2001 From: sgoggins Date: Fri, 1 Sep 2023 14:07:03 -0500 Subject: [PATCH 47/47] reducing randomization and logging after tests Signed-off-by: sgoggins --- augur/tasks/util/random_key_auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index 866bc738de..7f7bd65557 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -33,7 +33,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: if self.list_of_keys: key_value = choice(self.list_of_keys) - self.logger.info(f'Key value used: {key_value}') + self.logger.debug(f'Key value used: {key_value}') # formats the key string into a format GitHub will accept if self.key_format: