diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index a2e3a6c4d8..f702d829a3 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -57,7 +57,7 @@ def retrieve_owner_repos(session, owner: str) -> List[str]: # collect repo urls for the given owner repos = [] - for page_data in GithubPaginator(url, session.oauths, logger).iter_pages(): + for page_data, _ in GithubPaginator(url, session.oauths, logger).iter_pages(): if page_data is None: break diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py index f16d7b4587..39ab698fd5 100644 --- a/augur/tasks/db/refresh_materialized_views.py +++ b/augur/tasks/db/refresh_materialized_views.py @@ -17,7 +17,7 @@ def refresh_materialized_views(): from augur.tasks.init.celery_app import engine logger = logging.getLogger(refresh_materialized_views.__name__) - self.logger = logging.getLogger(refresh_materialized_views.__name__) + #self.logger = logging.getLogger(refresh_materialized_views.__name__) mv1_refresh = s.sql.text(""" REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repo_prs with data; @@ -65,7 +65,7 @@ def refresh_materialized_views(): with DatabaseSession(logger, engine) as session: session.execute_sql(mv1_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass @@ -73,56 +73,56 @@ def refresh_materialized_views(): with DatabaseSession(logger, engine) as session: session.execute_sql(mv1_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass try: with DatabaseSession(logger, engine) as session: session.execute_sql(mv2_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass try: with DatabaseSession(logger, engine) as session: session.execute_sql(mv3_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass try: with DatabaseSession(logger, engine) as session: session.execute_sql(mv4_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass try: with DatabaseSession(logger, engine) as session: session.execute_sql(mv5_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass try: with DatabaseSession(logger, engine) as session: session.execute_sql(mv6_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass try: with DatabaseSession(logger, engine) as session: session.execute_sql(mv7_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass try: with DatabaseSession(logger, engine) as session: session.execute_sql(mv8_refresh) except Exception as e: - self.logger.info(f"error is {e}") + logger.info(f"error is {e}") pass diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index a5ba6db7c4..3af6e39e08 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -333,7 +333,7 @@ def collect_pull_request_reviews(repo_git: str) -> None: pr_count = len(prs) - all_raw_pr_reviews = [] + all_pr_reviews = {} for index, pr in enumerate(prs): pr_number = pr.pr_src_number @@ -343,9 +343,9 @@ def collect_pull_request_reviews(repo_git: str) -> None: pr_review_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/reviews" - pr_reviews = GithubPaginator(pr_review_url, manifest.key_auth, logger) - - for page_data, page in pr_reviews.iter_pages(): + pr_reviews = [] + pr_reviews_generator = GithubPaginator(pr_review_url, manifest.key_auth, logger) + for page_data, page in pr_reviews_generator.iter_pages(): if page_data is None: break @@ -353,30 +353,36 @@ def collect_pull_request_reviews(repo_git: str) -> None: if len(page_data) == 0: break - all_raw_pr_reviews.extend(page_data) + pr_reviews.extend(page_data) + + if pr_reviews: + all_pr_reviews[pull_request_id] = pr_reviews - if not all_raw_pr_reviews: + if not list(all_pr_reviews.keys()): logger.info(f"{owner}/{repo} No pr reviews for repo") return contributors = [] - for raw_pr_review in all_raw_pr_reviews: - contributor = process_pull_request_review_contributor(raw_pr_review, tool_source, tool_version, data_source) - if contributor: - contributors.append(contributor) + for pull_request_id in all_pr_reviews.keys(): + + reviews = all_pr_reviews[pull_request_id] + for review in reviews: + contributor = process_pull_request_review_contributor(review, tool_source, tool_version, data_source) + if contributor: + contributors.append(contributor) logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) pr_reviews = [] - for raw_pr_review in all_raw_pr_reviews: - - logger.info(f"Pr review type: {type(raw_pr_review)}") - logger.info(raw_pr_review) + for pull_request_id in all_pr_reviews.keys(): - if "cntrb_id" in raw_pr_review: - pr_reviews.append(extract_needed_pr_review_data(raw_pr_review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) + reviews = all_pr_reviews[pull_request_id] + for review in reviews: + + if "cntrb_id" in review: + pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") pr_review_natural_keys = ["pr_review_src_id",] @@ -395,7 +401,3 @@ def collect_pull_request_reviews(repo_git: str) -> None: - - - -