Skip to content

Commit

Permalink
Merge pull request #2490 from chaoss/dev-index-matview-patch-spg-1
Browse files Browse the repository at this point in the history
Dev index matview patch spg 1
  • Loading branch information
sgoggins authored Aug 25, 2023
2 parents 03e5cf8 + 826c63c commit b995da4
Show file tree
Hide file tree
Showing 15 changed files with 868 additions and 27 deletions.
1 change: 1 addition & 0 deletions add.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dfadffd
6 changes: 3 additions & 3 deletions augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,21 +170,21 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#60% of estimate, Maximum value of 80
core_num_processes = determine_worker_processes(.6, 45)
core_num_processes = determine_worker_processes(.6, 80)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 26
secondary_num_processes = determine_worker_processes(.2, 25)
secondary_num_processes = determine_worker_processes(.2, 26)
logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
process_list.append(subprocess.Popen(secondary_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 40
facade_num_processes = determine_worker_processes(.2, 20)
facade_num_processes = determine_worker_processes(.2, 40)
logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}")
facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade"

Expand Down
188 changes: 188 additions & 0 deletions augur/application/schema/alembic/versions/22_mat_view_cntrbid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""Fix Keys and materialized view
Revision ID: 22
Revises: 21
Create Date: 2023-08-23 18:17:22.651191
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import text

# revision identifiers, used by Alembic.
revision = '22'
down_revision = '21'
branch_labels = None
depends_on = None


def upgrade():

add_fix_keys_22()

def downgrade():

upgrade=False

add_fix_keys_22(upgrade)

def add_fix_keys_22(upgrade=True):

if upgrade:

conn = op.get_bind()
conn.execute(text("""
alter TABLE
augur_data.commits DROP CONSTRAINT if exists fk_commits_contributors_3,
DROP CONSTRAINT if exists fk_commits_contributors_4;
alter TABLE augur_data.contributors
DROP CONSTRAINT if exists "GH-UNIQUE-C",
DROP CONSTRAINT if exists
"GL-cntrb-LOGIN-UNIQUE";"""))

conn = op.get_bind()
conn.execute(text("""
drop materialized view if exists augur_data.explorer_contributor_actions; """))

conn = op.get_bind()
conn.execute(text("""
create materialized view augur_data.explorer_contributor_actions as
SELECT
A.ID AS cntrb_id,
A.created_at,
A.repo_id,
A.ACTION,
repo.repo_name,
A.LOGIN,
DENSE_RANK() OVER(PARTITION BY A.ID, A.repo_id ORDER BY A.created_at) AS RANK
FROM (
select
commits.cmt_ght_author_id AS ID,
commits.cmt_author_timestamp AS created_at,
commits.repo_id,
'commit' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
( augur_data.commits LEFT JOIN augur_data.contributors ON ( ( ( contributors.cntrb_id ) :: TEXT = ( commits.cmt_ght_author_id ) :: TEXT ) ) )
GROUP BY
commits.cmt_commit_hash,
commits.cmt_ght_author_id,
commits.repo_id,
commits.cmt_author_timestamp,
'commit' :: TEXT,
contributors.cntrb_login
UNION all
SELECT
issues.reporter_id AS ID,
issues.created_at,
issues.repo_id,
'issue_opened' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
( augur_data.issues LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = issues.reporter_id ) ) )
WHERE
( issues.pull_request IS NULL )
UNION ALL
SELECT
pull_request_events.cntrb_id AS ID,
pull_request_events.created_at,
pull_requests.repo_id,
'pull_request_closed' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) )
WHERE
pull_requests.pull_request_id = pull_request_events.pull_request_id
AND pull_requests.pr_merged_at IS NULL
AND ( ( pull_request_events.ACTION ) :: TEXT = 'closed' :: TEXT )
UNION ALL
SELECT
pull_request_events.cntrb_id AS ID,
pull_request_events.created_at,
pull_requests.repo_id,
'pull_request_merged' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) )
WHERE
pull_requests.pull_request_id = pull_request_events.pull_request_id
AND ( ( pull_request_events.ACTION ) :: TEXT = 'merged' :: TEXT )
UNION ALL
SELECT
issue_events.cntrb_id AS ID,
issue_events.created_at,
issues.repo_id,
'issue_closed' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.issues,
augur_data.issue_events
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = issue_events.cntrb_id
WHERE
issues.issue_id = issue_events.issue_id
AND issues.pull_request IS NULL
AND ( ( issue_events.ACTION ) :: TEXT = 'closed' :: TEXT )
UNION ALL
SELECT
pull_request_reviews.cntrb_id AS ID,
pull_request_reviews.pr_review_submitted_at AS created_at,
pull_requests.repo_id,
( 'pull_request_review_' :: TEXT || ( pull_request_reviews.pr_review_state ) :: TEXT ) AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
augur_data.pull_request_reviews
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = pull_request_reviews.cntrb_id
WHERE
pull_requests.pull_request_id = pull_request_reviews.pull_request_id
UNION ALL
SELECT
pull_requests.pr_augur_contributor_id AS ID,
pull_requests.pr_created_at AS created_at,
pull_requests.repo_id,
'pull_request_open' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests
LEFT JOIN augur_data.contributors ON pull_requests.pr_augur_contributor_id = contributors.cntrb_id
UNION ALL
SELECT
message.cntrb_id AS ID,
message.msg_timestamp AS created_at,
pull_requests.repo_id,
'pull_request_comment' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
augur_data.pull_request_message_ref,
augur_data.message
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id
WHERE
pull_request_message_ref.pull_request_id = pull_requests.pull_request_id
AND pull_request_message_ref.msg_id = message.msg_id
UNION ALL
SELECT
issues.reporter_id AS ID,
message.msg_timestamp AS created_at,
issues.repo_id,
'issue_comment' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.issues,
augur_data.issue_message_ref,
augur_data.message
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id
WHERE
issue_message_ref.msg_id = message.msg_id
AND issues.issue_id = issue_message_ref.issue_id
AND issues.closed_at != message.msg_timestamp
) A,
augur_data.repo
WHERE
A.repo_id = repo.repo_id
ORDER BY
A.created_at DESC"""))

45 changes: 45 additions & 0 deletions augur/application/schema/alembic/versions/23_add_index_ghlogin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""add index
Revision ID: 23
Revises: 22
Create Date: 2023-08-23 18:17:22.651191
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import text

# revision identifiers, used by Alembic.
revision = '23'
down_revision = '22'
branch_labels = None
depends_on = None


def upgrade():

gh_loginindex()

def downgrade():

upgrade=False

gh_loginindex(upgrade)

def gh_loginindex(upgrade=True):

if upgrade:

conn = op.get_bind()
conn.execute(text("""
CREATE INDEX if not exists "gh_login" ON "augur_data"."contributors" USING btree (
"gh_login" ASC NULLS FIRST);"""))

else:


conn = op.get_bind()
conn.execute(text("""
DROP INDEX if exists "gh_login" ON "augur_data"."contributors" USING btree (
"gh_login" ASC NULLS FIRST);"""))
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Alter repo labor unique
Revision ID: 24
Revises: 23
Create Date: 2023-08-25 18:17:22.651191
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re

# revision identifiers, used by Alembic.
revision = '24'
down_revision = '23'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###

conn = op.get_bind()

#Remove constraint being initially deferred.
conn.execute(text(f"""
ALTER TABLE "augur_data"."repo_labor"
DROP CONSTRAINT IF EXISTS "rl-unique",
ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name");
"""))
"""
"""
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
conn = op.get_bind()

#Make unique initially deferred
conn.execute(text(f"""
ALTER TABLE "augur_data"."repo_labor"
DROP CONSTRAINT IF EXISTS "rl-unique",
ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name") DEFERRABLE INITIALLY DEFERRED;
"""))

# ### end Alembic commands ###
29 changes: 8 additions & 21 deletions augur/tasks/git/dependency_tasks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.application.db.util import execute_session_query
from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc
from augur.tasks.util.worker_util import parse_json_from_subprocess_call

def generate_deps_data(session, repo_id, path):
"""Runs scc on repo and stores data in database
"""Run dependency logic on repo and stores data in database
:param repo_id: Repository ID
:param path: Absolute path of the Repository
"""
Expand Down Expand Up @@ -46,22 +47,16 @@ def generate_deps_data(session, repo_id, path):

session.logger.info(f"Inserted {len(deps)} dependencies for repo {repo_id}")

"""
def deps_model(session, repo_id,repo_git,repo_path,repo_name):
# Data collection and storage method
def deps_model(session, repo_id,repo_git,repo_group_id):
""" Data collection and storage method
"""
session.logger.info(f"This is the deps model repo: {repo_git}.")
#result = session.execute_sql(repo_path_sql)
result = re.search(r"https:\/\/(github\.com\/[A-Za-z0-9 \- _]+\/)([A-Za-z0-9 \- _ .]+)$", repo_git).groups()

relative_repo_path = f"{repo_group_id}/{result[0]}{result[1]}"
config = AugurConfig(session.logger, session)
absolute_repo_path = config.get_section("Facade")['repo_directory'] + relative_repo_path

generate_deps_data(session,repo_id, absolute_repo_path)
"""

def generate_scorecard(session,repo_id,path):
"""Runs scorecard on repo and stores data in database
Expand All @@ -86,16 +81,8 @@ def generate_scorecard(session,repo_id,path):
key_handler = GithubApiKeyHandler(session)
os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key()

p= subprocess.run(['./scorecard', command, '--format=json'], cwd= path_to_scorecard ,capture_output=True, text=True, timeout=None)
session.logger.info('subprocess completed successfully... ')
output = p.stdout

try:
required_output = json.loads(output)
except json.decoder.JSONDecodeError as e:
session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
return

required_output = parse_json_from_subprocess_call(session.logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)

session.logger.info('adding to database...')
session.logger.debug(f"output: {required_output}")

Expand Down
11 changes: 10 additions & 1 deletion augur/tasks/git/dependency_tasks/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask, AugurCoreRepoCollectionTask
from augur.application.db.util import execute_session_query
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path
from augur.application.config import AugurConfig


@celery.task(base=AugurFacadeRepoCollectionTask)
Expand All @@ -21,7 +23,14 @@ def process_dependency_metrics(repo_git):


repo = execute_session_query(query,'one')
deps_model(session, repo.repo_id,repo_git,repo.repo_group_id)

config = AugurConfig(session.logger, session)

absolute_repo_path = get_absolute_repo_path(config.get_section("Facade")['repo_directory'],repo.repo_id,repo.repo_path,repo.repo_name)

session.logger.debug(f"This is the deps model repo: {repo_git}.")

generate_deps_data(session,repo.repo_id,absolute_repo_path)


@celery.task(base=AugurCoreRepoCollectionTask)
Expand Down
Loading

0 comments on commit b995da4

Please sign in to comment.