Skip to content

Commit

Permalink
Merge pull request #1483 from mitodl/edxorg_asset_consolidation
Browse files Browse the repository at this point in the history
refactor: Consolidate edxorg assets into one code location
  • Loading branch information
blarghmatey authored Feb 21, 2025
2 parents 5d95ed0 + 2bcb7d5 commit afc75ec
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 70 deletions.
64 changes: 0 additions & 64 deletions src/ol_orchestrate/definitions/edx/edxorg_api_data_extract.py

This file was deleted.

24 changes: 19 additions & 5 deletions src/ol_orchestrate/definitions/edx/retrieve_edxorg_raw_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,15 @@
from dagster import (
AssetSelection,
Definitions,
ScheduleDefinition,
define_asset_job,
)
from dagster_aws.s3 import S3Resource

from ol_orchestrate.assets.edxorg_api import (
edxorg_mitx_course_metadata,
edxorg_program_metadata,
)
from ol_orchestrate.assets.edxorg_archive import (
dummy_edxorg_course_structure,
edxorg_archive_partitions,
Expand All @@ -84,15 +89,16 @@
extract_edxorg_courserun_metadata,
)
from ol_orchestrate.io_managers.filepath import (
FileObjectIOManager,
S3FileObjectIOManager,
)
from ol_orchestrate.io_managers.gcs import GCSFileIOManager
from ol_orchestrate.jobs.retrieve_edx_exports import (
retrieve_edx_course_exports,
)
from ol_orchestrate.lib.constants import DAGSTER_ENV, VAULT_ADDRESS
from ol_orchestrate.lib.dagster_helpers import default_io_manager
from ol_orchestrate.resources.gcp_gcs import GCSConnection
from ol_orchestrate.resources.openedx import OpenEdxApiClientFactory
from ol_orchestrate.resources.outputs import DailyResultsDir
from ol_orchestrate.resources.secrets.vault import Vault

Expand Down Expand Up @@ -177,21 +183,26 @@ def edxorg_tracking_logs_config(dagster_env):
"logs": r"COLD/mitx-edx-events-\d{4}-\d{2}-\d{2}.log.gz$",
}

# Schedule named "edxorg_api_schedule" that targets the edxorg_program_metadata
# and edxorg_mitx_course_metadata assets on the "@daily" cron, evaluated in the
# Etc/UTC timezone.
edxorg_api_daily_schedule = ScheduleDefinition(
name="edxorg_api_schedule",
target=AssetSelection.assets(edxorg_program_metadata, edxorg_mitx_course_metadata),
cron_schedule="@daily",
execution_timezone="Etc/UTC",
)

retrieve_edx_exports = Definitions(
resources={
"gcp_gcs": gcs_connection,
"s3": S3Resource(),
"exports_dir": DailyResultsDir.configure_at_launch(),
"io_manager": FileObjectIOManager(
vault=Vault(**vault_config),
vault_gcs_token_path="secret-data/pipelines/edx/org/gcp-oauth-client", # noqa: S106
),
"io_manager": default_io_manager(DAGSTER_ENV),
"s3file_io_manager": S3FileObjectIOManager(
bucket=s3_uploads_bucket(DAGSTER_ENV)["bucket"],
path_prefix=s3_uploads_bucket(DAGSTER_ENV)["prefix"],
),
"gcs_input": GCSFileIOManager(gcs=gcs_connection),
"vault": vault,
"edxorg_api": OpenEdxApiClientFactory(deployment="edxorg", vault=vault),
},
sensors=[
gcs_edxorg_archive_sensor.with_updated_job(edxorg_course_data_job),
Expand All @@ -206,5 +217,8 @@ def edxorg_tracking_logs_config(dagster_env):
flatten_edxorg_course_structure,
extract_edxorg_courserun_metadata,
dummy_edxorg_course_xml,
edxorg_program_metadata,
edxorg_mitx_course_metadata,
],
schedules=[edxorg_api_daily_schedule],
)
1 change: 0 additions & 1 deletion src/ol_orchestrate/workspace.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ load_from:
- python_module: ol_orchestrate.definitions.edx.openedx_data_extract
- python_module: ol_orchestrate.definitions.edx.retrieve_edxorg_raw_data
- python_module: ol_orchestrate.definitions.edx.sync_program_credential_reports
- python_module: ol_orchestrate.definitions.edx.edxorg_api_data_extract
- python_module: ol_orchestrate.definitions.lakehouse.elt
- python_module: ol_orchestrate.definitions.platform.notification
- python_module: ol_orchestrate.repositories.edx_gcs_courses
Expand Down

0 comments on commit afc75ec

Please sign in to comment.