From eeff40d68527cb6fd031a42b1ddcd43fe838283a Mon Sep 17 00:00:00 2001
From: V
Date: Fri, 17 Oct 2025 14:35:43 -0700
Subject: [PATCH 1/2] fix(precommit): update hook versions

---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index afe6d0e13b..2f61c741aa 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v6.0.0
     hooks:
       - id: trailing-whitespace
         exclude_types: ['markdown']
@@ -21,7 +21,7 @@ repos:
         # Suppress SyntaxWarning about invalid escape sequence from calitp-data-infra dependency without modifying source
         entry: env PYTHONWARNINGS="ignore::SyntaxWarning" flake8
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 25.9.0
     hooks:
       - id: black
         args: ["--config=./pyproject.toml"]

From 973e2cf05ea55470f814c7b7990256051b0335e1 Mon Sep 17 00:00:00 2001
From: V
Date: Wed, 22 Oct 2025 10:41:22 -0700
Subject: [PATCH 2/2] Implement most black upgrade format fixes

---
 .../generate_gtfs_download_configs.py        | 28 +++++++++++--------
 airflow/dags/dags.py                         |  8 ++++--
 airflow/plugins/operators/external_table.py  |  1 +
 airflow/plugins/scripts/gtfs_rt_parser.py    | 13 +++++----
 .../calitp_data_analysis/geography_utils.py  |  1 +
 .../calitp_data_analysis/styleguide.py       |  1 +
 .../calitp_data_analysis/utils.py            |  1 +
 .../calitp_data_infra/storage.py             |  6 ++--
 warehouse/scripts/dbt_artifacts/__init__.py  |  7 +++--
 warehouse/scripts/dbt_artifacts/manifest.py  |  6 ++--
 warehouse/scripts/publish.py                 |  7 +++--
 warehouse/scripts/visualize.py               |  1 +
 warehouse/seeds/transit_facilities_to_csv.py |  1 +
 13 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py b/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
index 071f7286e6..64580eb626 100644
--- a/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
+++ b/airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
@@ -49,18 +49,22 @@ def gtfs_datasets_to_extract_configs(
                         name=record.name,
                         url=record.pipeline_url,
                         feed_type=record.data,
-                        auth_query_params={
-                            record.authorization_url_parameter_name: record.url_secret_key_name
-                        }
-                        if record.authorization_url_parameter_name
-                        and record.url_secret_key_name
-                        else {},
-                        auth_headers={
-                            record.authorization_header_parameter_name: record.header_secret_key_name
-                        }
-                        if record.authorization_header_parameter_name
-                        and record.header_secret_key_name
-                        else {},
+                        auth_query_params=(
+                            {
+                                record.authorization_url_parameter_name: record.url_secret_key_name
+                            }
+                            if record.authorization_url_parameter_name
+                            and record.url_secret_key_name
+                            else {}
+                        ),
+                        auth_headers=(
+                            {
+                                record.authorization_header_parameter_name: record.header_secret_key_name
+                            }
+                            if record.authorization_header_parameter_name
+                            and record.header_secret_key_name
+                            else {}
+                        ),
                     ),
                 )
             except ValidationError as e:
diff --git a/airflow/dags/dags.py b/airflow/dags/dags.py
index 8ffcd9f04c..a83a5021d9 100644
--- a/airflow/dags/dags.py
+++ b/airflow/dags/dags.py
@@ -55,9 +55,11 @@ def log_failure_to_slack(context):
     wait_for_defaults={"retries": 24, "check_existence": True, "timeout": 10 * 60},
     latest_only=False,
     user_defined_macros={
-        "image_tag": lambda: "development"
-        if os.environ["AIRFLOW_ENV"] == "development"
-        else "latest",
+        "image_tag": lambda: (
+            "development"
+            if os.environ["AIRFLOW_ENV"] == "development"
+            else "latest"
+        ),
         "env_var": os.environ.get,
     },
     default_args={
diff --git a/airflow/plugins/operators/external_table.py b/airflow/plugins/operators/external_table.py
index 0537180422..449f155225 100644
--- a/airflow/plugins/operators/external_table.py
+++ b/airflow/plugins/operators/external_table.py
@@ -7,6 +7,7 @@
 However, it's cumbersome to convert the http api style schema fields to SQL, so
 we provide a fallback for these old-style tasks.
 """
+
 from google.api_core.exceptions import NotFound
 from google.cloud import bigquery
 from utils import CALITP_BQ_LOCATION, CALITP_PROJECT_NAME
diff --git a/airflow/plugins/scripts/gtfs_rt_parser.py b/airflow/plugins/scripts/gtfs_rt_parser.py
index 312b628ad9..4f27c354f7 100644
--- a/airflow/plugins/scripts/gtfs_rt_parser.py
+++ b/airflow/plugins/scripts/gtfs_rt_parser.py
@@ -1,6 +1,7 @@
 """
 Parses binary RT feeds and writes them back to GCS as gzipped newline-delimited JSON
 """
+
 import base64
 import copy
 import datetime
@@ -414,9 +415,9 @@ def where_base64url(self, base64_url: Optional[str]):
     def get_aggregates(
         self,
     ) -> List[RTHourlyAggregation]:
-        aggregates: Dict[
-            Tuple[pendulum.DateTime, str], List[GTFSRTFeedExtract]
-        ] = defaultdict(list)
+        aggregates: Dict[Tuple[pendulum.DateTime, str], List[GTFSRTFeedExtract]] = (
+            defaultdict(list)
+        )
 
         for file in self.files:
             if self.base64_url is None or file.base64_url == self.base64_url:
@@ -892,9 +893,9 @@ def parse_and_validate(
 
 def make_dict_bq_safe(d: Dict[str, Any]) -> Dict[str, Any]:
     return {
-        make_name_bq_safe(key): make_dict_bq_safe(value)
-        if isinstance(value, dict)
-        else value
+        make_name_bq_safe(key): (
+            make_dict_bq_safe(value) if isinstance(value, dict) else value
+        )
         for key, value in d.items()
     }
 
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py b/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
index c8bb646567..9e4cb3c192 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/geography_utils.py
@@ -2,6 +2,7 @@
 Utility functions for geospatial data.
 Some functions for dealing with census tract or other geographic unit dfs.
 """
+
 from typing import Literal, Union, cast
 
 import dask.dataframe as dd
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/styleguide.py b/packages/calitp-data-analysis/calitp_data_analysis/styleguide.py
index 66f59d1d21..cd618872c1 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/styleguide.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/styleguide.py
@@ -24,6 +24,7 @@
 
 https://github.com/CityOfLosAngeles/los-angeles-citywide-data-style
 """
+
 import altair as alt  # type: ignore
 
 from calitp_data_analysis import calitp_color_palette as cp
diff --git a/packages/calitp-data-analysis/calitp_data_analysis/utils.py b/packages/calitp-data-analysis/calitp_data_analysis/utils.py
index d09539c3ca..0369cf53b7 100644
--- a/packages/calitp-data-analysis/calitp_data_analysis/utils.py
+++ b/packages/calitp-data-analysis/calitp_data_analysis/utils.py
@@ -1,6 +1,7 @@
 """
 General utility functions.
 """
+
 import base64
 import os
 import shutil
diff --git a/packages/calitp-data-infra/calitp_data_infra/storage.py b/packages/calitp-data-infra/calitp_data_infra/storage.py
index faf9945c1b..87deac05b9 100644
--- a/packages/calitp-data-infra/calitp_data_infra/storage.py
+++ b/packages/calitp-data-infra/calitp_data_infra/storage.py
@@ -643,9 +643,9 @@ def build_request(self, auth_dict: Mapping[str, str]) -> Request:
         headers = {k: auth_dict[v] for k, v in self.auth_headers.items()}
 
         # some web servers require user agents or they will throw a 4XX error
-        headers[
-            "User-Agent"
-        ] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0"
+        headers["User-Agent"] = (
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0"
+        )
 
         # inspired by: https://stackoverflow.com/questions/18869074/create-url-without-request-execution
         return Request(
diff --git a/warehouse/scripts/dbt_artifacts/__init__.py b/warehouse/scripts/dbt_artifacts/__init__.py
index c8cacc4105..2eae6a0495 100644
--- a/warehouse/scripts/dbt_artifacts/__init__.py
+++ b/warehouse/scripts/dbt_artifacts/__init__.py
@@ -1,6 +1,7 @@
 """
 Built off the starting point of https://guitton.co/posts/dbt-artifacts
 """
+
 import abc
 import os
 from enum import Enum
@@ -64,9 +65,9 @@ def num_bytes(self) -> Optional[int]:
 CatalogTable.num_bytes = property(num_bytes)  # type: ignore[attr-defined]
 
 DependsOn.resolved_nodes = property(  # type: ignore[attr-defined]
-    lambda self: [NodeModelMixin._instances[node] for node in self.nodes]
-    if self.nodes
-    else []
+    lambda self: (
+        [NodeModelMixin._instances[node] for node in self.nodes] if self.nodes else []
+    )
 )
 
 ColumnInfo.publish = property(lambda self: self.meta.get("publish.include", False))  # type: ignore[attr-defined]
diff --git a/warehouse/scripts/dbt_artifacts/manifest.py b/warehouse/scripts/dbt_artifacts/manifest.py
index 4b3a2f7959..b90ecfbaf8 100644
--- a/warehouse/scripts/dbt_artifacts/manifest.py
+++ b/warehouse/scripts/dbt_artifacts/manifest.py
@@ -24,9 +24,9 @@ class ManifestMetadata(BaseModel):
     class Config:
         extra = Extra.allow
 
-    dbt_schema_version: Optional[
-        str
-    ] = "https://schemas.getdbt.com/dbt/manifest/v9.json"
+    dbt_schema_version: Optional[str] = (
+        "https://schemas.getdbt.com/dbt/manifest/v9.json"
+    )
     dbt_version: Optional[str] = "1.6.0a1"
     generated_at: Optional[datetime] = "2023-04-21T11:09:06.496436Z"
     invocation_id: Optional[str] = "c4b245be-8edb-4ad7-ba54-9337ce594f5d"
diff --git a/warehouse/scripts/publish.py b/warehouse/scripts/publish.py
index f212fbb601..eaeff9f605 100755
--- a/warehouse/scripts/publish.py
+++ b/warehouse/scripts/publish.py
@@ -3,6 +3,7 @@
 
 TODO: consider using https://github.com/ckan/ckanapi?
 """
+
 import csv
 import enum
 import functools
@@ -528,9 +529,9 @@ def _publish_exposure(
                         ["geometry_to_publish"] + destination.metadata_columns
                     ]
                     gdf.to_file(geojsonl_fpath, driver="GeoJSONSeq")
-                    layer_geojson_paths[
-                        strip_modelname(node.name).title()
-                    ] = geojsonl_fpath
+                    layer_geojson_paths[strip_modelname(node.name).title()] = (
+                        geojsonl_fpath
+                    )
                     hive_path = destination.hive_path(
                         exposure=exposure,
                         model=strip_modelname(node.name),
diff --git a/warehouse/scripts/visualize.py b/warehouse/scripts/visualize.py
index 44184a93a0..aa726cf802 100755
--- a/warehouse/scripts/visualize.py
+++ b/warehouse/scripts/visualize.py
@@ -1,6 +1,7 @@
 """
 Provide more visualizations than what dbt provides.
 """
+
 import json
 import os
 import webbrowser
diff --git a/warehouse/seeds/transit_facilities_to_csv.py b/warehouse/seeds/transit_facilities_to_csv.py
index 209fbdd8b1..aa1c0e645d 100644
--- a/warehouse/seeds/transit_facilities_to_csv.py
+++ b/warehouse/seeds/transit_facilities_to_csv.py
@@ -1,6 +1,7 @@
 """
 This script reads a GeoJSON file containing transit facility data, processes it to standardize
 column names and formats, and then exports the relevant data to a CSV file for import as a seed."""
+
 import json
 
 import geopandas as gpd