Skip to content

Commit a729400

Browse files
authored
Remove extractor specific config from the base config (#492)
* Remove extractor-specific config from the base config
* Remove obsolete test case
1 parent b5a160f commit a729400

5 files changed

Lines changed: 36 additions & 70 deletions

File tree

cognite/extractorutils/unstable/configuration/models.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"MetricsConfig",
5252
"ScheduleConfig",
5353
"TimeIntervalConfig",
54+
"WithDataSetId",
5455
]
5556

5657

@@ -619,6 +620,32 @@ def _log_handler_default() -> list[LogHandlerConfig]:
619620
return [LogConsoleHandlerConfig(type="console", level=LogLevel.INFO)]
620621

621622

623+
class WithDataSetId(ConfigModel):
    """
    Base configuration model for settings that carry an optional data set reference.
    """

    # Optional reference to a data set; None means no data set is configured.
    data_set: EitherIdConfig | None = None

    def get_data_set(self, cdf_client: CogniteClient) -> DataSet | None:
        """
        Look up the configured data set through the given client.

        Args:
            cdf_client: CogniteClient instance used to perform the data set lookup.

        Returns:
            The result of ``cdf_client.data_sets.retrieve`` when ``data_set`` is set; otherwise None.
        """
        reference = self.data_set
        if reference:
            # Both identifiers are forwarded as-is from the configured reference.
            return cdf_client.data_sets.retrieve(
                id=reference.either_id.internal_id,
                external_id=reference.either_id.external_id,
            )

        return None
647+
648+
622649
class FileSizeConfig:
623650
"""
624651
Configuration parameter for setting a file size.
@@ -868,31 +895,6 @@ class ExtractorConfig(ConfigModel):
868895
state_store: StateStoreConfig | None = None
869896
metrics: MetricsConfig | None = None
870897
log_handlers: list[LogHandlerConfig] = Field(default_factory=_log_handler_default)
871-
retry_startup: bool = True
872-
upload_queue_size: int = 50_000
873-
data_set: EitherIdConfig | None = None
874-
data_set_external_id: str | None = None
875-
876-
def get_data_set(self, cdf_client: CogniteClient) -> DataSet | None:
877-
"""
878-
Retrieves the DataSet object based on the configuration.
879-
880-
Args:
881-
cdf_client: An instance of CogniteClient to use for retrieving the DataSet.
882-
883-
Returns:
884-
DataSet object if data_set, data_set_id, or data_set_external_id is provided; otherwise None.
885-
"""
886-
if self.data_set_external_id:
887-
return cdf_client.data_sets.retrieve(external_id=self.data_set_external_id)
888-
889-
if not self.data_set:
890-
return None
891-
892-
return cdf_client.data_sets.retrieve(
893-
id=self.data_set.either_id.internal_id,
894-
external_id=self.data_set.either_id.external_id,
895-
)
896898

897899

898900
ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)

cognite/extractorutils/unstable/core/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ class Extractor(Generic[ConfigType], CogniteLogger):
142142

143143
CONFIG_TYPE: type[ConfigType]
144144

145+
RETRY_STARTUP: bool = True
145146
RESTART_POLICY: RestartPolicy = WHEN_CONTINUOUS_TASKS_CRASHES
146147
USE_DEFAULT_STATE_STORE: bool = True
147148
_statestore_singleton: AbstractStateStore | None = None

cognite/extractorutils/unstable/core/runtime.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,7 @@ def _extractor_process_entrypoint(
8585
checkin_worker.active_revision = config.current_config_revision
8686
checkin_worker.set_on_fatal_error_handler(lambda _: on_fatal_error(controls))
8787
checkin_worker.set_on_revision_change_handler(lambda _: on_revision_changed(controls))
88-
if config.application_config.retry_startup:
89-
checkin_worker.set_retry_startup(config.application_config.retry_startup)
88+
checkin_worker.set_retry_startup(extractor_class.RETRY_STARTUP)
9089
if not metrics:
9190
metrics = BaseMetrics(extractor_name=extractor_class.NAME, extractor_version=extractor_class.VERSION)
9291
extractor = extractor_class._init_from_runtime(config, checkin_worker, metrics)

tests/test_unstable/test_base.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from cognite.extractorutils.unstable.configuration.loaders import ConfigFormat, load_io
1616
from cognite.extractorutils.unstable.configuration.models import (
1717
ConnectionConfig,
18-
ExtractorConfig,
1918
LocalStateStoreConfig,
2019
LogConsoleHandlerConfig,
2120
LogFileHandlerConfig,
@@ -345,16 +344,3 @@ def test_pushgatewayconfig_none_credentials_from_yaml() -> None:
345344
assert pusher.password is None
346345
assert pusher.url == "http://localhost:9091"
347346
assert pusher.job_name == "test-job"
348-
349-
350-
def test_extractor_config_upload_queue_size_with_yaml() -> None:
351-
"""Test upload_queue_size parsing from YAML configuration."""
352-
config_yaml = """
353-
upload-queue-size: 200000
354-
retry-startup: false
355-
"""
356-
stream = StringIO(config_yaml)
357-
config = load_io(stream, ConfigFormat.YAML, ExtractorConfig)
358-
359-
assert config.upload_queue_size == 200_000
360-
assert config.retry_startup is False

tests/test_unstable/test_configuration.py

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
ConfigModel,
1414
ConnectionConfig,
1515
EitherIdConfig,
16-
ExtractorConfig,
1716
FileSizeConfig,
1817
LogLevel,
1918
TimeIntervalConfig,
19+
WithDataSetId,
2020
_ClientCredentialsConfig,
2121
)
2222

@@ -310,61 +310,39 @@ def test_setting_log_level_from_any_case() -> None:
310310

311311

312312
@pytest.mark.parametrize(
313-
"data_set_external_id,data_set_config,expected_call,expected_result_attrs,should_return_none",
313+
"data_set_config,expected_call,expected_result_attrs,should_return_none",
314314
[
315-
# Test with data_set_external_id provided
316-
(
317-
"test-dataset",
318-
None,
319-
{"external_id": "test-dataset"},
320-
{"external_id": "test-dataset", "name": "Test Dataset"},
321-
False,
322-
),
323315
# Test with data_set config using internal ID
324316
(
325-
None,
326317
EitherIdConfig(id=12345),
327318
{"id": 12345, "external_id": None},
328319
{"id": 12345, "name": "Test Dataset"},
329320
False,
330321
),
331322
# Test with data_set config using external ID
332323
(
333-
None,
334324
EitherIdConfig(external_id="config-dataset"),
335325
{"id": None, "external_id": "config-dataset"},
336326
{"external_id": "config-dataset", "name": "Config Dataset"},
337327
False,
338328
),
339-
# Test that data_set_external_id takes priority over data_set
329+
# Test with data_set not provided
340330
(
341-
"priority-dataset",
342-
EitherIdConfig(external_id="should-be-ignored"),
343-
{"external_id": "priority-dataset"},
344-
{"external_id": "priority-dataset", "name": "Priority Dataset"},
345-
False,
346-
),
347-
# Test with neither data_set_external_id nor data_set provided
348-
(
349-
None,
350331
None,
351332
{},
352333
{},
353334
True,
354335
),
355336
],
356337
)
357-
def test_get_data_set_various_configurations(
358-
data_set_external_id: str | None,
338+
def test_with_data_set_id_various_configurations(
359339
data_set_config: EitherIdConfig | None,
360340
expected_call: dict | None,
361341
expected_result_attrs: dict | None,
362342
should_return_none: bool,
363343
) -> None:
364-
"""Test get_data_set method with various configuration scenarios."""
365-
extractor_config = ExtractorConfig(
366-
retry_startup=False,
367-
data_set_external_id=data_set_external_id,
344+
"""Test WithDataSetId.get_data_set method with various configuration scenarios."""
345+
with_data_set_config = WithDataSetId(
368346
data_set=data_set_config,
369347
)
370348

@@ -375,7 +353,7 @@ def test_get_data_set_various_configurations(
375353
mock_dataset = DataSet(**expected_result_attrs)
376354
mock_client.data_sets.retrieve.return_value = mock_dataset
377355

378-
result = extractor_config.get_data_set(mock_client)
356+
result = with_data_set_config.get_data_set(mock_client)
379357

380358
if should_return_none:
381359
assert result is None

0 commit comments

Comments
 (0)