|
33 | 33 | from collections import defaultdict, deque
|
34 | 34 | from collections.abc import Callable, Iterable, Iterator
|
35 | 35 | from dataclasses import dataclass, field
|
36 |
| -from datetime import datetime, timedelta |
| 36 | +from datetime import datetime |
37 | 37 | from importlib import import_module
|
38 | 38 | from operator import attrgetter, itemgetter
|
39 | 39 | from pathlib import Path
|
40 | 40 | from typing import TYPE_CHECKING, Any, NamedTuple, cast
|
41 | 41 |
|
42 | 42 | import attrs
|
43 | 43 | import structlog
|
44 |
| -from sqlalchemy import select, update |
| 44 | +from sqlalchemy import select |
45 | 45 | from sqlalchemy.orm import load_only
|
46 | 46 | from tabulate import tabulate
|
47 | 47 | from uuid6 import uuid7
|
@@ -158,17 +158,9 @@ class DagFileProcessorManager(LoggingMixin):
|
158 | 158 |
|
159 | 159 | _parallelism: int = attrs.field(factory=_config_int_factory("dag_processor", "parsing_processes"))
|
160 | 160 |
|
161 |
| - parsing_cleanup_interval: float = attrs.field( |
162 |
| - factory=_config_int_factory("scheduler", "parsing_cleanup_interval") |
163 |
| - ) |
164 | 161 | _file_process_interval: float = attrs.field(
|
165 | 162 | factory=_config_int_factory("dag_processor", "min_file_process_interval")
|
166 | 163 | )
|
167 |
| - stale_dag_threshold: float = attrs.field( |
168 |
| - factory=_config_int_factory("dag_processor", "stale_dag_threshold") |
169 |
| - ) |
170 |
| - |
171 |
| - _last_deactivate_stale_dags_time: float = attrs.field(default=0, init=False) |
172 | 164 | print_stats_interval: float = attrs.field(
|
173 | 165 | factory=_config_int_factory("dag_processor", "print_stats_interval")
|
174 | 166 | )
|
@@ -251,59 +243,6 @@ def run(self):
|
251 | 243 |
|
252 | 244 | return self._run_parsing_loop()
|
253 | 245 |
|
254 |
| - def _scan_stale_dags(self): |
255 |
| - """Scan and deactivate DAGs which are no longer present in files.""" |
256 |
| - now = time.monotonic() |
257 |
| - elapsed_time_since_refresh = now - self._last_deactivate_stale_dags_time |
258 |
| - if elapsed_time_since_refresh > self.parsing_cleanup_interval: |
259 |
| - last_parsed = { |
260 |
| - file_info: stat.last_finish_time |
261 |
| - for file_info, stat in self._file_stats.items() |
262 |
| - if stat.last_finish_time |
263 |
| - } |
264 |
| - self.deactivate_stale_dags(last_parsed=last_parsed) |
265 |
| - self._last_deactivate_stale_dags_time = time.monotonic() |
266 |
| - |
267 |
| - @provide_session |
268 |
| - def deactivate_stale_dags( |
269 |
| - self, |
270 |
| - last_parsed: dict[DagFileInfo, datetime | None], |
271 |
| - session: Session = NEW_SESSION, |
272 |
| - ): |
273 |
| - """Detect and deactivate DAGs which are no longer present in files.""" |
274 |
| - to_deactivate = set() |
275 |
| - bundle_names = {b.name for b in self._dag_bundles} |
276 |
| - query = select( |
277 |
| - DagModel.dag_id, |
278 |
| - DagModel.bundle_name, |
279 |
| - DagModel.fileloc, |
280 |
| - DagModel.last_parsed_time, |
281 |
| - DagModel.relative_fileloc, |
282 |
| - ).where(DagModel.is_active, DagModel.bundle_name.in_(bundle_names)) |
283 |
| - dags_parsed = session.execute(query) |
284 |
| - |
285 |
| - for dag in dags_parsed: |
286 |
| - # The largest valid difference between a DagFileStat's last_finished_time and a DAG's |
287 |
| - # last_parsed_time is the processor_timeout. Longer than that indicates that the DAG is |
288 |
| - # no longer present in the file. We have a stale_dag_threshold configured to prevent a |
289 |
| - # significant delay in deactivation of stale dags when a large timeout is configured |
290 |
| - file_info = DagFileInfo(rel_path=Path(dag.relative_fileloc), bundle_name=dag.bundle_name) |
291 |
| - if last_finish_time := last_parsed.get(file_info, None): |
292 |
| - if dag.last_parsed_time + timedelta(seconds=self.stale_dag_threshold) < last_finish_time: |
293 |
| - self.log.info("DAG %s is missing and will be deactivated.", dag.dag_id) |
294 |
| - to_deactivate.add(dag.dag_id) |
295 |
| - |
296 |
| - if to_deactivate: |
297 |
| - deactivated_dagmodel = session.execute( |
298 |
| - update(DagModel) |
299 |
| - .where(DagModel.dag_id.in_(to_deactivate)) |
300 |
| - .values(is_active=False) |
301 |
| - .execution_options(synchronize_session="fetch") |
302 |
| - ) |
303 |
| - deactivated = deactivated_dagmodel.rowcount |
304 |
| - if deactivated: |
305 |
| - self.log.info("Deactivated %i DAGs which are no longer present in file.", deactivated) |
306 |
| - |
307 | 246 | def _run_parsing_loop(self):
|
308 | 247 | # initialize cache to mutualize calls to Variable.get in DAGs
|
309 | 248 | # needs to be done before this process is forked to create the DAG parsing processes.
|
@@ -342,7 +281,6 @@ def _run_parsing_loop(self):
|
342 | 281 |
|
343 | 282 | for callback in self._fetch_callbacks():
|
344 | 283 | self._add_callback_to_queue(callback)
|
345 |
| - self._scan_stale_dags() |
346 | 284 | DagWarning.purge_inactive_dag_warnings()
|
347 | 285 |
|
348 | 286 | # Update number of loop iteration.
|
|
0 commit comments