Skip to content

Commit

Permalink
Add ee cleanup tests
Browse files Browse the repository at this point in the history
* Adds cleanup tests to the live test.
  • Loading branch information
chrismeyersfsu committed Jan 24, 2025
1 parent 534c312 commit 7e0321e
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 44 deletions.
10 changes: 6 additions & 4 deletions awx/main/tasks/receptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,22 +228,24 @@ class RemoteJobError(RuntimeError):
pass


def run_until_complete(node, timing_data=None, **kwargs):
def run_until_complete(node, timing_data=None, worktype='ansible-runner', ttl='20s', **kwargs):
"""
Runs an ansible-runner work_type on remote node, waits until it completes, then returns stdout.
"""

config_data = read_receptor_config()
receptor_ctl = get_receptor_ctl(config_data)

use_stream_tls = getattr(get_conn_type(node, receptor_ctl), 'name', None) == "STREAMTLS"
kwargs.setdefault('tlsclient', get_tls_client(config_data, use_stream_tls))
kwargs.setdefault('ttl', '20s')
if ttl is not None:
kwargs['ttl'] = ttl

Check warning on line 242 in awx/main/tasks/receptor.py

View check run for this annotation

Codecov / codecov/patch

awx/main/tasks/receptor.py#L242

Added line #L242 was not covered by tests
kwargs.setdefault('payload', '')
if work_signing_enabled(config_data):
kwargs['signwork'] = True

transmit_start = time.time()
result = receptor_ctl.submit_work(worktype='ansible-runner', node=node, **kwargs)
result = receptor_ctl.submit_work(worktype=worktype, node=node, **kwargs)

Check warning on line 248 in awx/main/tasks/receptor.py

View check run for this annotation

Codecov / codecov/patch

awx/main/tasks/receptor.py#L248

Added line #L248 was not covered by tests

unit_id = result['unitid']
run_start = time.time()
Expand Down Expand Up @@ -371,7 +373,7 @@ def _convert_args_to_cli(vargs):
return args


def worker_cleanup(node_name, vargs, timeout=300.0):
def worker_cleanup(node_name, vargs):
args = _convert_args_to_cli(vargs)

remote_command = ' '.join(args)
Expand Down
87 changes: 54 additions & 33 deletions awx/main/tasks/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from django.utils.translation import gettext_noop
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
from django.db.models.query import QuerySet

# Django-CRUM
from crum import impersonate
Expand Down Expand Up @@ -379,48 +380,68 @@ def purge_old_stdout_files():
logger.debug("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT, f)))


def _cleanup_images_and_files(**kwargs):
    """
    Run ansible-runner cleanup on this node and, if this node is the elected checker, on execution nodes.

    Returns immediately when ``settings.IS_K8S`` is set. ``kwargs`` are handed to
    ``Instance.get_cleanup_task_kwargs`` to build the cleanup arguments for each node;
    a node for which that call returns a falsy value is skipped.
    """
    if settings.IS_K8S:
        return

    me = Instance.objects.me()

    # Local cleanup: capture what run_cleanup prints so it is only logged when something changed.
    local_args = me.get_cleanup_task_kwargs(**kwargs)
    if local_args:
        with StringIO() as captured, redirect_stdout(captured):
            ansible_runner.cleanup.run_cleanup(local_args)
            stdout = captured.getvalue()
        if '(changed: True)' in stdout:
            logger.info(f'Performed local cleanup with kwargs {kwargs}, output:\n{stdout}')

    # Exactly one control-plane node drives remote cleanup: among READY, enabled hybrid/control
    # instances with capacity, the one whose hostname sorts first in DESCENDING order.
    candidates = Instance.objects.filter(node_type__in=['hybrid', 'control'], node_state=Instance.States.READY, enabled=True, capacity__gt=0)
    elected = candidates.order_by('-hostname').first()
    if not elected or me.hostname != elected.hostname:
        return

    execution_nodes = Instance.objects.filter(node_type='execution', node_state=Instance.States.READY, enabled=True, capacity__gt=0)
    for inst in execution_nodes:
        remote_args = inst.get_cleanup_task_kwargs(**kwargs)
        if not remote_args:
            continue
        try:
            stdout = worker_cleanup(inst.hostname, remote_args)
            if '(changed: True)' in stdout:
                logger.info(f'Performed cleanup on execution node {inst.hostname} with output:\n{stdout}')
        except RuntimeError:
            # A failure on one execution node must not stop cleanup of the remaining nodes.
            logger.exception(f'Error running cleanup on execution node {inst.hostname}')
class CleanupImagesAndFiles:
    """
    Image and file cleanup for this node (``run_local``) and for execution nodes (``run_remote``).

    ``run`` is the combined entry point. The two halves are separate classmethods so they
    can be invoked individually.

    NOTE(review): the Codecov annotations attached to the commit flag added lines inside
    ``run_local``, ``run_remote`` and ``run`` as not covered by tests.
    """

    @classmethod
    def get_first_control_instance(cls) -> Instance | None:
        """
        Return the control-plane instance responsible for cleaning up execution nodes.

        Candidates are READY, enabled hybrid/control instances with capacity greater than 0;
        the one whose hostname sorts first in descending order is chosen. Returns None when
        there is no candidate.
        """
        candidates = Instance.objects.filter(node_type__in=['hybrid', 'control'], node_state=Instance.States.READY, enabled=True, capacity__gt=0)
        return candidates.order_by('-hostname').first()

    @classmethod
    def get_execution_instances(cls) -> QuerySet[Instance]:
        """Return the READY, enabled execution instances with capacity greater than 0."""
        return Instance.objects.filter(node_type='execution', node_state=Instance.States.READY, enabled=True, capacity__gt=0)

    @classmethod
    def run_local(cls, this_inst: Instance, **kwargs):
        """
        Run ansible-runner cleanup in-process for ``this_inst``.

        Does nothing when ``settings.IS_K8S`` is set or when
        ``this_inst.get_cleanup_task_kwargs(**kwargs)`` returns a falsy value.
        """
        if settings.IS_K8S:
            return

        cleanup_args = this_inst.get_cleanup_task_kwargs(**kwargs)
        if not cleanup_args:
            return

        # Capture what run_cleanup prints so it is only logged when something changed.
        with StringIO() as captured, redirect_stdout(captured):
            ansible_runner.cleanup.run_cleanup(cleanup_args)
            stdout = captured.getvalue()
        if '(changed: True)' in stdout:
            logger.info(f'Performed local cleanup with kwargs {kwargs}, output:\n{stdout}')

    @classmethod
    def run_remote(cls, this_inst: Instance, **kwargs):
        """
        Run cleanup on every execution instance, but only when ``this_inst`` is the elected checker.

        The checker is whatever ``get_first_control_instance`` returns; all other nodes return
        without doing anything. A ``RuntimeError`` from one execution node is logged and the
        loop continues with the next node.
        """
        elected = cls.get_first_control_instance()
        if not elected or this_inst.hostname != elected.hostname:
            return

        for inst in cls.get_execution_instances():
            cleanup_args = inst.get_cleanup_task_kwargs(**kwargs)
            if not cleanup_args:
                continue
            try:
                stdout = worker_cleanup(inst.hostname, cleanup_args)
                if '(changed: True)' in stdout:
                    logger.info(f'Performed cleanup on execution node {inst.hostname} with output:\n{stdout}')
            except RuntimeError:
                logger.exception(f'Error running cleanup on execution node {inst.hostname}')

    @classmethod
    def run(cls, **kwargs):
        """Run local cleanup, then remote cleanup, for the current instance; no-op when ``settings.IS_K8S`` is set."""
        if settings.IS_K8S:
            return

        me = Instance.objects.me()
        cls.run_local(me, **kwargs)
        cls.run_remote(me, **kwargs)


@task(queue='tower_broadcast_all')
def handle_removed_image(remove_images=None):
"""Special broadcast invocation of this method to handle case of deleted EE"""
_cleanup_images_and_files(remove_images=remove_images, file_pattern='')
CleanupImagesAndFiles.run(remove_images=remove_images, file_pattern='')

Check warning on line 439 in awx/main/tasks/system.py

View check run for this annotation

Codecov / codecov/patch

awx/main/tasks/system.py#L439

Added line #L439 was not covered by tests


@task(queue=get_task_queuename)
def cleanup_images_and_files():
_cleanup_images_and_files(image_prune=True)
CleanupImagesAndFiles.run(image_prune=True)

Check warning on line 444 in awx/main/tasks/system.py

View check run for this annotation

Codecov / codecov/patch

awx/main/tasks/system.py#L444

Added line #L444 was not covered by tests


@task(queue=get_task_queuename)
Expand Down
12 changes: 6 additions & 6 deletions awx/main/tests/functional/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import shutil

from awx.main.tasks.jobs import RunJob
from awx.main.tasks.system import execution_node_health_check, _cleanup_images_and_files
from awx.main.tasks.system import CleanupImagesAndFiles, execution_node_health_check
from awx.main.models import Instance, Job


Expand Down Expand Up @@ -48,22 +48,22 @@ def mock_job_folder(job_folder_factory):

@pytest.mark.django_db
def test_folder_cleanup_stale_file(mock_job_folder, mock_me):
_cleanup_images_and_files()
CleanupImagesAndFiles.run()
assert os.path.exists(mock_job_folder) # grace period should protect folder from deletion

_cleanup_images_and_files(grace_period=0)
CleanupImagesAndFiles.run(grace_period=0)
assert not os.path.exists(mock_job_folder) # should be deleted


@pytest.mark.django_db
def test_folder_cleanup_running_job(mock_job_folder, me_inst):
job = Job.objects.create(id=1234, controller_node=me_inst.hostname, status='running')
_cleanup_images_and_files(grace_period=0)
CleanupImagesAndFiles.run(grace_period=0)
assert os.path.exists(mock_job_folder) # running job should prevent folder from getting deleted

job.status = 'failed'
job.save(update_fields=['status'])
_cleanup_images_and_files(grace_period=0)
CleanupImagesAndFiles.run(grace_period=0)
assert not os.path.exists(mock_job_folder) # job is finished and no grace period, should delete


Expand All @@ -78,7 +78,7 @@ def test_folder_cleanup_multiple_running_jobs(job_folder_factory, me_inst):
dirs.append(job_folder_factory(job.id))
jobs.append(job)

_cleanup_images_and_files(grace_period=0)
CleanupImagesAndFiles.run(grace_period=0)

assert [os.path.exists(d) for d in dirs] == [True for i in range(num_jobs)]

Expand Down
18 changes: 18 additions & 0 deletions awx/main/tests/live/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import subprocess
import time

import pytest
Expand Down Expand Up @@ -59,3 +60,20 @@ def default_org():
def demo_inv(default_org):
inventory, _ = Inventory.objects.get_or_create(name='Demo Inventory', defaults={'organization': default_org})
return inventory


@pytest.fixture
def podman_image_generator():
    """
    Provide a callable that builds a tagless podman image on top of the awx base EE.

    The fixture itself builds nothing; each call of the returned function runs one
    ``podman build`` and raises ``subprocess.CalledProcessError`` if the build fails.
    """

    def build_tagless_image():
        # The Dockerfile is fed to podman on stdin ('-f -').
        dockerfile = (
            '\n'
            'FROM quay.io/ansible/awx-ee:latest\n'
            'RUN echo "Hello, Podman!" > /tmp/hello.txt\n'
        )
        # No '-t' option is given, so the resulting image has no tag.
        subprocess.run(['podman', 'build', '-f', '-'], capture_output=True, input=dockerfile, text=True, check=True)

    return build_tagless_image
44 changes: 43 additions & 1 deletion awx/main/tests/live/tests/test_cleanup_task.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
import os
import json
import pytest
import tempfile
import subprocess

from awx.main.tasks.receptor import _convert_args_to_cli

from awx.main.tasks.receptor import _convert_args_to_cli, run_until_complete
from awx.main.tasks.system import CleanupImagesAndFiles
from awx.main.models import Instance, JobTemplate


def get_podman_images():
    """
    Return the parsed JSON output of ``podman images --format json``.

    Raises ``subprocess.CalledProcessError`` when podman exits with a non-zero status.
    """
    listing = subprocess.run(['podman', 'images', '--format', 'json'], capture_output=True, text=True, check=True)
    return json.loads(listing.stdout)


def test_folder_cleanup_multiple_running_jobs_execution_node(request):
demo_jt = JobTemplate.objects.get(name='Demo Job Template')

Expand Down Expand Up @@ -37,3 +46,36 @@ def delete_jobs():
print('ansible-runner worker ' + remote_command)

assert [os.path.exists(job_dir) for job_dir in job_dirs] == [True for i in range(3)]


@pytest.mark.parametrize(
'worktype',
('remote', 'local'),
)
def test_tagless_image(podman_image_generator, worktype: str):
"""
Ensure podman images on Control and Hybrid nodes are deleted during cleanup.
"""
podman_image_generator()

dangling_image = next((image for image in get_podman_images() if image.get('Dangling', False)), None)
assert dangling_image

instance_me = Instance.objects.me()

match worktype:
case 'local':
CleanupImagesAndFiles.run_local(instance_me, image_prune=True)
case 'remote':
with (
mock.patch(
'awx.main.tasks.receptor.run_until_complete', lambda *args, **kwargs: run_until_complete(*args, worktype='local', ttl=None, **kwargs)
),
mock.patch('awx.main.tasks.system.CleanupImagesAndFiles.get_execution_instances', lambda: [Instance.objects.me()]),
):
CleanupImagesAndFiles.run_remote(instance_me, image_prune=True)
case _:
raise ValueError(f'worktype "{worktype}" not supported.')

for image in get_podman_images():
assert image['Id'] != dangling_image['Id']

0 comments on commit 7e0321e

Please sign in to comment.