Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed the monitoring.yml file to merge the telegraf spinup code for all hosts #65

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 68 additions & 64 deletions ansible/monitoring.yml
Original file line number Diff line number Diff line change
@@ -1,78 +1,60 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2022 Dell Inc, or its subsidiaries.
---

- name: Monitoring
# Management server runs it via compose (see below), so skip it here
hosts: hostservers,tgens,DPUs
become: yes
become: true
vars:
bmc_vars: "{{ hostvars[inventory_hostname+'bmc'] }}"
bmc_vars: "{{ hostvars[inventory_hostname+'bmc'] }}"
tasks:

- name: Copy telegraf folder to remote
ansible.builtin.copy: src=../telegraf.d dest=/root
- name: Copy telegraf folder to remote folder
ansible.builtin.copy:
src: ../telegraf.d
dest: /root
mode: "0755"

- name: Remove arista config file
ansible.builtin.file: state=absent path=/root/telegraf.d/arista.conf

# TODO: create new telegraf container or use same for Marvell card
ansible.builtin.file:
path: /root/telegraf.d/arista.conf
state: absent

# TODO: see if there is an opportunity to consolidate and remove duplicated code

- name: Nvidia | telegraf otel monitoring
- name: Nvidia | Run additional Nvidia specific tasks
when: inventory_hostname == 'bf2'
block:
- name: Nvidia | make sure emulation is running for temperature
ansible.builtin.systemd: state=started name=set_emu_param
- ansible.builtin.systemd: state=stopped name=mlnx_snap
- ansible.builtin.systemd: state=started name=spdk_tgt
- name: Nvidia | Run telegraf container on Nvidia BF
community.docker.docker_container:
name: telegraf
image: docker.io/library/telegraf:1.31
ansible.builtin.systemd:
name: set_emu_param
state: started
restart: true
detach: true
network_mode: host
restart_policy: always
mounts:
- type: bind
source: /root/telegraf.d/telegraf.conf.bf2
target: /etc/telegraf/telegraf.conf
read_only: true
- type: bind
source: /run/emu_param
target: /run/emu_param
read_only: true

# TODO: see if there is an opportunity to consolidate and remove duplicated code
- name: Nvidia | Stop mlx_snap service
ansible.builtin.systemd:
name: mlnx_snap
state: stopped

- name: Nvidia | Start telegraf service
ansible.builtin.systemd:
name: spdk_tgt
state: started

- name: Intel | telegraf otel monitoring
- name: Intel | Set proxy environment and downgrade requests package due to bug
when: inventory_hostname == 'mev'
environment: "{{ proxy_env | default({}) }}"
block:
- name: Intel | Downgrade requests package due to bug https://github.com/ansible-collections/community.docker/issues/868
ansible.builtin.pip: name=requests<2.32
- name: Intel | Run telegraf container on Intel MEV
community.docker.docker_container:
name: telegraf
image: docker.io/library/telegraf:1.31
state: started
restart: true
detach: true
network_mode: host
restart_policy: always
mounts:
- type: bind
source: /root/telegraf.d/telegraf.conf.mev
target: /etc/telegraf/telegraf.conf
read_only: true
ansible.builtin.pip:
name: requests
version: "<2.32"

- name: Run telegraf container on others
when:
- inventory_hostname != 'mev'
- inventory_hostname != 'bf2'
- name: Define telegraf environment variables (only if not mev or bf2)
when: inventory_hostname not in ['mev', 'bf2']
ansible.builtin.set_fact:
telegraf_env:
REDFISH_HOST: "{{ bmc_vars.ansible_host }}"
REDFISH_USER: "{{ bmc_vars.ansible_user }}"
REDFISH_PASSWORD: "{{ bmc_vars.ansible_password }}"
REDFISH_SYSTEM_ID: "{{ bmc_vars.resource_id }}"

- name: Run telegraf container on all hosts
community.docker.docker_container:
name: telegraf
image: docker.io/library/telegraf:1.31
Expand All @@ -81,13 +63,35 @@
detach: true
network_mode: host
restart_policy: always
mounts:
- type: bind
source: /root/telegraf.d
target: /etc/telegraf/telegraf.d
read_only: true
env:
REDFISH_HOST: "{{ bmc_vars.ansible_host }}"
REDFISH_USER: "{{ bmc_vars.ansible_user }}"
REDFISH_PASSWORD: "{{ bmc_vars.ansible_password }}"
REDFISH_SYSTEM_ID: "{{ bmc_vars.resource_id }}"
mounts: >
{{
[
{
'type': 'bind',
'source': (
'/root/telegraf.d/telegraf.conf.' + inventory_hostname
if inventory_hostname in ['bf2', 'mev']
else '/root/telegraf.d'
),
'target': (
'/etc/telegraf/telegraf.conf'
if inventory_hostname in ['bf2', 'mev']
else '/etc/telegraf/telegraf.d'
),
'read_only': True
}
] + (
[
{
'type': 'bind',
'source': '/run/emu_param',
'target': '/run/emu_param',
'read_only': True
}
] if inventory_hostname == 'bf2' else []
)
}}
env: >
{{
telegraf_env if inventory_hostname not in ['mev', 'bf2'] else {}
}}
Loading