Skip to content

Commit

Permalink
Logging improvement (#351)
Browse files Browse the repository at this point in the history
* log_level env var, shared class for logging

* logging in publishers

* logging in presenters

* gunicorn logging

* logging in bots

* logging in collectors

* logging in core

* add warning log level

* f-strings, core log, remote collectors

* log_level env var, shared class for logging

* logging in publishers

* logging in presenters

* gunicorn logging

* logging in bots

* logging in collectors

* logging in core

* add warning log level

* f-strings, core log, remote collectors

* remove module_id, simplify logging, default threads for gunicorn

* colours of logs

* exception logging in collectors, debug/info balance

* log_level in .env.example

* separate taranis and other modules log level, traefik, redis and nginx log level settings

* better exception, more log control, cleanup

* fix error logging

---------

Co-authored-by: Progress <[email protected]>
  • Loading branch information
multiflexi and Progress1 authored Oct 21, 2024
1 parent 48af041 commit d573455
Show file tree
Hide file tree
Showing 44 changed files with 3,383 additions and 1,952 deletions.
8 changes: 8 additions & 0 deletions docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,11 @@ PRESENTER_PORT=5002
KEYCLOAK_USER=admin
KEYCLOAK_PASSWORD=supersecret
POSTGRES_KEYCLOAK_PASSWORD=supersecret

# Logging of Taranis-NG and used Python modules
TARANIS_LOG_LEVEL=DEBUG
MODULES_LOG_LEVEL=WARN
# To change Traefik logging, go to traefik.yml and set it there
# To change Nginx log level, go to docker-compose.yml and change NGINX_LOG_LEVEL and NGINX_ACCESS_LOG there
# To change Redis log level, go to docker-compose.yml and change --loglevel there

1 change: 1 addition & 0 deletions docker/Dockerfile.gui
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ FROM nginx:stable-alpine AS production-stage
RUN rm /docker-entrypoint.d/30-tune-worker-processes.sh
COPY ./src/gui/extras/30-envsubst-on-javascript.sh /docker-entrypoint.d/
COPY ./src/gui/extras/40-connection-tuning.sh /docker-entrypoint.d/
COPY ./src/gui/extras/50-logging.sh /docker-entrypoint.d/
RUN chmod +x /docker-entrypoint.d/*.sh

COPY ./src/gui/extras/default.conf /etc/nginx/conf.d/default.conf
Expand Down
16 changes: 14 additions & 2 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ services:
redis:
image: "redis:${REDIS_TAG}"
restart: unless-stopped
command:
- "--loglevel notice" # Change this to 'debug', 'verbose', 'notice', 'warning'
environment:
TZ: "${TZ}"
volumes:
Expand Down Expand Up @@ -67,7 +69,8 @@ services:
CWE_UPDATE_FILE: "${CWE_UPDATE_FILE}"

TZ: "${TZ}"
DEBUG: "true"
TARANIS_LOG_LEVEL: "${TARANIS_LOG_LEVEL}"
MODULES_LOG_LEVEL: "${MODULES_LOG_LEVEL}"
DEBUG_SQL: "false"
labels:
traefik.enable: "true"
Expand Down Expand Up @@ -115,6 +118,8 @@ services:
TARANIS_NG_CORE_SSE: "http://core/sse"
WORKERS_PER_CORE: "1"
TZ: "${TZ}"
TARANIS_LOG_LEVEL: "${TARANIS_LOG_LEVEL}"
MODULES_LOG_LEVEL: "${MODULES_LOG_LEVEL}"
logging:
driver: "json-file"
options:
Expand All @@ -140,8 +145,9 @@ services:
environment:
TARANIS_NG_CORE_URL: "http://core"
WORKERS_PER_CORE: "1"
DEBUG: "true"
TZ: "${TZ}"
TARANIS_LOG_LEVEL: "${TARANIS_LOG_LEVEL}"
MODULES_LOG_LEVEL: "${MODULES_LOG_LEVEL}"
volumes:
- "collector_storage:/app/storage"
logging:
Expand Down Expand Up @@ -170,6 +176,8 @@ services:
TARANIS_NG_CORE_URL: "http://core"
WORKERS_PER_CORE: "1"
TZ: "${TZ}"
TARANIS_LOG_LEVEL: "${TARANIS_LOG_LEVEL}"
MODULES_LOG_LEVEL: "${MODULES_LOG_LEVEL}"
ports:
- "${PRESENTER_PORT}:80"
volumes:
Expand Down Expand Up @@ -200,6 +208,8 @@ services:
TARANIS_NG_CORE_URL: "http://core"
WORKERS_PER_CORE: "1"
TZ: "${TZ}"
TARANIS_LOG_LEVEL: "${TARANIS_LOG_LEVEL}"
MODULES_LOG_LEVEL: "${MODULES_LOG_LEVEL}"
logging:
driver: "json-file"
options:
Expand Down Expand Up @@ -231,6 +241,8 @@ services:
VUE_APP_TARANIS_NG_CORE_SSE: "${TARANIS_NG_HTTPS_URI}/sse"
VUE_APP_TARANIS_NG_LOCALE: "${VUE_APP_TARANIS_NG_LOCALE}"
TZ: "${TZ}"
NGINX_LOG_LEVEL: "warn" # Change this to 'debug', 'info', 'notice', 'warn', 'error', 'crit', 'alert', 'emerg'
NGINX_ACCESS_LOG: "on" # Change this to 'off' or 'on'
labels:
traefik.enable: "true"
traefik.http.services.taranis-gui.loadbalancer.server.port: "80"
Expand Down
33 changes: 27 additions & 6 deletions docker/gunicorn_conf.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,39 @@
"""This module contains the configuration settings for Gunicorn.
The following environment variables are used:
- WORKERS_PER_CORE: Number of workers per CPU core. Default is 2.
- WEB_CONCURRENCY: Number of worker processes. Default is 1.
- HOST: The host IP address to bind to. Default is 0.0.0.0.
- PORT: The port number to bind to. Default is 80.
- BIND: The bind address and port. If not provided, it will use the host and port values.
- MODULES_LOG_LEVEL: The log level. Default is 'WARNING'.
The module defines the following variables:
- loglevel: The log level to be used by Gunicorn.
- workers: The number of worker processes.
- bind: The bind address and port.
- keepalive: The keepalive timeout.
- errorlog: The error log file. '-' means stderr.
For debugging and testing purposes, the module also defines the following variables:
- log_data: A dictionary containing the log level, number of workers, bind address and port, workers per core, host, and port.
"""

from __future__ import print_function

import json
import multiprocessing
import os

workers_per_core_str = os.getenv("WORKERS_PER_CORE", "2")
web_concurrency_str = os.getenv("WEB_CONCURRENCY", None)
web_concurrency_str = os.getenv("WEB_CONCURRENCY", "1")
host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "80")
bind_env = os.getenv("BIND", None)
use_loglevel = os.getenv("LOG_LEVEL", "info")
use_loglevel = os.getenv("MODULES_LOG_LEVEL", "WARNING")
if bind_env:
use_bind = bind_env
else:
use_bind = "{host}:{port}".format(host=host, port=port)
use_bind = f"{host}:{port}"

cores = multiprocessing.cpu_count()
workers_per_core = float(workers_per_core_str)
Expand All @@ -25,7 +45,7 @@
web_concurrency = int(default_web_concurrency)

# Gunicorn config variables
loglevel = use_loglevel
loglevel = use_loglevel.lower()
workers = web_concurrency
bind = use_bind
keepalive = 120
Expand All @@ -41,4 +61,5 @@
"host": host,
"port": port,
}
print(json.dumps(log_data))
if loglevel.lower() == "debug":
print(log_data)
5 changes: 5 additions & 0 deletions docker/traefik/traefik.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@ api:
insecure: true
dashboard: true

# Change this to 'TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL', 'PANIC'
log:
level: WARN

providers:
docker:
exposedByDefault: false
file:
directory: /etc/traefik/dynamic
watch: true

# Comment out the following line to disable the access log
accessLog: {}
93 changes: 64 additions & 29 deletions src/bots/bots/analyst_bot.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,41 @@
"""AnalystBot class."""

import re

from .base_bot import BaseBot
from managers.log_manager import log_debug, log_bot_activity
from managers.log_manager import logger
from shared.schema import news_item
from shared.schema.parameter import Parameter, ParameterType
from remote.core_api import CoreApi


class AnalystBot(BaseBot):
"""AnalystBot class.
This class represents a bot for news items analysis.
Attributes:
type (str): The type of the bot.
name (str): The name of the bot.
description (str): The description of the bot.
parameters (list): The list of parameters for the bot.
regexp (list): The list of regular expressions for data analysis.
attr_name (list): The list of attribute names for extracted data.
news_items (list): The list of news items.
news_items_data (list): The list of news items data.
Methods:
execute(preset): Executes the bot with the given preset.
execute_on_event(preset, event_type, data): Executes the bot on an event with the given preset, event type, and data.
"""

type = "ANALYST_BOT"
name = "Analyst Bot"
description = "Bot for news items analysis"

parameters = [Parameter(0, "SOURCE_GROUP", "Source Group", "OSINT Source group to inspect", ParameterType.STRING),
Parameter(0, "REGULAR_EXPRESSION", "Regular Expression", "Regular expression for data analysis",
ParameterType.STRING),
Parameter(0, "ATTRIBUTE_NAME", "Attribute name", "Name of attribute for extracted data",
ParameterType.STRING)
]
parameters = [
Parameter(0, "SOURCE_GROUP", "Source Group", "OSINT Source group to inspect", ParameterType.STRING),
Parameter(0, "REGULAR_EXPRESSION", "Regular Expression", "Regular expression for data analysis", ParameterType.STRING),
Parameter(0, "ATTRIBUTE_NAME", "Attribute name", "Name of attribute for extracted data", ParameterType.STRING),
]

parameters.extend(BaseBot.parameters)

Expand All @@ -27,33 +45,40 @@ class AnalystBot(BaseBot):
news_items_data = []

def execute(self, preset):
"""Execute the analyst bot with the given preset.
Parameters:
preset (Preset): The preset containing the parameter values.
Raises:
Exception: If an error occurs during execution.
"""
try:
source_group = preset.parameter_values['SOURCE_GROUP']
regexp = preset.parameter_values['REGULAR_EXPRESSION']
attr_name = preset.parameter_values['ATTRIBUTE_NAME']
interval = preset.parameter_values['REFRESH_INTERVAL']
source_group = preset.parameter_values["SOURCE_GROUP"] # noqa F841
regexp = preset.parameter_values["REGULAR_EXPRESSION"]
attr_name = preset.parameter_values["ATTRIBUTE_NAME"]
interval = preset.parameter_values["REFRESH_INTERVAL"]

# support for multiple regexps
regexp = regexp.split(';;;')
attr_name = attr_name.split(';;;')
regexp = regexp.split(";;;")
attr_name = attr_name.split(";;;")
if len(regexp) > len(attr_name):
regexp = regexp[:len(attr_name)]
regexp = regexp[: len(attr_name)]
elif len(attr_name) > len(regexp):
attr_name = attr_name[:len(regexp)]
attr_name = attr_name[: len(regexp)]

bots_params = dict(zip(attr_name, regexp))
limit = BaseBot.history(interval)
log_bot_activity(preset.name, 'running with date limit {}'.format(limit))
logger.info(f"{preset.name}: running with date limit {limit}")
news_items_data, code = CoreApi.get_news_items_data(limit)
if code == 200 and news_items_data is not None:
for item in news_items_data:
if item:
news_item_id = item['id']
title = item['title']
preview = item['review']
content = item['content']
news_item_id = item["id"]
title = item["title"]
preview = item["review"]
content = item["content"]

analyzed_text = ' '.join([title, preview, content])
analyzed_text = " ".join([title, preview, content])

attributes = []
for key, value in bots_params.items():
Expand All @@ -67,27 +92,37 @@ def execute(self, preset):
# print('Found:', found_value, flush=True)
if found_value not in uniq_list:
uniq_list.append(found_value)

# app is checking combination ID + Value in DB before INSERT (attribute_value_identical) so check for some duplicity here (faster)

# app is checking combination ID + Value in DB before INSERT (attribute_value_identical)
# so check for duplicates here first (faster)
for found_value in uniq_list:
binary_mime_type = ''
binary_value = ''
binary_mime_type = ""
binary_value = ""
news_attribute = news_item.NewsItemAttribute(key, found_value, binary_mime_type, binary_value)
attributes.append(news_attribute)

if len(attributes) > 0:
log_debug('Processing item id: {}, {}, Found: {}'.format(news_item_id, item['collected'], len(attributes)))
logger.debug(f"Processing item id: {news_item_id}, {item['collected']}, Found: {len(attributes)}")
news_item_attributes_schema = news_item.NewsItemAttributeSchema(many=True)
CoreApi.update_news_item_attributes(news_item_id, news_item_attributes_schema.dump(attributes))

except Exception as error:
BaseBot.print_exception(preset, error)

def execute_on_event(self, preset, event_type, data):
"""Execute the specified preset on the given event.
Parameters:
preset (Preset): The preset to execute.
event_type (str): The type of the event.
data (dict): The data associated with the event.
Raises:
Exception: If there is an error while executing the preset.
"""
try:
source_group = preset.parameter_values['SOURCE_GROUP']
regexp = preset.parameter_values['REGULAR_EXPRESSION']
attr_name = preset.parameter_values['ATTRIBUTE_NAME']
source_group = preset.parameter_values["SOURCE_GROUP"] # noqa F841
regexp = preset.parameter_values["REGULAR_EXPRESSION"] # noqa F841
attr_name = preset.parameter_values["ATTRIBUTE_NAME"] # noqa F841

except Exception as error:
BaseBot.print_exception(preset, error)
Loading

0 comments on commit d573455

Please sign in to comment.