From 83313f2c1a27cd8193e1a40839dec7f8450cb1bf Mon Sep 17 00:00:00 2001
From: Sebastian Wagner
Date: Mon, 13 Jan 2025 19:53:43 +0100
Subject: [PATCH] contrib eventdb: add to_json script

Extract data from the IntelMQ EventDB and convert it to JSON to use it
with intelmqctl
---
Reviewer notes (this section is ignored by git am):
- to_json.py: --id accepts several IDs as documented in the README, --help
  works without the configuration file, diagnostics go to stderr and the
  database connection is closed.
- README.md: the unrelated entry about the `raws` table is kept.
- The commit and blob IDs are carried over from the original patch.

 contrib/eventdb/README.md  | 12 ++++-
 contrib/eventdb/to_json.py | 93 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100755 contrib/eventdb/to_json.py

diff --git a/contrib/eventdb/README.md b/contrib/eventdb/README.md
index 7693a900a..861a5e83e 100644
--- a/contrib/eventdb/README.md
+++ b/contrib/eventdb/README.md
@@ -10,8 +10,9 @@ EventDB Utilities
 
 - Apply Malware Name Mapping: Applies the malware name mapping to the eventdb. Source and destination columns can be given, also a local file. If no local file is present, the mapping can be downloaded on demand. It queries the database for all distinct malware names with the taxonomy "malicious-code" and sets another column to the malware family name.
 - Apply Domain Suffix: Writes the public domain suffix to the `source.domain_suffix` / `destination.domain_suffix` columns, extracted from `source.fqdn` / `destination.fqdn`.
-- PostgreSQL trigger keeping track of the oldest inserted/updated "time.source" data. This can be useful to (re-)generate statistics or aggregation data.
+- `trigger_oldest_time.source.sql`: PostgreSQL trigger keeping track of the oldest inserted/updated "time.source" data. This can be useful to (re-)generate statistics or aggregation data.
 - SQL queries to set up a separate `raws` table, described in https://docs.intelmq.org/latest/admin/database/postgresql/#separating-raw-values-in-postgresql-using-view-and-trigger
+- `to_json.py`: Export EventDB data to JSON, to use it in IntelMQ again.
 
 Usage
 -----
@@ -22,6 +23,15 @@ See `--help` for more information:
 ```
 apply_mapping_eventdb.py -h
 apply_domain_suffix.py -h
+to_json.py -h
 ```
 
 The SQL script can be executed in the database directly.
+
+### `to_json.py`
+
+- Get an event by ID: `to_json.py --id $id`
+  - You can give multiple IDs: `to_json.py --id $id1 $id2`
+- Pretty printed: Add `--pretty`
+- Inject the data into an IntelMQ bot (dry run):
+  - `intelmqctl run $botid process --dry-run --show-sent --msg '$jsondata'`
diff --git a/contrib/eventdb/to_json.py b/contrib/eventdb/to_json.py
new file mode 100755
index 000000000..20c9edee2
--- /dev/null
+++ b/contrib/eventdb/to_json.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python3
+
+# SPDX-FileCopyrightText: 2024 Institute for Common Good Technology
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+"""Export events from the IntelMQ EventDB as JSON.
+
+Every selected row of the ``events`` table is printed to stdout as one JSON
+object, which can be fed into an IntelMQ bot again, for example with
+``intelmqctl run $botid process --msg``.
+"""
+
+from argparse import ArgumentParser
+from datetime import datetime
+import json
+import sys
+
+from psycopg2 import connect
+from psycopg2.extras import RealDictCursor
+
+CONFIG_FILE = '/etc/intelmq/eventdb-serve.conf'
+
+
+def load_conninfo(path=CONFIG_FILE):
+    """Return the connection string stored in the JSON file *path*."""
+    with open(path) as config_file:
+        return json.load(config_file)['libpg conninfo']
+
+
+def row_to_event(row):
+    """Convert one row of the ``events`` table to a JSON-serialisable dict.
+
+    The ``id`` column and all columns without a value are left out,
+    datetime values are converted to ISO 8601 strings.
+    """
+    event = {}
+    for key, value in row.items():
+        if key == 'id' or value is None:
+            continue
+        if isinstance(value, datetime):
+            # data from the database has TZ information already included
+            value = value.isoformat()
+        event[key] = value
+    return event
+
+
+def main():
+    """Print the requested events as JSON and return the exit code."""
+    parser = ArgumentParser(
+        prog='EventDB to JSON',
+        description='Extract data from the IntelMQ EventDB')
+    parser.add_argument('-v', '--verbose', action='store_true')
+    parser.add_argument('-i', '--id', type=int, nargs='+', required=True,
+                        help='Get events by ID, multiple IDs can be given')
+    parser.add_argument('-p', '--pretty', action='store_true',
+                        help='Pretty print JSON output')
+    # Parse the arguments first, so that --help works without configuration
+    args = parser.parse_args()
+
+    try:
+        conninfo = load_conninfo()
+    except (OSError, ValueError, KeyError, TypeError) as exc:
+        print(f'Could not load database configuration. {exc}',
+              file=sys.stderr)
+        return 2
+
+    if args.verbose:
+        # Diagnostics go to stderr, stdout carries only the JSON data
+        print(f'Using DSN {conninfo!r}.', file=sys.stderr)
+
+    db = connect(dsn=conninfo)
+    try:
+        with db.cursor(cursor_factory=RealDictCursor) as cur:
+            # psycopg2 adapts the Python list to a PostgreSQL array
+            cur.execute('SELECT * FROM events WHERE id = ANY(%s) ORDER BY id',
+                        (args.id, ))
+            rows = cur.fetchall()
+    finally:
+        db.close()
+
+    for row in rows:
+        print(json.dumps(row_to_event(row),
+                         indent=2 if args.pretty else None))
+
+    found = {row['id'] for row in rows}
+    for missing in sorted(set(args.id) - found):
+        print(f'No event with ID {missing} found.', file=sys.stderr)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())