-
Notifications
You must be signed in to change notification settings - Fork 298
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
JSON Custom Parser: can translate JSON keys into harmonized key fields and can create events from a list of dicts found under a key of the JSON
- Loading branch information
1 parent
7aa3c07
commit 3c6c4ee
Showing
11 changed files
with
436 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from dateutil.parser import parse | ||
|
||
from intelmq.lib.bot import ParserBot | ||
from intelmq.lib.harmonization import DateTime | ||
from intelmq.lib.message import Message | ||
from intelmq.lib.utils import base64_decode | ||
from intelmq.lib.harmonization import DateTime | ||
|
||
|
||
class JSONCustomParserBot(ParserBot):
    """Parser bot that maps keys of JSON reports onto event fields.

    Parameters read in :meth:`init` (with their defaults):

    * ``time_format`` (``None``): conversion passed to ``DateTime.convert``
      for ``time.source`` / ``time.destination``; the part before an
      optional ``|`` must be a key of ``DateTime.TIME_CONVERSIONS``.
    * ``json_data_format`` (``False``): the report is one JSON document and
      the records to parse are found under ``json_data_key``.
    * ``json_data_key`` (``''``): key holding the records when
      ``json_data_format`` is set.
    * ``multiple_msg_field`` (``None``): event field whose list value is
      expanded into one event per list element.
    * ``translate_fields`` (``{}``): mapping of event field name to the
      dotted key of the flattened JSON message.
    * ``splitlines`` (``False``): treat every line of the report as a
      separate JSON document.
    * ``default_url_protocol`` (``'http://'``): prefix for ``*.url`` values
      that contain no ``://``.
    * ``type`` (``None``): value for ``classification.type`` when the
      translated fields did not set one.
    """

    def init(self):
        """Read the bot parameters and validate ``time_format``.

        Raises:
            InvalidArgument: if the conversion name of ``time_format`` is
                not a key of ``DateTime.TIME_CONVERSIONS``.
        """
        self.time_format = getattr(self.parameters, "time_format", None)
        if self.time_format and self.time_format.split('|')[0] not in DateTime.TIME_CONVERSIONS:
            # Imported here because the module never imported the name it
            # raises; without this the error path ends in a NameError.
            from intelmq.lib.exceptions import InvalidArgument
            raise InvalidArgument('time_format', got=self.time_format,
                                  expected=list(DateTime.TIME_CONVERSIONS.keys()),
                                  docs='https://intelmq.readthedocs.io/en/latest/user/Bots.html#json-custom-parser')

        self.json_data_format = getattr(self.parameters, 'json_data_format', False)
        self.json_data_key = getattr(self.parameters, 'json_data_key', '')
        self.multiple_msg_field = getattr(self.parameters, 'multiple_msg_field', None)
        self.translate_fields = getattr(self.parameters, 'translate_fields', {})
        self.split_lines = getattr(self.parameters, 'splitlines', False)
        self.default_url_protocol = getattr(self.parameters, 'default_url_protocol', 'http://')
        # process() only uses this value when it is truthy, so a missing
        # parameter must not abort initialisation.
        self.classification_type = getattr(self.parameters, 'type', None)

    def flatten_json(self, json_object):
        """Flatten nested dicts into one dict with dot-separated keys.

        Only dicts are descended into; every other value (lists included)
        is stored unchanged. An empty dict contributes no entry, and a
        non-dict ``json_object`` is stored under the empty key ``''``.

        Args:
            json_object: the decoded JSON value to flatten.

        Returns:
            dict: mapping of dotted key path to leaf value.
        """
        out = {}

        def flatten(value, prefix=''):
            if isinstance(value, dict):
                for key, child in value.items():
                    flatten(child, prefix + key + '.')
            else:
                # Drop the trailing separator added by the parent level.
                out[prefix[:-1]] = value

        flatten(json_object)
        return out

    def _translate_fields(self, flat_msg):
        """Build the event field dict for one flattened JSON message."""
        event_msg = {}

        for key, source_key in self.translate_fields.items():
            data = flat_msg.get(source_key)

            if key in ("time.source", "time.destination"):
                if data is None:
                    # The mapped key is absent from this message; there is
                    # nothing to convert, so leave the field unset.
                    continue
                try:
                    data = int(data)
                except (TypeError, ValueError):
                    # Not integer-like (e.g. a date string); hand the value
                    # to the converter unchanged.
                    pass
                data = DateTime.convert(data, format=self.time_format)

            elif key.endswith('.url'):
                if not data:
                    continue
                if '://' not in data:
                    data = self.default_url_protocol + data

            event_msg[key] = data

        return event_msg

    def _split_multiple(self, event_msg):
        """Return one field dict per element of the ``multiple_msg_field`` list."""
        field = self.multiple_msg_field
        if field in event_msg and isinstance(event_msg[field], list):
            # An empty list yields no dicts at all, hence no events.
            return [{**event_msg, field: value} for value in event_msg[field]]
        return [event_msg]

    def process(self):
        """Receive one report, emit its events and acknowledge the report.

        The decoded report is split into records according to
        ``json_data_format`` / ``splitlines``; empty records are skipped.
        Each record is flattened, translated through ``translate_fields``
        and, if ``multiple_msg_field`` holds a list, expanded into several
        events. Every event gets the originating record as ``raw``.
        """
        report = self.receive_message()
        raw_report = base64_decode(report["raw"])

        if self.json_data_format:
            lines = Message.unserialize(raw_report)[self.json_data_key]
        elif self.split_lines:
            lines = raw_report.splitlines()
        else:
            lines = [raw_report]

        for line in lines:
            if not line:
                continue

            # With json_data_format the records are already decoded objects.
            msg = line if self.json_data_format else Message.unserialize(line)
            event_msg = self._translate_fields(self.flatten_json(msg))

            for single_msg in self._split_multiple(event_msg):
                event = self.new_event(report)
                event.update(single_msg)

                if self.classification_type and "classification.type" not in event:
                    event.add('classification.type', self.classification_type)
                event['raw'] = Message.serialize(line) if self.json_data_format else line

                self.send_message(event)

        self.acknowledge_message()
|
||
|
||
# Module-level alias for the bot class.
# NOTE(review): presumably the name the IntelMQ loader looks up when starting
# this bot -- confirm against intelmq.lib.bot.
BOT = JSONCustomParserBot
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
{ | ||
"complete": false, | ||
"count": 9632240, | ||
"data": [ | ||
{ | ||
"ip": "179.124.36.196", | ||
"seen": true, | ||
"classification": "malicious", | ||
"spoofable": false, | ||
"first_seen": "2020-01-13", | ||
"last_seen": "2020-12-14", | ||
"actor": "unknown", | ||
"tags": [ | ||
"SSH Scanner", | ||
"SSH Worm" | ||
], | ||
"cve": [], | ||
"metadata": { | ||
"country": "Brazil", | ||
"country_code": "BR", | ||
"city": "São Paulo", | ||
"organization": "EQUINIX BRASIL", | ||
"region": "São Paulo", | ||
"rdns": "196.36.124.179.static.sp2.alog.com.br", | ||
"asn": "AS16397", | ||
"tor": false, | ||
"os": "Linux 3.1-3.10", | ||
"category": "isp", | ||
"vpn": false, | ||
"vpn_service": "" | ||
}, | ||
"raw_data": { | ||
"scan": [ | ||
{ | ||
"port": 22, | ||
"protocol": "TCP" | ||
}, | ||
{ | ||
"port": 2222, | ||
"protocol": "TCP" | ||
} | ||
], | ||
"web": {}, | ||
"ja3": [] | ||
} | ||
}, | ||
{ | ||
"ip": "189.86.227.150", | ||
"seen": true, | ||
"classification": "malicious", | ||
"spoofable": false, | ||
"first_seen": "2019-01-17", | ||
"last_seen": "2020-12-14", | ||
"actor": "unknown", | ||
"tags": [ | ||
"Eternalblue", | ||
"SMB Scanner" | ||
], | ||
"cve": [ | ||
"CVE-2017-0144" | ||
], | ||
"metadata": { | ||
"country": "Brazil", | ||
"country_code": "BR", | ||
"city": "Sorocaba", | ||
"organization": "CLARO S.A.", | ||
"region": "São Paulo", | ||
"rdns": "bkbrasil-g2-0-0-15122-iacc02.gna.embratel.net.br", | ||
"asn": "AS4230", | ||
"tor": false, | ||
"os": "Windows 7/8", | ||
"category": "isp", | ||
"vpn": false, | ||
"vpn_service": "" | ||
}, | ||
"raw_data": { | ||
"scan": [ | ||
{ | ||
"port": 445, | ||
"protocol": "TCP" | ||
} | ||
], | ||
"web": {}, | ||
"ja3": [] | ||
} | ||
} | ||
], | ||
"message": "ok" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"domain": "kreditohneschufa48.de", "fseen": 1576368000, "lseen": 1607731200, "collect": 1607817600, "tags": {"str": ["spam"], "codes": [2]}, "resolved": {"ip": {"a": ["23.60.91.225", "23.200.237.225"], "alias": [], "cname": []}, "whois": {"created": "1970-01-01 00:00:00", "updated": "1970-01-01 00:00:00", "expires": "1970-01-01 00:00:00", "age": 0, "registrar": "unknown", "registrant": "unknown", "havedata": "false"}}, "score": {"total": 3, "src": 60.2, "tags": 0.75, "frequency": 0.07}, "fp": {"alarm": "false", "descr": ""}, "threat": [], "id": "d267c60f-5709-3698-9523-f727f42ab5c7", "title": "RST Threat feed. IOC: kreditohneschufa48.de", "description": "IOC with tags: spam"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"url": "114.234.166.255:39436/mozi.a", "fseen": 1598918400, "lseen": 1601942400, "collect": 1602028800, "tags": {"str": ["malware"], "codes": [10]}, "score": {"total": 10, "src": 73.06, "tags": 0.89, "frequency": 0.58}, "resolved": {"status": 503}, "fp": {"alarm": "true", "descr": "Resource unavailable"}, "threat": [], "id": "987f5038-298f-37eb-a1d5-a17105f6b4b5", "title": "RST Threat feed. IOC: 114.234.166.255:39436/mozi.a", "description": "IOC with tags: malware"} |
64 changes: 64 additions & 0 deletions
64
intelmq/tests/bots/parsers/json_custom/test_json_key_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# -*- coding: utf-8 -*- | ||
import base64 | ||
import os | ||
import unittest | ||
|
||
import intelmq.lib.test as test | ||
from intelmq.bots.parsers.json_custom.parser import JSONCustomParserBot | ||
|
||
# Read the sample document next to this test file and base64-encode it for
# use as the report's "raw" field.
with open(os.path.join(os.path.dirname(__file__), 'json_key_data.json'), 'rb') as fh:
    RAW = base64.b64encode(fh.read()).decode()

# Input report fed to the bot under test.
REPORT = {"feed.name": "Test Feed",
          "raw": RAW,
          "__type": "Report",
          }
# Expected first output event (compared against message 0 in test_sample).
EVENT = {'__type': 'Event',
         'classification.type': 'malware',
         'extra.tags': ['SSH Scanner', 'SSH Worm'],
         'feed.name': 'Test Feed',
         'raw': 'eyJpcCI6ICIxNzkuMTI0LjM2LjE5NiIsICJzZWVuIjogdHJ1ZSwgImNsYXNzaWZpY2F0aW9'
                'uIjogIm1hbGljaW91cyIsICJzcG9vZmFibGUiOiBmYWxzZSwgImZpcnN0X3NlZW4iOiAiMj'
                'AyMC0wMS0xMyIsICJsYXN0X3NlZW4iOiAiMjAyMC0xMi0xNCIsICJhY3RvciI6ICJ1bmtub'
                '3duIiwgInRhZ3MiOiBbIlNTSCBTY2FubmVyIiwgIlNTSCBXb3JtIl0sICJjdmUiOiBbXSwg'
                'Im1ldGFkYXRhIjogeyJjb3VudHJ5IjogIkJyYXppbCIsICJjb3VudHJ5X2NvZGUiOiAiQlI'
                'iLCAiY2l0eSI6ICJTXHUwMGUzbyBQYXVsbyIsICJvcmdhbml6YXRpb24iOiAiRVFVSU5JWC'
                'BCUkFTSUwiLCAicmVnaW9uIjogIlNcdTAwZTNvIFBhdWxvIiwgInJkbnMiOiAiMTk2LjM2L'
                'jEyNC4xNzkuc3RhdGljLnNwMi5hbG9nLmNvbS5iciIsICJhc24iOiAiQVMxNjM5NyIsICJ0'
                'b3IiOiBmYWxzZSwgIm9zIjogIkxpbnV4IDMuMS0zLjEwIiwgImNhdGVnb3J5IjogImlzcCI'
                'sICJ2cG4iOiBmYWxzZSwgInZwbl9zZXJ2aWNlIjogIiJ9LCAicmF3X2RhdGEiOiB7InNjYW'
                '4iOiBbeyJwb3J0IjogMjIsICJwcm90b2NvbCI6ICJUQ1AifSwgeyJwb3J0IjogMjIyMiwgI'
                'nByb3RvY29sIjogIlRDUCJ9XSwgIndlYiI6IHt9LCAiamEzIjogW119LCAiX190eXBlIjog'
                'ImRpY3QifQ==',
         'time.source': '2020-12-14T00:00:00+00:00',
         'source.ip': '179.124.36.196'
         }
|
||
|
||
class TestJSONCustomParserBot(test.BotTestCase, unittest.TestCase):
    """
    Checks JSONCustomParserBot on a document whose records sit under a key.
    """

    @classmethod
    def set_bot(cls):
        cls.bot_reference = JSONCustomParserBot

    def test_sample(self):
        """ The first emitted event must equal EVENT. """
        field_map = {
            "source.ip": "ip",
            "time.source": "last_seen",
            "extra.tags": "tags",
        }
        self.sysconfig = {
            "json_data_format": True,
            "json_data_key": "data",
            "type": "malware",
            "time_format": "from_format_midnight|%Y-%m-%d",
            "translate_fields": field_map,
        }
        self.input_message = REPORT
        self.run_bot()
        self.assertMessageEqual(0, EVENT)
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()
74 changes: 74 additions & 0 deletions
74
intelmq/tests/bots/parsers/json_custom/test_multiple_msg.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# -*- coding: utf-8 -*- | ||
import base64 | ||
import os | ||
import unittest | ||
|
||
import intelmq.lib.test as test | ||
from intelmq.bots.parsers.json_custom.parser import JSONCustomParserBot | ||
|
||
# Read the sample document next to this test file and base64-encode it for
# use as the report's "raw" field.
with open(os.path.join(os.path.dirname(__file__), 'multiple_msg.json'), 'rb') as fh:
    RAW1 = base64.b64encode(fh.read()).decode()

# Input report fed to the bot under test.
MULTILINE_REPORT = {"feed.name": "RSTThreats Domain Feed",
                    "raw": RAW1,
                    "__type": "Report",
                    }

# Expected first output event (compared against message 0 in
# test_multiple_msg).
MULTIPLE_EVENT1 = {'__type': 'Event',
                   'classification.type': 'malware',
                   'extra.tags': ['spam'],
                   'extra.threat_info': [],
                   'feed.name': 'RSTThreats Domain Feed',
                   'raw': 'eyJkb21haW4iOiAia3JlZGl0b2huZXNjaHVmYTQ4LmRlIiwgImZzZWVuIjogMTU3NjM2O'
                          'DAwMCwgImxzZWVuIjogMTYwNzczMTIwMCwgImNvbGxlY3QiOiAxNjA3ODE3NjAwLCAidG'
                          'FncyI6IHsic3RyIjogWyJzcGFtIl0sICJjb2RlcyI6IFsyXX0sICJyZXNvbHZlZCI6IHs'
                          'iaXAiOiB7ImEiOiBbIjIzLjYwLjkxLjIyNSIsICIyMy4yMDAuMjM3LjIyNSJdLCAiYWxp'
                          'YXMiOiBbXSwgImNuYW1lIjogW119LCAid2hvaXMiOiB7ImNyZWF0ZWQiOiAiMTk3MC0wM'
                          'S0wMSAwMDowMDowMCIsICJ1cGRhdGVkIjogIjE5NzAtMDEtMDEgMDA6MDA6MDAiLCAiZX'
                          'hwaXJlcyI6ICIxOTcwLTAxLTAxIDAwOjAwOjAwIiwgImFnZSI6IDAsICJyZWdpc3RyYXI'
                          'iOiAidW5rbm93biIsICJyZWdpc3RyYW50IjogInVua25vd24iLCAiaGF2ZWRhdGEiOiAi'
                          'ZmFsc2UifX0sICJzY29yZSI6IHsidG90YWwiOiAzLCAic3JjIjogNjAuMiwgInRhZ3MiO'
                          'iAwLjc1LCAiZnJlcXVlbmN5IjogMC4wN30sICJmcCI6IHsiYWxhcm0iOiAiZmFsc2UiLC'
                          'AiZGVzY3IiOiAiIn0sICJ0aHJlYXQiOiBbXSwgImlkIjogImQyNjdjNjBmLTU3MDktMzY'
                          '5OC05NTIzLWY3MjdmNDJhYjVjNyIsICJ0aXRsZSI6ICJSU1QgVGhyZWF0IGZlZWQuIElP'
                          'Qzoga3JlZGl0b2huZXNjaHVmYTQ4LmRlIiwgImRlc2NyaXB0aW9uIjogIklPQyB3aXRoI'
                          'HRhZ3M6IHNwYW0ifQ==',
                   'source.fqdn': 'kreditohneschufa48.de',
                   'source.ip': '23.60.91.225',
                   'time.source': '2020-12-12T00:00:00+00:00'
                   }

# Expected second output event: identical to the first except for source.ip.
MULTIPLE_EVENT2 = MULTIPLE_EVENT1.copy()
MULTIPLE_EVENT2["source.ip"] = "23.200.237.225"
|
||
|
||
class TestJSONCustomParserBot(test.BotTestCase, unittest.TestCase):
    """
    Checks that JSONCustomParserBot expands a list field into several events.
    """

    @classmethod
    def set_bot(cls):
        cls.bot_reference = JSONCustomParserBot

    def test_multiple_msg(self):
        """ Both emitted events must equal the expected events, in order. """
        field_map = {
            "source.fqdn": "domain",
            "time.source": "lseen",
            "extra.tags": "tags.str",
            "extra.threat_info": "threat",
            "source.ip": "resolved.ip.a",
        }
        self.sysconfig = {
            "splitlines": True,
            "type": "malware",
            "time_format": "epoch_millis",
            "multiple_msg_field": "source.ip",
            "translate_fields": field_map,
        }
        self.input_message = MULTILINE_REPORT
        self.run_bot()
        for position, expected in enumerate((MULTIPLE_EVENT1, MULTIPLE_EVENT2)):
            self.assertMessageEqual(position, expected)
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()
Oops, something went wrong.