Commit 9073d16

Merge branch 'main' into attribute_description
multiflexi committed Mar 26, 2024
2 parents 2941ccf + 35ec27f commit 9073d16
Showing 75 changed files with 2,060 additions and 1,129 deletions.
37 changes: 37 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,37 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "docker"
+    directory: "/docker"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/src/bots"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/src/collectors"
+    schedule:
+      interval: "weekly"
+    ignore:
+      - dependency-name: "slackclient"
+  # - package-ecosystem: "pip"
+  #   directory: "/src/core"
+  #   schedule:
+  #     interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/src/presenters"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/src/publishers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "npm"
+    directory: "/src/gui"
+    schedule:
+      interval: "weekly"
3 changes: 3 additions & 0 deletions docker/.env
@@ -12,6 +12,9 @@ TARANIS_NG_TAG=v23.12.1
 # Timezone for all containers
 TZ=Europe/Bratislava

+# Force language
+VUE_APP_TARANIS_NG_LOCALE=""
+
 # Default passwords. CHANGE THESE FOR PRODUCTION!
 POSTGRES_PASSWORD=supersecret
 POSTGRES_KEYCLOAK_PASSWORD=supersecret
4 changes: 2 additions & 2 deletions docker/Dockerfile.bots
@@ -1,4 +1,4 @@
-FROM python:3.12-alpine3.18 AS build_shared
+FROM python:3.12-alpine3.19 AS build_shared

 WORKDIR /build_shared/

@@ -8,7 +8,7 @@ RUN python -m build



-FROM python:3.12-alpine3.18 AS production
+FROM python:3.12-alpine3.19 AS production

 WORKDIR /app/

4 changes: 2 additions & 2 deletions docker/Dockerfile.collectors
@@ -1,4 +1,4 @@
-FROM python:3.12-alpine3.18 AS build_shared
+FROM python:3.12-alpine3.19 AS build_shared

 WORKDIR /build_shared/

@@ -8,7 +8,7 @@ RUN python -m build



-FROM python:3.12-alpine3.18 AS production
+FROM python:3.12-alpine3.19 AS production

 WORKDIR /app/

4 changes: 2 additions & 2 deletions docker/Dockerfile.presenters
@@ -1,4 +1,4 @@
-FROM python:3.12-alpine3.18 AS build_shared
+FROM python:3.12-alpine3.19 AS build_shared

 WORKDIR /build_shared/

@@ -8,7 +8,7 @@ RUN python -m build



-FROM python:3.12-alpine3.18 AS production
+FROM python:3.12-alpine3.19 AS production

 WORKDIR /app/

4 changes: 2 additions & 2 deletions docker/Dockerfile.publishers
@@ -1,4 +1,4 @@
-FROM python:3.12-alpine3.18 AS build_shared
+FROM python:3.12-alpine3.19 AS build_shared

 WORKDIR /build_shared/

@@ -8,7 +8,7 @@ RUN python -m build



-FROM python:3.12-alpine3.18 AS production
+FROM python:3.12-alpine3.19 AS production

 WORKDIR /app/

6 changes: 4 additions & 2 deletions docker/README.md
@@ -79,11 +79,13 @@ docker compose -f docker/docker-compose.yml up --no-build
 or, alternatively, build and run the containers with:

 ```bash
-TARANIS_NG_TAG=build docker-compose -f docker/docker-compose.yml up --build --pull
+TARANIS_NG_TAG=build docker-compose -f docker/docker-compose.yml build --pull
+TARANIS_NG_TAG=build docker-compose -f docker/docker-compose.yml up
 ```
 or
 ```bash
-TARANIS_NG_TAG=build docker compose -f docker/docker-compose.yml up --build --pull
+TARANIS_NG_TAG=build docker compose -f docker/docker-compose.yml build --pull
+TARANIS_NG_TAG=build docker compose -f docker/docker-compose.yml up
 ```
 (`--pull` updates the base images)

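Splitting the former `up --build --pull` into separate `build --pull` and `up` steps looks deliberate: `--pull` is a flag of the `build` command, while `up` either lacks it (Compose v1) or expects a pull-policy argument (Compose v2), so the combined invocation did not reliably refresh the base images.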
2 changes: 1 addition & 1 deletion docker/docker-compose.yml
@@ -223,7 +223,7 @@ services:
       VUE_APP_TARANIS_NG_URL: "${TARANIS_NG_HTTPS_URI}"
       VUE_APP_TARANIS_NG_CORE_API: "${TARANIS_NG_HTTPS_URI}/api/v1"
       VUE_APP_TARANIS_NG_CORE_SSE: "${TARANIS_NG_HTTPS_URI}/sse"
-      VUE_APP_TARANIS_NG_LOCALE: en
+      VUE_APP_TARANIS_NG_LOCALE: "${VUE_APP_TARANIS_NG_LOCALE}"
       TZ: "${TZ}"
     labels:
       traefik.enable: "true"
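This wiring makes the GUI language configurable instead of hard-coded to `en`: the compose file now passes `VUE_APP_TARANIS_NG_LOCALE` through from `docker/.env`, where an empty value leaves the application's default locale in effect. Setting, for example, `VUE_APP_TARANIS_NG_LOCALE="sk"` should force the interface into that language, assuming the GUI ships a matching locale bundle.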
8 changes: 4 additions & 4 deletions src/bots/requirements.txt
@@ -1,13 +1,13 @@
-Flask==3.0.0
+Flask==3.0.2
 Flask-Cors==4.0.0
 Flask-RESTful==0.3.10
-gevent==23.9.1
+gevent==24.2.1
 gunicorn==21.2.0
-marshmallow==3.20.1
+marshmallow==3.21.1
 marshmallow-enum==1.5.1
 oauthlib==3.2.2
 PySocks==1.7.1
-python-dotenv==1.0.0
+python-dotenv==1.0.1
 requests==2.31.0
 schedule==1.2.1
 sseclient-py==1.8.0
88 changes: 48 additions & 40 deletions src/collectors/collectors/atom_collector.py
@@ -19,7 +19,9 @@ class AtomCollector(BaseCollector):
     description = "Collector for gathering data from Atom feeds"

     parameters = [Parameter(0, "ATOM_FEED_URL", "Atom feed URL", "Full url for Atom feed", ParameterType.STRING),
-                  Parameter(0, "USER_AGENT", "User agent", "Type of user agent", ParameterType.STRING)
+                  Parameter(0, "USER_AGENT", "User agent", "Type of user agent", ParameterType.STRING),
+                  Parameter(0, "LINKS_LIMIT", "Limit for article links",
+                            "OPTIONAL: Maximum number of article links to process. Default: all", ParameterType.NUMBER)
                   ]

     parameters.extend(BaseCollector.parameters)
@@ -32,6 +34,8 @@ def collect(self, source):
         feed_url = source.parameter_values['ATOM_FEED_URL']
         user_agent = source.parameter_values['USER_AGENT']
         interval = source.parameter_values['REFRESH_INTERVAL']
+        links_limit = BaseCollector.read_int_parameter("LINKS_LIMIT", 0, source)
+
         log_manager.log_collector_activity("atom", source.name, "Starting collector for url: {}".format(feed_url))

         proxies = {}
@@ -55,48 +59,52 @@
             news_items = []

             limit = BaseCollector.history(interval)
+            count = 0
             for feed_entry in feed['entries']:
-                published = feed_entry['updated']
-                published = parse(published, tzinfos=BaseCollector.timezone_info())
-                # comment this at the beginning of the testing to get some initial data
-                if str(published) > str(limit):
-                    link_for_article = feed_entry['link']
-                    log_manager.log_collector_activity("atom", source.name, "Processing entry [{}]".format(link_for_article))
-                    if proxies:
-                        page = requests.get(link_for_article, headers={'User-Agent': user_agent}, proxies=proxies)
-                    else:
-                        page = requests.get(link_for_article, headers={'User-Agent': user_agent})
-
-                    html_content = page.text
-
-                    if html_content:
-                        content = BeautifulSoup(html_content, features='html.parser').text
-                    else:
-                        content = ''
-
-                    description = feed_entry['summary'][:500].replace('<p>', ' ')
-
-                    for_hash = feed_entry['author'] + feed_entry['title'] + feed_entry['link']
-
-                    news_item = NewsItemData(
-                        uuid.uuid4(),
-                        hashlib.sha256(for_hash.encode()).hexdigest(),
-                        feed_entry['title'],
-                        description,
-                        feed_url,
-                        feed_entry['link'],
-                        feed_entry['updated'],
-                        feed_entry['author'],
-                        datetime.datetime.now(),
-                        content,
-                        source.id,
-                        []
-                    )
-
-                    news_items.append(news_item)
+                count += 1
+                link_for_article = feed_entry['link']
+                log_manager.log_collector_activity("atom", source.name, "Visiting article {}/{}: {}".format(count, len(feed["entries"]), link_for_article))
+                if proxies:
+                    page = requests.get(link_for_article, headers={'User-Agent': user_agent}, proxies=proxies)
+                else:
+                    page = requests.get(link_for_article, headers={'User-Agent': user_agent})
+
+                html_content = page.text
+
+                if html_content:
+                    content = BeautifulSoup(html_content, features='html.parser').text
+                else:
+                    content = ''
+
+                description = feed_entry['summary'][:500].replace('<p>', ' ')
+
+                # author can exist/miss in header/entry
+                author = feed_entry['author'] if "author" in feed_entry else ""
+                for_hash = author + feed_entry['title'] + feed_entry['link']
+
+                news_item = NewsItemData(
+                    uuid.uuid4(),
+                    hashlib.sha256(for_hash.encode()).hexdigest(),
+                    feed_entry['title'],
+                    description,
+                    feed_url,
+                    feed_entry['link'],
+                    feed_entry['updated'],
+                    author,
+                    datetime.datetime.now(),
+                    content,
+                    source.id,
+                    []
+                )
+
+                news_items.append(news_item)
+
+                if links_limit > 0 and count >= links_limit:
+                    log_manager.log_collector_activity('atom', source.name, 'Limit for article links reached ({})'.format(links_limit))
+                    break

             BaseCollector.publish(news_items, source)

         except Exception as error:
             log_manager.log_collector_activity("atom", source.name, "ATOM collection exceptionally failed")
             BaseCollector.print_exception(source, error)
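The new cut-off treats `LINKS_LIMIT` as optional: `read_int_parameter` (added to `base_collector.py` below) returns the default `0` when the parameter is missing or non-positive, and a limit of `0` never triggers the break. A minimal sketch of that gate, with hypothetical values:

```python
# Sketch of the LINKS_LIMIT gate from the collector loop above.
# A limit of 0 (the default) means "no limit", so the break never fires.
def limit_reached(count, links_limit):
    return links_limit > 0 and count >= links_limit

assert limit_reached(5, 0) is False   # no limit configured
assert limit_reached(4, 5) is False   # still under the limit
assert limit_reached(5, 5) is True    # limit reached, stop visiting links
```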
17 changes: 13 additions & 4 deletions src/collectors/collectors/base_collector.py
@@ -49,12 +49,11 @@ def wrapper(self, source):

     @staticmethod
     def print_exception(source, error):
-        log_info('OSINTSource ID: ' + source.id)
-        log_info('OSINTSource name: ' + source.name)
+        log_warning('OSINTSource name: ' + source.name)
         if str(error).startswith('b'):
-            log_info('ERROR: ' + str(error)[2:-1])
+            log_warning('ERROR: ' + str(error)[2:-1])
         else:
-            log_info('ERROR: ' + str(error))
+            log_warning('ERROR: ' + str(error))

     @staticmethod
     def timezone_info():
@@ -306,3 +305,13 @@ def refresh(self):

     def initialize(self):
         self.refresh()
+
+    @staticmethod
+    def read_int_parameter(name, default_value, source):
+        try:
+            val = int(source.parameter_values[name])
+            if val <= 0:
+                val = default_value
+        except Exception:
+            val = default_value
+        return val
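A quick illustration of how `read_int_parameter` behaves, as a sketch using a hypothetical stand-in for the source object (the real one carries more than `parameter_values`): missing, non-numeric, and non-positive values all fall back to the supplied default.

```python
from types import SimpleNamespace

from collectors.base_collector import BaseCollector  # assumed import path

# Hypothetical source stand-in; only parameter_values is consulted here.
source = SimpleNamespace(parameter_values={"LINKS_LIMIT": "10", "BAD": "abc", "ZERO": "0"})

assert BaseCollector.read_int_parameter("LINKS_LIMIT", 0, source) == 10
assert BaseCollector.read_int_parameter("BAD", 0, source) == 0      # ValueError -> default
assert BaseCollector.read_int_parameter("ZERO", 7, source) == 7     # <= 0 -> default
assert BaseCollector.read_int_parameter("MISSING", 0, source) == 0  # KeyError -> default
```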