Skip to content

Commit

Permalink
data collector (#74)
Browse files Browse the repository at this point in the history
  • Loading branch information
eistrati authored Nov 18, 2024
1 parent 1a93e67 commit c546e97
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 11 deletions.
41 changes: 32 additions & 9 deletions app/anomaly-detector/anomaly_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
from concurrent.futures import ProcessPoolExecutor
from boto3 import client as boto3_client
from pandas import get_dummies, to_datetime, concat, read_csv
from numpy import concatenate, array
from numpy import concatenate, array, ndarray, pad
from psycopg import connect
from sklearn.preprocessing import StandardScaler
from sentence_transformers import SentenceTransformer
from kubernetes import client as k8s_client, config as k8s_config
from botocore.exceptions import ClientError
from warnings import warn

def get_config_map_values(config_map_name = "config-map"):
"""
Expand Down Expand Up @@ -203,7 +204,7 @@ def connect_to_database(dbname, dbuser, dbpass, dbhost, dbport):
print(f"Error while connecting to PostgreSQL: {e}")
raise

def is_transaction_anomaly(conn, embeddings, df):
def is_transaction_anomaly(conn, df, embeddings, target_dim=847):
"""
Checks if the given embeddings are anomalies based on the transaction anomalies table
and returns the original dataframe with anomaly scores.
Expand All @@ -212,13 +213,35 @@ def is_transaction_anomaly(conn, embeddings, df):
scores = []
query = "SELECT MAX(1 - (embedding <=> %s::vector)) FROM transaction_anomalies"

for embedding in embeddings:
cursor.execute(query, (embedding.tolist(),))
results = cursor.fetchall()
scores.append(results[0][0])
# Ensure embeddings are the target dimensions
if isinstance(embeddings, ndarray):
current_dim = embeddings.shape[1]
if current_dim != target_dim:
warn(f"Adjusting embeddings from {current_dim} to {target_dim} dimensions")
if current_dim < target_dim:
# Pad embeddings to target dimensions
padding_size = target_dim - current_dim
embeddings = pad(
embeddings,
((0, 0), (0, padding_size)),
mode='constant',
constant_values=0
)
elif current_dim > target_dim:
# Truncate to target dimensions
embeddings = embeddings[:, :target_dim]

if cursor:
cursor.close()
try:
for embedding in embeddings:
cursor.execute(query, (embedding.tolist(),))
results = cursor.fetchall()
scores.append(results[0][0])
except Exception as e:
print(f"Error processing embeddings: {str(e)}")
raise
finally:
if cursor:
cursor.close()

# Add the scores as a new column to the dataframe
df['anomaly_score'] = scores
Expand Down Expand Up @@ -282,7 +305,7 @@ def main():
embeddings = array(embeddings, dtype=float)

# Get DataFrame with anomaly scores
df_with_scores = is_transaction_anomaly(conn, embeddings, df)
df_with_scores = is_transaction_anomaly(conn, df, embeddings)

# Create output filename for the scored data
output_filename = path.splitext(local_file_path)[0] + '_scored.csv'
Expand Down
10 changes: 8 additions & 2 deletions iac/core/vpc_endpoint/state.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@
# SPDX-License-Identifier: MIT-0

output "arn" {
value = join(",", aws_vpc_endpoint.this.*.arn)
value = {
for idx, val in aws_vpc_endpoint.this.*.arn:
local.interfaces[idx] => val
}
}

output "id" {
value = join(",", aws_vpc_endpoint.this.*.id)
value = {
for idx, val in aws_vpc_endpoint.this.*.id:
local.interfaces[idx] => val
}
}

0 comments on commit c546e97

Please sign in to comment.