
Commit 46db8c5

test invocation
1 parent 3133271 commit 46db8c5

File tree

1 file changed (+37 / -40 lines)


backend/raw_data_handler/raw_data_handler.py

Lines changed: 37 additions & 40 deletions
@@ -17,43 +17,43 @@
 role = 'arn:aws:iam::269854564686:role/hackathon-comprehend-role'
 def lambda_handler(event, context):
     try:
-        conn = get_postgresql_connection()
-        cursor = conn.cursor()
-        comprehend = boto3.client('comprehend', region_name='us-east-1')
-        for record in event['Records']:
-            print(f"New record: {record}")
-            bucket = record['s3']['bucket']['name']
-            key = record['s3']['object']['key']
-            print(f"Processing file from bucket: {bucket}, key: {key}")
-            s3 = boto3.client('s3')
-            print(f"Connecting to S3 bucket: {bucket}")
-            obj = s3.get_object(Bucket=bucket, Key=key)
-            stream = io.BytesIO(obj['Body'].read())
-            articles = extract_articles(stream)
-            print(f"Extracted {len(articles)} articles from part")
-            for article in articles:
-                print(f"Processing article: {article['Title']}")
-                # Check if article is relevant
-                is_relevant = is_relevance(article)
-                if not is_relevant:
-                    print(f"Article {article['Title']} is not relevant, skipping")
-                    continue
-                output_csv = io.StringIO()
-                writer = csv.DictWriter(output_csv, fieldnames=["Title", "Source", "Date", "Content"])
-                # writer.writeheader()
-                writer.writerow(article)
-                article_id = str(uuid.uuid4())
-                # Generate unique filename
-                csv_filename = f"input/articles-{article_id}.csv"
-                cursor.execute("""
-                    INSERT INTO articles (article_id, title, body, source, published_date)
-                    VALUES (%s, %s, %s, %s, %s)""", (article_id, article['Title'], article['Content'], article['Source'], article['Date']))
-                # Upload to S3
-                print(f"Uploading CSV to S3: {csv_filename}")
-                conn.commit()
-                get_data_inline(output_csv.getvalue(), article_id, article['Date'], comprehend, cursor, conn)
-        cursor.close()
-        conn.close()
+        # conn = get_postgresql_connection()
+        # cursor = conn.cursor()
+        # comprehend = boto3.client('comprehend', region_name='us-east-1')
+        # for record in event['Records']:
+        #     print(f"New record: {record}")
+        #     bucket = record['s3']['bucket']['name']
+        #     key = record['s3']['object']['key']
+        #     print(f"Processing file from bucket: {bucket}, key: {key}")
+        #     s3 = boto3.client('s3')
+        #     print(f"Connecting to S3 bucket: {bucket}")
+        #     obj = s3.get_object(Bucket=bucket, Key=key)
+        #     stream = io.BytesIO(obj['Body'].read())
+        #     articles = extract_articles(stream)
+        #     print(f"Extracted {len(articles)} articles from part")
+        #     for article in articles:
+        #         print(f"Processing article: {article['Title']}")
+        #         # Check if article is relevant
+        #         is_relevant = is_relevance(article)
+        #         if not is_relevant:
+        #             print(f"Article {article['Title']} is not relevant, skipping")
+        #             continue
+        #         output_csv = io.StringIO()
+        #         writer = csv.DictWriter(output_csv, fieldnames=["Title", "Source", "Date", "Content"])
+        #         # writer.writeheader()
+        #         writer.writerow(article)
+        #         article_id = str(uuid.uuid4())
+        #         # Generate unique filename
+        #         csv_filename = f"input/articles-{article_id}.csv"
+        #         cursor.execute("""
+        #             INSERT INTO articles (article_id, title, body, source, published_date)
+        #             VALUES (%s, %s, %s, %s, %s)""", (article_id, article['Title'], article['Content'], article['Source'], article['Date']))
+        #         # Upload to S3
+        #         print(f"Uploading CSV to S3: {csv_filename}")
+        #         conn.commit()
+        #         get_data_inline(output_csv.getvalue(), article_id, article['Date'], comprehend, cursor, conn)
+        # cursor.close()
+        # conn.close()
         lambda_client = boto3.client('lambda')
         response = lambda_client.invoke(
             FunctionName='clustering_service',
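
The diff is truncated right after FunctionName='clustering_service', so the remaining arguments of the invoke call are not visible in this commit. For context, a minimal self-contained sketch of an asynchronous Lambda-to-Lambda invocation with boto3 might look like the following; the InvocationType, payload shape, and helper name are illustrative assumptions, not taken from this repository.

    # Hypothetical sketch: invoke the 'clustering_service' Lambda asynchronously.
    # Only the function name comes from the commit; everything else is assumed.
    import json

    import boto3

    lambda_client = boto3.client('lambda')

    def trigger_clustering(payload: dict) -> int:
        """Fire-and-forget invocation; returns the HTTP status code (202 for 'Event')."""
        response = lambda_client.invoke(
            FunctionName='clustering_service',
            InvocationType='Event',                      # assumption: async, no wait for the result
            Payload=json.dumps(payload).encode('utf-8'),  # assumption: JSON event body
        )
        return response['StatusCode']

With InvocationType='Event' the call returns immediately; 'RequestResponse' would instead block until clustering_service finishes and expose its return value in response['Payload'].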
@@ -68,14 +68,12 @@ def get_data_inline(data, articles_id, article_date, comprehend, cursor, conn):
     print(f"Processing data for article ID: {articles_id}")
     entities_response = comprehend.detect_entities(
         Text=data,
-        # DataAccessRoleArn=role_arn,
         LanguageCode='en'
     )
     print(f"Entities detected: {entities_response['Entities']}")
     add_entities_to_article(conn, cursor, articles_id, entities_response['Entities'])
     response = comprehend.detect_key_phrases(
         Text=data,
-        # DataAccessRoleArn=role_arn,
         LanguageCode='en'
     )
     print(f"Key phrases detected: {response['KeyPhrases']}")
@@ -84,7 +82,6 @@ def get_data_inline(data, articles_id, article_date, comprehend, cursor, conn):
     add_keyphrase_to_article(conn, cursor, articles_id, article_date, response['KeyPhrases'])
     sentiment_response = comprehend.detect_sentiment(
         Text=data,
-        # DataAccessRoleArn=role_arn,
         LanguageCode='en'
     )
     print(f"Sentiment detected: {sentiment_response['Sentiment']}")

0 commit comments