 role = 'arn:aws:iam::269854564686:role/hackathon-comprehend-role'
 def lambda_handler(event, context):
     try:
-        conn = get_postgresql_connection()
-        cursor = conn.cursor()
-        comprehend = boto3.client('comprehend', region_name='us-east-1')
-        for record in event['Records']:
-            print(f"New record: {record}")
-            bucket = record['s3']['bucket']['name']
-            key = record['s3']['object']['key']
-            print(f"Processing file from bucket: {bucket}, key: {key}")
-            s3 = boto3.client('s3')
-            print(f"Connecting to S3 bucket: {bucket}")
-            obj = s3.get_object(Bucket=bucket, Key=key)
-            stream = io.BytesIO(obj['Body'].read())
-            articles = extract_articles(stream)
-            print(f"Extracted {len(articles)} articles from part")
-            for article in articles:
-                print(f"Processing article: {article['Title']}")
-                # Check if article is relevant
-                is_relevant = is_relevance(article)
-                if not is_relevant:
-                    print(f"Article {article['Title']} is not relevant, skipping")
-                    continue
-                output_csv = io.StringIO()
-                writer = csv.DictWriter(output_csv, fieldnames=["Title", "Source", "Date", "Content"])
-                # writer.writeheader()
-                writer.writerow(article)
-                article_id = str(uuid.uuid4())
-                # Generate unique filename
-                csv_filename = f"input/articles-{article_id}.csv"
-                cursor.execute("""
-                    INSERT INTO articles (article_id, title, body, source, published_date)
-                    VALUES (%s, %s, %s, %s, %s)""", (article_id, article['Title'], article['Content'], article['Source'], article['Date']))
-                # Upload to S3
-                print(f"Uploading CSV to S3: {csv_filename}")
-                conn.commit()
-                get_data_inline(output_csv.getvalue(), article_id, article['Date'], comprehend, cursor, conn)
-        cursor.close()
-        conn.close()
+        # conn = get_postgresql_connection()
+        # cursor = conn.cursor()
+        # comprehend = boto3.client('comprehend', region_name='us-east-1')
+        # for record in event['Records']:
+        #     print(f"New record: {record}")
+        #     bucket = record['s3']['bucket']['name']
+        #     key = record['s3']['object']['key']
+        #     print(f"Processing file from bucket: {bucket}, key: {key}")
+        #     s3 = boto3.client('s3')
+        #     print(f"Connecting to S3 bucket: {bucket}")
+        #     obj = s3.get_object(Bucket=bucket, Key=key)
+        #     stream = io.BytesIO(obj['Body'].read())
+        #     articles = extract_articles(stream)
+        #     print(f"Extracted {len(articles)} articles from part")
+        #     for article in articles:
+        #         print(f"Processing article: {article['Title']}")
+        #         # Check if article is relevant
+        #         is_relevant = is_relevance(article)
+        #         if not is_relevant:
+        #             print(f"Article {article['Title']} is not relevant, skipping")
+        #             continue
+        #         output_csv = io.StringIO()
+        #         writer = csv.DictWriter(output_csv, fieldnames=["Title", "Source", "Date", "Content"])
+        #         # writer.writeheader()
+        #         writer.writerow(article)
+        #         article_id = str(uuid.uuid4())
+        #         # Generate unique filename
+        #         csv_filename = f"input/articles-{article_id}.csv"
+        #         cursor.execute("""
+        #             INSERT INTO articles (article_id, title, body, source, published_date)
+        #             VALUES (%s, %s, %s, %s, %s)""", (article_id, article['Title'], article['Content'], article['Source'], article['Date']))
+        #         # Upload to S3
+        #         print(f"Uploading CSV to S3: {csv_filename}")
+        #         conn.commit()
+        #         get_data_inline(output_csv.getvalue(), article_id, article['Date'], comprehend, cursor, conn)
+        # cursor.close()
+        # conn.close()
         lambda_client = boto3.client('lambda')
         response = lambda_client.invoke(
             FunctionName='clustering_service',
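
With the inline pipeline commented out, the handler now only delegates to the clustering_service Lambda. A minimal sketch of the resulting flow, assuming the call forwards the S3 event as a JSON payload in a fire-and-forget invocation (the remaining arguments of the invoke call are not shown above, so InvocationType and Payload here are assumptions):

import json
import boto3

lambda_client = boto3.client('lambda')

def lambda_handler(event, context):
    try:
        # Hand the raw S3 event off to the clustering service; 'Event'
        # makes the invocation asynchronous so this handler returns fast.
        response = lambda_client.invoke(
            FunctionName='clustering_service',
            InvocationType='Event',     # assumption: fire-and-forget
            Payload=json.dumps(event),  # assumption: pass the event through
        )
        print(f"Invoked clustering_service, status: {response['StatusCode']}")
    except Exception as e:
        print(f"Error invoking clustering_service: {e}")
        raise
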
@@ -68,14 +68,12 @@ def get_data_inline(data, articles_id, article_date, comprehend, cursor, conn):
     print(f"Processing data for article ID: {articles_id}")
     entities_response = comprehend.detect_entities(
         Text=data,
-        # DataAccessRoleArn=role_arn,
         LanguageCode='en'
     )
     print(f"Entities detected: {entities_response['Entities']}")
     add_entities_to_article(conn, cursor, articles_id, entities_response['Entities'])
     response = comprehend.detect_key_phrases(
         Text=data,
-        # DataAccessRoleArn=role_arn,
         LanguageCode='en'
     )
     print(f"Key phrases detected: {response['KeyPhrases']}")
@@ -84,7 +82,6 @@ def get_data_inline(data, articles_id, article_date, comprehend, cursor, conn):
     add_keyphrase_to_article(conn, cursor, articles_id, article_date, response['KeyPhrases'])
     sentiment_response = comprehend.detect_sentiment(
         Text=data,
-        # DataAccessRoleArn=role_arn,
         LanguageCode='en'
     )
     print(f"Sentiment detected: {sentiment_response['Sentiment']}")
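
The deleted # DataAccessRoleArn=role_arn comments were dead weight: that parameter belongs to Comprehend's asynchronous job APIs (start_entities_detection_job and friends), while the synchronous detect_* calls used here take the text inline and run under the Lambda execution role. A minimal sketch of the three calls as get_data_inline issues them (analyze_article is a hypothetical helper name, not from the commit):

import boto3

comprehend = boto3.client('comprehend', region_name='us-east-1')

def analyze_article(text: str) -> dict:
    # Synchronous Comprehend calls take the document inline; no
    # DataAccessRoleArn is accepted or needed. Sync input is size-capped
    # (100 KB of UTF-8 text at the time of writing), so very long
    # articles may need chunking before these calls.
    entities = comprehend.detect_entities(Text=text, LanguageCode='en')
    phrases = comprehend.detect_key_phrases(Text=text, LanguageCode='en')
    sentiment = comprehend.detect_sentiment(Text=text, LanguageCode='en')
    return {
        'Entities': entities['Entities'],
        'KeyPhrases': phrases['KeyPhrases'],
        'Sentiment': sentiment['Sentiment'],
    }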