Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion document_retriever_service/retriever_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ def __init__(self, parser):
self.solr_password=os.getenv("SOLR_PASSWORD")

self.solr_retriever_query_params = {
'q': '*:*',
'rows': SOLR_TOTAL_ROWS,
'wt': 'json'
}
Expand Down
6 changes: 5 additions & 1 deletion document_retriever_service/retriever_services_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from catalog_metadata.catalog_metadata import CatalogItemMetadata, CatalogRecordMetadata
from ht_queue_service.queue_producer import QueueProducer
from ht_utils.ht_logger import get_ht_logger
Expand All @@ -15,7 +17,9 @@ def publish_document(queue_producer: QueueProducer, content: dict = None):
:param content: dict with the content of the message
"""
message = content
logger.info(f"Sending message with id {content.get('ht_id')} to queue {queue_producer.queue_name}")
entry_data = json.dumps(message)
entry_size = len(entry_data.encode('utf-8')) # Convert to bytes and get length
logger.info(f"Sending message with id {content.get('ht_id')} and Size={entry_size} bytes to queue {queue_producer.queue_name}")
queue_producer.publish_messages(message)

@staticmethod
Expand Down
17 changes: 16 additions & 1 deletion ht_queue_service/queue_connection.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
import pika
MAX_DOCUMENT_IN_QUEUE = 30000 # 30k is the maximum number of messages in the queue

# Calculate the maximum number of messages in the queue

# The average size of a document_generator message is 1.8 MB
# The average size of a document_retriever message is 0.0132 MB

# The total disk space of the RabbitMQ server is 50 GB.
# 1 GB = 1024 MB, so 50 GB = 50 * 1024 MB = 51,200 MB.

# Budgeting 90% of the total disk space for queued messages: 51,200 MB * 0.90 = 46,080 MB

# The maximum number of document_generator messages in the queue is 46,080 MB / 1.8 MB = 25,600 messages
# The maximum number of document_retriever messages in the queue is 46,080 MB / 0.0132 MB ≈ 3,490,909 messages

# The retriever queue limit is set conservatively to 200,000 messages, far below the theoretical maximum computed above
MAX_DOCUMENT_IN_QUEUE = 200000 # 200000 is the maximum number of messages in the retriever queue

class QueueConnection:

Expand Down