From ab69f3dbcc7a2cf7d2421ace0ee3802c6f8441bb Mon Sep 17 00:00:00 2001 From: Valentin Kuznetsov Date: Mon, 15 Apr 2024 12:23:53 -0400 Subject: [PATCH 1/2] Check WMArchive doc size and cut off big docs --- .../ArchiveDataReporter/ArchiveDataPoller.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py b/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py index 9efcaa3297..67ec72fef2 100644 --- a/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py +++ b/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py @@ -7,6 +7,7 @@ import traceback from Utils.IteratorTools import grouper from Utils.Timers import timeFunction +from Utils.Utilities import getSize from WMCore.WorkerThreads.BaseWorkerThread import BaseWorkerThread from WMCore.Services.WMArchive.DataMap import createArchiverDoc from WMCore.Services.WMArchive.WMArchive import WMArchive @@ -24,6 +25,8 @@ def __init__(self, config): """ BaseWorkerThread.__init__(self) self.config = config + # setup size threshold to fit CMSWEB nginx/frontend, i.e. 8MB + self.sizeThreshold = self.config.get('sizeThredhold', 8*1024*1024) def setup(self, parameters): """ @@ -51,8 +54,17 @@ def algorithm(self, parameters): archiveDocs = [] for job in slicedData: doc = createArchiverDoc(job) - archiveDocs.append(doc) - jobIDs.append(job["id"]) + # check document size before accepting to send to WMArchive service + size = getSize(doc) + if size > self.sizeThreshold: + shortDoc = {'id': doc['id'], + 'fwjr': doc['doc']['fwjr'], + 'jobtype': doc['doc']['jobtype'], + 'jobstate': doc['doc']['jobstate']} + logging.warning("Created document is too large for WMArchive, size=%s thredshold=%s, document slice=%s", size, self.sizeThreshold, shortDoc) + else: + archiveDocs.append(doc) + jobIDs.append(job["id"]) response = self.wmarchiver.archiveData(archiveDocs) From 4045e21e92ed6fed6e0ff2e2bbed7e7925ccc6e9 Mon Sep 17 00:00:00 2001 From: Valentin Kuznetsov Date: Wed, 17 Apr 2024 09:41:55 -0400 Subject: [PATCH 2/2] Put sizeThreshold under ArchiveDataReporter config section --- src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py b/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py index 67ec72fef2..3133ab7c1b 100644 --- a/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py +++ b/src/python/WMComponent/ArchiveDataReporter/ArchiveDataPoller.py @@ -26,7 +26,7 @@ def __init__(self, config): BaseWorkerThread.__init__(self) self.config = config # setup size threshold to fit CMSWEB nginx/frontend, i.e. 8MB - self.sizeThreshold = self.config.get('sizeThredhold', 8*1024*1024) + self.sizeThreshold = getattr(config.ArchiveDataReporter, "sizeThreshold", 8*1024*1024) def setup(self, parameters): """