def _query(self, domain_name):
    """Build the form query for one domain, restricted to the report's date span.

    Only forms submitted by the domain's mobile users (inactive ones
    included) within ``self.datespan`` are selected.

    Args:
        domain_name: name of the domain whose form submissions are queried.

    Returns:
        The ``form_es.FormES`` query with the domain, date and user filters
        applied. The query is not executed here.

    Raises:
        TooMuchRequestedDataError: if the domain has more than
            ``MAXIMUM_USERS_PER_DOMAIN`` mobile users.
    """
    # Request one id beyond the limit: that is enough to detect an over-limit
    # domain without fetching every user it contains.
    user_ids = list(
        UserES()
        .domain(domain_name)
        .mobile_users()
        .show_inactive()
        .size(self.MAXIMUM_USERS_PER_DOMAIN + 1)
        .values_list('_id', flat=True)
    )

    # The limit must be checked against the ids themselves. The object
    # returned by form_es.user_id() is what gets handed to .filter(), so its
    # len() does not measure how many users were found.
    if len(user_ids) > self.MAXIMUM_USERS_PER_DOMAIN:
        raise TooMuchRequestedDataError(
            _('Domain {name} has too many users. Maximum allowed is: {amount}')
            .format(name=domain_name, amount=self.MAXIMUM_USERS_PER_DOMAIN)
        )

    time_filter = form_es.submitted
    users_filter = form_es.user_id(user_ids)

    query = (
        form_es.FormES()
        .domain(domain_name)
        .filter(time_filter(gte=self.datespan.startdate, lt=self.datespan.enddate_adjusted))
        .filter(users_filter)
    )
    return query
def raise_after_max_elements(it, max_elements, exception=None):
    """Lazily yield the items of ``it``, raising once the cap is exceeded.

    Up to ``max_elements`` items are passed through unchanged. If the
    underlying iterable produces one more item than that, an exception is
    raised instead of yielding it. An iterable holding exactly
    ``max_elements`` items is consumed without error.

    Args:
        it: the iterable to pass through.
        max_elements: how many items may be yielded before raising.
        exception: the exception to raise when the cap is exceeded. When
            omitted (or falsy), ``Exception('Too Many Elements')`` is raised.
    """
    yielded_so_far = 0
    for item in it:
        if yielded_so_far >= max_elements:
            # The extra item has already been pulled from ``it`` at this
            # point; it is discarded rather than yielded.
            raise exception or Exception('Too Many Elements')

        yield item
        yielded_so_far += 1