Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Force async processing for large data sources on restricted domains #35465

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions corehq/apps/userreports/tests/test_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from corehq.apps.userreports.reports.view import ConfigurableReportView
from corehq.apps.userreports.util import get_indicator_adapter
from corehq.apps.userreports.views import (
_number_of_records_to_be_iterated_for_rebuild,
number_of_records_to_be_processed,
)
from corehq.apps.users.models import HQApiKey, HqPermissions, UserRole, WebUser
from corehq.form_processor.models import CommCareCase
Expand Down Expand Up @@ -500,7 +500,7 @@ def _send_data_source_rebuild_request(self):
return self.client.post(path)

def test_number_of_records_to_be_iterated_for_rebuild(self):
number_of_cases = _number_of_records_to_be_iterated_for_rebuild(self.data_source_config)
number_of_cases = number_of_records_to_be_processed(self.data_source_config)
self.assertEqual(number_of_cases, 3)

def test_feature_flag(self):
Expand Down Expand Up @@ -533,8 +533,7 @@ def test_blocked_rebuild_for_restricted_data_source(self):
'Rebuilt was not initiated due to high number of records this data source is expected to '
'iterate during a rebuild. Expected records to be processed is currently 3 '
'which is above the limit of 2. '
'Please consider creating a new data source instead or reach out to support if '
'you need to rebuild this data source.'
'Please update the data source to have asynchronous processing.'
)
)

Expand Down
17 changes: 17 additions & 0 deletions corehq/apps/userreports/ui/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from corehq.apps.domain.models import AllowedUCRExpressionSettings
from corehq.apps.hqwebapp import crispy as hqcrispy
from corehq.apps.hqwebapp.widgets import BootstrapCheckboxInput
from corehq.apps.userreports.const import DATA_SOURCE_REBUILD_RESTRICTED_AT
from corehq.apps.userreports.models import guess_data_source_type
from corehq.apps.userreports.ui import help_text
from corehq.apps.userreports.ui.fields import JsonField, ReportDataSourceField
Expand Down Expand Up @@ -266,8 +267,24 @@ def clean(self):
if settings.DEBUG:
raise
raise ValidationError(_('Problem with data source spec: {}').format(e))
self._check_for_asynchronous(config)
return cleaned_data

def _check_for_asynchronous(self, config):
from corehq.apps.userreports.views import number_of_records_to_be_processed

if not config.asynchronous and toggles.RESTRICT_DATA_SOURCE_REBUILD.enabled(self.domain):
number_of_records = number_of_records_to_be_processed(
datasource_configuration=config
)
if number_of_records and number_of_records > DATA_SOURCE_REBUILD_RESTRICTED_AT:
self.add_error(
'asynchronous',
_('This data source covers more than {record_limit} records. '
'Please mark it for asynchronous processing for effective building/rebuilding'
).format(record_limit=f"{DATA_SOURCE_REBUILD_RESTRICTED_AT:,}")
)

def save(self, commit=False):
self.instance.meta.build.finished = False
self.instance.meta.build.initiated = None
Expand Down
13 changes: 7 additions & 6 deletions corehq/apps/userreports/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,8 @@ def post(self, request, *args, **kwargs):
return HttpResponseRedirect(reverse(
EditDataSourceView.urlname, args=[self.domain, config._id])
)
else:
messages.error(request, _('Data source not saved!'))
except BadSpecError as e:
messages.error(request, str(e))
Copy link
Contributor

@zandre-eng zandre-eng Dec 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't part of your changes so feel free to ignore, but it seems we are returning an exception message back to the user here. Might be good to confirm that this is simply returning a general message and not any sort of stack trace.

return self.get(request, *args, **kwargs)
Expand Down Expand Up @@ -1322,8 +1324,8 @@ def undelete_data_source(request, domain, config_id):
def rebuild_data_source(request, domain, config_id):
config, is_static = get_datasource_config_or_404(config_id, domain)

if toggles.RESTRICT_DATA_SOURCE_REBUILD.enabled(domain):
number_of_records = _number_of_records_to_be_iterated_for_rebuild(datasource_configuration=config)
if not config.asynchronous and toggles.RESTRICT_DATA_SOURCE_REBUILD.enabled(domain):
number_of_records = number_of_records_to_be_processed(datasource_configuration=config)
if number_of_records and number_of_records > DATA_SOURCE_REBUILD_RESTRICTED_AT:
messages.error(
request,
Expand All @@ -1350,7 +1352,7 @@ def rebuild_data_source(request, domain, config_id):
))


def _number_of_records_to_be_iterated_for_rebuild(datasource_configuration):
def number_of_records_to_be_processed(datasource_configuration):
if datasource_configuration.referenced_doc_type == 'CommCareCase':
es_query = CaseSearchES().domain(datasource_configuration.domain)
case_types = [
Expand Down Expand Up @@ -1384,11 +1386,10 @@ def _error_message_for_restricting_rebuild(number_of_records_to_be_iterated):
'Rebuilt was not initiated due to high number of records this data source is expected to '
'iterate during a rebuild. Expected records to be processed is currently {number_of_records} '
'which is above the limit of {rebuild_limit}. '
'Please consider creating a new data source instead or reach out to support if '
'you need to rebuild this data source.'
'Please update the data source to have asynchronous processing.'
).format(
number_of_records=number_of_records_to_be_iterated,
rebuild_limit=DATA_SOURCE_REBUILD_RESTRICTED_AT
rebuild_limit=f"{DATA_SOURCE_REBUILD_RESTRICTED_AT:,}"
)


Expand Down
Loading