-
-
Notifications
You must be signed in to change notification settings - Fork 218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Enterprise CommCare Client Version Compliance Report #35468
base: master
Are you sure you want to change the base?
Changes from all commits
52520df
52b62e7
4c2c923
2ee9d14
5609175
94f54c6
9cc4bda
7338fcc
36138f7
0512997
9dc9a81
344a76c
1aec926
180eb0c
b8cb113
b494264
f38e1c3
fcf3c61
d72d426
b1a6545
cd61d41
57c489a
c03d909
5470ea5
4bc93c8
1c1abb6
0d85d21
34e358b
88252a0
8b75af1
c0fd790
05f6058
adb7d81
4a81844
b4d59b4
d3da1d7
1cc3178
7599675
44d9db5
bb87820
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from django.test import SimpleTestCase | ||
from corehq.apps.builds.utils import is_out_of_date | ||
|
||
|
||
class TestVersionUtils(SimpleTestCase): | ||
|
||
def test_is_out_of_date(self): | ||
test_cases = [ | ||
# (version_in_use, latest_version, expected_result) | ||
('2.53.0', '2.53.1', True), # Normal case - out of date | ||
('2.53.1', '2.53.1', False), # Same version - not out of date | ||
('2.53.2', '2.53.1', False), # Higher version - not out of date | ||
(None, '2.53.1', False), # None version_in_use | ||
('2.53.1', None, False), # None latest_version | ||
('invalid', '2.53.1', False), # Invalid version string | ||
('2.53.1', 'invalid', False), # Invalid latest version | ||
('6', '7', True), # Normal case - app version is integer | ||
(None, None, False), # None version_in_use and latest_version | ||
] | ||
|
||
for version_in_use, latest_version, expected in test_cases: | ||
with self.subTest(version_in_use=version_in_use, latest_version=latest_version): | ||
result = is_out_of_date(version_in_use, latest_version) | ||
self.assertEqual( | ||
result, | ||
expected, | ||
f"Expected is_out_of_date('{version_in_use}', '{latest_version}') to be {expected}" | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,26 +2,38 @@ | |
import re | ||
from django.db.models import Count | ||
from datetime import datetime, timedelta | ||
from functools import partial | ||
from gevent.pool import Pool | ||
|
||
from django.conf import settings | ||
from dimagi.utils.chunked import chunked | ||
from dimagi.utils.parsing import ISO_DATETIME_FORMAT | ||
from django.utils.translation import gettext as _ | ||
from django.utils.translation import gettext_lazy | ||
from django.conf import settings | ||
|
||
from memoized import memoized | ||
|
||
from couchforms.analytics import get_last_form_submission_received | ||
from dimagi.utils.dates import DateSpan | ||
|
||
from corehq.apps.enterprise.exceptions import EnterpriseReportError, TooMuchRequestedDataError | ||
from corehq.apps.enterprise.iterators import raise_after_max_elements | ||
from corehq.apps.accounting.models import BillingAccount | ||
from corehq.apps.accounting.utils import get_default_domain_url | ||
from corehq.apps.app_manager.dbaccessors import get_brief_apps_in_domain | ||
from corehq.apps.builds.utils import get_latest_version_at_time, is_out_of_date | ||
from corehq.apps.builds.models import CommCareBuildConfig | ||
from corehq.apps.domain.calculations import sms_in_last | ||
from corehq.apps.domain.models import Domain | ||
from corehq.apps.enterprise.exceptions import ( | ||
EnterpriseReportError, | ||
TooMuchRequestedDataError, | ||
) | ||
from corehq.apps.enterprise.iterators import raise_after_max_elements | ||
from corehq.apps.es import forms as form_es | ||
from corehq.apps.es.users import UserES | ||
from corehq.apps.export.dbaccessors import ODataExportFetcher | ||
from corehq.apps.reports.util import ( | ||
get_commcare_version_and_date_from_last_usage, | ||
) | ||
from corehq.apps.sms.models import SMS, OUTGOING, INCOMING | ||
from corehq.apps.users.dbaccessors import ( | ||
get_all_user_rows, | ||
|
@@ -30,13 +42,16 @@ | |
) | ||
from corehq.apps.users.models import CouchUser, Invitation | ||
|
||
ES_CLIENT_CONNECTION_POOL_LIMIT = 10 # Matches ES client connection pool limit | ||
|
||
|
||
class EnterpriseReport(ABC): | ||
DOMAINS = 'domains' | ||
WEB_USERS = 'web_users' | ||
MOBILE_USERS = 'mobile_users' | ||
FORM_SUBMISSIONS = 'form_submissions' | ||
ODATA_FEEDS = 'odata_feeds' | ||
COMMCARE_VERSION_COMPLIANCE = 'commcare_version_compliance' | ||
SMS = 'sms' | ||
|
||
DATE_ROW_FORMAT = '%Y/%m/%d %H:%M:%S' | ||
|
@@ -81,6 +96,8 @@ def create(cls, slug, account_id, couch_user, **kwargs): | |
report = EnterpriseFormReport(account, couch_user, **kwargs) | ||
elif slug == cls.ODATA_FEEDS: | ||
report = EnterpriseODataReport(account, couch_user, **kwargs) | ||
elif slug == cls.COMMCARE_VERSION_COMPLIANCE: | ||
report = EnterpriseCommCareVersionReport(account, couch_user, **kwargs) | ||
elif slug == cls.SMS: | ||
report = EnterpriseSMSReport(account, couch_user, **kwargs) | ||
|
||
|
@@ -405,6 +422,94 @@ def _get_individual_export_rows(self, exports, export_line_counts): | |
return rows | ||
|
||
|
||
class EnterpriseCommCareVersionReport(EnterpriseReport): | ||
title = gettext_lazy('CommCare Version Compliance') | ||
total_description = gettext_lazy('% of Mobile Workers on the Latest CommCare Version') | ||
|
||
@property | ||
def headers(self): | ||
return [ | ||
_('Mobile Worker'), | ||
_('Project Space'), | ||
_('Latest Version Available at Submission'), | ||
_('Version in Use'), | ||
] | ||
|
||
@property | ||
def rows(self): | ||
config = CommCareBuildConfig.fetch() | ||
partial_func = partial(self.rows_for_domain, config) | ||
return _process_domains_in_parallel(partial_func, self.account.get_domains()) | ||
|
||
@property | ||
def total(self): | ||
total_mobile_workers = 0 | ||
total_up_to_date = 0 | ||
config = CommCareBuildConfig.fetch() | ||
|
||
def total_for_domain(domain): | ||
mobile_workers = get_mobile_user_count(domain, include_inactive=False) | ||
outdated_users = len(self.rows_for_domain(domain, config)) | ||
return mobile_workers, outdated_users | ||
|
||
results = _process_domains_in_parallel(total_for_domain, self.account.get_domains()) | ||
|
||
for domain_mobile_workers, outdated_users in results: | ||
if domain_mobile_workers: | ||
total_mobile_workers += domain_mobile_workers | ||
total_up_to_date += domain_mobile_workers - outdated_users | ||
|
||
return _format_percentage_for_enterprise_tile(total_up_to_date, total_mobile_workers) | ||
|
||
def rows_for_domain(self, domain, config): | ||
rows = [] | ||
|
||
user_query = (UserES() | ||
.domain(domain) | ||
.mobile_users() | ||
.source([ | ||
'username', | ||
'reporting_metadata.last_submission_for_user.commcare_version', | ||
'reporting_metadata.last_submission_for_user.submission_date', | ||
'last_device.commcare_version', | ||
'last_device.last_used' | ||
])) | ||
|
||
for user in user_query.run().hits: | ||
last_submission = user.get('reporting_metadata', {}).get('last_submission_for_user', {}) | ||
last_device = user.get('last_device', {}) | ||
|
||
version_in_use, date_of_use = get_commcare_version_and_date_from_last_usage(last_submission, | ||
last_device) | ||
|
||
if not version_in_use: | ||
continue | ||
|
||
# Remove seconds and microseconds to reduce the number of unique timestamps | ||
# This helps with performance because get_latest_version_at_time is memoized | ||
if isinstance(date_of_use, str): | ||
date_of_use = datetime.strptime(date_of_use, ISO_DATETIME_FORMAT) | ||
date_of_use_minute_precision = date_of_use.replace(second=0, microsecond=0) | ||
|
||
latest_version_at_time_of_use = get_latest_version_at_time(config, date_of_use_minute_precision) | ||
|
||
if is_out_of_date(version_in_use, latest_version_at_time_of_use): | ||
rows.append([ | ||
user['username'], | ||
domain, | ||
latest_version_at_time_of_use, | ||
version_in_use, | ||
]) | ||
|
||
return rows | ||
|
||
|
||
def _format_percentage_for_enterprise_tile(dividend, divisor): | ||
if not divisor: | ||
return '--' | ||
return f"{dividend / divisor * 100:.1f}%" | ||
|
||
|
||
class EnterpriseSMSReport(EnterpriseReport): | ||
title = gettext_lazy('SMS Usage') | ||
total_description = gettext_lazy('# of SMS Sent') | ||
|
@@ -464,3 +569,18 @@ def rows_for_domain(self, domain_obj): | |
num_errors = sum([result['direction_count'] for result in results if result['error']]) | ||
|
||
return [(domain_obj.name, num_sent, num_received, num_errors), ] | ||
|
||
|
||
def _process_domains_in_parallel(process_func, domains): | ||
""" | ||
Helper method to process domains in parallel using gevent pooling. | ||
""" | ||
results = [] | ||
|
||
for chunk in chunked(domains, ES_CLIENT_CONNECTION_POOL_LIMIT): | ||
pool = Pool(size=ES_CLIENT_CONNECTION_POOL_LIMIT) | ||
chunk_results = pool.map(process_func, chunk) | ||
pool.join() | ||
results.extend(chunk_results) | ||
|
||
return results | ||
Comment on lines
+574
to
+586
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Curious if this is worth doing. I'm assuming that ElasticSearch has a connection limit across machines, and so while we can attempt to use all of them to process a single request, and that request will now be faster, I'd imagine it comes at the expense of any other requests trying to use ElasticSearch. Specifically for computing Enterprise Tile totals, we are going to be running multiple queries in parallel already, as loading that console page triggers multiple tile total requests. If each of them tries to max out their elasticsearch connections, it seems like we're basically back to running operations in sequence, rather than in parallel? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain why it is safe to remove seconds and microseconds from
date_of_use
? Admittedly its an edge case, but it seems possible for us to have multiple builds that differ only by seconds or milliseconds. Removing those would then give inaccurate informationThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seconds and microseconds are removed from the form submission time, not our build. So the result is, for example, if we release a new version
Version 3.3
at 13:10:30, a mobile worker submit a form usingVersion 3.2
in 13:10:40, after removing seconds, it is become 13:10:00, the latest version at 13:10:00 is stillVersion 3.2
, so this mobile worker won't be considered as using the out-of-date commcare version, although technically he is using out-of-date commcare version. I'm sacrificing a bit accuracy for caching the result ofget_latest_version_at_time
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we choose to go forward with this approach, I'd want it documented that we're reducing the accuracy of results in order to improve performance. I'm not sure reducing the accuracy of report data for the sake of a quicker summary makes sense. However, can we avoid this entirely now? It looks like the main bottlenecks for this function were fetching the config and build time, but both of those things have now been moved into their own areas. The config is only fetched once, outside of
get_latest_version_at_time
, andget_build_time
is now cached. So I don't think we need to do any caching onget_latest_version_at_time
anymore. At least on staging, our config has 65 items. I don't think it should be expensive to iterate backward through those items until we find a build time less than our target.