Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Couch to SQL forms & cases: add system for encoding diffs in patch forms #27327

Merged
merged 5 commits into from
Apr 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 57 additions & 11 deletions corehq/apps/couch_sql_migration/casediff.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
import logging
from collections import defaultdict
from contextlib import contextmanager
from functools import partial
from xml.sax.saxutils import unescape

import attr

Expand Down Expand Up @@ -32,6 +34,7 @@
from corehq.util.metrics import metrics_counter

from .diff import filter_case_diffs, filter_ledger_diffs
from .diffrule import ANY
from .rebuildcase import rebuild_and_diff_cases
from .statedb import Change
from .util import retry_on_sql_error
Expand Down Expand Up @@ -115,7 +118,7 @@ def diff(couch_json, sql_json):
return couch_case, diffs, changes
diffs = diff(couch_case, sql_json)
if diffs:
if is_case_patched(diffs):
if is_case_patched(case_id, diffs):
return couch_case, [], []
form_diffs = diff_case_forms(couch_case, sql_json)
if form_diffs:
Expand Down Expand Up @@ -312,15 +315,58 @@ def iter_stock_transactions(self, form_id):
yield report.report_type, tx


def is_case_patched(diffs):
from .casepatch import PatchForm
if not (len(diffs) == 1
and diffs[0].diff_type == "set_mismatch"
and list(diffs[0].path) == ["xform_ids", "[*]"]
and diffs[0].new_value):
def is_case_patched(case_id, diffs):
"""Check if case has been patched

The case has been patched if at least one patch form has been
applied to the SQL case and if all of the given diffs are
unpatchable and match an unpatchable diff encoded in one of the
patch forms.

Additionally, diffs having a MISSING `old_value` are patched with an
empty string, which is semantically equivalent to removing the case
property in CommCare. However, a difference is detectable at the
storage level even after the patch has been applied, and therefore
these subsequent patch diffs are considered to be patched.

The "xform_ids" diff is a special exception because it is not
patchable and is not required to be present in the patch form.

:returns: True if the case has been patched else False.
"""
def is_patched(form_ids):
forms = get_sql_forms(form_ids, ordered=True)
for form in reversed(forms):
if form.xmlns == PatchForm.xmlns:
discard_expected_diffs(form.form_data.get("diff"))
if not unpatched:
return True
return False
form_ids = diffs[0].new_value.split(",")
return any(f.xmlns == PatchForm.xmlns for f in get_sql_forms(form_ids))

def discard_expected_diffs(patch_data):
data = json.loads(unescape(patch_data)) if patch_data else {}
if data.get("case_id") != case_id:
return
for diff in data.get("diffs", []):
diff.pop("reason", None)
path = tuple(diff["path"])
if path in unpatched and diff_to_json(unpatched[path], ANY) == diff:
unpatched.pop(path)

def expected_patch_diff(diff):
return not is_patchable(diff) or (
diff.old_value is MISSING and diff.new_value == "")

from .casepatch import PatchForm, is_patchable, diff_to_json
unpatched = {tuple(d.path): d for d in diffs if expected_patch_diff(d)}
xform_ids = unpatched.pop(("xform_ids", "[*]"), None)
return (
xform_ids is not None
and xform_ids.diff_type == "set_mismatch"
and xform_ids.new_value
and len(diffs) == len(unpatched) + 1 # false if any diffs are patchable
and is_patched(xform_ids.new_value.split(","))
)


def diff_case_forms(couch_json, sql_json):
Expand Down Expand Up @@ -497,5 +543,5 @@ def get_couch_form(form_id):


@retry_on_sql_error
def get_sql_forms(form_id):
return FormAccessorSQL.get_forms(form_id)
def get_sql_forms(form_id, **kw):
return FormAccessorSQL.get_forms(form_id, **kw)
110 changes: 87 additions & 23 deletions corehq/apps/couch_sql_migration/casepatch.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
import logging
from datetime import datetime
from functools import partial, wraps
from uuid import uuid4
from xml.sax.saxutils import escape

from django.template.loader import render_to_string

Expand Down Expand Up @@ -98,7 +100,7 @@ def __attrs_post_init__(self):
updates.extend([const.CASE_ACTION_CREATE, const.CASE_ACTION_UPDATE])
self._dynamic_properties = self.case.dynamic_case_properties()
else:
if cannot_patch(self.diffs):
if has_illegal_props(self.diffs):
raise CannotPatch(self.diffs)
props = dict(iter_dynamic_properties(self.diffs))
self._dynamic_properties = props
Expand Down Expand Up @@ -133,8 +135,42 @@ def indices(self):
yield CommCareCaseIndex.wrap(diff.old_value)


ILLEGAL_PROPS = {"actions", "*"}
IGNORE_PROPS = {"opened_by", "external_id"}
def has_illegal_props(diffs):
return any(d.path[0] in ILLEGAL_PROPS for d in diffs)


def has_known_props(diffs):
return any(d.path[0] in KNOWN_PROPERTIES for d in diffs)


def iter_dynamic_properties(diffs):
for diff in diffs:
name = diff.path[0]
if name in STATIC_PROPS:
continue
if diff.old_value is MISSING:
value = ""
elif len(diff.path) > 1 or not isinstance(diff.old_value, str):
raise CannotPatch([diff])
else:
value = diff.old_value
yield name, value


ILLEGAL_PROPS = {"actions", "case_id", "domain", "*"}
UNPATCHABLE_PROPS = {
"closed_by",
"closed_on",
"deleted_on",
"deletion_id",
"external_id",
"modified_by",
"modified_on",
"opened_by",
"opened_on",
"server_modified_on",
"xform_ids",
}
STATIC_PROPS = {
"case_id",
"closed",
Expand Down Expand Up @@ -167,25 +203,6 @@ def indices(self):
}


def cannot_patch(diffs):
return any(d.path[0] in ILLEGAL_PROPS for d in diffs) \
or all(d.path[0] in IGNORE_PROPS for d in diffs)


def has_known_props(diffs):
return any(d.path[0] in KNOWN_PROPERTIES for d in diffs)


def iter_dynamic_properties(diffs):
for diff in diffs:
name = diff.path[0]
if name in STATIC_PROPS:
continue
if len(diff.path) > 1 or not isinstance(diff.old_value, str):
raise CannotPatch([diff])
yield name, diff.old_value


@attr.s
class PatchForm:
_case = attr.ib()
Expand All @@ -204,9 +221,10 @@ def __getattr__(self, name):
def get_xml(self):
updates = self._case._updates
case_block = get_case_xml(self._case, updates, version='2.0')
diff_block = get_diff_block(self._case)
return render_to_string('hqcase/xml/case_block.xml', {
'xmlns': self.xmlns,
'case_block': case_block.decode('utf-8'),
'case_block': case_block.decode('utf-8') + diff_block,
'time': json_format_datetime(self.received_on),
'uid': self.form_id,
'username': "",
Expand Down Expand Up @@ -245,6 +263,52 @@ def add_patch_operation(sql_form):
))


def get_diff_block(case):
"""Get XML element containing case diff data

Some early patch forms were submitted without this element.

:param case: `PatchCase` instance.
:returns: A "<diff>" XML element string containing XML-escaped
JSON-encoded case diff data, some of which may be patched.

```json
{
"case_id": case.case_id,
"diffs": [
{
"path": diff.path,
"old": diff.old_value, # omitted if old_value is MISSING
"new": diff.new_value, # omitted if new_value is MISSING
"patch": true if patched else false
"reason": "...", # omitted if reason for change is unknown
},
...
]
}
```
"""
diffs = [diff_to_json(d) for d in sorted(case.diffs, key=lambda d: d.path)]
data = {"case_id": case.case_id, "diffs": diffs}
return f"<diff>{escape(json.dumps(data))}</diff>"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you think about a slightly more expressive name for this block, like <case_migration_diff>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not excited about changing it at this point because some of these forms already exist on prod, and I'd rather not have to check for two different names in places that extract diff data. The form XMLNS is http://commcarehq.org/couch-to-sql/patch-case-diff, which clearly indicates it's related to Couch-to-SQL case diffs.



def diff_to_json(diff, new_value=None):
assert diff.old_value is not MISSING or diff.new_value is not MISSING, diff
obj = {"path": list(diff.path), "patch": is_patchable(diff)}
if diff.old_value is not MISSING:
obj["old"] = diff.old_value
if diff.new_value is not MISSING:
obj["new"] = diff.new_value if new_value is None else new_value
if getattr(diff, "reason", ""):
obj["reason"] = diff.reason
return obj


def is_patchable(diff):
return diff.path[0] not in UNPATCHABLE_PROPS


class CannotPatch(Exception):

def __init__(self, json_diffs):
Expand Down
Loading