# Primary-key field(s) of every supported table, keyed by the stem of the
# JSON file holding that table's rows (``action.json`` -> ``action``).
# A table that lists several fields is checked on the combination of them.
TABLE_PRIMARY_KEYS = {
    'action': ['uuid'],
    'env_property_group': ['uuid'],
    'environment': ['uuid'],
    'environment_property': ['code'],
    'report_summary': ['uuid'],
    'runner': ['uuid'],
    'scenario': ['uuid'],
    'sla': ['uuid'],
    'sla_scenario_association': ['sla', 'scenario'],
    'tag': ['uuid'],
    'tag_action_association': ['tag_uuid', 'action_uuid'],
    'tag_case_association': ['test_case_uuid', 'tag_uuid'],
    'teams': ['uuid'],
    'test_case': ['uuid'],
    'test_suit': ['uuid'],
    'test_supported_version': ['test_case_uuid', 'version'],
    'testcase_workload_association': ['uuid'],
    'user': ['uuid'],
    'user_tokens': ['user_token'],
    'workflow_task': ['workflow_id'],
    'context': ['uuid'],
    'test_sla_association': ['test_case', 'sla'],
    'teams_association': ['user_uuid', 'team_uuid'],
    'teams_resource_permission': ['team_uuid', 'resource_name'],
    'label': ['uuid'],
    'authentication_config_rules': ['auth_type'],
    'authentication': ['uuid'],
    'user_authentication_association':
        ['user_uuid', 'authentication_uuid'],
}


def _check_duplicate_entry(json_entries, pkeys):
    """Find primary-key values that occur more than once.

    :param json_entries: iterable of mappings (the decoded JSON rows)
    :param pkeys: names of the fields that together form the primary key
    :return: ``(duplicates, count)`` where ``duplicates`` is the set of
        primary-key value tuples seen at least twice and ``count`` is the
        size of that set
    :raises KeyError: if a row lacks one of ``pkeys``
    :raises TypeError: if a row is not subscriptable by field name or a
        primary-key value is unhashable
    """
    seen = set()
    duplicates = set()
    for entry in json_entries:
        pkey_values = tuple(entry[pkey] for pkey in pkeys)
        if pkey_values in seen:
            duplicates.add(pkey_values)
        else:
            seen.add(pkey_values)
    return duplicates, len(duplicates)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Report duplicated primary keys in the given JSON table files.

    The table is derived from each file's stem; files whose table has no
    entry in ``TABLE_PRIMARY_KEYS`` are reported and skipped without
    failing the hook.

    :param argv: command-line arguments; ``None`` means ``sys.argv[1:]``
    :return: ``1`` if any file contains duplicates or could not be
        checked, ``0`` otherwise
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'filenames', nargs='*', type=str,
        help='Names of the JSON files to check duplicate entries',
    )
    args = parser.parse_args(argv)

    retval = 0
    for json_file in args.filenames:
        table_name = Path(json_file).stem
        primary_keys = TABLE_PRIMARY_KEYS.get(table_name)
        if primary_keys is None:
            print(
                f"Table {table_name} has no primary key specified to "
                f"validate duplicate entries. Please update the plugin "
                f"code in https://git.voereir.io/voereir/pre-commit-hooks"
            )
            continue

        try:
            with open(json_file, encoding='UTF-8') as f:
                json_entries = json.load(f)
        except ValueError as exc:
            # Covers json.JSONDecodeError and UnicodeDecodeError: fail the
            # hook with a message instead of a traceback.
            print(f"File {json_file} is not valid JSON: {exc}")
            retval = 1
            continue

        try:
            duplicate_entries, count = _check_duplicate_entry(
                json_entries, primary_keys)
        except (KeyError, TypeError) as exc:
            # A row without its primary key cannot be validated; treat it
            # as a failure rather than crashing the whole run.
            print(
                f"Could not read primary key {primary_keys} from every "
                f"entry in file {json_file}: {exc!r}"
            )
            retval = 1
            continue

        if count:
            print(f"Duplicate entries found - {duplicate_entries} in file "
                  f"{json_file}")
            retval = 1

    return retval


if __name__ == "__main__":
    # SystemExit works even when the ``site``-provided ``exit`` helper is
    # unavailable (e.g. ``python -S``).
    raise SystemExit(main())
INFINITY = float('inf')


def _make_iterencode(
    markers, _default, _encoder, _indent, _floatstr,
    _key_separator, _item_separator, _sort_keys, _skipkeys,
    _one_shot,
    ## HACK: hand-optimized bytecode; turn globals into locals
    ValueError=ValueError,
    dict=dict,
    float=float,
    id=id,
    int=int,
    isinstance=isinstance,
    list=list,
    str=str,
    tuple=tuple,
    _intstr=int.__str__,
):
    """Build the chunk generator used by ``CustomJSONEncoder``.

    Mirrors the structure of the standard library's pure-Python
    ``json.encoder._make_iterencode`` with a different layout: arrays are
    written one element per indented line, while every object is written
    on a single line.

    :param markers: dict used to detect circular references, or ``None``
        to disable the check
    :param _default: callable converting unsupported objects into
        something encodable
    :param _encoder: callable turning a ``str`` into its JSON literal
    :param _indent: indentation unit (``str``), indent width (``int``) or
        ``None`` for compact arrays
    :param _floatstr: callable turning a ``float`` into its JSON literal
    :param _key_separator: text written between a key and its value
    :param _item_separator: text written between items
    :param _sort_keys: sort object items by key when true
    :param _skipkeys: silently drop unsupported key types instead of
        raising ``TypeError``
    :param _one_shot: accepted for signature compatibility; unused here
    :return: ``_iterencode(o, _current_indent_level)``, a generator
        function yielding the JSON text of ``o`` in chunks
    """

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError('Circular reference detected')
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            # Strip the separator so a ', ' item separator (used for the
            # single-line objects) leaves no trailing blank before the
            # line break.
            separator = _item_separator.rstrip() + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __str__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError('Circular reference detected')
            markers[markerid] = dct
        # Objects always stay on one line: no newline or indentation is
        # emitted between the k-v pairs, and the indent level handed to
        # nested containers is left unchanged.
        yield '{'
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them. Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _iterencode_list
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _iterencode_list
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(
                    f'keys must be str, int, float, bool or None, '
                    f'not {key.__class__.__name__}',
                )
            if first:
                first = False
            else:
                yield _item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _iterencode_list
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _iterencode_list
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _iterencode_list
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _iterencode_list
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError('Circular reference detected')
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode


class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that indents arrays but keeps each object on one line.

    With ``indent=2`` and ``separators=(', ', ': ')`` a list of rows is
    rendered as::

        [
          {"uuid": "a", "n": 1},
          {"uuid": "b", "n": 2}
        ]
    """

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        Honours the encoder's ``check_circular``, ``ensure_ascii``,
        ``allow_nan``, ``indent``, separator, ``sort_keys`` and
        ``skipkeys`` settings.

        :raises ValueError: on a circular reference, or on NaN/Infinity
            when ``allow_nan`` is false
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None

        # Respect ``ensure_ascii`` the way the base class does; using the
        # non-escaping encoder unconditionally would silently ignore it.
        if self.ensure_ascii:
            _encoder = json.encoder.encode_basestring_ascii
        else:
            _encoder = json.encoder.encode_basestring

        def floatstr(
            o, allow_nan=self.allow_nan,
            _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY,
        ):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    'Out of range float values are not JSON compliant: ' +
                    repr(o),
                )

            return text

        _iterencode = _make_iterencode(
            markers, self.default, _encoder, self.indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
            self.skipkeys, _one_shot,
        )
        return _iterencode(o, 0)
list(row.values())[0]) json_pretty = json.dumps( - json.loads(contents, object_pairs_hook=pairs_first), + json_contents, indent=indent, ensure_ascii=ensure_ascii, + cls=CustomJSONEncoder, + separators=(', ', ': '), ) return f'{json_pretty}\n' @@ -96,6 +347,13 @@ def main(argv: Sequence[str] | None = None) -> int: default=[], help='Ordered list of keys to keep at the top of JSON hashes', ) + parser.add_argument( + '--sort-by-first-key', + dest='sort_by_first_key', + action='store_true', + default=False, + help='Sort the json by a specific key', + ) parser.add_argument('filenames', nargs='*', help='Filenames to fix') args = parser.parse_args(argv) @@ -109,6 +367,7 @@ def main(argv: Sequence[str] | None = None) -> int: pretty_contents = _get_pretty_format( contents, args.indent, ensure_ascii=not args.no_ensure_ascii, sort_keys=not args.no_sort_keys, top_keys=args.top_keys, + sort_by_first_key=args.sort_by_first_key, ) except ValueError: print( diff --git a/setup.cfg b/setup.cfg index c5e6e0bd..099fd5fb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,6 +54,7 @@ console_scripts = forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main mixed-line-ending = pre_commit_hooks.mixed_line_ending:main name-tests-test = pre_commit_hooks.tests_should_end_in_test:main + notify-duplicate-entry = pre_commit_hooks.notify_duplicate_entry:main no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main pre-commit-hooks-removed = pre_commit_hooks.removed:main pretty-format-json = pre_commit_hooks.pretty_format_json:main