diff --git a/ISSUE-457-TOOLS/COMPACT_ERRORS.py b/ISSUE-457-TOOLS/COMPACT_ERRORS.py
new file mode 100644
index 000000000..bcc6f412e
--- /dev/null
+++ b/ISSUE-457-TOOLS/COMPACT_ERRORS.py
@@ -0,0 +1,46 @@
+import re
+import argparse
+import numpy as np
+
+
def do_the_work(file_name):
    """Read an error-listing file and print each distinct error message
    prefixed by the comma-separated list of table versions it occurs in.

    The input alternates between path lines (starting with "/home", from
    which the version is taken via the ".../names/<N>/src/..." pattern)
    and error lines ("Line <n> : <message>").  Output is grouped and
    ordered by the first version each message appeared in.
    """
    with open(file_name, "r") as fh:
        in_text = fh.readlines()

    err_dict = {}
    version = None  # no version known until the first "/home" path line
    for line in in_text:
        line = line.strip()
        if not line:
            continue
        if line.startswith("/home"):
            # The version number sits between ".../names/" and "/src".
            version = re.search(r"(?<=names/)\d{1,2}(?=/src)", line).group()
        elif version is not None:
            # BUG FIX: previously an error line before any path line
            # raised NameError on `version`; such lines are now skipped.
            # Strip the per-line location prefix so identical messages
            # from different versions collapse onto one key.
            line = re.sub(r"Line \d+? : ", "", line)
            err_dict.setdefault(line, []).append(version)

    # Group messages by the first version they were seen in.
    out_dict = {}
    for line, version_list in err_dict.items():
        v0 = int(version_list[0])
        version_string = ", ".join(version_list)
        out_dict.setdefault(v0, []).append(f"{version_string} | {line}")

    # Print in ascending order of first-seen version (tables 1..83).
    for v0 in range(1, 84):
        for text in out_dict.pop(v0, []):
            print(text)
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="compact_errors",
        description="Compact error lists to show in which version each error occurs.",
    )
    # BUG FIX: required=True — without it a missing -f left file_name as
    # None and the script died later with a confusing open(None) TypeError.
    parser.add_argument("-f", "--file_name", type=str, required=True,
                        help="Name of input error file")
    args = parser.parse_args()

    do_the_work(args.file_name)
diff --git a/ISSUE-457-TOOLS/LIST_ERRORS.py b/ISSUE-457-TOOLS/LIST_ERRORS.py
new file mode 100644
index 000000000..27bc3c41e
--- /dev/null
+++ b/ISSUE-457-TOOLS/LIST_ERRORS.py
@@ -0,0 +1,154 @@
+import os
+import re
+import argparse
+import urllib.request
+from io import BytesIO
+
+from cfunits import Units as cfUnits
+from cf_units import Unit as uuUnits
+from lxml import etree
+
+
+
+PATH0 = "../"
+BASE_PATH = PATH0 + "cf-conventions/cf-convention.github.io/Data/cf-standard-names/"
+
def parse_xml(xml_raw):
    """Parse raw XML bytes into an lxml element tree.

    On a syntax error, dump a separator line plus the first 1000 bytes
    of the offending input to stdout as a debugging aid, then re-raise.
    """
    try:
        return etree.parse(BytesIO(xml_raw))
    except etree.XMLSyntaxError:
        print(f"{':'*100}\n{xml_raw[:1000]}")
        raise
+
+
def get_schema(xml_tree):
    """Fetch and compile the XML Schema referenced by the document root.

    The schema URI is taken from the first attribute value of the root
    element, downloaded over HTTP, parsed, and compiled into an lxml
    XMLSchema validator, which is returned.
    """
    root = xml_tree.getroot()
    xsd_uri = root.values()[0]
    # BUG FIX: the urlopen response was never closed (resource leak);
    # a context manager guarantees it is.
    with urllib.request.urlopen(xsd_uri) as link:
        xsd_raw = link.read()
    xsd_tree = parse_xml(xsd_raw)
    schema = etree.XMLSchema(xsd_tree)
    return schema
+
+
def find_xml_errors(xml_tree, schema, xml_raw):
    """Validate xml_tree against schema and print one line per problem.

    For the common "missing <description>/<canonical_units>" schema
    errors the offending standard-name entry is looked up in the raw
    XML (by the error's line number) so the message can name it; all
    other errors are printed verbatim with their line number.
    """
    try:
        schema.assertValid(xml_tree)
        print(" ---- Valid and Well-formed")
    except etree.DocumentInvalid:
        xml_list = xml_raw.split(b"\n")
        for error in schema.error_log:
            for element in ["description", "canonical_units"]:
                if f"( {element}" in error.message:
                    # error.line is 1-based; the entry id is the first
                    # double-quoted token on that line.
                    # BUG FIX: decode("utf-8)") -> decode("utf-8").
                    std_name = xml_list[error.line - 1].split(b'"')[1].decode("utf-8")
                    print(f"Line {error.line} : Standard name entry for '{std_name}' has no <{element}>")
                    break
            else:
                print(f"Line {error.line} : {error.message}")
+
+
def check_units(can_units, std_name):
    """Sanity-check a canonical-units string with two unit libraries.

    cfunits decides basic validity; cf_units (udunits2) then flags
    spurious " -" exponent spacing, use of "/", and strings only
    cfunits accepts (special CF units).  Findings are printed; nothing
    is returned.
    """
    uu = cfUnits(can_units)
    if not uu.isvalid:
        print(f"Canonical units '{can_units}' is not accepted by CF-UNITS for '{std_name}'")
    else:
        try:
            uu = uuUnits(can_units)
            if " -" in can_units:
                # "m s -1" style: a stray space before a negative exponent.
                try:
                    uu = uuUnits(can_units.replace(" -", "-"))
                    print(f"Canonical units '{can_units}' has a spurious space for '{std_name}'")
                except ValueError:
                    print(f"Canonical unit '{can_units}' is really weird for '{std_name}'")
            elif "/" in can_units:
                print(f"Canonical units '{can_units}' used '/' for '{std_name}'")
        except ValueError:
            # BUG FIX: the closing quote after {can_units} was missing.
            print(f"Canonical unit '{can_units}' is a special CF unit for '{std_name}'")
+
+
# NOTE(review): this hunk is corrupted in the patch text — the regex
# literals have had their XML-tag contents stripped, several lines are
# fused together, and a large span between _extract_entries() and the
# __main__ tail is missing entirely (the hunk header claims +154 lines
# but far fewer survive).  The code below is reproduced as-is and is
# NOT valid Python; restore it from the original commit before use.
def find_missing_and_duplicates(xml_raw, old_entry_list, old_alias_list):
    # Collect every entry's standard name (and, originally, its units).
    def _extract_entries(xml_raw):
        entry_list = []
        for entry in re.finditer(rb'.+?', xml_raw, re.S):
            e = re.search(rb'(?<=\").+?(?=\")', entry.group())
            std_name = e.group().decode("utf-8")
            entry_list.append(std_name)
            can_units = re.search(rb'(?<=_units>).+?(?=', xml_raw, re.S):
        alias_from = re.search(rb'(?<=\").+?(?=\")', alias.group())
        alias_to = re.search(rb'(?<=entry_id>).+?(?= 0:
    version_list = range(1, args.version + 1)
else:
    version_list = [args.version]

entry_list = []
alias_list = []
for version in version_list:
    try:
        if version != 38:
            print("\n")
        entry_list, alias_list = do_the_work(
            version, args.severity, entry_list, alias_list
        )
    except:
        break
print()
diff --git a/ISSUE-457-TOOLS/STEP_1-2.py b/ISSUE-457-TOOLS/STEP_1-2.py
new file mode 100644
index 000000000..801bd9573
--- /dev/null
+++ b/ISSUE-457-TOOLS/STEP_1-2.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+
+import re
+from datetime import datetime, UTC
+from pathlib import Path
+
+MY_PATH = "../"
+BASE_PATH = MY_PATH + "cf-conventions/cf-convention.github.io/Data/cf-standard-names/"
+# NEW_XSD = b"../../schema-files/cf-standard-name-table-2.0.xsd"
+NEW_XSD = (b"https://raw.githubusercontent.com/larsbarring/cf-convention.github.io/"
+ b"test-all-issue-457/Data/schema-files/cf-standard-name-table-2.0.xsd")
+
def fix_v1_datetime(xml_raw):
    """Insert the missing publication datetime into the version-1 table,
    right after the version-number marker.

    NOTE(review): the two byte literals below look truncated in this
    patch — XML-tag text appears to have been stripped from them (the
    anchor was presumably a full version-number element, and the
    inserted line a last_modified element).  Confirm against the
    original commit before relying on this.
    """
    txt1 = b">1\n"
    txt2 = txt1 + b" 2002-04-02T12:00:00Z\n"
    xml_raw = xml_raw.replace(txt1, txt2)
    print("ADDED : DATETIME in version 1")
    return xml_raw
+
+
def fix_v71_datetime(xml_raw):
    """Repair the malformed version-71 timestamp (missing seconds).

    Rewrites every occurrence of "2020-02-04T12:00Z" as the
    schema-valid "2020-02-04T12:00:00Z"; input without that stamp is
    returned unchanged.
    """
    bad_stamp = b"2020-02-04T12:00Z"
    good_stamp = b"2020-02-04T12:00:00Z"
    if bad_stamp in xml_raw:
        xml_raw = xml_raw.replace(bad_stamp, good_stamp)
        print("FIXED : DATETIME in version 71")
    return xml_raw
+
def fix_v12_duplicate_entry(xml_raw):
    """Drop the first of the two duplicated
    'sea_surface_height_above_reference_ellipsoid' entries in version 12.

    NOTE(review): the pattern below is corrupted in this patch — the
    entry-id text between the tag delimiters was stripped, so as
    written it matches far too broadly.  Restore the original regex
    (with the full entry span for that id) before use.
    """
    pat = rb'\n *.+? *?(?=\n)'
    # Remove only the first match (count=1); re.S lets .+? span newlines.
    xml_raw = re.sub(pat, b"", xml_raw, 1, re.S)
    print("FIXED : Removed first duplicate of 'sea_surface_height_above_reference_ellipsoid'")
    return xml_raw
+
+
def add_modified_date(xml_raw):
    """Insert a last_modified element, stamped with the current UTC
    time, immediately before an anchor element of the header.

    NOTE(review): `inst_text` is empty in this patch — the anchor tag
    it should search for (presumably an institution element) was
    stripped by whatever mangled the XML-like literals — and
    `modified_end` lacks the "/" of a closing tag.  Verify both
    against the original commit; with an empty anchor the slice
    `[1:-n]` (n == 0) is also wrong.
    """
    time_stamp = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ").encode("utf-8")
    modified = b"last_modified"
    modified_start = b"<" + modified + b">"
    modified_end = modified_start.replace(b"<", b"")
    modified_element = modified_start + time_stamp + modified_end
    inst_text = b""
    n = len( inst_text)
    inst = re.search((b"\n( *)" + inst_text), xml_raw)
    # Re-use the anchor's leading spaces so the new element lines up.
    spaces = inst.group()[1: -n]
    position = inst.span()[0]
    xml_raw = xml_raw[:position] + b"\n" + spaces + modified_element + xml_raw[position:]
    print("ADDED : MODIFIED DATE")
    return xml_raw
+
+
def do_the_work(version):
    """Apply the step-1/2 repairs to one version of the standard-name
    table XML, keeping a pristine "__SAVED" copy of the original.

    Fixes applied: ensure an XML declaration, point the schema
    reference at the new 2.0 XSD, per-version datetime/duplicate
    repairs, rename last_modified -> first_published_date, and add a
    fresh modified date.
    """
    xml_original = f"{BASE_PATH}{version}/src/cf-standard-name-table.xml"
    xml_saved = xml_original.replace("-table", "-table__SAVED")

    my_file = Path(xml_saved)
    if my_file.is_file():
        # A saved copy exists: work from it so the fixes are idempotent.
        with open(xml_saved, "rb") as fh:
            xml_raw = fh.read()
        print(f"READING SAVED ORIGINAL FILE: {xml_original}")
    else:
        # First run for this version: read the original...
        with open(xml_original, "rb") as fh:
            xml_raw = fh.read()
        # ...and save a pristine copy before changing anything.
        with open(xml_saved, "wb") as fh:
            fh.write(xml_raw)
        print(f"READING AND SAVING ORIGINAL FILE: {xml_original}")

    # NOTE(review): the two lines below are corrupted in this patch —
    # the XML-declaration literals (presumably starting b"<?xml") were
    # stripped, leaving invalid syntax.  Restore from the original commit.
    if xml_raw[:6] != b"\n' + xml_raw
        print("ADDED : '")
    for old_xsd in [b"CFStandardNameTable-1.0.xsd",
                    b"CFStandardNameTable-1.1.xsd",
                    b"cf-standard-name-table-1.1.xsd"]:
        if old_xsd in xml_raw:
            xml_raw = xml_raw.replace(old_xsd, NEW_XSD)
            print(f"CHANGED : XSD FILE NAME {old_xsd.decode('utf-8')} --> {NEW_XSD.decode('utf-8')}")

    # Per-version one-off repairs.
    if version == 1:
        xml_raw = fix_v1_datetime(xml_raw)
    elif version == 12:
        xml_raw = fix_v12_duplicate_entry(xml_raw)
    elif version == 71:
        xml_raw = fix_v71_datetime(xml_raw)

    # Older tables used last_modified for the publication date.
    xml_raw = xml_raw.replace(b"last_modified", b"first_published_date")
    print("CHANGED : 'last_modified' --> 'first_published_date'")

    xml_raw = add_modified_date(xml_raw)

    with open(xml_original, "wb") as fh:
        fh.write(xml_raw)
+
+
if __name__ == "__main__":
    # Walk all published table versions; version 38 was never released,
    # so it is skipped.  The first missing/broken version ends the run.
    for version in range(1, 100):
        try:
            if version != 38:
                print("\n")
                do_the_work(version)
        except Exception as exc:
            # BUG FIX: was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt and hid the reason for
            # stopping.  Still best-effort: report, then stop.
            print(f"STOPPED at version {version}: {exc}")
            break
diff --git a/ISSUE-457-TOOLS/STEP_3-4.py b/ISSUE-457-TOOLS/STEP_3-4.py
new file mode 100644
index 000000000..3e6e21059
--- /dev/null
+++ b/ISSUE-457-TOOLS/STEP_3-4.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+import re
+from datetime import datetime, UTC
+from pathlib import Path
+
+
+MY_PATH = "../"
+BASE_PATH = MY_PATH + "cf-conventions/cf-convention.github.io/Data/cf-standard-names/"
+NL = b"\n"
+
+
def find_duplicate_aliases(xml_raw):
    """Return the alias ids that occur more than once in the table.

    NOTE(review): this function is corrupted in the patch — the first
    statement fuses the pattern literal with the body of a lost
    finditer loop (its tag text was stripped), and `alias_dict` /
    `found` are referenced without the code that defines them.
    Restore from the original commit; reproduced verbatim below.
    """
    pat = (rb' +?)', found.group())
    res = res.group()
    if res in alias_dict:
        alias_dict[res] += 1
    else:
        alias_dict[res] = 1
    # Keep only ids seen more than once.
    for k in list(alias_dict):
        if alias_dict[k] == 1:
            _ = alias_dict.pop(k, 0)
    # _ = [print(f' {k.decode("utf-8")}: {v}') for k,v in alias_dict.items()]
    return list(alias_dict.keys())
+
+
def fix_duplicate_aliases(xml_raw, std_name):
    """Merge all duplicate alias blocks for std_name into a single block
    carrying the union of their entry_id lines; return the rewritten
    bytes, or "" when there was nothing to merge.

    NOTE(review): the pattern on the first line is corrupted in this
    patch (the alias-tag text was stripped), as is the empty b""
    literal that presumably matched an entry_id tag.  Restore both
    from the original commit before use.
    """
    pat = (rb' +? *?\n')
    result = [r for r in re.finditer(pat, xml_raw, re.S)]
    if len(result) > 1:
        # Union of all entry_id lines across the duplicate blocks.
        collected_entries = []
        for k, r in enumerate(result):
            lines = r.group().splitlines()
            for s in lines:
                if b"" in s and s not in collected_entries:
                    collected_entries.append(s)
        # Rebuild the first block with the merged entry ids in place of
        # its own entry_id line(s).
        new_alias = []
        for line in result[0].group().splitlines():
            if b"entry_id" in line:
                new_alias.extend(collected_entries)
            elif line:
                new_alias.append(line)
        _ = [print(f' {line.decode("utf-8")}') for line in new_alias]
        result_0 = NL.join(new_alias)
        # Delete the later duplicates back-to-front so spans stay valid.
        for r in reversed(result[1:]):
            span = r.span()
            xml_raw = xml_raw[: span[0]] + xml_raw[span[1]: ]
        span = result[0].span()
        xml_raw = xml_raw[: span[0]] + NL + NL.join(new_alias) + NL + xml_raw[span[1]: ]
    else:
        xml_raw = ""
    return xml_raw
+
+
def add_conventions(xml_raw):
    """Add a conventions element ("CF-StandardNameTable-<N>") right
    after the version-number element of the header.

    NOTE(review): the regex and the replacement literal below are
    corrupted in this patch — the XML tag text (presumably the
    version_number / conventions tags) has been stripped, so the empty
    b"" suffix and bare pattern are wrong as written.  Restore from
    the original commit before use.
    """
    pat = rb"\n +?\d+?"
    old_elem = re.search(pat, xml_raw)
    old_elem = old_elem.group()
    # The version number is the digits inside the matched element.
    version = re.search(rb"\d{1,3}", old_elem)
    version = version.group()
    new_elem = old_elem + b"\n CF-StandardNameTable-" + version + b""
    xml_raw = xml_raw.replace(old_elem, new_elem)
    return xml_raw
+
+
+def update_last_modified(xml_raw):
+ time_stamp = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ").encode("utf-8")
+ pat = rb".+?Z"
+ new = rb"" + time_stamp + rb""
+ xml_raw = re.sub(pat, new, xml_raw)
+ return xml_raw
+
+
def do_the_work(version):
    """Run the step-3/4 repairs on one version of the table: merge any
    duplicated alias blocks, add the conventions element, and refresh
    the last-modified timestamp, rewriting the file in place.
    """
    xml_file = f"{BASE_PATH}{version}/src/cf-standard-name-table.xml"
    with open(xml_file, "rb") as fh:
        xml_raw = fh.read()
    # Print the path with its common 58-character prefix elided.
    print(f"..../{xml_file[58:]}")

    for std_name in find_duplicate_aliases(xml_raw):
        merged = fix_duplicate_aliases(xml_raw, std_name)
        if merged:
            xml_raw = merged
        else:
            print(" No change")

    xml_raw = update_last_modified(add_conventions(xml_raw))

    with open(xml_file, "wb") as fh:
        fh.write(xml_raw)
+
+
if __name__ == "__main__":
    # update_schema()
    # Walk all published table versions; version 38 was never released,
    # so it is skipped.  The first missing/broken version ends the run.
    for version in range(1, 100):
        try:
            if version != 38:
                print("\n")
                do_the_work(version)
        except Exception as exc:
            # BUG FIX: bare `except:` also swallowed SystemExit and
            # KeyboardInterrupt and hid the stop reason; report it.
            print(f"STOPPED at version {version}: {exc}")
            break
diff --git a/ISSUE-457-TOOLS/STEP_5u.py b/ISSUE-457-TOOLS/STEP_5u.py
new file mode 100644
index 000000000..2302cc373
--- /dev/null
+++ b/ISSUE-457-TOOLS/STEP_5u.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+
+import re
+from datetime import datetime, UTC
+from pathlib import Path
+
+
+MY_PATH = "../"
+BASE_PATH = MY_PATH + "cf-conventions/cf-convention.github.io/Data/cf-standard-names/"
+
+
def prettify(line):
    """Re-indent one output line according to which XML tag it contains
    and append the appropriate number of trailing newlines.

    NOTE(review): every key of `fmt` is an empty string in this patch —
    the tag names were stripped by whatever mangled the XML-like
    literals, so the dict collapses to a single "" key that matches
    every line.  Restore the real tag keys from the original commit.
    """
    # (indent, trailing-newline-count) per recognized tag.
    fmt = {
        "": (0, 1),
        "": (2,1),
        "": (2,1),
        "": (2, 1),
        "": (2, 1),
        "": (2, 1),
        "": (2,3),
        "": (4, 1),
        "": (4, 1),
        "": (4, 1),
        "": (4, 1),
        "": (2, 2),
        "": (2, 2),
        "": (0, 1),
    }
    line2 = line.strip()
    for k in fmt.keys():
        if k in line2:
            indent, newlines = fmt[k]
            pretty = ' '*indent + line2 + '\n'*newlines
            return pretty
    # Unrecognized lines come back stripped, with no trailing newline.
    return line2
+
def cleanup(in_xml):
    """Rejoin tags that were split across lines as "...\\n>" so each
    ">" sits directly after the text that precedes it."""
    return in_xml.replace("\n>", ">")
+
+
def extract_header(in_xml):
    """Return the table header followed by the normalized entry blocks.

    NOTE(review): this hunk is corrupted — the first line fuses the
    header regex with the start of a lost entry-extraction loop (tag
    text stripped, intermediate lines missing), and the names `tags`,
    `entry_dict` and `maybe_sorted` come from the lost span.  Restore
    from the original commit; reproduced verbatim below.
    """
    header = re.search(".+?(?=.+?', in_xml, re.S):
        entry1 = entry.group()
        entry1 = entry1.split("\n")
        entry1 = "\n".join([t.strip() for t in entry1 if t])
        e = re.search(r'(?<=\").+?(?=\")', entry1)
        std_name = e.group()
        entry2 = f'\n'
        for t in tags:
            payload = re.search(rf"(?<=\<{t}>).*?(?={t}>)", entry1, re.S)
            if payload:
                p = payload.group()
                if t == "description":
                    # Collapse hard-wrapped description text to one line.
                    p = re.sub(" *\n *", " ", p, re.S)
                entry2 += f"<{t}>{p}{t}>\n"
            elif t in ["canonical_units", "description"]:
                # Mandatory elements are added empty when missing.
                entry2 += f"<{t}>{t}>\n"
                print(f"ADDED: {std_name} '{t}'")
        entry2 += " \n\n"
        entry_dict[std_name] = entry2
    out_xml = "\n".join([entry_dict[k] for k in maybe_sorted(entry_dict)])
    return out_xml
+
+
def extract_aliases(in_xml):
    """Collect the alias blocks, normalize their whitespace, and return
    them joined (ordered via maybe_sorted).

    NOTE(review): the finditer pattern is an empty string in this patch
    — the alias-tag text was stripped — and `maybe_sorted` is defined
    in a part of the file lost to the same corruption.  Restore both
    from the original commit before use.
    """
    alias_dict = {}
    for alias in re.finditer(r'', in_xml, re.S):
        alias1 = alias.group()
        # The alias id is the first double-quoted token of the block.
        a = re.search(r'(?<=\").+?(?=\")', alias1)
        std_name = a.group()
        alias1 = "\n".join([a.strip() for a in alias1.split("\n") if a])
        alias_dict[std_name] = alias1
    out_xml = "\n".join([alias_dict[k] for k in maybe_sorted(alias_dict)])
    return out_xml
+
+
def do_the_work(version):
    """Rewrite one version's table file: normalize split tags, then emit
    the header, entry and alias sections line by line through
    prettify() back into the same file.
    """
    xml_file = f"{BASE_PATH}{version}/src/cf-standard-name-table.xml"
    with open(xml_file, "r") as fh:
        in_xml = fh.read()
    # Path with its common 58-character prefix elided.
    print(xml_file[58:])

    cleaned = cleanup(in_xml)
    pieces = [
        extract_header(cleaned),
        extract_entries(cleaned),
        extract_aliases(cleaned),
        "\n\n",
    ]
    out_lines = "".join(pieces).split("\n")

    with open(xml_file, "w") as fh:
        for line in out_lines:
            fh.write(prettify(line))
+
+if __name__ == "__main__":
+ for version in range(1, 100):
+ try:
+ if version != 38:
+ print("\n")
+ do_the_work(version)
+ except:
+ break
diff --git a/ISSUE-457-TOOLS/restore_files b/ISSUE-457-TOOLS/restore_files
new file mode 100755
index 000000000..3857c93ee
--- /dev/null
+++ b/ISSUE-457-TOOLS/restore_files
@@ -0,0 +1,10 @@
#!/bin/bash
# Restore the pristine "__SAVED" copies created by STEP_1-2.py, moving
# each one back over the edited table file.  Version 38 was never
# released, so its directory is skipped.

dir=/home/a001257/CODE/cf-conventions/cf-convention.github.io/Data/cf-standard-names
for d in $(seq 1 84); do
    if [[ "$d" != "38" ]]; then
        # BUG FIX: glob directly instead of parsing `ls` output, and
        # quote every expansion so unusual paths survive word splitting.
        for f in "$dir/$d/src/"*__SAVED.xml; do
            # Skip the literal pattern when no saved copy exists.
            [[ -e "$f" ]] && mv "$f" "${f/__SAVED/}"
        done
        # rm $dir/$d/src/*
    fi
done