Merge pull request #175 from collective/support_iterator
pbauer authored Feb 10, 2023
2 parents f37abfc + fe46627 commit 1d3f5d3
Showing 3 changed files with 279 additions and 1 deletion.
7 changes: 7 additions & 0 deletions CHANGES.rst
@@ -19,6 +19,13 @@ Changelog
- Create documents as containers for items without parent when documents are folderish.
[JeffersonBledsoe]

- Add support for passing any iterator as data-source to the import.
[pbauer]

- Add example for importing collective.jsonify data to documentation.
[pbauer]


1.7 (2023-01-20)
----------------

259 changes: 259 additions & 0 deletions README.rst
@@ -1864,6 +1864,265 @@ See https://6.docs.plone.org/backend/upgrading/version-specific-migration/migrat
return soup.decode()
Migrate very old Plone Versions with data created by collective.jsonify
-----------------------------------------------------------------------

Versions older than Plone 4 do not support ``plone.restapi``, which ``collective.exportimport`` requires to serialize content.

To migrate Plone 1, 2 and 3 to Plone 6 you can use ``collective.jsonify`` for the export and ``collective.exportimport`` for the import.

Export
******

Use https://github.com/collective/collective.jsonify to export content.

You make the methods of ``collective.jsonify`` available as External Methods.
See https://github.com/collective/collective.jsonify/blob/master/docs/install.rst for more information.

To work better with ``collective.exportimport`` you can extend the exported data using the ``additional_wrappers`` feature.
Adding info on the parent of an item makes it easier for ``collective.exportimport`` to import the data.

For example, the following ``extend_item`` wrapper records the ``@id``, ``@type`` and ``UID`` of the parent:

.. code-block:: python

    def extend_item(obj, item):
        """Extend the item to work better with collective.exportimport."""
        from Acquisition import aq_parent

        parent = aq_parent(obj)
        item["parent"] = {
            "@id": parent.absolute_url(),
            "@type": getattr(parent, "portal_type", None),
        }
        if getattr(parent.aq_base, "UID", None) is not None:
            item["parent"]["UID"] = parent.UID()
        return item
Here is a full example for ``json_methods.py`` which should be in ``<BUILDOUT_ROOT>/parts/instance/Extensions/``

.. code-block:: python

    from collective.jsonify.export import export_content as export_content_orig
    from collective.jsonify.export import get_item  # re-exported for the get_item External Method

    EXPORTED_TYPES = [
        "Folder",
        "Document",
        "News Item",
        "Event",
        "Link",
        "Topic",
        "File",
        "Image",
        "RichTopic",
    ]

    EXTRA_SKIP_PATHS = [
        "/Plone/archiv/",
        "/Plone/do-not-import/",
    ]

    # Path from which to continue the export.
    # The export walks the whole site respecting the order.
    # It will ignore everything until this path is reached.
    PREVIOUS = ""


    def export_content(self):
        return export_content_orig(
            self,
            basedir="/var/lib/zope/json",
            skip_callback=skip_item,
            extra_skip_classname=[],
            extra_skip_id=[],
            extra_skip_paths=EXTRA_SKIP_PATHS,
            batch_start=0,
            batch_size=10000,
            batch_previous_path=PREVIOUS or None,
        )


    def skip_item(item):
        """Return True if the item should be skipped."""
        portal_type = getattr(item, "portal_type", None)
        if portal_type not in EXPORTED_TYPES:
            return True


    def extend_item(obj, item):
        """Extend the item to work better with collective.exportimport."""
        from Acquisition import aq_parent

        parent = aq_parent(obj)
        item["parent"] = {
            "@id": parent.absolute_url(),
            "@type": getattr(parent, "portal_type", None),
        }
        if getattr(parent.aq_base, "UID", None) is not None:
            item["parent"]["UID"] = parent.UID()
        return item
To use these, create three External Methods at the Zope root in the ZMI (a scripted equivalent is sketched after the list):

* id: "export_content", module name: "json_methods", function name: "export_content"
* id: "get_item", module name: "json_methods", function name: "get_item"
* id: "extend_item", module name: "json_methods", function name: "extend_item"

Then you can pass the extender to the export using a query string: http://localhost:8080/Plone/export_content?additional_wrappers=extend_item
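
For example, a sketch using the ``requests`` library (URL and credentials are placeholders):

.. code-block:: python

    import requests

    requests.get(
        "http://localhost:8080/Plone/export_content",
        params={"additional_wrappers": "extend_item"},
        auth=("admin", "secret"),
    )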


Import
******

Two issues need to be dealt with to allow ``collective.exportimport`` to import the data generated by ``collective.jsonify``.

#. The data is in directories instead of in one large json-file.
#. The json is not in the expected format.

Starting with version 1.8 you can pass an iterator to the import.

You need to create a directory-walker that sorts the json-files the right way.
Otherwise they would be imported in lexicographic order: ``1.json``, ``10.json``, ``100.json``, ``101.json`` and so on.

.. code-block:: python

    import json
    import logging

    from pathlib import Path

    logger = logging.getLogger(__name__)


    def filesystem_walker(path=None):
        root = Path(path)
        assert root.is_dir()
        # Sort numbered folders and files numerically, not lexicographically.
        folders = sorted(
            [i for i in root.iterdir() if i.is_dir() and i.name.isdecimal()],
            key=lambda i: int(i.name),
        )
        for folder in folders:
            json_files = sorted(
                [i for i in folder.glob("*.json") if i.stem.isdecimal()],
                key=lambda i: int(i.stem),
            )
            for json_file in json_files:
                logger.debug("Importing %s", json_file)
                item = json.loads(json_file.read_text())
                item["json_file"] = str(json_file)
                item = prepare_data(item)
                if item:
                    yield item
The walker expects ``path`` to be a root directory holding one or more directories with the json-files.
Directories and files are sorted numerically by the number in their names.
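
For illustration, a minimal sketch of the layout the walker consumes (paths and numbers are made up):

.. code-block:: python

    # /var/lib/zope/json/mydata/0/1.json ... /0/9999.json
    # /var/lib/zope/json/mydata/1/10000.json ... /1/19999.json
    for item in filesystem_walker("/var/lib/zope/json/mydata"):
        print(item["@id"])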

The method ``prepare_data`` modifies the data before passing it to the import.
A very similar task is done by ``collective.exportimport`` during export.

.. code-block:: python

    # Sentinel to distinguish a missing key from falsy values.
    _marker = object()


    def prepare_data(item):
        """Modify collective.jsonify data to work with collective.exportimport."""
        # Drop relation-fields or defer the import
        item.pop("relatedItems", None)
        mapping = {
            # jsonify => exportimport
            "_uid": "UID",
            "_type": "@type",
            "_path": "@id",
            "_layout": "layout",
            # AT fieldnames => DX fieldnames
            "excludeFromNav": "exclude_from_nav",
            "allowDiscussion": "allow_discussion",
            "subject": "subjects",
            "expirationDate": "expires",
            "effectiveDate": "effective",
            "creation_date": "created",
            "modification_date": "modified",
            "startDate": "start",
            "endDate": "end",
            "openEnd": "open_end",
            "eventUrl": "event_url",
            "wholeDay": "whole_day",
            "contactEmail": "contact_email",
            "contactName": "contact_name",
            "contactPhone": "contact_phone",
            "imageCaption": "image_caption",
        }
        for old, new in mapping.items():
            item = migrate_field(item, old, new)

        if item.get("constrainTypesMode", None) == 1:
            item = migrate_field(item, "constrainTypesMode", "constrain_types_mode")
        else:
            item.pop("locallyAllowedTypes", None)
            item.pop("immediatelyAddableTypes", None)
            item.pop("constrainTypesMode", None)

        if "id" not in item:
            item["id"] = item["_id"]
        return item


    def migrate_field(item, old, new):
        if item.get(old, _marker) is not _marker:
            item[new] = item.pop(old)
        return item
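
A quick sanity check of ``prepare_data`` on a hypothetical, heavily truncated jsonify item:

.. code-block:: python

    item = prepare_data({
        "_id": "front-page",
        "_uid": "0123456789abcdef0123456789abcdef",
        "_type": "Document",
        "_path": "/Plone/front-page",
        "excludeFromNav": True,
    })
    assert item["@type"] == "Document"
    assert item["@id"] == "/Plone/front-page"
    assert item["exclude_from_nav"] is True
    assert item["id"] == "front-page"
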
You can pass the generator ``filesystem_walker`` to the import:

.. code-block:: python

    class ImportAll(BrowserView):

        def __call__(self):
            # ...
            cfg = getConfiguration()
            directory = Path(cfg.clienthome) / "import"

            # import content
            view = api.content.get_view("import_content", portal, request)
            request.form["form.submitted"] = True
            request.form["commit"] = 1000
            view(iterator=filesystem_walker(directory / "mydata"))

            # import default-pages
            import_deferred = api.content.get_view("import_deferred", portal, request)
            import_deferred()


    class ImportDeferred(BrowserView):

        def __call__(self):
            self.title = "Import Deferred Settings (default pages)"
            if not self.request.form.get("form.submitted", False):
                return self.index()
            for brain in api.content.find(portal_type="Folder"):
                obj = brain.getObject()
                annotations = IAnnotations(obj)
                if DEFERRED_KEY not in annotations:
                    continue
                default = annotations[DEFERRED_KEY].pop("_defaultpage", None)
                if default and default in obj:
                    logger.info("Setting %s as default page for %s", default, obj.absolute_url())
                    obj.setDefaultPage(default)
                if not annotations[DEFERRED_KEY]:
                    annotations.pop(DEFERRED_KEY)
            api.portal.show_message("Done", self.request)
            return self.index()
``collective.jsonify`` puts the info on relations, translations and default-pages in the export file.
You can use the deferred-import approach to deal with that data after all items have been imported.
The example ``ImportDeferred`` above uses that approach to set the default pages.

The ``global_obj_hook`` below stores that data in an annotation:

.. code-block:: python

    def global_obj_hook(self, obj, item):
        # Store deferred data in an annotation.
        keys = ["_defaultpage"]
        data = {}
        for key in keys:
            if value := item.get(key, None):
                data[key] = value
        if data:
            annotations = IAnnotations(obj)
            annotations[DEFERRED_KEY] = data
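
A minimal sketch of where such a hook can live, assuming a custom import view that subclasses ``ImportContent`` (the ``DEFERRED_KEY`` value is an arbitrary example):

.. code-block:: python

    from collective.exportimport.import_content import ImportContent
    from zope.annotation.interfaces import IAnnotations

    DEFERRED_KEY = "collective.exportimport.deferred"  # pick your own key


    class CustomImportContent(ImportContent):

        def global_obj_hook(self, obj, item):
            # Store deferred data (e.g. the default page) in an annotation.
            keys = ["_defaultpage"]
            data = {key: item[key] for key in keys if item.get(key)}
            if data:
                IAnnotations(obj)[DEFERRED_KEY] = data
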
Written by
==========

14 changes: 13 additions & 1 deletion src/collective/exportimport/import_content.py
@@ -105,7 +105,14 @@ class ImportContent(BrowserView):
     # Example: {'which_price': 'normal'}
     DEFAULTS = {}

-    def __call__(self, jsonfile=None, return_json=False, limit=None, server_file=None):
+    def __call__(
+        self,
+        jsonfile=None,
+        return_json=False,
+        limit=None,
+        server_file=None,
+        iterator=None
+    ):
         request = self.request
         self.limit = limit
         self.commit = int(request["commit"]) if request.get("commit") else None
@@ -179,6 +186,11 @@ def __call__(self, jsonfile=None, return_json=False, limit=None, server_file=Non
         if close_file:
             jsonfile.close()

+        if not jsonfile and iterator:
+            self.start()
+            msg = self.do_import(iterator)
+            api.portal.show_message(msg, self.request)
+
         self.finish()

         if return_json:
