Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hierarchical export/import to server, single json per item. Moving and deletion allowed #225

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d0535c2
hierarchical export import.
rber474 Oct 10, 2023
341b17e
Revert "hierarchical export import."
rber474 Oct 10, 2023
82c794d
hierarchical export. keeps py2 compatibility
rber474 Oct 10, 2023
27142c1
adds form controls for hierarchical export
rber474 Oct 10, 2023
df8388c
checks if export_dir exists
rber474 Oct 10, 2023
332cd2a
added translations to spanish
rber474 Oct 10, 2023
388972f
Fix missing comparison value
rber474 Oct 10, 2023
44241d0
fix translation
rber474 Oct 10, 2023
b913a6b
changes successful message
rber474 Oct 10, 2023
84e4f48
use boostrap classes. convert checkbox to switch
rber474 Oct 11, 2023
8958eac
use boostrap classes. convert checkbox to switch
rber474 Oct 11, 2023
d10245b
add new translation
rber474 Oct 11, 2023
b19c919
Merge branch 'main' into hierarchy_export
rber474 Oct 12, 2023
ef53b30
Fixes and adapts spanish translations
rber474 Oct 12, 2023
8b11669
reorder hierarchycal export to fit latest changes
rber474 Oct 12, 2023
4ac9127
Reformat templates
rber474 Oct 12, 2023
c30c5e7
translate dynamic msg
rber474 Oct 13, 2023
6975256
controls order, export names, remove item control
rber474 Oct 13, 2023
f6bcb1f
updates .gitignore
rber474 Oct 13, 2023
1a56ef4
fixes label missing attr for solving test fail
rber474 Oct 13, 2023
a5a0ade
Fix dynamic translation
rber474 Oct 13, 2023
73a771a
fix missing parenthesis
rber474 Oct 15, 2023
2de23c2
Merge branch 'main' into hierarchy_export
rber474 Oct 15, 2023
2c17ae8
Adds test for export and import
rber474 Oct 15, 2023
632408c
Fixed tests for plone 4 / 5
rber474 Oct 15, 2023
35bed47
fixes test for plone 4 / 5
rber474 Oct 15, 2023
53b6d28
rename file
rber474 Oct 15, 2023
1e1933b
Merge pull request #1 from rber474/hierarchy_export
rber474 Oct 15, 2023
871f4f6
isort
rber474 Oct 16, 2023
050caca
Merge pull request #2 from rber474/hierarchy_export
rber474 Oct 16, 2023
2004ffc
Merge branch 'main' into fixes-readme-conflicts
rber474 Nov 4, 2023
d42a74e
Merge branch 'main' into fixes-readme-conflicts
rber474 Nov 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@ report.html
.tox/
reports/
# excludes
.DS_Store
*.bak
10 changes: 10 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,16 @@ Changelog
- Add principals to groups that already exist during import (#228)
[pbauer]

- Add hierarchical content export/import. A folder structure is created with one json file per item.
  This makes it possible to track changes for each item, and also allows moving or deleting content.
[rber474]

- Some fixes for Spanish translations
[rber474]

- Modify the import_content and export_content templates to include Bootstrap classes and change checkboxes to switches.
[rber474]

1.10 (2023-10-11)
-----------------

Expand Down
2 changes: 2 additions & 0 deletions src/collective/exportimport/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@

# Discussion Item has its own export / import views, don't show it in the exportable content type list
SKIPPED_CONTENTTYPE_IDS = ['Discussion Item']

TREE_DIRECTORY = "exported_tree"
24 changes: 21 additions & 3 deletions src/collective/exportimport/export_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from App.config import getConfiguration
from collective.exportimport import _
from collective.exportimport import config
from collective.exportimport.filesystem_exporter import FileSystemContentExporter
from collective.exportimport.interfaces import IBase64BlobsMarker
from collective.exportimport.interfaces import IMigrationMarker
from collective.exportimport.interfaces import IPathBlobsMarker
Expand Down Expand Up @@ -223,7 +224,26 @@ def __call__(
noLongerProvides(self.request, IPathBlobsMarker)
self.finish()
self.request.response.redirect(self.request["ACTUAL_URL"])
elif download_to_server == 3:
exporter = FileSystemContentExporter()
self.start()
for number, datum in enumerate(content_generator, start=1):
exporter.save(number, datum)
self.finish()

msg = self.context.translate(_(
"hierarchycal_export_success",
u"Exported ${number} items (${types}) as tree to ${target} with ${errors} errors",
mapping={
u"number": number,
u"types": ", ".join(self.portal_type),
u"target": exporter.root,
u"errors": len(self.errors)
}
))
logger.info(msg)
api.portal.show_message(msg, self.request)
self.request.response.redirect(self.request["ACTUAL_URL"])
# Export all items into one json-file in the filesystem
elif download_to_server:
directory = config.CENTRAL_DIRECTORY
Expand Down Expand Up @@ -262,8 +282,6 @@ def __call__(
noLongerProvides(self.request, IPathBlobsMarker)
self.finish()
self.request.response.redirect(self.request["ACTUAL_URL"])

# Export as one json-file through the browser
else:
with tempfile.TemporaryFile(mode="w+") as f:
self.start()
Expand All @@ -274,7 +292,7 @@ def __call__(
f.write(",")
json.dump(datum, f, sort_keys=True, indent=4)
if number:
if self.errors and self.write_errors:
if self.errors and self.write_errors:
f.write(",")
errors = {"unexported_paths": self.errors}
json.dump(errors, f, indent=4)
Expand Down
106 changes: 106 additions & 0 deletions src/collective/exportimport/filesystem_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
from App.config import getConfiguration
from collective.exportimport import config
from plone import api
from six.moves.urllib.parse import unquote, urlparse

import json
import logging
import os


class FileSystemExporter(object):
    """Base filesystem exporter.

    Creates the export directory layout below the configured central
    directory (or the Zope client home as a fallback) and provides
    helpers shared by concrete exporters.
    """

    logger = logging.getLogger(__name__)

    def __init__(self):
        self._create_base_dirs()

    def _create_base_dirs(self):
        """Create the base content directory and the removed_items subdir.

        Returns:
            str: absolute path of the content root (also kept in
            ``self.root``).
        """
        # Will generate a directory tree with one json file per item
        portal_id = api.portal.get().getId()
        directory = config.CENTRAL_DIRECTORY
        if not directory:
            # Fall back to the instance client home (e.g. var/instance).
            cfg = getConfiguration()
            directory = cfg.clienthome

        # Use the shared config constant instead of repeating the
        # "exported_tree" literal, so exporter and importer stay in sync.
        self.root = os.path.join(
            directory, config.TREE_DIRECTORY, portal_id, "content"
        )
        self._make_dir(self.root)

        remove_dir = os.path.join(
            directory, config.TREE_DIRECTORY, portal_id, "removed_items"
        )
        self._make_dir(remove_dir)

        return self.root

    def _make_dir(self, path):
        """Create *path* (including parents) unless it already exists."""
        if not os.path.exists(path):
            os.makedirs(path)
            self.logger.info("Created path %s", path)

    def create_dir(self, dirname):
        """Create a directory below the export root if it does not exist.

        Args:
            dirname (str): directory path relative to ``self.root``.
        """
        dirpath = os.path.join(self.root, dirname)
        self._make_dir(dirpath)

    def get_parents(self, parent):
        """Extract the site-relative path of an item's parent.

        Args:
            parent (dict): parent info dict from the item json
                (must contain ``"@id"``), or a falsy value.

        Returns:
            (str): relative path ("" when there is no parent)
        """

        if not parent:
            return ""

        parent_url = unquote(parent["@id"])
        parent_url_parsed = urlparse(parent_url)

        # Get the path part, split it, remove the always empty first element.
        parent_path = parent_url_parsed.path.split("/")[1:]
        if (
            len(parent_url_parsed.netloc.split(":")) > 1
            or parent_url_parsed.netloc == "nohost"
        ):
            # For example localhost:8080, or nohost when running tests.
            # First element will then be a Plone Site id.
            # Get rid of it.
            parent_path = parent_path[1:]

        return "/".join(parent_path)


class FileSystemContentExporter(FileSystemExporter):
    """Serializes JSON items into a filesystem tree."""

    def save(self, number, item):
        """Save one item as a json file in the filesystem tree.

        The target directory mirrors the item's original parent position
        in the site; folderish items additionally get a directory of
        their own so that their children can be stored inside it.

        Args:
            number (int): running export counter, used as filename
                prefix to preserve export order and avoid collisions.
            item (dict): serialized content item.
        """

        parent_path = self.get_parents(item.get("parent"))

        if item.get("is_folderish", False):
            item_path = os.path.join(parent_path, item.get("id"))
            self.create_dir(item_path)
        else:
            self.create_dir(parent_path)

        filename = "%s_%s.json" % (number, item.get("id"))
        filepath = os.path.join(self.root, parent_path, filename)
        with open(filepath, "w") as f:
            json.dump(item, f, sort_keys=True, indent=4)
140 changes: 140 additions & 0 deletions src/collective/exportimport/filesystem_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# -*- coding: utf-8 -*-
from collective.exportimport import _
from glob import iglob
from plone import api
from six.moves.urllib.parse import unquote, urlparse

import json
import logging
import os
import six


if six.PY2:
from pathlib2 import Path
else:
from pathlib import Path


class FileSystemImporter(object):
    """Base FS Importer"""

    logger = logging.getLogger(__name__)

    def __init__(self, context, server_tree_file):
        self.context = context
        self.path = server_tree_file

    def get_parents(self, parent):
        """Extracts parents of item

        Args:
            parent (dict): Parent info dict

        Returns:
            (str): relative path
        """

        if not parent:
            return ""

        parsed = urlparse(unquote(parent["@id"]))

        # Drop the always-empty first segment of the absolute path.
        segments = parsed.path.split("/")[1:]

        # A netloc with an explicit port (e.g. localhost:8080), or the
        # "nohost" host used when running tests, means the first segment
        # is a Plone Site id that is not part of the relative path.
        has_port = len(parsed.netloc.split(":")) > 1
        if has_port or parsed.netloc == "nohost":
            segments = segments[1:]

        return "/".join(segments)


class FileSystemContentImporter(FileSystemImporter):
    """Reads a filesystem tree of json files and yields importable items."""

    def list_files(self):
        """Load all json files from the filesystem tree.

        NOTE(review): ``recursive=True`` is Python 3 only -- Python 2's
        ``glob`` does not accept it, despite the py2 shims used in this
        module. Confirm whether py2 support is still required here.
        """
        files = iglob(os.path.join(self.path, "**/*.json"), recursive=True)
        return files

    def get_hierarchical_files(self):
        """Yield items from the tree, fixing up parents of moved files.

        When a json file no longer sits in the directory matching the
        parent recorded inside it, the item was moved on the filesystem:
        the recorded parent is replaced by the object found at the
        file's current location (falling back to the portal root) and
        any old object with the same UID is deleted first.
        """
        root = Path(self.path)
        portal = api.portal.get()
        assert root.is_dir()

        json_files = root.glob("**/*.json")
        for json_file in json_files:
            self.logger.debug("Importing %s", json_file)
            item = json.loads(json_file.read_text())
            json_parent = item.get("parent", {})

            # Find the real parent nodes as they could be moved
            # among directories
            prefix = os.path.commonprefix([str(json_file.parent), self.path])

            # relative path will be the difference between base export path
            # and the position of the json file
            relative_path = os.path.relpath(str(json_file.parent), prefix)
            parent_path = "%s/%s" % (
                "/".join(self.context.getPhysicalPath()),
                relative_path)
            parents = self.get_parents(json_parent)

            if json_file.parent == Path(os.path.join(self.path, parents)):
                # File still lives where its recorded parent says: no fixup.
                yield item
            else:
                parent_obj = api.content.get(path=parent_path)
                if not parent_obj:
                    # if parent_path is "." or parent_obj doesn't yet exist
                    parent_obj = portal

                # Point the recorded parent at the object found at the
                # file's current location; local files won't be modified.
                if parent_obj:
                    self.delete_old_if_moved(item.get("UID"))
                    json_parent.update(
                        {"@id": parent_obj.absolute_url(), "UID": parent_obj.UID()}
                    )
                    item["parent"] = json_parent
                    yield item

    def delete_old_if_moved(self, UID):
        """Delete the existing object with *UID*, if any.

        Called before re-importing a moved item so the old copy at the
        previous location does not linger.
        """
        check_if_moved = api.content.get(UID=UID)
        if check_if_moved:
            # delete all object
            api.content.delete(obj=check_if_moved, check_linkintegrity=False)
            self.logger.info("Removed old object %s", check_if_moved.UID())

    def process_deleted(self):
        """Delete every object listed in the removed_items directory.

        Returns:
            str: translated status message with the number of deletions.
        """
        root = Path(self.path).parent
        removed_items_dir = root / "removed_items"
        json_files = removed_items_dir.glob("**/*.json")
        deleted_items = []
        for json_file in json_files:
            self.logger.debug("Deleting %s", json_file)
            item = json.loads(json_file.read_text())
            uid = item.get("UID")
            obj = api.content.get(UID=uid)
            if obj:
                api.content.delete(obj=obj, check_linkintegrity=False)
                self.logger.info("Deleted object %s", item.get("UID"))
                deleted_items.append(uid)

        return self.context.translate(
            _(
                "deleted_items_msg",
                default=u"Deleted ${items} items.",
                mapping={u"items": len(deleted_items)}
            )
        )
Loading