feat: taxonomy patch instead of re-generating #554

Open
wants to merge 12 commits into base: main
12 changes: 6 additions & 6 deletions .github/workflows/github-projects-for-openfoodfacts-design.yml
@@ -59,13 +59,13 @@ jobs:
project-url: https://github.com/orgs/openfoodfacts/projects/5 # Add issue to the folksonomy project
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
labeled: 🏷️ Folksonomy Project
label-operator: OR
label-operator: OR
- uses: actions/add-to-project@main
with:
project-url: https://github.com/orgs/openfoodfacts/projects/44 # Add issue to the data quality project
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
labeled: 🧽 Data quality
label-operator: OR
label-operator: OR
- uses: actions/add-to-project@main
with:
project-url: https://github.com/orgs/openfoodfacts/projects/82 # Add issue to the search project
@@ -77,19 +77,19 @@ jobs:
project-url: https://github.com/orgs/openfoodfacts/projects/41 # Add issue to the producer platform project
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
labeled: 🏭 Producers Platform
label-operator: OR
label-operator: OR
- uses: actions/add-to-project@main
with:
project-url: https://github.com/orgs/openfoodfacts/projects/19 # Add issue to the infrastructure project
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
labeled: infrastructure
label-operator: OR
label-operator: OR
- uses: actions/add-to-project@main
with:
project-url: https://github.com/orgs/openfoodfacts/projects/92 # Add issue to the Nutri-Score project
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
labeled: 🚦 Nutri-Score
label-operator: OR
label-operator: OR
- uses: actions/add-to-project@main
with:
project-url: https://github.com/orgs/openfoodfacts/projects/132 # Add issue to the Top upvoted issues board
@@ -107,4 +107,4 @@ jobs:
project-url: https://github.com/orgs/openfoodfacts/projects/35 # Add issue to the ♿️ accessibility project
github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
labeled: ♿️ accessibility
label-operator: OR
label-operator: OR
13 changes: 9 additions & 4 deletions Makefile
@@ -99,9 +99,14 @@ local_config_quality: ## Run on lint configuration files

build: ## Build docker images
@echo "🍜 Building docker images"
${DOCKER_COMPOSE} build
${DOCKER_COMPOSE} build ${args}
@echo "🍜 Project setup done"

backend_poetry_update: ## Update poetry.lock file
@echo "🍜 Updating poetry.lock file"
${DOCKER_COMPOSE} run --user root --rm taxonomy_api bash -c "pip install poetry==1.4.2 && poetry lock --no-update"


up: ## Run the project
@echo "🍜 Running project (ctrl+C to stop)"
@echo "🍜 The React app will be available on http://ui.taxonomy.localhost:8091"
@@ -177,11 +182,11 @@ config_quality: ## Run quality checks on configuration files

tests: backend_tests ## Run all tests

backend_tests: ## Run python tests
backend_tests: ## Run python tests; you may pass additional pytest arguments with args="…"
@echo "🍜 Running python tests"
${DOCKER_COMPOSE_TEST} up -d neo4j
${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /parser
${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /code/tests
${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /parser ${args}
${DOCKER_COMPOSE_TEST} run --rm taxonomy_api pytest /code/tests ${args}
${DOCKER_COMPOSE_TEST} stop neo4j

checks: quality tests ## Run all checks (quality + tests)
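
For reference, the new ${args} pass-through can be exercised like this (the pytest selector and docker flag below are only examples, not part of this diff):

    make backend_tests args="-k test_delete_node"
    make build args="--no-cache"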
8 changes: 4 additions & 4 deletions backend/editor/api.py
@@ -184,7 +184,7 @@ async def find_all_nodes(response: Response, branch: str, taxonomy_name: str):
Get all nodes within taxonomy
"""
taxonomy = TaxonomyGraph(branch, taxonomy_name)
all_nodes = await taxonomy.get_all_nodes("")
all_nodes = await taxonomy.get_all_nodes()
return all_nodes


@@ -395,7 +395,7 @@ async def upload_taxonomy(
"""
taxonomy = TaxonomyGraph(branch, taxonomy_name)
if not taxonomy.is_valid_branch_name():
raise HTTPException(status_code=422, detail="branch_name: Enter a valid branch name!")
raise HTTPException(status_code=422, detail="branch_name: Enter a valid branch name!")
if await taxonomy.does_project_exist():
raise HTTPException(status_code=409, detail="Project already exists!")
if not await taxonomy.is_branch_unique(from_github=False):
@@ -467,5 +467,5 @@ async def delete_project(branch: str, taxonomy_name: str):
"""
Delete a project
"""
taxonomy = TaxonomyGraph(branch, taxonomy_name)
await project_controller.delete_project(taxonomy.project_name)
project_id = project_controller.get_project_id(branch, taxonomy_name)
await project_controller.delete_project(project_id)
4 changes: 3 additions & 1 deletion backend/editor/controllers/node_controller.py
@@ -1,3 +1,4 @@
import datetime
import logging

from openfoodfacts_taxonomy_parser import utils as parser_utils
@@ -12,7 +13,7 @@
async def delete_project_nodes(project_id: str):
"""
Remove all nodes for project.
This includes entries, stopwords, synonyms and errors
This includes entries, stopwords, synonyms, errors and removed entries
"""

query = f"""
@@ -38,6 +39,7 @@ async def create_entry_node(
"id": language_code + ":" + normalized_name,
f"tags_{language_code}": [name],
f"tags_ids_{language_code}": [normalized_name],
"modified": datetime.datetime.now().timestamp(),
}
params = {"entry_node": entry_node_data}

37 changes: 37 additions & 0 deletions backend/editor/controllers/project_controller.py
@@ -4,6 +4,10 @@
from .utils.result_utils import get_unique_record


def get_project_id(branch_name: str, taxonomy_name: str) -> str:
"""Build a project id from the branch and taxonomy names (also used as the project's node label)"""
return "p_" + taxonomy_name + "_" + branch_name


async def get_project(project_id: str) -> Project:
"""
Get project by id
@@ -78,3 +82,36 @@ async def delete_project(project_id: str):
params = {"project_id": project_id}
await get_current_transaction().run(query, params)
await delete_project_nodes(project_id)


async def clone_project(source_branch: str, taxonomy_name: str, target_branch: str):
"""
Clone a project using a new branch name

Currently used for tests only.
"""
source_id = get_project_id(source_branch, taxonomy_name)
target_id = get_project_id(target_branch, taxonomy_name)
# clone project node
query = """
MATCH (p:PROJECT {id: $project_id})
WITH p
CALL apoc.refactor.cloneNodes([p], true, ['id', 'branch'] )
YIELD output as new_node
WITH new_node
SET new_node.created_at = datetime(), new_node.branch_name = $target_branch, new_node.id = $target_id
RETURN new_node
"""
params = {"project_id": source_id, "target_branch": target_branch, "target_id": get_project_id(target_branch, taxonomy_name)}
await get_current_transaction().run(query, params)
# clone nodes thanks to apoc.refactor.cloneSubgraph
query = f"""
MATCH (n:{source_id})
WITH collect(n) AS source_nodes
CALL apoc.refactor.cloneSubgraph(source_nodes)
YIELD output as new_node
WITH new_node
REMOVE new_node:{source_id}
SET new_node:{target_id}
"""
await get_current_transaction().run(query)
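
A minimal usage sketch for tests (the branch and taxonomy names below are hypothetical); note it has to run inside an open transaction, e.g. the backend's TransactionCtx, since the helper relies on get_current_transaction():

    # Hypothetical test sketch, not part of this diff; assumes an open transaction.
    from editor.controllers import project_controller

    async def make_scratch_copy() -> str:
        await project_controller.clone_project(
            source_branch="seed",      # hypothetical pre-populated branch
            taxonomy_name="test",
            target_branch="scratch",   # hypothetical working copy
        )
        # ids follow the get_project_id() scheme above
        return project_controller.get_project_id("scratch", "test")  # -> "p_test_scratch"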
101 changes: 76 additions & 25 deletions backend/editor/entries.py
@@ -3,22 +3,25 @@
"""

import asyncio
import datetime
import logging
import shutil
import tempfile
import urllib.request # Sending requests
from typing import Optional

from fastapi import BackgroundTasks, HTTPException, UploadFile

# For synchronous I/O-bound functions in async path operations/background tasks
from fastapi.concurrency import run_in_threadpool
from openfoodfacts_taxonomy_parser import parser # Parser for taxonomies
from openfoodfacts_taxonomy_parser import unparser # Unparser for taxonomies
from openfoodfacts_taxonomy_parser import patcher
from openfoodfacts_taxonomy_parser import utils as parser_utils

from . import settings, utils
from .controllers.node_controller import create_entry_node, get_error_node
from .controllers.project_controller import create_project, edit_project, get_project
from .controllers.project_controller import get_project_id, create_project, edit_project, get_project
from .exceptions import GithubBranchExistsError # Custom exceptions
from .exceptions import (
GithubUploadError,
@@ -32,7 +35,7 @@
TransactionCtx,
get_current_transaction,
)
from .models.node_models import EntryNode, EntryNodeCreate
from .models.node_models import EntryNode, EntryNodeCreate, NodeType
from .models.project_models import ProjectCreate, ProjectEdit, ProjectStatus
from .settings import EXTERNAL_TAXONOMIES

@@ -49,7 +52,7 @@ class TaxonomyGraph:
def __init__(self, branch_name, taxonomy_name):
self.taxonomy_name = taxonomy_name
self.branch_name = branch_name
self.project_name = "p_" + taxonomy_name + "_" + branch_name
self.project_name = get_project_id(branch_name, taxonomy_name)

def taxonomy_path_in_repository(self, taxonomy_name):
return utils.taxonomy_path_in_repository(taxonomy_name)
@@ -199,18 +202,18 @@ async def import_taxonomy(
background_tasks.add_task(self.get_and_parse_taxonomy, uploadfile)
return True

def dump_taxonomy(self, background_tasks: BackgroundTasks):
def dump_taxonomy(self, background_tasks: BackgroundTasks, dump_cls: type[unparser.WriteTaxonomy] = patcher.PatchTaxonomy):
"""
Helper function to create the txt file of a taxonomy
"""
# Create the dumper (patcher or unparser) and pass a sync session to it
with SyncTransactionCtx() as session:
unparser_object = unparser.WriteTaxonomy(session)
dumper = dump_cls(session)
# Creates a unique file for dumping the taxonomy
filename = self.project_name + ".txt"
try:
# Dump taxonomy with given file name and branch name
unparser_object(filename, self.branch_name, self.taxonomy_name)
dumper(filename, self.branch_name, self.taxonomy_name)
# Program file removal in the background
background_tasks.add_task(utils.file_cleanup, filename)
return filename
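
For context, a sketch of the two dump modes this parameter enables, assuming patcher.PatchTaxonomy exposes the same callable interface as unparser.WriteTaxonomy (as the default argument implies); here taxonomy is a TaxonomyGraph instance and background_tasks a FastAPI BackgroundTasks object:

    # Hypothetical sketch, not part of this diff.
    patch_file = taxonomy.dump_taxonomy(background_tasks)  # patch-style dump (default: patcher.PatchTaxonomy)
    full_file = taxonomy.dump_taxonomy(background_tasks, dump_cls=unparser.WriteTaxonomy)  # full dump, previous behaviour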
@@ -342,7 +345,7 @@ async def list_projects(self, status=None):

async def add_node_to_end(self, label, entry):
"""
Helper function which adds an existing node to end of taxonomy
Helper function which adds a newly created node to the end of the taxonomy
"""
# Delete relationship between current last node and __footer__
query = f"""
@@ -394,28 +397,56 @@ async def add_node_to_beginning(self, label, entry):
async def delete_node(self, label, entry):
"""
Helper function used for deleting a node with given id and label

We don't really delete it, because we have to keep track of modified nodes;
instead, we change the entry's type label to REMOVED_<label>.
"""
# Finding node to be deleted using node ID
modified = datetime.datetime.now().timestamp()
# Remove the node from the is_before chain and attach its previous node to its next node
query = f"""
// Find node to be deleted using node ID
MATCH (deleted_node:{self.project_name}:{label})-[next_rel:is_before]->(next_node)
WHERE deleted_node.id = $id
MATCH (previous_node)-[prev_rel:is_before]->(deleted_node)
// Remove node
DETACH DELETE (deleted_node)
// Keep the node but detach it from the is_before chain
DELETE prev_rel, next_rel
// Rebuild relationships after deletion
CREATE (previous_node)-[:is_before]->(next_node)
"""
await get_current_transaction().run(query, {"id": entry})
# transfer child-parent relations, and mark child nodes as modified
query = f"""
// Find relations to be removed using node ID
MATCH (child_node)-[child_rel:is_child_of]->(deleted_node:{self.project_name}:{label})
WHERE deleted_node.id = $id
MATCH (deleted_node)-[parent_rel:is_child_of]->(parent_node)
// detach the deleted node from its child / parent relations
DELETE child_rel, parent_rel
// transfer child to its grand-parent
CREATE (child_node)-[:is_child_of]->(parent_node)
// mark modified
SET child_node.modified = $modified
"""
await get_current_transaction().run(query, {"id": entry, "modified": modified})
# change label of node to be deleted
query = f"""
MATCH (deleted_node:{self.project_name}:{label}) WHERE deleted_node.id = $id
REMOVE deleted_node:{label}
SET deleted_node:REMOVED_{label}
// also record the modification date
SET deleted_node.modified = $modified
"""
result = await get_current_transaction().run(query, {"id": entry, "modified": modified})
return await async_list(result)

async def get_all_nodes(self, label):
async def get_all_nodes(self, label: Optional[str] = None, removed: bool = False):
"""
Helper function used for getting all nodes, optionally filtered by label; if removed is True, return the matching REMOVED_* nodes instead
"""
qualifier = f":{label}" if label else ""
labels = [label] if label else [node_type.value for node_type in NodeType]
if removed:
labels = [f"REMOVED_{label}" for label in labels]
query = f"""
MATCH (n:{self.project_name}{qualifier}) RETURN n
MATCH (n:{self.project_name}:{"|".join(labels)}) RETURN n
"""
result = await get_current_transaction().run(query)
return await async_list(result)
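
A small sketch of the new removed flag (branch and taxonomy names hypothetical; must run inside an open transaction, omitted here): entries soft-deleted by delete_node stay in the project under the REMOVED_ENTRY label and can be listed separately from live ones:

    # Hypothetical sketch, not part of this diff.
    async def list_entries(branch: str, taxonomy_name: str):
        taxonomy = TaxonomyGraph(branch, taxonomy_name)
        live = await taxonomy.get_all_nodes(label="ENTRY")
        removed = await taxonomy.get_all_nodes(label="ENTRY", removed=True)
        return live, removed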
@@ -522,6 +553,9 @@ async def update_node(self, label, new_node: EntryNode):
# Build query
query = [f"""MATCH (n:{self.project_name}:{label}) WHERE n.id = $id """]

modified = datetime.datetime.now().timestamp()
query.append(f"""\nSET n.modified = {modified}""")

# Delete keys removed by user
deleted_keys = (
(set(curr_node.tags.keys()) - set(new_node.tags.keys()))
@@ -558,24 +592,33 @@ async def update_node_children(self, entry, new_children_ids):
"""
Helper function used for updating the children of the node with the given id
"""
modified = datetime.datetime.now().timestamp()
# Parse node ids from Neo4j Record object
current_children = [record["child.id"] for record in list(await self.get_children(entry))]
deleted_children = set(current_children) - set(new_children_ids)
deleted_children = list(set(current_children) - set(new_children_ids))
added_children = set(new_children_ids) - set(current_children)

# Delete relationships
for child in deleted_children:
query = f"""
MATCH
(deleted_child:{self.project_name}:ENTRY)
-[rel:is_child_of]->
(parent:{self.project_name}:ENTRY)
WHERE parent.id = $id AND deleted_child.id = $child
DELETE rel
"""
await get_current_transaction().run(query, {"id": entry, "child": child})
query = f"""
MATCH
(deleted_child:{self.project_name}:ENTRY)
-[rel:is_child_of]->
(parent:{self.project_name}:ENTRY)
WHERE parent.id = $id AND deleted_child.id IN $children
DELETE rel
"""
await get_current_transaction().run(query, {"id": entry, "children": deleted_children})
# update children modified property
query = f"""
MATCH (child:{self.project_name}:ENTRY)
WHERE child.id in $children
SET child.modified = $modified
"""
await get_current_transaction().run(
query, {"children": deleted_children, "modified": modified}
)

# Create non-existing nodes
# get non-existing nodes
query = f"""
MATCH (child:{self.project_name}:ENTRY)
WHERE child.id in $ids RETURN child.id
@@ -586,7 +629,7 @@ async def update_node_children(self, entry, new_children_ids):

# Normalising new children node ID
created_child_ids = []

# create new nodes
for child in to_create:
main_language_code, child_name = child.split(":", 1)
created_node_id = await self.create_entry_node(child_name, main_language_code)
@@ -612,5 +655,13 @@ async def update_node_children(self, entry, new_children_ids):
query, {"id": entry, "child_id": child_id}
)
result = list(await _result.value())
# update the modified property of existing children that were newly added
query = f"""
MATCH (child:{self.project_name}:ENTRY)
WHERE child.id in $children
SET child.modified = $modified
"""
await get_current_transaction().run(query, {"children": existing_ids, "modified": modified})

return result
2 changes: 1 addition & 1 deletion backend/editor/graph_db.py
@@ -51,7 +51,7 @@ async def database_lifespan():
uri = settings.uri
driver = neo4j.AsyncGraphDatabase.driver(uri)
try:
yield
yield driver
finally:
await driver.close()
