From a40fc276229113cec3e5288b587f53718bbe21c0 Mon Sep 17 00:00:00 2001 From: Nicholas Wold Date: Thu, 20 Jan 2022 11:17:53 -0800 Subject: [PATCH 1/5] Only fix cell ID validation issues if asked Adds a new kwarg to validate called `repair` which will fix well-known validation errors during the validation process, but only when set to `True`. Updates existing functionality to not mutate the notebook contents, but instead fail validation. --- .gitignore | 1 + nbformat/tests/test_validator.py | 33 +++++++++++++++++++++++---- nbformat/tests/v4_5_no_cell_id.ipynb | 34 ++++++++++++++++++++++++++++ nbformat/validator.py | 17 ++++++++------ 4 files changed, 73 insertions(+), 12 deletions(-) create mode 100644 nbformat/tests/v4_5_no_cell_id.ipynb diff --git a/.gitignore b/.gitignore index ed39d43b..6f294ab9 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ __pycache__ .#* .coverage .cache +.python-version diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index 8a01b428..ef48b3ed 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -214,17 +214,38 @@ def test_non_unique_cell_ids(): """Test than a non-unique cell id does not pass validation""" with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f: nb = read(f, as_version=4) - # The read call corrects the error and only logs the validation issue, so we reapply the issue for the test after - nb.cells[1].id = nb.cells[0].id with pytest.raises(ValidationError): validate(nb) - # The validate call should have corrected the duplicate id entry + assert not isvalid(nb) + + +def test_repair_non_unique_cell_ids(): + """Test that we will repair non-unique cell ids if asked during validation""" + with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f: + nb = read(f, as_version=4) + assert not isvalid(nb) + validate(nb, repair=True) assert isvalid(nb) - # Reapply to id duplication issue - nb.cells[1].id = nb.cells[0].id + + +def test_no_cell_ids(): + """Test that a cell without a cell ID does not pass validation""" + with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError): + validate(nb) assert not isvalid(nb) +def test_repair_no_cell_ids(): + """Test that we will repair cells without ids if asked during validation""" + with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f: + nb = read(f, as_version=4) + assert not isvalid(nb) + validate(nb, repair=True) + assert isvalid(nb) + + def test_invalid_cell_id(): """Test than an invalid cell id does not pass validation""" with TestsBase.fopen(u'invalid_cell_id.ipynb', u'r') as f: @@ -233,11 +254,13 @@ def test_invalid_cell_id(): validate(nb) assert not isvalid(nb) + def test_notebook_invalid_without_min_version(): with TestsBase.fopen(u'no_min_version.ipynb', u'r') as f: nb = read(f, as_version=4) with pytest.raises(ValidationError): validate(nb) + def test_notebook_invalid_without_main_version(): pass diff --git a/nbformat/tests/v4_5_no_cell_id.ipynb b/nbformat/tests/v4_5_no_cell_id.ipynb new file mode 100644 index 00000000..3465d4eb --- /dev/null +++ b/nbformat/tests/v4_5_no_cell_id.ipynb @@ -0,0 +1,34 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"foo\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbformat/validator.py b/nbformat/validator.py index 7d1ac60f..78c803fd 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -230,7 +230,7 @@ def better_validation_error(error, version, version_minor): def validate(nbdict=None, ref=None, version=None, version_minor=None, - relax_add_props=False, nbjson=None): + relax_add_props=False, nbjson=None, repair=False): """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. @@ -258,8 +258,9 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, if version is None: version, version_minor = 1, 0 - if ref is None and version >= 4 and version_minor >= 5: - # if we support cell ids ensure default ids are provided + notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5 + if notebook_supports_cell_ids and repair: + # Auto-generate cell ids for cells that are missing them. for cell in nbdict['cells']: if 'id' not in cell: # Generate cell ids if any are missing @@ -270,15 +271,17 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, relax_add_props=relax_add_props): raise error - if ref is None and version >= 4 and version_minor >= 5: + if notebook_supports_cell_ids: # if we support cell ids check for uniqueness when validating the whole notebook seen_ids = set() for cell in nbdict['cells']: cell_id = cell['id'] if cell_id in seen_ids: - cell['id'] = generate_corpus_id() - # Best effort to repair if we find a duplicate id in case the ValidationError isn't blocking - raise ValidationError("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id'])) + if repair: + # Best effort to repair if we find a duplicate id + cell['id'] = generate_corpus_id() + else: + raise ValidationError("Non-unique cell id '{}' detected.".format(cell_id)) seen_ids.add(cell_id) From 7718e7a7bb8bd1e2e4227a80c3c23f4d06190254 Mon Sep 17 00:00:00 2001 From: Nicholas Wold Date: Thu, 20 Jan 2022 11:58:06 -0800 Subject: [PATCH 2/5] Remove python 3.5 from ci test matrix --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1553b2e2..95525811 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: OS: ['ubuntu-latest', 'windows-latest'] - PYTHON_VERSION: ['3.5', '3.6', '3.7', '3.8', '3.9'] + PYTHON_VERSION: ['3.6', '3.7', '3.8', '3.9'] steps: - uses: actions/checkout@v2 - name: Set up Python From 9b53b7cc0fb3c33010cbe0be7270d13ffb56fc94 Mon Sep 17 00:00:00 2001 From: Nicholas Wold Date: Thu, 20 Jan 2022 12:01:16 -0800 Subject: [PATCH 3/5] explicitly exclude jupyter-trust script from manifest --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index e048e396..1235b07a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,7 @@ include MANIFEST.in recursive-include nbformat *.txt recursive-include nbformat *.json recursive-exclude nbformat/tests *.* +exclude scripts/jupyter-trust # Javascript include package.json From 2da82365318c60f57588e65e8f57bb4551bb9c92 Mon Sep 17 00:00:00 2001 From: Nicholas Wold Date: Thu, 20 Jan 2022 17:09:21 -0800 Subject: [PATCH 4/5] Ensure that validate default mutates notebook content to repair cell ids --- nbformat/tests/test_validator.py | 35 +++++++++++++++++++++----------- nbformat/validator.py | 10 ++++++--- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index ef48b3ed..e7317ca4 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -3,6 +3,7 @@ # Copyright (c) IPython Development Team. # Distributed under the terms of the Modified BSD License. +import json import os import re @@ -212,37 +213,47 @@ def test_fallback_validator_with_iter_errors_using_ref(): def test_non_unique_cell_ids(): """Test than a non-unique cell id does not pass validation""" + import nbformat with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f: - nb = read(f, as_version=4) + # Avoids validate call from `.read` + nb = nbformat.from_dict(json.load(f)) with pytest.raises(ValidationError): - validate(nb) - assert not isvalid(nb) + validate(nb, repair_invalid_cell_ids=False) + # try again to verify that we didn't modify the content + with pytest.raises(ValidationError): + validate(nb, repair_invalid_cell_ids=False) def test_repair_non_unique_cell_ids(): """Test that we will repair non-unique cell ids if asked during validation""" + import nbformat with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f: - nb = read(f, as_version=4) - assert not isvalid(nb) - validate(nb, repair=True) + # Avoids validate call from `.read` + nb = nbformat.from_dict(json.load(f)) + validate(nb) assert isvalid(nb) def test_no_cell_ids(): """Test that a cell without a cell ID does not pass validation""" + import nbformat with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f: - nb = read(f, as_version=4) + # Avoids validate call from `.read` + nb = nbformat.from_dict(json.load(f)) with pytest.raises(ValidationError): - validate(nb) - assert not isvalid(nb) + validate(nb, repair_invalid_cell_ids=False) + # try again to verify that we didn't modify the content + with pytest.raises(ValidationError): + validate(nb, repair_invalid_cell_ids=False) def test_repair_no_cell_ids(): """Test that we will repair cells without ids if asked during validation""" + import nbformat with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f: - nb = read(f, as_version=4) - assert not isvalid(nb) - validate(nb, repair=True) + # Avoids validate call from `.read` + nb = nbformat.from_dict(json.load(f)) + validate(nb) assert isvalid(nb) diff --git a/nbformat/validator.py b/nbformat/validator.py index 78c803fd..d99d8c84 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -13,6 +13,9 @@ from .reader import get_version, reads from .corpus.words import generate_corpus_id +from traitlets.log import get_logger + + validators = {} def _relax_additional_properties(obj): @@ -230,7 +233,7 @@ def better_validation_error(error, version, version_minor): def validate(nbdict=None, ref=None, version=None, version_minor=None, - relax_add_props=False, nbjson=None, repair=False): + relax_add_props=False, nbjson=None, repair_invalid_cell_ids=True): """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. @@ -259,7 +262,7 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, version, version_minor = 1, 0 notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5 - if notebook_supports_cell_ids and repair: + if notebook_supports_cell_ids and repair_invalid_cell_ids: # Auto-generate cell ids for cells that are missing them. for cell in nbdict['cells']: if 'id' not in cell: @@ -277,9 +280,10 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, for cell in nbdict['cells']: cell_id = cell['id'] if cell_id in seen_ids: - if repair: + if repair_invalid_cell_ids: # Best effort to repair if we find a duplicate id cell['id'] = generate_corpus_id() + get_logger().warn("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id'])) else: raise ValidationError("Non-unique cell id '{}' detected.".format(cell_id)) seen_ids.add(cell_id) From 2732fbf8356231df9e4b00d7daa215cb6d7ac49c Mon Sep 17 00:00:00 2001 From: Nicholas Wold Date: Fri, 21 Jan 2022 09:05:25 -0800 Subject: [PATCH 5/5] Correct kwarg name for clarity --- nbformat/tests/test_validator.py | 8 ++++---- nbformat/validator.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index e7317ca4..979d9a22 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -218,10 +218,10 @@ def test_non_unique_cell_ids(): # Avoids validate call from `.read` nb = nbformat.from_dict(json.load(f)) with pytest.raises(ValidationError): - validate(nb, repair_invalid_cell_ids=False) + validate(nb, repair_duplicate_cell_ids=False) # try again to verify that we didn't modify the content with pytest.raises(ValidationError): - validate(nb, repair_invalid_cell_ids=False) + validate(nb, repair_duplicate_cell_ids=False) def test_repair_non_unique_cell_ids(): @@ -241,10 +241,10 @@ def test_no_cell_ids(): # Avoids validate call from `.read` nb = nbformat.from_dict(json.load(f)) with pytest.raises(ValidationError): - validate(nb, repair_invalid_cell_ids=False) + validate(nb, repair_duplicate_cell_ids=False) # try again to verify that we didn't modify the content with pytest.raises(ValidationError): - validate(nb, repair_invalid_cell_ids=False) + validate(nb, repair_duplicate_cell_ids=False) def test_repair_no_cell_ids(): diff --git a/nbformat/validator.py b/nbformat/validator.py index d99d8c84..11d55be7 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -233,7 +233,7 @@ def better_validation_error(error, version, version_minor): def validate(nbdict=None, ref=None, version=None, version_minor=None, - relax_add_props=False, nbjson=None, repair_invalid_cell_ids=True): + relax_add_props=False, nbjson=None, repair_duplicate_cell_ids=True): """Checks whether the given notebook dict-like object conforms to the relevant notebook format schema. @@ -262,7 +262,7 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, version, version_minor = 1, 0 notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5 - if notebook_supports_cell_ids and repair_invalid_cell_ids: + if notebook_supports_cell_ids and repair_duplicate_cell_ids: # Auto-generate cell ids for cells that are missing them. for cell in nbdict['cells']: if 'id' not in cell: @@ -280,7 +280,7 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, for cell in nbdict['cells']: cell_id = cell['id'] if cell_id in seen_ids: - if repair_invalid_cell_ids: + if repair_duplicate_cell_ids: # Best effort to repair if we find a duplicate id cell['id'] = generate_corpus_id() get_logger().warn("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))