Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only fix cell ID validation issues if asked #244

Merged
merged 5 commits into from
Jan 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
OS: ['ubuntu-latest', 'windows-latest']
PYTHON_VERSION: ['3.5', '3.6', '3.7', '3.8', '3.9']
PYTHON_VERSION: ['3.6', '3.7', '3.8', '3.9']
steps:
- uses: actions/checkout@v2
- name: Set up Python
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ __pycache__
.#*
.coverage
.cache
.python-version
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include MANIFEST.in
recursive-include nbformat *.txt
recursive-include nbformat *.json
recursive-exclude nbformat/tests *.*
exclude scripts/jupyter-trust

# Javascript
include package.json
Expand Down
50 changes: 42 additions & 8 deletions nbformat/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

import json
import os
import re

Expand Down Expand Up @@ -212,17 +213,48 @@ def test_fallback_validator_with_iter_errors_using_ref():

def test_non_unique_cell_ids():
"""Test than a non-unique cell id does not pass validation"""
import nbformat
with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
nb = read(f, as_version=4)
# The read call corrects the error and only logs the validation issue, so we reapply the issue for the test after
nb.cells[1].id = nb.cells[0].id
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
with pytest.raises(ValidationError):
validate(nb)
# The validate call should have corrected the duplicate id entry
validate(nb, repair_duplicate_cell_ids=False)
# try again to verify that we didn't modify the content
with pytest.raises(ValidationError):
validate(nb, repair_duplicate_cell_ids=False)


def test_repair_non_unique_cell_ids():
"""Test that we will repair non-unique cell ids if asked during validation"""
import nbformat
with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
validate(nb)
assert isvalid(nb)


def test_no_cell_ids():
"""Test that a cell without a cell ID does not pass validation"""
import nbformat
with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
with pytest.raises(ValidationError):
validate(nb, repair_duplicate_cell_ids=False)
# try again to verify that we didn't modify the content
with pytest.raises(ValidationError):
validate(nb, repair_duplicate_cell_ids=False)


def test_repair_no_cell_ids():
"""Test that we will repair cells without ids if asked during validation"""
import nbformat
with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
validate(nb)
assert isvalid(nb)
# Reapply to id duplication issue
nb.cells[1].id = nb.cells[0].id
assert not isvalid(nb)


def test_invalid_cell_id():
Expand All @@ -233,11 +265,13 @@ def test_invalid_cell_id():
validate(nb)
assert not isvalid(nb)


def test_notebook_invalid_without_min_version():
with TestsBase.fopen(u'no_min_version.ipynb', u'r') as f:
nb = read(f, as_version=4)
with pytest.raises(ValidationError):
validate(nb)


def test_notebook_invalid_without_main_version():
pass
34 changes: 34 additions & 0 deletions nbformat/tests/v4_5_no_cell_id.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"foo\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
21 changes: 14 additions & 7 deletions nbformat/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from .reader import get_version, reads
from .corpus.words import generate_corpus_id

from traitlets.log import get_logger


validators = {}

def _relax_additional_properties(obj):
Expand Down Expand Up @@ -230,7 +233,7 @@ def better_validation_error(error, version, version_minor):


def validate(nbdict=None, ref=None, version=None, version_minor=None,
relax_add_props=False, nbjson=None):
relax_add_props=False, nbjson=None, repair_duplicate_cell_ids=True):
"""Checks whether the given notebook dict-like object
conforms to the relevant notebook format schema.

Expand Down Expand Up @@ -258,8 +261,9 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
if version is None:
version, version_minor = 1, 0

if ref is None and version >= 4 and version_minor >= 5:
# if we support cell ids ensure default ids are provided
notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Much cleaner, thanks.

if notebook_supports_cell_ids and repair_duplicate_cell_ids:
# Auto-generate cell ids for cells that are missing them.
for cell in nbdict['cells']:
if 'id' not in cell:
# Generate cell ids if any are missing
Expand All @@ -270,15 +274,18 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
relax_add_props=relax_add_props):
raise error

if ref is None and version >= 4 and version_minor >= 5:
if notebook_supports_cell_ids:
# if we support cell ids check for uniqueness when validating the whole notebook
seen_ids = set()
for cell in nbdict['cells']:
cell_id = cell['id']
if cell_id in seen_ids:
cell['id'] = generate_corpus_id()
# Best effort to repair if we find a duplicate id in case the ValidationError isn't blocking
raise ValidationError("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
if repair_duplicate_cell_ids:
# Best effort to repair if we find a duplicate id
cell['id'] = generate_corpus_id()
nicholaswold marked this conversation as resolved.
Show resolved Hide resolved
get_logger().warn("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
else:
raise ValidationError("Non-unique cell id '{}' detected.".format(cell_id))
seen_ids.add(cell_id)


Expand Down