From a40fc276229113cec3e5288b587f53718bbe21c0 Mon Sep 17 00:00:00 2001
From: Nicholas Wold <nick@nicholaswold.com>
Date: Thu, 20 Jan 2022 11:17:53 -0800
Subject: [PATCH 1/5] Only fix cell ID validation issues if asked

Adds a new kwarg to validate called `repair` which will fix well-known validation
errors during the validation process, but only when set to `True`.

Updates existing functionality to not mutate the notebook contents, but instead
fail validation.
---
 .gitignore                           |  1 +
 nbformat/tests/test_validator.py     | 33 +++++++++++++++++++++++----
 nbformat/tests/v4_5_no_cell_id.ipynb | 34 ++++++++++++++++++++++++++++
 nbformat/validator.py                | 17 ++++++++------
 4 files changed, 73 insertions(+), 12 deletions(-)
 create mode 100644 nbformat/tests/v4_5_no_cell_id.ipynb

diff --git a/.gitignore b/.gitignore
index ed39d43b..6f294ab9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,4 @@ __pycache__
 .#*
 .coverage
 .cache
+.python-version
diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py
index 8a01b428..ef48b3ed 100644
--- a/nbformat/tests/test_validator.py
+++ b/nbformat/tests/test_validator.py
@@ -214,17 +214,38 @@ def test_non_unique_cell_ids():
     """Test than a non-unique cell id does not pass validation"""
     with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
         nb = read(f, as_version=4)
-    # The read call corrects the error and only logs the validation issue, so we reapply the issue for the test after
-    nb.cells[1].id = nb.cells[0].id
     with pytest.raises(ValidationError):
         validate(nb)
-    # The validate call should have corrected the duplicate id entry
+    assert not isvalid(nb)
+
+
+def test_repair_non_unique_cell_ids():
+    """Test that we will repair non-unique cell ids if asked during validation"""
+    with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
+        nb = read(f, as_version=4)
+    assert not isvalid(nb)
+    validate(nb, repair=True)
     assert isvalid(nb)
-    # Reapply to id duplication issue
-    nb.cells[1].id = nb.cells[0].id
+
+
+def test_no_cell_ids():
+    """Test that a cell without a cell ID does not pass validation"""
+    with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
+        nb = read(f, as_version=4)
+    with pytest.raises(ValidationError):
+        validate(nb)
     assert not isvalid(nb)
 
 
+def test_repair_no_cell_ids():
+    """Test that we will repair cells without ids if asked during validation"""
+    with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
+        nb = read(f, as_version=4)
+    assert not isvalid(nb)
+    validate(nb, repair=True)
+    assert isvalid(nb)
+
+
 def test_invalid_cell_id():
     """Test than an invalid cell id does not pass validation"""
     with TestsBase.fopen(u'invalid_cell_id.ipynb', u'r') as f:
@@ -233,11 +254,13 @@ def test_invalid_cell_id():
         validate(nb)
     assert not isvalid(nb)
 
+
 def test_notebook_invalid_without_min_version():
     with TestsBase.fopen(u'no_min_version.ipynb', u'r') as f:
         nb = read(f, as_version=4)
     with pytest.raises(ValidationError):
         validate(nb)
 
+
 def test_notebook_invalid_without_main_version():
     pass
diff --git a/nbformat/tests/v4_5_no_cell_id.ipynb b/nbformat/tests/v4_5_no_cell_id.ipynb
new file mode 100644
index 00000000..3465d4eb
--- /dev/null
+++ b/nbformat/tests/v4_5_no_cell_id.ipynb
@@ -0,0 +1,34 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"foo\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/nbformat/validator.py b/nbformat/validator.py
index 7d1ac60f..78c803fd 100644
--- a/nbformat/validator.py
+++ b/nbformat/validator.py
@@ -230,7 +230,7 @@ def better_validation_error(error, version, version_minor):
 
 
 def validate(nbdict=None, ref=None, version=None, version_minor=None,
-             relax_add_props=False, nbjson=None):
+             relax_add_props=False, nbjson=None, repair=False):
     """Checks whether the given notebook dict-like object
     conforms to the relevant notebook format schema.
 
@@ -258,8 +258,9 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
         if version is None:
             version, version_minor = 1, 0
 
-    if ref is None and version >= 4 and version_minor >= 5:
-        # if we support cell ids ensure default ids are provided
+    notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
+    if notebook_supports_cell_ids and repair:
+        # Auto-generate cell ids for cells that are missing them.
         for cell in nbdict['cells']:
             if 'id' not in cell:
                 # Generate cell ids if any are missing
@@ -270,15 +271,17 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
                                relax_add_props=relax_add_props):
         raise error
 
-    if ref is None and version >= 4 and version_minor >= 5:
+    if notebook_supports_cell_ids:
         # if we support cell ids check for uniqueness when validating the whole notebook
         seen_ids = set()
         for cell in nbdict['cells']:
             cell_id = cell['id']
             if cell_id in seen_ids:
-                cell['id'] = generate_corpus_id()
-                # Best effort to repair if we find a duplicate id in case the ValidationError isn't blocking
-                raise ValidationError("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
+                if repair:
+                    # Best effort to repair if we find a duplicate id
+                    cell['id'] = generate_corpus_id()
+                else:
+                    raise ValidationError("Non-unique cell id '{}' detected.".format(cell_id))
             seen_ids.add(cell_id)
 
 

From 7718e7a7bb8bd1e2e4227a80c3c23f4d06190254 Mon Sep 17 00:00:00 2001
From: Nicholas Wold <nick@nicholaswold.com>
Date: Thu, 20 Jan 2022 11:58:06 -0800
Subject: [PATCH 2/5] Remove python 3.5 from ci test matrix

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 1553b2e2..95525811 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false 
       matrix:
         OS: ['ubuntu-latest', 'windows-latest']
-        PYTHON_VERSION: ['3.5', '3.6', '3.7', '3.8', '3.9']
+        PYTHON_VERSION: ['3.6', '3.7', '3.8', '3.9']
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python

From 9b53b7cc0fb3c33010cbe0be7270d13ffb56fc94 Mon Sep 17 00:00:00 2001
From: Nicholas Wold <nick@nicholaswold.com>
Date: Thu, 20 Jan 2022 12:01:16 -0800
Subject: [PATCH 3/5] explicitly exclude jupyter-trust script from manifest

---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index e048e396..1235b07a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -8,6 +8,7 @@ include MANIFEST.in
 recursive-include nbformat *.txt
 recursive-include nbformat *.json
 recursive-exclude nbformat/tests *.*
+exclude scripts/jupyter-trust
 
 # Javascript
 include package.json

From 2da82365318c60f57588e65e8f57bb4551bb9c92 Mon Sep 17 00:00:00 2001
From: Nicholas Wold <nick@nicholaswold.com>
Date: Thu, 20 Jan 2022 17:09:21 -0800
Subject: [PATCH 4/5] Ensure that validate default mutates notebook content to
 repair cell ids

---
 nbformat/tests/test_validator.py | 35 +++++++++++++++++++++-----------
 nbformat/validator.py            | 10 ++++++---
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py
index ef48b3ed..e7317ca4 100644
--- a/nbformat/tests/test_validator.py
+++ b/nbformat/tests/test_validator.py
@@ -3,6 +3,7 @@
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+import json
 import os
 import re
 
@@ -212,37 +213,47 @@ def test_fallback_validator_with_iter_errors_using_ref():
 
 def test_non_unique_cell_ids():
     """Test than a non-unique cell id does not pass validation"""
+    import nbformat
     with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
-        nb = read(f, as_version=4)
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
     with pytest.raises(ValidationError):
-        validate(nb)
-    assert not isvalid(nb)
+        validate(nb, repair_invalid_cell_ids=False)
+    # try again to verify that we didn't modify the content
+    with pytest.raises(ValidationError):
+        validate(nb, repair_invalid_cell_ids=False)
 
 
 def test_repair_non_unique_cell_ids():
     """Test that we will repair non-unique cell ids if asked during validation"""
+    import nbformat
     with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
-        nb = read(f, as_version=4)
-    assert not isvalid(nb)
-    validate(nb, repair=True)
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
+    validate(nb)
     assert isvalid(nb)
 
 
 def test_no_cell_ids():
     """Test that a cell without a cell ID does not pass validation"""
+    import nbformat
     with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
-        nb = read(f, as_version=4)
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
     with pytest.raises(ValidationError):
-        validate(nb)
-    assert not isvalid(nb)
+        validate(nb, repair_invalid_cell_ids=False)
+    # try again to verify that we didn't modify the content
+    with pytest.raises(ValidationError):
+        validate(nb, repair_invalid_cell_ids=False)
 
 
 def test_repair_no_cell_ids():
     """Test that we will repair cells without ids if asked during validation"""
+    import nbformat
     with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
-        nb = read(f, as_version=4)
-    assert not isvalid(nb)
-    validate(nb, repair=True)
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
+    validate(nb)
     assert isvalid(nb)
 
 
diff --git a/nbformat/validator.py b/nbformat/validator.py
index 78c803fd..d99d8c84 100644
--- a/nbformat/validator.py
+++ b/nbformat/validator.py
@@ -13,6 +13,9 @@
 from .reader import get_version, reads
 from .corpus.words import generate_corpus_id
 
+from traitlets.log import get_logger
+
+
 validators = {}
 
 def _relax_additional_properties(obj):
@@ -230,7 +233,7 @@ def better_validation_error(error, version, version_minor):
 
 
 def validate(nbdict=None, ref=None, version=None, version_minor=None,
-             relax_add_props=False, nbjson=None, repair=False):
+             relax_add_props=False, nbjson=None, repair_invalid_cell_ids=True):
     """Checks whether the given notebook dict-like object
     conforms to the relevant notebook format schema.
 
@@ -259,7 +262,7 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
             version, version_minor = 1, 0
 
     notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
-    if notebook_supports_cell_ids and repair:
+    if notebook_supports_cell_ids and repair_invalid_cell_ids:
         # Auto-generate cell ids for cells that are missing them.
         for cell in nbdict['cells']:
             if 'id' not in cell:
@@ -277,9 +280,10 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
         for cell in nbdict['cells']:
             cell_id = cell['id']
             if cell_id in seen_ids:
-                if repair:
+                if repair_invalid_cell_ids:
                     # Best effort to repair if we find a duplicate id
                     cell['id'] = generate_corpus_id()
+                    get_logger().warn("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
                 else:
                     raise ValidationError("Non-unique cell id '{}' detected.".format(cell_id))
             seen_ids.add(cell_id)

From 2732fbf8356231df9e4b00d7daa215cb6d7ac49c Mon Sep 17 00:00:00 2001
From: Nicholas Wold <nick@nicholaswold.com>
Date: Fri, 21 Jan 2022 09:05:25 -0800
Subject: [PATCH 5/5] Correct kwarg name for clarity

---
 nbformat/tests/test_validator.py | 8 ++++----
 nbformat/validator.py            | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py
index e7317ca4..979d9a22 100644
--- a/nbformat/tests/test_validator.py
+++ b/nbformat/tests/test_validator.py
@@ -218,10 +218,10 @@ def test_non_unique_cell_ids():
         # Avoids validate call from `.read`
         nb = nbformat.from_dict(json.load(f))
     with pytest.raises(ValidationError):
-        validate(nb, repair_invalid_cell_ids=False)
+        validate(nb, repair_duplicate_cell_ids=False)
     # try again to verify that we didn't modify the content
     with pytest.raises(ValidationError):
-        validate(nb, repair_invalid_cell_ids=False)
+        validate(nb, repair_duplicate_cell_ids=False)
 
 
 def test_repair_non_unique_cell_ids():
@@ -241,10 +241,10 @@ def test_no_cell_ids():
         # Avoids validate call from `.read`
         nb = nbformat.from_dict(json.load(f))
     with pytest.raises(ValidationError):
-        validate(nb, repair_invalid_cell_ids=False)
+        validate(nb, repair_duplicate_cell_ids=False)
     # try again to verify that we didn't modify the content
     with pytest.raises(ValidationError):
-        validate(nb, repair_invalid_cell_ids=False)
+        validate(nb, repair_duplicate_cell_ids=False)
 
 
 def test_repair_no_cell_ids():
diff --git a/nbformat/validator.py b/nbformat/validator.py
index d99d8c84..11d55be7 100644
--- a/nbformat/validator.py
+++ b/nbformat/validator.py
@@ -233,7 +233,7 @@ def better_validation_error(error, version, version_minor):
 
 
 def validate(nbdict=None, ref=None, version=None, version_minor=None,
-             relax_add_props=False, nbjson=None, repair_invalid_cell_ids=True):
+             relax_add_props=False, nbjson=None, repair_duplicate_cell_ids=True):
     """Checks whether the given notebook dict-like object
     conforms to the relevant notebook format schema.
 
@@ -262,7 +262,7 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
             version, version_minor = 1, 0
 
     notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
-    if notebook_supports_cell_ids and repair_invalid_cell_ids:
+    if notebook_supports_cell_ids and repair_duplicate_cell_ids:
         # Auto-generate cell ids for cells that are missing them.
         for cell in nbdict['cells']:
             if 'id' not in cell:
@@ -280,7 +280,7 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
         for cell in nbdict['cells']:
             cell_id = cell['id']
             if cell_id in seen_ids:
-                if repair_invalid_cell_ids:
+                if repair_duplicate_cell_ids:
                     # Best effort to repair if we find a duplicate id
                     cell['id'] = generate_corpus_id()
                     get_logger().warn("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))