Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow nbdev_clean to accept multiple filenames or globs (#1480) #1488

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions nbdev/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _nbdev_clean(nb, path=None, clear_all=None):
# %% ../nbs/api/11_clean.ipynb
@call_parse
def nbdev_clean(
fname:str=None, # A notebook name or glob to clean
fname=None, # A notebook path or a list of paths/globs to clean
clear_all:bool=False, # Remove all cell metadata and cell outputs?
disp:bool=False, # Print the cleaned outputs
stdin:bool=False # Read notebook from input stream
Expand All @@ -141,7 +141,9 @@ def nbdev_clean(
_write = partial(process_write, warn_msg='Failed to clean notebook', proc_nb=_clean)
if stdin: return _write(f_in=sys.stdin, f_out=sys.stdout)
if fname is None: fname = get_config().nbs_path
for f in globtastic(fname, file_glob='*.ipynb', skip_folder_re='^[_.]'): _write(f_in=f, disp=disp)
# Use listify to handle both single paths and lists of paths
for f in globtastic(listify(fname), file_glob='*.ipynb', skip_folder_re='^[_.]'):
_write(f_in=f, disp=disp)

# %% ../nbs/api/11_clean.ipynb
def clean_jupyter(path, model, **kwargs):
Expand Down
103 changes: 101 additions & 2 deletions nbs/api/11_clean.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@
"#|export\n",
"@call_parse\n",
"def nbdev_clean(\n",
" fname:str=None, # A notebook name or glob to clean\n",
" fname=None, # A notebook path or a list of paths/globs to clean\n",
" clear_all:bool=False, # Remove all cell metadata and cell outputs?\n",
" disp:bool=False, # Print the cleaned outputs\n",
" stdin:bool=False # Read notebook from input stream\n",
Expand All @@ -399,7 +399,9 @@
" _write = partial(process_write, warn_msg='Failed to clean notebook', proc_nb=_clean)\n",
" if stdin: return _write(f_in=sys.stdin, f_out=sys.stdout)\n",
" if fname is None: fname = get_config().nbs_path\n",
" for f in globtastic(fname, file_glob='*.ipynb', skip_folder_re='^[_.]'): _write(f_in=f, disp=disp)"
" # Use listify to handle both single paths and lists of paths\n",
" for f in globtastic(listify(fname), file_glob='*.ipynb', skip_folder_re='^[_.]'): \n",
" _write(f_in=f, disp=disp)"
]
},
{
Expand Down Expand Up @@ -811,6 +813,103 @@
"test_eq(nb.cells[-1].output, ours.cells[-1].output)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#|hide\n",
"#|eval: false\n",
"def test_clean_multiple_paths():\n",
" \"\"\"Test that demonstrates the current failure and success of the fixed version.\"\"\"\n",
" with tempfile.TemporaryDirectory() as d, working_directory(d):\n",
" # Create test notebooks\n",
" nbs_path = Path('nbs')\n",
" nbs_path.mkdir()\n",
" \n",
" # Create two notebooks with metadata we expect to be cleaned\n",
" nb1 = nbs_path/'nb1.ipynb'\n",
" nb2 = nbs_path/'nb2.ipynb'\n",
" \n",
" meta = {'nbformat': 4, 'metadata': {\n",
" 'kernelspec': {'name': 'python3'}, \n",
" 'test_metadata': 'should_be_removed'\n",
" }}\n",
" \n",
" # Add execution_count that should be cleaned\n",
" cells = [\n",
" {'cell_type': 'code', 'execution_count': 1, 'source': 'print(\"test\")', 'outputs': [], 'metadata': {}}\n",
" ]\n",
" \n",
" # Write the test notebooks\n",
" for nb_path in [nb1, nb2]:\n",
" write_nb(dict2nb({'cells': cells, **meta}), nb_path)\n",
" \n",
" # CURRENT BEHAVIOR SIMULATION:\n",
" # This simulates the current nbdev_clean implementation\n",
" current_implementation = False\n",
" try:\n",
" # Original code passes fname directly to globtastic:\n",
" fname = [str(nb1), str(nb2)] # List of paths\n",
" # This will fail because globtastic expects a string/Path, not a list\n",
" for f in globtastic(fname, file_glob='*.ipynb', skip_folder_re='^[_.]'): \n",
" pass\n",
" current_implementation = True\n",
" except Exception:\n",
" # Expected to fail with the current implementation\n",
" current_implementation = False\n",
" \n",
" # FIXED BEHAVIOR SIMULATION:\n",
" # This simulates the fixed implementation using listify\n",
" fixed_implementation = False\n",
" try:\n",
" # The fix uses listify, which lets us process all paths\n",
" fname = [str(nb1), str(nb2)]\n",
" for f in globtastic(listify(fname), file_glob='*.ipynb', skip_folder_re='^[_.]'):\n",
" pass\n",
" fixed_implementation = True\n",
" except Exception:\n",
" fixed_implementation = False\n",
" \n",
" # Iterate manually over each path as a workaround for current implementation\n",
" workaround_implementation = False\n",
" try:\n",
" fname = [str(nb1), str(nb2)]\n",
" # Have to manually iterate over each path\n",
" for path in fname:\n",
" for f in globtastic(path, file_glob='*.ipynb', skip_folder_re='^[_.]'):\n",
" pass\n",
" workaround_implementation = True\n",
" except Exception:\n",
" workaround_implementation = False\n",
" \n",
" # Assertions to prove the issue and fix\n",
" assert not current_implementation, \"Current implementation unexpectedly works with list of paths\"\n",
" assert fixed_implementation, \"Fixed implementation fails with list of paths\"\n",
" assert workaround_implementation, \"Even the manual workaround fails\"\n",
" \n",
" # Now test with the fixed nbdev_clean function (assuming it's been updated with listify)\n",
" # These assertions should only pass with the fixed implementation\n",
" try:\n",
" # Call the actual nbdev_clean function (should be fixed version with listify)\n",
" nbdev_clean([str(nb1), str(nb2)])\n",
" \n",
" # Verify both notebooks were properly cleaned\n",
" for nb_path in [nb1, nb2]:\n",
" cleaned_nb = read_nb(nb_path)\n",
" assert 'test_metadata' not in cleaned_nb.metadata, f\"Metadata was not cleaned in {nb_path}\"\n",
" assert cleaned_nb.cells[0].get('execution_count') is None, f\"Execution count not cleared in {nb_path}\"\n",
" \n",
" # If we got here without exceptions, the fix works\n",
" fixed_function_works = True\n",
" except Exception as e:\n",
" fixed_function_works = False\n",
" print(f\"Error when running fixed nbdev_clean: {e}\")\n",
" \n",
" assert fixed_function_works, \"Fixed nbdev_clean function failed with multiple paths\""
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down