Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: convert to executable markdown #3296

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 88 additions & 11 deletions docs/_scripts/notebook_convert.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,54 @@
import argparse
import os
import re
from pathlib import Path
from typing import Literal, Optional

import nbformat
from nbconvert.exporters import MarkdownExporter
from nbconvert.preprocessors import Preprocessor


class EscapePreprocessor(Preprocessor):
def __init__(self, rewrite_links: bool = True, **kwargs) -> None:
super().__init__(**kwargs)
self.rewrite_links = rewrite_links

def preprocess_cell(self, cell, resources, cell_index):
if cell.cell_type == "markdown":
# rewrite markdown links to html links (excluding image links)
cell.source = re.sub(
r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
r'<a href="\2">\1</a>',
cell.source,
)
if self.rewrite_links:
# We'll need to adjust the logic for this to keep markdown format
# but link to markdown files rather than ipynb files.
cell.source = re.sub(
r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
r'<a href="\2">\1</a>',
cell.source,
)
else:
# Keep format but replace the .ipynb extension with .md
cell.source = re.sub(
r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
r"[\1](\2.md)",
cell.source,
)

# Fix image paths in <img> tags
cell.source = re.sub(
r'<img\s+src="\.?/img/([^"]+)"', r'<img src="../img/\1"', cell.source
)

elif cell.cell_type == "code":
# Determine if the cell has bash or cell magic
if cell.source.startswith("%") or cell.source.startswith("!"):
# update metadata to denote that it's not a python cell
cell.metadata["language_info"] = {"name": "unknown"}

# Remove noqa comments
cell.source = re.sub(r'#\s*noqa.*$', '', cell.source, flags=re.MULTILINE)
cell.source = re.sub(r"#\s*noqa.*$", "", cell.source, flags=re.MULTILINE)
# escape ``` in code
# This is needed because the markdown exporter will wrap code blocks in
# triple backticks, which will break the markdown output if the code block
# contains triple backticks.
cell.source = cell.source.replace("```", r"\`\`\`")
# escape ``` in output
if "outputs" in cell:
Expand Down Expand Up @@ -58,7 +82,7 @@ def preprocess_cell(self, cell, resources, cell_index):

class ExtractAttachmentsPreprocessor(Preprocessor):
"""
Extracts all of the outputs from the notebook file. The extracted
Extracts all the outputs from the notebook file. The extracted
outputs are returned in the 'resources' dictionary.
"""

Expand All @@ -82,7 +106,7 @@ def preprocess_cell(self, cell, resources, cell_index):
if not isinstance(resources["outputs"], dict):
resources["outputs"] = {}

# Loop through all of the attachments in the cell
# Loop through all the attachments in the cell
for name, attach in cell.get("attachments", {}).items():
for mime, data in attach.items():
if mime not in {
Expand Down Expand Up @@ -114,12 +138,65 @@ def preprocess_cell(self, cell, resources, cell_index):
],
)

# Exporter used for the "exec" conversion mode.
# EscapePreprocessor is instantiated with rewrite_links=False, so markdown
# links keep their [text](target) form and get a ".md" suffix instead of
# being rewritten to <a href="..."> tags.
# extra_template_basedirs points at the "notebook_convert_templates"
# directory that sits next to this script; template_name selects the
# "md_executable" template.
md_executable = MarkdownExporter(
preprocessors=[
ExtractAttachmentsPreprocessor,
EscapePreprocessor(rewrite_links=False),
],
template_name="md_executable",
extra_template_basedirs=[
os.path.join(os.path.dirname(__file__), "notebook_convert_templates")
],
)


def convert_notebook(
notebook_path: Path,
) -> Path:
mode: Literal["markdown", "exec"] = "markdown",
) -> str:
with open(notebook_path) as f:
nb = nbformat.read(f, as_version=4)

body, _ = exporter.from_notebook_node(nb)
nb.metadata.mode = mode
if mode == "markdown":
body, _ = exporter.from_notebook_node(nb)
else:
body, _ = md_executable.from_notebook_node(nb)
return body


HERE = Path(__file__).parent
DOCS = HERE.parent / "docs"


# Convert notebooks to markdown
def _convert_notebooks(
*, output_dir: Optional[Path] = None, replace: bool = False
) -> None:
"""Converting notebooks."""
if not output_dir and not replace:
raise ValueError("Either --output_dir or --replace must be specified")

output_dir_path = DOCS if replace else Path(output_dir)
for notebook in DOCS.rglob("*.ipynb"):
markdown = convert_notebook(notebook, mode="exec")
markdown_path = output_dir_path / notebook.relative_to(DOCS).with_suffix(".md")
markdown_path.parent.mkdir(parents=True, exist_ok=True)
with open(markdown_path, "w") as f:
f.write(markdown)
if replace:
notebook.unlink(missing_ok=False)


if __name__ == "__main__":
    # Command-line entry point: collect the two options and hand them to
    # _convert_notebooks, which validates that at least one was supplied.
    cli = argparse.ArgumentParser(description="Convert notebooks to markdown")
    cli.add_argument(
        "--output_dir",
        help="Directory to output markdown files",
        default=None,
    )
    cli.add_argument(
        "--replace",
        help="Replace original notebooks with markdown files",
        action="store_true",
    )
    options = cli.parse_args()
    _convert_notebooks(output_dir=options.output_dir, replace=options.replace)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"mimetypes": {
"text/markdown": true
}
}
42 changes: 42 additions & 0 deletions docs/_scripts/notebook_convert_templates/md_executable/index.md.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{#https://github.com/rdbisme/nbconvert/blob/master/share/jupyter/nbconvert/templates/markdown/index.md.j2#}
{% extends 'markdown/index.md.j2' %}

{% block input %}

```
{%- if 'magics_language' in cell.metadata -%}
{{ cell.metadata.magics_language}}
{%- elif 'name' in cell.metadata.get('language_info', {}) -%}
{%- if cell.metadata['language_info']['name'] == "python" -%}
{{ cell.metadata.language_info.name }} exec="1" source="below" result="ini"
{%- endif -%}
{%- elif 'name' in nb.metadata.get('language_info', {}) -%}
{{ nb.metadata.language_info.name }} exec="1" source="below" result="ini"
{%- endif %}
{{ cell.source}}
```

{% endblock input %}

{%- block traceback_line -%}
{%- endblock traceback_line -%}

{%- block stream -%}
{%- endblock stream -%}

{%- block data_text scoped -%}
{%- endblock data_text -%}

{%- block data_html scoped -%}
```html
{{ output.data['text/html'] | safe }}
```
{%- endblock data_html -%}

{%- block data_jpg scoped -%}
{#- Inline JPEG output as a data URI; "image/jpeg" is the registered media type and matches the output key. -#}
![](data:image/jpeg;base64,{{ output.data['image/jpeg'] }})
{%- endblock data_jpg -%}

{%- block data_png scoped -%}
![](data:image/png;base64,{{ output.data['image/png'] }})
{%- endblock data_png -%}
1 change: 1 addition & 0 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ plugins:
- search:
separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
- autorefs
- markdown-exec
- mkdocstrings:
handlers:
python:
Expand Down
Empty file added docs/tests/__init__.py
Empty file.
46 changes: 46 additions & 0 deletions docs/tests/test_notebook_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import nbformat

from _scripts.notebook_convert import exporter


def _remove_consecutive_new_lines(s) -> str:
"""Remove consecutive new lines from a string."""
return "\n".join([line for line in s.split("\n") if line.strip()])


def test_convert_notebook():
# Render an in-memory notebook with the module-level `exporter` and compare
# the output (blank lines removed) against the expected markdown.
# Build a minimal notebook with Python kernel and language metadata.
nb = nbformat.v4.new_notebook()
nb.metadata.kernelspec = {
"name": "python3",
"language": "python",
"display_name": "Python 3",
}
nb.metadata.language_info = {
"name": "python",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3,
},
}

# Add a markdown cell with a link to an .ipynb file
md_cell_source = "This is a [link](example_notebook.ipynb) in markdown."
nb.cells.append(nbformat.v4.new_markdown_cell(md_cell_source))

# Add a plain code cell (its source contains no noqa comment)
code_cell_source = "print('hello')"
nb.cells.append(nbformat.v4.new_code_cell(code_cell_source))
# Mark the notebook as being converted in "exec" mode.
nb.metadata.mode = "exec"

# NOTE(review): the expected fence below carries exec="1" attributes and an
# unchanged markdown link, yet the test goes through `exporter` rather than
# `md_executable` -- confirm which exporter/template is meant to produce it.
body, _ = exporter.from_notebook_node(nb)
assert (
_remove_consecutive_new_lines(body)
== """\
This is a [link](example_notebook.ipynb) in markdown.
```python exec="1" source="below" result="ini"
print('hello')
```"""
)
Loading