Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: replace ipython notebooks with markdown #3314

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 107 additions & 11 deletions docs/_scripts/notebook_convert.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,54 @@
import argparse
import os
import re
from pathlib import Path
from typing import Literal, Optional

import nbformat
from nbconvert.exporters import MarkdownExporter
from nbconvert.preprocessors import Preprocessor


class EscapePreprocessor(Preprocessor):
def __init__(self, rewrite_links: bool = True, **kwargs) -> None:
super().__init__(**kwargs)
self.rewrite_links = rewrite_links

def preprocess_cell(self, cell, resources, cell_index):
if cell.cell_type == "markdown":
# rewrite markdown links to html links (excluding image links)
cell.source = re.sub(
r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
r'<a href="\2">\1</a>',
cell.source,
)
if self.rewrite_links:
# We'll need to adjust the logic for this to keep markdown format
# but link to markdown files rather than ipynb files.
cell.source = re.sub(
r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
r'<a href="\2">\1</a>',
cell.source,
)
else:
# Keep format but replace the .ipynb extension with .md
cell.source = re.sub(
r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
r"[\1](\2.md)",
cell.source,
)

# Fix image paths in <img> tags
cell.source = re.sub(
r'<img\s+src="\.?/img/([^"]+)"', r'<img src="../img/\1"', cell.source
)

elif cell.cell_type == "code":
# Determine if the cell has bash or cell magic
if cell.source.startswith("%") or cell.source.startswith("!"):
# update metadata to denote that it's not a python cell
cell.metadata["language_info"] = {"name": "unknown"}

# Remove noqa comments
cell.source = re.sub(r'#\s*noqa.*$', '', cell.source, flags=re.MULTILINE)
cell.source = re.sub(r"#\s*noqa.*$", "", cell.source, flags=re.MULTILINE)
# escape ``` in code
# This is needed because the markdown exporter will wrap code blocks in
# triple backticks, which will break the markdown output if the code block
# contains triple backticks.
cell.source = cell.source.replace("```", r"\`\`\`")
# escape ``` in output
if "outputs" in cell:
Expand Down Expand Up @@ -58,7 +82,7 @@ def preprocess_cell(self, cell, resources, cell_index):

class ExtractAttachmentsPreprocessor(Preprocessor):
"""
Extracts all of the outputs from the notebook file. The extracted
Extracts all the outputs from the notebook file. The extracted
outputs are returned in the 'resources' dictionary.
"""

Expand All @@ -82,7 +106,7 @@ def preprocess_cell(self, cell, resources, cell_index):
if not isinstance(resources["outputs"], dict):
resources["outputs"] = {}

# Loop through all of the attachments in the cell
# Loop through all the attachments in the cell
for name, attach in cell.get("attachments", {}).items():
for mime, data in attach.items():
if mime not in {
Expand Down Expand Up @@ -114,12 +138,84 @@ def preprocess_cell(self, cell, resources, cell_index):
],
)

# Exporter that convert_notebook uses for every mode other than "markdown".
# Configuration visible here:
# - EscapePreprocessor(rewrite_links=False) keeps links in markdown syntax
#   (appending ".md" to the link target) instead of turning them into <a> tags;
# - rendering uses the "md_executable" template;
# - templates are additionally looked up in the "notebook_convert_templates"
#   directory located next to this script.
md_executable = MarkdownExporter(
preprocessors=[
ExtractAttachmentsPreprocessor,
EscapePreprocessor(rewrite_links=False),
],
template_name="md_executable",
extra_template_basedirs=[
os.path.join(os.path.dirname(__file__), "notebook_convert_templates")
],
)


def convert_notebook(
    notebook_path: Path,
    mode: Literal["markdown", "exec"] = "markdown",
) -> str:
    """Convert a notebook file to markdown text.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read.
        mode: ``"markdown"`` renders with ``exporter``; ``"exec"`` renders
            with ``md_executable``.

    Returns:
        The exported markdown body.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    # Fail fast on typos instead of silently falling through to the exec
    # exporter.
    if mode not in ("markdown", "exec"):
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'markdown' or 'exec'"
        )

    # Notebooks are UTF-8 JSON; do not depend on the platform default encoding.
    with open(notebook_path, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    # Record the mode on the notebook metadata so it travels with the node
    # that is handed to the exporter.
    nb.metadata.mode = mode
    selected = exporter if mode == "markdown" else md_executable
    body, _ = selected.from_notebook_node(nb)
    return body


HERE = Path(__file__).parent
DOCS = HERE.parent / "docs"


# Convert notebooks to markdown
# Matches inline markdown links (not images, and not targets containing "//")
# whose target ends in ".ipynb". Group 1 is the link text, group 2 the target
# without the extension.
_IPYNB_LINK_RE = re.compile(r"(?<!!)\[([^\]]*)\]\((?![^)]*//)([^)]*)\.ipynb\)")


def _convert_notebooks(
    *, output_dir: Optional[Path] = None, replace: bool = False
) -> None:
    """Convert every notebook under ``DOCS`` to a markdown file.

    Args:
        output_dir: Directory that receives the markdown files, mirroring the
            layout under ``DOCS``. Ignored when ``replace`` is true.
        replace: Write the markdown files next to the notebooks inside
            ``DOCS``, delete each notebook after conversion, and then rewrite
            ``.ipynb`` links in all markdown files under ``DOCS`` to ``.md``.

    Raises:
        ValueError: If neither ``output_dir`` nor ``replace`` is given.
    """
    if not output_dir and not replace:
        raise ValueError("Either --output_dir or --replace must be specified")

    output_dir_path = DOCS if replace else Path(output_dir)

    # Snapshot the listing before touching the tree: with replace=True,
    # notebooks are deleted and markdown files created inside the directory
    # being walked. Sorting also makes the processing order deterministic.
    for notebook in sorted(DOCS.rglob("*.ipynb")):
        markdown = convert_notebook(notebook, mode="exec")
        markdown_path = output_dir_path / notebook.relative_to(DOCS).with_suffix(".md")
        markdown_path.parent.mkdir(parents=True, exist_ok=True)
        markdown_path.write_text(markdown, encoding="utf-8")
        if replace:
            notebook.unlink(missing_ok=False)

    if not replace:
        return

    # The notebooks no longer exist, so point markdown links at the new files.
    for path in output_dir_path.rglob("*.md"):
        content = path.read_text(encoding="utf-8")
        # Keep the link format but replace the .ipynb extension with .md
        updated = _IPYNB_LINK_RE.sub(r"[\1](\2.md)", content)
        if updated != content:
            path.write_text(updated, encoding="utf-8")


if __name__ == "__main__":
    # Command-line entry point: parse the flags and run the conversion.
    parser = argparse.ArgumentParser(description="Convert notebooks to markdown")
    parser.add_argument(
        "--output_dir",
        default=None,
        help="Directory to output markdown files",
    )
    parser.add_argument(
        "--replace",
        action="store_true",
        help="Replace original notebooks with markdown files",
    )
    args = parser.parse_args()
    # Report the missing-flag case as a usage error (message plus exit code 2)
    # rather than letting it surface as a ValueError traceback.
    if not args.output_dir and not args.replace:
        parser.error("Either --output_dir or --replace must be specified")
    _convert_notebooks(replace=args.replace, output_dir=args.output_dir)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"mimetypes": {
"text/markdown": true
}
}
38 changes: 38 additions & 0 deletions docs/_scripts/notebook_convert_templates/md_executable/index.md.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{#https://github.com/rdbisme/nbconvert/blob/master/share/jupyter/nbconvert/templates/markdown/index.md.j2#}
{% extends 'markdown/index.md.j2' %}

{# Code cells: open a fence, tag it with a language when one is known, then emit the cell source. #}
{% block input %}
```
{%- if 'magics_language' in cell.metadata -%}
{{ cell.metadata.magics_language}}
{%- elif 'name' in cell.metadata.get('language_info', {}) -%}
{%- if cell.metadata['language_info']['name'] == "python" -%}
{{ cell.metadata.language_info.name }}
{%- endif -%}
{%- elif 'name' in nb.metadata.get('language_info', {}) -%}{{ nb.metadata.language_info.name }}{%- endif %}
{{ cell.source}}
```
{% endblock input %}

{# Tracebacks, stream output and plain-text results are overridden with empty blocks, so nothing is emitted for them. #}
{%- block traceback_line -%}
{%- endblock traceback_line -%}

{%- block stream -%}
{%- endblock stream -%}

{%- block data_text scoped -%}
{%- endblock data_text -%}

{# HTML results are emitted inside an html code fence. #}
{%- block data_html scoped -%}
```html
{{ output.data['text/html'] | safe }}
```
{%- endblock data_html -%}

{# Images are inlined as base64 data URIs; the JPEG one uses the registered media type image/jpeg. #}
{%- block data_jpg scoped -%}
![](data:image/jpeg;base64,{{ output.data['image/jpeg'] }})
{%- endblock data_jpg -%}

{%- block data_png scoped -%}
![](data:image/png;base64,{{ output.data['image/png'] }})
{%- endblock data_png -%}
2 changes: 1 addition & 1 deletion docs/docs/cloud/how-tos/human_in_the_loop_breakpoint.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ You can then resume from that spot to continue.

### Code for your graph

In this how-to we use a simple ReAct style hosted graph (you can see the full code for defining it [here](../../how-tos/human_in_the_loop/breakpoints.ipynb)). The important thing is that there are two nodes (one named `agent` that calls the LLM, and one named `action` that calls the tool), and a routing function from `agent` that determines whether to call `action` next or just end the graph run (the `action` node always calls the `agent` node after execution).
In this how-to, we use a simple ReAct-style hosted graph (you can see the full code for defining it [here](../../how-tos/human_in_the_loop/breakpoints.md)). The important thing is that there are two nodes (one named `agent` that calls the LLM, and one named `action` that calls the tool), and a routing function from `agent` that determines whether to call `action` next or just end the graph run (the `action` node always calls the `agent` node after execution).

### SDK Initialization

Expand Down
Loading