langchain-ai · eyurtsev · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025
diff --git a/docs/_scripts/notebook_convert.py b/docs/_scripts/notebook_convert.py
@@ -1,30 +1,54 @@
+import argparse
 import os
 import re
 from pathlib import Path
+from typing import Literal, Optional
 
 import nbformat
 from nbconvert.exporters import MarkdownExporter
 from nbconvert.preprocessors import Preprocessor
 
 
 class EscapePreprocessor(Preprocessor):
+    def __init__(self, rewrite_links: bool = True, **kwargs) -> None:
+        super().__init__(**kwargs)
+        self.rewrite_links = rewrite_links
+
     def preprocess_cell(self, cell, resources, cell_index):
         if cell.cell_type == "markdown":
-            # rewrite markdown links to html links (excluding image links)
-            cell.source = re.sub(
-                r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
-                r'<a href="\2">\1</a>',
-                cell.source,
-            )
+            if self.rewrite_links:
+                # Rewrite markdown links to html links (excluding image links
+                # and targets containing "//", i.e. absolute URLs).
+                cell.source = re.sub(
+                    r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)(?:\.ipynb)?\)",
+                    r'<a href="\2">\1</a>',
+                    cell.source,
+                )
+            else:
+                # Keep markdown format; retarget only links ending in .ipynb to .md
+                cell.source = re.sub(
+                    r"(?<!!)\[([^\]]*)\]\((?![^\)]*//)([^)]*)\.ipynb\)",
+                    r"[\1](\2.md)",
+                    cell.source,
+                )
+
             # Fix image paths in <img> tags
             cell.source = re.sub(
                 r'<img\s+src="\.?/img/([^"]+)"', r'<img src="../img/\1"', cell.source
             )
 
         elif cell.cell_type == "code":
+            # Determine if the cell has bash or cell magic
+            if cell.source.startswith("%") or cell.source.startswith("!"):
+                # update metadata to denote that it's not a python cell
+                cell.metadata["language_info"] = {"name": "unknown"}
+
             # Remove noqa comments
-            cell.source = re.sub(r'#\s*noqa.*$', '', cell.source, flags=re.MULTILINE)
+            cell.source = re.sub(r"#\s*noqa.*$", "", cell.source, flags=re.MULTILINE)
             # escape ``` in code
+            # This is needed because the markdown exporter will wrap code blocks in
+            # triple backticks, which will break the markdown output if the code block
+            # contains triple backticks.
             cell.source = cell.source.replace("```", r"\`\`\`")
             # escape ``` in output
             if "outputs" in cell:
@@ -58,7 +82,7 @@ def preprocess_cell(self, cell, resources, cell_index):
 
 class ExtractAttachmentsPreprocessor(Preprocessor):
     """
-    Extracts all of the outputs from the notebook file.  The extracted
+    Extracts all the outputs from the notebook file.  The extracted
     outputs are returned in the 'resources' dictionary.
     """
 
@@ -82,7 +106,7 @@ def preprocess_cell(self, cell, resources, cell_index):
         if not isinstance(resources["outputs"], dict):
             resources["outputs"] = {}
 
-        # Loop through all of the attachments in the cell
+        # Loop through all the attachments in the cell
         for name, attach in cell.get("attachments", {}).items():
             for mime, data in attach.items():
                 if mime not in {
@@ -114,12 +138,84 @@ def preprocess_cell(self, cell, resources, cell_index):
     ],
 )
 
+md_executable = MarkdownExporter(
+    preprocessors=[
+        ExtractAttachmentsPreprocessor,
+        EscapePreprocessor(rewrite_links=False),
+    ],
+    template_name="md_executable",
+    extra_template_basedirs=[
+        os.path.join(os.path.dirname(__file__), "notebook_convert_templates")
+    ],
+)
+
 
 def convert_notebook(
     notebook_path: Path,
-) -> Path:
+    mode: Literal["markdown", "exec"] = "markdown",
+) -> str:
     with open(notebook_path) as f:
         nb = nbformat.read(f, as_version=4)
 
-    body, _ = exporter.from_notebook_node(nb)
+    nb.metadata.mode = mode
+    if mode == "markdown":
+        body, _ = exporter.from_notebook_node(nb)
+    else:
+        body, _ = md_executable.from_notebook_node(nb)
     return body
+
+
+HERE = Path(__file__).parent
+DOCS = HERE.parent / "docs"
+
+
+# Convert notebooks to markdown
+def _convert_notebooks(
+    *, output_dir: Optional[Path] = None, replace: bool = False
+) -> None:
+    """Convert every notebook under DOCS to markdown (exec mode).
+
+    With ``replace`` the markdown is written next to each notebook, the
+    notebook is deleted, and ``.ipynb`` links in all markdown files under
+    DOCS are retargeted to ``.md``; otherwise output goes to ``output_dir``.
+    Raises ValueError when neither option is given.
+    """
+    if not output_dir and not replace:
+        raise ValueError("Either --output_dir or --replace must be specified")
+
+    output_dir_path = DOCS if replace else Path(output_dir)
+    for notebook in DOCS.rglob("*.ipynb"):
+        markdown = convert_notebook(notebook, mode="exec")
+        markdown_path = output_dir_path / notebook.relative_to(DOCS).with_suffix(".md")
+        markdown_path.parent.mkdir(parents=True, exist_ok=True)
+        # Explicit UTF-8: the locale default may not encode notebook content.
+        with open(markdown_path, "w", encoding="utf-8") as f:
+            f.write(markdown)
+        if replace:
+            notebook.unlink(missing_ok=False)
+
+    if replace:
+        # Keep link format but replace the .ipynb extension with .md
+        pattern = r"(?<!!)\[([^\]]*)\]\((?![^)]*//)([^)]*)\.ipynb\)"
+        replacement = r"[\1](\2.md)"
+        for path in output_dir_path.rglob("*.md"):
+            with open(path, "r", encoding="utf-8") as f:
+                content = f.read()
+            with open(path, "w", encoding="utf-8") as f:
+                f.write(re.sub(pattern, replacement, content))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert notebooks to markdown")
+    parser.add_argument(
+        "--output_dir",
+        default=None,
+        help="Directory to output markdown files",
+    )
+    parser.add_argument(
+        "--replace",
+        action="store_true",
+        help="Replace original notebooks with markdown files",
+    )
+    args = parser.parse_args()
+    _convert_notebooks(replace=args.replace, output_dir=args.output_dir)
diff --git a/docs/_scripts/notebook_convert_templates/md_executable/conf.json b/docs/_scripts/notebook_convert_templates/md_executable/conf.json
@@ -0,0 +1,5 @@
+{
+  "mimetypes": {
+    "text/markdown": true
+  }
+}
diff --git a/docs/_scripts/notebook_convert_templates/md_executable/index.md.j2 b/docs/_scripts/notebook_convert_templates/md_executable/index.md.j2
@@ -0,0 +1,38 @@
+{#https://github.com/rdbisme/nbconvert/blob/master/share/jupyter/nbconvert/templates/markdown/index.md.j2#}
+{% extends 'markdown/index.md.j2' %}
+
+{% block input %}
+```
+{%- if 'magics_language' in cell.metadata  -%}
+    {{ cell.metadata.magics_language}}
+{%- elif 'name' in cell.metadata.get('language_info', {}) -%}
+    {%- if cell.metadata['language_info']['name'] == "python" -%}
+        {{ cell.metadata.language_info.name }}
+    {%- endif -%}
+{%- elif 'name' in nb.metadata.get('language_info', {}) -%}{{ nb.metadata.language_info.name }}{%- endif %}
+{{ cell.source}}
+```
+{% endblock input %}
+
+{%- block traceback_line -%}
+{%- endblock traceback_line -%}
+
+{%- block stream -%}
+{%- endblock stream -%}
+
+{%- block data_text scoped -%}
+{%- endblock data_text -%}
+
+{%- block data_html scoped -%}
+```html
+{{ output.data['text/html'] | safe }} 
+```
+{%- endblock data_html -%}
+
+{%- block data_jpg scoped -%}
+![](data:image/jpeg;base64,{{ output.data['image/jpeg'] }})
+{%- endblock data_jpg -%}
+
+{%- block data_png scoped -%}
+![](data:image/png;base64,{{ output.data['image/png'] }})
+{%- endblock data_png -%}
diff --git a/docs/docs/cloud/how-tos/human_in_the_loop_breakpoint.md b/docs/docs/cloud/how-tos/human_in_the_loop_breakpoint.md
@@ -12,7 +12,7 @@ You can then resume from that spot to continue.
 
 ### Code for your graph
 
-In this how-to we use a simple ReAct style hosted graph (you can see the full code for defining it [here](../../how-tos/human_in_the_loop/breakpoints.ipynb)). The important thing is that there are two nodes (one named `agent` that calls the LLM, and one named `action` that calls the tool), and a routing function from `agent` that determines whether to call `action` next or just end the graph run (the `action` node always calls the `agent` node after execution).
+In this how-to we use a simple ReAct style hosted graph (you can see the full code for defining it [here](../../how-tos/human_in_the_loop/breakpoints.md)). The important thing is that there are two nodes (one named `agent` that calls the LLM, and one named `action` that calls the tool), and a routing function from `agent` that determines whether to call `action` next or just end the graph run (the `action` node always calls the `agent` node after execution).
 
 ### SDK Initialization