Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions .github/actions/run-notebook/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Composite action: converts a Jupyter notebook into a shell script plus a
# plain Python script (via convert-notebook.py) and then executes it.
name: "Run Notebook"
description: "Run a notebook"

inputs:
  notebook:
    description: "The notebook to run"
    required: true
  PINECONE_API_KEY:
    description: "The Pinecone API key"
    required: true
  OPENAI_API_KEY:
    description: "The OpenAI API key"
    required: true

runs:
  using: 'composite'
  steps:
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install dependencies
      shell: bash
      run: |
        pip install --upgrade pip
        pip install nbformat

    # The notebook path is passed through the environment and quoted rather
    # than being interpolated into the script text, so paths containing
    # spaces or shell metacharacters are treated as a single argument.
    - id: convert
      shell: bash
      name: Convert notebook into tmpdir script
      env:
        NOTEBOOK: ${{ inputs.notebook }}
      run: |
        python .github/actions/run-notebook/convert-notebook.py "$NOTEBOOK"

    # The convert step publishes `script_path` (the run.sh wrapper) and
    # `notebook_path` (the extracted Python code) as step outputs.
    - name: View the run script
      shell: bash
      env:
        SCRIPT_PATH: ${{ steps.convert.outputs.script_path }}
      run: |
        cat "$SCRIPT_PATH"

    - name: View converted notebook content
      shell: bash
      env:
        NOTEBOOK_PATH: ${{ steps.convert.outputs.notebook_path }}
      run: |
        cat "$NOTEBOOK_PATH"

    - name: Run the converted notebook
      shell: bash
      run: |
        bash "$SCRIPT_PATH"
      env:
        SCRIPT_PATH: ${{ steps.convert.outputs.script_path }}
        PINECONE_API_KEY: ${{ inputs.PINECONE_API_KEY }}
        OPENAI_API_KEY: ${{ inputs.OPENAI_API_KEY }}
89 changes: 89 additions & 0 deletions .github/actions/run-notebook/convert-notebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#! /usr/bin/env python

# Convert a notebook to a Python script

import os
import sys
import nbformat
import shutil
from tempfile import mkdtemp
from tempfile import TemporaryDirectory

# Get the notebook filename from the command line. The path is expected to be
# relative to the repository root, which sits three directories above this
# script (.github/actions/run-notebook/).
if len(sys.argv) < 2:
    sys.exit(f"Usage: {sys.argv[0]} <path/to/notebook.ipynb>")
filename = "../../../" + sys.argv[1]
print(f"Processing notebook: {filename}")
nb_source_path = os.path.join(os.path.dirname(__file__), filename)

# The temp directory is intentionally not removed here: its path is published
# as a step output at the end of this script so later steps can use the files.
temp_dir = mkdtemp()
venv_path = os.path.join(temp_dir, 'venv')
os.makedirs(venv_path, exist_ok=True)

# Copy file into temp directory
temp_nb_path = os.path.join(temp_dir, 'notebook.ipynb')
print(f"Copying notebook to {temp_nb_path}")
shutil.copy(nb_source_path, temp_nb_path)

with open(temp_nb_path, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# Line prefixes that mark a notebook pip command ("!pip ..." / "%pip ...").
PIP_MAGIC_PREFIXES = ("!pip", "%pip")


def _is_pip_cell(cell):
    """Return True when a code cell has a line starting with "!pip" or "%pip".

    Matching on the line prefix (instead of searching for the substring "pip"
    anywhere in the cell) keeps ordinary code that merely mentions words such
    as "pipeline" from being mistaken for an install cell.
    """
    if cell.cell_type != "code":
        return False
    return any(
        line.lstrip().startswith(PIP_MAGIC_PREFIXES)
        for line in cell.source.splitlines()
    )


# Preamble of the generated run.sh. The shebang is the very first line of the
# file so it is honoured if the script is ever executed directly.
activate_venv = """#!/bin/bash

set -ex

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Create new virtual environment
python -m venv "${SCRIPT_DIR}/venv"

# Activate the virtual environment
source "${SCRIPT_DIR}/venv/bin/activate"
pip install --upgrade pip
"""

# Pip cells are copied whole into run.sh, with the "!"/"%" notebook prefix
# stripped so the shell sees a plain "pip ..." command.
run_commands = [activate_venv]
for cell in nb.cells:
    if _is_pip_cell(cell):
        command = cell.source.replace("!pip", "pip").replace("%pip", "pip")
        run_commands.append(command)

run_commands.append("""
# Run the notebook executable code
python "${SCRIPT_DIR}/notebook.py"
""")

run_commands.append("""
# Deactivate the virtual environment
deactivate
""")

# Save the venv setup, pip install commands and notebook invocation to run.sh
run_script_path = os.path.join(temp_dir, 'run.sh')
with open(run_script_path, 'w', encoding="utf-8") as f:
    f.write("\n".join(run_commands))

print(f"Setup script saved to {run_script_path}")

# Collect every code cell that was not routed to run.sh as a pip cell. Using
# the same predicate for both passes guarantees no code cell is dropped.
executable_cells = [
    cell
    for cell in nb.cells
    if cell.cell_type == "code" and not _is_pip_cell(cell)
]

# Save executable cells to a notebook.py file
script_path = os.path.join(temp_dir, 'notebook.py')
with open(script_path, 'w', encoding="utf-8") as f:
    for cell in executable_cells:
        f.write(cell.source + '\n')

print(f"Script saved to {script_path}")

# Output script and notebook path to github actions output.
# `script_path` is the run.sh wrapper; `notebook_path` is the generated
# notebook.py. When GITHUB_OUTPUT is not set (e.g. a local run) the outputs
# are skipped instead of crashing with a KeyError.
github_output = os.environ.get('GITHUB_OUTPUT')
if github_output:
    with open(github_output, 'a', encoding="utf-8") as f:
        f.write(f"script_path={run_script_path}\n")
        f.write(f"notebook_path={script_path}\n")
else:
    print("GITHUB_OUTPUT is not set; skipping step outputs")

21 changes: 21 additions & 0 deletions .github/actions/validate-json/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Composite action that sets up Python, installs nbformat and runs the
# repository's notebook-format validation script.
name: 'Validate JSON'
description: 'Validate JSON'

runs:
  using: composite
  steps:
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: "3.11"

    - name: Install dependencies
      shell: bash
      run: |
        pip install --upgrade pip
        pip install nbformat

    # The script path is relative to the repository root, so the calling job
    # must have checked out the repository first.
    - name: Validate all notebooks
      shell: bash
      run: python .github/actions/validate-json/validate-notebook-formats.py
3 changes: 2 additions & 1 deletion .github/scripts/version-census.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ def main():
print()
print(f"Notebooks using {client_type}:")
for version, notebooks in sorted(pinecone_versions.items()):
if client_type in version:
client = version.split("==")[0]
if client_type == client:
print(f" {version}: {len(notebooks)} notebooks")
for notebook in notebooks:
print(" - ", notebook)
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/client-versions.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
name: "Report: Client Version Usage"

on:
push:
branches:
- main
pull_request:
workflow_dispatch:
workflow_call:

jobs:
analyze-client-versions:
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Entry-point workflow: fans out to the reusable reporting and notebook-test
# workflows for pull requests and for pushes to main.
name: "Pull Request"

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  report-client-versions:
    uses: './.github/workflows/client-versions.yaml'
    secrets: inherit

  test-notebooks:
    uses: './.github/workflows/test-notebooks-changed.yaml'
    with:
      # Diff against the branch the pull request actually targets.
      # github.base_ref is only populated for pull_request events, so fall
      # back to the called workflow's own default ('master') otherwise.
      base_ref: ${{ github.base_ref || 'master' }}
    secrets: inherit
46 changes: 46 additions & 0 deletions .github/workflows/test-notebooks-all.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Manually triggered workflow that runs every notebook found under a chosen
# directory, one matrix job per notebook.
name: "Test: All Notebooks"

on:
  workflow_dispatch:
    inputs:
      directory:
        description: 'Directory to search for notebooks'
        required: true
        default: 'docs'
        type: string

jobs:
  validate-notebooks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/validate-json

  list-notebooks:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      has_notebooks: ${{ steps.set-matrix.outputs.has_notebooks }}
    steps:
      - uses: actions/checkout@v4
      - name: Find all *.ipynb files
        id: set-matrix
        env:
          # Passed via the environment (not interpolated into the script) so
          # the user-supplied value cannot inject shell commands.
          NOTEBOOK_DIR: ${{ inputs.directory }}
        run: |
          # Fail loudly on a bad directory; otherwise the pipe below would
          # hide the error from find and yield an empty list.
          if [ ! -d "$NOTEBOOK_DIR" ]; then
            echo "::error::Directory '$NOTEBOOK_DIR' does not exist"
            exit 1
          fi

          # Get list of all .ipynb files in target directory
          NOTEBOOKS=$(find "$NOTEBOOK_DIR" -name "*.ipynb" | jq -R -s -c 'split("\n")[:-1]')

          # An empty matrix vector makes the dependent job fail, so expose a
          # flag that lets it be skipped instead.
          if [ "$NOTEBOOKS" = "[]" ]; then
            echo "No notebooks found in $NOTEBOOK_DIR"
            echo "has_notebooks=false" >> "$GITHUB_OUTPUT"
          else
            echo "has_notebooks=true" >> "$GITHUB_OUTPUT"
          fi
          echo "matrix={\"notebook\":$NOTEBOOKS}" >> "$GITHUB_OUTPUT"

  test-notebooks:
    needs: list-notebooks
    if: needs.list-notebooks.outputs.has_notebooks == 'true'
    runs-on: ubuntu-latest
    strategy:
      # Let every notebook run to completion even if another one fails.
      fail-fast: false
      max-parallel: 10
      matrix: ${{ fromJSON(needs.list-notebooks.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4

      - uses: ./.github/actions/run-notebook
        with:
          notebook: ${{ matrix.notebook }}
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
62 changes: 62 additions & 0 deletions .github/workflows/test-notebooks-changed.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Reusable workflow: validates notebook JSON, then executes only the notebooks
# that differ from the base branch, one matrix job per notebook.
name: "Test: Notebook Execution"

on:
  workflow_call:
    inputs:
      base_ref:
        required: false
        type: string
        default: 'master'

jobs:
  validate-notebooks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/validate-json

  detect-changes:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      has_changes: ${{ steps.set-matrix.outputs.has_changes }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Required for git diff

      # base_ref is passed through the environment and quoted so the value is
      # never interpreted as shell syntax.
      - name: Fetch base branch
        env:
          BASE_REF: ${{ inputs.base_ref }}
        run: git fetch origin "$BASE_REF"

      - name: Detect changed notebooks
        id: set-matrix
        env:
          BASE_REF: ${{ inputs.base_ref }}
        run: |
          # Get list of changed .ipynb files. --diff-filter=d excludes deleted
          # files: a removed notebook no longer exists in the checkout, so
          # trying to run it would always fail.
          CHANGED_NOTEBOOKS=$(git diff --name-only --diff-filter=d "origin/${BASE_REF}...HEAD" | grep '\.ipynb$' || true)
          if [ -z "$CHANGED_NOTEBOOKS" ]; then
            echo "No notebook changes detected"
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            echo "matrix={\"notebook\":[]}" >> "$GITHUB_OUTPUT"
          else
            # Convert newlines to JSON array format
            NOTEBOOK_LIST=$(echo "$CHANGED_NOTEBOOKS" | jq -R -s -c 'split("\n")[:-1]')
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "matrix={\"notebook\":$NOTEBOOK_LIST}" >> "$GITHUB_OUTPUT"
          fi

  test-notebooks:
    needs:
      - detect-changes
      - validate-notebooks
    # Skip entirely when nothing changed; an empty matrix would fail the job.
    if: needs.detect-changes.outputs.has_changes == 'true'
    runs-on: ubuntu-latest
    strategy:
      # Let every notebook run to completion even if another one fails.
      fail-fast: false
      matrix: ${{ fromJSON(needs.detect-changes.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/run-notebook
        with:
          notebook: ${{ matrix.notebook }}
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
25 changes: 0 additions & 25 deletions .github/workflows/validate.yaml

This file was deleted.

1,450 changes: 905 additions & 545 deletions docs/pinecone-reranker.ipynb

Large diffs are not rendered by default.