From b6c8f050ff3f64983b6e3b70c367af3dced6c991 Mon Sep 17 00:00:00 2001
From: Mengqin Shen <mengqin@google.com>
Date: Fri, 19 Dec 2025 15:55:46 -0800
Subject: [PATCH 1/4] feat(py): implement automatic prompt file loading and
 fix the test flow with Dev UI

---
 py/packages/genkit/src/genkit/ai/_aio.py      |  11 +-
 py/packages/genkit/src/genkit/ai/_registry.py |  15 +-
 py/packages/genkit/src/genkit/blocks/model.py |   2 +-
 .../genkit/src/genkit/blocks/prompt.py        |  20 +-
 .../tests/genkit/blocks/generate_test.py      | 419 ------------------
 .../genkit/tests/genkit/blocks/prompt_test.py |  73 ++-
 .../data/nested/nested_hello.prompt           |   9 -
 .../{data => prompts}/_shared_partial.prompt  |   3 -
 .../{data => prompts}/dot.name.test.prompt    |   0
 .../{data => prompts}/hello.prompt            |   0
 .../{data => prompts}/hello.variant.prompt    |   0
 .../prompts/nested/nested_hello.prompt        |   2 +
 py/samples/prompt_demo/src/prompt_demo.py     |  28 +-
 13 files changed, 129 insertions(+), 453 deletions(-)
 delete mode 100644 py/packages/genkit/tests/genkit/blocks/generate_test.py
 delete mode 100644 py/samples/prompt_demo/data/nested/nested_hello.prompt
 rename py/samples/prompt_demo/{data => prompts}/_shared_partial.prompt (64%)
 rename py/samples/prompt_demo/{data => prompts}/dot.name.test.prompt (100%)
 rename py/samples/prompt_demo/{data => prompts}/hello.prompt (100%)
 rename py/samples/prompt_demo/{data => prompts}/hello.variant.prompt (100%)
 create mode 100644 py/samples/prompt_demo/prompts/nested/nested_hello.prompt

diff --git a/py/packages/genkit/src/genkit/ai/_aio.py b/py/packages/genkit/src/genkit/ai/_aio.py
index 2f4090ac1a..bc1ffb346a 100644
--- a/py/packages/genkit/src/genkit/ai/_aio.py
+++ b/py/packages/genkit/src/genkit/ai/_aio.py
@@ -23,6 +23,7 @@ class while customizing it with any plugins.
 import uuid
 from asyncio import Future
 from collections.abc import AsyncIterator
+from pathlib import Path
 from typing import Any
 
 from genkit.aio import Channel
@@ -38,7 +39,7 @@ class while customizing it with any plugins.
     GenerateResponseWrapper,
     ModelMiddleware,
 )
-from genkit.blocks.prompt import PromptConfig, to_generate_action_options
+from genkit.blocks.prompt import PromptConfig, load_prompt_folder, to_generate_action_options
 from genkit.blocks.retriever import IndexerRef, IndexerRequest, RetrieverRef
 from genkit.core.action import ActionRunContext
 from genkit.core.action.types import ActionKind
@@ -72,6 +73,7 @@ def __init__(
         self,
         plugins: list[Plugin] | None = None,
         model: str | None = None,
+        prompt_dir: str | Path | None = None,
         reflection_server_spec: ServerSpec | None = None,
     ) -> None:
         """Initialize a new Genkit instance.
@@ -79,11 +81,18 @@ def __init__(
         Args:
             plugins: List of plugins to initialize.
             model: Model name to use.
+            prompt_dir: Directory to automatically load prompts from.
+                Defaults to './prompts'.
             reflection_server_spec: Server spec for the reflection
                 server.
         """
         super().__init__(plugins=plugins, model=model, reflection_server_spec=reflection_server_spec)
 
+        if prompt_dir:
+            load_prompt_folder(self.registry, dir_path=prompt_dir)
+        elif Path('./prompts').is_dir():
+            load_prompt_folder(self.registry, dir_path='./prompts')
+
     async def generate(
         self,
         model: str | None = None,
diff --git a/py/packages/genkit/src/genkit/ai/_registry.py b/py/packages/genkit/src/genkit/ai/_registry.py
index da33e59342..8d62249981 100644
--- a/py/packages/genkit/src/genkit/ai/_registry.py
+++ b/py/packages/genkit/src/genkit/ai/_registry.py
@@ -51,7 +51,11 @@
 from genkit.blocks.evaluator import BatchEvaluatorFn, EvaluatorFn
 from genkit.blocks.formats.types import FormatDef
 from genkit.blocks.model import ModelFn, ModelMiddleware
-from genkit.blocks.prompt import define_prompt
+from genkit.blocks.prompt import (
+    define_helper,
+    define_prompt,
+    lookup_prompt,
+)
 from genkit.blocks.retriever import IndexerFn, RetrieverFn
 from genkit.blocks.tools import ToolRunContext
 from genkit.codec import dump_dict
@@ -168,6 +172,15 @@ def sync_wrapper(*args, **kwargs):
 
         return wrapper
 
+    def define_helper(self, name: str, fn: Callable) -> None:
+        """Define a Handlebars helper function in the registry.
+
+        Args:
+            name: The name of the helper function.
+            fn: The helper function to register.
+        """
+        define_helper(self.registry, name, fn)
+
     def tool(self, name: str | None = None, description: str | None = None) -> Callable[[Callable], Callable]:
         """Decorator to register a function as a tool.
 
diff --git a/py/packages/genkit/src/genkit/blocks/model.py b/py/packages/genkit/src/genkit/blocks/model.py
index 5d2b2a1366..b78ee6455e 100644
--- a/py/packages/genkit/src/genkit/blocks/model.py
+++ b/py/packages/genkit/src/genkit/blocks/model.py
@@ -36,8 +36,8 @@ def my_model(request: GenerateRequest) -> GenerateResponse:
 
 from pydantic import BaseModel, Field
 
-from genkit.ai import ActionKind
 from genkit.core.action import ActionMetadata, ActionRunContext
+from genkit.core.action.types import ActionKind
 from genkit.core.extract import extract_json
 from genkit.core.schema import to_json_schema
 from genkit.core.typing import (
diff --git a/py/packages/genkit/src/genkit/blocks/prompt.py b/py/packages/genkit/src/genkit/blocks/prompt.py
index 1778b69802..fcb3d65fa2 100644
--- a/py/packages/genkit/src/genkit/blocks/prompt.py
+++ b/py/packages/genkit/src/genkit/blocks/prompt.py
@@ -659,10 +659,11 @@ async def render_dotprompt_to_parts(
     Raises:
         Exception: If the template produces more than one message.
     """
-    merged_input = input_
+    # Merge context and input into one dict for template resolution; input keys override context keys
+    flattened_data = {**(context or {}), **(input_ or {})}
     rendered = await prompt_function(
         data=DataArgument[dict[str, Any]](
-            input=merged_input,
+            input=flattened_data,
             context=context,
         ),
         options=options,
@@ -718,9 +719,11 @@ async def render_message_prompt(
         if isinstance(options.messages, list):
             messages_ = [e.model_dump() for e in options.messages]
 
+        # Merge context and input into one dict for template resolution; input keys override context keys
+        flattened_data = {**(context or {}), **(input or {})}
         rendered = await prompt_cache.messages(
             data=DataArgument[dict[str, Any]](
-                input=input,
+                input=flattened_data,
                 context=context,
                 messages=messages_,
             ),
@@ -841,7 +844,7 @@ def define_helper(registry: Registry, name: str, fn: Callable) -> None:
     logger.debug(f'Registered Dotprompt helper "{name}"')
 
 
-def load_prompt(registry: Registry, path: Path, filename: str, prefix: str = '', ns: str = 'dotprompt') -> None:
+def load_prompt(registry: Registry, path: Path, filename: str, prefix: str = '', ns: str = '') -> None:
     """Load a single prompt file and register it in the registry.
 
     This function loads a .prompt file, parses it, and registers it as a lazy-loaded
@@ -1091,6 +1094,13 @@ def load_prompt_folder_recursively(registry: Registry, dir_path: Path, ns: str,
                     partial_name = entry.name[1:-7]  # Remove "_" prefix and ".prompt" suffix
                     with open(entry.path, 'r', encoding='utf-8') as f:
                         source = f.read()
+
+                    # Drop a leading '---' ... '---' frontmatter block, if present, keeping only the body
+                    if source.startswith('---'):
+                        end_frontmatter = source.find('---', 3)
+                        if end_frontmatter != -1:
+                            source = source[end_frontmatter + 3 :].strip()
+
                     define_partial(registry, partial_name, source)
                     logger.debug(f'Registered Dotprompt partial "{partial_name}" from "{entry.path}"')
                 else:
@@ -1107,7 +1117,7 @@ def load_prompt_folder_recursively(registry: Registry, dir_path: Path, ns: str,
         logger.error(f'Error loading prompts from {full_path}: {e}')
 
 
-def load_prompt_folder(registry: Registry, dir_path: str | Path = './prompts', ns: str = 'dotprompt') -> None:
+def load_prompt_folder(registry: Registry, dir_path: str | Path = './prompts', ns: str = '') -> None:
     """Load all prompt files from a directory.
 
     This is the main entry point for loading prompts from a directory.
diff --git a/py/packages/genkit/tests/genkit/blocks/generate_test.py b/py/packages/genkit/tests/genkit/blocks/generate_test.py
deleted file mode 100644
index 8d50357916..0000000000
--- a/py/packages/genkit/tests/genkit/blocks/generate_test.py
+++ /dev/null
@@ -1,419 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright 2025 Google LLC
-# SPDX-License-Identifier: Apache-2.0
-
-"""Tests for the action module."""
-
-import pathlib
-
-import pytest
-import yaml
-from pydantic import TypeAdapter
-
-from genkit.ai import ActionKind, Genkit
-from genkit.blocks.generate import generate_action
-from genkit.blocks.model import text_from_content, text_from_message
-from genkit.codec import dump_dict, dump_json
-from genkit.core.action import ActionRunContext
-from genkit.core.typing import (
-    DocumentData,
-    DocumentPart,
-    FinishReason,
-    GenerateActionOptions,
-    GenerateRequest,
-    GenerateResponse,
-    GenerateResponseChunk,
-    Message,
-    Part,
-    Role,
-)
-from genkit.testing import (
-    define_echo_model,
-    define_programmable_model,
-)
-
-
-@pytest.fixture
-def setup_test():
-    """Setup the test."""
-    ai = Genkit()
-
-    pm, _ = define_programmable_model(ai)
-
-    @ai.tool(name='testTool')
-    def test_tool():
-        """description"""
-        return 'tool called'
-
-    return (ai, pm)
-
-
-@pytest.mark.asyncio
-async def test_simple_text_generate_request(setup_test) -> None:
-    """Test that the generate action can generate text."""
-    ai, pm = setup_test
-
-    pm.responses.append(
-        GenerateResponse(
-            finishReason=FinishReason.STOP,
-            message=Message(role=Role.MODEL, content=[Part(text='bye')]),
-        )
-    )
-
-    response = await generate_action(
-        ai.registry,
-        GenerateActionOptions(
-            model='programmableModel',
-            messages=[
-                Message(
-                    role=Role.USER,
-                    content=[Part(text='hi')],
-                ),
-            ],
-        ),
-    )
-
-    assert response.text == 'bye'
-
-
-@pytest.mark.asyncio
-async def test_simulates_doc_grounding(setup_test) -> None:
-    ai, pm = setup_test
-
-    pm.responses.append(
-        GenerateResponse(
-            finishReason=FinishReason.STOP,
-            message=Message(role=Role.MODEL, content=[Part(text='bye')]),
-        )
-    )
-
-    response = await generate_action(
-        ai.registry,
-        GenerateActionOptions(
-            model='programmableModel',
-            messages=[
-                Message(
-                    role=Role.USER,
-                    content=[Part(text='hi')],
-                ),
-            ],
-            docs=[DocumentData(content=[DocumentPart(text='doc content 1')])],
-        ),
-    )
-
-    assert response.request.messages[0] == Message(
-        role=Role.USER,
-        content=[
-            Part(text='hi'),
-            Part(
-                text='\n\nUse the following information to complete your task:' + '\n\n- [0]: doc content 1\n\n',
-                metadata={'purpose': 'context'},
-            ),
-        ],
-    )
-
-
-@pytest.mark.asyncio
-async def test_generate_applies_middleware(
-    setup_test,
-) -> None:
-    """When middleware is provided, apply it."""
-    ai, *_ = setup_test
-    define_echo_model(ai)
-
-    async def pre_middle(req, ctx, next):
-        txt = ''.join(text_from_message(m) for m in req.messages)
-        return await next(
-            GenerateRequest(
-                messages=[
-                    Message(role=Role.USER, content=[Part(text=f'PRE {txt}')]),
-                ],
-            ),
-            ctx,
-        )
-
-    async def post_middle(req, ctx, next):
-        resp: GenerateResponse = await next(req, ctx)
-        txt = text_from_message(resp.message)
-        return GenerateResponse(
-            finishReason=resp.finish_reason,
-            message=Message(role=Role.USER, content=[Part(text=f'{txt} POST')]),
-        )
-
-    response = await generate_action(
-        ai.registry,
-        GenerateActionOptions(
-            model='echoModel',
-            messages=[
-                Message(
-                    role=Role.USER,
-                    content=[Part(text='hi')],
-                ),
-            ],
-        ),
-        middleware=[pre_middle, post_middle],
-    )
-
-    assert response.text == '[ECHO] user: "PRE hi" POST'
-
-
-@pytest.mark.asyncio
-async def test_generate_middleware_next_fn_args_optional(
-    setup_test,
-) -> None:
-    """Can call next function without args (convenience)."""
-    ai, *_ = setup_test
-    define_echo_model(ai)
-
-    async def post_middle(_, __, next):
-        resp: GenerateResponse = await next()
-        txt = text_from_message(resp.message)
-        return GenerateResponse(
-            finishReason=resp.finish_reason,
-            message=Message(role=Role.USER, content=[Part(text=f'{txt} POST')]),
-        )
-
-    response = await generate_action(
-        ai.registry,
-        GenerateActionOptions(
-            model='echoModel',
-            messages=[
-                Message(
-                    role=Role.USER,
-                    content=[Part(text='hi')],
-                ),
-            ],
-        ),
-        middleware=[post_middle],
-    )
-
-    assert response.text == '[ECHO] user: "hi" POST'
-
-
-@pytest.mark.asyncio
-async def test_generate_middleware_can_modify_context(
-    setup_test,
-) -> None:
-    """Test that middleware can modify context."""
-    ai, *_ = setup_test
-    define_echo_model(ai)
-
-    async def add_context(req, ctx, next):
-        return await next(req, ActionRunContext(context={**ctx.context, 'banana': True}))
-
-    async def inject_context(req, ctx, next):
-        txt = ''.join(text_from_message(m) for m in req.messages)
-        return await next(
-            GenerateRequest(
-                messages=[
-                    Message(
-                        role=Role.USER,
-                        content=[Part(text=f'{txt} {ctx.context}')],
-                    ),
-                ],
-            ),
-            ctx,
-        )
-
-    response = await generate_action(
-        ai.registry,
-        GenerateActionOptions(
-            model='echoModel',
-            messages=[
-                Message(
-                    role=Role.USER,
-                    content=[Part(text='hi')],
-                ),
-            ],
-        ),
-        middleware=[add_context, inject_context],
-        context={'foo': 'bar'},
-    )
-
-    assert response.text == '''[ECHO] user: "hi {'foo': 'bar', 'banana': True}"'''
-
-
-@pytest.mark.asyncio
-async def test_generate_middleware_can_modify_stream(
-    setup_test,
-) -> None:
-    """Test that middleware can modify streams."""
-    ai, pm = setup_test
-
-    pm.responses.append(
-        GenerateResponse(
-            finishReason=FinishReason.STOP,
-            message=Message(role=Role.MODEL, content=[Part(text='bye')]),
-        )
-    )
-    pm.chunks = [
-        [
-            GenerateResponseChunk(role=Role.MODEL, content=[Part(text='1')]),
-            GenerateResponseChunk(role=Role.MODEL, content=[Part(text='2')]),
-            GenerateResponseChunk(role=Role.MODEL, content=[Part(text='3')]),
-        ]
-    ]
-
-    async def modify_stream(req, ctx, next):
-        ctx.send_chunk(
-            GenerateResponseChunk(
-                role=Role.MODEL,
-                content=[Part(text='something extra before')],
-            )
-        )
-
-        def chunk_handler(chunk):
-            ctx.send_chunk(
-                GenerateResponseChunk(
-                    role=Role.MODEL,
-                    content=[Part(text=f'intercepted: {text_from_content(chunk.content)}')],
-                )
-            )
-
-        resp = await next(req, ActionRunContext(context=ctx.context, on_chunk=chunk_handler))
-        ctx.send_chunk(
-            GenerateResponseChunk(
-                role='model',
-                content=[Part(text='something extra after')],
-            )
-        )
-        return resp
-
-    got_chunks = []
-
-    def collect_chunks(c):
-        got_chunks.append(text_from_content(c.content))
-
-    response = await generate_action(
-        ai.registry,
-        GenerateActionOptions(
-            model='programmableModel',
-            messages=[
-                Message(
-                    role=Role.USER,
-                    content=[Part(text='hi')],
-                ),
-            ],
-        ),
-        middleware=[modify_stream],
-        on_chunk=collect_chunks,
-    )
-
-    assert response.text == 'bye'
-    assert got_chunks == [
-        'something extra before',
-        'intercepted: 1',
-        'intercepted: 2',
-        'intercepted: 3',
-        'something extra after',
-    ]
-
-
-##########################################################################
-# run tests from /tests/specs/generate.yaml
-##########################################################################
-
-specs = []
-with open(pathlib.Path(__file__).parent.joinpath('../../../../../../tests/specs/generate.yaml').resolve()) as stream:
-    testsSpec = yaml.safe_load(stream)
-    specs = testsSpec['tests']
-    specs = [x for x in testsSpec['tests'] if x['name'] == 'calls tools']
-
-
-@pytest.mark.parametrize(
-    'spec',
-    specs,
-)
-@pytest.mark.asyncio
-async def test_generate_action_spec(spec) -> None:
-    ai = Genkit()
-
-    pm, _ = define_programmable_model(ai)
-
-    @ai.tool(name='testTool')
-    def test_tool():
-        """description"""
-        return 'tool called'
-
-    if 'modelResponses' in spec:
-        pm.responses = [TypeAdapter(GenerateResponse).validate_python(resp) for resp in spec['modelResponses']]
-
-    if 'streamChunks' in spec:
-        pm.chunks = []
-        for chunks in spec['streamChunks']:
-            converted = []
-            for chunk in chunks:
-                converted.append(TypeAdapter(GenerateResponseChunk).validate_python(chunk))
-            pm.chunks.append(converted)
-
-    action = ai.registry.lookup_action(kind=ActionKind.UTIL, name='generate')
-
-    response = None
-    chunks = None
-    if 'stream' in spec and spec['stream']:
-        chunks = []
-
-        def on_chunk(chunk):
-            chunks.append(chunk)
-
-        action_response = await action.arun(
-            ai.registry,
-            TypeAdapter(GenerateActionOptions).validate_python(spec['input']),
-            on_chunk=on_chunk,
-        )
-        response = action_response.response
-    else:
-        action_response = await action.arun(
-            TypeAdapter(GenerateActionOptions).validate_python(spec['input']),
-        )
-        response = action_response.response
-
-    if 'expectChunks' in spec:
-        got = clean_schema(chunks)
-        want = clean_schema(spec['expectChunks'])
-        if not is_equal_lists(got, want):
-            raise AssertionError(
-                f'{dump_json(got, indent=2)}\n\nis not equal to expected:\n\n{dump_json(want, indent=2)}'
-            )
-
-    if 'expectResponse' in spec:
-        got = clean_schema(dump_dict(response))
-        want = clean_schema(spec['expectResponse'])
-        if got != want:
-            raise AssertionError(
-                f'{dump_json(got, indent=2)}\n\nis not equal to expected:\n\n{dump_json(want, indent=2)}'
-            )
-
-
-def is_equal_lists(a, b):
-    if len(a) != len(b):
-        return False
-
-    for i in range(len(a)):
-        if dump_dict(a[i]) != dump_dict(b[i]):
-            return False
-
-    return True
-
-
-primitives = (bool, str, int, float, type(None))
-
-
-def is_primitive(obj):
-    return isinstance(obj, primitives)
-
-
-def clean_schema(d):
-    if is_primitive(d):
-        return d
-    if isinstance(d, dict):
-        out = {}
-        for key in d:
-            if key != '$schema':
-                out[key] = clean_schema(d[key])
-        return out
-    elif hasattr(d, '__len__'):
-        return [clean_schema(i) for i in d]
-    else:
-        return d
diff --git a/py/packages/genkit/tests/genkit/blocks/prompt_test.py b/py/packages/genkit/tests/genkit/blocks/prompt_test.py
index 6fb58abcda..ef7549676e 100644
--- a/py/packages/genkit/tests/genkit/blocks/prompt_test.py
+++ b/py/packages/genkit/tests/genkit/blocks/prompt_test.py
@@ -465,9 +465,9 @@ async def test_file_based_prompt_registers_two_actions() -> None:
         # Load prompts from directory
         load_prompt_folder(ai.registry, prompt_dir)
 
-        # Actions are registered with registry_definition_key (e.g., "dotprompt/filePrompt")
+        # Actions are registered with registry_definition_key (e.g., "filePrompt")
         # We need to look them up by kind and name (without the /prompt/ prefix)
-        action_name = 'dotprompt/filePrompt'  # registry_definition_key format
+        action_name = 'filePrompt'  # registry_definition_key format
 
         prompt_action = ai.registry.lookup_action(ActionKind.PROMPT, action_name)
         executable_prompt_action = ai.registry.lookup_action(ActionKind.EXECUTABLE_PROMPT, action_name)
@@ -491,7 +491,7 @@ async def test_prompt_and_executable_prompt_return_types() -> None:
         prompt_file.write_text('hello {{name}}')
 
         load_prompt_folder(ai.registry, prompt_dir)
-        action_name = 'dotprompt/testPrompt'
+        action_name = 'testPrompt'
 
         prompt_action = ai.registry.lookup_action(ActionKind.PROMPT, action_name)
         executable_prompt_action = ai.registry.lookup_action(ActionKind.EXECUTABLE_PROMPT, action_name)
@@ -541,6 +541,67 @@ async def test_prompt_function_uses_lookup_prompt() -> None:
         load_prompt_folder(ai.registry, prompt_dir)
 
         # Use prompt() function to look up the file-based prompt
-        executable = await prompt(ai.registry, 'promptFuncTest')
-        response = await executable({'name': 'World'})
-        assert 'World' in response.text
+
+
+@pytest.mark.asyncio
+async def test_automatic_prompt_loading():
+    """Test that Genkit automatically loads prompts from a directory."""
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # Create a prompt file
+        prompt_content = """---
+name: testPrompt
+---
+Hello {{name}}!
+"""
+        prompt_file = Path(tmp_dir) / 'test.prompt'
+        prompt_file.write_text(prompt_content)
+
+        # Initialize Genkit with the temporary directory
+        ai = Genkit(prompt_dir=tmp_dir)
+
+        # Verify the prompt is registered
+        # File-based prompts are registered with an empty namespace by default
+        actions = ai.registry.list_serializable_actions()
+        assert '/prompt/test' in actions
+        assert '/executable-prompt/test' in actions
+
+
+@pytest.mark.asyncio
+async def test_automatic_prompt_loading_default_none():
+    """Test that Genkit does not load prompts if prompt_dir is None."""
+    ai = Genkit(prompt_dir=None)
+    actions = ai.registry.list_serializable_actions()
+
+    # Check that no prompts are registered (assuming a clean environment)
+    dotprompts = [key for key in actions.keys() if '/prompt/' in key or '/executable-prompt/' in key]
+    assert len(dotprompts) == 0
+
+
+@pytest.mark.asyncio
+async def test_automatic_prompt_loading_defaults_mock():
+    """Test that Genkit defaults to ./prompts when prompt_dir is not specified and dir exists."""
+    from unittest.mock import ANY, MagicMock, patch
+
+    with patch('genkit.ai._aio.load_prompt_folder') as mock_load, patch('genkit.ai._aio.Path') as mock_path:
+        # Setup mock to simulate ./prompts existing
+        mock_path_instance = MagicMock()
+        mock_path_instance.is_dir.return_value = True
+        mock_path.return_value = mock_path_instance
+
+        Genkit()
+        mock_load.assert_called_once_with(ANY, dir_path='./prompts')
+
+
+@pytest.mark.asyncio
+async def test_automatic_prompt_loading_defaults_missing():
+    """Test that Genkit skips loading when ./prompts is missing."""
+    from unittest.mock import ANY, MagicMock, patch
+
+    with patch('genkit.ai._aio.load_prompt_folder') as mock_load, patch('genkit.ai._aio.Path') as mock_path:
+        # Setup mock to simulate ./prompts missing
+        mock_path_instance = MagicMock()
+        mock_path_instance.is_dir.return_value = False
+        mock_path.return_value = mock_path_instance
+
+        Genkit()
+        mock_load.assert_not_called()
diff --git a/py/samples/prompt_demo/data/nested/nested_hello.prompt b/py/samples/prompt_demo/data/nested/nested_hello.prompt
deleted file mode 100644
index f2b4e13366..0000000000
--- a/py/samples/prompt_demo/data/nested/nested_hello.prompt
+++ /dev/null
@@ -1,9 +0,0 @@
----
-model: googleai/gemini-1.5-flash
-input:
-  schema:
-    name: string
----
-
-This is a nested prompt, hello {{name}}!
-{{> shared_partial}}
diff --git a/py/samples/prompt_demo/data/_shared_partial.prompt b/py/samples/prompt_demo/prompts/_shared_partial.prompt
similarity index 64%
rename from py/samples/prompt_demo/data/_shared_partial.prompt
rename to py/samples/prompt_demo/prompts/_shared_partial.prompt
index 4a0b1623b4..c3f52bea05 100644
--- a/py/samples/prompt_demo/data/_shared_partial.prompt
+++ b/py/samples/prompt_demo/prompts/_shared_partial.prompt
@@ -1,4 +1 @@
----
-model: googleai/gemini-1.5-flash
----
 This is a PARTIAL that says: {{my_helper "Partial content with helper"}}
diff --git a/py/samples/prompt_demo/data/dot.name.test.prompt b/py/samples/prompt_demo/prompts/dot.name.test.prompt
similarity index 100%
rename from py/samples/prompt_demo/data/dot.name.test.prompt
rename to py/samples/prompt_demo/prompts/dot.name.test.prompt
diff --git a/py/samples/prompt_demo/data/hello.prompt b/py/samples/prompt_demo/prompts/hello.prompt
similarity index 100%
rename from py/samples/prompt_demo/data/hello.prompt
rename to py/samples/prompt_demo/prompts/hello.prompt
diff --git a/py/samples/prompt_demo/data/hello.variant.prompt b/py/samples/prompt_demo/prompts/hello.variant.prompt
similarity index 100%
rename from py/samples/prompt_demo/data/hello.variant.prompt
rename to py/samples/prompt_demo/prompts/hello.variant.prompt
diff --git a/py/samples/prompt_demo/prompts/nested/nested_hello.prompt b/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
new file mode 100644
index 0000000000..2224ff39f4
--- /dev/null
+++ b/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
@@ -0,0 +1,2 @@
+This is a nested prompt, hello {{name}}!
+{{> shared_partial}}
diff --git a/py/samples/prompt_demo/src/prompt_demo.py b/py/samples/prompt_demo/src/prompt_demo.py
index 1fb160644f..f04115fcd0 100644
--- a/py/samples/prompt_demo/src/prompt_demo.py
+++ b/py/samples/prompt_demo/src/prompt_demo.py
@@ -20,22 +20,27 @@
 import structlog
 
 from genkit.ai import Genkit
-from genkit.blocks.prompt import load_prompt_folder
 from genkit.plugins.google_genai import GoogleAI
 
 logger = structlog.get_logger(__name__)
 
 
-# Initialize with GoogleAI plugin
-ai = Genkit(plugins=[GoogleAI()])
+current_dir = Path(__file__).resolve().parent
+prompts_path = current_dir.parent / 'prompts'
 
+ai = Genkit(plugins=[GoogleAI()], model='googleai/gemini-2.0-flash', prompt_dir=prompts_path)
 
-async def main():
-    # Load the prompts from the directory (data)
-    current_dir = Path(__file__).resolve().parent
-    prompts_path = current_dir.parent / 'data'
-    load_prompt_folder(ai.registry, prompts_path)
 
+def my_helper(content, *args, **kwargs):
+    if isinstance(content, list):
+        content = content[0] if content else ''
+    return f'*** {content} ***'
+
+
+ai.define_helper('my_helper', my_helper)
+
+
+async def main():
     # List actions to verify loading
     actions = ai.registry.list_serializable_actions()
 
@@ -47,6 +52,13 @@ async def main():
 
     if not prompts:
         await logger.awarning('No prompts found! Check directory structure.')
+        return
+
+    # Execute the 'hello' prompt
+    hello_prompt = await ai.prompt('hello')
+    response = await hello_prompt(input={'name': 'Genkit User'})
+
+    await logger.ainfo('Prompt Execution Result', text=response.text)
 
 
 if __name__ == '__main__':

From f8836d31a78ac149019d2bed7c4e0bf22d588eaf Mon Sep 17 00:00:00 2001
From: Mengqin Shen <mengqin@google.com>
Date: Fri, 19 Dec 2025 16:02:04 -0800
Subject: [PATCH 2/4] feat(py): implement automatic prompt file loading and
 fix the test flow with Dev UI

---
 py/samples/prompt_demo/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/py/samples/prompt_demo/README.md b/py/samples/prompt_demo/README.md
index 423415f9f9..e5d67a2209 100644
--- a/py/samples/prompt_demo/README.md
+++ b/py/samples/prompt_demo/README.md
@@ -19,6 +19,6 @@ genkit start -- uv run src/prompt_demo.py
 
 ## Prompt Structure
 
-- `data/`: Contains `.prompt` files (using [Dotprompt](https://genkit.dev/docs/dotprompt)).
-- `data/_shared_partial.prompt`: A partial that can be included in other prompts.
-- `data/nested/nested_hello.prompt`: A prompt demonstrating nested structure and partial inclusion.
+- `prompts/`: Contains `.prompt` files (using [Dotprompt](https://genkit.dev/docs/dotprompt)).
+- `prompts/_shared_partial.prompt`: A partial that can be included in other prompts.
+- `prompts/nested/nested_hello.prompt`: A prompt demonstrating nested structure and partial inclusion.

From bc5c4dce6e87233b3ec243eb7ca7b0d4127621f9 Mon Sep 17 00:00:00 2001
From: Mengqin Shen <mengqin@google.com>
Date: Fri, 19 Dec 2025 17:41:11 -0800
Subject: [PATCH 3/4] fix(py): revise code based on Gemini's comments

---
 py/packages/genkit/src/genkit/ai/_aio.py      |  14 +-
 .../tests/genkit/blocks/generate_test.py      | 419 ++++++++++++++++++
 .../genkit/tests/genkit/blocks/prompt_test.py |   9 +-
 .../prompts/_shared_partial.prompt            |   3 +
 py/samples/prompt_demo/prompts/hello.prompt   |   7 +
 .../prompts/nested/nested_hello.prompt        |   7 +
 py/samples/prompt_demo/src/prompt_demo.py     |   2 +-
 7 files changed, 453 insertions(+), 8 deletions(-)
 create mode 100644 py/packages/genkit/tests/genkit/blocks/generate_test.py

diff --git a/py/packages/genkit/src/genkit/ai/_aio.py b/py/packages/genkit/src/genkit/ai/_aio.py
index bc1ffb346a..25e5aeca1c 100644
--- a/py/packages/genkit/src/genkit/ai/_aio.py
+++ b/py/packages/genkit/src/genkit/ai/_aio.py
@@ -82,16 +82,20 @@ def __init__(
             plugins: List of plugins to initialize.
             model: Model name to use.
             prompt_dir: Directory to automatically load prompts from.
-                Defaults to './prompts'.
+                When omitted, prompts are loaded from './prompts' if that directory exists.
             reflection_server_spec: Server spec for the reflection
                 server.
         """
         super().__init__(plugins=plugins, model=model, reflection_server_spec=reflection_server_spec)
 
-        if prompt_dir:
-            load_prompt_folder(self.registry, dir_path=prompt_dir)
-        elif Path('./prompts').is_dir():
-            load_prompt_folder(self.registry, dir_path='./prompts')
+        load_path = prompt_dir
+        if load_path is None:
+            default_prompts_path = Path('./prompts')
+            if default_prompts_path.is_dir():
+                load_path = default_prompts_path
+
+        if load_path:
+            load_prompt_folder(self.registry, dir_path=load_path)
 
     async def generate(
         self,
diff --git a/py/packages/genkit/tests/genkit/blocks/generate_test.py b/py/packages/genkit/tests/genkit/blocks/generate_test.py
new file mode 100644
index 0000000000..8d50357916
--- /dev/null
+++ b/py/packages/genkit/tests/genkit/blocks/generate_test.py
@@ -0,0 +1,419 @@
+#!/usr/bin/env python3
+#
+# Copyright 2025 Google LLC
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tests for the generate action (genkit.blocks.generate)."""
+
+import pathlib
+
+import pytest
+import yaml
+from pydantic import TypeAdapter
+
+from genkit.ai import ActionKind, Genkit
+from genkit.blocks.generate import generate_action
+from genkit.blocks.model import text_from_content, text_from_message
+from genkit.codec import dump_dict, dump_json
+from genkit.core.action import ActionRunContext
+from genkit.core.typing import (
+    DocumentData,
+    DocumentPart,
+    FinishReason,
+    GenerateActionOptions,
+    GenerateRequest,
+    GenerateResponse,
+    GenerateResponseChunk,
+    Message,
+    Part,
+    Role,
+)
+from genkit.testing import (
+    define_echo_model,
+    define_programmable_model,
+)
+
+
+@pytest.fixture
+def setup_test():
+    """Set up a Genkit instance with a programmable model and a test tool."""
+    ai = Genkit()
+
+    pm, _ = define_programmable_model(ai)
+
+    @ai.tool(name='testTool')
+    def test_tool():
+        """description"""
+        return 'tool called'
+
+    return (ai, pm)
+
+
+@pytest.mark.asyncio
+async def test_simple_text_generate_request(setup_test) -> None:
+    """Test that the generate action can generate text."""
+    ai, pm = setup_test
+
+    pm.responses.append(
+        GenerateResponse(
+            finishReason=FinishReason.STOP,
+            message=Message(role=Role.MODEL, content=[Part(text='bye')]),
+        )
+    )
+
+    response = await generate_action(
+        ai.registry,
+        GenerateActionOptions(
+            model='programmableModel',
+            messages=[
+                Message(
+                    role=Role.USER,
+                    content=[Part(text='hi')],
+                ),
+            ],
+        ),
+    )
+
+    assert response.text == 'bye'
+
+
+@pytest.mark.asyncio
+async def test_simulates_doc_grounding(setup_test) -> None:
+    ai, pm = setup_test
+
+    pm.responses.append(
+        GenerateResponse(
+            finishReason=FinishReason.STOP,
+            message=Message(role=Role.MODEL, content=[Part(text='bye')]),
+        )
+    )
+
+    response = await generate_action(
+        ai.registry,
+        GenerateActionOptions(
+            model='programmableModel',
+            messages=[
+                Message(
+                    role=Role.USER,
+                    content=[Part(text='hi')],
+                ),
+            ],
+            docs=[DocumentData(content=[DocumentPart(text='doc content 1')])],
+        ),
+    )
+
+    assert response.request.messages[0] == Message(
+        role=Role.USER,
+        content=[
+            Part(text='hi'),
+            Part(
+                text='\n\nUse the following information to complete your task:' + '\n\n- [0]: doc content 1\n\n',
+                metadata={'purpose': 'context'},
+            ),
+        ],
+    )
+
+
+@pytest.mark.asyncio
+async def test_generate_applies_middleware(
+    setup_test,
+) -> None:
+    """When middleware is provided, apply it."""
+    ai, *_ = setup_test
+    define_echo_model(ai)
+
+    async def pre_middle(req, ctx, next):
+        txt = ''.join(text_from_message(m) for m in req.messages)
+        return await next(
+            GenerateRequest(
+                messages=[
+                    Message(role=Role.USER, content=[Part(text=f'PRE {txt}')]),
+                ],
+            ),
+            ctx,
+        )
+
+    async def post_middle(req, ctx, next):
+        resp: GenerateResponse = await next(req, ctx)
+        txt = text_from_message(resp.message)
+        return GenerateResponse(
+            finishReason=resp.finish_reason,
+            message=Message(role=Role.USER, content=[Part(text=f'{txt} POST')]),
+        )
+
+    response = await generate_action(
+        ai.registry,
+        GenerateActionOptions(
+            model='echoModel',
+            messages=[
+                Message(
+                    role=Role.USER,
+                    content=[Part(text='hi')],
+                ),
+            ],
+        ),
+        middleware=[pre_middle, post_middle],
+    )
+
+    assert response.text == '[ECHO] user: "PRE hi" POST'
+
+
+@pytest.mark.asyncio
+async def test_generate_middleware_next_fn_args_optional(
+    setup_test,
+) -> None:
+    """Can call next function without args (convenience)."""
+    ai, *_ = setup_test
+    define_echo_model(ai)
+
+    async def post_middle(_, __, next):
+        resp: GenerateResponse = await next()
+        txt = text_from_message(resp.message)
+        return GenerateResponse(
+            finishReason=resp.finish_reason,
+            message=Message(role=Role.USER, content=[Part(text=f'{txt} POST')]),
+        )
+
+    response = await generate_action(
+        ai.registry,
+        GenerateActionOptions(
+            model='echoModel',
+            messages=[
+                Message(
+                    role=Role.USER,
+                    content=[Part(text='hi')],
+                ),
+            ],
+        ),
+        middleware=[post_middle],
+    )
+
+    assert response.text == '[ECHO] user: "hi" POST'
+
+
+@pytest.mark.asyncio
+async def test_generate_middleware_can_modify_context(
+    setup_test,
+) -> None:
+    """Test that middleware can modify context."""
+    ai, *_ = setup_test
+    define_echo_model(ai)
+
+    async def add_context(req, ctx, next):
+        return await next(req, ActionRunContext(context={**ctx.context, 'banana': True}))
+
+    async def inject_context(req, ctx, next):
+        txt = ''.join(text_from_message(m) for m in req.messages)
+        return await next(
+            GenerateRequest(
+                messages=[
+                    Message(
+                        role=Role.USER,
+                        content=[Part(text=f'{txt} {ctx.context}')],
+                    ),
+                ],
+            ),
+            ctx,
+        )
+
+    response = await generate_action(
+        ai.registry,
+        GenerateActionOptions(
+            model='echoModel',
+            messages=[
+                Message(
+                    role=Role.USER,
+                    content=[Part(text='hi')],
+                ),
+            ],
+        ),
+        middleware=[add_context, inject_context],
+        context={'foo': 'bar'},
+    )
+
+    assert response.text == '''[ECHO] user: "hi {'foo': 'bar', 'banana': True}"'''
+
+
+@pytest.mark.asyncio
+async def test_generate_middleware_can_modify_stream(
+    setup_test,
+) -> None:
+    """Test that middleware can modify streams."""
+    ai, pm = setup_test
+
+    pm.responses.append(
+        GenerateResponse(
+            finishReason=FinishReason.STOP,
+            message=Message(role=Role.MODEL, content=[Part(text='bye')]),
+        )
+    )
+    pm.chunks = [
+        [
+            GenerateResponseChunk(role=Role.MODEL, content=[Part(text='1')]),
+            GenerateResponseChunk(role=Role.MODEL, content=[Part(text='2')]),
+            GenerateResponseChunk(role=Role.MODEL, content=[Part(text='3')]),
+        ]
+    ]
+
+    async def modify_stream(req, ctx, next):
+        ctx.send_chunk(
+            GenerateResponseChunk(
+                role=Role.MODEL,
+                content=[Part(text='something extra before')],
+            )
+        )
+
+        def chunk_handler(chunk):
+            ctx.send_chunk(
+                GenerateResponseChunk(
+                    role=Role.MODEL,
+                    content=[Part(text=f'intercepted: {text_from_content(chunk.content)}')],
+                )
+            )
+
+        resp = await next(req, ActionRunContext(context=ctx.context, on_chunk=chunk_handler))
+        ctx.send_chunk(
+            GenerateResponseChunk(
+                role='model',
+                content=[Part(text='something extra after')],
+            )
+        )
+        return resp
+
+    got_chunks = []
+
+    def collect_chunks(c):
+        got_chunks.append(text_from_content(c.content))
+
+    response = await generate_action(
+        ai.registry,
+        GenerateActionOptions(
+            model='programmableModel',
+            messages=[
+                Message(
+                    role=Role.USER,
+                    content=[Part(text='hi')],
+                ),
+            ],
+        ),
+        middleware=[modify_stream],
+        on_chunk=collect_chunks,
+    )
+
+    assert response.text == 'bye'
+    assert got_chunks == [
+        'something extra before',
+        'intercepted: 1',
+        'intercepted: 2',
+        'intercepted: 3',
+        'something extra after',
+    ]
+
+
+##########################################################################
+# run tests from /tests/specs/generate.yaml
+##########################################################################
+
+specs = []
+with open(pathlib.Path(__file__).parent.joinpath('../../../../../../tests/specs/generate.yaml').resolve()) as stream:
+    testsSpec = yaml.safe_load(stream)
+    specs = testsSpec['tests']
+    specs = [x for x in testsSpec['tests'] if x['name'] == 'calls tools']
+
+
+@pytest.mark.parametrize(
+    'spec',
+    specs,
+)
+@pytest.mark.asyncio
+async def test_generate_action_spec(spec) -> None:
+    ai = Genkit()
+
+    pm, _ = define_programmable_model(ai)
+
+    @ai.tool(name='testTool')
+    def test_tool():
+        """description"""
+        return 'tool called'
+
+    if 'modelResponses' in spec:
+        pm.responses = [TypeAdapter(GenerateResponse).validate_python(resp) for resp in spec['modelResponses']]
+
+    if 'streamChunks' in spec:
+        pm.chunks = []
+        for chunks in spec['streamChunks']:
+            converted = []
+            for chunk in chunks:
+                converted.append(TypeAdapter(GenerateResponseChunk).validate_python(chunk))
+            pm.chunks.append(converted)
+
+    action = ai.registry.lookup_action(kind=ActionKind.UTIL, name='generate')
+
+    response = None
+    chunks = None
+    if 'stream' in spec and spec['stream']:
+        chunks = []
+
+        def on_chunk(chunk):
+            chunks.append(chunk)
+
+        action_response = await action.arun(
+            ai.registry,
+            TypeAdapter(GenerateActionOptions).validate_python(spec['input']),
+            on_chunk=on_chunk,
+        )
+        response = action_response.response
+    else:
+        action_response = await action.arun(
+            TypeAdapter(GenerateActionOptions).validate_python(spec['input']),
+        )
+        response = action_response.response
+
+    if 'expectChunks' in spec:
+        got = clean_schema(chunks)
+        want = clean_schema(spec['expectChunks'])
+        if not is_equal_lists(got, want):
+            raise AssertionError(
+                f'{dump_json(got, indent=2)}\n\nis not equal to expected:\n\n{dump_json(want, indent=2)}'
+            )
+
+    if 'expectResponse' in spec:
+        got = clean_schema(dump_dict(response))
+        want = clean_schema(spec['expectResponse'])
+        if got != want:
+            raise AssertionError(
+                f'{dump_json(got, indent=2)}\n\nis not equal to expected:\n\n{dump_json(want, indent=2)}'
+            )
+
+
+def is_equal_lists(a, b):
+    if len(a) != len(b):
+        return False
+
+    for i in range(len(a)):
+        if dump_dict(a[i]) != dump_dict(b[i]):
+            return False
+
+    return True
+
+
+primitives = (bool, str, int, float, type(None))
+
+
+def is_primitive(obj):
+    return isinstance(obj, primitives)
+
+
+def clean_schema(d):
+    if is_primitive(d):
+        return d
+    if isinstance(d, dict):
+        out = {}
+        for key in d:
+            if key != '$schema':
+                out[key] = clean_schema(d[key])
+        return out
+    elif hasattr(d, '__len__'):
+        return [clean_schema(i) for i in d]
+    else:
+        return d
diff --git a/py/packages/genkit/tests/genkit/blocks/prompt_test.py b/py/packages/genkit/tests/genkit/blocks/prompt_test.py
index ef7549676e..112cbcbde4 100644
--- a/py/packages/genkit/tests/genkit/blocks/prompt_test.py
+++ b/py/packages/genkit/tests/genkit/blocks/prompt_test.py
@@ -540,7 +540,12 @@ async def test_prompt_function_uses_lookup_prompt() -> None:
 
         load_prompt_folder(ai.registry, prompt_dir)
 
-        # Use prompt() function to look up the file-based prompt
+        # Use ai.prompt() to look up the file-based prompt
+        executable = await ai.prompt('promptFuncTest')
+
+        # Verify it can be executed
+        response = await executable({'name': 'Genkit'})
+        assert 'Genkit' in response.text
 
 
 @pytest.mark.asyncio
@@ -589,7 +594,7 @@ async def test_automatic_prompt_loading_defaults_mock():
         mock_path.return_value = mock_path_instance
 
         Genkit()
-        mock_load.assert_called_once_with(ANY, dir_path='./prompts')
+        mock_load.assert_called_once_with(ANY, dir_path=mock_path_instance)
 
 
 @pytest.mark.asyncio
diff --git a/py/samples/prompt_demo/prompts/_shared_partial.prompt b/py/samples/prompt_demo/prompts/_shared_partial.prompt
index c3f52bea05..f9601d8307 100644
--- a/py/samples/prompt_demo/prompts/_shared_partial.prompt
+++ b/py/samples/prompt_demo/prompts/_shared_partial.prompt
@@ -1 +1,4 @@
+---
+model: googleai/gemini-2.0-flash
+---
 This is a PARTIAL that says: {{my_helper "Partial content with helper"}}
diff --git a/py/samples/prompt_demo/prompts/hello.prompt b/py/samples/prompt_demo/prompts/hello.prompt
index 6cb905fab4..3ebc8e3341 100644
--- a/py/samples/prompt_demo/prompts/hello.prompt
+++ b/py/samples/prompt_demo/prompts/hello.prompt
@@ -1 +1,8 @@
+---
+model: googleai/gemini-2.0-flash
+input:
+  schema:
+    name: string
+---
+
 Hello {{name}}!
diff --git a/py/samples/prompt_demo/prompts/nested/nested_hello.prompt b/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
index 2224ff39f4..ba4a95debe 100644
--- a/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
+++ b/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
@@ -1,2 +1,9 @@
+---
+model: googleai/gemini-2.0-flash
+input:
+  schema:
+    name: string
+---
+
 This is a nested prompt, hello {{name}}!
 {{> shared_partial}}
diff --git a/py/samples/prompt_demo/src/prompt_demo.py b/py/samples/prompt_demo/src/prompt_demo.py
index f04115fcd0..1030e9e4cf 100644
--- a/py/samples/prompt_demo/src/prompt_demo.py
+++ b/py/samples/prompt_demo/src/prompt_demo.py
@@ -31,7 +31,7 @@
 ai = Genkit(plugins=[GoogleAI()], model='googleai/gemini-2.0-flash', prompt_dir=prompts_path)
 
 
-def my_helper(content, *args, **kwargs):
+def my_helper(content, *_, **__):
     if isinstance(content, list):
         content = content[0] if content else ''
     return f'*** {content} ***'

From 9b59d60b1802cf26cc65533afdbde22429e9bab3 Mon Sep 17 00:00:00 2001
From: Mengqin Shen <mengqin@google.com>
Date: Fri, 19 Dec 2025 18:52:00 -0800
Subject: [PATCH 4/4] fix(py): update google genai model to 2.5-flash instead
 of 2.0

---
 py/samples/evaluator-demo/src/eval_demo.py    | 22 +++++++++++--------
 .../prompts/_shared_partial.prompt            |  2 +-
 py/samples/prompt_demo/prompts/hello.prompt   |  2 +-
 .../prompts/nested/nested_hello.prompt        |  2 +-
 py/samples/prompt_demo/src/prompt_demo.py     |  2 +-
 5 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/py/samples/evaluator-demo/src/eval_demo.py b/py/samples/evaluator-demo/src/eval_demo.py
index 45f95cd931..cbcec2556e 100644
--- a/py/samples/evaluator-demo/src/eval_demo.py
+++ b/py/samples/evaluator-demo/src/eval_demo.py
@@ -16,7 +16,7 @@
 
 import json
 import os
-from typing import Any
+from typing import Any, List
 
 import pytest
 import structlog
@@ -27,7 +27,7 @@
 
 logger = structlog.get_logger(__name__)
 
-ai = Genkit(plugins=[GoogleAI()])
+ai = Genkit(plugins=[GoogleAI()], model='googleai/gemini-2.5-flash')
 
 
 async def substring_match(datapoint: BaseDataPoint, options: Any | None):
@@ -54,15 +54,19 @@ async def substring_match(datapoint: BaseDataPoint, options: Any | None):
 )
 
 
+
 #  Define a flow that programmatically runs the evaluation
 @ai.flow()
-async def run_eval_demo(input: Any = None):
-    # Load dataset
-    data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'dataset.json')
-    with open(data_path, 'r') as f:
-        raw_data = json.load(f)
-
-    dataset = [BaseDataPoint(**d) for d in raw_data]
+async def run_eval_demo(dataset_input: List[BaseDataPoint] | None = None):
+    if dataset_input:
+        dataset = dataset_input
+    else:
+        # Load dataset
+        data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'dataset.json')
+        with open(data_path, 'r') as f:
+            raw_data = json.load(f)
+
+        dataset = [BaseDataPoint(**d) for d in raw_data]
 
     logger.info('Running evaluation...', count=len(dataset))
 
diff --git a/py/samples/prompt_demo/prompts/_shared_partial.prompt b/py/samples/prompt_demo/prompts/_shared_partial.prompt
index f9601d8307..72827d8671 100644
--- a/py/samples/prompt_demo/prompts/_shared_partial.prompt
+++ b/py/samples/prompt_demo/prompts/_shared_partial.prompt
@@ -1,4 +1,4 @@
 ---
-model: googleai/gemini-2.0-flash
+model: googleai/gemini-2.5-flash
 ---
 This is a PARTIAL that says: {{my_helper "Partial content with helper"}}
diff --git a/py/samples/prompt_demo/prompts/hello.prompt b/py/samples/prompt_demo/prompts/hello.prompt
index 3ebc8e3341..1824e7e97b 100644
--- a/py/samples/prompt_demo/prompts/hello.prompt
+++ b/py/samples/prompt_demo/prompts/hello.prompt
@@ -1,5 +1,5 @@
 ---
-model: googleai/gemini-2.0-flash
+model: googleai/gemini-2.5-flash
 input:
   schema:
     name: string
diff --git a/py/samples/prompt_demo/prompts/nested/nested_hello.prompt b/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
index ba4a95debe..546cc223e6 100644
--- a/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
+++ b/py/samples/prompt_demo/prompts/nested/nested_hello.prompt
@@ -1,5 +1,5 @@
 ---
-model: googleai/gemini-2.0-flash
+model: googleai/gemini-2.5-flash
 input:
   schema:
     name: string
diff --git a/py/samples/prompt_demo/src/prompt_demo.py b/py/samples/prompt_demo/src/prompt_demo.py
index 1030e9e4cf..7ea72592f3 100644
--- a/py/samples/prompt_demo/src/prompt_demo.py
+++ b/py/samples/prompt_demo/src/prompt_demo.py
@@ -28,7 +28,7 @@
 current_dir = Path(__file__).resolve().parent
 prompts_path = current_dir.parent / 'prompts'
 
-ai = Genkit(plugins=[GoogleAI()], model='googleai/gemini-2.0-flash', prompt_dir=prompts_path)
+ai = Genkit(plugins=[GoogleAI()], model='googleai/gemini-2.5-flash', prompt_dir=prompts_path)
 
 
 def my_helper(content, *_, **__):