transformerlab
diff --git a/‎api/alembic/versions/63ca6eebc24c_add_team_providers_tables.py‎
Lines changed: 48 additions & 0 deletions b/‎api/alembic/versions/63ca6eebc24c_add_team_providers_tables.py‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎api/alembic/versions/be6b6cb9f784_rename_team_providers_to_compute_.py‎
Lines changed: 60 additions & 0 deletions b/‎api/alembic/versions/be6b6cb9f784_rename_team_providers_to_compute_.py‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎api/api.py‎
Lines changed: 2 additions & 0 deletions b/‎api/api.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎api/transformerlab/compute_providers/__init__.py‎
Lines changed: 13 additions & 0 deletions b/‎api/transformerlab/compute_providers/__init__.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎api/transformerlab/compute_providers/base.py‎
Lines changed: 131 additions & 0 deletions b/‎api/transformerlab/compute_providers/base.py‎
Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,48 @@
+"""add team_providers_tables
+
+Revision ID: 63ca6eebc24c
+Revises: f7661070ec23
+Create Date: 2025-11-24 11:35:14.455588
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# Revision identifiers, used by Alembic.
+# `revision` is this migration's own ID; `down_revision` is the revision it
+# revises (see "Revises" in the module docstring).
+revision: str = '63ca6eebc24c'
+down_revision: Union[str, Sequence[str], None] = 'f7661070ec23'
+# No branch labels and no dependencies on other revisions are declared.
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('team_providers',
+    sa.Column('id', sa.String(), nullable=False),
+    sa.Column('team_id', sa.String(), nullable=False),
+    sa.Column('name', sa.String(), nullable=False),
+    sa.Column('type', sa.String(), nullable=False),
+    sa.Column('config', sa.JSON(), nullable=False),
+    sa.Column('created_by_user_id', sa.String(), nullable=False),
+    sa.Column('created_at', sa.DateTime(), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=False),
+    sa.Column('updated_at', sa.DateTime(), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=False),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index('idx_team_provider_name', 'team_providers', ['team_id', 'name'], unique=False)
+    op.create_index(op.f('ix_team_providers_team_id'), 'team_providers', ['team_id'], unique=False)
+    op.create_index(op.f('ix_team_providers_type'), 'team_providers', ['type'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_team_providers_type'), table_name='team_providers')
+    op.drop_index(op.f('ix_team_providers_team_id'), table_name='team_providers')
+    op.drop_index('idx_team_provider_name', table_name='team_providers')
+    op.drop_table('team_providers')
+    # ### end Alembic commands ###
@@ -0,0 +1,60 @@
+"""rename_team_providers_to_compute_providers
+
+Revision ID: be6b6cb9f784
+Revises: 63ca6eebc24c
+Create Date: 2025-11-26 14:47:16.424026
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+
+
+# Revision identifiers, used by Alembic.
+# `revision` is this migration's own ID; `down_revision` is the revision it
+# revises (see "Revises" in the module docstring).
+revision: str = "be6b6cb9f784"
+down_revision: Union[str, Sequence[str], None] = "63ca6eebc24c"
+# No branch labels and no dependencies on other revisions are declared.
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # Rename the table from team_providers to compute_providers
+    op.rename_table("team_providers", "compute_providers")
+
+    # Rename the index
+    op.drop_index("idx_team_provider_name", table_name="compute_providers")
+    op.create_index("idx_compute_provider_name", "compute_providers", ["team_id", "name"], unique=False)
+
+    # Update index names that use the table name pattern
+    # The ix_team_providers_* indexes will be automatically handled by SQLAlchemy/Alembic
+    # but we should verify they exist and update if needed
+    try:
+        op.drop_index(op.f("ix_team_providers_team_id"), table_name="compute_providers")
+    except Exception:
+        pass  # Index might not exist or already dropped
+    try:
+        op.drop_index(op.f("ix_team_providers_type"), table_name="compute_providers")
+    except Exception:
+        pass  # Index might not exist or already dropped
+
+    # Create new indexes with correct names (Alembic will auto-generate these on next autogenerate)
+    op.create_index(op.f("ix_compute_providers_team_id"), "compute_providers", ["team_id"], unique=False)
+    op.create_index(op.f("ix_compute_providers_type"), "compute_providers", ["type"], unique=False)
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # Drop new indexes
+    op.drop_index(op.f("ix_compute_providers_type"), table_name="compute_providers")
+    op.drop_index(op.f("ix_compute_providers_team_id"), table_name="compute_providers")
+    op.drop_index("idx_compute_provider_name", table_name="compute_providers")
+
+    # Rename the table back first
+    op.rename_table("compute_providers", "team_providers")
+
+    # Recreate old indexes on the renamed table
+    op.create_index("idx_team_provider_name", "team_providers", ["team_id", "name"], unique=False)
+    op.create_index(op.f("ix_team_providers_team_id"), "team_providers", ["team_id"], unique=False)
+    op.create_index(op.f("ix_team_providers_type"), "team_providers", ["type"], unique=False)
@@ -60,6 +60,7 @@
     batched_prompts,
     recipes,
     teams,
+    compute_provider,
     auth,
 )
 from transformerlab.routers.auth import get_user_and_team  # noqa: E402
@@ -236,6 +237,7 @@ async def validation_exception_handler(request, exc):
 app.include_router(batched_prompts.router, dependencies=[Depends(get_user_and_team)])
 app.include_router(fastchat_openai_api.router, dependencies=[Depends(get_user_and_team)])
 app.include_router(teams.router, dependencies=[Depends(get_user_and_team)])
+app.include_router(compute_provider.router)
 app.include_router(auth.router)
 
 controller_process = None
 
@@ -0,0 +1,13 @@
+"""Compute provider bridge system for abstracting GPU orchestration providers."""
+
+from .base import ComputeProvider
+from .router import ComputeProviderRouter, get_provider
+from .config import load_compute_providers_config, ComputeProviderConfig
+
+# Public API of the package: the names re-exported from the .base, .router
+# and .config submodules imported above.
+__all__ = [
+    "ComputeProvider",
+    "ComputeProviderRouter",
+    "get_provider",
+    "load_compute_providers_config",
+    "ComputeProviderConfig",
+]
@@ -0,0 +1,131 @@
+"""Abstract base class for provider implementations."""
+
+from abc import ABC, abstractmethod
+from typing import Dict, List, Any, Optional, Union
+from .models import (
+    ClusterConfig,
+    JobConfig,
+    ClusterStatus,
+    JobInfo,
+    ResourceInfo,
+)
+
+
+class ComputeProvider(ABC):
+    """Abstract base class for all compute provider implementations."""
+
+    @abstractmethod
+    def launch_cluster(self, cluster_name: str, config: ClusterConfig) -> Dict[str, Any]:
+        """
+        Launch/provision a new cluster.
+
+        Args:
+            cluster_name: Name of the cluster to launch
+            config: Cluster configuration
+
+        Returns:
+            Dictionary with launch result (e.g., request_id, cluster_name)
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def stop_cluster(self, cluster_name: str) -> Dict[str, Any]:
+        """
+        Stop a running cluster (but don't tear it down).
+
+        Args:
+            cluster_name: Name of the cluster to stop
+
+        Returns:
+            Dictionary with stop result
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_cluster_status(self, cluster_name: str) -> ClusterStatus:
+        """
+        Get the status of a cluster.
+
+        Args:
+            cluster_name: Name of the cluster
+
+        Returns:
+            ClusterStatus object with cluster information
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_cluster_resources(self, cluster_name: str) -> ResourceInfo:
+        """
+        Get resource information for a cluster (GPUs, CPUs, memory, etc.).
+
+        Args:
+            cluster_name: Name of the cluster
+
+        Returns:
+            ResourceInfo object with resource details
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def submit_job(self, cluster_name: str, job_config: JobConfig) -> Dict[str, Any]:
+        """
+        Submit a job to an existing cluster.
+
+        Args:
+            cluster_name: Name of the cluster
+            job_config: Job configuration
+
+        Returns:
+            Dictionary with job submission result (e.g., job_id)
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_job_logs(
+        self,
+        cluster_name: str,
+        job_id: Union[str, int],
+        tail_lines: Optional[int] = None,
+        follow: bool = False,
+    ) -> Union[str, Any]:
+        """
+        Get logs for a job.
+
+        Args:
+            cluster_name: Name of the cluster
+            job_id: Job identifier
+            tail_lines: Number of lines to retrieve from the end (None for all)
+            follow: Whether to stream/follow logs (returns stream if True)
+
+        Returns:
+            Log content as string, or stream object if follow=True
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def cancel_job(self, cluster_name: str, job_id: Union[str, int]) -> Dict[str, Any]:
+        """
+        Cancel a running or queued job.
+
+        Args:
+            cluster_name: Name of the cluster
+            job_id: Job identifier
+
+        Returns:
+            Dictionary with cancellation result
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def list_jobs(self, cluster_name: str) -> List[JobInfo]:
+        """
+        List all jobs for a cluster.
+
+        Args:
+            cluster_name: Name of the cluster
+
+        Returns:
+            List of JobInfo objects
+        """
+        raise NotImplementedError