diff --git a/backend/src/routes/api/namespaces/const.ts b/backend/src/routes/api/namespaces/const.ts index 08e493a647..e45453b137 100644 --- a/backend/src/routes/api/namespaces/const.ts +++ b/backend/src/routes/api/namespaces/const.ts @@ -11,4 +11,8 @@ export enum NamespaceApplicationCase { * Upgrade an existing DSG project to work with model kserve. */ KSERVE_PROMOTION, + /** + * Nvidia NIMs run on KServe but have different requirements than regular models. + */ + KSERVE_NIM_PROMOTION, } diff --git a/backend/src/routes/api/namespaces/namespaceUtils.ts b/backend/src/routes/api/namespaces/namespaceUtils.ts index 3304a64057..ffff13328e 100644 --- a/backend/src/routes/api/namespaces/namespaceUtils.ts +++ b/backend/src/routes/api/namespaces/namespaceUtils.ts @@ -71,6 +71,7 @@ export const applyNamespaceChange = async ( ); } + let annotations = {}; let labels = {}; let checkPermissionsFn = null; switch (context) { @@ -92,6 +93,13 @@ export const applyNamespaceChange = async ( checkPermissionsFn = checkEditNamespacePermission; } break; + case NamespaceApplicationCase.KSERVE_NIM_PROMOTION: + { + annotations = { 'opendatahub.io/nim-support': 'true' }; + labels = { 'modelmesh-enabled': 'false' }; + checkPermissionsFn = checkEditNamespacePermission; + } + break; default: throw createCustomError('Unknown configuration', 'Cannot apply namespace change', 400); } @@ -121,9 +129,17 @@ export const applyNamespaceChange = async ( } return fastify.kube.coreV1Api - .patchNamespace(name, { metadata: { labels } }, undefined, dryRun, undefined, undefined, { - headers: { 'Content-type': PatchUtils.PATCH_FORMAT_JSON_MERGE_PATCH }, - }) + .patchNamespace( + name, + { metadata: { annotations, labels } }, + undefined, + dryRun, + undefined, + undefined, + { + headers: { 'Content-type': PatchUtils.PATCH_FORMAT_JSON_MERGE_PATCH }, + }, + ) .then(() => ({ applied: true })) .catch((e) => { fastify.log.error( diff --git a/backend/src/routes/api/nim-serving/index.ts 
b/backend/src/routes/api/nim-serving/index.ts new file mode 100644 index 0000000000..5c14cf9463 --- /dev/null +++ b/backend/src/routes/api/nim-serving/index.ts @@ -0,0 +1,25 @@ +import { KubeFastifyInstance, OauthFastifyRequest } from '../../../types'; +import { createCustomError } from '../../../utils/requestUtils'; +import { logRequestDetails } from '../../../utils/fileUtils'; + +const secretNames = ['nvidia-nim-access', 'nvidia-nim-image-pull']; + +export default async (fastify: KubeFastifyInstance): Promise => { + fastify.get( + '/:secretName', + async ( + request: OauthFastifyRequest<{ + Params: { secretName: string }; + }>, + ) => { + logRequestDetails(fastify, request); + const { secretName } = request.params; + if (!secretNames.includes(secretName)) { + throw createCustomError('Not found', 'Secret not found', 404); + } + const { coreV1Api, namespace } = fastify.kube; + + return coreV1Api.readNamespacedSecret(secretName, namespace); + }, + ); +}; diff --git a/backend/src/types.ts b/backend/src/types.ts index 1e2c8a8c2e..9bcbb0cb31 100644 --- a/backend/src/types.ts +++ b/backend/src/types.ts @@ -41,6 +41,7 @@ export type DashboardConfig = K8sResourceCommon & { disableModelRegistry: boolean; disableConnectionTypes: boolean; disableStorageClasses: boolean; + disableNIMModelServing: boolean; }; groupsConfig?: { adminGroups: string; diff --git a/backend/src/utils/constants.ts b/backend/src/utils/constants.ts index cb8905e8c9..04e2f61974 100644 --- a/backend/src/utils/constants.ts +++ b/backend/src/utils/constants.ts @@ -66,6 +66,7 @@ export const blankDashboardCR: DashboardConfig = { disableModelRegistry: true, disableConnectionTypes: true, disableStorageClasses: true, + disableNIMModelServing: false, }, notebookController: { enabled: true, diff --git a/docs/dashboard-config.md b/docs/dashboard-config.md index c5764f5900..0e9b7e763d 100644 --- a/docs/dashboard-config.md +++ b/docs/dashboard-config.md @@ -36,6 +36,7 @@ The following are a list of features that are 
supported, along with there defaul | disableModelRegistry | true | Disables Model Registry from the dashboard. | | disableConnectionTypes | true | Disables creating custom data connection types from the dashboard. | | disableStorageClasses | true | Disables storage classes settings nav item from the dashboard. | +| disableNIMModelServing | true | Disables components of NIM Model UI from the dashboard. | ## Defaults @@ -67,6 +68,7 @@ spec: disableDistributedWorkloads: false disableConnectionTypes: false disableStorageClasses: true + disableNIMModelServing: true ``` ## Additional fields @@ -161,6 +163,7 @@ spec: disableBiasMetrics: false disablePerformanceMetrics: false disablePipelineExperiments: false + disableNIMModelServing: true notebookController: enabled: true gpuSetting: autodetect diff --git a/frontend/src/__mocks__/mockDashboardConfig.ts b/frontend/src/__mocks__/mockDashboardConfig.ts index 193574975f..07a0019b93 100644 --- a/frontend/src/__mocks__/mockDashboardConfig.ts +++ b/frontend/src/__mocks__/mockDashboardConfig.ts @@ -29,6 +29,7 @@ type MockDashboardConfigType = { disableStorageClasses?: boolean; disableNotebookController?: boolean; notebookSizes?: NotebookSize[]; + disableNIMModelServing?: boolean; }; export const mockDashboardConfig = ({ @@ -58,6 +59,7 @@ export const mockDashboardConfig = ({ disableConnectionTypes = true, disableStorageClasses = false, disableNotebookController = false, + disableNIMModelServing = true, notebookSizes = [ { name: 'XSmall', @@ -164,6 +166,7 @@ export const mockDashboardConfig = ({ disableModelRegistry, disableConnectionTypes, disableStorageClasses, + disableNIMModelServing, }, notebookController: { enabled: !disableNotebookController, diff --git a/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts b/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts index 0dda0c16a9..0f9bf21ab0 100644 --- a/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts +++ 
b/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts @@ -1,9 +1,9 @@ import { - K8sStatus, k8sCreateResource, k8sDeleteResource, k8sGetResource, k8sListResource, + K8sStatus, k8sUpdateResource, } from '@openshift/dynamic-plugin-sdk-utils'; import { mockAcceleratorProfile } from '~/__mocks__/mockAcceleratorProfile'; @@ -186,6 +186,7 @@ describe('assembleInferenceService', () => { undefined, false, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -218,6 +219,7 @@ describe('assembleInferenceService', () => { undefined, true, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -251,6 +253,7 @@ describe('assembleInferenceService', () => { undefined, false, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -279,6 +282,7 @@ describe('assembleInferenceService', () => { undefined, true, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -321,6 +325,7 @@ describe('assembleInferenceService', () => { undefined, false, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -373,6 +378,7 @@ describe('assembleInferenceService', () => { undefined, true, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); diff --git a/frontend/src/api/k8s/inferenceServices.ts b/frontend/src/api/k8s/inferenceServices.ts index 028688eda7..f42bc6cb93 100644 --- a/frontend/src/api/k8s/inferenceServices.ts +++ b/frontend/src/api/k8s/inferenceServices.ts @@ -4,8 +4,8 @@ import { k8sDeleteResource, k8sGetResource, k8sListResource, - k8sUpdateResource, K8sStatus, + k8sUpdateResource, } from '@openshift/dynamic-plugin-sdk-utils'; import { InferenceServiceModel } from '~/api/models'; import { InferenceServiceKind, K8sAPIOptions, KnownLabels } from '~/k8sTypes'; @@ -24,6 +24,7 @@ export const assembleInferenceService = ( editName?: string, isModelMesh?: boolean, inferenceService?: InferenceServiceKind, + 
isStorageNeeded?: boolean, initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, ): InferenceServiceKind => { @@ -162,6 +163,11 @@ export const assembleInferenceService = ( }; } + // If storage is not needed, remove storage from the inference service + if (isStorageNeeded !== undefined && !isStorageNeeded) { + delete updateInferenceService.spec.predictor.model?.storage; + } + return updateInferenceService; }; @@ -234,6 +240,7 @@ export const createInferenceService = ( initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, dryRun = false, + isStorageNeeded?: boolean, ): Promise => { const inferenceService = assembleInferenceService( data, @@ -241,6 +248,7 @@ export const createInferenceService = ( undefined, isModelMesh, undefined, + isStorageNeeded, initialAcceleratorProfile, selectedAcceleratorProfile, ); @@ -263,6 +271,7 @@ export const updateInferenceService = ( initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, dryRun = false, + isStorageNeeded?: boolean, ): Promise => { const inferenceService = assembleInferenceService( data, @@ -270,6 +279,7 @@ export const updateInferenceService = ( existingData.metadata.name, isModelMesh, existingData, + isStorageNeeded, initialAcceleratorProfile, selectedAcceleratorProfile, ); diff --git a/frontend/src/api/k8s/pvcs.ts b/frontend/src/api/k8s/pvcs.ts index 7981c1d553..dcbe07088e 100644 --- a/frontend/src/api/k8s/pvcs.ts +++ b/frontend/src/api/k8s/pvcs.ts @@ -3,8 +3,8 @@ import { k8sCreateResource, k8sDeleteResource, k8sListResourceItems, - k8sUpdateResource, K8sStatus, + k8sUpdateResource, } from '@openshift/dynamic-plugin-sdk-utils'; import { K8sAPIOptions, KnownLabels, PersistentVolumeClaimKind } from '~/k8sTypes'; import { PVCModel } from '~/api/models'; @@ -17,6 +17,7 @@ export const assemblePvc = ( data: CreatingStorageObject, 
namespace: string, editName?: string, + hideFromUI?: boolean, ): PersistentVolumeClaimKind => { const { nameDesc: { name: pvcName, description }, @@ -32,9 +33,11 @@ export const assemblePvc = ( metadata: { name, namespace, - labels: { - [KnownLabels.DASHBOARD_RESOURCE]: 'true', - }, + ...(hideFromUI !== true && { + labels: { + [KnownLabels.DASHBOARD_RESOURCE]: 'true', + }, + }), annotations: { 'openshift.io/display-name': pvcName.trim(), 'openshift.io/description': description, @@ -69,8 +72,9 @@ export const createPvc = ( data: CreatingStorageObject, namespace: string, opts?: K8sAPIOptions, + hideFromUI?: boolean, ): Promise => { - const pvc = assemblePvc(data, namespace); + const pvc = assemblePvc(data, namespace, undefined, hideFromUI); return k8sCreateResource( applyK8sAPIOptions({ model: PVCModel, resource: pvc }, opts), diff --git a/frontend/src/api/k8s/servingRuntimes.ts b/frontend/src/api/k8s/servingRuntimes.ts index d00e5f2757..7ecf91c4e1 100644 --- a/frontend/src/api/k8s/servingRuntimes.ts +++ b/frontend/src/api/k8s/servingRuntimes.ts @@ -13,7 +13,10 @@ import { ServingRuntimeAnnotations, ServingRuntimeKind, } from '~/k8sTypes'; -import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types'; +import { + CreatingServingRuntimeObject, + SupportedModelFormatsInfo, +} from '~/pages/modelServing/screens/types'; import { ContainerResources } from '~/types'; import { getModelServingRuntimeName } from '~/pages/modelServing/utils'; import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils'; @@ -33,7 +36,15 @@ export const assembleServingRuntime = ( selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, isModelMesh?: boolean, ): ServingRuntimeKind => { - const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth } = data; + const { + name: displayName, + numReplicas, + modelSize, + externalRoute, + tokenAuth, + imageName, + supportedModelFormatsInfo, + } = data; const createName = 
isCustomServingRuntimesEnabled ? translateDisplayNameForK8s(displayName) : getModelServingRuntimeName(namespace); @@ -123,7 +134,12 @@ export const assembleServingRuntime = ( volumeMounts.push(getshmVolumeMount()); } - const containerWithoutResources = _.omit(container, 'resources'); + const updatedContainer = { + ...container, + ...(imageName && { image: imageName }), + }; + + const containerWithoutResources = _.omit(updatedContainer, 'resources'); return { ...containerWithoutResources, @@ -134,6 +150,17 @@ export const assembleServingRuntime = ( }, ); + if (supportedModelFormatsInfo) { + const supportedModelFormatsObj: SupportedModelFormatsInfo = { + name: supportedModelFormatsInfo.name, + version: supportedModelFormatsInfo.version, + autoSelect: true, + priority: 1, + }; + + updatedServingRuntime.spec.supportedModelFormats = [supportedModelFormatsObj]; + } + if (isModelMesh) { updatedServingRuntime.spec.tolerations = tolerations; } diff --git a/frontend/src/concepts/areas/const.ts b/frontend/src/concepts/areas/const.ts index 2caf38c4d2..98c01a5df2 100644 --- a/frontend/src/concepts/areas/const.ts +++ b/frontend/src/concepts/areas/const.ts @@ -29,6 +29,7 @@ export const allFeatureFlags: string[] = Object.keys({ disableModelRegistry: false, disableConnectionTypes: false, disableStorageClasses: false, + disableNIMModelServing: true, } satisfies DashboardCommonConfig); export const SupportedAreasStateMap: SupportedAreasState = { @@ -119,4 +120,7 @@ export const SupportedAreasStateMap: SupportedAreasState = { requiredComponents: [StackComponent.MODEL_REGISTRY], requiredCapabilities: [StackCapability.SERVICE_MESH, StackCapability.SERVICE_MESH_AUTHZ], }, + [SupportedArea.NIM_MODEL]: { + featureFlags: ['disableNIMModelServing'], + }, }; diff --git a/frontend/src/concepts/areas/types.ts b/frontend/src/concepts/areas/types.ts index 81d3b98b5f..10f2bb9588 100644 --- a/frontend/src/concepts/areas/types.ts +++ b/frontend/src/concepts/areas/types.ts @@ -55,6 +55,7 @@ export 
enum SupportedArea { BIAS_METRICS = 'bias-metrics', PERFORMANCE_METRICS = 'performance-metrics', TRUSTY_AI = 'trusty-ai', + NIM_MODEL = 'nim-model', /* Distributed Workloads areas */ DISTRIBUTED_WORKLOADS = 'distributed-workloads', diff --git a/frontend/src/k8sTypes.ts b/frontend/src/k8sTypes.ts index 1b3fdefb96..190c07e300 100644 --- a/frontend/src/k8sTypes.ts +++ b/frontend/src/k8sTypes.ts @@ -3,17 +3,17 @@ import { EitherNotBoth, EitherOrNone } from '@openshift/dynamic-plugin-sdk'; import { AwsKeys } from '~/pages/projects/dataConnections/const'; import { StackComponent } from '~/concepts/areas/types'; import { + ContainerResourceAttributes, + ContainerResources, + ImageStreamStatusTagCondition, + ImageStreamStatusTagItem, + NotebookSize, PodAffinity, PodContainer, Toleration, - Volume, - ContainerResources, - NotebookSize, TolerationSettings, - ImageStreamStatusTagItem, - ImageStreamStatusTagCondition, + Volume, VolumeMount, - ContainerResourceAttributes, } from './types'; import { ModelServingSize } from './pages/modelServing/screens/types'; @@ -1302,6 +1302,7 @@ export type DashboardCommonConfig = { disableModelRegistry: boolean; disableConnectionTypes: boolean; disableStorageClasses: boolean; + disableNIMModelServing: boolean; }; export type DashboardConfigKind = K8sResourceCommon & { diff --git a/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx b/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx index 18db99bd1b..8f3a3426b7 100644 --- a/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx +++ b/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx @@ -39,7 +39,6 @@ const InferenceServiceTableRow: React.FC = ({ const modelMesh = isModelMesh(inferenceService); const modelMeshMetricsSupported = modelMetricsEnabled && modelMesh; const kserveMetricsSupported = modelMetricsEnabled && kserveMetricsEnabled && !modelMesh; - const displayName = 
getDisplayNameFromK8sResource(inferenceService); return ( diff --git a/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx b/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx new file mode 100644 index 0000000000..8f151267f2 --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx @@ -0,0 +1,104 @@ +import * as React from 'react'; +import { + Bullseye, + Card, + CardBody, + CardFooter, + CardTitle, + Text, + TextContent, + TextVariants, +} from '@patternfly/react-core'; +import { ProjectDetailsContext } from '~/pages/projects/ProjectDetailsContext'; +import { ServingRuntimePlatform } from '~/types'; +import { + getSortedTemplates, + getTemplateEnabled, + getTemplateEnabledForPlatform, +} from '~/pages/modelServing/customServingRuntimes/utils'; +import ModelServingPlatformButtonAction from '~/pages/modelServing/screens/projects/ModelServingPlatformButtonAction'; +import DeployNIMServiceModal from './NIMServiceModal/DeployNIMServiceModal'; + +const EmptyNIMModelServingCard: React.FC = () => { + const { + dataConnections: { data: dataConnections }, + } = React.useContext(ProjectDetailsContext); + const [open, setOpen] = React.useState(false); + + const { + servingRuntimes: { refresh: refreshServingRuntime }, + servingRuntimeTemplates: [templates], + servingRuntimeTemplateOrder: { data: templateOrder }, + servingRuntimeTemplateDisablement: { data: templateDisablement }, + serverSecrets: { refresh: refreshTokens }, + inferenceServices: { refresh: refreshInferenceServices }, + currentProject, + } = React.useContext(ProjectDetailsContext); + + const onSubmit = (submit: boolean) => { + if (submit) { + refreshServingRuntime(); + refreshInferenceServices(); + setTimeout(refreshTokens, 500); // need a timeout to wait for tokens creation + } + }; + + const templatesSorted = getSortedTemplates(templates, templateOrder); + const templatesEnabled = templatesSorted.filter((template) => + 
getTemplateEnabled(template, templateDisablement), + ); + const emptyTemplates = templatesEnabled.length === 0; + + return ( + <> + + + + NVIDIA NIM model serving platform + + + + Models are deployed using NVIDIA NIM microservices. Choose this option when you want to + deploy your model within a NIM container. Please provide the API key to authenticate with + the NIM service. + + + + setOpen(true)} + variant="secondary" + testId="nim-serving-deploy-button" + /> + + + + {open && ( + + getTemplateEnabledForPlatform(template, ServingRuntimePlatform.SINGLE), + )} + onClose={(submit) => { + onSubmit(submit); + setOpen(false); + }} + /> + )} + + ); +}; + +export default EmptyNIMModelServingCard; diff --git a/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx b/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx index 903862c2ca..5d664942f2 100644 --- a/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx @@ -1,4 +1,5 @@ import * as React from 'react'; +import { useEffect, useState } from 'react'; import { OutlinedQuestionCircleIcon } from '@patternfly/react-icons'; import { Alert, @@ -32,6 +33,10 @@ import EmptySingleModelServingCard from '~/pages/modelServing/screens/projects/E import EmptyMultiModelServingCard from '~/pages/modelServing/screens/projects/EmptyMultiModelServingCard'; import { ProjectObjectType, typedEmptyImage } from '~/concepts/design/utils'; import EmptyModelServingPlatform from '~/pages/modelServing/screens/projects/EmptyModelServingPlatform'; +import EmptyNIMModelServingCard from '~/pages/modelServing/screens/projects/EmptyNIMModelServingCard'; +import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; +import { isNIMAPIKeyEnabled } from '~/pages/modelServing/screens/projects/nimUtils'; +import { useDashboardNamespace } from '~/redux/selectors'; import ManageServingRuntimeModal from 
'./ServingRuntimeModal/ManageServingRuntimeModal'; import ModelMeshServingRuntimeTable from './ModelMeshSection/ServingRuntimeTable'; import ModelServingPlatformButtonAction from './ModelServingPlatformButtonAction'; @@ -44,6 +49,22 @@ const ModelServingPlatform: React.FC = () => { const servingPlatformStatuses = useServingPlatformStatuses(); + const isNIMModelServingAvailable = useIsAreaAvailable(SupportedArea.NIM_MODEL).status; + const [isNIMAPIKeyValid, setIsNIMAPIKeyValid] = useState(false); + const dashboardNamespace = useDashboardNamespace(); + + useEffect(() => { + const checkAPIKey = async () => { + try { + const valid = await isNIMAPIKeyEnabled(dashboardNamespace); + setIsNIMAPIKeyValid(valid); + } catch (error) { + setIsNIMAPIKeyValid(false); + } + }; + checkAPIKey(); + }, [dashboardNamespace]); + const kServeEnabled = servingPlatformStatuses.kServe.enabled; const modelMeshEnabled = servingPlatformStatuses.modelMesh.enabled; @@ -197,6 +218,11 @@ const ModelServingPlatform: React.FC = () => { + {isNIMModelServingAvailable && isNIMAPIKeyValid && ( + + + + )} diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx new file mode 100644 index 0000000000..c19c73ffdd --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx @@ -0,0 +1,330 @@ +import * as React from 'react'; +import { + Alert, + AlertActionCloseButton, + Form, + Modal, + Stack, + StackItem, +} from '@patternfly/react-core'; +import { EitherOrNone } from '@openshift/dynamic-plugin-sdk'; +import { + createNIMPVC, + createNIMSecret, + getSubmitInferenceServiceResourceFn, + getSubmitServingRuntimeResourcesFn, + useCreateInferenceServiceObject, + useCreateServingRuntimeObject, +} from '~/pages/modelServing/screens/projects/utils'; +import { + AccessReviewResourceAttributes, + InferenceServiceKind, + 
ProjectKind, + SecretKind, + TemplateKind, +} from '~/k8sTypes'; +import { requestsUnderLimits, resourcesArePositive } from '~/pages/modelServing/utils'; +import useCustomServingRuntimesEnabled from '~/pages/modelServing/customServingRuntimes/useCustomServingRuntimesEnabled'; +import { getServingRuntimeFromName } from '~/pages/modelServing/customServingRuntimes/utils'; +import useServingAcceleratorProfile from '~/pages/modelServing/screens/projects/useServingAcceleratorProfile'; +import DashboardModalFooter from '~/concepts/dashboard/DashboardModalFooter'; +import { ServingRuntimeEditInfo } from '~/pages/modelServing/screens/types'; +import ServingRuntimeSizeSection from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection'; +import NIMModelListSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection'; +import NIMModelDeploymentNameSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection'; +import ProjectSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/ProjectSection'; +import { DataConnection, NamespaceApplicationCase } from '~/pages/projects/types'; +import { + getDisplayNameFromK8sResource, + translateDisplayNameForK8s, + translateDisplayNameForK8sAndReport, +} from '~/concepts/k8s/utils'; +import { useAccessReview } from '~/api'; +import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; +import KServeAutoscalerReplicaSection from '~/pages/modelServing/screens/projects/kServeModal/KServeAutoscalerReplicaSection'; +import useGenericObjectState from '~/utilities/useGenericObjectState'; +import { AcceleratorProfileSelectFieldState } from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; +import NIMPVCSizeSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection'; + +const NIM_SECRET_NAME = 'nvidia-nim-secrets'; +const NIM_NGC_SECRET_NAME = 'ngc-secret'; +const NIM_PVC_NAME 
= 'nim-pvc'; + +const accessReviewResource: AccessReviewResourceAttributes = { + group: 'rbac.authorization.k8s.io', + resource: 'rolebindings', + verb: 'create', +}; + +type DeployNIMServiceModalProps = { + isOpen: boolean; + onClose: (submit: boolean) => void; + servingRuntimeTemplates?: TemplateKind[]; +} & EitherOrNone< + { + projectContext?: { + currentProject: ProjectKind; + dataConnections: DataConnection[]; + }; + }, + { + editInfo?: { + servingRuntimeEditInfo?: ServingRuntimeEditInfo; + inferenceServiceEditInfo?: InferenceServiceKind; + secrets?: SecretKind[]; + }; + } +>; + +const DeployNIMServiceModal: React.FC = ({ + isOpen, + onClose, + servingRuntimeTemplates, + projectContext, + editInfo, +}) => { + const [createDataServingRuntime, setCreateDataServingRuntime, resetDataServingRuntime, sizes] = + useCreateServingRuntimeObject(editInfo?.servingRuntimeEditInfo); + const [createDataInferenceService, setCreateDataInferenceService, resetDataInferenceService] = + useCreateInferenceServiceObject( + editInfo?.inferenceServiceEditInfo, + editInfo?.servingRuntimeEditInfo?.servingRuntime, + editInfo?.secrets, + ); + + const isAuthorinoEnabled = useIsAreaAvailable(SupportedArea.K_SERVE_AUTH).status; + const currentProjectName = projectContext?.currentProject.metadata.name; + const namespace = currentProjectName || createDataInferenceService.project; + + const [translatedName] = translateDisplayNameForK8sAndReport(createDataInferenceService.name, { + maxLength: 253, + }); + + const acceleratorProfileState = useServingAcceleratorProfile( + editInfo?.servingRuntimeEditInfo?.servingRuntime, + editInfo?.inferenceServiceEditInfo, + ); + const [ + selectedAcceleratorProfile, + setSelectedAcceleratorProfile, + resetSelectedAcceleratorProfile, + ] = useGenericObjectState({ + profile: undefined, + count: 0, + useExistingSettings: false, + }); + const customServingRuntimesEnabled = useCustomServingRuntimesEnabled(); + const [allowCreate] = useAccessReview({ + 
...accessReviewResource, + namespace, + }); + + const [actionInProgress, setActionInProgress] = React.useState(false); + const [error, setError] = React.useState(); + const [alertVisible, setAlertVisible] = React.useState(true); + const [pvcSize, setPvcSize] = React.useState(''); + + React.useEffect(() => { + if (currentProjectName && isOpen) { + setCreateDataInferenceService('project', currentProjectName); + } + }, [currentProjectName, setCreateDataInferenceService, isOpen]); + + // Serving Runtime Validation + const isDisabledServingRuntime = + namespace === '' || actionInProgress || createDataServingRuntime.imageName === undefined; + + const baseInputValueValid = + createDataServingRuntime.numReplicas >= 0 && + resourcesArePositive(createDataServingRuntime.modelSize.resources) && + requestsUnderLimits(createDataServingRuntime.modelSize.resources); + + const isDisabledInferenceService = + actionInProgress || + createDataInferenceService.name.trim() === '' || + createDataInferenceService.project === '' || + !translatedName || + !baseInputValueValid; + + const servingRuntimeSelected = React.useMemo( + () => + editInfo?.servingRuntimeEditInfo?.servingRuntime || + getServingRuntimeFromName('nvidia-nim-runtime', servingRuntimeTemplates), + [editInfo, servingRuntimeTemplates], + ); + + const onBeforeClose = (submitted: boolean) => { + onClose(submitted); + setError(undefined); + setActionInProgress(false); + resetDataServingRuntime(); + resetDataInferenceService(); + resetSelectedAcceleratorProfile(); + setAlertVisible(true); + }; + + const setErrorModal = (e: Error) => { + setError(e); + setActionInProgress(false); + }; + + const onSuccess = () => { + setActionInProgress(false); + onBeforeClose(true); + }; + + const submit = () => { + setError(undefined); + setActionInProgress(true); + + const servingRuntimeName = + editInfo?.inferenceServiceEditInfo?.spec.predictor.model?.runtime || + translateDisplayNameForK8s(createDataInferenceService.name); + + const 
submitServingRuntimeResources = getSubmitServingRuntimeResourcesFn( + servingRuntimeSelected, + createDataServingRuntime, + customServingRuntimesEnabled, + namespace, + editInfo?.servingRuntimeEditInfo, + false, + acceleratorProfileState, + selectedAcceleratorProfile, + NamespaceApplicationCase.KSERVE_NIM_PROMOTION, + projectContext?.currentProject, + servingRuntimeName, + true, + ); + + const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn( + createDataInferenceService, + editInfo?.inferenceServiceEditInfo, + servingRuntimeName, + false, + acceleratorProfileState, + selectedAcceleratorProfile, + allowCreate, + editInfo?.secrets, + false, + ); + + Promise.all([ + submitServingRuntimeResources({ dryRun: true }), + submitInferenceServiceResource({ dryRun: true }), + ]) + .then(() => + Promise.all([ + submitServingRuntimeResources({ dryRun: false }), + submitInferenceServiceResource({ dryRun: false }), + createNIMSecret(namespace, NIM_SECRET_NAME, false, false), + createNIMSecret(namespace, NIM_NGC_SECRET_NAME, true, false), + createNIMPVC(namespace, NIM_PVC_NAME, pvcSize, false), + ]), + ) + .then(() => onSuccess()) + .catch((e) => { + setErrorModal(e); + }); + }; + const getProjectName = () => { + const currentName = projectContext?.currentProject + ? getDisplayNameFromK8sResource(projectContext.currentProject) + : ''; + const namespaceName = editInfo?.inferenceServiceEditInfo?.metadata.namespace || ''; + return currentName || namespaceName || ''; + }; + + return ( + onBeforeClose(false)} + footer={ + onBeforeClose(false)} + isSubmitDisabled={isDisabledServingRuntime || isDisabledInferenceService} + error={error} + alertTitle="Error creating model server" + /> + } + showClose + > +
{ + e.preventDefault(); + submit(); + }} + > + + {!isAuthorinoEnabled && alertVisible && ( + + setAlertVisible(false)} />} + > +

+ The NVIDIA NIM model serving platform used by this project allows deployed models + to be accessible via external routes. It is recommended that token authentication + be enabled to protect these routes. The serving platform requires the Authorino + operator be installed on the cluster for token authentication. Contact a cluster + administrator to install the operator. +

+
+
+ )} + + + + + + + + + + + + + + + + + + +
+
+
+ ); +}; + +export default DeployNIMServiceModal; diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx new file mode 100644 index 0000000000..20f894ec1e --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx @@ -0,0 +1,26 @@ +import * as React from 'react'; +import { FormGroup, TextInput } from '@patternfly/react-core'; +import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; +import { CreatingInferenceServiceObject } from '~/pages/modelServing/screens/types'; + +type NIMModelDeploymentNameSectionProps = { + data: CreatingInferenceServiceObject; + setData: UpdateObjectAtPropAndValue; +}; + +const NIMModelDeploymentNameSection: React.FC = ({ + data, + setData, +}) => ( + + setData('name', name)} + /> + +); + +export default NIMModelDeploymentNameSection; diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx new file mode 100644 index 0000000000..215a525092 --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx @@ -0,0 +1,113 @@ +import * as React from 'react'; +import { useEffect, useState } from 'react'; +import { FormGroup, HelperText, HelperTextItem } from '@patternfly/react-core'; +import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; +import { + CreatingInferenceServiceObject, + CreatingServingRuntimeObject, +} from '~/pages/modelServing/screens/types'; +import SimpleSelect from '~/components/SimpleSelect'; +import { fetchNIMModelNames, ModelInfo } from '~/pages/modelServing/screens/projects/utils'; +import { useDashboardNamespace } from '~/redux/selectors'; + +type NIMModelListSectionProps = { + inferenceServiceData: 
CreatingInferenceServiceObject; + setInferenceServiceData: UpdateObjectAtPropAndValue; + setServingRuntimeData: UpdateObjectAtPropAndValue; + isEditing?: boolean; +}; + +const NIMModelListSection: React.FC = ({ + inferenceServiceData, + setInferenceServiceData, + setServingRuntimeData, + isEditing, +}) => { + const [options, setOptions] = useState<{ key: string; label: string }[]>([]); + const [modelList, setModelList] = useState([]); + const { dashboardNamespace } = useDashboardNamespace(); + const [error, setError] = useState(''); + + useEffect(() => { + const getModelNames = async () => { + try { + const modelInfos = await fetchNIMModelNames(dashboardNamespace); + if (modelInfos && modelInfos.length > 0) { + const fetchedOptions = modelInfos.map((modelInfo) => ({ + key: modelInfo.name, + label: `${modelInfo.displayName} - ${modelInfo.latestTag}`, + })); + setModelList(modelInfos); + setOptions(fetchedOptions); + setError(''); + } else { + setError('No NVIDIA NIM models found. Please check the installation.'); + setOptions([]); + } + } catch (err) { + setError('There was a problem fetching the NIM models. 
Please try again later.'); + setOptions([]); + } + }; + getModelNames(); + }, [dashboardNamespace]); + + const getSupportedModelFormatsInfo = (name: string) => { + const modelInfo = modelList.find((model) => model.name === name); + if (modelInfo) { + return { + name: modelInfo.name, + version: modelInfo.latestTag, + }; + } + return null; + }; + + const getNIMImageName = (name: string) => { + const imageInfo = modelList.find((model) => model.name === name); + if (imageInfo) { + return `nvcr.io/${imageInfo.namespace}/${name}:${imageInfo.latestTag}`; + } + return ''; + }; + + return ( + + { + const supportedModelInfo = getSupportedModelFormatsInfo(name); + + if (supportedModelInfo) { + setServingRuntimeData('supportedModelFormatsInfo', supportedModelInfo); + setServingRuntimeData('imageName', getNIMImageName(name)); + setInferenceServiceData('format', { name }); + setError(''); + } else { + setError('Error: Model not found.'); // Set error when model is not found + } + }} + /> + {error && ( + + {error} + + )} + + ); +}; + +export default NIMModelListSection; diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection.tsx new file mode 100644 index 0000000000..30cb1e231a --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection.tsx @@ -0,0 +1,27 @@ +import React from 'react'; +import { HelperText, HelperTextItem, StackItem } from '@patternfly/react-core'; +import PVSizeField from '~/pages/projects/components/PVSizeField'; + +type NIMPVCSizeSectionProps = { + pvcSize: string; + setPvcSize: (value: string) => void; +}; + +const NIMPVCSizeSection: React.FC = ({ pvcSize, setPvcSize }) => ( + + setPvcSize(value)} + label="NVIDIA NIM storage size" + /> + + + Specify the size of the cluster storage instance that will be created to store the + downloaded NVIDIA NIM. 
+ + + +); + +export default NIMPVCSizeSection; diff --git a/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts b/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts index dd5c245b22..8911790120 100644 --- a/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts +++ b/frontend/src/pages/modelServing/screens/projects/__tests__/utils.spec.ts @@ -1,6 +1,9 @@ import { mockDataConnection } from '~/__mocks__/mockDataConnection'; import { mockProjectK8sResource } from '~/__mocks__/mockProjectK8sResource'; import { + createNIMPVC, + createNIMSecret, + fetchNIMModelNames, filterOutConnectionsWithoutBucket, getCreateInferenceServiceLabels, getProjectModelServingPlatform, @@ -9,6 +12,15 @@ import { import { LabeledDataConnection, ServingPlatformStatuses } from '~/pages/modelServing/screens/types'; import { ServingRuntimePlatform } from '~/types'; import { mockInferenceServiceK8sResource } from '~/__mocks__/mockInferenceServiceK8sResource'; +import { createPvc, createSecret, getConfigMap, getSecret } from '~/api'; +import { PersistentVolumeClaimKind } from '~/k8sTypes'; + +jest.mock('~/api', () => ({ + getSecret: jest.fn(), + createSecret: jest.fn(), + getConfigMap: jest.fn(), + createPvc: jest.fn(), +})); describe('filterOutConnectionsWithoutBucket', () => { it('should return an empty array if input connections array is empty', () => { @@ -202,3 +214,252 @@ describe('getCreateInferenceServiceLabels', () => { }); }); }); + +describe('createNIMSecret', () => { + const projectName = 'test-project'; + const secretName = 'test-secret'; + const dashboardNamespace = 'test-namespace'; + const dryRun = false; + + const nimSecretMock = { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: 'test-secret', + namespace: projectName, + }, + data: {}, + }; + + const nimSecretDataNGC = { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: 'ngc-secret', + namespace: dashboardNamespace, + }, + data: { + 
'.dockerconfigjson': 'mocked-dockerconfig-json', + }, + }; + + const nimSecretDataNonNGC = { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: 'nim-secret', + namespace: dashboardNamespace, + }, + data: { + api_key: 'mocked-api-key', + }, + }; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should create NGC secret when isNGC is true', async () => { + (getSecret as jest.Mock).mockResolvedValueOnce(nimSecretDataNGC); + (createSecret as jest.Mock).mockResolvedValueOnce(nimSecretMock); + + const result = await createNIMSecret(projectName, secretName, true, dryRun); + + expect(getSecret).toHaveBeenCalledWith(dashboardNamespace, 'nvidia-nim-image-pull'); + expect(createSecret).toHaveBeenCalledWith( + { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: secretName, + namespace: projectName, + }, + data: { + '.dockerconfigjson': 'mocked-dockerconfig-json', + }, + type: 'kubernetes.io/dockerconfigjson', + }, + { dryRun }, + ); + expect(result).toEqual(nimSecretMock); + }); + + it('should create non-NGC secret when isNGC is false', async () => { + (getSecret as jest.Mock).mockResolvedValueOnce(nimSecretDataNonNGC); + (createSecret as jest.Mock).mockResolvedValueOnce(nimSecretMock); + + const result = await createNIMSecret(projectName, secretName, false, dryRun); + + expect(getSecret).toHaveBeenCalledWith(dashboardNamespace, 'nvidia-nim-access'); + expect(createSecret).toHaveBeenCalledWith( + { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: secretName, + namespace: projectName, + }, + data: { + NGC_API_KEY: 'mocked-api-key', + }, + type: 'Opaque', + }, + { dryRun }, + ); + expect(result).toEqual(nimSecretMock); + }); + + it('should reject if nimSecretData has no data', async () => { + (getSecret as jest.Mock).mockResolvedValueOnce({ + ...nimSecretDataNGC, + data: null, + }); + + await expect(createNIMSecret(projectName, secretName, true, dryRun)).rejects.toThrow( + 'Error creating NIM NGC secret', + ); + }); +}); + 
+describe('fetchNIMModelNames', () => { + const dashboardNamespace = 'test-namespace'; + const NIM_CONFIGMAP_NAME = 'nvidia-nim-images-data'; + + const configMapMock = { + data: { + model1: JSON.stringify({ + displayName: 'Model One', + shortDescription: 'First model description', + namespace: 'namespace-one', + tags: ['tag1', 'tag2'], + latestTag: 'v1.0.0', + updatedDate: '2024-09-15T00:00:00Z', + }), + model2: JSON.stringify({ + displayName: 'Model Two', + shortDescription: 'Second model description', + namespace: 'namespace-two', + tags: ['tag3', 'tag4'], + latestTag: 'v2.0.0', + updatedDate: '2024-09-16T00:00:00Z', + }), + }, + }; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should return model infos when configMap has data', async () => { + (getConfigMap as jest.Mock).mockResolvedValueOnce(configMapMock); + + const result = await fetchNIMModelNames(dashboardNamespace); + + expect(getConfigMap).toHaveBeenCalledWith(dashboardNamespace, NIM_CONFIGMAP_NAME); + expect(result).toEqual([ + { + name: 'model1', + displayName: 'Model One', + shortDescription: 'First model description', + namespace: 'namespace-one', + tags: ['tag1', 'tag2'], + latestTag: 'v1.0.0', + updatedDate: '2024-09-15T00:00:00Z', + }, + { + name: 'model2', + displayName: 'Model Two', + shortDescription: 'Second model description', + namespace: 'namespace-two', + tags: ['tag3', 'tag4'], + latestTag: 'v2.0.0', + updatedDate: '2024-09-16T00:00:00Z', + }, + ]); + }); + + it('should return undefined if configMap has no data', async () => { + (getConfigMap as jest.Mock).mockResolvedValueOnce({ data: {} }); + + const result = await fetchNIMModelNames(dashboardNamespace); + + expect(getConfigMap).toHaveBeenCalledWith(dashboardNamespace, NIM_CONFIGMAP_NAME); + expect(result).toBeUndefined(); + }); + + it('should return undefined if configMap.data is not defined', async () => { + (getConfigMap as jest.Mock).mockResolvedValueOnce({ data: undefined }); + + const result = await 
fetchNIMModelNames(dashboardNamespace); + + expect(getConfigMap).toHaveBeenCalledWith(dashboardNamespace, NIM_CONFIGMAP_NAME); + expect(result).toBeUndefined(); + }); +}); + +describe('createNIMPVC', () => { + const projectName = 'test-project'; + const pvcName = 'test-pvc'; + const pvcSize = '10Gi'; + const dryRun = true; + + const pvcMock: PersistentVolumeClaimKind = { + apiVersion: 'v1', + kind: 'PersistentVolumeClaim', + metadata: { + name: pvcName, + namespace: projectName, + }, + spec: { + accessModes: ['ReadWriteOnce'], + resources: { + requests: { + storage: pvcSize, + }, + }, + volumeMode: 'Filesystem', + }, + }; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should call createPvc with correct arguments and return the result', async () => { + (createPvc as jest.Mock).mockResolvedValueOnce(pvcMock); + + const result = await createNIMPVC(projectName, pvcName, pvcSize, dryRun); + + expect(createPvc).toHaveBeenCalledWith( + { + nameDesc: { + name: pvcName, + description: '', + }, + size: pvcSize, + }, + projectName, + undefined, + { dryRun }, + ); + expect(result).toEqual(pvcMock); + }); + + it('should handle the dryRun flag correctly', async () => { + const dryRunFlag = false; + await createNIMPVC(projectName, pvcName, pvcSize, dryRunFlag); + + expect(createPvc).toHaveBeenCalledWith( + { + nameDesc: { + name: pvcName, + description: '', + }, + size: pvcSize, + }, + projectName, + undefined, + { dryRun: dryRunFlag }, + ); + }); +}); diff --git a/frontend/src/pages/modelServing/screens/projects/nimUtils.ts b/frontend/src/pages/modelServing/screens/projects/nimUtils.ts new file mode 100644 index 0000000000..2b3cb78dca --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/nimUtils.ts @@ -0,0 +1,69 @@ +// NGC stands for NVIDIA GPU Cloud. 
+ +import { ProjectKind, SecretKind } from '~/k8sTypes'; +import { getConfigMap } from '~/api'; + +const NIM_SECRET_NAME = 'nvidia-nim-access'; +const NIM_NGC_SECRET_NAME = 'nvidia-nim-image-pull'; +const NIM_API_KEY_VALIDATION = 'nvidia-nim-validation-result'; + +export const getNGCSecretType = (isNGC: boolean): string => + isNGC ? 'kubernetes.io/dockerconfigjson' : 'Opaque'; + +const getNIMSecretData = async (secretName: string): Promise => { + try { + const response = await fetch(`/api/nim-serving/${secretName}`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + }); + + if (!response.ok) { + throw new Error(`Error fetching secret: ${response.statusText}`); + } + const secretData = await response.json(); + return secretData; + } catch (error) { + throw new Error(`Failed to fetch secret: ${secretName}.`); + } +}; +export const getNIMData = async (isNGC: boolean): Promise | undefined> => { + const nimSecretData = isNGC + ? await getNIMSecretData(NIM_NGC_SECRET_NAME) + : await getNIMSecretData(NIM_SECRET_NAME); + + if (!nimSecretData.data) { + throw new Error(`Error retrieving NIM ${isNGC ? 
'NGC' : ''} secret data`); + } + + const data: Record = {}; + if (!isNGC) { + data.NGC_API_KEY = nimSecretData.data.api_key; + } else { + data['.dockerconfigjson'] = nimSecretData.data['.dockerconfigjson']; + } + return data; +}; + +export const isNIMSupported = (currentProject: ProjectKind): boolean => { + const isModelMeshDisabled = currentProject.metadata.labels?.['modelmesh-enabled'] === 'false'; + const hasNIMSupportAnnotation = + currentProject.metadata.annotations?.['opendatahub.io/nim-support'] === 'true'; + + return isModelMeshDisabled && hasNIMSupportAnnotation; +}; + +export const isNIMAPIKeyEnabled = async (dashboardNamespace: string): Promise => { + try { + const configMap = await getConfigMap(dashboardNamespace, NIM_API_KEY_VALIDATION); + + if (configMap.data && Object.keys(configMap.data).length > 0) { + const validationResult = configMap.data.validation_result; + return validationResult === 'true'; + } + } catch (error) { + throw new Error(`Error fetching API key validation.`); + } + return false; +}; diff --git a/frontend/src/pages/modelServing/screens/projects/utils.ts b/frontend/src/pages/modelServing/screens/projects/utils.ts index b97c4059a0..474957270f 100644 --- a/frontend/src/pages/modelServing/screens/projects/utils.ts +++ b/frontend/src/pages/modelServing/screens/projects/utils.ts @@ -3,6 +3,7 @@ import { DashboardConfigKind, InferenceServiceKind, KnownLabels, + PersistentVolumeClaimKind, ProjectKind, SecretKind, ServingRuntimeKind, @@ -13,10 +14,10 @@ import { CreatingInferenceServiceObject, CreatingServingRuntimeObject, InferenceServiceStorageType, + LabeledDataConnection, + ModelServingSize, ServingPlatformStatuses, ServingRuntimeEditInfo, - ModelServingSize, - LabeledDataConnection, } from '~/pages/modelServing/screens/types'; import { ServingRuntimePlatform } from '~/types'; import { DEFAULT_MODEL_SERVER_SIZES } from '~/pages/modelServing/screens/const'; @@ -36,8 +37,10 @@ import { addSupportServingPlatformProject, assembleSecret, 
createInferenceService, + createPvc, createSecret, createServingRuntime, + getConfigMap, updateInferenceService, updateServingRuntime, } from '~/api'; @@ -45,6 +48,9 @@ import { isDataConnectionAWS } from '~/pages/projects/screens/detail/data-connec import { removeLeadingSlash } from '~/utilities/string'; import { RegisteredModelDeployInfo } from '~/pages/modelRegistry/screens/RegisteredModels/useRegisteredModelDeployInfo'; import { AcceleratorProfileSelectFieldState } from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; +import { getNGCSecretType, getNIMData } from '~/pages/modelServing/screens/projects/nimUtils'; + +const NIM_CONFIGMAP_NAME = 'nvidia-nim-images-data'; export const getServingRuntimeSizes = (config: DashboardConfigKind): ModelServingSize[] => { let sizes = config.spec.modelServerSizes || []; @@ -318,6 +324,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, dryRun = false, + isStorageNeeded?: boolean, ) => { if (!existingStorage) { return createAWSSecret(inferenceServiceData, dryRun).then((secret) => @@ -330,6 +337,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ) : createInferenceService( inferenceServiceData, @@ -338,6 +346,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ), ); } @@ -350,6 +359,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ) : createInferenceService( inferenceServiceData, @@ -358,6 +368,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ); }; @@ -370,6 +381,7 @@ export const getSubmitInferenceServiceResourceFn = ( 
selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, allowCreate?: boolean, secrets?: SecretKind[], + isStorageNeeded?: boolean, ): ((opts: { dryRun?: boolean }) => Promise) => { const inferenceServiceData = { ...createData, @@ -399,6 +411,7 @@ export const getSubmitInferenceServiceResourceFn = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ).then((inferenceService) => setUpTokenAuth( createData, @@ -553,6 +566,90 @@ export const filterOutConnectionsWithoutBucket = ( obj.dataConnection.data.data.AWS_S3_BUCKET.trim() !== '', ); +export interface ModelInfo { + name: string; + displayName: string; + shortDescription: string; + namespace: string; + tags: string[]; + latestTag: string; + updatedDate: string; +} + +export const fetchNIMModelNames = async ( + dashboardNamespace: string, +): Promise => { + const configMap = await getConfigMap(dashboardNamespace, NIM_CONFIGMAP_NAME); + if (configMap.data && Object.keys(configMap.data).length > 0) { + const modelInfos: ModelInfo[] = []; + for (const [key, value] of Object.entries(configMap.data)) { + try { + const modelData = JSON.parse(value); + modelInfos.push({ + name: key, + displayName: modelData.displayName, + shortDescription: modelData.shortDescription, + namespace: modelData.namespace, + tags: modelData.tags, + latestTag: modelData.latestTag, + updatedDate: modelData.updatedDate, + }); + } catch (error) { + throw new Error(`Failed to parse model data for key "${key}".`); + } + } + + return modelInfos.length > 0 ? 
modelInfos : undefined; + } + return undefined; +}; + +export const createNIMSecret = async ( + projectName: string, + secretName: string, + isNGC: boolean, + dryRun: boolean, +): Promise => { + try { + const data = await getNIMData(isNGC); + + const newSecret = { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: secretName, + namespace: projectName, + }, + data, + type: getNGCSecretType(isNGC), + }; + return await createSecret(newSecret, { dryRun }); + } catch (e) { + return Promise.reject(new Error(`Error creating NIM ${isNGC ? 'NGC' : null} secret`)); + } +}; + +export const createNIMPVC = ( + projectName: string, + pvcName: string, + pvcSize: string, + dryRun: boolean, +): Promise => + createPvc( + { + nameDesc: { + name: pvcName, + description: '', + }, + size: pvcSize, + }, + projectName, + { + dryRun, + }, + true, + ); + export const getCreateInferenceServiceLabels = ( data: Pick | undefined, ): { labels: Record } | undefined => { @@ -570,6 +667,5 @@ export const getCreateInferenceServiceLabels = ( }, }; } - return undefined; }; diff --git a/frontend/src/pages/modelServing/screens/types.ts b/frontend/src/pages/modelServing/screens/types.ts index 07fea3daf4..f443d593de 100644 --- a/frontend/src/pages/modelServing/screens/types.ts +++ b/frontend/src/pages/modelServing/screens/types.ts @@ -33,6 +33,13 @@ export type ModelStatus = { failedToSchedule: boolean; }; +export type SupportedModelFormatsInfo = { + name: string; + version: string; + autoSelect?: boolean; + priority?: number; +}; + export type CreatingServingRuntimeObject = { name: string; servingRuntimeTemplateName: string; @@ -41,6 +48,8 @@ export type CreatingServingRuntimeObject = { externalRoute: boolean; tokenAuth: boolean; tokens: ServingRuntimeToken[]; + imageName?: string; + supportedModelFormatsInfo?: SupportedModelFormatsInfo; }; export type ServingRuntimeToken = { diff --git a/frontend/src/pages/projects/components/PVSizeField.tsx 
b/frontend/src/pages/projects/components/PVSizeField.tsx index 49a46572e5..51e3136685 100644 --- a/frontend/src/pages/projects/components/PVSizeField.tsx +++ b/frontend/src/pages/projects/components/PVSizeField.tsx @@ -10,6 +10,7 @@ type PVSizeFieldProps = { menuAppendTo?: HTMLElement; setSize: (size: string) => void; currentSize?: string; + label?: string; }; const PVSizeField: React.FC = ({ @@ -18,8 +19,9 @@ const PVSizeField: React.FC = ({ menuAppendTo, setSize, currentSize, + label = 'Persistent storage size', }) => ( - + setSize(value)} diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx index fe5498c9b8..7b9e76a805 100644 --- a/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx @@ -12,12 +12,14 @@ import { import { getProjectModelServingPlatform } from '~/pages/modelServing/screens/projects/utils'; import ManageServingRuntimeModal from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ManageServingRuntimeModal'; import ManageKServeModal from '~/pages/modelServing/screens/projects/kServeModal/ManageKServeModal'; +import DeployNIMServiceModal from '~/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal'; type AddModelFooterProps = { selectedPlatform?: ServingRuntimePlatform; + isNIM?: boolean; }; -const AddModelFooter: React.FC = ({ selectedPlatform }) => { +const AddModelFooter: React.FC = ({ selectedPlatform, isNIM }) => { const [modalShown, setModalShown] = React.useState(false); const servingPlatformStatuses = useServingPlatformStatuses(); @@ -67,7 +69,7 @@ const AddModelFooter: React.FC = ({ selectedPlatform }) => isInline testId="model-serving-platform-button" /> - {modalShown && isProjectModelMesh ? ( + {modalShown && isProjectModelMesh && !isNIM ? 
( = ({ selectedPlatform }) => onClose={onSubmit} /> ) : null} - {modalShown && !isProjectModelMesh ? ( + {modalShown && !isProjectModelMesh && !isNIM ? ( = ({ selectedPlatform }) => onClose={onSubmit} /> ) : null} + {modalShown && isNIM ? ( + + getTemplateEnabledForPlatform(template, ServingRuntimePlatform.SINGLE), + )} + onClose={onSubmit} + /> + ) : null} ); }; diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx index a94235356c..8c1ed5659d 100644 --- a/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx @@ -1,36 +1,67 @@ import * as React from 'react'; +import { useEffect, useState } from 'react'; import { Alert, Gallery, Stack, Text, TextContent } from '@patternfly/react-core'; import CollapsibleSection from '~/concepts/design/CollapsibleSection'; +import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; +import { isNIMAPIKeyEnabled } from '~/pages/modelServing/screens/projects/nimUtils'; +import { useDashboardNamespace } from '~/redux/selectors'; +import SelectNIMCard from './SelectNIMCard'; import SelectSingleModelCard from './SelectSingleModelCard'; import SelectMultiModelCard from './SelectMultiModelCard'; -const PlatformSelectSection: React.FC = () => ( - - - - - Select the type of model serving platform to be used when deploying models from this - project. 
- - - - - - - - - -); +const PlatformSelectSection: React.FC = () => { + const [isNIMAPIKeyValid, setIsNIMAPIKeyValid] = useState(false); + const isNIMModelServingAvailable = useIsAreaAvailable(SupportedArea.NIM_MODEL).status; + const dashboardNamespace = useDashboardNamespace(); + + useEffect(() => { + const checkAPIKey = async () => { + try { + const valid = await isNIMAPIKeyEnabled(dashboardNamespace); + setIsNIMAPIKeyValid(valid); + } catch (error) { + setIsNIMAPIKeyValid(false); + } + }; + checkAPIKey(); + }, [dashboardNamespace]); + + const galleryWidths = + isNIMModelServingAvailable && isNIMAPIKeyValid + ? { + minWidths: { default: '100%', lg: 'calc(33.33% - 1rem / 3 * 2)' }, + maxWidths: { default: '100%', lg: 'calc(33.33% - 1rem / 3 * 2)' }, + } + : { + minWidths: { default: '100%', lg: 'calc(50% - 1rem / 2)' }, + maxWidths: { default: '100%', lg: 'calc(50% - 1rem / 2)' }, + }; + + return ( + + + + + Select the type of model serving platform to be used when deploying models from this + project. + + + + + + {isNIMModelServingAvailable && } + + + + + ); +}; export default PlatformSelectSection; diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/SelectNIMCard.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/SelectNIMCard.tsx new file mode 100644 index 0000000000..e6353c9ca0 --- /dev/null +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/SelectNIMCard.tsx @@ -0,0 +1,28 @@ +import * as React from 'react'; +import { CardBody, Text, TextContent } from '@patternfly/react-core'; +import { ProjectObjectType, SectionType } from '~/concepts/design/utils'; +import OverviewCard from '~/pages/projects/screens/detail/overview/components/OverviewCard'; +import { ServingRuntimePlatform } from '~/types'; +import AddModelFooter from './AddModelFooter'; + +const SelectNIMCard: React.FC = () => ( + + + + + Models are deployed using NVIDIA NIM microservices. 
Choose this option when you want to + deploy your model within a NIM container. Please provide the API key to authenticate with + the NIM service. + + + + + +); + +export default SelectNIMCard; diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx index 8c424c770d..7f3d052c47 100644 --- a/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx @@ -5,8 +5,8 @@ import { CardHeader, Flex, FlexItem, - TextContent, GalleryItem, + TextContent, TextList, TextListItem, TextListItemVariants, @@ -24,6 +24,8 @@ import InferenceServiceEndpoint from '~/pages/modelServing/screens/global/Infere import TypeBorderedCard from '~/concepts/design/TypeBorderedCard'; import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas/'; import { getDisplayNameFromK8sResource } from '~/concepts/k8s/utils'; +import { ProjectDetailsContext } from '~/pages/projects/ProjectDetailsContext'; +import { isNIMSupported } from '~/pages/modelServing/screens/projects/nimUtils'; interface DeployedModelCardProps { inferenceService: InferenceServiceKind; @@ -36,8 +38,11 @@ const DeployedModelCard: React.FC = ({ const [modelMetricsEnabled] = useModelMetricsEnabled(); const kserveMetricsEnabled = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; const modelMesh = isModelMesh(inferenceService); + const { currentProject } = React.useContext(ProjectDetailsContext); + const isKServeNIMEnabled = isNIMSupported(currentProject); - const modelMetricsSupported = modelMetricsEnabled && (modelMesh || kserveMetricsEnabled); + const modelMetricsSupported = + modelMetricsEnabled && (modelMesh || kserveMetricsEnabled) && !isKServeNIMEnabled; const inferenceServiceDisplayName = 
getDisplayNameFromK8sResource(inferenceService); diff --git a/frontend/src/pages/projects/types.ts b/frontend/src/pages/projects/types.ts index e866646607..1c892902fa 100644 --- a/frontend/src/pages/projects/types.ts +++ b/frontend/src/pages/projects/types.ts @@ -163,4 +163,8 @@ export enum NamespaceApplicationCase { * Upgrade an existing DSG project to work with model kserve. */ KSERVE_PROMOTION, + /** + * Nvidia NIMs run on KServe but have different requirements than regular models. + */ + KSERVE_NIM_PROMOTION, } diff --git a/manifests/common/crd/odhdashboardconfigs.opendatahub.io.crd.yaml b/manifests/common/crd/odhdashboardconfigs.opendatahub.io.crd.yaml index 6245c43081..d5fc487ed4 100644 --- a/manifests/common/crd/odhdashboardconfigs.opendatahub.io.crd.yaml +++ b/manifests/common/crd/odhdashboardconfigs.opendatahub.io.crd.yaml @@ -77,6 +77,8 @@ spec: type: boolean disableStorageClasses: type: boolean + disableNIMModelServing: + type: boolean groupsConfig: type: object required: diff --git a/manifests/rhoai/shared/odhdashboardconfig/odhdashboardconfig.yaml b/manifests/rhoai/shared/odhdashboardconfig/odhdashboardconfig.yaml index a2fd1ba9d8..79fe75e0bc 100644 --- a/manifests/rhoai/shared/odhdashboardconfig/odhdashboardconfig.yaml +++ b/manifests/rhoai/shared/odhdashboardconfig/odhdashboardconfig.yaml @@ -30,6 +30,7 @@ spec: disableModelRegistry: true disableConnectionTypes: true disableStorageClasses: true + disableNIMModelServing: true groupsConfig: adminGroups: "$(admin_groups)" allowedGroups: "system:authenticated"