From 5bdae2a2d12e896eee3c0c550ac8a3a548ef2469 Mon Sep 17 00:00:00 2001 From: Olga Lavtar Date: Wed, 4 Sep 2024 22:43:07 -0700 Subject: [PATCH] feat: integrating NIM model UI Signed-off-by: Olga Lavtar --- .../k8s/__tests__/inferenceServices.spec.ts | 12 +- frontend/src/api/k8s/inferenceServices.ts | 16 +- frontend/src/api/k8s/servingRuntimes.ts | 41 ++- .../projects/EmptyNIMModelServingCard.tsx | 102 ++++++ .../screens/projects/ModelServingPlatform.tsx | 7 +- .../NIMServiceModal/DeployNIMServiceModal.tsx | 327 ++++++++++++++++++ .../NIMModelDeploymentNameSection.tsx | 26 ++ .../NIMServiceModal/NIMModelListSection.tsx | 82 +++++ .../NIMServiceModal/NIMPVCSizeSection.tsx | 49 +++ .../modelServing/screens/projects/utils.ts | 113 +++++- .../src/pages/modelServing/screens/types.ts | 9 + .../overview/serverModels/AddModelFooter.tsx | 21 +- .../serverModels/PlatformSelectSection.tsx | 6 +- .../overview/serverModels/SelectNIMCard.tsx | 28 ++ 14 files changed, 815 insertions(+), 24 deletions(-) create mode 100644 frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx create mode 100644 frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx create mode 100644 frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx create mode 100644 frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx create mode 100644 frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection.tsx create mode 100644 frontend/src/pages/projects/screens/detail/overview/serverModels/SelectNIMCard.tsx diff --git a/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts b/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts index 0dda0c16a9..10f9786d98 100644 --- a/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts +++ b/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts @@ -1,9 +1,9 @@ import { - K8sStatus, k8sCreateResource, 
k8sDeleteResource, k8sGetResource, k8sListResource, + K8sStatus, k8sUpdateResource, } from '@openshift/dynamic-plugin-sdk-utils'; import { mockAcceleratorProfile } from '~/__mocks__/mockAcceleratorProfile'; @@ -28,7 +28,9 @@ import { InferenceServiceKind, ProjectKind } from '~/k8sTypes'; import { translateDisplayNameForK8s } from '~/concepts/k8s/utils'; import { ModelServingSize } from '~/pages/modelServing/screens/types'; import { AcceleratorProfileState } from '~/utilities/useAcceleratorProfileState'; -import { AcceleratorProfileSelectFieldState } from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; +import { + AcceleratorProfileSelectFieldState, +} from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; jest.mock('@openshift/dynamic-plugin-sdk-utils', () => ({ k8sListResource: jest.fn(), @@ -186,6 +188,7 @@ describe('assembleInferenceService', () => { undefined, false, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -218,6 +221,7 @@ describe('assembleInferenceService', () => { undefined, true, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -251,6 +255,7 @@ describe('assembleInferenceService', () => { undefined, false, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -279,6 +284,7 @@ describe('assembleInferenceService', () => { undefined, true, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -321,6 +327,7 @@ describe('assembleInferenceService', () => { undefined, false, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); @@ -373,6 +380,7 @@ describe('assembleInferenceService', () => { undefined, true, undefined, + undefined, acceleratorProfileState, selectedAcceleratorProfile, ); diff --git a/frontend/src/api/k8s/inferenceServices.ts b/frontend/src/api/k8s/inferenceServices.ts index 028688eda7..6bb825836c 100644 --- 
a/frontend/src/api/k8s/inferenceServices.ts +++ b/frontend/src/api/k8s/inferenceServices.ts @@ -4,8 +4,8 @@ import { k8sDeleteResource, k8sGetResource, k8sListResource, - k8sUpdateResource, K8sStatus, + k8sUpdateResource, } from '@openshift/dynamic-plugin-sdk-utils'; import { InferenceServiceModel } from '~/api/models'; import { InferenceServiceKind, K8sAPIOptions, KnownLabels } from '~/k8sTypes'; @@ -14,7 +14,9 @@ import { translateDisplayNameForK8s } from '~/concepts/k8s/utils'; import { applyK8sAPIOptions } from '~/api/apiMergeUtils'; import { AcceleratorProfileState } from '~/utilities/useAcceleratorProfileState'; import { ContainerResources } from '~/types'; -import { AcceleratorProfileSelectFieldState } from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; +import { + AcceleratorProfileSelectFieldState, +} from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; import { getModelServingProjects } from './projects'; import { assemblePodSpecOptions } from './utils'; @@ -24,6 +26,7 @@ export const assembleInferenceService = ( editName?: string, isModelMesh?: boolean, inferenceService?: InferenceServiceKind, + isStorageNeeded?: boolean, initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, ): InferenceServiceKind => { @@ -162,6 +165,11 @@ export const assembleInferenceService = ( }; } + // If storage is not needed, remove storage from the inference service + if (isStorageNeeded !== undefined && !isStorageNeeded) { + delete updateInferenceService.spec.predictor.model?.storage; + } + return updateInferenceService; }; @@ -234,6 +242,7 @@ export const createInferenceService = ( initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, dryRun = false, + isStorageNeeded?: boolean, ): Promise => { const inferenceService = assembleInferenceService( data, @@ -241,6 +250,7 @@ export const 
createInferenceService = ( undefined, isModelMesh, undefined, + isStorageNeeded, initialAcceleratorProfile, selectedAcceleratorProfile, ); @@ -263,6 +273,7 @@ export const updateInferenceService = ( initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, dryRun = false, + isStorageNeeded?: boolean, ): Promise => { const inferenceService = assembleInferenceService( data, @@ -270,6 +281,7 @@ export const updateInferenceService = ( existingData.metadata.name, isModelMesh, existingData, + isStorageNeeded, initialAcceleratorProfile, selectedAcceleratorProfile, ); diff --git a/frontend/src/api/k8s/servingRuntimes.ts b/frontend/src/api/k8s/servingRuntimes.ts index d00e5f2757..82a06ab219 100644 --- a/frontend/src/api/k8s/servingRuntimes.ts +++ b/frontend/src/api/k8s/servingRuntimes.ts @@ -7,19 +7,16 @@ import { k8sUpdateResource, } from '@openshift/dynamic-plugin-sdk-utils'; import { ServingRuntimeModel } from '~/api/models'; -import { - K8sAPIOptions, - ServingContainer, - ServingRuntimeAnnotations, - ServingRuntimeKind, -} from '~/k8sTypes'; -import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types'; +import { K8sAPIOptions, ServingContainer, ServingRuntimeAnnotations, ServingRuntimeKind } from '~/k8sTypes'; +import { CreatingServingRuntimeObject, SupportedModelFormatsInfo } from '~/pages/modelServing/screens/types'; import { ContainerResources } from '~/types'; import { getModelServingRuntimeName } from '~/pages/modelServing/utils'; import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils'; import { applyK8sAPIOptions } from '~/api/apiMergeUtils'; import { AcceleratorProfileState } from '~/utilities/useAcceleratorProfileState'; -import { AcceleratorProfileSelectFieldState } from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; +import { + AcceleratorProfileSelectFieldState, +} from 
'~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; import { getModelServingProjects } from './projects'; import { assemblePodSpecOptions, getshmVolume, getshmVolumeMount } from './utils'; @@ -33,7 +30,15 @@ export const assembleServingRuntime = ( selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, isModelMesh?: boolean, ): ServingRuntimeKind => { - const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth } = data; + const { + name: displayName, + numReplicas, + modelSize, + externalRoute, + tokenAuth, + imageName, + supportedModelFormatsInfo, + } = data; const createName = isCustomServingRuntimesEnabled ? translateDisplayNameForK8s(displayName) : getModelServingRuntimeName(namespace); @@ -123,7 +128,12 @@ export const assembleServingRuntime = ( volumeMounts.push(getshmVolumeMount()); } - const containerWithoutResources = _.omit(container, 'resources'); + const updatedContainer = { + ...container, + ...(imageName && { image: imageName }), + }; + + const containerWithoutResources = _.omit(updatedContainer, 'resources'); return { ...containerWithoutResources, @@ -134,6 +144,17 @@ export const assembleServingRuntime = ( }, ); + if (supportedModelFormatsInfo) { + const supportedModelFormatsObj: SupportedModelFormatsInfo = { + name: supportedModelFormatsInfo.name, + version: supportedModelFormatsInfo.version, + autoSelect: true, + priority: 1, + }; + + updatedServingRuntime.spec.supportedModelFormats = [supportedModelFormatsObj]; + } + if (isModelMesh) { updatedServingRuntime.spec.tolerations = tolerations; } diff --git a/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx b/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx new file mode 100644 index 0000000000..cdde236478 --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/EmptyNIMModelServingCard.tsx @@ -0,0 +1,102 @@ +import * as React from 'react'; +import { + Bullseye, + Card, + CardBody, + 
CardFooter, + CardTitle, + Text, + TextContent, + TextVariants, +} from '@patternfly/react-core'; +import { ProjectDetailsContext } from '~/pages/projects/ProjectDetailsContext'; +import { ServingRuntimePlatform } from '~/types'; +import { + getSortedTemplates, + getTemplateEnabled, + getTemplateEnabledForPlatform, +} from '~/pages/modelServing/customServingRuntimes/utils'; +import ModelServingPlatformButtonAction from '~/pages/modelServing/screens/projects/ModelServingPlatformButtonAction'; +import DeployNIMServiceModal from './NIMServiceModal/DeployNIMServiceModal'; + +const EmptyNIMModelServingCard: React.FC = () => { + const { + dataConnections: { data: dataConnections }, + } = React.useContext(ProjectDetailsContext); + const [open, setOpen] = React.useState(false); + + const { + servingRuntimes: { refresh: refreshServingRuntime }, + servingRuntimeTemplates: [templates], + servingRuntimeTemplateOrder: { data: templateOrder }, + servingRuntimeTemplateDisablement: { data: templateDisablement }, + serverSecrets: { refresh: refreshTokens }, + inferenceServices: { refresh: refreshInferenceServices }, + currentProject, + } = React.useContext(ProjectDetailsContext); + + const onSubmit = (submit: boolean) => { + if (submit) { + refreshServingRuntime(); + refreshInferenceServices(); + setTimeout(refreshTokens, 500); // need a timeout to wait for tokens creation + } + }; + + const templatesSorted = getSortedTemplates(templates, templateOrder); + const templatesEnabled = templatesSorted.filter((template) => + getTemplateEnabled(template, templateDisablement), + ); + const emptyTemplates = templatesEnabled.length === 0; + + return ( + <> + + + + NVIDIA NIM model serving platform + + + + Models are deployed using NVIDIA NIM microservices. Choose this option when you want to + deploy your model within a NIM container. Please provide the API key to authenticate with + the NIM service. 
+ + + + setOpen(true)} + variant="secondary" + testId="nim-serving-deploy-button" + /> + + + + + getTemplateEnabledForPlatform(template, ServingRuntimePlatform.SINGLE), + )} + onClose={(submit) => { + onSubmit(submit); + setOpen(false); + }} + /> + + ); +}; + +export default EmptyNIMModelServingCard; diff --git a/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx b/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx index 903862c2ca..aefd1f6d21 100644 --- a/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx +++ b/frontend/src/pages/modelServing/screens/projects/ModelServingPlatform.tsx @@ -23,7 +23,8 @@ import { } from '~/pages/modelServing/customServingRuntimes/utils'; import { ServingRuntimePlatform } from '~/types'; import { getProjectModelServingPlatform } from '~/pages/modelServing/screens/projects/utils'; -import KServeInferenceServiceTable from '~/pages/modelServing/screens/projects/KServeSection/KServeInferenceServiceTable'; +import KServeInferenceServiceTable + from '~/pages/modelServing/screens/projects/KServeSection/KServeInferenceServiceTable'; import useServingPlatformStatuses from '~/pages/modelServing/useServingPlatformStatuses'; import DashboardPopupIconButton from '~/concepts/dashboard/DashboardPopupIconButton'; import DetailsSection from '~/pages/projects/screens/detail/DetailsSection'; @@ -32,6 +33,7 @@ import EmptySingleModelServingCard from '~/pages/modelServing/screens/projects/E import EmptyMultiModelServingCard from '~/pages/modelServing/screens/projects/EmptyMultiModelServingCard'; import { ProjectObjectType, typedEmptyImage } from '~/concepts/design/utils'; import EmptyModelServingPlatform from '~/pages/modelServing/screens/projects/EmptyModelServingPlatform'; +import EmptyNIMModelServingCard from '~/pages/modelServing/screens/projects/EmptyNIMModelServingCard'; import ManageServingRuntimeModal from './ServingRuntimeModal/ManageServingRuntimeModal'; import 
ModelMeshServingRuntimeTable from './ModelMeshSection/ServingRuntimeTable'; import ModelServingPlatformButtonAction from './ModelServingPlatformButtonAction'; @@ -197,6 +199,9 @@ const ModelServingPlatform: React.FC = () => { + + + diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx new file mode 100644 index 0000000000..b305f6329d --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx @@ -0,0 +1,327 @@ +import * as React from 'react'; +import { Alert, AlertActionCloseButton, Form, Modal, Stack, StackItem } from '@patternfly/react-core'; +import { EitherOrNone } from '@openshift/dynamic-plugin-sdk'; +import { + createNIMPVC, + createNIMSecret, + getSubmitInferenceServiceResourceFn, + getSubmitServingRuntimeResourcesFn, + useCreateInferenceServiceObject, + useCreateServingRuntimeObject, +} from '~/pages/modelServing/screens/projects/utils'; +import { + AccessReviewResourceAttributes, + InferenceServiceKind, + ProjectKind, + SecretKind, + TemplateKind, +} from '~/k8sTypes'; +import { requestsUnderLimits, resourcesArePositive } from '~/pages/modelServing/utils'; +import useCustomServingRuntimesEnabled + from '~/pages/modelServing/customServingRuntimes/useCustomServingRuntimesEnabled'; +import { getServingRuntimeFromName } from '~/pages/modelServing/customServingRuntimes/utils'; +import useServingAcceleratorProfile from '~/pages/modelServing/screens/projects/useServingAcceleratorProfile'; +import DashboardModalFooter from '~/concepts/dashboard/DashboardModalFooter'; +import { ServingRuntimeEditInfo } from '~/pages/modelServing/screens/types'; +import ServingRuntimeSizeSection + from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ServingRuntimeSizeSection'; +import NIMModelListSection from 
'~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection'; +import NIMModelDeploymentNameSection + from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection'; +import ProjectSection from '~/pages/modelServing/screens/projects/InferenceServiceModal/ProjectSection'; +import { DataConnection, NamespaceApplicationCase } from '~/pages/projects/types'; +import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils'; +import { useAccessReview } from '~/api'; +import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; +import KServeAutoscalerReplicaSection + from '~/pages/modelServing/screens/projects/kServeModal/KServeAutoscalerReplicaSection'; +import useGenericObjectState from '~/utilities/useGenericObjectState'; +import { + AcceleratorProfileSelectFieldState, +} from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; +import NIMPVCSizeSection from '~/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection'; +import { useDashboardNamespace } from '~/redux/selectors'; + +const NIM_SECRET_NAME = 'nvidia-nim-secrets'; +const NIM_NGC_SECRET_NAME = 'ngc-secret'; +const NIM_PVC_NAME = 'nim-pvc'; + +const accessReviewResource: AccessReviewResourceAttributes = { + group: 'rbac.authorization.k8s.io', + resource: 'rolebindings', + verb: 'create', +}; + +type DeployNIMServiceModalProps = { + isOpen: boolean; + onClose: (submit: boolean) => void; + servingRuntimeTemplates?: TemplateKind[]; +} & EitherOrNone< + { + projectContext?: { + currentProject: ProjectKind; + dataConnections: DataConnection[]; + }; + }, + { + editInfo?: { + servingRuntimeEditInfo?: ServingRuntimeEditInfo; + inferenceServiceEditInfo?: InferenceServiceKind; + secrets?: SecretKind[]; + }; + } +>; + +const DeployNIMServiceModal: React.FC = ({ + isOpen, + onClose, + servingRuntimeTemplates, + projectContext, + editInfo, +}) => { + const [createDataServingRuntime, 
setCreateDataServingRuntime, resetDataServingRuntime, sizes] = + useCreateServingRuntimeObject(editInfo?.servingRuntimeEditInfo); + const [createDataInferenceService, setCreateDataInferenceService, resetDataInferenceService] = + useCreateInferenceServiceObject( + editInfo?.inferenceServiceEditInfo, + editInfo?.servingRuntimeEditInfo?.servingRuntime, + editInfo?.secrets, + ); + + const isAuthorinoEnabled = useIsAreaAvailable(SupportedArea.K_SERVE_AUTH).status; + const currentProjectName = projectContext?.currentProject.metadata.name; + const namespace = currentProjectName || createDataInferenceService.project; + const isInferenceServiceNameWithinLimit = + translateDisplayNameForK8s(createDataInferenceService.name).length <= 253; + + const acceleratorProfileState = useServingAcceleratorProfile( + editInfo?.servingRuntimeEditInfo?.servingRuntime, + editInfo?.inferenceServiceEditInfo, + ); + const [ + selectedAcceleratorProfile, + setSelectedAcceleratorProfile, + resetSelectedAcceleratorProfile, + ] = useGenericObjectState({ + profile: undefined, + count: 0, + useExistingSettings: false, + }); + const customServingRuntimesEnabled = useCustomServingRuntimesEnabled(); + const [allowCreate] = useAccessReview({ + ...accessReviewResource, + namespace, + }); + + const [actionInProgress, setActionInProgress] = React.useState(false); + const [error, setError] = React.useState(); + const [alertVisible, setAlertVisible] = React.useState(true); + const [pvcSize, setPvcSize] = React.useState(''); + const { dashboardNamespace } = useDashboardNamespace(); + + React.useEffect(() => { + if (currentProjectName && isOpen) { + setCreateDataInferenceService('project', currentProjectName); + } + }, [currentProjectName, setCreateDataInferenceService, isOpen]); + + // Serving Runtime Validation + const isDisabledServingRuntime = + namespace === '' || actionInProgress || createDataServingRuntime.imageName === undefined; + + const baseInputValueValid = + createDataServingRuntime.numReplicas >= 
0 && + resourcesArePositive(createDataServingRuntime.modelSize.resources) && + requestsUnderLimits(createDataServingRuntime.modelSize.resources); + + const isDisabledInferenceService = + actionInProgress || + createDataInferenceService.name.trim() === '' || + createDataInferenceService.project === '' || + !isInferenceServiceNameWithinLimit || + !baseInputValueValid; + + const servingRuntimeSelected = React.useMemo( + () => + editInfo?.servingRuntimeEditInfo?.servingRuntime || + getServingRuntimeFromName('nvidia-nim-runtime', servingRuntimeTemplates), + [editInfo, servingRuntimeTemplates], + ); + + const onBeforeClose = (submitted: boolean) => { + onClose(submitted); + setError(undefined); + setActionInProgress(false); + resetDataServingRuntime(); + resetDataInferenceService(); + resetSelectedAcceleratorProfile(); + setAlertVisible(true); + }; + + const setErrorModal = (e: Error) => { + setError(e); + setActionInProgress(false); + }; + + const onSuccess = () => { + setActionInProgress(false); + onBeforeClose(true); + }; + + const submit = () => { + setError(undefined); + setActionInProgress(true); + + const servingRuntimeName = + editInfo?.inferenceServiceEditInfo?.spec.predictor.model?.runtime || + translateDisplayNameForK8s(createDataInferenceService.name); + + const submitServingRuntimeResources = getSubmitServingRuntimeResourcesFn( + servingRuntimeSelected, + createDataServingRuntime, + customServingRuntimesEnabled, + namespace, + editInfo?.servingRuntimeEditInfo, + false, + acceleratorProfileState, + selectedAcceleratorProfile, + NamespaceApplicationCase.KSERVE_PROMOTION, + projectContext?.currentProject, + servingRuntimeName, + true, + ); + + const submitInferenceServiceResource = getSubmitInferenceServiceResourceFn( + createDataInferenceService, + editInfo?.inferenceServiceEditInfo, + servingRuntimeName, + false, + acceleratorProfileState, + selectedAcceleratorProfile, + allowCreate, + editInfo?.secrets, + false, + ); + + Promise.all([ + 
createNIMSecret(namespace, NIM_SECRET_NAME, false, false, dashboardNamespace), + createNIMSecret(namespace, NIM_NGC_SECRET_NAME, true, false, dashboardNamespace), + createNIMPVC(namespace, NIM_PVC_NAME, pvcSize, false), + submitServingRuntimeResources({ dryRun: true }), + submitInferenceServiceResource({ dryRun: true }), + ]) + .then(() => + Promise.all([ + submitServingRuntimeResources({ dryRun: false }), + submitInferenceServiceResource({ dryRun: false }), + ]), + ) + .then(() => onSuccess()) + .catch((e) => { + setErrorModal(e); + }); + }; + + return ( + onBeforeClose(false)} + footer={ + onBeforeClose(false)} + isSubmitDisabled={isDisabledServingRuntime || isDisabledInferenceService} + error={error} + alertTitle="Error creating model server" + /> + } + showClose + > +
{ + e.preventDefault(); + submit(); + }} + > + + {!isAuthorinoEnabled && alertVisible && ( + + setAlertVisible(false)} />} + > +

+ The NVIDIA NIM model serving platform used by this project allows deployed models + to be accessible via external routes. It is recommended that token authentication + be enabled to protect these routes. The serving platform requires the Authorino + operator be installed on the cluster for token authentication. Contact a cluster + administrator to install the operator. +

+
+
+ )} + + + + + + + + + + + + + + + + + + + + +
+
+
+ ); +}; + +export default DeployNIMServiceModal; diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx new file mode 100644 index 0000000000..20f894ec1e --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelDeploymentNameSection.tsx @@ -0,0 +1,26 @@ +import * as React from 'react'; +import { FormGroup, TextInput } from '@patternfly/react-core'; +import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; +import { CreatingInferenceServiceObject } from '~/pages/modelServing/screens/types'; + +type NIMModelDeploymentNameSectionProps = { + data: CreatingInferenceServiceObject; + setData: UpdateObjectAtPropAndValue; +}; + +const NIMModelDeploymentNameSection: React.FC = ({ + data, + setData, +}) => ( + + setData('name', name)} + /> + +); + +export default NIMModelDeploymentNameSection; diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx new file mode 100644 index 0000000000..d5d5509007 --- /dev/null +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx @@ -0,0 +1,82 @@ +import * as React from 'react'; +import { useEffect, useState } from 'react'; +import { FormGroup } from '@patternfly/react-core'; +import { UpdateObjectAtPropAndValue } from '~/pages/projects/types'; +import { CreatingInferenceServiceObject, CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types'; +import SimpleSelect from '~/components/SimpleSelect'; +import { fetchNIMModelNames, ModelInfo } from '~/pages/modelServing/screens/projects/utils'; +import { useDashboardNamespace } from '~/redux/selectors'; + +type NIMModelListSectionProps = { + inferenceServiceData: CreatingInferenceServiceObject; + 
setInferenceServiceData: UpdateObjectAtPropAndValue; + setServingRuntimeData: UpdateObjectAtPropAndValue; + isEditing?: boolean; +}; + +const NIMModelListSection: React.FC = ({ + inferenceServiceData, + setInferenceServiceData, + setServingRuntimeData, + isEditing, +}) => { + const [options, setOptions] = useState<{ key: string; label: string }[]>([]); + const [modelList, setModelList] = useState([]); + const { dashboardNamespace } = useDashboardNamespace(); + + useEffect(() => { + const getModelNames = async () => { + const modelInfos = await fetchNIMModelNames(dashboardNamespace); + if (modelInfos !== undefined) { + const fetchedOptions = modelInfos.map((modelInfo) => ({ + key: modelInfo.name, + label: `${modelInfo.displayName} - ${modelInfo.latestTag}`, + })); + setModelList(modelInfos); + setOptions(fetchedOptions); + } + }; + getModelNames(); + }, [dashboardNamespace]); + + const getSupportedModelFormatsInfo = (name: string) => { + const modelInfo = modelList.find((model) => model.name === name); + if (modelInfo) { + return { + name: modelInfo.name, + version: modelInfo.latestTag, + }; + } + return { name: '', version: '' }; + }; + + const getNIMImageName = (name: string) => { + const imageInfo = modelList.find((model) => model.name === name); + if (imageInfo) { + return `nvcr.io/${imageInfo.namespace}/${name}:${imageInfo.latestTag}`; + } + return ''; + }; + + return ( + + { + setServingRuntimeData('supportedModelFormatsInfo', getSupportedModelFormatsInfo(name)); + setServingRuntimeData('imageName', getNIMImageName(name)); + setInferenceServiceData('format', { name }); + }} + /> + + ); +}; + +export default NIMModelListSection; diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection.tsx new file mode 100644 index 0000000000..2408bb9835 --- /dev/null +++ 
b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMPVCSizeSection.tsx @@ -0,0 +1,49 @@ +import React, { useState } from 'react'; +import { FormGroup, HelperText, HelperTextItem, TextInput } from '@patternfly/react-core'; + +interface PVCSizeSectionProps { + pvcSize: string; + setPvcSize: (value: string) => void; +} + +const PVCSizeSection: React.FC = ({ pvcSize, setPvcSize }) => { + const [error, setError] = useState(''); + + const handlePVCSizeChange = (value: string) => { + let errorMessage = ''; + if (value.length === 0) { + errorMessage = 'PVC Size is required'; + } else if (!/^\d+(Gi|Mi|Ti)$/.test(value)) { + errorMessage = 'Invalid format. Use numbers followed by Gi, Mi, or Ti (e.g., 10Gi)'; + } + setError(errorMessage); + setPvcSize(value); + }; + + return ( + + handlePVCSizeChange(value)} + placeholder="e.g., 10Gi" + aria-label="pvc-size-input" + /> + + + Specify the size of the cluster storage instance that will be created to store the + downloaded NVIDIA NIM. 
+ + + {error && ( + + {error} + + )} + + ); +}; + +export default PVCSizeSection; diff --git a/frontend/src/pages/modelServing/screens/projects/utils.ts b/frontend/src/pages/modelServing/screens/projects/utils.ts index b97c4059a0..b0da49d2a6 100644 --- a/frontend/src/pages/modelServing/screens/projects/utils.ts +++ b/frontend/src/pages/modelServing/screens/projects/utils.ts @@ -3,6 +3,7 @@ import { DashboardConfigKind, InferenceServiceKind, KnownLabels, + PersistentVolumeClaimKind, ProjectKind, SecretKind, ServingRuntimeKind, @@ -13,10 +14,10 @@ import { CreatingInferenceServiceObject, CreatingServingRuntimeObject, InferenceServiceStorageType, + LabeledDataConnection, + ModelServingSize, ServingPlatformStatuses, ServingRuntimeEditInfo, - ModelServingSize, - LabeledDataConnection, } from '~/pages/modelServing/screens/types'; import { ServingRuntimePlatform } from '~/types'; import { DEFAULT_MODEL_SERVER_SIZES } from '~/pages/modelServing/screens/const'; @@ -36,15 +37,24 @@ import { addSupportServingPlatformProject, assembleSecret, createInferenceService, + createPvc, createSecret, createServingRuntime, + getConfigMap, + getSecret, updateInferenceService, updateServingRuntime, } from '~/api'; import { isDataConnectionAWS } from '~/pages/projects/screens/detail/data-connections/utils'; import { removeLeadingSlash } from '~/utilities/string'; import { RegisteredModelDeployInfo } from '~/pages/modelRegistry/screens/RegisteredModels/useRegisteredModelDeployInfo'; -import { AcceleratorProfileSelectFieldState } from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; +import { + AcceleratorProfileSelectFieldState, +} from '~/pages/notebookController/screens/server/AcceleratorProfileSelectField'; + +const NIM_CONFIGMAP_NAME = 'nvidia-nim-images-data'; +const NIM_SECRET_NAME = 'nvidia-nim-access'; +const NIM_NGC_SECRET_NAME = 'nvidia-nim-image-pull'; export const getServingRuntimeSizes = (config: DashboardConfigKind): ModelServingSize[] => { let sizes 
= config.spec.modelServerSizes || []; @@ -318,6 +328,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile?: AcceleratorProfileState, selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, dryRun = false, + isStorageNeeded?: boolean, ) => { if (!existingStorage) { return createAWSSecret(inferenceServiceData, dryRun).then((secret) => @@ -330,6 +341,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ) : createInferenceService( inferenceServiceData, @@ -338,6 +350,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ), ); } @@ -350,6 +363,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ) : createInferenceService( inferenceServiceData, @@ -358,6 +372,7 @@ const createInferenceServiceAndDataConnection = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ); }; @@ -370,6 +385,7 @@ export const getSubmitInferenceServiceResourceFn = ( selectedAcceleratorProfile?: AcceleratorProfileSelectFieldState, allowCreate?: boolean, secrets?: SecretKind[], + isStorageNeeded?: boolean, ): ((opts: { dryRun?: boolean }) => Promise) => { const inferenceServiceData = { ...createData, @@ -399,6 +415,7 @@ export const getSubmitInferenceServiceResourceFn = ( initialAcceleratorProfile, selectedAcceleratorProfile, dryRun, + isStorageNeeded, ).then((inferenceService) => setUpTokenAuth( createData, @@ -553,6 +570,96 @@ export const filterOutConnectionsWithoutBucket = ( obj.dataConnection.data.data.AWS_S3_BUCKET.trim() !== '', ); +export interface ModelInfo { + name: string; + displayName: string; + shortDescription: string; + namespace: string; + tags: string[]; + latestTag: string; + updatedDate: string; +} +/** Loads NIM model metadata from the dashboard ConfigMap; resolves to undefined when it has no data, rejects if an entry is not valid JSON. */ +export const fetchNIMModelNames = async ( + dashboardNamespace:
string, +): Promise => { + const configMap = await getConfigMap(dashboardNamespace, NIM_CONFIGMAP_NAME); + if (configMap.data && Object.keys(configMap.data).length > 0) { + const modelInfos: ModelInfo[] = Object.entries(configMap.data).map(([key, value]) => { + const modelData = JSON.parse(value); + return { + name: key, + displayName: modelData.displayName, + shortDescription: modelData.shortDescription, + namespace: modelData.namespace, + tags: modelData.tags, + latestTag: modelData.latestTag, + updatedDate: modelData.updatedDate, + }; + }); + return modelInfos; + } + return undefined; +}; +/** Copies the NIM API key (or, when isNGC, the NGC docker-config entry) from the dashboard namespace into a new Secret in the project. */ +export const createNIMSecret = async ( + projectName: string, + secretName: string, + isNGC: boolean, + dryRun: boolean, + dashboardNamespace: string, +): Promise => { + const labels: Record = { + [KnownLabels.DASHBOARD_RESOURCE]: 'true', + }; + const data: Record = {}; + const newSecret = { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: secretName, + namespace: projectName, + // labels, -- NOTE(review): left disabled, so the 'labels' object above is never attached; confirm this is intended + }, + data, + type: isNGC ? 'kubernetes.io/dockerconfigjson' : 'Opaque', + }; + const nimSecretData: SecretKind = isNGC + ? await getSecret(dashboardNamespace, NIM_NGC_SECRET_NAME) + : await getSecret(dashboardNamespace, NIM_SECRET_NAME); + // Fill `data` (already referenced by newSecret) only when the source secret carries a payload. + if (nimSecretData.data) { + if (!isNGC) { + data.NGC_API_KEY = nimSecretData.data.api_key; + } else { + data['.dockerconfigjson'] = nimSecretData.data['.dockerconfigjson']; + } + return createSecret(newSecret, { dryRun }); + } + // The source secret had no data, so reject rather than create an empty secret.
+ throw new Error(`Error creating NIM ${isNGC ? 'NGC ' : ''}secret`); +}; +/** Delegates to createPvc to create a claim named pvcName with size pvcSize and an empty description in the project. */ +export const createNIMPVC = ( + projectName: string, + pvcName: string, + pvcSize: string, + dryRun: boolean, +): Promise => + createPvc( + { + nameDesc: { + name: pvcName, + description: '', + }, + size: pvcSize, + }, + projectName, + undefined, + { + dryRun, + }, + ); export const getCreateInferenceServiceLabels = ( data: Pick | undefined, ): { labels: Record } | undefined => { diff --git a/frontend/src/pages/modelServing/screens/types.ts b/frontend/src/pages/modelServing/screens/types.ts index 07fea3daf4..f443d593de 100644 --- a/frontend/src/pages/modelServing/screens/types.ts +++ b/frontend/src/pages/modelServing/screens/types.ts @@ -33,6 +33,13 @@ export type ModelStatus = { failedToSchedule: boolean; }; +export type SupportedModelFormatsInfo = { + name: string; + version: string; + autoSelect?: boolean; + priority?: number; +}; + export type CreatingServingRuntimeObject = { name: string; servingRuntimeTemplateName: string; @@ -41,6 +48,8 @@ export type CreatingServingRuntimeObject = { externalRoute: boolean; tokenAuth: boolean; tokens: ServingRuntimeToken[]; + imageName?: string; + supportedModelFormatsInfo?: SupportedModelFormatsInfo; }; export type ServingRuntimeToken = { diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx index fe5498c9b8..8135428dea 100644 --- a/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/AddModelFooter.tsx @@ -10,14 +10,17 @@ import { getTemplateEnabledForPlatform, } from '~/pages/modelServing/customServingRuntimes/utils'; import { getProjectModelServingPlatform } from '~/pages/modelServing/screens/projects/utils'; -import ManageServingRuntimeModal from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ManageServingRuntimeModal'; +import
ManageServingRuntimeModal + from '~/pages/modelServing/screens/projects/ServingRuntimeModal/ManageServingRuntimeModal'; import ManageKServeModal from '~/pages/modelServing/screens/projects/kServeModal/ManageKServeModal'; +import DeployNIMServiceModal from '~/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal'; type AddModelFooterProps = { selectedPlatform?: ServingRuntimePlatform; + isNIM?: boolean; }; -const AddModelFooter: React.FC = ({ selectedPlatform }) => { +const AddModelFooter: React.FC = ({ selectedPlatform, isNIM }) => { const [modalShown, setModalShown] = React.useState(false); const servingPlatformStatuses = useServingPlatformStatuses(); @@ -67,7 +70,7 @@ const AddModelFooter: React.FC = ({ selectedPlatform }) => isInline testId="model-serving-platform-button" /> - {modalShown && isProjectModelMesh ? ( + {modalShown && isProjectModelMesh && !isNIM ? ( = ({ selectedPlatform }) => onClose={onSubmit} /> ) : null} - {modalShown && !isProjectModelMesh ? ( + {modalShown && !isProjectModelMesh && !isNIM ? ( = ({ selectedPlatform }) => onClose={onSubmit} /> ) : null} + {modalShown && isNIM ? 
( + + getTemplateEnabledForPlatform(template, ServingRuntimePlatform.SINGLE), + )} + onClose={onSubmit} + /> + ) : null} ); }; diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx index a94235356c..08c76f175e 100644 --- a/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/PlatformSelectSection.tsx @@ -1,6 +1,7 @@ import * as React from 'react'; import { Alert, Gallery, Stack, Text, TextContent } from '@patternfly/react-core'; import CollapsibleSection from '~/concepts/design/CollapsibleSection'; +import SelectNIMCard from './SelectNIMCard'; import SelectSingleModelCard from './SelectSingleModelCard'; import SelectMultiModelCard from './SelectMultiModelCard'; @@ -18,11 +19,12 @@ const PlatformSelectSection: React.FC = () => ( + ( + + + + + Models are deployed using NVIDIA NIM microservices. Choose this option when you want to + deploy your model within a NIM container. Please provide the API key to authenticate with + the NIM service. + + + + + +); + +export default SelectNIMCard;