Skip to content

Commit 48a64e3

Browse files
authored
[CSI] trigger StopEndpoint if StartEndpoint has failed with GRPC Timeout error (#2802)
1 parent 0e6e9e7 commit 48a64e3

File tree

2 files changed

+214
-1
lines changed

2 files changed

+214
-1
lines changed

cloud/blockstore/tools/csi_driver/internal/driver/node.go

+44-1
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,11 @@ func (s *nodeService) nodePublishDiskAsVhostSocket(
492492
})
493493

494494
if err != nil {
495+
if s.IsGrpcTimeoutError(err) {
496+
s.nbsClient.StopEndpoint(ctx, &nbsapi.TStopEndpointRequest{
497+
UnixSocketPath: filepath.Join(endpointDir, nbsSocketName),
498+
})
499+
}
495500
return fmt.Errorf("failed to start NBS endpoint: %w", err)
496501
}
497502

@@ -582,6 +587,11 @@ func (s *nodeService) nodeStageDiskAsVhostSocket(
582587
})
583588

584589
if err != nil {
590+
if s.IsGrpcTimeoutError(err) {
591+
s.nbsClient.StopEndpoint(ctx, &nbsapi.TStopEndpointRequest{
592+
UnixSocketPath: filepath.Join(endpointDir, nbsSocketName),
593+
})
594+
}
585595
return fmt.Errorf("failed to start NBS endpoint: %w", err)
586596
}
587597

@@ -716,6 +726,19 @@ func (s *nodeService) IsMountConflictError(err error) bool {
716726
return false
717727
}
718728

729+
func (s *nodeService) IsGrpcTimeoutError(err error) bool {
730+
if err != nil {
731+
var clientErr *nbsclient.ClientError
732+
if errors.As(err, &clientErr) {
733+
if clientErr.Code == nbsclient.E_GRPC_DEADLINE_EXCEEDED {
734+
return true
735+
}
736+
}
737+
}
738+
739+
return false
740+
}
741+
719742
func (s *nodeService) hasLocalEndpoint(
720743
ctx context.Context,
721744
diskId string) (bool, error) {
@@ -900,7 +923,7 @@ func (s *nodeService) startNbsEndpointForNBD(
900923
}
901924

902925
hostType := nbsapi.EHostType_HOST_TYPE_DEFAULT
903-
return s.nbsClient.StartEndpoint(ctx, &nbsapi.TStartEndpointRequest{
926+
resp, err := s.nbsClient.StartEndpoint(ctx, &nbsapi.TStartEndpointRequest{
904927
UnixSocketPath: filepath.Join(endpointDir, nbsSocketName),
905928
DiskId: diskId,
906929
InstanceId: nbsInstanceId,
@@ -918,6 +941,14 @@ func (s *nodeService) startNbsEndpointForNBD(
918941
HostType: &hostType,
919942
},
920943
})
944+
945+
if s.IsGrpcTimeoutError(err) {
946+
s.nbsClient.StopEndpoint(ctx, &nbsapi.TStopEndpointRequest{
947+
UnixSocketPath: filepath.Join(endpointDir, nbsSocketName),
948+
})
949+
}
950+
951+
return resp, err
921952
}
922953

923954
func (s *nodeService) getNfsClient(fileSystemId string) nfsclient.EndpointClientIface {
@@ -956,6 +987,12 @@ func (s *nodeService) nodePublishFileStoreAsVhostSocket(
956987
},
957988
})
958989
if err != nil {
990+
if s.IsGrpcTimeoutError(err) {
991+
s.nbsClient.StopEndpoint(ctx, &nbsapi.TStopEndpointRequest{
992+
UnixSocketPath: filepath.Join(endpointDir, nbsSocketName),
993+
})
994+
}
995+
959996
return fmt.Errorf("failed to start NFS endpoint: %w", err)
960997
}
961998

@@ -994,6 +1031,12 @@ func (s *nodeService) nodeStageFileStoreAsVhostSocket(
9941031
},
9951032
})
9961033
if err != nil {
1034+
if s.IsGrpcTimeoutError(err) {
1035+
s.nbsClient.StopEndpoint(ctx, &nbsapi.TStopEndpointRequest{
1036+
UnixSocketPath: filepath.Join(endpointDir, nbsSocketName),
1037+
})
1038+
}
1039+
9971040
return fmt.Errorf("failed to start NFS endpoint: %w", err)
9981041
}
9991042

cloud/blockstore/tools/csi_driver/internal/driver/node_test.go

+170
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package driver
44

55
import (
66
"context"
7+
"fmt"
78
"io/fs"
89
"os"
910
"os/exec"
@@ -16,6 +17,7 @@ import (
1617
"github.com/stretchr/testify/mock"
1718
"github.com/stretchr/testify/require"
1819
nbs "github.com/ydb-platform/nbs/cloud/blockstore/public/api/protos"
20+
nbsclient "github.com/ydb-platform/nbs/cloud/blockstore/public/sdk/go/client"
1921
"github.com/ydb-platform/nbs/cloud/blockstore/tools/csi_driver/internal/driver/mocks"
2022
csimounter "github.com/ydb-platform/nbs/cloud/blockstore/tools/csi_driver/internal/mounter"
2123
nfs "github.com/ydb-platform/nbs/cloud/filestore/public/api/protos"
@@ -937,3 +939,171 @@ func TestPublishDeviceWithReadWriteManyModeIsNotSupportedWithNBS(t *testing.T) {
937939
})
938940
require.Error(t, err)
939941
}
942+
943+
func TestGrpcTimeoutForIKubevirt(t *testing.T) {
944+
tempDir := t.TempDir()
945+
946+
nbsClient := mocks.NewNbsClientMock()
947+
nfsClient := mocks.NewNfsEndpointClientMock()
948+
nfsLocalClient := mocks.NewNfsEndpointClientMock()
949+
mounter := csimounter.NewMock()
950+
951+
ctx := context.Background()
952+
nodeId := "testNodeId"
953+
clientId := "testClientId"
954+
instanceId := "testInstanceId"
955+
actualClientId := "testClientId-" + instanceId
956+
diskId := "test-disk-id-42"
957+
deviceName := diskId
958+
volumeId := diskId + "#" + instanceId
959+
backend := "nbs"
960+
961+
stagingTargetPath := filepath.Join(tempDir, "testStagingTargetPath")
962+
socketsDir := filepath.Join(tempDir, "sockets")
963+
sourcePath := filepath.Join(socketsDir, instanceId, diskId)
964+
targetFsPathPattern := filepath.Join(tempDir, "pods/([a-z0-9-]+)/volumes/([a-z0-9-]+)/mount")
965+
nbsSocketPath := filepath.Join(sourcePath, "nbs.sock")
966+
967+
nodeService := newNodeService(
968+
nodeId,
969+
clientId,
970+
true,
971+
socketsDir,
972+
targetFsPathPattern,
973+
"",
974+
make(LocalFilestoreOverrideMap),
975+
nbsClient,
976+
nfsClient,
977+
nfsLocalClient,
978+
mounter,
979+
)
980+
981+
accessMode := csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER
982+
983+
volumeCapability := csi.VolumeCapability{
984+
AccessType: &csi.VolumeCapability_Mount{
985+
Mount: &csi.VolumeCapability_MountVolume{},
986+
},
987+
AccessMode: &csi.VolumeCapability_AccessMode{
988+
Mode: accessMode,
989+
},
990+
}
991+
992+
volumeContext := map[string]string{
993+
backendVolumeContextKey: backend,
994+
instanceIdKey: instanceId,
995+
}
996+
997+
hostType := nbs.EHostType_HOST_TYPE_DEFAULT
998+
grpcError := nbsclient.ClientError{Code: nbsclient.E_GRPC_DEADLINE_EXCEEDED}
999+
startEndpointError := fmt.Errorf("%w", grpcError)
1000+
nbsClient.On("StartEndpoint", ctx, &nbs.TStartEndpointRequest{
1001+
UnixSocketPath: nbsSocketPath,
1002+
DiskId: diskId,
1003+
InstanceId: instanceId,
1004+
ClientId: actualClientId,
1005+
DeviceName: deviceName,
1006+
IpcType: nbs.EClientIpcType_IPC_VHOST,
1007+
VhostQueuesCount: 8,
1008+
VolumeAccessMode: nbs.EVolumeAccessMode_VOLUME_ACCESS_READ_WRITE,
1009+
VolumeMountMode: nbs.EVolumeMountMode_VOLUME_MOUNT_LOCAL,
1010+
Persistent: true,
1011+
NbdDevice: &nbs.TStartEndpointRequest_UseFreeNbdDeviceFile{
1012+
false,
1013+
},
1014+
ClientProfile: &nbs.TClientProfile{
1015+
HostType: &hostType,
1016+
},
1017+
}).Once().Return(&nbs.TStartEndpointResponse{}, startEndpointError)
1018+
1019+
nbsClient.On("StopEndpoint", ctx, &nbs.TStopEndpointRequest{
1020+
UnixSocketPath: nbsSocketPath,
1021+
}).Once().Return(&nbs.TStopEndpointResponse{}, nil)
1022+
1023+
_, err := nodeService.NodeStageVolume(ctx, &csi.NodeStageVolumeRequest{
1024+
VolumeId: volumeId,
1025+
StagingTargetPath: stagingTargetPath,
1026+
VolumeCapability: &volumeCapability,
1027+
VolumeContext: volumeContext,
1028+
})
1029+
require.Error(t, err)
1030+
}
1031+
1032+
func TestGrpcTimeoutForInfrakuber(t *testing.T) {
1033+
tempDir := t.TempDir()
1034+
1035+
nbsClient := mocks.NewNbsClientMock()
1036+
mounter := csimounter.NewMock()
1037+
1038+
ipcType := nbs.EClientIpcType_IPC_NBD
1039+
nbdDeviceFile := filepath.Join(tempDir, "dev", "nbd3")
1040+
err := os.MkdirAll(nbdDeviceFile, fs.FileMode(0755))
1041+
require.NoError(t, err)
1042+
1043+
ctx := context.Background()
1044+
nodeId := "testNodeId"
1045+
clientId := "testClientId"
1046+
diskId := "test-disk-id-42"
1047+
actualClientId := "testClientId-testNodeId"
1048+
targetFsPathPattern := filepath.Join(tempDir, "pods/([a-z0-9-]+)/volumes/([a-z0-9-]+)/mount")
1049+
stagingTargetPath := "testStagingTargetPath"
1050+
socketsDir := filepath.Join(tempDir, "sockets")
1051+
socketPath := filepath.Join(socketsDir, diskId, "nbs.sock")
1052+
1053+
nodeService := newNodeService(
1054+
nodeId,
1055+
clientId,
1056+
false,
1057+
socketsDir,
1058+
targetFsPathPattern,
1059+
"",
1060+
make(LocalFilestoreOverrideMap),
1061+
nbsClient,
1062+
nil,
1063+
nil,
1064+
mounter,
1065+
)
1066+
1067+
volumeCapability := csi.VolumeCapability{
1068+
AccessType: &csi.VolumeCapability_Mount{},
1069+
AccessMode: &csi.VolumeCapability_AccessMode{
1070+
Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
1071+
},
1072+
}
1073+
1074+
volumeContext := map[string]string{}
1075+
1076+
hostType := nbs.EHostType_HOST_TYPE_DEFAULT
1077+
grpcError := nbsclient.ClientError{Code: nbsclient.E_GRPC_DEADLINE_EXCEEDED}
1078+
startEndpointError := fmt.Errorf("%w", grpcError)
1079+
nbsClient.On("StartEndpoint", ctx, &nbs.TStartEndpointRequest{
1080+
UnixSocketPath: socketPath,
1081+
DiskId: diskId,
1082+
InstanceId: nodeId,
1083+
ClientId: actualClientId,
1084+
DeviceName: diskId,
1085+
IpcType: ipcType,
1086+
VhostQueuesCount: 8,
1087+
VolumeAccessMode: nbs.EVolumeAccessMode_VOLUME_ACCESS_READ_WRITE,
1088+
VolumeMountMode: nbs.EVolumeMountMode_VOLUME_MOUNT_LOCAL,
1089+
Persistent: true,
1090+
NbdDevice: &nbs.TStartEndpointRequest_UseFreeNbdDeviceFile{
1091+
true,
1092+
},
1093+
ClientProfile: &nbs.TClientProfile{
1094+
HostType: &hostType,
1095+
},
1096+
}).Return(&nbs.TStartEndpointResponse{}, startEndpointError)
1097+
1098+
nbsClient.On("StopEndpoint", ctx, &nbs.TStopEndpointRequest{
1099+
UnixSocketPath: socketPath,
1100+
}).Return(&nbs.TStopEndpointResponse{}, nil)
1101+
1102+
_, err = nodeService.NodeStageVolume(ctx, &csi.NodeStageVolumeRequest{
1103+
VolumeId: diskId,
1104+
StagingTargetPath: stagingTargetPath,
1105+
VolumeCapability: &volumeCapability,
1106+
VolumeContext: volumeContext,
1107+
})
1108+
require.Error(t, err)
1109+
}

0 commit comments

Comments
 (0)