Skip to content

Commit 5f46ca8

Browse files
authored
Nexus handler error translation (#1626)
* Nexus handler error translation * Address review comments
1 parent 0fc83e9 commit 5f46ca8

File tree

2 files changed

+134
-5
lines changed

2 files changed

+134
-5
lines changed

internal/internal_nexus_task_handler.go

+67-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ import (
3737
"go.temporal.io/api/common/v1"
3838
nexuspb "go.temporal.io/api/nexus/v1"
3939
"go.temporal.io/api/workflowservice/v1"
40+
"google.golang.org/grpc/codes"
41+
"google.golang.org/grpc/status"
4042

4143
"go.temporal.io/sdk/converter"
4244
"go.temporal.io/sdk/internal/common/metrics"
@@ -231,6 +233,7 @@ func (h *nexusTaskHandler) handleStartOperation(
231233
},
232234
}, nil, nil
233235
}
236+
err = convertKnownErrors(err)
234237
var handlerErr *nexus.HandlerError
235238
if errors.As(err, &handlerErr) {
236239
return nil, nexusHandlerErrorToProto(handlerErr), nil
@@ -302,6 +305,7 @@ func (h *nexusTaskHandler) handleCancelOperation(ctx context.Context, nctx *Nexu
302305
return nil, nil, ctx.Err()
303306
}
304307
if err != nil {
308+
err = convertKnownErrors(err)
305309
var handlerErr *nexus.HandlerError
306310
if errors.As(err, &handlerErr) {
307311
return nil, nexusHandlerErrorToProto(handlerErr), nil
@@ -319,7 +323,7 @@ func (h *nexusTaskHandler) handleCancelOperation(ctx context.Context, nctx *Nexu
319323

320324
func (h *nexusTaskHandler) internalError(err error) *nexuspb.HandlerError {
321325
h.logger.Error("error processing nexus task", "error", err)
322-
return nexusHandlerError(nexus.HandlerErrorTypeInternal, "internal error")
326+
return nexusHandlerError(nexus.HandlerErrorTypeInternal, err.Error())
323327
}
324328

325329
func (h *nexusTaskHandler) goContextForTask(nctx *NexusOperationContext, header nexus.Header) (context.Context, context.CancelFunc, *nexuspb.HandlerError) {
@@ -416,3 +420,65 @@ func (p *payloadSerializer) Serialize(v any) (*nexus.Content, error) {
416420
}
417421

418422
var emptyReaderNopCloser = io.NopCloser(bytes.NewReader([]byte{}))
423+
424+
// convertKnownErrors converts known errors to corresponding Nexus HandlerError.
425+
func convertKnownErrors(err error) error {
426+
// Handle common errors returned from various client methods.
427+
if workflowErr, ok := err.(*WorkflowExecutionError); ok {
428+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeBadRequest, workflowErr.Error())
429+
}
430+
if queryRejectedErr, ok := err.(*QueryRejectedError); ok {
431+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeBadRequest, queryRejectedErr.Error())
432+
}
433+
434+
// Not using errors.As to be consistent ApplicationError checking with the rest of the SDK.
435+
if appErr, ok := err.(*ApplicationError); ok {
436+
if appErr.NonRetryable() {
437+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeBadRequest, appErr.Error())
438+
}
439+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeInternal, appErr.Error())
440+
}
441+
return convertServiceError(err)
442+
}
443+
444+
// convertServiceError converts a serviceerror into a Nexus HandlerError if possible.
445+
// If exposeDetails is true, the error message from the given error is exposed in the converted HandlerError, otherwise,
446+
// a default message with minimal information is attached to the returned error.
447+
// Roughly taken from https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto
448+
// and
449+
// https://github.com/grpc-ecosystem/grpc-gateway/blob/a7cf811e6ffabeaddcfb4ff65602c12671ff326e/runtime/errors.go#L56.
450+
func convertServiceError(err error) error {
451+
var st *status.Status
452+
453+
// Temporal serviceerrors have a Status() method.
454+
stGetter, ok := err.(interface{ Status() *status.Status })
455+
if !ok {
456+
// Not a serviceerror, passthrough.
457+
return err
458+
}
459+
460+
st = stGetter.Status()
461+
errMessage := err.Error()
462+
463+
switch st.Code() {
464+
case codes.AlreadyExists, codes.Canceled, codes.InvalidArgument, codes.FailedPrecondition, codes.OutOfRange:
465+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeBadRequest, errMessage)
466+
case codes.Aborted, codes.Unavailable:
467+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeUnavailable, errMessage)
468+
case codes.DataLoss, codes.Internal, codes.Unknown, codes.Unauthenticated, codes.PermissionDenied:
469+
// Note that codes.Unauthenticated, codes.PermissionDenied have Nexus error types but we convert to internal
470+
// because this is not a client auth error and happens when the handler fails to auth with Temporal and should
471+
// be considered retryable.
472+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeInternal, errMessage)
473+
case codes.NotFound:
474+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeNotFound, errMessage)
475+
case codes.ResourceExhausted:
476+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeResourceExhausted, errMessage)
477+
case codes.Unimplemented:
478+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeNotImplemented, errMessage)
479+
case codes.DeadlineExceeded:
480+
return nexus.HandlerErrorf(nexus.HandlerErrorTypeDownstreamTimeout, errMessage)
481+
}
482+
483+
return err
484+
}

test/nexus_test.go

+67-4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
historypb "go.temporal.io/api/history/v1"
4343
nexuspb "go.temporal.io/api/nexus/v1"
4444
"go.temporal.io/api/operatorservice/v1"
45+
"go.temporal.io/api/serviceerror"
4546

4647
"go.temporal.io/sdk/client"
4748
"go.temporal.io/sdk/interceptor"
@@ -182,10 +183,20 @@ var syncOp = temporalnexus.NewSyncOperation("sync-op", func(ctx context.Context,
182183
Message: "fail",
183184
},
184185
}
186+
case "fmt-errorf":
187+
return "", fmt.Errorf("arbitrary error message")
185188
case "handlererror":
186189
return "", nexus.HandlerErrorf(nexus.HandlerErrorTypeBadRequest, s)
190+
case "already-started":
191+
return "", serviceerror.NewWorkflowExecutionAlreadyStarted("faking workflow already started", "dont-care", "dont-care")
192+
case "retryable-application-error":
193+
return "", temporal.NewApplicationError("fake app error for test", "FakeTestError")
194+
case "non-retryable-application-error":
195+
return "", temporal.NewApplicationErrorWithOptions("fake app error for test", "FakeTestError", temporal.ApplicationErrorOptions{
196+
NonRetryable: true,
197+
})
187198
case "panic":
188-
panic("panic")
199+
panic("panic requested")
189200
}
190201
return "", nil
191202
})
@@ -213,9 +224,8 @@ func TestNexusSyncOperation(t *testing.T) {
213224

214225
w := worker.New(tc.client, tc.taskQueue, worker.Options{})
215226
service := nexus.NewService("test")
216-
require.NoError(t, service.Register(syncOp, workflowOp))
227+
require.NoError(t, service.Register(syncOp))
217228
w.RegisterNexusService(service)
218-
w.RegisterWorkflow(waitForCancelWorkflow)
219229
require.NoError(t, w.Start())
220230
t.Cleanup(w.Stop)
221231

@@ -248,6 +258,14 @@ func TestNexusSyncOperation(t *testing.T) {
248258
require.Equal(t, "fail", unsuccessfulOperationErr.Failure.Message)
249259
})
250260

261+
t.Run("fmt-errorf", func(t *testing.T) {
262+
tc.metricsHandler.Clear()
263+
_, err := nexus.ExecuteOperation(ctx, nc, syncOp, "fmt-errorf", nexus.ExecuteOperationOptions{})
264+
var unexpectedResponseErr *nexus.UnexpectedResponseError
265+
require.ErrorAs(t, err, &unexpectedResponseErr)
266+
require.Contains(t, unexpectedResponseErr.Message, `"500 Internal Server Error": arbitrary error message`)
267+
})
268+
251269
t.Run("handlererror", func(t *testing.T) {
252270
_, err := nexus.ExecuteOperation(ctx, nc, syncOp, "handlererror", nexus.ExecuteOperationOptions{})
253271
var unexpectedResponseErr *nexus.UnexpectedResponseError
@@ -263,12 +281,57 @@ func TestNexusSyncOperation(t *testing.T) {
263281
}, time.Second*3, time.Millisecond*100)
264282
})
265283

284+
t.Run("already-started", func(t *testing.T) {
285+
_, err := nexus.ExecuteOperation(ctx, nc, syncOp, "already-started", nexus.ExecuteOperationOptions{})
286+
var unexpectedResponseErr *nexus.UnexpectedResponseError
287+
require.ErrorAs(t, err, &unexpectedResponseErr)
288+
require.Equal(t, http.StatusBadRequest, unexpectedResponseErr.Response.StatusCode)
289+
require.Contains(t, unexpectedResponseErr.Message, `"400 Bad Request": faking workflow already started`)
290+
291+
require.EventuallyWithT(t, func(t *assert.CollectT) {
292+
tc.requireTimer(t, metrics.NexusTaskEndToEndLatency, service.Name, syncOp.Name())
293+
tc.requireTimer(t, metrics.NexusTaskScheduleToStartLatency, service.Name, syncOp.Name())
294+
tc.requireTimer(t, metrics.NexusTaskExecutionLatency, service.Name, syncOp.Name())
295+
tc.requireCounter(t, metrics.NexusTaskExecutionFailedCounter, service.Name, syncOp.Name())
296+
}, time.Second*3, time.Millisecond*100)
297+
})
298+
299+
t.Run("retryable-application-error", func(t *testing.T) {
300+
_, err := nexus.ExecuteOperation(ctx, nc, syncOp, "retryable-application-error", nexus.ExecuteOperationOptions{})
301+
var unexpectedResponseErr *nexus.UnexpectedResponseError
302+
require.ErrorAs(t, err, &unexpectedResponseErr)
303+
require.Equal(t, http.StatusInternalServerError, unexpectedResponseErr.Response.StatusCode)
304+
require.Contains(t, unexpectedResponseErr.Message, `"500 Internal Server Error": fake app error for test`)
305+
306+
require.EventuallyWithT(t, func(t *assert.CollectT) {
307+
tc.requireTimer(t, metrics.NexusTaskEndToEndLatency, service.Name, syncOp.Name())
308+
tc.requireTimer(t, metrics.NexusTaskScheduleToStartLatency, service.Name, syncOp.Name())
309+
tc.requireTimer(t, metrics.NexusTaskExecutionLatency, service.Name, syncOp.Name())
310+
tc.requireCounter(t, metrics.NexusTaskExecutionFailedCounter, service.Name, syncOp.Name())
311+
}, time.Second*3, time.Millisecond*100)
312+
})
313+
314+
t.Run("non-retryable-application-error", func(t *testing.T) {
315+
_, err := nexus.ExecuteOperation(ctx, nc, syncOp, "non-retryable-application-error", nexus.ExecuteOperationOptions{})
316+
var unexpectedResponseErr *nexus.UnexpectedResponseError
317+
require.ErrorAs(t, err, &unexpectedResponseErr)
318+
require.Equal(t, http.StatusBadRequest, unexpectedResponseErr.Response.StatusCode)
319+
require.Contains(t, unexpectedResponseErr.Message, `"400 Bad Request": fake app error for test`)
320+
321+
require.EventuallyWithT(t, func(t *assert.CollectT) {
322+
tc.requireTimer(t, metrics.NexusTaskEndToEndLatency, service.Name, syncOp.Name())
323+
tc.requireTimer(t, metrics.NexusTaskScheduleToStartLatency, service.Name, syncOp.Name())
324+
tc.requireTimer(t, metrics.NexusTaskExecutionLatency, service.Name, syncOp.Name())
325+
tc.requireCounter(t, metrics.NexusTaskExecutionFailedCounter, service.Name, syncOp.Name())
326+
}, time.Second*3, time.Millisecond*100)
327+
})
328+
266329
t.Run("panic", func(t *testing.T) {
267330
_, err := nexus.ExecuteOperation(ctx, nc, syncOp, "panic", nexus.ExecuteOperationOptions{})
268331
var unexpectedResponseErr *nexus.UnexpectedResponseError
269332
require.ErrorAs(t, err, &unexpectedResponseErr)
270333
require.Equal(t, 500, unexpectedResponseErr.Response.StatusCode)
271-
require.Contains(t, unexpectedResponseErr.Message, "internal error")
334+
require.Contains(t, unexpectedResponseErr.Message, "panic: panic requested")
272335
})
273336
}
274337

0 commit comments

Comments
 (0)