diff --git a/couscous.yml b/couscous.yml index 46da09db7..f9fe88622 100644 --- a/couscous.yml +++ b/couscous.yml @@ -114,6 +114,10 @@ menu: text: Databases url: /docs/environment/database.html title: Using a database from AWS Lambda + timeouts: + text: Timeouts + url: /docs/environment/timeouts.html + title: Configure and handle timeouts custom-domains: text: Custom domains url: /docs/environment/custom-domains.html diff --git a/docs/environment/database.md b/docs/environment/database.md index 631a5cb25..476827999 100644 --- a/docs/environment/database.md +++ b/docs/environment/database.md @@ -54,6 +54,8 @@ When possible, an alternative to NAT Gateways is to split the work done by a lam Finally, another free alternative to NAT Gateway is to access AWS services by creating "*private VPC endpoints*": this is possible for S3, API Gateway, [and more](https://docs.aws.amazon.com/en_pv/vpc/latest/userguide/vpc-endpoints-access.html). +Read more in the section about [timeouts](/docs/environment/timeouts.md). + ## Creating a database On the [RDS console](https://console.aws.amazon.com/rds/home): diff --git a/docs/environment/timeouts.md b/docs/environment/timeouts.md new file mode 100644 index 000000000..c5566028d --- /dev/null +++ b/docs/environment/timeouts.md @@ -0,0 +1,63 @@ +--- +title: Timeouts +current_menu: timeouts +introduction: Configure and handle timeouts. +--- + +When a Lambda function times out, it is like the power to the computer is suddenly +just turned off. This does not give the application a chance to shut down properly. +This leaves you without any logs and the problem could be hard to fix. + +To allow your application to shut down properly and write logs, Bref can throw an exception just before the Lambda times out. + +> Note, this feature is experimental and available since Bref 1.3. + +To enable this feature **in `php-XX` layers**, set the environment variable `BREF_FEATURE_TIMEOUT`: + +```yaml +provider: + environment: + BREF_FEATURE_TIMEOUT: 1 +``` + +To enable this feature **in `php-XX-fpm` layers**, call `Timeout::enableInFpm()` in your application. +For example in `index.php`: + +```php +if (isset($_SERVER['LAMBDA_TASK_ROOT'])) { + \Bref\Timeout\Timeout::enableInFpm(); +} +``` + +Whenever a timeout happens, a full stack trace will be logged, including the line that was executing. + +In most cases, it is an external call to a database, cache or API that is stuck waiting. +If you are using a RDS database, [you are encouraged to read this section](database.md#accessing-the-internet). + +## Catching the exception + +You can catch the timeout exception to perform some cleanup, logs or even display a proper error page. + +In `php-XX-fpm` layers, most frameworks will catch the `LambdaTimeout` exception automatically (like any other error). + +In `php-XX` layers, you can catch it in your handlers. For example: + +```php +use Bref\Context\Context; +use Bref\Timeout\LambdaTimeout; + +class Handler implements \Bref\Event\Handler +{ + public function handle($event, Context $context) + { + try { + // your code here + // ... + } catch (LambdaTimeout $e) { + echo 'Oops, sorry. We spent too much time on this.'; + } catch (\Throwable $e) { + echo 'Some other unexpected error happened.'; + } + } +} +``` diff --git a/runtime/layers/fpm/bootstrap b/runtime/layers/fpm/bootstrap index 3b9bdf4f1..36647b074 100755 --- a/runtime/layers/fpm/bootstrap +++ b/runtime/layers/fpm/bootstrap @@ -7,6 +7,15 @@ use Bref\Runtime\LambdaRuntime; ini_set('display_errors', '1'); error_reporting(E_ALL); +/** + * Disable the Bref timeout feature in the main bootstrap process. + * Indeed it doesn't make sense here because the application code runs in the + * FPM worker process, not in this process (the bootstrap process). We don't want to + * terminate the bootstrap process while leaving the FPM worker running. + * That's why we force `BREF_FEATURE_TIMEOUT` to be disabled. + */ +putenv('BREF_FEATURE_TIMEOUT=0'); + $appRoot = getenv('LAMBDA_TASK_ROOT'); if (getenv('BREF_DOWNLOAD_VENDOR')) { diff --git a/src/Runtime/LambdaRuntime.php b/src/Runtime/LambdaRuntime.php index a295e449d..f155af176 100755 --- a/src/Runtime/LambdaRuntime.php +++ b/src/Runtime/LambdaRuntime.php @@ -5,6 +5,7 @@ use Bref\Context\Context; use Bref\Context\ContextBuilder; use Bref\Event\Handler; +use Bref\Timeout\Timeout; use Exception; use Psr\Http\Server\RequestHandlerInterface; @@ -42,12 +43,15 @@ final class LambdaRuntime /** @var Invoker */ private $invoker; + /** @var bool */ + private $enableTimeout; + public static function fromEnvironmentVariable(): self { - return new self((string) getenv('AWS_LAMBDA_RUNTIME_API')); + return new self((string) getenv('AWS_LAMBDA_RUNTIME_API'), (bool) getenv('BREF_FEATURE_TIMEOUT')); } - public function __construct(string $apiUrl) + private function __construct(string $apiUrl, bool $enableTimeout) { if ($apiUrl === '') { die('At the moment lambdas can only be executed in an Lambda environment'); @@ -55,6 +59,7 @@ public function __construct(string $apiUrl) $this->apiUrl = $apiUrl; $this->invoker = new Invoker; + $this->enableTimeout = $enableTimeout; } public function __destruct() @@ -96,6 +101,12 @@ public function processNextEvent($handler): void [$event, $context] = $this->waitNextInvocation(); \assert($context instanceof Context); + $remainingTimeInMillis = $context->getRemainingTimeInMillis(); + if ($this->enableTimeout && 0 < $remainingTimeInMillis) { + // Throw exception one second before Lambda pulls the plug. + Timeout::enable($context->getRemainingTimeInMillis()); + } + $this->ping(); try { @@ -104,6 +115,8 @@ public function processNextEvent($handler): void $this->sendResponse($context->getAwsRequestId(), $result); } catch (\Throwable $e) { $this->signalFailure($context->getAwsRequestId(), $e); + } finally { + Timeout::reset(); } } diff --git a/src/Timeout/LambdaTimeout.php b/src/Timeout/LambdaTimeout.php new file mode 100644 index 000000000..d46a0f3c2 --- /dev/null +++ b/src/Timeout/LambdaTimeout.php @@ -0,0 +1,12 @@ +getTraceAsString()); + error_log('Original stack trace'); + error_log(Timeout::$stackTrace); + + exit(1); + } + + $exception = new LambdaTimeout('Maximum AWS Lambda execution time reached'); + Timeout::$stackTrace = $exception->getTraceAsString(); + + // Trigger another alarm after 1 second to do a hard exit. + pcntl_alarm(1); + + throw $exception; + }); + + self::$initialized = true; + } + + /** + * Cancel all current timeouts. + * + * @internal + */ + public static function reset(): void + { + if (self::$initialized) { + pcntl_alarm(0); + self::$stackTrace = null; + } + } +} diff --git a/tests/Runtime/LambdaRuntimeTest.php b/tests/Runtime/LambdaRuntimeTest.php index f72f0a56d..49cfc8360 100644 --- a/tests/Runtime/LambdaRuntimeTest.php +++ b/tests/Runtime/LambdaRuntimeTest.php @@ -15,6 +15,8 @@ use Bref\Event\Sqs\SqsHandler; use Bref\Runtime\LambdaRuntime; use Bref\Test\Server; +use Bref\Timeout\LambdaTimeout; +use Bref\Timeout\Timeout; use GuzzleHttp\Psr7\Response; use PHPUnit\Framework\TestCase; use Psr\Http\Message\ResponseInterface; @@ -35,13 +37,41 @@ protected function setUp(): void { ob_start(); Server::start(); - $this->runtime = new LambdaRuntime('localhost:8126'); + // Mock the API with our test server + putenv('AWS_LAMBDA_RUNTIME_API=localhost:8126'); + $this->runtime = LambdaRuntime::fromEnvironmentVariable(); } protected function tearDown(): void { Server::stop(); ob_end_clean(); + // Cleanup + putenv('AWS_LAMBDA_RUNTIME_API='); + putenv('BREF_FEATURE_TIMEOUT='); + } + + public function test Lambda timeouts can be anticipated() + { + // Re-create the runtime class with timeout prevention enabled + putenv('BREF_FEATURE_TIMEOUT=1'); + $this->runtime = LambdaRuntime::fromEnvironmentVariable(); + + $this->givenAnEvent([]); + + $start = microtime(true); + $this->runtime->processNextEvent(function () { + // This 10s sleep should be interrupted + sleep(10); + }); + + $elapsedTime = microtime(true) - $start; + // The Lambda timeout was 2 seconds, we expect the Bref timeout to trigger 1 second before that: 1 second + $this->assertEqualsWithDelta(1, $elapsedTime, 0.2); + Timeout::reset(); + + $this->assertInvocationErrorResult(LambdaTimeout::class, 'Maximum AWS Lambda execution time reached'); + $this->assertErrorInLogs(LambdaTimeout::class, 'Maximum AWS Lambda execution time reached'); } public function test basic behavior() @@ -348,6 +378,8 @@ private function givenAnEvent($event): void [ 'lambda-runtime-aws-request-id' => '1', 'lambda-runtime-invoked-function-arn' => 'test-function-name', + // now + 2 seconds + 'lambda-runtime-deadline-ms' => intval((microtime(true) + 2) * 1000), ], json_encode($event) ), diff --git a/tests/Server.php b/tests/Server.php index 1ecc7b6d6..87b17d575 100644 --- a/tests/Server.php +++ b/tests/Server.php @@ -140,6 +140,13 @@ public static function stop() if (self::$started) { self::getClient()->request('DELETE', 'guzzle-server'); } + $tries = 0; + while (self::isListening() && ++$tries < 5) { + usleep(100000); + } + if (self::isListening()) { + throw new \RuntimeException('Unable to stop node.js server'); + } self::$started = false; } @@ -159,11 +166,12 @@ public static function start() if (self::$started) { return; } - if (! self::isListening()) { - exec('node ' . __DIR__ . '/server.js ' - . self::$port . ' >> /tmp/server.log 2>&1 &'); - self::wait(); + if (self::isListening()) { + throw new \Exception('Server is already running'); } + exec('node ' . __DIR__ . '/server.js ' + . self::$port . ' >> /tmp/server.log 2>&1 &'); + self::wait(); self::$started = true; } diff --git a/tests/Timeout/TimeoutTest.php b/tests/Timeout/TimeoutTest.php new file mode 100644 index 000000000..ed44af498 --- /dev/null +++ b/tests/Timeout/TimeoutTest.php @@ -0,0 +1,68 @@ +assertSame(1, $timeout); + } + + public function test enable in FPM() + { + $_SERVER['LAMBDA_INVOCATION_CONTEXT'] = json_encode(['deadlineMs' => (time() + 30) * 1000]); + + Timeout::enableInFpm(); + $timeout = pcntl_alarm(0); + $this->assertEqualsWithDelta(28, $timeout, 1); + } + + public function test enable in FPM requires the context() + { + $this->expectException(\LogicException::class); + Timeout::enableInFpm(); + } + + public function test timeouts are interrupted in time() + { + $start = microtime(true); + Timeout::enable(3000); + try { + sleep(4); + $this->fail('We expect a LambdaTimeout before we reach this line'); + } catch (LambdaTimeout $e) { + $time = 1000 * (microtime(true) - $start); + $this->assertEqualsWithDelta(1000, $time, 200, 'We must wait about 1 second'); + Timeout::reset(); + } catch (\Throwable $e) { + $this->fail('It must throw a LambdaTimeout.'); + } + } +} diff --git a/website/package-lock.json b/website/package-lock.json index d12b099da..1613cbb67 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -445,7 +445,6 @@ "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", "dev": true, "dependencies": { - "graceful-fs": "^4.1.6", "universalify": "^2.0.0" }, "optionalDependencies": {