From c968993267cae60d4b473e3d8198d1fa4df0da55 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Wed, 3 May 2023 14:26:26 +0200 Subject: [PATCH 1/6] fix rare crash in getHtml: for a split second, documentElement might be missing, causing getHtml() to crash. I had a program that was doing page stuff and calling getHtml() roughly every 10 milliseconds (100 times per second), and got an unexpected crash. I was able to create a small reproducible sample: ```php <?php require_once 'vendor/autoload.php'; $browser_factory = new \HeadlessChromium\BrowserFactory(); $browser_factory->setOptions([ "headless" => true, "noSandbox" => true, 'windowSize' => [1000, 1000] ]); $browser = $browser_factory->createBrowser(); $page = $browser->createPage(); for ($i = 0; $i < 100; ++$i) { $page->navigate("http://example.com"); $html = $page->getHtml(); $page->navigate("http://example.org"); $html = $page->getHtml(); } ``` which consistently crashes with: ``` PHP Fatal error: Uncaught HeadlessChromium\Exception\JavascriptException: Error during javascript evaluation: TypeError: Cannot read properties of null (reading 'outerHTML') at <anonymous>:1:26 in /home/hans/projects/ibkr/vendor/chrome-php/chrome/src/PageUtils/PageEvaluation.php:89 Stack trace: #0 /home/hans/projects/ibkr/vendor/chrome-php/chrome/src/PageUtils/PageEvaluation.php(108): HeadlessChromium\PageUtils\PageEvaluation->waitForResponse() #1 /home/hans/projects/ibkr/vendor/chrome-php/chrome/src/Page.php(894): HeadlessChromium\PageUtils\PageEvaluation->getReturnValue() #2 /home/hans/projects/ibkr/test_crash.php(16): HeadlessChromium\Page->getHtml() #3 {main} thrown in /home/hans/projects/ibkr/vendor/chrome-php/chrome/src/PageUtils/PageEvaluation.php on line 89 ``` --- src/Page.php | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Page.php b/src/Page.php index 062a6d17..cd64b498 100644 --- a/src/Page.php +++ b/src/Page.php @@ -890,9 +890,19 @@ public function setHtml(string $html, int $timeout = 3000, string $eventName = s */ public function getHtml(?int $timeout = null): string { - return 
$this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); + try { + return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); + } catch (Exception\JavascriptException $e) { + if (0 === strpos($e->getMessage(), 'Error during javascript evaluation: TypeError: Cannot read properties of null (reading \'outerHTML\')')) { + // sometimes after a page reload, for a split second, + // document.documentElement does not exist + // (not sure if its a chromium bug or intentional but either way) + usleep(1000); // 1ms seems to be more than enough, unable to reproduce. + return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); + } + throw $e; + } } - /** * Read cookies for the current page. * From 8f80ff969250bcc0a48683c45aa5976ad2775ace Mon Sep 17 00:00:00 2001 From: divinity76 Date: Wed, 3 May 2023 14:34:14 +0200 Subject: [PATCH 2/6] appease StyleCI --- src/Page.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Page.php b/src/Page.php index cd64b498..95f94c5d 100644 --- a/src/Page.php +++ b/src/Page.php @@ -893,16 +893,17 @@ public function getHtml(?int $timeout = null): string try { return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); } catch (Exception\JavascriptException $e) { - if (0 === strpos($e->getMessage(), 'Error during javascript evaluation: TypeError: Cannot read properties of null (reading \'outerHTML\')')) { - // sometimes after a page reload, for a split second, + if (0 === \strpos($e->getMessage(), 'Error during javascript evaluation: TypeError: Cannot read properties of null (reading \'outerHTML\')')) { + // sometimes after a page reload, for a split second, // document.documentElement does not exist // (not sure if its a chromium bug or intentional but either way) - usleep(1000); // 1ms seems to be more than enough, unable to reproduce. + \usleep(1000); // 1ms seems to be more than enough, unable to reproduce. 
return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); } throw $e; } } + /** * Read cookies for the current page. * From 672d3ae7fdec050ecec67699b32658f70f38c7fc Mon Sep 17 00:00:00 2001 From: divinity76 Date: Wed, 3 May 2023 14:37:49 +0200 Subject: [PATCH 3/6] appease StyleCI --- src/Page.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Page.php b/src/Page.php index 95f94c5d..92a476d3 100644 --- a/src/Page.php +++ b/src/Page.php @@ -898,6 +898,7 @@ public function getHtml(?int $timeout = null): string // document.documentElement does not exist // (not sure if its a chromium bug or intentional but either way) \usleep(1000); // 1ms seems to be more than enough, unable to reproduce. + return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); } throw $e; From 7cdc7ab334c2dd247cf4f363c2b7f0381eac0dad Mon Sep 17 00:00:00 2001 From: divinity76 Date: Wed, 3 May 2023 14:44:52 +0200 Subject: [PATCH 4/6] javascriptException is already imported --- src/Page.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Page.php b/src/Page.php index 92a476d3..24a16480 100644 --- a/src/Page.php +++ b/src/Page.php @@ -892,7 +892,7 @@ public function getHtml(?int $timeout = null): string { try { return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); - } catch (Exception\JavascriptException $e) { + } catch (JavascriptException $e) { if (0 === \strpos($e->getMessage(), 'Error during javascript evaluation: TypeError: Cannot read properties of null (reading \'outerHTML\')')) { // sometimes after a page reload, for a split second, // document.documentElement does not exist From dffb0f79c489d6e5f7e365e573deb6ad46d37625 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Thu, 4 May 2023 13:27:58 +0200 Subject: [PATCH 5/6] remove comments requested at https://github.com/chrome-php/chrome/pull/516#issuecomment-1533022892 --- src/Page.php | 5 +---- 1 file changed, 1 insertion(+), 
4 deletions(-) diff --git a/src/Page.php b/src/Page.php index 24a16480..4032f3f0 100644 --- a/src/Page.php +++ b/src/Page.php @@ -894,10 +894,7 @@ public function getHtml(?int $timeout = null): string return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); } catch (JavascriptException $e) { if (0 === \strpos($e->getMessage(), 'Error during javascript evaluation: TypeError: Cannot read properties of null (reading \'outerHTML\')')) { - // sometimes after a page reload, for a split second, - // document.documentElement does not exist - // (not sure if its a chromium bug or intentional but either way) - \usleep(1000); // 1ms seems to be more than enough, unable to reproduce. + \usleep(1000); return $this->evaluate('document.documentElement.outerHTML')->getReturnValue($timeout); } From 872ff19dc050aadd1312cad3ad3a4c90db8fa1f9 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Thu, 4 May 2023 20:17:10 +0200 Subject: [PATCH 6/6] Update Page.php possibly throws JavascriptException --- src/Page.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Page.php b/src/Page.php index 4032f3f0..d7ba46fc 100644 --- a/src/Page.php +++ b/src/Page.php @@ -887,6 +887,7 @@ public function setHtml(string $html, int $timeout = 3000, string $eventName = s * Gets the raw html of the current page. * * @throws CommunicationException + * @throws JavascriptException */ public function getHtml(?int $timeout = null): string {