Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: status-codes (fixes #161) #164

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
remember request properties from last go() request
eposjk committed Dec 11, 2022
commit 25a2379d5193eb0c74f132ff703a21b6a877c6e2
11 changes: 11 additions & 0 deletions src/GoutteClient.php
Original file line number Diff line number Diff line change
@@ -38,6 +38,17 @@ class GoutteClient extends Client
*/
protected $retryFailureAt = 0;

/**
 * Put the redirect and retry tracking fields back to fixed starting values.
 *
 * UsesGoutte::go() calls this right before it issues a request.
 */
public function initNewRequest()
{
    // Retry bookkeeping.
    $this->retryFailureAt = 0;
    // NOTE(review): PHP_INT_MAX presumably stands for "no redirect retry time seen yet" — confirm.
    $this->retryRedirectAt = PHP_INT_MAX;

    // Redirect bookkeeping.
    $this->permanentRedirectUrl = null;
    $this->usesTemporaryRedirect = false;
}

/**
* Remember permanent redirect url and detect if the redirect chain contains temporary redirects
*
54 changes: 39 additions & 15 deletions src/UsesGoutte.php
Original file line number Diff line number Diff line change
@@ -28,6 +28,27 @@ trait UsesGoutte
*/
protected $currentPage = null;

/**
 * Was a temporary redirect involved in loading this request?
 *
 * go() copies this value from the client after each request.
 *
 * @var bool
 */
protected $usesTemporaryRedirect = false;

/**
 * Should subsequent requests go to a different URL?
 *
 * go() stores an empty string here when the client's value is null.
 *
 * @var string
 */
protected $permanentRedirectUrl = '';

/**
 * Which is the earliest moment to retry the request? (unix timestamp)
 *
 * Defaults to 0; go() treats a falsy value as "no retry time reported by the client".
 *
 * @var int
 */
protected $retryAt = 0;

/**
* Overwrites the client
*
@@ -73,9 +94,19 @@ public function client(): GoutteClient
*/
public function go(string $url): self
{
    // Reset the client's redirect/retry tracking before the new request is made.
    $this->client->initNewRequest();

    // Keep it around for internal processing.
    $this->currentPage = $this->client->request('GET', $url);

    // Remember request properties: copy the client's values onto this object.
    $this->usesTemporaryRedirect = $this->client->usesTemporaryRedirect;
    $this->permanentRedirectUrl = $this->client->permanentRedirectUrl ?? '';
    $this->retryAt = $this->client->retryAt();

    // The client reported no retry time, but the status is 509 (Bandwidth Limit Exceeded).
    if (!$this->retryAt && $this->statusCode() === 509 /* Bandwidth Limit Exceeded */) {
        // Retry once the monthly traffic quota can have been reset everywhere:
        // 12:00 UTC on the 1st is when the last timezone (UTC-12) enters the new month.
        // "first day of" is required; a bare "next month" keeps today's day-of-month
        // and overflows into the month after on the 29th-31st.
        $quotaReset = strtotime('first day of next month 12:00 UTC');

        // strtotime() returns false on failure; keep the stored value an int.
        $this->retryAt = $quotaReset === false ? 0 : $quotaReset;
    }

    return $this;
}

@@ -133,14 +164,9 @@ public function clickLink($titleOrUrl): self
return $this;
}

/**
 * Tell whether the client saw a temporary redirect.
 *
 * @return bool False when no client is set, otherwise the client's usesTemporaryRedirect value.
 */
public function usesTemporaryRedirect(): bool
{
    // Without a client there is nothing to ask.
    if (!$this->client) {
        return false;
    }

    return $this->client->usesTemporaryRedirect;
}

public function isTemporaryResult(): bool
{
return $this->usesTemporaryRedirect() || \in_array($this->statusCode(), [
return $this->usesTemporaryRedirect || \in_array($this->statusCode(), [
408, // Request Timeout
409, // Conflict
419, // Page Expired
@@ -177,21 +203,19 @@ public function isPermanentError(): bool
return $this->statusCode() >= 400 && !$this->isTemporaryResult();
}

/**
 * Tell whether a temporary redirect was involved in the request made by go().
 *
 * @return bool The value go() copied from the client; false by default.
 */
public function usesTemporaryRedirect(): bool
{
return $this->usesTemporaryRedirect;
}

/**
 * URL that subsequent requests should go to, as remembered by go().
 *
 * Returns the stored value instead of querying the client again, so the
 * accessor reads the same per-request state as usesTemporaryRedirect().
 *
 * @return string Empty string when go() stored no permanent redirect URL.
 */
public function permanentRedirectUrl(): string
{
    return $this->permanentRedirectUrl;
}

/**
 * Earliest moment to retry the request (unix timestamp), as remembered by go().
 *
 * go() already applies the 509 (Bandwidth Limit Exceeded) fallback when it
 * stores the value, so nothing is recomputed here.
 *
 * @return int The stored retry time; 0 is the property's default.
 */
public function retryAt(): int
{
    return $this->retryAt;
}

public function statusCode(): int