Skip to content

Commit 167e824

Browse files
authored
feat!: add namespace to run and scheduler (#105)
1 parent 4542cbf commit 167e824

12 files changed

+68
-8
lines changed

.gitignore

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,7 @@
22
/.build/
33
.phpunit.result.cache
44
.phpunit.cache/
5-
/coverage/
5+
/coverage/
6+
/package-lock.json
7+
/package.json
8+
/node_modules

src/Core/Engine.php

+5-2
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,10 @@ public function start(Run $run): void
6161
RunStarting::NAME,
6262
);
6363

64-
foreach ($run->startRequests as $request) {
65-
$this->scheduleRequest($request);
64+
if ($this->scheduler->empty()) {
65+
foreach ($run->startRequests as $request) {
66+
$this->scheduleRequest($request);
67+
}
6668
}
6769

6870
$this->work($run);
@@ -144,6 +146,7 @@ private function scheduleRequest(Request $request): void
144146
private function configure(Run $run): void
145147
{
146148
$this->scheduler->setDelay($run->requestDelay);
149+
$this->scheduler->setNamespace($run->namespace);
147150
$this->itemPipeline->setProcessors(...$run->itemProcessors);
148151
$this->downloader->withMiddleware(...$run->downloaderMiddleware);
149152
$this->responseProcessor->withMiddleware(...$run->responseMiddleware);

src/Core/Run.php

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ final class Run
3333
*/
3434
public function __construct(
3535
public array $startRequests,
36+
public string $namespace,
3637
public array $downloaderMiddleware = [],
3738
public array $itemProcessors = [],
3839
public array $responseMiddleware = [],

src/Core/RunFactory.php

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public function fromSpider(SpiderInterface $spider, ?Overrides $overrides = null
4040

4141
return new Run(
4242
$spider->getInitialRequests(),
43+
$spider::class,
4344
$this->buildDownloaderMiddleware($configuration->downloaderMiddleware),
4445
$this->buildItemPipeline($configuration->itemProcessors),
4546
$this->buildResponseMiddleware($configuration->spiderMiddleware),

src/Http/Request.php

+5
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,9 @@ public function getPsrRequest(): RequestInterface
115115
{
116116
return $this->psrRequest;
117117
}
118+
119+
public function getParseCallback(): Closure
120+
{
121+
return $this->parseCallback;
122+
}
118123
}

src/Scheduling/ArrayRequestScheduler.php

+5
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ public function setDelay(int $delay): RequestSchedulerInterface
6868
return $this;
6969
}
7070

71+
public function setNamespace(string $namespace): RequestSchedulerInterface
72+
{
73+
return $this;
74+
}
75+
7176
private function updateNextBatchTime(): void
7277
{
7378
$this->nextBatchReadyAt = $this->clock->now()->add(new DateInterval("PT{$this->delay}S"));

src/Scheduling/RequestSchedulerInterface.php

+2
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,6 @@ public function forceNextRequests(int $batchSize): array;
3838
public function empty(): bool;
3939

4040
public function setDelay(int $delay): self;
41+
42+
public function setNamespace(string $namespace): self;
4143
}

tests/Core/EngineTest.php

+30-2
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,17 @@ final class EngineTest extends IntegrationTestCase
4949

5050
private FakeClock $clock;
5151

52+
private ArrayRequestScheduler $scheduler;
53+
5254
protected function setUp(): void
5355
{
5456
parent::setUp();
5557

5658
$dispatcher = new EventDispatcher();
5759
$this->clock = new FakeClock();
60+
$this->scheduler = new ArrayRequestScheduler($this->clock);
5861
$this->engine = new Engine(
59-
new ArrayRequestScheduler($this->clock),
62+
$this->scheduler,
6063
new Downloader(new Client(), $dispatcher),
6164
new ItemPipeline($dispatcher),
6265
new Processor($dispatcher),
@@ -72,14 +75,32 @@ public function testCrawlsStartUrls(): void
7275
$this->makeRequest('http://localhost:8000/test1'),
7376
$this->makeRequest('http://localhost:8000/test2'),
7477
];
75-
$run = new Run($startRequests);
78+
$run = new Run($startRequests, '::namespace::');
7679

7780
$this->engine->start($run);
7881

7982
$this->assertRouteWasCrawledTimes('/test1', 1);
8083
$this->assertRouteWasCrawledTimes('/test2', 1);
8184
}
8285

86+
public function testDoesntCrawlStartUrlsWithExistingRequestsInScheduler(): void
87+
{
88+
$startRequests = [
89+
$this->makeRequest('http://localhost:8000/test1'),
90+
$this->makeRequest('http://localhost:8000/test2'),
91+
];
92+
93+
$run = new Run($startRequests, '::namespace::');
94+
95+
$this->scheduler->schedule($this->makeRequest('http://localhost:8000/test3'));
96+
97+
$this->engine->start($run);
98+
99+
$this->assertRouteWasNotCrawled('/test1');
100+
$this->assertRouteWasNotCrawled('/test2');
101+
$this->assertRouteWasCrawledTimes('/test3', 1);
102+
}
103+
83104
public function testCrawlUrlsReturnedFromParseCallback(): void
84105
{
85106
$parseFunction = static function (Response $response) {
@@ -89,6 +110,7 @@ public function testCrawlUrlsReturnedFromParseCallback(): void
89110
};
90111
$run = new Run(
91112
[$this->makeRequest('http://localhost:8000/test2', $parseFunction)],
113+
'::namespace::',
92114
);
93115

94116
$this->engine->start($run);
@@ -108,6 +130,7 @@ public function testCallCorrectParseCallbackForRequest(): void
108130
};
109131
$run = new Run(
110132
[$this->makeRequest('http://localhost:8000/test1', $parseCallback)],
133+
'::namespace::',
111134
);
112135

113136
$this->engine->start($run);
@@ -127,6 +150,7 @@ public function testSendItemsThroughItemPipeline(): void
127150
];
128151
$run = new Run(
129152
$startRequests,
153+
'::namespace::',
130154
itemProcessors: [$processor],
131155
);
132156

@@ -145,6 +169,7 @@ public function testHandleBothRequestAndItemEmittedFromSameParseCallback(): void
145169
};
146170
$run = new Run(
147171
[$this->makeRequest('http://localhost:8000/test1', $parseCallback)],
172+
'::namespace::',
148173
itemProcessors: [$processor],
149174
);
150175

@@ -163,6 +188,7 @@ public function testRegisterExtensions(): void
163188
};
164189
$run = new Run(
165190
[$this->makeRequest('http://localhost:8000/test1', $parseCallback)],
191+
'::namespace::',
166192
extensions: [
167193
new StatsCollectorExtension($logger, new FakeClock()),
168194
new LoggerExtension($logger),
@@ -197,6 +223,7 @@ public function testCollectAndReturnScrapedItems(): void
197223
};
198224
$run = new Run(
199225
[$this->makeRequest('http://localhost:8000/test1', $parseCallback)],
226+
'::namespace::',
200227
);
201228

202229
$result = $this->engine->collect($run);
@@ -228,6 +255,7 @@ public function handleRequest(Request $request): Request
228255
$this->makeRequest('http://localhost:8000/test3'),
229256
$this->makeRequest('http://localhost:8000/robots'),
230257
],
258+
'::namespace::',
231259
downloaderMiddleware: [DownloaderMiddlewareAdapter::fromMiddleware($middleware)],
232260
concurrency: 1,
233261
requestDelay: 5,

tests/Core/RunFactoryTest.php

+9
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,15 @@ public function testConfigureRequestDelay(int $requestDelay): void
224224
self::assertSame($requestDelay, $run->requestDelay);
225225
}
226226

227+
public function testConfigureRunNamespace(): void
228+
{
229+
$spider = $this->createSpider();
230+
231+
$run = $this->factory->fromSpider($spider);
232+
233+
self::assertSame($spider::class, $run->namespace);
234+
}
235+
227236
public static function numberProvider(): Generator
228237
{
229238
yield from [

tests/Downloader/Middleware/RobotsTxtMiddlewareTest.php

+3
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public function testOnlyRequestsRobotsTxtOnceForRequestsToSameDomain(): void
6464
$parseCallback = static fn () => yield ParseResult::fromValue(self::makeRequest('http://localhost:8000/test2'));
6565
$run = new Run(
6666
[new Request('GET', 'http://localhost:8000/test1', $parseCallback)],
67+
'::namespace::',
6768
downloaderMiddleware: [$this->middleware],
6869
);
6970

@@ -76,6 +77,7 @@ public function testAllowsRequestIfAllowedByRobotsTxt(): void
7677
{
7778
$run = new Run(
7879
[self::makeRequest('http://localhost:8000/test1')],
80+
'::namespace::',
7981
downloaderMiddleware: [$this->middleware],
8082
);
8183

@@ -88,6 +90,7 @@ public function testDropRequestIfForbiddenByRobotsTxt(): void
8890
{
8991
$run = new Run(
9092
[self::makeRequest('http://localhost:8000/test2')],
93+
'::namespace::',
9194
downloaderMiddleware: [$this->middleware],
9295
);
9396

tests/Extensions/LoggerExtensionTest.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public function testLogWhenRunStarts(): void
4444
$this->logger->messageWasLogged('info', 'Run starting'),
4545
);
4646

47-
$this->dispatch(new RunStarting(new Run([])), RunStarting::NAME);
47+
$this->dispatch(new RunStarting(new Run([], '::namespace::')), RunStarting::NAME);
4848

4949
self::assertTrue(
5050
$this->logger->messageWasLogged('info', 'Run starting'),
@@ -57,7 +57,7 @@ public function testLogWhenRunFinished(): void
5757
$this->logger->messageWasLogged('info', 'Run finished'),
5858
);
5959

60-
$this->dispatch(new RunFinished(new Run([])), RunFinished::NAME);
60+
$this->dispatch(new RunFinished(new Run([], '::namespace::')), RunFinished::NAME);
6161

6262
self::assertTrue(
6363
$this->logger->messageWasLogged('info', 'Run finished'),

tests/Extensions/StatsCollectorExtensionTest.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ protected function createExtension(): ExtensionInterface
130130
*/
131131
private function withRun(callable $callback): void
132132
{
133-
$run = new Run([]);
133+
$run = new Run([], '::namespace::');
134134

135135
$this->dispatch(new RunStarting($run), RunStarting::NAME);
136136

0 commit comments

Comments
 (0)