chore - Extract mockFetchPageAction helper in ProcessCrawlJobTest
This commit is contained in:
parent
3297c4bb3b
commit
c80be24e6e
1 changed files with 44 additions and 155 deletions
|
|
@ -14,6 +14,7 @@
|
||||||
use App\ValueObjects\FetchResult;
|
use App\ValueObjects\FetchResult;
|
||||||
use Carbon\Carbon;
|
use Carbon\Carbon;
|
||||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||||
|
use Illuminate\Support\Collection;
|
||||||
use Illuminate\Support\Facades\Queue;
|
use Illuminate\Support\Facades\Queue;
|
||||||
use Mockery;
|
use Mockery;
|
||||||
use Tests\TestCase;
|
use Tests\TestCase;
|
||||||
|
|
@ -49,18 +50,7 @@ public function test_handle_writes_outcome_to_page_crawl_on_success(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Success, statusCode: 200, title: 'Hello', extractedText: 'hi', wordCount: 1);
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Success,
|
|
||||||
statusCode: 200,
|
|
||||||
finalUrl: 'https://example.com/article',
|
|
||||||
title: 'Hello',
|
|
||||||
extractedText: 'hi',
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: 1,
|
|
||||||
errorMessage: null,
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -80,18 +70,7 @@ public function test_handle_updates_page_to_fetched_on_success(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Success, statusCode: 200, title: 'Hello', extractedText: 'hi', wordCount: 1);
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Success,
|
|
||||||
statusCode: 200,
|
|
||||||
finalUrl: 'https://example.com/article',
|
|
||||||
title: 'Hello',
|
|
||||||
extractedText: 'hi',
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: 1,
|
|
||||||
errorMessage: null,
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -110,18 +89,7 @@ public function test_handle_updates_page_to_rejected_on_rejected_outcome(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Rejected, statusCode: 200, errorMessage: 'Unsupported Content-Type: application/pdf');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Rejected,
|
|
||||||
statusCode: 200,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'Unsupported Content-Type: application/pdf',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/brochure.pdf']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/brochure.pdf']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -138,18 +106,7 @@ public function test_handle_updates_page_to_failed_on_blocked_4xx(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Blocked4xx, statusCode: 404, errorMessage: 'HTTP 404');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Blocked4xx,
|
|
||||||
statusCode: 404,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'HTTP 404',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/gone']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/gone']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -167,18 +124,7 @@ public function test_handle_updates_page_to_failed_on_timeout(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Timeout, errorMessage: 'Connection timed out after 10 seconds');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Timeout,
|
|
||||||
statusCode: null,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'Connection timed out after 10 seconds',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/slow']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/slow']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -196,18 +142,7 @@ public function test_handle_schedules_retry_on_transient_failure(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'Connection refused');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Failed,
|
|
||||||
statusCode: null,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'Connection refused',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unstable']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unstable']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -236,18 +171,7 @@ public function test_handle_does_not_retry_after_three_attempts(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'Connection refused');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Failed,
|
|
||||||
statusCode: null,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'Connection refused',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unreachable']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unreachable']);
|
||||||
|
|
||||||
|
|
@ -270,18 +194,7 @@ public function test_handle_writes_failed_outcome_to_page_crawl(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'boom');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Failed,
|
|
||||||
statusCode: null,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'boom',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unstable']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unstable']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -301,18 +214,7 @@ public function test_handle_updates_page_to_failed_on_failed_outcome(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'Connection refused');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Failed,
|
|
||||||
statusCode: null,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'Connection refused',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unreachable']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/unreachable']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -327,18 +229,7 @@ public function test_handle_updates_page_to_failed_on_blocked_5xx(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::Blocked5xx, statusCode: 503, errorMessage: 'HTTP 503');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::Blocked5xx,
|
|
||||||
statusCode: 503,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'HTTP 503',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/overloaded']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/overloaded']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -353,18 +244,7 @@ public function test_handle_updates_page_to_failed_on_blocked_robots(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(CrawlOutcomeEnum::BlockedRobots, errorMessage: 'Disallowed by robots.txt');
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
|
||||||
outcome: CrawlOutcomeEnum::BlockedRobots,
|
|
||||||
statusCode: null,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect(),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'Disallowed by robots.txt',
|
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/private']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/private']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -379,20 +259,11 @@ public function test_handle_does_not_register_outbound_links_on_failure(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
CrawlOutcomeEnum::Failed,
|
||||||
outcome: CrawlOutcomeEnum::Failed,
|
outboundLinks: collect(['https://should-not-be-registered.com/page']),
|
||||||
statusCode: null,
|
|
||||||
finalUrl: null,
|
|
||||||
title: null,
|
|
||||||
extractedText: null,
|
|
||||||
outboundLinks: collect([
|
|
||||||
'https://should-not-be-registered.com/page',
|
|
||||||
]),
|
|
||||||
wordCount: null,
|
|
||||||
errorMessage: 'Connection refused',
|
errorMessage: 'Connection refused',
|
||||||
));
|
);
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/broken']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/broken']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -408,21 +279,15 @@ public function test_handle_registers_outbound_links_on_success(): void
|
||||||
{
|
{
|
||||||
Queue::fake();
|
Queue::fake();
|
||||||
|
|
||||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
$this->mockFetchPageAction(
|
||||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
CrawlOutcomeEnum::Success,
|
||||||
outcome: CrawlOutcomeEnum::Success,
|
|
||||||
statusCode: 200,
|
statusCode: 200,
|
||||||
finalUrl: 'https://source.com/article',
|
finalUrl: 'https://source.com/article',
|
||||||
title: 'Source Article',
|
title: 'Source Article',
|
||||||
extractedText: 'some text',
|
extractedText: 'some text',
|
||||||
outboundLinks: collect([
|
outboundLinks: collect(['https://other.com/article-1', 'https://another.com/post-2']),
|
||||||
'https://other.com/article-1',
|
|
||||||
'https://another.com/post-2',
|
|
||||||
]),
|
|
||||||
wordCount: 2,
|
wordCount: 2,
|
||||||
errorMessage: null,
|
);
|
||||||
));
|
|
||||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
|
||||||
|
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://source.com/article']);
|
$page = Page::factory()->createQuietly(['url' => 'https://source.com/article']);
|
||||||
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
$crawl = PageCrawl::factory()->page($page)->createQuietly();
|
||||||
|
|
@ -434,4 +299,28 @@ public function test_handle_registers_outbound_links_on_success(): void
|
||||||
$this->assertDatabaseHas('pages', ['url' => 'https://another.com/post-2']);
|
$this->assertDatabaseHas('pages', ['url' => 'https://another.com/post-2']);
|
||||||
$this->assertSame(3, Page::count());
|
$this->assertSame(3, Page::count());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Bind a mocked FetchPageAction into the container that returns a canned FetchResult.
 *
 * Every `__invoke` call on the mock returns the same FetchResult, assembled from
 * the arguments below, so a test only has to name the fields it cares about.
 *
 * Note: $finalUrl defaults to 'https://example.com/article' rather than null.
 * Pass `finalUrl: null` explicitly when the result must carry no final URL.
 *
 * @param  CrawlOutcomeEnum  $outcome  Outcome reported by the fake fetch.
 * @param  int|null  $statusCode  Status code placed on the result.
 * @param  string|null  $finalUrl  Final URL placed on the result.
 * @param  string|null  $title  Title placed on the result.
 * @param  string|null  $extractedText  Extracted text placed on the result.
 * @param  Collection|null  $outboundLinks  Outbound links; an empty collection is used when null.
 * @param  int|null  $wordCount  Word count placed on the result.
 * @param  string|null  $errorMessage  Error message placed on the result.
 */
private function mockFetchPageAction(
    CrawlOutcomeEnum $outcome,
    ?int $statusCode = null,
    ?string $finalUrl = 'https://example.com/article',
    ?string $title = null,
    ?string $extractedText = null,
    ?Collection $outboundLinks = null,
    ?int $wordCount = null,
    ?string $errorMessage = null,
): void {
    // Assemble the canned result up front so the mock wiring below stays short.
    $cannedResult = new FetchResult(
        outcome: $outcome,
        statusCode: $statusCode,
        finalUrl: $finalUrl,
        title: $title,
        extractedText: $extractedText,
        outboundLinks: $outboundLinks ?? collect(),
        wordCount: $wordCount,
        errorMessage: $errorMessage,
    );

    $fetchPageAction = Mockery::mock(FetchPageAction::class);
    $fetchPageAction->shouldReceive('__invoke')->andReturn($cannedResult);

    // Replace the container binding so code resolving FetchPageAction gets the mock.
    $this->app->instance(FetchPageAction::class, $fetchPageAction);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue