From c80be24e6ecfdd78c2d44b7fe7c638d61ebbbc0a Mon Sep 17 00:00:00 2001
From: myrmidex
Date: Mon, 27 Apr 2026 00:24:38 +0200
Subject: [PATCH] chore - Extract mockFetchPageAction helper in ProcessCrawlJobTest

---
 tests/Feature/Jobs/ProcessCrawlJobTest.php | 201 ++++++---------------
 1 file changed, 56 insertions(+), 145 deletions(-)

diff --git a/tests/Feature/Jobs/ProcessCrawlJobTest.php b/tests/Feature/Jobs/ProcessCrawlJobTest.php
index dab484b..487f6d7 100644
--- a/tests/Feature/Jobs/ProcessCrawlJobTest.php
+++ b/tests/Feature/Jobs/ProcessCrawlJobTest.php
@@ -14,6 +14,7 @@
 use App\ValueObjects\FetchResult;
 use Carbon\Carbon;
 use Illuminate\Foundation\Testing\RefreshDatabase;
+use Illuminate\Support\Collection;
 use Illuminate\Support\Facades\Queue;
 use Mockery;
 use Tests\TestCase;
@@ -49,18 +50,14 @@ public function test_handle_writes_outcome_to_page_crawl_on_success(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Success,
+        $this->mockFetchPageAction(
+            CrawlOutcomeEnum::Success,
             statusCode: 200,
             finalUrl: 'https://example.com/article',
             title: 'Hello',
             extractedText: 'hi',
-            outboundLinks: collect(),
             wordCount: 1,
-            errorMessage: null,
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        );
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -80,18 +77,14 @@ public function test_handle_updates_page_to_fetched_on_success(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Success,
+        $this->mockFetchPageAction(
+            CrawlOutcomeEnum::Success,
             statusCode: 200,
             finalUrl: 'https://example.com/article',
             title: 'Hello',
             extractedText: 'hi',
-            outboundLinks: collect(),
             wordCount: 1,
-            errorMessage: null,
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        );
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -110,18 +103,7 @@ public function test_handle_updates_page_to_rejected_on_rejected_outcome(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Rejected,
-            statusCode: 200,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'Unsupported Content-Type: application/pdf',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Rejected, statusCode: 200, errorMessage: 'Unsupported Content-Type: application/pdf');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/brochure.pdf']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -138,18 +120,7 @@ public function test_handle_updates_page_to_failed_on_blocked_4xx(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Blocked4xx,
-            statusCode: 404,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'HTTP 404',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Blocked4xx, statusCode: 404, errorMessage: 'HTTP 404');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/gone']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -167,18 +138,7 @@ public function test_handle_updates_page_to_failed_on_timeout(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Timeout,
-            statusCode: null,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'Connection timed out after 10 seconds',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Timeout, errorMessage: 'Connection timed out after 10 seconds');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/slow']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -196,18 +156,7 @@ public function test_handle_schedules_retry_on_transient_failure(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Failed,
-            statusCode: null,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'Connection refused',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'Connection refused');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/unstable']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -236,18 +185,7 @@ public function test_handle_does_not_retry_after_three_attempts(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Failed,
-            statusCode: null,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'Connection refused',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'Connection refused');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/unreachable']);
 
@@ -270,18 +208,7 @@ public function test_handle_writes_failed_outcome_to_page_crawl(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Failed,
-            statusCode: null,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'boom',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'boom');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/unstable']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -301,18 +228,7 @@ public function test_handle_updates_page_to_failed_on_failed_outcome(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Failed,
-            statusCode: null,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'Connection refused',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Failed, errorMessage: 'Connection refused');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/unreachable']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -327,18 +243,7 @@ public function test_handle_updates_page_to_failed_on_blocked_5xx(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Blocked5xx,
-            statusCode: 503,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'HTTP 503',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::Blocked5xx, statusCode: 503, errorMessage: 'HTTP 503');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/overloaded']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -353,18 +258,7 @@ public function test_handle_updates_page_to_failed_on_blocked_robots(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::BlockedRobots,
-            statusCode: null,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect(),
-            wordCount: null,
-            errorMessage: 'Disallowed by robots.txt',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        $this->mockFetchPageAction(CrawlOutcomeEnum::BlockedRobots, errorMessage: 'Disallowed by robots.txt');
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/private']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -379,20 +273,11 @@ public function test_handle_does_not_register_outbound_links_on_failure(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Failed,
-            statusCode: null,
-            finalUrl: null,
-            title: null,
-            extractedText: null,
-            outboundLinks: collect([
-                'https://should-not-be-registered.com/page',
-            ]),
-            wordCount: null,
+        $this->mockFetchPageAction(
+            CrawlOutcomeEnum::Failed,
+            outboundLinks: collect(['https://should-not-be-registered.com/page']),
             errorMessage: 'Connection refused',
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        );
 
         $page = Page::factory()->createQuietly(['url' => 'https://example.com/broken']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -408,21 +293,15 @@ public function test_handle_registers_outbound_links_on_success(): void
     {
         Queue::fake();
 
-        $fetcher = Mockery::mock(FetchPageAction::class);
-        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
-            outcome: CrawlOutcomeEnum::Success,
+        $this->mockFetchPageAction(
+            CrawlOutcomeEnum::Success,
             statusCode: 200,
             finalUrl: 'https://source.com/article',
             title: 'Source Article',
             extractedText: 'some text',
-            outboundLinks: collect([
-                'https://other.com/article-1',
-                'https://another.com/post-2',
-            ]),
+            outboundLinks: collect(['https://other.com/article-1', 'https://another.com/post-2']),
             wordCount: 2,
-            errorMessage: null,
-        ));
-        $this->app->instance(FetchPageAction::class, $fetcher);
+        );
 
         $page = Page::factory()->createQuietly(['url' => 'https://source.com/article']);
         $crawl = PageCrawl::factory()->page($page)->createQuietly();
@@ -434,4 +313,36 @@ public function test_handle_registers_outbound_links_on_success(): void
         $this->assertDatabaseHas('pages', ['url' => 'https://another.com/post-2']);
         $this->assertSame(3, Page::count());
     }
+
+    /**
+     * Bind a FetchPageAction mock that returns a canned FetchResult to the container.
+     *
+     * Only the outcome is required; every other FetchResult field defaults to null
+     * (outboundLinks to an empty collection). finalUrl deliberately has no implicit
+     * value, so failure fixtures keep finalUrl null exactly as they did before this
+     * helper was extracted, and success fixtures state the URL they expect.
+     */
+    private function mockFetchPageAction(
+        CrawlOutcomeEnum $outcome,
+        ?int $statusCode = null,
+        ?string $finalUrl = null,
+        ?string $title = null,
+        ?string $extractedText = null,
+        ?Collection $outboundLinks = null,
+        ?int $wordCount = null,
+        ?string $errorMessage = null,
+    ): void {
+        $fetcher = Mockery::mock(FetchPageAction::class);
+        $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
+            outcome: $outcome,
+            statusCode: $statusCode,
+            finalUrl: $finalUrl,
+            title: $title,
+            extractedText: $extractedText,
+            outboundLinks: $outboundLinks ?? collect(),
+            wordCount: $wordCount,
+            errorMessage: $errorMessage,
+        ));
+        $this->app->instance(FetchPageAction::class, $fetcher);
+    }
 }