diff --git a/app/Jobs/ProcessCrawlJob.php b/app/Jobs/ProcessCrawlJob.php index ff700e2..071bd49 100644 --- a/app/Jobs/ProcessCrawlJob.php +++ b/app/Jobs/ProcessCrawlJob.php @@ -84,6 +84,8 @@ private function updatePageStatus(FetchResult $result): void 'status' => $status, 'fetched_at' => now(), 'title' => $result->title, + 'language' => $result->language, + 'language_confidence' => $result->languageConfidence, ], PageStatusEnum::Failed => [ 'status' => $status, diff --git a/tests/Feature/Jobs/ProcessCrawlJobTest.php b/tests/Feature/Jobs/ProcessCrawlJobTest.php index 722f29f..bf353e6 100644 --- a/tests/Feature/Jobs/ProcessCrawlJobTest.php +++ b/tests/Feature/Jobs/ProcessCrawlJobTest.php @@ -457,6 +457,56 @@ public function test_handle_proceeds_through_politeness_lock_when_robots_allow() ); } + public function test_handle_persists_language_on_success(): void + { + Queue::fake(); + + $this->mockFetchPageAction( + CrawlOutcomeEnum::Success, + statusCode: 200, + title: 'Hello', + extractedText: 'hi', + wordCount: 1, + language: 'en', + languageConfidence: 0.95, + ); + + $page = Page::factory()->createQuietly(['url' => 'https://example.com/article']); + $crawl = PageCrawl::factory()->page($page)->createQuietly(); + + app(ProcessCrawlJob::class, ['pageCrawl' => $crawl]) + ->handle(); + + $fresh = $page->fresh(); + $this->assertSame('en', $fresh->language); + $this->assertEqualsWithDelta(0.95, $fresh->language_confidence, 0.001); + } + + public function test_handle_persists_null_language_on_success(): void + { + Queue::fake(); + + $this->mockFetchPageAction( + CrawlOutcomeEnum::Success, + statusCode: 200, + title: 'Hello', + extractedText: 'hi', + wordCount: 1, + language: null, + languageConfidence: null, + ); + + $page = Page::factory()->createQuietly(['url' => 'https://example.com/article']); + $crawl = PageCrawl::factory()->page($page)->createQuietly(); + + app(ProcessCrawlJob::class, ['pageCrawl' => $crawl]) + ->handle(); + + $fresh = $page->fresh(); + $this->assertNull($fresh->language); + $this->assertNull($fresh->language_confidence); + } + private function mockFetchPageAction( CrawlOutcomeEnum $outcome, ?int $statusCode = null, @@ -466,6 +516,8 @@ private function mockFetchPageAction( ?Collection $outboundLinks = null, ?int $wordCount = null, ?string $errorMessage = null, + ?string $language = null, + ?float $languageConfidence = null, ): void { $fetcher = Mockery::mock(FetchPageAction::class); $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult( @@ -477,6 +529,8 @@ private function mockFetchPageAction( outboundLinks: $outboundLinks ?? collect(), wordCount: $wordCount, errorMessage: $errorMessage, + language: $language, + languageConfidence: $languageConfidence, )); $this->app->instance(FetchPageAction::class, $fetcher); }