13 - Persist detected language and confidence on Page after successful fetch

This commit is contained in:
myrmidex 2026-04-28 01:20:13 +02:00
parent 81b3c7f70b
commit 1cba8f3fc9
2 changed files with 56 additions and 0 deletions

View file

@ -84,6 +84,8 @@ private function updatePageStatus(FetchResult $result): void
'status' => $status, 'status' => $status,
'fetched_at' => now(), 'fetched_at' => now(),
'title' => $result->title, 'title' => $result->title,
'language' => $result->language,
'language_confidence' => $result->languageConfidence,
], ],
PageStatusEnum::Failed => [ PageStatusEnum::Failed => [
'status' => $status, 'status' => $status,

View file

@ -457,6 +457,56 @@ public function test_handle_proceeds_through_politeness_lock_when_robots_allow()
); );
} }
/**
 * When the fetcher reports success together with a detected language and
 * confidence, running the job leaves both values on the Page.
 */
public function test_handle_persists_language_on_success(): void
{
    Queue::fake();

    // Stub the fetch step so it yields a successful result tagged 'en' at 0.95.
    $this->mockFetchPageAction(
        CrawlOutcomeEnum::Success,
        statusCode: 200,
        title: 'Hello',
        extractedText: 'hi',
        wordCount: 1,
        language: 'en',
        languageConfidence: 0.95,
    );

    $article = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
    $pageCrawl = PageCrawl::factory()->page($article)->createQuietly();

    $job = app(ProcessCrawlJob::class, ['pageCrawl' => $pageCrawl]);
    $job->handle();

    // Assert against a freshly loaded copy rather than the in-memory instance.
    $reloaded = $article->fresh();

    $this->assertSame('en', $reloaded->language);
    // Confidence is a float, so compare within a small tolerance.
    $this->assertEqualsWithDelta(0.95, $reloaded->language_confidence, 0.001);
}
/**
 * When the fetcher reports success but supplies no language or confidence,
 * running the job leaves both attributes null on the Page.
 */
public function test_handle_persists_null_language_on_success(): void
{
    Queue::fake();

    // Stub the fetch step so it yields a successful result with no language data.
    $this->mockFetchPageAction(
        CrawlOutcomeEnum::Success,
        statusCode: 200,
        title: 'Hello',
        extractedText: 'hi',
        wordCount: 1,
        language: null,
        languageConfidence: null,
    );

    $article = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
    $pageCrawl = PageCrawl::factory()->page($article)->createQuietly();

    $job = app(ProcessCrawlJob::class, ['pageCrawl' => $pageCrawl]);
    $job->handle();

    // Assert against a freshly loaded copy rather than the in-memory instance.
    $reloaded = $article->fresh();

    $this->assertNull($reloaded->language);
    $this->assertNull($reloaded->language_confidence);
}
private function mockFetchPageAction( private function mockFetchPageAction(
CrawlOutcomeEnum $outcome, CrawlOutcomeEnum $outcome,
?int $statusCode = null, ?int $statusCode = null,
@ -466,6 +516,8 @@ private function mockFetchPageAction(
?Collection $outboundLinks = null, ?Collection $outboundLinks = null,
?int $wordCount = null, ?int $wordCount = null,
?string $errorMessage = null, ?string $errorMessage = null,
?string $language = null,
?float $languageConfidence = null,
): void { ): void {
$fetcher = Mockery::mock(FetchPageAction::class); $fetcher = Mockery::mock(FetchPageAction::class);
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult( $fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
@ -477,6 +529,8 @@ private function mockFetchPageAction(
outboundLinks: $outboundLinks ?? collect(), outboundLinks: $outboundLinks ?? collect(),
wordCount: $wordCount, wordCount: $wordCount,
errorMessage: $errorMessage, errorMessage: $errorMessage,
language: $language,
languageConfidence: $languageConfidence,
)); ));
$this->app->instance(FetchPageAction::class, $fetcher); $this->app->instance(FetchPageAction::class, $fetcher);
} }