13 - Persist detected language and confidence on Page after successful fetch
This commit is contained in:
parent
81b3c7f70b
commit
1cba8f3fc9
2 changed files with 56 additions and 0 deletions
|
|
@@ -84,6 +84,8 @@ private function updatePageStatus(FetchResult $result): void
|
|||
'status' => $status,
|
||||
'fetched_at' => now(),
|
||||
'title' => $result->title,
|
||||
'language' => $result->language,
|
||||
'language_confidence' => $result->languageConfidence,
|
||||
],
|
||||
PageStatusEnum::Failed => [
|
||||
'status' => $status,
|
||||
|
|
|
|||
|
|
@@ -457,6 +457,56 @@ public function test_handle_proceeds_through_politeness_lock_when_robots_allow()
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * A successful fetch that reports a language and a confidence score must
 * leave both values stored on the Page record once the job has run.
 */
public function test_handle_persists_language_on_success(): void
{
    Queue::fake();

    // Stub the fetcher so it returns a successful result carrying 'en' / 0.95.
    $this->mockFetchPageAction(
        CrawlOutcomeEnum::Success,
        statusCode: 200,
        title: 'Hello',
        extractedText: 'hi',
        wordCount: 1,
        language: 'en',
        languageConfidence: 0.95,
    );

    $page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
    $pageCrawl = PageCrawl::factory()->page($page)->createQuietly();

    $job = app(ProcessCrawlJob::class, ['pageCrawl' => $pageCrawl]);
    $job->handle();

    // Reload the model so the assertions see the stored values, not the in-memory ones.
    $reloaded = $page->fresh();

    self::assertSame('en', $reloaded->language);
    self::assertEqualsWithDelta(0.95, $reloaded->language_confidence, 0.001);
}
|
||||
|
||||
/**
 * A successful fetch that reports no language (null language and null
 * confidence) must leave both Page attributes null once the job has run.
 */
public function test_handle_persists_null_language_on_success(): void
{
    Queue::fake();

    // Stub the fetcher so it returns a successful result with no language data.
    $this->mockFetchPageAction(
        CrawlOutcomeEnum::Success,
        statusCode: 200,
        title: 'Hello',
        extractedText: 'hi',
        wordCount: 1,
        language: null,
        languageConfidence: null,
    );

    $page = Page::factory()->createQuietly(['url' => 'https://example.com/article']);
    $pageCrawl = PageCrawl::factory()->page($page)->createQuietly();

    $job = app(ProcessCrawlJob::class, ['pageCrawl' => $pageCrawl]);
    $job->handle();

    // Reload the model so the assertions see the stored values, not the in-memory ones.
    $reloaded = $page->fresh();

    self::assertNull($reloaded->language);
    self::assertNull($reloaded->language_confidence);
}
|
||||
|
||||
private function mockFetchPageAction(
|
||||
CrawlOutcomeEnum $outcome,
|
||||
?int $statusCode = null,
|
||||
|
|
@@ -466,6 +516,8 @@ private function mockFetchPageAction(
|
|||
?Collection $outboundLinks = null,
|
||||
?int $wordCount = null,
|
||||
?string $errorMessage = null,
|
||||
?string $language = null,
|
||||
?float $languageConfidence = null,
|
||||
): void {
|
||||
$fetcher = Mockery::mock(FetchPageAction::class);
|
||||
$fetcher->shouldReceive('__invoke')->andReturn(new FetchResult(
|
||||
|
|
@@ -477,6 +529,8 @@ private function mockFetchPageAction(
|
|||
outboundLinks: $outboundLinks ?? collect(),
|
||||
wordCount: $wordCount,
|
||||
errorMessage: $errorMessage,
|
||||
language: $language,
|
||||
languageConfidence: $languageConfidence,
|
||||
));
|
||||
$this->app->instance(FetchPageAction::class, $fetcher);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue