8 - Wire PageObserver to enqueue page_crawls on Page creation

This commit is contained in:
myrmidex 2026-04-26 15:56:38 +02:00
parent 81209125a1
commit de14ae3ad4
5 changed files with 125 additions and 7 deletions

View file

@ -5,7 +5,9 @@
namespace App\Models; namespace App\Models;
use App\Enums\PageStatusEnum; use App\Enums\PageStatusEnum;
use App\Observers\PageObserver;
use Database\Factories\PageFactory; use Database\Factories\PageFactory;
use Illuminate\Database\Eloquent\Attributes\ObservedBy;
use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model; use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\BelongsTo; use Illuminate\Database\Eloquent\Relations\BelongsTo;
@ -13,6 +15,7 @@
use Illuminate\Database\Eloquent\Relations\HasOne; use Illuminate\Database\Eloquent\Relations\HasOne;
use Lvl0\FediDiscover\Models\Instance; use Lvl0\FediDiscover\Models\Instance;
#[ObservedBy([PageObserver::class])]
class Page extends Model class Page extends Model
{ {
/** @use HasFactory<PageFactory> */ /** @use HasFactory<PageFactory> */

View file

@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace App\Observers;
use App\Models\Page;
use App\Models\PageCrawl;
use App\Services\UrlService;
/**
 * Eloquent observer for Page that enqueues a crawl whenever a Page is created.
 *
 * The UrlService dependency is received through the constructor.
 */
class PageObserver
{
    public function __construct(private UrlService $urlService) {}

    /**
     * Handle the Page "created" event by making sure a PageCrawl row exists for it.
     *
     * The row is looked up by page_id; when none is found, one is created with the
     * host of the page URL as its domain and a priority of 0.
     *
     * Any exception raised by UrlService::host() is not caught here and propagates
     * to whoever created the Page.
     */
    public function created(Page $page): void
    {
        // Attributes used to find an already-existing crawl for this page.
        $lookup = ['page_id' => $page->id];

        // Attributes applied only when a new crawl row has to be inserted.
        $defaults = [
            'domain' => $this->urlService->host($page->url),
            'priority' => 0,
        ];

        PageCrawl::firstOrCreate($lookup, $defaults);
    }
}

View file

@ -0,0 +1,81 @@
<?php
declare(strict_types=1);
namespace Tests\Feature;
use App\Models\Page;
use App\Models\PageCrawl;
use App\Services\UrlService;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Tests\TestCase;
/**
 * Feature tests for the page_crawls rows that appear as a side effect of creating a Page.
 */
class PageQueuePopulationTest extends TestCase
{
    use RefreshDatabase;

    /** URL shared by the tests that create a valid page. */
    private const ARTICLE_URL = 'https://example-blog.com/article';

    /**
     * Creating a page yields a crawl row with the URL's host as domain, priority 0
     * and a non-null scheduled_for.
     */
    public function test_creating_a_page_inserts_a_page_crawl_row(): void
    {
        $page = Page::factory()->create(['url' => self::ARTICLE_URL]);

        $this->assertDatabaseHas('page_crawls', [
            'page_id' => $page->id,
            'domain' => (new UrlService)->host(self::ARTICLE_URL),
            'priority' => 0,
        ]);

        $queued = $this->crawlFor($page);

        $this->assertNotNull($queued);
        $this->assertNotNull($queued->scheduled_for);
    }

    /**
     * The crawl row created alongside a page starts with a null outcome.
     */
    public function test_created_page_crawl_has_null_outcome(): void
    {
        $page = Page::factory()->create(['url' => self::ARTICLE_URL]);

        $queued = $this->crawlFor($page);

        $this->assertNotNull($queued);
        $this->assertNull($queued->outcome);
    }

    /**
     * Page::firstOrCreate() on an already-stored URL leaves the crawl count at one.
     */
    public function test_first_or_create_with_existing_url_does_not_insert_duplicate_crawl(): void
    {
        Page::factory()->create(['url' => self::ARTICLE_URL]);

        // Finds the existing row — created event does not fire again
        Page::firstOrCreate(['url' => self::ARTICLE_URL], ['status' => 'discovered']);

        $this->assertDatabaseCount('page_crawls', 1);
    }

    /**
     * Updating an existing page leaves the crawl count at one.
     */
    public function test_updating_a_page_does_not_insert_another_crawl(): void
    {
        $existing = Page::factory()->create(['url' => self::ARTICLE_URL]);

        $existing->update(['title' => 'New Title']);

        $this->assertDatabaseCount('page_crawls', 1);
    }

    /**
     * Creating a page with an unparseable URL raises InvalidArgumentException,
     * yet the page row is stored and no crawl row is inserted.
     */
    public function test_bad_url_throws_exception_page_persists_no_crawl_inserted(): void
    {
        // Captured manually (rather than via expectException) so the database
        // assertions below still run after the exception has been raised.
        $thrown = null;

        try {
            Page::create(['url' => 'not-a-url', 'status' => 'discovered']);
        } catch (\InvalidArgumentException $exception) {
            $thrown = $exception;
        }

        $this->assertNotNull($thrown, 'Expected InvalidArgumentException to be thrown');
        $this->assertDatabaseHas('pages', ['url' => 'not-a-url']);
        $this->assertDatabaseCount('page_crawls', 0);
    }

    /**
     * Fetch the first crawl row stored for the given page, or null when there is none.
     */
    private function crawlFor(Page $page): ?PageCrawl
    {
        return PageCrawl::query()
            ->where('page_id', $page->id)
            ->first();
    }
}

View file

@ -17,7 +17,7 @@ class PageCrawlTest extends TestCase
public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): void public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): void
{ {
$page = Page::factory()->create(['url' => 'https://example.com/page-1']); $page = Page::factory()->createQuietly(['url' => 'https://example.com/page-1']);
$scheduledFor = Carbon::parse('2026-05-01 10:00:00'); $scheduledFor = Carbon::parse('2026-05-01 10:00:00');
$lockedAt = Carbon::parse('2026-05-01 10:01:00'); $lockedAt = Carbon::parse('2026-05-01 10:01:00');
@ -65,7 +65,7 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied():
public function test_page_crawl_belongs_to_a_page(): void public function test_page_crawl_belongs_to_a_page(): void
{ {
$page = Page::factory()->create(['url' => 'https://example.com/page-2']); $page = Page::factory()->createQuietly(['url' => 'https://example.com/page-2']);
$crawl = PageCrawl::create([ $crawl = PageCrawl::create([
'page_id' => $page->id, 'page_id' => $page->id,
@ -82,7 +82,9 @@ public function test_page_crawl_belongs_to_a_page(): void
public function test_deleting_a_page_cascades_to_its_page_crawls(): void public function test_deleting_a_page_cascades_to_its_page_crawls(): void
{ {
$page = Page::factory()->create(['url' => 'https://example.com/page-cascade']); // createQuietly() skips the PageObserver so the count of explicit rows is predictable;
// this test is about cascade delete behaviour, not observer side effects.
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-cascade']);
PageCrawl::factory()->page($page)->create(); PageCrawl::factory()->page($page)->create();
PageCrawl::factory()->page($page)->successful()->create(); PageCrawl::factory()->page($page)->successful()->create();
@ -97,7 +99,9 @@ public function test_deleting_a_page_cascades_to_its_page_crawls(): void
public function test_pending_crawls_are_filtered_by_null_outcome(): void public function test_pending_crawls_are_filtered_by_null_outcome(): void
{ {
$page = Page::factory()->create(['url' => 'https://example.com/page-pending']); // createQuietly() skips the PageObserver; this test counts rows with null/non-null
// outcome — the auto-inserted observer crawl (outcome=null) would corrupt both counts.
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-pending']);
$pending = PageCrawl::factory()->page($page)->create(); $pending = PageCrawl::factory()->page($page)->create();
PageCrawl::factory()->page($page)->successful()->create(); PageCrawl::factory()->page($page)->successful()->create();

View file

@ -101,8 +101,10 @@ public function test_page_language_is_fillable_and_persists(): void
public function test_page_has_many_crawls(): void public function test_page_has_many_crawls(): void
{ {
$page = Page::factory()->create(); // createQuietly() skips the PageObserver so no auto-crawl row is inserted;
$other = Page::factory()->create(); // this test is about HasMany scoping, not observer side effects.
$page = Page::factory()->createQuietly();
$other = Page::factory()->createQuietly();
PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']); PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']); PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
@ -120,7 +122,10 @@ public function test_page_has_many_crawls(): void
public function test_page_latest_crawl_returns_row_with_latest_created_at(): void public function test_page_latest_crawl_returns_row_with_latest_created_at(): void
{ {
$page = Page::factory()->create(); // createQuietly() skips the PageObserver; this test is about latestOfMany ordering,
// not observer side effects. Using create() would add an observer crawl whose
// created_at is now(), making the test fragile once the hardcoded sentinel date passes.
$page = Page::factory()->createQuietly();
$old = PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']); $old = PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
$old->created_at = Carbon::parse('2026-01-01 08:00:00'); $old->created_at = Carbon::parse('2026-01-01 08:00:00');