From de14ae3ad4dba7acee86050bd2ef5796c32b329b Mon Sep 17 00:00:00 2001 From: myrmidex Date: Sun, 26 Apr 2026 15:56:38 +0200 Subject: [PATCH] 8 - Wire PageObserver to enqueue page_crawls on Page creation --- app/Models/Page.php | 3 + app/Observers/PageObserver.php | 25 +++++++ tests/Feature/PageQueuePopulationTest.php | 81 +++++++++++++++++++++++ tests/Unit/Models/PageCrawlTest.php | 12 ++-- tests/Unit/Models/PageTest.php | 11 ++- 5 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 app/Observers/PageObserver.php create mode 100644 tests/Feature/PageQueuePopulationTest.php diff --git a/app/Models/Page.php b/app/Models/Page.php index 60ce74d..02a0a8e 100644 --- a/app/Models/Page.php +++ b/app/Models/Page.php @@ -5,7 +5,9 @@ namespace App\Models; use App\Enums\PageStatusEnum; +use App\Observers\PageObserver; use Database\Factories\PageFactory; +use Illuminate\Database\Eloquent\Attributes\ObservedBy; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; use Illuminate\Database\Eloquent\Relations\BelongsTo; @@ -13,6 +15,7 @@ use Illuminate\Database\Eloquent\Relations\HasOne; use Lvl0\FediDiscover\Models\Instance; +#[ObservedBy([PageObserver::class])] class Page extends Model { /** @use HasFactory */ diff --git a/app/Observers/PageObserver.php b/app/Observers/PageObserver.php new file mode 100644 index 0000000..e48cc5d --- /dev/null +++ b/app/Observers/PageObserver.php @@ -0,0 +1,25 @@ + $page->id], + [ + 'domain' => $this->urlService->host($page->url), + 'priority' => 0, + ], + ); + } +} diff --git a/tests/Feature/PageQueuePopulationTest.php b/tests/Feature/PageQueuePopulationTest.php new file mode 100644 index 0000000..4204799 --- /dev/null +++ b/tests/Feature/PageQueuePopulationTest.php @@ -0,0 +1,81 @@ +create(['url' => $url]); + + $expectedDomain = (new UrlService)->host($url); + + $this->assertDatabaseHas('page_crawls', [ + 'page_id' => $page->id, + 'domain' => $expectedDomain, + 'priority' => 0, + ]); + + $crawl = PageCrawl::where('page_id', $page->id)->first(); + $this->assertNotNull($crawl); + $this->assertNotNull($crawl->scheduled_for); + } + + public function test_created_page_crawl_has_null_outcome(): void + { + $page = Page::factory()->create(['url' => 'https://example-blog.com/article']); + + $crawl = PageCrawl::where('page_id', $page->id)->first(); + + $this->assertNotNull($crawl); + $this->assertNull($crawl->outcome); + } + + public function test_first_or_create_with_existing_url_does_not_insert_duplicate_crawl(): void + { + $url = 'https://example-blog.com/article'; + + Page::factory()->create(['url' => $url]); + + // Finds the existing row — created event does not fire again + Page::firstOrCreate(['url' => $url], ['status' => 'discovered']); + + $this->assertDatabaseCount('page_crawls', 1); + } + + public function test_updating_a_page_does_not_insert_another_crawl(): void + { + $page = Page::factory()->create(['url' => 'https://example-blog.com/article']); + + $page->update(['title' => 'New Title']); + + $this->assertDatabaseCount('page_crawls', 1); + } + + public function test_bad_url_throws_exception_page_persists_no_crawl_inserted(): void + { + $caught = null; + + try { + Page::create(['url' => 'not-a-url', 'status' => 'discovered']); + } catch (\InvalidArgumentException $e) { + $caught = $e; + } + + $this->assertNotNull($caught, 'Expected InvalidArgumentException to be thrown'); + $this->assertDatabaseHas('pages', ['url' => 'not-a-url']); + $this->assertDatabaseCount('page_crawls', 0); + } +} diff --git a/tests/Unit/Models/PageCrawlTest.php b/tests/Unit/Models/PageCrawlTest.php index b1f8152..4c19294 100644 --- a/tests/Unit/Models/PageCrawlTest.php +++ b/tests/Unit/Models/PageCrawlTest.php @@ -17,7 +17,7 @@ class PageCrawlTest extends TestCase public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): void { - $page = Page::factory()->create(['url' => 'https://example.com/page-1']); + $page = Page::factory()->createQuietly(['url' => 'https://example.com/page-1']); $scheduledFor = Carbon::parse('2026-05-01 10:00:00'); $lockedAt = Carbon::parse('2026-05-01 10:01:00'); @@ -65,7 +65,7 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): public function test_page_crawl_belongs_to_a_page(): void { - $page = Page::factory()->create(['url' => 'https://example.com/page-2']); + $page = Page::factory()->createQuietly(['url' => 'https://example.com/page-2']); $crawl = PageCrawl::create([ 'page_id' => $page->id, @@ -82,7 +82,9 @@ public function test_page_crawl_belongs_to_a_page(): void public function test_deleting_a_page_cascades_to_its_page_crawls(): void { - $page = Page::factory()->create(['url' => 'https://example.com/page-cascade']); + // createQuietly() skips the PageObserver so the count of explicit rows is predictable; + // this test is about cascade delete behaviour, not observer side effects. + $page = Page::factory()->createQuietly(['url' => 'https://example.com/page-cascade']); PageCrawl::factory()->page($page)->create(); PageCrawl::factory()->page($page)->successful()->create(); @@ -97,7 +99,9 @@ public function test_deleting_a_page_cascades_to_its_page_crawls(): void public function test_pending_crawls_are_filtered_by_null_outcome(): void { - $page = Page::factory()->create(['url' => 'https://example.com/page-pending']); + // createQuietly() skips the PageObserver; this test counts rows with null/non-null + // outcome — the auto-inserted observer crawl (outcome=null) would corrupt both counts. + $page = Page::factory()->createQuietly(['url' => 'https://example.com/page-pending']); $pending = PageCrawl::factory()->page($page)->create(); PageCrawl::factory()->page($page)->successful()->create(); diff --git a/tests/Unit/Models/PageTest.php b/tests/Unit/Models/PageTest.php index 217c831..27e9740 100644 --- a/tests/Unit/Models/PageTest.php +++ b/tests/Unit/Models/PageTest.php @@ -101,8 +101,10 @@ public function test_page_language_is_fillable_and_persists(): void public function test_page_has_many_crawls(): void { - $page = Page::factory()->create(); - $other = Page::factory()->create(); + // createQuietly() skips the PageObserver so no auto-crawl row is inserted; + // this test is about HasMany scoping, not observer side effects. + $page = Page::factory()->createQuietly(); + $other = Page::factory()->createQuietly(); PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']); PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']); @@ -120,7 +122,10 @@ public function test_page_has_many_crawls(): void public function test_page_latest_crawl_returns_row_with_latest_created_at(): void { - $page = Page::factory()->create(); + // createQuietly() skips the PageObserver; this test is about latestOfMany ordering, + // not observer side effects. Using create() would add an observer crawl whose + // created_at is now(), making the test fragile once the hardcoded sentinel date passes. + $page = Page::factory()->createQuietly(); $old = PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']); $old->created_at = Carbon::parse('2026-01-01 08:00:00');