8 - Wire PageObserver to enqueue page_crawls on Page creation
This commit is contained in:
parent
81209125a1
commit
de14ae3ad4
5 changed files with 125 additions and 7 deletions
|
|
@ -5,7 +5,9 @@
|
||||||
namespace App\Models;
|
namespace App\Models;
|
||||||
|
|
||||||
use App\Enums\PageStatusEnum;
|
use App\Enums\PageStatusEnum;
|
||||||
|
use App\Observers\PageObserver;
|
||||||
use Database\Factories\PageFactory;
|
use Database\Factories\PageFactory;
|
||||||
|
use Illuminate\Database\Eloquent\Attributes\ObservedBy;
|
||||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||||
use Illuminate\Database\Eloquent\Model;
|
use Illuminate\Database\Eloquent\Model;
|
||||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||||
|
|
@ -13,6 +15,7 @@
|
||||||
use Illuminate\Database\Eloquent\Relations\HasOne;
|
use Illuminate\Database\Eloquent\Relations\HasOne;
|
||||||
use Lvl0\FediDiscover\Models\Instance;
|
use Lvl0\FediDiscover\Models\Instance;
|
||||||
|
|
||||||
|
#[ObservedBy([PageObserver::class])]
|
||||||
class Page extends Model
|
class Page extends Model
|
||||||
{
|
{
|
||||||
/** @use HasFactory<PageFactory> */
|
/** @use HasFactory<PageFactory> */
|
||||||
|
|
|
||||||
25
app/Observers/PageObserver.php
Normal file
25
app/Observers/PageObserver.php
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Observers;
|
||||||
|
|
||||||
|
use App\Models\Page;
|
||||||
|
use App\Models\PageCrawl;
|
||||||
|
use App\Services\UrlService;
|
||||||
|
|
||||||
|
/**
 * Model observer attached to Page that queues a crawl for every newly created page.
 */
class PageObserver
{
    /**
     * @param UrlService $urlService Supplies the host used as the crawl's domain.
     */
    public function __construct(private UrlService $urlService) {}

    /**
     * Handle the Page "created" event by ensuring a PageCrawl row exists for the page.
     *
     * The row is looked up by page_id; when none exists it is created with the
     * page URL's host as its domain and a priority of 0.
     *
     * Nothing here catches exceptions from UrlService::host(); because the host is
     * resolved before firstOrCreate() runs, a failure there surfaces to the caller
     * without a crawl row having been written.
     */
    public function created(Page $page): void
    {
        $lookup = ['page_id' => $page->id];

        // Resolved up front so the values array below is plain data.
        $domain = $this->urlService->host($page->url);

        $defaults = [
            'domain' => $domain,
            'priority' => 0,
        ];

        PageCrawl::firstOrCreate($lookup, $defaults);
    }
}
|
||||||
81
tests/Feature/PageQueuePopulationTest.php
Normal file
81
tests/Feature/PageQueuePopulationTest.php
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace Tests\Feature;
|
||||||
|
|
||||||
|
use App\Models\Page;
|
||||||
|
use App\Models\PageCrawl;
|
||||||
|
use App\Services\UrlService;
|
||||||
|
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||||
|
use Tests\TestCase;
|
||||||
|
|
||||||
|
/**
 * Feature tests for the page_crawls row that is inserted when a Page is created.
 */
class PageQueuePopulationTest extends TestCase
{
    use RefreshDatabase;

    /**
     * Creating a page yields a crawl row carrying the page id, the URL's host and priority 0.
     */
    public function test_creating_a_page_inserts_a_page_crawl_row(): void
    {
        $articleUrl = 'https://example-blog.com/article';

        $created = Page::factory()->create(['url' => $articleUrl]);

        $this->assertDatabaseHas('page_crawls', [
            'page_id' => $created->id,
            'domain' => (new UrlService)->host($articleUrl),
            'priority' => 0,
        ]);

        $queued = PageCrawl::where('page_id', $created->id)->first();

        $this->assertNotNull($queued);
        // The observer passes only domain and priority; scheduled_for presumably comes
        // from a column/model default — confirm against the page_crawls migration.
        $this->assertNotNull($queued->scheduled_for);
    }

    /**
     * The automatically inserted crawl starts out with no outcome recorded.
     */
    public function test_created_page_crawl_has_null_outcome(): void
    {
        $created = Page::factory()->create(['url' => 'https://example-blog.com/article']);

        $queued = PageCrawl::where('page_id', $created->id)->first();

        $this->assertNotNull($queued);
        $this->assertNull($queued->outcome);
    }

    /**
     * Page::firstOrCreate() on an already stored URL must not add a second crawl row.
     */
    public function test_first_or_create_with_existing_url_does_not_insert_duplicate_crawl(): void
    {
        $articleUrl = 'https://example-blog.com/article';

        Page::factory()->create(['url' => $articleUrl]);

        // The second call resolves to the row created above, so no new "created" event is fired.
        Page::firstOrCreate(['url' => $articleUrl], ['status' => 'discovered']);

        $this->assertDatabaseCount('page_crawls', 1);
    }

    /**
     * Updating an existing page leaves the crawl count untouched.
     */
    public function test_updating_a_page_does_not_insert_another_crawl(): void
    {
        $created = Page::factory()->create(['url' => 'https://example-blog.com/article']);

        $created->update(['title' => 'New Title']);

        $this->assertDatabaseCount('page_crawls', 1);
    }

    /**
     * An unparseable URL raises InvalidArgumentException; the page row is kept, no crawl is added.
     *
     * The exception is captured manually (rather than via expectException) so the
     * database assertions after it still execute.
     */
    public function test_bad_url_throws_exception_page_persists_no_crawl_inserted(): void
    {
        $thrown = null;

        try {
            Page::create(['url' => 'not-a-url', 'status' => 'discovered']);
        } catch (\InvalidArgumentException $exception) {
            $thrown = $exception;
        }

        $this->assertNotNull($thrown, 'Expected InvalidArgumentException to be thrown');
        $this->assertDatabaseHas('pages', ['url' => 'not-a-url']);
        $this->assertDatabaseCount('page_crawls', 0);
    }
}
|
||||||
|
|
@ -17,7 +17,7 @@ class PageCrawlTest extends TestCase
|
||||||
|
|
||||||
public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): void
|
public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): void
|
||||||
{
|
{
|
||||||
$page = Page::factory()->create(['url' => 'https://example.com/page-1']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-1']);
|
||||||
|
|
||||||
$scheduledFor = Carbon::parse('2026-05-01 10:00:00');
|
$scheduledFor = Carbon::parse('2026-05-01 10:00:00');
|
||||||
$lockedAt = Carbon::parse('2026-05-01 10:01:00');
|
$lockedAt = Carbon::parse('2026-05-01 10:01:00');
|
||||||
|
|
@ -65,7 +65,7 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied():
|
||||||
|
|
||||||
public function test_page_crawl_belongs_to_a_page(): void
|
public function test_page_crawl_belongs_to_a_page(): void
|
||||||
{
|
{
|
||||||
$page = Page::factory()->create(['url' => 'https://example.com/page-2']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-2']);
|
||||||
|
|
||||||
$crawl = PageCrawl::create([
|
$crawl = PageCrawl::create([
|
||||||
'page_id' => $page->id,
|
'page_id' => $page->id,
|
||||||
|
|
@ -82,7 +82,9 @@ public function test_page_crawl_belongs_to_a_page(): void
|
||||||
|
|
||||||
public function test_deleting_a_page_cascades_to_its_page_crawls(): void
|
public function test_deleting_a_page_cascades_to_its_page_crawls(): void
|
||||||
{
|
{
|
||||||
$page = Page::factory()->create(['url' => 'https://example.com/page-cascade']);
|
// createQuietly() skips the PageObserver so the count of explicit rows is predictable;
|
||||||
|
// this test is about cascade delete behaviour, not observer side effects.
|
||||||
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-cascade']);
|
||||||
|
|
||||||
PageCrawl::factory()->page($page)->create();
|
PageCrawl::factory()->page($page)->create();
|
||||||
PageCrawl::factory()->page($page)->successful()->create();
|
PageCrawl::factory()->page($page)->successful()->create();
|
||||||
|
|
@ -97,7 +99,9 @@ public function test_deleting_a_page_cascades_to_its_page_crawls(): void
|
||||||
|
|
||||||
public function test_pending_crawls_are_filtered_by_null_outcome(): void
|
public function test_pending_crawls_are_filtered_by_null_outcome(): void
|
||||||
{
|
{
|
||||||
$page = Page::factory()->create(['url' => 'https://example.com/page-pending']);
|
// createQuietly() skips the PageObserver; this test counts rows with null/non-null
|
||||||
|
// outcome — the auto-inserted observer crawl (outcome=null) would corrupt both counts.
|
||||||
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-pending']);
|
||||||
|
|
||||||
$pending = PageCrawl::factory()->page($page)->create();
|
$pending = PageCrawl::factory()->page($page)->create();
|
||||||
PageCrawl::factory()->page($page)->successful()->create();
|
PageCrawl::factory()->page($page)->successful()->create();
|
||||||
|
|
|
||||||
|
|
@ -101,8 +101,10 @@ public function test_page_language_is_fillable_and_persists(): void
|
||||||
|
|
||||||
public function test_page_has_many_crawls(): void
|
public function test_page_has_many_crawls(): void
|
||||||
{
|
{
|
||||||
$page = Page::factory()->create();
|
// createQuietly() skips the PageObserver so no auto-crawl row is inserted;
|
||||||
$other = Page::factory()->create();
|
// this test is about HasMany scoping, not observer side effects.
|
||||||
|
$page = Page::factory()->createQuietly();
|
||||||
|
$other = Page::factory()->createQuietly();
|
||||||
|
|
||||||
PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
|
PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
|
||||||
PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
|
PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
|
||||||
|
|
@ -120,7 +122,10 @@ public function test_page_has_many_crawls(): void
|
||||||
|
|
||||||
public function test_page_latest_crawl_returns_row_with_latest_created_at(): void
|
public function test_page_latest_crawl_returns_row_with_latest_created_at(): void
|
||||||
{
|
{
|
||||||
$page = Page::factory()->create();
|
// createQuietly() skips the PageObserver; this test is about latestOfMany ordering,
|
||||||
|
// not observer side effects. Using create() would add an observer crawl whose
|
||||||
|
// created_at is now(), making the test fragile once the hardcoded sentinel date passes.
|
||||||
|
$page = Page::factory()->createQuietly();
|
||||||
|
|
||||||
$old = PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
|
$old = PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
|
||||||
$old->created_at = Carbon::parse('2026-01-01 08:00:00');
|
$old->created_at = Carbon::parse('2026-01-01 08:00:00');
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue