7 - Add page_crawls migration, PageCrawl model, factory, and Page relationships
This commit is contained in:
parent
9dd6d84d65
commit
fe8ca7fc10
7 changed files with 351 additions and 0 deletions
|
|
@ -10,6 +10,7 @@
|
|||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
use Illuminate\Database\Eloquent\Relations\HasMany;
|
||||
use Illuminate\Database\Eloquent\Relations\HasOne;
|
||||
use Lvl0\FediDiscover\Models\Instance;
|
||||
|
||||
class Page extends Model
|
||||
|
|
@ -49,4 +50,14 @@ public function incomingLinks(): HasMany
|
|||
{
|
||||
return $this->hasMany(PageLink::class, 'target_page_id');
|
||||
}
|
||||
|
||||
/**
 * Every crawl record that references this page.
 */
public function crawls(): HasMany
{
    $crawlRelation = $this->hasMany(PageCrawl::class);

    return $crawlRelation;
}
|
||||
|
||||
/**
 * The single crawl of this page with the greatest `created_at` value.
 */
public function latestCrawl(): HasOne
{
    $singleCrawl = $this->hasOne(PageCrawl::class);

    return $singleCrawl->latestOfMany('created_at');
}
|
||||
}
|
||||
|
|
|
|||
41
app/Models/PageCrawl.php
Normal file
41
app/Models/PageCrawl.php
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use App\Enums\CrawlOutcomeEnum;
|
||||
use Database\Factories\PageCrawlFactory;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
|
||||
/**
 * One crawl record for a Page, stored in the `page_crawls` table.
 *
 * A row carries its scheduling data (`priority`, `scheduled_for`), an optional lock timestamp
 * (`locked_at`) and, once finished, the result (`completed_at`, `outcome`, `status_code`,
 * `error_message`). A row whose `outcome` is null has no recorded result yet.
 *
 * @property int $page_id
 * @property string $domain
 * @property int $priority
 * @property \Carbon\Carbon $scheduled_for
 * @property \Carbon\Carbon|null $locked_at
 * @property \Carbon\Carbon|null $completed_at
 * @property CrawlOutcomeEnum|null $outcome
 * @property int|null $status_code
 * @property string|null $error_message
 */
class PageCrawl extends Model
{
    /** @use HasFactory<PageCrawlFactory> */
    use HasFactory;

    protected $fillable = [
        'page_id',
        'domain',
        'priority',
        'scheduled_for',
        'completed_at',
        'outcome',
        'status_code',
        'error_message',
        'locked_at',
    ];

    protected $casts = [
        // Integer columns are cast explicitly so their PHP type does not depend on the
        // database driver (some PDO configurations hand numeric columns back as strings,
        // which breaks strict comparisons and int-typed consumers under strict_types).
        'page_id' => 'integer',
        'priority' => 'integer',
        'status_code' => 'integer',
        'scheduled_for' => 'datetime',
        'completed_at' => 'datetime',
        'outcome' => CrawlOutcomeEnum::class,
        'locked_at' => 'datetime',
    ];

    /**
     * The page this crawl record belongs to.
     */
    public function page(): BelongsTo
    {
        return $this->belongsTo(Page::class);
    }
}
|
||||
71
database/factories/PageCrawlFactory.php
Normal file
71
database/factories/PageCrawlFactory.php
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Database\Factories;
|
||||
|
||||
use App\Enums\CrawlOutcomeEnum;
|
||||
use App\Models\Page;
|
||||
use App\Models\PageCrawl;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Database\Eloquent\Factories\Factory;
|
||||
|
||||
/**
 * Factory for PageCrawl rows.
 *
 * The default definition describes a crawl scheduled for "now" with no lock, completion time,
 * outcome, status code or error message. The state methods below layer a result, a lock or a
 * different schedule on top of that.
 *
 * @extends Factory<PageCrawl>
 */
class PageCrawlFactory extends Factory
{
    /**
     * Default attribute set.
     *
     * `page_id` defaults to a Page factory because the page_crawls table declares it as a
     * non-nullable foreign key: a `null` default can never be persisted, so a bare
     * `PageCrawl::factory()->create()` would fail. States are merged before factory-valued
     * attributes are resolved, so applying page() replaces this default and no extra Page
     * is created.
     *
     * @return array<string, mixed>
     */
    public function definition(): array
    {
        return [
            'page_id' => Page::factory(),
            'domain' => 'example.com',
            'priority' => 0,
            'scheduled_for' => now(),
            'completed_at' => null,
            'outcome' => null,
            'status_code' => null,
            'error_message' => null,
            'locked_at' => null,
        ];
    }

    /**
     * Attach the crawl to an existing page instead of creating a new one.
     */
    public function page(Page $page): static
    {
        // Closures are kept non-static on purpose: the framework may rebind state callbacks
        // to the factory instance.
        return $this->state(fn () => [
            'page_id' => $page->id,
        ]);
    }

    /**
     * Mark the crawl as completed with a Success outcome.
     */
    public function successful(): static
    {
        return $this->state(fn () => [
            'outcome' => CrawlOutcomeEnum::Success,
            'completed_at' => now(),
        ]);
    }

    /**
     * Mark the crawl as completed with a Failed outcome and the given error message.
     */
    public function failed(string $errorMessage): static
    {
        return $this->state(fn () => [
            'outcome' => CrawlOutcomeEnum::Failed,
            'completed_at' => now(),
            'error_message' => $errorMessage,
        ]);
    }

    /**
     * Override the default `scheduled_for` timestamp.
     */
    public function scheduledAt(Carbon $scheduledAt): static
    {
        return $this->state(fn () => [
            'scheduled_for' => $scheduledAt,
        ]);
    }

    /**
     * Stamp `locked_at` with the current time and clear the outcome.
     */
    public function locked(): static
    {
        return $this->state(fn () => [
            'locked_at' => now(),
            'outcome' => null,
        ]);
    }
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the page_crawls table, then add the domain and polling indexes.
     */
    public function up(): void
    {
        Schema::create('page_crawls', function (Blueprint $crawls): void {
            $crawls->id();
            $crawls->foreignId('page_id')->constrained('pages')->cascadeOnDelete();
            $crawls->string('domain');
            $crawls->smallInteger('priority')->default(0);
            $crawls->timestampTz('scheduled_for')->useCurrent();
            $crawls->timestampTz('locked_at')->nullable();
            $crawls->timestampTz('completed_at')->nullable();
            $crawls->string('outcome')->nullable();
            $crawls->smallInteger('status_code')->nullable();
            $crawls->text('error_message')->nullable();
            $crawls->timestampsTz();

            $crawls->index(['page_id', 'created_at']);
        });

        $this->addLookupIndexes();
    }

    /**
     * Drop the page_crawls table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists('page_crawls');
    }

    /**
     * Index `domain` and (`scheduled_for`, `locked_at`).
     *
     * On the pgsql driver these are partial indexes limited to rows whose outcome is NULL;
     * every other driver gets plain indexes over the same columns.
     */
    private function addLookupIndexes(): void
    {
        if (DB::getDriverName() !== 'pgsql') {
            Schema::table('page_crawls', function (Blueprint $crawls): void {
                $crawls->index('domain');
                $crawls->index(['scheduled_for', 'locked_at']);
            });

            return;
        }

        DB::statement('CREATE INDEX page_crawls_pending_domain_idx ON page_crawls (domain) WHERE outcome IS NULL');
        DB::statement('CREATE INDEX page_crawls_pending_poll_idx ON page_crawls (scheduled_for, locked_at) WHERE outcome IS NULL');
    }
};
|
||||
56
tests/Unit/Models/PageCrawlFactoryTest.php
Normal file
56
tests/Unit/Models/PageCrawlFactoryTest.php
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Tests\Unit\Models;
|
||||
|
||||
use App\Enums\CrawlOutcomeEnum;
|
||||
use App\Models\Page;
|
||||
use App\Models\PageCrawl;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Tests\TestCase;
|
||||
|
||||
/**
 * Exercises the named states of the PageCrawl factory against a real database.
 */
class PageCrawlFactoryTest extends TestCase
{
    use RefreshDatabase;

    /**
     * successful() yields a Success outcome, a completion time and no error message.
     */
    public function test_factory_successful_state_produces_success_outcome(): void
    {
        $owner = Page::factory()->create();

        $created = PageCrawl::factory()
            ->page($owner)
            ->successful()
            ->create();

        self::assertSame(CrawlOutcomeEnum::Success, $created->outcome);
        self::assertInstanceOf(Carbon::class, $created->completed_at);
        self::assertNull($created->error_message);
    }

    /**
     * failed() yields a Failed outcome, a completion time and the supplied message.
     */
    public function test_factory_failed_state_produces_failed_outcome_with_message(): void
    {
        $owner = Page::factory()->create();

        $created = PageCrawl::factory()
            ->page($owner)
            ->failed('Connection timed out')
            ->create();

        self::assertSame(CrawlOutcomeEnum::Failed, $created->outcome);
        self::assertInstanceOf(Carbon::class, $created->completed_at);
        self::assertSame('Connection timed out', $created->error_message);
    }

    /**
     * locked() yields a lock timestamp with neither completion time nor outcome.
     */
    public function test_factory_locked_state_produces_in_flight_crawl(): void
    {
        $owner = Page::factory()->create();

        $created = PageCrawl::factory()
            ->page($owner)
            ->locked()
            ->create();

        self::assertInstanceOf(Carbon::class, $created->locked_at);
        self::assertNull($created->completed_at);
        self::assertNull($created->outcome);
    }

    /**
     * scheduledAt() replaces the default scheduled_for value.
     */
    public function test_factory_scheduled_at_state_overrides_default_scheduled_for(): void
    {
        $owner = Page::factory()->create();
        $expected = Carbon::parse('2026-05-01 10:00:00');

        $created = PageCrawl::factory()
            ->page($owner)
            ->scheduledAt($expected)
            ->create();

        self::assertTrue($expected->equalTo($created->scheduled_for));
    }
}
|
||||
82
tests/Unit/Models/PageCrawlTest.php
Normal file
82
tests/Unit/Models/PageCrawlTest.php
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Tests\Unit\Models;
|
||||
|
||||
use App\Enums\CrawlOutcomeEnum;
|
||||
use App\Models\Page;
|
||||
use App\Models\PageCrawl;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Tests\TestCase;
|
||||
|
||||
/**
 * Persistence, cast and relationship checks for the PageCrawl model.
 */
class PageCrawlTest extends TestCase
{
    use RefreshDatabase;

    /**
     * Every fillable attribute survives a round trip and comes back with its cast applied.
     */
    public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): void
    {
        $page = Page::factory()->create(['url' => 'https://example.com/page-1']);

        // Expected value per datetime column, keyed by column name.
        $expectedTimes = [
            'scheduled_for' => Carbon::parse('2026-05-01 10:00:00'),
            'locked_at' => Carbon::parse('2026-05-01 10:01:00'),
            'completed_at' => Carbon::parse('2026-05-01 10:01:05'),
        ];

        $stored = PageCrawl::create([
            'page_id' => $page->id,
            'domain' => 'example.com',
            'priority' => 5,
            'scheduled_for' => $expectedTimes['scheduled_for'],
            'locked_at' => $expectedTimes['locked_at'],
            'completed_at' => $expectedTimes['completed_at'],
            'outcome' => CrawlOutcomeEnum::Success,
            'status_code' => 200,
            'error_message' => null,
        ]);

        $reloaded = $stored->fresh();

        self::assertNotNull($reloaded);

        // Plain scalar columns.
        self::assertSame('example.com', $reloaded->domain);
        self::assertSame(5, $reloaded->priority);

        // Enum cast.
        self::assertInstanceOf(CrawlOutcomeEnum::class, $reloaded->outcome);
        self::assertSame(CrawlOutcomeEnum::Success, $reloaded->outcome);

        // Datetime casts: first the type of each column, then its value.
        foreach (array_keys($expectedTimes) as $column) {
            self::assertInstanceOf(Carbon::class, $reloaded->{$column});
        }

        foreach ($expectedTimes as $column => $expected) {
            self::assertTrue($expected->equalTo($reloaded->{$column}));
        }

        // Nullable column stays null.
        self::assertNull($reloaded->error_message);

        // Status code round trip.
        self::assertSame(200, $reloaded->status_code);
    }

    /**
     * The page relation resolves to the Page whose id was stored in page_id.
     */
    public function test_page_crawl_belongs_to_a_page(): void
    {
        $page = Page::factory()->create(['url' => 'https://example.com/page-2']);

        $stored = PageCrawl::create([
            'page_id' => $page->id,
            'domain' => 'example.com',
            'priority' => 1,
            'scheduled_for' => Carbon::now(),
        ]);

        $owner = $stored->page;

        self::assertInstanceOf(Page::class, $owner);
        self::assertSame($page->id, $owner->id);
    }
}
|
||||
|
|
@ -6,7 +6,9 @@
|
|||
|
||||
use App\Enums\PageStatusEnum;
|
||||
use App\Models\Page;
|
||||
use App\Models\PageCrawl;
|
||||
use App\Models\PageLink;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Lvl0\FediDiscover\Config\InstanceType;
|
||||
use Lvl0\FediDiscover\Models\Instance;
|
||||
|
|
@ -97,6 +99,47 @@ public function test_page_language_is_fillable_and_persists(): void
|
|||
$this->assertNull($unset->fresh()->language);
|
||||
}
|
||||
|
||||
/**
 * The crawls relation returns only the rows whose page_id matches the page.
 */
public function test_page_has_many_crawls(): void
{
    $page = Page::factory()->create();
    $other = Page::factory()->create();

    // Three crawls for the page under test, then one for an unrelated page.
    for ($created = 0; $created < 3; $created++) {
        PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com']);
    }
    PageCrawl::create(['page_id' => $other->id, 'domain' => 'other.com']);

    $related = $page->fresh()->crawls;

    self::assertCount(3, $related);

    foreach ($related as $row) {
        self::assertInstanceOf(PageCrawl::class, $row);
        self::assertSame($page->id, $row->page_id);
    }
}
|
||||
|
||||
/**
 * latestCrawl resolves to the row carrying the most recent created_at.
 */
public function test_page_latest_crawl_returns_row_with_latest_created_at(): void
{
    $page = Page::factory()->create();

    // Each entry: the created_at to back-date the row to, plus any extra attributes.
    // Only the newest row carries the sentinel error message asserted on below.
    $timeline = [
        ['2026-01-01 08:00:00', []],
        ['2026-03-15 12:00:00', []],
        ['2026-05-10 18:00:00', ['error_message' => 'sentinel-latest']],
    ];

    foreach ($timeline as [$createdAt, $extra]) {
        $row = PageCrawl::create(['page_id' => $page->id, 'domain' => 'example.com'] + $extra);
        $row->created_at = Carbon::parse($createdAt);
        $row->save();
    }

    $resolved = $page->fresh()->latestCrawl;

    self::assertInstanceOf(PageCrawl::class, $resolved);
    self::assertSame('sentinel-latest', $resolved->error_message);
}
|
||||
|
||||
public function test_page_status_is_cast_to_enum(): void
|
||||
{
|
||||
$cases = [
|
||||
|
|
|
|||
Loading…
Reference in a new issue