14 - Simplify page_crawls schema (queue moves to Redis)

This commit is contained in:
myrmidex 2026-04-26 20:58:07 +02:00
parent 6c0e1fe12d
commit 118de0023a
6 changed files with 0 additions and 64 deletions

View file

@@ -19,20 +19,16 @@ class PageCrawl extends Model
'page_id',
'domain',
'priority',
'scheduled_for',
'completed_at',
'outcome',
'status_code',
'error_message',
'locked_at',
];
protected $casts = [
'priority' => 'integer',
'scheduled_for' => 'datetime',
'completed_at' => 'datetime',
'outcome' => CrawlOutcomeEnum::class,
'locked_at' => 'datetime',
'status_code' => 'integer',
];

View file

@@ -7,7 +7,6 @@
use App\Enums\CrawlOutcomeEnum;
use App\Models\Page;
use App\Models\PageCrawl;
use Carbon\Carbon;
use Illuminate\Database\Eloquent\Factories\Factory;
/**
@@ -21,12 +20,10 @@ public function definition(): array
'page_id' => null,
'domain' => 'example.com',
'priority' => 0,
'scheduled_for' => now(),
'completed_at' => null,
'outcome' => null,
'status_code' => null,
'error_message' => null,
'locked_at' => null,
];
}
@@ -53,19 +50,4 @@ public function failed(string $errorMessage): static
'error_message' => $errorMessage,
]);
}
/**
 * Factory state that overrides the `scheduled_for` attribute.
 *
 * @param Carbon $scheduledAt Timestamp to store in `scheduled_for`.
 *
 * @return static Whatever `state()` returns for this override.
 */
public function scheduledAt(Carbon $scheduledAt): static
{
    // Kept as a non-static closure, matching the original arrow function.
    $override = function () use ($scheduledAt): array {
        return ['scheduled_for' => $scheduledAt];
    };

    return $this->state($override);
}
/**
 * Factory state describing a crawl that has been locked but has no outcome.
 *
 * Sets `locked_at` to `now()` and `outcome` to null. `now()` is called inside
 * the closure, so it is evaluated when the closure runs, not when this method
 * is called.
 *
 * @return static Whatever `state()` returns for this override.
 */
public function locked(): static
{
    $lockedAttributes = function (): array {
        return [
            'locked_at' => now(),
            'outcome' => null,
        ];
    };

    return $this->state($lockedAttributes);
}
}

View file

@@ -4,7 +4,6 @@
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
@@ -18,8 +17,6 @@ public function up(): void
->cascadeOnDelete();
$table->string('domain');
$table->smallInteger('priority')->default(0);
$table->timestampTz('scheduled_for')->useCurrent();
$table->timestampTz('locked_at')->nullable();
$table->timestampTz('completed_at')->nullable();
$table->string('outcome')->nullable();
$table->smallInteger('status_code')->nullable();
@@ -28,16 +25,6 @@ public function up(): void
$table->index(['page_id', 'created_at']);
});
if (DB::getDriverName() === 'pgsql') {
DB::statement('CREATE INDEX page_crawls_pending_domain_idx ON page_crawls (domain) WHERE outcome IS NULL');
DB::statement('CREATE INDEX page_crawls_pending_poll_idx ON page_crawls (scheduled_for, locked_at) WHERE outcome IS NULL');
} else {
Schema::table('page_crawls', function (Blueprint $table) {
$table->index('domain');
$table->index(['scheduled_for', 'locked_at']);
});
}
}
public function down(): void

View file

@@ -30,7 +30,6 @@ public function test_creating_a_page_inserts_a_page_crawl_row(): void
$crawl = PageCrawl::where('page_id', $page->id)->first();
$this->assertNotNull($crawl);
$this->assertNotNull($crawl->scheduled_for);
}
public function test_created_page_crawl_has_null_outcome(): void

View file

@@ -34,23 +34,4 @@ public function test_factory_failed_state_produces_failed_outcome_with_message()
$this->assertInstanceOf(Carbon::class, $crawl->completed_at);
$this->assertSame('Connection timed out', $crawl->error_message);
}
/**
 * The `locked` factory state must yield a crawl whose `locked_at` is a Carbon
 * instance while `completed_at` and `outcome` are both still null.
 */
public function test_factory_locked_state_produces_in_flight_crawl(): void
{
    $owner = Page::factory()->create();

    $lockedCrawl = PageCrawl::factory()
        ->page($owner)
        ->locked()
        ->create();

    // Lock timestamp is present and cast to Carbon.
    $this->assertInstanceOf(Carbon::class, $lockedCrawl->locked_at);

    // Nothing has finished yet: no completion time, no outcome.
    $this->assertNull($lockedCrawl->completed_at);
    $this->assertNull($lockedCrawl->outcome);
}
/**
 * The `scheduledAt` factory state must store the supplied timestamp in
 * `scheduled_for`.
 */
public function test_factory_scheduled_at_state_overrides_default_scheduled_for(): void
{
    $expected = Carbon::parse('2026-05-01 10:00:00');
    $owner = Page::factory()->create();

    $scheduledCrawl = PageCrawl::factory()
        ->page($owner)
        ->scheduledAt($expected)
        ->create();

    $sameInstant = $expected->equalTo($scheduledCrawl->scheduled_for);

    $this->assertTrue($sameInstant);
}
}

View file

@@ -19,16 +19,12 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied():
{
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-1']);
$scheduledFor = Carbon::parse('2026-05-01 10:00:00');
$lockedAt = Carbon::parse('2026-05-01 10:01:00');
$completedAt = Carbon::parse('2026-05-01 10:01:05');
$crawl = PageCrawl::create([
'page_id' => $page->id,
'domain' => 'example.com',
'priority' => 5,
'scheduled_for' => $scheduledFor,
'locked_at' => $lockedAt,
'completed_at' => $completedAt,
'outcome' => CrawlOutcomeEnum::Success,
'status_code' => 200,
@@ -48,12 +44,8 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied():
$this->assertSame(CrawlOutcomeEnum::Success, $fresh->outcome);
// datetime casts
$this->assertInstanceOf(Carbon::class, $fresh->scheduled_for);
$this->assertInstanceOf(Carbon::class, $fresh->locked_at);
$this->assertInstanceOf(Carbon::class, $fresh->completed_at);
$this->assertTrue($scheduledFor->equalTo($fresh->scheduled_for));
$this->assertTrue($lockedAt->equalTo($fresh->locked_at));
$this->assertTrue($completedAt->equalTo($fresh->completed_at));
// nullable columns
@@ -71,7 +63,6 @@ public function test_page_crawl_belongs_to_a_page(): void
'page_id' => $page->id,
'domain' => 'example.com',
'priority' => 1,
'scheduled_for' => Carbon::now(),
]);
$related = $crawl->page;