From 118de0023ae95e26d02efd74ef03aa6ffd91e996 Mon Sep 17 00:00:00 2001 From: myrmidex Date: Sun, 26 Apr 2026 20:58:07 +0200 Subject: [PATCH] 14 - Simplify page_crawls schema (queue moves to Redis) --- app/Models/PageCrawl.php | 4 ---- database/factories/PageCrawlFactory.php | 18 ------------------ ..._04_26_111140_create_page_crawls_table.php | 13 ------------- tests/Feature/PageQueuePopulationTest.php | 1 - tests/Unit/Models/PageCrawlFactoryTest.php | 19 ------------------- tests/Unit/Models/PageCrawlTest.php | 9 --------- 6 files changed, 64 deletions(-) diff --git a/app/Models/PageCrawl.php b/app/Models/PageCrawl.php index 8568bee..aa6f77a 100644 --- a/app/Models/PageCrawl.php +++ b/app/Models/PageCrawl.php @@ -19,20 +19,16 @@ class PageCrawl extends Model 'page_id', 'domain', 'priority', - 'scheduled_for', 'completed_at', 'outcome', 'status_code', 'error_message', - 'locked_at', ]; protected $casts = [ 'priority' => 'integer', - 'scheduled_for' => 'datetime', 'completed_at' => 'datetime', 'outcome' => CrawlOutcomeEnum::class, - 'locked_at' => 'datetime', 'status_code' => 'integer', ]; diff --git a/database/factories/PageCrawlFactory.php b/database/factories/PageCrawlFactory.php index 80c6f7c..cdd6289 100644 --- a/database/factories/PageCrawlFactory.php +++ b/database/factories/PageCrawlFactory.php @@ -7,7 +7,6 @@ use App\Enums\CrawlOutcomeEnum; use App\Models\Page; use App\Models\PageCrawl; -use Carbon\Carbon; use Illuminate\Database\Eloquent\Factories\Factory; /** @@ -21,12 +20,10 @@ public function definition(): array 'page_id' => null, 'domain' => 'example.com', 'priority' => 0, - 'scheduled_for' => now(), 'completed_at' => null, 'outcome' => null, 'status_code' => null, 'error_message' => null, - 'locked_at' => null, ]; } @@ -53,19 +50,4 @@ public function failed(string $errorMessage): static 'error_message' => $errorMessage, ]); } - - public function scheduledAt(Carbon $scheduledAt): static - { - return $this->state(fn () => [ - 'scheduled_for' => $scheduledAt, - ]); - } - - public function locked(): static - { - return $this->state(fn () => [ - 'locked_at' => now(), - 'outcome' => null, - ]); - } } diff --git a/database/migrations/2026_04_26_111140_create_page_crawls_table.php b/database/migrations/2026_04_26_111140_create_page_crawls_table.php index b423f25..9e18d9a 100644 --- a/database/migrations/2026_04_26_111140_create_page_crawls_table.php +++ b/database/migrations/2026_04_26_111140_create_page_crawls_table.php @@ -4,7 +4,6 @@ use Illuminate\Database\Migrations\Migration; use Illuminate\Database\Schema\Blueprint; -use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\Schema; return new class extends Migration @@ -18,8 +17,6 @@ public function up(): void ->cascadeOnDelete(); $table->string('domain'); $table->smallInteger('priority')->default(0); - $table->timestampTz('scheduled_for')->useCurrent(); - $table->timestampTz('locked_at')->nullable(); $table->timestampTz('completed_at')->nullable(); $table->string('outcome')->nullable(); $table->smallInteger('status_code')->nullable(); @@ -28,16 +25,6 @@ public function up(): void $table->index(['page_id', 'created_at']); }); - - if (DB::getDriverName() === 'pgsql') { - DB::statement('CREATE INDEX page_crawls_pending_domain_idx ON page_crawls (domain) WHERE outcome IS NULL'); - DB::statement('CREATE INDEX page_crawls_pending_poll_idx ON page_crawls (scheduled_for, locked_at) WHERE outcome IS NULL'); - } else { - Schema::table('page_crawls', function (Blueprint $table) { - $table->index('domain'); - $table->index(['scheduled_for', 'locked_at']); - }); - } } public function down(): void diff --git a/tests/Feature/PageQueuePopulationTest.php b/tests/Feature/PageQueuePopulationTest.php index 4204799..6addcd0 100644 --- a/tests/Feature/PageQueuePopulationTest.php +++ b/tests/Feature/PageQueuePopulationTest.php @@ -30,7 +30,6 @@ public function test_creating_a_page_inserts_a_page_crawl_row(): void $crawl = PageCrawl::where('page_id', $page->id)->first(); $this->assertNotNull($crawl); - $this->assertNotNull($crawl->scheduled_for); } public function test_created_page_crawl_has_null_outcome(): void diff --git a/tests/Unit/Models/PageCrawlFactoryTest.php b/tests/Unit/Models/PageCrawlFactoryTest.php index 65d29cc..21990fa 100644 --- a/tests/Unit/Models/PageCrawlFactoryTest.php +++ b/tests/Unit/Models/PageCrawlFactoryTest.php @@ -34,23 +34,4 @@ public function test_factory_failed_state_produces_failed_outcome_with_message() $this->assertInstanceOf(Carbon::class, $crawl->completed_at); $this->assertSame('Connection timed out', $crawl->error_message); } - - public function test_factory_locked_state_produces_in_flight_crawl(): void - { - $page = Page::factory()->create(); - $crawl = PageCrawl::factory()->page($page)->locked()->create(); - - $this->assertInstanceOf(Carbon::class, $crawl->locked_at); - $this->assertNull($crawl->completed_at); - $this->assertNull($crawl->outcome); - } - - public function test_factory_scheduled_at_state_overrides_default_scheduled_for(): void - { - $page = Page::factory()->create(); - $timestamp = Carbon::parse('2026-05-01 10:00:00'); - $crawl = PageCrawl::factory()->page($page)->scheduledAt($timestamp)->create(); - - $this->assertTrue($timestamp->equalTo($crawl->scheduled_for)); - } } diff --git a/tests/Unit/Models/PageCrawlTest.php b/tests/Unit/Models/PageCrawlTest.php index 4c19294..73fdad0 100644 --- a/tests/Unit/Models/PageCrawlTest.php +++ b/tests/Unit/Models/PageCrawlTest.php @@ -19,16 +19,12 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): { $page = Page::factory()->createQuietly(['url' => 'https://example.com/page-1']); - $scheduledFor = Carbon::parse('2026-05-01 10:00:00'); - $lockedAt = Carbon::parse('2026-05-01 10:01:00'); $completedAt = Carbon::parse('2026-05-01 10:01:05'); $crawl = PageCrawl::create([ 'page_id' => $page->id, 'domain' => 'example.com', 'priority' => 5, - 'scheduled_for' => $scheduledFor, - 'locked_at' => $lockedAt, 'completed_at' => $completedAt, 'outcome' => CrawlOutcomeEnum::Success, 'status_code' => 200, @@ -48,12 +44,8 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied(): $this->assertSame(CrawlOutcomeEnum::Success, $fresh->outcome); // datetime casts - $this->assertInstanceOf(Carbon::class, $fresh->scheduled_for); - $this->assertInstanceOf(Carbon::class, $fresh->locked_at); $this->assertInstanceOf(Carbon::class, $fresh->completed_at); - $this->assertTrue($scheduledFor->equalTo($fresh->scheduled_for)); - $this->assertTrue($lockedAt->equalTo($fresh->locked_at)); $this->assertTrue($completedAt->equalTo($fresh->completed_at)); // nullable columns @@ -71,7 +63,6 @@ public function test_page_crawl_belongs_to_a_page(): void 'page_id' => $page->id, 'domain' => 'example.com', 'priority' => 1, - 'scheduled_for' => Carbon::now(), ]); $related = $crawl->page;