14 - Simplify page_crawls schema (queue moves to Redis)
This commit is contained in:
parent
6c0e1fe12d
commit
118de0023a
6 changed files with 0 additions and 64 deletions
|
|
@ -19,20 +19,16 @@ class PageCrawl extends Model
|
||||||
'page_id',
|
'page_id',
|
||||||
'domain',
|
'domain',
|
||||||
'priority',
|
'priority',
|
||||||
'scheduled_for',
|
|
||||||
'completed_at',
|
'completed_at',
|
||||||
'outcome',
|
'outcome',
|
||||||
'status_code',
|
'status_code',
|
||||||
'error_message',
|
'error_message',
|
||||||
'locked_at',
|
|
||||||
];
|
];
|
||||||
|
|
||||||
protected $casts = [
|
protected $casts = [
|
||||||
'priority' => 'integer',
|
'priority' => 'integer',
|
||||||
'scheduled_for' => 'datetime',
|
|
||||||
'completed_at' => 'datetime',
|
'completed_at' => 'datetime',
|
||||||
'outcome' => CrawlOutcomeEnum::class,
|
'outcome' => CrawlOutcomeEnum::class,
|
||||||
'locked_at' => 'datetime',
|
|
||||||
'status_code' => 'integer',
|
'status_code' => 'integer',
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@
|
||||||
use App\Enums\CrawlOutcomeEnum;
|
use App\Enums\CrawlOutcomeEnum;
|
||||||
use App\Models\Page;
|
use App\Models\Page;
|
||||||
use App\Models\PageCrawl;
|
use App\Models\PageCrawl;
|
||||||
use Carbon\Carbon;
|
|
||||||
use Illuminate\Database\Eloquent\Factories\Factory;
|
use Illuminate\Database\Eloquent\Factories\Factory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -21,12 +20,10 @@ public function definition(): array
|
||||||
'page_id' => null,
|
'page_id' => null,
|
||||||
'domain' => 'example.com',
|
'domain' => 'example.com',
|
||||||
'priority' => 0,
|
'priority' => 0,
|
||||||
'scheduled_for' => now(),
|
|
||||||
'completed_at' => null,
|
'completed_at' => null,
|
||||||
'outcome' => null,
|
'outcome' => null,
|
||||||
'status_code' => null,
|
'status_code' => null,
|
||||||
'error_message' => null,
|
'error_message' => null,
|
||||||
'locked_at' => null,
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -53,19 +50,4 @@ public function failed(string $errorMessage): static
|
||||||
'error_message' => $errorMessage,
|
'error_message' => $errorMessage,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function scheduledAt(Carbon $scheduledAt): static
|
|
||||||
{
|
|
||||||
return $this->state(fn () => [
|
|
||||||
'scheduled_for' => $scheduledAt,
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function locked(): static
|
|
||||||
{
|
|
||||||
return $this->state(fn () => [
|
|
||||||
'locked_at' => now(),
|
|
||||||
'outcome' => null,
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@
|
||||||
|
|
||||||
use Illuminate\Database\Migrations\Migration;
|
use Illuminate\Database\Migrations\Migration;
|
||||||
use Illuminate\Database\Schema\Blueprint;
|
use Illuminate\Database\Schema\Blueprint;
|
||||||
use Illuminate\Support\Facades\DB;
|
|
||||||
use Illuminate\Support\Facades\Schema;
|
use Illuminate\Support\Facades\Schema;
|
||||||
|
|
||||||
return new class extends Migration
|
return new class extends Migration
|
||||||
|
|
@ -18,8 +17,6 @@ public function up(): void
|
||||||
->cascadeOnDelete();
|
->cascadeOnDelete();
|
||||||
$table->string('domain');
|
$table->string('domain');
|
||||||
$table->smallInteger('priority')->default(0);
|
$table->smallInteger('priority')->default(0);
|
||||||
$table->timestampTz('scheduled_for')->useCurrent();
|
|
||||||
$table->timestampTz('locked_at')->nullable();
|
|
||||||
$table->timestampTz('completed_at')->nullable();
|
$table->timestampTz('completed_at')->nullable();
|
||||||
$table->string('outcome')->nullable();
|
$table->string('outcome')->nullable();
|
||||||
$table->smallInteger('status_code')->nullable();
|
$table->smallInteger('status_code')->nullable();
|
||||||
|
|
@ -28,16 +25,6 @@ public function up(): void
|
||||||
|
|
||||||
$table->index(['page_id', 'created_at']);
|
$table->index(['page_id', 'created_at']);
|
||||||
});
|
});
|
||||||
|
|
||||||
if (DB::getDriverName() === 'pgsql') {
|
|
||||||
DB::statement('CREATE INDEX page_crawls_pending_domain_idx ON page_crawls (domain) WHERE outcome IS NULL');
|
|
||||||
DB::statement('CREATE INDEX page_crawls_pending_poll_idx ON page_crawls (scheduled_for, locked_at) WHERE outcome IS NULL');
|
|
||||||
} else {
|
|
||||||
Schema::table('page_crawls', function (Blueprint $table) {
|
|
||||||
$table->index('domain');
|
|
||||||
$table->index(['scheduled_for', 'locked_at']);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function down(): void
|
public function down(): void
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,6 @@ public function test_creating_a_page_inserts_a_page_crawl_row(): void
|
||||||
|
|
||||||
$crawl = PageCrawl::where('page_id', $page->id)->first();
|
$crawl = PageCrawl::where('page_id', $page->id)->first();
|
||||||
$this->assertNotNull($crawl);
|
$this->assertNotNull($crawl);
|
||||||
$this->assertNotNull($crawl->scheduled_for);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function test_created_page_crawl_has_null_outcome(): void
|
public function test_created_page_crawl_has_null_outcome(): void
|
||||||
|
|
|
||||||
|
|
@ -34,23 +34,4 @@ public function test_factory_failed_state_produces_failed_outcome_with_message()
|
||||||
$this->assertInstanceOf(Carbon::class, $crawl->completed_at);
|
$this->assertInstanceOf(Carbon::class, $crawl->completed_at);
|
||||||
$this->assertSame('Connection timed out', $crawl->error_message);
|
$this->assertSame('Connection timed out', $crawl->error_message);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function test_factory_locked_state_produces_in_flight_crawl(): void
|
|
||||||
{
|
|
||||||
$page = Page::factory()->create();
|
|
||||||
$crawl = PageCrawl::factory()->page($page)->locked()->create();
|
|
||||||
|
|
||||||
$this->assertInstanceOf(Carbon::class, $crawl->locked_at);
|
|
||||||
$this->assertNull($crawl->completed_at);
|
|
||||||
$this->assertNull($crawl->outcome);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function test_factory_scheduled_at_state_overrides_default_scheduled_for(): void
|
|
||||||
{
|
|
||||||
$page = Page::factory()->create();
|
|
||||||
$timestamp = Carbon::parse('2026-05-01 10:00:00');
|
|
||||||
$crawl = PageCrawl::factory()->page($page)->scheduledAt($timestamp)->create();
|
|
||||||
|
|
||||||
$this->assertTrue($timestamp->equalTo($crawl->scheduled_for));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,16 +19,12 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied():
|
||||||
{
|
{
|
||||||
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-1']);
|
$page = Page::factory()->createQuietly(['url' => 'https://example.com/page-1']);
|
||||||
|
|
||||||
$scheduledFor = Carbon::parse('2026-05-01 10:00:00');
|
|
||||||
$lockedAt = Carbon::parse('2026-05-01 10:01:00');
|
|
||||||
$completedAt = Carbon::parse('2026-05-01 10:01:05');
|
$completedAt = Carbon::parse('2026-05-01 10:01:05');
|
||||||
|
|
||||||
$crawl = PageCrawl::create([
|
$crawl = PageCrawl::create([
|
||||||
'page_id' => $page->id,
|
'page_id' => $page->id,
|
||||||
'domain' => 'example.com',
|
'domain' => 'example.com',
|
||||||
'priority' => 5,
|
'priority' => 5,
|
||||||
'scheduled_for' => $scheduledFor,
|
|
||||||
'locked_at' => $lockedAt,
|
|
||||||
'completed_at' => $completedAt,
|
'completed_at' => $completedAt,
|
||||||
'outcome' => CrawlOutcomeEnum::Success,
|
'outcome' => CrawlOutcomeEnum::Success,
|
||||||
'status_code' => 200,
|
'status_code' => 200,
|
||||||
|
|
@ -48,12 +44,8 @@ public function test_page_crawl_fillable_fields_persist_and_casts_are_applied():
|
||||||
$this->assertSame(CrawlOutcomeEnum::Success, $fresh->outcome);
|
$this->assertSame(CrawlOutcomeEnum::Success, $fresh->outcome);
|
||||||
|
|
||||||
// datetime casts
|
// datetime casts
|
||||||
$this->assertInstanceOf(Carbon::class, $fresh->scheduled_for);
|
|
||||||
$this->assertInstanceOf(Carbon::class, $fresh->locked_at);
|
|
||||||
$this->assertInstanceOf(Carbon::class, $fresh->completed_at);
|
$this->assertInstanceOf(Carbon::class, $fresh->completed_at);
|
||||||
|
|
||||||
$this->assertTrue($scheduledFor->equalTo($fresh->scheduled_for));
|
|
||||||
$this->assertTrue($lockedAt->equalTo($fresh->locked_at));
|
|
||||||
$this->assertTrue($completedAt->equalTo($fresh->completed_at));
|
$this->assertTrue($completedAt->equalTo($fresh->completed_at));
|
||||||
|
|
||||||
// nullable columns
|
// nullable columns
|
||||||
|
|
@ -71,7 +63,6 @@ public function test_page_crawl_belongs_to_a_page(): void
|
||||||
'page_id' => $page->id,
|
'page_id' => $page->id,
|
||||||
'domain' => 'example.com',
|
'domain' => 'example.com',
|
||||||
'priority' => 1,
|
'priority' => 1,
|
||||||
'scheduled_for' => Carbon::now(),
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
$related = $crawl->page;
|
$related = $crawl->page;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue