8 - Tighten UrlService validation and add observer integration tests
This commit is contained in:
parent
de14ae3ad4
commit
6f75be7328
4 changed files with 56 additions and 2 deletions
|
|
@ -21,15 +21,20 @@ public function host(string $url): string
|
|||
throw new InvalidArgumentException("Invalid URL scheme: {$scheme}");
|
||||
}
|
||||
|
||||
if ($uri->user() !== null) {
|
||||
throw new InvalidArgumentException("URLs with embedded credentials not allowed: {$url}");
|
||||
}
|
||||
|
||||
$host = $uri->host();
|
||||
if ($host === null || $host === '') {
|
||||
throw new InvalidArgumentException("URL has no host: {$url}");
|
||||
}
|
||||
|
||||
if (filter_var(trim($host, '[]'), FILTER_VALIDATE_IP) !== false) {
|
||||
$bareHost = preg_replace('/%.*$/', '', trim($host, '[]'));
|
||||
if (filter_var($bareHost, FILTER_VALIDATE_IP) !== false) {
|
||||
throw new InvalidArgumentException("IP literal hosts not allowed: {$host}");
|
||||
}
|
||||
|
||||
return strtolower($host);
|
||||
return mb_strtolower($host);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,6 +119,30 @@ public function test_listener_with_null_post_url_creates_only_target_page(): voi
|
|||
$this->assertSame($instance->id, $targetPage->instance_id);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integration — UrlDiscovered event enqueues crawls for both pages via observer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Dispatches a UrlDiscovered event that carries both a target URL and a
 * post URL, then checks that the page_crawls table ends up with exactly
 * two rows — one for each of the two domains involved.
 */
public function test_url_discovered_event_enqueues_crawls_via_observer(): void
{
    $sourceInstance = $this->makeInstance();

    // Fire the event through the global dispatcher so the registered
    // listener (and whatever it triggers downstream) actually runs.
    event(new UrlDiscovered(
        url: 'https://example-blog.com/article',
        instanceId: $sourceInstance->id,
        discoveredAt: CarbonImmutable::parse('2026-04-26T12:00:00Z'),
        postUrl: 'https://mastodon.social/@alice/109876543210',
        postBody: 'check this out https://example-blog.com/article',
    ));

    // Listener creates 2 pages (target + source); observer fires for each → 2 crawl rows
    $this->assertDatabaseCount('page_crawls', 2);

    // Target page domain first, then the domain of the post that linked to it.
    foreach (['example-blog.com', 'mastodon.social'] as $expectedDomain) {
        $this->assertDatabaseHas('page_crawls', ['domain' => $expectedDomain]);
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 12 — listener is queued, not run inline
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -118,6 +118,21 @@ public static function invalidUrls(): array
|
|||
];
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Integration — form submission enqueues a crawl via PageObserver
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Submits a valid URL through the UrlSubmissionForm Livewire component and
 * checks that exactly one page_crawls row exists afterwards, recorded
 * against the submitted URL's domain.
 */
public function test_url_submission_form_enqueues_crawl_via_observer(): void
{
    $form = Livewire::test(UrlSubmissionForm::class);

    // Fill in the URL, trigger the submit action, and require a clean validation pass.
    $form
        ->set('url', 'https://example.com/article')
        ->call('submit')
        ->assertHasNoErrors();

    $expectedCrawl = ['domain' => 'example.com'];

    $this->assertDatabaseCount('page_crawls', 1);
    $this->assertDatabaseHas('page_crawls', $expectedCrawl);
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Test 7 — rate limit blocks the 11th submission within a minute
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -96,6 +96,16 @@ public static function invalidInputs(): array
|
|||
'ipv4 literal' => ['https://192.168.1.1/path'],
|
||||
'ipv6 literal' => ['https://[::1]/path'],
|
||||
'ipv4 without path' => ['http://10.0.0.1'],
|
||||
|
||||
// Embedded credentials (userinfo) — phishing/SSRF flag
|
||||
'embedded credentials' => ['https://user:pass@example.com/'],
|
||||
'username only' => ['https://user@example.com/'],
|
||||
|
||||
// IPv6 with zone identifier — zone suffix defeats FILTER_VALIDATE_IP
|
||||
'ipv6 with zone' => ['https://[fe80::1%25eth0]/'],
|
||||
|
||||
// IPv4-mapped IPv6 — FILTER_VALIDATE_IP recognises ::ffff:x.x.x.x as valid IPv6
|
||||
'ipv4 mapped ipv6' => ['https://[::ffff:192.0.2.1]/path'],
|
||||
];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue