8 - Tighten UrlService validation and add observer integration tests

This commit is contained in:
myrmidex 2026-04-26 16:09:28 +02:00
parent de14ae3ad4
commit 6f75be7328
4 changed files with 56 additions and 2 deletions

View file

@ -21,15 +21,20 @@ public function host(string $url): string
throw new InvalidArgumentException("Invalid URL scheme: {$scheme}");
}
if ($uri->user() !== null) {
throw new InvalidArgumentException("URLs with embedded credentials not allowed: {$url}");
}
$host = $uri->host();
if ($host === null || $host === '') {
throw new InvalidArgumentException("URL has no host: {$url}");
}
if (filter_var(trim($host, '[]'), FILTER_VALIDATE_IP) !== false) {
$bareHost = preg_replace('/%.*$/', '', trim($host, '[]'));
if (filter_var($bareHost, FILTER_VALIDATE_IP) !== false) {
throw new InvalidArgumentException("IP literal hosts not allowed: {$host}");
}
return strtolower($host);
return mb_strtolower($host);
}
}

View file

@ -119,6 +119,30 @@ public function test_listener_with_null_post_url_creates_only_target_page(): voi
$this->assertSame($instance->id, $targetPage->instance_id);
}
// ---------------------------------------------------------------------------
// Integration — UrlDiscovered event enqueues crawls for both pages via observer
// ---------------------------------------------------------------------------
public function test_url_discovered_event_enqueues_crawls_via_observer(): void
{
    // End-to-end check: dispatch a UrlDiscovered event carrying both a target
    // URL and the post it was found in, then inspect the page_crawls table.
    $owningInstance = $this->makeInstance();

    event(new UrlDiscovered(
        url: 'https://example-blog.com/article',
        instanceId: $owningInstance->id,
        discoveredAt: CarbonImmutable::parse('2026-04-26T12:00:00Z'),
        postUrl: 'https://mastodon.social/@alice/109876543210',
        postBody: 'check this out https://example-blog.com/article',
    ));

    // Expectation under test: the listener produces a target page and a source
    // page, and the observer enqueues one crawl per page, giving two rows.
    $this->assertDatabaseCount('page_crawls', 2);

    // One crawl row per host: first the linked article, then the originating post.
    foreach (['example-blog.com', 'mastodon.social'] as $expectedDomain) {
        $this->assertDatabaseHas('page_crawls', ['domain' => $expectedDomain]);
    }
}
// ---------------------------------------------------------------------------
// Test 12 — listener is queued, not run inline
// ---------------------------------------------------------------------------

View file

@ -118,6 +118,21 @@ public static function invalidUrls(): array
];
}
// -------------------------------------------------------------------------
// Integration — form submission enqueues a crawl via PageObserver
// -------------------------------------------------------------------------
public function test_url_submission_form_enqueues_crawl_via_observer(): void
{
    // Drive the submission form the way a user would: fill in the URL field,
    // trigger the submit action, and require that no validation errors surface.
    $form = Livewire::test(UrlSubmissionForm::class);
    $filledForm = $form->set('url', 'https://example.com/article');
    $submittedForm = $filledForm->call('submit');
    $submittedForm->assertHasNoErrors();

    // A single submitted URL is expected to leave exactly one crawl row,
    // recorded against the submitted URL's host.
    $this->assertDatabaseCount('page_crawls', 1);
    $this->assertDatabaseHas('page_crawls', ['domain' => 'example.com']);
}
// -------------------------------------------------------------------------
// Test 7 — rate limit blocks the 11th submission within a minute
// -------------------------------------------------------------------------

View file

@ -96,6 +96,16 @@ public static function invalidInputs(): array
'ipv4 literal' => ['https://192.168.1.1/path'],
'ipv6 literal' => ['https://[::1]/path'],
'ipv4 without path' => ['http://10.0.0.1'],
// Embedded credentials (userinfo) — phishing/SSRF flag
'embedded credentials' => ['https://user:pass@example.com/'],
'username only' => ['https://user@example.com/'],
// IPv6 with zone identifier — zone suffix defeats FILTER_VALIDATE_IP
'ipv6 with zone' => ['https://[fe80::1%25eth0]/'],
// IPv4-mapped IPv6 — FILTER_VALIDATE_IP recognises ::ffff:x.x.x.x as valid IPv6
'ipv4 mapped ipv6' => ['https://[::ffff:192.0.2.1]/path'],
];
}
}