8 - Add UrlService with host extraction method

This commit is contained in:
myrmidex 2026-04-26 14:52:40 +02:00
parent f2c1fab4e4
commit 81209125a1
2 changed files with 136 additions and 0 deletions

View file

@ -0,0 +1,35 @@
<?php
declare(strict_types=1);
namespace App\Services;
use Illuminate\Support\Uri;
use InvalidArgumentException;
/**
 * Extracts and validates the host component of page URLs.
 */
class UrlService
{
    /**
     * Return the lowercased host of an absolute http(s) URL.
     *
     * Path, query string, fragment and port are not part of the result. Only
     * the `http` and `https` schemes are accepted, and the host must be a name
     * rather than an IP literal.
     *
     * NOTE(review): Uri::of() may itself throw on input it cannot parse —
     * confirm which exception type callers should expect in that case.
     *
     * @param  string  $url  The URL to inspect.
     * @return string The host, lowercased.
     *
     * @throws InvalidArgumentException If the URL has no scheme, a scheme other
     *                                  than http/https, no host, or an
     *                                  IP-literal host.
     */
    public function host(string $url): string
    {
        $uri = Uri::of($url);

        $scheme = $uri->scheme();

        if ($scheme === null || $scheme === '') {
            throw new InvalidArgumentException("URL has no scheme: {$url}");
        }

        if (! in_array($scheme, ['http', 'https'], true)) {
            throw new InvalidArgumentException("Invalid URL scheme: {$scheme}");
        }

        $host = $uri->host();

        if ($host === null || $host === '') {
            throw new InvalidArgumentException("URL has no host: {$url}");
        }

        // RFC 3986 reserves square brackets in the host for IP literals, so any
        // bracketed host is rejected outright. Relying solely on
        // FILTER_VALIDATE_IP for the unbracketed text would let through literals
        // that filter_var() does not recognise as an address, e.g. a zone-scoped
        // IPv6 address ("[fe80::1%25eth0]") or an IPvFuture literal ("[v1.x]").
        $isBracketedLiteral = str_starts_with($host, '[');
        $isIpAddress = filter_var(trim($host, '[]'), FILTER_VALIDATE_IP) !== false;

        if ($isBracketedLiteral || $isIpAddress) {
            throw new InvalidArgumentException("IP literal hosts not allowed: {$host}");
        }

        return strtolower($host);
    }
}

View file

@ -0,0 +1,101 @@
<?php
declare(strict_types=1);
namespace Tests\Unit\Services;
use App\Services\UrlService;
use PHPUnit\Framework\Attributes\DataProvider;
use Tests\TestCase;
/**
 * Unit tests for UrlService::host().
 */
class UrlServiceTest extends TestCase
{
    /** Service under test; assigned in setUp(). */
    private UrlService $urlService;

    protected function setUp(): void
    {
        parent::setUp();

        $this->urlService = new UrlService();
    }

    /**
     * Happy path: a bare https URL yields its host.
     */
    public function test_extracts_host_from_simple_url(): void
    {
        $actualHost = $this->urlService->host('https://example.com');

        self::assertSame('example.com', $actualHost);
    }

    /**
     * Path, query string and fragment do not affect the extracted host.
     */
    #[DataProvider('urlsWithNoise')]
    public function test_extracts_host_ignoring_path_query_and_fragment(string $url, string $expectedHost): void
    {
        $actualHost = $this->urlService->host($url);

        self::assertSame($expectedHost, $actualHost);
    }

    /**
     * URLs carrying extra components, each paired with the host expected back.
     *
     * @return array<string, array{string, string}>
     */
    public static function urlsWithNoise(): array
    {
        return [
            'path only' => [
                'https://example.com/some/path',
                'example.com',
            ],
            'path and query' => [
                'https://example.com/page?q=hello&lang=en',
                'example.com',
            ],
            'path, query, fragment' => [
                'https://example.com/page?q=1#section',
                'example.com',
            ],
            'http scheme with path' => [
                'http://news.ycombinator.com/item?id=42',
                'news.ycombinator.com',
            ],
        ];
    }

    /**
     * An explicit port number is not part of the returned host.
     */
    public function test_strips_port_from_host(): void
    {
        $actualHost = $this->urlService->host('https://example.com:8080/path');

        self::assertSame('example.com', $actualHost);
    }

    /**
     * The host comes back lowercased regardless of the input's casing.
     */
    public function test_lowercases_host(): void
    {
        $actualHost = $this->urlService->host('https://EXAMPLE.COM/path');

        self::assertSame('example.com', $actualHost);
    }

    /**
     * Malformed, disallowed-scheme and IP-literal inputs are all rejected.
     */
    #[DataProvider('invalidInputs')]
    public function test_throws_on_invalid_input(string $url): void
    {
        // The expectation has to be registered before the call that throws.
        $this->expectException(\InvalidArgumentException::class);

        $this->urlService->host($url);
    }

    /**
     * Inputs that host() must refuse, grouped by the reason for rejection.
     *
     * @return array<string, array{string}>
     */
    public static function invalidInputs(): array
    {
        return [
            // Malformed input or missing structure.
            'empty string' => [''],
            'no scheme' => ['example.com/path'],
            'scheme only' => ['https://'],
            'bare string' => ['not a url at all'],

            // Schemes other than http/https.
            'javascript scheme' => ['javascript:alert(1)'],
            'ftp scheme' => ['ftp://example.com'],
            'data scheme' => ['data:text/html,<h1>hi</h1>'],

            // IP literals — not valid page-URL hosts for Trove's purposes.
            'ipv4 literal' => ['https://192.168.1.1/path'],
            'ipv6 literal' => ['https://[::1]/path'],
            'ipv4 without path' => ['http://10.0.0.1'],
        ];
    }
}