From 81209125a10b564858fbdb72db4c060bf18fef89 Mon Sep 17 00:00:00 2001 From: myrmidex Date: Sun, 26 Apr 2026 14:52:40 +0200 Subject: [PATCH] 8 - Add UrlService with host extraction method --- app/Services/UrlService.php | 35 +++++++++ tests/Unit/Services/UrlServiceTest.php | 101 +++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 app/Services/UrlService.php create mode 100644 tests/Unit/Services/UrlServiceTest.php diff --git a/app/Services/UrlService.php b/app/Services/UrlService.php new file mode 100644 index 0000000..287f8fa --- /dev/null +++ b/app/Services/UrlService.php @@ -0,0 +1,35 @@ +scheme(); + if ($scheme === null || $scheme === '') { + throw new InvalidArgumentException("URL has no scheme: {$url}"); + } + if (! in_array($scheme, ['http', 'https'], true)) { + throw new InvalidArgumentException("Invalid URL scheme: {$scheme}"); + } + + $host = $uri->host(); + if ($host === null || $host === '') { + throw new InvalidArgumentException("URL has no host: {$url}"); + } + + if (filter_var(trim($host, '[]'), FILTER_VALIDATE_IP) !== false) { + throw new InvalidArgumentException("IP literal hosts not allowed: {$host}"); + } + + return strtolower($host); + } +} diff --git a/tests/Unit/Services/UrlServiceTest.php b/tests/Unit/Services/UrlServiceTest.php new file mode 100644 index 0000000..0632766 --- /dev/null +++ b/tests/Unit/Services/UrlServiceTest.php @@ -0,0 +1,101 @@ +service = new UrlService; + } + + // ------------------------------------------------------------------------- + // Happy path — simple URL + // ------------------------------------------------------------------------- + + public function test_extracts_host_from_simple_url(): void + { + $this->assertSame('example.com', $this->service->host('https://example.com')); + } + + // ------------------------------------------------------------------------- + // Path, query string, and fragment are ignored + // ------------------------------------------------------------------------- + + #[DataProvider('urlsWithNoise')] + public function test_extracts_host_ignoring_path_query_and_fragment(string $url, string $expectedHost): void + { + $this->assertSame($expectedHost, $this->service->host($url)); + } + + public static function urlsWithNoise(): array + { + return [ + 'path only' => ['https://example.com/some/path', 'example.com'], + 'path and query' => ['https://example.com/page?q=hello&lang=en', 'example.com'], + 'path, query, fragment' => ['https://example.com/page?q=1#section', 'example.com'], + 'http scheme with path' => ['http://news.ycombinator.com/item?id=42', 'news.ycombinator.com'], + ]; + } + + // ------------------------------------------------------------------------- + // Port number is stripped from the host + // ------------------------------------------------------------------------- + + public function test_strips_port_from_host(): void + { + $this->assertSame('example.com', $this->service->host('https://example.com:8080/path')); + } + + // ------------------------------------------------------------------------- + // Host is always returned as lowercase + // ------------------------------------------------------------------------- + + public function test_lowercases_host(): void + { + $this->assertSame('example.com', $this->service->host('https://EXAMPLE.COM/path')); + } + + // ------------------------------------------------------------------------- + // Throws on malformed, disallowed, or IP-literal input + // ------------------------------------------------------------------------- + + #[DataProvider('invalidInputs')] + public function test_throws_on_invalid_input(string $url): void + { + $this->expectException(\InvalidArgumentException::class); + + $this->service->host($url); + } + + public static function invalidInputs(): array + { + return [ + // malformed / missing structure + 'empty string' => [''], + 'no scheme' => ['example.com/path'], + 'scheme only' => ['https://'], + 'bare string' => ['not a url at all'], + + // disallowed schemes + 'javascript scheme' => ['javascript:alert(1)'], + 'ftp scheme' => ['ftp://example.com'], + 'data scheme' => ['data:text/html,

hi

'], + + // IP literals — not valid page-URL hosts for Trove's purposes + 'ipv4 literal' => ['https://192.168.1.1/path'], + 'ipv6 literal' => ['https://[::1]/path'], + 'ipv4 without path' => ['http://10.0.0.1'], + ]; + } +}