service = new UrlService; }

    // -------------------------------------------------------------------------
    // Happy path — simple URL
    // -------------------------------------------------------------------------

    /**
     * A URL made of nothing but a scheme and a host yields that host.
     */
    public function test_extracts_host_from_simple_url(): void
    {
        $actual = $this->service->host('https://example.com');

        $this->assertSame('example.com', $actual);
    }

    // -------------------------------------------------------------------------
    // Path, query string, and fragment are ignored
    // -------------------------------------------------------------------------

    /**
     * Whatever follows the host (path, query string, fragment) must not
     * influence the value returned by host().
     *
     * @param string $url          URL handed to the service.
     * @param string $expectedHost Host the service is expected to return.
     */
    #[DataProvider('urlsWithNoise')]
    public function test_extracts_host_ignoring_path_query_and_fragment(string $url, string $expectedHost): void
    {
        $actual = $this->service->host($url);

        $this->assertSame($expectedHost, $actual);
    }

    /**
     * Named datasets for test_extracts_host_ignoring_path_query_and_fragment().
     *
     * @return array<string, array{0: string, 1: string}> Dataset name => [url, expected host].
     */
    public static function urlsWithNoise(): array
    {
        $cases = [];

        $cases['path only'] = ['https://example.com/some/path', 'example.com'];
        $cases['path and query'] = ['https://example.com/page?q=hello&lang=en', 'example.com'];
        $cases['path, query, fragment'] = ['https://example.com/page?q=1#section', 'example.com'];
        $cases['http scheme with path'] = ['http://news.ycombinator.com/item?id=42', 'news.ycombinator.com'];

        return $cases;
    }

    // -------------------------------------------------------------------------
    // Port number is stripped from the host
    // -------------------------------------------------------------------------

    /**
     * An explicit port (":8080") must not appear in the returned host.
     */
    public function test_strips_port_from_host(): void
    {
        $actual = $this->service->host('https://example.com:8080/path');

        $this->assertSame('example.com', $actual);
    }

    // -------------------------------------------------------------------------
    // Host is always returned as lowercase
    // -------------------------------------------------------------------------

    /**
     * An upper-case host in the input comes back lower-cased.
     */
    public function test_lowercases_host(): void
    {
        $actual = $this->service->host('https://EXAMPLE.COM/path');

        $this->assertSame('example.com', $actual);
    }

    // -------------------------------------------------------------------------
    // Throws on malformed, disallowed, or IP-literal input
// -------------------------------------------------------------------------

    /**
     * Every dataset from invalidInputs() must make host() throw
     * \InvalidArgumentException.
     *
     * @param string $url Input that the service is expected to reject.
     */
    #[DataProvider('invalidInputs')]
    public function test_throws_on_invalid_input(string $url): void
    {
        $expectedException = \InvalidArgumentException::class;

        // The expectation has to be registered before the call that throws.
        $this->expectException($expectedException);

        $this->service->host($url);
    }

    /**
     * Named datasets for test_throws_on_invalid_input().
     *
     * @return array<string, array{0: string}> Dataset name => [rejected input].
     */
    public static function invalidInputs(): array
    {
        // Dataset name => raw input; each value is wrapped into a one-element
        // argument list below.
        $rejected = [
            // malformed / missing structure
            'empty string' => '',
            'no scheme' => 'example.com/path',
            'scheme only' => 'https://',
            'bare string' => 'not a url at all',

            // disallowed schemes
            'javascript scheme' => 'javascript:alert(1)',
            'ftp scheme' => 'ftp://example.com',
            // The data: payload contains raw newlines around "hi".
            'data scheme' => "data:text/html,\n\nhi\n\n",

            // IP literals — not valid page-URL hosts for Trove's purposes
            'ipv4 literal' => 'https://192.168.1.1/path',
            'ipv6 literal' => 'https://[::1]/path',
            'ipv4 without path' => 'http://10.0.0.1',

            // Embedded credentials (userinfo) — phishing/SSRF flag
            'embedded credentials' => 'https://user:pass@example.com/',
            'username only' => 'https://user@example.com/',

            // IPv6 with zone identifier — zone suffix defeats FILTER_VALIDATE_IP
            'ipv6 with zone' => 'https://[fe80::1%25eth0]/',

            // IPv4-mapped IPv6 — FILTER_VALIDATE_IP recognises ::ffff:x.x.x.x as valid IPv6
            'ipv4 mapped ipv6' => 'https://[::ffff:192.0.2.1]/path',
        ];

        // array_map() over a single array keeps the string keys, so the
        // dataset names survive the wrapping.
        return array_map(
            static fn (string $input): array => [$input],
            $rejected,
        );
    }
}