111 lines
4 KiB
PHP
111 lines
4 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace Tests\Unit\Services;
|
|
|
|
use App\Services\UrlService;
|
|
use PHPUnit\Framework\Attributes\DataProvider;
|
|
use Tests\TestCase;
|
|
|
|
/**
 * Unit tests for UrlService::host().
 *
 * The first group of tests pins the host string returned for http(s) URLs;
 * the last test pins the InvalidArgumentException expected for every input
 * listed in invalidInputs().
 */
class UrlServiceTest extends TestCase
{
    /** Service under test; replaced with a fresh instance before each test. */
    private UrlService $service;

    /**
     * Runs the parent set-up first, then builds the service under test.
     */
    protected function setUp(): void
    {
        parent::setUp();

        $this->service = new UrlService();
    }

    /**
     * Happy path: a URL made of nothing but scheme and host.
     */
    public function test_extracts_host_from_simple_url(): void
    {
        $host = $this->service->host('https://example.com');

        self::assertSame('example.com', $host);
    }

    /**
     * Path, query string, and fragment must not leak into the returned host.
     */
    #[DataProvider('urlsWithNoise')]
    public function test_extracts_host_ignoring_path_query_and_fragment(string $url, string $expectedHost): void
    {
        $host = $this->service->host($url);

        self::assertSame($expectedHost, $host);
    }

    /**
     * URLs carrying a path, query and/or fragment, paired with the expected host.
     *
     * @return array<string, array{string, string}> dataset name => [url, expected host]
     */
    public static function urlsWithNoise(): array
    {
        $exampleHost = 'example.com';

        return [
            'path only' => [
                'https://example.com/some/path',
                $exampleHost,
            ],
            'path and query' => [
                'https://example.com/page?q=hello&lang=en',
                $exampleHost,
            ],
            'path, query, fragment' => [
                'https://example.com/page?q=1#section',
                $exampleHost,
            ],
            'http scheme with path' => [
                'http://news.ycombinator.com/item?id=42',
                'news.ycombinator.com',
            ],
        ];
    }

    /**
     * An explicit port number is stripped from the host.
     */
    public function test_strips_port_from_host(): void
    {
        $host = $this->service->host('https://example.com:8080/path');

        self::assertSame('example.com', $host);
    }

    /**
     * The host is always returned in lowercase.
     */
    public function test_lowercases_host(): void
    {
        $host = $this->service->host('https://EXAMPLE.COM/path');

        self::assertSame('example.com', $host);
    }

    /**
     * Malformed, disallowed, IP-literal, and credential-bearing input must throw.
     */
    #[DataProvider('invalidInputs')]
    public function test_throws_on_invalid_input(string $url): void
    {
        // The expectation has to be registered before the call that throws.
        $this->expectException(\InvalidArgumentException::class);

        $this->service->host($url);
    }

    /**
     * Inputs that host() is expected to reject, grouped by the reason for rejection.
     *
     * @return array<string, array{string}> dataset name => [url]
     */
    public static function invalidInputs(): array
    {
        // Malformed input, or input missing required URL structure.
        $malformed = [
            'empty string' => [''],
            'no scheme' => ['example.com/path'],
            'scheme only' => ['https://'],
            'bare string' => ['not a url at all'],
        ];

        // Schemes that are not allowed.
        $disallowedSchemes = [
            'javascript scheme' => ['javascript:alert(1)'],
            'ftp scheme' => ['ftp://example.com'],
            'data scheme' => ['data:text/html,<h1>hi</h1>'],
        ];

        // IP literals: not valid page-URL hosts for Trove's purposes.
        $ipLiterals = [
            'ipv4 literal' => ['https://192.168.1.1/path'],
            'ipv6 literal' => ['https://[::1]/path'],
            'ipv4 without path' => ['http://10.0.0.1'],
        ];

        // Embedded credentials (userinfo): a phishing/SSRF flag.
        $withUserinfo = [
            'embedded credentials' => ['https://user:pass@example.com/'],
            'username only' => ['https://user@example.com/'],
        ];

        $ipv6EdgeCases = [
            // IPv6 with a zone identifier: the zone suffix defeats FILTER_VALIDATE_IP.
            'ipv6 with zone' => ['https://[fe80::1%25eth0]/'],
            // IPv4-mapped IPv6: FILTER_VALIDATE_IP recognises ::ffff:x.x.x.x as valid IPv6.
            'ipv4 mapped ipv6' => ['https://[::ffff:192.0.2.1]/path'],
        ];

        // All keys are distinct strings, so merging keeps every dataset and
        // preserves the order in which the groups are listed.
        return array_merge(
            $malformed,
            $disallowedSchemes,
            $ipLiterals,
            $withUserinfo,
            $ipv6EdgeCases,
        );
    }
}
|