trove/tests/Unit/Services/PolitenessServiceTest.php
myrmidex cda1414cd8
Some checks failed
CI / ci (push) Failing after 3h0m1s
CI / ci (pull_request) Has been cancelled
9 - Add robots.txt handling with cache and politeness integration
2026-04-27 23:53:52 +02:00

56 lines
1.9 KiB
PHP

<?php
declare(strict_types=1);
namespace Tests\Unit\Services;
use App\Services\PolitenessService;
use Illuminate\Support\Facades\Http;
use Tests\TestCase;
/**
 * Unit tests for PolitenessService::minDelayFor().
 *
 * The first two tests exercise the configured minimum delay on its own; the
 * last two check how that value combines with a Crawl-delay served by a
 * faked robots.txt response.
 */
class PolitenessServiceTest extends TestCase
{
    /**
     * Full user-agent string shared by the robots.txt fixture and the
     * crawler config, so the two can never drift apart.
     */
    private const USER_AGENT = 'TroveBot/0.1 (+https://trove.lvl0.xyz/bot)';

    /** Host whose delay is queried in every test. */
    private const HOST = 'example.com';

    public function test_min_delay_for_returns_config_default(): void
    {
        // No override is set here; the expected 10 presumably comes from the
        // application's shipped crawler config — confirm if that default changes.
        $service = new PolitenessService;

        $this->assertSame(10, $service->minDelayFor(self::HOST));
    }

    public function test_min_delay_for_respects_config_override(): void
    {
        config()->set('crawler.min_domain_delay_seconds', 30);

        $service = new PolitenessService;

        $this->assertSame(30, $service->minDelayFor(self::HOST));
    }

    public function test_min_delay_for_uses_robots_crawl_delay_when_higher(): void
    {
        // robots.txt asks for 30s, config only for 10s: robots.txt wins.
        $this->primeRobotsScenario(crawlDelay: 30, configuredDelay: 10);

        $service = app(PolitenessService::class);

        $this->assertSame(30, $service->minDelayFor(self::HOST));
    }

    public function test_min_delay_for_uses_config_when_higher_than_robots(): void
    {
        // robots.txt asks for 10s, config for 60s: config wins.
        $this->primeRobotsScenario(crawlDelay: 10, configuredDelay: 60);

        $service = app(PolitenessService::class);

        $this->assertSame(60, $service->minDelayFor(self::HOST));
    }

    /**
     * Fake the robots.txt response for the test host and set the crawler config.
     *
     * @param int $crawlDelay      Crawl-delay value advertised by the faked robots.txt.
     * @param int $configuredDelay Value written to crawler.min_domain_delay_seconds.
     */
    private function primeRobotsScenario(int $crawlDelay, int $configuredDelay): void
    {
        // Spatie does exact-token matching (lowercased), so the fixture UA
        // must match the full string the service passes to crawlDelayFor().
        $robotsTxt = sprintf("User-agent: %s\nCrawl-delay: %d", self::USER_AGENT, $crawlDelay);

        Http::fake([
            'https://example.com/robots.txt' => Http::response($robotsTxt, 200),
        ]);

        // Same keys, same order as the individual set() calls this replaces.
        config()->set([
            'crawler.min_domain_delay_seconds' => $configuredDelay,
            'crawler.user_agent' => self::USER_AGENT,
        ]);
    }
}