<?php

declare(strict_types=1);

namespace Tests\Unit\Services;

use App\Services\RobotsService;
use Illuminate\Support\Facades\Http;
use Tests\TestCase;

/**
 * Unit tests for RobotsService.
 *
 * Every test stubs the robots.txt endpoint of example.com through the Http
 * facade fake, resolves the service from the container, and then asserts on
 * the value returned by isAllowed() or crawlDelayFor().
 */
class RobotsServiceTest extends TestCase
{
    /** URL pattern registered with the HTTP fake in every test. */
    private const ROBOTS_URL = 'https://example.com/robots.txt';

    /** User-agent string passed to the service in every test. */
    private const AGENT = 'TroveBot/0.1';

    /**
     * Registers a faked robots.txt response for example.com and then resolves
     * the service under test from the container.
     *
     * @param string $robotsBody Body the faked robots.txt endpoint returns.
     * @param int    $status     HTTP status code of the faked response.
     */
    private function serviceFor(string $robotsBody, int $status = 200): RobotsService
    {
        // The fake is registered before the service is resolved, matching the
        // order used by each test prior to extracting this helper.
        Http::fake([
            self::ROBOTS_URL => Http::response($robotsBody, $status),
        ]);

        return app(RobotsService::class);
    }

    /** A robots.txt with "Allow: /" for every agent permits the article URL. */
    public function test_is_allowed_returns_true_when_robots_txt_allows_path(): void
    {
        $robots = $this->serviceFor("User-agent: *\nAllow: /");

        $verdict = $robots->isAllowed('https://example.com/article', self::AGENT);

        $this->assertTrue($verdict);
    }

    /** A robots.txt with "Disallow: /" for every agent blocks the article URL. */
    public function test_is_allowed_returns_false_when_robots_txt_disallows_path(): void
    {
        $robots = $this->serviceFor("User-agent: *\nDisallow: /");

        $verdict = $robots->isAllowed('https://example.com/article', self::AGENT);

        $this->assertFalse($verdict);
    }

    /** An empty 500 response for robots.txt still yields an "allowed" verdict. */
    public function test_is_allowed_returns_true_when_robots_txt_fetch_fails(): void
    {
        $robots = $this->serviceFor('', 500);

        $verdict = $robots->isAllowed('https://example.com/article', self::AGENT);

        $this->assertTrue($verdict);
    }

    /** Two checks against the same host result in exactly one recorded HTTP request. */
    public function test_is_allowed_caches_robots_txt_body_per_host(): void
    {
        $robots = $this->serviceFor("User-agent: *\nAllow: /");

        // Return values are irrelevant here; only the request count is asserted.
        $robots->isAllowed('https://example.com/article', self::AGENT);
        $robots->isAllowed('https://example.com/another-article', self::AGENT);

        Http::assertSentCount(1);
    }

    /** A "Crawl-delay: 30" directive for the agent is returned as the integer 30. */
    public function test_crawl_delay_for_returns_parsed_value(): void
    {
        $robots = $this->serviceFor("User-agent: TroveBot/0.1\nCrawl-delay: 30");

        $delay = $robots->crawlDelayFor('example.com', self::AGENT);

        $this->assertSame(30, $delay);
    }

    /** A robots.txt without any Crawl-delay directive yields null. */
    public function test_crawl_delay_for_returns_null_when_absent(): void
    {
        $robots = $this->serviceFor("User-agent: *\nDisallow: /private");

        $delay = $robots->crawlDelayFor('example.com', self::AGENT);

        $this->assertNull($delay);
    }
}
|