156 lines
5.3 KiB
PHP
156 lines
5.3 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace Tests\Feature\Actions;
|
|
|
|
use App\Actions\FetchPageAction;
|
|
use App\Enums\CrawlOutcomeEnum;
|
|
use App\ValueObjects\FetchResult;
|
|
use GuzzleHttp\Exception\ConnectException;
|
|
use GuzzleHttp\Psr7\Request;
|
|
use Illuminate\Support\Facades\Http;
|
|
use Tests\TestCase;
|
|
|
|
/**
 * Feature tests for FetchPageAction, driven through Laravel's faked HTTP client.
 *
 * Every test stubs the HTTP layer, invokes the action with a URL, and then
 * inspects the fields of the value the action returned.
 */
class FetchPageActionTest extends TestCase
{
    /** URL pattern under which stubbed responses are registered. */
    private const STUB_PATTERN = 'example.com/*';

    /** URL attached to the request carried by simulated connect exceptions. */
    private const PAGE_URL = 'https://example.com/page';

    /**
     * A 200 response served as text/html is reported as a success.
     */
    public function test_successful_html_fetch_returns_success_outcome(): void
    {
        $this->stubResponse(
            '<html><body>Hello</body></html>',
            200,
            ['Content-Type' => 'text/html'],
        );

        $fetched = $this->fetch('https://example.com/page');

        self::assertInstanceOf(FetchResult::class, $fetched);
        self::assertSame(CrawlOutcomeEnum::Success, $fetched->outcome);
        self::assertSame(200, $fetched->statusCode);
        self::assertNotNull($fetched->finalUrl);
    }

    /**
     * A 404 response maps to Blocked4xx and the status code shows up in the error message.
     */
    public function test_4xx_response_returns_blocked_4xx(): void
    {
        $this->stubResponse('Not Found', 404);

        $fetched = $this->fetch('https://example.com/missing');

        self::assertInstanceOf(FetchResult::class, $fetched);
        self::assertSame(CrawlOutcomeEnum::Blocked4xx, $fetched->outcome);
        self::assertSame(404, $fetched->statusCode);
        self::assertIsString($fetched->errorMessage);
        self::assertStringContainsString('404', $fetched->errorMessage);
        self::assertNotNull($fetched->finalUrl);
    }

    /**
     * A 503 response maps to Blocked5xx and the status code shows up in the error message.
     */
    public function test_5xx_response_returns_blocked_5xx(): void
    {
        $this->stubResponse('Service Unavailable', 503);

        $fetched = $this->fetch('https://example.com/page');

        self::assertInstanceOf(FetchResult::class, $fetched);
        self::assertSame(CrawlOutcomeEnum::Blocked5xx, $fetched->outcome);
        self::assertSame(503, $fetched->statusCode);
        self::assertIsString($fetched->errorMessage);
        self::assertStringContainsString('503', $fetched->errorMessage);
        self::assertNotNull($fetched->finalUrl);
    }

    /**
     * A 200 response with a non-HTML content type is rejected and carries no extracted content.
     */
    public function test_non_html_content_type_returns_rejected(): void
    {
        $this->stubResponse(
            'PDF binary stuff',
            200,
            ['Content-Type' => 'application/pdf'],
        );

        $fetched = $this->fetch('https://example.com/document.pdf');

        self::assertInstanceOf(FetchResult::class, $fetched);
        self::assertSame(CrawlOutcomeEnum::Rejected, $fetched->outcome);
        self::assertSame(200, $fetched->statusCode);
        self::assertIsString($fetched->errorMessage);
        self::assertStringContainsString('application/pdf', $fetched->errorMessage);
        self::assertNotNull($fetched->finalUrl);
        self::assertNull($fetched->title);
        self::assertNull($fetched->extractedText);
        self::assertEmpty($fetched->outboundLinks);
        self::assertNull($fetched->wordCount);
    }

    /**
     * A charset parameter appended to text/html does not prevent a success outcome.
     */
    public function test_text_html_with_charset_is_accepted(): void
    {
        $this->stubResponse(
            '<html><body>Hello charset world</body></html>',
            200,
            ['Content-Type' => 'text/html; charset=utf-8'],
        );

        $fetched = $this->fetch('https://example.com/page');

        self::assertInstanceOf(FetchResult::class, $fetched);
        self::assertSame(CrawlOutcomeEnum::Success, $fetched->outcome);
        self::assertSame(200, $fetched->statusCode);
    }

    /**
     * A connect exception with errno 6 ("Could not resolve host") is reported as Failed,
     * with no status code, final URL or extracted content.
     */
    public function test_connection_failure_returns_failed(): void
    {
        $this->stubConnectFailure('Could not resolve host', 6);

        $fetched = $this->fetch('https://example.com/page');

        self::assertInstanceOf(FetchResult::class, $fetched);
        self::assertSame(CrawlOutcomeEnum::Failed, $fetched->outcome);
        self::assertNull($fetched->statusCode);
        self::assertNull($fetched->finalUrl);
        self::assertIsString($fetched->errorMessage);
        self::assertNull($fetched->title);
        self::assertNull($fetched->extractedText);
        self::assertEmpty($fetched->outboundLinks);
        self::assertNull($fetched->wordCount);
    }

    /**
     * A connect exception with errno 28 ("Operation timed out") is reported as Timeout.
     */
    public function test_timeout_returns_timeout(): void
    {
        $this->stubConnectFailure('cURL error 28: Operation timed out', 28);

        $fetched = $this->fetch('https://example.com/page');

        self::assertInstanceOf(FetchResult::class, $fetched);
        self::assertSame(CrawlOutcomeEnum::Timeout, $fetched->outcome);
        self::assertNull($fetched->statusCode);
        self::assertNull($fetched->finalUrl);
        self::assertIsString($fetched->errorMessage);
    }

    /**
     * Registers a canned HTTP response for any URL matching the stub pattern.
     *
     * @param array<string, string> $headers Response headers to send with the stub.
     */
    private function stubResponse(string $body, int $status, array $headers = []): void
    {
        Http::fake([
            self::STUB_PATTERN => Http::response($body, $status, $headers),
        ]);
    }

    /**
     * Makes every faked request throw a Guzzle ConnectException.
     *
     * The exception is built lazily, inside the fake callback, with the given
     * message and an "errno" entry in its handler context.
     */
    private function stubConnectFailure(string $message, int $errno): void
    {
        Http::fake(fn () => throw new ConnectException(
            $message,
            new Request('GET', self::PAGE_URL),
            null,
            ['errno' => $errno],
        ));
    }

    /**
     * Resolves a fresh action and invokes it with the given URL.
     *
     * The return type is left open on purpose: the tests themselves assert
     * that the value is a FetchResult.
     */
    private function fetch(string $url): mixed
    {
        $action = $this->makeAction();

        return $action($url);
    }

    /**
     * Resolves the action under test from the application container.
     */
    private function makeAction(): FetchPageAction
    {
        return app()->make(FetchPageAction::class);
    }
}
|