12 - Apply pr-reviewer follow-ups: validation, link filters, readonly VO, docs
This commit is contained in:
parent
35e1147823
commit
dda5b0f770
7 changed files with 126 additions and 45 deletions
|
|
@ -22,6 +22,7 @@ class FetchPageAction
|
||||||
{
|
{
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private Factory $http,
|
private Factory $http,
|
||||||
|
private UrlService $urlService,
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
public function __invoke(string $url): FetchResult
|
public function __invoke(string $url): FetchResult
|
||||||
|
|
@ -46,7 +47,7 @@ public function __invoke(string $url): FetchResult
|
||||||
|
|
||||||
if ($outcome === CrawlOutcomeEnum::Success) {
|
if ($outcome === CrawlOutcomeEnum::Success) {
|
||||||
[$title, $extractedText, $links] = $this->extractTitleTextAndLinks($response->body(), $url);
|
[$title, $extractedText, $links] = $this->extractTitleTextAndLinks($response->body(), $url);
|
||||||
$wordCount = count(preg_split('/\s+/u', trim($extractedText)));
|
$wordCount = $extractedText !== '' ? count(preg_split('/\s+/u', trim($extractedText))) : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new FetchResult(
|
return new FetchResult(
|
||||||
|
|
@ -64,18 +65,17 @@ public function __invoke(string $url): FetchResult
|
||||||
private function validateResponse(Response $response): array
|
private function validateResponse(Response $response): array
|
||||||
{
|
{
|
||||||
$status = $response->status();
|
$status = $response->status();
|
||||||
$statusStart = substr((string) $status, 0, 1);
|
|
||||||
|
|
||||||
if ($statusStart === '4') {
|
if ($status >= 400 && $status < 500) {
|
||||||
return [CrawlOutcomeEnum::Blocked4xx, "HTTP {$status}"];
|
return [CrawlOutcomeEnum::Blocked4xx, "HTTP {$status}"];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (str_starts_with((string) $status, '5')) {
|
if ($status >= 500) {
|
||||||
return [CrawlOutcomeEnum::Blocked5xx, "HTTP {$status}"];
|
return [CrawlOutcomeEnum::Blocked5xx, "HTTP {$status}"];
|
||||||
}
|
}
|
||||||
|
|
||||||
$contentType = $response->header('Content-Type');
|
$contentType = $response->header('Content-Type');
|
||||||
if (! str_starts_with($contentType, 'text/html')) {
|
if (! str_starts_with(mb_strtolower($contentType), 'text/html')) {
|
||||||
return [CrawlOutcomeEnum::Rejected, "Unsupported file type: {$contentType}"];
|
return [CrawlOutcomeEnum::Rejected, "Unsupported file type: {$contentType}"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -147,8 +147,12 @@ private function resolveAndValidateLink(string $href, string $finalUrl): ?string
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($resolved === $finalUrl) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
app(UrlService::class)->host($resolved);
|
$this->urlService->host($resolved);
|
||||||
} catch (InvalidArgumentException) {
|
} catch (InvalidArgumentException) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,5 +12,12 @@ enum CrawlOutcomeEnum: string
|
||||||
case BlockedRobots = 'blocked_robots';
|
case BlockedRobots = 'blocked_robots';
|
||||||
case Blocked4xx = 'blocked_4xx';
|
case Blocked4xx = 'blocked_4xx';
|
||||||
case Blocked5xx = 'blocked_5xx';
|
case Blocked5xx = 'blocked_5xx';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The HTTP fetch succeeded (2xx) but the response is unindexable in v0.1
|
||||||
|
* (non-HTML Content-Type). Worker MUST also write `pages.status = Rejected`
|
||||||
|
* on this outcome — do NOT treat as Failed. Page row STAYS in the DB to
|
||||||
|
* prevent re-discovery loops as fediverse re-shares the URL.
|
||||||
|
*/
|
||||||
case Rejected = 'rejected';
|
case Rejected = 'rejected';
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,5 +9,12 @@ enum PageStatusEnum: string
|
||||||
case Discovered = 'discovered';
|
case Discovered = 'discovered';
|
||||||
case Fetched = 'fetched';
|
case Fetched = 'fetched';
|
||||||
case Failed = 'failed';
|
case Failed = 'failed';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The crawler fetched the page but rejected it as unindexable in v0.1
|
||||||
|
* (non-HTML Content-Type). Page row stays as a sentinel preventing
|
||||||
|
* re-discovery loops; future re-crawl could flip status back to
|
||||||
|
* Discovered → Fetched if the URL starts serving HTML.
|
||||||
|
*/
|
||||||
case Rejected = 'rejected';
|
case Rejected = 'rejected';
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,9 +7,10 @@
|
||||||
use App\Enums\CrawlOutcomeEnum;
|
use App\Enums\CrawlOutcomeEnum;
|
||||||
use Illuminate\Support\Collection;
|
use Illuminate\Support\Collection;
|
||||||
|
|
||||||
class FetchResult
|
final readonly class FetchResult
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
|
* @param ?string $finalUrl Set to the request URL in v0.1; true post-redirect URL tracking is deferred (see ticket #12 spec). Downstream consumers MUST NOT trust this field as the post-redirect location until that lands.
|
||||||
* @param Collection<int, string> $outboundLinks
|
* @param Collection<int, string> $outboundLinks
|
||||||
*/
|
*/
|
||||||
public function __construct(
|
public function __construct(
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@
|
||||||
],
|
],
|
||||||
"require": {
|
"require": {
|
||||||
"php": "^8.3",
|
"php": "^8.3",
|
||||||
"fivefilters/readability.php": "@dev",
|
"fivefilters/readability.php": "^3.3",
|
||||||
"laravel/framework": "^13.0",
|
"laravel/framework": "^13.0",
|
||||||
"laravel/tinker": "^3.0",
|
"laravel/tinker": "^3.0",
|
||||||
"livewire/livewire": "^4.2",
|
"livewire/livewire": "^4.2",
|
||||||
|
|
|
||||||
73
composer.lock
generated
73
composer.lock
generated
|
|
@ -4,7 +4,7 @@
|
||||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||||
"This file is @generated automatically"
|
"This file is @generated automatically"
|
||||||
],
|
],
|
||||||
"content-hash": "30d45d9b30092cc20f9364f7c3828aa5",
|
"content-hash": "2c63ed546b17b144997244f805e8a94a",
|
||||||
"packages": [
|
"packages": [
|
||||||
{
|
{
|
||||||
"name": "brick/math",
|
"name": "brick/math",
|
||||||
|
|
@ -4620,7 +4620,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-ctype",
|
"name": "symfony/polyfill-ctype",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-ctype.git",
|
"url": "https://github.com/symfony/polyfill-ctype.git",
|
||||||
|
|
@ -4679,7 +4679,7 @@
|
||||||
"portable"
|
"portable"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-ctype/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-ctype/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -4703,16 +4703,16 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-intl-grapheme",
|
"name": "symfony/polyfill-intl-grapheme",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-intl-grapheme.git",
|
"url": "https://github.com/symfony/polyfill-intl-grapheme.git",
|
||||||
"reference": "ad1b7b9092976d6c948b8a187cec9faaea9ec1df"
|
"reference": "4864388bfbd3001ce88e234fab652acd91fdc57e"
|
||||||
},
|
},
|
||||||
"dist": {
|
"dist": {
|
||||||
"type": "zip",
|
"type": "zip",
|
||||||
"url": "https://api.github.com/repos/symfony/polyfill-intl-grapheme/zipball/ad1b7b9092976d6c948b8a187cec9faaea9ec1df",
|
"url": "https://api.github.com/repos/symfony/polyfill-intl-grapheme/zipball/4864388bfbd3001ce88e234fab652acd91fdc57e",
|
||||||
"reference": "ad1b7b9092976d6c948b8a187cec9faaea9ec1df",
|
"reference": "4864388bfbd3001ce88e234fab652acd91fdc57e",
|
||||||
"shasum": ""
|
"shasum": ""
|
||||||
},
|
},
|
||||||
"require": {
|
"require": {
|
||||||
|
|
@ -4761,7 +4761,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-intl-grapheme/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-intl-grapheme/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -4781,11 +4781,11 @@
|
||||||
"type": "tidelift"
|
"type": "tidelift"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"time": "2026-04-10T16:19:22+00:00"
|
"time": "2026-04-26T13:13:48+00:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-intl-idn",
|
"name": "symfony/polyfill-intl-idn",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-intl-idn.git",
|
"url": "https://github.com/symfony/polyfill-intl-idn.git",
|
||||||
|
|
@ -4848,7 +4848,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-intl-idn/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-intl-idn/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -4872,7 +4872,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-intl-normalizer",
|
"name": "symfony/polyfill-intl-normalizer",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-intl-normalizer.git",
|
"url": "https://github.com/symfony/polyfill-intl-normalizer.git",
|
||||||
|
|
@ -4933,7 +4933,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-intl-normalizer/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-intl-normalizer/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -4957,7 +4957,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-mbstring",
|
"name": "symfony/polyfill-mbstring",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-mbstring.git",
|
"url": "https://github.com/symfony/polyfill-mbstring.git",
|
||||||
|
|
@ -5018,7 +5018,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-mbstring/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-mbstring/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -5042,7 +5042,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-php80",
|
"name": "symfony/polyfill-php80",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-php80.git",
|
"url": "https://github.com/symfony/polyfill-php80.git",
|
||||||
|
|
@ -5102,7 +5102,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-php80/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-php80/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -5126,7 +5126,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-php83",
|
"name": "symfony/polyfill-php83",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-php83.git",
|
"url": "https://github.com/symfony/polyfill-php83.git",
|
||||||
|
|
@ -5182,7 +5182,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-php83/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-php83/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -5206,7 +5206,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-php84",
|
"name": "symfony/polyfill-php84",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-php84.git",
|
"url": "https://github.com/symfony/polyfill-php84.git",
|
||||||
|
|
@ -5262,7 +5262,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-php84/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-php84/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -5286,16 +5286,16 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-php85",
|
"name": "symfony/polyfill-php85",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-php85.git",
|
"url": "https://github.com/symfony/polyfill-php85.git",
|
||||||
"reference": "2c408a6bb0313e6001a83628dc5506100474254e"
|
"reference": "fcfa4973a9917cef23f2e38774da74a2b7d115ee"
|
||||||
},
|
},
|
||||||
"dist": {
|
"dist": {
|
||||||
"type": "zip",
|
"type": "zip",
|
||||||
"url": "https://api.github.com/repos/symfony/polyfill-php85/zipball/2c408a6bb0313e6001a83628dc5506100474254e",
|
"url": "https://api.github.com/repos/symfony/polyfill-php85/zipball/fcfa4973a9917cef23f2e38774da74a2b7d115ee",
|
||||||
"reference": "2c408a6bb0313e6001a83628dc5506100474254e",
|
"reference": "fcfa4973a9917cef23f2e38774da74a2b7d115ee",
|
||||||
"shasum": ""
|
"shasum": ""
|
||||||
},
|
},
|
||||||
"require": {
|
"require": {
|
||||||
|
|
@ -5342,7 +5342,7 @@
|
||||||
"shim"
|
"shim"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-php85/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-php85/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -5362,11 +5362,11 @@
|
||||||
"type": "tidelift"
|
"type": "tidelift"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"time": "2026-04-10T16:50:15+00:00"
|
"time": "2026-04-26T13:10:57+00:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "symfony/polyfill-uuid",
|
"name": "symfony/polyfill-uuid",
|
||||||
"version": "v1.36.0",
|
"version": "v1.37.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/symfony/polyfill-uuid.git",
|
"url": "https://github.com/symfony/polyfill-uuid.git",
|
||||||
|
|
@ -5425,7 +5425,7 @@
|
||||||
"uuid"
|
"uuid"
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"source": "https://github.com/symfony/polyfill-uuid/tree/v1.36.0"
|
"source": "https://github.com/symfony/polyfill-uuid/tree/v1.37.0"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -6263,16 +6263,16 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "voku/portable-ascii",
|
"name": "voku/portable-ascii",
|
||||||
"version": "2.1.0",
|
"version": "2.1.1",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/voku/portable-ascii.git",
|
"url": "https://github.com/voku/portable-ascii.git",
|
||||||
"reference": "d870a33f0f79d2b4579740b0620200221ee44aeb"
|
"reference": "8e1051fe39379367aecf014f41744ce7539a856f"
|
||||||
},
|
},
|
||||||
"dist": {
|
"dist": {
|
||||||
"type": "zip",
|
"type": "zip",
|
||||||
"url": "https://api.github.com/repos/voku/portable-ascii/zipball/d870a33f0f79d2b4579740b0620200221ee44aeb",
|
"url": "https://api.github.com/repos/voku/portable-ascii/zipball/8e1051fe39379367aecf014f41744ce7539a856f",
|
||||||
"reference": "d870a33f0f79d2b4579740b0620200221ee44aeb",
|
"reference": "8e1051fe39379367aecf014f41744ce7539a856f",
|
||||||
"shasum": ""
|
"shasum": ""
|
||||||
},
|
},
|
||||||
"require": {
|
"require": {
|
||||||
|
|
@ -6309,7 +6309,7 @@
|
||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"issues": "https://github.com/voku/portable-ascii/issues",
|
"issues": "https://github.com/voku/portable-ascii/issues",
|
||||||
"source": "https://github.com/voku/portable-ascii/tree/2.1.0"
|
"source": "https://github.com/voku/portable-ascii/tree/2.1.1"
|
||||||
},
|
},
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
|
|
@ -6333,7 +6333,7 @@
|
||||||
"type": "tidelift"
|
"type": "tidelift"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"time": "2026-04-16T23:10:39+00:00"
|
"time": "2026-04-26T05:33:54+00:00"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"packages-dev": [
|
"packages-dev": [
|
||||||
|
|
@ -8649,7 +8649,6 @@
|
||||||
"aliases": [],
|
"aliases": [],
|
||||||
"minimum-stability": "stable",
|
"minimum-stability": "stable",
|
||||||
"stability-flags": {
|
"stability-flags": {
|
||||||
"fivefilters/readability.php": 20,
|
|
||||||
"lvl0/fedi-discover": 20
|
"lvl0/fedi-discover": 20
|
||||||
},
|
},
|
||||||
"prefer-stable": true,
|
"prefer-stable": true,
|
||||||
|
|
@ -8658,5 +8657,5 @@
|
||||||
"php": "^8.3"
|
"php": "^8.3"
|
||||||
},
|
},
|
||||||
"platform-dev": {},
|
"platform-dev": {},
|
||||||
"plugin-api-version": "2.6.0"
|
"plugin-api-version": "2.9.0"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -260,6 +260,69 @@ public function test_success_calculates_word_count(): void
|
||||||
$this->assertSame(9, $result->wordCount);
|
$this->assertSame(9, $result->wordCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function test_uppercase_content_type_is_accepted_as_html(): void
|
||||||
|
{
|
||||||
|
Http::fake([
|
||||||
|
'example.com/*' => Http::response(
|
||||||
|
'<html><head><title>Uppercase CT</title></head><body><p>Content here.</p></body></html>',
|
||||||
|
200,
|
||||||
|
['Content-Type' => 'Text/HTML; charset=utf-8'],
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
|
$result = $this->makeAction()('https://example.com/page');
|
||||||
|
|
||||||
|
$this->assertSame(CrawlOutcomeEnum::Success, $result->outcome);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function test_empty_href_is_filtered_from_outbound_links(): void
|
||||||
|
{
|
||||||
|
$html = <<<'HTML'
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head><title>Empty Href Test</title></head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<p>This paragraph has <a href="">an empty href anchor</a> that should be dropped.</p>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
HTML;
|
||||||
|
|
||||||
|
Http::fake([
|
||||||
|
'example.com/*' => Http::response($html, 200, ['Content-Type' => 'text/html']),
|
||||||
|
]);
|
||||||
|
|
||||||
|
$result = $this->makeAction()('https://example.com/article');
|
||||||
|
|
||||||
|
$this->assertSame(CrawlOutcomeEnum::Success, $result->outcome);
|
||||||
|
$this->assertSame(0, $result->outboundLinks->count());
|
||||||
|
}
|
||||||
|
|
||||||
|
public function test_fragment_only_href_is_filtered_from_outbound_links(): void
|
||||||
|
{
|
||||||
|
$html = <<<'HTML'
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head><title>Fragment Href Test</title></head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<p>Jump to <a href="#section-2">section 2</a> of this page.</p>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
HTML;
|
||||||
|
|
||||||
|
Http::fake([
|
||||||
|
'example.com/*' => Http::response($html, 200, ['Content-Type' => 'text/html']),
|
||||||
|
]);
|
||||||
|
|
||||||
|
$result = $this->makeAction()('https://example.com/article');
|
||||||
|
|
||||||
|
$this->assertSame(CrawlOutcomeEnum::Success, $result->outcome);
|
||||||
|
$this->assertSame(0, $result->outboundLinks->count());
|
||||||
|
}
|
||||||
|
|
||||||
private function makeAction(): FetchPageAction
|
private function makeAction(): FetchPageAction
|
||||||
{
|
{
|
||||||
return app(FetchPageAction::class);
|
return app(FetchPageAction::class);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue