http ->timeout(config('crawler.timeout')) ->withHeaders([ 'User-Agent' => config('crawler.user_agent'), 'Accept' => 'text/html', ]) ->withOptions([ 'allow_redirects' => ['max' => config('crawler.max_redirects')], ]) ->get($url); } catch (ConnectionException|ConnectException $e) { return $this->failureResult($e); } [$outcome, $error] = $this->validateResponse($response); return new FetchResult( outcome: $outcome, statusCode: $response->status(), finalUrl: $url, title: null, extractedText: null, outboundLinks: collect(), wordCount: null, errorMessage: $error ?? null, ); }

    /**
     * Classify an HTTP response into a crawl outcome.
     *
     * Status codes starting with 4 or 5 are reported as blocked; any other
     * response must declare a `text/html` content type to count as a success.
     *
     * @param Response $response The response to inspect.
     *
     * @return array{0: CrawlOutcomeEnum, 1: ?string} The outcome and, for non-success
     *                                                outcomes, a human-readable reason.
     */
    private function validateResponse(Response $response): array
    {
        $status = $response->status();

        // First digit of the status code identifies its class (4xx, 5xx, ...).
        $statusClass = substr((string) $status, 0, 1);

        if ($statusClass === '4') {
            return [CrawlOutcomeEnum::Blocked4xx, "HTTP {$status}"];
        }

        if ($statusClass === '5') {
            return [CrawlOutcomeEnum::Blocked5xx, "HTTP {$status}"];
        }

        $contentType = $response->header('Content-Type');

        // Media types are case-insensitive, so "Text/HTML; charset=UTF-8" must be
        // accepted as well. Normalise a copy for the comparison and keep the raw
        // header value for the error message.
        $normalizedType = strtolower(ltrim((string) $contentType));

        if (! str_starts_with($normalizedType, 'text/html')) {
            return [CrawlOutcomeEnum::Rejected, "Unsupported file type: {$contentType}"];
        }

        return [CrawlOutcomeEnum::Success, null];
    }

    /**
     * Build the result for a request that failed before any response arrived.
     *
     * The handler context is read from the ConnectException, which is either
     * the exception itself or its immediate previous exception. A cURL
     * "operation timed out" errno maps to a Timeout outcome; everything else
     * is reported as Failed.
     *
     * @param ConnectionException|ConnectException $e The connection-level failure.
     *
     * @return FetchResult A result with no status code, URL, or content, carrying
     *                     the exception message as its error message.
     */
    private function failureResult(ConnectionException|ConnectException $e): FetchResult
    {
        $previous = $e->getPrevious();

        $guzzleException = match (true) {
            $e instanceof ConnectException => $e,
            $previous instanceof ConnectException => $previous,
            default => null,
        };

        $errno = $guzzleException?->getHandlerContext()['errno'] ?? null;

        // The CURLE_* constants only exist when ext-curl is loaded. Guard the lookup
        // so that failure handling itself cannot raise an undefined-constant Error.
        $timedOut = defined('CURLE_OPERATION_TIMEDOUT')
            && $errno === CURLE_OPERATION_TIMEDOUT;

        return new FetchResult(
            outcome: $timedOut ? CrawlOutcomeEnum::Timeout : CrawlOutcomeEnum::Failed,
            statusCode: null,
            finalUrl: null,
            title: null,
            extractedText: null,
            outboundLinks: collect(),
            wordCount: null,
            errorMessage: $e->getMessage(),
        );
    }
}