isAllowed($this->pageCrawl->page->url)) {
            // Robots branch (the start of this condition is outside the visible source):
            // record BlockedRobots on the crawl row, mark the page as failed, and stop.
            $this->pageCrawl->update([
                'outcome' => CrawlOutcomeEnum::BlockedRobots,
                'completed_at' => now(),
            ]);
            $this->pageCrawl->page->update(['status' => PageStatusEnum::Failed]);

            return;
        }

        $fetcher = resolve(FetchPageAction::class);
        $register = resolve(RegisterDiscoveredPageAction::class);
        $politenessService = resolve(PolitenessService::class);

        // Per-domain throttle: the lock's TTL is the domain's minimum delay.
        // NOTE(review): the lock is never released explicitly, so it appears to rely on the
        // TTL expiring to space out requests — confirm minDelayFor() can never return 0,
        // because a zero-second cache lock does not expire.
        $delay = $politenessService->minDelayFor($this->pageCrawl->domain);
        $lock = Cache::lock("crawler:domain:{$this->pageCrawl->domain}", $delay);

        if (! $lock->get()) {
            // Lock not obtained for this domain: put the job back on the queue after $delay.
            $this->release($delay);

            return;
        }

        $result = $fetcher($this->pageCrawl->page->url);

        $this->writeOutcome($result);
        $this->updatePageStatus($result);

        if ($result->outcome->shouldRegisterOutboundLinks()) {
            $result->outboundLinks->each(fn (string $url) => $register($url));
        }

        if ($result->outcome->isRetryable()) {
            $this->scheduleRetryIfNeeded();
        }
    }

    /**
     * Persist the result of the fetch on this job's PageCrawl row.
     *
     * Writes the outcome, completion time, status code and error message taken
     * from the given result.
     */
    private function writeOutcome(FetchResult $result): void
    {
        $this->pageCrawl->update([
            'outcome' => $result->outcome,
            'completed_at' => now(),
            'status_code' => $result->statusCode,
            'error_message' => $result->errorMessage,
        ]);
    }

    /**
     * Update the crawled page according to the status the fetch outcome maps to.
     *
     * Fetched pages also receive the fetch time, title and language details; failed
     * pages receive a failure time; rejected and discovered pages only change status.
     * The match has no default arm, so a status not listed here raises
     * \UnhandledMatchError instead of being silently ignored.
     */
    private function updatePageStatus(FetchResult $result): void
    {
        $status = $result->outcome->toPageStatus();

        $update = match ($status) {
            PageStatusEnum::Fetched => [
                'status' => $status,
                'fetched_at' => now(),
                'title' => $result->title,
                'language' => $result->language,
                'language_confidence' => $result->languageConfidence,
            ],
            PageStatusEnum::Failed => [
                'status' => $status,
                'failed_at' => now(),
            ],
            PageStatusEnum::Rejected, PageStatusEnum::Discovered => [
                'status' => $status,
            ],
        };

        $this->pageCrawl->page->update($update);
    }

    /**
     * Queue another crawl attempt for the same page, unless the page already has
     * three or more PageCrawl rows.
     *
     * A new PageCrawl row is created with model events suppressed, seeded from this
     * attempt's attributes, and a ProcessCrawlJob for it is dispatched with a
     * one-hour delay.
     */
    private function scheduleRetryIfNeeded(): void
    {
        if (PageCrawl::where('page_id', $this->pageCrawl->page_id)->count() >= 3) {
            return;
        }

        // Use attributesToArray() rather than toArray(): the latter also serialises loaded
        // relations (the page relation is loaded by the time this runs), which are not
        // columns of the crawl row.
        $attributes = $this->pageCrawl->attributesToArray();

        // The retry is a new row, so it must not reuse this attempt's primary key.
        unset($attributes[$this->pageCrawl->getKeyName()]);

        // writeOutcome() has already stored this attempt's result on the model, so every
        // per-attempt result field is cleared; otherwise the retry row would be created
        // looking already completed, with a stale status code and error message.
        $attributes = array_merge($attributes, [
            'outcome' => null,
            'completed_at' => null,
            'status_code' => null,
            'error_message' => null,
        ]);

        $newRow = PageCrawl::withoutEvents(
            static fn () => PageCrawl::create($attributes)
        );

        ProcessCrawlJob::dispatch($newRow)->delay(now()->addHour());
    }
}