*/
    public function getArticlesFromFeed(Feed $feed): Collection
    {
        if ($feed->type === 'rss') {
            return $this->getArticlesFromRssFeed($feed);
        }

        if ($feed->type === 'website') {
            return $this->getArticlesFromWebsiteFeed($feed);
        }

        // Any other feed type is reported and yields an empty result.
        $this->logSaver->warning('Unsupported feed type', null, [
            'feed_id' => $feed->id,
            'feed_type' => $feed->type,
        ]);

        return collect();
    }

    /**
     * Fetch an RSS feed and save one article per item that carries a link.
     *
     * A fetch or parse failure is logged and produces an empty collection.
     * An item whose article cannot be saved is skipped (saveArticle() logs the
     * failure itself), so a single bad link neither discards the articles that
     * were already saved nor blocks the items that follow it.
     *
     * @return Collection<int, Article>
     */
    private function getArticlesFromRssFeed(Feed $feed): Collection
    {
        try {
            $xml = HttpFetcher::fetchHtml($feed->url);

            // Switch libxml to internal error collection while parsing, then
            // clear the collected errors and restore the previous setting.
            $previousUseErrors = libxml_use_internal_errors(true);
            try {
                $rss = simplexml_load_string($xml);
            } finally {
                libxml_clear_errors();
                libxml_use_internal_errors($previousUseErrors);
            }

            if ($rss === false || ! isset($rss->channel->item)) {
                $this->logSaver->warning('Failed to parse RSS feed XML', null, [
                    'feed_id' => $feed->id,
                    'feed_url' => $feed->url,
                ]);

                return collect();
            }

            $articles = collect();

            foreach ($rss->channel->item as $item) {
                // Pretty-printed feeds pad <link> with spaces/newlines; storing
                // them verbatim would break the URL and the lookup by `url`.
                $link = trim((string) $item->link);

                if ($link === '') {
                    continue;
                }

                try {
                    $articles->push($this->saveArticle($link, $feed->id));
                } catch (Exception $e) {
                    // Already logged by saveArticle(); keep processing the
                    // remaining items instead of dropping the whole feed.
                    continue;
                }
            }

            return $articles;
        } catch (Exception $e) {
            $this->logSaver->error('Failed to fetch articles from RSS feed', null, [
                'feed_id' => $feed->id,
                'feed_url' => $feed->url,
                'error' => $e->getMessage(),
            ]);

            return collect();
        }
    }

    /**
     * Fetch a website feed's page and save one article per URL returned by the
     * parser that HomepageParserFactory selects for the feed.
     *
     * A missing parser or a fetch/extract failure is logged and produces an
     * empty collection. A URL whose article cannot be saved is skipped
     * (saveArticle() logs the failure itself); the keys of the extracted URL
     * list are kept for the articles that were saved.
     *
     * @return Collection<array-key, Article>
     */
    private function getArticlesFromWebsiteFeed(Feed $feed): Collection
    {
        try {
            // Try to get parser for this feed
            $parser = HomepageParserFactory::getParserForFeed($feed);

            if (! $parser) {
                $this->logSaver->warning('No parser available for feed URL', null, [
                    'feed_id' => $feed->id,
                    'feed_url' => $feed->url,
                ]);

                return collect();
            }

            $html = HttpFetcher::fetchHtml($feed->url);
            $urls = $parser->extractArticleUrls($html);

            $articles = [];

            foreach (collect($urls) as $key => $url) {
                try {
                    $articles[$key] = $this->saveArticle($url, $feed->id);
                } catch (Exception $e) {
                    // Already logged by saveArticle(); one unsaveable URL must
                    // not discard every other article found on the page.
                    continue;
                }
            }

            return collect($articles);
        } catch (Exception $e) {
            $this->logSaver->error('Failed to fetch articles from website feed', null, [
                'feed_id' => $feed->id,
                'feed_url' => $feed->url,
                'error' => $e->getMessage(),
            ]);

            return collect();
        }
    }

    /**
     * Download the article's page and run it through the parser that
     * ArticleParserFactory selects for the article URL.
     *
     * @return array Whatever the parser's extractData() returns, or an empty
     *               array when an Exception is thrown (the error is logged).
     */
    public function fetchArticleData(Article $article): array
    {
        try {
            $html = HttpFetcher::fetchHtml($article->url);
            $parser = ArticleParserFactory::getParser($article->url);

            return $parser->extractData($html);
        } catch (Exception $e) {
            $this->logSaver->error('Exception while fetching article data', null, [
                'url' => $article->url,
                'error' => $e->getMessage(),
            ]);

            return [];
        }
    }

    /**
     * Find the article with the given URL or create it with a fallback title.
     *
     * The fetched event is dispatched only when the article was created by
     * this call.
     *
     * @throws Exception Rethrown after being logged when the lookup/creation
     *                   or the event dispatch fails.
     */
    private function saveArticle(string $url, ?int $feedId = null): Article
    {
        $fallbackTitle = $this->generateFallbackTitle($url);

        try {
            $article = Article::firstOrCreate(
                ['url' => $url],
                [
                    'feed_id' => $feedId,
                    'title' => $fallbackTitle,
                ]
            );

            if ($article->wasRecentlyCreated) {
                $article->dispatchFetchedEvent();
            }

            return $article;
        } catch (Exception $e) {
            $this->logSaver->error('Failed to create article', null, [
                'url' => $url,
                'feed_id' => $feedId,
                'error' => $e->getMessage(),
            ]);

            throw $e;
        }
    }

    /**
     * Derive a readable placeholder title from the last path segment of a URL.
     *
     * The segment loses its trailing ".extension", has "-" and "_" turned into
     * spaces, and gets each word capitalised. When nothing usable remains the
     * title is "Untitled Article".
     */
    private function generateFallbackTitle(string $url): string
    {
        // Extract filename from URL as a basic fallback title; fall back to the
        // raw URL when it has no usable path component.
        $path = parse_url($url, PHP_URL_PATH);
        $filename = basename($path ?: $url);

        // Remove file extension and convert to readable format.
        // preg_replace() returns null on a PCRE error, hence the string cast.
        $title = (string) preg_replace('/\.[^.]*$/', '', $filename);
        $title = str_replace(['-', '_'], ' ', $title);
        $title = trim(ucwords($title));

        // Compare against '' explicitly: a truthiness test would reject the
        // valid title "0", and the trim above rejects separator-only slugs.
        return $title !== '' ? $title : 'Untitled Article';
    }
}