fedi-feed-router/backend/app/Services/Article/ArticleFetcher.php

141 lines
4.1 KiB
PHP

<?php
namespace App\Services\Article;
use App\Models\Article;
use App\Models\Feed;
use App\Services\Http\HttpFetcher;
use App\Services\Factories\ArticleParserFactory;
use App\Services\Factories\HomepageParserFactory;
use App\Services\Log\LogSaver;
use Exception;
use Illuminate\Support\Collection;
/**
 * Discovers article URLs for a feed, stores them as Article records and
 * fetches the parsed data of individual articles.
 *
 * All methods are static; failures are reported through LogSaver.
 */
class ArticleFetcher
{
    /**
     * Return the articles of a feed, dispatching on the feed's type.
     *
     * Only the 'rss' and 'website' types are handled. Any other type is
     * logged as a warning and yields an empty collection.
     *
     * @return Collection<int, Article>
     */
    public static function getArticlesFromFeed(Feed $feed): Collection
    {
        if ($feed->type === 'rss') {
            return self::getArticlesFromRssFeed($feed);
        }

        if ($feed->type === 'website') {
            return self::getArticlesFromWebsiteFeed($feed);
        }

        LogSaver::warning("Unsupported feed type", null, [
            'feed_id' => $feed->id,
            'feed_type' => $feed->type,
        ]);

        return collect();
    }

    /**
     * Placeholder for RSS support: currently always returns an empty
     * collection and does not read the feed at all.
     *
     * @return Collection<int, Article>
     */
    private static function getArticlesFromRssFeed(Feed $feed): Collection
    {
        // TODO: Implement RSS feed parsing
        // For now, return empty collection
        return collect();
    }

    /**
     * Fetch the feed's page, extract the article URLs with the parser chosen
     * for this feed, and return one Article per distinct URL.
     *
     * Returns an empty collection when no parser is available or when
     * fetching/parsing the page throws. A failure to store one article only
     * skips that article; the remaining ones are still returned.
     *
     * @return Collection<int, Article>
     */
    private static function getArticlesFromWebsiteFeed(Feed $feed): Collection
    {
        try {
            // Try to get parser for this feed
            $parser = HomepageParserFactory::getParserForFeed($feed);

            if (! $parser) {
                LogSaver::warning("No parser available for feed URL", null, [
                    'feed_id' => $feed->id,
                    'feed_url' => $feed->url,
                ]);

                return collect();
            }

            $html = HttpFetcher::fetchHtml($feed->url);
            $urls = $parser->extractArticleUrls($html);

            $articles = collect();

            // Strict de-duplication: a page often links the same article more
            // than once, and each duplicate would cost a lookup and end up in
            // the result twice.
            foreach (collect($urls)->unique(null, true) as $url) {
                try {
                    $articles->push(self::saveArticle($url, $feed->id));
                } catch (Exception $e) {
                    // saveArticle() has already logged the failure. Skip only
                    // this URL so one bad article cannot hide all the others.
                    continue;
                }
            }

            return $articles;
        } catch (Exception $e) {
            LogSaver::error("Failed to fetch articles from website feed", null, [
                'feed_id' => $feed->id,
                'feed_url' => $feed->url,
                'error' => $e->getMessage(),
            ]);

            return collect();
        }
    }

    /**
     * Download the article's page and run the parser selected for its URL.
     *
     * Any Exception raised while fetching or parsing is logged and converted
     * into an empty array.
     *
     * @return array<string, mixed>
     */
    public static function fetchArticleData(Article $article): array
    {
        try {
            $html = HttpFetcher::fetchHtml($article->url);
            $parser = ArticleParserFactory::getParser($article->url);

            return $parser->extractData($html);
        } catch (Exception $e) {
            LogSaver::error('Exception while fetching article data', null, [
                'url' => $article->url,
                'error' => $e->getMessage(),
            ]);

            return [];
        }
    }

    /**
     * Return the Article stored for $url, creating it when none exists yet.
     *
     * A new article gets a title derived from the URL as a fallback.
     *
     * @throws Exception when creating the article fails (logged, then rethrown)
     */
    private static function saveArticle(string $url, ?int $feedId = null): Article
    {
        $existingArticle = Article::where('url', $url)->first();

        if ($existingArticle) {
            return $existingArticle;
        }

        // Extract a basic title from URL as fallback
        $fallbackTitle = self::generateFallbackTitle($url);

        try {
            return Article::create([
                'url' => $url,
                'feed_id' => $feedId,
                'title' => $fallbackTitle,
            ]);
        } catch (Exception $e) {
            // NOTE: every Exception thrown by create() ends up here, not only
            // title validation errors, despite the wording of the message.
            LogSaver::error("Failed to create article - title validation failed", null, [
                'url' => $url,
                'feed_id' => $feedId,
                'error' => $e->getMessage(),
                'suggestion' => 'Check regex parsing patterns for title extraction',
            ]);

            throw $e;
        }
    }

    /**
     * Build a human-readable title from the last path segment of a URL.
     *
     * The file extension is dropped, '-' and '_' become spaces, surrounding
     * whitespace is trimmed and each word is capitalised. When nothing usable
     * remains, 'Untitled Article' is returned.
     */
    private static function generateFallbackTitle(string $url): string
    {
        // Extract filename from URL as a basic fallback title; fall back to the
        // raw URL when it has no (parsable) path component.
        $path = parse_url($url, PHP_URL_PATH);
        $filename = basename($path ?: $url);

        // preg_replace() returns null on a PCRE error; keep the raw name then.
        $title = preg_replace('/\.[^.]*$/', '', $filename) ?? $filename;

        // Trim so that a slug made only of separators counts as empty.
        $title = trim(str_replace(['-', '_'], ' ', $title));
        $title = ucwords($title);

        // Explicit comparison: a slug such as "0" is a valid, non-empty title.
        return $title !== '' ? $title : 'Untitled Article';
    }
}