113 lines
3 KiB
PHP
113 lines
3 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Article;
|
|
|
|
use App\Models\Article;
|
|
use App\Models\Feed;
|
|
use App\Services\Http\HttpFetcher;
|
|
use App\Services\Factories\ArticleParserFactory;
|
|
use App\Services\Factories\HomepageParserFactory;
|
|
use App\Services\Log\LogSaver;
|
|
use Exception;
|
|
use Illuminate\Support\Collection;
|
|
|
|
/**
 * Discovers the articles published by a feed and fetches the parsed content
 * of individual articles.
 *
 * Exceptions raised while fetching or parsing are reported through LogSaver
 * and replaced by an empty result. Only \Exception is caught; \Error
 * throwables still propagate to the caller.
 */
class ArticleFetcher
{
    /**
     * Returns the articles belonging to the given feed, dispatching on the feed type.
     *
     * 'rss' and 'website' feeds are delegated to their dedicated handlers.
     * Any other type is logged as a warning and yields an empty collection.
     *
     * @return Collection<int, Article>
     */
    public static function getArticlesFromFeed(Feed $feed): Collection
    {
        if ($feed->type === 'rss') {
            return self::getArticlesFromRssFeed($feed);
        }

        if ($feed->type === 'website') {
            return self::getArticlesFromWebsiteFeed($feed);
        }

        LogSaver::warning('Unsupported feed type', null, [
            'feed_id' => $feed->id,
            'feed_type' => $feed->type,
        ]);

        return collect();
    }

    /**
     * Placeholder for RSS feeds: parsing is not implemented yet.
     *
     * @return Collection<int, Article> Always empty for now.
     */
    private static function getArticlesFromRssFeed(Feed $feed): Collection
    {
        // TODO: Implement RSS feed parsing
        return collect();
    }

    /**
     * Downloads the feed's page, extracts the article URLs from it and
     * resolves each URL to an Article through saveArticle().
     *
     * An empty collection is returned when no homepage parser is available
     * for the feed (logged as a warning) or when an Exception is raised at
     * any step (logged as an error).
     *
     * @return Collection<int, Article>
     */
    private static function getArticlesFromWebsiteFeed(Feed $feed): Collection
    {
        try {
            $parser = HomepageParserFactory::getParserForFeed($feed);

            if (! $parser) {
                LogSaver::warning('No parser available for feed URL', null, [
                    'feed_id' => $feed->id,
                    'feed_url' => $feed->url,
                ]);

                return collect();
            }

            $html = HttpFetcher::fetchHtml($feed->url);
            $urls = $parser->extractArticleUrls($html);

            // The mapping runs inside the try block, so an Exception thrown
            // while saving one URL discards the whole result: articles saved
            // before the failure stay stored, but the caller receives an
            // empty collection.
            return collect($urls)
                ->map(static fn (string $url): Article => self::saveArticle($url, $feed->id));
        } catch (Exception $e) {
            LogSaver::error('Failed to fetch articles from website feed', null, [
                'feed_id' => $feed->id,
                'feed_url' => $feed->url,
                'error' => $e->getMessage(),
            ]);

            return collect();
        }
    }

    /**
     * Downloads an article and returns the data extracted from its HTML by
     * the parser selected for its URL.
     *
     * @return array<string, mixed> The parser's output, or an empty array
     *                              when an Exception was raised (and logged).
     */
    public static function fetchArticleData(Article $article): array
    {
        try {
            // Resolve the parser before downloading, in the same order as
            // getArticlesFromWebsiteFeed(), so that a failure to resolve a
            // parser does not cost an HTTP request.
            $parser = ArticleParserFactory::getParser($article->url);
            $html = HttpFetcher::fetchHtml($article->url);

            return $parser->extractData($html);
        } catch (Exception $e) {
            LogSaver::error('Exception while fetching article data', null, [
                'url' => $article->url,
                'error' => $e->getMessage(),
            ]);

            return [];
        }
    }

    /**
     * Returns the Article stored for the given URL, creating it with the
     * given feed id when none exists yet.
     *
     * The URL alone identifies an article: an existing record is returned
     * as is, and its feed_id is not overwritten with $feedId.
     */
    private static function saveArticle(string $url, ?int $feedId = null): Article
    {
        // firstOrCreate() looks the record up by the first array and only
        // uses the second one when it has to insert.
        // NOTE(review): this narrows the duplicate-insert race only if the
        // articles table has a unique index on url and the installed Laravel
        // version retries the lookup on a unique-constraint violation —
        // confirm both.
        return Article::firstOrCreate(
            ['url' => $url],
            ['feed_id' => $feedId],
        );
    }
}
|