fedi-feed-router/app/Services/Article/ArticleFetcher.php

57 lines
1.6 KiB
PHP
Raw Normal View History

2025-06-29 09:37:49 +02:00
<?php
2025-06-29 17:46:06 +02:00
namespace App\Services\Article;
2025-06-29 09:37:49 +02:00
use App\Models\Article;
2025-06-29 21:33:18 +02:00
use App\Services\Http\HttpFetcher;
use App\Services\Factories\ArticleParserFactory;
use App\Services\Factories\HomepageParserFactory;
2025-06-29 09:37:49 +02:00
use Exception;
2025-06-29 17:13:18 +02:00
use Illuminate\Support\Collection;
2025-06-29 09:37:49 +02:00
/**
 * Discovers article URLs on the registered homepages and fetches parsed
 * data for individual articles.
 *
 * Both public entry points log failures and return an empty result instead
 * of letting an Exception propagate to the caller.
 */
class ArticleFetcher
{
    /**
     * Build a collection of Article models for every URL extracted from the
     * homepage of every parser returned by HomepageParserFactory.
     *
     * Every URL goes through Article::firstOrCreate(), so the result is not
     * limited to rows created by this call.
     * NOTE(review): the method name suggests "new" articles only; confirm
     * whether callers expect already-known articles to be included.
     *
     * A failure while processing one parser (fetching, URL extraction or
     * saving) is logged and that parser is skipped; articles gathered from
     * the other parsers are still returned.
     *
     * @return Collection Article models; empty when nothing could be collected.
     */
    public static function getNewArticles(): Collection
    {
        try {
            $allArticles = collect();

            foreach (HomepageParserFactory::getAllParsers() as $parser) {
                try {
                    $html = HttpFetcher::fetchHtml($parser->getHomepageUrl());
                    $urls = $parser->extractArticleUrls($html);

                    $articles = collect($urls)
                        ->map(fn (string $url) => self::saveArticle($url));

                    $allArticles = $allArticles->merge($articles);
                } catch (Exception $e) {
                    // Isolate the failure: one broken source must not discard
                    // what the healthy sources already produced.
                    logger()->error('Failed to get articles from homepage parser', [
                        'parser' => get_class($parser),
                        'error' => $e->getMessage(),
                    ]);
                }
            }

            return $allArticles->filter();
        } catch (Exception $e) {
            // Reached only when obtaining or iterating the parser list itself fails.
            logger()->error("Failed to get new articles", ['error' => $e->getMessage()]);

            return new Collection([]);
        }
    }

    /**
     * Fetch the HTML at the article's URL and return whatever the parser
     * selected for that URL extracts from it.
     *
     * @param Article $article Article whose `url` attribute is fetched.
     *
     * @return array Parser output, or an empty array when fetching or parsing
     *               threw an Exception (the error is logged).
     */
    public static function fetchArticleData(Article $article): array
    {
        try {
            $html = HttpFetcher::fetchHtml($article->url);
            $parser = ArticleParserFactory::getParser($article->url);

            return $parser->extractData($html);
        } catch (Exception $e) {
            logger()->error('Exception while fetching article data', [
                'url' => $article->url,
                'error' => $e->getMessage(),
            ]);

            return [];
        }
    }

    /**
     * Return the Article stored for the given URL, creating it if none exists.
     *
     * @param string $url Article URL used as the lookup/creation attribute.
     */
    private static function saveArticle(string $url): Article
    {
        return Article::firstOrCreate(['url' => $url]);
    }
}