fedi-feed-router/app/Services/Article/ArticleFetcher.php

68 lines
2 KiB
PHP
Raw Normal View History

2025-06-29 09:37:49 +02:00
<?php
2025-06-29 17:46:06 +02:00
namespace App\Services\Article;
2025-06-29 09:37:49 +02:00
use App\Models\Article;
use Exception;
2025-06-29 17:13:18 +02:00
use Illuminate\Support\Collection;
2025-06-29 09:37:49 +02:00
use Illuminate\Support\Facades\Http;
/**
 * Discovers article URLs on the VRT NWS (English) homepage and stores them
 * as Article models.
 */
class ArticleFetcher
{
    /** Origin that the relative article paths found in the page are resolved against. */
    private const BASE_URL = 'https://www.vrt.be';

    /** Landing page that is scraped for article links. */
    private const HOMEPAGE_URL = self::BASE_URL . '/vrtnws/en/';

    /** Captures relative article links of the form /vrtnws/en/YYYY/MM/DD/<slug>. */
    private const ARTICLE_LINK_PATTERN = '/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/';

    /**
     * Scrape the homepage and return an Article for every reachable article URL.
     *
     * Each URL goes through Article::firstOrCreate(), so the result contains
     * articles that already existed as well as ones created by this call.
     *
     * @return Collection<int, Article> empty when the homepage could not be fetched
     */
    public static function getNewArticles(): Collection
    {
        return self::fetchArticles()
            ->map(fn (string $url) => self::saveArticle($url));
    }

    /**
     * Collect the absolute URLs of all articles linked from the homepage whose
     * own page answers with a successful HTTP status.
     *
     * Failures while fetching the homepage are logged and produce an empty
     * collection instead of an exception.
     *
     * @return Collection<int, string> sequentially keyed list of article URLs
     */
    private static function fetchArticles(): Collection
    {
        try {
            $homepage = Http::get(self::HOMEPAGE_URL);

            // Http::get() does not throw on 4xx/5xx, so an error page must not
            // be parsed as if it were the homepage.
            if (!$homepage->successful()) {
                logger()->error("Failed to fetch VRT homepage", ['status' => $homepage->status()]);

                return new Collection([]);
            }

            preg_match_all(self::ARTICLE_LINK_PATTERN, $homepage->body(), $matches);

            // unique() keeps the original keys, which leaves gaps; values()
            // reindexes to 0..n-1 so that every URL lines up with the pool
            // result at the same position below.
            $urls = collect($matches[1] ?? [])
                ->unique()
                ->values()
                ->map(fn (string $path) => self::BASE_URL . $path)
                ->all();

            if ($urls === []) {
                return new Collection([]);
            }

            // Results are keyed 0..n-1 in the order the requests are added.
            $responses = Http::pool(function ($pool) use ($urls) {
                foreach ($urls as $url) {
                    $pool->get($url);
                }
            });

            return collect($urls)
                ->filter(function (string $url, int $index) use ($responses) {
                    $response = $responses[$index] ?? null;

                    // A pool entry can be an exception object (e.g. after a
                    // connection failure) instead of a Response; treat
                    // anything that is not a successful Response as
                    // unreachable.
                    return $response instanceof \Illuminate\Http\Client\Response
                        && $response->successful();
                })
                ->values();
        } catch (Exception $e) {
            logger()->error("Failed to fetch VRT homepage", ['error' => $e->getMessage()]);

            return new Collection([]);
        }
    }

    /**
     * Look up the Article with the given URL, creating it when none exists.
     *
     * @param string $url absolute article URL
     */
    protected static function saveArticle(string $url): Article
    {
        return Article::firstOrCreate(['url' => $url]);
    }
}