fedi-feed-router/app/Services/Parsers/VrtHomepageParser.php

22 lines
592 B
PHP
Raw Normal View History

2025-06-29 21:33:18 +02:00
<?php
namespace App\Services\Parsers;
/**
 * Extracts article links from the HTML of the VRT NWS homepage.
 */
class VrtHomepageParser
{
    /**
     * Find every dated article link for the given language edition and
     * return it as an absolute https://www.vrt.be URL.
     *
     * A link is recognised when a double-quoted href attribute contains a path
     * of the form /vrtnws/{language}/YYYY/MM/DD/{slug}, either relative or
     * prefixed with https://www.vrt.be. Duplicates are removed, keeping the
     * first occurrence, and the order of appearance in the HTML is preserved.
     *
     * @param string $html     Raw HTML to scan.
     * @param string $language Language segment of the article path (e.g. 'en').
     *
     * @return array<int, string> Sequentially indexed (0..n-1) list of absolute URLs;
     *                            empty when nothing matches.
     */
    public static function extractArticleUrls(string $html, string $language = 'en'): array
    {
        // The language is caller-supplied, so escape it before embedding it in the pattern.
        $escapedLanguage = preg_quote($language, '/');
        $pattern = '/href="(?:https:\/\/www\.vrt\.be)?(\/vrtnws\/' . $escapedLanguage . '\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/';

        $matchCount = preg_match_all($pattern, $html, $matches);

        // false signals a PCRE failure (e.g. backtrack limit), 0 means no links were found.
        if ($matchCount === false || $matchCount === 0) {
            return [];
        }

        // array_unique() keeps the key of the first occurrence, which leaves gaps
        // in the keys; array_values() re-indexes so callers always get a real list
        // (and json_encode() emits an array rather than an object).
        $paths = array_values(array_unique($matches[1]));

        return array_map(
            static fn (string $path): string => 'https://www.vrt.be' . $path,
            $paths
        );
    }
}