22 lines
587 B
PHP
22 lines
587 B
PHP
<?php
|
|
|
|
namespace App\Services\Parsers;
|
|
|
|
/**
 * Extracts dated article links from a block of VRT homepage HTML.
 */
class VrtHomepageParser
{
    /** Scheme and host that every returned URL is prefixed with. */
    private const BASE_URL = 'https://www.vrt.be';

    /**
     * Collect the unique article URLs found in `href="..."` attributes.
     *
     * A link is recognised when its path has the form
     * `/vrtnws/{language}/YYYY/MM/DD/{rest}`, either site-relative or prefixed
     * with `https://www.vrt.be`. Both forms are normalised to an absolute URL,
     * so the same article linked in both ways is returned only once.
     *
     * @param string $html     Raw HTML to scan.
     * @param string $language Language segment expected after `/vrtnws/`;
     *                         it is regex-escaped before use.
     *
     * @return list<string> Absolute URLs in order of first appearance, indexed
     *                      0..n-1 with no gaps.
     */
    public static function extractArticleUrls(string $html, string $language = 'en'): array
    {
        $pattern = sprintf(
            '/href="(?:%s)?(\/vrtnws\/%s\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/',
            preg_quote(self::BASE_URL, '/'),
            preg_quote($language, '/'),
        );

        preg_match_all($pattern, $html, $matches);

        // array_unique() keeps the key of the first occurrence, which leaves
        // holes in the index once a duplicate is dropped; array_values()
        // re-indexes so callers always receive a proper list. The `?? []`
        // covers a PCRE failure where no capture group gets populated.
        $paths = array_values(array_unique($matches[1] ?? []));

        return array_map(
            static fn (string $path): string => self::BASE_URL . $path,
            $paths,
        );
    }
}
|