2025-06-29 21:39:28 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
namespace App\Services\Parsers;
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts article links from Belga News Agency homepage markup.
 */
class BelgaHomepageParser
{
    /**
     * Collect the distinct article URLs referenced in the given HTML.
     *
     * Only double-quoted href attributes holding an absolute URL of the form
     * https://www.belganewsagency.eu/<slug> are matched, where <slug> is made
     * of lowercase ASCII letters, digits and hyphens.
     *
     * @param string $html Raw HTML to scan.
     *
     * @return list<string> Unique URLs in order of first appearance, indexed from 0.
     */
    public static function extractArticleUrls(string $html): array
    {
        $found = preg_match_all('/href="(https:\/\/www\.belganewsagency\.eu\/[a-z0-9-]+)"/', $html, $matches);

        // false signals a PCRE failure (e.g. backtrack limit), 0 means no link matched;
        // either way there is nothing to return.
        if ($found === false || $found === 0) {
            return [];
        }

        // array_unique() keeps the key of each first occurrence, which leaves gaps
        // once duplicates are dropped; array_values() restores a 0-based list.
        return array_values(array_unique($matches[1]));
    }
}
|