Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
CRAP | |
0.00% |
0 / 1 |
| VrtHomepageParser | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
| extractArticleUrls | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace App\Services\Parsers; |
| 4 | |
class VrtHomepageParser
{
    /**
     * Extract absolute VRT NWS article URLs from a chunk of HTML.
     *
     * Scans for double-quoted href attributes whose path has the form
     * /vrtnws/en/YYYY/MM/DD/<rest> and prefixes each matched path with
     * https://www.vrt.be. Duplicate paths are removed (first occurrence wins,
     * document order is kept) and the result is re-indexed from 0 so callers
     * always receive a proper list.
     *
     * @param string $html Raw HTML to scan.
     *
     * @return array<int, string> Sequentially indexed, unique absolute URLs;
     *                            empty when nothing matches or the regex
     *                            engine reports an error.
     */
    public static function extractArticleUrls(string $html): array
    {
        $found = preg_match_all(
            '/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/',
            $html,
            $matches
        );

        // preg_match_all() yields false on a PCRE error and 0 when nothing
        // matched; either way there is nothing to return.
        if ($found === false || $found === 0) {
            return [];
        }

        // array_unique() preserves the original keys, leaving gaps where
        // duplicates were dropped; array_values() restores a 0..n-1 list.
        $paths = array_values(array_unique($matches[1]));

        return array_map(
            static fn (string $path): string => 'https://www.vrt.be' . $path,
            $paths
        );
    }
}