Culture
Da Vinci, Botticelli and Cranach shine at the Bozar
Home News
Work to remove 7 Nazi sea mines to get underway on Monday
HTML;
$urls = VrtHomepageParser::extractArticleUrls($html, 'en');
$this->assertCount(2, $urls);
$this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/da-vinci-botticelli-and-cranach-shine-at-the-bozar/', $urls);
$this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/06/work-to-remove-7-nazi-sea-mines-to-get-underway-on-monday/', $urls);
}
/**
 * Dutch article links written as absolute URLs are returned when 'nl' is requested.
 *
 * The fixture must contain real anchors: the assertions below look for the
 * exact href values, so headline text alone gives the parser nothing to extract.
 */
public function test_extracts_dutch_article_urls_from_absolute_links(): void
{
    $html = <<<'HTML'
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/cuba-nadert-het-einde-en-zal-snel-onderhandelen-zegt-presiden/">
            <span>Latijns-Amerika</span>
            <h3>Cuba nadert het einde</h3>
        </a>
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/planckendael-aap-ontsnapt/">
            <span>Binnenland</span>
            <h3>Goudkopleeuwaapje even ontsnapt</h3>
        </a>
        HTML;

    $urls = VrtHomepageParser::extractArticleUrls($html, 'nl');

    $this->assertCount(2, $urls);
    $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/cuba-nadert-het-einde-en-zal-snel-onderhandelen-zegt-presiden/', $urls);
    $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/planckendael-aap-ontsnapt/', $urls);
}
/**
 * An English article link must not be returned when Dutch ('nl') is requested.
 *
 * The fixture deliberately contains an article link in the other language;
 * without any link the assertion would pass even if language filtering were broken.
 */
public function test_does_not_extract_urls_for_wrong_language(): void
{
    $html = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/test-article/">Article</a>
        HTML;

    $urls = VrtHomepageParser::extractArticleUrls($html, 'nl');

    $this->assertEmpty($urls);
}
/**
 * The same article linked twice is reported only once.
 *
 * Both anchors share one href and differ only in link text, so a count of 1
 * can only come from de-duplication of the extracted URL.
 */
public function test_deduplicates_urls(): void
{
    $html = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/test-article/">Article</a>
        <a href="/vrtnws/en/2026/03/03/test-article/">Article again</a>
        HTML;

    $urls = VrtHomepageParser::extractArticleUrls($html, 'en');

    $this->assertCount(1, $urls);
    // Pin which URL survived, not just how many.
    $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/test-article/', $urls);
}
/**
 * Input that contains no article links produces an empty result for 'en'.
 */
public function test_returns_empty_array_for_html_without_article_links(): void
{
    // Plain text only: there is no anchor here for the parser to pick up.
    $extracted = VrtHomepageParser::extractArticleUrls('About', 'en');

    $this->assertEmpty($extracted);
}
/**
 * Relative and absolute article hrefs in the same document are both returned,
 * each as an absolute https://www.vrt.be/... URL.
 *
 * The fixture carries one link of each form so both code paths are exercised.
 */
public function test_handles_mixed_relative_and_absolute_links(): void
{
    $html = <<<'HTML'
        <a href="/vrtnws/nl/2026/03/07/relative-article/">Relative</a>
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/absolute-article/">Absolute</a>
        HTML;

    $urls = VrtHomepageParser::extractArticleUrls($html, 'nl');

    $this->assertCount(2, $urls);
    $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/relative-article/', $urls);
    $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/absolute-article/', $urls);
}
/**
 * Calling the parser without a language argument returns English articles only.
 *
 * The fixture mixes one English and one Dutch article link; only the English
 * URL may appear in the result when the language parameter is omitted.
 */
public function test_defaults_to_english_when_no_language_specified(): void
{
    $html = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/test-article/">Test</a>
        <a href="/vrtnws/nl/2026/03/07/dutch-article/">Dutch</a>
        HTML;

    // No second argument on purpose: the default language is under test.
    $urls = VrtHomepageParser::extractArticleUrls($html);

    $this->assertCount(1, $urls);
    $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/test-article/', $urls);
}
}