Culture

Da Vinci, Botticelli and Cranach shine at the Bozar

Home News

Work to remove 7 Nazi sea mines to get underway on Monday

HTML; $urls = VrtHomepageParser::extractArticleUrls($html, 'en'); $this->assertCount(2, $urls); $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/da-vinci-botticelli-and-cranach-shine-at-the-bozar/', $urls); $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/06/work-to-remove-7-nazi-sea-mines-to-get-underway-on-monday/', $urls); } /** Expects extractArticleUrls($html, 'nl') to return exactly two URLs, both under https://www.vrt.be/vrtnws/nl/. NOTE(review): in this copy the fixture holds only link text; the absolute anchor hrefs the method name refers to are not visible - confirm against the original file. */ public function test_extracts_dutch_article_urls_from_absolute_links(): void { $html = <<<'HTML' Latijns-Amerika

Cuba nadert het einde

Binnenland

Goudkopleeuwaapje even ontsnapt

HTML; $urls = VrtHomepageParser::extractArticleUrls($html, 'nl'); $this->assertCount(2, $urls); $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/cuba-nadert-het-einde-en-zal-snel-onderhandelen-zegt-presiden/', $urls); $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/planckendael-aap-ontsnapt/', $urls); } /** Expects an empty result when 'nl' is requested for this fixture. NOTE(review): presumably the fixture links to a non-Dutch article; its href is not visible in this copy - confirm. */ public function test_does_not_extract_urls_for_wrong_language(): void { $html = <<<'HTML' Article HTML; $urls = VrtHomepageParser::extractArticleUrls($html, 'nl'); $this->assertEmpty($urls); } /** Expects exactly one URL for 'en' from a fixture with two entries ("Article" and "Article again"). NOTE(review): presumably both entries share the same href - confirm against the original fixture. */ public function test_deduplicates_urls(): void { $html = <<<'HTML' Article Article again HTML; $urls = VrtHomepageParser::extractArticleUrls($html, 'en'); $this->assertCount(1, $urls); } /** Expects an empty result for 'en' when the input contains no article link; the input here is the literal 'About'. */ public function test_returns_empty_array_for_html_without_article_links(): void { $html = 'About'; $urls = VrtHomepageParser::extractArticleUrls($html, 'en'); $this->assertEmpty($urls); } /** Expects two results for 'nl', both asserted as absolute https://www.vrt.be URLs (relative-article and absolute-article). NOTE(review): per the method name the fixture presumably mixes a relative and an absolute href; the hrefs are not visible in this copy - confirm. */ public function test_handles_mixed_relative_and_absolute_links(): void { $html = <<<'HTML' Relative Absolute HTML; $urls = VrtHomepageParser::extractArticleUrls($html, 'nl'); $this->assertCount(2, $urls); $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/relative-article/', $urls); $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/absolute-article/', $urls); } /** Calls extractArticleUrls without a language argument and expects a single result, the /vrtnws/en/ test-article URL. NOTE(review): the fixture has two entries ("Test" and "Dutch"); presumably one English and one Dutch link - confirm. */ public function test_defaults_to_english_when_no_language_specified(): void { $html = <<<'HTML' Test Dutch HTML; $urls = VrtHomepageParser::extractArticleUrls($html); $this->assertCount(1, $urls); $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/test-article/', $urls); } /** Expects an empty result when the input HTML is the empty string and the language is 'en'. */ public function test_returns_empty_array_for_empty_html(): void { $urls = VrtHomepageParser::extractArticleUrls('', 'en'); $this->assertEmpty($urls); } }