Compare commits

...

2 commits

Author SHA1 Message Date
d73309cb76 77 - Fix VRT homepage parser language support 2026-03-07 18:30:38 +01:00
866f8d02d3 64 - Reload page after article refresh
Dispatch a Livewire event on refresh that triggers a 10-second
setTimeout via Alpine.js, then reloads the page to show newly
fetched articles.
2026-03-07 18:30:28 +01:00
7 changed files with 134 additions and 13 deletions

View file

@ -36,13 +36,7 @@ public function refresh(): void
ArticleDiscoveryJob::dispatch();
// Reset after 10 seconds
$this->dispatch('refresh-complete')->self();
}
public function refreshComplete(): void
{
$this->isRefreshing = false;
$this->dispatch('refresh-started');
}
public function render()

View file

@ -50,6 +50,8 @@ public static function getParserForFeed(Feed $feed): ?HomepageParserInterface
return null;
}
return new $parserClass();
$language = $feed->language?->short_code ?? 'en';
return new $parserClass($language);
}
}

View file

@ -6,6 +6,10 @@
class BelgaHomepageParserAdapter implements HomepageParserInterface
{
/**
 * @param string $language Language code stored on the adapter; defaults to 'en'.
 *                         NOTE(review): no read of this property is visible in this
 *                         part of the class — confirm the Belga adapter actually uses it.
 */
public function __construct(
private string $language = 'en',
) {}
public function canParse(string $url): bool
{
return str_contains($url, 'belganewsagency.eu');

View file

@ -7,10 +7,10 @@ class VrtHomepageParser
/**
* @return array<int, string>
*/
public static function extractArticleUrls(string $html): array
public static function extractArticleUrls(string $html, string $language = 'en'): array
{
// Extract article links using regex
preg_match_all('/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches);
$escapedLanguage = preg_quote($language, '/');
preg_match_all('/href="(?:https:\/\/www\.vrt\.be)?(\/vrtnws\/' . $escapedLanguage . '\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches);
$urls = collect($matches[1])
->unique()

View file

@ -6,6 +6,10 @@
class VrtHomepageParserAdapter implements HomepageParserInterface
{
/**
 * @param string $language Language code of the VRT NWS edition to parse; defaults to 'en'.
 *                         It is forwarded to VrtHomepageParser::extractArticleUrls() and
 *                         interpolated into the homepage URL path.
 */
public function __construct(
private string $language = 'en',
) {}
public function canParse(string $url): bool
{
return str_contains($url, 'vrt.be');
@ -13,12 +17,12 @@ public function canParse(string $url): bool
public function extractArticleUrls(string $html): array
{
return VrtHomepageParser::extractArticleUrls($html);
return VrtHomepageParser::extractArticleUrls($html, $this->language);
}
public function getHomepageUrl(): string
{
return 'https://www.vrt.be/vrtnws/en/';
return "https://www.vrt.be/vrtnws/{$this->language}/";
}
public function getSourceName(): string

View file

@ -19,6 +19,7 @@
wire:click="refresh"
wire:loading.attr="disabled"
@disabled($isRefreshing)
x-on:refresh-started.window="setTimeout(() => window.location.reload(), 10000)"
class="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50 disabled:cursor-not-allowed"
>
<svg class="h-4 w-4 mr-2 {{ $isRefreshing ? 'animate-spin' : '' }}" wire:loading.class="animate-spin" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor">

View file

@ -0,0 +1,116 @@
<?php
namespace Tests\Unit\Services\Parsers;
use App\Services\Parsers\VrtHomepageParser;
use PHPUnit\Framework\TestCase;
/**
 * Unit tests for VrtHomepageParser::extractArticleUrls().
 *
 * Covers language-aware matching, relative and absolute hrefs, de-duplication,
 * and the default language used when the caller does not pass one.
 */
class VrtHomepageParserTest extends TestCase
{
    /**
     * Relative English article links come back as absolute https://www.vrt.be URLs.
     */
    public function test_extracts_english_article_urls_from_relative_links(): void
    {
        $html = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/da-vinci-botticelli-and-cranach-shine-at-the-bozar/">
        <img src="https://images.vrt.be/example.jpg" alt="">
        <span>Culture</span>
        <h2>Da Vinci, Botticelli and Cranach shine at the Bozar</h2>
        <time>10 hours ago</time>
        </a>
        <a href="/vrtnws/en/2026/03/06/work-to-remove-7-nazi-sea-mines-to-get-underway-on-monday/">
        <img src="https://images.vrt.be/example2.jpg" alt="">
        <span>Home News</span>
        <h2>Work to remove 7 Nazi sea mines to get underway on Monday</h2>
        <time>Fri 6 Mar</time>
        </a>
        HTML;

        $urls = VrtHomepageParser::extractArticleUrls($html, 'en');

        $this->assertCount(2, $urls);
        $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/da-vinci-botticelli-and-cranach-shine-at-the-bozar/', $urls);
        $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/06/work-to-remove-7-nazi-sea-mines-to-get-underway-on-monday/', $urls);
    }

    /**
     * Absolute Dutch article links (href already carrying the host) are extracted as-is.
     */
    public function test_extracts_dutch_article_urls_from_absolute_links(): void
    {
        $html = <<<'HTML'
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/cuba-nadert-het-einde-en-zal-snel-onderhandelen-zegt-presiden/">
        <img src="https://images.vrt.be/example.jpg">
        <span>Latijns-Amerika</span>
        <h3>Cuba nadert het einde</h3>
        <time>1 uur geleden</time>
        </a>
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/planckendael-aap-ontsnapt/">
        <img src="https://images.vrt.be/example2.jpg">
        <span>Binnenland</span>
        <h3>Goudkopleeuwaapje even ontsnapt</h3>
        <time>49 minuten geleden</time>
        </a>
        HTML;

        $urls = VrtHomepageParser::extractArticleUrls($html, 'nl');

        $this->assertCount(2, $urls);
        $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/cuba-nadert-het-einde-en-zal-snel-onderhandelen-zegt-presiden/', $urls);
        $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/planckendael-aap-ontsnapt/', $urls);
    }

    /**
     * Links belonging to another language edition are ignored.
     */
    public function test_does_not_extract_urls_for_wrong_language(): void
    {
        $html = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/some-english-article/">Article</a>
        HTML;

        $urls = VrtHomepageParser::extractArticleUrls($html, 'nl');

        $this->assertEmpty($urls);
    }

    /**
     * The same article linked several times yields a single URL.
     *
     * The third anchor repeats the article as an absolute link: relative and
     * absolute hrefs to one article must collapse into one entry, not two.
     */
    public function test_deduplicates_urls(): void
    {
        $html = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/same-article/">Article</a>
        <a href="/vrtnws/en/2026/03/03/same-article/">Article again</a>
        <a href="https://www.vrt.be/vrtnws/en/2026/03/03/same-article/">Article as absolute link</a>
        HTML;

        $urls = VrtHomepageParser::extractArticleUrls($html, 'en');

        $this->assertCount(1, $urls);
        // Pin which URL survives, not only how many.
        $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/same-article/', $urls);
    }

    /**
     * Pages without dated article links produce no URLs.
     *
     * The section homepage link is in the requested language but has no
     * /YYYY/MM/DD/ segments, so it must not be mistaken for an article.
     */
    public function test_returns_empty_array_for_html_without_article_links(): void
    {
        $html = '<html><body><a href="/about">About</a><a href="/vrtnws/en/">Home</a></body></html>';

        $urls = VrtHomepageParser::extractArticleUrls($html, 'en');

        $this->assertEmpty($urls);
    }

    /**
     * Relative and absolute links to different articles are both returned.
     */
    public function test_handles_mixed_relative_and_absolute_links(): void
    {
        $html = <<<'HTML'
        <a href="/vrtnws/nl/2026/03/07/relative-article/">Relative</a>
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/absolute-article/">Absolute</a>
        HTML;

        $urls = VrtHomepageParser::extractArticleUrls($html, 'nl');

        $this->assertCount(2, $urls);
        $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/relative-article/', $urls);
        $this->assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/absolute-article/', $urls);
    }

    /**
     * Omitting the language argument behaves like passing 'en'.
     */
    public function test_defaults_to_english_when_no_language_specified(): void
    {
        $html = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/test-article/">Test</a>
        <a href="/vrtnws/nl/2026/03/03/dutch-article/">Dutch</a>
        HTML;

        $urls = VrtHomepageParser::extractArticleUrls($html);

        $this->assertCount(1, $urls);
        $this->assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/test-article/', $urls);
    }
}