Compare commits
2 commits
276812811c
...
d73309cb76
| Author | SHA1 | Date | |
|---|---|---|---|
| d73309cb76 | |||
| 866f8d02d3 |
7 changed files with 134 additions and 13 deletions
|
|
@ -36,13 +36,7 @@ public function refresh(): void
|
|||
|
||||
ArticleDiscoveryJob::dispatch();
|
||||
|
||||
// Reset after 10 seconds
|
||||
$this->dispatch('refresh-complete')->self();
|
||||
}
|
||||
|
||||
public function refreshComplete(): void
|
||||
{
|
||||
$this->isRefreshing = false;
|
||||
$this->dispatch('refresh-started');
|
||||
}
|
||||
|
||||
public function render()
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@ public static function getParserForFeed(Feed $feed): ?HomepageParserInterface
|
|||
return null;
|
||||
}
|
||||
|
||||
return new $parserClass();
|
||||
$language = $feed->language?->short_code ?? 'en';
|
||||
|
||||
return new $parserClass($language);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@
|
|||
|
||||
class BelgaHomepageParserAdapter implements HomepageParserInterface
|
||||
{
|
||||
public function __construct(
|
||||
private string $language = 'en',
|
||||
) {}
|
||||
|
||||
public function canParse(string $url): bool
|
||||
{
|
||||
return str_contains($url, 'belganewsagency.eu');
|
||||
|
|
|
|||
|
|
@ -7,10 +7,10 @@ class VrtHomepageParser
|
|||
/**
|
||||
* @return array<int, string>
|
||||
*/
|
||||
public static function extractArticleUrls(string $html): array
|
||||
public static function extractArticleUrls(string $html, string $language = 'en'): array
|
||||
{
|
||||
// Extract article links using regex
|
||||
preg_match_all('/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches);
|
||||
$escapedLanguage = preg_quote($language, '/');
|
||||
preg_match_all('/href="(?:https:\/\/www\.vrt\.be)?(\/vrtnws\/' . $escapedLanguage . '\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches);
|
||||
|
||||
$urls = collect($matches[1])
|
||||
->unique()
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@
|
|||
|
||||
class VrtHomepageParserAdapter implements HomepageParserInterface
|
||||
{
|
||||
public function __construct(
|
||||
private string $language = 'en',
|
||||
) {}
|
||||
|
||||
public function canParse(string $url): bool
|
||||
{
|
||||
return str_contains($url, 'vrt.be');
|
||||
|
|
@ -13,12 +17,12 @@ public function canParse(string $url): bool
|
|||
|
||||
public function extractArticleUrls(string $html): array
|
||||
{
|
||||
return VrtHomepageParser::extractArticleUrls($html);
|
||||
return VrtHomepageParser::extractArticleUrls($html, $this->language);
|
||||
}
|
||||
|
||||
public function getHomepageUrl(): string
|
||||
{
|
||||
return 'https://www.vrt.be/vrtnws/en/';
|
||||
return "https://www.vrt.be/vrtnws/{$this->language}/";
|
||||
}
|
||||
|
||||
public function getSourceName(): string
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
wire:click="refresh"
|
||||
wire:loading.attr="disabled"
|
||||
@disabled($isRefreshing)
|
||||
x-on:refresh-started.window="setTimeout(() => window.location.reload(), 10000)"
|
||||
class="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<svg class="h-4 w-4 mr-2 {{ $isRefreshing ? 'animate-spin' : '' }}" wire:loading.class="animate-spin" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor">
|
||||
|
|
|
|||
116
tests/Unit/Services/Parsers/VrtHomepageParserTest.php
Normal file
116
tests/Unit/Services/Parsers/VrtHomepageParserTest.php
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
<?php
|
||||
|
||||
namespace Tests\Unit\Services\Parsers;
|
||||
|
||||
use App\Services\Parsers\VrtHomepageParser;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
/**
 * Unit tests for VrtHomepageParser::extractArticleUrls().
 *
 * Every case hands a small HTML fixture to the parser together with a
 * language code (or none) and checks which article URLs are returned.
 */
class VrtHomepageParserTest extends TestCase
{
    /**
     * Relative "/vrtnws/en/..." links are expected back as absolute
     * https://www.vrt.be URLs when English is requested.
     */
    public function test_extracts_english_article_urls_from_relative_links(): void
    {
        $markup = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/da-vinci-botticelli-and-cranach-shine-at-the-bozar/">
        <img src="https://images.vrt.be/example.jpg" alt="">
        <span>Culture</span>
        <h2>Da Vinci, Botticelli and Cranach shine at the Bozar</h2>
        <time>10 hours ago</time>
        </a>
        <a href="/vrtnws/en/2026/03/06/work-to-remove-7-nazi-sea-mines-to-get-underway-on-monday/">
        <img src="https://images.vrt.be/example2.jpg" alt="">
        <span>Home News</span>
        <h2>Work to remove 7 Nazi sea mines to get underway on Monday</h2>
        <time>Fri 6 Mar</time>
        </a>
        HTML;

        $found = VrtHomepageParser::extractArticleUrls($markup, 'en');

        self::assertCount(2, $found);
        self::assertContains(
            'https://www.vrt.be/vrtnws/en/2026/03/03/da-vinci-botticelli-and-cranach-shine-at-the-bozar/',
            $found,
        );
        self::assertContains(
            'https://www.vrt.be/vrtnws/en/2026/03/06/work-to-remove-7-nazi-sea-mines-to-get-underway-on-monday/',
            $found,
        );
    }

    /**
     * Links that already carry the https://www.vrt.be host are expected
     * to be picked up when Dutch is requested.
     */
    public function test_extracts_dutch_article_urls_from_absolute_links(): void
    {
        $markup = <<<'HTML'
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/cuba-nadert-het-einde-en-zal-snel-onderhandelen-zegt-presiden/">
        <img src="https://images.vrt.be/example.jpg">
        <span>Latijns-Amerika</span>
        <h3>Cuba nadert het einde</h3>
        <time>1 uur geleden</time>
        </a>
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/planckendael-aap-ontsnapt/">
        <img src="https://images.vrt.be/example2.jpg">
        <span>Binnenland</span>
        <h3>Goudkopleeuwaapje even ontsnapt</h3>
        <time>49 minuten geleden</time>
        </a>
        HTML;

        $found = VrtHomepageParser::extractArticleUrls($markup, 'nl');

        self::assertCount(2, $found);
        self::assertContains(
            'https://www.vrt.be/vrtnws/nl/2026/03/07/cuba-nadert-het-einde-en-zal-snel-onderhandelen-zegt-presiden/',
            $found,
        );
        self::assertContains(
            'https://www.vrt.be/vrtnws/nl/2026/03/07/planckendael-aap-ontsnapt/',
            $found,
        );
    }

    /**
     * An English link must not be returned when Dutch is requested.
     */
    public function test_does_not_extract_urls_for_wrong_language(): void
    {
        $markup = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/some-english-article/">Article</a>
        HTML;

        $found = VrtHomepageParser::extractArticleUrls($markup, 'nl');

        self::assertEmpty($found);
    }

    /**
     * The same href appearing twice is expected to yield a single entry.
     */
    public function test_deduplicates_urls(): void
    {
        $markup = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/same-article/">Article</a>
        <a href="/vrtnws/en/2026/03/03/same-article/">Article again</a>
        HTML;

        $found = VrtHomepageParser::extractArticleUrls($markup, 'en');

        self::assertCount(1, $found);
    }

    /**
     * Markup whose only link is not an article path is expected to
     * produce an empty result.
     */
    public function test_returns_empty_array_for_html_without_article_links(): void
    {
        $markup = '<html><body><a href="/about">About</a></body></html>';

        $found = VrtHomepageParser::extractArticleUrls($markup, 'en');

        self::assertEmpty($found);
    }

    /**
     * A relative and an absolute link in the same document are both
     * expected back, each as an absolute URL.
     */
    public function test_handles_mixed_relative_and_absolute_links(): void
    {
        $markup = <<<'HTML'
        <a href="/vrtnws/nl/2026/03/07/relative-article/">Relative</a>
        <a href="https://www.vrt.be/vrtnws/nl/2026/03/07/absolute-article/">Absolute</a>
        HTML;

        $found = VrtHomepageParser::extractArticleUrls($markup, 'nl');

        self::assertCount(2, $found);
        self::assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/relative-article/', $found);
        self::assertContains('https://www.vrt.be/vrtnws/nl/2026/03/07/absolute-article/', $found);
    }

    /**
     * Calling the parser without a language argument is expected to
     * return only the English link from a mixed-language fixture.
     */
    public function test_defaults_to_english_when_no_language_specified(): void
    {
        $markup = <<<'HTML'
        <a href="/vrtnws/en/2026/03/03/test-article/">Test</a>
        <a href="/vrtnws/nl/2026/03/03/dutch-article/">Dutch</a>
        HTML;

        $found = VrtHomepageParser::extractArticleUrls($markup);

        self::assertCount(1, $found);
        self::assertContains('https://www.vrt.be/vrtnws/en/2026/03/03/test-article/', $found);
    }
}
|
||||
Loading…
Reference in a new issue