Release v1.2.0 #87

Merged
myrmidex merged 9 commits from release/v1.2.0 into main 2026-03-08 21:30:58 +01:00
7 changed files with 56 additions and 96 deletions
Showing only changes of commit 0bb10729de - Show all commits

View file

@ -1,49 +0,0 @@
<?php
namespace App\Services\Parsers;
/**
 * Pulls article links out of Belga News Agency homepage markup.
 */
class BelgaHomepageParser
{
    /** Origin prepended to every relative article path. */
    private const BASE_URL = 'https://www.belganewsagency.eu';

    /**
     * Paths that are treated as non-article pages and never returned.
     *
     * @var array<int, string>
     */
    private const NON_ARTICLE_PATHS = [
        '/',
        '/de',
        '/feed',
        '/search',
        '/category',
        '/about',
        '/contact',
        '/privacy',
        '/terms',
    ];

    /**
     * Build the list of absolute article URLs linked from the given HTML.
     *
     * Only anchors with a double-quoted, relative, single-segment href made of
     * lowercase letters, digits and hyphens are picked up. Duplicate paths are
     * collapsed and blacklisted paths are skipped before the site origin is
     * prepended.
     *
     * @return array<int, string>
     */
    public static function extractArticleUrls(string $html): array
    {
        preg_match_all('/<a[^>]+href="(\/[a-z0-9-]+)"/', $html, $hrefMatches);

        // Capture group 1 holds the relative paths; de-duplicate before filtering.
        $candidatePaths = collect($hrefMatches[1])->unique();

        $articlePaths = $candidatePaths->filter(
            static fn ($path) => ! self::isNonArticlePath($path)
        );

        return $articlePaths
            ->map(static fn ($path) => self::BASE_URL.$path)
            ->values() // re-index so the result is a plain list
            ->toArray();
    }

    /**
     * Tell whether a path equals a blacklisted path or sits underneath one.
     */
    private static function isNonArticlePath(string $path): bool
    {
        foreach (self::NON_ARTICLE_PATHS as $excluded) {
            if ($path === $excluded) {
                return true;
            }

            if (str_starts_with($path, $excluded.'/')) {
                return true;
            }
        }

        return false;
    }
}

View file

@ -1,37 +0,0 @@
<?php
namespace App\Services\Parsers;
use App\Contracts\HomepageParserInterface;
/**
 * Adapter that exposes the static BelgaHomepageParser through
 * HomepageParserInterface.
 */
class BelgaHomepageParserAdapter implements HomepageParserInterface
{
    /** Human-readable name reported for this source. */
    private const SOURCE_NAME = 'Belga News Agency';

    /** Homepage address reported for this source. */
    private const HOMEPAGE_URL = 'https://www.belganewsagency.eu/';

    /** Substring a URL must contain for this parser to accept it. */
    private const DOMAIN = 'belganewsagency.eu';

    /**
     * @param string $language Language code returned by getLanguage(); defaults to 'en'.
     */
    public function __construct(
        private readonly string $language = 'en',
    ) {}

    /**
     * Name of the source this adapter handles.
     */
    public function getSourceName(): string
    {
        return self::SOURCE_NAME;
    }

    /**
     * URL of the source's homepage.
     */
    public function getHomepageUrl(): string
    {
        return self::HOMEPAGE_URL;
    }

    /**
     * Language code supplied at construction time.
     */
    public function getLanguage(): string
    {
        return $this->language;
    }

    /**
     * Whether the given URL contains the Belga domain.
     */
    public function canParse(string $url): bool
    {
        return str_contains($url, self::DOMAIN);
    }

    /**
     * Delegate URL extraction to the static BelgaHomepageParser.
     */
    public function extractArticleUrls(string $html): array
    {
        return BelgaHomepageParser::extractArticleUrls($html);
    }
}

View file

@ -33,13 +33,12 @@
'code' => 'belga',
'name' => 'Belga News Agency',
'description' => 'Belgian national news agency',
'type' => 'website',
'type' => 'rss',
'is_active' => true,
'languages' => [
'en' => ['url' => 'https://www.belganewsagency.eu/'],
'en' => ['url' => 'https://www.belganewsagency.eu/feed'],
],
'parsers' => [
'homepage' => \App\Services\Parsers\BelgaHomepageParserAdapter::class,
'article' => \App\Services\Parsers\BelgaArticleParser::class,
'article_page' => \App\Services\Parsers\BelgaArticlePageParser::class,
],

View file

@ -75,7 +75,8 @@ public function belga(): static
{
return $this->state(fn (array $attributes) => [
'provider' => 'belga',
'url' => 'https://www.belganewsagency.eu/',
'url' => 'https://www.belganewsagency.eu/feed',
'type' => 'rss',
]);
}
}

View file

@ -98,16 +98,16 @@ public function test_store_creates_belga_feed_successfully(): void
'message' => 'Feed created successfully!',
'data' => [
'name' => 'Belga Test Feed',
'url' => 'https://www.belganewsagency.eu/',
'type' => 'website',
'url' => 'https://www.belganewsagency.eu/feed',
'type' => 'rss',
'is_active' => true,
],
]);
$this->assertDatabaseHas('feeds', [
'name' => 'Belga Test Feed',
'url' => 'https://www.belganewsagency.eu/',
'type' => 'website',
'url' => 'https://www.belganewsagency.eu/feed',
'type' => 'rss',
]);
}

View file

@ -42,8 +42,8 @@ public function test_creates_belga_feed_with_correct_url(): void
$feed = $this->action->execute('Belga News', 'belga', $language->id);
$this->assertEquals('https://www.belganewsagency.eu/', $feed->url);
$this->assertEquals('website', $feed->type);
$this->assertEquals('https://www.belganewsagency.eu/feed', $feed->url);
$this->assertEquals('rss', $feed->type);
$this->assertEquals('belga', $feed->provider);
$this->assertNull($feed->description);
}

View file

@ -156,6 +156,52 @@ public function test_get_articles_from_rss_feed_handles_http_failure(): void
$this->assertEmpty($result);
}
/**
 * Fetching an RSS-type Belga feed against a faked two-item response must
 * return two results and persist both item links as articles of that feed.
 */
public function test_get_articles_from_belga_rss_feed_creates_articles(): void
{
    // Nowdoc body and closing marker stay at column 0 so the payload is unchanged.
    $feedXml = <<<'XML'
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Belga News Agency</title>
<link>https://www.belganewsagency.eu</link>
<item>
<title>Belgium announces new climate plan</title>
<link>https://www.belganewsagency.eu/belgium-announces-new-climate-plan</link>
<description>Belgium has unveiled a comprehensive climate strategy.</description>
<pubDate>Sun, 08 Mar 2026 10:00:00 GMT</pubDate>
</item>
<item>
<title>EU summit concludes in Brussels</title>
<link>https://www.belganewsagency.eu/eu-summit-concludes-in-brussels</link>
<description>European leaders reached agreement on key issues.</description>
<pubDate>Sun, 08 Mar 2026 09:00:00 GMT</pubDate>
</item>
</channel>
</rss>
XML;

    // Every outgoing HTTP request is answered with the canned feed.
    Http::fake(['*' => Http::response($feedXml, 200)]);

    $belgaFeed = Feed::factory()->create([
        'type' => 'rss',
        'provider' => 'belga',
        'url' => 'https://www.belganewsagency.eu/feed',
    ]);

    $articles = $this->createArticleFetcher()->getArticlesFromFeed($belgaFeed);

    $this->assertCount(2, $articles);

    $expectedUrls = [
        'https://www.belganewsagency.eu/belgium-announces-new-climate-plan',
        'https://www.belganewsagency.eu/eu-summit-concludes-in-brussels',
    ];

    foreach ($expectedUrls as $articleUrl) {
        $this->assertDatabaseHas('articles', [
            'url' => $articleUrl,
            'feed_id' => $belgaFeed->id,
        ]);
    }
}
protected function tearDown(): void
{
Mockery::close();