Fetch articles from news site

This commit is contained in:
myrmidex 2025-06-28 12:08:34 +02:00
parent bdf8e9b3a7
commit c5a926446a
4 changed files with 148 additions and 7 deletions

21
bin/check-and-post Normal file
View file

@ -0,0 +1,21 @@
#!/usr/bin/env php
<?php
require __DIR__ . '/../bootstrap/autoload.php';
use Feddev\LemmyArticlePoster\Domain\Articles\ArticleFetcher;
// Pull the current article list; ArticleFetcher returns one model per link found.
$articles = ArticleFetcher::getNewArticles();

// Narrow the list down to the models flagged as created during this call.
$newArticles = $articles->filter(fn ($article) => $article->wasRecentlyCreated);

$totalCount = $articles->count();
$newCount = $newArticles->count();
echo "Found " . $totalCount . " articles (" . $newCount . " new)\n";

// Report each new article; publishing is intentionally still disabled.
foreach ($newArticles as $article) {
    echo "New article: " . $article->url . "\n";
    // $article->publish();
}

echo "\n";

View file

@ -1,7 +1,8 @@
<?php <?php
namespace Domain\Articles; namespace Feddev\LemmyArticlePoster\Domain\Articles;
use Feddev\LemmyArticlePoster\Domain\Articles\Jobs\PostArticleToLemmy;
use Illuminate\Database\Eloquent\Model; use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\HasMany; use Illuminate\Database\Eloquent\Relations\HasMany;
@ -28,12 +29,16 @@ public function posts(): HasMany
return $this->hasMany(ArticlePosted::class); return $this->hasMany(ArticlePosted::class);
} }
public function publish(string $community = 'default'): void public function publish(?string $community = null): void
{ {
ArticlePosted::create([ $config = require __DIR__ . '/../../../bootstrap/config.php';
'article_id' => $this->id, $community ??= $config['lemmy']['community'];
'community' => $community,
'posted_at' => now(), if ($this->posts()->where('community', $community)->exists()) {
]); return;
}
$job = new PostArticleToLemmy($this, $community);
$job->handle();
} }
} }

View file

@ -0,0 +1,79 @@
<?php
namespace Feddev\LemmyArticlePoster\Domain\Articles;
use Illuminate\Support\Collection;
use Illuminate\Support\Carbon;
use GuzzleHttp\Client;
/**
 * Scrapes the VRT NWS English front page for article links and stores each
 * discovered link as an Article record.
 */
class ArticleFetcher
{
    /** Front page that is scanned for article links. */
    private const HOMEPAGE_URL = 'https://www.vrt.be/vrtnws/en/';

    /** Scheme and host prepended to the site-relative article paths. */
    private const BASE_URL = 'https://www.vrt.be';

    /** Maximum number of article links processed per run. */
    private const MAX_ARTICLES = 10;

    /** Upper bound, in seconds, for a complete HTTP request. */
    private const REQUEST_TIMEOUT = 15;

    /** Upper bound, in seconds, for establishing the connection. */
    private const CONNECT_TIMEOUT = 5;

    /**
     * Fetch the articles currently linked on the front page and make sure each
     * one has an Article record.
     *
     * @return Collection Article models, one per discovered link; empty when
     *                    the front page could not be fetched
     */
    public static function getNewArticles(): Collection
    {
        return self::fetchArticles()->map(fn ($article) => self::saveArticle($article));
    }

    /**
     * Download the front page, extract up to MAX_ARTICLES unique article links
     * and look up the title and publish date of each.
     *
     * Failures while fetching the front page are logged and produce an empty
     * collection rather than an exception.
     *
     * @return Collection arrays with the keys 'url', 'title' and 'published_at'
     */
    private static function fetchArticles(): Collection
    {
        try {
            // Guzzle waits indefinitely by default; bound both phases so one
            // stalled connection cannot hang the whole run.
            $client = new Client([
                'timeout' => self::REQUEST_TIMEOUT,
                'connect_timeout' => self::CONNECT_TIMEOUT,
            ]);

            $html = $client->get(self::HOMEPAGE_URL)->getBody()->getContents();

            // Extract article links using regex (dated paths: /vrtnws/en/YYYY/MM/DD/...)
            preg_match_all('/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches);

            $articles = collect($matches[1] ?? [])
                ->unique()
                ->take(self::MAX_ARTICLES)
                ->map(fn ($path) => self::fetchArticleDetails($client, self::BASE_URL . $path))
                ->filter(fn ($article) => !empty($article['url']));

            logger('article_fetcher')->info("Fetched " . $articles->count() . " articles from VRT");

            return $articles;
        } catch (\Exception $e) {
            logger('article_fetcher')->error("Failed to fetch VRT homepage", ['error' => $e->getMessage()]);

            return collect([]);
        }
    }

    /**
     * Fetch a single article page and read its title and publish date from the
     * Open Graph / article meta tags.
     *
     * A failed request is logged and yields null for both values so that the
     * URL itself is still returned.
     *
     * @param Client $client  HTTP client shared across all requests of a run
     * @param string $fullUrl absolute URL of the article page
     *
     * @return array{url: string, title: ?string, published_at: ?string}
     */
    private static function fetchArticleDetails(Client $client, string $fullUrl): array
    {
        try {
            $articleHtml = $client->get($fullUrl)->getBody()->getContents();

            return [
                'url' => $fullUrl,
                'title' => self::extractMetaContent($articleHtml, 'og:title'),
                'published_at' => self::extractMetaContent($articleHtml, 'article:published_time'),
            ];
        } catch (\Exception $e) {
            logger('article_fetcher')->error("Failed to fetch article: {$fullUrl}", ['error' => $e->getMessage()]);

            return [
                'url' => $fullUrl,
                'title' => null,
                'published_at' => null,
            ];
        }
    }

    /**
     * Return the content attribute of the first `<meta property="..."
     * content="...">` tag with the given property, or null when absent.
     *
     * The value comes from an HTML attribute, so character references such as
     * `&amp;` or `&#039;` are decoded to plain UTF-8 text.
     *
     * @param string $html     page markup to search
     * @param string $property value of the meta tag's property attribute
     */
    private static function extractMetaContent(string $html, string $property): ?string
    {
        $pattern = '/<meta property="' . preg_quote($property, '/') . '" content="([^"]+)"/';

        if (preg_match($pattern, $html, $match) !== 1) {
            return null;
        }

        return html_entity_decode($match[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Look up the Article with the given URL, creating it when it does not
     * exist yet.
     *
     * NOTE(review): only 'url' is used here; the scraped 'title' and
     * 'published_at' values are not persisted — confirm whether the Article
     * model is meant to store them.
     *
     * @param array $data scraped article data; must contain the key 'url'
     */
    protected static function saveArticle(array $data): Article
    {
        return Article::firstOrCreate(
            ['url' => $data['url']],
            [
                'created_at' => Carbon::now(),
            ]
        );
    }
}

View file

@ -0,0 +1,36 @@
<?php
namespace Feddev\LemmyArticlePoster\Domain\Articles\Jobs;
use Feddev\LemmyArticlePoster\Domain\Articles\Article;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Support\Carbon;
/**
 * Job that records a post of a stored article to a Lemmy community.
 *
 * The Lemmy call itself is still a placeholder: handle() stores a hard-coded
 * success response instead of contacting a server.
 */
class PostArticleToLemmy implements ShouldQueue
{
    /** Primary key of the article this job refers to. */
    public int $articleId;

    /** Name of the community the article is posted to. */
    public string $community;

    /**
     * @param Article $article   article to post; only its id is kept on the job
     * @param string  $community target community name
     */
    public function __construct(Article $article, string $community = 'default')
    {
        $this->articleId = $article->id;
        $this->community = $community;
    }

    /**
     * Reload the article by id and attach a post record for the community.
     *
     * Article::findOrFail is used, so a missing article aborts the job.
     */
    public function handle(): void
    {
        $postsRelation = Article::findOrFail($this->articleId)->posts();

        // TODO: Replace with real Lemmy API call
        $postRecord = [
            'community' => $this->community,
            'posted_at' => Carbon::now(),
            'response' => [
                'status' => 'success',
                'post_url' => 'https://lemmy.world/post/12345',
            ],
        ];

        $postsRelation->create($postRecord);
    }
}