From c5a926446a5f98345158d1820334a027a19a6828 Mon Sep 17 00:00:00 2001 From: myrmidex Date: Sat, 28 Jun 2025 12:08:34 +0200 Subject: [PATCH] Fetch articles from news site --- bin/check-and-post | 21 +++++ src/Domain/Articles/Article.php | 19 +++-- src/Domain/Articles/ArticleFetcher.php | 79 +++++++++++++++++++ .../Articles/Jobs/PostArticleToLemmy.php | 36 +++++++++ 4 files changed, 148 insertions(+), 7 deletions(-) create mode 100644 bin/check-and-post create mode 100644 src/Domain/Articles/ArticleFetcher.php create mode 100644 src/Domain/Articles/Jobs/PostArticleToLemmy.php diff --git a/bin/check-and-post b/bin/check-and-post new file mode 100644 index 0000000..20726a7 --- /dev/null +++ b/bin/check-and-post @@ -0,0 +1,21 @@ +#!/usr/bin/env php +filter(function($article) { + return $article->wasRecentlyCreated; +}); + +echo "Found " . $articles->count() . " articles (" . $newArticles->count() . " new)\n"; + +$newArticles->each(function($article) { + echo "New article: " . $article->url . "\n"; + // $article->publish(); +}); + +echo "\n"; \ No newline at end of file diff --git a/src/Domain/Articles/Article.php b/src/Domain/Articles/Article.php index 8f2b3e3..dc5d300 100644 --- a/src/Domain/Articles/Article.php +++ b/src/Domain/Articles/Article.php @@ -1,7 +1,8 @@ hasMany(ArticlePosted::class); } - public function publish(string $community = 'default'): void + public function publish(?string $community = null): void { - ArticlePosted::create([ - 'article_id' => $this->id, - 'community' => $community, - 'posted_at' => now(), - ]); + $config = require __DIR__ . '/../../../bootstrap/config.php'; + $community ??= $config['lemmy']['community']; + + if ($this->posts()->where('community', $community)->exists()) { + return; + } + + $job = new PostArticleToLemmy($this, $community); + $job->handle(); } } diff --git a/src/Domain/Articles/ArticleFetcher.php b/src/Domain/Articles/ArticleFetcher.php new file mode 100644 index 0000000..cd38b37 --- /dev/null +++ b/src/Domain/Articles/ArticleFetcher.php @@ -0,0 +1,79 @@ +map(fn ($article) => self::saveArticle($article)); + } + + private static function fetchArticles(): \Illuminate\Support\Collection + { + try { + $client = new Client(); + $response = $client->get('https://www.vrt.be/vrtnws/en/'); + $html = $response->getBody()->getContents(); + + // Extract article links using regex + preg_match_all('/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches); + + $articles = collect($matches[1] ?? []) + ->unique() + ->take(10) // Limit to 10 articles + ->map(function ($path) use ($client) { + $fullUrl = 'https://www.vrt.be' . $path; + + try { + // Fetch the article page to get title + $articleResponse = $client->get($fullUrl); + $articleHtml = $articleResponse->getBody()->getContents(); + + // Extract title from meta tag or h1 + preg_match('/ $fullUrl, + 'title' => $title, + 'published_at' => $publishedAt, + ]; + } catch (\Exception $e) { + logger('article_fetcher')->error("Failed to fetch article: {$fullUrl}", ['error' => $e->getMessage()]); + return [ + 'url' => $fullUrl, + 'title' => null, + 'published_at' => null, + ]; + } + }) + ->filter(fn($article) => !empty($article['url'])); + + logger('article_fetcher')->info("Fetched " . $articles->count() . " articles from VRT"); + return $articles; + + } catch (\Exception $e) { + logger('article_fetcher')->error("Failed to fetch VRT homepage", ['error' => $e->getMessage()]); + return collect([]); + } + } + + protected static function saveArticle(array $data): Article + { + return Article::firstOrCreate( + ['url' => $data['url']], + [ + 'created_at' => Carbon::now(), + ] + ); + } +} \ No newline at end of file diff --git a/src/Domain/Articles/Jobs/PostArticleToLemmy.php b/src/Domain/Articles/Jobs/PostArticleToLemmy.php new file mode 100644 index 0000000..71a2423 --- /dev/null +++ b/src/Domain/Articles/Jobs/PostArticleToLemmy.php @@ -0,0 +1,36 @@ +articleId = $article->id; + $this->community = $community; + } + + public function handle(): void + { + $article = Article::findOrFail($this->articleId); + + // TODO: Replace with real Lemmy API call + $fakeResponse = [ + 'status' => 'success', + 'post_url' => 'https://lemmy.world/post/12345', + ]; + + $article->posts()->create([ + 'community' => $this->community, + 'posted_at' => Carbon::now(), + 'response' => $fakeResponse, + ]); + } +}