# Patch notes (reviewer commentary; the patch text below the first "diff --git" header is unchanged).
#
# Purpose: route Lemmy publishing through a queued job (PublishToLemmyJob) and wrap publisher
# failures in dedicated exception types, and pass the article URL and a thumbnail to Lemmy.
#
# NOTE: this copy of the patch has lost its original line breaks, and some spans appear to be
# missing (the body of PlatformEnum.php, the header/opening of the exception class that follows it,
# the opening of PublishException.php and PublishToLemmyJob.php, and parts of several regex
# literals in the two parsers). Confirm against the repository before applying.
#
# Files touched, in patch order:
#   app/Console/Commands/FetchNewArticlesCommand.php
#       Signature renamed 'articles:fetch' -> 'article:refresh'; description reworded.
#   app/Console/Commands/PublishToLemmyCommand.php
#       Selects the first article whose articlePublication is null and dispatches
#       PublishToLemmyJob instead of calling LemmyPublisher inline.
#   app/Enums/PlatformEnum.php (new)
#       Body not visible here. The text after its hunk header looks like the tail of an exception
#       class exposing getPlatform() -- presumably PlatformAuthException; verify.
#   app/Exceptions/PublishException.php (new)
#       Message names the article id and platform value and appends the previous exception's
#       message when one is given; exposes getArticle() and getPlatform().
#   app/Jobs/PublishToLemmyJob.php (new)
#       Uses queue 'lemmy-posts'. handle() returns early when articlePublication is not null,
#       fetches article data, publishes, and calls $this->fail($e) on PublishException.
#   app/Listeners/PublishArticle.php
#       Logs with structured context and dispatches PublishToLemmyJob.
#   app/Models/Article.php
#       Adds articlePublication() (HasOne) and articlePublications() (HasMany).
#   app/Modules/Lemmy/Services/LemmyApiService.php
#       createPost() gains optional $url and $thumbnail; they are sent as 'url' and
#       'custom_thumbnail' only when truthy.
#   app/Modules/Lemmy/Services/LemmyPublisher.php
#       publish() wraps any Exception in PublishException. getAuthToken() now returns string and
#       throws PlatformAuthException on missing credentials or a falsy login result.
#   app/Services/Parsers/BelgaArticlePageParser.php, VrtArticlePageParser.php
#       Add extractThumbnail() and a 'thumbnail' key in extractData(); the Vrt hunk also strips
#       trailing whitespace.
#   routes/console.php
#       Adds a closure scheduled every fifteen minutes, named 'publish-to-lemmy', that publishes
#       the first valid article having no articlePublications and logs failures.
#
# NOTE(review): PublishToLemmyCommand filters the collection returned by Article::all(), while
#   routes/console.php expresses the same "unpublished" condition as a whereDoesntHave query --
#   consider aligning the two.
# NOTE(review): in PublishToLemmyJob::handle() the ArticleFetcher::fetchArticleData() call is
#   outside the try block, so only failures from publish() reach $this->fail().
# NOTE(review): the Article docblock declares $articlePublication as non-nullable, yet the command
#   and the job both compare it against null.
# NOTE(review): the scheduled closure publishes inline rather than dispatching PublishToLemmyJob,
#   unlike the command and the listener -- confirm this is intended.
#
diff --git a/app/Console/Commands/FetchNewArticlesCommand.php b/app/Console/Commands/FetchNewArticlesCommand.php index f370a10..e2ba1db 100644 --- a/app/Console/Commands/FetchNewArticlesCommand.php +++ b/app/Console/Commands/FetchNewArticlesCommand.php @@ -7,9 +7,9 @@ class FetchNewArticlesCommand extends Command { - protected $signature = 'articles:fetch'; + protected $signature = 'article:refresh'; - protected $description = 'Fetches new articles'; + protected $description = 'Fetches latest articles'; public function handle(): int { diff --git a/app/Console/Commands/PublishToLemmyCommand.php b/app/Console/Commands/PublishToLemmyCommand.php index 4afe928..2e32327 100644 --- a/app/Console/Commands/PublishToLemmyCommand.php +++ b/app/Console/Commands/PublishToLemmyCommand.php @@ -2,29 +2,27 @@ namespace App\Console\Commands; +use App\Jobs\PublishToLemmyJob; use App\Models\Article; -use App\Modules\Lemmy\Services\LemmyPublisher; -use App\Services\Article\ArticleFetcher; -use Exception; use Illuminate\Console\Command; class PublishToLemmyCommand extends Command { protected $signature = 'article:publish-to-lemmy'; - protected $description = 'Publish an article to Lemmy'; + protected $description = 'Queue an article for publishing to Lemmy'; public function handle(): int { - $article = Article::all()->firstOrFail(); + $article = Article::all() + ->filter(fn (Article $article) => $article->articlePublication === null) + ->firstOrFail(); - $this->info('Publishing article: ' . $article->url); + $this->info('Queuing article for publishing: ' . 
$article->url); - try { - LemmyPublisher::fromConfig()->publish($article, ArticleFetcher::fetchArticleData($article)); - } catch (Exception) { - return self::FAILURE; - } + PublishToLemmyJob::dispatch($article); + + $this->info('Article queued successfully'); return self::SUCCESS; } diff --git a/app/Enums/PlatformEnum.php b/app/Enums/PlatformEnum.php new file mode 100644 index 0000000..689a532 --- /dev/null +++ b/app/Enums/PlatformEnum.php @@ -0,0 +1,8 @@ +value}: {$reason}"; + + parent::__construct($message); + } + + public function getPlatform(): PlatformEnum + { + return $this->platform; + } +} diff --git a/app/Exceptions/PublishException.php b/app/Exceptions/PublishException.php new file mode 100644 index 0000000..e910e9b --- /dev/null +++ b/app/Exceptions/PublishException.php @@ -0,0 +1,35 @@ +id} to {$platform->value}"; + + if ($previous) { + $message .= ": {$previous->getMessage()}"; + } + + parent::__construct($message, 0, $previous); + } + + public function getArticle(): Article + { + return $this->article; + } + + public function getPlatform(): PlatformEnum + { + return $this->platform; + } +} diff --git a/app/Jobs/PublishToLemmyJob.php b/app/Jobs/PublishToLemmyJob.php new file mode 100644 index 0000000..08341d9 --- /dev/null +++ b/app/Jobs/PublishToLemmyJob.php @@ -0,0 +1,50 @@ +onQueue('lemmy-posts'); + } + + public function handle(): void + { + if ($this->article->articlePublication !== null) { + logger()->info('Article already published, skipping', [ + 'article_id' => $this->article->id + ]); + + return; + } + + $extractedData = ArticleFetcher::fetchArticleData($this->article); + + logger()->info('Publishing article to Lemmy', [ + 'article_id' => $this->article->id, + 'url' => $this->article->url + ]); + + try { + LemmyPublisher::fromConfig()->publish($this->article, $extractedData); + + logger()->info('Article published successfully', [ + 'article_id' => $this->article->id + ]); + + } catch (PublishException $e) { + $this->fail($e); + } + } +} diff 
--git a/app/Listeners/PublishArticle.php b/app/Listeners/PublishArticle.php index a9bd709..86d703f 100644 --- a/app/Listeners/PublishArticle.php +++ b/app/Listeners/PublishArticle.php @@ -3,8 +3,7 @@ namespace App\Listeners; use App\Events\ArticleReadyToPublish; -use App\Modules\Lemmy\Services\LemmyPublisher; -use App\Services\Article\ArticleFetcher; +use App\Jobs\PublishToLemmyJob; class PublishArticle { @@ -16,8 +15,11 @@ public function handle(ArticleReadyToPublish $event): void { $article = $event->article; - logger('Publishing article: ' . $article->id . ' : ' . $article->url); - - LemmyPublisher::fromConfig()->publish($article, ArticleFetcher::fetchArticleData($article)); + logger()->info('Article queued for publishing to Lemmy', [ + 'article_id' => $article->id, + 'url' => $article->url + ]); + + PublishToLemmyJob::dispatch($article); } } diff --git a/app/Models/Article.php b/app/Models/Article.php index 48b3e79..cdd6511 100644 --- a/app/Models/Article.php +++ b/app/Models/Article.php @@ -6,6 +6,8 @@ use Database\Factories\ArticleFactory; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; +use Illuminate\Database\Eloquent\Relations\HasMany; +use Illuminate\Database\Eloquent\Relations\HasOne; use Illuminate\Support\Carbon; /** @@ -16,6 +18,7 @@ * @property Carbon|null $validated_at * @property Carbon $created_at * @property Carbon $updated_at + * @property ArticlePublication $articlePublication */ class Article extends Model { @@ -54,6 +57,16 @@ public function isValid(): bool return $this->is_valid; } + public function articlePublication(): HasOne + { + return $this->hasOne(ArticlePublication::class); + } + + public function articlePublications(): HasMany + { + return $this->hasMany(ArticlePublication::class); + } + protected static function booted(): void { static::created(function ($article) { diff --git a/app/Modules/Lemmy/Services/LemmyApiService.php b/app/Modules/Lemmy/Services/LemmyApiService.php index 
5b7a5d3..0e369ff 100644 --- a/app/Modules/Lemmy/Services/LemmyApiService.php +++ b/app/Modules/Lemmy/Services/LemmyApiService.php @@ -57,15 +57,26 @@ public function getCommunityId(string $communityName): int } } - public function createPost(string $token, string $title, string $body, int $communityId): array + public function createPost(string $token, string $title, string $body, int $communityId, ?string $url = null, ?string $thumbnail = null): array { try { $request = new LemmyRequest($this->instance, $token); - $response = $request->post('post', [ + + $postData = [ 'name' => $title, 'body' => $body, 'community_id' => $communityId, - ]); + ]; + + if ($url) { + $postData['url'] = $url; + } + + if ($thumbnail) { + $postData['custom_thumbnail'] = $thumbnail; + } + + $response = $request->post('post', $postData); if (!$response->successful()) { throw new Exception('Failed to create post: ' . $response->status() . ' - ' . $response->body()); @@ -77,4 +88,4 @@ public function createPost(string $token, string $title, string $body, int $comm throw $e; } } -} \ No newline at end of file +} diff --git a/app/Modules/Lemmy/Services/LemmyPublisher.php b/app/Modules/Lemmy/Services/LemmyPublisher.php index bb15409..d4a36f1 100644 --- a/app/Modules/Lemmy/Services/LemmyPublisher.php +++ b/app/Modules/Lemmy/Services/LemmyPublisher.php @@ -2,6 +2,9 @@ namespace App\Modules\Lemmy\Services; +use App\Enums\PlatformEnum; +use App\Exceptions\PlatformAuthException; +use App\Exceptions\PublishException; use App\Models\Article; use App\Models\ArticlePublication; use Exception; @@ -29,38 +32,47 @@ public static function fromConfig(): self ); } + /** + * @throws PublishException + */ public function publish(Article $article, array $extractedData): ArticlePublication { - $token = $this->getAuthToken(); - - if (!$token) { - throw new Exception('Failed to authenticate with Lemmy'); + try { + $token = $this->getAuthToken(); + $communityId = $this->getCommunityId(); + + $postData = 
$this->api->createPost( + $token, + $extractedData['title'] ?? 'Untitled', + $extractedData['description'] ?? '', + $communityId, + $article->url, + $extractedData['thumbnail'] ?? null + ); + + return $this->createPublicationRecord($article, $postData, $communityId); + } catch (Exception $e) { + throw new PublishException($article, PlatformEnum::LEMMY, $e); } - - $communityId = $this->getCommunityId(); - - $postData = $this->api->createPost( - $token, - $extractedData['title'] ?? 'Untitled', - $extractedData['description'] ?? '', - $communityId - ); - - return $this->createPublicationRecord($article, $postData, $communityId); } - private function getAuthToken(): ?string + private function getAuthToken(): string { return Cache::remember('lemmy_jwt_token', 3600, function () { $username = config('lemmy.username'); $password = config('lemmy.password'); - + if (!$username || !$password) { - logger()->error('Missing Lemmy credentials'); - return null; + throw new PlatformAuthException(PlatformEnum::LEMMY, 'Missing credentials'); } + + $token = $this->api->login($username, $password); - return $this->api->login($username, $password); + if (!$token) { + throw new PlatformAuthException(PlatformEnum::LEMMY, 'Login failed'); + } + + return $token; }); } @@ -83,4 +95,4 @@ private function createPublicationRecord(Article $article, array $postData, int 'publication_data' => $postData, ]); } -} \ No newline at end of file +} diff --git a/app/Services/Parsers/BelgaArticlePageParser.php b/app/Services/Parsers/BelgaArticlePageParser.php index bfcb5af..14fdca7 100644 --- a/app/Services/Parsers/BelgaArticlePageParser.php +++ b/app/Services/Parsers/BelgaArticlePageParser.php @@ -80,12 +80,28 @@ public static function extractFullArticle(string $html): ?string return null; } + public static function extractThumbnail(string $html): ?string + { + // Try OpenGraph image first + if (preg_match('/]+src="([^"]+)"/i', $html, $matches)) { + return $matches[1]; + } + + return null; + } + public 
static function extractData(string $html): array { return [ 'title' => self::extractTitle($html), 'description' => self::extractDescription($html), 'full_article' => self::extractFullArticle($html), + 'thumbnail' => self::extractThumbnail($html), ]; } } \ No newline at end of file diff --git a/app/Services/Parsers/VrtArticlePageParser.php b/app/Services/Parsers/VrtArticlePageParser.php index 98c1c6c..2a408b1 100644 --- a/app/Services/Parsers/VrtArticlePageParser.php +++ b/app/Services/Parsers/VrtArticlePageParser.php @@ -10,66 +10,80 @@ public static function extractTitle(string $html): ?string if (preg_match('/]*>([^<]+)<\/h1>/i', $html, $matches)) { return html_entity_decode(strip_tags($matches[1]), ENT_QUOTES, 'UTF-8'); } - + // Try title tag if (preg_match('/([^<]+)<\/title>/i', $html, $matches)) { return html_entity_decode($matches[1], ENT_QUOTES, 'UTF-8'); } - + return null; } - + public static function extractDescription(string $html): ?string { // Try meta description first if (preg_match('/<meta property="og:description" content="([^"]+)"/i', $html, $matches)) { return html_entity_decode($matches[1], ENT_QUOTES, 'UTF-8'); } - + // Try to find first paragraph in article content if (preg_match('/<p[^>]*>([^<]+(?:<[^\/](?!p)[^>]*>[^<]*<\/[^>]*>[^<]*)*)<\/p>/i', $html, $matches)) { return html_entity_decode(strip_tags($matches[1]), ENT_QUOTES, 'UTF-8'); } - + return null; } - + public static function extractFullArticle(string $html): ?string { // Remove scripts, styles, and other non-content elements $cleanHtml = preg_replace('/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/mi', '', $html); $cleanHtml = preg_replace('/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/mi', '', $cleanHtml); - + // Extract all paragraph content preg_match_all('/<p[^>]*>(.*?)<\/p>/is', $cleanHtml, $matches); - + if (!empty($matches[1])) { $paragraphs = array_map(function($paragraph) { return html_entity_decode(strip_tags($paragraph), ENT_QUOTES, 'UTF-8'); }, $matches[1]); - + // 
Filter out empty paragraphs and join with double newlines $fullText = implode("\n\n", array_filter($paragraphs, function($p) { return trim($p) !== ''; })); - + return $fullText ?: null; } - + return null; } - + + public static function extractThumbnail(string $html): ?string + { + if (preg_match('/<meta property="og:image" content="([^"]+)"/i', $html, $matches)) { + return $matches[1]; + } + + if (preg_match('/<img[^>]+src="([^"]+)"/i', $html, $matches)) { + return $matches[1]; + } + + return null; + } + public static function extractData(string $html): array { return [ 'title' => self::extractTitle($html), 'description' => self::extractDescription($html), 'full_article' => self::extractFullArticle($html), + 'thumbnail' => self::extractThumbnail($html), ]; } -} \ No newline at end of file +} diff --git a/routes/console.php b/routes/console.php index aa1da9f..3aeb350 100644 --- a/routes/console.php +++ b/routes/console.php @@ -1,6 +1,38 @@ <?php use App\Console\Commands\FetchNewArticlesCommand; +use App\Models\Article; +use App\Modules\Lemmy\Services\LemmyPublisher; +use App\Services\Article\ArticleFetcher; use Illuminate\Support\Facades\Schedule; Schedule::command(FetchNewArticlesCommand::class)->hourly(); + +Schedule::call(function () { + $article = Article::whereDoesntHave('articlePublications') + ->where('is_valid', true) + ->first(); + + if ($article) { + try { + logger()->info('Publishing article to Lemmy via scheduler', [ + 'article_id' => $article->id, + 'url' => $article->url + ]); + + $extractedData = ArticleFetcher::fetchArticleData($article); + LemmyPublisher::fromConfig()->publish($article, $extractedData); + + logger()->info('Successfully published article to Lemmy', [ + 'article_id' => $article->id + ]); + } catch (Exception $e) { + logger()->error('Failed to publish article to Lemmy via scheduler', [ + 'article_id' => $article->id, + 'error' => $e->getMessage() + ]); + } + } else { + logger()->debug('No unpublished valid articles found for Lemmy 
publishing'); + } +})->everyFifteenMinutes()->name('publish-to-lemmy');