From 11d4262457af258c46bb71cfabb391c886b7caf9 Mon Sep 17 00:00:00 2001 From: myrmidex Date: Sat, 9 Aug 2025 18:34:19 +0200 Subject: [PATCH] Fix article fetching --- .../Controllers/Api/V1/ArticlesController.php | 20 +++++++ .../Api/V1/OnboardingController.php | 12 ++-- .../Services/Article/ValidationService.php | 26 ++++++--- .../Services/Parsers/BelgaHomepageParser.php | 30 +++++++++- backend/routes/api.php | 1 + backend/routes/console.php | 3 - frontend/src/lib/api.ts | 5 ++ frontend/src/pages/Articles.tsx | 57 +++++++++++++++---- 8 files changed, 124 insertions(+), 30 deletions(-) diff --git a/backend/app/Http/Controllers/Api/V1/ArticlesController.php b/backend/app/Http/Controllers/Api/V1/ArticlesController.php index 279b88f..e13b323 100644 --- a/backend/app/Http/Controllers/Api/V1/ArticlesController.php +++ b/backend/app/Http/Controllers/Api/V1/ArticlesController.php @@ -5,8 +5,10 @@ use App\Http\Resources\ArticleResource; use App\Models\Article; use App\Models\Setting; +use App\Jobs\ArticleDiscoveryJob; use Illuminate\Http\JsonResponse; use Illuminate\Http\Request; +use Illuminate\Support\Facades\Artisan; class ArticlesController extends BaseController { @@ -71,4 +73,22 @@ public function reject(Article $article): JsonResponse return $this->sendError('Failed to reject article: ' . $e->getMessage(), [], 500); } } + + /** + * Manually refresh articles from all active feeds + */ + public function refresh(): JsonResponse + { + try { + // Dispatch the article discovery job + ArticleDiscoveryJob::dispatch(); + + return $this->sendResponse( + null, + 'Article refresh started. New articles will appear shortly.' + ); + } catch (\Exception $e) { + return $this->sendError('Failed to start article refresh: ' . $e->getMessage(), [], 500); + } + } } \ No newline at end of file diff --git a/backend/app/Http/Controllers/Api/V1/OnboardingController.php b/backend/app/Http/Controllers/Api/V1/OnboardingController.php index 0229455..925f066 100644 --- a/backend/app/Http/Controllers/Api/V1/OnboardingController.php +++ b/backend/app/Http/Controllers/Api/V1/OnboardingController.php @@ -15,6 +15,7 @@ use App\Models\Route; use App\Models\Setting; use App\Services\Auth\LemmyAuthService; +use App\Jobs\ArticleDiscoveryJob; use Illuminate\Http\JsonResponse; use Illuminate\Http\Request; use Illuminate\Support\Facades\Validator; @@ -309,13 +310,14 @@ public function createRoute(Request $request): JsonResponse */ public function complete(): JsonResponse { - // In a real implementation, you might want to update a user preference - // or create a setting that tracks onboarding completion - // For now, we'll just return success since the onboarding status - // is determined by the existence of platform accounts, feeds, and channels + // If user has created feeds during onboarding, start article discovery + $hasFeed = Feed::where('is_active', true)->exists(); + if ($hasFeed) { + ArticleDiscoveryJob::dispatch(); + } return $this->sendResponse( - ['completed' => true], + ['completed' => true, 'article_refresh_triggered' => $hasFeed], 'Onboarding completed successfully.' ); } diff --git a/backend/app/Services/Article/ValidationService.php b/backend/app/Services/Article/ValidationService.php index ffff924..ac17e1e 100644 --- a/backend/app/Services/Article/ValidationService.php +++ b/backend/app/Services/Article/ValidationService.php @@ -12,26 +12,34 @@ public static function validate(Article $article): Article $articleData = ArticleFetcher::fetchArticleData($article); - if (!isset($articleData['full_article'])) { + // Update article with fetched data (title, description, etc.) + $updateData = [ + 'validated_at' => now(), + ]; + + if (!empty($articleData)) { + $updateData['title'] = $articleData['title'] ?? null; + $updateData['description'] = $articleData['description'] ?? null; + $updateData['full_article'] = $articleData['full_article'] ?? null; + $updateData['thumbnail'] = $articleData['thumbnail'] ?? null; + } + + if (!isset($articleData['full_article']) || empty($articleData['full_article'])) { logger()->warning('Article data missing full_article key', [ 'article_id' => $article->id, 'url' => $article->url ]); - $article->update([ - 'is_valid' => false, - 'validated_at' => now(), - ]); + $updateData['is_valid'] = false; + $article->update($updateData); return $article->refresh(); } $validationResult = self::validateByKeywords($articleData['full_article']); + $updateData['is_valid'] = $validationResult; - $article->update([ - 'is_valid' => $validationResult, - 'validated_at' => now(), - ]); + $article->update($updateData); return $article->refresh(); } diff --git a/backend/app/Services/Parsers/BelgaHomepageParser.php b/backend/app/Services/Parsers/BelgaHomepageParser.php index 3f3c5b5..8234582 100644 --- a/backend/app/Services/Parsers/BelgaHomepageParser.php +++ b/backend/app/Services/Parsers/BelgaHomepageParser.php @@ -9,10 +9,38 @@ class BelgaHomepageParser */ public static function extractArticleUrls(string $html): array { - preg_match_all('/href="(https:\/\/www\.belganewsagency\.eu\/[a-z0-9-]+)"/', $html, $matches); + // Find all relative article links (most articles use relative paths) + preg_match_all('/]+href="(\/[a-z0-9-]+)"/', $html, $matches); + + // Blacklist of non-article paths + $blacklistPaths = [ + '/', + '/de', + '/feed', + '/search', + '/category', + '/about', + '/contact', + '/privacy', + '/terms', + ]; $urls = collect($matches[1]) ->unique() + ->filter(function ($path) use ($blacklistPaths) { + // Exclude exact matches and paths starting with blacklisted paths + foreach ($blacklistPaths as $blacklistedPath) { + if ($path === $blacklistedPath || str_starts_with($path, $blacklistedPath . '/')) { + return false; + } + } + return true; + }) + ->map(function ($path) { + // Convert relative paths to absolute URLs + return 'https://www.belganewsagency.eu' . $path; + }) + ->values() ->toArray(); return $urls; diff --git a/backend/routes/api.php b/backend/routes/api.php index 56d4e75..4f5ef29 100644 --- a/backend/routes/api.php +++ b/backend/routes/api.php @@ -52,6 +52,7 @@ Route::get('/articles', [ArticlesController::class, 'index'])->name('api.articles.index'); Route::post('/articles/{article}/approve', [ArticlesController::class, 'approve'])->name('api.articles.approve'); Route::post('/articles/{article}/reject', [ArticlesController::class, 'reject'])->name('api.articles.reject'); + Route::post('/articles/refresh', [ArticlesController::class, 'refresh'])->name('api.articles.refresh'); // Platform Accounts Route::apiResource('platform-accounts', PlatformAccountsController::class)->names([ diff --git a/backend/routes/console.php b/backend/routes/console.php index 54bc6f4..cd608fd 100644 --- a/backend/routes/console.php +++ b/backend/routes/console.php @@ -1,11 +1,8 @@ hourly(); - Schedule::call(function () { SyncChannelPostsJob::dispatchForAllActiveChannels(); })->everyTenMinutes()->name('sync-lemmy-channel-posts'); diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 011e832..ad43f37 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -329,6 +329,11 @@ class ApiClient { async resetOnboardingSkip(): Promise { await axios.post('/onboarding/reset-skip'); } + + // Articles management endpoints + async refreshArticles(): Promise { + await axios.post('/articles/refresh'); + } } export const apiClient = new ApiClient(); \ No newline at end of file diff --git a/frontend/src/pages/Articles.tsx b/frontend/src/pages/Articles.tsx index 7fa1409..6d14359 100644 --- a/frontend/src/pages/Articles.tsx +++ b/frontend/src/pages/Articles.tsx @@ -1,10 +1,11 @@ import React, { useState } from 'react'; import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; -import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText } from 'lucide-react'; +import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText, RefreshCw } from 'lucide-react'; import { apiClient, type Article } from '../lib/api'; const Articles: React.FC = () => { const [page, setPage] = useState(1); + const [isRefreshing, setIsRefreshing] = useState(false); const queryClient = useQueryClient(); const { data, isLoading, error } = useQuery({ @@ -26,6 +27,24 @@ const Articles: React.FC = () => { }, }); + const refreshMutation = useMutation({ + mutationFn: () => apiClient.refreshArticles(), + onSuccess: () => { + // Keep the button in "refreshing" state for 10 seconds + setIsRefreshing(true); + + // Refresh the articles list after 10 seconds + setTimeout(() => { + queryClient.invalidateQueries({ queryKey: ['articles'] }); + setIsRefreshing(false); + }, 10000); + }, + onError: () => { + // Reset the refreshing state on error + setIsRefreshing(false); + }, + }); + const handleApprove = (articleId: number) => { approveMutation.mutate(articleId); }; @@ -34,6 +53,10 @@ const Articles: React.FC = () => { rejectMutation.mutate(articleId); }; + const handleRefresh = () => { + refreshMutation.mutate(); + }; + const getStatusBadge = (status: string) => { switch (status) { case 'approved': @@ -98,17 +121,27 @@ const Articles: React.FC = () => { return (
-
-

Articles

-

- Manage and review articles from your feeds -

- {settings?.publishing_approvals_enabled && ( -
- - Approval system enabled -
- )} +
+
+

Articles

+

+ Manage and review articles from your feeds +

+ {settings?.publishing_approvals_enabled && ( +
+ + Approval system enabled +
+ )} +
+