Fix article fetching

This commit is contained in:
myrmidex 2025-08-09 18:34:19 +02:00
parent e495f49481
commit 11d4262457
8 changed files with 124 additions and 30 deletions

View file

@ -5,8 +5,10 @@
use App\Http\Resources\ArticleResource;
use App\Models\Article;
use App\Models\Setting;
use App\Jobs\ArticleDiscoveryJob;
use Illuminate\Http\JsonResponse;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Artisan;
class ArticlesController extends BaseController
{
@ -71,4 +73,22 @@ public function reject(Article $article): JsonResponse
return $this->sendError('Failed to reject article: ' . $e->getMessage(), [], 500);
}
}
/**
 * Trigger an on-demand article refresh.
 *
 * Dispatches ArticleDiscoveryJob and then reports that the refresh has
 * started. The success reply only confirms that the dispatch call returned;
 * it says nothing about whether any articles were actually fetched.
 *
 * @return JsonResponse The sendResponse() result (null data plus a status
 *                      message), or the sendError() result built with code
 *                      500 when an \Exception is thrown inside the try block.
 */
public function refresh(): JsonResponse
{
    try {
        ArticleDiscoveryJob::dispatch();

        $confirmation = 'Article refresh started. New articles will appear shortly.';

        return $this->sendResponse(null, $confirmation);
    } catch (\Exception $exception) {
        // Surface the underlying reason to the caller, same as the other actions in this controller.
        $reason = 'Failed to start article refresh: ' . $exception->getMessage();

        return $this->sendError($reason, [], 500);
    }
}
}

View file

@ -15,6 +15,7 @@
use App\Models\Route;
use App\Models\Setting;
use App\Services\Auth\LemmyAuthService;
use App\Jobs\ArticleDiscoveryJob;
use Illuminate\Http\JsonResponse;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Validator;
@ -309,13 +310,14 @@ public function createRoute(Request $request): JsonResponse
*/
public function complete(): JsonResponse
{
// In a real implementation, you might want to update a user preference
// or create a setting that tracks onboarding completion
// For now, we'll just return success since the onboarding status
// is determined by the existence of platform accounts, feeds, and channels
// If user has created feeds during onboarding, start article discovery
$hasFeed = Feed::where('is_active', true)->exists();
if ($hasFeed) {
ArticleDiscoveryJob::dispatch();
}
return $this->sendResponse(
['completed' => true],
['completed' => true, 'article_refresh_triggered' => $hasFeed],
'Onboarding completed successfully.'
);
}

View file

@ -12,26 +12,34 @@ public static function validate(Article $article): Article
$articleData = ArticleFetcher::fetchArticleData($article);
if (!isset($articleData['full_article'])) {
// Update article with fetched data (title, description, etc.)
$updateData = [
'validated_at' => now(),
];
if (!empty($articleData)) {
$updateData['title'] = $articleData['title'] ?? null;
$updateData['description'] = $articleData['description'] ?? null;
$updateData['full_article'] = $articleData['full_article'] ?? null;
$updateData['thumbnail'] = $articleData['thumbnail'] ?? null;
}
if (!isset($articleData['full_article']) || empty($articleData['full_article'])) {
logger()->warning('Article data missing full_article key', [
'article_id' => $article->id,
'url' => $article->url
]);
$article->update([
'is_valid' => false,
'validated_at' => now(),
]);
$updateData['is_valid'] = false;
$article->update($updateData);
return $article->refresh();
}
$validationResult = self::validateByKeywords($articleData['full_article']);
$updateData['is_valid'] = $validationResult;
$article->update([
'is_valid' => $validationResult,
'validated_at' => now(),
]);
$article->update($updateData);
return $article->refresh();
}

View file

@ -9,10 +9,38 @@ class BelgaHomepageParser
*/
public static function extractArticleUrls(string $html): array
{
preg_match_all('/href="(https:\/\/www\.belganewsagency\.eu\/[a-z0-9-]+)"/', $html, $matches);
// Find all relative article links (most articles use relative paths)
preg_match_all('/<a[^>]+href="(\/[a-z0-9-]+)"/', $html, $matches);
// Blacklist of non-article paths
$blacklistPaths = [
'/',
'/de',
'/feed',
'/search',
'/category',
'/about',
'/contact',
'/privacy',
'/terms',
];
$urls = collect($matches[1])
->unique()
->filter(function ($path) use ($blacklistPaths) {
// Exclude exact matches and paths starting with blacklisted paths
foreach ($blacklistPaths as $blacklistedPath) {
if ($path === $blacklistedPath || str_starts_with($path, $blacklistedPath . '/')) {
return false;
}
}
return true;
})
->map(function ($path) {
// Convert relative paths to absolute URLs
return 'https://www.belganewsagency.eu' . $path;
})
->values()
->toArray();
return $urls;

View file

@ -52,6 +52,7 @@
Route::get('/articles', [ArticlesController::class, 'index'])->name('api.articles.index');
Route::post('/articles/{article}/approve', [ArticlesController::class, 'approve'])->name('api.articles.approve');
Route::post('/articles/{article}/reject', [ArticlesController::class, 'reject'])->name('api.articles.reject');
Route::post('/articles/refresh', [ArticlesController::class, 'refresh'])->name('api.articles.refresh');
// Platform Accounts
Route::apiResource('platform-accounts', PlatformAccountsController::class)->names([

View file

@ -1,11 +1,8 @@
<?php
use App\Console\Commands\FetchNewArticlesCommand;
use App\Jobs\SyncChannelPostsJob;
use Illuminate\Support\Facades\Schedule;
Schedule::command(FetchNewArticlesCommand::class)->hourly();
// Scheduled task 'sync-lemmy-channel-posts': every ten minutes, call
// SyncChannelPostsJob::dispatchForAllActiveChannels().
Schedule::call(static function (): void {
    SyncChannelPostsJob::dispatchForAllActiveChannels();
})->everyTenMinutes()->name('sync-lemmy-channel-posts');

View file

@ -329,6 +329,11 @@ class ApiClient {
async resetOnboardingSkip(): Promise<void> {
await axios.post('/onboarding/reset-skip');
}
// Articles management endpoints
/**
 * Ask the backend to start an article refresh.
 *
 * Issues a POST to '/articles/refresh' through axios and resolves once the
 * request completes; the response body is ignored. A failed request rejects
 * the returned promise.
 */
async refreshArticles(): Promise<void> {
  const endpoint = '/articles/refresh';
  await axios.post(endpoint);
}
}
export const apiClient = new ApiClient();

View file

@ -1,10 +1,11 @@
import React, { useState } from 'react';
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText } from 'lucide-react';
import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText, RefreshCw } from 'lucide-react';
import { apiClient, type Article } from '../lib/api';
const Articles: React.FC = () => {
const [page, setPage] = useState(1);
const [isRefreshing, setIsRefreshing] = useState(false);
const queryClient = useQueryClient();
const { data, isLoading, error } = useQuery({
@ -26,6 +27,24 @@ const Articles: React.FC = () => {
},
});
// Mutation behind the "Refresh" button: calls apiClient.refreshArticles(), then
// holds the button in its "refreshing" state for a fixed delay before reloading
// the article list.
const refreshMutation = useMutation({
  mutationFn: () => apiClient.refreshArticles(),
  onSuccess: () => {
    // How long to wait before re-querying the list (10 seconds).
    const reloadDelayMs = 10000;

    // Invalidate the cached 'articles' query and release the button.
    const reloadArticles = () => {
      queryClient.invalidateQueries({ queryKey: ['articles'] });
      setIsRefreshing(false);
    };

    setIsRefreshing(true);
    setTimeout(reloadArticles, reloadDelayMs);
  },
  onError: () => {
    // The request failed, so make sure the button is usable again.
    setIsRefreshing(false);
  },
});
const handleApprove = (articleId: number) => {
approveMutation.mutate(articleId);
};
@ -34,6 +53,10 @@ const Articles: React.FC = () => {
rejectMutation.mutate(articleId);
};
// Click handler for the refresh button: fires the refresh mutation with no arguments.
const handleRefresh = () => refreshMutation.mutate();
const getStatusBadge = (status: string) => {
switch (status) {
case 'approved':
@ -98,17 +121,27 @@ const Articles: React.FC = () => {
return (
<div className="p-6">
<div className="mb-8">
<h1 className="text-2xl font-bold text-gray-900">Articles</h1>
<p className="mt-1 text-sm text-gray-500">
Manage and review articles from your feeds
</p>
{settings?.publishing_approvals_enabled && (
<div className="mt-2 inline-flex items-center px-3 py-1 rounded-full text-xs font-medium bg-blue-100 text-blue-800">
<Tag className="h-3 w-3 mr-1" />
Approval system enabled
</div>
)}
<div className="mb-8 flex items-start justify-between">
<div>
<h1 className="text-2xl font-bold text-gray-900">Articles</h1>
<p className="mt-1 text-sm text-gray-500">
Manage and review articles from your feeds
</p>
{settings?.publishing_approvals_enabled && (
<div className="mt-2 inline-flex items-center px-3 py-1 rounded-full text-xs font-medium bg-blue-100 text-blue-800">
<Tag className="h-3 w-3 mr-1" />
Approval system enabled
</div>
)}
</div>
<button
onClick={handleRefresh}
disabled={refreshMutation.isPending || isRefreshing}
className="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50 disabled:cursor-not-allowed"
>
<RefreshCw className={`h-4 w-4 mr-2 ${(refreshMutation.isPending || isRefreshing) ? 'animate-spin' : ''}`} />
{(refreshMutation.isPending || isRefreshing) ? 'Refreshing...' : 'Refresh'}
</button>
</div>
<div className="space-y-6">