Fix article fetching
This commit is contained in:
parent
e495f49481
commit
11d4262457
8 changed files with 124 additions and 30 deletions
|
|
@ -5,8 +5,10 @@
|
|||
use App\Http\Resources\ArticleResource;
|
||||
use App\Models\Article;
|
||||
use App\Models\Setting;
|
||||
use App\Jobs\ArticleDiscoveryJob;
|
||||
use Illuminate\Http\JsonResponse;
|
||||
use Illuminate\Http\Request;
|
||||
use Illuminate\Support\Facades\Artisan;
|
||||
|
||||
class ArticlesController extends BaseController
|
||||
{
|
||||
|
|
@ -71,4 +73,22 @@ public function reject(Article $article): JsonResponse
|
|||
return $this->sendError('Failed to reject article: ' . $e->getMessage(), [], 500);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Manually refresh articles from all active feeds.
 *
 * Dispatches ArticleDiscoveryJob and answers with a confirmation message.
 * Any exception raised while doing so is turned into a 500 error response
 * that carries the exception message.
 */
public function refresh(): JsonResponse
{
    try {
        // Hand the actual fetching over to the discovery job.
        ArticleDiscoveryJob::dispatch();

        $confirmation = 'Article refresh started. New articles will appear shortly.';

        return $this->sendResponse(null, $confirmation);
    } catch (\Exception $exception) {
        $reason = 'Failed to start article refresh: ' . $exception->getMessage();

        return $this->sendError($reason, [], 500);
    }
}
|
||||
}
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
use App\Models\Route;
|
||||
use App\Models\Setting;
|
||||
use App\Services\Auth\LemmyAuthService;
|
||||
use App\Jobs\ArticleDiscoveryJob;
|
||||
use Illuminate\Http\JsonResponse;
|
||||
use Illuminate\Http\Request;
|
||||
use Illuminate\Support\Facades\Validator;
|
||||
|
|
@ -309,13 +310,14 @@ public function createRoute(Request $request): JsonResponse
|
|||
*/
|
||||
public function complete(): JsonResponse
{
    // Article discovery is only worth starting when at least one feed is
    // flagged active; otherwise there is nothing to fetch from.
    $hasFeed = Feed::where('is_active', true)->exists();

    if ($hasFeed) {
        ArticleDiscoveryJob::dispatch();
    }

    // The payload tells the client whether a refresh was triggered, so it
    // can decide whether to expect new articles shortly.
    return $this->sendResponse(
        ['completed' => true, 'article_refresh_triggered' => $hasFeed],
        'Onboarding completed successfully.'
    );
}
|
||||
|
|
|
|||
|
|
@ -12,26 +12,34 @@ public static function validate(Article $article): Article
|
|||
|
||||
$articleData = ArticleFetcher::fetchArticleData($article);
|
||||
|
||||
if (!isset($articleData['full_article'])) {
|
||||
// Update article with fetched data (title, description, etc.)
|
||||
$updateData = [
|
||||
'validated_at' => now(),
|
||||
];
|
||||
|
||||
if (!empty($articleData)) {
|
||||
$updateData['title'] = $articleData['title'] ?? null;
|
||||
$updateData['description'] = $articleData['description'] ?? null;
|
||||
$updateData['full_article'] = $articleData['full_article'] ?? null;
|
||||
$updateData['thumbnail'] = $articleData['thumbnail'] ?? null;
|
||||
}
|
||||
|
||||
if (!isset($articleData['full_article']) || empty($articleData['full_article'])) {
|
||||
logger()->warning('Article data missing full_article key', [
|
||||
'article_id' => $article->id,
|
||||
'url' => $article->url
|
||||
]);
|
||||
|
||||
$article->update([
|
||||
'is_valid' => false,
|
||||
'validated_at' => now(),
|
||||
]);
|
||||
$updateData['is_valid'] = false;
|
||||
$article->update($updateData);
|
||||
|
||||
return $article->refresh();
|
||||
}
|
||||
|
||||
$validationResult = self::validateByKeywords($articleData['full_article']);
|
||||
$updateData['is_valid'] = $validationResult;
|
||||
|
||||
$article->update([
|
||||
'is_valid' => $validationResult,
|
||||
'validated_at' => now(),
|
||||
]);
|
||||
$article->update($updateData);
|
||||
|
||||
return $article->refresh();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,10 +9,38 @@ class BelgaHomepageParser
|
|||
*/
|
||||
public static function extractArticleUrls(string $html): array
|
||||
{
|
||||
preg_match_all('/href="(https:\/\/www\.belganewsagency\.eu\/[a-z0-9-]+)"/', $html, $matches);
|
||||
// Find all relative article links (most articles use relative paths)
|
||||
preg_match_all('/<a[^>]+href="(\/[a-z0-9-]+)"/', $html, $matches);
|
||||
|
||||
// Blacklist of non-article paths
|
||||
$blacklistPaths = [
|
||||
'/',
|
||||
'/de',
|
||||
'/feed',
|
||||
'/search',
|
||||
'/category',
|
||||
'/about',
|
||||
'/contact',
|
||||
'/privacy',
|
||||
'/terms',
|
||||
];
|
||||
|
||||
$urls = collect($matches[1])
|
||||
->unique()
|
||||
->filter(function ($path) use ($blacklistPaths) {
|
||||
// Exclude exact matches and paths starting with blacklisted paths
|
||||
foreach ($blacklistPaths as $blacklistedPath) {
|
||||
if ($path === $blacklistedPath || str_starts_with($path, $blacklistedPath . '/')) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
})
|
||||
->map(function ($path) {
|
||||
// Convert relative paths to absolute URLs
|
||||
return 'https://www.belganewsagency.eu' . $path;
|
||||
})
|
||||
->values()
|
||||
->toArray();
|
||||
|
||||
return $urls;
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
Route::get('/articles', [ArticlesController::class, 'index'])->name('api.articles.index');
|
||||
Route::post('/articles/{article}/approve', [ArticlesController::class, 'approve'])->name('api.articles.approve');
|
||||
Route::post('/articles/{article}/reject', [ArticlesController::class, 'reject'])->name('api.articles.reject');
|
||||
// Manual trigger: ArticlesController::refresh dispatches the article discovery job.
Route::post('/articles/refresh', [ArticlesController::class, 'refresh'])->name('api.articles.refresh');
|
||||
|
||||
// Platform Accounts
|
||||
Route::apiResource('platform-accounts', PlatformAccountsController::class)->names([
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
<?php
|
||||
|
||||
use App\Console\Commands\FetchNewArticlesCommand;
|
||||
use App\Jobs\SyncChannelPostsJob;
|
||||
use Illuminate\Support\Facades\Schedule;
|
||||
|
||||
Schedule::command(FetchNewArticlesCommand::class)->hourly();
|
||||
|
||||
// Runs every ten minutes under the name 'sync-lemmy-channel-posts' and
// delegates to SyncChannelPostsJob::dispatchForAllActiveChannels().
Schedule::call(function () {
|
||||
SyncChannelPostsJob::dispatchForAllActiveChannels();
|
||||
})->everyTenMinutes()->name('sync-lemmy-channel-posts');
|
||||
|
|
|
|||
|
|
@ -329,6 +329,11 @@ class ApiClient {
|
|||
/**
 * POSTs to `/onboarding/reset-skip`.
 *
 * Resolves once the request completes; the response body is discarded.
 */
async resetOnboardingSkip(): Promise<void> {
  const endpoint = '/onboarding/reset-skip';
  await axios.post(endpoint);
}
|
||||
|
||||
// Articles management endpoints
|
||||
/**
 * Ask the backend to start a manual article refresh.
 *
 * POSTs to `/articles/refresh`; the response body is discarded, so callers
 * only learn whether the request itself succeeded.
 */
async refreshArticles(): Promise<void> {
  const endpoint = '/articles/refresh';
  await axios.post(endpoint);
}
|
||||
}
|
||||
|
||||
export const apiClient = new ApiClient();
|
||||
|
|
@ -1,10 +1,11 @@
|
|||
import React, { useState } from 'react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText } from 'lucide-react';
|
||||
import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText, RefreshCw } from 'lucide-react';
|
||||
import { apiClient, type Article } from '../lib/api';
|
||||
|
||||
const Articles: React.FC = () => {
|
||||
const [page, setPage] = useState(1);
|
||||
const [isRefreshing, setIsRefreshing] = useState(false);
|
||||
const queryClient = useQueryClient();
|
||||
|
||||
const { data, isLoading, error } = useQuery({
|
||||
|
|
@ -26,6 +27,24 @@ const Articles: React.FC = () => {
|
|||
},
|
||||
});
|
||||
|
||||
// Mutation behind the "Refresh" button: asks the API to start a refresh and,
// after a fixed 10-second delay, re-queries the article list.
const refreshMutation = useMutation({
  mutationFn: () => apiClient.refreshArticles(),
  onSuccess: () => {
    // Hold the button in its "refreshing" state while we wait.
    setIsRefreshing(true);

    const reloadArticles = () => {
      queryClient.invalidateQueries({ queryKey: ['articles'] });
      setIsRefreshing(false);
    };

    // Reload the list (and release the button) 10 seconds from now.
    setTimeout(reloadArticles, 10000);
  },
  onError: () => {
    // The request failed, so there is nothing to wait for.
    setIsRefreshing(false);
  },
});
|
||||
|
||||
// Click handler: run the approve mutation for the given article id.
const handleApprove = (articleId: number): void => {
  approveMutation.mutate(articleId);
};
|
||||
|
|
@ -34,6 +53,10 @@ const Articles: React.FC = () => {
|
|||
rejectMutation.mutate(articleId);
|
||||
};
|
||||
|
||||
// Click handler for the "Refresh" button: fire the refresh mutation.
const handleRefresh = (): void => {
  refreshMutation.mutate();
};
|
||||
|
||||
const getStatusBadge = (status: string) => {
|
||||
switch (status) {
|
||||
case 'approved':
|
||||
|
|
@ -98,7 +121,8 @@ const Articles: React.FC = () => {
|
|||
|
||||
return (
|
||||
<div className="p-6">
|
||||
<div className="mb-8">
|
||||
<div className="mb-8 flex items-start justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">Articles</h1>
|
||||
<p className="mt-1 text-sm text-gray-500">
|
||||
Manage and review articles from your feeds
|
||||
|
|
@ -110,6 +134,15 @@ const Articles: React.FC = () => {
|
|||
</div>
|
||||
)}
|
||||
</div>
|
||||
<button
|
||||
onClick={handleRefresh}
|
||||
disabled={refreshMutation.isPending || isRefreshing}
|
||||
className="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<RefreshCw className={`h-4 w-4 mr-2 ${(refreshMutation.isPending || isRefreshing) ? 'animate-spin' : ''}`} />
|
||||
{(refreshMutation.isPending || isRefreshing) ? 'Refreshing...' : 'Refresh'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div className="space-y-6">
|
||||
{articles.map((article: Article) => (
|
||||
|
|
|
|||
Loading…
Reference in a new issue