Fix article fetching
This commit is contained in:
parent
e495f49481
commit
11d4262457
8 changed files with 124 additions and 30 deletions
|
|
@ -5,8 +5,10 @@
|
||||||
use App\Http\Resources\ArticleResource;
|
use App\Http\Resources\ArticleResource;
|
||||||
use App\Models\Article;
|
use App\Models\Article;
|
||||||
use App\Models\Setting;
|
use App\Models\Setting;
|
||||||
|
use App\Jobs\ArticleDiscoveryJob;
|
||||||
use Illuminate\Http\JsonResponse;
|
use Illuminate\Http\JsonResponse;
|
||||||
use Illuminate\Http\Request;
|
use Illuminate\Http\Request;
|
||||||
|
use Illuminate\Support\Facades\Artisan;
|
||||||
|
|
||||||
class ArticlesController extends BaseController
|
class ArticlesController extends BaseController
|
||||||
{
|
{
|
||||||
|
|
@ -71,4 +73,22 @@ public function reject(Article $article): JsonResponse
|
||||||
return $this->sendError('Failed to reject article: ' . $e->getMessage(), [], 500);
|
return $this->sendError('Failed to reject article: ' . $e->getMessage(), [], 500);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Manually refresh articles from all active feeds.
 *
 * Dispatches an ArticleDiscoveryJob and reports whether the dispatch
 * succeeded. The response confirms only that the job was dispatched,
 * not that any article has been fetched.
 *
 * @return JsonResponse Success payload with a null data field, or a 500
 *                      error carrying the exception message.
 */
public function refresh(): JsonResponse
{
    try {
        // Hand the actual fetching over to the discovery job.
        ArticleDiscoveryJob::dispatch();

        $confirmation = 'Article refresh started. New articles will appear shortly.';

        return $this->sendResponse(null, $confirmation);
    } catch (\Exception $e) {
        $failure = 'Failed to start article refresh: ' . $e->getMessage();

        return $this->sendError($failure, [], 500);
    }
}
|
||||||
}
|
}
|
||||||
|
|
@ -15,6 +15,7 @@
|
||||||
use App\Models\Route;
|
use App\Models\Route;
|
||||||
use App\Models\Setting;
|
use App\Models\Setting;
|
||||||
use App\Services\Auth\LemmyAuthService;
|
use App\Services\Auth\LemmyAuthService;
|
||||||
|
use App\Jobs\ArticleDiscoveryJob;
|
||||||
use Illuminate\Http\JsonResponse;
|
use Illuminate\Http\JsonResponse;
|
||||||
use Illuminate\Http\Request;
|
use Illuminate\Http\Request;
|
||||||
use Illuminate\Support\Facades\Validator;
|
use Illuminate\Support\Facades\Validator;
|
||||||
|
|
@ -309,13 +310,14 @@ public function createRoute(Request $request): JsonResponse
|
||||||
*/
|
*/
|
||||||
public function complete(): JsonResponse
{
    // If the user has created feeds during onboarding, start article discovery.
    // Tracks whether the discovery job was actually dispatched, so the
    // response flag reflects what happened rather than what was intended.
    $refreshTriggered = false;

    if (Feed::where('is_active', true)->exists()) {
        try {
            ArticleDiscoveryJob::dispatch();
            $refreshTriggered = true;
        } catch (\Exception $e) {
            // Article discovery is a convenience step: a dispatch failure
            // must not turn onboarding completion into an error response.
            // Articles can still be refreshed manually afterwards.
            logger()->warning('Failed to dispatch article discovery after onboarding', [
                'error' => $e->getMessage(),
            ]);
        }
    }

    return $this->sendResponse(
        ['completed' => true, 'article_refresh_triggered' => $refreshTriggered],
        'Onboarding completed successfully.'
    );
}
|
||||||
|
|
|
||||||
|
|
@ -12,26 +12,34 @@ public static function validate(Article $article): Article
|
||||||
|
|
||||||
$articleData = ArticleFetcher::fetchArticleData($article);
|
$articleData = ArticleFetcher::fetchArticleData($article);
|
||||||
|
|
||||||
if (!isset($articleData['full_article'])) {
|
// Update article with fetched data (title, description, etc.)
|
||||||
|
$updateData = [
|
||||||
|
'validated_at' => now(),
|
||||||
|
];
|
||||||
|
|
||||||
|
if (!empty($articleData)) {
|
||||||
|
$updateData['title'] = $articleData['title'] ?? null;
|
||||||
|
$updateData['description'] = $articleData['description'] ?? null;
|
||||||
|
$updateData['full_article'] = $articleData['full_article'] ?? null;
|
||||||
|
$updateData['thumbnail'] = $articleData['thumbnail'] ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isset($articleData['full_article']) || empty($articleData['full_article'])) {
|
||||||
logger()->warning('Article data missing full_article key', [
|
logger()->warning('Article data missing full_article key', [
|
||||||
'article_id' => $article->id,
|
'article_id' => $article->id,
|
||||||
'url' => $article->url
|
'url' => $article->url
|
||||||
]);
|
]);
|
||||||
|
|
||||||
$article->update([
|
$updateData['is_valid'] = false;
|
||||||
'is_valid' => false,
|
$article->update($updateData);
|
||||||
'validated_at' => now(),
|
|
||||||
]);
|
|
||||||
|
|
||||||
return $article->refresh();
|
return $article->refresh();
|
||||||
}
|
}
|
||||||
|
|
||||||
$validationResult = self::validateByKeywords($articleData['full_article']);
|
$validationResult = self::validateByKeywords($articleData['full_article']);
|
||||||
|
$updateData['is_valid'] = $validationResult;
|
||||||
|
|
||||||
$article->update([
|
$article->update($updateData);
|
||||||
'is_valid' => $validationResult,
|
|
||||||
'validated_at' => now(),
|
|
||||||
]);
|
|
||||||
|
|
||||||
return $article->refresh();
|
return $article->refresh();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,10 +9,38 @@ class BelgaHomepageParser
|
||||||
*/
|
*/
|
||||||
public static function extractArticleUrls(string $html): array
|
public static function extractArticleUrls(string $html): array
|
||||||
{
|
{
|
||||||
preg_match_all('/href="(https:\/\/www\.belganewsagency\.eu\/[a-z0-9-]+)"/', $html, $matches);
|
// Find all relative article links (most articles use relative paths)
|
||||||
|
preg_match_all('/<a[^>]+href="(\/[a-z0-9-]+)"/', $html, $matches);
|
||||||
|
|
||||||
|
// Blacklist of non-article paths
|
||||||
|
$blacklistPaths = [
|
||||||
|
'/',
|
||||||
|
'/de',
|
||||||
|
'/feed',
|
||||||
|
'/search',
|
||||||
|
'/category',
|
||||||
|
'/about',
|
||||||
|
'/contact',
|
||||||
|
'/privacy',
|
||||||
|
'/terms',
|
||||||
|
];
|
||||||
|
|
||||||
$urls = collect($matches[1])
|
$urls = collect($matches[1])
|
||||||
->unique()
|
->unique()
|
||||||
|
->filter(function ($path) use ($blacklistPaths) {
|
||||||
|
// Exclude exact matches and paths starting with blacklisted paths
|
||||||
|
foreach ($blacklistPaths as $blacklistedPath) {
|
||||||
|
if ($path === $blacklistedPath || str_starts_with($path, $blacklistedPath . '/')) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
})
|
||||||
|
->map(function ($path) {
|
||||||
|
// Convert relative paths to absolute URLs
|
||||||
|
return 'https://www.belganewsagency.eu' . $path;
|
||||||
|
})
|
||||||
|
->values()
|
||||||
->toArray();
|
->toArray();
|
||||||
|
|
||||||
return $urls;
|
return $urls;
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@
|
||||||
Route::get('/articles', [ArticlesController::class, 'index'])->name('api.articles.index');
|
Route::get('/articles', [ArticlesController::class, 'index'])->name('api.articles.index');
|
||||||
Route::post('/articles/{article}/approve', [ArticlesController::class, 'approve'])->name('api.articles.approve');
|
Route::post('/articles/{article}/approve', [ArticlesController::class, 'approve'])->name('api.articles.approve');
|
||||||
Route::post('/articles/{article}/reject', [ArticlesController::class, 'reject'])->name('api.articles.reject');
|
Route::post('/articles/{article}/reject', [ArticlesController::class, 'reject'])->name('api.articles.reject');
|
||||||
|
Route::post('/articles/refresh', [ArticlesController::class, 'refresh'])->name('api.articles.refresh');
|
||||||
|
|
||||||
// Platform Accounts
|
// Platform Accounts
|
||||||
Route::apiResource('platform-accounts', PlatformAccountsController::class)->names([
|
Route::apiResource('platform-accounts', PlatformAccountsController::class)->names([
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,8 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
use App\Console\Commands\FetchNewArticlesCommand;
|
|
||||||
use App\Jobs\SyncChannelPostsJob;
|
use App\Jobs\SyncChannelPostsJob;
|
||||||
use Illuminate\Support\Facades\Schedule;
|
use Illuminate\Support\Facades\Schedule;
|
||||||
|
|
||||||
Schedule::command(FetchNewArticlesCommand::class)->hourly();
|
|
||||||
|
|
||||||
Schedule::call(function () {
|
Schedule::call(function () {
|
||||||
SyncChannelPostsJob::dispatchForAllActiveChannels();
|
SyncChannelPostsJob::dispatchForAllActiveChannels();
|
||||||
})->everyTenMinutes()->name('sync-lemmy-channel-posts');
|
})->everyTenMinutes()->name('sync-lemmy-channel-posts');
|
||||||
|
|
|
||||||
|
|
@ -329,6 +329,11 @@ class ApiClient {
|
||||||
async resetOnboardingSkip(): Promise<void> {
|
async resetOnboardingSkip(): Promise<void> {
|
||||||
await axios.post('/onboarding/reset-skip');
|
await axios.post('/onboarding/reset-skip');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Articles management endpoints

/**
 * Ask the backend to start an article refresh.
 *
 * Resolves once the POST request completes; the response body is ignored.
 */
async refreshArticles(): Promise<void> {
  const endpoint = '/articles/refresh';
  await axios.post(endpoint);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export const apiClient = new ApiClient();
|
export const apiClient = new ApiClient();
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
import React, { useState } from 'react';
|
import React, { useState } from 'react';
|
||||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||||
import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText } from 'lucide-react';
|
import { CheckCircle, XCircle, ExternalLink, Calendar, Tag, FileText, RefreshCw } from 'lucide-react';
|
||||||
import { apiClient, type Article } from '../lib/api';
|
import { apiClient, type Article } from '../lib/api';
|
||||||
|
|
||||||
const Articles: React.FC = () => {
|
const Articles: React.FC = () => {
|
||||||
const [page, setPage] = useState(1);
|
const [page, setPage] = useState(1);
|
||||||
|
const [isRefreshing, setIsRefreshing] = useState(false);
|
||||||
const queryClient = useQueryClient();
|
const queryClient = useQueryClient();
|
||||||
|
|
||||||
const { data, isLoading, error } = useQuery({
|
const { data, isLoading, error } = useQuery({
|
||||||
|
|
@ -26,6 +27,24 @@ const Articles: React.FC = () => {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Handle of the pending delayed refetch, kept so it can be cancelled.
const refreshTimerRef = React.useRef<ReturnType<typeof setTimeout> | null>(null);

// Cancel a pending delayed refetch when the component unmounts, so the
// timer does not fire against a component that is no longer mounted.
React.useEffect(() => {
  return () => {
    if (refreshTimerRef.current !== null) {
      clearTimeout(refreshTimerRef.current);
      refreshTimerRef.current = null;
    }
  };
}, []);

// Starts a backend article refresh, then refetches the list after a delay.
const refreshMutation = useMutation({
  mutationFn: () => apiClient.refreshArticles(),
  onSuccess: () => {
    // Keep the button in "refreshing" state for 10 seconds
    setIsRefreshing(true);

    // Never stack timers: replace any delayed refetch that is still pending.
    if (refreshTimerRef.current !== null) {
      clearTimeout(refreshTimerRef.current);
    }

    // Refresh the articles list after 10 seconds
    refreshTimerRef.current = setTimeout(() => {
      refreshTimerRef.current = null;
      queryClient.invalidateQueries({ queryKey: ['articles'] });
      setIsRefreshing(false);
    }, 10000);
  },
  onError: () => {
    // Reset the refreshing state on error
    setIsRefreshing(false);
  },
});
|
||||||
|
|
||||||
const handleApprove = (articleId: number) => {
|
const handleApprove = (articleId: number) => {
|
||||||
approveMutation.mutate(articleId);
|
approveMutation.mutate(articleId);
|
||||||
};
|
};
|
||||||
|
|
@ -34,6 +53,10 @@ const Articles: React.FC = () => {
|
||||||
rejectMutation.mutate(articleId);
|
rejectMutation.mutate(articleId);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Click handler for the Refresh button: triggers the refresh mutation.
const handleRefresh = (): void => refreshMutation.mutate();
|
||||||
|
|
||||||
const getStatusBadge = (status: string) => {
|
const getStatusBadge = (status: string) => {
|
||||||
switch (status) {
|
switch (status) {
|
||||||
case 'approved':
|
case 'approved':
|
||||||
|
|
@ -98,17 +121,27 @@ const Articles: React.FC = () => {
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="p-6">
|
<div className="p-6">
|
||||||
<div className="mb-8">
|
<div className="mb-8 flex items-start justify-between">
|
||||||
<h1 className="text-2xl font-bold text-gray-900">Articles</h1>
|
<div>
|
||||||
<p className="mt-1 text-sm text-gray-500">
|
<h1 className="text-2xl font-bold text-gray-900">Articles</h1>
|
||||||
Manage and review articles from your feeds
|
<p className="mt-1 text-sm text-gray-500">
|
||||||
</p>
|
Manage and review articles from your feeds
|
||||||
{settings?.publishing_approvals_enabled && (
|
</p>
|
||||||
<div className="mt-2 inline-flex items-center px-3 py-1 rounded-full text-xs font-medium bg-blue-100 text-blue-800">
|
{settings?.publishing_approvals_enabled && (
|
||||||
<Tag className="h-3 w-3 mr-1" />
|
<div className="mt-2 inline-flex items-center px-3 py-1 rounded-full text-xs font-medium bg-blue-100 text-blue-800">
|
||||||
Approval system enabled
|
<Tag className="h-3 w-3 mr-1" />
|
||||||
</div>
|
Approval system enabled
|
||||||
)}
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={handleRefresh}
|
||||||
|
disabled={refreshMutation.isPending || isRefreshing}
|
||||||
|
className="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||||
|
>
|
||||||
|
<RefreshCw className={`h-4 w-4 mr-2 ${(refreshMutation.isPending || isRefreshing) ? 'animate-spin' : ''}`} />
|
||||||
|
{(refreshMutation.isPending || isRefreshing) ? 'Refreshing...' : 'Refresh'}
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="space-y-6">
|
<div className="space-y-6">
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue