Fetch articles from news site
This commit is contained in:
parent
bdf8e9b3a7
commit
c5a926446a
4 changed files with 148 additions and 7 deletions
21
bin/check-and-post
Normal file
21
bin/check-and-post
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env php
|
||||
<?php
|
||||
|
||||
require __DIR__ . '/../bootstrap/autoload.php';
|
||||
|
||||
use Feddev\LemmyArticlePoster\Domain\Articles\ArticleFetcher;
|
||||
|
||||
// Scrape the news site; every entry comes back as an Article model, whether
// it was already stored or has just been inserted.
$articles = ArticleFetcher::getNewArticles();

// Keep only the models that were inserted during this run.
$newArticles = $articles->filter(fn ($article) => $article->wasRecentlyCreated);

$totalCount = $articles->count();
$newCount = $newArticles->count();
echo "Found {$totalCount} articles ({$newCount} new)\n";

foreach ($newArticles as $article) {
    echo "New article: {$article->url}\n";
    // Publishing is currently switched off; uncomment to post new articles:
    // $article->publish();
}

echo "\n";
|
||||
|
|
@ -1,7 +1,8 @@
|
|||
<?php
|
||||
|
||||
namespace Domain\Articles;
|
||||
namespace Feddev\LemmyArticlePoster\Domain\Articles;
|
||||
|
||||
use Feddev\LemmyArticlePoster\Domain\Articles\Jobs\PostArticleToLemmy;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\HasMany;
|
||||
|
||||
|
|
@ -28,12 +29,16 @@ public function posts(): HasMany
|
|||
return $this->hasMany(ArticlePosted::class);
|
||||
}
|
||||
|
||||
/**
 * Post this article to a Lemmy community, at most once per community.
 *
 * @param string|null $community Target community. When null, the value of
 *                               ['lemmy']['community'] from
 *                               bootstrap/config.php is used instead.
 */
public function publish(?string $community = null): void
{
    $config = require __DIR__ . '/../../../bootstrap/config.php';
    $community ??= $config['lemmy']['community'];

    // A post for this community is already on record: nothing to do.
    if ($this->posts()->where('community', $community)->exists()) {
        return;
    }

    // The job is executed inline (handle() is called directly) rather than
    // being pushed onto a queue.
    $job = new PostArticleToLemmy($this, $community);
    $job->handle();
}
|
||||
}
|
||||
|
|
|
|||
79
src/Domain/Articles/ArticleFetcher.php
Normal file
79
src/Domain/Articles/ArticleFetcher.php
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
<?php
|
||||
|
||||
namespace Feddev\LemmyArticlePoster\Domain\Articles;
|
||||
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Carbon;
|
||||
use GuzzleHttp\Client;
|
||||
|
||||
/**
 * Scrapes the VRT NWS English front page for article links and stores each
 * linked article as an Article record.
 */
class ArticleFetcher
{
    /** Scheme and host prepended to the site-relative article paths. */
    private const BASE_URL = 'https://www.vrt.be';

    /** Front page that is scanned for article links. */
    private const FRONT_PAGE_URL = self::BASE_URL . '/vrtnws/en/';

    /** Maximum number of distinct article links processed per run. */
    private const MAX_ARTICLES = 10;

    /**
     * Guzzle request options applied to every request. Guzzle waits
     * indefinitely by default, so explicit limits keep a stalled server from
     * hanging the whole run; a timeout surfaces as an exception that the
     * catch blocks below already handle.
     */
    private const HTTP_OPTIONS = [
        'timeout' => 15,
        'connect_timeout' => 5,
    ];

    /**
     * Fetch the articles currently linked from the front page and make sure
     * each one exists in the database.
     *
     * Despite the name, the result holds every fetched article, not only the
     * new ones; callers can tell them apart via the model's
     * wasRecentlyCreated flag.
     *
     * @return Collection Article models, one per fetched link (empty when the
     *                    front page could not be fetched).
     */
    public static function getNewArticles(): Collection
    {
        return self::fetchArticles()->map(fn (array $article) => self::saveArticle($article));
    }

    /**
     * Download the front page and collect data for the first distinct
     * article links found on it.
     *
     * @return Collection Arrays with the keys 'url', 'title' and
     *                    'published_at'; empty when the front page request
     *                    fails.
     */
    private static function fetchArticles(): Collection
    {
        try {
            $client = new Client(self::HTTP_OPTIONS);
            $html = $client->get(self::FRONT_PAGE_URL)->getBody()->getContents();

            // Article links look like /vrtnws/en/YYYY/MM/DD/<slug>.
            preg_match_all('/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches);

            $articles = collect($matches[1] ?? [])
                ->unique()
                ->take(self::MAX_ARTICLES)
                ->map(fn (string $path) => self::fetchArticleDetails($client, self::BASE_URL . $path));

            logger('article_fetcher')->info("Fetched " . $articles->count() . " articles from VRT");

            return $articles;
        } catch (\Exception $e) {
            logger('article_fetcher')->error("Failed to fetch VRT homepage", ['error' => $e->getMessage()]);

            return collect([]);
        }
    }

    /**
     * Download a single article page and read its title and publish date
     * from the page's meta tags.
     *
     * A failed request is logged and does not abort the run: the article is
     * still returned, with null title and date.
     *
     * @return array{url: string, title: ?string, published_at: ?string}
     */
    private static function fetchArticleDetails(Client $client, string $url): array
    {
        try {
            $html = $client->get($url)->getBody()->getContents();

            return [
                'url' => $url,
                'title' => self::metaContent($html, 'og:title'),
                'published_at' => self::metaContent($html, 'article:published_time'),
            ];
        } catch (\Exception $e) {
            logger('article_fetcher')->error("Failed to fetch article: {$url}", ['error' => $e->getMessage()]);

            return [
                'url' => $url,
                'title' => null,
                'published_at' => null,
            ];
        }
    }

    /**
     * Return the content attribute of the first <meta property="..."> tag
     * with the given property, or null when no such tag is found.
     *
     * The value is HTML-entity-decoded because it is read from an HTML
     * attribute, where characters such as & and quotes are stored escaped.
     */
    private static function metaContent(string $html, string $property): ?string
    {
        $pattern = '/<meta property="' . preg_quote($property, '/') . '" content="([^"]+)"/';

        if (preg_match($pattern, $html, $match) !== 1) {
            return null;
        }

        return html_entity_decode($match[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Look up the article by URL, inserting it when it is not stored yet.
     *
     * @param array $data Article data; only the 'url' key is read here.
     */
    protected static function saveArticle(array $data): Article
    {
        // NOTE(review): the scraped 'title' and 'published_at' values are not
        // persisted here - confirm whether the articles table should store them.
        return Article::firstOrCreate(
            ['url' => $data['url']],
            ['created_at' => Carbon::now()],
        );
    }
}
|
||||
36
src/Domain/Articles/Jobs/PostArticleToLemmy.php
Normal file
36
src/Domain/Articles/Jobs/PostArticleToLemmy.php
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
<?php
|
||||
|
||||
namespace Feddev\LemmyArticlePoster\Domain\Articles\Jobs;
|
||||
|
||||
use Feddev\LemmyArticlePoster\Domain\Articles\Article;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Support\Carbon;
|
||||
|
||||
/**
 * Job that records an article as posted to a Lemmy community.
 *
 * The Lemmy API call itself is not implemented yet: a canned success
 * response is stored in its place.
 */
class PostArticleToLemmy implements ShouldQueue
{
    /** Primary key of the article to post. */
    public int $articleId;

    /** Name of the community the article is posted to. */
    public string $community;

    /**
     * Only the article's id is kept on the job; the model is looked up
     * again when the job runs.
     */
    public function __construct(Article $article, string $community = 'default')
    {
        $this->articleId = $article->id;
        $this->community = $community;
    }

    /**
     * Load the article (failing when it no longer exists) and attach a post
     * record for the target community.
     */
    public function handle(): void
    {
        // TODO: Replace with real Lemmy API call
        $stubbedResponse = [
            'status' => 'success',
            'post_url' => 'https://lemmy.world/post/12345',
        ];

        Article::findOrFail($this->articleId)
            ->posts()
            ->create([
                'community' => $this->community,
                'posted_at' => Carbon::now(),
                'response' => $stubbedResponse,
            ]);
    }
}
|
||||
Loading…
Reference in a new issue