Fetch articles from news site
This commit is contained in:
parent
bdf8e9b3a7
commit
c5a926446a
4 changed files with 148 additions and 7 deletions
21
bin/check-and-post
Normal file
21
bin/check-and-post
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
#!/usr/bin/env php
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require __DIR__ . '/../bootstrap/autoload.php';
|
||||||
|
|
||||||
|
use Feddev\LemmyArticlePoster\Domain\Articles\ArticleFetcher;
|
||||||
|
|
||||||
|
// Ask the fetcher for the current batch of articles. ArticleFetcher looks each
// one up by URL and creates it when it is not stored yet.
$articles = ArticleFetcher::getNewArticles();

// Narrow the batch down to the models flagged as created during this run.
$newArticles = $articles->filter(fn ($article) => $article->wasRecentlyCreated);

$totalCount = $articles->count();
$newCount = $newArticles->count();
echo "Found " . $totalCount . " articles (" . $newCount . " new)\n";

// Report every new article on its own line.
foreach ($newArticles as $article) {
    echo "New article: " . $article->url . "\n";
    // Publishing is still switched off: $article->publish();
}

echo "\n";
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
namespace Domain\Articles;
|
namespace Feddev\LemmyArticlePoster\Domain\Articles;
|
||||||
|
|
||||||
|
use Feddev\LemmyArticlePoster\Domain\Articles\Jobs\PostArticleToLemmy;
|
||||||
use Illuminate\Database\Eloquent\Model;
|
use Illuminate\Database\Eloquent\Model;
|
||||||
use Illuminate\Database\Eloquent\Relations\HasMany;
|
use Illuminate\Database\Eloquent\Relations\HasMany;
|
||||||
|
|
||||||
|
|
@ -28,12 +29,16 @@ public function posts(): HasMany
|
||||||
return $this->hasMany(ArticlePosted::class);
|
return $this->hasMany(ArticlePosted::class);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function publish(string $community = 'default'): void
|
public function publish(?string $community = null): void
|
||||||
{
|
{
|
||||||
ArticlePosted::create([
|
$config = require __DIR__ . '/../../../bootstrap/config.php';
|
||||||
'article_id' => $this->id,
|
$community ??= $config['lemmy']['community'];
|
||||||
'community' => $community,
|
|
||||||
'posted_at' => now(),
|
if ($this->posts()->where('community', $community)->exists()) {
|
||||||
]);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
$job = new PostArticleToLemmy($this, $community);
|
||||||
|
$job->handle();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
79
src/Domain/Articles/ArticleFetcher.php
Normal file
79
src/Domain/Articles/ArticleFetcher.php
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Feddev\LemmyArticlePoster\Domain\Articles;
|
||||||
|
|
||||||
|
use Illuminate\Support\Collection;
|
||||||
|
use Illuminate\Support\Carbon;
|
||||||
|
use GuzzleHttp\Client;
|
||||||
|
|
||||||
|
/**
 * Scrapes the VRT NWS English homepage for article links and stores each
 * discovered URL as an Article record.
 */
class ArticleFetcher
{
    /** Origin prepended to the relative article paths found on the homepage. */
    private const BASE_URL = 'https://www.vrt.be';

    /** Listing page that is scanned for article links. */
    private const HOMEPAGE_URL = 'https://www.vrt.be/vrtnws/en/';

    /** Maximum number of distinct article links processed per run. */
    private const MAX_ARTICLES = 10;

    /** Seconds a single HTTP request may take in total before it is aborted. */
    private const REQUEST_TIMEOUT = 10;

    /** Seconds allowed for establishing the connection. */
    private const CONNECT_TIMEOUT = 5;

    /**
     * Fetch the articles currently linked from the homepage and make sure each
     * one exists in the database.
     *
     * Despite the name, the returned collection contains every fetched article,
     * not only the ones created by this call; saveArticle() uses firstOrCreate,
     * so already-stored articles are returned as well.
     *
     * @return Collection<int, Article>
     */
    public static function getNewArticles(): Collection
    {
        return self::fetchArticles()->map(fn (array $article): Article => self::saveArticle($article));
    }

    /**
     * Download the homepage, extract up to MAX_ARTICLES distinct article links
     * and fetch the metadata of each linked page.
     *
     * Any exception raised while loading the homepage is logged and results in
     * an empty collection rather than being propagated.
     *
     * @return Collection<int, array{url: string, title: ?string, published_at: ?string}>
     */
    private static function fetchArticles(): Collection
    {
        try {
            // Guzzle waits indefinitely by default; bound every request so a
            // stalled connection cannot hang the whole run.
            $client = new Client([
                'timeout' => self::REQUEST_TIMEOUT,
                'connect_timeout' => self::CONNECT_TIMEOUT,
            ]);

            $html = $client->get(self::HOMEPAGE_URL)->getBody()->getContents();

            // Article links look like /vrtnws/en/YYYY/MM/DD/<slug>.
            preg_match_all('/href="(\/vrtnws\/en\/\d{4}\/\d{2}\/\d{2}\/[^"]+)"/', $html, $matches);

            $articles = collect($matches[1] ?? [])
                ->unique()
                ->take(self::MAX_ARTICLES)
                ->map(fn (string $path): array => self::fetchArticleMetadata($client, self::BASE_URL . $path))
                // unique() keeps the original match offsets as keys; reindex to a plain list.
                ->values();

            // NOTE(review): assumes logger('article_fetcher') returns a channel
            // logger. Laravel's stock logger($message) helper instead writes a
            // debug line and returns null — confirm which helper is loaded.
            logger('article_fetcher')->info("Fetched " . $articles->count() . " articles from VRT");

            return $articles;
        } catch (\Exception $e) {
            logger('article_fetcher')->error("Failed to fetch VRT homepage", ['error' => $e->getMessage()]);

            return collect([]);
        }
    }

    /**
     * Load a single article page and read its title and publish date from the
     * Open Graph / article meta tags.
     *
     * A failed request is logged and yields null for both fields, so the URL is
     * still reported to the caller.
     *
     * @return array{url: string, title: ?string, published_at: ?string}
     */
    private static function fetchArticleMetadata(Client $client, string $fullUrl): array
    {
        try {
            $articleHtml = $client->get($fullUrl)->getBody()->getContents();

            return [
                'url' => $fullUrl,
                'title' => self::extractMetaContent($articleHtml, 'og:title'),
                'published_at' => self::extractMetaContent($articleHtml, 'article:published_time'),
            ];
        } catch (\Exception $e) {
            logger('article_fetcher')->error("Failed to fetch article: {$fullUrl}", ['error' => $e->getMessage()]);

            return [
                'url' => $fullUrl,
                'title' => null,
                'published_at' => null,
            ];
        }
    }

    /**
     * Return the content attribute of the first <meta property="..."> tag with
     * the given property, or null when no such tag is found.
     *
     * Attribute values are HTML-escaped in the page source, so entities such as
     * &amp; or &#39; are decoded before the value is returned.
     */
    private static function extractMetaContent(string $html, string $property): ?string
    {
        $pattern = '/<meta property="' . preg_quote($property, '/') . '" content="([^"]+)"/';

        if (preg_match($pattern, $html, $match) !== 1) {
            return null;
        }

        return html_entity_decode($match[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Look up the article by URL, creating it with the current time as
     * created_at when it does not exist yet.
     *
     * NOTE(review): the fetched 'title' and 'published_at' values are not
     * persisted here — confirm whether the Article model has matching columns
     * and whether they should be stored.
     *
     * @param array{url: string, title: ?string, published_at: ?string} $data
     */
    protected static function saveArticle(array $data): Article
    {
        return Article::firstOrCreate(
            ['url' => $data['url']],
            ['created_at' => Carbon::now()]
        );
    }
}
|
||||||
36
src/Domain/Articles/Jobs/PostArticleToLemmy.php
Normal file
36
src/Domain/Articles/Jobs/PostArticleToLemmy.php
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Feddev\LemmyArticlePoster\Domain\Articles\Jobs;
|
||||||
|
|
||||||
|
use Feddev\LemmyArticlePoster\Domain\Articles\Article;
|
||||||
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||||
|
use Illuminate\Support\Carbon;
|
||||||
|
|
||||||
|
/**
 * Job that records an article as posted to a Lemmy community.
 *
 * The Lemmy call itself is still stubbed out; handle() only stores a post
 * record carrying a hard-coded response.
 */
class PostArticleToLemmy implements ShouldQueue
{
    /** Id of the article to post; the model is re-loaded from it in handle(). */
    public int $articleId;

    /** Name of the target community. */
    public string $community;

    /**
     * @param Article $article   Article whose id is remembered for handle().
     * @param string  $community Target community, 'default' when omitted.
     */
    public function __construct(Article $article, string $community = 'default')
    {
        $this->community = $community;
        $this->articleId = $article->id;
    }

    /**
     * Load the article by id (findOrFail throws when it no longer exists) and
     * attach a post record for the configured community.
     */
    public function handle(): void
    {
        // TODO: Replace with real Lemmy API call
        $stubbedResponse = [
            'status' => 'success',
            'post_url' => 'https://lemmy.world/post/12345',
        ];

        Article::findOrFail($this->articleId)
            ->posts()
            ->create([
                'community' => $this->community,
                'posted_at' => Carbon::now(),
                'response' => $stubbedResponse,
            ]);
    }
}
|
||||||
Loading…
Reference in a new issue