diff --git a/packages/Lvl0/FediDiscover/src/Actions/PollFediverseAction.php b/packages/Lvl0/FediDiscover/src/Actions/PollFediverseAction.php new file mode 100644 index 0000000..3200b87 --- /dev/null +++ b/packages/Lvl0/FediDiscover/src/Actions/PollFediverseAction.php @@ -0,0 +1,55 @@ +client->fetchPostsSince($instance, $instance->last_seen_id)); + + $posts->each(function (FediversePost $post) use ($instance) { + $this->processLinks($post, $instance); + }); + + if ($posts->isNotEmpty()) { + $instance->last_seen_id = $posts->first()->cursorId; + } + + $instance->last_polled_at = now(); + $instance->save(); + } + + private function processLinks(FediversePost $post, Instance $instance): void + { + if ($post->body === null) { + return; + } + + $linksFound = preg_match_all('~https?://[^\s<>"\'()\[\]]+~', $post->body, $matches); + + if ($linksFound === 0) { + return; + } + + $urls = collect($matches[0]) + ->map(fn (string $u) => rtrim($u, '.,;:!?')) + ->filter(fn (string $u) => filter_var($u, FILTER_VALIDATE_URL) !== false) + ->filter(fn (string $u) => parse_url($u, PHP_URL_HOST) !== parse_url($instance->url, PHP_URL_HOST)) + ->unique() + ->each(fn (string $url) => UrlDiscovered::dispatch( + url: $url, + postUrl: $post->selfUrl, + postBody: $post->body, + )); + } +} diff --git a/packages/Lvl0/FediDiscover/src/Clients/FediverseClient.php b/packages/Lvl0/FediDiscover/src/Clients/FediverseClient.php new file mode 100644 index 0000000..c69049d --- /dev/null +++ b/packages/Lvl0/FediDiscover/src/Clients/FediverseClient.php @@ -0,0 +1,12 @@ + $extras - * @property int $last_seen_id + * @property string|null $last_seen_id * @property Carbon|null $last_polled_at * @property Carbon $created_at * @property Carbon $updated_at diff --git a/packages/Lvl0/FediDiscover/tests/Feature/PollFediverseActionTest.php b/packages/Lvl0/FediDiscover/tests/Feature/PollFediverseActionTest.php new file mode 100644 index 0000000..48abc09 --- /dev/null +++ b/packages/Lvl0/FediDiscover/tests/Feature/PollFediverseActionTest.php @@ -0,0 +1,223 @@ +poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', 'See https://example.com/one and https://other.example/two'), + ]); + + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->url === 'https://example.com/one'); + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->url === 'https://other.example/two'); + Event::assertDispatchedTimes(UrlDiscovered::class, 2); + } + + public function test_it_extracts_urls_from_html_anchor_tags(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', '
Check this!
'), + ]); + + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->url === 'https://example.com/article'); + Event::assertDispatchedTimes(UrlDiscovered::class, 1); + } + + public function test_it_extracts_urls_from_markdown_links(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll( + posts: [new FediversePost('1', 'https://lemmy.world/post/42', 'A [great article](https://example.com/article) about trees.')], + instanceUrl: 'https://lemmy.world', + ); + + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->url === 'https://example.com/article'); + Event::assertDispatchedTimes(UrlDiscovered::class, 1); + } + + public function test_it_strips_trailing_punctuation_from_urls(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', 'Check https://example.com/article, it is great. Also https://other.example/page.'), + ]); + + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->url === 'https://example.com/article'); + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->url === 'https://other.example/page'); + } + + public function test_it_deduplicates_urls_within_a_single_post(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', 'Here is https://example.com/article and again https://example.com/article'), + ]); + + Event::assertDispatchedTimes(UrlDiscovered::class, 1); + } + + public function test_it_filters_urls_on_the_polling_instance_host(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', 'See https://mastodon.social/@bob/42 and https://example.com/article'), + ]); + + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->url === 'https://example.com/article'); + Event::assertDispatchedTimes(UrlDiscovered::class, 1); + } + + public function test_it_ignores_posts_with_a_null_body(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', null), + ]); + + Event::assertNotDispatched(UrlDiscovered::class); + } + + public function test_it_ignores_non_http_schemes(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', 'Email mailto:alice@example.com or try ftp://files.example.com/x'), + ]); + + Event::assertNotDispatched(UrlDiscovered::class); + } + + public function test_it_passes_post_self_url_and_body_through_to_the_event(): void + { + Event::fake([UrlDiscovered::class]); + + $body = 'Here is https://example.com/article with surrounding context.'; + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', $body), + ]); + + Event::assertDispatched(UrlDiscovered::class, fn (UrlDiscovered $e) => $e->postUrl === 'https://mastodon.social/@alice/1' && $e->postBody === $body + ); + } + + public function test_it_processes_multiple_posts(): void + { + Event::fake([UrlDiscovered::class]); + + $this->poll([ + new FediversePost('1', 'https://mastodon.social/@alice/1', 'See https://example.com/one'), + new FediversePost('2', 'https://mastodon.social/@bob/2', 'Also https://example.com/two'), + ]); + + Event::assertDispatchedTimes(UrlDiscovered::class, 2); + } + + public function test_it_updates_last_seen_id_to_the_first_posts_cursor(): void + { + $instance = $this->makeInstance(); + + // Clients return newest-first; the action treats posts[0] + // as the new high-water mark without inspecting cursor values. + $this->pollInstance($instance, [ + new FediversePost('newest-cursor', 'https://mastodon.social/@alice/3', 'x'), + new FediversePost('middle-cursor', 'https://mastodon.social/@bob/2', 'y'), + new FediversePost('oldest-cursor', 'https://mastodon.social/@carol/1', 'z'), + ]); + + $this->assertSame('newest-cursor', $instance->fresh()->last_seen_id); + } + + public function test_it_updates_last_polled_at(): void + { + $instance = $this->makeInstance(); + $this->assertNull($instance->last_polled_at); + + $this->pollInstance($instance, [ + new FediversePost('1', 'https://mastodon.social/@alice/1', 'x'), + ]); + + $this->assertNotNull($instance->fresh()->last_polled_at); + } + + public function test_it_passes_the_existing_last_seen_id_to_the_client(): void + { + $instance = $this->makeInstance(['last_seen_id' => '999']); + + $client = Mockery::mock(FediverseClient::class); + $client->shouldReceive('fetchPostsSince') + ->once() + ->with($instance, $instance->last_seen_id) + ->andReturn([]); + + (new PollFediverseAction($client))->execute($instance); + } + + public function test_it_leaves_last_seen_id_unchanged_when_no_posts_are_returned(): void + { + $instance = $this->makeInstance(['last_seen_id' => '500']); + + $this->pollInstance($instance, []); + + $this->assertSame('500', $instance->fresh()->last_seen_id); + } + + /** + * @param array