diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php
index 5cafe3e..30eaf8a 100644
--- a/app/Providers/AppServiceProvider.php
+++ b/app/Providers/AppServiceProvider.php
@@ -3,23 +3,18 @@
 namespace App\Providers;
 
 use App\Listeners\UrlDiscoveredListener;
+use App\Services\LanguageDetectionService;
 use Illuminate\Support\Facades\Event;
 use Illuminate\Support\ServiceProvider;
 use Lvl0\FediDiscover\Events\UrlDiscovered;
 
 class AppServiceProvider extends ServiceProvider
 {
-    /**
-     * Register any application services.
-     */
     public function register(): void
     {
-        //
+        $this->app->singleton(LanguageDetectionService::class);
     }
 
-    /**
-     * Bootstrap any application services.
-     */
     public function boot(): void
     {
         Event::listen(UrlDiscovered::class, UrlDiscoveredListener::class);
diff --git a/app/Services/LanguageDetectionService.php b/app/Services/LanguageDetectionService.php
new file mode 100644
index 0000000..2724ea0
--- /dev/null
+++ b/app/Services/LanguageDetectionService.php
@@ -0,0 +1,45 @@
+language = new Language;
+    }
+
+    /**
+     * Detect the most likely language of the given text.
+     *
+     * Blank input and an empty candidate list both yield null.
+     *
+     * @return array{0: string, 1: float}|null Language code and its score.
+     */
+    public function detect(string $text): ?array
+    {
+        if (trim($text) === '') {
+            return null;
+        }
+
+        $languages = $this->language->detect($text)->bestResults()->close();
+
+        if ($languages === []) {
+            return null;
+        }
+
+        // bestResults() keeps every candidate within 0.025 of the top score.
+        // array_key_first picks the highest-ranked one (arsort'd by the library).
+        $code = array_key_first($languages);
+
+        // Cast to honour the documented shape: PHP stores numeric-looking array
+        // keys as ints, and the score is not guaranteed to arrive as a float.
+        return [(string) $code, (float) $languages[$code]];
+    }
+}
diff --git a/composer.json b/composer.json
index 6ba251e..9af1143 100644
--- a/composer.json
+++ b/composer.json
@@ -21,6 +21,7 @@
         "laravel/tinker": "^3.0",
         "livewire/livewire": "^4.2",
         "lvl0/fedi-discover": "@dev",
+        "patrickschur/language-detection": "^5.3",
         "spatie/robots-txt": "^2.5",
         "symfony/dom-crawler": "^7.4"
     },
diff --git a/composer.lock b/composer.lock
index d2b609b..51ecdd2 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "707278fe3558199c1d07f11dba1d20ec",
+    "content-hash": "4d6e239c94fea8e9511f1e73f05db1df",
     "packages": [
         {
             "name": "brick/math",
@@ -2785,6 +2785,57 @@
             ],
             "time": "2026-02-16T23:10:27+00:00"
         },
+        {
+            "name": "patrickschur/language-detection",
+            "version": "v5.3.1",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/patrickschur/language-detection.git",
+                "reference": "df8d32021b2ef9fde52e6fcccb83e3806822c9c6"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/patrickschur/language-detection/zipball/df8d32021b2ef9fde52e6fcccb83e3806822c9c6",
+                "reference": "df8d32021b2ef9fde52e6fcccb83e3806822c9c6",
+                "shasum": ""
+            },
+            "require": {
+                "ext-json": "*",
+                "ext-mbstring": "*",
+                "php": "^7.4 || ^8.0"
+            },
+            "require-dev": {
+                "phpunit/phpunit": "^9.5.0"
+            },
+            "type": "library",
+            "autoload": {
+                "psr-4": {
+                    "LanguageDetection\\": "src/LanguageDetection"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "authors": [
+                {
+                    "name": "Patrick Schur",
+                    "email": "patrick_schur@outlook.de"
+                }
+            ],
+            "description": "A language detection library for PHP. Detects the language from a given text string.",
+            "homepage": "https://github.com/patrickschur/language-detection",
+            "keywords": [
+                "detect",
+                "detection",
+                "language"
+            ],
+            "support": {
+                "issues": "https://github.com/patrickschur/language-detection/issues",
+                "source": "https://github.com/patrickschur/language-detection/tree/v5.3.1"
+            },
+            "time": "2025-03-25T22:47:08+00:00"
+        },
         {
             "name": "phpoption/phpoption",
             "version": "1.9.5",
diff --git a/tests/Unit/Services/LanguageDetectionServiceTest.php b/tests/Unit/Services/LanguageDetectionServiceTest.php
new file mode 100644
index 0000000..62d4453
--- /dev/null
+++ b/tests/Unit/Services/LanguageDetectionServiceTest.php
@@ -0,0 +1,74 @@
+service = new LanguageDetectionService;
+    }
+
+    public function test_detects_english_from_english_paragraph(): void
+    {
+        $text = 'The solar system is the gravitationally bound system of the Sun and the
+            objects that orbit it. Of the bodies that orbit the Sun directly, the largest
+            are the eight planets, with the remainder being smaller objects, the dwarf
+            planets and small solar system bodies. Planets and most other large bodies
+            in the solar system orbit the Sun in the same direction, counterclockwise
+            when viewed from above the Sun\'s north pole.';
+
+        $result = $this->service->detect($text);
+
+        $this->assertIsArray($result);
+        $this->assertCount(2, $result);
+        $this->assertTrue(
+            str_starts_with($result[0], 'en'),
+            "Expected an English-family tag, got '{$result[0]}'.",
+        );
+        $this->assertIsFloat($result[1]);
+        $this->assertGreaterThan(0.0, $result[1]);
+        $this->assertLessThanOrEqual(1.0, $result[1]);
+    }
+
+    public function test_detects_portuguese_from_portuguese_paragraph(): void
+    {
+        $text = 'O sistema solar é o sistema gravitacionalmente ligado composto pelo Sol e
+            pelos objetos que orbitam ao seu redor. Dos corpos que orbitam o Sol
+            diretamente, os maiores são os oito planetas, sendo o restante composto por
+            objetos menores, como planetas anões e corpos menores do sistema solar.
+            A Terra é o único planeta conhecido a abrigar vida, possuindo uma atmosfera
+            rica em nitrogênio e oxigênio que sustenta os seres vivos.';
+
+        $result = $this->service->detect($text);
+
+        $this->assertIsArray($result);
+        $this->assertCount(2, $result);
+        $this->assertTrue(
+            str_starts_with($result[0], 'pt'),
+            "Expected a Portuguese-family tag, got '{$result[0]}'.",
+        );
+        $this->assertIsFloat($result[1]);
+        $this->assertGreaterThan(0.0, $result[1]);
+        $this->assertLessThanOrEqual(1.0, $result[1]);
+    }
+
+    public function test_returns_null_for_empty_string(): void
+    {
+        $this->assertNull($this->service->detect(''));
+    }
+
+    public function test_returns_null_for_whitespace_only_string(): void
+    {
+        $this->assertNull($this->service->detect(' '));
+    }
+}