Skip to content

Commit

Permalink
Merge dockerfiles and fix utf8 encoding for aspell and hunspell
Browse files Browse the repository at this point in the history
  • Loading branch information
tigitz committed Aug 2, 2021
1 parent 171db81 commit 54d94bb
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 81 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: make pull

- name: Define folders which changes within requires docker build step
run: echo "FILES_REQUIRING_DOCKER_BUILD=php${{ matrix.php }}/Dockerfile" >> $GITHUB_ENV
run: echo "FILES_REQUIRING_DOCKER_BUILD=php/Dockerfile" >> $GITHUB_ENV

- name: Build docker images
run: make build
Expand All @@ -48,6 +48,8 @@ jobs:
run: |
  # Collect coverage only for the PHP 8.0 / --prefer-stable matrix entry.
  export WITH_COVERAGE=$(if [[ ("${{ matrix.php }}" = "8.0") && ("${{ matrix.stability }}" = "--prefer-stable") ]]; then echo "true"; else echo "false"; fi)
  echo "WITH_COVERAGE=${WITH_COVERAGE}" >> $GITHUB_ENV
  # Select Xdebug 2.9.3 for the PHP 7.2 matrix entry and 3.0.4 for every other one.
  # The [[ ... ]] test must be closed with "]];" before "then"; without it bash
  # rejects the whole command substitution with a syntax error.
  export XDEBUG_VERSION=$(if [[ ("${{ matrix.php }}" = "7.2") ]]; then echo "2.9.3"; else echo "3.0.4"; fi)
  echo "XDEBUG_VERSION=${XDEBUG_VERSION}" >> $GITHUB_ENV
  make vendor
  make tests
Expand Down
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
DOCKER_COMPOSE ?= docker-compose
EXEC_PHP = $(DOCKER_COMPOSE) run --rm -T php$(PHP_VERSION)
EXEC_PHP = $(DOCKER_COMPOSE) run --rm -T php
PHP_VERSION ?= 8.0
XDEBUG_VERSION ?= 3.0.4
DEPS_STRATEGY ?= --prefer-stable
COMPOSER = $(EXEC_PHP) composer
WITH_COVERAGE ?= "FALSE"
EXAMPLES_DIR ?= "examples"

pull:
@$(DOCKER_COMPOSE) pull languagetools jamspell php$(PHP_VERSION)
@$(DOCKER_COMPOSE) pull languagetools jamspell php

build:
$(DOCKER_COMPOSE) build --no-cache php$(PHP_VERSION)
$(DOCKER_COMPOSE) build --no-cache php

push:
$(DOCKER_COMPOSE) push php$(PHP_VERSION)
$(DOCKER_COMPOSE) push php

kill:
$(DOCKER_COMPOSE) kill
Expand Down
23 changes: 8 additions & 15 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
version: '3.4'

x-php-common-conf:
- &php-common-conf
services:
php:
image: tigitz/phpspellchecker:${PHP_VERSION:-8.0}
build:
context: docker/php
args:
PHP_VERSION: ${PHP_VERSION:-8.0}
XDEBUG_VERSION: ${XDEBUG_VERSION:-3.0.4}
volumes:
- .:/usr/src/myapp
- ./cache:/root/composer/cache
Expand All @@ -12,19 +18,6 @@ x-php-common-conf:
- languagetools
- jamspell

services:
php7.2:
image: tigitz/phpspellchecker:7.2
build:
context: docker/php7.2
<<: *php-common-conf

php8.0:
image: tigitz/phpspellchecker:8.0
build:
context: docker/php8.0
<<: *php-common-conf

jamspell:
image: tigitz/jamspell

Expand Down
9 changes: 7 additions & 2 deletions docker/php8.0/Dockerfile → docker/php/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
FROM php:8.0-cli-buster
ARG PHP_VERSION

FROM php:${PHP_VERSION}-cli-buster

ENV XDEBUG_MODE=coverage
RUN echo "memory_limit=-1" > "$PHP_INI_DIR/conf.d/memory-limit.ini" \
&& echo "date.timezone=${PHP_TIMEZONE:-UTC}" > "$PHP_INI_DIR/conf.d/date_timezone.ini"

ARG XDEBUG_VERSION=3.0.2
ARG XDEBUG_VERSION

RUN apt-get update \
&& apt install -y \
Expand All @@ -16,10 +18,13 @@ RUN apt-get update \
libzip-dev \
ispell \
iamerican \
irussian \
hunspell \
hunspell-en-us \
hunspell-ru \
aspell \
aspell-en \
aspell-ru \
libpspell-dev \
&& pecl install xdebug-${XDEBUG_VERSION} \
&& docker-php-ext-configure pspell \
Expand Down
38 changes: 0 additions & 38 deletions docker/php7.2/Dockerfile

This file was deleted.

3 changes: 2 additions & 1 deletion src/Spellchecker/Aspell.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ public function check(string $text, array $languages = [], array $context = []):
{
Assert::maxCount($languages, 1, 'Aspell spellchecker doesn\'t support multiple languages check');

$cmd = $this->binaryPath->addArg('-a');
$cmd = $this->binaryPath->addArgs(['--encoding', 'utf-8']);
$cmd = $cmd->addArg('-a');

if (!empty($languages)) {
$cmd = $cmd->addArg('--lang=' . implode(',', $languages));
Expand Down
1 change: 1 addition & 0 deletions tests/Fixtures/Hunspell/check.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
@(#) International Ispell Version 3.2.06 (but really Hunspell 1.7.0)
& Tigr 6 0: Ti gr, Ti-gr, Tiger, Trig, Tier, Tigris
# страх 21

& theforests 6 3: the forests, the-forests, therefore, afforests, forests, forefathers

Expand Down
33 changes: 28 additions & 5 deletions tests/Spellchecker/AspellTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,17 @@ public function testSpellcheckFromRealBinaries(): void
/**
* @group integration
*/
public function testGetSupportedLanguagesFromRealBinaries(): void
public function testSpellcheckFromRealBinariesUTF8(): void
{
$this->assertWorkingSupportedLanguages(self::realBinaryPath());
$this->assertWorkingSpellcheckRUText(self::realBinaryPath());
}

public function getTextInput(): string
/**
* @group integration
*/
public function testGetSupportedLanguagesFromRealBinaries(): void
{
return TextTest::CONTENT_STUB;
$this->assertWorkingSupportedLanguages(self::realBinaryPath());
}

public function getFakeDicts(): array
Expand Down Expand Up @@ -77,7 +80,7 @@ private function assertWorkingSpellcheck(string $binaries): void
/** @var Misspelling[] $misspellings */
$misspellings = iterator_to_array(
$aspell->check(
$this->getTextInput(),
TextTest::CONTENT_STUB,
['en_US'],
['ctx']
)
Expand All @@ -95,4 +98,24 @@ private function assertWorkingSpellcheck(string $binaries): void
$this->assertSame(2, $misspellings[1]->getLineNumber());
$this->assertNotEmpty($misspellings[1]->getSuggestions());
}

/**
 * Spellchecks the Russian stub text with Aspell and verifies, in order, the
 * context, word, line number and offset of the first two misspellings.
 *
 * @param array|string $binaries value passed straight to the CommandLine constructor
 */
private function assertWorkingSpellcheckRUText($binaries): void
{
    $spellchecker = new Aspell(new CommandLine($binaries));
    $results = $spellchecker->check(TextTest::CONTENT_STUB_RU, ['ru'], ['ctx']);
    /** @var Misspelling[] $misspellings */
    $misspellings = iterator_to_array($results);

    // Expected [word, offset] per result index; both words are expected on line 1.
    $expectations = [
        0 => ['граматических', 54],
        1 => ['англиских', 94],
    ];

    foreach ($expectations as $index => [$expectedWord, $expectedOffset]) {
        $this->assertSame(['ctx'], $misspellings[$index]->getContext());
        $this->assertSame($expectedWord, $misspellings[$index]->getWord());
        $this->assertSame(1, $misspellings[$index]->getLineNumber());
        $this->assertSame($expectedOffset, $misspellings[$index]->getOffset());
    }
}
}
49 changes: 36 additions & 13 deletions tests/Spellchecker/HunspellTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ class HunspellTest extends TestCase
{
private const FAKE_BINARIES_PATH = [PHP_BINARY, __DIR__ . '/../Fixtures/Hunspell/bin/hunspell.php'];

// public function testSpellcheckFromFakeBinaries(): void
// {
// $this->assertWorkingSpellcheck(self::FAKE_BINARIES_PATH);
// }
/**
 * Runs the English spellcheck assertions against the fake Hunspell binary
 * referenced by FAKE_BINARIES_PATH.
 */
public function testSpellcheckFromFakeBinaries(): void
{
    $binaries = self::FAKE_BINARIES_PATH;
    $locales = ['en_US'];

    $this->assertWorkingSpellcheckENText($binaries, TextTest::CONTENT_STUB, $locales);
}

public function testGetSupportedLanguagesFromFakeBinaries(): void
{
Expand Down Expand Up @@ -44,20 +44,23 @@ public function testSpellcheckFromRealBinariesLanguage(): void
*/
public function testSpellcheckFromRealBinaries(): void
{
$this->assertWorkingSpellcheck(self::realBinaryPath());
$this->assertWorkingSpellcheckENText(self::realBinaryPath(), TextTest::CONTENT_STUB, ['en_US']);
}

/**
* @group integration
*/
public function testGetSupportedLanguagesFromRealBinaries(): void
public function testSpellcheckFromRealBinariesUTF8(): void
{
$this->assertWorkingSupportedLanguages(self::realBinaryPath());
$this->assertWorkingSpellcheckRUText(self::realBinaryPath(), TextTest::CONTENT_STUB_RU, ['ru_RU']);
}

public function getTextInput(): string
/**
* @group integration
*/
public function testGetSupportedLanguagesFromRealBinaries(): void
{
return TextTest::CONTENT_STUB;
$this->assertWorkingSupportedLanguages(self::realBinaryPath());
}

public function getFakeDicts(): array
Expand Down Expand Up @@ -87,11 +90,11 @@ public static function realBinaryPath(): string
/**
* @param array|string $binaries
*/
private function assertWorkingSpellcheck($binaries): void
private function assertWorkingSpellcheckENText($binaries, string $textInput, array $locales): void
{
$hunspell = new Hunspell(new CommandLine($binaries));
/** @var Misspelling[] $misspellings */
$misspellings = iterator_to_array($hunspell->check($this->getTextInput(), ['en_US'], ['ctx']));
$misspellings = iterator_to_array($hunspell->check($textInput, $locales, ['ctx']));

$this->assertSame(['ctx'], $misspellings[0]->getContext());
$this->assertSame('Tigr', $misspellings[0]->getWord());
Expand All @@ -101,7 +104,27 @@ private function assertWorkingSpellcheck($binaries): void

$this->assertSame(['ctx'], $misspellings[1]->getContext());
$this->assertSame('страх', $misspellings[1]->getWord());
// $this->assertSame(3, $misspellings[1]->getOffset());
// $this->assertSame(2, $misspellings[1]->getLineNumber());
$this->assertSame(21, $misspellings[1]->getOffset());
$this->assertSame(1, $misspellings[1]->getLineNumber());
}

/**
 * Spellchecks the given text with Hunspell and verifies, in order, the
 * context, word, line number and offset of the first two misspellings.
 * The expected words are the Russian ones hard-coded below.
 *
 * @param array|string $binaries  value passed straight to the CommandLine constructor
 * @param string       $textInput text handed to Hunspell::check()
 * @param array        $locales   languages handed to Hunspell::check()
 */
private function assertWorkingSpellcheckRUText($binaries, string $textInput, array $locales): void
{
    $spellchecker = new Hunspell(new CommandLine($binaries));
    $results = $spellchecker->check($textInput, $locales, ['ctx']);
    /** @var Misspelling[] $misspellings */
    $misspellings = iterator_to_array($results);

    // Expected [word, offset] per result index; both words are expected on line 1.
    $expectations = [
        0 => ['граматических', 54],
        1 => ['англиских', 94],
    ];

    foreach ($expectations as $index => [$expectedWord, $expectedOffset]) {
        $this->assertSame(['ctx'], $misspellings[$index]->getContext());
        $this->assertSame($expectedWord, $misspellings[$index]->getWord());
        $this->assertSame(1, $misspellings[$index]->getLineNumber());
        $this->assertSame($expectedOffset, $misspellings[$index]->getOffset());
    }
}
}
28 changes: 28 additions & 0 deletions tests/Spellchecker/IspellTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ public function testSpellcheckFromRealBinaries(): void
$this->assertWorkingSpellcheck(self::realBinaryPath());
}

// /**
// * @group integration
// */
// public function testSpellcheckFromRealBinariesUTF8(): void
// {
// $this->assertWorkingSpellcheckRUText(self::realBinaryPath());
// }

/**
* @group integration
*/
Expand Down Expand Up @@ -109,4 +117,24 @@ private function assertWorkingSpellcheck(string $binaries): void
$this->assertSame($misspellings[1]->getLineNumber(), 2);
$this->assertNotEmpty($misspellings[1]->getSuggestions());
}

// /**
// * @param array|string $binaries
// */
// private function assertWorkingSpellcheckRUText($binaries): void
// {
// $ispell = new Ispell(new CommandLine($binaries));
// /** @var Misspelling[] $misspellings */
// $misspellings = iterator_to_array($ispell->check(TextTest::CONTENT_STUB_RU, ['russian'], ['ctx']));
//
// $this->assertSame(['ctx'], $misspellings[0]->getContext());
// $this->assertSame('граматических', $misspellings[0]->getWord());
// $this->assertSame(1, $misspellings[0]->getLineNumber());
// $this->assertSame(54, $misspellings[0]->getOffset());
//
// $this->assertSame(['ctx'], $misspellings[1]->getContext());
// $this->assertSame('англиских', $misspellings[1]->getWord());
// $this->assertSame(1, $misspellings[1]->getLineNumber());
// $this->assertSame(94, $misspellings[1]->getOffset());
// }
}
2 changes: 1 addition & 1 deletion tests/Spellchecker/LanguageToolTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public function testSpellcheckMultiBytesStringFromRealAPI(): void
{
$misspellings = iterator_to_array(
(new LanguageTool(new LanguageToolApiClient(self::realAPIEndpoint())))->check(
TextTest::CONTENT_STUB_MULTIBYTE,
TextTest::CONTENT_STUB_JP,
['ja-JP'],
['ctx' => 'ctx']
)
Expand Down
9 changes: 8 additions & 1 deletion tests/TextTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,19 @@ class TextTest extends TestCase
CCould frame thy fearful symmetry?
TEXT;

public const CONTENT_STUB_MULTIBYTE = <<<TEXT
public const CONTENT_STUB_JP = <<<TEXT
さよなら解決なる
さよなら
解決なる
TEXT;

public const CONTENT_STUB_RU = <<<TEXT
Используйте этот инструмент для обнаружения опечаток, граматических и стилистических ошибок в англиских текстах.
Пример (с ошибками:
наведите мышь на подсвечиваемые слова, чтобы просмотреть опсание и, при наличии, варианта исправления ошибки.
Для проверки собственного текста, щёлкните на текстовое поле , вставьте свой текст и нажмите на кнопку "Отправить".
TEXT;

public function testContextOverridingMerge(): void
Expand Down

0 comments on commit 54d94bb

Please sign in to comment.