From b73b2159ef23fc3001e6cba414fa1baf6ca10dbb Mon Sep 17 00:00:00 2001 From: tigitz Date: Sun, 6 Jan 2019 19:34:37 +0100 Subject: [PATCH] init commit --- .editorconfig | 22 ++ .gitattributes | 18 ++ .github/CONTRIBUTING.md | 18 ++ .github/ISSUE_TEMPLATE.md | 27 ++ .github/PULL_REQUEST_TEMPLATE.md | 33 +++ .gitignore | 5 + .scrutinizer.yml | 31 +++ .travis.yml | 97 +++++++ .travis/docker-compose.ci.yml | 20 ++ LICENSE.md | 21 ++ Makefile | 66 +++++ README.md | 212 ++++++++++++++++ composer.json | 55 ++++ docker-compose.yml | 36 +++ docker/php7.1/Dockerfile | 43 ++++ docker/php7.2/Dockerfile | 45 ++++ docker/php7.3/Dockerfile | 43 ++++ docs/misspellings-handler/create-custom.md | 75 ++++++ docs/spellchecker/create-custom.md | 5 + docs/text-processor/create-custom.md | 5 + docs/text-source/create-custom.md | 5 + example/aspell_console_output.php | 41 +++ phpcs.xml | 21 ++ phpstan.neon | 53 ++++ phpunit.xml.dist | 39 +++ src/Exception/ExceptionInterface.php | 10 + src/Exception/InvalidArgumentException.php | 9 + src/Exception/LogicException.php | 10 + src/Exception/ProcessFailedException.php | 44 ++++ .../ProcessHasErrorOutputException.php | 31 +++ src/Exception/RuntimeException.php | 9 + src/Misspelling.php | 146 +++++++++++ src/MisspellingFinder.php | 116 +++++++++ src/MisspellingHandler/EchoHandler.php | 29 +++ .../MisspellingHandlerInterface.php | 17 ++ src/MisspellingInterface.php | 35 +++ src/Source/Directory.php | 71 ++++++ src/Source/File.php | 60 +++++ src/Source/MultipleSource.php | 37 +++ src/Source/PHPString.php | 43 ++++ src/Source/SourceInterface.php | 17 ++ src/Spellchecker/Aspell.php | 90 +++++++ src/Spellchecker/Hunspell.php | 99 ++++++++ src/Spellchecker/Ispell.php | 120 +++++++++ src/Spellchecker/LanguageTool.php | 109 ++++++++ .../LanguageTool/LanguageToolApiClient.php | 66 +++++ src/Spellchecker/MultiSpellchecker.php | 99 ++++++++ src/Spellchecker/PHPPspell.php | 82 ++++++ src/Spellchecker/SpellcheckerInterface.php | 25 ++ src/Text.php | 76 ++++++ 
src/TextInterface.php | 18 ++ src/TextProcessor/MarkdownRemover.php | 63 +++++ src/TextProcessor/TextProcessorInterface.php | 12 + src/Utils/CommandLine.php | 92 +++++++ src/Utils/IspellOutputParser.php | 51 ++++ src/Utils/ProcessRunner.php | 35 +++ src/Utils/ProcessRunnerTest.php | 19 ++ .../SortedNumericArrayNearestValueFinder.php | 65 +++++ src/Utils/TextEncoding.php | 11 + .../Exception/ProcessFailedExceptionTest.php | 46 ++++ .../ProcessHasErrorOutputExceptionTest.php | 27 ++ .../Tests/Fixtures/Aspell/bin/aspell.sh | 16 ++ .../Tests/Fixtures/Aspell/check.txt | 22 ++ .../Tests/Fixtures/Aspell/dicts.txt | 14 ++ .../Tests/Fixtures/Hunspell/bin/hunspell.php | 28 +++ .../Tests/Fixtures/Hunspell/check.txt | 11 + .../Tests/Fixtures/Hunspell/dicts.txt | 34 +++ .../Tests/Fixtures/Ispell/bin/empty_output.sh | 2 + .../Tests/Fixtures/Ispell/bin/ispell.sh | 27 ++ .../Tests/Fixtures/Ispell/check.txt | 27 ++ .../Fixtures/Ispell/lib/ispell/american.aff | 0 .../Directory/SubDirectory/mispelling4.txt | 1 + .../Fixtures/Text/Directory/mispelling2.txt | 1 + .../Fixtures/Text/Directory/mispelling3.txt | 1 + .../Tests/Fixtures/Text/mispelling1.txt | 1 + .../Tests/MisspellingFinderTest.php | 147 +++++++++++ .../MisspellingHandler/EchoHandlerTest.php | 23 ++ tests/PhpSpellcheck/Tests/MisspellingTest.php | 73 ++++++ .../Tests/Source/DirectoryTest.php | 68 +++++ tests/PhpSpellcheck/Tests/Source/FileTest.php | 58 +++++ .../Tests/Source/MultipleSourceTest.php | 62 +++++ .../Tests/Spellchecker/AspellTest.php | 101 ++++++++ .../Tests/Spellchecker/HunspellTest.php | 109 ++++++++ .../Tests/Spellchecker/IspellTest.php | 114 +++++++++ .../Tests/Spellchecker/LanguageToolTest.php | 145 +++++++++++ .../MultipleSpellCheckersTest.php | 84 +++++++ .../Tests/Spellchecker/PHPPspellTest.php | 50 ++++ .../TextProcessor/MarkdownRemoverTest.php | 236 ++++++++++++++++++ tests/PhpSpellcheck/Tests/TextTest.php | 39 +++ .../Tests/Utils/CommandLineTest.php | 30 +++ ...rtedNumericArrayNearestValueFinderTest.php 
| 111 ++++++++ 91 files changed, 4560 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitattributes create mode 100644 .github/CONTRIBUTING.md create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .gitignore create mode 100644 .scrutinizer.yml create mode 100644 .travis.yml create mode 100644 .travis/docker-compose.ci.yml create mode 100644 LICENSE.md create mode 100644 Makefile create mode 100644 README.md create mode 100644 composer.json create mode 100644 docker-compose.yml create mode 100644 docker/php7.1/Dockerfile create mode 100644 docker/php7.2/Dockerfile create mode 100644 docker/php7.3/Dockerfile create mode 100644 docs/misspellings-handler/create-custom.md create mode 100644 docs/spellchecker/create-custom.md create mode 100644 docs/text-processor/create-custom.md create mode 100644 docs/text-source/create-custom.md create mode 100644 example/aspell_console_output.php create mode 100644 phpcs.xml create mode 100644 phpstan.neon create mode 100644 phpunit.xml.dist create mode 100644 src/Exception/ExceptionInterface.php create mode 100644 src/Exception/InvalidArgumentException.php create mode 100644 src/Exception/LogicException.php create mode 100644 src/Exception/ProcessFailedException.php create mode 100644 src/Exception/ProcessHasErrorOutputException.php create mode 100644 src/Exception/RuntimeException.php create mode 100644 src/Misspelling.php create mode 100644 src/MisspellingFinder.php create mode 100644 src/MisspellingHandler/EchoHandler.php create mode 100644 src/MisspellingHandler/MisspellingHandlerInterface.php create mode 100644 src/MisspellingInterface.php create mode 100644 src/Source/Directory.php create mode 100644 src/Source/File.php create mode 100644 src/Source/MultipleSource.php create mode 100644 src/Source/PHPString.php create mode 100644 src/Source/SourceInterface.php create mode 100644 src/Spellchecker/Aspell.php create mode 100644 
src/Spellchecker/Hunspell.php create mode 100644 src/Spellchecker/Ispell.php create mode 100644 src/Spellchecker/LanguageTool.php create mode 100644 src/Spellchecker/LanguageTool/LanguageToolApiClient.php create mode 100644 src/Spellchecker/MultiSpellchecker.php create mode 100644 src/Spellchecker/PHPPspell.php create mode 100644 src/Spellchecker/SpellcheckerInterface.php create mode 100644 src/Text.php create mode 100644 src/TextInterface.php create mode 100644 src/TextProcessor/MarkdownRemover.php create mode 100644 src/TextProcessor/TextProcessorInterface.php create mode 100644 src/Utils/CommandLine.php create mode 100644 src/Utils/IspellOutputParser.php create mode 100644 src/Utils/ProcessRunner.php create mode 100644 src/Utils/ProcessRunnerTest.php create mode 100644 src/Utils/SortedNumericArrayNearestValueFinder.php create mode 100644 src/Utils/TextEncoding.php create mode 100644 tests/PhpSpellcheck/Tests/Exception/ProcessFailedExceptionTest.php create mode 100644 tests/PhpSpellcheck/Tests/Exception/ProcessHasErrorOutputExceptionTest.php create mode 100755 tests/PhpSpellcheck/Tests/Fixtures/Aspell/bin/aspell.sh create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Aspell/check.txt create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Aspell/dicts.txt create mode 100755 tests/PhpSpellcheck/Tests/Fixtures/Hunspell/bin/hunspell.php create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Hunspell/check.txt create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Hunspell/dicts.txt create mode 100755 tests/PhpSpellcheck/Tests/Fixtures/Ispell/bin/empty_output.sh create mode 100755 tests/PhpSpellcheck/Tests/Fixtures/Ispell/bin/ispell.sh create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Ispell/check.txt create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Ispell/lib/ispell/american.aff create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/SubDirectory/mispelling4.txt create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling2.txt 
create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling3.txt create mode 100644 tests/PhpSpellcheck/Tests/Fixtures/Text/mispelling1.txt create mode 100644 tests/PhpSpellcheck/Tests/MisspellingFinderTest.php create mode 100644 tests/PhpSpellcheck/Tests/MisspellingHandler/EchoHandlerTest.php create mode 100644 tests/PhpSpellcheck/Tests/MisspellingTest.php create mode 100644 tests/PhpSpellcheck/Tests/Source/DirectoryTest.php create mode 100644 tests/PhpSpellcheck/Tests/Source/FileTest.php create mode 100644 tests/PhpSpellcheck/Tests/Source/MultipleSourceTest.php create mode 100644 tests/PhpSpellcheck/Tests/Spellchecker/AspellTest.php create mode 100644 tests/PhpSpellcheck/Tests/Spellchecker/HunspellTest.php create mode 100644 tests/PhpSpellcheck/Tests/Spellchecker/IspellTest.php create mode 100644 tests/PhpSpellcheck/Tests/Spellchecker/LanguageToolTest.php create mode 100644 tests/PhpSpellcheck/Tests/Spellchecker/MultipleSpellCheckersTest.php create mode 100644 tests/PhpSpellcheck/Tests/Spellchecker/PHPPspellTest.php create mode 100644 tests/PhpSpellcheck/Tests/TextProcessor/MarkdownRemoverTest.php create mode 100644 tests/PhpSpellcheck/Tests/TextTest.php create mode 100644 tests/PhpSpellcheck/Tests/Utils/CommandLineTest.php create mode 100644 tests/PhpSpellcheck/Tests/Utils/SortedNumericArrayNearestValueFinderTest.php diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..58054b4 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,22 @@ +; This file is for unifying the coding style for different editors and IDEs. 
+; More information at http://editorconfig.org + +root = true + +[*] +charset = utf-8 +indent_size = 4 +indent_style = space +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.md] +trim_trailing_whitespace = false +max_line_length = 80 + +[Makefile] +indent_style=tab + +[*.yml] +indent_size = 2 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..470a43a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,18 @@ +# Path-based git attributes +# https://www.kernel.org/pub/software/scm/git/docs/gitattributes.html + +# Ignore all test and documentation with "export-ignore". +/.gitattributes export-ignore +/.gitignore export-ignore +/.travis export-ignore +/.github export-ignore +/.travis.yml export-ignore +/.editorconfig export-ignore +/phpunit.xml.dist export-ignore +/.scrutinizer.yml export-ignore +/tests export-ignore +/docs export-ignore +/docker export-ignore +/docker-compose.yml export-ignore +/Makefile export-ignore +/phpstan.neon export-ignore diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..75d6676 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,18 @@ +# Contributing + +Contributions are **welcome** and will be fully **credited**. + +We accept contributions via Pull Requests on [Github](https://github.com/tigitz/php-spellcheck). + + +## Pull Requests + +- **Coding Standard** - Check the code style with ``vendor/bin/phpcs``. + +- **Add tests!** - Your patch won't be accepted if it doesn't have tests. + +- **Document any change in behaviour** - Make sure the `README.md` and any other relevant documentation are kept up-to-date. + +- **Consider our release cycle** - We try to follow [SemVer v2.0.0](http://semver.org/). Randomly breaking public APIs is not an option. + +- **One pull request per feature** - If you want to do more than one thing, send multiple pull requests. 
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..3dfbdf1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,27 @@ + + +## Detailed description + +Provide a detailed description of the change or addition you are proposing. + +Tag the issue with its proper type + +## Context + +Why is this change important to you? How would you use it? + +How can it benefit other users? + +## Possible implementation + +Not obligatory, but suggest an idea for implementing the addition or change. + +## Your environment + +Include as many relevant details about the environment you experienced the bug in and how to reproduce it. + +* Version used (e.g. PHP 5.6, HHVM 3): +* Operating system and version (e.g. Ubuntu 16.04, Windows 7): +* Link to your project: +* ... +* ... diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..5050df2 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,33 @@ + + +## Description + +Describe your changes in detail. + +## Motivation and context + +Why is this change required? What problem does it solve? + +If it fixes an open issue, please link to the issue here (if you write `fixes #num` +or `closes #num`, the issue will be automatically closed when the pull is accepted.) + +## How has this been tested? + +Please describe in detail how you tested your changes. + +Include details of your testing environment, and the tests you ran to +see how your change affects other areas of the code, etc. + +## Screenshots (if appropriate) + +## Checklist: + +Go over all the following points before making your PR: + +- [ ] I have read the **[CONTRIBUTING](CONTRIBUTING.md)** document. +- [ ] My pull request addresses exactly one patch/feature. +- [ ] I have created a branch for this patch/feature. +- [ ] I have added tests to cover my changes. +- [ ] If my change requires a change to the documentation, I have updated it accordingly. 
+ +If you're unsure about any of these, don't hesitate to ask. We're here to help! diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..27d7833 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +build +composer.lock +vendor +.idea +.phpcs-cache diff --git a/.scrutinizer.yml b/.scrutinizer.yml new file mode 100644 index 0000000..85b3df9 --- /dev/null +++ b/.scrutinizer.yml @@ -0,0 +1,31 @@ +build: + nodes: + analysis: + environment: + php: + version: 7.2 + cache: + disabled: false + directories: + - ~/.composer/cache + project_setup: + override: true + tests: + override: + - php-scrutinizer-run + dependencies: + override: + - composer install --no-interaction --prefer-dist + +tools: + external_code_coverage: + timeout: 3600 + +filter: + excluded_paths: + - docs + +build_failure_conditions: + - 'elements.rating(<= B).new.exists' # No new classes/methods with a rating of B or worse allowed + - 'issues.severity(>= MAJOR).new.exists' # New issues of major or higher severity + - 'project.metric_change("scrutinizer.test_coverage", < 0)' # Code Coverage decreased from previous inspection diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..97e8d54 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,97 @@ +# This triggers builds to run on the new TravisCI infrastructure. 
+# See: http://docs.travis-ci.com/user/workers/container-based-infrastructure/ +sudo: false + +language: minimal + +services: + - docker + +stages: + - Smoke Testing + - Test + +## Cache composer +cache: + directories: + - vendor + - $HOME/.composer/cache + - $HOME/docker + +jobs: + include: + - &BASE_JOB + stage: Test + install: + - sudo service docker stop + - if [ "$(ls -A /home/travis/docker)" ]; then echo "/home/travis/docker already set"; else sudo mv /var/lib/docker /home/travis/docker; fi + - sudo bash -c "echo 'DOCKER_OPTS=\"-H tcp://127.0.0.1:2375 -H unix:///var/run/docker.sock -g /home/travis/docker\"' > /etc/default/docker" + - sudo service docker start + - make setup + - make vendor + env: + - DEPS=HIGHEST + - PHP_VERSION=7.3 + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + script: + - make tests + after_success: + - if [[ "$WITH_COVERAGE" == true ]]; then bash <(curl -s https://codecov.io/bash); fi + - | + if [[ "PHP_VERSION" == '7.2' && "$WITH_COVERAGE" == true ]]; then + make scrutinizer + fi + before_cache: + - sudo service docker stop + - sudo chown -R travis ~/docker + + - <<: *BASE_JOB + env: + - DEPS=LOWEST + - PHP_VERSION=7.3 + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + + - <<: *BASE_JOB + env: + - DEPS=HIGHEST + - PHP_VERSION=7.2 + - WITH_COVERAGE=true + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + + - <<: *BASE_JOB + env: + - DEPS=LOWEST + - PHP_VERSION=7.2 + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + + - <<: *BASE_JOB + env: + - DEPS=HIGHEST + - PHP_VERSION=7.1 + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + + - <<: *BASE_JOB + env: + - DEPS=LOWEST + - PHP_VERSION=7.1 + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + + - <<: *BASE_JOB + stage: Smoke Testing + env: + - PHPSTAN + - 
DEPS=HIGHEST + - PHP_VERSION=7.3 + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + script: + - make phpstan + + - <<: *BASE_JOB + stage: Smoke Testing + env: + - PHPCS + - DEPS=HIGHEST + - PHP_VERSION=7.3 + - DOCKER_COMPOSE=docker-compose -f docker-compose.yml -f .travis/docker-compose.ci.yml + script: + - make phpcs diff --git a/.travis/docker-compose.ci.yml b/.travis/docker-compose.ci.yml new file mode 100644 index 0000000..7715f05 --- /dev/null +++ b/.travis/docker-compose.ci.yml @@ -0,0 +1,20 @@ +version: '3.2' + +services: + php7.1: + volumes: + - $HOME/.composer/cache:/root/composer/cache + environment: + - COMPOSER_CACHE_DIR=/root/composer/cache + + php7.2: + volumes: + - $HOME/.composer/cache:/root/composer/cache + environment: + - COMPOSER_CACHE_DIR=/root/composer/cache + + php7.3: + volumes: + - $HOME/.composer/cache:/root/composer/cache + environment: + - COMPOSER_CACHE_DIR=/root/composer/cache diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..cc12fcc --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# The MIT License (MIT) + +Copyright (c) 2018 PhpSpellCheck Project + +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in +> all copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +> THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6c9b167 --- /dev/null +++ b/Makefile @@ -0,0 +1,66 @@ +DOCKER_COMPOSE ?= docker-compose +EXEC_PHP = $(DOCKER_COMPOSE) run --rm -T php$(PHP_VERSION) +PHP_VERSION ?= 7.3 +DEPS ?= "LOCKED" +COMPOSER = $(EXEC_PHP) composer +WITH_COVERAGE ?= "FALSE" + +build: + @$(DOCKER_COMPOSE) pull --parallel --ignore-pull-failures 2> /dev/null + $(DOCKER_COMPOSE) build php$(PHP_VERSION) + +kill: + $(DOCKER_COMPOSE) kill + $(DOCKER_COMPOSE) down --volumes --remove-orphans + +setup: ## Setup spellcheckers dependencies +setup: build + $(DOCKER_COMPOSE) up -d --remove-orphans --no-recreate languagetools + +.PHONY: build kill setup + +tests: ## Run all tests +tests: + if [ $(WITH_COVERAGE) = true ]; then $(EXEC_PHP) vendor/bin/phpunit --coverage-clover clover.xml; else $(EXEC_PHP) vendor/bin/phpunit; fi + +tests-dox: ## Run all tests in dox format +tests-dox: + if [ $(WITH_COVERAGE) = true ]; then $(EXEC_PHP) vendor/bin/phpunit --coverage-clover clover.xml --testdox; else $(EXEC_PHP) vendor/bin/phpunit --testdox; fi + +tu: ## Run unit tests +tu: vendor + $(EXEC_PHP) vendor/bin/phpunit --exclude-group integration + +ti: ## Run functional tests +ti: vendor + $(EXEC_PHP) vendor/bin/phpunit --group integration + +scrutinizer: + $(EXEC_PHP) curl -L https://scrutinizer-ci.com/ocular.phar -o ocular.phar -s + $(EXEC_PHP) php ocular.phar code-coverage:upload --format=php-clover clover.xml + +.PHONY: tests tests-dox tu ti + +vendor: + if [ $(DEPS) = "LOWEST" ]; then $(COMPOSER) update --prefer-lowest; fi + if [ $(DEPS) = "LOCKED" ]; then $(COMPOSER) install; fi + if [ $(DEPS) = "HIGHEST" ]; then $(COMPOSER) update; else $(COMPOSER) install; fi + +rector: + docker 
run -v $(pwd):/project rector/rector:latest bin/rector process /project/src/ --config vendor/thecodingmachine/safe/rector-migrate.yml --autoload-file /project/vendor/autoload.php + +phpcs: vendor + $(EXEC_PHP) vendor/bin/phpcs + +phpcbf: vendor + $(EXEC_PHP) vendor/bin/phpcbf + +phpstan: vendor + $(EXEC_PHP) vendor/bin/phpstan analyse src -c phpstan.neon -a vendor/autoload.php + +.PHONY: vendor php-cs php-cbf php-stan + +.DEFAULT_GOAL := help +help: + @grep -E '(^[a-zA-Z_-]+:.*?##.*$$)|(^##)' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[32m%-30s\033[0m %s\n", $$1, $$2}' | sed -e 's/\[32m##/[33m/' +.PHONY: help diff --git a/README.md b/README.md new file mode 100644 index 0000000..e9e6b34 --- /dev/null +++ b/README.md @@ -0,0 +1,212 @@ +

PHP-Spellcheck

+ +

+ PHP-Spellcheck +

+

+ Build Status + Code coverage + Code coverage + PHP-Spellcheck chat room + License +

+ +

Check misspellings from any text source with the most popular spellcheckers available, directly from PHP.

+ + +------ +## Features + +- 🧐 Support Spellcheckers: [Aspell][aspell], [Hunspell][hunspell], [Ispell][ispell], [PHP Pspell][pspell], [LanguageTools][languagetools] and [MultiSpellchecker](src/Spellchecker/MultiSpellchecker.php) [(contribute yours!)](docs/spellchecker/create-custom.md) +- 📄 Support Text Sources: Filesystem [File](src/Source/File.php)/[Directory](src/Source/Directory.php), [String](src/Source/PHPString.php), and [Multisource](src/Source/MultipleSource.php) [(contribute yours!)](docs/text-source/create-custom.md) +- 🛠 Support Text Processors: [MarkdownRemover](src/TextProcessor/MarkdownRemover.php) [(contribute yours!)](docs/text-processor/create-custom.md) +- 🔁 Support Misspelling Handlers: [EchoHandler](src/MisspellingHandler/EchoHandler.php) [(contribute yours!)](docs/misspellings-handler/create-custom.md) +- ➰ Make use of generators to lower memory footprint +- ⚖ Flexible and straightforward design +- 💡 Really easy to implement your own spellcheckers, text processors and misspellings handlers +- 💪 Tests are run against real spellcheckers to ensure full compatibility + +**PHP-Spellcheck** is a really welcoming project for any new contributors. + +Want +to make **your first open-source contribution** ? Check the [Roadmap](#roadmap), +pick one task, [open an issue](https://github.com/tigitz/php-spellchecker/issues/new) and we'll help you go through it 🤓🚀. + +## Install + +Via Composer + +``` bash +$ composer require tigitz/php-spellcheck +``` + +## Usage + +### Spellchecker directly +You can check misspellings directly from a `PhpSpellcheck\Spellchecker` class and process +them on your own. + +```php +check('mispell', ['en_US'], ['from_example']); +foreach ($misspellings as $misspelling) { + $misspelling->getWord(); // 'mispell' + $misspelling->getLineNumber(); // '1' + $misspelling->getOffset(); // '0' + $misspelling->getSuggestions(); // ['misspell', ...] 
+ $misspelling->getContext(); // ['from_example'] +} +``` + +### MisspellingFinder helper +Or you can use an opinionated `MisspellingFinder` class to orchestrate your +spellchecking flow: + +

+ PHP-Spellcheck-misspellingfinder-flow +

+ +```php +getContent()); + return $text->replaceContent($contentProcessed); + } +}; + +$misspellingFinder = new MisspellingFinder( + Aspell::create(), + new EchoHandler(), + $customTextProcessor +); + +$misspellingFinder->find('It\'s_a_mispelling', ['en_US']); // Misspellings are echoed +``` + +## Roadmap + +### Global +- [ ] Add a cli that could do something like `vendor/bin/php-spellcheck "mispell" Languagetools EchoHandler --lang=en_US` +- [ ] Add asynchronous mechanism to spellcheckers +- [ ] Make some computed misspelling properties optional to enhance performance for certain use cases (Lines and offset in `LanguageTools` spellchecker for example) +- [ ] Add a languages mapper to manage their different representations among spellcheckers +- [ ] Evaluate the use of `strtok` function to parse lines of text instead of `explode` for performance +- [ ] Evaluate the use of a `MutableMisspelling` for performance comparison +- [ ] Wrap `Webmozart/Assert` library exceptions to throw PHP-Spellcheck own exceptions +- [ ] Improve the `Makefile` + +### Source +- [ ] Make `SourceInterface` class able to have an effect on the spellchecker configuration used +- [ ] League/Flysystem plugin +- [ ] Symfony/Finder plugin + +### Text Processor +- [ ] Markdown - find a way to keep original offset and line of words after stripping +- [ ] Add PHPDoc processor + +### SpellCheck +- [ ] Cache suggestions of already spellchecked words (PSR 6 / PSR 16 ?) 
+- [ ] Pspell - find way to compute word offset +- [ ] LanguageTools - Evaluate the use of http-plug library to make api request +- [ ] Pspell - find way to list available dictionaries +- [ ] Add JamSpell spellchecker +- [ ] Add NuSpell spellchecker + +### Handler +- [ ] MonologHandler +- [ ] ChainedHandler +- [ ] HTMLReportHandler +- [ ] XmlReportHandler +- [ ] JSONReportHandler +- [ ] ConsoleTableHandler + +### Tests +- [ ] Add or improve tests with different text encoding +- [ ] Refactor duplicated Dockerfile content between php images + + +## Versioning +We try to follow [SemVer v2.0.0](http://semver.org/) + +There are still a lot of design decisions that should be confronted with real-world +usage before thinking about a v1.0.0 stable release: +- Are `TextInterface` and `MisspellingInterface` really useful ? +- Is using generators the right way to go ? +- Should all the contributed spellcheckers be maintained by the package itself ? +- How to design an intuitive cli given the flexibility of usage needed ? +- Is the "context" array passed through all the layers the right design to handle data sharing ? + +## Testing + +Spellcheckers come in a lot of different forms, from HTTP APIs to command line tools. +As **PHP-Spellcheck** wants to ensure real world usage is OK, it contains integration tests. + +Therefore, to run these integration tests, these spellcheckers must all be available during test execution. + +The most convenient way to do it is to use **docker** and avoid polluting your own local machine with spellchecker systems only required for this package's tests. + +### Docker +Requires `docker` and `docker-compose` to be installed. Tested on `Linux`. +``` bash +$ make build # build containers images +$ make setup # start spellcheckers container +$ make tests-dox +``` + +You can also specify the php version, the dependency version target and whether you want coverage. Coverage is only supported by php7.2 for now. 
+``` +$ PHP_VERSION=7.2 DEPS=LOWEST WITH_COVERAGE="true" make tests-dox +``` + +Run `make help` to list all tasks available + +### Local + +Todo + +### Environment variables +If spellcheckers execution path are different than their default value +(e.g. `docker exec -ti myispell` instead of `ispell`) you can override the path used in tests +by redefining env vars in [PHPUnit config file](phpunit.xml.dist) + +## Contributing + +Please see [CONTRIBUTING](.github/CONTRIBUTING.md) + +## Credits + +- Inspired by [php-speller](https://github.com/mekras/php-speller) and [monolog](https://github.com/Seldaek/monolog) +- [Philippe Segatori][link-author] +- [All Contributors][link-contributors] + +## License + +The MIT License (MIT). Please see [License File](LICENSE.md) for more information. + +**Logo**: +Elements taken for the final rendering are [Designed by rawpixel.com / Freepik](http://www.freepik.com) + +[link-author]: https://github.com/tigitz +[link-contributors]: ../../contributors +[aspell]: https://github.com/GNUAspell/aspell +[hunspell]: https://github.com/hunspell/hunspell +[ispell]: https://packages.debian.org/stretch/ispell +[languagetools]: https://github.com/languagetool-org/languagetool +[pspell]: http://php.net/manual/fr/book.pspell.php + +[pspell]: http://php.net/manual/fr/book.pspell.php diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..b8e5dc4 --- /dev/null +++ b/composer.json @@ -0,0 +1,55 @@ +{ + "name": "tigitz/php-spellcheck", + "type": "library", + "description": "Provides an easy way to spellcheck multiple text source by many spellcheckers, directly from PHP", + "keywords": [ + "spelling", + "spellcheck", + "spellchecker", + "spell-checker", + "spell-check", + "php-spellcheck" + ], + "homepage": "https://github.com/tigitz/php-spellcheck", + "license": "MIT", + "authors": [ + { + "name": "Philippe Segatori", + "email": "contact@philippe-segatori.fr", + "homepage": "https://github.com/tigitz", + "role": "Maintainer" + } + 
], + "require": { + "php": "^7.1", + "symfony/process": "^3.3|^4.0", + "thecodingmachine/safe": "^0.1.13", + "webmozart/assert": "1.3.*" + }, + "require-dev": { + "phpstan/phpstan": "^0.11", + "phpstan/phpstan-strict-rules": "^0.11.0", + "thecodingmachine/phpstan-safe-rule": "^0.1", + "phpstan/phpstan-webmozart-assert": "^0.11.0", + "phpunit/phpunit": "^6.0", + "tigitz/phpcs": "^8.0" + }, + "autoload": { + "psr-4": { + "PhpSpellcheck\\": "src" + } + }, + "autoload-dev": { + "psr-4": { + "PhpSpellcheck\\Tests\\": "tests/PhpSpellcheck/Tests" + } + }, + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "config": { + "sort-packages": true + } +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..cf6ea8a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,36 @@ +version: '3.2' + +services: + php7.1: + build: docker/php7.1 + volumes: + - .:/usr/src/myapp + - ./cache:/root/composer/cache + environment: + - LANG=en_US.UTF-8 + - COMPOSER_CACHE_DIR=/root/composer/cache + depends_on: + - languagetools + + php7.2: + build: docker/php7.2 + volumes: + - .:/usr/src/myapp + environment: + - LANG=en_US.UTF-8 + - COMPOSER_CACHE_DIR=/root/composer/cache + depends_on: + - languagetools + + php7.3: + build: docker/php7.3 + volumes: + - .:/usr/src/myapp + environment: + - LANG=en_US.UTF-8 + - COMPOSER_CACHE_DIR=/root/composer/cache + depends_on: + - languagetools + + languagetools: + image: silviof/docker-languagetool diff --git a/docker/php7.1/Dockerfile b/docker/php7.1/Dockerfile new file mode 100644 index 0000000..d01f814 --- /dev/null +++ b/docker/php7.1/Dockerfile @@ -0,0 +1,43 @@ +FROM php:7.1-cli-stretch + +RUN echo "memory_limit=-1" > "$PHP_INI_DIR/conf.d/memory-limit.ini" \ + && echo "date.timezone=${PHP_TIMEZONE:-UTC}" > "$PHP_INI_DIR/conf.d/date_timezone.ini" + +RUN apt-get update \ + && apt install -y \ + curl \ + git \ + zip \ + unzip \ + openssl \ + libzip-dev \ + ispell \ + iamerican \ + hunspell \ + hunspell-en-us \ + 
aspell \ + aspell-en \ + libpspell-dev \ + && docker-php-ext-configure pspell \ + && docker-php-ext-install pspell \ + && docker-php-ext-install zip \ + && rm -r /var/lib/apt/lists/* + +RUN cp /usr/share/hunspell/en_US.aff /usr/share/hunspell/en_US.aff.orig \ + && cp /usr/share/hunspell/en_US.dic /usr/share/hunspell/en_US.dic.orig \ + && iconv --from ISO8859-1 -t ascii//TRANSLIT /usr/share/hunspell/en_US.aff.orig > /usr/share/hunspell/en_US.aff \ + && iconv --from ISO8859-1 -t ascii//TRANSLIT /usr/share/hunspell/en_US.dic.orig > /usr/share/hunspell/en_US.dic \ + && head /usr/share/hunspell/en_US.aff \ + && sed -i '/SET ISO8859-1/c\SET UTF-8' /usr/share/hunspell/en_US.aff + +# install composer +ENV COMPOSER_ALLOW_SUPERUSER 1 +ENV COMPOSER_HOME /tmp + +RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" \ + && php -r "if (hash_file('sha384', 'composer-setup.php') === trim(file_get_contents('https://composer.github.io/installer.sig'))) { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \ + && php composer-setup.php --no-ansi --install-dir=/usr/bin --filename=composer \ + && php -r "unlink('composer-setup.php');" \ + && composer --ansi --version --no-interaction + +WORKDIR /usr/src/myapp diff --git a/docker/php7.2/Dockerfile b/docker/php7.2/Dockerfile new file mode 100644 index 0000000..290aa16 --- /dev/null +++ b/docker/php7.2/Dockerfile @@ -0,0 +1,45 @@ +FROM php:7.2-cli-stretch + +RUN echo "memory_limit=-1" > "$PHP_INI_DIR/conf.d/memory-limit.ini" \ + && echo "date.timezone=${PHP_TIMEZONE:-UTC}" > "$PHP_INI_DIR/conf.d/date_timezone.ini" + +RUN apt-get update \ + && apt install -y \ + curl \ + git \ + zip \ + unzip \ + openssl \ + libzip-dev \ + ispell \ + iamerican \ + hunspell \ + hunspell-en-us \ + aspell \ + aspell-en \ + libpspell-dev \ + && docker-php-ext-configure pspell \ + && docker-php-ext-install pspell \ + && docker-php-ext-install zip \ + && rm -r /var/lib/apt/lists/* + 
+RUN cp /usr/share/hunspell/en_US.aff /usr/share/hunspell/en_US.aff.orig \ + && cp /usr/share/hunspell/en_US.dic /usr/share/hunspell/en_US.dic.orig \ + && iconv --from ISO8859-1 -t ascii//TRANSLIT /usr/share/hunspell/en_US.aff.orig > /usr/share/hunspell/en_US.aff \ + && iconv --from ISO8859-1 -t ascii//TRANSLIT /usr/share/hunspell/en_US.dic.orig > /usr/share/hunspell/en_US.dic \ + && head /usr/share/hunspell/en_US.aff \ + && sed -i '/SET ISO8859-1/c\SET UTF-8' /usr/share/hunspell/en_US.aff + +# install composer +ENV COMPOSER_ALLOW_SUPERUSER 1 +ENV COMPOSER_HOME /tmp + +RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" \ + && php -r "if (hash_file('sha384', 'composer-setup.php') === trim(file_get_contents('https://composer.github.io/installer.sig'))) { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \ + && php composer-setup.php --no-ansi --install-dir=/usr/bin --filename=composer \ + && php -r "unlink('composer-setup.php');" \ + && composer --ansi --version --no-interaction + +RUN pecl install xdebug && docker-php-ext-enable xdebug + +WORKDIR /usr/src/myapp diff --git a/docker/php7.3/Dockerfile b/docker/php7.3/Dockerfile new file mode 100644 index 0000000..ad99e16 --- /dev/null +++ b/docker/php7.3/Dockerfile @@ -0,0 +1,43 @@ +FROM php:7.3-cli-stretch + +RUN echo "memory_limit=-1" > "$PHP_INI_DIR/conf.d/memory-limit.ini" \ + && echo "date.timezone=${PHP_TIMEZONE:-UTC}" > "$PHP_INI_DIR/conf.d/date_timezone.ini" + +RUN apt-get update \ + && apt install -y \ + curl \ + git \ + zip \ + unzip \ + openssl \ + libzip-dev \ + ispell \ + iamerican \ + hunspell \ + hunspell-en-us \ + aspell \ + aspell-en \ + libpspell-dev \ + && docker-php-ext-configure pspell \ + && docker-php-ext-install pspell \ + && docker-php-ext-install zip \ + && rm -r /var/lib/apt/lists/* + +RUN cp /usr/share/hunspell/en_US.aff /usr/share/hunspell/en_US.aff.orig \ + && cp /usr/share/hunspell/en_US.dic 
/usr/share/hunspell/en_US.dic.orig \ + && iconv --from ISO8859-1 -t ascii//TRANSLIT /usr/share/hunspell/en_US.aff.orig > /usr/share/hunspell/en_US.aff \ + && iconv --from ISO8859-1 -t ascii//TRANSLIT /usr/share/hunspell/en_US.dic.orig > /usr/share/hunspell/en_US.dic \ + && head /usr/share/hunspell/en_US.aff \ + && sed -i '/SET ISO8859-1/c\SET UTF-8' /usr/share/hunspell/en_US.aff + +# install composer +ENV COMPOSER_ALLOW_SUPERUSER 1 +ENV COMPOSER_HOME /tmp + +RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" \ + && php -r "if (hash_file('sha384', 'composer-setup.php') === trim(file_get_contents('https://composer.github.io/installer.sig'))) { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \ + && php composer-setup.php --no-ansi --install-dir=/usr/bin --filename=composer \ + && php -r "unlink('composer-setup.php');" \ + && composer --ansi --version --no-interaction + +WORKDIR /usr/src/myapp diff --git a/docs/misspellings-handler/create-custom.md b/docs/misspellings-handler/create-custom.md new file mode 100644 index 0000000..27aa909 --- /dev/null +++ b/docs/misspellings-handler/create-custom.md @@ -0,0 +1,75 @@ +# Create your custom misspelling handler + +A misspelling handler is responsible for processing the misspellings found +through the `MisspellingFinder` class. + +It must implement the `MisspellingHandlerInterface`. + +Let's say you want to send an email with all of the misspellings found in + your blog articles. 
+ +Create your `EmailMisspellingHandler`: + +```php +emailSender = $emailSender; + } + + /** + * @param MisspellingInterface[] $misspellings + */ + public function handle(iterable $misspellings) + { + $message = <<getWord(), + $misspelling->getLineNumber(), + $misspelling->getContext()['article-name'], + implode(',', $misspelling->getSuggestions()) + ).PHP_EOL; + } + + $this->emailSender + ->body($message) + ->send(); + } +} +``` + +As you can see, it iterates over your misspellings to build the email body, +which is then sent to you. + +You just have to use your custom handler while calling `MisspellingFinder`: + +```php +find($blogArticlesSource, ['en_US']); +``` diff --git a/docs/spellchecker/create-custom.md b/docs/spellchecker/create-custom.md new file mode 100644 index 0000000..8eda993 --- /dev/null +++ b/docs/spellchecker/create-custom.md @@ -0,0 +1,5 @@ +# Create your custom spellchecker +TODO + +Meanwhile, check the current implementations in `PhpSpellcheck\Spellchecker` to +get started. diff --git a/docs/text-processor/create-custom.md b/docs/text-processor/create-custom.md new file mode 100644 index 0000000..6c13f62 --- /dev/null +++ b/docs/text-processor/create-custom.md @@ -0,0 +1,5 @@ +# Create your custom text processor +TODO + +Meanwhile, check the current implementations in `PhpSpellcheck\TextProcessor` to +get started. diff --git a/docs/text-source/create-custom.md b/docs/text-source/create-custom.md new file mode 100644 index 0000000..6e6ea01 --- /dev/null +++ b/docs/text-source/create-custom.md @@ -0,0 +1,5 @@ +# Create your custom text source +TODO + +Meanwhile, check the current implementations in `PhpSpellcheck\Source` to +get started. 
diff --git a/example/aspell_console_output.php b/example/aspell_console_output.php new file mode 100644 index 0000000..c14cdac --- /dev/null +++ b/example/aspell_console_output.php @@ -0,0 +1,41 @@ +check('mispell', ['en_US']); // $misspellings is a \Generator here +foreach ($misspellings as $misspelling) { + $misspelling->getWord(); + $misspelling->getLineNumber(); + $misspelling->getOffset(); + $misspelling->getSuggestions(); + $misspelling->getContext(); +} + +// *** Using MisspellingFinder class to orchestrate spellchecking flow *** +// My custom text processor that replaces "_" by " " +$customTextProcessor = new class implements TextProcessorInterface +{ + public function process(TextInterface $text): TextInterface + { + $contentProcessed = str_replace('_', ' ', $text->getContent()); + + return $text->replaceContent($contentProcessed); + } +}; + +$misspellingFinder = new MisspellingFinder( + Aspell::create(), // Creates aspell spellchecker pointing to "aspell" as it's binary path + new EchoHandler(), // Handles all the misspellings found by echoing their information + $customTextProcessor +); + +$misspellingFinder->find('It\'s_a_mispelling', ['en_US']); // Misspellings are echoed diff --git a/phpcs.xml b/phpcs.xml new file mode 100644 index 0000000..ac9933b --- /dev/null +++ b/phpcs.xml @@ -0,0 +1,21 @@ + + + The coding standard for tigitz/php-spellcheck + + + + + + + + + + + src + tests + + + + + + diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..d23295b --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,53 @@ +includes: + - vendor/phpstan/phpstan-webmozart-assert/extension.neon + - vendor/phpstan/phpstan-strict-rules/rules.neon + - vendor/thecodingmachine/phpstan-safe-rule/phpstan-safe-rule.neon + +parameters: + level: max + reportUnmatchedIgnoredErrors: false + ignoreErrors: + # symfony/process version testing + - + message: "#^Call to function method_exists\\(\\) with Symfony\\\\Component\\\\Process\\\\Process and 
'inheritEnvironmentV…' will always evaluate to true#" + path: %currentWorkingDirectory%/src/Utils/ProcessRunner.php + + # webmozart/assert missanalysis + - + message: "#^Call to static method Webmozart\\\\Assert\\\\Assert\\:\\:allIsInstanceOf\\(\\) with iterable\\ and 'PhpSpellcheck…' will always evaluate to true\\.$#" + path: %currentWorkingDirectory%/src/Source/MultipleSource.php + - + message: "#^Call to static method Webmozart\\\\Assert\\\\Assert\\:\\:stringNotEmpty\\(\\) with string will always evaluate to true\\.$#" + path: %currentWorkingDirectory%/src/Misspelling.php + - + message: "#^Instanceof between PhpSpellcheck\\\\Source\\\\SourceInterface and PhpSpellcheck\\\\Source\\\\SourceInterface will always evaluate to true\\.$#" + path: %currentWorkingDirectory%/src/MisspellingFinder.php + - + message: "#^Call to static method Webmozart\\\\Assert\\\\Assert\\:\\:allIsInstanceOf\\(\\) with iterable\\ and 'PhpSpellcheck…' will always evaluate to true\\.$#" + path: %currentWorkingDirectory%/src/Spellchecker/MultiSpellchecker.php + + # preg_* functions surely returning string as parameter is string + - + message: "#^Possibly invalid array key type array\\|string\\.$#" + path: %currentWorkingDirectory%/src/Spellchecker/Hunspell.php + - + message: "#^Parameter \\#1 \\$text of static method PhpSpellcheck\\\\Text\\:\\:utf8\\(\\) expects string, array\\|string given\\.$#" + path: %currentWorkingDirectory%/src/TextProcessor/MarkdownRemover.php + - + message: "#^Parameter \\#2 \\$str of function explode expects string, array\\|string given\\.$#" + path: %currentWorkingDirectory%/src/Spellchecker/PHPPspell.php + - + message: "#^Binary operation \"\\.\" between '\"' and array\\|string results in an error\\.$#" + path: %currentWorkingDirectory%/src/Utils/CommandLine.php + + # return type missanalysis + - + message: "#^Method PhpSpellcheck\\\\Spellchecker\\\\MultiSpellchecker\\:\\:check\\(\\) should return iterable\\ but returns iterable\\.$#" + path: 
%currentWorkingDirectory%/src/Spellchecker/MultiSpellchecker.php + - + message: "#^Method PhpSpellcheck\\\\Spellchecker\\\\MultiSpellchecker\\:\\:check\\(\\) should return iterable\\ but returns array\\\\.$#" + path: %currentWorkingDirectory%/src/Spellchecker/MultiSpellchecker.php + + # Missing strict comparison + - '#^Construct empty\(\) is not allowed. Use more strict comparison.$#' + - '#^Only booleans are allowed in#' diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..e223cd4 --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + tests + + + + + + src/ + + + diff --git a/src/Exception/ExceptionInterface.php b/src/Exception/ExceptionInterface.php new file mode 100644 index 0000000..256a1ef --- /dev/null +++ b/src/Exception/ExceptionInterface.php @@ -0,0 +1,10 @@ +process = $process; + + $message = \Safe\sprintf( + 'Process with command "%s" has failed%s with exit code %d(%s)%s', + $process->getCommandLine(), + $process->isStarted() ? ' running' : '', + $process->getExitCode(), + $process->getExitCodeText(), + $failureReason ? ' because "' . $failureReason . 
'"' : '' + ); + + parent::__construct( + $message, + $code, + $previous + ); + } + + public function getProcess(): Process + { + return $this->process; + } +} diff --git a/src/Exception/ProcessHasErrorOutputException.php b/src/Exception/ProcessHasErrorOutputException.php new file mode 100644 index 0000000..0c59b74 --- /dev/null +++ b/src/Exception/ProcessHasErrorOutputException.php @@ -0,0 +1,31 @@ +word = $word; + $this->offset = $offset; + $this->lineNumber = $lineNumber; + $this->suggestions = $suggestions; + $this->context = $context; + } + + public function mergeSuggestions(array $suggestionsToAdd): MisspellingInterface + { + // Suggestions are accumulated on this instance too, as callers may ignore the returned copy. + $existingSuggestionsAsKeys = array_flip($this->suggestions); + foreach ($suggestionsToAdd as $suggestionToAdd) { + if (!isset($existingSuggestionsAsKeys[$suggestionToAdd])) { + $this->suggestions[] = $suggestionToAdd; + } + } + + return new self( + $this->word, + $this->offset, + $this->lineNumber, + $this->suggestions, + $this->context + ); + } + + public function getUniqueIdentity(): string + { + return $this->getWord() . $this->getLineNumber() . 
$this->getOffset(); + } + + public function canDeterminateUniqueIdentity(): bool + { + return $this->getLineNumber() !== null + && $this->getOffset() !== null; + } + + public function getWord(): string + { + return $this->word; + } + + public function getOffset(): ?int + { + return $this->offset; + } + + public function getLineNumber(): ?int + { + return $this->lineNumber; + } + + public function hasSuggestions(): bool + { + return !empty($this->suggestions); + } + + public function hasContext(): bool + { + return !empty($this->context); + } + + /** + * @return string[] + */ + public function getSuggestions(): array + { + return $this->suggestions; + } + + public function getContext(): array + { + return $this->context; + } + + public function setContext(array $context): MisspellingInterface + { + return new self( + $this->word, + $this->offset, + $this->lineNumber, + $this->suggestions, + $context + ); + } + + public function mergeContext(array $context, bool $override = true): MisspellingInterface + { + if (empty($context)) { + throw new InvalidArgumentException('Context trying to be merged is empty'); + } + + return new self( + $this->word, + $this->offset, + $this->lineNumber, + $this->suggestions, + $override ? 
array_merge($this->context, $context) : array_merge($context, $this->context) + ); + } +} diff --git a/src/MisspellingFinder.php b/src/MisspellingFinder.php new file mode 100644 index 0000000..00f5d02 --- /dev/null +++ b/src/MisspellingFinder.php @@ -0,0 +1,116 @@ +spellChecker = $spellChecker; + $this->misspellingHandler = $misspellingHandler; + $this->textProcessor = $textProcessor; + } + + /** + * @param string|SourceInterface $source + * + * @return MisspellingInterface[] + */ + public function find( + $source, + array $languages = [], + array $context = [], + string $spellCheckEncoding = TextEncoding::UTF8 + ): iterable { + $misspellings = $this->doSpellcheck($source, $languages, $context, $spellCheckEncoding); + + if ($this->misspellingHandler !== null) { + $this->misspellingHandler->handle($misspellings); + } + + return $misspellings; + } + + /** + * @param string|SourceInterface $source + * + * @return MisspellingInterface[] + */ + private function doSpellcheck($source, array $languages, array $context, string $spellCheckEncoding): iterable + { + if (is_string($source)) { + $source = new PHPString($source); + } + + if ($source instanceof SourceInterface) { + return $this->doSpellcheckFromSource($source, $languages, $context, $spellCheckEncoding); + } + + $sourceVarType = is_object($source) ? get_class($source) : gettype($source); + + throw new InvalidArgumentException('Source should be of type string or ' . SourceInterface::class . '. "' . $sourceVarType . 
'" given'); + } + + /** + * @return MisspellingInterface[] + */ + private function doSpellcheckFromSource( + SourceInterface $source, + array $languages, + array $context, + string $spellCheckEncoding + ): iterable { + foreach ($source->toTexts($context) as $text) { + if ($this->textProcessor !== null) { + $text = $this->textProcessor->process($text); + } + + $misspellingsCheck = $this->spellChecker->check( + $text->getContent(), + $languages, + $text->getContext(), + $spellCheckEncoding + ); + + yield from $misspellingsCheck; + } + } + + public function setSpellchecker(SpellcheckerInterface $spellChecker): void + { + $this->spellChecker = $spellChecker; + } + + public function setMisspellingHandler(MisspellingHandlerInterface $misspellingHandler): void + { + $this->misspellingHandler = $misspellingHandler; + } +} diff --git a/src/MisspellingHandler/EchoHandler.php b/src/MisspellingHandler/EchoHandler.php new file mode 100644 index 0000000..61071a7 --- /dev/null +++ b/src/MisspellingHandler/EchoHandler.php @@ -0,0 +1,29 @@ +getWord(), + $misspelling->getLineNumber(), + $misspelling->getOffset(), + $misspelling->hasSuggestions() ? 
implode(',', $misspelling->getSuggestions()) : '', + \Safe\json_encode($misspelling->getContext()) + ); + + echo $output; + } + } +} diff --git a/src/MisspellingHandler/MisspellingHandlerInterface.php b/src/MisspellingHandler/MisspellingHandlerInterface.php new file mode 100644 index 0000000..77df8ff --- /dev/null +++ b/src/MisspellingHandler/MisspellingHandlerInterface.php @@ -0,0 +1,17 @@ +dirPath = $dirPath; + $this->pattern = $pattern; + } + + /** + * @return Text[] + */ + public function getContents(): iterable + { + $filesInDir = new \RecursiveIteratorIterator( + new \RecursiveDirectoryIterator( + $this->dirPath, + \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME + ), + \RecursiveIteratorIterator::SELF_FIRST + ); + + if ($this->pattern) { + $filesInDir = new \RegexIterator($filesInDir, $this->pattern, \RegexIterator::GET_MATCH); + } + + /** @var \SplFileInfo|string|array $file */ + foreach ($filesInDir as $file) { + if (is_string($file)) { + $file = new \SplFileInfo($file); + } elseif (is_array($file)) { + // When regex pattern is used, an array containing the file path in its first element is returned + $file = new \SplFileInfo(current($file)); + } + + if (!$file->isDir()) { + if ($file->getRealPath() !== false) { + yield current((new File($file->getRealPath()))->toTexts()); + } + } + } + } + + public function toTexts(array $context): iterable + { + foreach ($this->getContents() as $text) { + yield new Text( + $text->getContent(), + $text->getEncoding(), + array_merge($text->getContext(), $context) + ); + } + } +} diff --git a/src/Source/File.php b/src/Source/File.php new file mode 100644 index 0000000..f002a41 --- /dev/null +++ b/src/Source/File.php @@ -0,0 +1,60 @@ +filePath = $filePath; + $this->encoding = $encoding; + } + + private function getFileContent(): string + { + return \Safe\file_get_contents($this->filePath); + } + + public function toTexts(array $context = []): iterable + { + $context['filePath'] = 
\Safe\realpath($this->filePath); + $encoding = $this->encoding; + + if ($encoding === null) { + $encoding = mb_detect_encoding($this->getFileContent(), null, true); + + if ($encoding === false) { + throw new RuntimeException( + \Safe\sprintf( + 'Coulnd\'t detect enconding of string:' . PHP_EOL . '%s', + $this->getFileContent() + ) + ); + } + } + + + return [ + new Text( + $this->getFileContent(), + $encoding, + $context + ), + ]; + } +} diff --git a/src/Source/MultipleSource.php b/src/Source/MultipleSource.php new file mode 100644 index 0000000..9643a9a --- /dev/null +++ b/src/Source/MultipleSource.php @@ -0,0 +1,37 @@ + + */ + private $sources; + + + /** + * @param iterable $sources + */ + public function __construct(iterable $sources) + { + Assert::allIsInstanceOf($sources, SourceInterface::class); + $this->sources = $sources; + } + + /** + * {@inheritDoc} + */ + public function toTexts(array $context = []): iterable + { + foreach ($this->sources as $source) { + foreach ($source->toTexts($context) as $text) { + yield $text->mergeContext($context, true); + } + } + } +} diff --git a/src/Source/PHPString.php b/src/Source/PHPString.php new file mode 100644 index 0000000..4be6439 --- /dev/null +++ b/src/Source/PHPString.php @@ -0,0 +1,43 @@ +string = $string; + } + + /** + * @param array $context + * + * @return TextInterface[] + */ + public function toTexts(array $context): iterable + { + $encoding = mb_detect_encoding($this->string, null, true); + + if ($encoding === false) { + throw new RuntimeException( + \Safe\sprintf( + 'Coulnd\'t detect enconding of string:' . PHP_EOL . 
'%s', + $this->string + ) + ); + } + + yield new Text($this->string, $encoding, $context); + } +} diff --git a/src/Source/SourceInterface.php b/src/Source/SourceInterface.php new file mode 100644 index 0000000..f64a3ea --- /dev/null +++ b/src/Source/SourceInterface.php @@ -0,0 +1,17 @@ +binaryPath = $binaryPath; + } + + /** + * {@inheritDoc} + */ + public function check(string $text, array $languages = [], array $context = [], ?string $encoding = null): iterable + { + Assert::maxCount($languages, 1, 'Aspell spellchecker doesn\'t support multiple languages check'); + + $cmd = $this->binaryPath->addArg('-a'); + + if (!empty($languages)) { + $cmd = $cmd->addArg('--lang=' . implode(',', $languages)); + } + + if ($encoding !== null) { + $cmd = $cmd->addArg('--encoding=' . $encoding); + } + + $process = new Process($cmd->getArgs()); + // Add prefix characters putting Ispell's type of spellcheckers in terse-mode, + // ignoring correct words and thus speeding up the execution + $process->setInput('!' . PHP_EOL . $text . PHP_EOL . 
'%'); + + $output = ProcessRunner::run($process)->getOutput(); + + if ($process->getErrorOutput() !== '') { + throw new ProcessHasErrorOutputException($process->getErrorOutput(), $text, $process->getCommandLine()); + } + + return IspellOutputParser::parseMisspellings($output, $context); + } + + public function getBinaryPath(): CommandLine + { + return $this->binaryPath; + } + + /** + * {@inheritDoc} + */ + public function getSupportedLanguages(): iterable + { + $languages = []; + $cmd = $this->binaryPath->addArgs(['dump', 'dicts']); + $process = new Process($cmd->getArgs()); + $output = explode(PHP_EOL, ProcessRunner::run($process)->getOutput()); + + foreach ($output as $line) { + $name = trim($line); + if (strpos($name, '-variant') !== false || $name === '') { + // Skip variants + continue; + } + $languages[$name] = true; + } + $languages = array_keys($languages); + \Safe\sort($languages); + + return $languages; + } + + public static function create(?string $binaryPathAsString = null): self + { + return new self(new CommandLine($binaryPathAsString ?? 'aspell')); + } +} diff --git a/src/Spellchecker/Hunspell.php b/src/Spellchecker/Hunspell.php new file mode 100644 index 0000000..1d062a6 --- /dev/null +++ b/src/Spellchecker/Hunspell.php @@ -0,0 +1,99 @@ +binaryPath = $binaryPath; + } + + /** + * {@inheritDoc} + */ + public function check( + string $text, + array $languages = [], + array $context = [], + ?string $encoding = TextEncoding::UTF8 + ): iterable { + $cmd = $this->binaryPath->addArg('-a'); + + if (!empty($languages)) { + $cmd = $cmd->addArgs(['-d', implode(',', $languages)]); + } + + if ($encoding) { + $cmd = $cmd->addArgs(['-i', $encoding]); + } + + $process = new Process($cmd->getArgs()); + // Add prefix characters putting Ispell's type of spellcheckers in terse-mode, + // ignoring correct words and thus speeding execution + $process->setInput('!' . PHP_EOL . $text . PHP_EOL . 
'%'); + + $output = ProcessRunner::run($process)->getOutput(); + if ($process->getErrorOutput() !== '') { + throw new ProcessHasErrorOutputException($process->getErrorOutput(), $text, $process->getCommandLine()); + } + + return IspellOutputParser::parseMisspellings($output, $context); + } + + public function getBinaryPath(): CommandLine + { + return $this->binaryPath; + } + + /** + * {@inheritDoc} + */ + public function getSupportedLanguages(): iterable + { + $languages = []; + $cmd = $this->binaryPath->addArg('-D'); + $process = new Process($cmd->getArgs()); + $output = explode(PHP_EOL, ProcessRunner::run($process)->getErrorOutput()); + + foreach ($output as $line) { + $line = trim($line); + if ('' === $line // Skip empty lines + || \Safe\substr($line, -1) === ':' // Skip headers + || strpos($line, ':') !== false // Skip search path + ) { + continue; + } + $name = basename($line); + if (strpos($name, 'hyph_') === 0) { + // Skip MySpell hyphen files + continue; + } + $name = \Safe\preg_replace('/\.(aff|dic)$/', '', $name); + $languages[$name] = true; + } + $languages = array_keys($languages); + \Safe\sort($languages); + + return $languages; + } + + public static function create(?string $binaryPathAsString): self + { + return new self(new CommandLine($binaryPathAsString ?? 
'hunspell')); + } +} diff --git a/src/Spellchecker/Ispell.php b/src/Spellchecker/Ispell.php new file mode 100644 index 0000000..8751575 --- /dev/null +++ b/src/Spellchecker/Ispell.php @@ -0,0 +1,120 @@ +ispellCommandLine = $ispellCommandLine; + $this->shellEntryPoint = $shellEntryPoint; + } + + /** + * {@inheritDoc} + */ + public function check(string $text, array $languages = [], array $context = [], ?string $encoding = null): iterable + { + Assert::maxCount($languages, 1, 'Ispell spellchecker doesn\'t support multiple languages check'); + + $cmd = $this->ispellCommandLine->addArg('-a'); + + if (!empty($languages)) { + $cmd = $cmd->addArgs(['-d', implode(',', $languages)]); + } + + $process = new Process($cmd->getArgs()); + + // Add prefix characters putting Ispell's type of spellcheckers in terse-mode, + // ignoring correct words and thus speeding execution + $process->setInput('!' . PHP_EOL . $text . PHP_EOL . '%'); + + $output = ProcessRunner::run($process)->getOutput(); + + if ($process->getErrorOutput() !== '') { + throw new ProcessHasErrorOutputException($process->getErrorOutput(), $text, $process->getCommandLine()); + } + + return IspellOutputParser::parseMisspellings($output, $context); + } + + public function getCommandLine(): CommandLine + { + return $this->ispellCommandLine; + } + + /** + * {@inheritDoc} + */ + public function getSupportedLanguages(): iterable + { + if ($this->supportedLanguages === null) { + $shellEntryPoint = $this->shellEntryPoint ?? new CommandLine([]); + $whichCommand = clone $shellEntryPoint; + $process = new Process( + $whichCommand + ->addArg('which') + ->addArg('ispell') + ->getArgs() + ); + $process->mustRun(); + $binaryPath = trim($process->getOutput()); + + $lsCommand = clone $shellEntryPoint; + $process = new Process( + $lsCommand + ->addArg('ls') + ->addArg(\dirname($binaryPath, 2) . 
'/lib/ispell') + ->getArgs() + ); + $process->mustRun(); + + $listOfFiles = trim($process->getOutput()); + + $this->supportedLanguages = []; + foreach (explode(PHP_EOL, $listOfFiles) as $file) { + if (strpos($file, '.aff', -4) === false) { + continue; + } + + $this->supportedLanguages[] = \Safe\substr($file, 0, -4); + } + + if (empty($this->supportedLanguages)) { + throw new RuntimeException('Ispell doesn\'t have any directory or none could have been found'); + } + } + + return $this->supportedLanguages; + } + + public static function create(?string $ispellCommandLineAsString): self + { + return new self(new CommandLine($ispellCommandLineAsString ?? 'ispell')); + } +} diff --git a/src/Spellchecker/LanguageTool.php b/src/Spellchecker/LanguageTool.php new file mode 100644 index 0000000..068a265 --- /dev/null +++ b/src/Spellchecker/LanguageTool.php @@ -0,0 +1,109 @@ +apiClient = $apiClient; + } + + /** + * @return Misspelling[] + */ + public function check( + string $text, + array $languages = [], + array $context = [], + ?string $encoding = TextEncoding::UTF8 + ): iterable { + Assert::notEmpty($languages, 'LanguageTool requires at least one language to run it\'s spellchecking process'); + + $check = $this->apiClient->spellCheck($text, $languages, $context[self::class] ?? 
[]); + $lineBreaksOffset = $this->getLineBreaksOffset($text, $encoding); + + foreach ($check['matches'] as $match) { + list($offsetFromLine, $line) = $this->computeRealOffsetAndLine($match, $lineBreaksOffset); + + yield new Misspelling( + mb_substr($match['context']['text'], $match['context']['offset'], $match['context']['length']), + $offsetFromLine, + $line, // line break index transformed in line number + array_column($match['replacements'], 'value'), + array_merge( + [ + 'sentence' => $match['sentence'], + 'spellingErrorMessage' => $match['message'], + 'ruleUsed' => $match['rule'], + ], + $context + ) + ); + } + } + + /** + * {@inheritdoc} + */ + public function getSupportedLanguages(): iterable + { + return $this->apiClient->getSupportedLanguages(); + } + + private function computeRealOffsetAndLine(array $match, array $lineBreaksOffset): array + { + $languageToolsOffset = (int) $match['offset']; + $index = SortedNumericArrayNearestValueFinder::findIndex( + (int) $match['offset'], + $lineBreaksOffset, + SortedNumericArrayNearestValueFinder::FIND_HIGHER + ); + + if ($index === 0) { + // word is on the first line + $offsetFromLine = $languageToolsOffset; + $line = $index + 1; + } else { + if ($languageToolsOffset > $lineBreaksOffset[$index]) { + // word is on the last line + $offsetFromLine = $languageToolsOffset - $lineBreaksOffset[$index]; + $line = $index + 2; + } else { + $offsetFromLine = $languageToolsOffset - $lineBreaksOffset[$index - 1]; + $line = $index + 1; + } + } + + return [$offsetFromLine, $line]; + } + + private function getLineBreaksOffset(string $text, ?string $encoding): array + { + if ($encoding === null) { + $encoding = \Safe\mb_internal_encoding(); + } + + $start = 0; + $lineBreaksOffset = []; + while (($pos = \mb_strpos(($text), PHP_EOL, $start, $encoding)) !== false) { // strict: offset 0 is a valid match + $lineBreaksOffset[] = $pos; + $start = $pos + 1; // start searching from next position. 
+ + return $lineBreaksOffset; + } +} diff --git a/src/Spellchecker/LanguageTool/LanguageToolApiClient.php b/src/Spellchecker/LanguageTool/LanguageToolApiClient.php new file mode 100644 index 0000000..759f44b --- /dev/null +++ b/src/Spellchecker/LanguageTool/LanguageToolApiClient.php @@ -0,0 +1,66 @@ +baseUrl = $baseUrl; + } + + public function spellCheck(string $text, array $languages, array $options): array + { + $options['text'] = $text; + $options['language'] = array_shift($languages); + + if (!empty($languages)) { + $options['altLanguages'] = implode(',', $languages); + } + + return $this->requestAPI( + '/v2/check', + 'POST', + "Content-type: application/x-www-form-urlencoded\r\nAccept: application/json", // body is built with http_build_query() + $options + ); + } + + public function getSupportedLanguages(): array + { + return array_column( + $this->requestAPI( + '/v2/languages', + 'GET', + 'Accept: application/json' + ), + 'longCode' + ); + } + + /** + * @throws \RuntimeException + */ + public function requestAPI(string $endpoint, string $method, string $header, array $queryParams = []): array + { + $httpData = [ + 'method' => $method, + 'header' => $header, + ]; + + if (!empty($queryParams)) { + $httpData['content'] = http_build_query($queryParams); + } + + $content = \Safe\file_get_contents($this->baseUrl . 
$endpoint, false, stream_context_create(['http' => $httpData])); + + return \Safe\json_decode($content, true); + } +} diff --git a/src/Spellchecker/MultiSpellchecker.php b/src/Spellchecker/MultiSpellchecker.php new file mode 100644 index 0000000..9291939 --- /dev/null +++ b/src/Spellchecker/MultiSpellchecker.php @@ -0,0 +1,99 @@ + + */ + private $spellCheckers; + + /** + * @var bool + */ + private $mergeMisspellingsSuggestions; + + + /** + * @param SpellcheckerInterface[] $spellCheckers + */ + public function __construct(iterable $spellCheckers, bool $mergeMisspellingsSuggestions = true) + { + Assert::allIsInstanceOf($spellCheckers, SpellcheckerInterface::class); + $this->spellCheckers = $spellCheckers; + $this->mergeMisspellingsSuggestions = $mergeMisspellingsSuggestions; + } + + /** + * {@inheritdoc} + */ + public function check(string $text, array $languages = [], array $context = [], ?string $encoding = null): iterable + { + if (!$this->mergeMisspellingsSuggestions) { + return $this->checkForAllSpellcheckers($text, $languages, $context, $encoding); + } + + /** @var MisspellingInterface[] $misspellings */ + $misspellings = []; + /** @var SpellcheckerInterface $spellChecker */ + foreach ($this->spellCheckers as $spellChecker) { + foreach ($spellChecker->check($text, $languages, $context, $encoding) as $misspelling) { + if (!empty($context)) { + $misspelling = $misspelling->mergeContext($context); + } + + if (!$misspelling->canDeterminateUniqueIdentity()) { + $misspellings[] = $misspelling; + continue; + } + + if (isset($misspellings[$misspelling->getUniqueIdentity()])) { + $misspellings[$misspelling->getUniqueIdentity()]->mergeSuggestions($misspelling->getSuggestions()); + continue; + } + + $misspellings[$misspelling->getUniqueIdentity()] = $misspelling; + } + } + + return array_values($misspellings); + } + + private function checkForAllSpellcheckers( + string $text, + array $languages, + array $context, + ?string $encoding + ): iterable { + foreach 
($this->spellCheckers as $spellChecker) { + foreach ($spellChecker->check($text, $languages, $context, $encoding) as $misspelling) { + if (!empty($context)) { + $misspelling = $misspelling->mergeContext($context); + } + + yield $misspelling; + } + } + } + + /** + * {@inheritdoc} + */ + public function getSupportedLanguages(): iterable + { + $supportedLanguages = []; + foreach ($this->spellCheckers as $spellChecker) { + foreach ($spellChecker->getSupportedLanguages() as $language) { + $supportedLanguages[] = $language; + } + } + + return array_values(array_unique($supportedLanguages)); + } +} diff --git a/src/Spellchecker/PHPPspell.php b/src/Spellchecker/PHPPspell.php new file mode 100644 index 0000000..9d3ed10 --- /dev/null +++ b/src/Spellchecker/PHPPspell.php @@ -0,0 +1,82 @@ +mode = $mode; + $this->numberOfCharactersLowerLimit = $numberOfCharactersLowerLimit; + } + + /** + * {@inheritDoc} + */ + public function check( + string $text, + array $languages = [], + array $context = [], + ?string $encoding = TextEncoding::UTF8 + ): iterable { + Assert::count($languages, 1, 'PHPPspell spellchecker doesn\'t support multiple languages check'); + Assert::notNull($encoding, 'PHPPspell requires the encoding to be defined'); + + $pspellConfig = \Safe\pspell_config_create(current($languages), '', '', $encoding); + \Safe\pspell_config_mode($pspellConfig, $this->mode); + \Safe\pspell_config_ignore($pspellConfig, $this->numberOfCharactersLowerLimit); + $dictionary = \Safe\pspell_new_config($pspellConfig); + + $lines = explode(PHP_EOL, $text); + + /** @var string $line */ + foreach ($lines as $lineNumber => $line) { + $words = explode(' ', \Safe\preg_replace("/(?!['’-])(\p{P}|\+|--)/u", '', $line)); + foreach ($words as $key => $word) { + if (!pspell_check($dictionary, $word)) { + $suggestions = pspell_suggest($dictionary, $word); + yield new Misspelling($word, 0, $lineNumber + 1, $suggestions, $context); + } + } + } + } + + /** + * {@inheritDoc} + */ + public function 
getSupportedLanguages(): iterable + { + throw new LogicException('Retrieving supported dictionaries for PHPPspell spellchecker is not supported yet'); + } +} diff --git a/src/Spellchecker/SpellcheckerInterface.php b/src/Spellchecker/SpellcheckerInterface.php new file mode 100644 index 0000000..4589531 --- /dev/null +++ b/src/Spellchecker/SpellcheckerInterface.php @@ -0,0 +1,25 @@ +content = $content; + $this->encoding = $encoding; + $this->context = $context; + } + + public function getContent(): string + { + return $this->content; + } + + public function getEncoding(): string + { + return $this->encoding; + } + + public function getContext(): array + { + return $this->context; + } + + public function replaceContent(string $newContent): TextInterface + { + return new self($newContent, $this->encoding, $this->context); + } + + public function mergeContext(array $context, bool $override = true): TextInterface + { + if (empty($context)) { + throw new InvalidArgumentException('Context trying to be merged is empty'); + } + + return new self( + $this->getContent(), + $this->getEncoding(), + $override ? 
array_merge($this->getContext(), $context) : array_merge($context, $this->getContext()) + ); + } + + public static function utf8(string $text, array $context = []): self + { + return new self($text, TextEncoding::UTF8, $context); + } + + public function __toString() + { + return $this->getContent(); + } +} diff --git a/src/TextInterface.php b/src/TextInterface.php new file mode 100644 index 0000000..3d41aa5 --- /dev/null +++ b/src/TextInterface.php @@ -0,0 +1,18 @@ +getContent()); + + // Github Flavored Markdown + // Header + $output = \Safe\preg_replace('/\n={2,}/', '\n', $output); + // Fenced codeblocks + //@TODO parse programming language comments from codeblock instead of removing whole block + $output = \Safe\preg_replace('/~{3}.*\n/', '', $output); + // Strikethrough + $output = \Safe\preg_replace('/~~/', '', $output); + // Common Markdown + // Remove HTML tags + $output = \Safe\preg_replace('/<[^>]*>/', '', $output); + // Remove setext-style headers + $output = \Safe\preg_replace('/^[=\-]{2,}\s*$/', '', $output); + // Remove footnotes? + $output = \Safe\preg_replace('/\[\^.+?\](\: .*?$)?/', '', $output); + $output = \Safe\preg_replace('/\s{0,2}\[.*?\]: .*?$/', '', $output); + // Remove images + $output = \Safe\preg_replace('/\!\[(.*?)\][\[\(].*?[\]\)]/', '$1', $output); + // Remove inline links + $output = \Safe\preg_replace('/\[(.*?)\][\[\(].*?[\]\)]/', '$1', $output); + // Remove blockquotes + $output = \Safe\preg_replace('/^\s{0,3}>\s?/', '', $output); + // Remove reference-style links? 
+ $output = \Safe\preg_replace('/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/', '', $output); + // Remove atx-style headers + //@TODO find a way to merge the two regex below + // remove ## Heading ## + $output = \Safe\preg_replace('/^#{1,6}\s+(.*)(\s+#{1,6})$/m', '$1', $output); + // remove ## Heading + $output = \Safe\preg_replace('/^#{1,6}\s+(.*)$/m', '$1', $output); + // Remove emphasis (repeat the line to remove double emphasis) + $output = \Safe\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output); + $output = \Safe\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output); + // Remove list items + $output = \Safe\preg_replace('/^([^\S\r\n]*)\*\s/m', '$1', $output); + // Remove code blocks + $output = \Safe\preg_replace('/^`{3,}(.*)*$/m', '', $output); + // Remove inline code + $output = \Safe\preg_replace('/`(.+?)`/', '$1', $output); + + return Text::utf8($output, $text->getContext()); + } +} diff --git a/src/TextProcessor/TextProcessorInterface.php b/src/TextProcessor/TextProcessorInterface.php new file mode 100644 index 0000000..b40fe50 --- /dev/null +++ b/src/TextProcessor/TextProcessorInterface.php @@ -0,0 +1,12 @@ +commandArgs = $command; + } else { + if (is_string($command)) { + $this->commandArgs = [$command]; + } else { + throw new InvalidArgumentException( + \Safe\sprintf( + 'Command should be an "array" or a "string", "%s" given', + is_object($command) ? 
get_class($command) : gettype($command) + ) + ); + } + } + } + + public function addArg(string $arg): self + { + $args = $this->commandArgs; + $args[] = $arg; + + return new self($args); + } + + /** + * @param string[] $argsToAdd + */ + public function addArgs(iterable $argsToAdd): self + { + $args = $this->commandArgs; + + foreach ($argsToAdd as $arg) { + $args[] = $arg; + } + + return new self($args); + } + + public function getArgs(): array + { + return $this->commandArgs; + } + + public function asString(): string + { + return implode(' ', array_map([$this, 'escapeArgument'], $this->commandArgs)); + } + + /** + * Escapes a string to be used as a shell argument. + */ + //@codingStandardsIgnoreLine SlevomatCodingStandard.Classes.UnusedPrivateElements.UnusedMethod + private function escapeArgument(string $argument): string + { + if ('\\' !== \DIRECTORY_SEPARATOR) { + return "'" . str_replace("'", "'\\''", $argument) . "'"; + } + if ('' === $argument) { + return '""'; + } + if (false !== strpos($argument, "\0")) { + $argument = str_replace("\0", '?', $argument); + } + if (!\Safe\preg_match('/[\/()%!^"<>&|\s]/', $argument)) { + return $argument; + } + $argument = \Safe\preg_replace('/(\\\\+)$/', '$1$1', $argument); + + return '"' . str_replace(['"', '^', '%', '!', "\n"], ['""', '"^^"', '"^%"', '"^!"', '!LF!'], $argument) . 
'"'; + } + //@codingStandardsIgnoreEnd +} diff --git a/src/Utils/IspellOutputParser.php b/src/Utils/IspellOutputParser.php new file mode 100644 index 0000000..5b85f09 --- /dev/null +++ b/src/Utils/IspellOutputParser.php @@ -0,0 +1,51 @@ +inheritEnvironmentVariables(true); + } else { + // Symfony < 3.2 + $process->setEnv(['LANG' => getenv('LANG')]); + } + $process->setTimeout($timeout); + + try { + $process->mustRun($callback, $env); + } catch (ExceptionInterface $e) { + throw new ProcessFailedException($process, $e); + } + + return $process; + } +} diff --git a/src/Utils/ProcessRunnerTest.php b/src/Utils/ProcessRunnerTest.php new file mode 100644 index 0000000..746aca2 --- /dev/null +++ b/src/Utils/ProcessRunnerTest.php @@ -0,0 +1,19 @@ +expectException(ProcessFailedException::class); + ProcessRunner::run(new Process(['non_existing_binaries'])); + } +} diff --git a/src/Utils/SortedNumericArrayNearestValueFinder.php b/src/Utils/SortedNumericArrayNearestValueFinder.php new file mode 100644 index 0000000..2e6f922 --- /dev/null +++ b/src/Utils/SortedNumericArrayNearestValueFinder.php @@ -0,0 +1,65 @@ + 1) { + $probe = ($high + $low) / 2; + + Assert::integerish($haystack[$probe]); + if ($haystack[$probe] < $needle) { + $low = $probe; + } else { + $high = $probe; + } + } + if ($high === count($haystack) or $haystack[$high] !== $needle) { + if ($high === count($haystack)) { + Assert::integerish($haystack[$high - 1]); + + return $high - 1; + } + + $ceil_low = (int) ceil($low); + $floor_low = (int) floor($low); + $high_distance = $haystack[$ceil_low] - $needle; + $low_distance = $needle - $haystack[$floor_low]; + + if ($mode === self::FIND_LOWER) { + return $floor_low; + } + + if ($mode === self::FIND_HIGHER) { + return $ceil_low; + } + + if ($mode === self::FIND_DEFAULT) { + return ($high_distance >= $low_distance) ? $ceil_low : $floor_low; + } + + throw new InvalidArgumentException('Finding mode value "' . $mode . 
'" is not supported'); + } + + return (int) $high; + } +} diff --git a/src/Utils/TextEncoding.php b/src/Utils/TextEncoding.php new file mode 100644 index 0000000..c41a751 --- /dev/null +++ b/src/Utils/TextEncoding.php @@ -0,0 +1,11 @@ +mustRun(); + } catch (ExceptionInterface $exception) { + $processFailure = new ProcessFailedException($process, $exception); + $this->assertSame( + 'Process with command "non_existing_binaries" has failed running with exit code 127(Command not found)', + $processFailure->getMessage() + ); + } + } + + public function testSymfonyBootingProcessFailedException() + { + + $process = new Process('echo test', __DIR__ . '/notfound/'); + try { + $process->mustRun(); + } catch (ExceptionInterface $exception) { + $processFailure = new ProcessFailedException($process, $exception); + $this->assertSame( + 'Process with command "echo test" has failed with exit code 0()', + $processFailure->getMessage() + ); + + return; + } + + $this->markTestSkipped('Test is only relevant for symfony/process: ^4.0'); + } +} diff --git a/tests/PhpSpellcheck/Tests/Exception/ProcessHasErrorOutputExceptionTest.php b/tests/PhpSpellcheck/Tests/Exception/ProcessHasErrorOutputExceptionTest.php new file mode 100644 index 0000000..25f03ad --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Exception/ProcessHasErrorOutputExceptionTest.php @@ -0,0 +1,27 @@ +assertSame(<<getMessage()); + } +} diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Aspell/bin/aspell.sh b/tests/PhpSpellcheck/Tests/Fixtures/Aspell/bin/aspell.sh new file mode 100755 index 0000000..f83553f --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Aspell/bin/aspell.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# +# aspell binary stub +# + +folder=$(dirname $0) + +case "$*" in + 'dump dicts') + cat "$folder/../dicts.txt" + ;; + *'-a'*) + cat "$folder/../check.txt" + ;; +esac +exit 0 diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Aspell/check.txt b/tests/PhpSpellcheck/Tests/Fixtures/Aspell/check.txt new file mode 100644 index 
0000000..8da56f2 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Aspell/check.txt @@ -0,0 +1,22 @@ +@(#) International Ispell Version 3.1.20 (but really Aspell 0.60.6.1) +& Tigr 34 0: Ti gr, Ti-gr, Tiger, Tier +* +* + +* +& theforests 10 3: the forests, the-forests, reforests, theorists, deforests, forests, theorist's, afforests, Forest's, forest's +* +* +* + +* +& imortal 6 5: immortal, mortal, immortally, immortals, immoral, immortal's +* +* +& eey 13 21: eye, EEO, EEC, EEG, eek, eel, Key, bey, fey, hey, key, e'er, e'en + +& CCould 17 0: Could, Cold, Gould, Cloud, Coiled, Cloudy, Coaled, Cooled, Colt, Clod, Cult, Gold, Scold, Mould, Would, Clout, Clued +* +* +* +* diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Aspell/dicts.txt b/tests/PhpSpellcheck/Tests/Fixtures/Aspell/dicts.txt new file mode 100644 index 0000000..01f87ba --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Aspell/dicts.txt @@ -0,0 +1,14 @@ +en +en-variant_0 +en-variant_1 +en_GB +en_GB-ise +en_GB-ise-w_accents +en_GB-ise-wo_accents +en_GB-ize +en_GB-ize-w_accents +en_GB-ize-wo_accents +ru +ru-ye +ru-yeyo +ru-yo \ No newline at end of file diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Hunspell/bin/hunspell.php b/tests/PhpSpellcheck/Tests/Fixtures/Hunspell/bin/hunspell.php new file mode 100755 index 0000000..991be65 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Hunspell/bin/hunspell.php @@ -0,0 +1,28 @@ +#!/usr/bin/env php +&2 echo "error" diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Ispell/bin/ispell.sh b/tests/PhpSpellcheck/Tests/Fixtures/Ispell/bin/ispell.sh new file mode 100755 index 0000000..410874e --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Ispell/bin/ispell.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# +# ispell binary stub +# + +folder=$(dirname $0) +# +#abspath() { +# cd "$(dirname "$1")" +# printf "%s/%s\n" "$(pwd)" "$(basename "$1")" +# cd "$OLDPWD" +#} +# +#dictionaries=$(abspath "$folder/../lib/ispell") + +case "$*" in + 'which ispell') + echo "usr/bin/ispell" 
+ ;; + "ls usr/lib/ispell") + ls "$folder/../lib/ispell" + ;; + *'-a'*) + cat "$folder/../check.txt" + ;; +esac +exit 0 diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Ispell/check.txt b/tests/PhpSpellcheck/Tests/Fixtures/Ispell/check.txt new file mode 100644 index 0000000..8e657f9 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Ispell/check.txt @@ -0,0 +1,27 @@ +@(#) International Ispell Version 3.4.00 8 Feb 2015 +& Tigr 2 0: Tier, Tiger +* +* +* +* +* +* +* + +* +& theforests 2 3: the forests, the-forests +* +* +* + +* +& imortal 2 5: immortal, mortal +* +* +& eey 16 21: bey, dey, eeg, eel, eely, eery, Ely, eye, fey, gey, hey, key, ley, ney, rey, sey + +& CCould 1 0: Could +* +* +* +* diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Ispell/lib/ispell/american.aff b/tests/PhpSpellcheck/Tests/Fixtures/Ispell/lib/ispell/american.aff new file mode 100644 index 0000000..e69de29 diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/SubDirectory/mispelling4.txt b/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/SubDirectory/mispelling4.txt new file mode 100644 index 0000000..31a30fc --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/SubDirectory/mispelling4.txt @@ -0,0 +1 @@ +mispelling4 diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling2.txt b/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling2.txt new file mode 100644 index 0000000..cd0dae3 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling2.txt @@ -0,0 +1 @@ +mispelling2 diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling3.txt b/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling3.txt new file mode 100644 index 0000000..17a79e5 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Text/Directory/mispelling3.txt @@ -0,0 +1 @@ +mispélling3 diff --git a/tests/PhpSpellcheck/Tests/Fixtures/Text/mispelling1.txt b/tests/PhpSpellcheck/Tests/Fixtures/Text/mispelling1.txt new file mode 100644 index 
0000000..860d5ea --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Fixtures/Text/mispelling1.txt @@ -0,0 +1 @@ +mispelling1 diff --git a/tests/PhpSpellcheck/Tests/MisspellingFinderTest.php b/tests/PhpSpellcheck/Tests/MisspellingFinderTest.php new file mode 100644 index 0000000..3041a59 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/MisspellingFinderTest.php @@ -0,0 +1,147 @@ +spellChecker = $this->createMock(SpellcheckerInterface::class); + $this->misspellingHandler = $this->createMock(MisspellingHandlerInterface::class); + $this->textProcessor = $this->createMock(TextProcessorInterface::class); + } + + public function testFindFromString() + { + $misspellingFinder = new MisspellingFinder( + $this->spellChecker + ); + $misspelling1 = $this->generateMisspellingMock(); + + $this->spellChecker + ->expects($this->once()) + ->method('check') + ->willReturn([$misspelling1]); + + $this->assertSame([$misspelling1], iterator_to_array($misspellingFinder->find('mispell'))); + } + + public function testFindFromInvalidArgument() + { + $this->expectException(InvalidArgumentException::class); + + $misspellingFinder = new MisspellingFinder( + $this->spellChecker, + $this->misspellingHandler, + $this->textProcessor + ); + + $misspellingFinder->find(3); + } + + public function testFindWithMisspelingHandler() + { + $this->misspellingHandler->expects($this->once()) + ->method('handle'); + + $misspellingFinder = new MisspellingFinder( + $this->spellChecker, + $this->misspellingHandler + ); + $misspelling1 = $this->generateMisspellingMock(); + + $this->spellChecker + ->expects($this->once()) + ->method('check') + ->willReturn([$misspelling1]); + + $this->assertSame([$misspelling1], iterator_to_array($misspellingFinder->find('mispell'))); + } + + public function testFindWithTextProcessor() + { + $this->textProcessor->expects($this->once()) + ->method('process'); + + $misspellingFinder = new MisspellingFinder( + $this->spellChecker, + null, + $this->textProcessor + ); + $misspelling1 = 
$this->generateMisspellingMock(); + + $this->spellChecker + ->expects($this->once()) + ->method('check') + ->willReturn([$misspelling1]); + + $this->assertSame([$misspelling1], iterator_to_array($misspellingFinder->find('mispell'))); + } + + public function testFindFromSource() + { + $misspelling1 = $this->generateMisspellingMock(); + $this->spellChecker + ->expects($this->once()) + ->method('check') + ->willReturn([$misspelling1]); + + $source = $this->createMock(SourceInterface::class); + $source->method('toTexts') + ->willReturn([$text = $this->generateTextMock()]); + + $misspellingFinder = new MisspellingFinder( + $this->spellChecker, + null, + $this->textProcessor + ); + + $this->assertSame([$misspelling1], iterator_to_array($misspellingFinder->find('mispell'))); + } + + private function generateMisspellingMock() + { + $mispelling = $this->createMock(MisspellingInterface::class); + $mispelling->method('getWord') + ->willReturn('mispelled'); + $mispelling->method('getContext') + ->willReturn([]); + $mispelling->method('getLineNumber') + ->willReturn(1); + $mispelling->method('getOffset') + ->willReturn(1); + $mispelling->method('getSuggestions') + ->willReturn(['misspelled', 'misspelling']); + + return $mispelling; + } + + private function generateTextMock() + { + $text = $this->createMock(TextInterface::class); + $text->method('getContent') + ->willReturn('mispell'); + $text->method('getContext') + ->willReturn([]); + $text->method('getEncoding') + ->willReturn('utf-8'); + + return $text; + } +} diff --git a/tests/PhpSpellcheck/Tests/MisspellingHandler/EchoHandlerTest.php b/tests/PhpSpellcheck/Tests/MisspellingHandler/EchoHandlerTest.php new file mode 100644 index 0000000..e453aec --- /dev/null +++ b/tests/PhpSpellcheck/Tests/MisspellingHandler/EchoHandlerTest.php @@ -0,0 +1,23 @@ +expectOutputString( + 'word: mispelling | line: 10 | offset: 4 | suggestions: misspelling,misspellings | context: {"sentence":"two mispelling"}' . 
PHP_EOL + ); + + (new EchoHandler())->handle( + [new Misspelling('mispelling', 4, 10, ['misspelling', 'misspellings'], ['sentence' => 'two mispelling'])] + ); + } +} diff --git a/tests/PhpSpellcheck/Tests/MisspellingTest.php b/tests/PhpSpellcheck/Tests/MisspellingTest.php new file mode 100644 index 0000000..9726483 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/MisspellingTest.php @@ -0,0 +1,73 @@ +mergeSuggestions(['misspelling', 'misspelled']); + + $this->assertSame(['misspelled', 'misspelling'], $misspelling->getSuggestions()); + } + + /** + * @dataProvider nonDeterminableUniqueIdentityMisspellings + */ + public function testCanDeterminateUniqueIdentity(Misspelling $misspelling) + { + $this->assertFalse($misspelling->canDeterminateUniqueIdentity()); + } + + + public function nonDeterminableUniqueIdentityMisspellings() + { + return [ + [new Misspelling('mispelled')], + [new Misspelling('mispelled', 1)], + [new Misspelling('mispelled', null, 1)], + ]; + } + + public function testContextOverridingMerge() + { + $misspelling = (new Misspelling('mispelled', 1, 0, [], ['idx' => '1']))->mergeContext([ + 'idx' => 'foo', + 'idx2' => '2', + ]); + + $this->assertEquals(new Misspelling('mispelled', 1, 0, [], ['idx' => 'foo', 'idx2' => '2']), $misspelling); + } + + public function testContextNonOverridingMerge() + { + $misspelling = (new Misspelling('mispelled', 1, 0, [], ['idx' => '1']))->mergeContext([ + 'idx' => 'foo', + 'idx2' => '2', + ], false); + + $this->assertEquals(new Misspelling('mispelled', 1, 0, [], ['idx' => '1', 'idx2' => '2']), $misspelling); + } + + public function testExceptionWhenMergingEmptyContext() + { + $this->expectException(InvalidArgumentException::class); + (new Misspelling('mispelled', 1, 0, [], []))->mergeContext([]); + } + + public function testImmutableSetContext() + { + $misspelling = new Misspelling('mispelled', 1, 0, [], []); + $misspellingAfterSettingContext = $misspelling->setContext(['test']); + + $this->assertNotSame($misspelling, 
$misspellingAfterSettingContext); + } +} diff --git a/tests/PhpSpellcheck/Tests/Source/DirectoryTest.php b/tests/PhpSpellcheck/Tests/Source/DirectoryTest.php new file mode 100644 index 0000000..920ba93 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Source/DirectoryTest.php @@ -0,0 +1,68 @@ +toTexts(['ctx' => 'in tests']); + $expectedValues = [ + new Text( + "mispélling3\n", + TextEncoding::UTF8, + ['ctx' => 'in tests', 'filePath' => realpath(self::TEXT_FIXTURES_PATH . '/mispelling3.txt')] + ), + new Text( + "mispelling2\n", + TextEncoding::ASCII, + ['ctx' => 'in tests', 'filePath' => realpath(self::TEXT_FIXTURES_PATH . '/mispelling2.txt')] + ), + new Text( + "mispelling4\n", + TextEncoding::ASCII, + [ + 'ctx' => 'in tests', + 'filePath' => realpath(self::TEXT_FIXTURES_PATH . '/SubDirectory/mispelling4.txt'), + ] + ), + ]; + $realValues = iterator_to_array($textsFromDirectory); + + foreach ($expectedValues as $value) { + $this->assertTrue(in_array($value, $realValues)); + } + } + + public function testToTextsMatchingRegex() + { + $textsFromDirectory = (new Directory(self::TEXT_FIXTURES_PATH, '/^((?!mispelling3\.txt).)*$/')) + ->toTexts(['ctx' => 'in tests']); + + $expectedValues = [ + new Text( + "mispelling2\n", + TextEncoding::ASCII, + ['ctx' => 'in tests', 'filePath' => realpath(self::TEXT_FIXTURES_PATH . '/mispelling2.txt')] + ), + new Text("mispelling4\n", TextEncoding::ASCII, [ + 'ctx' => 'in tests', + 'filePath' => realpath(self::TEXT_FIXTURES_PATH . 
'/SubDirectory/mispelling4.txt'), + ]), + ]; + $realValues = iterator_to_array($textsFromDirectory); + + foreach ($expectedValues as $value) { + $this->assertTrue(in_array($value, $realValues)); + } + } +} diff --git a/tests/PhpSpellcheck/Tests/Source/FileTest.php b/tests/PhpSpellcheck/Tests/Source/FileTest.php new file mode 100644 index 0000000..5768256 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Source/FileTest.php @@ -0,0 +1,58 @@ +toTexts(['ctx' => 'in tests']); + $this->assertEquals( + [ + new Text( + "mispelling1\n", + TextEncoding::ASCII, + [ + 'ctx' => 'in tests', + 'filePath' => realpath(self::TEXT_FIXTURE_FILE_PATH), + ] + ), + ], + $texts + ); + } + + public function testInvalidPath() + { + $this->expectException(FilesystemException::class); + (new File('invalidPath'))->toTexts(); + } + + public function testToTextsWithEncoding() + { + $texts = (new File(self::TEXT_FIXTURE_FILE_PATH, TextEncoding::UTF8))->toTexts(['ctx' => 'in tests']); + $this->assertEquals( + [ + new Text( + "mispelling1\n", + TextEncoding::UTF8, + [ + 'ctx' => 'in tests', + 'filePath' => realpath(self::TEXT_FIXTURE_FILE_PATH), + ] + ), + ], + $texts + ); + } +} diff --git a/tests/PhpSpellcheck/Tests/Source/MultipleSourceTest.php b/tests/PhpSpellcheck/Tests/Source/MultipleSourceTest.php new file mode 100644 index 0000000..2d17028 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Source/MultipleSourceTest.php @@ -0,0 +1,62 @@ +generateMockText('mispelling1', ['ctx' => null]); + $mockText1AfterContextMerge = $this->generateMockText('mispelling1AfterMerge', ['ctx' => 'merged']); + $mockText1->method('mergeContext') + ->willReturn($mockText1AfterContextMerge); + $mockText2 = $this->generateMockText('mispelling2'); + $mockText2->method('mergeContext') + ->willReturn($mockText2); + $mockSource1 = $this->generateMockSource([$mockText1]); + $mockSource2 = $this->generateMockSource([$mockText2]); + + $source = new MultipleSource( + [ + $mockSource1, + $mockSource2, + ] + ); + + $expectedTexts = 
[$mockText1AfterContextMerge, $mockText2]; + + $this->assertSame($expectedTexts, iterator_to_array($source->toTexts())); + } + + private function generateMockText(string $content, array $context = []) + { + $textMock = $this->createMock(TextInterface::class); + $textMock->method('getContext') + ->willReturn($context); + $textMock->method('getEncoding') + ->willReturn(TextEncoding::UTF8); + $textMock->method('getContent') + ->willReturn($content); + + return $textMock; + } + + private function generateMockSource(array $texts) + { + $sourceMock = $this->createMock(SourceInterface::class); + $sourceMock->expects($this->once()) + ->method('toTexts') + ->willReturn($texts); + + return $sourceMock; + } +} diff --git a/tests/PhpSpellcheck/Tests/Spellchecker/AspellTest.php b/tests/PhpSpellcheck/Tests/Spellchecker/AspellTest.php new file mode 100644 index 0000000..b73a2e3 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Spellchecker/AspellTest.php @@ -0,0 +1,101 @@ +assertWorkingSpellcheck(self::FAKE_BINARIES_PATH); + } + + public function testBadCheckRequest() + { + $this->expectException(ProcessHasErrorOutputException::class); + Aspell::create(IspellTest::FAKE_BAD_BINARIES_PATH)->check('bla'); + } + + public function testGetSupportedLanguagesFromFakeBinaries() + { + $this->assertWorkingSupportedLanguages(self::FAKE_BINARIES_PATH); + } + + /** + * @group integration + */ + public function testSpellcheckFromRealBinaries() + { + $this->assertWorkingSpellcheck(self::realBinaryPath()); + } + + /** + * @group integration + */ + public function testGetSupportedLanguagesFromRealBinaries() + { + $this->assertWorkingSupportedLanguages(self::realBinaryPath()); + } + + public function getTextInput() + { + return TextTest::CONTENT_STUB; + } + + public function getFakeDicts() + { + return explode(PHP_EOL, file_get_contents(__DIR__ . 
'/../Fixtures/Aspell/dicts.txt')); + } + + private function assertWorkingSpellcheck($binaries) + { + $aspell = new Aspell(new CommandLine($binaries)); + /** @var Misspelling[] $misspellings */ + $misspellings = iterator_to_array( + $aspell->check( + $this->getTextInput(), + ['en_US'], + ['ctx'], + TextEncoding::UTF8 + ) + ); + + $this->assertSame(['ctx'], $misspellings[0]->getContext()); + $this->assertSame('Tigr', $misspellings[0]->getWord()); + $this->assertSame(0, $misspellings[0]->getOffset()); + $this->assertSame(1, $misspellings[0]->getLineNumber()); + $this->assertNotEmpty($misspellings[0]->getSuggestions()); + + $this->assertSame(['ctx'], $misspellings[1]->getContext()); + $this->assertSame('theforests', $misspellings[1]->getWord()); + $this->assertSame(3, $misspellings[1]->getOffset()); + $this->assertSame(2, $misspellings[1]->getLineNumber()); + $this->assertNotEmpty($misspellings[1]->getSuggestions()); + } + + public function assertWorkingSupportedLanguages($binaries) + { + $aspell = new Aspell(new CommandLine($binaries)); + $this->assertNotFalse(array_search('en_GB', $aspell->getSupportedLanguages())); + } + + public static function realBinaryPath(): string + { + if (getenv('ASPELL_BINARY_PATH') === false) { + throw new \RuntimeException('"ASPELL_BINARY_PATH" env must be set to find the executable to run tests on'); + } + + return getenv('ASPELL_BINARY_PATH'); + } +} diff --git a/tests/PhpSpellcheck/Tests/Spellchecker/HunspellTest.php b/tests/PhpSpellcheck/Tests/Spellchecker/HunspellTest.php new file mode 100644 index 0000000..8e0ad12 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Spellchecker/HunspellTest.php @@ -0,0 +1,109 @@ +assertWorkingSpellcheck(self::FAKE_BINARIES_PATH); + } + + public function testGetSupportedLanguagesFromFakeBinaries() + { + $this->assertWorkingSupportedLanguages(self::FAKE_BINARIES_PATH); + } + + public function testBadCheckRequest() + { + $this->expectException(ProcessHasErrorOutputException::class); + (new Hunspell(new 
CommandLine(IspellTest::FAKE_BAD_BINARIES_PATH)))->check('bla'); + } + + /** + * @group integration + */ + public function testSpellcheckFromRealBinariesLanguage() + { + $hunspell = new Hunspell(new CommandLine(self::realBinaryPath())); + $misspellings = iterator_to_array($hunspell->check('mispell', ['en_US'])); + $this->assertInstanceOf(Misspelling::class, $misspellings[0]); + } + + /** + * @group integration + */ + public function testSpellcheckFromRealBinaries() + { + $this->assertWorkingSpellcheck(self::realBinaryPath()); + } + + /** + * @group integration + */ + public function testGetSupportedLanguagesFromRealBinaries() + { + $this->assertWorkingSupportedLanguages(self::realBinaryPath()); + } + + public function getTextInput() + { + return TextTest::CONTENT_STUB; + } + + public function getFakeDicts() + { + return explode(PHP_EOL, file_get_contents(__DIR__ . '/../Fixtures/Hunspell/dicts.txt')); + } + + + /** + * @param string|array $binaries + */ + private function assertWorkingSpellcheck($binaries) + { + $hunspell = new Hunspell(new CommandLine($binaries)); + /** @var Misspelling[] $misspellings */ + $misspellings = iterator_to_array($hunspell->check($this->getTextInput(), ['en_US'], ['ctx'])); + + $this->assertSame(['ctx'], $misspellings[0]->getContext()); + $this->assertSame('Tigr', $misspellings[0]->getWord()); + $this->assertSame(0, $misspellings[0]->getOffset()); + $this->assertSame(1, $misspellings[0]->getLineNumber()); + $this->assertNotEmpty($misspellings[0]->getSuggestions()); + + $this->assertSame(['ctx'], $misspellings[1]->getContext()); + $this->assertSame('страх', $misspellings[1]->getWord()); + $this->assertSame(21, $misspellings[1]->getOffset()); + $this->assertSame(1, $misspellings[1]->getLineNumber()); + } + + /** + * @param string|array $binaries + */ + public function assertWorkingSupportedLanguages($binaries) + { + $hunspell = new Hunspell(new CommandLine($binaries)); + $this->assertNotFalse(array_search('en_US', 
$hunspell->getSupportedLanguages())); + } + + public static function realBinaryPath(): string + { + if (getenv('HUNSPELL_BINARY_PATH') === false) { + throw new \RuntimeException('"HUNSPELL_BINARY_PATH" env must be set to find the executable to run tests on'); + } + + return getenv('HUNSPELL_BINARY_PATH'); + } +} diff --git a/tests/PhpSpellcheck/Tests/Spellchecker/IspellTest.php b/tests/PhpSpellcheck/Tests/Spellchecker/IspellTest.php new file mode 100644 index 0000000..bdb7a3a --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Spellchecker/IspellTest.php @@ -0,0 +1,114 @@ +assertWorkingSpellcheck(self::FAKE_BINARIES_PATH); + } + + public function testGetSupportedLanguagesFromFakeBinaries() + { + $this->assertWorkingSupportedLanguages(self::FAKE_BINARIES_PATH, self:: FAKE_BINARIES_PATH); + } + + public function testBadCheckRequest() + { + $this->expectException(ProcessHasErrorOutputException::class); + Ispell::create(self::FAKE_BAD_BINARIES_PATH)->check('bla'); + } + + /** + * @group integration + */ + public function testSpellcheckFromRealBinaries() + { + $this->assertWorkingSpellcheck(self::realBinaryPath()); + } + + /** + * @group integration + */ + public function testGetSupportedLanguagesFromRealBinaries() + { + $this->assertWorkingSupportedLanguages(self::realBinaryPath(), self::realShellPath()); + } + + public function getTextInput() + { + return TextTest::CONTENT_STUB; + } + + public function getFakeDicts() + { + return explode(PHP_EOL, file_get_contents(__DIR__ . 
'/../Fixtures/Ispell/dicts.txt')); + } + + private function assertWorkingSpellcheck($binaries) + { + $ispell = new Ispell(new CommandLine($binaries)); + /** @var Misspelling[] $misspellings */ + $misspellings = iterator_to_array( + $ispell->check( + $this->getTextInput(), + ['american'], + ['ctx'], + TextEncoding::UTF8 + ) + ); + + $this->assertSame($misspellings[0]->getContext(), ['ctx']); + $this->assertSame($misspellings[0]->getWord(), 'Tigr'); + $this->assertSame($misspellings[0]->getOffset(), 0); + $this->assertSame($misspellings[0]->getLineNumber(), 1); + $this->assertNotEmpty($misspellings[0]->getSuggestions()); + + $this->assertSame($misspellings[1]->getContext(), ['ctx']); + $this->assertSame($misspellings[1]->getWord(), 'theforests'); + $this->assertSame($misspellings[1]->getOffset(), 3); + $this->assertSame($misspellings[1]->getLineNumber(), 2); + $this->assertNotEmpty($misspellings[1]->getSuggestions()); + } + + public function assertWorkingSupportedLanguages($binaries, $shellEntryPoint = null) + { + $ispell = new Ispell( + new CommandLine($binaries), + $shellEntryPoint !== null ? new CommandLine($shellEntryPoint) : null + ); + $this->assertNotFalse(array_search('american', $ispell->getSupportedLanguages())); + } + + public static function realBinaryPath(): string + { + if (getenv('ISPELL_BINARY_PATH') === false) { + throw new \RuntimeException('"ISPELL_BINARY_PATH" env must be set to find the executable to run tests on'); + } + + return getenv('ISPELL_BINARY_PATH'); + } + + public static function realShellPath(): ?string + { + if (getenv('ISPELL_SHELL_PATH') === false) { + throw new \RuntimeException('"ISPELL_SHELL_PATH" env must be set to find the executable to run tests on'); + } + + return getenv('ISPELL_SHELL_PATH') ? 
getenv('ISPELL_SHELL_PATH') : null; + } +} diff --git a/tests/PhpSpellcheck/Tests/Spellchecker/LanguageToolTest.php b/tests/PhpSpellcheck/Tests/Spellchecker/LanguageToolTest.php new file mode 100644 index 0000000..7349050 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Spellchecker/LanguageToolTest.php @@ -0,0 +1,145 @@ +getMockBuilder(LanguageToolApiClient::class) + ->disableOriginalConstructor() + ->getMock(); + + return $mock; + } + + public function testSpellcheck() + { + $client = $this->getClientMock(); + $client->expects($this->once()) + ->method('spellCheck') + ->willReturn([ + 'matches' => [ + [ + 'message' => 'Possible spelling mistake found', + 'replacements' => [['value' => 'Tier']], + 'offset' => 0, + 'length' => 4, + 'context' => [ + 'text' => 'Tigr, tiger, burning страх. In theforests of...', + 'offset' => 0, + 'length' => 4, + ], + 'sentence' => 'Tigr, tiger, burning страх.', + 'rule' => [ + 'description' => 'Possible spelling mistake', + 'issueType' => 'misspelling', + ], + ], + [ + 'message' => 'Possible spelling mistake found', + 'replacements' => [['value' => 'Could']], + 'offset' => 81, + 'length' => 6, + 'context' => [ + 'text' => '... 
of the night, What imortal hand or eey CCould frame thy fearful symmetry?', + 'offset' => 43, + 'length' => 6, + ], + 'sentence' => "In theforests of the night,\nWhat imortal hand or eey\nCCould frame thy fearful symmetry?", + 'rule' => [ + 'description' => 'Possible spelling mistake', + 'issueType' => 'misspelling', + ], + ], + ], + ]); + + $this->assertWorkingSpellcheck($client); + } + + public function testGetSupportedLanguages() + { + $client = $this->getClientMock(); + $client->expects($this->once()) + ->method('getSupportedLanguages') + ->willReturn(['en']); + + $this->assertWorkingSupportedLanguages($client); + } + + /** + * @group integration + */ + public function testSpellcheckFromRealAPI() + { + $this->assertWorkingSpellcheck(new LanguageToolApiClient(self::realAPIEndpoint())); + } + + /** + * @group integration + */ + public function testGetSupportedLanguagesFromRealBinaries() + { + $this->assertWorkingSupportedLanguages(new LanguageToolApiClient(self::realAPIEndpoint())); + } + + public function getTextInput() + { + return TextTest::CONTENT_STUB; + } + + private function assertWorkingSpellcheck(LanguageToolApiClient $apiClient) + { + $languageTool = new LanguageTool($apiClient); + /** @var Misspelling[] $misspellings */ + $misspellings = iterator_to_array( + $languageTool->check( + $this->getTextInput(), + ['en-US'], + ['ctx' => 'ctx'], + TextEncoding::UTF8 + ) + ); + + // test first line offset computation + $this->assertArrayHasKey('ctx', $misspellings[0]->getContext()); + $this->assertSame($misspellings[0]->getWord(), 'Tigr'); + $this->assertSame($misspellings[0]->getOffset(), 0); + $this->assertSame($misspellings[0]->getLineNumber(), 1); + $this->assertNotEmpty($misspellings[0]->getSuggestions()); + + end($misspellings); + $lastKey = key($misspellings); + // test last line offset computation + $this->assertArrayHasKey('ctx', $misspellings[$lastKey]->getContext()); + $this->assertSame($misspellings[$lastKey]->getWord(), 'CCould'); + 
$this->assertSame($misspellings[$lastKey]->getOffset(), 1); + $this->assertSame($misspellings[$lastKey]->getLineNumber(), 4); + $this->assertNotEmpty($misspellings[$lastKey]->getSuggestions()); + } + + public function assertWorkingSupportedLanguages(LanguageToolApiClient $apiClient) + { + $languageTool = new LanguageTool($apiClient); + $this->assertNotFalse(array_search('en', $languageTool->getSupportedLanguages())); + } + + private static function realAPIEndpoint(): string + { + if (getenv('LANGUAGETOOLS_ENDPOINT') === false) { + throw new \RuntimeException('"LANGUAGETOOLS_ENDPOINT" env must be set to run the tests on'); + } + + return getenv('LANGUAGETOOLS_ENDPOINT'); + } +} diff --git a/tests/PhpSpellcheck/Tests/Spellchecker/MultipleSpellCheckersTest.php b/tests/PhpSpellcheck/Tests/Spellchecker/MultipleSpellCheckersTest.php new file mode 100644 index 0000000..952b1e7 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Spellchecker/MultipleSpellCheckersTest.php @@ -0,0 +1,84 @@ +createMock(SpellcheckerInterface::class); + $misspelling1 = new Misspelling('mispelling1', 1); + $misspelling2a = new Misspelling('mispelling2', 2, 2, ['suggestionA']); + $misspelling2b = new Misspelling('mispelling2', 2, 2, ['suggestionB']); + + $spellChecker1->method('check') + ->willReturn([$misspelling1, $misspelling2a]); + $spellChecker2 = $this->createMock(SpellcheckerInterface::class); + $misspelling3 = new Misspelling('mispelling3', 3, 3); + $spellChecker2->method('check') + ->willReturn([$misspelling2b, $misspelling3]); + + $multiSpellchecker = new MultiSpellchecker([$spellChecker1, $spellChecker2]); + + $misspellings = $multiSpellchecker->check('test', ['en'], ['ctx'], TextEncoding::UTF8); + $this->assertEquals( + [ + $misspelling1->setContext(['ctx']), + new Misspelling('mispelling2', 2, 2, ['suggestionA', 'suggestionB'], ['ctx']), + $misspelling3->setContext(['ctx']), + ], + $misspellings + ); + } + + public function testCheckAndNotMergeSuggestions() + { + $spellChecker1 = 
$this->createMock(SpellcheckerInterface::class); + $misspelling1 = new Misspelling('mispelling1', 1); + $misspelling2a = new Misspelling('mispelling2', 2, 2, ['suggestionA']); + $misspelling2b = new Misspelling('mispelling2', 2, 2, ['suggestionB']); + + $spellChecker1->method('check') + ->willReturn([$misspelling1, $misspelling2a]); + $spellChecker2 = $this->createMock(SpellcheckerInterface::class); + $misspelling3 = new Misspelling('mispelling3', 3, 3); + $spellChecker2->method('check') + ->willReturn([$misspelling2b, $misspelling3]); + + $multiSpellchecker = new MultiSpellchecker([$spellChecker1, $spellChecker2], false); + + $misspellings = $multiSpellchecker->check('test', ['en'], ['ctx'], TextEncoding::UTF8); + $this->assertEquals( + [ + $misspelling1->setContext(['ctx']), + $misspelling2a->setContext(['ctx']), + $misspelling2b->setContext(['ctx']), + $misspelling3->setContext(['ctx']), + ], + iterator_to_array($misspellings) + ); + } + + public function testGetSupportedLanguages() + { + $spellChecker1 = $this->createMock(SpellcheckerInterface::class); + $spellChecker1->method('getSupportedLanguages') + ->willReturn(['en', 'fr']); + $spellChecker2 = $this->createMock(SpellcheckerInterface::class); + $spellChecker2->method('getSupportedLanguages') + ->willReturn(['fr', 'ru']); + + $multipleSpellchecker = new MultiSpellchecker([$spellChecker1, $spellChecker2]); + + $this->assertSame(['en', 'fr', 'ru'], $multipleSpellchecker->getSupportedLanguages()); + } +} diff --git a/tests/PhpSpellcheck/Tests/Spellchecker/PHPPspellTest.php b/tests/PhpSpellcheck/Tests/Spellchecker/PHPPspellTest.php new file mode 100644 index 0000000..d2ae524 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Spellchecker/PHPPspellTest.php @@ -0,0 +1,50 @@ +check('mispell', ['en'], ['ctx'])); + + $this->assertSame($misspellings[0]->getContext(), ['ctx']); + $this->assertSame($misspellings[0]->getWord(), 'mispell'); + $this->assertSame($misspellings[0]->getOffset(), 0); + 
$this->assertSame($misspellings[0]->getLineNumber(), 1); + $this->assertNotEmpty($misspellings[0]->getSuggestions()); + } + + public function testCheckWithoutEnconding() + { + $this->expectException(\InvalidArgumentException::class); + $pspell = new PHPPspell(PSPELL_FAST); + /** @var Misspelling[] $misspellings */ + iterator_to_array($pspell->check('mispell', ['en'], ['ctx'], null)); + } + + public function testGetSupportedLanguages() + { + $this->expectException(LogicException::class); + $pspell = new PHPPspell(PSPELL_FAST); + $pspell->getSupportedLanguages(); + } +} diff --git a/tests/PhpSpellcheck/Tests/TextProcessor/MarkdownRemoverTest.php b/tests/PhpSpellcheck/Tests/TextProcessor/MarkdownRemoverTest.php new file mode 100644 index 0000000..84de808 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/TextProcessor/MarkdownRemoverTest.php @@ -0,0 +1,236 @@ +assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldLeaveNonMatchingMarkdownMarkdown() + { + $string = "*Javascript* developers* are the _best_."; + $expected = "Javascript developers* are the best."; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldLeaveNonMatchingMarkdownButStripEmptyAnchors() + { + $string = "*Javascript* [developers]()* are the _best_."; + $expected = "Javascript developers* are the best."; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldStripHtml() + { + $string = "

Hello World

"; + $expected = "Hello World"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldStripAnchors() + { + $string = "*Javascript* [developers](https://engineering.condenast.io/)* are the _best_."; + $expected = "Javascript developers* are the best."; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldStripImgTags() + { + $string = "![](https://placebear.com/640/480)*Javascript* developers are the _best_."; + $expected = "Javascript developers are the best."; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldUseTheAltTextOfAnImageIfItIsProvided() + { + $string = "![This is the alt-text](https://www.example.com/images/logo.png)"; + $expected = "This is the alt-text"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldStripCodeTags() + { + $string = "In `Getting Started` we set up `something` foo."; + $expected = "In Getting Started we set up something foo."; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldLeaveHashtagsInHeadings() + { + $string = "## This #heading contains #hashtags"; + $expected = "This #heading contains #hashtags"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveHeadingsTrailingHashtags() + { + $string = "## This #heading contains #hashtags ##"; + $expected = "This #heading contains #hashtags"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveHeadingsHashtags() + { + $string = "## This #heading contains #hashtags"; + $expected = "This 
#heading contains #hashtags"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveEmphasis() + { + $string = "I italicized an *I* and it _made_ me *sad*."; + $expected = "I italicized an I and it made me sad."; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveDoubleEmphasis() + { + $string = "**this sentence has __double styling__**"; + $expected = "this sentence has double styling"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveHorizontalRules() + { + $string = "Some text on a line\n\n---\n\nA line below"; + $expected = "Some text on a line\n\n\n\n\nA line below"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveHorizontalRulesAndReplaceByAtLeastABreakLine() + { + $string = "Some text on a line\n---\nA line below"; + $expected = "Some text on a line\n\n\nA line below"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveHorizontalRulesWithSpaceSeparatedAsterisks() + { + $string = "Some text on a line\n\n* * *\n\nA line below"; + $expected = "Some text on a line\n\n\n\n\nA line below"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveBlockquotes() + { + $string = ">I am a blockquote"; + $expected = "I am a blockquote"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveBlockquotesWithSpaces() + { + $string = "> I am a blockquote"; + $expected = "I am a blockquote"; + + $this->assertSame($expected, (new 
MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldRemoveIndentedBlockquotes() + { + $string = " > I am a blockquote"; + $expected = "I am a blockquote"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + + $string = " > I am a blockquote"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldNotRemoveGreaterThanSigns() + { + $tests = [ + ['string' => '100 > 0', 'expected' => '100 > 0'], + ['string' => '100 >= 0', 'expected' => '100 >= 0'], + ['string' => '100>0', 'expected' => '100>0'], + ['string' => '> 100 > 0', 'expected' => '100 > 0'], + ['string' => '1 < 100', 'expected' => '1 < 100'], + ['string' => '1 <= 100', 'expected' => '1 <= 100'], + ]; + + foreach ($tests as $test) { + $this->assertSame( + $test['expected'], + (new MarkdownRemover())->process(Text::utf8($test['string']))->getContent() + ); + } + } + + public function testShouldStripUnorderedListLeaders() + { + $string = "Some text on a line\n\n* A list Item\n* Another list item"; + $expected = "Some text on a line\n\nA list Item\nAnother list item"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldStripOrderedListLeaders() + { + $string = "Some text on a line\n\n* A list Item\n* Another list item"; + $expected = "Some text on a line\n\nA list Item\nAnother list item"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldStripOrderedListLeadersKeepingIndentation() + { + $string = "Some text on a line\n\n* A list Item\n * Another list item"; + $expected = "Some text on a line\n\n A list Item\n Another list item"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function 
testShouldStripCodeBlocks() + { + $string = <<assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } + + public function testShouldHandleParagraphsWithMarkdown() + { + $string = "\n## This is a heading ##\n\nThis is a paragraph with [a link](http://www.disney.com/).\n\n### This is another heading\n\nIn `Getting Started` we set up `something` foo.\n\n * Some list\n * With items\n * Even indented"; + $expected = "\nThis is a heading\n\nThis is a paragraph with a link.\n\nThis is another heading\n\nIn Getting Started we set up something foo.\n\n Some list\n With items\n Even indented"; + + $this->assertSame($expected, (new MarkdownRemover())->process(Text::utf8($string))->getContent()); + } +} diff --git a/tests/PhpSpellcheck/Tests/TextTest.php b/tests/PhpSpellcheck/Tests/TextTest.php new file mode 100644 index 0000000..ffa812b --- /dev/null +++ b/tests/PhpSpellcheck/Tests/TextTest.php @@ -0,0 +1,39 @@ + '1'])->mergeContext(['idx' => 'foo', 'idx2' => '2']); + + $this->assertEquals(Text::utf8('test', ['idx' => 'foo', 'idx2' => '2']), $text); + } + + public function testContextNonOverridingMerge() + { + $text = Text::utf8('test', ['idx' => '1'])->mergeContext(['idx' => 'foo', 'idx2' => '2'], false); + + $this->assertEquals(Text::utf8('test', ['idx' => '1', 'idx2' => '2']), $text); + } + + public function testExceptionWhenMergingEmptyContext() + { + $this->expectException(InvalidArgumentException::class); + Text::utf8('test', ['idx' => '1'])->mergeContext([]); + } +} diff --git a/tests/PhpSpellcheck/Tests/Utils/CommandLineTest.php b/tests/PhpSpellcheck/Tests/Utils/CommandLineTest.php new file mode 100644 index 0000000..e37d3c6 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Utils/CommandLineTest.php @@ -0,0 +1,30 @@ +assertInstanceOf(CommandLine::class, new CommandLine('ls')); + $this->assertInstanceOf(CommandLine::class, new CommandLine(['ls'])); + } + + public function testCreateWithInvalidArgument() + { + 
$this->expectException(InvalidArgumentException::class); + new CommandLine(4); + } + + public function testAsString() + { + $this->assertSame("'ls' '-lsa'", (new CommandLine(['ls', '-lsa']))->asString()); + } +} diff --git a/tests/PhpSpellcheck/Tests/Utils/SortedNumericArrayNearestValueFinderTest.php b/tests/PhpSpellcheck/Tests/Utils/SortedNumericArrayNearestValueFinderTest.php new file mode 100644 index 0000000..da9e2a3 --- /dev/null +++ b/tests/PhpSpellcheck/Tests/Utils/SortedNumericArrayNearestValueFinderTest.php @@ -0,0 +1,111 @@ +assertSame(0, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 1337, + [1000, 2000, 3000], + SortedNumericArrayNearestValueFinder::FIND_HIGHER + ); + $this->assertSame(1, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 2000, + [1000, 2000, 3000], + SortedNumericArrayNearestValueFinder::FIND_LOWER + ); + $this->assertSame(1, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 1500, + [1000, 2000, 3000], + SortedNumericArrayNearestValueFinder::FIND_DEFAULT + ); + $this->assertSame(1, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 1337, + [1000, 2000, 3000], + SortedNumericArrayNearestValueFinder::FIND_DEFAULT + ); + $this->assertSame(1, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 4000, + [1000, 2000, 3000], + SortedNumericArrayNearestValueFinder::FIND_DEFAULT + ); + $this->assertSame(2, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 4000, + [1000, 2000, 3000], + SortedNumericArrayNearestValueFinder::FIND_HIGHER + ); + $this->assertSame(2, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 50, + [1000, 2000, 3000], + 
SortedNumericArrayNearestValueFinder::FIND_DEFAULT + ); + $this->assertSame(0, $foundNearestLowerIndex); + + $foundNearestLowerIndex = SortedNumericArrayNearestValueFinder::findIndex( + 50, + [1000, 2000, 3000], + SortedNumericArrayNearestValueFinder::FIND_LOWER + ); + $this->assertSame(0, $foundNearestLowerIndex); + } + + public function testFindEmptyArray() + { + $this->expectException(\InvalidArgumentException::class); + + SortedNumericArrayNearestValueFinder::findIndex( + 1500, + [], + SortedNumericArrayNearestValueFinder::FIND_DEFAULT + ); + } + + public function testFindInNonIntArray() + { + $this->expectException(\InvalidArgumentException::class); + + SortedNumericArrayNearestValueFinder::findIndex( + 1500, + ['foo'], + SortedNumericArrayNearestValueFinder::FIND_DEFAULT + ); + } + + public function testFindInMixedTypedArray() + { + $this->expectException(\InvalidArgumentException::class); + + SortedNumericArrayNearestValueFinder::findIndex( + 1500, + ['foo', 1, 'bar'], + SortedNumericArrayNearestValueFinder::FIND_DEFAULT + ); + } +}