diff --git a/.gitignore b/.gitignore index c6e172a5bd..adc8e5163d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ target/ # ... but not code generation targets !tool/src/org/antlr/v4/codegen/target/ -# Node.js (npm and typings) cached dependencies +# Node.js (npm and typings) cached dependencies node_modules/ typings/ diff --git a/.travis.yml b/.travis.yml index cae5dae3df..aac274bebb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,208 +2,236 @@ sudo: true language: java +branches: + only: + - rust-target + before_cache: - rm -rf $HOME/.m2/repository/org/antlr cache: timeout: 600 directories: - - $HOME/.m2 - - $HOME/Library/Caches/Antlr4 - - $HOME/Library/Caches/Homebrew + - $HOME/.m2 + - $HOME/Library/Caches/Antlr4 + - $HOME/Library/Caches/Homebrew + cargo: true stages: - smoke-test - - main-test - - extended-test + # - main-test + # - extended-test + - deploy -matrix: +jobs: include: - os: linux dist: trusty - compiler: clang jdk: openjdk8 - env: - - TARGET=cpp - - CXX=g++-5 - - GROUP=LEXER - stage: main-test - addons: - apt: - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.7 - packages: - - g++-5 - - uuid-dev - - clang-3.7 - - os: linux - dist: trusty - compiler: clang - jdk: openjdk8 - env: - - TARGET=cpp - - CXX=g++-5 - - GROUP=PARSER - stage: main-test - addons: - apt: - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.7 - packages: - - g++-5 - - uuid-dev - - clang-3.7 - - os: linux - dist: trusty - compiler: clang - jdk: openjdk8 - env: - - TARGET=cpp - - CXX=g++-5 - - GROUP=RECURSION - stage: main-test - addons: - apt: - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.7 - packages: - - g++-5 - - uuid-dev - - clang-3.7 - - os: osx - compiler: clang - osx_image: xcode10.2 - env: - - TARGET=cpp - - GROUP=LEXER - stage: extended-test - - os: osx - compiler: clang - osx_image: xcode10.2 - env: - - TARGET=cpp - - GROUP=PARSER - stage: extended-test - - os: osx - compiler: clang - osx_image: xcode10.2 - env: - - TARGET=cpp - - GROUP=RECURSION - stage: extended-test - - os: osx - compiler: clang - osx_image: xcode10.2 - env: - - TARGET=swift - - GROUP=LEXER - stage: main-test - - os: osx - compiler: clang - osx_image: xcode10.2 - env: - - TARGET=swift - - GROUP=PARSER - stage: main-test - - os: osx - compiler: clang - osx_image: xcode10.2 - env: - - TARGET=swift - - GROUP=RECURSION - stage: main-test - - os: linux - dist: xenial - compiler: clang - env: - - TARGET=swift - - GROUP=ALL - stage: extended-test - - os: osx - osx_image: xcode10.2 - env: - - TARGET=dotnet - - GROUP=LEXER - stage: extended-test - - os: osx - osx_image: xcode10.2 - env: - - TARGET=dotnet - - GROUP=PARSER - stage: extended-test - - os: osx - osx_image: xcode10.2 - env: - - TARGET=dotnet - - GROUP=RECURSION - stage: extended-test - - os: linux - dist: trusty - jdk: openjdk7 - env: TARGET=java - stage: extended-test + cache: cargo + env: TARGET=rust + stage: smoke-test + - script: skip + before_install: skip + stage: deploy + deploy: + provider: releases + token: $GITHUB_TOKEN + repo: rrevenantt/antlr4rust + file: tool/target/antlr4-4.8-2-SNAPSHOT-complete.jar + tag_name: antlr4-4.8-2-Rust-0.2 + overwrite: true + cleanup: false + prerelease: false + edge: true + on: + tags: false + branch: rust-target + # - os: linux + # dist: trusty + # compiler: clang + # jdk: openjdk8 + # env: + # - TARGET=cpp + # - CXX=g++-5 + # - GROUP=LEXER + # stage: main-test + # addons: + # apt: + # sources: + # - ubuntu-toolchain-r-test + # - llvm-toolchain-precise-3.7 + # 
packages: + # - g++-5 + # - uuid-dev + # - clang-3.7 + # - os: linux + # dist: trusty + # compiler: clang + # jdk: openjdk8 + # env: + # - TARGET=cpp + # - CXX=g++-5 + # - GROUP=PARSER + # stage: main-test + # addons: + # apt: + # sources: + # - ubuntu-toolchain-r-test + # - llvm-toolchain-precise-3.7 + # packages: + # - g++-5 + # - uuid-dev + # - clang-3.7 + # - os: linux + # dist: trusty + # compiler: clang + # jdk: openjdk8 + # env: + # - TARGET=cpp + # - CXX=g++-5 + # - GROUP=RECURSION + # stage: main-test + # addons: + # apt: + # sources: + # - ubuntu-toolchain-r-test + # - llvm-toolchain-precise-3.7 + # packages: + # - g++-5 + # - uuid-dev + # - clang-3.7 + # - os: osx + # compiler: clang + # osx_image: xcode10.2 + # env: + # - TARGET=cpp + # - GROUP=LEXER + # stage: extended-test + # - os: osx + # compiler: clang + # osx_image: xcode10.2 + # env: + # - TARGET=cpp + # - GROUP=PARSER + # stage: extended-test + # - os: osx + # compiler: clang + # osx_image: xcode10.2 + # env: + # - TARGET=cpp + # - GROUP=RECURSION + # stage: extended-test + # - os: osx + # compiler: clang + # osx_image: xcode10.2 + # env: + # - TARGET=swift + # - GROUP=LEXER + # stage: main-test + # - os: osx + # compiler: clang + # osx_image: xcode10.2 + # env: + # - TARGET=swift + # - GROUP=PARSER + # stage: main-test + # - os: osx + # compiler: clang + # osx_image: xcode10.2 + # env: + # - TARGET=swift + # - GROUP=RECURSION + # stage: main-test + # - os: linux + # dist: xenial + # compiler: clang + # env: + # - TARGET=swift + # - GROUP=ALL + # stage: extended-test + # - os: osx + # osx_image: xcode10.2 + # env: + # - TARGET=dotnet + # - GROUP=LEXER + # stage: extended-test + # - os: osx + # osx_image: xcode10.2 + # env: + # - TARGET=dotnet + # - GROUP=PARSER + # stage: extended-test + # - os: osx + # osx_image: xcode10.2 + # env: + # - TARGET=dotnet + # - GROUP=RECURSION + # stage: extended-test + # - os: linux + # dist: trusty + # jdk: openjdk7 + # env: TARGET=java + # stage: extended-test - os: linux jdk: openjdk8 env: TARGET=java stage: smoke-test - - os: linux - jdk: openjdk8 - env: TARGET=csharp - stage: main-test - - os: linux - language: php - php: - - 7.2 - jdk: openjdk8 - env: TARGET=php - stage: main-test - - os: linux - jdk: openjdk8 - dist: trusty - env: - - TARGET=dotnet - - GROUP=LEXER - stage: extended-test - - os: linux - jdk: openjdk8 - dist: trusty - env: - - TARGET=dotnet - - GROUP=PARSER - stage: extended-test - - os: linux - jdk: openjdk8 - dist: trusty - env: - - TARGET=dotnet - - GROUP=RECURSION - stage: extended-test - - os: linux - jdk: openjdk8 - env: TARGET=python2 - stage: main-test - - os: linux - jdk: openjdk8 - env: TARGET=python3 - addons: - apt: - sources: - - deadsnakes # source required so it finds the package definition below - packages: - - python3.7 - stage: main-test - - os: linux - dist: trusty - jdk: openjdk8 - env: TARGET=javascript - stage: main-test - - os: linux - dist: trusty - jdk: openjdk8 - env: TARGET=go - stage: main-test + # - os: linux + # jdk: openjdk8 + # env: TARGET=csharp + # stage: main-test + # - os: linux + # language: php + # php: + # - 7.2 + # jdk: openjdk8 + # env: TARGET=php + # stage: main-test + # - os: linux + # jdk: openjdk8 + # dist: trusty + # env: + # - TARGET=dotnet + # - GROUP=LEXER + # stage: extended-test + # - os: linux + # jdk: openjdk8 + # dist: trusty + # env: + # - TARGET=dotnet + # - GROUP=PARSER + # stage: extended-test + # - os: linux + # jdk: openjdk8 + # dist: trusty + # env: + # - TARGET=dotnet + # - GROUP=RECURSION + # stage: 
extended-test +# - os: linux +# jdk: openjdk8 +# env: TARGET=python2 +# stage: main-test +# - os: linux +# jdk: openjdk8 +# env: TARGET=python3 +# addons: +# apt: +# sources: +# - deadsnakes # source required so it finds the package definition below +# packages: +# - python3.7 +# stage: main-test +# - os: linux +# dist: trusty +# jdk: openjdk8 +# env: TARGET=javascript +# stage: main-test +# - os: linux +# dist: trusty +# jdk: openjdk8 +# env: TARGET=go +# stage: smoke-test before_install: - f="./.travis/before-install-$TRAVIS_OS_NAME-$TARGET.sh"; ! [ -x "$f" ] || "$f" @@ -215,3 +243,4 @@ script: rc=$?; cat target/surefire-reports/*.dumpstream || true; exit $rc + diff --git a/.travis/before-install-linux-rust.sh b/.travis/before-install-linux-rust.sh new file mode 100755 index 0000000000..0789550897 --- /dev/null +++ b/.travis/before-install-linux-rust.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -euo pipefail + +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain nightly-2020-12-23 -y +export PATH=$HOME/.cargo/bin:$PATH +( rustc --version ; cargo --version ) || true \ No newline at end of file diff --git a/.travis/run-tests-rust.sh b/.travis/run-tests-rust.sh new file mode 100755 index 0000000000..988979acd0 --- /dev/null +++ b/.travis/run-tests-rust.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -euo pipefail + +export PATH=$HOME/.cargo/bin:$PATH +mvn test -Dtest=rust.* -q diff --git a/antlr4-maven-plugin/pom.xml b/antlr4-maven-plugin/pom.xml index 76e9bd37f3..22dcc63799 100644 --- a/antlr4-maven-plugin/pom.xml +++ b/antlr4-maven-plugin/pom.xml @@ -8,7 +8,7 @@ org.antlr antlr4-master - 4.8-2-SNAPSHOT + 4.8-2-RUSTTARGET1 antlr4-maven-plugin maven-plugin @@ -110,7 +110,7 @@ resources - src/test + src/test/java src/test/resources diff --git a/contributors.txt b/contributors.txt index 8c753a0018..0fb2f28d20 100644 --- a/contributors.txt +++ b/contributors.txt @@ -238,3 +238,4 @@ YYYY/MM/DD, github id, Full name, email 2019/11/17, felixn, Felix Nieuwenhuizhen, felix@tdlrali.com 2019/11/18, mlilback, Mark Lilback, mark@lilback.com 2020/02/02, carocad, Camilo Roca, carocad@unal.edu.co +2020/02/10, rrevenantt, Konstantin Anisimov, rrevenantt[at]gmail.com \ No newline at end of file diff --git a/doc/rust-target.md b/doc/rust-target.md new file mode 100644 index 0000000000..1e7ed5a0a8 --- /dev/null +++ b/doc/rust-target.md @@ -0,0 +1,44 @@ +# ANTLR4 Runtime for Rust + +### First steps + +#### 1. Install ANTLR4 + +[The getting started guide](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md) +should get you started. + +#### 2. Install the Rust ANTLR runtime + +Each target language for ANTLR has a runtime package for running parser +generated by ANTLR4. The runtime provides a common set of tools for using your parser. + +Add antlr-rust and lazy_static dependencies to your `Cargo.toml`: + +```toml +[dependencies] +lazy_static = "1.4" +antlr-rust = "0.1.0" +``` + +#### 3. Generate your parser + +You use the ANTLR4 "tool" to generate a parser. These will reference the ANTLR +runtime, installed above. + +Suppose you're using a UNIX system and have set up an alias for the ANTLR4 tool +as described in [the getting started guide](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md). +To generate your Rust parser, run the following command: + +```bash +antlr4 -Dlanguage=Rust MyGrammar.g4 +``` + +For a full list of antlr4 tool options, please visit the +[tool documentation page](https://github.com/antlr/antlr4/blob/master/doc/tool-options.md). 
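+
+#### 4. Use the generated parser
+
+The tool emits the lexer and parser as Rust modules named after your grammar.
+The following is a minimal, unverified sketch of wiring them together; the
+`mygrammarlexer`/`mygrammarparser` module names, the `MyGrammarLexer`/`MyGrammarParser`
+types and the `start` entry rule are assumptions derived from the grammar name
+above, so consult your generated code for the exact items:
+
+```rust
+use antlr_rust::common_token_stream::CommonTokenStream;
+use antlr_rust::input_stream::InputStream;
+
+// Hypothetical modules produced by `antlr4 -Dlanguage=Rust MyGrammar.g4`.
+mod mygrammarlexer;
+mod mygrammarparser;
+use mygrammarlexer::MyGrammarLexer;
+use mygrammarparser::MyGrammarParser;
+
+fn main() {
+    // Wrap the source text in an ANTLR character stream.
+    let lexer = MyGrammarLexer::new(InputStream::new("input text here"));
+    // Buffer the tokens so the parser can perform arbitrary lookahead.
+    let token_source = CommonTokenStream::new(lexer);
+    let mut parser = MyGrammarParser::new(token_source);
+    // `start` stands in for whatever your grammar's entry rule is called;
+    // rule methods return `Result`, so parse failures surface as `Err`.
+    let _tree = parser.start().expect("parse failed");
+}
+```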
+ +### Next + +More information in the Rust target [README](todo) +and in the antlr-rust crate [documentation](https://doc.rs/antlr-rust) + + diff --git a/doc/targets.md b/doc/targets.md index c2341ec486..f2a63689ae 100644 --- a/doc/targets.md +++ b/doc/targets.md @@ -10,6 +10,7 @@ This page lists the available and upcoming ANTLR runtimes. Please note that you * [C++](cpp-target.md) * [Swift](swift-target.md) * [PHP](php-target.md) +* [Rust](rust-target.md) (Unstable) ## Target feature parity diff --git a/pom.xml b/pom.xml index 5286a1a8c2..97ef6a8606 100644 --- a/pom.xml +++ b/pom.xml @@ -13,7 +13,7 @@ org.antlr antlr4-master - 4.8-2-SNAPSHOT + 4.8-2-RUSTTARGET1 pom ANTLR 4 diff --git a/runtime-testsuite/annotations/pom.xml b/runtime-testsuite/annotations/pom.xml index 97bf786495..eac9ac9c50 100644 --- a/runtime-testsuite/annotations/pom.xml +++ b/runtime-testsuite/annotations/pom.xml @@ -9,7 +9,7 @@ org.antlr antlr4-master - 4.8-2-SNAPSHOT + 4.8-2-RUSTTARGET1 ../../pom.xml antlr4-runtime-test-annotations diff --git a/runtime-testsuite/pom.xml b/runtime-testsuite/pom.xml index 33ea5e9f14..79fade9ddd 100644 --- a/runtime-testsuite/pom.xml +++ b/runtime-testsuite/pom.xml @@ -10,7 +10,7 @@ org.antlr antlr4-master - 4.8-2-SNAPSHOT + 4.8-2-RUSTTARGET1 antlr4-runtime-testsuite ANTLR 4 Runtime Tests (2nd generation) @@ -111,6 +111,7 @@ **/csharp/Test*.java **/java/Test*.java + **/rust/Test*.java **/go/Test*.java **/javascript/node/Test*.java **/python2/Test*.java diff --git a/runtime-testsuite/processors/pom.xml b/runtime-testsuite/processors/pom.xml index c417c626d1..92d6b90155 100644 --- a/runtime-testsuite/processors/pom.xml +++ b/runtime-testsuite/processors/pom.xml @@ -9,7 +9,7 @@ org.antlr antlr4-master - 4.8-2-SNAPSHOT + 4.8-2-RUSTTARGET1 ../../pom.xml antlr4-runtime-test-annotation-processors diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Rust.test.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Rust.test.stg new file mode 120000 index 0000000000..3b32ca630b --- /dev/null +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Rust.test.stg @@ -0,0 +1 @@ +../../../../../../../../runtime/Rust/templates/Rust.test.stg \ No newline at end of file diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java index 6134b265b9..5d2f398e82 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java @@ -227,7 +227,7 @@ public static class DelegatorInvokesDelegateRuleWithArgs extends BaseCompositePa /** parser grammar S; - a[int x] returns [int y] : B {} {$y=1000;} ; + a[] returns [] : B {} {$y=1000;} ; */ @CommentHasStringValue public String slaveGrammarS; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java index fb71b38632..3453c1ae01 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java @@ -171,7 +171,7 @@ public static abstract class ExprAmbiguity extends BaseDiagnosticParserTestDescr s @init {} : expr[0] {}; - expr[int _p] + expr[] : ID ( 
{5 >= $_p}? '*' expr[6] diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java index 01e868d6e1..43f0f66ec9 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java @@ -524,7 +524,7 @@ public static abstract class MultipleAlternativesWithCommonLabel extends BasePar /** grammar T; s : e {}; - e returns [int v] + e returns [] : e '*' e {$v = (0)}, {})> * (1)}, {})>;} # binary | e '+' e {$v = (0)}, {})> + (1)}, {})>;} # binary | INT {$v = $INT.int;} # anInt @@ -672,11 +672,11 @@ public static abstract class ReturnValueAndActionsAndLabels extends BaseParserTe /** grammar T; s : q=e {}; - e returns [int v] + e returns [] : a=e op='*' b=e {$v = $a.v * $b.v;} # mult | a=e '+' b=e {$v = $a.v + $b.v;} # add | INT {$v = $INT.int;} # anInt - | '(' x=e ')' {$v = $x.v;} # parens + | '(' x=e ')' {$v = 0 + $x.v;} # parens | x=e '++' {$v = $x.v+1;} # inc | e '--' # dec | ID {$v = 3;} # anID @@ -822,11 +822,11 @@ public static abstract class ReturnValueAndActions extends BaseParserTestDescrip /** grammar T; s : e {}; - e returns [int v, ignored] + e returns [, ignored] : a=e '*' b=e {$v = $a.v * $b.v;} | a=e '+' b=e {$v = $a.v + $b.v;} | INT {$v = $INT.int;} - | '(' x=e ')' {$v = $x.v;} + | '(' x=e ')' {$v = 0 + $x.v;} ; INT : '0'..'9'+ ; WS : (' '|'\n') -> skip ; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java index bda6fecf3a..816f67b774 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java @@ -44,6 +44,10 @@ public static class AltNum extends BaseParserTestDescriptor { @CommentHasStringValue public String grammar; + @Override + public boolean ignore(String targetName) { + return targetName.equals("Rust"); + } } public static class ExtraToken extends BaseParserTestDescriptor { @@ -112,7 +116,7 @@ public static class ExtraTokensAndAltLabels extends BaseParserTestDescriptor { @Override public boolean ignore(String targetName) { - return !targetName.matches("Java|Python2|Python3|Node|Swift|CSharp"); + return !targetName.matches("Java|Python2|Python3|Node|Swift|CSharp|Rust"); } } diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java index 0a3e40b198..4b860569d2 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java @@ -639,7 +639,7 @@ public static class ExtraneousInput extends BaseParserTestDescriptor { @Override public boolean ignore(String targetName) { - return !"Java".equals(targetName) && !"Swift".equals(targetName); + return !"Java".equals(targetName) && !"Swift".equals(targetName) && !"Rust".equals(targetName); } } } diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java index ca6e393dd6..6fa27fc0d2 100644 --- 
a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java @@ -693,7 +693,7 @@ public static class PredicatedIfIfElse extends BaseParserTestDescriptor { grammar T; s : stmt EOF ; stmt : ifStmt | ID; - ifStmt : 'if' ID stmt ('else' stmt | { })> }?); + ifStmt : 'if' ID stmt ('else' stmt | { })> }?); ELSE : 'else'; ID : [a-zA-Z]+; WS : [ \\n\\t]+ -> skip; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java index 2c2702fe5f..f2ea13b5ee 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java @@ -113,7 +113,7 @@ public static abstract class DropLoopEntryBranchInLRRule extends BaseParserTestD @Override public boolean ignore(String targetName) { - return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift").contains(targetName); + return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift", "Rust").contains(targetName); } } @@ -199,7 +199,7 @@ public static class DropLoopEntryBranchInLRRule_4 extends DropLoopEntryBranchInL @Override public boolean ignore(String targetName) { // passes, but still too slow in Python and JavaScript - return !Arrays.asList("Java", "CSharp", "Cpp", "Swift").contains(targetName); + return !Arrays.asList("Java", "CSharp", "Cpp", "Swift", "Rust").contains(targetName); } } diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java index b09f074881..3993c94106 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java @@ -90,7 +90,7 @@ public static class AtomWithClosureInTranslatedLRRule extends BaseParserTestDesc /** grammar T; start : e[0] EOF; - e[int _p] + e[] : ( 'a' | 'b'+ ) ( {3 >= $_p}? '+' e[4] )* ; @@ -100,14 +100,15 @@ public static class AtomWithClosureInTranslatedLRRule extends BaseParserTestDesc } - /** We cannot collect predicates that are dependent on local context if - * we are doing a global follow. They appear as if they were not there at all. + /** + * We cannot collect predicates that are dependent on local context if + * we are doing a global follow. They appear as if they were not there at all. */ - public static class DepedentPredsInGlobalFOLLOW extends BaseParserTestDescriptor { + public static class DependentPredsInGlobalFOLLOW extends BaseParserTestDescriptor { public String input = "a!"; /** - eval=true - parse + eval=true + parse */ @CommentHasStringValue public String output; @@ -122,8 +123,8 @@ public static class DepedentPredsInGlobalFOLLOW extends BaseParserTestDescriptor } s : a[99] ; - a[int i] : e {}? {} '!' ; - b[int i] : e {}? ID ; + a[] : e {}? {} '!' ; + b[] : e {}? ID ; e : ID | ; // non-LL(1) so we use ATN ID : 'a'..'z'+ ; INT : '0'..'9'+; @@ -269,7 +270,7 @@ public static abstract class PredFromAltTestedInLoopBack extends BaseParserTestD @after {} : para para EOF ; para: paraContent NL NL ; - paraContent : ('s'|'x'|{})>}? NL)+ ; + paraContent : ('s'|'x'|{})>}? 
NL)+ ; NL : '\n' ; s : 's' ; X : 'x' ; @@ -291,7 +292,7 @@ public static class PredFromAltTestedInLoopBack_1 extends PredFromAltTestedInLoo @Override public boolean ignore(String targetName) { - return !"Java".equals(targetName) && !"Swift".equals(targetName); + return !"Java".equals(targetName) && !"Swift".equals(targetName) && !"Rust".equals(targetName); } } @@ -355,7 +356,7 @@ public static class PredicateDependentOnArg extends BaseParserTestDescriptor { grammar T; @parser::members {} s : a[2] a[1]; - a[int i] + a[] : {}? ID {} | {}? ID {} ; @@ -389,7 +390,7 @@ public static class PredicateDependentOnArg2 extends BaseParserTestDescriptor { grammar T; @parser::members {} s : a[2] a[1]; - a[int i] + a[] : {}? ID | {}? ID ; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/BaseRustTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/BaseRustTest.java new file mode 120000 index 0000000000..b65b090406 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/BaseRustTest.java @@ -0,0 +1 @@ +../../../../../../../../runtime/Rust/templates/BaseRustTest.java \ No newline at end of file diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeLexers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeLexers.java new file mode 100644 index 0000000000..b4afcae6d1 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeLexers.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeLexersDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeLexers extends BaseRuntimeTest { + public TestCompositeLexers(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeLexersDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeParsers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeParsers.java new file mode 100644 index 0000000000..bdf55028f4 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeParsers.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeParsersDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeParsers extends BaseRuntimeTest { + public TestCompositeParsers(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeParsersDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestFullContextParsing.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestFullContextParsing.java new file mode 100644 index 0000000000..a6ff193914 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestFullContextParsing.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.FullContextParsingDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestFullContextParsing extends BaseRuntimeTest { + public TestFullContextParsing(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(FullContextParsingDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLeftRecursion.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLeftRecursion.java new file mode 100644 index 0000000000..230d10d45a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLeftRecursion.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LeftRecursionDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLeftRecursion extends BaseRuntimeTest { + public TestLeftRecursion(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LeftRecursionDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerErrors.java new file mode 100644 index 0000000000..3d46210464 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerErrors.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerErrorsDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerErrors extends BaseRuntimeTest { + public TestLexerErrors(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerErrorsDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerExec.java new file mode 100644 index 0000000000..6ab4ab1acd --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerExec.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerExecDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerExec extends BaseRuntimeTest { + public TestLexerExec(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerExecDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestListeners.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestListeners.java new file mode 100644 index 0000000000..1318d7799c --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestListeners.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ListenersDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestListeners extends BaseRuntimeTest { + public TestListeners(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ListenersDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParseTrees.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParseTrees.java new file mode 100644 index 0000000000..b36cef9186 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParseTrees.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParseTreesDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParseTrees extends BaseRuntimeTest { + public TestParseTrees(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParseTreesDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserErrors.java new file mode 100644 index 0000000000..5ab7d8d7c5 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserErrors.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserErrorsDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserErrors extends BaseRuntimeTest { + public TestParserErrors(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserErrorsDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserExec.java new file mode 100644 index 0000000000..7adf9ecb4e --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserExec.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserExecDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserExec extends BaseRuntimeTest { + public TestParserExec(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserExecDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestPerformance.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestPerformance.java new file mode 100644 index 0000000000..475910665d --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestPerformance.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.PerformanceDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestPerformance extends BaseRuntimeTest { + public TestPerformance(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + ((BaseRustTest) this.delegate).cargo_options = "--release"; + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(PerformanceDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalLexer.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalLexer.java new file mode 100644 index 0000000000..8b11dc1edd --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalLexer.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalLexerDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalLexer extends BaseRuntimeTest { + public TestSemPredEvalLexer(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalLexerDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalParser.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalParser.java new file mode 100644 index 0000000000..16619de1f5 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalParser.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalParserDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalParser extends BaseRuntimeTest { + public TestSemPredEvalParser(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalParserDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSets.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSets.java new file mode 100644 index 0000000000..774c3ad12f --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSets.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SetsDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSets extends BaseRuntimeTest { + public TestSets(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SetsDescriptors.class, "Rust"); + } +} diff --git a/runtime/Java/pom.xml b/runtime/Java/pom.xml index ce1e2063af..4dd22ca285 100644 --- a/runtime/Java/pom.xml +++ b/runtime/Java/pom.xml @@ -9,7 +9,7 @@ org.antlr antlr4-master - 4.8-2-SNAPSHOT + 4.8-2-RUSTTARGET1 ../../pom.xml antlr4-runtime diff --git a/runtime/Rust/.gitignore b/runtime/Rust/.gitignore new file mode 100644 index 0000000000..98380a72be --- /dev/null +++ b/runtime/Rust/.gitignore @@ -0,0 +1,8 @@ +.idea +.vscode +/target +/tests/gen/*.tokens +/tests/gen/*.interp +**/*.rs.bk +*.iml +Cargo.lock diff --git a/runtime/Rust/Cargo.toml b/runtime/Rust/Cargo.toml new file mode 100644 index 0000000000..1c48779a01 --- /dev/null +++ b/runtime/Rust/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "antlr-rust" +version = "0.3.0-beta" +authors = ["Konstantin Anisimov ", "Bo Lin "] +homepage = "https://github.com/sdf-labs/antlr4" +repository = "https://github.com/sdf-labs/antlr4" +documentation = "https://docs.rs/antlr-rust" +description = "ANTLR4 runtime for Rust" +readme = "README.md" +edition = "2021" +license = "BSD-3-Clause" +keywords = ["ANTLR","ANTLR4","parsing","runtime"] +categories = ["parsing"] +exclude = ["/build.rs","/grammars","/templates"] + + +[dependencies] +lazy_static = "^1.4" +byteorder = "^1" +murmur3 = "^0.4" # 0.5 is incompatible currently +bit-set = "=0.5.*" +once_cell = "^1.2" +#backtrace = "=0.3" +typed-arena = "^2.0" +better_any = "0.2.0" +#better_any = "=0.1" +#parking_lot = "0.11" +#qcell = { path="../qcell" } + +[dependencies.uuid] +version = "1.7.0" +features = [ + "v4", # Lets you generate random UUIDs + "fast-rng", # Use a faster (but still sufficiently random) RNG + 
"macro-diagnostics", # Enable better diagnostics for compile-time UUIDs +] + +[lib] + +#[[test]] +#name = "my_test" +#path="tests/my_test.rs" + + +[profile.release] +#opt-level = 3 +#debug = true \ No newline at end of file diff --git a/runtime/Rust/LICENSE.txt b/runtime/Rust/LICENSE.txt new file mode 100644 index 0000000000..1f11de272d --- /dev/null +++ b/runtime/Rust/LICENSE.txt @@ -0,0 +1,26 @@ +[The "BSD 3-clause license"] +Copyright (c) 2020-2021 Konstantin Anisimov. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/runtime/Rust/README.md b/runtime/Rust/README.md new file mode 100644 index 0000000000..e9c264ae5d --- /dev/null +++ b/runtime/Rust/README.md @@ -0,0 +1,125 @@ +# antlr4rust +[![Crate](https://flat.badgen.net/crates/v/antlr-rust)](https://crates.io/crates/antlr_rust/0.3.0-beta) +[![docs](https://flat.badgen.net/badge/docs.rs/v0.3.0-beta)](https://docs.rs/antlr-rust/0.3.0-beta) +![ANTLR4 testsuite](https://github.com/rrevenantt/antlr4rust/workflows/ANTLR4%20testsuite/badge.svg?event=push) +![cargo test](https://github.com/rrevenantt/antlr4rust/workflows/cargo%20test/badge.svg) +[![](https://tokei.rs/b1/github/rrevenantt/antlr4rust)](https://github.com/rrevenantt/antlr4rust) + +[ANTLR4](https://github.com/antlr/antlr4) runtime for Rust programming language. + +For examples you can see [grammars](grammars), [tests/gen](tests/gen) for corresponding generated code +and [tests/my_tests.rs](tests/my_test.rs) for actual usage examples + +## ANTLR4 Tool(parser generator) + +Generator part is currently located in rust-target branch of my antlr4 fork [rrevenantt/antlr4/tree/rust-target](https://github.com/rrevenantt/antlr4/tree/rust-target) +Latest version is automatically built to [releases](https://github.com/rrevenantt/antlr4rust/releases) on this repository. +So if you just want to generate parser +or if you want to contribute to only runtime part you don't have to do build it yourself. 
+
+But if you want to build or change the generator yourself:
+* `git clone -b rust-target https://github.com/rrevenantt/antlr4` - clone my antlr4 fork
+* `git submodule update --init --recursive --remote` - update the Rust target submodule
+* `mvn -DskipTests install` - build the generator
+
+### Implementation status
+
+For now, development is happening in this repository,
+but eventually it will be merged into the main ANTLR4 repository.
+
+Since version `0.3` it works on stable Rust.
+Previous versions are no longer maintained,
+so in case of nightly breakage you should migrate to the latest version.
+
+### Usage
+
+You should use the ANTLR4 "tool" to generate a parser that will use the ANTLR
+runtime located here. You can run it with the following command:
+```bash
+java -jar <path to ANTLR4 tool jar> -Dlanguage=Rust MyGrammar.g4
+```
+For a full list of antlr4 tool options, please visit the
+[tool documentation page](https://github.com/antlr/antlr4/blob/master/doc/tool-options.md).
+
+You can also see [build.rs](build.rs) as an example of a `build.rs` configuration
+that rebuilds the parser automatically when the grammar file changes.
+
+Then add the following to the `Cargo.toml` of the crate that will use the
+generated parser:
+```toml
+[dependencies]
+antlr-rust = "0.3"
+```
+
+### Parse Tree structure
+
+It is possible to generate idiomatic Rust syntax trees. For this you need to use the labels feature of the ANTLR tool.
+See the [Labels](grammars/Labels.g4) grammar for an example.
+Consider the following rule:
+```text
+e : a=e op='*' b=e # mult
+  | left=e '+' b=e # add
+  ;
+```
+For such a rule, ANTLR will generate an enum `EContextAll` containing `mult` and `add` alternatives,
+so you will be able to match on them in your code.
+The corresponding struct for each alternative will also contain the fields you labeled,
+i.e. the `MultContext` struct will contain `a` and `b` fields holding the child subtrees and
+an `op` field of type `TerminalNode`, which corresponds to an individual `Token`.
+It is also possible to disable generic parse tree creation and keep only selected children via
+`parser.build_parse_trees = false`, but unfortunately this currently prevents visitors from working.
+
+### Differences with Java
+Although the Rust runtime API has been made as close as possible to Java's,
+there are quite a few differences because Rust is not an OOP language and is much more explicit.
+
+ - If you are using labeled alternatives,
+   the struct generated for the rule is an enum with a variant for each alternative.
+ - The parser needs to take ownership of listeners, but it is possible to get a listener back via `ListenerId`;
+   otherwise `ParseTreeWalker` should be used.
+ - In embedded actions, to access the parser you should use the `recog` variable instead of `self`/`this`.
+   This is because predicates have to be inserted into two syntactically different places in the generated parser,
+   and in one of them the parser cannot be `self`.
+ - The `str`-based `InputStream` has different index behavior when there are Unicode characters.
+   If you need exactly the same behavior, use a `[u32]`-based `InputStream`, or implement a custom `CharStream`.
+ - In actions you have to escape `'` in Rust lifetimes with `\` because ANTLR treats them as strings, e.g. `Struct<\'lifetime>`.
+ - To make custom tokens you should use the `@tokenfactory` custom action instead of the usual `TokenLabelType` parser option.
+   ANTLR parser options can accept only single identifiers, while the Rust target needs to know about the lifetime as well.
+   Also, in the Rust target `TokenFactory` is the way to specify the token type.
As example you can see [CSV](grammars/CSV.g4) test grammar. + - All rule context variables (rule argument or rule return) should implement `Default + Clone`. + +### Benchmarks +Here is comparison of antlr generated XML lexer and parser +(from default XML grammar but with custom minimal Token/TokenFactory/InputStream/RuleContext) to hand-written implementations in rust ecosystem. +Keep in mind that `xmlparser` and `quick_xml` are much closer to being lexer than parser, so they should be compared with antlr lexer. +Also while structs used by generated lexer and parser were customized to track as minimum data as required +(which is possible by any user of antlr-rust), +internals of the lexer cannot be customized enough yet and still track quite a lot of data that might not be used in particular case. +So there is still room for improvement. +```text +lexers: +large/large_xmlparser time: [1.8598 ms 1.8607 ms 1.8619 ms] +large/large_quick_xml time: [1.4623 ms 1.4645 ms 1.4675 ms] +large/large_antlr_xml_lexer time: [5.7866 ms 5.7877 ms 5.7891 ms] +parsers: +large/large_xmlrs time: [16.734 ms 16.748 ms 16.766 ms] +large/large_minidom time: [7.0639 ms 7.0792 ms 7.0975 ms] +large/large_roxmltree time: [4.9341 ms 4.9360 ms 4.9380 ms] +large/large_antlr_xml_full time: [10.243 ms 10.248 ms 10.252 ms] +``` + +### Unsafe +Currently, unsafe is used only for downcasting (through separate crate) +and to update data inside Rc via `get_mut_unchecked`(returned mutable reference is used immediately and not stored anywhere) + +### Versioning +In addition to usual Rust semantic versioning, +patch version changes of the crate should not require updating of generator part + +## Licence + +BSD 3-clause. +Unless you explicitly state otherwise, +any contribution intentionally submitted for inclusion in this project by you +shall be licensed as above, without any additional terms or conditions. + diff --git a/runtime/Rust/grammars/CSV.g4 b/runtime/Rust/grammars/CSV.g4 new file mode 100644 index 0000000000..987db700b3 --- /dev/null +++ b/runtime/Rust/grammars/CSV.g4 @@ -0,0 +1,20 @@ +grammar CSV; + +@tokenfactory{ +pub type LocalTokenFactory<'input> = antlr_rust::token_factory::ArenaCommonFactory<'input>; +} + +csvFile: hdr row+ ; +hdr : row ; + +row : field (',' field)* '\r'? '\n'; + +field + : TEXT + | STRING + | + ; + +WS : [ ]+ -> channel(HIDDEN); +TEXT : ~[ ,\n\r"]+ ; +STRING : '"' ('""'|~'"')* '"' ; // quote-quote is an escaped quote diff --git a/runtime/Rust/grammars/Labels.g4 b/runtime/Rust/grammars/Labels.g4 new file mode 100644 index 0000000000..7a655a04e2 --- /dev/null +++ b/runtime/Rust/grammars/Labels.g4 @@ -0,0 +1,14 @@ +grammar Labels; +s : q=e ; +e returns [String v] + : a=e op='*' b=e {$v = "* ".to_owned() + $a.v + " " + $b.v;} # mult + | a=e '+' b=e {$v = "+ ".to_owned() + $a.v + " " + $b.v;} # add + | INT {$v = $INT.text.to_owned();} # anInt + | '(' x=e ')' {$v = $x.v;} # parens + | x=e '++' {$v = " ++".to_owned() + $x.v;} # inc + | x=e '--' {$v = " --".to_owned() + $x.v;} # dec + | ID {$v = $ID.text.to_owned();} # anID + ; +ID : 'a'..'z'+ ; +INT : '0'..'9'+ ; +WS : (' '|'\n') -> skip ; \ No newline at end of file diff --git a/runtime/Rust/grammars/Perf.g4 b/runtime/Rust/grammars/Perf.g4 new file mode 100644 index 0000000000..1f8333e2ca --- /dev/null +++ b/runtime/Rust/grammars/Perf.g4 @@ -0,0 +1,18 @@ +grammar Perf; + +stat : expr ';' + | expr '.' + ; + +expr + : ID + | 'not' expr + | expr 'and' expr + | expr 'or' expr + | '(' ID ')' expr + | expr '?' 
expr ':' expr + | 'between' expr 'and' expr + ; + +ID: [a-zA-Z_][a-zA-Z_0-9]*; +WS: [ \t\n\r\f]+ -> skip; \ No newline at end of file diff --git a/runtime/Rust/grammars/ReferenceToATN.g4 b/runtime/Rust/grammars/ReferenceToATN.g4 new file mode 100644 index 0000000000..0549d622fe --- /dev/null +++ b/runtime/Rust/grammars/ReferenceToATN.g4 @@ -0,0 +1,10 @@ +grammar ReferenceToATN; + +@tokenfactory{ +pub type LocalTokenFactory<\'input> = antlr_rust::token_factory::OwningTokenFactory; +} + +a : (ID|ATN)* ATN? {println!("{}",$text);}; +ID : 'a'..'z'+ ; +ATN : '0'..'9'+; +WS : (' '|'\n') -> skip ; diff --git a/runtime/Rust/grammars/SimpleLR.g4 b/runtime/Rust/grammars/SimpleLR.g4 new file mode 100644 index 0000000000..cf2f301afd --- /dev/null +++ b/runtime/Rust/grammars/SimpleLR.g4 @@ -0,0 +1,8 @@ +grammar SimpleLR; +s @after {println!("test");} : a ; +a : a ID + | ID + ; +ID : 'a'..'z'+ ; +WS : (' '|'\n') -> skip ; + diff --git a/runtime/Rust/grammars/VisitorBasic.g4 b/runtime/Rust/grammars/VisitorBasic.g4 new file mode 100644 index 0000000000..b1e67b3084 --- /dev/null +++ b/runtime/Rust/grammars/VisitorBasic.g4 @@ -0,0 +1,7 @@ +grammar VisitorBasic; + +s + : 'A' EOF + ; + +A : 'A'; diff --git a/runtime/Rust/grammars/VisitorCalc.g4 b/runtime/Rust/grammars/VisitorCalc.g4 new file mode 100644 index 0000000000..6ca1c2e835 --- /dev/null +++ b/runtime/Rust/grammars/VisitorCalc.g4 @@ -0,0 +1,18 @@ +grammar VisitorCalc; + +s + : expr EOF + ; + +expr + : INT # number + | expr (MUL | DIV) expr # multiply + | expr (ADD | SUB) expr # add + ; + +INT : [0-9]+; +MUL : '*'; +DIV : '/'; +ADD : '+'; +SUB : '-'; +WS : [ \t]+ -> channel(HIDDEN); diff --git a/runtime/Rust/grammars/XMLLexer.g4 b/runtime/Rust/grammars/XMLLexer.g4 new file mode 100644 index 0000000000..8c630f4f34 --- /dev/null +++ b/runtime/Rust/grammars/XMLLexer.g4 @@ -0,0 +1,63 @@ +lexer grammar XMLLexer; + +// Default "mode": Everything OUTSIDE of a tag +COMMENT : '' {true}? ; +CDATA : '' ; +/** Scarf all DTD stuff, Entity Declarations like , + * and Notation Declarations + */ +DTD : '' -> skip ; +EntityRef : '&' Name ';' ; +CharRef : '&#' DIGIT+ ';' + | '&#x' HEXDIGIT+ ';' + ; +SEA_WS : (' '|'\t'|'\r'? '\n') ; + +OPEN : '<' -> pushMode(INSIDE) ; +XMLDeclOpen : ' pushMode(INSIDE) ; +SPECIAL_OPEN: ' more, pushMode(PROC_INSTR) ; + +TEXT : ~[<&]+ ; // match any 16 bit char other than < and & + +// ----------------- Everything INSIDE of a tag --------------------- +mode INSIDE; + +CLOSE : '>' {recog.pop_mode();} ; +SPECIAL_CLOSE: '?>' -> popMode ; // close +SLASH_CLOSE : '/>' -> popMode ; +SLASH : '/' ; +EQUALS : '=' ; +STRING : '"' ~[<"]* '"' + | '\'' ~[<']* '\'' + ; +Name : NameStartChar NameChar* ; +S : [ \t\r\n] -> skip ; + +fragment +HEXDIGIT : [a-fA-F0-9] ; + +fragment +DIGIT : [0-9] ; + +fragment +NameChar : NameStartChar + | '-' | '.' | DIGIT + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + ; + +fragment +NameStartChar + : [:a-zA-Z] + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + ; + +// ----------------- Handle --------------------- +mode PROC_INSTR; +PI : '?>' -> popMode ; // close +IGNORE : . 
-> more ; diff --git a/runtime/Rust/rustfmt.toml b/runtime/Rust/rustfmt.toml new file mode 100644 index 0000000000..d3fc0f18c7 --- /dev/null +++ b/runtime/Rust/rustfmt.toml @@ -0,0 +1,3 @@ +edition = "2018" +#fn_single_line = true +#fn_args_layout = "Compressed" \ No newline at end of file diff --git a/runtime/Rust/src/atn.rs b/runtime/Rust/src/atn.rs new file mode 100644 index 0000000000..30c8c56d0c --- /dev/null +++ b/runtime/Rust/src/atn.rs @@ -0,0 +1,175 @@ +use std::collections::HashMap; + +use crate::atn_state::ATNState; +use crate::atn_state::ATNStateRef; +use crate::atn_type::ATNType; +use crate::dfa::ScopeExt; +use crate::interval_set::IntervalSet; +use crate::lexer_action::LexerAction; +use crate::ll1_analyzer::LL1Analyzer; +use crate::parser::ParserNodeType; +use crate::rule_context::EmptyContextType; +use crate::token::{TOKEN_EOF, TOKEN_EPSILON}; +use crate::token_factory::CommonTokenFactory; +use crate::transition::RuleTransition; +use std::fmt::{Debug, Formatter}; + +pub const INVALID_ALT: isize = 0; + +/// Augmented Transition Network +/// +/// Basically NFA(graph) of states and possible(maybe multiple) transitions on a given particular symbol. +/// +/// Public mostly because of implementations reasons. From user side is only useful for advanced error handling +pub struct ATN { + pub decision_to_state: Vec, + + pub grammar_type: ATNType, + + pub(crate) lexer_actions: Vec, + + pub max_token_type: isize, + + pub mode_name_to_start_state: HashMap, + + pub mode_to_start_state: Vec, + + pub rule_to_start_state: Vec, + + pub rule_to_stop_state: Vec, + + pub rule_to_token_type: Vec, + + pub states: Vec>, +} + +impl Debug for ATN { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ATN") + .field("grammar_type", &self.grammar_type) + .field("max_token_type", &self.max_token_type) + .field("states count", &self.states.len()) + .field("..", &"..") + .finish() + } +} + +impl ATN { + pub(crate) fn new_atn(grammar_type: ATNType, max_token_type: isize) -> ATN { + ATN { + decision_to_state: Vec::new(), + grammar_type, + lexer_actions: vec![], + max_token_type, + mode_name_to_start_state: HashMap::new(), + mode_to_start_state: Vec::new(), + rule_to_start_state: Vec::new(), + rule_to_stop_state: Vec::new(), + rule_to_token_type: Vec::new(), + states: Vec::new(), + } + } + + ///Compute the set of valid tokens that can occur starting in `s` and + ///staying in same rule. `Token::EPSILON` is in set if we reach end of + ///rule. + pub fn next_tokens<'a>(&self, s: &'a dyn ATNState) -> &'a IntervalSet { + s.get_next_tokens_within_rule().get_or_init(|| { + self.next_tokens_in_ctx::>(s, None) + .modify_with(|r| r.read_only = true) + }) + } + + /// Compute the set of valid tokens that can occur starting in state `s`. + /// If `ctx` is null, the set of tokens will not include what can follow + /// the rule surrounding `s`. In other words, the set will be + /// restricted to tokens reachable staying within `s`'s rule. 
+ pub fn next_tokens_in_ctx<'a, Ctx: ParserNodeType<'a>>( + &self, + s: &dyn ATNState, + _ctx: Option<&Ctx::Type>, + ) -> IntervalSet { + let analyzer = LL1Analyzer::new(self); + analyzer.look::(s, None, _ctx) + } + + pub(crate) fn add_state(&mut self, state: Box) { + debug_assert_eq!(state.get_state_number(), self.states.len()); + self.states.push(state) + } + + // fn remove_state(&self, _state: ATNStateRef) { unimplemented!() } + + // fn define_decision_state(&self, _s: ATNStateRef) -> isize { unimplemented!() } + + pub fn get_decision_state(&self, decision: usize) -> ATNStateRef { + self.decision_to_state[decision] + } + + /// Computes the set of input symbols which could follow ATN state number + /// {@code stateNumber} in the specified full {@code context}. This method + /// considers the complete parser context, but does not evaluate semantic + /// predicates (i.e. all predicates encountered during the calculation are + /// assumed true). If a path in the ATN exists from the starting state to the + /// {@link RuleStopState} of the outermost context without matching any + /// symbols, {@link Token#EOF} is added to the returned set. + /// + ///

+ /// If {@code context} is {@code null}, it is treated as {@link ParserRuleContext#EMPTY}.
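+ ///
+ /// A minimal usage sketch (hypothetical `state_number` and `invoking_states`
+ /// values, as an error strategy might collect them from the parser):
+ ///
+ /// ```ignore
+ /// // `invoking_states: Vec<isize>` is the call stack of rule-invocation states
+ /// let expected = atn.get_expected_tokens(state_number, invoking_states.iter().copied());
+ /// ```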

+ /// + /// Note that this does NOT give you the set of all tokens that could + /// appear at a given token position in the input phrase. In other words, + /// it does not answer: + /// + /// "Given a specific partial input phrase, return the set of all tokens + /// that can follow the last token in the input phrase." + /// + /// The big difference is that with just the input, the parser could + /// land right in the middle of a lookahead decision. Getting + /// all *possible* tokens given a partial input stream is a separate + /// computation. See https://github.com/antlr/antlr4/issues/1428 + /// + /// For this function, we are specifying an ATN state and call stack to compute + /// what token(s) can come next and specifically: outside of a lookahead decision. + /// That is what you want for error reporting and recovery upon parse error. + /// + /// @param stateNumber the ATN state number + /// @param context the full parse context + /// @return The set of potentially valid input symbols which could follow the + /// specified state in the specified context. + /// Panics if the ATN does not contain a state with + /// number {@code stateNumber} + pub fn get_expected_tokens( + &self, + state_number: isize, + states_stack: impl Iterator, // _ctx: &Rc, + ) -> IntervalSet { + let s = self.states[state_number as usize].as_ref(); + let mut following = self.next_tokens(s); + if !following.contains(TOKEN_EPSILON) { + return following.clone(); + } + let mut expected = IntervalSet::new(); + expected.add_set(&following); + expected.remove_one(TOKEN_EPSILON); + // let mut ctx = Some(Rc::clone(_ctx)); + + for state in states_stack { + if !following.contains(TOKEN_EPSILON) { + break; + } + + let invoking_state = self.states[state as usize].as_ref(); + let tr = invoking_state.get_transitions().first().unwrap().as_ref(); + let tr = tr.cast::(); + following = self.next_tokens(self.states[tr.follow_state].as_ref()); + expected.add_set(following); + expected.remove_one(TOKEN_EPSILON); + // ctx = c.get_parent_ctx(); + } + + if following.contains(TOKEN_EPSILON) { + expected.add_one(TOKEN_EOF); + } + expected + } +} diff --git a/runtime/Rust/src/atn_config.rs b/runtime/Rust/src/atn_config.rs new file mode 100644 index 0000000000..c430184529 --- /dev/null +++ b/runtime/Rust/src/atn_config.rs @@ -0,0 +1,261 @@ +use std::fmt::{Debug, Error, Formatter}; +use std::hash::{Hash, Hasher}; +use std::rc::Rc; + +use murmur3::murmur3_32::MurmurHasher; + +use crate::atn_config::ATNConfigType::LexerATNConfig; +use crate::atn_state::{ATNState, ATNStateRef, ATNStateType}; +use crate::dfa::ScopeExt; +use crate::lexer_action_executor::LexerActionExecutor; +use crate::prediction_context::PredictionContext; +use crate::semantic_context::SemanticContext; + +#[derive(Clone)] +pub struct ATNConfig { + precedence_filter_suppressed: bool, + //todo since ATNState is immutable when we started working with ATNConfigs + // looks like it is possible to have usual reference here + state: ATNStateRef, + alt: isize, + //todo maybe option is unnecessary and PredictionContext::EMPTY would be enough + //another todo check arena alloc + context: Option>, + pub semantic_context: Box, + pub reaches_into_outer_context: isize, + pub(crate) config_type: ATNConfigType, +} + +impl Eq for ATNConfig {} + +impl PartialEq for ATNConfig { + fn eq(&self, other: &Self) -> bool { + self.get_state() == other.get_state() + && self.get_alt() == other.get_alt() + // Rc is optimized to not do a deep equalitiy if arc pointers are equal so that's enough + && 
self.context == other.context + && self.get_type() == other.get_type() + && self.semantic_context == other.semantic_context + && self.precedence_filter_suppressed == other.precedence_filter_suppressed + } +} + +impl Hash for ATNConfig { + fn hash(&self, state: &mut H) { + state.write_i32(self.get_state() as i32); + state.write_i32(self.get_alt() as i32); + match self.get_context() { + None => state.write_i32(0), + Some(c) => c.hash(state), + } + self.semantic_context.hash(state); + if let LexerATNConfig { + lexer_action_executor, + passed_through_non_greedy_decision, + } = &self.config_type + { + state.write_i32(if *passed_through_non_greedy_decision { + 1 + } else { + 0 + }); + match lexer_action_executor { + None => state.write_i32(0), + Some(ex) => ex.hash(state), + } + } + } +} + +impl Debug for ATNConfig { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + f.write_fmt(format_args!( + "({},{},[{}]", + self.state, + self.alt, + self.context.as_deref().unwrap() + ))?; + if self.reaches_into_outer_context > 0 { + f.write_fmt(format_args!(",up={}", self.reaches_into_outer_context))?; + } + + f.write_str(")") + } +} + +#[derive(Eq, PartialEq, Clone, Debug)] +pub(crate) enum ATNConfigType { + BaseATNConfig, + LexerATNConfig { + lexer_action_executor: Option>, + passed_through_non_greedy_decision: bool, + }, +} + +impl ATNConfig { + pub(crate) fn get_lexer_executor(&self) -> Option<&LexerActionExecutor> { + match &self.config_type { + ATNConfigType::BaseATNConfig => None, + ATNConfigType::LexerATNConfig { + lexer_action_executor, + .. + } => lexer_action_executor.as_deref(), + } + } + + pub fn default_hash(&self) -> u64 { + MurmurHasher::default().convert_with(|mut x| { + self.hash(&mut x); + x.finish() + }) + } + + pub fn new( + state: ATNStateRef, + alt: isize, + context: Option>, + ) -> ATNConfig { + ATNConfig { + precedence_filter_suppressed: false, + state, + alt, + context, + semantic_context: Box::new(SemanticContext::NONE), + reaches_into_outer_context: 0, + config_type: ATNConfigType::BaseATNConfig, + } + } + + pub fn new_with_semantic( + state: ATNStateRef, + alt: isize, + context: Option>, + semantic_context: Box, + ) -> ATNConfig { + let mut new = Self::new(state, alt, context); + new.semantic_context = semantic_context; + new + } + + pub fn new_lexer_atnconfig6( + _state: ATNStateRef, + _alt: isize, + _context: Rc, + ) -> ATNConfig { + let mut atnconfig = ATNConfig::new(_state, _alt, Some(_context)); + atnconfig.config_type = ATNConfigType::LexerATNConfig { + lexer_action_executor: None, + passed_through_non_greedy_decision: false, + }; + atnconfig + } + + pub fn cloned_with_new_semantic( + &self, + target: &dyn ATNState, + ctx: Box, + ) -> ATNConfig { + let mut new = self.cloned(target); + new.semantic_context = ctx; + new + } + + pub fn cloned(&self, target: &dyn ATNState) -> ATNConfig { + // println!("depth {}",PredictionContext::size(self.context.as_deref())); + let mut new = self.clone(); + new.state = target.get_state_number(); + if let ATNConfigType::LexerATNConfig { + passed_through_non_greedy_decision, + .. 
+ } = &mut new.config_type + { + *passed_through_non_greedy_decision = check_non_greedy_decision(self, target); + } + new + } + + pub fn cloned_with_new_ctx( + &self, + target: &dyn ATNState, + ctx: Option>, + ) -> ATNConfig { + let mut new = self.cloned(target); + new.context = ctx; + + new + } + + pub(crate) fn cloned_with_new_exec( + &self, + target: &dyn ATNState, + exec: Option, + ) -> ATNConfig { + let mut new = self.cloned(target); + if let ATNConfigType::LexerATNConfig { + lexer_action_executor, + passed_through_non_greedy_decision: _, + } = &mut new.config_type + { + *lexer_action_executor = exec.map(Box::new); + // *passed_through_non_greedy_decision = check_non_greedy_decision(self, target); + } + new + } + + pub fn get_state(&self) -> ATNStateRef { + self.state + } + + pub fn get_alt(&self) -> isize { + self.alt + } + + pub(crate) fn get_type(&self) -> &ATNConfigType { + &self.config_type + } + + pub fn get_context(&self) -> Option<&Rc> { + self.context.as_ref() + } + + pub fn take_context(&mut self) -> Rc { + self.context.take().unwrap() + } + + pub fn set_context(&mut self, _v: Rc) { + self.context = Some(_v); + } + + pub fn get_reaches_into_outer_context(&self) -> isize { + self.reaches_into_outer_context + } + + pub fn set_reaches_into_outer_context(&mut self, _v: isize) { + self.reaches_into_outer_context = _v + } + + pub fn is_precedence_filter_suppressed(&self) -> bool { + self.precedence_filter_suppressed + } + + pub fn set_precedence_filter_suppressed(&mut self, _v: bool) { + self.precedence_filter_suppressed = _v; + } +} + +fn check_non_greedy_decision(source: &ATNConfig, target: &dyn ATNState) -> bool { + if let LexerATNConfig { + passed_through_non_greedy_decision: true, + .. + } = source.get_type() + { + return true; + } + if let ATNStateType::DecisionState { + nongreedy: true, .. + } = target.get_state_type() + { + return true; + } + false +} diff --git a/runtime/Rust/src/atn_config_set.rs b/runtime/Rust/src/atn_config_set.rs new file mode 100644 index 0000000000..d4eea7ca20 --- /dev/null +++ b/runtime/Rust/src/atn_config_set.rs @@ -0,0 +1,257 @@ +use std::cmp::max; +use std::collections::HashMap; +use std::fmt::{Debug, Error, Formatter}; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; + +use bit_set::BitSet; +use murmur3::murmur3_32::MurmurHasher; + +use crate::atn_config::ATNConfig; +use crate::atn_simulator::IATNSimulator; +use crate::atn_state::ATNStateRef; +use crate::parser_atn_simulator::MergeCache; +use crate::prediction_context::{MurmurHasherBuilder, PredictionContext}; +use crate::semantic_context::SemanticContext; + +pub struct ATNConfigSet { + cached_hash: u64, + + //todo looks like we need only iteration for configs + // so i think we can replace configs and lookup with indexhashset + config_lookup: HashMap, + + //todo remove box? 
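+ /// All configurations in insertion order; `config_lookup` maps a dedup key
+ /// to an index into this vector (see `add_cached`).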
+ pub(crate) configs: Vec>, + + pub(crate) conflicting_alts: BitSet, + + dips_into_outer_context: bool, + + full_ctx: bool, + + has_semantic_context: bool, + + read_only: bool, + + unique_alt: isize, + + /// creates key for lookup + /// Key::Full - for Lexer + /// Key::Partial - for Parser + hasher: fn(&ATNConfig) -> Key, +} + +#[derive(Eq, PartialEq)] +enum Key { + Full(ATNConfig), + Partial(i32, ATNStateRef, isize, SemanticContext), +} + +impl Hash for Key { + fn hash(&self, state: &mut H) { + match self { + Key::Full(x) => x.hash(state), + Key::Partial(hash, _, _, _) => state.write_i32(*hash), + } + } +} + +impl Debug for ATNConfigSet { + fn fmt(&self, _f: &mut Formatter<'_>) -> Result<(), Error> { + _f.write_str("ATNConfigSet")?; + _f.debug_list().entries(self.configs.iter()).finish()?; + if self.has_semantic_context { + _f.write_str(",hasSemanticContext=true")? + } + if self.conflicting_alts.is_empty() { + _f.write_fmt(format_args!(",uniqueAlt={}", self.unique_alt)) + } else { + _f.write_fmt(format_args!(",conflictingAlts={:?}", self.conflicting_alts)) + } + } +} + +impl PartialEq for ATNConfigSet { + fn eq(&self, other: &Self) -> bool { + self.configs == other.configs + && self.full_ctx == other.full_ctx + && self.unique_alt == other.unique_alt + && self.conflicting_alts == other.conflicting_alts + && self.has_semantic_context == other.has_semantic_context + && self.dips_into_outer_context == other.dips_into_outer_context + } +} + +impl Eq for ATNConfigSet {} + +impl Hash for ATNConfigSet { + fn hash(&self, state: &mut H) { + self.configs.hash(state) + } +} + +impl ATNConfigSet { + pub fn new_base_atnconfig_set(full_ctx: bool) -> ATNConfigSet { + ATNConfigSet { + cached_hash: 0, + config_lookup: HashMap::with_hasher(MurmurHasherBuilder {}), + configs: vec![], + conflicting_alts: Default::default(), + dips_into_outer_context: false, + full_ctx, + has_semantic_context: false, + read_only: false, + unique_alt: 0, + hasher: Self::local_hash_key, + } + } + + // for lexerATNConfig + pub fn new_ordered() -> ATNConfigSet { + let mut a = ATNConfigSet::new_base_atnconfig_set(true); + + a.hasher = Self::full_hash_key; + a + } + + fn full_hash_key(config: &ATNConfig) -> Key { + Key::Full(config.clone()) + } + + fn local_hash_key(config: &ATNConfig) -> Key { + let mut hasher = MurmurHasher::default(); + config.get_state().hash(&mut hasher); + config.get_alt().hash(&mut hasher); + config.semantic_context.hash(&mut hasher); + + Key::Partial( + hasher.finish() as i32, + config.get_state(), + config.get_alt(), + config.semantic_context.deref().clone(), + ) + } + + pub fn add_cached( + &mut self, + config: Box, + mut merge_cache: Option<&mut MergeCache>, + ) -> bool { + assert!(!self.read_only); + + if *config.semantic_context != SemanticContext::NONE { + self.has_semantic_context = true + } + + if config.get_reaches_into_outer_context() > 0 { + self.dips_into_outer_context = true + } + + let hasher = self.hasher; + let key = hasher(config.as_ref()); + + if let Some(existing) = self.config_lookup.get(&key) { + let existing = self.configs.get_mut(*existing).unwrap().as_mut(); + let root_is_wildcard = !self.full_ctx; + + let merged = PredictionContext::merge( + existing.get_context().unwrap(), + config.get_context().unwrap(), + root_is_wildcard, + &mut merge_cache, + ); + + existing.set_reaches_into_outer_context(max( + existing.get_reaches_into_outer_context(), + config.get_reaches_into_outer_context(), + )); + + if config.is_precedence_filter_suppressed() { + 
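+                // the flag is sticky: once any merged duplicate sets it,
+                // the surviving config keeps it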
existing.set_precedence_filter_suppressed(true) + } + + existing.set_context(merged); + } else { + self.config_lookup.insert(key, self.configs.len()); + self.cached_hash = 0; + self.configs.push(config); + } + true + } + + pub fn add(&mut self, config: Box) -> bool { + self.add_cached(config, None) + } + + pub fn get_items(&self) -> impl Iterator { + self.configs.iter().map(|c| c.as_ref()) + } + + pub fn optimize_configs(&mut self, _interpreter: &dyn IATNSimulator) { + if self.configs.is_empty() { + return; + } + + for config in self.configs.iter_mut() { + let mut visited = HashMap::new(); + config.set_context( + _interpreter + .shared_context_cache() + .get_shared_context(config.get_context().unwrap(), &mut visited), + ); + } + } + + pub fn length(&self) -> usize { + self.configs.len() + } + + pub fn is_empty(&self) -> bool { + self.configs.is_empty() + } + + pub fn has_semantic_context(&self) -> bool { + self.has_semantic_context + } + + pub fn set_has_semantic_context(&mut self, _v: bool) { + self.has_semantic_context = _v; + } + + pub fn read_only(&self) -> bool { + self.read_only + } + + pub fn set_read_only(&mut self, _read_only: bool) { + self.read_only = _read_only; + } + + pub fn full_context(&self) -> bool { + self.full_ctx + } + + //duplicate of the self.conflicting_alts??? + pub fn get_alts(&self) -> BitSet { + self.configs.iter().fold(BitSet::new(), |mut acc, c| { + acc.insert(c.get_alt() as usize); + acc + }) + } + + pub fn get_unique_alt(&self) -> isize { + self.unique_alt + } + + pub fn set_unique_alt(&mut self, _v: isize) { + self.unique_alt = _v + } + + pub fn get_dips_into_outer_context(&self) -> bool { + self.dips_into_outer_context + } + + pub fn set_dips_into_outer_context(&mut self, _v: bool) { + self.dips_into_outer_context = _v + } +} diff --git a/runtime/Rust/src/atn_deserialization_options.rs b/runtime/Rust/src/atn_deserialization_options.rs new file mode 100644 index 0000000000..34c8890d5c --- /dev/null +++ b/runtime/Rust/src/atn_deserialization_options.rs @@ -0,0 +1,23 @@ +#[allow(dead_code)] +#[derive(Debug)] +pub struct ATNDeserializationOptions { + read_only: bool, + verify_atn: bool, + generate_rule_bypass_transitions: bool, +} + +impl ATNDeserializationOptions { + pub fn is_verify(&self) -> bool { + self.verify_atn + } +} + +impl Default for ATNDeserializationOptions { + fn default() -> Self { + ATNDeserializationOptions { + read_only: true, + verify_atn: true, + generate_rule_bypass_transitions: false, + } + } +} diff --git a/runtime/Rust/src/atn_deserializer.rs b/runtime/Rust/src/atn_deserializer.rs new file mode 100644 index 0000000000..593da836f4 --- /dev/null +++ b/runtime/Rust/src/atn_deserializer.rs @@ -0,0 +1,614 @@ +use std::str::Chars; +use std::str::FromStr; + +use byteorder::LittleEndian; +use byteorder::WriteBytesExt; +use uuid::Uuid; + +use crate::atn::ATN; +use crate::atn_deserialization_options::ATNDeserializationOptions; +use crate::atn_state::ATNBlockStart; +use crate::atn_state::ATNDecisionState; +use crate::atn_state::ATNState; +use crate::atn_state::ATNStateType; +use crate::atn_state::BaseATNState; +use crate::atn_state::*; +use crate::atn_type::ATNType; +use crate::int_stream::EOF; +use crate::interval_set::IntervalSet; +use crate::lexer_action::LexerAction::*; +use crate::lexer_action::*; +use crate::transition::Transition; +use crate::transition::*; + +lazy_static! 
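+// each UUID below identifies a revision of the serialized ATN format;
+// `check_uuid` rejects input whose UUID is not in `SUPPORTED_UUIDS`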
{ + static ref BASE_SERIALIZED_UUID: Uuid = + Uuid::from_str("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3").unwrap(); + static ref ADDED_PRECEDENCE_TRANSITIONS: Uuid = + Uuid::from_str("1DA0C57D-6C06-438A-9B27-10BCB3CE0F61").unwrap(); + static ref ADDED_LEXER_ACTIONS: Uuid = + Uuid::from_str("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E").unwrap(); + static ref ADDED_UNICODE_SMP: Uuid = + Uuid::from_str("59627784-3BE5-417A-B9EB-8131A7286089").unwrap(); + static ref SUPPORTED_UUIDS: Vec = vec![ + *BASE_SERIALIZED_UUID, + *ADDED_PRECEDENCE_TRANSITIONS, + *ADDED_LEXER_ACTIONS, + *ADDED_UNICODE_SMP, + ]; +} + +const SERIALIZED_VERSION: isize = 3; + +#[derive(Debug)] +pub struct ATNDeserializer { + deserialization_options: ATNDeserializationOptions, +} + +impl ATNDeserializer { + pub fn new(options: Option) -> ATNDeserializer { + ATNDeserializer { + deserialization_options: options.unwrap_or(ATNDeserializationOptions::default()), + } + } + + pub fn deserialize(&self, data: Chars<'_>) -> ATN { + let mut data = data.clone().map(|ch| { + let mut ch = ch as isize; + // decode surrogates + ch = if ch > 0xFFFF { ch - 0x3000 } else { ch }; + ch -= 2; + ch + }); + + self.check_version(data.next().unwrap() + 2); + + let _uuid = self.check_uuid(&mut data); + + let mut atn = self.read_atn(&mut data); + + self.read_states(&mut atn, &mut data); + self.read_rules(&mut atn, &mut data); + self.read_modes(&mut atn, &mut data); + + let mut sets = self.read_sets(&mut atn, &mut data, |data| { + data.next().unwrap() as u16 as isize + }); + + sets.extend(self.read_sets(&mut atn, &mut data, |data| { + (data.next().unwrap() & 0xFFFF) | data.next().unwrap() << 16 + })); + + self.read_edges(&mut atn, &mut data, &sets); + self.read_decisions(&mut atn, &mut data); + if atn.grammar_type == ATNType::LEXER { + self.read_lexer_actions(&mut atn, &mut data); + } + self.mark_precedence_decisions(&mut atn, &mut data); + if self.deserialization_options.is_verify() { + self.verify_atn(&mut atn, &mut data); + } + // TODO parser + // if a.deserializationOptions.generateRuleBypassTransitions && atn.grammarType == ATNTypeParser { + // a.generateRuleBypassTransitions(atn) + // a.verifyATN(atn) + // } + + atn + } + + // fn reset(&self, _data: Vec) { unimplemented!() } + + fn check_version(&self, version: isize) { + if version != self::SERIALIZED_VERSION { + panic!( + "Could not deserialize ATN with version {} (expected {})", + version, SERIALIZED_VERSION + ); + } + } + + fn check_uuid(&self, data: &mut dyn Iterator) -> Uuid { + //rust uses UTF-8 encoding so we need explicitly convert unicode + //codepoint numbers to bytes + let mut bytes = Vec::new(); + for i in data.take(8) { + bytes.write_u16::(i as u16).unwrap(); + } + + bytes.reverse(); + let uuid = Uuid::from_slice(&bytes).unwrap(); + if !SUPPORTED_UUIDS.contains(&uuid) { + panic!("Could not deserialize ATN with UUID {}", uuid) + } + uuid + } + + fn read_atn(&self, data: &mut dyn Iterator) -> ATN { + let atn = ATN::new_atn( + match data.next() { + Some(0) => ATNType::LEXER, + Some(1) => ATNType::PARSER, + _ => panic!("invalid ATN type"), + }, + data.next().unwrap(), + ); + + atn + } + + fn read_states(&self, atn: &mut ATN, data: &mut dyn Iterator) { + // let loop_back_states = Vec::<(BaseATNState,isize)>::new(); + // let end_states = Vec::<(BaseATNState,isize)>::new(); + let states_count = data.next().unwrap() as usize; + for i in 0..states_count { + let state_type = data.next().unwrap(); + if state_type == ATNSTATE_INVALID_STATE_NUMBER { + atn.add_state(self.state_factory(ATNSTATE_INVALID_TYPE, 
-1, i)); + panic!("why invalid state serialized?"); + } + + let mut rule_index = data.next().unwrap(); + if rule_index == 0xFFFF { + rule_index = -1; + } + let mut state = self.state_factory(state_type, rule_index, i); + + match state.get_state_type_mut() { + ATNStateType::DecisionState { + state: ATNDecisionState::BlockStartState { end_state, .. }, + .. + } => *end_state = data.next().unwrap() as ATNStateRef, + ATNStateType::LoopEndState(loop_back) => { + *loop_back = data.next().unwrap() as ATNStateRef + } + _ => (), + } + atn.add_state(state); + } + + let num_non_greedy = data.next().unwrap(); + //println!("num_non_greedy {}", num_non_greedy); + for _ in 0..num_non_greedy { + let st = data.next().unwrap() as usize; + if let ATNStateType::DecisionState { nongreedy: ng, .. } = + atn.states[st].get_state_type_mut() + { + *ng = true + } + } + + //if (supportsPrecedencePredicates) + if true { + let num_precedence_states = data.next().unwrap(); + for _ in 0..num_precedence_states { + let st = data.next().unwrap() as usize; + if let ATNStateType::RuleStartState { + is_left_recursive: left_rec, + .. + } = atn.states[st].get_state_type_mut() + { + *left_rec = true + } + } + } + } + + fn read_rules(&self, atn: &mut ATN, data: &mut dyn Iterator) { + let nrules = data.next().unwrap() as usize; + // if atn.grammar_type == ATNType::LEXER { + // atn.rule_to_token_type.resize(nrules, 0) + // } + + atn.rule_to_start_state.resize(nrules, 0); + for i in 0..nrules { + let s = data.next().unwrap() as usize; + atn.rule_to_start_state[i] = s; + if atn.grammar_type == ATNType::LEXER { + let token_type = data.next().unwrap(); + + atn.rule_to_token_type.push(token_type); + } + } + //println!("rule_to_token_type {:?}", atn.rule_to_token_type); + //println!("rule_to_start_state {:?}", atn.rule_to_start_state); + + atn.rule_to_stop_state.resize(nrules, 0); + for i in 0..atn.states.len() { + let state = atn.states.get(i).unwrap(); + if let ATNStateType::RuleStopState = state.get_state_type() { + let rule_index = state.get_rule_index(); + atn.rule_to_stop_state[rule_index] = i; + let start_state = atn + .states + .get_mut(atn.rule_to_start_state[rule_index]) + .unwrap(); + if let ATNStateType::RuleStartState { + stop_state: stop, .. 
+ } = start_state.get_state_type_mut() + { + *stop = i + } + } + } + } + + fn read_modes(&self, atn: &mut ATN, data: &mut dyn Iterator) { + let nmodes = data.next().unwrap(); + for _i in 0..nmodes { + atn.mode_to_start_state.push(data.next().unwrap() as usize); + } + } + + fn read_sets>( + &self, + _atn: &mut ATN, + data: &mut T, + read_unicode: fn(&mut T) -> isize, + ) -> Vec { + let nsets = data.next().unwrap(); + let mut sets = Vec::new(); + for _i in 0..nsets { + let intervals = data.next().unwrap(); + + let mut set = IntervalSet::new(); + + // check if contains eof + if data.next().unwrap() != 0 { + set.add_one(-1) + } + + for _ in 0..intervals { + set.add_range(read_unicode(data), read_unicode(data)); + } + sets.push(set); + } + + sets + } + + fn read_edges( + &self, + atn: &mut ATN, + data: &mut dyn Iterator, + sets: &Vec, + ) { + let nedges = data.next().unwrap(); + + for _i in 0..nedges { + let src = data.next().unwrap() as usize; + let trg = data.next().unwrap() as usize; + let ttype = data.next().unwrap(); + let arg1 = data.next().unwrap(); + let arg2 = data.next().unwrap(); + let arg3 = data.next().unwrap(); + + let transition = self.edge_factory(atn, ttype, src, trg, arg1, arg2, arg3, sets); + + atn.states.get_mut(src).unwrap().add_transition(transition); + } + + let mut new_tr = Vec::new(); + for i in &atn.states { + for tr in i.get_transitions() { + match tr.get_serialization_type() { + TransitionType::TRANSITION_RULE => { + // println!("TRANSITION_RULE"); + let tr = tr.as_ref().cast::(); + let target = atn.states.get(tr.get_target()).unwrap(); + + let outermost_prec_return = if let ATNStateType::RuleStartState { + is_left_recursive: true, + .. + } = atn + .states + .get(atn.rule_to_start_state[target.get_rule_index()]) + .unwrap() + .get_state_type() + { + if tr.precedence == 0 { + target.get_rule_index() as isize + } else { + -1 + } + } else { + -1 + }; + + let return_tr = EpsilonTransition { + target: tr.follow_state, + outermost_precedence_return: outermost_prec_return, + }; + new_tr.push(( + atn.rule_to_stop_state[target.get_rule_index()], + Box::new(return_tr), + )); + } + _ => continue, + } + } + } + new_tr + .drain(..) + .for_each(|(state, tr)| atn.states[state].add_transition(tr)); + + for i in 0..atn.states.len() { + let atn_state = atn.states.get(i).unwrap(); + match atn_state.get_state_type() { + ATNStateType::DecisionState { + state: + ATNDecisionState::BlockStartState { + end_state: _, + en: _, + }, + .. + } => { + + // if *end_state == 0 { panic!("invalid state")} + // looks like it is never used during recognition + // todo missed part + } + // ATNStateType::DecisionState {state:ATNDecisionState::PlusLoopBack,..} =>{ + // for tr in atn_state.get_transitions(){ + // if let ATNStateType::DecisionState { + // state:ATNDecisionState::BlockStartState { + // en:ATNBlockStart::PlusBlockStart(loopBack),..},..} + // = atn.states.get_mut(tr.get_target()).unwrap().get_state_type_mut(){ + // *loopBack = i; + // + // } + // } + // } + _x => { /*println!("{:?}",x);*/ } + } + } + } + + fn read_decisions(&self, atn: &mut ATN, _data: &mut dyn Iterator) { + let ndecisions = _data.next().unwrap(); + for i in 0..ndecisions { + let s = _data.next().unwrap() as usize; + let dec_state: &mut Box = atn.states.get_mut(s).unwrap(); + atn.decision_to_state.push(s); + if let ATNStateType::DecisionState { decision, .. 
} = dec_state.get_state_type_mut() { + *decision = i + } + } + } + + fn read_lexer_actions(&self, atn: &mut ATN, _data: &mut dyn Iterator) { + //lexer actions are always supported here + let nactions = _data.next().unwrap() as usize; + + for _i in 0..nactions { + let action_type = _data.next().unwrap(); + + let mut data1 = _data.next().unwrap(); + if data1 == 0xFFFF { + data1 = -1; + } + let mut data2 = _data.next().unwrap(); + if data2 == 0xFFFF { + data2 = -1; + } + + let lexer_action = self.lexer_action_factory(action_type, data1, data2); + + atn.lexer_actions.push(lexer_action); + } + } + + fn mark_precedence_decisions(&self, _atn: &mut ATN, _data: &mut dyn Iterator) { + let mut precedence_states = Vec::new(); + for state in _atn.states.iter() { + if let ATNStateType::DecisionState { + state: ATNDecisionState::StarLoopEntry { .. }, + .. + } = state.get_state_type() + { + if let ATNStateType::RuleStartState { + is_left_recursive: true, + .. + } = + _atn.states[_atn.rule_to_start_state[state.get_rule_index()]].get_state_type() + { + let maybe_loop_end = + state.get_transitions().iter().last().unwrap().get_target(); + let maybe_loop_end = _atn.states[maybe_loop_end].as_ref(); + if let ATNStateType::LoopEndState(_) = maybe_loop_end.get_state_type() { + if maybe_loop_end.has_epsilon_only_transitions() { + if let ATNStateType::RuleStopState = _atn.states + [maybe_loop_end.get_transitions()[0].get_target()] + .get_state_type() + { + precedence_states.push(state.get_state_number()) + } + } + } + } + } + } + for st in precedence_states { + if let ATNStateType::DecisionState { + state: + ATNDecisionState::StarLoopEntry { + loop_back_state: _, + is_precedence, + }, + .. + } = _atn.states[st].get_state_type_mut() + { + *is_precedence = true + } + } + } + + fn verify_atn(&self, _atn: &mut ATN, _data: &mut dyn Iterator) { + //TODO + } + + // fn check_condition(&self, _condition: bool, _message: String) { unimplemented!() } + + fn edge_factory( + &self, + _atn: &ATN, + type_index: isize, + _src: ATNStateRef, + target: ATNStateRef, + arg1: isize, + arg2: isize, + arg3: isize, + sets: &Vec, + ) -> Box { + // // let target = atn.states.get + // let mut base = BaseTransition { + // target: trg, + // // is_epsilon: false, + // // label: 0, + // interval_set: IntervalSet::new_interval_set(), + // }; + + match type_index { + TRANSITION_EPSILON => Box::new(EpsilonTransition { + target, + outermost_precedence_return: 0, + }), + TRANSITION_RANGE => Box::new(RangeTransition { + target, + start: if arg3 != 0 { + super::token::TOKEN_EOF + } else { + arg1 + }, + stop: arg2, + }), + TRANSITION_RULE => { + // base.set_target(arg1 as usize); + Box::new(RuleTransition { + target: arg1 as usize, + follow_state: target, + rule_index: arg2, + precedence: arg3, + }) + } + TRANSITION_PREDICATE => Box::new(PredicateTransition { + target, + is_ctx_dependent: arg3 != 0, + rule_index: arg1, + pred_index: arg2, + }), + TRANSITION_ATOM => Box::new(AtomTransition { + target, + label: if arg3 != 0 { EOF } else { arg1 }, + }), + TRANSITION_ACTION => Box::new(ActionTransition { + target, + is_ctx_dependent: arg3 != 0, + rule_index: arg1, + action_index: arg2, + pred_index: 0, + }), + TRANSITION_SET => Box::new(SetTransition { + target, + set: sets[arg1 as usize].clone(), + }), + TRANSITION_NOTSET => Box::new(NotSetTransition { + target, + set: sets[arg1 as usize].clone(), + }), + TRANSITION_WILDCARD => Box::new(WildcardTransition { target }), + TRANSITION_PRECEDENCE => Box::new(PrecedencePredicateTransition { + target, + precedence: 
arg1, + }), + _ => panic!("invalid transition type"), + } + } + + fn state_factory( + &self, + type_index: isize, + rule_index: isize, + state_number: usize, + ) -> Box { + let mut state = BaseATNState::new_base_atnstate(); + state.state_number = state_number; + state.rule_index = rule_index as usize; + state.state_type_id = type_index; + state.state_type = match type_index { + ATNSTATE_INVALID_TYPE => ATNStateType::InvalidState, + ATNSTATE_BASIC => ATNStateType::BasicState, + ATNSTATE_RULE_START => ATNStateType::RuleStartState { + stop_state: 0, + is_left_recursive: false, + }, + ATNSTATE_BLOCK_START => ATNStateType::DecisionState { + decision: -1, + nongreedy: false, + state: ATNDecisionState::BlockStartState { + end_state: 0, + en: ATNBlockStart::BasicBlockStart, + }, + }, + ATNSTATE_PLUS_BLOCK_START => ATNStateType::DecisionState { + decision: -1, + nongreedy: false, + state: ATNDecisionState::BlockStartState { + end_state: 0, + en: ATNBlockStart::PlusBlockStart(0), + }, + }, + ATNSTATE_STAR_BLOCK_START => ATNStateType::DecisionState { + decision: -1, + nongreedy: false, + state: ATNDecisionState::BlockStartState { + end_state: 0, + en: ATNBlockStart::StarBlockStart, + }, + }, + ATNSTATE_TOKEN_START => ATNStateType::DecisionState { + decision: -1, + nongreedy: false, + state: ATNDecisionState::TokenStartState, + }, + ATNSTATE_RULE_STOP => ATNStateType::RuleStopState, + ATNSTATE_BLOCK_END => ATNStateType::BlockEndState(0), + ATNSTATE_STAR_LOOP_BACK => ATNStateType::StarLoopbackState, + ATNSTATE_STAR_LOOP_ENTRY => ATNStateType::DecisionState { + decision: -1, + nongreedy: false, + state: ATNDecisionState::StarLoopEntry { + loop_back_state: 0, + is_precedence: false, + }, + }, + ATNSTATE_PLUS_LOOP_BACK => ATNStateType::DecisionState { + decision: -1, + nongreedy: false, + state: ATNDecisionState::PlusLoopBack, + }, + ATNSTATE_LOOP_END => ATNStateType::LoopEndState(0), + + _ => panic!("invalid ATN state type"), + }; + // println!("created state {} {:?}", state_number, state.state_type); + Box::new(state) + } + + fn lexer_action_factory(&self, action_type: isize, data1: isize, data2: isize) -> LexerAction { + match action_type { + LEXER_ACTION_TYPE_CHANNEL => LexerChannelAction(data1), + LEXER_ACTION_TYPE_CUSTOM => LexerCustomAction { + rule_index: data1, + action_index: data2, + }, + LEXER_ACTION_TYPE_MODE => LexerModeAction(data1), + LEXER_ACTION_TYPE_MORE => LexerMoreAction, + LEXER_ACTION_TYPE_POP_MODE => LexerPopModeAction, + LEXER_ACTION_TYPE_PUSH_MODE => LexerPushModeAction(data1), + LEXER_ACTION_TYPE_SKIP => LexerSkipAction, + LEXER_ACTION_TYPE_TYPE => LexerTypeAction(data1), + _ => panic!("invalid action type {}", action_type), + } + } +} diff --git a/runtime/Rust/src/atn_simulator.rs b/runtime/Rust/src/atn_simulator.rs new file mode 100644 index 0000000000..9fae27a60c --- /dev/null +++ b/runtime/Rust/src/atn_simulator.rs @@ -0,0 +1,54 @@ +use std::cell::RefCell; +use std::fmt::{Debug, Error, Formatter}; +use std::ops::Deref; +use std::rc::Rc; + +use crate::atn::ATN; +use crate::dfa::DFA; +use crate::prediction_context::PredictionContextCache; + +pub trait IATNSimulator { + fn shared_context_cache(&self) -> &PredictionContextCache; + fn atn(&self) -> &ATN; + fn decision_to_dfa(&self) -> &Vec>; +} + +pub struct BaseATNSimulator { + pub atn: Rc, + pub shared_context_cache: Rc, + pub decision_to_dfa: Rc>>, +} + +impl Debug for BaseATNSimulator { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + f.write_str("BaseATNSimulator { .. 
}") + } +} + +impl BaseATNSimulator { + pub fn new_base_atnsimulator( + atn: Rc, + decision_to_dfa: Rc>>, + shared_context_cache: Rc, + ) -> BaseATNSimulator { + BaseATNSimulator { + atn, + shared_context_cache, + decision_to_dfa, + } + } +} + +impl IATNSimulator for BaseATNSimulator { + fn shared_context_cache(&self) -> &PredictionContextCache { + self.shared_context_cache.deref() + } + + fn atn(&self) -> &ATN { + self.atn.as_ref() + } + + fn decision_to_dfa(&self) -> &Vec> { + self.decision_to_dfa.as_ref() + } +} diff --git a/runtime/Rust/src/atn_state.rs b/runtime/Rust/src/atn_state.rs new file mode 100644 index 0000000000..2e6a12508a --- /dev/null +++ b/runtime/Rust/src/atn_state.rs @@ -0,0 +1,443 @@ +use std::fmt::Debug; + +use once_cell::sync::OnceCell; + +use crate::interval_set::IntervalSet; +use crate::transition::Transition; + +pub(crate) const ATNSTATE_INVALID_TYPE: isize = 0; +pub(crate) const ATNSTATE_BASIC: isize = 1; +pub(crate) const ATNSTATE_RULE_START: isize = 2; +pub(crate) const ATNSTATE_BLOCK_START: isize = 3; +pub(crate) const ATNSTATE_PLUS_BLOCK_START: isize = 4; +pub(crate) const ATNSTATE_STAR_BLOCK_START: isize = 5; +pub(crate) const ATNSTATE_TOKEN_START: isize = 6; +pub(crate) const ATNSTATE_RULE_STOP: isize = 7; +pub(crate) const ATNSTATE_BLOCK_END: isize = 8; +pub(crate) const ATNSTATE_STAR_LOOP_BACK: isize = 9; +pub(crate) const ATNSTATE_STAR_LOOP_ENTRY: isize = 10; +pub(crate) const ATNSTATE_PLUS_LOOP_BACK: isize = 11; +pub(crate) const ATNSTATE_LOOP_END: isize = 12; +pub(crate) const ATNSTATE_INVALID_STATE_NUMBER: isize = -1; + +//might be changed later +#[doc(hidden)] +#[derive(Debug, Eq, PartialEq)] +pub enum ATNStateType { + RuleStartState { + stop_state: ATNStateRef, + is_left_recursive: bool, + }, + RuleStopState, + BlockEndState(ATNStateRef), + LoopEndState(ATNStateRef), + StarLoopbackState, + BasicState, + DecisionState { + decision: isize, + nongreedy: bool, + state: ATNDecisionState, + }, + InvalidState, +} + +#[doc(hidden)] +#[derive(Debug, Eq, PartialEq)] +pub enum ATNDecisionState { + StarLoopEntry { + loop_back_state: ATNStateRef, + is_precedence: bool, + }, + TokenStartState, + PlusLoopBack, + BlockStartState { + end_state: ATNStateRef, + en: ATNBlockStart, + }, +} + +#[doc(hidden)] +#[derive(Debug, Eq, PartialEq)] +pub enum ATNBlockStart { + BasicBlockStart, + StarBlockStart, + PlusBlockStart(ATNStateRef), +} + +pub type ATNStateRef = usize; + +// todo no need for trait here, it is too slow for hot code +pub trait ATNState: Sync + Send + Debug { + fn has_epsilon_only_transitions(&self) -> bool; + + fn get_rule_index(&self) -> usize; + fn set_rule_index(&self, v: usize); + + fn get_next_tokens_within_rule(&self) -> &OnceCell; + // fn set_next_token_within_rule(&mut self, v: IntervalSet); + + fn get_state_type(&self) -> &ATNStateType; + fn get_state_type_mut(&mut self) -> &mut ATNStateType; + + fn get_state_type_id(&self) -> isize; + + fn get_state_number(&self) -> usize; + fn set_state_number(&self, state_number: isize); + + fn get_transitions(&self) -> &Vec>; + fn set_transitions(&self, t: Vec>); + fn add_transition(&mut self, trans: Box); +} + +#[derive(Debug)] +pub struct BaseATNState { + next_tokens_within_rule: OnceCell, + + // atn: Box, + epsilon_only_transitions: bool, + + pub rule_index: usize, + + pub state_number: usize, + + pub state_type_id: isize, + + pub state_type: ATNStateType, + + transitions: Vec>, +} + +impl BaseATNState { + pub fn new_base_atnstate() -> BaseATNState { + BaseATNState { + next_tokens_within_rule: 
OnceCell::new(), + epsilon_only_transitions: false, + rule_index: 0, + state_number: 0, + state_type_id: 0, + state_type: ATNStateType::InvalidState, + transitions: Vec::new(), + } + } +} + +impl ATNState for BaseATNState { + fn has_epsilon_only_transitions(&self) -> bool { + self.epsilon_only_transitions + } + fn get_rule_index(&self) -> usize { + self.rule_index + } + + fn set_rule_index(&self, _v: usize) { + unimplemented!() + } + + fn get_next_tokens_within_rule(&self) -> &OnceCell { + &self.next_tokens_within_rule + } + + fn get_state_type(&self) -> &ATNStateType { + &self.state_type + } + + fn get_state_type_mut(&mut self) -> &mut ATNStateType { + &mut self.state_type + } + + fn get_state_type_id(&self) -> isize { + self.state_type_id + } + + fn get_state_number(&self) -> usize { + self.state_number + } + + fn set_state_number(&self, _state_number: isize) { + unimplemented!() + } + + fn get_transitions(&self) -> &Vec> { + &self.transitions + } + + fn set_transitions(&self, _t: Vec>) { + unimplemented!() + } + + fn add_transition(&mut self, trans: Box) { + if self.transitions.is_empty() { + self.epsilon_only_transitions = trans.is_epsilon() + } else { + self.epsilon_only_transitions &= trans.is_epsilon() + } + + let mut already_present = false; + for existing in self.transitions.iter() { + if existing.get_target() == trans.get_target() { + if existing.get_label().is_some() + && trans.get_label().is_some() + && existing.get_label() == trans.get_label() + { + already_present = true; + break; + } else if existing.is_epsilon() && trans.is_epsilon() { + already_present = true; + break; + } + } + } + if !already_present { + self.transitions.push(trans); + } + } +} +//pub struct BasicState { +// base: BaseATNState, +//} +// +//fn new_basic_state() -> BasicState { unimplemented!() } +// +//pub trait DecisionState:ATNState { +// +// fn get_decision(&self) -> isize; +// fn set_decision(&self, b: isize); +// +// fn get_non_greedy(&self) -> bool; +// fn set_non_greedy(&self, b: bool); +//} +// +//pub struct BaseDecisionState { +// base: BaseATNState, +// decision: isize, +// non_greedy: bool, +//} + +// +//fn new_base_decision_state() -> BaseDecisionState { unimplemented!() } +//impl DecisionState for BaseDecisionState { +// fn get_decision(&self) -> isize { unimplemented!() } +// +// fn set_decision(&self, b: isize) { unimplemented!() } +// +// fn get_non_greedy(&self) -> bool { unimplemented!() } +// +// fn set_non_greedy(&self, b: bool) { unimplemented!() } +//} +// +//impl ATNState for BaseDecisionState{ +// fn get_epsilon_only_transitions(&self) -> bool { +// self.base.get_epsilon_only_transitions() +// } +// +// fn get_rule_index(&self) -> isize { +// self.base.get_rule_index() +// } +// +// fn set_rule_index(&self, v: isize) { +// self.base.set_rule_index(v) +// } +// +// fn get_next_token_within_rule(&self) -> IntervalSet { +// self.base.get_next_token_within_rule() +// } +// +// fn set_next_token_within_rule(&self, v: IntervalSet) { +// self.base.set_next_token_within_rule(v) +// } +// +// fn get_atn(&self) -> Rc { +// self.base.get_atn() +// } +// +// fn set_atn(&self, atn: Box) { +// self.base.set_atn(atn) +// } +// +// fn get_state_type(&self) -> &ATNStateType { +// self.base.get_state_type() +// } +// +// fn get_state_number(&self) -> isize { +// self.base.get_state_number() +// } +// +// fn set_state_number(&self, stateNumber: isize) { +// self.base.set_state_number(stateNumber) +// } +// +// fn get_transitions(&self) -> Vec<&Transition> { +// self.base.get_transitions() +// } +// 
+// fn set_transitions(&self, t: Vec>) { +// self.base.set_transitions(t) +// } +// +// fn add_transition(&self, trans: Box, index: isize) { +// self.base.add_transition(trans, index) +// } +//} +//pub trait BlockStartState :DecisionState{ +// +// fn get_end_state(&self) -> &BlockEndState; +// fn set_end_state(&self, b: Box); +//} +// +//pub struct BaseBlockStartState { +// base: BaseDecisionState, +// end_state: Box, +//} +// +//fn new_block_start_state() -> BaseBlockStartState { unimplemented!() } +// +//impl BlockStartState for BaseBlockStartState { +// fn get_end_state(&self) -> &BlockEndState { unimplemented!() } +// +// fn set_end_state(&self, b: Box) { unimplemented!() } +//} +// +//impl DecisionState for BaseBlockStartState{ +// fn get_decision(&self) -> isize { +// self.base.get_decision() +// } +// +// fn set_decision(&self, b: isize) { +// self.base.set_decision(b) +// } +// +// fn get_non_greedy(&self) -> bool { +// self.base.get_non_greedy() +// } +// +// fn set_non_greedy(&self, b: bool) { +// self.base.set_non_greedy(b) +// } +//} +// +//impl ATNState for BaseBlockStartState{ +// fn get_epsilon_only_transitions(&self) -> bool { +// self.base.get_epsilon_only_transitions() +// } +// +// fn get_rule_index(&self) -> isize { +// self.base.get_rule_index() +// } +// +// fn set_rule_index(&self, v: isize) { +// self.base.set_rule_index(v) +// } +// +// fn get_next_token_within_rule(&self) -> IntervalSet { +// self.base.get_next_token_within_rule() +// } +// +// fn set_next_token_within_rule(&self, v: IntervalSet) { +// self.base.set_next_token_within_rule(v) +// } +// +// fn get_atn(&self) -> Rc { +// self.base.get_atn() +// } +// +// fn set_atn(&self, atn: Box) { +// self.base.set_atn(atn) +// } +// +// fn get_state_type(&self) -> &ATNStateType { +// self.base.get_state_type() +// } +// +// fn get_state_number(&self) -> isize { +// self.base.get_state_number() +// } +// +// fn set_state_number(&self, stateNumber: isize) { +// self.base.set_state_number(stateNumber) +// } +// +// fn get_transitions(&self) -> Vec<&Transition> { +// self.base.get_transitions() +// } +// +// fn set_transitions(&self, t: Vec>) { +// self.base.set_transitions(t) +// } +// +// fn add_transition(&self, trans: Box, index: isize) { +// self.base.add_transition(trans, index) +// } +//} +// +//pub struct BasicBlockStartState { +// base: BaseBlockStartState, +//} +// +//fn new_basic_block_start_state() -> BasicBlockStartState { unimplemented!() } +// +//pub struct BlockEndState { +// base: BaseATNState, +// start_state: Box, +//} +// +//fn new_block_end_state() -> BlockEndState { unimplemented!() } +// +//pub struct RuleStopState { +// base: BaseATNState, +//} +// +//fn new_rule_stop_state() -> RuleStopState { unimplemented!() } +// +//pub struct RuleStartState { +// base: BaseATNState, +// stop_state: Box, +// is_precedence_rule: bool, +//} +// +//fn new_rule_start_state() -> RuleStartState { unimplemented!() } +// +//pub struct PlusLoopbackState { +// base: BaseDecisionState, +//} +// +//fn new_plus_loopback_state() -> PlusLoopbackState { unimplemented!() } +// +//pub struct PlusBlockStartState { +// base: BaseBlockStartState, +// loop_back_state: Box, +//} +// +//fn new_plus_block_start_state() -> PlusBlockStartState { unimplemented!() } +// +//pub struct StarBlockStartState { +// base: BaseBlockStartState, +//} +// +//fn new_star_block_start_state() -> StarBlockStartState { unimplemented!() } +// +//pub struct StarLoopbackState { +// base: BaseATNState, +//} +// +//fn new_star_loopback_state() -> 
StarLoopbackState { unimplemented!() } +// +//pub struct StarLoopEntryState { +// base: BaseDecisionState, +// loop_back_state: Box, +// precedence_rule_decision: bool, +//} +// +//fn new_star_loop_entry_state() -> StarLoopEntryState { unimplemented!() } +// +//pub struct LoopEndState { +// base: BaseATNState, +// loop_back_state: Box, +//} +// +//fn new_loop_end_state() -> LoopEndState { unimplemented!() } +// +//pub struct TokensStartState { +// base: BaseDecisionState, +//} +// +//fn new_tokens_start_state() -> TokensStartState { unimplemented!() } diff --git a/runtime/Rust/src/atn_type.rs b/runtime/Rust/src/atn_type.rs new file mode 100644 index 0000000000..e379b81f8f --- /dev/null +++ b/runtime/Rust/src/atn_type.rs @@ -0,0 +1,6 @@ +#[doc(hidden)] +#[derive(Eq, PartialEq, Debug)] +pub enum ATNType { + LEXER = 0, + PARSER, +} diff --git a/runtime/Rust/src/char_stream.rs b/runtime/Rust/src/char_stream.rs new file mode 100644 index 0000000000..d93fc38bcc --- /dev/null +++ b/runtime/Rust/src/char_stream.rs @@ -0,0 +1,163 @@ +//! `IntStream` extension for Lexer that allows subslicing of underlying data +use std::char::REPLACEMENT_CHARACTER; +use std::convert::TryFrom; +use std::fmt::Debug; +use std::ops::{Index, Range, RangeFrom}; + +use crate::int_stream::IntStream; + +/// Provides underlying data for Tokens. +pub trait CharStream: IntStream { + /// Returns underlying data piece, either slice or owned copy. + /// Panics if provided indexes are invalid + /// Called by parser only on token intervals. + /// This fact can be used by custom implementations + fn get_text(&self, a: isize, b: isize) -> Data; +} + +/// Trait for input that can be accepted by `InputStream` to be able to provide lexer with data. +/// Public for implementation reasons. +pub trait InputData: + Index, Output = Self> + + Index, Output = Self> + + ToOwned + + Debug + + 'static +{ + // fn to_indexed_vec(&self) -> Vec<(u32, u32)>; + + #[doc(hidden)] + fn offset(&self, index: isize, item_offset: isize) -> Option; + + #[doc(hidden)] + fn item(&self, index: isize) -> Option; + + #[doc(hidden)] + fn len(&self) -> usize; + + #[doc(hidden)] + fn from_text(text: &str) -> Self::Owned; + + #[doc(hidden)] + fn to_display(&self) -> String; +} + +impl + From + TryFrom + Copy + Debug + 'static> InputData for [T] +where + >::Error: Debug, +{ + // fn to_indexed_vec(&self) -> Vec<(u32, u32)> { + // self.into_iter() + // .enumerate() + // .map(|(x, &y)| (x as u32, y.into())) + // .collect() + // } + + #[inline] + fn offset(&self, index: isize, item_offset: isize) -> Option { + let new_index = index + item_offset; + if new_index < 0 { + return None; // invalid; no char before first char + } + if new_index > self.len() as isize { + return None; + } + + Some(new_index) + } + + #[inline] + fn item(&self, index: isize) -> Option { + self.get(index as usize).map(|&it| it.into() as isize) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } + + #[inline] + fn from_text(text: &str) -> Self::Owned { + text.chars() + .map(|it| T::try_from(it as u32).unwrap()) + .collect() + } + + #[inline] + // default + fn to_display(&self) -> String { + self.iter() + .map(|x| char::try_from((*x).into()).unwrap_or(REPLACEMENT_CHARACTER)) + .collect() + } +} +// +// impl InputData for [u8] { +// #[inline] +// fn to_display(&self) -> String { String::from_utf8_lossy(self).into_owned() } +// } + +// impl InputData for [u16] { +// } +// +// impl InputData for [u32] { +// #[inline] +// fn to_display(&self) -> String { +// self.iter() +// .map(|x| 
char::try_from(*x).unwrap_or(REPLACEMENT_CHARACTER)) +// .collect() +// } +// } + +impl InputData for str { + // fn to_indexed_vec(&self) -> Vec<(u32, u32)> { + // self.char_indices() + // .map(|(i, ch)| (i as u32, ch as u32)) + // .collect() + // } + + #[inline] + fn offset(&self, mut index: isize, mut item_offset: isize) -> Option { + if item_offset == 0 { + return Some(index); + } + let direction = item_offset.signum(); + + while { + index += direction; + if index < 0 || index > self.len() as isize { + return None; + } + if self.is_char_boundary(index as usize) { + item_offset -= direction; + } + item_offset != 0 + } {} + + Some(index) + } + + #[inline] + fn item(&self, index: isize) -> Option { + self.get(index as usize..) + .and_then(|it| it.chars().next()) + .map(|it| it as isize) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } + + fn from_text(text: &str) -> Self::Owned { + text.to_owned() + } + + // #[inline] + // fn from_text(text: &str) -> Self::Owned { text.to_owned() } + + #[inline] + fn to_display(&self) -> String { + self.to_string() + } +} diff --git a/runtime/Rust/src/common_token_stream.rs b/runtime/Rust/src/common_token_stream.rs new file mode 100644 index 0000000000..e18a3c4a78 --- /dev/null +++ b/runtime/Rust/src/common_token_stream.rs @@ -0,0 +1,232 @@ +//! Channel based `TokenStream` +use std::borrow::Borrow; + +use crate::int_stream::{IntStream, IterWrapper, EOF}; +use crate::token::{Token, TOKEN_DEFAULT_CHANNEL, TOKEN_INVALID_TYPE}; +use crate::token_factory::TokenFactory; +use crate::token_source::TokenSource; +use crate::token_stream::{TokenStream, UnbufferedTokenStream}; + +/// Default token stream that skips token that not correspond to current channel. +#[derive(Debug)] +pub struct CommonTokenStream<'input, T: TokenSource<'input>> { + base: UnbufferedTokenStream<'input, T>, + channel: isize, +} + +better_any::tid! 
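+// A minimal construction sketch (hypothetical generated `MyLexer`; only
+// `CommonTokenStream::new` and `lt` below are actual APIs of this file):
+//
+//     let lexer = MyLexer::new(InputStream::new("a b c"));
+//     let mut tokens = CommonTokenStream::new(lexer);
+//     let first = tokens.lt(1); // first on-channel token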
{ impl<'input,T> TidAble<'input> for CommonTokenStream<'input, T> where T: TokenSource<'input>} + +impl<'input, T: TokenSource<'input>> IntStream for CommonTokenStream<'input, T> { + #[inline] + fn consume(&mut self) { + self.base.consume(); + // self.base.p = self.next_token_on_channel(self.base.p,self.channel); + // self.base.current_token_index = self.base.p; + let next = self.next_token_on_channel(self.base.p, self.channel, 1); + self.base.seek(next); + // Ok(()) + } + + #[inline] + fn la(&mut self, i: isize) -> isize { + self.lt(i) + .map(|t| t.borrow().get_token_type()) + .unwrap_or(TOKEN_INVALID_TYPE) + } + + #[inline(always)] + fn mark(&mut self) -> isize { + 0 + } + + #[inline(always)] + fn release(&mut self, _marker: isize) {} + + #[inline(always)] + fn index(&self) -> isize { + self.base.index() + } + + #[inline(always)] + fn seek(&mut self, index: isize) { + self.base.seek(index); + } + + #[inline(always)] + fn size(&self) -> isize { + self.base.size() + } + + fn get_source_name(&self) -> String { + self.base.get_source_name() + } +} + +impl<'input, T: TokenSource<'input>> TokenStream<'input> for CommonTokenStream<'input, T> { + type TF = T::TF; + + #[inline(always)] + fn lt(&mut self, k: isize) -> Option<&>::Tok> { + if k == 1 { + return self.base.tokens.get(self.base.p as usize); + } + if k == 0 { + panic!(); + } + if k < 0 { + return self.lb(-k); + } + self.lt_inner(k) + } + + #[inline] + fn get(&self, index: isize) -> &>::Tok { + self.base.get(index) + } + + fn get_token_source(&self) -> &dyn TokenSource<'input, TF = Self::TF> { + self.base.get_token_source() + } + + fn get_text_from_interval(&self, start: isize, stop: isize) -> String { + self.base.get_text_from_interval(start, stop) + } +} + +impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> { + /// Creates CommonTokenStream that produces tokens from `TOKEN_DEFAULT_CHANNEL` + pub fn new(lexer: T) -> CommonTokenStream<'input, T> { + Self::with_channel(lexer, TOKEN_DEFAULT_CHANNEL) + } + + /// Creates CommonTokenStream that produces tokens from `channel` + pub fn with_channel(lexer: T, channel: isize) -> CommonTokenStream<'input, T> { + let mut r = CommonTokenStream { + base: UnbufferedTokenStream::new_buffered(lexer), + channel, + }; + let i = r.next_token_on_channel(0, channel, 1); + r.base.seek(i); + r + } + + fn lt_inner(&mut self, k: isize) -> Option<&>::Tok> { + let mut i = self.base.p; + let mut n = 1; // we know tokens[p] is a good one + // find k good tokens + while n < k { + // skip off-channel tokens, but make sure to not look past EOF + if self.sync(i + 1) { + i = self.next_token_on_channel(i + 1, self.channel, 1); + } + n += 1; + } + // if ( i>range ) range = i; + return self.base.tokens.get(i as usize); + } + + /// Restarts this token stream + pub fn reset(&mut self) { + self.base.p = 0; + self.base.current_token_index = 0; + } + + /// Creates iterator over this token stream + pub fn iter(&mut self) -> IterWrapper<'_, Self> { + IterWrapper(self) + } + + fn sync(&mut self, i: isize) -> bool { + let need = i - self.size() + 1; + if need > 0 { + let fetched = self.base.fill(need); + return fetched >= need; + } + + true + } + // + // fn fetch(&self, n: isize) -> int { unimplemented!() } + // + // fn get_tokens(&self, start: isize, stop: isize, types: &IntervalSet) -> Vec { unimplemented!() } + // + // fn lazy_init(&self) { unimplemented!() } + // + // fn setup(&self) { unimplemented!() } + // + // fn get_token_source(&self) -> TokenSource { unimplemented!() } + // + // fn set_token_source(&self, 
tokenSource: TokenSource) { unimplemented!() } + + //todo make this const generic over direction + fn next_token_on_channel(&mut self, mut i: isize, channel: isize, direction: isize) -> isize { + self.sync(i); + if i >= self.size() { + return self.size() - 1; + } + + let mut token = self.base.tokens[i as usize].borrow(); + while token.get_channel() != channel { + if token.get_token_type() == EOF || i < 0 { + return i; + } + + i += direction; + self.sync(i); + if i >= 0 { + token = self.base.tokens[i as usize].borrow(); + } else { + return i; + } + } + + return i; + } + // + // fn previous_token_on_channel(&self, i: isize, channel: isize) -> int { unimplemented!() } + // + // fn get_hidden_tokens_to_right(&self, tokenIndex: isize, channel: isize) -> Vec { unimplemented!() } + // + // fn get_hidden_tokens_to_left(&self, tokenIndex: isize, channel: isize) -> Vec { unimplemented!() } + // + // fn filter_for_channel(&self, left: isize, right: isize, channel: isize) -> Vec { unimplemented!() } + // + // fn get_source_name(&self) -> String { unimplemented!() } + // + // fn get_all_text(&self) -> String { unimplemented!() } + // + // fn get_text_from_tokens(&self, start: Token, end: Token) -> String { unimplemented!() } + // + // fn get_text_from_rule_context(&self, interval: RuleContext) -> String { unimplemented!() } + // + // fn get_text_from_interval(&self, interval: &Interval) -> String { unimplemented!() } + // + // fn fill(&self) { unimplemented!() } + // + // fn adjust_seek_index(&self, i: isize) -> int { unimplemented!() } + + fn lb( + &mut self, + k: isize, + ) -> Option<&<>::TF as TokenFactory<'input>>::Tok> { + if k == 0 || (self.base.p - k) < 0 { + return None; + } + + let mut i = self.base.p; + let mut n = 1; + // find k good tokens looking backwards + while n <= k && i > 0 { + // skip off-channel tokens + i = self.next_token_on_channel(i - 1, self.channel, -1); + n += 1; + } + if i < 0 { + return None; + } + + return self.base.tokens.get(i as usize); + } + + // fn get_number_of_on_channel_tokens(&self) -> int { unimplemented!() } +} diff --git a/runtime/Rust/src/context_factory.rs b/runtime/Rust/src/context_factory.rs new file mode 100644 index 0000000000..e18f4174b8 --- /dev/null +++ b/runtime/Rust/src/context_factory.rs @@ -0,0 +1,203 @@ +use std::cell::{Ref, RefCell, RefMut}; +use std::marker::PhantomData; +use std::ops::{CoerceUnsized, Deref, DerefMut}; +use std::rc::Rc; + +use better_any::TidExt; +use qcell::{TLCell, TLCellOwner}; +use typed_arena::Arena; + +use crate::parser_rule_context::ParserRuleContext; + +trait ContextFactory<'a, T: ?Sized> { + type CtxRef; + type Ref: Deref + 'a; + type RefMut: DerefMut + 'a; + + fn new(&mut self, inner: T) -> Self::CtxRef + where + T: Sized; + + fn borrow(&'a self, this: &'a Self::CtxRef) -> Self::Ref; + fn borrow_mut(&'a mut self, this: &'a mut Self::CtxRef) -> Self::RefMut; +} + +struct RcFactory; + +impl<'a, T: 'a + ?Sized> ContextFactory<'a, T> for RcFactory { + type CtxRef = Rc; + type Ref = &'a T; + type RefMut = &'a mut T; + + fn new(&mut self, inner: T) -> Self::CtxRef + where + T: Sized, + { + Rc::new(inner) + } + + fn borrow(&'a self, this: &'a Self::CtxRef) -> Self::Ref { &*this } + + fn borrow_mut(&'a mut self, this: &'a mut Self::CtxRef) -> Self::RefMut { + unsafe { Rc::get_mut_unchecked(this) } + } +} + +struct RefCellFactory { + arena: Arena>>, +} + +impl<'a, 'this, T, Dyn> ContextFactory<'a, T> for &'this RefCellFactory +where + T: 'this + 'a + CoerceUnsized + ?Sized, + Dyn: 'this + 'a + ?Sized, + Box>: CoerceUnsized>>, 
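+// a sketch of the arena-backed variant: contexts are allocated in `arena`
+// and borrows are checked dynamically through `RefCell`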
+{ + type CtxRef = &'this RefCell; + type Ref = Ref<'a, T>; + type RefMut = RefMut<'a, T>; + + fn new(&mut self, inner: T) -> Self::CtxRef + where + T: Sized, + { + let val = Box::new(RefCell::new(inner)) as Box>; + let res = self.arena.alloc(val).as_mut(); + unsafe { &*(res as *mut RefCell as *mut RefCell) } + } + + fn borrow(&'a self, this: &'a Self::CtxRef) -> Self::Ref { RefCell::borrow(this) } + + fn borrow_mut(&'a mut self, this: &'a mut Self::CtxRef) -> Self::RefMut { + RefCell::borrow_mut(this) + } +} + +/// index that saves type info to downcast back without checks +struct Id { + idx: usize, + phantom: PhantomData>, +} + +struct IdFactory { + arena: Vec>, +} + +impl<'a, T, Dyn> ContextFactory<'a, T> for IdFactory +where + T: 'a + CoerceUnsized + ?Sized, + Dyn: 'a + ?Sized, + Box: CoerceUnsized>, +{ + type CtxRef = Id; + type Ref = &'a T; + type RefMut = &'a mut T; + + fn new(&mut self, inner: T) -> Self::CtxRef + where + T: Sized, + { + let b = Box::new(inner); + self.arena.push(b as _); + Id { + idx: self.arena.len() - 1, + phantom: Default::default(), + } + } + + fn borrow(&'a self, this: &'a Self::CtxRef) -> Self::Ref { + let this = &*self.arena[this.idx]; + // safe because we know that T:CoerceUnsized + unsafe { std::mem::transmute_copy::<&Dyn, &T>(&this) } + } + + fn borrow_mut(&'a mut self, this: &'a mut Self::CtxRef) -> Self::RefMut { + let this = &mut *self.arena[this.idx]; + unsafe { std::mem::transmute_copy::<&mut Dyn, &mut T>(&this) } + } +} + +struct Owner; + +struct QCellArena { + arena: Arena>>, + guard: TLCellOwner, +} + +impl<'a, 'this, T, Dyn> ContextFactory<'a, T> for &'this mut QCellArena +where + T: 'a + 'this + CoerceUnsized + ?Sized, + Dyn: 'a + 'this + ?Sized, + Box>: CoerceUnsized>>, +{ + type CtxRef = &'this TLCell; + type Ref = &'a T; + type RefMut = &'a mut T; + + fn new(&mut self, inner: T) -> Self::CtxRef + where + T: Sized, + { + let t = Box::new(self.guard.cell(inner)); + let r = &**self.arena.alloc(t); + unsafe { &*(r as *const _ as *const _) } + } + + fn borrow(&'a self, this: &'a Self::CtxRef) -> Self::Ref { self.guard.ro(this) } + + fn borrow_mut(&'a mut self, this: &'a mut Self::CtxRef) -> Self::RefMut { self.guard.rw(this) } +} + +trait Cast { + type WrappedT: CoerceUnsized; + type WrappedSelf; + fn downcast(this: Self::WrappedSelf) -> Self::WrappedT; +} + +impl<'i, T, Y: ?Sized> Cast for Y +where + Y: ParserRuleContext<'i>, + T: ParserRuleContext<'i>, + Rc: CoerceUnsized>, +{ + type WrappedT = Rc; + type WrappedSelf = Rc; + + fn downcast(this: Self::WrappedSelf) -> Self::WrappedT { this.downcast_rc().unwrap() } +} + +// +// trait Downcast<'i, Owner>: CoerceUnsized { +// type Inner: ParserRuleContext<'i, Ctx = Self::Dyn> + ?Sized; +// type DynRef; +// type Dyn; +// fn downcast<'x>(from: Self::DynRef, owner: Owner) -> Option; +// } +// +// impl<'i, T: ParserRuleContext<'i> + ?Sized, Owner, U> Downcast for Rc +// where +// Rc: CoerceUnsized>, +// { +// type Inner = T; +// type DynRef = Rc; +// type Dyn = T::Ctx; +// +// fn downcast<'x>(from: Rc, owner: Owner) -> Option { +// if from.self_id() == T::id() { +// Some(unsafe { Rc::from_raw(Rc::into_raw(from) as *const _) }) +// } else { +// None +// } +// } +// } +// +// impl<'i, T: ParserRuleContext<'i> + ?Sized, Owner, U> Downcast for &RefCell +// where +// Rc: CoerceUnsized>, +// { +// type Inner = T; +// type DynRef = Rc; +// type Dyn = T::Ctx; +// +// fn downcast<'x>(from: Self::DynRef, owner: Owner) -> Option { unimplemented!() } +// } diff --git a/runtime/Rust/src/dfa.rs 
b/runtime/Rust/src/dfa.rs new file mode 100644 index 0000000000..0ecf0eb3af --- /dev/null +++ b/runtime/Rust/src/dfa.rs @@ -0,0 +1,168 @@ +use std::collections::HashMap; +use std::convert::TryFrom; + +use std::rc::Rc; + +use crate::atn::ATN; +use crate::atn_config_set::ATNConfigSet; +use crate::atn_state::{ATNDecisionState, ATNStateRef, ATNStateType}; +use crate::dfa_serializer::DFASerializer; +use crate::dfa_state::{DFAState, DFAStateRef}; +use crate::vocabulary::Vocabulary; + +///Helper trait for scope management and temporary values not living long enough +pub(crate) trait ScopeExt: Sized { + fn convert_with T>(self, f: F) -> T { + f(self) + } + fn run T>(&self, f: F) -> T { + f(self) + } + + //apply + fn modify_with(mut self, f: F) -> Self { + f(&mut self); + self + } + //apply_inplace + fn apply(&mut self, f: F) -> &mut Self { + f(self); + self + } + + fn drop(self) {} +} + +impl ScopeExt for Any {} + +#[derive(Debug)] +pub struct DFA { + /// ATN state from which this DFA creation was started from + pub atn_start_state: ATNStateRef, + + pub decision: isize, + + /// Set of all dfa states. + pub states: Vec, + + // for faster duplicate search + // TODO i think DFAState.edges can contain references to its elements + pub(crate) states_map: HashMap>, + // states_mu sync.RWMutex + /// Initial DFA state + pub s0: Option, + // s0_mu sync.RWMutex + is_precedence_dfa: bool, +} + +impl DFA { + pub fn new(atn: Rc, atn_start_state: ATNStateRef, decision: isize) -> DFA { + let mut dfa = DFA { + atn_start_state, + decision, + states: Default::default(), + // states_map: RefCell::new(HashMap::new()), + states_map: Default::default(), + s0: Default::default(), + is_precedence_dfa: false, + }; + + // to indicate null + dfa.states.push(DFAState::new_dfastate( + usize::max_value(), + Box::new(ATNConfigSet::new_base_atnconfig_set(true)), + )); + if let ATNStateType::DecisionState { + state: + ATNDecisionState::StarLoopEntry { + is_precedence: true, + .. + }, + .. 
+ } = atn.states[atn_start_state].get_state_type() + { + dfa.is_precedence_dfa = true; + let mut precedence_state = DFAState::new_dfastate( + dfa.states.len(), + Box::new(ATNConfigSet::new_base_atnconfig_set(true)), + ); + precedence_state.edges = vec![]; + precedence_state.is_accept_state = false; + precedence_state.requires_full_context = false; + + dfa.s0 = Some(precedence_state.state_number); + dfa.states.push(precedence_state) + } + dfa + } + + pub fn get_precedence_start_state(&self, _precedence: isize) -> Option { + if !self.is_precedence_dfa { + panic!("dfa is supposed to be precedence here"); + } + + self.s0.and_then(|s0| { + self.states[s0] + .edges + .get(_precedence as usize) + .and_then(|it| match *it { + 0 => None, + x => Some(x), + }) + }) + } + + pub fn set_precedence_start_state(&mut self, precedence: isize, _start_state: DFAStateRef) { + if !self.is_precedence_dfa { + panic!("set_precedence_start_state called for not precedence dfa") + } + + if precedence < 0 { + return; + } + let precedence = precedence as usize; + + if let Some(x) = &self.s0 { + self.states[*x].edges.apply(|edges| { + if edges.len() <= precedence { + edges.resize(precedence + 1, 0); + } + edges[precedence] = _start_state; + }); + } + } + + pub fn is_precedence_dfa(&self) -> bool { + self.is_precedence_dfa + } + + pub fn set_precedence_dfa(&mut self, precedence_dfa: bool) { + self.is_precedence_dfa = precedence_dfa + } + + pub fn to_string(&self, vocabulary: &dyn Vocabulary) -> String { + if self.s0.is_none() { + return String::new(); + } + + return format!( + "{}", + DFASerializer::new(self, &|x| vocabulary + .get_display_name(x as isize - 1) + .into_owned(),) + ); + } + + pub fn to_lexer_string(&self) -> String { + if self.s0.is_none() { + return String::new(); + } + format!( + "{}", + DFASerializer::new(self, &|x| format!( + "'{}'", + char::try_from(x as u32).unwrap() + )) + ) + } +} diff --git a/runtime/Rust/src/dfa_serializer.rs b/runtime/Rust/src/dfa_serializer.rs new file mode 100644 index 0000000000..dea3a620d6 --- /dev/null +++ b/runtime/Rust/src/dfa_serializer.rs @@ -0,0 +1,60 @@ +use std::fmt::{Display, Formatter}; + +use crate::dfa::DFA; +use crate::dfa_state::DFAState; +use crate::lexer_atn_simulator::ERROR_DFA_STATE_REF; + +pub struct DFASerializer<'a, 'b> { + dfa: &'a DFA, + get_edge_label: &'b dyn Fn(usize) -> String, +} + +impl Display for DFASerializer<'_, '_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let dfa = &self.dfa.states; + for source in dfa.iter() { + for (i, edge) in source.edges.iter().copied().enumerate() { + if edge != 0 && edge != ERROR_DFA_STATE_REF { + let target = &dfa[edge]; + f.write_fmt(format_args!( + "{}-{}->{}\n", + self.get_state_string(source), + (self.get_edge_label)(i), + self.get_state_string(target) + ))?; + } + } + } + Ok(()) + } +} + +impl DFASerializer<'_, '_> { + pub fn new<'a, 'b>( + dfa: &'a DFA, + get_edge_label: &'b dyn Fn(usize) -> String, + ) -> DFASerializer<'a, 'b> { + DFASerializer { + dfa, + get_edge_label, + } + } + + fn get_state_string(&self, state: &DFAState) -> String { + let mut base_str = format!( + "{}s{}{}", + if state.is_accept_state { ":" } else { "" }, + state.state_number - 1, + if state.requires_full_context { "^" } else { "" }, + ); + if state.is_accept_state { + base_str = if !state.predicates.is_empty() { + unimplemented!() + // format!("{}=>{:?}", base_str, state.predicates) + } else { + format!("{}=>{}", base_str, state.prediction) + }; + } + base_str + } +} diff --git a/runtime/Rust/src/dfa_state.rs 
b/runtime/Rust/src/dfa_state.rs new file mode 100644 index 0000000000..21b156085c --- /dev/null +++ b/runtime/Rust/src/dfa_state.rs @@ -0,0 +1,78 @@ +use std::fmt::{Display, Error, Formatter}; +use std::hash::{Hash, Hasher}; + +use murmur3::murmur3_32::MurmurHasher; + +use crate::atn_config_set::ATNConfigSet; +use crate::lexer_action_executor::LexerActionExecutor; +use crate::semantic_context::SemanticContext; + +#[derive(Eq, PartialEq, Debug)] +pub struct PredPrediction { + pub(crate) alt: isize, + pub(crate) pred: SemanticContext, +} + +impl Display for PredPrediction { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + f.write_fmt(format_args!("({},{:?})", self.alt, self.pred)) + } +} + +//index in DFA.states +pub type DFAStateRef = usize; + +#[derive(Eq, Debug)] +pub struct DFAState { + /// Number of this state in corresponding DFA + pub state_number: usize, + pub configs: Box, + /// - 0 => no edge + /// - usize::MAX => error edge + /// - _ => actual edge + pub edges: Vec, + pub is_accept_state: bool, + + pub prediction: isize, + pub(crate) lexer_action_executor: Option>, + pub requires_full_context: bool, + pub predicates: Vec, +} + +impl PartialEq for DFAState { + fn eq(&self, other: &Self) -> bool { + self.configs == other.configs + } +} + +impl Hash for DFAState { + fn hash(&self, state: &mut H) { + self.configs.hash(state); + } +} + +impl DFAState { + pub fn default_hash(&self) -> u64 { + let mut hasher = MurmurHasher::default(); + self.hash(&mut hasher); + hasher.finish() + } + + pub fn new_dfastate(state_number: usize, configs: Box) -> DFAState { + DFAState { + state_number, + configs, + // edges: Vec::with_capacity((MAX_DFA_EDGE - MIN_DFA_EDGE + 1) as usize), + edges: Vec::new(), + is_accept_state: false, + prediction: 0, + lexer_action_executor: None, + requires_full_context: false, + predicates: Vec::new(), + } + } + + // fn get_alt_set(&self) -> &Set { unimplemented!() } + + // fn set_prediction(&self, _v: isize) { unimplemented!() } +} diff --git a/runtime/Rust/src/error_listener.rs b/runtime/Rust/src/error_listener.rs new file mode 100644 index 0000000000..f381c8cc3f --- /dev/null +++ b/runtime/Rust/src/error_listener.rs @@ -0,0 +1,356 @@ +//! Error reporting +use std::cell::Ref; +use std::ops::Deref; + +use bit_set::BitSet; + +use crate::atn_config_set::ATNConfigSet; +use crate::dfa::DFA; +use crate::errors::ANTLRError; + +use crate::parser::Parser; +use crate::recognizer::Recognizer; + +use crate::token_factory::TokenFactory; +use std::borrow::Cow; +use std::fmt::Debug; + +/// Describes interface for listening on parser/lexer errors. +/// Should only listen for errors, for processing/recovering from errors use `ErrorStrategy` +pub trait ErrorListener<'a, T: Recognizer<'a>> { + /// Called when parser/lexer encounter hard error. + /// + /// The `_error` is not None for all syntax errors except + /// when we discover mismatched token errors that we can recover from + /// in-line, without returning from the surrounding rule (via the single + /// token insertion and deletion mechanism) + fn syntax_error( + &self, + _recognizer: &T, + _offending_symbol: Option<&>::Inner>, + _line: isize, + _column: isize, + _msg: &str, + _error: Option<&ANTLRError>, + ) { + } + + /// This method is called by the parser when a full-context prediction + /// results in an ambiguity. 
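+    ///
+    /// Reports reach a listener only after it is registered with the parser. A
+    /// minimal hedged sketch (the generated parser type `MyParser` and its
+    /// constructor are placeholders):
+    /// ```ignore
+    /// let mut parser = MyParser::new(token_stream);
+    /// // `false` reports inexact ambiguities as well
+    /// parser.add_error_listener(Box::new(DiagnosticErrorListener::new(false)));
+    /// ```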
+ fn report_ambiguity( + &self, + _recognizer: &T, + _dfa: &DFA, + _start_index: isize, + _stop_index: isize, + _exact: bool, + _ambig_alts: &BitSet, + _configs: &ATNConfigSet, + ) { + } + + /// This method is called when an SLL conflict occurs and the parser is about + /// to use the full context information to make an LL decision. + fn report_attempting_full_context( + &self, + _recognizer: &T, + _dfa: &DFA, + _start_index: isize, + _stop_index: isize, + _conflicting_alts: &BitSet, + _configs: &ATNConfigSet, + ) { + } + + /// This method is called by the parser when a full-context prediction has a + /// unique result. + fn report_context_sensitivity( + &self, + _recognizer: &T, + _dfa: &DFA, + _start_index: isize, + _stop_index: isize, + _prediction: isize, + _configs: &ATNConfigSet, + ) { + } +} + +impl Debug for Box> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Box").finish() + } +} + +/// Default error listener that outputs errors to stderr +#[derive(Debug)] +pub struct ConsoleErrorListener {} + +impl<'a, T: Recognizer<'a>> ErrorListener<'a, T> for ConsoleErrorListener { + fn syntax_error( + &self, + _recognizer: &T, + _offending_symbol: Option<&>::Inner>, + line: isize, + column: isize, + msg: &str, + _e: Option<&ANTLRError>, + ) { + eprintln!("line {}:{} {}", line, column, msg); + } +} + +// #[derive(Debug)] +pub(crate) struct ProxyErrorListener<'b, 'a, T> { + pub delegates: Ref<'b, Vec>>>, +} + +impl<'b, 'a, T: Recognizer<'a>> ErrorListener<'a, T> for ProxyErrorListener<'b, 'a, T> { + fn syntax_error( + &self, + _recognizer: &T, + offending_symbol: Option<&>::Inner>, + line: isize, + column: isize, + msg: &str, + e: Option<&ANTLRError>, + ) { + for listener in self.delegates.deref() { + listener.syntax_error(_recognizer, offending_symbol, line, column, msg, e) + } + } + + fn report_ambiguity( + &self, + recognizer: &T, + dfa: &DFA, + start_index: isize, + stop_index: isize, + exact: bool, + ambig_alts: &BitSet, + configs: &ATNConfigSet, + ) { + for listener in self.delegates.deref() { + listener.report_ambiguity( + recognizer, + dfa, + start_index, + stop_index, + exact, + ambig_alts, + configs, + ) + } + } + + fn report_attempting_full_context( + &self, + recognizer: &T, + dfa: &DFA, + start_index: isize, + stop_index: isize, + conflicting_alts: &BitSet, + configs: &ATNConfigSet, + ) { + for listener in self.delegates.deref() { + listener.report_attempting_full_context( + recognizer, + dfa, + start_index, + stop_index, + conflicting_alts, + configs, + ) + } + } + + fn report_context_sensitivity( + &self, + recognizer: &T, + dfa: &DFA, + start_index: isize, + stop_index: isize, + prediction: isize, + configs: &ATNConfigSet, + ) { + for listener in self.delegates.deref() { + listener.report_context_sensitivity( + recognizer, + dfa, + start_index, + stop_index, + prediction, + configs, + ) + } + } +} + +/// This implementation of `ErrorListener` can be used to identify +/// certain potential correctness and performance problems in grammars. "Reports" +/// are made by calling `Parser::notify_error_listeners` with the appropriate +/// message. +/// +/// - Ambiguities: These are cases where more than one path through the +/// grammar can match the input. +/// - Weak context sensitivity: These are cases where full-context +/// prediction resolved an SLL conflict to a unique alternative which equaled the +/// minimum alternative of the SLL conflict. 
+/// - Strong (forced) context sensitivity: These are cases where the +/// full-context prediction resolved an SLL conflict to a unique alternative, +/// *and* the minimum alternative of the SLL conflict was found to not be +/// a truly viable alternative. Two-stage parsing cannot be used for inputs where +/// this situation occurs. +#[derive(Debug)] +pub struct DiagnosticErrorListener { + exact_only: bool, +} + +impl DiagnosticErrorListener { + /// When `exact_only` is true, only exactly known ambiguities are reported. + pub fn new(exact_only: bool) -> Self { + Self { exact_only } + } + + fn get_decision_description<'a, T: Parser<'a>>(&self, recog: &T, dfa: &DFA) -> String { + let decision = dfa.decision; + let rule_index = recog.get_atn().states[dfa.atn_start_state].get_rule_index(); + + let rule_names = recog.get_rule_names(); + if let Some(&rule_name) = rule_names.get(rule_index) { + format!("{} ({})", decision, rule_name) + } else { + decision.to_string() + } + } + /// Computes the set of conflicting or ambiguous alternatives from a + /// configuration set, if that information was not already provided by the + /// parser in `alts`. + pub fn get_conflicting_alts<'a>( + &self, + alts: Option<&'a BitSet>, + _configs: &ATNConfigSet, + ) -> Cow<'a, BitSet> { + match alts { + Some(alts) => Cow::Borrowed(alts), + None => Cow::Owned( + _configs + .configs + .iter() + .map(|config| config.get_alt() as usize) + .collect::(), + ), + } + } +} + +impl<'a, T: Parser<'a>> ErrorListener<'a, T> for DiagnosticErrorListener { + fn report_ambiguity( + &self, + recognizer: &T, + dfa: &DFA, + start_index: isize, + stop_index: isize, + exact: bool, + ambig_alts: &BitSet, + _configs: &ATNConfigSet, + ) { + if self.exact_only && !exact { + return; + } + let msg = format!( + "reportAmbiguity d={}: ambigAlts={:?}, input='{}'", + self.get_decision_description(recognizer, dfa), + ambig_alts, + recognizer + .get_input_stream() + .get_text_from_interval(start_index, stop_index) + ); + recognizer.notify_error_listeners(msg, None, None); + } + + fn report_attempting_full_context( + &self, + recognizer: &T, + dfa: &DFA, + start_index: isize, + stop_index: isize, + _conflicting_alts: &BitSet, + _configs: &ATNConfigSet, + ) { + let msg = format!( + "reportAttemptingFullContext d={}, input='{}'", + self.get_decision_description(recognizer, dfa), + recognizer + .get_input_stream() + .get_text_from_interval(start_index, stop_index) + ); + recognizer.notify_error_listeners(msg, None, None); + } + + fn report_context_sensitivity( + &self, + recognizer: &T, + dfa: &DFA, + start_index: isize, + stop_index: isize, + _prediction: isize, + _configs: &ATNConfigSet, + ) { + let msg = format!( + "reportContextSensitivity d={}, input='{}'", + self.get_decision_description(recognizer, dfa), + recognizer + .get_input_stream() + .get_text_from_interval(start_index, stop_index) + ); + recognizer.notify_error_listeners(msg, None, None); + } +} +/* +impl DefaultErrorListener { + fn new_default_error_listener() -> * DefaultErrorListener { unimplemented!() } + + fn syntax_error(&self, recognizer: Recognizer, offendingSymbol: interface { + }, line: isize, column: isize, msg: String, e: RecognitionError) { unimplemented!() } + + fn report_ambiguity(&self, recognizer: Parser, dfa: * DFA, startIndex: isize, stopIndex: isize, exact: bool, ambigAlts: * BitSet, configs: ATNConfigSet) { unimplemented!() } + + fn report_attempting_full_context(&self, recognizer: Parser, dfa: * DFA, startIndex: isize, stopIndex: isize, conflictingAlts: * BitSet, 
configs: ATNConfigSet) { unimplemented!() } + + fn report_context_sensitivity(&self, recognizer: Parser, dfa: * DFA, startIndex: isize, stopIndex: isize, prediction: isize, configs: ATNConfigSet) { unimplemented!() } + + pub struct ConsoleErrorListener { + base: DefaultErrorListener, + } + + fn new_console_error_listener() -> * ConsoleErrorListener { unimplemented!() } + + var ConsoleErrorListenerINSTANCE = NewConsoleErrorListener() + + fn syntax_error(&self, recognizer: Recognizer, offendingSymbol: interface { + }, line: isize, column: isize, msg: String, e: RecognitionError) { + fmt.Fprintln(os.Stderr, "line " + strconv.Itoa(line) + ":" + strconv.Itoa(column) + " " + msg) + } + + pub struct ProxyErrorListener { + base: DefaultErrorListener, + delegates: Vec < ErrorListener > , + } + + fn new_proxy_error_listener(delegates Vec) -> * ProxyErrorListener { unimplemented!() } + + fn syntax_error(&self, recognizer: Recognizer, offendingSymbol: interface { + }, line: isize, column: isize, msg: String, e: RecognitionError) { + for _, d: = range p.delegates { + d.SyntaxError(recognizer, offendingSymbol, line, column, msg, e) + } + } + + fn report_ambiguity(&self, recognizer: Parser, dfa: * DFA, startIndex: isize, stopIndex: isize, exact: bool, ambigAlts: * BitSet, configs: ATNConfigSet) { unimplemented!() } + + fn report_attempting_full_context(&self, recognizer: Parser, dfa: * DFA, startIndex: isize, stopIndex: isize, conflictingAlts: * BitSet, configs: ATNConfigSet) { unimplemented!() } + + fn report_context_sensitivity(&self, recognizer: Parser, dfa: * DFA, startIndex: isize, stopIndex: isize, prediction: isize, configs: ATNConfigSet) { unimplemented!() } +} + */ diff --git a/runtime/Rust/src/error_strategy.rs b/runtime/Rust/src/error_strategy.rs new file mode 100644 index 0000000000..7bf30c9dcc --- /dev/null +++ b/runtime/Rust/src/error_strategy.rs @@ -0,0 +1,643 @@ +//! Error handling and recovery +use std::borrow::Borrow; +use std::error::Error; +use std::fmt; +use std::fmt::{Display, Formatter}; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; + +use crate::atn_simulator::IATNSimulator; +use crate::atn_state::*; +use crate::char_stream::{CharStream, InputData}; +use crate::dfa::ScopeExt; +use crate::errors::{ANTLRError, FailedPredicateError, InputMisMatchError, NoViableAltError}; +use crate::interval_set::IntervalSet; +use crate::parser::{Parser, ParserNodeType}; +use crate::parser_rule_context::ParserRuleContext; +use crate::rule_context::{CustomRuleContext, RuleContext}; +use crate::token::{Token, TOKEN_DEFAULT_CHANNEL, TOKEN_EOF, TOKEN_EPSILON, TOKEN_INVALID_TYPE}; +use crate::token_factory::TokenFactory; +use crate::transition::RuleTransition; +use crate::tree::Tree; +use crate::utils::escape_whitespaces; +use better_any::{Tid, TidAble}; + +/// The interface for defining strategies to deal with syntax errors encountered +/// during a parse by ANTLR-generated parsers. We distinguish between three +/// different kinds of errors: +/// - The parser could not figure out which path to take in the ATN (none of +/// the available alternatives could possibly match) +/// - The current input does not match what we were looking for +/// - A predicate evaluated to false +/// +/// Implementations of this interface should report syntax errors by calling [`Parser::notifyErrorListeners`] +/// +/// [`Parser::notifyErrorListeners`]: crate::parser::Parser::notifyErrorListeners +pub trait ErrorStrategy<'a, T: Parser<'a>>: Tid<'a> { + ///Reset the error handler state for the specified `recognizer`. 
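+    /// Typically invoked when a parser instance is reused on a fresh input, so
+    /// that no recovery state leaks into the next parse.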
+    fn reset(&mut self, recognizer: &mut T);
+
+    /// This method is called when an unexpected symbol is encountered during an
+    /// inline match operation, such as `Parser::match`. If the error
+    /// strategy successfully recovers from the match failure, this method
+    /// returns the `Token` instance which should be treated as the
+    /// successful result of the match.
+    ///
+    /// This method handles the consumption of any tokens; the caller should
+    /// **not** call `Parser::consume` after a successful recovery.
+    ///
+    /// Note that the calling code will not report an error if this method
+    /// returns successfully. The error strategy implementation is responsible
+    /// for calling `Parser::notify_error_listeners` as appropriate.
+    ///
+    /// Returns `ANTLRError` if it can't recover from the unexpected input symbol.
+    fn recover_inline(
+        &mut self,
+        recognizer: &mut T,
+    ) -> Result<<T::TF as TokenFactory<'a>>::Tok, ANTLRError>;
+
+    /// This method is called to recover from error `e`, after
+    /// `ErrorStrategy::report_error` was called by the default error handler
+    /// generated for a rule method.
+    fn recover(&mut self, recognizer: &mut T, e: &ANTLRError) -> Result<(), ANTLRError>;
+
+    /// This method provides the error handler with an opportunity to handle
+    /// syntactic or semantic errors in the input stream before they result in an
+    /// error.
+    ///
+    /// The generated code currently contains calls to `ErrorStrategy::sync` after
+    /// entering the decision state of a closure block (`(...)*` or
+    /// `(...)+`).
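+    ///
+    /// As a rough illustration only (not the literal generated code, and
+    /// `in_loop_first_set` is a hypothetical helper), a `(...)*` subrule
+    /// expands to something like:
+    /// ```ignore
+    /// recog.err_handler.sync(&mut recog)?; // entering the loop decision state
+    /// while in_loop_first_set(recog.input.la(1)) {
+    ///     // ...match one iteration of the subrule body...
+    ///     recog.err_handler.sync(&mut recog)?; // re-sync before the next decision
+    /// }
+    /// ```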

+ fn sync(&mut self, recognizer: &mut T) -> Result<(), ANTLRError>; + + /// Tests whether or not {@code recognizer} is in the process of recovering + /// from an error. In error recovery mode, `Parser::consume` will create + /// `ErrorNode` leaf instead of `TerminalNode` one + fn in_error_recovery_mode(&mut self, recognizer: &mut T) -> bool; + + /// Report any kind of `ANTLRError`. This method is called by + /// the default exception handler generated for a rule method. + fn report_error(&mut self, recognizer: &mut T, e: &ANTLRError); + + /// This method is called when the parser successfully matches an input + /// symbol. + fn report_match(&mut self, recognizer: &mut T); +} +// +// impl<'a, T: Parser<'a>> Default for Box + 'a> { +// fn default() -> Self { Box::new(DefaultErrorStrategy::new()) } +// } +// +// /// Error strategy trait object if there is a need to change error strategy at runtime +// /// Supports downcasting. +// pub type DynHandler<'a, T> = Box + 'a>; + +// impl<'a, T: Parser<'a> + TidAble<'a>> TidAble<'a> for Box + 'a> {} +better_any::tid! { impl<'a, T> TidAble<'a> for Box + 'a> where T: Parser<'a>} + +impl<'a, T: Parser<'a> + TidAble<'a>> ErrorStrategy<'a, T> for Box + 'a> { + #[inline(always)] + fn reset(&mut self, recognizer: &mut T) { + self.deref_mut().reset(recognizer) + } + + #[inline(always)] + fn recover_inline( + &mut self, + recognizer: &mut T, + ) -> Result<>::Tok, ANTLRError> { + self.deref_mut().recover_inline(recognizer) + } + + #[inline(always)] + fn recover(&mut self, recognizer: &mut T, e: &ANTLRError) -> Result<(), ANTLRError> { + self.deref_mut().recover(recognizer, e) + } + + #[inline(always)] + fn sync(&mut self, recognizer: &mut T) -> Result<(), ANTLRError> { + self.deref_mut().sync(recognizer) + } + + #[inline(always)] + fn in_error_recovery_mode(&mut self, recognizer: &mut T) -> bool { + self.deref_mut().in_error_recovery_mode(recognizer) + } + + #[inline(always)] + fn report_error(&mut self, recognizer: &mut T, e: &ANTLRError) { + self.deref_mut().report_error(recognizer, e) + } + + #[inline(always)] + fn report_match(&mut self, recognizer: &mut T) { + self.deref_mut().report_match(recognizer) + } +} + +/// This is the default implementation of `ErrorStrategy` used for +/// error reporting and recovery in ANTLR parsers. +#[derive(Debug)] +pub struct DefaultErrorStrategy<'input, Ctx: ParserNodeType<'input>> { + error_recovery_mode: bool, + last_error_index: isize, + last_error_states: Option, + next_tokens_state: isize, + next_tokens_ctx: Option>, +} + +better_any::tid! 
{ impl<'i,Ctx> TidAble<'i> for DefaultErrorStrategy<'i,Ctx> where Ctx: ParserNodeType<'i>} + +impl<'input, Ctx: ParserNodeType<'input>> Default for DefaultErrorStrategy<'input, Ctx> { + fn default() -> Self { + Self::new() + } +} + +impl<'input, Ctx: ParserNodeType<'input>> DefaultErrorStrategy<'input, Ctx> { + /// Creates new instance of `DefaultErrorStrategy` + pub fn new() -> Self { + Self { + error_recovery_mode: false, + last_error_index: -1, + last_error_states: None, + next_tokens_state: ATNSTATE_INVALID_STATE_NUMBER, + next_tokens_ctx: None, + } + } + + fn begin_error_condition>( + &mut self, + _recognizer: &T, + ) { + self.error_recovery_mode = true; + } + + fn end_error_condition>( + &mut self, + _recognizer: &T, + ) { + self.error_recovery_mode = false; + self.last_error_index = -1; + self.last_error_states = None; + } + + fn report_no_viable_alternative>( + &self, + recognizer: &mut T, + e: &NoViableAltError, + ) -> String { + let input = if e.start_token.token_type == TOKEN_EOF { + "".to_owned() + } else { + recognizer.get_input_stream_mut().get_text_from_interval( + e.start_token.get_token_index(), + e.base.offending_token.get_token_index(), + ) + }; + + format!("no viable alternative at input '{}'", input) + } + + fn report_input_mismatch>( + &self, + recognizer: &T, + e: &InputMisMatchError, + ) -> String { + format!( + "mismatched input {} expecting {}", + self.get_token_error_display(&e.base.offending_token), + e.base + .get_expected_tokens(recognizer) + .to_token_string(recognizer.get_vocabulary()) + ) + } + + fn report_failed_predicate>( + &self, + recognizer: &T, + e: &FailedPredicateError, + ) -> String { + format!( + "rule {} {}", + recognizer.get_rule_names()[recognizer.get_parser_rule_context().get_rule_index()], + e.base.message + ) + } + + fn report_unwanted_token>( + &mut self, + recognizer: &mut T, + ) { + if self.in_error_recovery_mode(recognizer) { + return; + } + + self.begin_error_condition(recognizer); + let expecting = self.get_expected_tokens(recognizer); + let expecting = expecting.to_token_string(recognizer.get_vocabulary()); + let t = recognizer.get_current_token().borrow(); + let token_name = self.get_token_error_display(t); + let msg = format!("extraneous input {} expecting {}", token_name, expecting); + let t = t.get_token_index(); + recognizer.notify_error_listeners(msg, Some(t), None); + } + + fn report_missing_token>( + &mut self, + recognizer: &mut T, + ) { + if self.in_error_recovery_mode(recognizer) { + return; + } + + self.begin_error_condition(recognizer); + let expecting = self.get_expected_tokens(recognizer); + let expecting = expecting.to_token_string(recognizer.get_vocabulary()); + let t = recognizer.get_current_token().borrow(); + let _token_name = self.get_token_error_display(t); + let msg = format!( + "missing {} at {}", + expecting, + self.get_token_error_display(t) + ); + let t = t.get_token_index(); + recognizer.notify_error_listeners(msg, Some(t), None); + } + + fn single_token_insertion>( + &mut self, + recognizer: &mut T, + ) -> bool { + let current_token = recognizer.get_input_stream_mut().la(1); + + let atn = recognizer.get_interpreter().atn(); + let current_state = atn.states[recognizer.get_state() as usize].as_ref(); + let next = current_state + .get_transitions() + .first() + .unwrap() + .get_target(); + let expect_at_ll2 = atn.next_tokens_in_ctx::( + atn.states[next].as_ref(), + Some(recognizer.get_parser_rule_context().deref()), + ); + if expect_at_ll2.contains(current_token) { + self.report_missing_token(recognizer); + 
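+            // the current token is viable at LL(2), so parsing can proceed as if
+            // the single missing token were present: report it and succeed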
return true; + } + false + } + + fn single_token_deletion<'a, T: Parser<'input, Node = Ctx, TF = Ctx::TF>>( + &mut self, + recognizer: &'a mut T, + ) -> Option<&'a >::Tok> { + let next_token_type = recognizer.get_input_stream_mut().la(2); + let expecting = self.get_expected_tokens(recognizer); + // println!("expecting {}", expecting.to_token_string(recognizer.get_vocabulary())); + if expecting.contains(next_token_type) { + self.report_unwanted_token(recognizer); + recognizer.consume(self); + self.report_match(recognizer); + let matched_symbol = recognizer.get_current_token(); + return Some(matched_symbol); + } + None + } + + fn get_missing_symbol>( + &self, + recognizer: &mut T, + ) -> >::Tok { + let expected = self.get_expected_tokens(recognizer); + let expected_token_type = expected.get_min().unwrap_or(TOKEN_INVALID_TYPE); + let token_text = if expected_token_type == TOKEN_EOF { + "".to_owned() + } else { + format!( + "", + recognizer + .get_vocabulary() + .get_display_name(expected_token_type) + ) + }; + let token_text = >::Data::from_text(&token_text); + let mut curr = recognizer.get_current_token().borrow(); + if curr.get_token_type() == TOKEN_EOF { + curr = recognizer + .get_input_stream() + .run(|it| it.get((it.index() - 1).max(0)).borrow()); + } + let (line, column) = (curr.get_line(), curr.get_column()); + recognizer.get_token_factory().create( + None::<&mut dyn CharStream<>::From>>, + expected_token_type, + Some(token_text), + TOKEN_DEFAULT_CHANNEL, + -1, + -1, + line, + column, + ) + // Token::to_owned(token.borrow()) + // .modify_with(|it| it.text = token_text) + } + + fn get_expected_tokens>( + &self, + recognizer: &T, + ) -> IntervalSet { + recognizer.get_expected_tokens() + } + + fn get_token_error_display(&self, t: &T) -> String { + let text = t.get_text().to_display(); + self.escape_ws_and_quote(&text) + } + + fn escape_ws_and_quote(&self, s: &str) -> String { + format!("'{}'", escape_whitespaces(s, false)) + } + + fn get_error_recovery_set>( + &self, + recognizer: &T, + ) -> IntervalSet { + let atn = recognizer.get_interpreter().atn(); + let mut ctx = Some(recognizer.get_parser_rule_context().clone()); + let mut recover_set = IntervalSet::new(); + while let Some(c) = ctx { + if c.get_invoking_state() < 0 { + break; + } + + let invoking_state = atn.states[c.get_invoking_state() as usize].as_ref(); + let tr = invoking_state.get_transitions().first().unwrap().as_ref(); + let tr = tr.cast::(); + let follow = atn.next_tokens(atn.states[tr.follow_state].as_ref()); + recover_set.add_set(follow); + ctx = c.get_parent_ctx(); + } + recover_set.remove_one(TOKEN_EPSILON); + return recover_set; + } + + fn consume_until>( + &mut self, + recognizer: &mut T, + set: &IntervalSet, + ) { + let mut ttype = recognizer.get_input_stream_mut().la(1); + while ttype != TOKEN_EOF && !set.contains(ttype) { + recognizer.consume(self); + ttype = recognizer.get_input_stream_mut().la(1); + } + } +} + +impl<'a, T: Parser<'a>> ErrorStrategy<'a, T> for DefaultErrorStrategy<'a, T::Node> { + fn reset(&mut self, recognizer: &mut T) { + self.end_error_condition(recognizer) + } + + fn recover_inline( + &mut self, + recognizer: &mut T, + ) -> Result<>::Tok, ANTLRError> { + let t = self + .single_token_deletion(recognizer) + .map(|it| it.to_owned()); + if let Some(t) = t { + recognizer.consume(self); + return Ok(t); + } + + if self.single_token_insertion(recognizer) { + return Ok(self.get_missing_symbol(recognizer)); + } + + if let Some(next_tokens_ctx) = &self.next_tokens_ctx { + 
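+            // `sync` recorded the state where the expected set was computed, so
+            // build the mismatch error against that state rather than the current one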
Err(ANTLRError::InputMismatchError( + InputMisMatchError::with_state( + recognizer, + self.next_tokens_state, + next_tokens_ctx.clone(), + ), + )) + } else { + Err(ANTLRError::InputMismatchError(InputMisMatchError::new( + recognizer, + ))) + } + // Err(ANTLRError::IllegalStateError("aaa".to_string())) + } + + fn recover(&mut self, recognizer: &mut T, _e: &ANTLRError) -> Result<(), ANTLRError> { + if self.last_error_index == recognizer.get_input_stream_mut().index() + && self.last_error_states.is_some() + && self + .last_error_states + .as_ref() + .unwrap() + .contains(recognizer.get_state()) + { + recognizer.consume(self) + } + + self.last_error_index = recognizer.get_input_stream_mut().index(); + self.last_error_states + .get_or_insert(IntervalSet::new()) + .apply(|x| x.add_one(recognizer.get_state())); + let follow_set = self.get_error_recovery_set(recognizer); + self.consume_until(recognizer, &follow_set); + Ok(()) + } + + fn sync(&mut self, recognizer: &mut T) -> Result<(), ANTLRError> { + if self.in_error_recovery_mode(recognizer) { + return Ok(()); + } + let next = recognizer.get_input_stream_mut().la(1); + let state = + recognizer.get_interpreter().atn().states[recognizer.get_state() as usize].as_ref(); + + let next_tokens = recognizer.get_interpreter().atn().next_tokens(state); + // println!("{:?}",next_tokens); + + if next_tokens.contains(next) { + self.next_tokens_state = ATNSTATE_INVALID_STATE_NUMBER; + self.next_tokens_ctx = None; + return Ok(()); + } + + if next_tokens.contains(TOKEN_EPSILON) { + if self.next_tokens_ctx.is_none() { + self.next_tokens_state = recognizer.get_state(); + self.next_tokens_ctx = Some(recognizer.get_parser_rule_context().clone()); + } + return Ok(()); + } + + match state.get_state_type_id() { + ATNSTATE_BLOCK_START + | ATNSTATE_PLUS_BLOCK_START + | ATNSTATE_STAR_BLOCK_START + | ATNSTATE_STAR_LOOP_ENTRY => { + if self.single_token_deletion(recognizer).is_none() { + return Err(ANTLRError::InputMismatchError(InputMisMatchError::new( + recognizer, + ))); + } + } + ATNSTATE_PLUS_LOOP_BACK | ATNSTATE_STAR_LOOP_BACK => { + self.report_unwanted_token(recognizer); + let mut expecting = recognizer.get_expected_tokens(); + expecting.add_set(&self.get_error_recovery_set(recognizer)); + self.consume_until(recognizer, &expecting); + } + _ => panic!("invalid ANTState type id"), + } + + Ok(()) + } + + fn in_error_recovery_mode(&mut self, _recognizer: &mut T) -> bool { + self.error_recovery_mode + } + + fn report_error(&mut self, recognizer: &mut T, e: &ANTLRError) { + if self.in_error_recovery_mode(recognizer) { + return; + } + + self.begin_error_condition(recognizer); + let msg = match e { + ANTLRError::NoAltError(e) => self.report_no_viable_alternative(recognizer, e), + ANTLRError::InputMismatchError(e) => self.report_input_mismatch(recognizer, e), + ANTLRError::PredicateError(e) => self.report_failed_predicate(recognizer, e), + _ => e.to_string(), + }; + let offending_token_index = e.get_offending_token().map(|it| it.get_token_index()); + recognizer.notify_error_listeners(msg, offending_token_index, Some(&e)) + } + + fn report_match(&mut self, recognizer: &mut T) { + self.end_error_condition(recognizer); + //println!("matched token succesfully {}", recognizer.get_input_stream().la(1)) + } +} + +/// This implementation of `ANTLRErrorStrategy` responds to syntax errors +/// by immediately canceling the parse operation with a +/// `ParseCancellationException`. 
The implementation ensures that the
+/// [`ParserRuleContext.exception`] field is set for all parse tree nodes
+/// that were not completed prior to encountering the error.
+///
+/// This error strategy is useful in the following scenarios:
+/// +/// - Two-stage parsing: This error strategy allows the first +/// stage of two-stage parsing to immediately terminate if an error is +/// encountered, and immediately fall back to the second stage. In addition to +/// avoiding wasted work by attempting to recover from errors here, the empty +/// implementation of `sync` improves the performance of +/// the first stage. +/// - Silent validation: When syntax errors are not being +/// reported or logged, and the parse result is simply ignored if errors occur, +/// the `BailErrorStrategy` avoids wasting work on recovering from errors +/// when the result will be ignored either way. +/// +/// # Usage +/// ```ignore +/// use antlr_rust::error_strategy::BailErrorStrategy; +/// myparser.err_handler = BailErrorStrategy::new(); +/// ``` +/// +/// [`ParserRuleContext.exception`]: todo +/// */ +#[derive(Default, Debug)] +pub struct BailErrorStrategy<'input, Ctx: ParserNodeType<'input>>( + DefaultErrorStrategy<'input, Ctx>, +); + +better_any::tid! {impl<'i,Ctx> TidAble<'i> for BailErrorStrategy<'i,Ctx> where Ctx:ParserNodeType<'i> } + +impl<'input, Ctx: ParserNodeType<'input>> BailErrorStrategy<'input, Ctx> { + /// Creates new instance of `BailErrorStrategy` + pub fn new() -> Self { + Self(DefaultErrorStrategy::new()) + } + + fn process_error>( + &self, + recognizer: &mut T, + e: &ANTLRError, + ) -> ANTLRError { + let mut ctx = recognizer.get_parser_rule_context().clone(); + let _: Option<()> = (|| loop { + ctx.set_exception(e.clone()); + ctx = ctx.get_parent()? + })(); + return ANTLRError::FallThrough(Rc::new(ParseCancelledError(e.clone()))); + } +} + +/// `ANTLRError::FallThrough` Error returned `BailErrorStrategy` to bail out from parsing +#[derive(Debug)] +pub struct ParseCancelledError(ANTLRError); + +impl Error for ParseCancelledError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + Some(&self.0) + } +} + +impl Display for ParseCancelledError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str("ParseCancelledError, caused by ")?; + self.0.fmt(f) + } +} + +impl<'a, T: Parser<'a>> ErrorStrategy<'a, T> for BailErrorStrategy<'a, T::Node> { + #[inline(always)] + fn reset(&mut self, recognizer: &mut T) { + self.0.reset(recognizer) + } + + #[cold] + fn recover_inline( + &mut self, + recognizer: &mut T, + ) -> Result<>::Tok, ANTLRError> { + let err = ANTLRError::InputMismatchError(InputMisMatchError::new(recognizer)); + + Err(self.process_error(recognizer, &err)) + } + + #[cold] + fn recover(&mut self, recognizer: &mut T, e: &ANTLRError) -> Result<(), ANTLRError> { + Err(self.process_error(recognizer, &e)) + } + + #[inline(always)] + fn sync(&mut self, _recognizer: &mut T) -> Result<(), ANTLRError> { + /* empty */ + Ok(()) + } + + #[inline(always)] + fn in_error_recovery_mode(&mut self, recognizer: &mut T) -> bool { + self.0.in_error_recovery_mode(recognizer) + } + + #[inline(always)] + fn report_error(&mut self, recognizer: &mut T, e: &ANTLRError) { + self.0.report_error(recognizer, e) + } + + #[inline(always)] + fn report_match(&mut self, _recognizer: &mut T) {} +} diff --git a/runtime/Rust/src/errors.rs b/runtime/Rust/src/errors.rs new file mode 100644 index 0000000000..83ca5312fd --- /dev/null +++ b/runtime/Rust/src/errors.rs @@ -0,0 +1,264 @@ +//! 
Error types +use std::borrow::Borrow; +use std::error::Error; +use std::fmt; +use std::fmt::Formatter; +use std::fmt::{Debug, Display}; +use std::ops::Deref; +use std::rc::Rc; + +use crate::atn_simulator::IATNSimulator; +use crate::interval_set::IntervalSet; +use crate::parser::{Parser, ParserNodeType}; +use crate::rule_context::states_stack; +use crate::token::{OwningToken, Token}; +use crate::transition::PredicateTransition; +use crate::transition::TransitionType::TRANSITION_PREDICATE; + +/// Main ANTLR4 Rust runtime error +#[derive(Debug, Clone)] +pub enum ANTLRError { + /// Returned from Lexer when it fails to find matching token type for current input + /// + /// Usually Lexers contain last rule that captures all invalid tokens like: + /// ```text + /// ERROR_TOKEN: . ; + /// ``` + /// to prevent lexer from throwing errors and have all error handling in parser. + LexerNoAltError { + /// Index at which error has happened + start_index: isize, + }, + + /// Indicates that the parser could not decide which of two or more paths + /// to take based upon the remaining input. It tracks the starting token + /// of the offending input and also knows where the parser was + /// in the various paths when the error. Reported by reportNoViableAlternative() + NoAltError(NoViableAltError), + + /// This signifies any kind of mismatched input exceptions such as + /// when the current input does not match the expected token. + InputMismatchError(InputMisMatchError), + + /// A semantic predicate failed during validation. Validation of predicates + /// occurs when normally parsing the alternative just like matching a token. + /// Disambiguating predicate evaluation occurs when we test a predicate during + /// prediction. + PredicateError(FailedPredicateError), + + /// Internal error. Or user provided type returned data that is + /// incompatible with current parser state + IllegalStateError(String), + + /// Unrecoverable error. Indicates that error should not be processed by parser/error strategy + /// and it should abort parsing and immediately return to caller. + FallThrough(Rc), + + /// Potentially recoverable error. + /// Used to allow user to emit his own errors from parser actions or from custom error strategy. + /// Parser will try to recover with provided `ErrorStrategy` + OtherError(Rc), +} + +// impl Clone for ANTLRError { +// fn clone(&self) -> Self { +// match self { +// ANTLRError::LexerNoAltError { start_index } => ANTLRError::LexerNoAltError { +// start_index: *start_index, +// }, +// ANTLRError::NoAltError(e) => ANTLRError::NoAltError(e.clone()), +// ANTLRError::InputMismatchError(e) => ANTLRError::InputMismatchError(e.clone()), +// ANTLRError::PredicateError(e) => ANTLRError::PredicateError(e.clone()), +// ANTLRError::IllegalStateError(e) => ANTLRError::IllegalStateError(e.clone()), +// ANTLRError::FallThrough(_) => panic!("clone not supported"), +// ANTLRError::OtherError(_) => panic!("clone not supported"), +// } +// } +// } + +impl Display for ANTLRError { + fn fmt(&self, _f: &mut Formatter<'_>) -> fmt::Result { + ::fmt(self, _f) + } +} + +impl Error for ANTLRError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + ANTLRError::FallThrough(x) => Some(x.as_ref()), + ANTLRError::OtherError(x) => Some(x.as_ref()), + _ => None, + } + } +} + +impl ANTLRError { + /// Returns first token that caused parser to fail. 
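+    ///
+    /// A hedged sketch of inspecting a failed parse; it assumes the caller has an
+    /// `err: ANTLRError` and that the `Token` trait is in scope for `get_line`:
+    /// ```ignore
+    /// if let Some(tok) = err.get_offending_token() {
+    ///     eprintln!("syntax error near line {}", tok.get_line());
+    /// }
+    /// ```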
+ pub fn get_offending_token(&self) -> Option<&OwningToken> { + Some(match self { + ANTLRError::NoAltError(e) => &e.base.offending_token, + ANTLRError::InputMismatchError(e) => &e.base.offending_token, + ANTLRError::PredicateError(e) => &e.base.offending_token, + _ => return None, + }) + } +} + +//impl ANTLRError { +// fn get_expected_tokens(&self, _atn: &ATN) -> IntervalSet { +//// atn.get_expected_tokens(se) +// unimplemented!() +// } +//} + +/// Common part of ANTLR parser errors +#[derive(Debug, Clone)] +#[allow(missing_docs)] +pub struct BaseRecognitionError { + pub message: String, + // recognizer: Box, + pub offending_token: OwningToken, + pub offending_state: isize, + states_stack: Vec, // ctx: Rc + // input: Box +} + +impl BaseRecognitionError { + /// Returns tokens that were expected by parser in error place + pub fn get_expected_tokens<'a, T: Parser<'a>>(&self, recognizer: &T) -> IntervalSet { + recognizer + .get_interpreter() + .atn() + .get_expected_tokens(self.offending_state, self.states_stack.iter().copied()) + } + + fn new<'a, T: Parser<'a>>(recog: &mut T) -> BaseRecognitionError { + BaseRecognitionError { + message: "".to_string(), + offending_token: recog.get_current_token().borrow().to_owned(), + offending_state: recog.get_state(), + // ctx: recog.get_parser_rule_context().clone(), + states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(), + } + } +} + +/// See `ANTLRError::NoAltError` +#[derive(Debug, Clone)] +#[allow(missing_docs)] +pub struct NoViableAltError { + pub base: BaseRecognitionError, + pub start_token: OwningToken, + // ctx: Rc, + // dead_end_configs: BaseATNConfigSet, +} + +#[allow(missing_docs)] +impl NoViableAltError { + pub fn new<'a, T: Parser<'a>>(recog: &mut T) -> NoViableAltError { + Self { + base: BaseRecognitionError { + message: "".to_string(), + offending_token: recog.get_current_token().borrow().to_owned(), + offending_state: recog.get_state(), + // ctx: recog.get_parser_rule_context().clone(), + states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(), + }, + start_token: recog.get_current_token().borrow().to_owned(), + // ctx: recog.get_parser_rule_context().clone() + } + } + pub fn new_full<'a, T: Parser<'a>>( + recog: &mut T, + start_token: OwningToken, + offending_token: OwningToken, + ) -> NoViableAltError { + Self { + base: BaseRecognitionError { + message: "".to_string(), + offending_token, + offending_state: recog.get_state(), + states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(), // ctx: recog.get_parser_rule_context().clone(), + }, + start_token, + // ctx + } + } +} + +/// See `ANTLRError::InputMismatchError` +#[derive(Debug, Clone)] +#[allow(missing_docs)] +pub struct InputMisMatchError { + pub base: BaseRecognitionError, +} + +#[allow(missing_docs)] +impl InputMisMatchError { + pub fn new<'a, T: Parser<'a>>(recognizer: &mut T) -> InputMisMatchError { + InputMisMatchError { + base: BaseRecognitionError::new(recognizer), + } + } + + pub fn with_state<'a, T: Parser<'a>>( + recognizer: &mut T, + offending_state: isize, + ctx: Rc<>::Type>, + ) -> InputMisMatchError { + let mut a = Self::new(recognizer); + // a.base.ctx = ctx; + a.base.offending_state = offending_state; + a.base.states_stack = states_stack(ctx).collect(); + a + } +} + +//fn new_input_mis_match_exception(recognizer: Parser) -> InputMisMatchError { unimplemented!() } + +/// See `ANTLRError::PredicateError` +#[derive(Debug, Clone)] +#[allow(missing_docs, dead_code)] +pub struct FailedPredicateError { + 
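+    /// Common recognition-error payload (message, offending token and state)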
pub base: BaseRecognitionError, + pub rule_index: isize, + predicate_index: isize, + pub predicate: String, +} + +#[allow(missing_docs)] +impl FailedPredicateError { + pub fn new<'a, T: Parser<'a>>( + recog: &mut T, + predicate: Option, + msg: Option, + ) -> ANTLRError { + let tr = recog.get_interpreter().atn().states[recog.get_state() as usize] + .get_transitions() + .first() + .unwrap(); + let (rule_index, predicate_index) = if tr.get_serialization_type() == TRANSITION_PREDICATE { + let pr = tr.deref().cast::(); + (pr.rule_index, pr.pred_index) + } else { + (0, 0) + }; + + ANTLRError::PredicateError(FailedPredicateError { + base: BaseRecognitionError { + message: msg.unwrap_or_else(|| { + format!( + "failed predicate: {}", + predicate.as_deref().unwrap_or("None") + ) + }), + offending_token: recog.get_current_token().borrow().to_owned(), + offending_state: recog.get_state(), + states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(), // ctx: recog.get_parser_rule_context().clone() + }, + rule_index, + predicate_index, + predicate: predicate.unwrap_or_default(), + }) + } +} diff --git a/runtime/Rust/src/file_stream.rs b/runtime/Rust/src/file_stream.rs new file mode 100644 index 0000000000..bb3bf8611b --- /dev/null +++ b/runtime/Rust/src/file_stream.rs @@ -0,0 +1,14 @@ +use std; + +pub struct FileStream { + base: InputStream, + + filename: String, +} + +impl FileStream { + fn new(fileName: String) -> Result { unimplemented!() } + + fn get_source_name(&self) -> String { unimplemented!() } +} + \ No newline at end of file diff --git a/runtime/Rust/src/input_stream.rs b/runtime/Rust/src/input_stream.rs new file mode 100644 index 0000000000..ecc44f7f20 --- /dev/null +++ b/runtime/Rust/src/input_stream.rs @@ -0,0 +1,262 @@ +//! Input to lexer +use std::borrow::Cow; + +use crate::char_stream::{CharStream, InputData}; +use crate::int_stream::IntStream; +use std::ops::Deref; + +/// Default rust target input stream. +/// +/// Since Rust uses UTF-8 format which does not support indexing by char, +/// `InputStream<&str>` has slightly different index behavior in compare to java runtime when there are +/// non-ASCII unicode characters. +/// If you need it to generate exactly the same indexes as Java runtime, you have to use `CodePoint8/16/32BitCharStream`, +/// which does not use rusts native `str` type, so it would do additional conversions and allocations along the way. +#[derive(Debug)] +pub struct InputStream { + name: String, + data_raw: Data, + index: isize, +} + +// #[impl_tid] +// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream> {} +// #[impl_tid] +// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream<&'a T> {} +better_any::tid! {impl<'a, T: 'static> TidAble<'a> for InputStream<&'a T> where T: ?Sized} +better_any::tid! 
{impl<'a, T: 'static> TidAble<'a> for InputStream> where T: ?Sized} + +impl<'a, T: From<&'a str>> CharStream for InputStream<&'a str> { + #[inline] + fn get_text(&self, start: isize, stop: isize) -> T { + self.get_text_inner(start, stop).into() + } +} + +impl, D: ?Sized + InputData> CharStream for InputStream> { + #[inline] + fn get_text(&self, start: isize, stop: isize) -> T { + self.get_text_owned(start, stop).into() + } +} +/// `InputStream` over byte slice +pub type ByteStream<'a> = InputStream<&'a [u8]>; +/// InputStream which treats the input as a series of Unicode code points that fit into `u8` +pub type CodePoint8BitCharStream<'a> = InputStream<&'a [u8]>; +/// InputStream which treats the input as a series of Unicode code points that fit into `u16` +pub type CodePoint16BitCharStream<'a> = InputStream<&'a [u16]>; +/// InputStream which treats the input as a series of Unicode code points +pub type CodePoint32BitCharStream<'a> = InputStream<&'a [u32]>; + +impl<'a, T> CharStream> for InputStream<&'a [T]> +where + [T]: InputData, +{ + #[inline] + fn get_text(&self, a: isize, b: isize) -> Cow<'a, [T]> { + Cow::Borrowed(self.get_text_inner(a, b)) + } +} + +impl<'a, T> CharStream for InputStream<&'a [T]> +where + [T]: InputData, +{ + fn get_text(&self, a: isize, b: isize) -> String { + self.get_text_inner(a, b).to_display() + } +} + +impl<'a, 'b, T> CharStream> for InputStream<&'a [T]> +where + [T]: InputData, +{ + #[inline] + fn get_text(&self, a: isize, b: isize) -> Cow<'b, str> { + self.get_text_inner(a, b).to_display().into() + } +} + +impl<'a, T> CharStream<&'a [T]> for InputStream<&'a [T]> +where + [T]: InputData, +{ + #[inline] + fn get_text(&self, a: isize, b: isize) -> &'a [T] { + self.get_text_inner(a, b) + } +} + +impl InputStream> { + fn get_text_owned(&self, start: isize, stop: isize) -> Data::Owned { + let start = start as usize; + let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize; + + if stop < self.data_raw.len() { + &self.data_raw[start..stop] + } else { + &self.data_raw[start..] + } + .to_owned() + } + + /// Creates new `InputStream` over owned data + pub fn new_owned(data: Box) -> Self { + Self { + name: "".to_string(), + data_raw: data.into(), + index: 0, + } + } +} + +impl<'a, Data> InputStream<&'a Data> +where + Data: ?Sized + InputData, +{ + fn get_text_inner(&self, start: isize, stop: isize) -> &'a Data { + // println!("get text {}..{} of {:?}",start,stop,self.data_raw.to_display()); + let start = start as usize; + let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize; + // println!("justed range {}..{} ",start,stop); + // let start = self.data_raw.offset(0,start).unwrap() as usize; + // let stop = self.data_raw.offset(0,stop + 1).unwrap() as usize; + + if stop < self.data_raw.len() { + &self.data_raw[start..stop] + } else { + &self.data_raw[start..] 
+ } + } + + /// Creates new `InputStream` over borrowed data + pub fn new(data_raw: &'a Data) -> Self { + // let data_raw = data_raw.as_ref(); + // let data = data_raw.to_indexed_vec(); + Self { + name: "".to_string(), + data_raw, + index: 0, + // phantom: Default::default(), + } + } +} +impl<'a, Data: Deref> InputStream +where + Data::Target: InputData, +{ + /// Resets input stream to start from the beginning of this slice + #[inline] + pub fn reset(&mut self) { + self.index = 0 + } +} + +impl<'a, Data: Deref> IntStream for InputStream +where + Data::Target: InputData, +{ + #[inline] + fn consume(&mut self) { + if let Some(index) = self.data_raw.offset(self.index, 1) { + self.index = index; + // self.current = self.data_raw.deref().item(index).unwrap_or(TOKEN_EOF); + // Ok(()) + } else { + panic!("cannot consume EOF"); + } + } + + #[inline] + fn la(&mut self, mut offset: isize) -> isize { + if offset == 1 { + return self + .data_raw + .item(self.index) + .unwrap_or(crate::int_stream::EOF); + } + if offset == 0 { + panic!("should not be called with offset 0"); + } + if offset < 0 { + offset += 1; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1] + } + + self.data_raw + .offset(self.index, offset - 1) + .and_then(|index| self.data_raw.item(index)) + .unwrap_or(crate::int_stream::EOF) + } + + #[inline] + fn mark(&mut self) -> isize { + -1 + } + + #[inline] + fn release(&mut self, _marker: isize) {} + + #[inline] + fn index(&self) -> isize { + self.index + } + + #[inline] + fn seek(&mut self, index: isize) { + self.index = index + } + + #[inline] + fn size(&self) -> isize { + self.data_raw.len() as isize + } + + fn get_source_name(&self) -> String { + self.name.clone() + } +} + +#[cfg(test)] +mod test { + use std::ops::Deref; + + use crate::char_stream::CharStream; + use crate::int_stream::{IntStream, EOF}; + + use super::InputStream; + + #[test] + fn test_str_input_stream() { + let mut input = InputStream::new("V1は3"); + let input = &mut input as &mut dyn CharStream; + assert_eq!(input.la(1), 'V' as isize); + assert_eq!(input.index(), 0); + input.consume(); + assert_eq!(input.la(1), '1' as isize); + assert_eq!(input.la(-1), 'V' as isize); + assert_eq!(input.index(), 1); + input.consume(); + assert_eq!(input.la(1), 0x306F); + assert_eq!(input.index(), 2); + input.consume(); + assert_eq!(input.index(), 5); + assert_eq!(input.la(-2), '1' as isize); + assert_eq!(input.la(2), EOF); + assert_eq!(input.get_text(1, 1).deref(), "1"); + assert_eq!(input.get_text(1, 2).deref(), "1は"); + assert_eq!(input.get_text(2, 2).deref(), "は"); + assert_eq!(input.get_text(2, 5).deref(), "は3"); + assert_eq!(input.get_text(5, 5).deref(), "3"); + } + + #[test] + fn test_byte_input_stream() { + let mut input = InputStream::new(&b"V\xaa\xbb"[..]); + assert_eq!(input.la(1), 'V' as isize); + input.seek(2); + assert_eq!(input.la(1), 0xBB); + assert_eq!(input.index(), 2); + let mut input = InputStream::new("は".as_bytes()); + assert_eq!(input.la(1), 227); + } +} diff --git a/runtime/Rust/src/int_stream.rs b/runtime/Rust/src/int_stream.rs new file mode 100644 index 0000000000..1ae40bcc83 --- /dev/null +++ b/runtime/Rust/src/int_stream.rs @@ -0,0 +1,85 @@ +//! 
+//! Streams of symbols that lexers and token streams operate over
+
+/// Returned by `la` when the requested position is outside of the stream
+pub const EOF: isize = -1;
+
+/// A stream of symbols represented as `isize` values
+pub trait IntStream {
+    /// Consumes the current symbol and advances the stream by one position
+    fn consume(&mut self);
+
+    /// Returns the symbol `i` positions away from the current one
+    /// (`la(1)` is the current symbol, negative offsets look backwards),
+    /// or `EOF` if it points outside of the stream
+    fn la(&mut self, i: isize) ->
isize;
+
+    /// After this call, subsequent calls to `seek` must succeed for any index greater than the mark index
+    ///
+    /// Returns a marker that should later be passed to `release` to release this stream from the marked state
+    fn mark(&mut self) -> isize;
+
+    /// Releases `marker`
+    fn release(&mut self, marker: isize);
+
+    /// Returns the current position of the input stream
+    ///
+    /// If there is an active marker from `mark`, then calling `seek` later with the result of this call
+    /// should put the stream back in the same state it is currently in.
+    fn index(&self) -> isize;
+    /// Puts the stream back into the state it was in at position `index`
+    ///
+    /// Allowed to panic if `index` does not belong to a marked region (via `mark`/`release` calls)
+    fn seek(&mut self, index: isize);
+
+    /// Returns the total number of symbols in the stream.
+    fn size(&self) -> isize;
+
+    /// Returns the name of the source this stream operates over, if any
+    fn get_source_name(&self) -> String;
+}
+
+/// Iterator over `IntStream`
+#[derive(Debug)]
+pub struct IterWrapper<'a, T: IntStream>(pub &'a mut T);
+
+impl<'a, T: IntStream> Iterator for IterWrapper<'a, T> {
+    type Item = isize;
+
+    fn next(&mut self) -> Option<isize> {
+        let result = self.0.la(1);
+        self.0.consume();
+        match result {
+            EOF => None,
+            x => Some(x),
+        }
+    }
+} diff --git a/runtime/Rust/src/interval_set.rs b/runtime/Rust/src/interval_set.rs new file mode 100644 index 0000000000..9707c1fabf --- /dev/null +++ b/runtime/Rust/src/interval_set.rs @@ -0,0 +1,403 @@ +use std::borrow::Cow;
+use std::borrow::Cow::Borrowed;
+use std::cmp::{max, min, Ordering};
+
+use crate::token::{TOKEN_EOF, TOKEN_EPSILON};
+use crate::vocabulary::{Vocabulary, DUMMY_VOCAB};
+
+/// Represents interval equivalent to `a..=b`
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub struct Interval {
+    /// start
+    pub a: isize,
+    /// end >= start
+    pub b: isize,
+}
+
+pub(crate) const INVALID: Interval = Interval { a: -1, b: -2 };
+
+impl Interval {
+    /* both `a` and `b` are inclusive */
+    fn new(a: isize, b: isize) -> Interval {
+        Interval { a, b }
+    }
+
+    // fn contains(&self, _item: isize) -> bool { unimplemented!() }
+
+    fn length(&self) -> isize {
+        self.b - self.a
+    }
+
+    fn union(&self, another: &Interval) -> Interval {
+        Interval {
+            a: min(self.a, another.a),
+            b: max(self.b, another.b),
+        }
+    }
+
+    /** Does self start completely before other? Disjoint */
+    pub fn starts_before_disjoint(&self, other: &Interval) -> bool {
+        return self.a < other.a && self.b < other.a;
+    }
+
+    /** Does self start at or before other? Nondisjoint */
+    pub fn starts_before_non_disjoint(&self, other: &Interval) -> bool {
+        return self.a <= other.a && self.b >= other.a;
+    }
+
+    /** Does self.a start after other.b? May or may not be disjoint */
+    pub fn starts_after(&self, other: &Interval) -> bool {
+        return self.a > other.a;
+    }
+
+    /** Does self start completely after other? Disjoint */
+    pub fn starts_after_disjoint(&self, other: &Interval) -> bool {
+        return self.a > other.b;
+    }
+
+    /** Does self start after other? NonDisjoint */
+    pub fn starts_after_non_disjoint(&self, other: &Interval) -> bool {
+        return self.a > other.a && self.a <= other.b; // self.b>=other.b implied
+    }
+
+    /** Are both ranges disjoint? I.e., no overlap? */
+    pub fn disjoint(&self, other: &Interval) -> bool {
+        return self.starts_before_disjoint(other) || self.starts_after_disjoint(other);
+    }
+
+    /** Are two intervals adjacent, such as 0..41 and 42..42?
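     *  (For example, in `add_interval` a set holding 0..41 absorbs an added 42..42 into the single interval 0..42.)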
*/ + pub fn adjacent(&self, other: &Interval) -> bool { + return self.a == other.b + 1 || self.b == other.a - 1; + } + + // public boolean properlyContains(Interval other) { + // return other.a >= self.a && other.b <= self.b; + // } + // + // /** Return the interval computed from combining self and other */ + // public Interval union(Interval other) { + // return Interval.of(Math.min(a, other.a), Math.max(b, other.b)); + // } + // + // /** Return the interval in common between self and o */ + // public Interval intersection(Interval other) { + // return Interval.of(Math.max(a, other.a), Math.min(b, other.b)); + // } +} + +/// Set of disjoint intervals +/// +/// Basically a set of integers but optimized for cases when it is sparse and created by adding +/// intervals of integers. +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct IntervalSet { + intervals: Vec, + #[allow(missing_docs)] + pub read_only: bool, +} + +#[allow(missing_docs)] +impl IntervalSet { + pub fn new() -> IntervalSet { + IntervalSet { + intervals: Vec::new(), + read_only: false, + } + } + + pub fn get_min(&self) -> Option { + self.intervals.first().map(|x| x.a) + } + + pub fn add_one(&mut self, _v: isize) { + self.add_range(_v, _v) + } + + pub fn add_range(&mut self, l: isize, h: isize) { + self.add_interval(Interval { a: l, b: h }) + } + + pub fn add_interval(&mut self, added: Interval) { + if added.length() < 0 { + return; + } + + let mut i = 0; + while let Some(r) = self.intervals.get_mut(i) { + if *r == added { + return; + } + + if added.adjacent(r) || !added.disjoint(r) { + // next to each other, make a single larger interval + let bigger = added.union(r); + *r = bigger; + // make sure we didn't just create an interval that + // should be merged with next interval in list + loop { + i += 1; + let next = match self.intervals.get(i) { + Some(v) => v, + None => break, + }; + if !bigger.adjacent(next) && bigger.disjoint(next) { + break; + } + + // if we bump up against or overlap next, merge + self.intervals[i - 1] = bigger.union(next); // set to 3 merged ones + self.intervals.remove(i); + } + return; + } + if added.starts_before_disjoint(r) { + // insert before r + self.intervals.insert(i, added); + return; + } + i += 1; + } + + self.intervals.push(added); + } + + pub fn add_set(&mut self, _other: &IntervalSet) { + for i in &_other.intervals { + self.add_interval(*i) + } + } + + pub fn substract(&mut self, right: &IntervalSet) { + let result = self; + let mut result_i = 0usize; + let mut right_i = 0usize; + + while result_i < result.intervals.len() && right_i < right.intervals.len() { + let result_interval = result.intervals[result_i]; + let right_interval = right.intervals[right_i]; + + if right_interval.b < result_interval.a { + right_i += 1; + continue; + } + + if right_interval.a > result_interval.b { + result_i += 1; + continue; + } + + let before_curr = if right_interval.a > result_interval.a { + Some(Interval::new(result_interval.a, right_interval.a - 1)) + } else { + None + }; + let after_curr = if right_interval.b < result_interval.b { + Some(Interval::new(right_interval.b + 1, result_interval.b)) + } else { + None + }; + + match (before_curr, after_curr) { + (Some(before_curr), Some(after_curr)) => { + result.intervals[result_i] = before_curr; + result.intervals.insert(result_i + 1, after_curr); + result_i += 1; + right_i += 1; + } + (Some(before_curr), None) => { + result.intervals[result_i] = before_curr; + result_i += 1; + } + (None, Some(after_curr)) => { + result.intervals[result_i] = after_curr; + 
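+                    // `right` covered the head of the current interval: keep only the part
+                    // after `right.b`, and this `right` interval is now exhausted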
right_i += 1;
+                }
+                (None, None) => {
+                    result.intervals.remove(result_i);
+                }
+            }
+        }
+
+        // return result;
+    }
+
+    pub fn complement(&self, start: isize, stop: isize) -> IntervalSet {
+        let mut vocabulary_is = IntervalSet::new();
+        vocabulary_is.add_range(start, stop);
+        vocabulary_is.substract(self);
+        return vocabulary_is;
+    }
+
+    pub fn contains(&self, _item: isize) -> bool {
+        self.intervals
+            .binary_search_by(|x| {
+                if _item < x.a {
+                    return Ordering::Greater;
+                }
+                if _item > x.b {
+                    return Ordering::Less;
+                }
+                Ordering::Equal
+            })
+            .is_ok()
+    }
+
+    pub fn length(&self) -> isize {
+        self.intervals
+            .iter()
+            .fold(0, |acc, it| acc + it.b - it.a + 1)
+    }
+
+    // fn remove_range(&self, _v: &Interval) { unimplemented!() }
+
+    pub fn remove_one(&mut self, el: isize) {
+        if self.read_only {
+            panic!("can't alter readonly IntervalSet")
+        }
+
+        for i in 0..self.intervals.len() {
+            let int = &mut self.intervals[i];
+            if el < int.a {
+                break;
+            }
+
+            if el == int.a && el == int.b {
+                self.intervals.remove(i);
+                break;
+            }
+
+            if el == int.a {
+                int.a += 1;
+                break;
+            }
+
+            if el == int.b {
+                int.b -= 1;
+                break;
+            }
+
+            if el > int.a && el < int.b {
+                let old_b = int.b;
+                int.b = el - 1;
+                self.add_range(el + 1, old_b);
+            }
+        }
+    }
+
+    // fn String(&self) -> String {
+    //     unimplemented!()
+    // }
+    //
+    // fn String_verbose(
+    //     &self,
+    //     _literalNames: Vec<String>,
+    //     _symbolicNames: Vec<String>,
+    //     _elemsAreChar: bool,
+    // ) -> String {
+    //     unimplemented!()
+    // }
+    //
+    // fn to_char_String(&self) -> String {
+    //     unimplemented!()
+    // }
+    //
+    pub fn to_index_string(&self) -> String {
+        self.to_token_string(&DUMMY_VOCAB)
+    }
+
+    pub fn to_token_string(&self, vocabulary: &dyn Vocabulary) -> String {
+        if self.intervals.is_empty() {
+            return "{}".to_owned();
+        }
+        let mut buf = String::new();
+        if self.length() > 1 {
+            buf += "{";
+        }
+        let mut iter = self.intervals.iter();
+        while let Some(int) = iter.next() {
+            if int.a == int.b {
+                buf += self.element_name(vocabulary, int.a).as_ref();
+            } else {
+                for i in int.a..(int.b + 1) {
+                    if i > int.a {
+                        buf += ", ";
+                    }
+                    buf += self.element_name(vocabulary, i).as_ref();
+                }
+            }
+            if iter.len() > 0 {
+                buf += ", ";
+            }
+        }
+
+        if self.length() > 1 {
+            buf += "}";
+        }
+
+        return buf;
+    }
+
+    fn element_name<'a>(&self, vocabulary: &'a dyn Vocabulary, a: isize) -> Cow<'a, str> {
+        if a == TOKEN_EOF {
+            Borrowed("<EOF>")
+        } else if a == TOKEN_EPSILON {
+            Borrowed("<EPSILON>")
+        } else {
+            vocabulary.get_display_name(a)
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_add_1() {
+        let mut set = IntervalSet::new();
+        set.add_range(1, 2);
+        assert_eq!(&set.intervals, &[Interval { a: 1, b: 2 }]);
+        set.add_range(2, 3);
+        assert_eq!(&set.intervals, &[Interval { a: 1, b: 3 }]);
+        set.add_range(1, 5);
+        assert_eq!(&set.intervals, &[Interval { a: 1, b: 5 }]);
+    }
+
+    #[test]
+    fn test_add_2() {
+        let mut set = IntervalSet::new();
+        set.add_range(1, 3);
+        set.add_range(5, 6);
+        assert_eq!(
+            &set.intervals,
+            &[Interval { a: 1, b: 3 }, Interval { a: 5, b: 6 }]
+        );
+        set.add_range(3, 4);
+        assert_eq!(&set.intervals, &[Interval { a: 1, b: 6 }]);
+    }
+
+    #[test]
+    fn test_remove() {
+        let mut set = IntervalSet::new();
+        set.add_range(1, 5);
+        set.remove_one(3);
+        assert_eq!(
+            &set.intervals,
+            &[Interval { a: 1, b: 2 }, Interval { a: 4, b: 5 }]
+        );
+    }
+
+    #[test]
+    fn test_substract() {
+        let mut set1 = IntervalSet::new();
+        set1.add_range(1, 2);
+        set1.add_range(4, 5);
+        let mut set2 = IntervalSet::new();
+        set2.add_range(2, 4);
+        set1.substract(&set2);
+        assert_eq!(
+            &set1.intervals,
+            &[Interval { a: 1, b: 1 }, Interval { a: 5, b: 5 }]
+        );
+    }
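+
+    // Editor's sketch (not part of the original patch): `complement()` is subtraction
+    // from the full requested range, so {1..2} complemented over 0..4 leaves {0..0, 3..4}.
+    #[test]
+    fn test_complement_sketch() {
+        let mut set = IntervalSet::new();
+        set.add_range(1, 2);
+        let comp = set.complement(0, 4);
+        assert!(comp.contains(0) && comp.contains(3) && comp.contains(4));
+        assert!(!comp.contains(1) && !comp.contains(2));
+    }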
+} diff --git a/runtime/Rust/src/lexer.rs b/runtime/Rust/src/lexer.rs new file mode 100644 index 0000000000..bfc1e270a1 --- /dev/null +++ b/runtime/Rust/src/lexer.rs @@ -0,0 +1,540 @@ +//! Lexer implementation
+use std::borrow::Cow::Borrowed;
+use std::borrow::{Borrow, Cow};
+use std::cell::{Cell, RefCell};
+
+use std::rc::Rc;
+
+use crate::char_stream::{CharStream, InputData};
+use crate::error_listener::{ConsoleErrorListener, ErrorListener};
+use crate::errors::ANTLRError;
+use crate::int_stream::IntStream;
+use crate::lexer_atn_simulator::{ILexerATNSimulator, LexerATNSimulator};
+use crate::parser::ParserNodeType;
+
+use crate::recognizer::{Actions, Recognizer};
+use crate::rule_context::EmptyContextType;
+use crate::token::TOKEN_INVALID_TYPE;
+use crate::token_factory::{CommonTokenFactory, TokenAware, TokenFactory};
+use crate::token_source::TokenSource;
+use std::ops::{Deref, DerefMut};
+
+/// Lexer functionality required by `LexerATNSimulator` to work properly
+pub trait Lexer<'input>:
+    TokenSource<'input>
+    + Recognizer<'input, Node = EmptyContextType<'input, <Self as TokenAware<'input>>::TF>>
+{
+    /// Concrete input stream used by this lexer
+    type Input: IntStream;
+    /// Same as `TokenStream::get_input_stream` but returns a concrete type instance,
+    /// important for proper inlining in hot code of `LexerATNSimulator`
+    fn input(&mut self) -> &mut Self::Input;
+    /// Sets the channel where the current token will be pushed
+    ///
+    /// By default two channels are available:
+    /// - `LEXER_DEFAULT_TOKEN_CHANNEL`
+    /// - `LEXER_HIDDEN`
+    fn set_channel(&mut self, v: isize);
+
+    /// Pushes the current mode onto the internal mode stack and sets `m` as the current lexer mode.
+    /// `pop_mode` should be used to recover the previous mode
+    fn push_mode(&mut self, m: usize);
+
+    /// Pops a mode from the internal mode stack
+    fn pop_mode(&mut self) -> Option<usize>;
+
+    /// Sets the type of the current token.
+    /// Called from an action to override the token that will be emitted by the lexer
+    fn set_type(&mut self, t: isize);
+
+    /// Sets the lexer mode, discarding the current one
+    fn set_mode(&mut self, m: usize);
+
+    /// Informs the lexer that it should treat the next token as a continuation of the current one
+    fn more(&mut self);
+
+    /// Tells the lexer to completely ignore the current token and not emit it.
+    fn skip(&mut self);
+
+    #[doc(hidden)]
+    fn reset(&mut self);
+
+    #[doc(hidden)]
+    fn get_interpreter(&self) -> Option<&LexerATNSimulator>;
+}
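+
+// Illustrative sketch by the editor (hypothetical mode constant, not part of this patch):
+// a generated action for a string-island grammar would typically combine these calls as
+//
+//     lexer.push_mode(STRING_MODE);        // enter island grammar
+//     lexer.set_channel(LEXER_HIDDEN);     // hide the delimiter token from the parser
+//     // ... and later:
+//     lexer.pop_mode();                    // restore the previous mode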
+/// **! Usually generated by ANTLR !**
+///
+/// This trait combines everything that can be used to extend Lexer behavior
+pub trait LexerRecog<'a, T: Recognizer<'a>>: Actions<'a, T> + Sized + 'static {
+    /// Callback to extend emit behavior
+    fn before_emit(_lexer: &mut T) {}
+}
+
+/// Default implementation of Lexer
+///
+/// Public fields in this struct are intended to be used by embedded actions
+#[allow(missing_docs)]
+pub struct BaseLexer<
+    'input,
+    T: LexerRecog<'input, Self> + 'static,
+    Input: CharStream<TF::From>,
+    TF: TokenFactory<'input> = CommonTokenFactory,
+> {
+    /// `LexerATNSimulator` instance of this lexer
+    pub interpreter: Option<Box<LexerATNSimulator>>,
+    /// `CharStream` used by this lexer
+    pub input: Option<Input>,
+    recog: T,
+
+    factory: &'input TF,
+
+    error_listeners: RefCell<Vec<Box<dyn ErrorListener<'input, Self>>>>,
+
+    pub token_start_char_index: isize,
+    pub token_start_line: isize,
+    pub token_start_column: isize,
+    current_pos: Rc<LexerPosition>,
+    /// Overrides token type emitted by lexer for current token
+    pub token_type: isize,
+    /// Make it `Some` to override token that is currently being generated by lexer
+    pub token: Option<TF::Tok>,
+    hit_eof: bool,
+    /// Channel lexer is currently assigning tokens to
+    pub channel: isize,
+    /// stack of modes, which is used for pushMode,popMode lexer actions
+    pub mode_stack: Vec<usize>,
+    /// Mode lexer is currently in
+    pub mode: usize,
+    /// Make it `Some` to override text for token that is currently being generated by lexer
+    pub text: Option<<TF::Data as ToOwned>::Owned>,
+}
+
+#[derive(Debug)]
+pub(crate) struct LexerPosition {
+    pub(crate) line: Cell<isize>,
+    pub(crate) char_position_in_line: Cell<isize>,
+}
+
+impl<'input, T, Input, TF> core::fmt::Debug for BaseLexer<'input, T, Input, TF>
+where
+    T: LexerRecog<'input, Self> + 'static,
+    Input: CharStream<TF::From>,
+    TF: TokenFactory<'input>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BaseLexer")
+            .field("interpreter", &self.interpreter)
+            // TODO: fix this
+            // .field("input", &self.input)
+            // .field("recog", &self.recog)
+            // .field("factory", &self.factory)
+            .field("error_listeners", &self.error_listeners)
+            .field("token_start_char_index", &self.token_start_char_index)
+            .field("token_start_line", &self.token_start_line)
+            .field("token_start_column", &self.token_start_column)
+            .field("current_pos", &self.current_pos)
+            .field("token_type", &self.token_type)
+            .field("token", &self.token)
+            .field("hit_eof", &self.hit_eof)
+            .field("channel", &self.channel)
+            .field("mode_stack", &self.mode_stack)
+            .field("mode", &self.mode)
+            .field(
+                "text",
+                match &self.text {
+                    Some(_) => &"Some",
+                    None => &"None",
+                },
+            )
+            .finish()
+    }
+}
+
+impl<'input, T, Input, TF> Deref for BaseLexer<'input, T, Input, TF>
+where
+    T: LexerRecog<'input, Self> + 'static,
+    Input: CharStream<TF::From>,
+    TF: TokenFactory<'input>,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.recog
+    }
+}
+
+impl<'input, T, Input, TF> DerefMut for BaseLexer<'input, T, Input, TF>
+where
+    T: LexerRecog<'input, Self> + 'static,
+    Input: CharStream<TF::From>,
+    TF: TokenFactory<'input>,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.recog
+    }
+}
+
+impl<'input, T, Input, TF> Recognizer<'input> for BaseLexer<'input, T, Input, TF>
+where
+    T: LexerRecog<'input, Self> + 'static,
+    Input: CharStream<TF::From>,
+    TF: TokenFactory<'input>,
+{
+    type Node = EmptyContextType<'input, TF>;
+
+    fn sempred(
+        &mut self,
+        _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
+        rule_index: isize,
+        action_index: isize,
+    ) -> bool {
+        <T as Actions<'input, Self>>::sempred(_localctx, rule_index, action_index, self)
+    }
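+
+    // `sempred`/`action` simply dispatch into the `Actions` impl generated for the grammar,
+    // so embedded predicates and actions run with full access to the lexer state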
+    fn action(
+        &mut self,
+        _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
+        rule_index: isize,
+        action_index: isize,
+    ) {
+        <T as Actions<'input, Self>>::action(_localctx, rule_index, action_index, self)
+    }
+}
+
+/// Default lexer mode id
+pub const LEXER_DEFAULT_MODE: usize = 0;
+/// Special token type to indicate that lexer should continue current token on next iteration
+/// see `Lexer::more()`
+pub const LEXER_MORE: isize = -2;
+/// Special token type to indicate that lexer should not return current token
+/// usually used to skip whitespaces and comments
+/// see `Lexer::skip()`
+pub const LEXER_SKIP: isize = -3;
+
+#[doc(inline)]
+pub use super::token::TOKEN_DEFAULT_CHANNEL as LEXER_DEFAULT_TOKEN_CHANNEL;
+
+#[doc(inline)]
+pub use super::token::TOKEN_HIDDEN_CHANNEL as LEXER_HIDDEN;
+
+pub(crate) const LEXER_MIN_CHAR_VALUE: isize = 0x0000;
+pub(crate) const LEXER_MAX_CHAR_VALUE: isize = 0x10FFFF;
+
+impl<'input, T, Input, TF> BaseLexer<'input, T, Input, TF>
+where
+    T: LexerRecog<'input, Self> + 'static,
+    Input: CharStream<TF::From>,
+    TF: TokenFactory<'input>,
+{
+    fn emit_token(&mut self, token: TF::Tok) {
+        self.token = Some(token);
+    }
+
+    fn emit(&mut self) {
+        <T as LexerRecog<'input, Self>>::before_emit(self);
+        let stop = self.get_char_index() - 1;
+        let token = self.factory.create(
+            Some(self.input.as_mut().unwrap()),
+            self.token_type,
+            self.text.take(),
+            self.channel,
+            self.token_start_char_index,
+            stop,
+            self.token_start_line,
+            self.token_start_column,
+        );
+        self.emit_token(token);
+    }
+
+    fn emit_eof(&mut self) {
+        let token = self.factory.create(
+            None::<&mut Input>,
+            super::int_stream::EOF,
+            None,
+            LEXER_DEFAULT_TOKEN_CHANNEL,
+            self.get_char_index(),
+            self.get_char_index() - 1,
+            self.get_line(),
+            self.get_char_position_in_line(),
+        );
+        self.emit_token(token)
+    }
+
+    /// Current position in input stream
+    pub fn get_char_index(&self) -> isize {
+        self.input.as_ref().unwrap().index()
+    }
+
+    /// Current token text
+    pub fn get_text<'a>(&'a self) -> Cow<'a, TF::Data>
+    where
+        'input: 'a,
+    {
+        self.text
+            .as_ref()
+            .map(|it| Borrowed(it.borrow()))
+            // .unwrap_or("")
+            .unwrap_or_else(|| {
+                let text = self
+                    .input
+                    .as_ref()
+                    .unwrap()
+                    .get_text(self.token_start_char_index, self.get_char_index() - 1);
+                TF::get_data(text)
+            })
+    }
+
+    /// Used from lexer actions to override text of the token that will be emitted next
+    pub fn set_text(&mut self, _text: <TF::Data as ToOwned>::Owned) {
+        self.text = Some(_text);
+    }
+
+    // fn get_all_tokens(&mut self) -> Vec { unimplemented!() }
+
+    // fn get_char_error_display(&self, _c: char) -> String { unimplemented!() }
+
+    /// Add error listener
+    pub fn add_error_listener(&mut self, listener: Box<dyn ErrorListener<'input, Self>>) {
+        self.error_listeners.borrow_mut().push(listener);
+    }
+
+    /// Remove and drop all error listeners
+    pub fn remove_error_listeners(&mut self) {
+        self.error_listeners.borrow_mut().clear();
+    }
+
+    /// Creates new lexer instance
+    pub fn new_base_lexer(
+        input: Input,
+        interpreter: LexerATNSimulator,
+        recog: T,
+        factory: &'input TF,
+    ) -> Self {
+        let mut lexer = Self {
+            interpreter: Some(Box::new(interpreter)),
+            input: Some(input),
+            recog,
+            factory,
+            error_listeners: RefCell::new(vec![Box::new(ConsoleErrorListener {})]),
+            token_start_char_index: 0,
+            token_start_line: 0,
+            token_start_column: 0,
+            current_pos: Rc::new(LexerPosition {
+                line: Cell::new(1),
+                char_position_in_line: Cell::new(0),
+            }),
+            token_type: super::token::TOKEN_INVALID_TYPE,
+            text: None,
+            token: None,
+            hit_eof: false,
+            channel: super::token::TOKEN_DEFAULT_CHANNEL,
+            // token_factory_source_pair: None,
+            mode_stack: Vec::new(),
+            mode:
self::LEXER_DEFAULT_MODE, + }; + let pos = lexer.current_pos.clone(); + lexer.interpreter.as_mut().unwrap().current_pos = pos; + lexer + } +} + +impl<'input, T, Input, TF> TokenAware<'input> for BaseLexer<'input, T, Input, TF> +where + T: LexerRecog<'input, Self> + 'static, + Input: CharStream, + TF: TokenFactory<'input>, +{ + type TF = TF; +} + +impl<'input, T, Input, TF> TokenSource<'input> for BaseLexer<'input, T, Input, TF> +where + T: LexerRecog<'input, Self> + 'static, + Input: CharStream, + TF: TokenFactory<'input>, +{ + type TF = TF; + #[inline] + #[allow(unused_labels)] + fn next_token(&mut self) -> >::Tok { + assert!(self.input.is_some()); + + let _marker = self.input().mark(); + 'outer: loop { + if self.hit_eof { + self.emit_eof(); + break; + } + self.token = None; + self.channel = LEXER_DEFAULT_TOKEN_CHANNEL; + self.token_start_column = self + .interpreter + .as_ref() + .unwrap() + .get_char_position_in_line(); + self.token_start_line = self.interpreter.as_ref().unwrap().get_line(); + self.text = None; + let index = self.input().index(); + self.token_start_char_index = index; + + 'inner: loop { + self.token_type = TOKEN_INVALID_TYPE; + // detach from self, to allow self to be passed deeper + let mut interpreter = self.interpreter.take().unwrap(); + // let mut input = self.input.take().unwrap(); + let result = interpreter.match_token(self.mode, self); + self.interpreter = Some(interpreter); + + let ttype = result.unwrap_or_else(|err| { + // println!("error, recovering"); + notify_listeners(&mut self.error_listeners.borrow_mut(), &err, self); + self.interpreter + .as_mut() + .unwrap() + .recover(err, self.input.as_mut().unwrap()); + LEXER_SKIP + }); + // self.input = Some(input) + + if self.input().la(1) == super::int_stream::EOF { + self.hit_eof = true; + } + + if self.token_type == TOKEN_INVALID_TYPE { + self.token_type = ttype; + } + + if self.token_type == LEXER_SKIP { + continue 'outer; + } + + if self.token_type != LEXER_MORE { + break; + } + } + + if self.token.is_none() { + self.emit(); + break; + } + } + self.input().release(_marker); + self.token.take().unwrap() + } + + fn get_line(&self) -> isize { + self.current_pos.line.get() + } + + fn get_char_position_in_line(&self) -> isize { + self.current_pos.char_position_in_line.get() + } + + fn get_input_stream(&mut self) -> Option<&mut dyn IntStream> { + match &mut self.input { + None => None, + Some(x) => Some(x as _), + } + } + + fn get_source_name(&self) -> String { + self.input + .as_ref() + .map(|it| it.get_source_name()) + .unwrap_or("".to_string()) + } + + // fn set_token_factory<'c: 'b>(&mut self, f: &'c TokenFactory) { + // self.factory = f; + // } + + fn get_token_factory(&self) -> &'input TF { + self.factory + } +} + +#[cold] +#[inline(never)] +fn notify_listeners<'input, T, Input, TF>( + liseners: &mut Vec>>>, + e: &ANTLRError, + lexer: &BaseLexer<'input, T, Input, TF>, +) where + T: LexerRecog<'input, BaseLexer<'input, T, Input, TF>> + 'static, + Input: CharStream, + TF: TokenFactory<'input>, +{ + let inner = lexer + .input + .as_ref() + .unwrap() + .get_text(lexer.token_start_char_index, lexer.get_char_index()); + let text = format!( + "token recognition error at: '{}'", + TF::get_data(inner).to_display() + ); + for listener in liseners.iter_mut() { + listener.syntax_error( + lexer, + None, + lexer.token_start_line, + lexer.token_start_column, + &text, + Some(e), + ) + } +} + +impl<'input, T, Input, TF> Lexer<'input> for BaseLexer<'input, T, Input, TF> +where + T: LexerRecog<'input, Self> + 'static, + Input: 
CharStream, + TF: TokenFactory<'input>, +{ + type Input = Input; + + fn input(&mut self) -> &mut Self::Input { + self.input.as_mut().unwrap() + } + + fn set_channel(&mut self, v: isize) { + self.channel = v; + } + + fn push_mode(&mut self, m: usize) { + self.mode_stack.push(self.mode); + self.mode = m; + } + + fn pop_mode(&mut self) -> Option { + self.mode_stack.pop().map(|mode| { + self.mode = mode; + mode + }) + } + + fn set_type(&mut self, t: isize) { + self.token_type = t; + } + + fn set_mode(&mut self, m: usize) { + self.mode = m; + } + + fn more(&mut self) { + self.set_type(LEXER_MORE) + } + + fn skip(&mut self) { + self.set_type(LEXER_SKIP) + } + + fn reset(&mut self) { + unimplemented!() + } + + fn get_interpreter(&self) -> Option<&LexerATNSimulator> { + self.interpreter.as_deref() + } +} diff --git a/runtime/Rust/src/lexer_action.rs b/runtime/Rust/src/lexer_action.rs new file mode 100644 index 0000000000..01953e9016 --- /dev/null +++ b/runtime/Rust/src/lexer_action.rs @@ -0,0 +1,65 @@ +use std::hash::Hash; + +use crate::lexer::Lexer; + +pub(crate) const LEXER_ACTION_TYPE_CHANNEL: isize = 0; +pub(crate) const LEXER_ACTION_TYPE_CUSTOM: isize = 1; +pub(crate) const LEXER_ACTION_TYPE_MODE: isize = 2; +pub(crate) const LEXER_ACTION_TYPE_MORE: isize = 3; +pub(crate) const LEXER_ACTION_TYPE_POP_MODE: isize = 4; +pub(crate) const LEXER_ACTION_TYPE_PUSH_MODE: isize = 5; +pub(crate) const LEXER_ACTION_TYPE_SKIP: isize = 6; +pub(crate) const LEXER_ACTION_TYPE_TYPE: isize = 7; + +#[derive(Clone, Eq, PartialEq, Debug, Hash)] +pub(crate) enum LexerAction { + LexerChannelAction(isize), + LexerCustomAction { + rule_index: isize, + action_index: isize, + }, + LexerModeAction(isize), + LexerMoreAction, + LexerPopModeAction, + LexerPushModeAction(isize), + LexerSkipAction, + LexerTypeAction(isize), + LexerIndexedCustomAction { + offset: isize, + action: Box, + }, +} + +impl LexerAction { + // fn get_action_type(&self) -> isize { + // unimplemented!() + //// unsafe {discriminant_value(self)} as isize + // } + pub fn is_position_dependent(&self) -> bool { + match self { + LexerAction::LexerCustomAction { .. } + | LexerAction::LexerIndexedCustomAction { .. } => true, + _ => false, + } + } + pub(crate) fn execute<'input, T: Lexer<'input>>(&self, lexer: &mut T) { + match self { + &LexerAction::LexerChannelAction(channel) => lexer.set_channel(channel), + &LexerAction::LexerCustomAction { + rule_index, + action_index, + } => { + lexer.action(None, rule_index, action_index); + } + &LexerAction::LexerModeAction(mode) => lexer.set_mode(mode as usize), + &LexerAction::LexerMoreAction => lexer.more(), + &LexerAction::LexerPopModeAction => { + lexer.pop_mode(); + } + &LexerAction::LexerPushModeAction(mode) => lexer.push_mode(mode as usize), + &LexerAction::LexerSkipAction => lexer.skip(), + &LexerAction::LexerTypeAction(ty) => lexer.set_type(ty), + &LexerAction::LexerIndexedCustomAction { ref action, .. 
} => action.execute(lexer), + } + } +} diff --git a/runtime/Rust/src/lexer_action_executor.rs b/runtime/Rust/src/lexer_action_executor.rs new file mode 100644 index 0000000000..4bfaaa73e6 --- /dev/null +++ b/runtime/Rust/src/lexer_action_executor.rs @@ -0,0 +1,86 @@ +use std::hash::{Hash, Hasher}; + +use murmur3::murmur3_32::MurmurHasher; + +use crate::int_stream::IntStream; +use crate::lexer::Lexer; +use crate::lexer_action::LexerAction; +use crate::lexer_action::LexerAction::LexerIndexedCustomAction; + +#[derive(Clone, Eq, PartialEq, Debug)] +pub(crate) struct LexerActionExecutor { + cached_hash: u64, + lexer_actions: Vec, +} + +impl Hash for LexerActionExecutor { + fn hash(&self, state: &mut H) { + state.write_u64(self.cached_hash) + } +} + +impl LexerActionExecutor { + pub(crate) fn new(lexer_actions: Vec) -> LexerActionExecutor { + // let mut hasher = ; + let cached_hash = lexer_actions + .iter() + .fold(MurmurHasher::default(), |mut acc, x| { + x.hash(&mut acc); + acc + }) + .finish(); + LexerActionExecutor { + lexer_actions, + cached_hash, + } + } + + pub(crate) fn new_copy_append( + old: Option<&Self>, + lexer_action: LexerAction, + ) -> LexerActionExecutor { + let mut new = old + .cloned() + .unwrap_or_else(|| LexerActionExecutor::new(Vec::new())); + new.lexer_actions.push(lexer_action); + new + } + + pub fn fix_offset_before_match(mut self, offset: isize) -> LexerActionExecutor { + for action in self.lexer_actions.iter_mut() { + match action { + LexerAction::LexerIndexedCustomAction { .. } => {} + _ => { + if action.is_position_dependent() { + *action = LexerIndexedCustomAction { + offset, + action: Box::new(action.clone()), + }; + } + } + } + } + self + } + + pub fn execute<'input>(&self, lexer: &mut impl Lexer<'input>, start_index: isize) { + let mut requires_seek = false; + let stop_index = lexer.input().index(); + for action in self.lexer_actions.iter() { + //println!("executing action {:?}",action); + if let LexerAction::LexerIndexedCustomAction { offset, .. } = action { + lexer.input().seek(start_index + offset); + requires_seek = start_index + offset != stop_index; + } else if action.is_position_dependent() { + lexer.input().seek(stop_index); + requires_seek = false + } + action.execute(lexer); + } + if requires_seek { + lexer.input().seek(stop_index); + } + } + + // fn hash(&self) -> int { unimplemented!() } +} diff --git a/runtime/Rust/src/lexer_atn_simulator.rs b/runtime/Rust/src/lexer_atn_simulator.rs new file mode 100644 index 0000000000..2ab653e226 --- /dev/null +++ b/runtime/Rust/src/lexer_atn_simulator.rs @@ -0,0 +1,737 @@ +//! 
Implementation of the lexer automaton (DFA) simulation
+use std::cell::{Cell, RefCell};
+
+use std::ops::Deref;
+use std::rc::Rc;
+use std::usize;
+
+use crate::atn::ATN;
+use crate::atn_config::{ATNConfig, ATNConfigType};
+use crate::atn_config_set::ATNConfigSet;
+use crate::atn_simulator::{BaseATNSimulator, IATNSimulator};
+use crate::atn_state::ATNStateType::RuleStopState;
+use crate::atn_state::{ATNState, ATNStateType};
+
+use crate::dfa::DFA;
+use crate::dfa_state::{DFAState, DFAStateRef};
+use crate::errors::ANTLRError;
+use crate::errors::ANTLRError::LexerNoAltError;
+use crate::int_stream::{IntStream, EOF};
+use crate::lexer::{Lexer, LexerPosition, LEXER_MAX_CHAR_VALUE, LEXER_MIN_CHAR_VALUE};
+use crate::lexer_action_executor::LexerActionExecutor;
+use crate::prediction_context::EMPTY_PREDICTION_CONTEXT;
+use crate::prediction_context::{
+    PredictionContext, PredictionContextCache, PREDICTION_CONTEXT_EMPTY_RETURN_STATE,
+};
+use crate::token::TOKEN_EOF;
+
+use crate::transition::{
+    ActionTransition, PredicateTransition, RuleTransition, Transition, TransitionType,
+};
+use crate::utils::cell_update;
+
+#[allow(missing_docs)]
+pub const ERROR_DFA_STATE_REF: DFAStateRef = usize::MAX;
+
+// todo rewrite this to be actually usable
+#[doc(hidden)]
+pub trait ILexerATNSimulator: IATNSimulator {
+    fn reset(&mut self);
+    fn match_token<'input>(
+        &mut self,
+        mode: usize,
+        lexer: &mut impl Lexer<'input>,
+    ) -> Result<isize, ANTLRError>;
+    fn get_char_position_in_line(&self) -> isize;
+    fn set_char_position_in_line(&mut self, column: isize);
+    fn get_line(&self) -> isize;
+    fn set_line(&mut self, line: isize);
+    fn consume<T: IntStream>(&self, input: &mut T);
+    #[cold]
+    fn recover(&mut self, _re: ANTLRError, input: &mut impl IntStream) {
+        if input.la(1) != EOF {
+            self.consume(input)
+        }
+    }
+}
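+
+// Control-flow sketch of a `match_token` call, as the editor reads this patch:
+// mark the input, look up the DFA for the current mode, run `exec_atn` from its cached
+// start state (or build that state via `match_atn` on a cache miss), then release the
+// mark. All failures funnel through `recover`, which skips one symbol unless at EOF.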
+/// Simple DFA implementation, enough for the lexer.
+#[derive(Debug)]
+pub struct LexerATNSimulator {
+    base: BaseATNSimulator,
+
+    // merge_cache: DoubleDict,
+    start_index: isize,
+    pub(crate) current_pos: Rc<LexerPosition>,
+    mode: usize,
+    prev_accept: SimState,
+    // lexer_action_executor: Option>,
+}
+
+impl ILexerATNSimulator for LexerATNSimulator {
+    fn reset(&mut self) {
+        self.prev_accept.reset()
+    }
+
+    fn match_token<'input>(
+        &mut self,
+        mode: usize,
+        // input:&mut dyn CharStream,
+        lexer: &mut impl Lexer<'input>,
+    ) -> Result<isize, ANTLRError> {
+        self.mode = mode;
+        let mark = lexer.input().mark();
+        // println!("start matching on mode {}",mode);
+        let result = (|| {
+            self.start_index = lexer.input().index();
+            self.prev_accept.reset();
+            let temp = self.base.decision_to_dfa.clone();
+            let dfa = temp
+                .get(mode)
+                .ok_or_else(|| ANTLRError::IllegalStateError("invalid mode".into()))?;
+            let mut dfa = dfa.borrow_mut();
+
+            let s0 = dfa.s0;
+            match s0 {
+                None => self.match_atn(lexer, &mut dfa),
+                Some(s0) => self.exec_atn(s0, lexer, &mut dfa),
+                // Err(_) => panic!("dfa rwlock error")
+            }
+        })();
+        lexer.input().release(mark);
+        result
+    }
+
+    fn get_char_position_in_line(&self) -> isize {
+        self.current_pos.char_position_in_line.get()
+    }
+
+    fn set_char_position_in_line(&mut self, column: isize) {
+        self.current_pos.char_position_in_line.set(column)
+    }
+
+    fn get_line(&self) -> isize {
+        self.current_pos.line.get()
+    }
+
+    fn set_line(&mut self, line: isize) {
+        // was `char_position_in_line.set(line)`, which updated the column instead of the line
+        self.current_pos.line.set(line)
+    }
+
+    fn consume<T: IntStream>(&self, _input: &mut T) {
+        let ch = _input.la(1);
+        if ch == '\n' as isize {
+            cell_update(&self.current_pos.line, |x| x + 1);
+            self.current_pos.char_position_in_line.set(0);
+        } else {
+            cell_update(&self.current_pos.char_position_in_line, |x| x + 1);
+        }
+        _input.consume();
+    }
+
+    // fn get_recog(&self) -> Rc>>{
+    //     Rc::clone(&self.recog)
+    // }
+}
+
+impl IATNSimulator for LexerATNSimulator {
+    fn shared_context_cache(&self) -> &PredictionContextCache {
+        self.base.shared_context_cache()
+    }
+
+    fn atn(&self) -> &ATN {
+        self.base.atn()
+    }
+
+    fn decision_to_dfa(&self) -> &Vec<RefCell<DFA>> {
+        self.base.decision_to_dfa()
+    }
+}
+
+#[allow(missing_docs)]
+pub const MIN_DFA_EDGE: isize = 0;
+#[allow(missing_docs)]
+pub const MAX_DFA_EDGE: isize = 127;
+
+impl LexerATNSimulator {
+    /// Creates a `LexerATNSimulator` instance which builds a DFA over `atn`
+    ///
+    /// Called from the generated lexer.
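+    ///
+    /// A construction sketch by the editor (`_ATN` and `_decision_to_DFA` stand in for
+    /// the statics a generated lexer would own; the names are assumed, not from this patch):
+    ///
+    /// ```ignore
+    /// let interp = LexerATNSimulator::new_lexer_atnsimulator(
+    ///     Rc::clone(&_ATN),
+    ///     Rc::clone(&_decision_to_DFA),
+    ///     Rc::new(PredictionContextCache::new()),
+    /// );
+    /// ```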
+ pub fn new_lexer_atnsimulator( + atn: Rc, + decision_to_dfa: Rc>>, + shared_context_cache: Rc, + ) -> LexerATNSimulator { + LexerATNSimulator { + base: BaseATNSimulator::new_base_atnsimulator( + atn, + decision_to_dfa, + shared_context_cache, + ), + start_index: 0, + current_pos: Rc::new(LexerPosition { + line: Cell::new(0), + char_position_in_line: Cell::new(0), + }), + mode: 0, + prev_accept: SimState::new(), + // lexer_action_executor: None, + } + } + + // fn copy_state(&self, _simulator: &mut LexerATNSimulator) { + // unimplemented!() + // } + + #[cold] + fn match_atn<'input>( + &mut self, + lexer: &mut impl Lexer<'input>, + dfa: &mut DFA, + ) -> Result { + // let start_state = self.atn().mode_to_start_state.get(self.mode as usize).ok_or(ANTLRError::IllegalStateError("invalid mode".into()))?; + let atn = self.atn(); + let start_state = *atn + .mode_to_start_state + .get(self.mode) + .ok_or_else(|| ANTLRError::IllegalStateError("invalid mode".into()))?; + + let _old_mode = self.mode; + let mut s0_closure = self.compute_start_state(atn.states[start_state].as_ref(), lexer); + let _supress_edge = s0_closure.has_semantic_context(); + s0_closure.set_has_semantic_context(false); + + let next_state = self.add_dfastate(dfa, s0_closure); + if !_supress_edge { + dfa.s0 = Some(next_state); + } + + self.exec_atn(next_state, lexer, dfa) + } + + fn exec_atn<'input>( + &mut self, + // input: &'a mut dyn CharStream, + ds0: DFAStateRef, + lexer: &mut impl Lexer<'input>, + dfa: &mut DFA, + ) -> Result { + // if self.get_dfa().states.read().unwrap().get(ds0).unwrap().is_accept_state{ + self.capture_sim_state(&dfa, lexer.input(), ds0); + // } + + let mut symbol = lexer.input().la(1); + let mut s = ds0; + loop { + let target = Self::get_existing_target_state(dfa, s, symbol); + let target = target.unwrap_or_else(|| self.compute_target_state(dfa, s, symbol, lexer)); + // let target = dfastates.deref().get(s).unwrap() ;x + + if target == ERROR_DFA_STATE_REF { + break; + } + // println!(" --- target computed {:?}", self.get_dfa().states.read().unwrap()[target].configs.configs.iter().map(|it|it.get_state()).collect::>()); + + if symbol != EOF { + self.consume(lexer.input()); + } + + if self.capture_sim_state(dfa, lexer.input(), target) { + if symbol == EOF { + break; + } + } + + symbol = lexer.input().la(1); + + s = target; + } + // let _last = self.get_dfa().states.read().get(s).unwrap(); + + self.fail_or_accept(symbol, lexer, dfa) + } + + #[inline(always)] + fn get_existing_target_state(dfa: &DFA, _s: DFAStateRef, t: isize) -> Option { + // if t < MIN_DFA_EDGE || t > MAX_DFA_EDGE { + // return None; + // } + + dfa.states[_s] + .edges + .get((t - MIN_DFA_EDGE) as usize) + .and_then(|x| match x { + 0 => None, + x => Some(x), + }) + .copied() + } + + #[cold] + fn compute_target_state<'input>( + &self, + dfa: &mut DFA, + s: DFAStateRef, + _t: isize, + lexer: &mut impl Lexer<'input>, + ) -> DFAStateRef { + let mut reach = ATNConfigSet::new_ordered(); + self.get_reachable_config_set(&dfa.states[s].configs, &mut reach, _t, lexer); + // println!(" --- target computed {:?}", reach.configs.iter().map(|it|it.get_state()).collect::>()); + + // let mut states = dfa_mut.states; + if reach.is_empty() { + if !reach.has_semantic_context() { + self.add_dfaedge(&mut dfa.states[s], _t, ERROR_DFA_STATE_REF); + } + return ERROR_DFA_STATE_REF; + } + + let supress_edge = reach.has_semantic_context(); + reach.set_has_semantic_context(false); + let to = self.add_dfastate(dfa, Box::new(reach)); + if !supress_edge { + let from = &mut 
dfa.states[s]; + self.add_dfaedge(from, _t, to); + } + // println!("target state computed from {:?} to {:?} on symbol {}", _s, to, char::try_from(_t as u32).unwrap()); + to + // states.get(to).unwrap() + } + + fn get_reachable_config_set<'input>( + &self, + // _states: &V, + // _input: &mut dyn CharStream, + _closure: &ATNConfigSet, + _reach: &mut ATNConfigSet, + _t: isize, + lexer: &mut impl Lexer<'input>, + ) { + let mut skip_alt = 0; + // println!(" --- source {:?}", _closure.configs.iter().map(|it|it.get_state()).collect::>()); + for config in _closure.get_items() { + let current_alt_reached_accept_state = config.get_alt() == skip_alt; + if current_alt_reached_accept_state { + if let ATNConfigType::LexerATNConfig { + passed_through_non_greedy_decision: true, + .. + } = config.get_type() + { + continue; + } + } + let atn_state = self.atn().states[config.get_state()].as_ref(); + for tr in atn_state.get_transitions() { + if let Some(target) = tr.get_reachable_target(_t) { + let exec = config.get_lexer_executor().map(|x| { + x.clone() + .fix_offset_before_match(lexer.input().index() - self.start_index) + }); + + let new = config.cloned_with_new_exec(self.atn().states[target].as_ref(), exec); + if self.closure( + new, + _reach, + current_alt_reached_accept_state, + true, + _t == EOF, + lexer, + ) { + skip_alt = config.get_alt(); + break; + } + } + } + } + } + + // fn get_reachable_target(&self, states: &T, _trans: &Transition, _t: isize) -> &ATNState + // where + // T: Deref>, + // { + // unimplemented!() + // } + + fn fail_or_accept<'input>( + &mut self, + _t: isize, + lexer: &mut impl Lexer<'input>, + dfa: &DFA, + ) -> Result { + // println!("fail_or_accept"); + if let Some(state) = self.prev_accept.dfa_state { + // let lexer_action_executor; + self.accept(lexer.input()); + + let prediction = { + let dfa_state_prediction = &dfa.states[state]; + // println!("accepted, prediction = {}, on dfastate {}", dfa_state_prediction.prediction, dfa_state_prediction.state_number); + // lexer_action_executor = dfa_state_prediction.lexer_action_executor.clone(); + // let recog = self.recog.clone(); + if let Some(x) = dfa_state_prediction.lexer_action_executor.as_ref() { + x.execute(lexer, self.start_index) + } + + dfa_state_prediction.prediction + }; + + // self.lexer_action_executor = lexer_action_executor; + Ok(prediction) + } else { + if _t == EOF && lexer.input().index() == self.start_index { + return Ok(TOKEN_EOF); + } + Err(LexerNoAltError { + start_index: self.start_index, + }) + } + } + + fn accept<'input>(&mut self, input: &mut impl IntStream) { + input.seek(self.prev_accept.index); + self.current_pos.line.set(self.prev_accept.line); + self.current_pos + .char_position_in_line + .set(self.prev_accept.column); + } + + fn compute_start_state<'input>( + &self, + _p: &dyn ATNState, + lexer: &mut impl Lexer<'input>, + ) -> Box { + // let initial_context = &EMPTY_PREDICTION_CONTEXT; + let mut config_set = ATNConfigSet::new_ordered(); + + for (i, tr) in _p.get_transitions().iter().enumerate() { + let target = tr.get_target(); + let atn_config = ATNConfig::new_lexer_atnconfig6( + target, + (i + 1) as isize, + EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()), + ); + self.closure(atn_config, &mut config_set, false, false, false, lexer); + } + + Box::new(config_set) + } + + fn closure<'input>( + &self, + // _input: &mut dyn CharStream, + mut config: ATNConfig, + _configs: &mut ATNConfigSet, + mut _current_alt_reached_accept_state: bool, + _speculative: bool, + _treat_eofas_epsilon: bool, + lexer: &mut impl 
Lexer<'input>, + ) -> bool { + // let config = &config; + let atn = self.atn(); + let state = atn.states[config.get_state()].as_ref(); + // println!("closure called on state {} {:?}", state.get_state_number(), state.get_state_type()); + + if let ATNStateType::RuleStopState {} = state.get_state_type() { + // println!("reached rulestopstate {}",state.get_state_number()); + if config.get_context().map(|x| x.has_empty_path()) != Some(false) { + if config.get_context().map(|x| x.is_empty()) != Some(false) { + _configs.add(Box::new(config)); + return true; + } else { + _configs.add(Box::new(config.cloned_with_new_ctx( + state, + Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), + ))); + _current_alt_reached_accept_state = true + } + } + + if config.get_context().map(|x| x.is_empty()) == Some(false) { + let ctx = config.take_context(); + for i in 0..ctx.length() { + if ctx.get_return_state(i) != PREDICTION_CONTEXT_EMPTY_RETURN_STATE { + let new_ctx = ctx.get_parent(i).cloned(); + let return_state = + self.atn().states[ctx.get_return_state(i) as usize].as_ref(); + let next_config = config.cloned_with_new_ctx(return_state, new_ctx); + _current_alt_reached_accept_state = self.closure( + next_config, + _configs, + _current_alt_reached_accept_state, + _speculative, + _treat_eofas_epsilon, + lexer, + ) + } + } + } + + return _current_alt_reached_accept_state; + } + + if !state.has_epsilon_only_transitions() { + if let ATNConfigType::LexerATNConfig { + passed_through_non_greedy_decision, + .. + } = config.config_type + { + if !_current_alt_reached_accept_state || !passed_through_non_greedy_decision { + _configs.add(Box::new(config.clone())); + } + } + } + + let state = atn.states[config.get_state()].as_ref(); + + for tr in state.get_transitions() { + let c = self.get_epsilon_target( + &mut config, + tr.as_ref(), + _configs, + _speculative, + _treat_eofas_epsilon, + lexer, + ); + + if let Some(c) = c { + _current_alt_reached_accept_state = self.closure( + c, + _configs, + _current_alt_reached_accept_state, + _speculative, + _treat_eofas_epsilon, + lexer, + ); + } + } + + _current_alt_reached_accept_state + } + + fn get_epsilon_target<'input>( + &self, + // _input: &mut dyn CharStream, + _config: &mut ATNConfig, + _trans: &dyn Transition, + _configs: &mut ATNConfigSet, + _speculative: bool, + _treat_eofas_epsilon: bool, + lexer: &mut impl Lexer<'input>, + ) -> Option { + let mut result = None; + let target = self.atn().states.get(_trans.get_target()).unwrap().as_ref(); + // println!("epsilon target for {:?} is {:?}", _trans, target.get_state_type()); + match _trans.get_serialization_type() { + TransitionType::TRANSITION_EPSILON => { + result = Some(_config.cloned(target)); + } + TransitionType::TRANSITION_RULE => { + let rt = _trans.cast::(); + //println!("rule transition follow state{}", rt.follow_state); + let pred_ctx = PredictionContext::new_singleton( + Some(_config.get_context().unwrap().clone()), + rt.follow_state as isize, + ); + result = Some(_config.cloned_with_new_ctx(target, Some(pred_ctx.into()))); + } + TransitionType::TRANSITION_PREDICATE => { + let tr = _trans.cast::(); + _configs.set_has_semantic_context(true); + if self.evaluate_predicate(tr.rule_index, tr.pred_index, _speculative, lexer) { + result = Some(_config.cloned(target)); + } + } + TransitionType::TRANSITION_ACTION => { + //println!("action transition"); + if _config.get_context().map(|x| x.has_empty_path()) != Some(false) { + if let ATNConfigType::LexerATNConfig { + lexer_action_executor, + .. 
+ } = _config.get_type() + { + let tr = _trans.cast::(); + let lexer_action = + self.atn().lexer_actions[tr.action_index as usize].clone(); + //dbg!(&lexer_action); + let lexer_action_executor = LexerActionExecutor::new_copy_append( + lexer_action_executor.as_deref(), + lexer_action, + ); + result = + Some(_config.cloned_with_new_exec(target, Some(lexer_action_executor))) + } + } else { + result = Some(_config.cloned(target)); + } + } + TransitionType::TRANSITION_RANGE + | TransitionType::TRANSITION_SET + | TransitionType::TRANSITION_ATOM => { + if _treat_eofas_epsilon { + if _trans.matches(EOF, LEXER_MIN_CHAR_VALUE, LEXER_MAX_CHAR_VALUE) { + let target = self.atn().states[_trans.get_target()].as_ref(); + result = Some(_config.cloned(target)); + } + } + } + TransitionType::TRANSITION_WILDCARD => {} + TransitionType::TRANSITION_NOTSET => {} + TransitionType::TRANSITION_PRECEDENCE => { + panic!("precedence predicates are not supposed to be in lexer"); + } + } + + result + } + + fn evaluate_predicate<'input, T: Lexer<'input>>( + &self, + // input: &mut dyn CharStream, + rule_index: isize, + pred_index: isize, + speculative: bool, + lexer: &mut T, + ) -> bool { + if !speculative { + return lexer.sempred(None, rule_index, pred_index); + } + + let saved_column = self.current_pos.char_position_in_line.get(); + let saved_line = self.current_pos.line.get(); + let index = lexer.input().index(); + let marker = lexer.input().mark(); + self.consume(lexer.input()); + + let result = lexer.sempred(None, rule_index, pred_index); + + self.current_pos.char_position_in_line.set(saved_column); + self.current_pos.line.set(saved_line); + lexer.input().seek(index); + lexer.input().release(marker); + return result; + } + + fn capture_sim_state( + &mut self, + dfa: &DFA, + input: &impl IntStream, + dfa_state: DFAStateRef, + ) -> bool { + if dfa.states[dfa_state].is_accept_state { + self.prev_accept = SimState { + index: input.index(), + line: self.current_pos.line.get(), + column: self.current_pos.char_position_in_line.get(), + dfa_state: Some(dfa_state), + }; + // self.prev_accept.index = input.index(); + // self.prev_accept.dfa_state = Some(dfa_state); + return true; + } + false + } + + fn add_dfaedge(&self, _from: &mut DFAState, t: isize, _to: DFAStateRef) { + if t < MIN_DFA_EDGE || t > MAX_DFA_EDGE { + return; + } + + if _from.edges.len() < (MAX_DFA_EDGE - MIN_DFA_EDGE + 1) as usize { + _from + .edges + .resize((MAX_DFA_EDGE - MIN_DFA_EDGE + 1) as usize, 0); + } + _from.edges[(t - MIN_DFA_EDGE) as usize] = _to; + } + + fn add_dfastate(&self, dfa: &mut DFA, _configs: Box) -> DFAStateRef +// where + // V: DerefMut>, + { + assert!(!_configs.has_semantic_context()); + let mut dfastate = DFAState::new_dfastate(usize::MAX, _configs); + let rule_index = dfastate + .configs //_configs + .get_items() + .find(|c| RuleStopState == *self.atn().states[c.get_state()].get_state_type()) + .map(|c| { + let rule_index = self.atn().states[c.get_state()].get_rule_index(); + + //println!("accepted rule {} on state {}",rule_index,c.get_state()); + ( + self.atn().rule_to_token_type[rule_index], + c.get_lexer_executor() + .map(LexerActionExecutor::clone) + .map(Box::new), + ) + }); + + if let Some((prediction, exec)) = rule_index { + dfastate.prediction = prediction; + dfastate.lexer_action_executor = exec; + dfastate.is_accept_state = true; + } + + let states = &mut dfa.states; + let key = dfastate.default_hash(); + let dfastate_index = *dfa + .states_map + .entry(key) + .or_insert_with(|| { + dfastate.state_number = 
states.deref().len(); + dfastate.configs.set_read_only(true); + let i = dfastate.state_number; + //println!("inserting new DFA state {} with size {}", i, dfastate.configs.length()); + states.push(dfastate); + vec![i] + }) + .first() + .unwrap(); + + //println!("new DFA state {}", dfastate_index); + + // dfa.states.write().unwrap().get_mut(*dfastate_index).unwrap() + dfastate_index + } + + /// Returns current DFA that is currently used. + pub fn get_dfa(&self) -> &RefCell { + &self.decision_to_dfa()[self.mode] + } + + /// Returns current DFA for particular lexer mode + pub fn get_dfa_for_mode(&self, mode: usize) -> &RefCell { + &self.decision_to_dfa()[mode] + } + + // fn get_token_name(&self, _tt: isize) -> String { unimplemented!() } + + // fn reset_sim_state(_sim: &mut SimState) { unimplemented!() } +} + +#[derive(Debug)] +pub(crate) struct SimState { + index: isize, + line: isize, + column: isize, + dfa_state: Option, +} + +impl SimState { + pub(crate) fn new() -> SimState { + SimState { + index: -1, + line: 0, + column: -1, + dfa_state: None, + } + } + + pub(crate) fn reset(&mut self) { + // self.index = -1; + // self.line = 0; + // self.column = -1; + self.dfa_state = None; + } +} diff --git a/runtime/Rust/src/lib.rs b/runtime/Rust/src/lib.rs new file mode 100644 index 0000000000..9b8f720d4f --- /dev/null +++ b/runtime/Rust/src/lib.rs @@ -0,0 +1,242 @@ +#![crate_type = "lib"] +// #![feature(try_blocks)] +//#![feature(nll)] +// #![feature(raw)] +// #![feature(is_sorted)] +// #![feature(cell_update)] +// #![feature(get_mut_unchecked)] +// #![feature(specialization)] +// #![feature(coerce_unsized)] +// #![feature(associated_type_defaults)] +// #![feature(generic_associated_types)] +// #![feature(crate_visibility_modifier)] +// #![feature(generic_associated_types)] +#![warn(rust_2018_idioms)] +//#![warn(missing_docs)] // warn if there is missing docs +#![warn(missing_debug_implementations)] +#![warn(trivial_numeric_casts)] +// #![allow(incomplete_features)] + +//! # Antlr4 runtime +//! +//! This is a Rust runtime for [ANTLR4] parser generator. +//! It is required to use parsers and lexers generated by [ANTLR4] parser generator +//! +//! This documentation refers to particular api used by generated parsers,lexers and syntax trees. +//! +//! For info on what is [ANTLR4] and how to generate parser please refer to: +//! - [ANTLR4] main repository +//! - [README] for Rust target +//! +//! [ANTLR4]: https://github.com/antlr/antlr4 +//! [README]: https://github.com/rrevenantt/antlr4rust/blob/master/README.md +//! +//! ### Customization +//! +//! All input and output can be customized and optimized for particular usecase by implementing +//! related trait. Each of them already has different implementations that should be enough for most cases. +//! For more details see docs for corresponding trait and containing module. +//! +//! Currently available are: +//! - [`CharStream`] - Lexer input, stream of char values with slicing support +//! - [`TokenFactory`] - How lexer creates tokens. +//! - [`Token`] - Element of [`TokenStream`] +//! - [`TokenStream`] - Parser input, created from lexer or other token source. +//! - [`ParserRuleContext`] - Node of created syntax tree. +//! +//! ### Zero-copy and lifetimes +//! +//! This library supports full zero-copy parsing. To allow this +//! `'input` lifetime is used everywhere inside to refer to data borrowed by parser/lexer. +//! Besides references to input it also can be [`TokenFactory`] if it returns references to tokens. +//! 
See [`ArenaFactory`] as an example of such behavior. +//! It allocates tokens in [`Arena`](typed_arena::Arena) and returns references. +//! +//! Using generated parse tree you should be careful to not require longer lifetime after the parsing. +//! If that's the case you will likely get "does not live long enough" error on the input string, +//! despite actual lifetime conflict is happening much later +//! +//! If you need to generate owned versions of parse tree or you want simpler usage, +//! you can opt out zero-copy by requiring `'input` to be static. In this case it is easier to also use +//! types that contains "owned" in their name or constructor function like `OwningTokenFactory` +//! or `InputStream::new_owned()`. +//! +//! ### Visitors and Listeners +//! +//! Parse listeners must outlive `'input` because they have to be stored inside of the parser. +//! It still allows to retrieve borrowed data from parse tree which should be enough to cover 99% use cases. +//! +//! `ParseTreeWalker` can accept listeners with arbitrary lifetime. +//! +//! `Visitor`s also can have arbitrary lifetime. +//! +//! ### Downcasting +//! +//! Rule context trait object support downcasting even for zero-copy case. +//! Also generic types(currently these are `H:ErrorStrategy` and `I:`[`TokenStream`]) that you can +//! access in generated parser from embedded actions also can be downcasted to concrete types. +//! To do it `TidExt::downcast_*` extension methods should be used. +//! +//! [`CharStream`]: crate::char_stream::CharStream +//! [`TokenFactory`]: crate::token_factory::TokenFactory +//! [`ArenaFactory`]: crate::token_factory::ArenaFactory +//! [`Token`]: crate::token::Token +//! [`TokenStream`]: crate::token_stream::TokenStream +//! [`ParserRuleContext`]: crate::parser_rule_context::ParserRuleContext + +#[macro_use] +extern crate lazy_static; + +#[doc(hidden)] +pub use lazy_static::lazy_static; + +#[doc(hidden)] +#[doc(hidden)] +pub use better_any::{tid, Tid, TidAble, TidExt}; + +#[doc(inline)] +pub use error_strategy::{BailErrorStrategy, DefaultErrorStrategy, ErrorStrategy}; + +pub use input_stream::InputStream; + +#[doc(inline)] +pub use lexer::{BaseLexer, Lexer}; +#[doc(inline)] +pub use parser::{BaseParser, ListenerId, Parser}; +#[doc(inline)] +pub use token_source::TokenSource; +//extern crate uuid; +#[doc(hidden)] +pub use prediction_context::PredictionContextCache; + +#[doc(inline)] +pub use prediction_mode::PredictionMode; + +#[doc(hidden)] +pub mod atn_config; +#[doc(hidden)] +pub mod atn_simulator; +pub mod int_stream; +mod lexer_action; +mod ll1_analyzer; +#[doc(hidden)] +pub mod recognizer; +pub mod token_factory; +//pub mod tokenstream_rewriter; +#[doc(hidden)] +pub mod atn_deserialization_options; +#[doc(hidden)] +pub mod atn_state; +pub mod char_stream; +#[doc(hidden)] +pub mod dfa_state; +#[doc(hidden)] +pub mod interval_set; +pub mod parser_rule_context; +mod prediction_context; +#[doc(hidden)] +pub mod semantic_context; +mod token_source; +pub mod token_stream; +//pub mod trace_listener; +#[doc(hidden)] +pub mod dfa; +#[doc(hidden)] +pub mod transition; +pub mod tree; +//pub mod file_stream; +#[doc(hidden)] +pub mod atn; +#[doc(hidden)] +pub mod atn_config_set; +#[doc(hidden)] +pub mod atn_deserializer; +pub mod common_token_stream; +mod dfa_serializer; +pub mod error_listener; +pub mod error_strategy; +pub mod errors; +pub mod input_stream; +pub mod lexer; +#[doc(hidden)] +pub mod lexer_action_executor; +pub mod lexer_atn_simulator; +pub mod parser; +pub mod parser_atn_simulator; 
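+// Editor's illustrative sketch of the zero-copy contract described in the crate docs above
+// (`MyLexer` is a hypothetical generated type, not part of this patch):
+//
+//     let source = String::from("...");
+//     let lexer = MyLexer::new(InputStream::new(source.as_str())); // tree borrows `source`
+//     // or, opting out of zero-copy for a fully owned tree:
+//     let lexer = MyLexer::new(InputStream::new_owned(source.into_boxed_str()));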
+mod prediction_mode;
+pub mod token;
+pub mod trees;
+mod utils;
+//pub mod tokenstream_rewriter_test;
+mod atn_type;
+// mod context_factory;
+pub mod rule_context;
+pub mod vocabulary;
+//#[cfg(test)]
+// tests are either integration tests in the "tests" folder or unit tests in some modules
+
+use std::rc::Rc;
+/// Stable workaround for CoerceUnsized
+// #[doc(hidden)]
+pub trait CoerceFrom<T> {
+    fn coerce_rc(from: Rc<T>) -> Rc<Self>;
+    fn coerce_box(from: Box<T>) -> Box<Self>;
+    fn coerce_ref(from: &T) -> &Self;
+    fn coerce_mut(from: &mut T) -> &mut Self;
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! coerce_from {
+    ($lt:lifetime : $p:path) => {
+        const _: () = {
+            use std::rc::Rc;
+            impl<$lt, T> $crate::CoerceFrom<T> for dyn $p + $lt
+            where
+                T: $p + $lt,
+            {
+                fn coerce_rc(from: Rc<T>) -> Rc<Self> {
+                    from as _
+                }
+                fn coerce_box(from: Box<T>) -> Box<Self> {
+                    from as _
+                }
+                fn coerce_ref(from: &T) -> &Self {
+                    from as _
+                }
+                fn coerce_mut(from: &mut T) -> &mut Self {
+                    from as _
+                }
+            }
+        };
+    };
+}
+
+/// Stable workaround for CoerceUnsized
+// #[doc(hidden)]
+pub trait CoerceTo<T: ?Sized> {
+    fn coerce_rc_to(self: Rc<Self>) -> Rc<T>;
+    fn coerce_box_to(self: Box<Self>) -> Box<T>;
+    fn coerce_ref_to(&self) -> &T;
+    fn coerce_mut_to(&mut self) -> &mut T;
+}
+
+impl<T: ?Sized, X> CoerceTo<T> for X
+where
+    T: CoerceFrom<X>,
+{
+    fn coerce_rc_to(self: Rc<Self>) -> Rc<T> {
+        T::coerce_rc(self)
+    }
+    fn coerce_box_to(self: Box<Self>) -> Box<T> {
+        T::coerce_box(self)
+    }
+
+    fn coerce_ref_to(&self) -> &T {
+        T::coerce_ref(self)
+    }
+
+    fn coerce_mut_to(&mut self) -> &mut T {
+        T::coerce_mut(self)
+    }
+} diff --git a/runtime/Rust/src/ll1_analyzer.rs b/runtime/Rust/src/ll1_analyzer.rs new file mode 100644 index 0000000000..8093cb47d8 --- /dev/null +++ b/runtime/Rust/src/ll1_analyzer.rs @@ -0,0 +1,187 @@ +use std::collections::HashSet;
+use std::ops::Deref;
+use std::rc::Rc;
+
+use bit_set::BitSet;
+
+use crate::atn::ATN;
+use crate::atn_config::ATNConfig;
+use crate::atn_state::{ATNState, ATNStateType};
+use crate::interval_set::IntervalSet;
+use crate::parser::ParserNodeType;
+use crate::prediction_context::PredictionContext;
+use crate::prediction_context::EMPTY_PREDICTION_CONTEXT;
+use crate::token::{TOKEN_EOF, TOKEN_EPSILON, TOKEN_INVALID_TYPE, TOKEN_MIN_USER_TOKEN_TYPE};
+use crate::transition::TransitionType::TRANSITION_NOTSET;
+use crate::transition::{RuleTransition, TransitionType};
+
+pub struct LL1Analyzer<'a> {
+    atn: &'a ATN,
+}
+
+impl LL1Analyzer<'_> {
+    pub fn new(atn: &ATN) -> LL1Analyzer<'_> {
+        LL1Analyzer { atn }
+    }
+
+    // fn get_decision_lookahead(&self, _s: &dyn ATNState) -> &Vec { unimplemented!() }
+
+    pub fn look<'input, Ctx: ParserNodeType<'input>>(
+        &self,
+        s: &dyn ATNState,
+        stop_state: Option<&dyn ATNState>,
+        ctx: Option<&Ctx::Type>,
+    ) -> IntervalSet {
+        let mut r = IntervalSet::new();
+        let look_ctx = ctx.map(|x| PredictionContext::from_rule_context::<Ctx>(self.atn, x));
+        let mut looks_busy: HashSet<ATNConfig> = HashSet::new();
+        let mut called_rule_stack = BitSet::new();
+        self.look_work(
+            s,
+            stop_state,
+            look_ctx,
+            &mut r,
+            &mut looks_busy,
+            &mut called_rule_stack,
+            true,
+            true,
+        );
+        r
+    }
+
+    fn look_work(
+        &self,
+        // atn:&ATN,
+        s: &dyn ATNState,
+        stop_state: Option<&dyn ATNState>,
+        ctx: Option<Rc<PredictionContext>>,
+        look: &mut IntervalSet,
+        look_busy: &mut HashSet<ATNConfig>,
+        called_rule_stack: &mut BitSet,
+        see_thru_preds: bool,
+        add_eof: bool,
+    ) {
+        let c = ATNConfig::new(s.get_state_number(), 0, ctx.clone());
+        if !look_busy.insert(c) {
+            return;
+        }
+
+        if Some(s.get_state_number()) == stop_state.map(|x| x.get_state_number()) {
+            match ctx {
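+                // reached the lookahead stop state: an exhausted context contributes
+                // epsilon; an empty context at the outermost level contributes EOF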
+ None => { + look.add_one(TOKEN_EPSILON); + return; + } + Some(x) if x.is_empty() && add_eof => { + look.add_one(TOKEN_EOF); + return; + } + _ => {} + } + } + + if let ATNStateType::RuleStopState = s.get_state_type() { + match ctx { + None => { + look.add_one(TOKEN_EPSILON); + return; + } + Some(x) if x.is_empty() && add_eof => { + look.add_one(TOKEN_EOF); + return; + } + Some(ctx) if EMPTY_PREDICTION_CONTEXT.with(|x| &ctx != &*x) => { + let removed = called_rule_stack.contains(s.get_rule_index()); + called_rule_stack.remove(s.get_rule_index()); + for i in 0..ctx.length() { + self.look_work( + self.atn.states[ctx.get_return_state(i) as usize].as_ref(), + stop_state, + ctx.get_parent(i).cloned(), + look, + look_busy, + called_rule_stack, + see_thru_preds, + add_eof, + ) + } + if removed { + called_rule_stack.insert(s.get_rule_index()); + } + + return; + } + _ => {} + } + } + + for tr in s.get_transitions() { + let target = self.atn.states[tr.get_target()].as_ref(); + match tr.get_serialization_type() { + TransitionType::TRANSITION_RULE => { + let rule_tr = tr.as_ref().cast::(); + if called_rule_stack.contains(target.get_rule_index()) { + continue; + } + + let new_ctx = Rc::new(PredictionContext::new_singleton( + ctx.clone(), + rule_tr.follow_state as isize, + )); + + called_rule_stack.insert(target.get_rule_index()); + self.look_work( + target, + stop_state, + Some(new_ctx), + look, + look_busy, + called_rule_stack, + see_thru_preds, + add_eof, + ); + called_rule_stack.remove(target.get_rule_index()); + } + TransitionType::TRANSITION_PREDICATE | TransitionType::TRANSITION_PRECEDENCE => { + if see_thru_preds { + self.look_work( + target, + stop_state, + ctx.clone(), + look, + look_busy, + called_rule_stack, + see_thru_preds, + add_eof, + ) + } else { + look.add_one(TOKEN_INVALID_TYPE) + } + } + TransitionType::TRANSITION_WILDCARD => { + look.add_range(TOKEN_MIN_USER_TOKEN_TYPE, self.atn.max_token_type) + } + _ if tr.is_epsilon() => self.look_work( + target, + stop_state, + ctx.clone(), + look, + look_busy, + called_rule_stack, + see_thru_preds, + add_eof, + ), + _ => { + if let Some(mut set) = tr.get_label() { + if tr.get_serialization_type() == TRANSITION_NOTSET { + let complement = + set.complement(TOKEN_MIN_USER_TOKEN_TYPE, self.atn.max_token_type); + *set.to_mut() = complement; + } + look.add_set(set.deref()) + } + } + } + } + } +} diff --git a/runtime/Rust/src/parser.rs b/runtime/Rust/src/parser.rs new file mode 100644 index 0000000000..8f9beea2fb --- /dev/null +++ b/runtime/Rust/src/parser.rs @@ -0,0 +1,717 @@ +//! 
Base parser implementation
+use std::borrow::Borrow;
+use std::cell::{Cell, RefCell};
+use std::marker::PhantomData;
+use std::ops::{Deref, DerefMut};
+use std::rc::Rc;
+
+use crate::atn::ATN;
+use crate::atn_simulator::IATNSimulator;
+use crate::error_listener::{ConsoleErrorListener, ErrorListener, ProxyErrorListener};
+use crate::error_strategy::ErrorStrategy;
+use crate::errors::ANTLRError;
+use crate::interval_set::IntervalSet;
+use crate::parser_atn_simulator::ParserATNSimulator;
+use crate::parser_rule_context::ParserRuleContext;
+use crate::recognizer::{Actions, Recognizer};
+use crate::rule_context::{states_stack, CustomRuleContext, RuleContext};
+use crate::token::{Token, TOKEN_EOF};
+use crate::token_factory::{TokenAware, TokenFactory};
+use crate::token_stream::TokenStream;
+use crate::tree::{ErrorNode, Listenable, ParseTreeListener, TerminalNode};
+use crate::utils::cell_update;
+use crate::vocabulary::Vocabulary;
+use crate::{CoerceFrom, CoerceTo};
+use better_any::TidAble;
+
+/// Parser functionality required for `ParserATNSimulator` to work
+#[allow(missing_docs)] // todo rewrite it so downstream crates could actually implement it meaningfully
+pub trait Parser<'input>: Recognizer<'input> {
+    fn get_interpreter(&self) -> &ParserATNSimulator;
+
+    fn get_token_factory(&self) -> &'input Self::TF;
+    fn get_parser_rule_context(&self) -> &Rc<<Self::Node as ParserNodeType<'input>>::Type>;
+    // fn set_parser_rule_context(&self, v: ParserRuleContext);
+    fn consume(&mut self, err_handler: &mut impl ErrorStrategy<'input, Self>)
+    where
+        Self: Sized;
+    // fn get_parse_listeners(&self) -> Vec;
+    //fn sempred(&mut self, _localctx: Option<&dyn ParserRuleContext>, rule_index: isize, action_index: isize) -> bool { true }
+
+    fn precpred(
+        &self,
+        localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
+        precedence: isize,
+    ) -> bool;
+
+    // fn get_error_handler(&self) -> ErrorStrategy;
+    // fn set_error_handler(&self, e: ErrorStrategy);
+    fn get_input_stream_mut(&mut self) -> &mut dyn TokenStream<'input, TF = Self::TF>;
+    fn get_input_stream(&self) -> &dyn TokenStream<'input, TF = Self::TF>;
+    fn get_current_token(&self) -> &<Self::TF as TokenFactory<'input>>::Tok;
+    fn get_expected_tokens(&self) -> IntervalSet;
+
+    fn add_error_listener(&mut self, listener: Box<dyn ErrorListener<'input, Self>>)
+    where
+        Self: Sized;
+    fn remove_error_listeners(&mut self);
+    fn notify_error_listeners(
+        &self,
+        msg: String,
+        offending_token: Option<isize>,
+        err: Option<&ANTLRError>,
+    );
+    fn get_error_lister_dispatch<'a>(&'a self) -> Box<dyn ErrorListener<'input, Self> + 'a>
+    where
+        Self: Sized;
+
+    fn is_expected_token(&self, symbol: isize) -> bool;
+    fn get_precedence(&self) -> isize;
+
+    fn get_state(&self) -> isize;
+    fn set_state(&mut self, v: isize);
+    fn get_rule_invocation_stack(&self) -> Vec<String>;
+}
+
+// trait CsvContext<'input>: for<'x> Listenable<'input, dyn CsvParseTreeListener<'input,CsvTreeNodeType> + 'x> + ParserRuleContext<'input,TF=CommonTokenFactory,Ctx=CsvTreeNodeType>{}
+//
+// struct CsvTreeNodeType;
+// impl<'a> ParserNodeType<'a> for CsvTreeNodeType{
+//     type Type = dyn CsvContext<'a>;
+// }
+
+// workaround trait for rustc not being able to handle cycles in trait definition yet, e.g. `trait A: Super<dyn A> {}`
+// whyyy rustc... whyyy... (╯°□°)╯︵ ┻━┻ It would have been so much cleaner.
+/// Workaround trait for rustc current limitations.
+///
+/// Basically you can consider it as if the context trait for the generated parser had been implemented as
+/// ```text
+/// trait GeneratedParserContext: ParserRuleContext { ... }
+/// ```
+/// which is not possible, hence this somewhat ugly workaround.
+///
+/// Implemented by the generated parser for the type that carries information about
+/// the parse tree node.
+pub trait ParserNodeType<'input>: TidAble<'input> + Sized {
+    /// Shortcut for `Type::TF`
+    type TF: TokenFactory<'input> + 'input;
+    /// Actual type of the parse tree node
+    type Type: ?Sized + ParserRuleContext<'input, Ctx = Self, TF = Self::TF> + 'input;
+    // type Visitor: ?Sized + ParseTreeVisitor<'input, Self>;
+}
+
+/// ### Main underlying Parser struct
+///
+/// It is a member of the generated parser struct, so
+/// you almost never need to create it yourself.
+/// The generated parser hides the complexity of this struct and exposes the required flexibility via generic parameters
+#[derive(Debug)]
+pub struct BaseParser<
+    'input,
+    Ext, //: 'static, //: ParserRecog<'input, Self> + 'static, // user provided behavior, such as semantic predicates
+    I: TokenStream<'input>,                  // input stream
+    Ctx: ParserNodeType<'input, TF = I::TF>, // Ctx::Type is trait object type for tree node of the parser
+    T: ParseTreeListener<'input, Ctx> + ?Sized = dyn ParseTreeListener<'input, Ctx>,
+> {
+    interp: Rc<ParserATNSimulator>,
+    /// Rule context parser is currently processing
+    pub ctx: Option<Rc<Ctx::Type>>,
+
+    /// Track the {@link ParserRuleContext} objects during the parse and hook
+    /// them up using the {@link ParserRuleContext#children} list so that it
+    /// forms a parse tree. The {@link ParserRuleContext} returned from the start
+    /// rule represents the root of the parse tree.
+    ///
+    /// Note that if we are not building parse trees, rule contexts only point
+    /// upwards. When a rule exits, it returns the context but that gets garbage
+    /// collected if nobody holds a reference. It points upwards but nobody
+    /// points at it.
+    ///
+    /// When we build parse trees, we are adding all of these contexts to the
+    /// {@link ParserRuleContext#children} list. Contexts are then not candidates
+    /// for garbage collection.
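+    ///
+    /// A minimal sketch of the trade-off (the parser type and start rule are
+    /// hypothetical; generated names differ per grammar):
+    /// ```ignore
+    /// let mut parser = MyGrammarParser::new(token_stream);
+    /// parser.build_parse_trees = false; // validate only, keep no children
+    /// let _ = parser.my_start_rule();
+    /// ```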
+ /// + /// Returns {@code true} if a complete parse tree will be constructed while + /// parsing, otherwise {@code false} + pub build_parse_trees: bool, + + /// true if parser reached EOF + pub matched_eof: bool, + + state: isize, + /// Token stream that is currently used by this parser + pub input: I, + precedence_stack: Vec, + + parse_listeners: Vec>, + _syntax_errors: Cell, + error_listeners: RefCell>>>, + + ext: Ext, + pd: PhantomData &'input str>, +} + +better_any::tid! { + impl<'input, Ext, I, Ctx, T> TidAble<'input> for BaseParser<'input,Ext, I, Ctx, T> + where I: TokenStream<'input>, + Ctx: ParserNodeType<'input, TF = I::TF>, + T: ParseTreeListener<'input, Ctx> + ?Sized +} + +impl<'input, Ext, I, Ctx, T> Deref for BaseParser<'input, Ext, I, Ctx, T> +where + Ext: ParserRecog<'input, Self>, + I: TokenStream<'input>, + Ctx: ParserNodeType<'input, TF = I::TF>, + T: ParseTreeListener<'input, Ctx> + ?Sized, + // Ctx::Type: Listenable, +{ + type Target = Ext; + + fn deref(&self) -> &Self::Target { + &self.ext + } +} + +impl<'input, Ext, I, Ctx, T> DerefMut for BaseParser<'input, Ext, I, Ctx, T> +where + Ext: ParserRecog<'input, Self>, + I: TokenStream<'input>, + Ctx: ParserNodeType<'input, TF = I::TF>, + T: ParseTreeListener<'input, Ctx> + ?Sized, + // Ctx::Type: Listenable, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.ext + } +} + +/// +pub trait ParserRecog<'a, P: Recognizer<'a>>: Actions<'a, P> {} + +impl<'input, Ext, I, Ctx, T> Recognizer<'input> for BaseParser<'input, Ext, I, Ctx, T> +where + Ext: ParserRecog<'input, Self>, + I: TokenStream<'input>, + Ctx: ParserNodeType<'input, TF = I::TF>, + T: ParseTreeListener<'input, Ctx> + ?Sized, + // Ctx::Type: Listenable, +{ + type Node = Ctx; + + fn sempred( + &mut self, + localctx: Option<&Ctx::Type>, + rule_index: isize, + action_index: isize, + ) -> bool { + >::sempred(localctx, rule_index, action_index, self) + } + + fn get_rule_names(&self) -> &[&str] { + self.ext.get_rule_names() + } + + fn get_vocabulary(&self) -> &dyn Vocabulary { + self.ext.get_vocabulary() + } + + fn get_grammar_file_name(&self) -> &str { + self.ext.get_grammar_file_name() + } + + fn get_atn(&self) -> &ATN { + self.interp.atn() + } +} + +impl<'input, Ext, I, Ctx, T> TokenAware<'input> for BaseParser<'input, Ext, I, Ctx, T> +where + Ext: ParserRecog<'input, Self>, + I: TokenStream<'input>, + Ctx: ParserNodeType<'input, TF = I::TF>, + T: ParseTreeListener<'input, Ctx> + ?Sized, + // Ctx::Type: Listenable, +{ + type TF = I::TF; +} + +impl<'input, Ext, I, Ctx, T> Parser<'input> for BaseParser<'input, Ext, I, Ctx, T> +where + Ext: ParserRecog<'input, Self>, + I: TokenStream<'input>, + Ctx: ParserNodeType<'input, TF = I::TF>, + T: ParseTreeListener<'input, Ctx> + ?Sized, + Ctx::Type: + Listenable + CoerceFrom> + CoerceFrom>, + // TerminalNode<'input, Ctx>: CoerceTo, + // ErrorNode<'input, Ctx>: CoerceTo, +{ + fn get_interpreter(&self) -> &ParserATNSimulator { + self.interp.as_ref() + } + + fn get_token_factory(&self) -> &'input Self::TF { + // &**crate::common_token_factory::COMMON_TOKEN_FACTORY_DEFAULT + self.input.get_token_source().get_token_factory() + } + + #[inline(always)] + fn get_parser_rule_context(&self) -> &Rc { + self.ctx.as_ref().unwrap() + } + + fn consume(&mut self, err_handler: &mut impl ErrorStrategy<'input, Self>) { + let o = self.get_current_token().clone(); + if o.borrow().get_token_type() != TOKEN_EOF { + self.input.consume(); + } + if self.build_parse_trees || !self.parse_listeners.is_empty() { + if 
err_handler.in_error_recovery_mode(self) { + // todo report ructc inference issue + let node: Rc> = self.create_error_node(o.clone()); + self.ctx + .as_deref() + .unwrap() + .add_child(node.clone().coerce_rc_to()); + for listener in &mut self.parse_listeners { + listener.visit_error_node(&*node) + } + } else { + let node: Rc> = self.create_token_node(o.clone()); + self.ctx + .as_deref() + .unwrap() + .add_child(node.clone().coerce_rc_to()); + for listener in &mut self.parse_listeners { + listener.visit_terminal(&*node) + } + } + } + } + + fn precpred(&self, _localctx: Option<&Ctx::Type>, precedence: isize) -> bool { + // localctx.map(|it|println!("check at{}",it.to_string_tree(self))); + // println!("{}",self.get_precedence()); + precedence >= self.get_precedence() + } + + fn get_input_stream_mut(&mut self) -> &mut dyn TokenStream<'input, TF = Self::TF> { + &mut self.input //.as_mut() + } + + fn get_input_stream(&self) -> &dyn TokenStream<'input, TF = Self::TF> { + &self.input + } + + #[inline] + fn get_current_token(&self) -> &>::Tok { + self.input.get(self.input.index()) + } + + fn get_expected_tokens(&self) -> IntervalSet { + let states_stack = states_stack(self.ctx.as_ref().unwrap().clone()); + self.interp + .atn() + .get_expected_tokens(self.state, states_stack) + } + + fn add_error_listener(&mut self, listener: Box>) { + self.error_listeners.borrow_mut().push(listener) + } + + fn remove_error_listeners(&mut self) { + self.error_listeners.borrow_mut().clear(); + } + + fn notify_error_listeners( + &self, + msg: String, + offending_token: Option, + err: Option<&ANTLRError>, + ) { + cell_update(&self._syntax_errors, |it| it + 1); + let offending_token: Option<&_> = match offending_token { + None => Some(self.get_current_token().borrow()), + Some(x) => Some(self.input.get(x).borrow()), + }; + let line = offending_token.map(|x| x.get_line()).unwrap_or(-1); + let column = offending_token.map(|x| x.get_column()).unwrap_or(-1); + + for listener in self.error_listeners.borrow().iter() { + listener.syntax_error(self, offending_token, line, column, &msg, err) + } + } + + fn get_error_lister_dispatch<'a>(&'a self) -> Box + 'a> { + Box::new(ProxyErrorListener { + delegates: self.error_listeners.borrow(), + }) + } + + fn is_expected_token(&self, _symbol: isize) -> bool { + unimplemented!() + } + + fn get_precedence(&self) -> isize { + *self.precedence_stack.last().unwrap_or(&-1) + } + + #[inline(always)] + fn get_state(&self) -> isize { + self.state + } + + #[inline(always)] + fn set_state(&mut self, v: isize) { + self.state = v; + } + + fn get_rule_invocation_stack(&self) -> Vec { + let mut vec = Vec::new(); + let rule_names = self.get_rule_names(); + let mut ctx = self.get_parser_rule_context().clone(); + loop { + let rule_index = ctx.get_rule_index(); + vec.push(rule_names.get(rule_index).unwrap_or(&"n/a").to_string()); + ctx = if let Some(parent) = ctx.get_parent_ctx() { + parent + } else { + break; + } + } + vec + } + + // fn get_rule_invocation_stack(&self, c: _) -> Vec { + // unimplemented!() + // } +} + +#[allow(missing_docs)] // todo docs +impl<'input, Ext, I, Ctx, T> BaseParser<'input, Ext, I, Ctx, T> +where + Ext: ParserRecog<'input, Self>, + I: TokenStream<'input>, + Ctx: ParserNodeType<'input, TF = I::TF>, + T: ParseTreeListener<'input, Ctx> + ?Sized, + Ctx::Type: + Listenable + CoerceFrom> + CoerceFrom>, + // TerminalNode<'input, Ctx>: CoerceTo, + // ErrorNode<'input, Ctx>: CoerceTo, +{ + pub fn new_base_parser(input: I, interpreter: Rc, ext: Ext) -> Self { + Self { + interp: 
interpreter, + ctx: None, + build_parse_trees: true, + matched_eof: false, + state: -1, + input, + precedence_stack: vec![0], + parse_listeners: vec![], + _syntax_errors: Cell::new(0), + error_listeners: RefCell::new(vec![Box::new(ConsoleErrorListener {})]), + ext, + pd: PhantomData, + } + } + + // + // fn reset(&self) { unimplemented!() } + + #[inline] + pub fn match_token( + &mut self, + ttype: isize, + err_handler: &mut impl ErrorStrategy<'input, Self>, + ) -> Result<>::Tok, ANTLRError> { + let mut token = self.get_current_token().clone(); + if token.borrow().get_token_type() == ttype { + if ttype == TOKEN_EOF { + self.matched_eof = true; + } + + err_handler.report_match(self); + self.consume(err_handler); + } else { + token = err_handler.recover_inline(self)?; + if self.build_parse_trees && token.borrow().get_token_index() == -1 { + self.ctx + .as_ref() + .unwrap() + .add_child(self.create_error_node(token.clone()).coerce_rc_to()); + } + } + return Ok(token); + } + + #[inline] + pub fn match_wildcard( + &mut self, + err_handler: &mut impl ErrorStrategy<'input, Self>, + ) -> Result<>::Tok, ANTLRError> { + let mut t = self.get_current_token().clone(); + if t.borrow().get_token_type() > 0 { + err_handler.report_match(self); + self.consume(err_handler); + } else { + t = err_handler.recover_inline(self)?; + if self.build_parse_trees && t.borrow().get_token_index() == -1 { + self.ctx + .as_ref() + .unwrap() + .add_child(self.create_error_node(t.clone()).coerce_rc_to()); + } + } + return Ok(t); + } + + /// Adds parse listener for this parser + /// returns `listener_id` that can be used later to get listener back + /// + /// Embedded listener currently must outlive `'input`. If you need to have arbitrary listener use ParseTreeWalker. + /// + /// ### Example for listener usage: + /// todo + pub fn add_parse_listener(&mut self, listener: Box) -> ListenerId + where + L: CoerceTo, + { + let id = ListenerId::new(&listener); + self.parse_listeners.push(listener.coerce_box_to()); + id + } + + /// Removes parse listener with corresponding `listener_id`, casts it back to user type and returns it to the caller. 
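+    /// The id is derived from the listener's address, so it is only valid for
+    /// the exact listener instance previously registered on this parser; the
+    /// unsafe cast back to the user type relies on that.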
+ /// `listener_id` is returned when listener is added via `add_parse_listener` + pub fn remove_parse_listener(&mut self, listener_id: ListenerId) -> Box + where + L: CoerceTo, + { + let index = self + .parse_listeners + .iter() + .position(|it| ListenerId::new(it).actual_id == listener_id.actual_id) + .expect("listener not found"); + unsafe { listener_id.into_listener(self.parse_listeners.remove(index)) } + } + + /// Removes all added parse listeners without returning them + pub fn remove_parse_listeners(&mut self) { + self.parse_listeners.clear() + } + + pub fn trigger_enter_rule_event(&mut self) { + let ctx = self.ctx.as_deref().unwrap(); + for listener in self.parse_listeners.iter_mut() { + // listener.enter_every_rule(ctx); + ctx.enter(listener); + } + } + + pub fn trigger_exit_rule_event(&mut self) { + let ctx = self.ctx.as_deref().unwrap(); + for listener in self.parse_listeners.iter_mut().rev() { + ctx.exit(listener); + // listener.exit_every_rule(ctx); + } + } + // + // fn set_token_factory(&self, factory: TokenFactory) { unimplemented!() } + // + // + // fn get_atn_with_bypass_alts(&self) { unimplemented!() } + // + // fn compile_parse_tree_pattern(&self, pattern, patternRuleIndex: Lexer, lexer: Lexer) { unimplemented!() } + // + // fn set_input_stream(&self, input: TokenStream) { unimplemented!() } + // + // fn set_token_stream(&self, input: TokenStream) { unimplemented!() } + + fn add_context_to_parse_tree(&mut self) { + let parent = self.ctx.as_ref().unwrap().get_parent_ctx(); + + if let Some(parent) = parent { + parent.add_child(self.ctx.clone().unwrap()) + } + } + + #[inline] + pub fn enter_rule(&mut self, localctx: Rc, state: isize, _rule_index: usize) { + self.set_state(state); + localctx.set_start(self.input.lt(1).cloned()); + self.ctx = Some(localctx); + // let mut localctx = Rc::get_mut(self.ctx.as_mut().unwrap()).unwrap(); + if self.build_parse_trees { + self.add_context_to_parse_tree() + } + } + + #[inline] + pub fn exit_rule(&mut self) { + if self.matched_eof { + self.ctx + .as_ref() + .unwrap() + .set_stop(self.input.lt(1).cloned()) + } else { + self.ctx + .as_ref() + .unwrap() + .set_stop(self.input.lt(-1).cloned()) + } + self.trigger_exit_rule_event(); + self.set_state(self.get_parser_rule_context().get_invoking_state()); + let parent = self.ctx.as_ref().unwrap().get_parent_ctx(); + // mem::replace(&mut self.ctx, parent); + self.ctx = parent; + } + + // todo make new_ctx not option + #[inline] + pub fn enter_outer_alt(&mut self, new_ctx: Option>, alt_num: isize) { + if let Some(new_ctx) = new_ctx { + new_ctx.set_alt_number(alt_num); + + let ctx = self.ctx.as_ref().unwrap(); + if self.build_parse_trees && self.ctx.is_some() && !Rc::ptr_eq(&new_ctx, ctx) { + if let Some(parent) = ctx.get_parent_ctx() { + parent.remove_last_child(); + parent.add_child(new_ctx.clone()) + } + } + + self.ctx = Some(new_ctx); + } + + self.trigger_enter_rule_event(); + } + + pub fn enter_recursion_rule( + &mut self, + localctx: Rc, + state: isize, + _rule_index: usize, + precedence: isize, + ) { + self.set_state(state); + self.precedence_stack.push(precedence); + localctx.set_start(self.input.lt(1).cloned()); + //println!("{}",self.input.lt(1).map(Token::to_owned).unwrap()); + self.ctx = Some(localctx); + } + + pub fn push_new_recursion_context( + &mut self, + localctx: Rc, + state: isize, + _rule_index: usize, + ) { + let prev = self.ctx.take().unwrap(); + prev.set_parent(&Some(localctx.clone())); + prev.set_invoking_state(state); + prev.set_stop(self.input.lt(-1).cloned()); + + // 
println!("{}",prev.get_start().unwrap()); + localctx.set_start(Some(prev.start_mut().clone())); + self.ctx = Some(localctx); + + if self.build_parse_trees { + self.ctx.as_ref().unwrap().add_child(prev); + } + self.trigger_enter_rule_event(); + } + + pub fn unroll_recursion_context(&mut self, parent_ctx: Option>) { + self.precedence_stack.pop(); + let retctx = self.ctx.clone().unwrap(); + retctx.set_stop(self.input.lt(-1).cloned()); + if !self.parse_listeners.is_empty() { + while self.ctx.as_ref().map(|x| Rc::as_ptr(x)) + != parent_ctx.as_ref().map(|x| Rc::as_ptr(x)) + { + self.trigger_exit_rule_event(); + self.ctx = self.ctx.as_ref().unwrap().get_parent_ctx() + } + } else { + self.ctx = parent_ctx; + } + + //self.ctx is now parent + retctx.set_parent(&self.ctx); + + // println!("{:?}",self.ctx.as_ref().map(|it|it.to_string_tree(self))); + if self.build_parse_trees && self.ctx.is_some() { + self.ctx.as_ref().unwrap().add_child(retctx); + } + } + + fn create_token_node( + &self, + token: >::Tok, + ) -> Rc> { + TerminalNode::new(token).into() + } + + fn create_error_node( + &self, + token: >::Tok, + ) -> Rc> { + ErrorNode::new(token).into() + } + + /// Text representation of generated DFA for debugging purposes + pub fn dump_dfa(&self) { + let mut seen_one = false; + for dfa in self.interp.decision_to_dfa() { + let dfa = dfa.borrow(); + // because s0 is saved in dfa for Rust version + if dfa.states.len() > 1 + (dfa.is_precedence_dfa() as usize) { + if seen_one { + println!() + } + println!("Decision {}:", dfa.decision); + print!("{}", (*dfa).to_string(self.get_vocabulary())); + seen_one = true; + } + } + } + + // fn get_invoking_context(&self, ruleIndex: isize) -> ParserRuleContext { unimplemented!() } + // + // fn in_context(&self, context: ParserRuleContext) -> bool { unimplemented!() } + // + // fn get_expected_tokens_within_current_rule(&self) -> * IntervalSet { unimplemented!() } + // + // + // fn get_rule_index(&self, ruleName: String) -> int { unimplemented!() } + // + // fn get_dfaStrings(&self) -> String { unimplemented!() } + // + // fn get_source_name(&self) -> String { unimplemented!() } + // + // fn set_trace(&self, trace: * TraceListener) { unimplemented!() } +} + +/// Allows to safely cast listener back to user type +#[derive(Debug)] +pub struct ListenerId { + pub(crate) actual_id: usize, + phantom: PhantomData T>, +} + +impl ListenerId { + fn new(listener: &Box) -> ListenerId { + ListenerId { + actual_id: listener.as_ref() as *const T as *const () as usize, + phantom: Default::default(), + } + } +} + +impl ListenerId { + unsafe fn into_listener(self, boxed: Box) -> Box { + Box::from_raw(Box::into_raw(boxed) as *mut T) + } +} diff --git a/runtime/Rust/src/parser_atn_simulator.rs b/runtime/Rust/src/parser_atn_simulator.rs new file mode 100644 index 0000000000..68bc8c1756 --- /dev/null +++ b/runtime/Rust/src/parser_atn_simulator.rs @@ -0,0 +1,1537 @@ +//! 
Adaptive LL(*) prediction implementation for the parser
+use std::borrow::Borrow;
+use std::cell::{Cell, RefCell};
+use std::collections::{HashMap, HashSet};
+
+use std::marker::PhantomData;
+use std::ops::Deref;
+use std::rc::Rc;
+use std::{ptr, usize};
+
+use bit_set::BitSet;
+
+use crate::atn::{ATN, INVALID_ALT};
+use crate::atn_config::ATNConfig;
+use crate::atn_config_set::ATNConfigSet;
+use crate::atn_simulator::{BaseATNSimulator, IATNSimulator};
+use crate::atn_state::ATNStateType::RuleStopState;
+use crate::atn_state::{ATNDecisionState, ATNState, ATNStateRef, ATNStateType, ATNSTATE_BLOCK_END};
+use crate::dfa::{ScopeExt, DFA};
+use crate::dfa_state::{DFAState, DFAStateRef, PredPrediction};
+use crate::errors::{ANTLRError, NoViableAltError};
+use crate::int_stream::EOF;
+use crate::interval_set::IntervalSet;
+use crate::lexer_atn_simulator::ERROR_DFA_STATE_REF;
+use crate::parser::{Parser, ParserNodeType};
+
+use crate::prediction_context::{
+    MurmurHasherBuilder, PredictionContext, PredictionContextCache, EMPTY_PREDICTION_CONTEXT,
+    PREDICTION_CONTEXT_EMPTY_RETURN_STATE,
+};
+use crate::prediction_mode::*;
+use crate::semantic_context::SemanticContext;
+use crate::token::{Token, TOKEN_EOF, TOKEN_EPSILON};
+
+use crate::token_stream::TokenStream;
+use crate::transition::{
+    ActionTransition, EpsilonTransition, PrecedencePredicateTransition, PredicateTransition,
+    RuleTransition, Transition, TransitionType,
+};
+
+/// ### The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
+///
+/// The basic complexity of the adaptive strategy makes it harder to understand.
+/// We begin with ATN simulation to build paths in a DFA. Subsequent prediction
+/// requests go through the DFA first. If they reach a state without an edge for
+/// the current symbol, the algorithm fails over to the ATN simulation to
+/// complete the DFA path for the current input (until it finds a conflict state
+/// or uniquely predicting state).
+///
+/// All of that is done without using the outer context because we want to create
+/// a DFA that is not dependent upon the rule invocation stack when we do a
+/// prediction. One DFA works in all contexts. We avoid using context not
+/// necessarily because it's slower, although it can be, but because of the DFA
+/// caching problem. The closure routine only considers the rule invocation stack
+/// created during prediction beginning in the decision rule. For example, if
+/// prediction occurs without invoking another rule's ATN, there are no context
+/// stacks in the configurations. When lack of context leads to a conflict, we
+/// don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
+/// strategy (versus full LL(*)).
+///
+/// When SLL yields a configuration set with conflict, we rewind the input and
+/// retry the ATN simulation, this time using full outer context without adding
+/// to the DFA. Configuration context stacks will be the full invocation stacks
+/// from the start rule. If we get a conflict using full context, then we can
+/// definitively say we have a true ambiguity for that input sequence. If we
+/// don't get a conflict, it implies that the decision is sensitive to the outer
+/// context. (It is not context-sensitive in the sense of context-sensitive
+/// grammars.)
+///
+/// The next time we reach this DFA state with an SLL conflict, through DFA
+/// simulation, we will again retry the ATN simulation using full context mode.
+/// This is slow because we can't save the results and have to "interpret" the
+/// ATN each time we get that input.
+///
+/// **For more info see Java version**
+#[derive(Debug)]
+pub struct ParserATNSimulator {
+    base: BaseATNSimulator,
+    prediction_mode: Cell<PredictionMode>,
+    start_index: Cell<isize>,
+    // pd: PhantomData
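+    // The `Cell`s let prediction run through `&self`: `prediction_mode` selects
+    // SLL vs LL (LL is the default set in `new`), and `start_index` remembers
+    // where the current prediction started so the input can be rewound.
+    // Downstream code can trade safety for speed explicitly, e.g. (a sketch):
+    //     parser.get_interpreter().set_prediction_mode(PredictionMode::SLL);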
+} + +/// Just a local helper structure to spoil function parameters as little as possible +struct Local<'a, 'input, T: Parser<'input>> { + outer_context: Rc<>::Type>, + dfa_ref: &'a RefCell, + merge_cache: &'a mut MergeCache, + precedence: isize, + parser: &'a mut T, + pd: PhantomData>>, +} + +impl<'a, 'input, T: Parser<'input> + 'a> Local<'a, 'input, T> { + // fn dfa(&self) -> &DFA { + // self.dfa_ref.borrow().deref() + // } + // fn dfa_mut(&mut self) -> &mut DFA { + // self.dfa_ref.borrow_mut().deref_mut() + // } + fn input(&mut self) -> &mut dyn TokenStream<'input, TF = T::TF> { + self.parser.get_input_stream_mut() + } + // fn seek(&mut self, i: isize) { self.input().seek(i) } + fn outer_context(&self) -> &>::Type { + self.outer_context.deref() + } +} + +pub(crate) type MergeCache = HashMap< + (Rc, Rc), + Rc, + MurmurHasherBuilder, +>; + +impl ParserATNSimulator { + /// creates new `ParserATNSimulator` + pub fn new( + atn: Rc, + decision_to_dfa: Rc>>, + shared_context_cache: Rc, + ) -> ParserATNSimulator { + ParserATNSimulator { + base: BaseATNSimulator::new_base_atnsimulator( + atn, + decision_to_dfa, + shared_context_cache, + ), + prediction_mode: Cell::new(PredictionMode::LL), + start_index: Cell::new(0), + } + } + + /// Returns current prediction mode + pub fn get_prediction_mode(&self) -> PredictionMode { + self.prediction_mode.get() + } + + /// Sets current prediction mode + pub fn set_prediction_mode(&self, v: PredictionMode) { + self.prediction_mode.set(v) + } + + // fn reset(&self) { unimplemented!() } + + /// Called by generated parser to choose an alternative when LL(1) parsing is not enough + pub fn adaptive_predict<'a, T: Parser<'a>>( + &self, + decision: isize, + parser: &mut T, + ) -> Result { + self.start_index.set(parser.get_input_stream_mut().index()); + let mut merge_cache: MergeCache = HashMap::with_hasher(MurmurHasherBuilder {}); + let mut local = Local { + outer_context: parser.get_parser_rule_context().clone(), + dfa_ref: &self.decision_to_dfa()[decision as usize], + merge_cache: &mut merge_cache, + precedence: parser.get_precedence(), + parser, + pd: PhantomData, + }; + // 4!("adaptive_predict decision {}, is_prec {}",decision,local.dfa.is_precedence_dfa()); + + let m = local.input().mark(); + + let result = { + let s0 = { + let dfa = local.dfa_ref.borrow(); + + if dfa.is_precedence_dfa() { + dfa.get_precedence_start_state( + local.precedence, /*parser.get_precedence()*/ + ) + } else { + dfa.s0 + } + }; + + let s0 = s0.unwrap_or_else(|| { + let s0_closure = { + let dfa = local.dfa_ref.borrow(); + self.compute_start_state( + dfa.atn_start_state, + // PredictionContext::from_rule_context::<'a,T::Node>(self.atn(), empty_ctx::().as_ref()), + EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()), + false, + &mut local, + ) + }; + + let mut s0; + if local.dfa_ref.borrow().is_precedence_dfa() { + s0 = { + let dfa = local.dfa_ref.borrow(); + dfa.s0.unwrap() + }; + let s0_closure_updated = self.apply_precedence_filter(&s0_closure, &mut local); + + let mut dfa_mut = local.dfa_ref.borrow_mut(); + dfa_mut.states[s0].configs = Box::new(s0_closure); + + s0 = self.add_dfastate( + &mut dfa_mut, + DFAState::new_dfastate(0, Box::new(s0_closure_updated)), + ); + + dfa_mut.set_precedence_start_state(local.precedence, s0); + } else { + let mut dfa_mut = local.dfa_ref.borrow_mut(); + + s0 = self.add_dfastate( + &mut dfa_mut, + DFAState::new_dfastate(0, Box::new(s0_closure)), + ); + dfa_mut.s0.replace(s0); + } + s0 + }); + + self.exec_atn(&mut local, s0)? 
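+            // `exec_atn` extends the cached DFA from s0 as far as the current
+            // input allows, failing over to full-context ATN simulation only
+            // when the DFA reports an SLL conflict.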
+ }; + + local.input().seek(self.start_index.get()); + local.input().release(m); + // println!("result = {}", result); + Ok(result) + } + + #[allow(non_snake_case)] + fn exec_atn<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + s0: DFAStateRef, + ) -> Result { + let mut previousD = s0; + + let mut token = local.input().la(1); + + loop { + // println!("exec atn loop previous D {}",previousD as isize -1); + + let D = { + let dfa = local.dfa_ref.borrow(); + Self::get_existing_target_state(&dfa, previousD, token) + }; + let D = if let Some(s) = D { + s + } else { + self.compute_target_state(previousD, token, local) + }; + debug_assert!(D > 0); + + // let dfa = local.dfa_ref.borrow(); + // let states = &dfa.states; + if D == ERROR_DFA_STATE_REF { + let previousDstate = &local.dfa_ref.borrow().states[previousD]; + let err = self.no_viable_alt( + local, + previousDstate.configs.as_ref(), + self.start_index.get(), + ); + local.input().seek(self.start_index.get()); + let alt = self.get_syn_valid_or_sem_invalid_alt_that_finished_decision_entry_rule( + previousDstate.configs.as_ref(), + local, + ); + if alt != INVALID_ALT { + return Ok(alt); + } + return Err(err); + } + + let dfa = local.dfa_ref.borrow(); + let Dstate = &dfa.states[D]; + if Dstate.requires_full_context && self.prediction_mode.get() != PredictionMode::SLL { + let mut conflicting_alts = Dstate.configs.conflicting_alts.clone(); //todo get rid of clone? + if !Dstate.predicates.is_empty() { + let conflict_index = local.input().index(); + if conflict_index != self.start_index.get() { + local.input().seek(self.start_index.get()) + } + + conflicting_alts = self.eval_semantic_context(local, &Dstate.predicates, true); + // println!("conflicting_alts {:?}",&conflicting_alts); + if conflicting_alts.len() == 1 { + return Ok(conflicting_alts.iter().next().unwrap() as isize); + } + + if conflict_index != self.start_index.get() { + local.input().seek(conflict_index) + } + } + + self.report_attempting_full_context( + &dfa, + &conflicting_alts, + Dstate.configs.as_ref(), + self.start_index.get(), + local.input().index(), + local.parser, + ); + + let atn_start_state = dfa.atn_start_state; + drop(dfa); + + let s0_closure = self.compute_start_state( + atn_start_state, + PredictionContext::from_rule_context::( + self.atn(), + local.outer_context(), + ), + true, + local, + ); + + return self.exec_atn_with_full_context(local, s0_closure); + } + + if Dstate.is_accept_state { + if Dstate.predicates.is_empty() { + // println!("prediction !!{}",Dstate.prediction); + return Ok(Dstate.prediction); + } + + let stop_index = local.input().index(); + local.input().seek(self.start_index.get()); + + let alts = self.eval_semantic_context(local, &Dstate.predicates, true); + match alts.len() { + 0 => { + return Err(self.no_viable_alt( + local, + Dstate.configs.as_ref(), + self.start_index.get(), + )) + } + 1 => return Ok(alts.iter().next().unwrap() as isize), + _ => { + self.report_ambiguity( + &dfa, + self.start_index.get(), + stop_index, + false, + &alts, + Dstate.configs.as_ref(), + local.parser, + ); + return Ok(alts.iter().next().unwrap() as isize); + } + } + } + previousD = D; + + if token != EOF { + local.input().consume(); + token = local.input().la(1); + } + } + } + + #[allow(non_snake_case)] + fn get_existing_target_state( + dfa: &DFA, + previousD: DFAStateRef, + t: isize, + ) -> Option { + dfa.states[previousD] + .edges + .get((t + 1) as usize) + .and_then(|x| match *x { + 0 => None, + x => Some(x), + }) + } + + #[allow(non_snake_case)] + fn 
compute_target_state<'a, T: Parser<'a>>( + &self, + // dfa: &mut DFA, + previousD: DFAStateRef, + t: isize, + local: &mut Local<'_, 'a, T>, + ) -> DFAStateRef { + // println!("source config {:?}",dfa.states.read()[previousD].configs.as_ref()); + let reach = { + let dfa = local.dfa_ref.borrow(); + let closure = dfa.states[previousD].configs.as_ref(); + self.compute_reach_set(closure, t, false, local) + }; + + let reach = match reach { + None => { + let mut dfa_mut = local.dfa_ref.borrow_mut(); + self.add_dfaedge(&mut dfa_mut.states[previousD], t, ERROR_DFA_STATE_REF); + return ERROR_DFA_STATE_REF; + } + Some(x) => x, + }; + + let predicted_alt = self.get_unique_alt(&reach); + // println!("predicted_alt {}",predicted_alt); + + let mut D = DFAState::new_dfastate(0, reach.into()); + let reach = D.configs.as_ref(); + + if predicted_alt != INVALID_ALT { + D.is_accept_state = true; + D.configs.set_unique_alt(predicted_alt); + D.prediction = predicted_alt + } else if self.all_configs_in_rule_stop_state(reach) + || has_sll_conflict_terminating_prediction(self.prediction_mode.get(), reach) + { + let alts = self.get_conflicting_alts(reach); + D.prediction = alts.iter().next().unwrap() as isize; + D.configs.conflicting_alts = alts; + D.requires_full_context = true; + D.is_accept_state = true; + } + + // println!("target config {:?}",&D.configs); + if D.is_accept_state && D.configs.has_semantic_context() { + let decision_state = + self.atn().decision_to_state[local.dfa_ref.borrow().decision as usize]; + self.predicate_dfa_state(&mut D, self.atn().states[decision_state].deref()); + // println!("predicates compute target {:?}",&D.predicates); + if !D.predicates.is_empty() { + D.prediction = INVALID_ALT + } + } + + let mut dfa_mut = local.dfa_ref.borrow_mut(); + let D = self.add_dfastate(&mut dfa_mut, D); + self.add_dfaedge(&mut dfa_mut.states[previousD], t, D); + + D + } + + fn predicate_dfa_state(&self, dfa_state: &mut DFAState, decision_state: &dyn ATNState) { + let nalts = decision_state.get_transitions().len(); + let alts_to_collect_preds_from = + self.get_conflicting_alts_or_unique_alt(dfa_state.configs.as_ref()); + let alt_to_pred = self.get_preds_for_ambig_alts( + &alts_to_collect_preds_from, + dfa_state.configs.as_ref(), + nalts, + ); + if let Some(alt_to_pred) = alt_to_pred { + dfa_state.predicates = + self.get_predicate_predictions(&alts_to_collect_preds_from, alt_to_pred); + dfa_state.prediction = INVALID_ALT; + } else { + dfa_state.prediction = alts_to_collect_preds_from + .iter() + .next() + .unwrap_or(0 /*in java it is -1 but looks like 0 is good enough*/) + as isize; + } + } + + fn exec_atn_with_full_context<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + // _D: &DFAState, + s0: ATNConfigSet, + ) -> Result { + //println!("exec_atn_with_full_context"); + let full_ctx = true; + let mut found_exact_ambig = false; + let mut prev = s0; + local.input().seek(self.start_index.get()); + let mut t = local.input().la(1); + let mut predicted_alt; + // local.upgrade_lock(); + loop { + // println!("full_ctx loop"); + + let reach = self.compute_reach_set(&prev, t, full_ctx, local); + prev = match reach { + None => { + local.input().seek(self.start_index.get()); + let alt = self + .get_syn_valid_or_sem_invalid_alt_that_finished_decision_entry_rule( + &prev, local, + ); + if alt != INVALID_ALT { + return Ok(alt); + } + return Err(self.no_viable_alt(local, &prev, self.start_index.get())); + } + Some(x) => x, + }; + + let alt_sub_sets = get_conflicting_alt_subsets(&prev); + 
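+            // Full-LL loop termination: a unique alternative wins outright; in
+            // plain LL mode it is enough that every conflicting subset resolves
+            // to the same single viable alternative, while in exact-ambiguity
+            // mode we keep going until all subsets conflict and are equal,
+            // which proves a true ambiguity rather than an SLL weakness.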
prev.set_unique_alt(self.get_unique_alt(&prev)); + if prev.get_unique_alt() != INVALID_ALT { + predicted_alt = prev.get_unique_alt(); + break; + } + if self.prediction_mode.get() != PredictionMode::LL_EXACT_AMBIG_DETECTION { + predicted_alt = resolves_to_just_one_viable_alt(&alt_sub_sets); + if predicted_alt != INVALID_ALT { + break; + } + } else if all_subsets_conflict(&alt_sub_sets) && all_subsets_equal(&alt_sub_sets) { + found_exact_ambig = true; + predicted_alt = get_single_viable_alt(&alt_sub_sets); + break; + } + + if t != TOKEN_EOF { + local.input().consume(); + t = local.input().la(1); + } + } + + let dfa = local.dfa_ref.borrow(); + if prev.get_unique_alt() != INVALID_ALT { + self.report_context_sensitivity( + &dfa, + predicted_alt, + &prev, + self.start_index.get(), + local.input().index(), + local.parser, + ); + return Ok(predicted_alt); + } + self.report_ambiguity( + &dfa, + self.start_index.get(), + local.input().index(), + found_exact_ambig, + &prev.get_alts(), + &prev, + local.parser, + ); + + Ok(predicted_alt) + } + + // ATNConfigSet is pretty big so should be boxed to move it cheaper + fn compute_reach_set<'a, T: Parser<'a>>( + &self, + closure: &ATNConfigSet, + t: isize, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + // println!("in computeReachSet, starting closure: {:?}",closure); + let mut intermediate = ATNConfigSet::new_base_atnconfig_set(full_ctx); + + let mut skipped_stop_states = Vec::<&ATNConfig>::new(); + + for c in closure.get_items() { + let state = self.atn().states[c.get_state()].as_ref(); + if let RuleStopState = state.get_state_type() { + assert!(c.get_context().unwrap().is_empty()); + if full_ctx || t == TOKEN_EOF { + skipped_stop_states.push(c); + } + continue; + } + + for tr in state.get_transitions() { + self.get_reachable_target(tr.as_ref(), t).map(|target| { + let added = Box::new(c.cloned(self.atn().states[target].as_ref())); + intermediate.add_cached(added, Some(local.merge_cache)) + }); + } + } + // println!("intermediate {:?}",intermediate); + + let mut look_to_end_of_rule = false; + let mut reach = if skipped_stop_states.is_empty() + && t != TOKEN_EOF + && (intermediate.length() == 1 || self.get_unique_alt(&intermediate) != INVALID_ALT) + { + look_to_end_of_rule = true; + intermediate + } else { + let mut reach = ATNConfigSet::new_base_atnconfig_set(full_ctx); + let mut closure_busy = HashSet::new(); + // println!("calc reach {:?}",intermediate.length()); + + for c in intermediate.configs { + let treat_eofas_epsilon = t == TOKEN_EOF; + self.closure( + *c, + &mut reach, + &mut closure_busy, + false, + full_ctx, + treat_eofas_epsilon, + local, + ); + } + // println!("calc reach {:?}",reach); + reach + }; + + if t == TOKEN_EOF { + reach = self.remove_all_configs_not_in_rule_stop_state( + reach, + look_to_end_of_rule, + local.merge_cache, + ); + } + + if !skipped_stop_states.is_empty() + && (!full_ctx || !self.has_config_in_rule_stop_state(&reach)) + { + for c in skipped_stop_states { + reach.add_cached(c.clone().into(), Some(local.merge_cache)); + } + } + // println!("result?"); + if reach.is_empty() { + return None; + } + + // println!("result {:?}",&reach); + return Some(reach); + } + + fn has_config_in_rule_stop_state(&self, configs: &ATNConfigSet) -> bool { + for c in configs.get_items() { + if let RuleStopState = self.atn().states[c.get_state()].get_state_type() { + return true; + } + } + return false; + } + + fn all_configs_in_rule_stop_state(&self, configs: &ATNConfigSet) -> bool { + for c in configs.get_items() { + if 
let RuleStopState = self.atn().states[c.get_state()].get_state_type() { + } else { + return false; + } + } + return true; + } + + fn remove_all_configs_not_in_rule_stop_state( + &self, + configs: ATNConfigSet, + look_to_end_of_rule: bool, + merge_cache: &mut MergeCache, + ) -> ATNConfigSet { + if self.all_configs_in_rule_stop_state(&configs) { + return configs; + } + + // can just remove instead of creating new instance because we own configs + // it significantly differs from java version though + let mut result = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + for c in configs.configs { + let state = self.atn().states[c.get_state()].as_ref(); + if let RuleStopState = state.get_state_type() { + result.add_cached(c, Some(merge_cache)); + continue; + } + + if look_to_end_of_rule && state.has_epsilon_only_transitions() { + let next_tokens = self.atn().next_tokens(state); + if next_tokens.contains(TOKEN_EPSILON) { + let end_of_rule_state = self.atn().rule_to_stop_state[state.get_rule_index()]; + result.add_cached( + c.cloned(self.atn().states[end_of_rule_state].as_ref()) + .into(), + Some(merge_cache), + ); + } + } + } + + result + } + + fn compute_start_state<'a, T: Parser<'a>>( + &self, + a: ATNStateRef, + initial_ctx: Rc, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> ATNConfigSet { + // let initial_ctx = PredictionContext::prediction_context_from_rule_context(self.atn(),ctx); + let mut configs = ATNConfigSet::new_base_atnconfig_set(full_ctx); + // println!("initial {:?}",initial_ctx); + // println!("initial state {:?}",a); + + let atn_states = &self.atn().states; + for (i, tr) in atn_states[a].get_transitions().iter().enumerate() { + let target = &atn_states[tr.get_target()]; + let c = ATNConfig::new( + target.get_state_number(), + (i + 1) as isize, + Some(initial_ctx.clone()), + ); + let mut closure_busy = HashSet::new(); + self.closure( + c, + &mut configs, + &mut closure_busy, + true, + full_ctx, + false, + local, + ); + } + // println!("start state {:?}",configs); + + configs + } + + fn apply_precedence_filter<'a, T: Parser<'a>>( + &self, + configs: &ATNConfigSet, + local: &mut Local<'_, 'a, T>, + ) -> ATNConfigSet { + //println!("apply_precedence_filter"); + let mut states_from_alt1 = HashMap::new(); + let mut config_set = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + + for config in configs.get_items() { + if config.get_alt() != 1 { + continue; + } + + let updated_sem_ctx = config + .semantic_context + .eval_precedence(local.parser, local.outer_context()); + + if let Some(updated_sem_ctx) = updated_sem_ctx.as_deref() { + states_from_alt1.insert(config.get_state(), config.get_context()); + + if *updated_sem_ctx != *config.semantic_context { + config_set.add_cached( + Box::new(ATNConfig::new_with_semantic( + config.get_state(), + config.get_alt(), + config.get_context().cloned(), + Box::new(updated_sem_ctx.clone()), + )), + Some(local.merge_cache), + ); + } else { + config_set.add_cached(Box::new(config.clone()), Some(local.merge_cache)); + } + } + } + + for config in configs.get_items() { + if config.get_alt() == 1 { + continue; + } + if !config.is_precedence_filter_suppressed() { + if let Some(context) = states_from_alt1.get(&config.get_state()) { + if *context == config.get_context() { + continue; + } + } + } + config_set.add(Box::new(config.clone())); + } + + config_set + } + + fn get_reachable_target(&self, trans: &dyn Transition, ttype: isize) -> Option { + if trans.matches(ttype, 0, self.atn().max_token_type) { + return 
Some(trans.get_target()); + } + None + } + + fn get_preds_for_ambig_alts( + &self, + ambig_alts: &BitSet, + configs: &ATNConfigSet, + nalts: usize, + ) -> Option> { + let mut alt_to_pred = Vec::with_capacity(nalts + 1); + alt_to_pred.resize_with(nalts + 1, || None); + for c in configs.configs.iter() { + let alt = c.get_alt() as usize; + if ambig_alts.contains(alt) { + alt_to_pred[alt] = Some(SemanticContext::or( + alt_to_pred[alt].as_ref(), + Some(&*c.semantic_context), + )); + } + } + + let alt_to_pred: Vec = alt_to_pred + .into_iter() + .map(|it| { + if let Some(inner) = it { + inner + } else { + SemanticContext::NONE + } + }) + .collect(); + + let npred_alts = alt_to_pred + .iter() + .filter(|it| **it != SemanticContext::NONE) + .count(); + + if npred_alts == 0 { + return None; + } + return Some(alt_to_pred); + } + + fn get_predicate_predictions( + &self, + ambig_alts: &BitSet, + alt_to_pred: Vec, + ) -> Vec { + let mut pairs = vec![]; + let mut contains_predicate = false; + for (i, pred) in alt_to_pred.into_iter().enumerate().skip(1) { + if pred != SemanticContext::NONE { + contains_predicate = true + } + + if ambig_alts.contains(i) { + pairs.push(PredPrediction { + alt: i as isize, + pred, + }) + } + } + if !contains_predicate { + return Vec::new(); + } + + pairs + } + + fn get_syn_valid_or_sem_invalid_alt_that_finished_decision_entry_rule<'a, T: Parser<'a>>( + &self, + configs: &ATNConfigSet, + local: &mut Local<'_, 'a, T>, + ) -> isize { + let (sem_valid_configs, sem_invalid_configs) = + self.split_according_to_semantic_validity(configs, local); + + let alt = self.get_alt_that_finished_decision_entry_rule(&sem_valid_configs); + if alt != INVALID_ALT { + return alt; + } + + if !sem_invalid_configs.is_empty() { + let alt = self.get_alt_that_finished_decision_entry_rule(&sem_invalid_configs); + if alt != INVALID_ALT { + return alt; + } + } + + INVALID_ALT + } + + fn split_according_to_semantic_validity<'a, T: Parser<'a>>( + &self, + configs: &ATNConfigSet, + local: &mut Local<'_, 'a, T>, + ) -> (ATNConfigSet, ATNConfigSet) { + let mut succeeded = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + let mut failed = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + for c in configs.get_items() { + let clone = Box::new(c.clone()); + if *c.semantic_context != SemanticContext::NONE { + let predicate_eval_result = self.eval_predicate( + local, + &*c.semantic_context, + c.get_alt(), + configs.full_context(), + ); + if predicate_eval_result { + succeeded.add(clone); + } else { + failed.add(clone); + } + } else { + succeeded.add(clone); + } + } + (succeeded, failed) + } + + fn get_alt_that_finished_decision_entry_rule(&self, configs: &ATNConfigSet) -> isize { + let mut alts = IntervalSet::new(); + for c in configs.get_items() { + let has_empty_path = c.get_context().map(|x| x.has_empty_path()) == Some(true); + let is_stop = self.atn().states[c.get_state()].get_state_type() == &RuleStopState; + if c.get_reaches_into_outer_context() > 0 || (is_stop && has_empty_path) { + alts.add_one(c.get_alt()) + } + } + + return alts.get_min().unwrap_or(INVALID_ALT); + } + + fn eval_semantic_context<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + pred_predictions: &Vec, + complete: bool, + ) -> BitSet { + let mut predictions = BitSet::new(); + for pred in pred_predictions { + if pred.pred == SemanticContext::NONE { + predictions.insert(pred.alt as usize); + + if !complete { + break; + } + continue; + } + + let full_ctx = false; + let predicate_evaluation_result = + 
self.eval_predicate(local, &pred.pred, pred.alt, full_ctx); + + if predicate_evaluation_result { + predictions.insert(pred.alt as usize); + if !complete { + break; + } + } + } + predictions + } + + fn eval_predicate<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + pred: impl Borrow, + _alt: isize, + _full_ctx: bool, + ) -> bool { + pred.borrow().evaluate(local.parser, &*local.outer_context) + } + + fn closure<'a, T: Parser<'a>>( + &self, + config: ATNConfig, + configs: &mut ATNConfigSet, + closure_busy: &mut HashSet, + collect_predicates: bool, + full_ctx: bool, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) { + // println!("cl{}", config.get_state()); + let initial_depth = 0; + // local.merge_cache.clear(); + + self.closure_checking_stop_state( + config, + configs, + closure_busy, + collect_predicates, + full_ctx, + initial_depth, + treat_eofas_epsilon, + local, + ); + assert!(!full_ctx || !configs.get_dips_into_outer_context()) + } + + fn closure_checking_stop_state<'a, T: Parser<'a>>( + &self, + mut config: ATNConfig, + configs: &mut ATNConfigSet, + closure_busy: &mut HashSet, + collect_predicates: bool, + full_ctx: bool, + depth: isize, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) { + // println!("closure({:?})",config); + if let RuleStopState = self.atn().states[config.get_state()].get_state_type() { + if !config.get_context().unwrap().is_empty() { + config.get_context().unwrap().run(|temp| { + if temp.get_return_state(temp.length() - 1) + == PREDICTION_CONTEXT_EMPTY_RETURN_STATE + { + if full_ctx { + let new_config = config.cloned_with_new_ctx( + self.atn().states[config.get_state()].as_ref(), + Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), + ); + configs.add_cached(Box::new(new_config), Some(local.merge_cache)); + } else { + self.closure_work( + config.clone(), + configs, + closure_busy, + collect_predicates, + full_ctx, + depth, + treat_eofas_epsilon, + local, + ) + } + } + }); + let context = config.take_context(); + for i in 0..context.length() { + if context.get_return_state(i) == PREDICTION_CONTEXT_EMPTY_RETURN_STATE { + if i != context.length() - 1 { + panic!("EMPTY_RETURN_STATE is not last for some reason, please report error") + } + continue; + } + let return_state = context.get_return_state(i) as ATNStateRef; + // let new_ctx = context.take_parent(i).unwrap(); + let new_ctx = context.get_parent(i).cloned(); + let mut c = ATNConfig::new_with_semantic( + return_state, + config.get_alt(), + new_ctx, + config.semantic_context.clone(), + ); + c.set_reaches_into_outer_context(config.get_reaches_into_outer_context()); + assert!(depth > isize::min_value()); + self.closure_checking_stop_state( + c, + configs, + closure_busy, + collect_predicates, + full_ctx, + depth - 1, + treat_eofas_epsilon, + local, + ) + } + return; + } else if full_ctx { + configs.add_cached(Box::new(config), Some(local.merge_cache)); + return; + } else { + } + } + self.closure_work( + config, + configs, + closure_busy, + collect_predicates, + full_ctx, + depth, + treat_eofas_epsilon, + local, + ) + } + + fn closure_work<'a, T: Parser<'a>>( + &self, + config: ATNConfig, + configs: &mut ATNConfigSet, + closure_busy: &mut HashSet, + collect_predicates: bool, + full_ctx: bool, + depth: isize, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) { + //println!("depth {}",depth); + // println!("closure_work started {:?}",config); + let p = self.atn().states[config.get_state()].as_ref(); + if !p.has_epsilon_only_transitions() { + 
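+            // Only states that still have a non-epsilon transition are real
+            // stopping points for the closure, so only their configurations
+            // are recorded in the result set.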
configs.add_cached(Box::new(config.clone()), Some(local.merge_cache)); + } + + for (i, tr) in p.get_transitions().iter().enumerate() { + if i == 0 && self.can_drop_loop_entry_edge_in_left_recursive_rule(&config) { + continue; + } + + let continue_collecting = tr.get_serialization_type() + != TransitionType::TRANSITION_ACTION + && collect_predicates; + let c = self.get_epsilon_target( + &config, + tr.as_ref(), + continue_collecting, + depth == 0, + full_ctx, + treat_eofas_epsilon, + local, + ); + if let Some(mut c) = c { + let mut new_depth = depth; + if let RuleStopState = self.atn().states[config.get_state()].get_state_type() { + assert!(!full_ctx); + + let dfa = local.dfa_ref.borrow(); + if dfa.is_precedence_dfa() { + let outermost_precedence_return = tr + .as_ref() + .cast::() + .outermost_precedence_return; + let atn_start_state = self.atn().states[dfa.atn_start_state].as_ref(); + if outermost_precedence_return == atn_start_state.get_rule_index() as isize + { + c.set_precedence_filter_suppressed(true); + } + } + + c.reaches_into_outer_context += 1; + if !closure_busy.insert(c.clone()) { + continue; + } + configs.set_dips_into_outer_context(true); + assert!(new_depth > isize::min_value()); + new_depth -= 1; + } else { + if !tr.is_epsilon() && !closure_busy.insert(c.clone()) { + continue; + } + + if tr.get_serialization_type() == TransitionType::TRANSITION_RULE { + if new_depth >= 0 { + new_depth += 1 + } + } + } + + self.closure_checking_stop_state( + c, + configs, + closure_busy, + continue_collecting, + full_ctx, + new_depth, + treat_eofas_epsilon, + local, + ) + }; + } + // println!("closure_work ended {:?}",config); + } + + fn can_drop_loop_entry_edge_in_left_recursive_rule(&self, _config: &ATNConfig) -> bool { + // if std::env::var("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT").ok() + // .and_then(|it|str::parse::(&it).ok()) == Some(true) + // { return false } + + let state = self.atn().states[_config.get_state()].as_ref(); + + if let ATNStateType::DecisionState { + state: ATNDecisionState::StarLoopEntry { is_precedence, .. }, + .. + } = state.get_state_type() + { + if !*is_precedence + || _config.get_context().unwrap().is_empty() + || _config.get_context().unwrap().has_empty_path() + { + return false; + } + } else { + return false; + } + + let pred_ctx = _config.get_context().unwrap(); + let ctx_len = pred_ctx.length(); + for i in 0..ctx_len { + let return_state = self.atn().states[pred_ctx.get_return_state(i) as usize].as_ref(); + if return_state.get_rule_index() != state.get_rule_index() { + return false; + } + } + + let decision_start_state = state.get_transitions()[0].get_target(); + let decision_start_state = self.atn().states[decision_start_state].as_ref(); + let block_end_state_num = if let ATNStateType::DecisionState { + state: ATNDecisionState::BlockStartState { end_state, .. }, + .. 
+ } = decision_start_state.get_state_type() + { + *end_state + } else { + unreachable!("cast error") + }; + + for i in 0..ctx_len { + let return_state = self.atn().states[pred_ctx.get_return_state(i) as usize].as_ref(); + if return_state.get_transitions().len() != 1 + || !return_state.get_transitions()[0].is_epsilon() + { + // println!("test1"); + return false; + } + let return_state_target = + self.atn().states[return_state.get_transitions()[0].get_target()].as_ref(); + if return_state.get_state_type_id() == ATNSTATE_BLOCK_END + && ptr::eq(return_state_target, state) + { + continue; + } + if return_state.get_state_number() == block_end_state_num { + continue; + } + if return_state_target.get_state_number() == block_end_state_num { + continue; + } + + if return_state_target.get_state_type_id() == ATNSTATE_BLOCK_END + && return_state_target.get_transitions().len() == 1 + && return_state_target.get_transitions()[0].is_epsilon() + && return_state_target.get_transitions()[0].get_target() == state.get_state_number() + { + continue; + } + // println!("test2"); + return false; + } + // println!("dropping on state {} ", state.get_state_number()); + + return true; + } + // + // fn get_rule_name(&self, index: isize) -> String { unimplemented!() } + + fn get_epsilon_target<'a, T: Parser<'a>>( + &self, + config: &ATNConfig, + t: &dyn Transition, + collect_predicates: bool, + in_context: bool, + full_ctx: bool, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + match t.get_serialization_type() { + TransitionType::TRANSITION_EPSILON => { + Some(config.cloned(self.atn().states[t.get_target()].as_ref())) + } + TransitionType::TRANSITION_RULE => { + Some(self.rule_transition(config, t.cast::())) + } + TransitionType::TRANSITION_PREDICATE => self.pred_transition( + config, + t.cast::(), + collect_predicates, + in_context, + full_ctx, + local, + ), + TransitionType::TRANSITION_ACTION => { + Some(self.action_transition(config, t.cast::())) + } + TransitionType::TRANSITION_PRECEDENCE => self.precedence_transition( + config, + t.cast::(), + collect_predicates, + in_context, + full_ctx, + local, + ), + TransitionType::TRANSITION_ATOM + | TransitionType::TRANSITION_SET + | TransitionType::TRANSITION_RANGE => { + if treat_eofas_epsilon && t.matches(TOKEN_EOF, 0, 1) { + Some(config.cloned(self.atn().states[t.get_target()].as_ref())) + } else { + None + } + } + TransitionType::TRANSITION_NOTSET | TransitionType::TRANSITION_WILDCARD => None, + } + } + + fn action_transition(&self, config: &ATNConfig, t: &ActionTransition) -> ATNConfig { + config.cloned(self.atn().states[t.target].as_ref()) + } + + fn precedence_transition<'a, T: Parser<'a>>( + &self, + config: &ATNConfig, + pt: &PrecedencePredicateTransition, + collect_predicates: bool, + in_context: bool, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + let target = self.atn().states[pt.target].deref(); + if collect_predicates && in_context { + if full_ctx { + let curr_pos = local.input().index(); + local.input().seek(self.start_index.get()); + let prec_succeeds = self.eval_predicate( + local, + pt.get_predicate().unwrap(), + config.get_alt(), + full_ctx, + ); + local.input().seek(curr_pos); + if prec_succeeds { + return Some(config.cloned(target)); + } + } else { + let new_sem_ctx = + SemanticContext::and(Some(&*config.semantic_context), pt.get_predicate()); + return Some(config.cloned_with_new_semantic(target, Box::new(new_sem_ctx))); + } + } else { + return Some(config.cloned(target)); + } + + None + } + + fn 
pred_transition<'a, T: Parser<'a>>( + &self, + config: &ATNConfig, + pt: &PredicateTransition, + collect_predicates: bool, + in_context: bool, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + let target = self.atn().states[pt.target].deref(); + if collect_predicates && (!pt.is_ctx_dependent || (pt.is_ctx_dependent && in_context)) { + if full_ctx { + let curr_pos = local.input().index(); + local.input().seek(self.start_index.get()); + let prec_succeeds = self.eval_predicate( + local, + pt.get_predicate().unwrap(), + config.get_alt(), + full_ctx, + ); + local.input().seek(curr_pos); + if prec_succeeds { + return Some(config.cloned(target)); + } + } else { + let new_sem_ctx = + SemanticContext::and(Some(&*config.semantic_context), pt.get_predicate()); + return Some(config.cloned_with_new_semantic(target, Box::new(new_sem_ctx))); + } + } else { + return Some(config.cloned(target)); + } + + None + } + + fn rule_transition(&self, config: &ATNConfig, t: &RuleTransition) -> ATNConfig { + assert!(config.get_context().is_some()); + let new_ctx = PredictionContext::new_singleton( + config.get_context().cloned(), + t.follow_state as isize, + ); + config.cloned_with_new_ctx(self.atn().states[t.target].as_ref(), Some(new_ctx.into())) + } + + fn get_conflicting_alts(&self, configs: &ATNConfigSet) -> BitSet { + let altsets = get_conflicting_alt_subsets(configs); + get_alts(&altsets) + } + + //todo can return Cow + fn get_conflicting_alts_or_unique_alt(&self, configs: &ATNConfigSet) -> BitSet { + return if configs.get_unique_alt() != INVALID_ALT { + BitSet::new().modify_with(|it| { + it.insert(configs.get_unique_alt() as usize); + }) + } else { + configs.conflicting_alts.clone() + }; + } + // + // fn get_token_name(&self, t: isize) -> String { unimplemented!() } + // + // fn get_lookahead_name(&self, input: TokenStream) -> String { unimplemented!() } + // + // fn dump_dead_end_configs(&self, nvae: * NoViableAltError) { unimplemented!() } + // + fn no_viable_alt<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + _configs: &ATNConfigSet, + start_index: isize, + ) -> ANTLRError { + let start_token = local.parser.get_input_stream().get(start_index).borrow(); + let start_token = Token::to_owned(start_token); + let offending_token = local.input().lt(1).unwrap().borrow(); + let offending_token = Token::to_owned(offending_token); + ANTLRError::NoAltError(NoViableAltError::new_full( + local.parser, + start_token, + offending_token, + )) + } + + fn get_unique_alt(&self, configs: &ATNConfigSet) -> isize { + let mut alt = INVALID_ALT; + for c in configs.get_items() { + if alt == INVALID_ALT { + alt = c.get_alt() + } else if c.get_alt() != alt { + return INVALID_ALT; + } + } + + alt + } + + fn add_dfaedge(&self, from: &mut DFAState, t: isize, to: DFAStateRef) -> DFAStateRef { + if t < -1 || t > self.atn().max_token_type { + return to; + } + if from.edges.is_empty() { + from.edges.resize(self.atn().max_token_type as usize + 2, 0); + } + from.edges[(t + 1) as usize] = to; + + to + } + + fn add_dfastate(&self, dfa: &mut DFA, mut dfastate: DFAState) -> DFAStateRef { + if dfastate.state_number == ERROR_DFA_STATE_REF { + return ERROR_DFA_STATE_REF; + } + let states = &mut dfa.states; + + let state_number = states.len(); + dfastate.state_number = state_number; + + let key = dfastate.default_hash(); + //let mut new_hash = key; + if let Some(st) = dfa.states_map.get_mut(&key) { + if let Some(&st) = st.iter().find(|&&it| states[it] == dfastate) { + return st; + } + } + + if 
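+        // first time this config set is stored in the DFA: optimize it and
+        // freeze it so it can be shared by every edge that reaches this state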
!dfastate.configs.read_only() { + dfastate.configs.optimize_configs(self); + dfastate.configs.set_read_only(true); + // new_hash = dfastate.default_hash(); + } + + states.push(dfastate); + + // if key != new_hash { + dfa.states_map + .entry(key) + .or_insert(Vec::new()) + .push(state_number); + // } + state_number + } + + fn report_attempting_full_context<'a, T: Parser<'a>>( + &self, + dfa: &DFA, + conflicting_alts: &BitSet, + configs: &ATNConfigSet, + start_index: isize, + stop_index: isize, + parser: &mut T, + ) { + // let ambig_index = parser.get_current_token().get_token_index(); + parser + .get_error_lister_dispatch() + .report_attempting_full_context( + parser, + dfa, + start_index, + stop_index, + conflicting_alts, + configs, + ) + } + + fn report_context_sensitivity<'a, T: Parser<'a>>( + &self, + dfa: &DFA, + prediction: isize, + configs: &ATNConfigSet, + start_index: isize, + stop_index: isize, + parser: &mut T, + ) { + parser + .get_error_lister_dispatch() + .report_context_sensitivity(parser, dfa, start_index, stop_index, prediction, configs) + } + + fn report_ambiguity<'a, T: Parser<'a>>( + &self, + dfa: &DFA, + start_index: isize, + stop_index: isize, + exact: bool, + ambig_alts: &BitSet, + configs: &ATNConfigSet, + parser: &mut T, + ) { + parser.get_error_lister_dispatch().report_ambiguity( + parser, + dfa, + start_index, + stop_index, + exact, + ambig_alts, + configs, + ) + } +} + +impl IATNSimulator for ParserATNSimulator { + fn shared_context_cache(&self) -> &PredictionContextCache { + self.base.shared_context_cache() + } + + fn atn(&self) -> &ATN { + self.base.atn() + } + + fn decision_to_dfa(&self) -> &Vec> { + self.base.decision_to_dfa() + } +} diff --git a/runtime/Rust/src/parser_rule_context.rs b/runtime/Rust/src/parser_rule_context.rs new file mode 100644 index 0000000000..2cb6afff10 --- /dev/null +++ b/runtime/Rust/src/parser_rule_context.rs @@ -0,0 +1,707 @@ +//! Full parser node +use std::any::{type_name, Any}; +use std::borrow::{Borrow, BorrowMut}; +use std::cell::{Ref, RefCell, RefMut}; +use std::fmt::{Debug, Error, Formatter}; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; + +use better_any::{Tid, TidAble, TidExt}; + +use crate::errors::ANTLRError; +use crate::interval_set::Interval; +use crate::parser::ParserNodeType; +use crate::rule_context::{BaseRuleContext, CustomRuleContext, RuleContext}; +use crate::token::Token; +use crate::token_factory::TokenFactory; +use crate::tree::{ParseTree, ParseTreeVisitor, TerminalNode, Tree, VisitableDyn}; +use crate::CoerceTo; + +/// Syntax tree node for particular parser rule. +/// +/// Not yet good for custom implementations so currently easiest option +/// is to just copy `BaseParserRuleContext` or `BaseRuleContext` and strip/extend them +#[allow(missing_docs)] +pub trait ParserRuleContext<'input>: + ParseTree<'input> + RuleContext<'input> + Debug + Tid<'input> +{ + fn set_exception(&self, _e: ANTLRError) {} + + fn set_start(&self, _t: Option<>::Tok>) {} + + /// Get the initial token in this context. + /// Note that the range from start to stop is inclusive, so for rules that do not consume anything + /// (for example, zero length or error productions) this token may exceed stop. + /// + fn start<'a>(&'a self) -> Ref<'a, >::Inner> + where + 'input: 'a, + { + unimplemented!() + } + fn start_mut<'a>(&'a self) -> RefMut<'a, >::Tok> + where + 'input: 'a, + { + unimplemented!() + } + + fn set_stop(&self, _t: Option<>::Tok>) {} + /// + /// Get the final token in this context. 
+ /// Note that the range from start to stop is inclusive, so for rules that do not consume anything + /// (for example, zero length or error productions) this token may precede start. + /// + fn stop<'a>(&'a self) -> Ref<'a, >::Inner> + where + 'input: 'a, + { + unimplemented!() + } + fn stop_mut<'a>(&'a self) -> RefMut<'a, >::Tok> + where + 'input: 'a, + { + unimplemented!() + } + + // fn add_token_node(&self, token: TerminalNode<'input, Self::TF>) { } + // fn add_error_node(&self, bad_token: ErrorNode<'input, Self::TF>) { } + + fn add_child(&self, _child: Rc<>::Type>) {} + fn remove_last_child(&self) {} + + // fn enter_rule(&self, listener: &mut dyn Any); + // fn exit_rule(&self, listener: &mut dyn Any); + + fn child_of_type(&self, pos: usize) -> Option> + where + T: ParserRuleContext<'input, TF = Self::TF, Ctx = Self::Ctx> + 'input, + Self: Sized, + { + self.get_children() + .filter(|it| it.deref().self_id() == T::id()) + .nth(pos) + .and_then(|it| it.downcast_rc().ok()) + } + + // todo, return iterator + fn children_of_type(&self) -> Vec> + where + T: ParserRuleContext<'input, TF = Self::TF, Ctx = Self::Ctx> + 'input, + Self: Sized, + { + self.get_children() + // .filter(|it| it.deref().self_id() == T::id()) + .filter_map(|it| it.downcast_rc().ok()) + .collect() + } + + fn get_token(&self, ttype: isize, pos: usize) -> Option>> { + self.get_children() + // .filter(|it| it.deref().self_id() == TerminalNode::<'input, Self::Ctx>::id()) + .filter_map(|it| it.downcast_rc::>().ok()) + .filter(|it| it.symbol.borrow().get_token_type() == ttype) + .nth(pos) + } + + fn get_tokens(&self, ttype: isize) -> Vec>> { + self.get_children() + // .iter() + .filter_map(|it| it.downcast_rc::>().ok()) + // .filter(|it| it.deref().self_id() == TerminalNode::<'input, Self::Ctx>::id()) + // .map(|it| cast_rc::<'input, TerminalNode<'input, Self::Ctx>>(it.clone())) + .filter(|it| it.symbol.borrow().get_token_type() == ttype) + .collect() + } + + // fn upcast(&self) -> &dyn ParserRuleContext<'input, TF=Self::TF>; +} + +//Allows to implement generic functions on trait object as well +/// Extention trait implemented for all `ParserRuleContext`s +pub trait RuleContextExt<'input>: ParserRuleContext<'input> { + /// Prints list of parent rules + fn to_string(self: &Rc, rule_names: Option<&[&str]>, stop: Option>) -> String + where + Z: ParserRuleContext<'input, Ctx = Self::Ctx, TF = Self::TF> + ?Sized + 'input, + Self::Ctx: ParserNodeType<'input, Type = Z>, + Self: CoerceTo; + + #[doc(hidden)] + fn accept_children(&self, visitor: &mut V) + where + V: ParseTreeVisitor<'input, Self::Ctx> + ?Sized, + >::Type: VisitableDyn; +} + +impl<'input, T: ParserRuleContext<'input> + ?Sized + 'input> RuleContextExt<'input> for T { + fn to_string(self: &Rc, rule_names: Option<&[&str]>, stop: Option>) -> String + where + Z: ParserRuleContext<'input, Ctx = T::Ctx, TF = T::TF> + ?Sized + 'input, + T::Ctx: ParserNodeType<'input, Type = Z>, + T: CoerceTo, + { + let mut result = String::from("["); + let mut next: Option> = Some(self.clone().coerce_rc_to()); + while let Some(ref p) = next { + if stop.is_some() && (stop.is_none() || Rc::ptr_eq(p, stop.as_ref().unwrap())) { + break; + } + + if let Some(rule_names) = rule_names { + let rule_index = p.get_rule_index(); + let rule_name = rule_names + .get(rule_index) + .map(|&it| it.to_owned()) + .unwrap_or_else(|| rule_index.to_string()); + result.extend(rule_name.chars()); + result.push(' '); + } else { + if !p.is_empty() { + result.extend(p.get_invoking_state().to_string().chars()); + 
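+                    // with no rule names the chain falls back to invoking
+                    // ATN state numbers; either way the result reads like
+                    // "[child parent grandparent]" up to (excluding) `stop`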
result.push(' '); + } + } + + next = p.get_parent().clone(); + } + + if result.chars().last() == Some(' ') { + result.pop(); + } + + result.push(']'); + return result; + } + + fn accept_children(&self, visitor: &mut V) + where + V: ParseTreeVisitor<'input, Self::Ctx> + ?Sized, + >::Type: VisitableDyn, + { + self.get_children() + .for_each(|child| child.accept_dyn(visitor)) + } +} + +#[inline] +#[doc(hidden)] +pub fn cast<'a, T, Result>(ctx: &T) -> &Result +where + T: ParserRuleContext<'a> + 'a + ?Sized, + Result: ParserRuleContext<'a, Ctx = T::Ctx> + 'a, +{ + ctx.downcast_ref().unwrap() + // unsafe { &*(ctx as *const T as *const Result) } +} + +/// Should be called from generated parser only +/// Don't call it by yourself. +#[inline] +#[doc(hidden)] +// technically should be unsafe but in order to not force unsafe into user code +// it is just #[doc(hidden)] +pub fn cast_mut<'a, T: ParserRuleContext<'a> + 'a + ?Sized, Result: 'a>( + ctx: &mut Rc, +) -> &mut Result { + // if Rc::strong_count(ctx) != 1 { panic!("cant mutate Rc with multiple strong ref count"); } + // is it safe because parser does not save/move mutable references anywhere. + // they are only used to write data immediately in the corresponding expression + // unsafe { &mut *(Rc::get_mut_unchecked(ctx) as *mut T as *mut Result) } + unsafe { + let ptr = Rc::as_ptr(ctx) as *mut T as *mut Result; + + &mut *ptr + } +} + +// workaround newtype for cycle in trait definition +// i.e. you can't have `trait ParserRuleContext:BaseTrait` +// #[derive(Clone)] +// pub struct ParseTreeNode<'input,TF:TokenFactory<'input>>(pub Rc>); +// +// impl<'input,TF:TokenFactory<'input>> Deref for ParseTreeNode<'input,TF>{ +// type Target = dyn ParserRuleContext<'input,TF=TF>; +// +// fn deref(&self) -> &Self::Target { +// self.0.deref() +// } +// } + +// pub type ParserRuleContextType<'input, T> = Rc + 'input>; +// pub type ParserRuleContextType<'input,T> = ParseTreeNode<'input,T>; + +/// Default rule context implementation that keeps everything provided by parser +pub struct BaseParserRuleContext<'input, Ctx: CustomRuleContext<'input>> { + base: BaseRuleContext<'input, Ctx>, + + start: RefCell<>::Tok>, + stop: RefCell<>::Tok>, + /// error if there was any in this node + pub exception: Option>, + /// List of children of current node + pub(crate) children: RefCell>::Type>>>, +} + +better_any::tid! 
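+// better_any's tid! macro wires up the lifetime-aware TypeId machinery that
+// child_of_type/downcast_rc rely on; std::any::Any alone cannot be used here
+// because contexts borrow from the 'input token stream and are not 'static.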
{ impl<'i,Ctx> TidAble<'i> for BaseParserRuleContext<'i,Ctx> where Ctx:CustomRuleContext<'i> } + +impl<'input, Ctx: CustomRuleContext<'input>> Debug for BaseParserRuleContext<'input, Ctx> { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + f.write_str(type_name::()) + } +} + +impl<'input, Ctx: CustomRuleContext<'input>> RuleContext<'input> + for BaseParserRuleContext<'input, Ctx> +{ + fn get_invoking_state(&self) -> isize { + self.base.get_invoking_state() + } + + fn set_invoking_state(&self, t: isize) { + self.base.set_invoking_state(t) + } + + fn get_parent_ctx(&self) -> Option>::Type>> { + self.base.get_parent_ctx() + } + + fn set_parent(&self, parent: &Option>::Type>>) { + self.base.set_parent(parent) + } +} + +impl<'input, Ctx: CustomRuleContext<'input>> CustomRuleContext<'input> + for BaseParserRuleContext<'input, Ctx> +{ + type TF = Ctx::TF; + type Ctx = Ctx::Ctx; + + fn get_rule_index(&self) -> usize { + self.base.ext.get_rule_index() + } +} + +// unsafe impl<'input, Ctx: CustomRuleContext<'input>> Tid for BaseParserRuleContext<'input, Ctx> { +// fn self_id(&self) -> TypeId { self.base.ext.self_id() } +// +// fn id() -> TypeId +// where +// Self: Sized, +// { +// Ctx::id() +// } +// } + +impl<'input, Ctx: CustomRuleContext<'input>> Deref for BaseParserRuleContext<'input, Ctx> { + type Target = Ctx; + + fn deref(&self) -> &Self::Target { + &self.base.ext + } +} + +impl<'input, Ctx: CustomRuleContext<'input>> DerefMut for BaseParserRuleContext<'input, Ctx> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.base.ext + } +} + +impl<'input, Ctx: CustomRuleContext<'input>> Borrow for BaseParserRuleContext<'input, Ctx> { + fn borrow(&self) -> &Ctx { + &self.base.ext + } +} + +impl<'input, Ctx: CustomRuleContext<'input>> BorrowMut for BaseParserRuleContext<'input, Ctx> { + fn borrow_mut(&mut self) -> &mut Ctx { + &mut self.base.ext + } +} + +impl<'input, Ctx: CustomRuleContext<'input> + TidAble<'input>> ParserRuleContext<'input> + for BaseParserRuleContext<'input, Ctx> +{ + fn set_exception(&self, _e: ANTLRError) { /*self.exception = Some(Box::new(e));*/ + } + + fn set_start(&self, t: Option<>::Tok>) { + *self.start.borrow_mut() = t.unwrap_or(Ctx::TF::create_invalid().clone()); + } + + fn start<'a>(&'a self) -> Ref<'a, >::Inner> + where + 'input: 'a, + { + Ref::map(self.start.borrow(), |t| t.borrow()) + } + + fn start_mut<'a>(&'a self) -> RefMut<'a, >::Tok> + where + 'input: 'a, + { + self.start.borrow_mut() + } + + fn set_stop(&self, t: Option<>::Tok>) { + *self.stop.borrow_mut() = t.unwrap_or(Ctx::TF::create_invalid().clone()); + } + + fn stop<'a>(&'a self) -> Ref<'a, >::Inner> + where + 'input: 'a, + { + Ref::map(self.stop.borrow(), |t| t.borrow()) + } + + fn stop_mut<'a>(&'a self) -> RefMut<'a, >::Tok> + where + 'input: 'a, + { + self.stop.borrow_mut() + } + + // fn add_token_node(&self, token: TerminalNode<'input, Ctx::TF>) -> ParserRuleContextType<'input, Ctx::TF> { + // let node: ParserRuleContextType<'input, Ctx::TF> = Rc::new(token); + // self.children.borrow_mut().push(node.clone()); + // node + // } + // + // fn add_error_node(&self, bad_token: ErrorNode<'input, Ctx::TF>) -> ParserRuleContextType<'input, Ctx::TF> { + // // bad_token.base.parent_ctx = + // let node: ParserRuleContextType<'input, Ctx::TF> = Rc::new(bad_token); + // // Backtrace::new().frames()[0].symbols()[0]; + // + // self.children.borrow_mut().push(node.clone()); + // node + // } + + fn add_child(&self, child: Rc<>::Type>) { + self.children.borrow_mut().push(child); + } + + fn 
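+    // Note: start/stop tokens and the child list all sit behind RefCells so
+    // the parser can keep growing the tree through shared Rc handles while
+    // holding only &self.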
remove_last_child(&self) { + self.children.borrow_mut().pop(); + } + + // fn enter_rule(&self, listener: &mut dyn Any) { + // Ctx::enter(self, listener) + // } + // + // fn exit_rule(&self, listener: &mut dyn Any) { + // Ctx::exit(self, listener) + // } + // + // fn upcast(&self) -> &dyn ParserRuleContext<'input, TF=Ctx::TF> { + // self + // } +} + +impl<'input, Ctx: CustomRuleContext<'input>> Tree<'input> for BaseParserRuleContext<'input, Ctx> { + fn get_parent(&self) -> Option>::Type>> { + self.get_parent_ctx() + } + + fn has_parent(&self) -> bool { + self.base.parent_ctx.borrow().is_some() + } + + fn get_payload(&self) -> Box { + unimplemented!() + } + + fn get_child(&self, i: usize) -> Option>::Type>> { + self.children.borrow().get(i).cloned() + } + + fn get_child_count(&self) -> usize { + self.children.borrow().len() + } + + // fn get_children<'a>(&'a self) -> Box>::Type>> + 'a> where 'input:'a{ + // let len = self.children.borrow().len(); + // + // Box::new(IndexIter::new(self.children.borrow(),len)) + // } + + // fn get_children_full(&self) -> &RefCell>::Type>>> { + // &self.children + // } +} + +impl<'input, Ctx: CustomRuleContext<'input> + TidAble<'input>> ParseTree<'input> + for BaseParserRuleContext<'input, Ctx> +{ + fn get_source_interval(&self) -> Interval { + Interval { + a: self.start().get_token_index(), + b: self.stop().get_token_index(), + } + } + + fn get_text(&self) -> String { + let children = self.get_children(); + let mut result = String::new(); + + for child in children { + result += &child.get_text() + } + + result + } +} +#[allow(missing_docs)] +impl<'input, Ctx: CustomRuleContext<'input> + 'input> BaseParserRuleContext<'input, Ctx> { + pub fn new_parser_ctx( + parent_ctx: Option>::Type>>, + invoking_state: isize, + ext: Ctx, + ) -> Self { + Self { + base: BaseRuleContext::new_parser_ctx(parent_ctx, invoking_state, ext), + start: RefCell::new(Ctx::TF::create_invalid()), + stop: RefCell::new(Ctx::TF::create_invalid()), + exception: None, + children: RefCell::new(vec![]), + } + } + pub fn copy_from + ?Sized>( + ctx: &T, + ext: Ctx, + ) -> Self { + Self { + base: BaseRuleContext::new_parser_ctx( + ctx.get_parent_ctx(), + ctx.get_invoking_state(), + ext, + ), + start: RefCell::new(ctx.start_mut().clone()), + stop: RefCell::new(ctx.stop_mut().clone()), + exception: None, + children: RefCell::new(ctx.get_children().collect()), + } + } + + // pub fn to_string(self: Rc, rule_names: Option<&[&str]>, stop: Option>) -> String { + // (self as Rc<>::Type>).to_string(rule_names, stop) + // } +} + +/////////////////////////////////////////////// +// Needed to significantly reduce boilerplate in the generated code, +// because there is no simple way to delegate trait for enum. 
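+// As a rough, hand-written illustration (names are hypothetical, not taken
+// from generated output), a generated enum context only needs Deref plus the
+// DerefSeal marker below to inherit every context trait from the blanket
+// impls that follow:
+//
+//     impl<'input> Deref for ExprContextAll<'input> {
+//         type Target = dyn ExprContextAttrs<'input> + 'input;
+//         fn deref(&self) -> &Self::Target {
+//             match self { /* delegate to the active variant */ }
+//         }
+//     }
+//     impl<'input> DerefSeal for ExprContextAll<'input> {}
+//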
+// Will not be necessary if some kind of variant types RFC will be merged +////////////////////////////////////////////// +/// workaround trait to overcome conflicting implementations error +#[doc(hidden)] +pub trait DerefSeal: Deref {} + +impl<'input, T, I> ParserRuleContext<'input> for T +where + T: DerefSeal + 'input + Debug + Tid<'input>, + I: ParserRuleContext<'input> + 'input + ?Sized, +{ + fn set_exception(&self, e: ANTLRError) { + self.deref().set_exception(e) + } + + fn set_start(&self, t: Option<>::Tok>) { + self.deref().set_start(t) + } + + fn start<'a>(&'a self) -> Ref<'a, >::Inner> + where + 'input: 'a, + { + self.deref().start() + } + + fn start_mut<'a>(&'a self) -> RefMut<'a, >::Tok> + where + 'input: 'a, + { + self.deref().start_mut() + } + + fn set_stop(&self, t: Option<>::Tok>) { + self.deref().set_stop(t) + } + + fn stop<'a>(&'a self) -> Ref<'a, >::Inner> + where + 'input: 'a, + { + self.deref().stop() + } + + fn stop_mut<'a>(&'a self) -> RefMut<'a, >::Tok> + where + 'input: 'a, + { + self.deref().stop_mut() + } + + fn add_child(&self, child: Rc<>::Type>) { + self.deref().add_child(child) + } + + fn remove_last_child(&self) { + self.deref().remove_last_child() + } + + // fn enter_rule(&self, listener: &mut dyn Any) { self.deref().enter_rule(listener) } + // + // fn exit_rule(&self, listener: &mut dyn Any) { self.deref().exit_rule(listener) } + // + // fn upcast(&self) -> &dyn ParserRuleContext<'input, TF=Self::TF> { self.deref().upcast() } +} + +impl<'input, T, I> RuleContext<'input> for T +where + T: DerefSeal + 'input + Debug + Tid<'input>, + I: ParserRuleContext<'input> + 'input + ?Sized, +{ + fn get_invoking_state(&self) -> isize { + self.deref().get_invoking_state() + } + + fn set_invoking_state(&self, t: isize) { + self.deref().set_invoking_state(t) + } + + fn is_empty(&self) -> bool { + self.deref().is_empty() + } + + fn get_parent_ctx(&self) -> Option>::Type>> { + self.deref().get_parent_ctx() + } + + fn set_parent(&self, parent: &Option>::Type>>) { + self.deref().set_parent(parent) + } +} + +impl<'input, T, I> ParseTree<'input> for T +where + T: DerefSeal + 'input + Debug + Tid<'input>, + I: ParserRuleContext<'input> + 'input + ?Sized, +{ + fn get_source_interval(&self) -> Interval { + self.deref().get_source_interval() + } + + fn get_text(&self) -> String { + self.deref().get_text() + } +} + +impl<'input, T, I> Tree<'input> for T +where + T: DerefSeal + 'input + Debug + Tid<'input>, + I: ParserRuleContext<'input> + 'input + ?Sized, +{ + fn get_parent(&self) -> Option>::Type>> { + self.deref().get_parent() + } + + fn has_parent(&self) -> bool { + self.deref().has_parent() + } + + fn get_payload(&self) -> Box { + self.deref().get_payload() + } + + fn get_child(&self, i: usize) -> Option>::Type>> { + self.deref().get_child(i) + } + + fn get_child_count(&self) -> usize { + self.deref().get_child_count() + } + + fn get_children<'a>( + &'a self, + ) -> Box>::Type>> + 'a> + where + 'input: 'a, + { + self.deref().get_children() + } + + // fn get_children_full(&self) -> &RefCell>::Type>>> { self.deref().get_children_full() } +} + +impl<'input, T, I> CustomRuleContext<'input> for T +where + T: DerefSeal + 'input + Debug + Tid<'input>, + I: ParserRuleContext<'input> + 'input + ?Sized, +{ + type TF = I::TF; + type Ctx = I::Ctx; + + fn get_rule_index(&self) -> usize { + self.deref().get_rule_index() + } + + // fn type_rule_index() -> usize where Self: Sized { unimplemented!() } + + fn get_alt_number(&self) -> isize { + self.deref().get_alt_number() + } + + fn 
set_alt_number(&self, _alt_number: isize) { + self.deref().set_alt_number(_alt_number) + } +} + +// +// fn get_text(&self) -> String { unimplemented!() } +// +// fn add_terminal_node_child(&self, child: TerminalNode) -> TerminalNode { unimplemented!() } +// +// fn get_child_of_type(&self, i: isize, childType: reflect.Type) -> RuleContext { unimplemented!() } +// +// fn to_string_tree(&self, ruleNames Vec, recog: Recognizer) -> String { unimplemented!() } +// +// fn get_rule_context(&self) -> RuleContext { unimplemented!() } +// +// fn accept(&self, visitor: ParseTreeVisitor) -> interface { unimplemented!() } { +// return visitor.VisitChildren(prc) +// } +// +// fn get_token(&self, ttype: isize, i: isize) -> TerminalNode { unimplemented!() } +// +// fn get_tokens(&self, ttype: isize) -> Vec { unimplemented!() } +// +// fn get_payload(&self) -> interface { unimplemented!() } { +// return: prc, +// } +// +// fn get_child(&self, ctxType: reflect.Type, i: isize) -> RuleContext { unimplemented!() } +// +// +// fn get_typed_rule_context(&self, ctxType: reflect.Type, i: isize) -> RuleContext { unimplemented!() } +// +// fn get_typed_rule_contexts(&self, ctxType: reflect.Type) -> Vec { unimplemented!() } +// +// fn get_child_count(&self) -> int { unimplemented!() } +// +// fn get_source_interval(&self) -> * Interval { unimplemented!() } +// +// +// fn String(&self, ruleNames Vec, stop: RuleContext) -> String { unimplemented!() } +// +// var RuleContextEmpty = NewBaseParserRuleContext(nil, - 1) +// +// pub trait InterpreterRuleContext { +// parser_rule_context +// } +// +// pub struct BaseInterpreterRuleContext { +// base: BaseParserRuleContext, +// } +// +// fn new_base_interpreter_rule_context(parent BaseInterpreterRuleContext, invokingStateNumber: isize, ruleIndex: isize) -> * BaseInterpreterRuleContext { unimplemented!() } diff --git a/runtime/Rust/src/prediction_context.rs b/runtime/Rust/src/prediction_context.rs new file mode 100644 index 0000000000..ea751aa578 --- /dev/null +++ b/runtime/Rust/src/prediction_context.rs @@ -0,0 +1,656 @@ +use std::borrow::Cow; +use std::cell::RefCell; +use std::collections::HashMap; +use std::fmt::{Display, Error, Formatter}; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::ops::Deref; +use std::rc::Rc; + +use murmur3::murmur3_32::MurmurHasher; + +use crate::atn::ATN; +use crate::dfa::ScopeExt; +use crate::parser::ParserNodeType; +use crate::parser_atn_simulator::MergeCache; + +use crate::prediction_context::PredictionContext::{Array, Singleton}; +use crate::rule_context::RuleContext; + +use crate::transition::RuleTransition; + +pub const PREDICTION_CONTEXT_EMPTY_RETURN_STATE: isize = 0x7FFFFFFF; + +#[cfg(test)] +mod test; + +//todo make return states ATNStateRef +#[derive(Eq, Clone, Debug)] +pub enum PredictionContext { + Singleton(SingletonPredictionContext), + Array(ArrayPredictionContext), +} + +impl PartialEq for PredictionContext { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Array(s), Array(o)) => *s == *o, + (Singleton(s), Singleton(o)) => *s == *o, + _ => false, + } + } +} + +#[derive(Eq, Clone, Debug)] +pub struct ArrayPredictionContext { + cached_hash: i32, + return_states: Vec, + parents: Vec>>, +} + +impl PartialEq for ArrayPredictionContext { + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + self.cached_hash == other.cached_hash + && self.return_states == other.return_states + && self.parents.iter().zip(other.parents.iter()).all(opt_eq) + } +} + +#[inline(always)] +fn opt_eq( + arg: ( + &Option>, + &Option>, + 
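+        // the pair being compared: pointer equality is tried first (cheap and
+        // common once contexts are interned), structural equality second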
), +) -> bool { + match arg { + (Some(s), Some(o)) => Rc::ptr_eq(s, o) || *s == *o, + (None, None) => true, + _ => false, + } +} + +#[derive(Eq, Clone, Debug)] +pub struct SingletonPredictionContext { + cached_hash: i32, + return_state: isize, + parent_ctx: Option>, +} + +impl PartialEq for SingletonPredictionContext { + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + self.cached_hash == other.cached_hash + && self.return_state == other.return_state + && opt_eq((&self.parent_ctx, &other.parent_ctx)) + } +} + +impl SingletonPredictionContext { + #[inline(always)] + fn is_empty(&self) -> bool { + self.return_state == PREDICTION_CONTEXT_EMPTY_RETURN_STATE && self.parent_ctx == None + } +} + +impl Display for PredictionContext { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + match self { + Singleton(s) => { + if s.return_state == PREDICTION_CONTEXT_EMPTY_RETURN_STATE { + f.write_str("$") + } else { + if let Some(parent) = &s.parent_ctx { + f.write_fmt(format_args!("{} {}", s.return_state, parent)) + } else { + f.write_fmt(format_args!("{}", s.return_state)) + } + } + } + Array(arr) => { + f.write_str("[")?; + for i in 0..arr.return_states.len() { + if i > 0 { + f.write_str(", ")?; + } + if arr.return_states[i] == PREDICTION_CONTEXT_EMPTY_RETURN_STATE { + f.write_str("$")?; + } + f.write_str(&arr.return_states[i].to_string())?; + if let Some(parent) = &arr.parents[i] { + f.write_fmt(format_args!(" {}", parent))?; + } else { + f.write_str(" null")?; + } + } + + f.write_str("]") + } + } + } +} + +//impl PartialEq for PredictionContext { +// fn eq(&self, other: &Self) -> bool { +// self.hash_code() == other.hash_code() +// } +//} + +impl Hash for PredictionContext { + fn hash(&self, state: &mut H) { + state.write_i32(self.hash_code()) + } +} + +thread_local! { + pub static EMPTY_PREDICTION_CONTEXT: Rc = + Rc::new(PredictionContext::new_empty()); +} + +impl PredictionContext { + pub fn new_array( + parents: Vec>>, + return_states: Vec, + ) -> PredictionContext { + PredictionContext::Array(ArrayPredictionContext { + cached_hash: 0, + parents, + return_states, + }) + } + + pub fn new_singleton( + parent_ctx: Option>, + return_state: isize, + ) -> PredictionContext { + PredictionContext::Singleton(SingletonPredictionContext { + cached_hash: 0, + parent_ctx, + return_state, + }) + .modify_with(|x| x.calc_hash()) + } + + pub fn new_empty() -> PredictionContext { + let mut ctx = PredictionContext::Singleton(SingletonPredictionContext { + cached_hash: 0, + parent_ctx: None, + return_state: PREDICTION_CONTEXT_EMPTY_RETURN_STATE, + }); + ctx.calc_hash(); + ctx + } + + pub fn calc_hash(&mut self) { + let mut hasher = MurmurHasher::default(); + match self { + PredictionContext::Singleton(SingletonPredictionContext { + parent_ctx, + return_state, + .. + }) => { + hasher.write_i32(match parent_ctx { + None => 0, + Some(x) => x.hash_code(), + }); + hasher.write_i32(*return_state as i32); + } + PredictionContext::Array(ArrayPredictionContext { + parents, + return_states, + .. + }) => { + parents.iter().for_each(|x| { + hasher.write_i32(match x { + None => 0, + Some(x) => x.hash_code(), + }) + }); + return_states + .iter() + .for_each(|x| hasher.write_i32(*x as i32)); + } // PredictionContext::Empty { .. } => {} + }; + + let hash = hasher.finish() as i32; + + match self { + PredictionContext::Singleton(SingletonPredictionContext { cached_hash, .. }) + | PredictionContext::Array(ArrayPredictionContext { cached_hash, .. }) +// | PredictionContext::Empty { cached_hash, .. 
} + => *cached_hash = hash, + }; + } + + pub fn get_parent(&self, index: usize) -> Option<&Rc> { + match self { + PredictionContext::Singleton(singleton) => { + // assert_eq!(index, 0); + singleton.parent_ctx.as_ref() + } + PredictionContext::Array(array) => array.parents[index].as_ref(), + } + } + + pub fn get_return_state(&self, index: usize) -> isize { + match self { + PredictionContext::Singleton(SingletonPredictionContext { return_state, .. }) => { + *return_state + } + PredictionContext::Array(ArrayPredictionContext { return_states, .. }) => { + return_states[index] + } + } + } + + pub fn length(&self) -> usize { + match self { + PredictionContext::Singleton { .. } => 1, + PredictionContext::Array(ArrayPredictionContext { return_states, .. }) => { + return_states.len() + } + } + } + + #[inline(always)] + pub fn is_empty(&self) -> bool { + if let PredictionContext::Singleton(singleton) = self { + return singleton.is_empty(); + } + self.get_return_state(0) == PREDICTION_CONTEXT_EMPTY_RETURN_STATE + } + + #[inline(always)] + pub fn has_empty_path(&self) -> bool { + self.get_return_state(self.length() - 1) == PREDICTION_CONTEXT_EMPTY_RETURN_STATE + } + + #[inline(always)] + pub fn hash_code(&self) -> i32 { + match self { + PredictionContext::Singleton(SingletonPredictionContext { cached_hash, .. }) + | PredictionContext::Array(ArrayPredictionContext { cached_hash, .. }) => *cached_hash, + } + } + + fn to_array(&self) -> Cow<'_, ArrayPredictionContext> { + match self { + PredictionContext::Singleton(s) => Cow::Owned(ArrayPredictionContext { + cached_hash: 0, + parents: vec![s.parent_ctx.clone()], + return_states: vec![s.return_state], + }), + PredictionContext::Array(arr) => Cow::Borrowed(arr), + } + } + + #[inline(always)] + pub fn alloc(mut self) -> Rc { + self.calc_hash(); + Rc::new(self) + } + + pub fn merge( + a: &Rc, + b: &Rc, + root_is_wildcard: bool, + merge_cache: &mut Option<&mut MergeCache>, + // eq_hash:&mut HashSet<(*const PredictionContext,*const PredictionContext)> + ) -> Rc { + if Rc::ptr_eq(a, b) || **a == **b { + return a.clone(); + } + + if let Some(cache) = merge_cache { + if let Some(old) = cache + .get(&(a.clone(), b.clone())) + .or_else(|| cache.get(&(b.clone(), a.clone()))) + { + // if let Some(old) = cache.get(a) + // .and_then(|it|it.get(b)) + // .or_else(||cache.get(b).and_then(|it|it.get(a))){ + return old.clone(); + } + } + // println!("merging {} {}",a,b); + + let r = match (a.deref(), b.deref()) { + (PredictionContext::Singleton(sa), PredictionContext::Singleton(sb)) => { + let result = Self::merge_singletons(sa, sb, root_is_wildcard, merge_cache); + // println!("single result = {}",result); + result + } + (sa, sb) => { + if root_is_wildcard { + if sa.is_empty() { + return EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()); + } + if sb.is_empty() { + return EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()); + } + } + + let result = + Self::merge_arrays(sa.to_array(), sb.to_array(), root_is_wildcard, merge_cache) + .alloc(); + + let result = if &*result == sa { + a.clone() + } else if &*result == sb { + b.clone() + } else { + result //.alloc() + }; + // println!("array result = {}",result); + + result + } + }; + assert_ne!(r.hash_code(), 0); + if let Some(cache) = merge_cache { + // cache.entry(a.clone()).or_insert_with(||HashMap::new()) + // .insert(b.clone(),r.clone()); + cache.insert((a.clone(), b.clone()), r.clone()); + } + return r; + } + + fn merge_singletons( + a: &SingletonPredictionContext, + b: &SingletonPredictionContext, + root_is_wildcard: bool, + 
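+        // memoization shared with merge(): results are cached per (a, b)
+        // pair, keeping repeated graph merges during closure cheap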
merge_cache: &mut Option<&mut MergeCache>, + ) -> Rc { + Self::merge_root(a, b, root_is_wildcard).unwrap_or_else(|| { + if a.return_state == b.return_state { + let parent = Self::merge( + a.parent_ctx.as_ref().unwrap(), + b.parent_ctx.as_ref().unwrap(), + root_is_wildcard, + merge_cache, + ); + if Rc::ptr_eq(&parent, a.parent_ctx.as_ref().unwrap()) { + Singleton(a.clone()) + } else if Rc::ptr_eq(&parent, b.parent_ctx.as_ref().unwrap()) { + Singleton(b.clone()) + } else { + Self::new_singleton(Some(parent), a.return_state) + } + } else { + let parents = if a.parent_ctx == b.parent_ctx { + vec![a.parent_ctx.clone(), a.parent_ctx.clone()] + } else { + vec![a.parent_ctx.clone(), b.parent_ctx.clone()] + }; + let mut result = ArrayPredictionContext { + cached_hash: -1, + parents, + return_states: vec![a.return_state, b.return_state], + }; + // if !result.return_states.is_sorted() + if !result.return_states.windows(2).all(|x| x[0] <= x[1]) { + result.parents.swap(0, 1); + result.return_states.swap(0, 1); + } + Array(result) + } + .alloc() + }) + } + + fn merge_root( + a: &SingletonPredictionContext, + b: &SingletonPredictionContext, + root_is_wildcard: bool, + ) -> Option> { + if root_is_wildcard { + if a.is_empty() || b.is_empty() { + return Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())); + } + } else { + if a.is_empty() && b.is_empty() { + return Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())); + } + if a.is_empty() { + return Some( + Self::new_array( + vec![b.parent_ctx.clone(), None], + vec![b.return_state, PREDICTION_CONTEXT_EMPTY_RETURN_STATE], + ) + .alloc(), + ); + } + if b.is_empty() { + return Some( + Self::new_array( + vec![a.parent_ctx.clone(), None], + vec![a.return_state, PREDICTION_CONTEXT_EMPTY_RETURN_STATE], + ) + .alloc(), + ); + } + } + + None + } + + fn merge_arrays( + a: Cow<'_, ArrayPredictionContext>, + b: Cow<'_, ArrayPredictionContext>, + root_is_wildcard: bool, + merge_cache: &mut Option<&mut MergeCache>, + ) -> PredictionContext { + // let a = a.deref(); + // let b = b.deref(); + let mut merged = ArrayPredictionContext { + cached_hash: -1, + parents: Vec::with_capacity(a.return_states.len() + b.return_states.len()), + return_states: Vec::with_capacity(a.return_states.len() + b.return_states.len()), + }; + let mut i = 0; + let mut j = 0; + + while i < a.parents.len() && j < b.parents.len() { + let a_parent = a.parents[i].as_ref(); + let b_parent = b.parents[j].as_ref(); + if a.return_states[i] == b.return_states[j] { + let payload = a.return_states[i]; + let both = payload == PREDICTION_CONTEXT_EMPTY_RETURN_STATE + && a_parent.is_none() + && b_parent.is_none(); + let ax_ax = a_parent.is_some() && b_parent.is_some() && a_parent == b_parent; + + if both || ax_ax { + merged.return_states.push(payload); + merged.parents.push(a_parent.cloned()); + } else { + let merged_parent = Self::merge( + a_parent.unwrap(), + b_parent.unwrap(), + root_is_wildcard, + merge_cache, + ); + merged.return_states.push(payload); + merged.parents.push(Some(merged_parent)); + } + i += 1; + j += 1; + } else if a.return_states[i] < b.return_states[j] { + merged.return_states.push(a.return_states[i]); + merged.parents.push(a_parent.cloned()); + i += 1; + } else { + merged.return_states.push(b.return_states[j]); + merged.parents.push(b_parent.cloned()); + j += 1; + } + } + + if i < a.return_states.len() { + for p in i..a.return_states.len() { + merged.parents.push(a.parents[p].clone()); + merged.return_states.push(a.return_states[p]); + } + } + if j < b.return_states.len() { + for p in 
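+        // the two-pointer walk above has merged the overlapping prefix
+        // (both inputs are sorted by return state); what's left is to copy
+        // the unconsumed tail of b, as was just done for a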
j..b.return_states.len() { + merged.parents.push(b.parents[p].clone()); + merged.return_states.push(b.return_states[p]); + } + } + + if merged.parents.len() < a.return_states.len() + b.return_states.len() { + if merged.parents.len() == 1 { + Self::new_singleton(merged.parents[0].clone(), merged.return_states[0]); + } + merged.return_states.shrink_to_fit(); + merged.parents.shrink_to_fit(); + } + + PredictionContext::combine_common_parents(&mut merged); + + let m = Array(merged); + + // if &m == a.deref(){ return ; } + // if &m == b.deref(){ return ; } + + return m; + } + + pub fn from_rule_context<'input, Ctx: ParserNodeType<'input>>( + atn: &ATN, + outer_context: &Ctx::Type, + ) -> Rc { + if outer_context.get_parent_ctx().is_none() || outer_context.is_empty() + /*ptr::eq(outer_context, empty_ctx().as_ref())*/ + { + return EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()); + } + + let parent = PredictionContext::from_rule_context::( + atn, + outer_context.get_parent_ctx().unwrap().deref(), + ); + + let transition = atn.states[outer_context.get_invoking_state() as usize] + .get_transitions() + .first() + .unwrap() + .deref() + .cast::(); + + PredictionContext::new_singleton(Some(parent), transition.follow_state as isize).alloc() + } + + fn combine_common_parents(array: &mut ArrayPredictionContext) { + let mut uniq_parents = + HashMap::>, Option>>::new(); + for p in 0..array.parents.len() { + let parent = array.parents[p].as_ref().cloned(); + if !uniq_parents.contains_key(&parent) { + uniq_parents.insert(parent.clone(), parent.clone()); + } + } + + array.parents.iter_mut().for_each(|parent| { + *parent = (*uniq_parents.get(parent).unwrap()).clone(); + }); + } +} + +// +// fn get_cached_base_prediction_context(context PredictionContext, contextCache: * PredictionContextCache, visited: map[PredictionContext]PredictionContext) -> PredictionContext { unimplemented!() } + +///Public for implementation reasons +#[derive(Debug)] +pub struct PredictionContextCache { + //todo test dashmap + cache: RefCell, Rc, MurmurHasherBuilder>>, +} + +#[doc(hidden)] +#[derive(Debug)] +pub struct MurmurHasherBuilder {} + +impl BuildHasher for MurmurHasherBuilder { + type Hasher = MurmurHasher; + + fn build_hasher(&self) -> Self::Hasher { + MurmurHasher::default() + } +} + +impl PredictionContextCache { + #[doc(hidden)] + pub fn new() -> PredictionContextCache { + PredictionContextCache { + cache: RefCell::new(HashMap::with_hasher(MurmurHasherBuilder {})), + } + } + + #[doc(hidden)] + pub fn get_shared_context( + &self, + context: &Rc, + visited: &mut HashMap<*const PredictionContext, Rc>, + ) -> Rc { + if context.is_empty() { + return context.clone(); + } + + if let Some(old) = visited.get(&(context.deref() as *const PredictionContext)) { + return old.clone(); + } + + if let Some(old) = self.cache.borrow().get(context) { + return old.clone(); + } + let mut parents = Vec::with_capacity(context.length()); + let mut changed = false; + for i in 0..parents.len() { + let parent = self.get_shared_context(context.get_parent(i).unwrap(), visited); + if changed || &parent != context.get_parent(i).unwrap() { + if !changed { + for j in 0..i { + parents.push(context.get_parent(j).cloned()) + } + changed = true; + } + parents.push(Some(parent.clone())) + } + } + if !changed { + self.cache + .borrow_mut() + .insert(context.clone(), context.clone()); + visited.insert(context.deref(), context.clone()); + return context.clone(); + } + + let updated = if parents.len() == 0 { + return EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()); + } else 
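+        // rebuild only what changed: a single remaining parent collapses back
+        // into a singleton, otherwise the array keeps its return states and
+        // just swaps in the shared parents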
if parents.len() == 1 { + PredictionContext::new_singleton(parents[0].clone(), context.get_return_state(0)) + } else { + if let Array(array) = context.deref() { + PredictionContext::new_array(parents, array.return_states.clone()) + } else { + unreachable!() + } + }; + + let updated = Rc::new(updated); + self.cache + .borrow_mut() + .insert(updated.clone(), updated.clone()); + visited.insert(context.deref(), updated.clone()); + visited.insert(updated.deref(), updated.clone()); + + return updated; + } + + #[doc(hidden)] + pub fn length(&self) -> usize { + self.cache.borrow().len() + } +} diff --git a/runtime/Rust/src/prediction_context/test.rs b/runtime/Rust/src/prediction_context/test.rs new file mode 100644 index 0000000000..cfe6f0a1f8 --- /dev/null +++ b/runtime/Rust/src/prediction_context/test.rs @@ -0,0 +1,819 @@ +#![allow(non_snake_case)] + +use std::collections::VecDeque; + +use super::*; + +fn root_is_wildcard() -> bool { + true +} + +fn full_ctx() -> bool { + false +} + +fn empty_prediction_context() -> Rc { + EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()) +} + +#[test] +fn test_e_e() { + let r = PredictionContext::merge( + &empty_prediction_context(), + &empty_prediction_context(), + root_is_wildcard(), + &mut None, + ); + let expecting = "digraph G { +rankdir=LR; + s0[label=\"*\"]; +}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_e_e_fullctx() { + let r = PredictionContext::merge( + &empty_prediction_context(), + &empty_prediction_context(), + full_ctx(), + &mut None, + ); + let expecting = "digraph G { +rankdir=LR; + s0[label=\"$\"]; +}\n"; + assert_eq!(expecting, to_dot_string(r, full_ctx())) +} + +#[test] +fn test_x_e() { + let r = PredictionContext::merge( + &x(), + &empty_prediction_context(), + root_is_wildcard(), + &mut None, + ); + let expecting = + String::new() + "digraph G {\n" + "rankdir=LR;\n" + " s0[label=\"*\"];\n" + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_x_e_fullctx() { + let r = PredictionContext::merge(&x(), &empty_prediction_context(), full_ctx(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|$\"];\n" + + " s1[label=\"$\"];\n" + + " s0:p0->s1[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, full_ctx())) +} + +#[test] +fn test_e_x() { + let r = PredictionContext::merge( + &empty_prediction_context(), + &x(), + root_is_wildcard(), + &mut None, + ); + let expecting = + String::new() + "digraph G {\n" + "rankdir=LR;\n" + " s0[label=\"*\"];\n" + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_e_x_fullctx() { + let r = PredictionContext::merge(&empty_prediction_context(), &x(), full_ctx(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|$\"];\n" + + " s1[label=\"$\"];\n" + + " s0:p0->s1[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, full_ctx())) +} + +#[test] +fn test_a_a() { + let r = PredictionContext::merge(&a(), &a(), root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_ae_ax() { + let a1 = a(); + let x = x(); + let a2 = PredictionContext::new_singleton(Some(x), 1).alloc(); + let r = PredictionContext::merge(&a1, &a2, 
root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_ae_ax_fullctx() { + let a1 = a(); + let x = x(); + let a2 = PredictionContext::new_singleton(Some(x), 1).alloc(); + let r = PredictionContext::merge(&a1, &a2, full_ctx(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[shape=record, label=\"|$\"];\n" + + " s2[label=\"$\"];\n" + + " s0->s1[label=\"1\"];\n" + + " s1:p0->s2[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, full_ctx())) +} + +#[test] +fn test_axe_ae() { + let x = x(); + let a1 = PredictionContext::new_singleton(Some(x), 1).alloc(); + let a2 = a(); + let r = PredictionContext::merge(&a1, &a2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_aae_ae_e_fullctx() { + let empty = EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()); + let child1 = PredictionContext::new_singleton(Some(empty.clone()), 8).alloc(); + let right = PredictionContext::merge(&empty, &child1, false, &mut None); + let left = PredictionContext::new_singleton(Some(right.clone()), 8).alloc(); + let r = PredictionContext::merge(&left, &right, false, &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|$\"];\n" + + " s1[shape=record, label=\"|$\"];\n" + + " s2[label=\"$\"];\n" + + " s0:p0->s1[label=\"8\"];\n" + + " s1:p0->s2[label=\"8\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, false)) +} + +#[test] +fn test_axe_ae_fullctx() { + let x = x(); + let a1 = PredictionContext::new_singleton(Some(x), 1).alloc(); + let a2 = a(); + let r = PredictionContext::merge(&a1, &a2, full_ctx(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[shape=record, label=\"|$\"];\n" + + " s2[label=\"$\"];\n" + + " s0->s1[label=\"1\"];\n" + + " s1:p0->s2[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, full_ctx())) +} + +#[test] +fn test_a_b() { + let r = PredictionContext::merge(&a(), &b(), root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s1[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s1[label=\"2\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_ax_ax_same() { + let x = x(); + let a1 = PredictionContext::new_singleton(Some(x.clone()), 1).alloc(); + let a2 = PredictionContext::new_singleton(Some(x.clone()), 1).alloc(); + let r = PredictionContext::merge(&a1, &a2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[label=\"1\"];\n" + + " s2[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + " s1->s2[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_ax_ax() { + let a1 = PredictionContext::new_singleton(Some(x()), 1).alloc(); + let a2 = PredictionContext::new_singleton(Some(x()), 1).alloc(); + let r = 
PredictionContext::merge(&a1, &a2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[label=\"1\"];\n" + + " s2[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + " s1->s2[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_abx_abx() { + let b1 = PredictionContext::new_singleton(Some(x()), 2).alloc(); + let b2 = PredictionContext::new_singleton(Some(x()), 2).alloc(); + let a1 = PredictionContext::new_singleton(Some(b1), 1).alloc(); + let a2 = PredictionContext::new_singleton(Some(b2), 1).alloc(); + let r = PredictionContext::merge(&a1, &a2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[label=\"1\"];\n" + + " s2[label=\"2\"];\n" + + " s3[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + " s1->s2[label=\"2\"];\n" + + " s2->s3[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_abx_acx() { + let b1 = PredictionContext::new_singleton(Some(x()), 2).alloc(); + let c = PredictionContext::new_singleton(Some(x()), 3).alloc(); + let a1 = PredictionContext::new_singleton(Some(b1), 1).alloc(); + let a2 = PredictionContext::new_singleton(Some(c), 1).alloc(); + let r = PredictionContext::merge(&a1, &a2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[shape=record, label=\"|\"];\n" + + " s2[label=\"2\"];\n" + + " s3[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + " s1:p0->s2[label=\"2\"];\n" + + " s1:p1->s2[label=\"3\"];\n" + + " s2->s3[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_ax_bx_same() { + let x = x(); + let a = PredictionContext::new_singleton(Some(x.clone()), 1).alloc(); + let b = PredictionContext::new_singleton(Some(x.clone()), 2).alloc(); + let r = PredictionContext::merge(&a, &b, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s1[label=\"1\"];\n" + + " s2[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s1[label=\"2\"];\n" + + " s1->s2[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_ax_bx() { + let a = PredictionContext::new_singleton(Some(x()), 1).alloc(); + let b = PredictionContext::new_singleton(Some(x()), 2).alloc(); + let r = PredictionContext::merge(&a, &b, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s1[label=\"1\"];\n" + + " s2[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s1[label=\"2\"];\n" + + " s1->s2[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_ae_bx() { + let x2 = x(); + let a = a(); + let b = PredictionContext::new_singleton(Some(x2), 2).alloc(); + let r = PredictionContext::merge(&a, &b, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s2[label=\"2\"];\n" + + " s1[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s2[label=\"2\"];\n" + + " s2->s1[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + 
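+// A note on reading these fixtures: nodes are PredictionContexts, edge labels
+// are return states, "*" is the wildcard root used by SLL merges and "$" the
+// empty root of full-context merges. In test_ax_bx above, for example, the
+// merged result is one record node whose two fields point at the shared
+// parent x() via return states 1 and 2.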
+#[test] +fn test_ae_bx_fullctx() { + let x2 = x(); + let a = a(); + let b = PredictionContext::new_singleton(Some(x2), 2).alloc(); + let r = PredictionContext::merge(&a, &b, full_ctx(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s2[label=\"2\"];\n" + + " s1[label=\"$\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s2[label=\"2\"];\n" + + " s2->s1[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, full_ctx())) +} + +#[ignore] // see org/antlr/v4/test/tool/TestGraphNodes.java:405 +#[test] +fn test_aex_bfx() { + let x1 = x(); + let x2 = x(); + let e = PredictionContext::new_singleton(Some(x1), 5).alloc(); + let f = PredictionContext::new_singleton(Some(x2), 6).alloc(); + let a = PredictionContext::new_singleton(Some(e), 1).alloc(); + let b = PredictionContext::new_singleton(Some(f), 2).alloc(); + let r = PredictionContext::merge(&a, &b, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s2[label=\"2\"];\n" + + " s3[label=\"3\"];\n" + + " s4[label=\"*\"];\n" + + " s1[label=\"1\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s2[label=\"2\"];\n" + + " s2->s3[label=\"6\"];\n" + + " s3->s4[label=\"9\"];\n" + + " s1->s3[label=\"5\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Ae_Ae_fullctx() { + let A1 = array(vec![EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())]); + let A2 = array(vec![EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())]); + let r = PredictionContext::merge(&A1, &A2, full_ctx(), &mut None); + let expecting = + String::new() + "digraph G {\n" + "rankdir=LR;\n" + " s0[label=\"$\"];\n" + "}\n"; + assert_eq!(expecting, to_dot_string(r, full_ctx())) +} + +#[test] +fn test_Aab_Ac() { + let A1 = array(vec![a(), b()]); + let A2 = array(vec![c()]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"||\"];\n" + + " s1[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s1[label=\"2\"];\n" + + " s0:p2->s1[label=\"3\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aa_Aa() { + let A1 = array(vec![a()]); + let A2 = array(vec![a()]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aa_Abc() { + let A1 = array(vec![a()]); + let A2 = array(vec![b(), c()]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"||\"];\n" + + " s1[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s1[label=\"2\"];\n" + + " s0:p2->s1[label=\"3\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aac_Ab() { + let A1 = array(vec![a(), c()]); + let A2 = array(vec![b()]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"||\"];\n" + + " s1[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + 
+ " s0:p1->s1[label=\"2\"];\n" + + " s0:p2->s1[label=\"3\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aab_Aa() { + let A1 = array(vec![a(), b()]); + let A2 = array(vec![a()]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s1[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s1[label=\"2\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aab_Ab() { + let A1 = array(vec![a(), b()]); + let A2 = array(vec![b()]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s1[label=\"*\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s1[label=\"2\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aax_Aby() { + let a = PredictionContext::new_singleton(x().into(), 1).alloc(); + let b = PredictionContext::new_singleton(y().into(), 2).alloc(); + let A1 = array(vec![a]); + let A2 = array(vec![b]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"|\"];\n" + + " s2[label=\"2\"];\n" + + " s3[label=\"*\"];\n" + + " s1[label=\"1\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s2[label=\"2\"];\n" + + " s2->s3[label=\"10\"];\n" + + " s1->s3[label=\"9\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aax_Aay() { + let a1 = PredictionContext::new_singleton(x().into(), 1).alloc(); + let a2 = PredictionContext::new_singleton(y().into(), 1).alloc(); + let A1 = array(vec![a1]); + let A2 = array(vec![a2]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[label=\"0\"];\n" + + " s1[shape=record, label=\"|\"];\n" + + " s2[label=\"*\"];\n" + + " s0->s1[label=\"1\"];\n" + + " s1:p0->s2[label=\"9\"];\n" + + " s1:p1->s2[label=\"10\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aaxc_Aayd() { + let a1 = PredictionContext::new_singleton(x().into(), 1).alloc(); + let a2 = PredictionContext::new_singleton(y().into(), 1).alloc(); + let A1 = array(vec![a1, c()]); + let A2 = array(vec![a2, d()]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let expecting = String::new() + + "digraph G {\n" + + "rankdir=LR;\n" + + " s0[shape=record, label=\"||\"];\n" + + " s2[label=\"*\"];\n" + + " s1[shape=record, label=\"|\"];\n" + + " s0:p0->s1[label=\"1\"];\n" + + " s0:p1->s2[label=\"3\"];\n" + + " s0:p2->s2[label=\"4\"];\n" + + " s1:p0->s2[label=\"9\"];\n" + + " s1:p1->s2[label=\"10\"];\n" + + "}\n"; + assert_eq!(expecting, to_dot_string(r, root_is_wildcard())) +} + +#[test] +fn test_Aaubv_Acwdx() { + let a = PredictionContext::new_singleton(u().into(), 1).alloc(); + let b = PredictionContext::new_singleton(v().into(), 2).alloc(); + let c = PredictionContext::new_singleton(w().into(), 3).alloc(); + let d = PredictionContext::new_singleton(x().into(), 4).alloc(); + let A1 = array(vec![a, b]); + let A2 = array(vec![c, d]); + let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None); + let 
+#[test]
+fn test_Aaubv_Acwdx() {
+    let a = PredictionContext::new_singleton(u().into(), 1).alloc();
+    let b = PredictionContext::new_singleton(v().into(), 2).alloc();
+    let c = PredictionContext::new_singleton(w().into(), 3).alloc();
+    let d = PredictionContext::new_singleton(x().into(), 4).alloc();
+    let A1 = array(vec![a, b]);
+    let A2 = array(vec![c, d]);
+    let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None);
+    let expecting = String::new()
+        + "digraph G {\n"
+        + "rankdir=LR;\n"
+        + " s0[shape=record, label=\"<p0>|<p1>|<p2>|<p3>\"];\n"
+        + " s4[label=\"4\"];\n"
+        + " s5[label=\"*\"];\n"
+        + " s3[label=\"3\"];\n"
+        + " s2[label=\"2\"];\n"
+        + " s1[label=\"1\"];\n"
+        + " s0:p0->s1[label=\"1\"];\n"
+        + " s0:p1->s2[label=\"2\"];\n"
+        + " s0:p2->s3[label=\"3\"];\n"
+        + " s0:p3->s4[label=\"4\"];\n"
+        + " s4->s5[label=\"9\"];\n"
+        + " s3->s5[label=\"8\"];\n"
+        + " s2->s5[label=\"7\"];\n"
+        + " s1->s5[label=\"6\"];\n"
+        + "}\n";
+    assert_eq!(expecting, to_dot_string(r, root_is_wildcard()))
+}
+
+#[test]
+fn test_Aaubv_Abvdx() {
+    let a = PredictionContext::new_singleton(u().into(), 1).alloc();
+    let b1 = PredictionContext::new_singleton(v().into(), 2).alloc();
+    let b2 = PredictionContext::new_singleton(v().into(), 2).alloc();
+    let d = PredictionContext::new_singleton(x().into(), 4).alloc();
+    let A1 = array(vec![a, b1]);
+    let A2 = array(vec![b2, d]);
+    let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None);
+    let expecting = String::new()
+        + "digraph G {\n"
+        + "rankdir=LR;\n"
+        + " s0[shape=record, label=\"<p0>|<p1>|<p2>\"];\n"
+        + " s3[label=\"3\"];\n"
+        + " s4[label=\"*\"];\n"
+        + " s2[label=\"2\"];\n"
+        + " s1[label=\"1\"];\n"
+        + " s0:p0->s1[label=\"1\"];\n"
+        + " s0:p1->s2[label=\"2\"];\n"
+        + " s0:p2->s3[label=\"4\"];\n"
+        + " s3->s4[label=\"9\"];\n"
+        + " s2->s4[label=\"7\"];\n"
+        + " s1->s4[label=\"6\"];\n"
+        + "}\n";
+    assert_eq!(expecting, to_dot_string(r, root_is_wildcard()))
+}
+
+#[test]
+fn test_Aaubv_Abwdx() {
+    let a = PredictionContext::new_singleton(u().into(), 1).alloc();
+    let b1 = PredictionContext::new_singleton(v().into(), 2).alloc();
+    let b2 = PredictionContext::new_singleton(w().into(), 2).alloc();
+    let d = PredictionContext::new_singleton(x().into(), 4).alloc();
+    let A1 = array(vec![a, b1]);
+    let A2 = array(vec![b2, d]);
+    let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None);
+    let expecting = String::new()
+        + "digraph G {\n"
+        + "rankdir=LR;\n"
+        + " s0[shape=record, label=\"<p0>|<p1>|<p2>\"];\n"
+        + " s3[label=\"3\"];\n"
+        + " s4[label=\"*\"];\n"
+        + " s2[shape=record, label=\"<p0>|<p1>\"];\n"
+        + " s1[label=\"1\"];\n"
+        + " s0:p0->s1[label=\"1\"];\n"
+        + " s0:p1->s2[label=\"2\"];\n"
+        + " s0:p2->s3[label=\"4\"];\n"
+        + " s3->s4[label=\"9\"];\n"
+        + " s2:p0->s4[label=\"7\"];\n"
+        + " s2:p1->s4[label=\"8\"];\n"
+        + " s1->s4[label=\"6\"];\n"
+        + "}\n";
+    assert_eq!(expecting, to_dot_string(r, root_is_wildcard()))
+}
+
+#[test]
+fn test_Aaubv_Abvdu() {
+    let a = PredictionContext::new_singleton(u().into(), 1).alloc();
+    let b1 = PredictionContext::new_singleton(v().into(), 2).alloc();
+    let b2 = PredictionContext::new_singleton(v().into(), 2).alloc();
+    let d = PredictionContext::new_singleton(u().into(), 4).alloc();
+    let A1 = array(vec![a, b1]);
+    let A2 = array(vec![b2, d]);
+    let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None);
+    let expecting = String::new()
+        + "digraph G {\n"
+        + "rankdir=LR;\n"
+        + " s0[shape=record, label=\"<p0>|<p1>|<p2>\"];\n"
+        + " s2[label=\"2\"];\n"
+        + " s3[label=\"*\"];\n"
+        + " s1[label=\"1\"];\n"
+        + " s0:p0->s1[label=\"1\"];\n"
+        + " s0:p1->s2[label=\"2\"];\n"
+        + " s0:p2->s1[label=\"4\"];\n"
+        + " s2->s3[label=\"7\"];\n"
+        + " s1->s3[label=\"6\"];\n"
+        + "}\n";
+    assert_eq!(expecting, to_dot_string(r, root_is_wildcard()))
+}
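Reviewer note: test_Aaubv_Abvdu relies on merged operands being deduplicated structurally: b1 and b2 are distinct allocations of the same [2, 7] stack, and a and d share the graph node for u. A small sketch of that assumption (hedged: it presumes PredictionContext's structural equality impl, which lives elsewhere in the crate):

```rust
// Sketch: equality of prediction contexts is structural, not Rc identity,
// so two independently allocated [2, 7] stacks compare equal.
let b1 = PredictionContext::new_singleton(Some(v()), 2).alloc();
let b2 = PredictionContext::new_singleton(Some(v()), 2).alloc();
assert!(!Rc::ptr_eq(&b1, &b2)); // different allocations...
assert_eq!(b1, b2);             // ...but the same context value
```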
+#[test]
+fn test_Aaubu_Acudu() {
+    let a = PredictionContext::new_singleton(u().into(), 1).alloc();
+    let b = PredictionContext::new_singleton(u().into(), 2).alloc();
+    let c = PredictionContext::new_singleton(u().into(), 3).alloc();
+    let d = PredictionContext::new_singleton(u().into(), 4).alloc();
+    let A1 = array(vec![a, b]);
+    let A2 = array(vec![c, d]);
+    let r = PredictionContext::merge(&A1, &A2, root_is_wildcard(), &mut None);
+    let expecting = String::new()
+        + "digraph G {\n"
+        + "rankdir=LR;\n"
+        + " s0[shape=record, label=\"<p0>|<p1>|<p2>|<p3>\"];\n"
+        + " s1[label=\"1\"];\n"
+        + " s2[label=\"*\"];\n"
+        + " s0:p0->s1[label=\"1\"];\n"
+        + " s0:p1->s1[label=\"2\"];\n"
+        + " s0:p2->s1[label=\"3\"];\n"
+        + " s0:p3->s1[label=\"4\"];\n"
+        + " s1->s2[label=\"6\"];\n"
+        + "}\n";
+    assert_eq!(expecting, to_dot_string(r, root_is_wildcard()))
+}
+
+fn array(nodes: Vec<Rc<PredictionContext>>) -> Rc<PredictionContext> {
+    let mut parents = Vec::with_capacity(nodes.len());
+    let mut invoking_states = Vec::with_capacity(nodes.len());
+    for node in nodes {
+        parents.push(node.get_parent(0).cloned());
+        invoking_states.push(node.get_return_state(0));
+    }
+
+    PredictionContext::new_array(parents, invoking_states).alloc()
+}
+
+fn y() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 10).alloc()
+}
+
+fn x() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 9).alloc()
+}
+
+fn w() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 8).alloc()
+}
+
+fn v() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 7).alloc()
+}
+
+fn u() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 6).alloc()
+}
+
+fn d() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 4).alloc()
+}
+
+fn c() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 3).alloc()
+}
+
+fn b() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 2).alloc()
+}
+
+fn a() -> Rc<PredictionContext> {
+    PredictionContext::new_singleton(Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())), 1).alloc()
+}
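Reviewer note: `to_dot_string` below renders a context graph by worklist traversal. Every context gets a stable `s{id}`, array contexts become record nodes with one `<pN>` port per slot, and edges carry the return states. A sketch of the output for a two-frame stack (test name and the loose assertion are illustrative):

```rust
// Sketch: what to_dot_string (defined below) produces for the stack [1, 9]
// built over the empty context with the helpers above.
#[test]
fn dot_string_sketch() {
    let aex = PredictionContext::new_singleton(Some(x()), 1).alloc(); // $ <- 9 <- 1
    let dot = to_dot_string(aex, root_is_wildcard());
    // expected shape (ids assigned in traversal order):
    //   digraph G { rankdir=LR;
    //     s0[label="0"]; s1[label="1"]; s2[label="*"];
    //     s0->s1[label="1"]; s1->s2[label="9"]; }
    assert!(dot.contains("s1->s2[label=\"9\"]"));
}
```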
+fn to_dot_string(context: Rc<PredictionContext>, is_root_wildcard: bool) -> String {
+    let mut nodes = String::new();
+    let mut edges = String::new();
+    let mut visited = HashMap::<*const PredictionContext, Rc<PredictionContext>>::new();
+    let mut context_ids = HashMap::<*const PredictionContext, usize>::new();
+    let mut work_list = VecDeque::<Rc<PredictionContext>>::new();
+    visited.insert(context.deref(), context.clone());
+    context_ids.insert(context.deref(), context_ids.len());
+    work_list.push_back(context);
+    while !work_list.is_empty() {
+        let current = work_list.pop_back().unwrap();
+        let current_ptr = current.deref() as *const PredictionContext;
+        nodes.extend(format!(" s{}[", context_ids.get(&current_ptr).unwrap()).chars());
+
+        if current.length() > 1 {
+            nodes.extend("shape=record, ".chars());
+        }
+
+        nodes.extend("label=\"".chars());
+
+        if current.is_empty() {
+            nodes.push(if is_root_wildcard { '*' } else { '$' });
+        } else if current.length() > 1 {
+            for i in 0..current.length() {
+                if i > 0 {
+                    nodes.push('|');
+                }
+
+                nodes.extend(format!("<p{}>", i).chars());
+                if current.get_return_state(i) == PREDICTION_CONTEXT_EMPTY_RETURN_STATE {
+                    nodes.push(if is_root_wildcard { '*' } else { '$' });
+                }
+            }
+        } else {
+            nodes.extend(context_ids.get(&current_ptr).unwrap().to_string().chars());
+        }
+
+        nodes.extend("\"];\n".chars());
+
+        if current.is_empty() {
+            continue;
+        }
+
+        for i in 0..current.length() {
+            if current.get_return_state(i) == PREDICTION_CONTEXT_EMPTY_RETURN_STATE {
+                continue;
+            }
+
+            let parent = current.get_parent(i).unwrap();
+            if visited.insert(parent.deref(), parent.clone()).is_none() {
+                context_ids.insert(parent.deref(), context_ids.len());
+                work_list.push_back(parent.clone());
+            }
+
+            edges.extend(format!(" s{}", context_ids.get(&current_ptr).unwrap()).chars());
+            if current.length() > 1 {
+                edges += ":p";
+                edges += &i.to_string();
+            }
+
+            edges += &format!(
+                "->s{}[label=\"{}\"];\n",
+                context_ids
+                    .get(&(current.get_parent(i).unwrap().deref() as *const PredictionContext))
+                    .unwrap(),
+                current.get_return_state(i)
+            );
+        }
+    }
+
+    return format!("digraph G {{\nrankdir=LR;\n{}{}}}\n", nodes, edges);
+}
diff --git a/runtime/Rust/src/prediction_mode.rs b/runtime/Rust/src/prediction_mode.rs
new file mode 100644
index 0000000000..d1f33627c5
--- /dev/null
+++ b/runtime/Rust/src/prediction_mode.rs
@@ -0,0 +1,199 @@
+use std::collections::HashMap;
+
+use bit_set::BitSet;
+
+use crate::atn::INVALID_ALT;
+use crate::atn_config::ATNConfig;
+use crate::atn_config_set::ATNConfigSet;
+use crate::atn_state::ATNStateRef;
+use crate::prediction_context::PredictionContext;
+use crate::semantic_context::SemanticContext;
+
+/// This enum defines the prediction modes available in ANTLR 4 along with
+/// utility methods for analyzing configuration sets for conflicts and/or
+/// ambiguities.
+///
+/// It is set through `ParserATNSimulator`.
+#[allow(non_camel_case_types)]
+#[derive(Eq, PartialEq, Copy, Clone, Debug)]
+pub enum PredictionMode {
+    /// The SLL(*) prediction mode. This prediction mode ignores the current
+    /// parser context when making predictions. This is the fastest prediction
+    /// mode, and provides correct results for many grammars. This prediction
+    /// mode is more powerful than the prediction mode provided by ANTLR 3, but
+    /// may result in syntax errors for grammar and input combinations which are
+    /// not SLL.
+    ///
+    /// When using this prediction mode, the parser will either return a correct
+    /// parse tree (i.e. the same parse tree that would be returned with the
+    /// {@link #LL} prediction mode), or it will report a syntax error. If a
+    /// syntax error is encountered when using the {@link #SLL} prediction mode,
+    /// it may be due to either an actual syntax error in the input or indicate
+    /// that the particular combination of grammar and input requires the more
+    /// powerful {@link #LL} prediction abilities to complete successfully.
+    ///
+    /// This prediction mode does not provide any guarantees for prediction
+    /// behavior for syntactically-incorrect inputs.
+    SLL = 0,
+    /// The LL(*) prediction mode. This prediction mode allows the current parser
+    /// context to be used for resolving SLL conflicts that occur during
+    /// prediction. This is the fastest prediction mode that guarantees correct
+    /// parse results for all combinations of grammars with syntactically correct
+    /// inputs.
+    ///
+    /// When using this prediction mode, the parser will make correct decisions
+    /// for all syntactically-correct grammar and input combinations. However, in
+    /// cases where the grammar is truly ambiguous this prediction mode might not
+    /// report a precise answer for exactly which alternatives are
+    /// ambiguous.
+    ///
+    /// This prediction mode does not provide any guarantees for prediction
+    /// behavior for syntactically-incorrect inputs.
+    LL,
+    /// The LL(*) prediction mode with exact ambiguity detection. In addition to
+    /// the correctness guarantees provided by the {@link #LL} prediction mode,
+    /// this prediction mode instructs the prediction algorithm to determine the
+    /// complete and exact set of ambiguous alternatives for every ambiguous
+    /// decision encountered while parsing.
+    ///
+    /// This prediction mode may be used for diagnosing ambiguities during
+    /// grammar development. Due to the performance overhead of calculating sets
+    /// of ambiguous alternatives, this prediction mode should be avoided when
+    /// the exact results are not necessary.
+    ///
+    /// This prediction mode does not provide any guarantees for prediction
+    /// behavior for syntactically-incorrect inputs.
+    LL_EXACT_AMBIG_DETECTION,
+}
+
+impl PredictionMode {
+    //todo move everything here
+}
+
+//
+//
+pub(crate) fn has_sll_conflict_terminating_prediction(
+    mode: PredictionMode,
+    configs: &ATNConfigSet,
+) -> bool {
+    // if all_configs_in_rule_stop_states(configs) {
+    //     return true; // checked outside
+    // }
+    let mut dup = ATNConfigSet::new_base_atnconfig_set(true);
+    let mut configs = &*configs;
+    if mode == PredictionMode::SLL {
+        if configs.has_semantic_context() {
+            // strip semantic predicates so configs that differ only in their
+            // predicate collapse together for SLL conflict detection
+            configs.get_items().for_each(|it| {
+                let c = ATNConfig::new_with_semantic(
+                    it.get_state(),
+                    it.get_alt(),
+                    it.get_context().cloned(),
+                    Box::new(SemanticContext::NONE),
+                );
+                dup.add(Box::new(c));
+            });
+            configs = &dup;
+        }
+    }
+
+    let altsets = get_conflicting_alt_subsets(&configs);
+    let heuristic =
+        has_conflicting_alt_set(&altsets) && !has_state_associated_with_one_alt(&configs);
+    return heuristic;
+}
+
+//fn all_configs_in_rule_stop_states(configs: &ATNConfigSet) -> bool {
+//    for co
+//}
+
+pub(crate) fn resolves_to_just_one_viable_alt(altsets: &Vec<BitSet>) -> isize {
+    get_single_viable_alt(altsets)
+}
+
+pub(crate) fn all_subsets_conflict(altsets: &Vec<BitSet>) -> bool {
+    !has_non_conflicting_alt_set(altsets)
+}
+
+pub(crate) fn all_subsets_equal(altsets: &Vec<BitSet>) -> bool {
+    let mut iter = altsets.iter();
+    let first = iter.next();
+    iter.all(|it| it == first.unwrap())
+}
+
+fn has_non_conflicting_alt_set(altsets: &Vec<BitSet>) -> bool {
+    altsets.iter().any(|it| it.len() == 1)
+}
+
+fn has_conflicting_alt_set(altsets: &Vec<BitSet>) -> bool {
+    for alts in altsets {
+        if alts.len() > 1 {
+            return true;
+        }
+    }
+    false
+}
+
+//fn get_unique_alt(altsets: &Vec<BitSet>) -> isize { unimplemented!() }
+//
+pub(crate) fn get_alts(altsets: &Vec<BitSet>) -> BitSet {
+    altsets.iter().fold(BitSet::new(), |mut acc, it| {
+        acc.extend(it);
+        acc
+    })
+}
+
+//
+pub(crate) fn get_conflicting_alt_subsets(configs: &ATNConfigSet) -> Vec<BitSet> {
+    let mut configs_to_alts: HashMap<(ATNStateRef, &PredictionContext), BitSet> = HashMap::new();
+    for c in configs.get_items() {
+        let alts = configs_to_alts
+            .entry((c.get_state(), c.get_context().unwrap()))
+            .or_default();
+
+        alts.insert(c.get_alt() as usize);
+    }
+    configs_to_alts.drain().map(|(_, x)| x).collect()
+}
+
+fn get_state_to_alt_map(configs: &ATNConfigSet) -> HashMap<ATNStateRef, BitSet> {
+    let mut m = HashMap::new();
+    for c in configs.get_items() {
+        let alts = m.entry(c.get_state()).or_insert(BitSet::new());
+        alts.insert(c.get_alt() as usize);
+    }
+    m
+}
+
+fn has_state_associated_with_one_alt(configs: &ATNConfigSet) -> bool {
+    let x = get_state_to_alt_map(configs);
+    for alts in x.values() {
+        if alts.len() == 1 {
+            return true;
+        }
+    }
+    false
+}
+
+pub(crate) fn get_single_viable_alt(altsets: &Vec<BitSet>) -> isize {
+    let mut viable_alts = BitSet::new();
+    let mut min_alt = INVALID_ALT as usize;
+    for alt in altsets {
+        min_alt = alt.iter().next().unwrap();
+        viable_alts.insert(min_alt);
+        if viable_alts.len() > 1 {
+            return INVALID_ALT;
+        }
+    }
+    min_alt as isize
+}
diff --git a/runtime/Rust/src/recognizer.rs b/runtime/Rust/src/recognizer.rs
new file mode 100644
index 0000000000..286a87a985
--- /dev/null
+++ b/runtime/Rust/src/recognizer.rs
@@ -0,0 +1,166 @@
+use crate::atn::ATN;
+use crate::parser::ParserNodeType;
+
+use crate::token_factory::TokenAware;
+use crate::vocabulary::Vocabulary;
+
+/// Major version of this runtime.
+/// Used by generated parser to verify that it is compatible with current version of runtime
+pub const VERSION_MAJOR: &'static str = env!("CARGO_PKG_VERSION_MAJOR");
+/// Minor version of this runtime.
+/// Used by generated parser to verify that it is compatible with current version of runtime +pub const VERSION_MINOR: &'static str = env!("CARGO_PKG_VERSION_MINOR"); + +// todo move to compile time check when it will be possible to compare strings in constants +/// Used by generated parser to verify that it is compatible with current version of runtime +pub fn check_version(major: &str, minor: &str) { + assert!(major == VERSION_MAJOR && minor == VERSION_MINOR, + "parser is not compatible with current runtime version, please generate parser with the latest version of ANTLR") +} +//todo just a reminder to update version to be inserted in generated parser, +//const _:[();0-!(VERSION_MAJOR == "0" && VERSION_MINOR == "2") as usize] = []; + +/// **! Usually generated by ANTLR !** +pub trait Recognizer<'input>: TokenAware<'input> { + type Node: ParserNodeType<'input, TF = Self::TF>; + fn sempred( + &mut self, + _localctx: Option<&>::Type>, + _rule_index: isize, + _action_index: isize, + ) -> bool + where + Self: Sized, + { + true + } + fn action( + &mut self, + _localctx: Option<&>::Type>, + _rule_index: isize, + _action_index: isize, + ) where + Self: Sized, + { + } + + /// Returns array of rule names. + /// Used for debugging and error reporting + fn get_rule_names(&self) -> &[&str] { + &[] + } + fn get_vocabulary(&self) -> &dyn Vocabulary { + unimplemented!() + } + + /// Name of the file this recognizer was generated from + fn get_grammar_file_name(&self) -> &str { + "" + } + fn get_atn(&self) -> &ATN { + unimplemented!() + } +} + +/// **! Usually generated by ANTLR !** +/// +/// Used to make user predicates and actions callable by parser +/// Generated by ANTLR tool from actions and predicated added in grammar file +pub trait Actions<'a, P: Recognizer<'a>> { + fn sempred( + _localctx: Option<&>::Type>, + _rule_index: isize, + _action_index: isize, + _recog: &mut P, + ) -> bool { + true + } + + fn action( + _localctx: Option<&>::Type>, + _rule_index: isize, + _action_index: isize, + _recog: &mut P, + ) { + } + + /// Returns array of rule names. 
+ /// Used for debugging and error reporting + fn get_rule_names(&self) -> &[&str] { + &[] + } + fn get_vocabulary(&self) -> &dyn Vocabulary { + unimplemented!() + } + + /// Name of the file this recognizer was generated from + fn get_grammar_file_name(&self) -> &str { + "" + } + fn get_atn(&self) -> &ATN { + unimplemented!() + } +} + +//impl Recognizer for BaseRecognizer { +// fn get_state(&self) -> isize { +// self.state +// } +// +// fn set_state(&mut self, _v: isize) { +// self.state = _v; +// } +// +// fn add_error_listener(&mut self, _listener: Box) { +// self.listeners.push(_listener) +// } +// +// fn remove_error_listeners(&self) { +// unimplemented!() +// } +// +// fn get_error_listener_dispatch(&self) -> Box { +// unimplemented!() +// } +//} +// +//pub struct BaseRecognizer { +// pub listeners: Vec>, +// pub state: isize, // rule_names: Vec, +// // literal_names: Vec, +// // symbolic_names: Vec, +// // grammar_file_name: String +//} +// +//impl BaseRecognizer { +// pub fn new_base_recognizer() -> BaseRecognizer { +// BaseRecognizer { +// listeners: Vec::new(), +// state: -1, +// } +// } +// +// fn check_version(&self, _toolVersion: String) { +// unimplemented!() +// } +// +// fn get_token_names(&self) -> Vec { +// unimplemented!() +// } +// +// fn get_rule_index_map(&self) -> Map { +// unimplemented!() +// } +// +// fn get_token_type(&self, _tokenName: String) -> isize { +// unimplemented!() +// } +// +// fn get_error_header(&self, _e: ANTLRError) -> String { +// unimplemented!() +// } +// +// fn get_token_error_display(&self, _t: &Token) -> String { +// unimplemented!() +// } +//} diff --git a/runtime/Rust/src/rule_context.rs b/runtime/Rust/src/rule_context.rs new file mode 100644 index 0000000000..7ce1410d6e --- /dev/null +++ b/runtime/Rust/src/rule_context.rs @@ -0,0 +1,255 @@ +//! Minimal parser node +use std::borrow::{Borrow, BorrowMut}; +use std::cell::{Cell, RefCell}; +use std::fmt::{Debug, Formatter}; +use std::iter::from_fn; +use std::marker::PhantomData; +use std::rc::{Rc, Weak}; + +use crate::atn::INVALID_ALT; +use crate::parser::ParserNodeType; +use crate::parser_rule_context::ParserRuleContext; +use crate::token_factory::TokenFactory; +use crate::tree::{ParseTree, Tree}; +use better_any::TidAble; +use std::any::type_name; + +//pub trait RuleContext:RuleNode { +/// Minimal rule context functionality required for parser to work properly +pub trait RuleContext<'input>: CustomRuleContext<'input> { + /// Internal parser state + fn get_invoking_state(&self) -> isize { + -1 + } + + /// Sets internal parser state + fn set_invoking_state(&self, _t: isize) {} + + /// A context is empty if there is no invoking state; meaning nobody called + /// current context. 
Which is usually true for the root of the syntax tree + fn is_empty(&self) -> bool { + self.get_invoking_state() == -1 + } + + /// Get parent context + fn get_parent_ctx(&self) -> Option>::Type>> { + None + } + + /// Set parent context + fn set_parent(&self, _parent: &Option>::Type>>) {} +} + +pub(crate) fn states_stack<'input, T: ParserRuleContext<'input> + ?Sized + 'input>( + mut ctx: Rc, +) -> impl Iterator +where + T::Ctx: ParserNodeType<'input, Type = T>, +{ + from_fn(move || { + if ctx.get_invoking_state() < 0 { + None + } else { + let state = ctx.get_invoking_state(); + ctx = ctx.get_parent_ctx().unwrap(); + Some(state) + } + }) +} + +// #[doc(hidden)] +// pub unsafe trait Tid { +// fn self_id(&self) -> TypeId; +// fn id() -> TypeId +// where +// Self: Sized; +// } + +#[derive(Debug)] +#[doc(hidden)] +pub struct EmptyCustomRuleContext<'a, TF: TokenFactory<'a> + 'a>( + pub(crate) PhantomData<&'a TF::Tok>, +); + +better_any::tid! { impl <'a,TF> TidAble<'a> for EmptyCustomRuleContext<'a,TF> where TF:TokenFactory<'a> + 'a} + +impl<'a, TF: TokenFactory<'a> + 'a> CustomRuleContext<'a> for EmptyCustomRuleContext<'a, TF> { + type TF = TF; + type Ctx = EmptyContextType<'a, TF>; + + fn get_rule_index(&self) -> usize { + usize::max_value() + } +} + +// unsafe impl<'a, TF: TokenFactory<'a> + 'a> Tid for EmptyCustomRuleContext<'a, TF> { +// fn self_id(&self) -> TypeId { +// TypeId::of::>() +// } +// +// fn id() -> TypeId +// where +// Self: Sized, +// { +// TypeId::of::>() +// } +// } +#[doc(hidden)] // public for implementation reasons +pub type EmptyContext<'a, TF> = + dyn ParserRuleContext<'a, TF = TF, Ctx = EmptyContextType<'a, TF>> + 'a; + +#[derive(Debug)] +#[doc(hidden)] // public for implementation reasons +pub struct EmptyContextType<'a, TF: TokenFactory<'a>>(pub PhantomData<&'a TF>); + +better_any::tid! 
{ impl <'a,TF> TidAble<'a> for EmptyContextType<'a,TF> where TF:TokenFactory<'a> } + +impl<'a, TF: TokenFactory<'a>> ParserNodeType<'a> for EmptyContextType<'a, TF> { + type TF = TF; + type Type = dyn ParserRuleContext<'a, TF = Self::TF, Ctx = Self> + 'a; + // type Visitor = dyn ParseTreeVisitor<'a, Self> + 'a; +} + +/// Implemented by generated parser for context extension for particular rule +#[allow(missing_docs)] +pub trait CustomRuleContext<'input> { + type TF: TokenFactory<'input> + 'input; + /// Type that describes type of context nodes, stored in this context + type Ctx: ParserNodeType<'input, TF = Self::TF>; + //const RULE_INDEX:usize; + /// Rule index that corresponds to this context type + fn get_rule_index(&self) -> usize; + + fn get_alt_number(&self) -> isize { + INVALID_ALT + } + fn set_alt_number(&self, _alt_number: isize) {} + + /// Returns text representation of current node type, + /// rule name for context nodes and token text for terminal nodes + fn get_node_text(&self, rule_names: &[&str]) -> String { + let rule_index = self.get_rule_index(); + let rule_name = rule_names[rule_index]; + let alt_number = self.get_alt_number(); + if alt_number != INVALID_ALT { + return format!("{}:{}", rule_name, alt_number); + } + return rule_name.to_owned(); + } + // fn enter(_ctx: &dyn Tree<'input, Node=Self>, _listener: &mut dyn Any) where Self: Sized {} + // fn exit(_ctx: &dyn Tree<'input, Node=Self>, _listener: &mut dyn Any) where Self: Sized {} +} + +/// Minimal parse tree node implementation, that stores only data required for correct parsing +pub struct BaseRuleContext<'input, ExtCtx: CustomRuleContext<'input>> { + pub(crate) parent_ctx: RefCell>::Type>>>, + invoking_state: Cell, + pub(crate) ext: ExtCtx, +} + +better_any::tid! { impl <'input,Ctx> TidAble<'input> for BaseRuleContext<'input,Ctx> where Ctx:CustomRuleContext<'input>} + +#[allow(missing_docs)] +impl<'input, ExtCtx: CustomRuleContext<'input>> BaseRuleContext<'input, ExtCtx> { + pub fn new_parser_ctx( + parent_ctx: Option>::Type>>, + invoking_state: isize, + ext: ExtCtx, + ) -> Self { + Self { + parent_ctx: RefCell::new(parent_ctx.as_ref().map(Rc::downgrade)), + invoking_state: Cell::new(invoking_state), + ext, + } + } + + pub fn copy_from + ?Sized>( + ctx: &T, + ext: ExtCtx, + ) -> Self { + Self::new_parser_ctx(ctx.get_parent_ctx(), ctx.get_invoking_state(), ext) + } +} + +impl<'input, Ctx: CustomRuleContext<'input>> Borrow for BaseRuleContext<'input, Ctx> { + fn borrow(&self) -> &Ctx { + &self.ext + } +} + +impl<'input, Ctx: CustomRuleContext<'input>> BorrowMut for BaseRuleContext<'input, Ctx> { + fn borrow_mut(&mut self) -> &mut Ctx { + &mut self.ext + } +} + +impl<'input, ExtCtx: CustomRuleContext<'input>> CustomRuleContext<'input> + for BaseRuleContext<'input, ExtCtx> +{ + type TF = ExtCtx::TF; + type Ctx = ExtCtx::Ctx; + + fn get_rule_index(&self) -> usize { + self.ext.get_rule_index() + } +} + +// unsafe impl<'input, Ctx: CustomRuleContext<'input>> Tid for BaseRuleContext<'input, Ctx> { +// fn self_id(&self) -> TypeId { self.ext.self_id() } +// +// fn id() -> TypeId +// where +// Self: Sized, +// { +// Ctx::id() +// } +// } + +impl<'input, ExtCtx: CustomRuleContext<'input>> RuleContext<'input> + for BaseRuleContext<'input, ExtCtx> +{ + fn get_invoking_state(&self) -> isize { + self.invoking_state.get() + } + + fn set_invoking_state(&self, t: isize) { + self.invoking_state.set(t) + } + + fn get_parent_ctx(&self) -> Option>::Type>> { + self.parent_ctx + .borrow() + .as_ref() + .map(Weak::upgrade) + .flatten() + } 
+ + // fn get_parent_ctx(&self) -> Option { + // self.parent_ctx.borrow().as_ref().map(Weak::upgrade).map(Option::unwrap) + // } + + fn set_parent(&self, parent: &Option>::Type>>) { + *self.parent_ctx.borrow_mut() = parent.as_ref().map(Rc::downgrade); + } +} + +impl<'input, ExtCtx: CustomRuleContext<'input>> Debug for BaseRuleContext<'input, ExtCtx> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct(type_name::()) + .field("invoking_state", &self.invoking_state) + .field("..", &"..") + .finish() + } +} + +impl<'input, ExtCtx: CustomRuleContext<'input>> Tree<'input> for BaseRuleContext<'input, ExtCtx> {} + +impl<'input, ExtCtx: CustomRuleContext<'input>> ParseTree<'input> + for BaseRuleContext<'input, ExtCtx> +{ +} + +impl<'input, ExtCtx: CustomRuleContext<'input> + TidAble<'input>> ParserRuleContext<'input> + for BaseRuleContext<'input, ExtCtx> +{ +} diff --git a/runtime/Rust/src/semantic_context.rs b/runtime/Rust/src/semantic_context.rs new file mode 100644 index 0000000000..a32f5e9d59 --- /dev/null +++ b/runtime/Rust/src/semantic_context.rs @@ -0,0 +1,253 @@ +use std::borrow::Cow::{Borrowed, Owned}; +use std::borrow::{Borrow, Cow}; +use std::cmp::Ordering; +use std::collections::HashSet; + +use crate::parser::{Parser, ParserNodeType}; + +//pub trait SemanticContext:Sync + Send { +//// fn evaluate(&self, parser: &Recognizer, outerContext: &RuleContext) -> bool; +//// fn eval_precedence(&self, parser: &Recognizer, outerContext: &RuleContext, ) -> Box; +//} + +// fn empty() -> SemanticContext { +// SemanticContext::Predicate { +// rule_index: -1, +// pred_index: -1, +// is_ctx_dependent: false, +// } +// } + +#[derive(Clone, Eq, PartialEq, Hash, Debug)] +pub enum SemanticContext { + Predicate { + rule_index: isize, + pred_index: isize, + is_ctx_dependent: bool, + }, + Precedence(isize), + AND(Vec), + OR(Vec), +} + +impl SemanticContext { + pub const NONE: SemanticContext = SemanticContext::Predicate { + rule_index: -1, + pred_index: -1, + is_ctx_dependent: false, + }; + pub(crate) fn evaluate<'a, T: Parser<'a>>( + &self, + parser: &mut T, + outer_context: &>::Type, + ) -> bool { + match self { + SemanticContext::Predicate { + rule_index, + pred_index, + is_ctx_dependent, + } => { + let _localctx = if *is_ctx_dependent { + Some(outer_context) + } else { + None + }; + parser.sempred(_localctx, *rule_index, *pred_index) + } + SemanticContext::Precedence(prec) => parser.precpred(Some(outer_context), *prec), + SemanticContext::AND(ops) => ops.iter().all(|sem| sem.evaluate(parser, outer_context)), + SemanticContext::OR(ops) => ops.iter().any(|sem| sem.evaluate(parser, outer_context)), + } + } + pub(crate) fn eval_precedence<'a, 'b, T: Parser<'b>>( + &'a self, + parser: &T, + outer_context: &>::Type, + ) -> Option> { + match self { + SemanticContext::Predicate { .. 
} => Some(Borrowed(self)), + SemanticContext::Precedence(prec) => { + if parser.precpred(Some(outer_context), *prec) { + Some(Owned(Self::NONE)) + } else { + None + } + } + SemanticContext::OR(ops) => { + let mut differs = false; + let mut operands = vec![]; + for context in ops { + let evaluated = context.eval_precedence(parser, outer_context); + differs |= evaluated.is_some() && context == evaluated.as_deref().unwrap(); + + if let Some(evaluated) = evaluated { + if *evaluated == Self::NONE { + return Some(Owned(Self::NONE)); + } else { + operands.push(evaluated); + } + } + } + + if !differs { + return Some(Borrowed(self)); + } + + if operands.is_empty() { + return None; + } + + let mut operands = operands.drain(..); + let result = operands.next().unwrap(); + Some(operands.fold(result, |acc, it| { + Owned(SemanticContext::or(Some(acc), Some(it))) + })) + } + SemanticContext::AND(ops) => { + let mut differs = false; + let mut operands = vec![]; + for context in ops { + let evaluated = context.eval_precedence(parser, outer_context); + differs |= evaluated.is_some() && context == evaluated.as_deref().unwrap(); + + if let Some(evaluated) = evaluated { + if *evaluated != Self::NONE { + operands.push(evaluated); + } + } else { + return None; + } + } + + if !differs { + return Some(Borrowed(self)); + } + + if operands.is_empty() { + return Some(Owned(Self::NONE)); + } + + let mut operands = operands.drain(..); + let result = operands.next().unwrap(); + Some(operands.fold(result, |acc, it| { + Owned(SemanticContext::and(Some(acc), Some(it))) + })) + } + } + } + + pub fn new_and(a: &SemanticContext, b: &SemanticContext) -> SemanticContext { + let mut operands = HashSet::new(); + if let SemanticContext::AND(ops) = a { + operands.extend(ops.iter().cloned()) + } else { + operands.insert(a.clone()); + } + if let SemanticContext::AND(ops) = b { + operands.extend(ops.iter().cloned()) + } else { + operands.insert(b.clone()); + } + + let precedence_predicates = filter_precedence_predicate(&mut operands); + if !precedence_predicates.is_empty() { + let reduced = precedence_predicates.iter().min_by(sort_prec_pred); + operands.insert(reduced.unwrap().clone()); + } + + if operands.len() == 1 { + return operands.into_iter().next().unwrap(); + } + + SemanticContext::AND(operands.into_iter().collect()) + } + + pub fn new_or(a: &SemanticContext, b: &SemanticContext) -> SemanticContext { + let mut operands = HashSet::new(); + if let SemanticContext::OR(ops) = a { + operands.extend(ops.iter().cloned()) + } else { + operands.insert(a.clone()); + } + if let SemanticContext::OR(ops) = b { + ops.iter().for_each(|it| { + operands.insert(it.clone()); + }); + } else { + operands.insert(b.clone()); + } + + let precedence_predicates = filter_precedence_predicate(&mut operands); + if !precedence_predicates.is_empty() { + let reduced = precedence_predicates.iter().max_by(sort_prec_pred); + operands.insert(reduced.unwrap().clone()); + } + + if operands.len() == 1 { + return operands.into_iter().next().unwrap(); + } + + SemanticContext::OR(operands.into_iter().collect()) + } + + pub fn and( + a: Option>, + b: Option>, + ) -> SemanticContext { + match (a, b) { + (None, None) => Self::NONE, + (None, Some(b)) => b.borrow().clone(), + (Some(a), None) => a.borrow().clone(), + (Some(a), Some(b)) => { + let (a, b) = (a.borrow(), b.borrow()); + if *a == Self::NONE { + return b.clone(); + } + if *b == Self::NONE { + return a.clone(); + } + + Self::new_and(a, b) + } + } + } + + pub fn or( + a: Option>, + b: Option>, + ) -> SemanticContext 
{
+        match (a, b) {
+            (None, None) => Self::NONE,
+            (None, Some(b)) => b.borrow().clone(),
+            (Some(a), None) => a.borrow().clone(),
+            (Some(a), Some(b)) => {
+                let (a, b) = (a.borrow(), b.borrow());
+                if *a == Self::NONE || *b == Self::NONE {
+                    return Self::NONE;
+                }
+
+                Self::new_or(a, b)
+            }
+        }
+    }
+}
+
+fn sort_prec_pred(a: &&SemanticContext, b: &&SemanticContext) -> Ordering {
+    match (*a, *b) {
+        (SemanticContext::Precedence(a), SemanticContext::Precedence(b)) => a.cmp(b),
+        _ => panic!("should be sorting list of precedence predicates"),
+    }
+}
+
+fn filter_precedence_predicate(collection: &mut HashSet<SemanticContext>) -> Vec<SemanticContext> {
+    let mut result = vec![];
+    collection.retain(|it| {
+        if let SemanticContext::Precedence(_) = it {
+            result.push(it.clone());
+            false
+        } else {
+            true
+        }
+    });
+    result
+}
diff --git a/runtime/Rust/src/token.rs b/runtime/Rust/src/token.rs
new file mode 100644
index 0000000000..f450eda39d
--- /dev/null
+++ b/runtime/Rust/src/token.rs
@@ -0,0 +1,248 @@
+//! Symbols that parser works on
+use std::borrow::{Borrow, Cow};
+
+use std::fmt::Formatter;
+use std::fmt::{Debug, Display};
+
+use std::sync::atomic::{AtomicIsize, Ordering};
+
+use crate::char_stream::InputData;
+use crate::int_stream::EOF;
+use crate::token_factory::{INVALID_COMMON, INVALID_OWNING};
+
+use better_any::type_id;
+
+/// Type of tokens that parser considers invalid
+pub const TOKEN_INVALID_TYPE: isize = 0;
+/// Type of tokens that DFA can use to advance to next state without consuming actual input token.
+/// Should not be created by downstream implementations.
+pub const TOKEN_EPSILON: isize = -2;
+/// Min token type that can be assigned to tokens created by downstream implementations.
+pub const TOKEN_MIN_USER_TOKEN_TYPE: isize = 1;
+/// Type of EOF token
+pub const TOKEN_EOF: isize = EOF;
+/// Default channel lexer emits tokens to
+pub const TOKEN_DEFAULT_CHANNEL: isize = 0;
+/// Predefined additional channel for lexer to assign tokens to
+pub const TOKEN_HIDDEN_CHANNEL: isize = 1;
+/// Shorthand for TOKEN_HIDDEN_CHANNEL
+pub const HIDDEN: isize = TOKEN_HIDDEN_CHANNEL;
+
+/// Implemented by tokens that are produced by a `TokenFactory`
+#[allow(missing_docs)]
+pub trait Token: Debug + Display {
+    /// Type of the underlying data this token refers to
+    type Data: ?Sized + InputData;
+    // fn get_source(&self) -> Option<(Box, Box)>;
+    fn get_token_type(&self) -> isize;
+    fn get_channel(&self) -> isize {
+        TOKEN_DEFAULT_CHANNEL
+    }
+    fn get_start(&self) -> isize {
+        0
+    }
+    fn get_stop(&self) -> isize {
+        0
+    }
+    fn get_line(&self) -> isize {
+        0
+    }
+    fn get_column(&self) -> isize {
+        0
+    }
+
+    fn get_text(&self) -> &Self::Data;
+    fn set_text(&mut self, _text: <Self::Data as ToOwned>::Owned) {}
+
+    fn get_token_index(&self) -> isize {
+        0
+    }
+    fn set_token_index(&self, _v: isize) {}
+
+    // fn get_token_source(&self) -> &dyn TokenSource;
+    // fn get_input_stream(&self) -> &dyn CharStream;
+
+    /// returns fully owned representation of this token
+    fn to_owned(&self) -> OwningToken {
+        OwningToken {
+            token_type: self.get_token_type(),
+            channel: self.get_channel(),
+            start: self.get_start(),
+            stop: self.get_stop(),
+            token_index: AtomicIsize::from(self.get_token_index()),
+            line: self.get_line(),
+            column: self.get_column(),
+            text: self.get_text().to_display(),
+            read_only: true,
+        }
+    }
+}
+
+/// Token that owns its data
+pub type OwningToken = GenericToken<String>;
+/// Most versatile Token that uses Cow to save data.
+/// Can be used to seamlessly switch from owned to zero-copy parsing.
+pub type CommonToken<'a> = GenericToken<Cow<'a, str>>;
+
+type_id!(OwningToken); +type_id!(CommonToken<'a>); + +#[derive(Debug)] +#[allow(missing_docs)] +pub struct GenericToken { + // source: Option<(Box,Box)>, + pub token_type: isize, + pub channel: isize, + pub start: isize, + pub stop: isize, + pub token_index: AtomicIsize, + pub line: isize, + pub column: isize, + pub text: T, + pub read_only: bool, +} + +impl Clone for GenericToken +where + Self: Token, +{ + fn clone(&self) -> Self { + Self { + token_type: self.token_type, + channel: self.channel, + start: self.start, + stop: self.stop, + token_index: AtomicIsize::new(self.get_token_index()), + line: self.line, + column: self.column, + text: self.text.clone(), + read_only: false, + } + } +} + +impl + Debug> Display for GenericToken { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let txt = if self.token_type == TOKEN_EOF { + "" + } else { + self.text.borrow() + }; + let txt = txt.replace("\n", "\\n"); + let txt = txt.replace("\r", "\\r"); + let txt = txt.replace("\t", "\\t"); + // let txt = escape_whitespaces(txt,false); + f.write_fmt(format_args!( + "[@{},{}:{}='{}',<{}>{},{}:{}]", + self.get_token_index(), + self.start, + self.stop, + txt, + self.token_type, + if self.channel > 0 { + self.channel.to_string() + } else { + String::new() + }, + self.line, + self.column + )) + } +} + +// impl + Debug> TokenWrapper for GenericToken { type Inner = Self; } + +impl + Debug> Token for GenericToken { + type Data = str; + + fn get_token_type(&self) -> isize { + self.token_type + } + + fn get_channel(&self) -> isize { + self.channel + } + + fn get_start(&self) -> isize { + self.start + } + + fn get_stop(&self) -> isize { + self.stop + } + + fn get_line(&self) -> isize { + self.line + } + + fn get_column(&self) -> isize { + self.column + } + + // fn get_source(&self) -> Option<(Box, Box)> { + // unimplemented!() + // } + + fn get_text(&self) -> &str { + if self.token_type == EOF { + "" + } else { + self.text.borrow() + } + } + + fn set_text(&mut self, _text: String) { + unimplemented!() + } + + fn get_token_index(&self) -> isize { + self.token_index.load(Ordering::Relaxed) + } + + fn set_token_index(&self, _v: isize) { + self.token_index.store(_v, Ordering::Relaxed) + } + + fn to_owned(&self) -> OwningToken { + OwningToken { + token_type: self.token_type, + channel: self.channel, + start: self.start, + stop: self.stop, + token_index: AtomicIsize::new(self.get_token_index()), + line: self.line, + column: self.column, + text: self.text.borrow().to_owned(), + read_only: self.read_only, + } + } +} + +impl Default for &'_ OwningToken { + fn default() -> Self { + INVALID_OWNING.with(|x| unsafe { std::mem::transmute(&**x) }) + } +} + +impl Default for &'_ CommonToken<'_> { + fn default() -> Self { + INVALID_COMMON.with(|x| unsafe { std::mem::transmute(&**x) }) + } +} + +// +// impl CommonToken { +// fn new_common_token( +// _source: Option<(Box, Box)>, +// _token_type: isize, +// _channel: isize, +// _start: isize, +// _stop: isize, +// ) -> CommonToken { +// unimplemented!() +// } +// +// fn clone(&self) -> CommonToken { +// unimplemented!() +// } +// } diff --git a/runtime/Rust/src/token_factory.rs b/runtime/Rust/src/token_factory.rs new file mode 100644 index 0000000000..43d68b1bf8 --- /dev/null +++ b/runtime/Rust/src/token_factory.rs @@ -0,0 +1,311 @@ +//! 
How Lexer should produce tokens +use std::borrow::Cow::{Borrowed, Owned}; +use std::borrow::{Borrow, Cow}; + +use std::fmt::{Debug, Formatter}; +use std::marker::PhantomData; + +use std::sync::atomic::AtomicIsize; + +use typed_arena::Arena; + +use crate::char_stream::{CharStream, InputData}; +use crate::token::Token; +use crate::token::{CommonToken, OwningToken, TOKEN_INVALID_TYPE}; +use better_any::TidAble; + +thread_local! { + pub(crate) static COMMON_TOKEN_FACTORY_DEFAULT: Box = + Box::new(CommonTokenFactory {}); + pub(crate) static INVALID_OWNING: Box = Box::new(OwningToken { + token_type: TOKEN_INVALID_TYPE, + channel: 0, + start: -1, + stop: -1, + token_index: AtomicIsize::new(-1), + line: -1, + column: -1, + text: "".to_owned(), + read_only: true, + }); + pub(crate) static INVALID_COMMON: Box> = Box::new(CommonToken { + token_type: TOKEN_INVALID_TYPE, + channel: 0, + start: -1, + stop: -1, + token_index: AtomicIsize::new(-1), + line: -1, + column: -1, + text: Borrowed(""), + read_only: true, + }); +} + +/// Trait for creating tokens. +pub trait TokenFactory<'a>: TidAble<'a> + Sized { + /// Type of tokens emitted by this factory. + type Inner: Token + ?Sized + 'a; + /// Ownership of the emitted tokens + type Tok: Borrow + Clone + 'a + Debug; + // can relax InputData to just ToOwned here? + /// Type of the underlying storage + type Data: InputData + ?Sized; + /// Type of the `CharStream` that factory can produce tokens from + type From; + + /// Creates token either from `sourse` or from pure data in `text` + /// Either `source` or `text` are not None + fn create( + &'a self, + source: Option<&mut T>, + ttype: isize, + text: Option<::Owned>, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized; + + /// Creates invalid token + /// Invalid tokens must have `TOKEN_INVALID_TYPE` token type. + fn create_invalid() -> Self::Tok; + + /// Creates `Self::Data` representation for `from` for lexer to work with + /// when it does not need to create full token + fn get_data(from: Self::From) -> Cow<'a, Self::Data>; +} + +/// Default token factory +#[derive(Default, Debug)] +pub struct CommonTokenFactory; + +better_any::tid! {CommonTokenFactory} + +impl Default for &'_ CommonTokenFactory { + fn default() -> Self { + COMMON_TOKEN_FACTORY_DEFAULT.with(|x| unsafe { std::mem::transmute(&**x) }) + } +} + +impl<'a> TokenFactory<'a> for CommonTokenFactory { + type Inner = CommonToken<'a>; + type Tok = Box; + type Data = str; + type From = Cow<'a, str>; + + #[inline] + fn create( + &'a self, + source: Option<&mut T>, + ttype: isize, + text: Option, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized, + { + let text = match (text, source) { + (Some(t), _) => Owned(t), + (None, Some(x)) => { + if stop >= x.size() || start >= x.size() { + Borrowed("") + } else { + x.get_text(start, stop).into() + } + } + _ => Borrowed(""), + }; + Box::new(CommonToken { + token_type: ttype, + channel, + start, + stop, + token_index: AtomicIsize::new(-1), + line, + column, + text, + read_only: false, + }) + } + + fn create_invalid() -> Self::Tok { + INVALID_COMMON.with(|x| (*x).clone()) + } + + fn get_data(from: Self::From) -> Cow<'a, Self::Data> { + from + } +} + +/// Token factory that produces heap allocated +/// `OwningToken`s +#[derive(Default, Debug)] +pub struct OwningTokenFactory; + +better_any::tid! 
{OwningTokenFactory} + +impl<'a> TokenFactory<'a> for OwningTokenFactory { + type Inner = OwningToken; + type Tok = Box; + type Data = str; + type From = String; + + #[inline] + fn create( + &'a self, + source: Option<&mut T>, + ttype: isize, + text: Option, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized, + { + let text = match (text, source) { + (Some(t), _) => t, + (None, Some(x)) => { + if stop >= x.size() || start >= x.size() { + "".to_owned() + } else { + x.get_text(start, stop) + } + } + _ => String::new(), + }; + Box::new(OwningToken { + token_type: ttype, + channel, + start, + stop, + token_index: AtomicIsize::new(-1), + line, + column, + text, + read_only: false, + }) + } + + fn create_invalid() -> Self::Tok { + INVALID_OWNING.with(|x| (*x).clone()) + } + + fn get_data(from: Self::From) -> Cow<'a, Self::Data> { + from.into() + } +} + +// pub struct DynFactory<'input,TF:TokenFactory<'.into()input>>(TF) where TF::Tok:CoerceUnsized>; +// impl <'input,TF:TokenFactory<'input>> TokenFactory<'input> for DynFactory<'input,TF> +// where TF::Tok:CoerceUnsized> +// { +// +// } + +///Arena token factory that contains `OwningToken`s +pub type ArenaOwningFactory<'a> = ArenaFactory<'a, OwningTokenFactory, OwningToken>; +///Arena token factory that contains `CommonToken`s +pub type ArenaCommonFactory<'a> = ArenaFactory<'a, CommonTokenFactory, CommonToken<'a>>; + +/// This is a wrapper for Token factory that allows to allocate tokens in separate arena. +/// It can allow to significantly improve performance by passing Tokens by references everywhere. +/// +/// Requires `&'a Tok: Default` bound to produce invalid tokens, which can be easily implemented +/// like this: +/// ```text +/// thread_local!{ static ref INVALID_TOKEN:Box = ... } +/// impl Default for &'_ CustomToken { +/// fn default() -> Self { &**INVALID_TOKEN } +/// } +/// ``` +/// or if possible just +/// ```text +/// const INVALID_TOKEN:CustomToken = ... +/// ``` +// Box is used here because it is almost always should be used for token factory +pub struct ArenaFactory<'input, TF, T> { + arena: Arena, + factory: TF, + pd: PhantomData<&'input str>, +} + +better_any::tid! 
{impl<'input,TF,T> TidAble<'input> for ArenaFactory<'input,TF,T>} + +impl<'input, TF: Debug, T> Debug for ArenaFactory<'input, TF, T> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ArenaFactory") + .field("arena", &"Arena") + .field("factory", &self.factory) + .finish() + } +} + +impl<'input, TF, T> Default for ArenaFactory<'input, TF, T> +where + TF: Default, +{ + fn default() -> Self { + Self { + arena: Default::default(), + factory: Default::default(), + pd: Default::default(), + } + } +} + +impl<'input, TF, Tok> TokenFactory<'input> for ArenaFactory<'input, TF, Tok> +where + TF: TokenFactory<'input, Tok = Box, Inner = Tok>, + Tok: Token + Clone + TidAble<'input>, + for<'a> &'a Tok: Default, +{ + type Inner = Tok; + type Tok = &'input Tok; + type Data = TF::Data; + type From = TF::From; + + #[inline] + fn create( + &'input self, + source: Option<&mut T>, + ttype: isize, + text: Option<::Owned>, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized, + { + // todo remove redundant allocation + let token = self + .factory + .create(source, ttype, text, channel, start, stop, line, column); + self.arena.alloc(*token) + } + + fn create_invalid() -> &'input Tok { + <&Tok as Default>::default() + } + + fn get_data(from: Self::From) -> Cow<'input, Self::Data> { + TF::get_data(from) + } +} + +#[doc(hidden)] +pub trait TokenAware<'input> { + type TF: TokenFactory<'input> + 'input; +} diff --git a/runtime/Rust/src/token_source.rs b/runtime/Rust/src/token_source.rs new file mode 100644 index 0000000000..7395ae39ce --- /dev/null +++ b/runtime/Rust/src/token_source.rs @@ -0,0 +1,127 @@ +use crate::int_stream::IntStream; +use crate::token_factory::TokenFactory; + +/// Produces tokens to be used by parser. +/// `TokenStream` implementations are responsible for buffering tokens for parser lookahead +pub trait TokenSource<'input> { + /// TokenFactory this token source produce tokens with + type TF: TokenFactory<'input> + 'input; + /// Return a {@link Token} object from your input stream (usually a + /// {@link CharStream}). Do not fail/return upon lexing error; keep chewing + /// on the characters until you get a good one; errors are not passed through + /// to the parser. + fn next_token(&mut self) -> >::Tok; + /** + * Get the line number for the current position in the input stream. The + * first line in the input is line 1. + * + * Returns the line number for the current position in the input stream, or + * 0 if the current token source does not track line numbers. + */ + fn get_line(&self) -> isize { + 0 + } + /** + * Get the index into the current line for the current position in the input + * stream. The first character on a line has position 0. + * + * Returns the line number for the current position in the input stream, or + * -1 if the current token source does not track character positions. + */ + fn get_char_position_in_line(&self) -> isize { + -1 + } + + /// Returns underlying input stream + fn get_input_stream(&mut self) -> Option<&mut dyn IntStream>; + + /// Returns string identifier of underlying input e.g. file name + fn get_source_name(&self) -> String; + // fn set_token_factory<'c: 'b>(&mut self, f: &'c TokenFactory); + /// Gets the `TokenFactory` this token source is currently using for + /// creating `Token` objects from the input. + /// + /// Required by `Parser` for creating missing tokens. 
+ fn get_token_factory(&self) -> &'input Self::TF; +} + +// allows user to call parser with &mut reference to Lexer +impl<'input, T> TokenSource<'input> for &mut T +where + T: TokenSource<'input>, +{ + type TF = T::TF; + #[inline(always)] + fn next_token(&mut self) -> >::Tok { + (**self).next_token() + } + + #[inline(always)] + fn get_line(&self) -> isize { + (**self).get_line() + } + + #[inline(always)] + fn get_char_position_in_line(&self) -> isize { + (**self).get_char_position_in_line() + } + + #[inline(always)] + fn get_input_stream(&mut self) -> Option<&mut dyn IntStream> { + (**self).get_input_stream() + } + + #[inline(always)] + fn get_source_name(&self) -> String { + (**self).get_source_name() + } + + #[inline(always)] + fn get_token_factory(&self) -> &'input Self::TF { + (**self).get_token_factory() + } +} + +// / adaptor to feed parser with existing tokens +// pub struct IterTokenSource where S: Iterator, S::Item: Token, F: TokenFactory { +// iter: S, +// fact: F, +// } +// +// impl TokenSource for IterTokenSource where S: Iterator, S::Item: Token, F: TokenFactory { +// type Tok = S::Item; +// +// fn next_token(&mut self) -> Box { +// self.iter.next().map(Box::new).unwrap_or_else( +// || self.get_token_factory().create( +// None, +// EOF, +// TOKEN_DEFAULT_CHANNEL, +// -1, +// -1, +// self.get_line(), +// self.get_char_position_in_line(), +// ) +// ) +// } +// +// fn get_line(&self) -> isize { +// 0 +// } +// +// fn get_char_position_in_line(&self) -> isize { +// -1 +// } +// +// fn get_input_stream(&mut self) -> Option<&mut dyn CharStream> { +// None +// } +// +// fn get_source_name(&self) -> String { +// "".to_string() +// } +// +// fn get_token_factory(&self) -> &dyn TokenFactory { +// &self.fact +// } +// } diff --git a/runtime/Rust/src/token_stream.rs b/runtime/Rust/src/token_stream.rs new file mode 100644 index 0000000000..badc8e0eb9 --- /dev/null +++ b/runtime/Rust/src/token_stream.rs @@ -0,0 +1,296 @@ +//! 
`IntStream` that produces tokens for Parser +use std::borrow::Borrow; +use std::cmp::min; +use std::marker::PhantomData; + +use crate::char_stream::InputData; +use crate::int_stream::{IntStream, IterWrapper}; +use crate::token::{OwningToken, Token, TOKEN_EOF, TOKEN_INVALID_TYPE}; +use crate::token_factory::TokenFactory; +use crate::token_source::TokenSource; +use std::fmt::{Debug, Formatter}; + +/// An `IntSteam` of `Token`s +/// +/// Used as an input for `Parser`s +/// If there is an existing source of tokens, you should implement +/// `TokenSource`, not `TokenStream` +pub trait TokenStream<'input>: IntStream { + /// Token factory that created tokens in this stream + type TF: TokenFactory<'input> + 'input; + + /// Lookahead for tokens, same as `IntSteam::la` but return reference to full token + fn lt(&mut self, k: isize) -> Option<&>::Tok>; + /// Returns reference to token at `index` + fn get(&self, index: isize) -> &>::Tok; + + /// Token source that produced data for tokens for this stream + fn get_token_source(&self) -> &dyn TokenSource<'input, TF = Self::TF>; + // fn set_token_source(&self,source: Box); + /// Get combined text of all tokens in this stream + fn get_all_text(&self) -> String { + self.get_text_from_interval(0, self.size() - 1) + } + /// Get combined text of tokens in start..=stop interval + fn get_text_from_interval(&self, start: isize, stop: isize) -> String; + // fn get_text_from_rule_context(&self,context: RuleContext) -> String; + /// Get combined text of tokens in between `a` and `b` + fn get_text_from_tokens(&self, a: &T, b: &T) -> String + where + Self: Sized, + { + self.get_text_from_interval(a.get_token_index(), b.get_token_index()) + } +} + +/// Iterator over tokens in `T` +#[derive(Debug)] +pub struct TokenIter<'a, 'input: 'a, T: TokenStream<'input>>( + &'a mut T, + bool, + PhantomData &'input str>, +); + +impl<'a, 'input: 'a, T: TokenStream<'input>> Iterator for TokenIter<'a, 'input, T> { + type Item = OwningToken; + + fn next(&mut self) -> Option { + if self.1 { + return None; + } + let result = self.0.lt(1).unwrap().borrow().to_owned(); + if result.get_token_type() == TOKEN_EOF { + self.1 = true; + } else { + self.0.consume(); + } + Some(result) + } +} + +/// Token stream that keeps all data in internal Vec +pub struct UnbufferedTokenStream<'input, T: TokenSource<'input>> { + token_source: T, + pub(crate) tokens: Vec<>::Tok>, + //todo prev token for lt(-1) + pub(crate) current_token_index: isize, + markers_count: isize, + pub(crate) p: isize, +} +better_any::tid! 
{ impl<'input,T> TidAble<'input> for UnbufferedTokenStream<'input, T> where T: TokenSource<'input>} + +impl<'input, T: TokenSource<'input>> Debug for UnbufferedTokenStream<'input, T> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("UnbufferedTokenStream") + .field("tokens", &self.tokens) + .field("current_token_index", &self.current_token_index) + .field("markers_count", &self.markers_count) + .field("p(buffer index)", &self.p) + .finish() + } +} + +impl<'input, T: TokenSource<'input>> UnbufferedTokenStream<'input, T> { + /// Creates iterator over this token stream + pub fn iter(&mut self) -> IterWrapper<'_, Self> { + IterWrapper(self) + } + + /// Creates iterator over tokens in this token stream + pub fn token_iter(&mut self) -> TokenIter<'_, 'input, Self> { + TokenIter(self, false, PhantomData) + } + + /// Creates token stream that keeps all tokens inside + pub fn new_buffered(source: T) -> Self { + let mut a = UnbufferedTokenStream::new_unbuffered(source); + a.mark(); + a + } + + /// Creates token stream that keeps only tokens required by `mark` + pub fn new_unbuffered(source: T) -> Self { + UnbufferedTokenStream { + token_source: source, + tokens: vec![], + current_token_index: 0, + markers_count: 0, + p: 0, + } + } + + fn sync(&mut self, want: isize) { + let need = (self.p + want - 1) - self.tokens.len() as isize + 1; + if need > 0 { + self.fill(need); + } + } + + fn get_buffer_start_index(&self) -> isize { + self.current_token_index - self.p + } + + pub(crate) fn fill(&mut self, need: isize) -> isize { + for i in 0..need { + if self.tokens.len() > 0 + && self.tokens.last().unwrap().borrow().get_token_type() == TOKEN_EOF + { + return i; + } + let token = self.token_source.next_token(); + token + .borrow() + .set_token_index(self.get_buffer_start_index() + self.tokens.len() as isize); + self.tokens.push(token); + } + + need + } +} + +impl<'input, T: TokenSource<'input>> TokenStream<'input> for UnbufferedTokenStream<'input, T> { + type TF = T::TF; + + #[inline] + fn lt(&mut self, i: isize) -> Option<&>::Tok> { + if i == -1 { + return self.tokens.get(self.p as usize - 1); + } + + self.sync(i); + + self.tokens.get((self.p + i - 1) as usize) + } + + #[inline] + fn get(&self, index: isize) -> &>::Tok { + &self.tokens[(index - self.get_buffer_start_index()) as usize] + } + + fn get_token_source(&self) -> &dyn TokenSource<'input, TF = Self::TF> { + &self.token_source + } + + fn get_text_from_interval(&self, start: isize, stop: isize) -> String { + // println!("get_text_from_interval {}..{}",start,stop); + // println!("all tokens {:?}",self.tokens.iter().map(|x|x.as_ref().to_owned()).collect::>()); + + let buffer_start_index = self.get_buffer_start_index(); + let buffer_stop_index = buffer_start_index + self.tokens.len() as isize - 1; + if start < buffer_start_index || stop > buffer_stop_index { + panic!( + "interval {}..={} not in token buffer window: {}..{}", + start, stop, buffer_start_index, buffer_stop_index + ); + } + + let a = start - buffer_start_index; + let b = stop - buffer_start_index; + + let mut buf = String::new(); + for i in a..(b + 1) { + let t = self.tokens[i as usize].borrow(); + if t.get_token_type() == TOKEN_EOF { + break; + } + buf.extend(t.get_text().to_display().chars()); + } + + return buf; + } +} + +impl<'input, T: TokenSource<'input>> IntStream for UnbufferedTokenStream<'input, T> { + #[inline] + fn consume(&mut self) { + if self.la(1) == TOKEN_EOF { + panic!("cannot consume EOF"); + } + + if self.p == self.tokens.len() as isize && 
self.markers_count == 0 {
+            self.tokens.clear();
+            self.p = -1;
+        }
+
+        self.p += 1;
+        self.current_token_index += 1;
+
+        self.sync(1);
+        // Ok(())
+    }
+
+    #[inline]
+    fn la(&mut self, i: isize) -> isize {
+        self.lt(i)
+            .map(|t| t.borrow().get_token_type())
+            .unwrap_or(TOKEN_INVALID_TYPE)
+    }
+
+    #[inline]
+    fn mark(&mut self) -> isize {
+        self.markers_count += 1;
+        -self.markers_count
+    }
+
+    #[inline]
+    fn release(&mut self, marker: isize) {
+        assert_eq!(marker, -self.markers_count);
+
+        self.markers_count -= 1;
+        if self.markers_count == 0 {
+            if self.p > 0 {
+                self.tokens.drain(0..self.p as usize);
+                //todo drain assembly is almost 2x longer than
+                // unsafe manual copy but need to bench before using unsafe
+                //let new_len = self.tokens.len() - self.p as usize;
+                // unsafe {
+                //     // drop first p elements
+                //     for i in 0..(self.p as usize) {
+                //         drop_in_place(&mut self.tokens[i]);
+                //     }
+                //     // move len-p elements to beginning
+                //     std::intrinsics::copy(
+                //         &self.tokens[self.p as usize],
+                //         &mut self.tokens[0],
+                //         new_len,
+                //     );
+                //     self.tokens.set_len(new_len);
+                // }
+
+                self.p = 0;
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn index(&self) -> isize {
+        self.current_token_index
+    }
+
+    #[inline]
+    fn seek(&mut self, mut index: isize) {
+        if self.current_token_index == index {
+            return;
+        }
+        if index > self.current_token_index {
+            self.sync(index - self.current_token_index);
+            index = min(index, self.get_buffer_start_index() + self.size() + 1);
+        }
+        let i = index - self.get_buffer_start_index();
+        if i < 0 || i >= self.tokens.len() as isize {
+            panic!()
+        }
+
+        self.p = i;
+        self.current_token_index = index;
+    }
+
+    #[inline(always)]
+    fn size(&self) -> isize {
+        self.tokens.len() as isize
+    }
+
+    fn get_source_name(&self) -> String {
+        self.token_source.get_source_name()
+    }
+}
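Reviewer note: the mark/release bookkeeping above is what lets unbuffered streams discard already-consumed tokens. A minimal sketch of how a caller drives it (the `lexer` value is a placeholder for any `TokenSource` implementation):

```rust
// Sketch only: assumes `lexer` implements TokenSource<'input>.
let mut stream = UnbufferedTokenStream::new_unbuffered(lexer);
let marker = stream.mark(); // pin the buffer so lookahead tokens are kept
let _la1 = stream.la(1);    // forces sync()/fill() from the token source
stream.consume();           // advances p and current_token_index together
stream.release(marker);     // drops tokens before p once no marks remain
```

Note that `consume()` panics on EOF and `new_buffered` is just `new_unbuffered` plus a permanent mark, so a buffered stream never evicts tokens.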
() } + + + fn execute(&self, buffer: * bytes.Buffer) -> int { unimplemented!() } + + fn String(&self) -> String { unimplemented!() } + + + pub struct InsertBeforeOp { + base: base_rewrite_operation, + } + + fn new_insert_before_op(index isize, text: String, stream: TokenStream) -> * InsertBeforeOp { unimplemented!() } + + fn execute(&self, buffer: * bytes.Buffer) -> int { unimplemented!() } + + fn String(&self) -> String { unimplemented!() } + + + pub struct InsertAfterOp { + base: base_rewrite_operation, + } + + fn new_insert_after_op(index isize, text: String, stream: TokenStream) -> * InsertAfterOp { unimplemented!() } + + fn execute(&self, buffer: * bytes.Buffer) -> int { unimplemented!() } + + fn String(&self) -> String { unimplemented!() } + + pub struct ReplaceOp{ + base: base_rewrite_operation, + last_index: isize, + } + + fn new_replace_op(from, to: isize, text: String, stream: TokenStream) -> * ReplaceOp { unimplemented!() } + + fn (op * ReplaceOp)Execute(buffer * bytes.Buffer) int { unimplemented ! () } + + fn String(&self) -> String { unimplemented!() } + + + pub struct TokenStreamRewriter { + tokens: TokenStream, + programs: map[String]Vec < RewriteOperation >, + last_rewrite_token_indexes: map[String]isize, + } + + fn new_token_stream_rewriter(tokens TokenStream) -> * TokenStreamRewriter { unimplemented!() } + + fn get_token_stream(&self) -> TokenStream { unimplemented!() } + + fn rollback(&self, program_name: String, instruction_index: isize) { unimplemented!() } + + fn rollback_default(&self, instruction_index: isize) { unimplemented!() } + fn delete_program(&self, program_name: String) { unimplemented!() } + + fn delete_program_default(&self) { unimplemented!() } + + fn insert_after(&self, program_name: String, index: isize, text: String) { unimplemented!() } + + fn insert_after_default(&self, index: isize, text: String) { unimplemented!() } + + fn insert_after_token(&self, program_name: String, token: Token, text: String) { unimplemented!() } + + fn insert_before(&self, program_name: String, index: isize, text: String) { unimplemented!() } + + fn insert_before_default(&self, index: isize, text: String) { unimplemented!() } + + fn insert_before_token(&self, program_name: String, token Token, text: String) { unimplemented!() } + + fn replace(&self, program_name: String, from: isize, to: isize, text: String) { unimplemented!() } + + fn (tsr * TokenStreamRewriter)ReplaceDefault(from, to: isize, text: String) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)ReplaceDefaultPos(index isize, text: String) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)ReplaceToken(program_name String, from: Token, to: Token, text: String) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)ReplaceTokenDefault(from, to: Token, text: String) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)ReplaceTokenDefaultPos(index Token, text: String) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)Delete(program_name String, from: isize, to: isize) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)DeleteDefault(from, to: isize) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)DeleteDefaultPos(index isize) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)DeleteToken(program_name String, from: Token, to: Token) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)DeleteTokenDefault(from, to Token) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)GetLastRewriteTokenIndex(program_name String)int { unimplemented ! 
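+
+// The file above still mirrors the Go runtime almost token-for-token, so the
+// receiver-style signatures are not valid Rust yet. One idiomatic shape for
+// the same lazy "program of edits" design could be the sketch below; every
+// name in it is hypothetical, not this crate's final API:
+//
+// enum RewriteOp {
+//     // insert-after is insert-before at `index + 1`, and delete is a
+//     // `Replace` with empty text, exactly as in the Go/Java rewriters
+//     InsertBefore { index: usize, text: String },
+//     Replace { from: usize, to: usize, text: String },
+// }
+//
+// #[derive(Default)]
+// struct Rewriter {
+//     programs: std::collections::HashMap<String, Vec<RewriteOp>>,
+// }
+//
+// impl Rewriter {
+//     fn insert_before(&mut self, program: &str, index: usize, text: String) {
+//         self.programs
+//             .entry(program.to_owned())
+//             .or_default()
+//             .push(RewriteOp::InsertBefore { index, text });
+//     }
+//
+//     // rollback just truncates a program's instruction list
+//     fn rollback(&mut self, program: &str, instruction_index: usize) {
+//         if let Some(ops) = self.programs.get_mut(program) {
+//             ops.truncate(instruction_index);
+//         }
+//     }
+// }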
() } + + fn (tsr * TokenStreamRewriter)GetLastRewriteTokenIndexDefault()int { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)SetLastRewriteTokenIndex(program_name String, i: isize) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)InitializeProgram(name String)Vec< RewriteOperation > { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)AddToProgram(name String, op: RewriteOperation) { unimplemented ! () } + + fn (tsr * TokenStreamRewriter)GetProgram(name String) Vec< RewriteOperation > { unimplemented ! () } + fn (tsr * TokenStreamRewriter)GetTextDefault() String { unimplemented ! () } + fn (tsr * TokenStreamRewriter)GetText(program_name String, interval: * Interval) String { unimplemented ! () } + + fn reduce_to_single_operation_per_index(rewrites Vec) -> map[int]RewriteOperation { unimplemented ! () } + + + /* + quick fixing Go lack of: overloads, + */ + + fn max(a, b isize) -> int { unimplemented!() } + fn min(a, b isize) -> int { unimplemented!() } +} + \ No newline at end of file diff --git a/runtime/Rust/src/tokenstream_rewriter_test.rs b/runtime/Rust/src/tokenstream_rewriter_test.rs new file mode 100644 index 0000000000..ff3abbb900 --- /dev/null +++ b/runtime/Rust/src/tokenstream_rewriter_test.rs @@ -0,0 +1,76 @@ +fn test_insert_before_index0(t *testing.T) { unimplemented!() } + +fn prepare_rewriter(str String) -> * TokenStreamRewriter { unimplemented!() } + +pub struct LexerTest { + input: String, + expected: String, + description: String, + expected_exception: Vec, + ops func( * TokenStreamRewriter), +} + +impl LexerTest { + fn new_lexer_test(input, expected: String, desc: String, ops: func(* TokenStreamRewriter)) LexerTest { unimplemented ! () } + + fn new_lexer_exception_test(input String, expected_err Vec, desc: String, ops: func(* TokenStreamRewriter)) LexerTest { unimplemented ! 
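+
+// Under the same hypothetical API as the sketch in tokenstream_rewriter.rs,
+// the first ported case would look roughly like this (the a/b/c lexer the
+// tests rely on is defined just below):
+//
+// #[test]
+// fn test_insert_before_index0() {
+//     let mut rewriter = prepare_rewriter("abc");
+//     rewriter.insert_before_default(0, "0");
+//     assert_eq!(rewriter.get_text_default(), "0abc");
+// }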
() } + + fn panic_tester(t *testing.T, expected_msg Vec, r: * TokenStreamRewriter) { unimplemented!() } + + fn test_lexer_a(t *testing.T) { unimplemented!() } + + + var _ = fmt.Printf + var _ = unicode.IsLetter + + var serializedLexerAtn = Vec < uint16 > { + 3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 5, 15, 8, + 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3, 4, 3, + 4, 2, 2, 5, 3, 3, 5, 4, 7, 5, 3, 2, 2, 2, 14, 2, 3, 3, 2, 2, 2, 2, 5, 3, + 2, 2, 2, 2, 7, 3, 2, 2, 2, 3, 9, 3, 2, 2, 2, 5, 11, 3, 2, 2, 2, 7, 13, + 3, 2, 2, 2, 9, 10, 7, 99, 2, 2, 10, 4, 3, 2, 2, 2, 11, 12, 7, 100, 2, 2, + 12, 6, 3, 2, 2, 2, 13, 14, 7, 101, 2, 2, 14, 8, 3, 2, 2, 2, 3, 2, 2, + } + + var lexerDeserializer = NewATNDeserializer(nil) + var lexerAtn = lexerDeserializer.DeserializeFromUInt16(serializedLexerAtn) + + var lexerChannelNames = Vec< String > { + "DEFAULT_TOKEN_CHANNEL", "HIDDEN", + } + + var lexerModeNames = Vec < String > { + "DEFAULT_MODE", + } + + var lexerLiteralNames = Vec < String > { + "", "'a'", "'b'", "'c'", + } + + var lexerSymbolicNames = Vec < String > { + "", "A", "B", "C", + } + + var lexerRuleNames = Vec < String > { + "A", "B", "C", + } + + pub struct LexerA { + base: BaseLexer, + channel_names: Vec < String > , + mode_names: Vec < String >, + } + + var lexerDecisionToDFA = make( & Vec < DFA >, len(lexerAtn.DecisionToState)) + + fn init() { unimplemented!() } + + fn new_lexer_a(input CharStream) -> * LexerA { unimplemented!() } + + const ( + lexer_aa = 1 + lexer_ab = 2 + lexer_ac = 3 + ) +} + \ No newline at end of file diff --git a/runtime/Rust/src/trace_listener.rs b/runtime/Rust/src/trace_listener.rs new file mode 100644 index 0000000000..7921156c04 --- /dev/null +++ b/runtime/Rust/src/trace_listener.rs @@ -0,0 +1,19 @@ +use std::rc::Weak; +use parser::BaseParser; + +pub struct TraceListener { + parser: Box, +} + +impl TraceListener { + fn new_trace_listener(parser: Box) -> * TraceListener { unimplemented!() } + + fn visit_error_node(&self, _: ErrorNode) { unimplemented!() } + + fn enter_every_rule(&self, ctx: ParserRuleContext) { unimplemented!() } + + fn visit_terminal(&self, node: TerminalNode) { unimplemented!() } + + fn exit_every_rule(&self, ctx: ParserRuleContext) { unimplemented!() } +} + \ No newline at end of file diff --git a/runtime/Rust/src/transition.rs b/runtime/Rust/src/transition.rs new file mode 100644 index 0000000000..15f527c10a --- /dev/null +++ b/runtime/Rust/src/transition.rs @@ -0,0 +1,372 @@ +use std::any::{Any, TypeId}; +use std::borrow::Cow; +use std::fmt::Debug; + +use crate::atn_state::ATNStateRef; +use crate::interval_set::IntervalSet; +use crate::lexer::{LEXER_MAX_CHAR_VALUE, LEXER_MIN_CHAR_VALUE}; +use crate::semantic_context::SemanticContext; + +const _TRANSITION_NAMES: [&'static str; 11] = [ + "INVALID", + "EPSILON", + "RANGE", + "RULE", + "PREDICATE", + "ATOM", + "ACTION", + "SET", + "NOT_SET", + "WILDCARD", + "PRECEDENCE", +]; + +pub const TRANSITION_EPSILON: isize = 1; +pub const TRANSITION_RANGE: isize = 2; +pub const TRANSITION_RULE: isize = 3; +pub const TRANSITION_PREDICATE: isize = 4; +pub const TRANSITION_ATOM: isize = 5; +pub const TRANSITION_ACTION: isize = 6; +pub const TRANSITION_SET: isize = 7; +pub const TRANSITION_NOTSET: isize = 8; +pub const TRANSITION_WILDCARD: isize = 9; +pub const TRANSITION_PRECEDENCE: isize = 10; + +#[allow(non_camel_case_types)] +#[derive(Debug, Eq, PartialEq)] +pub enum TransitionType { + TRANSITION_EPSILON = 1, + TRANSITION_RANGE, + TRANSITION_RULE, + TRANSITION_PREDICATE, + 
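+    // the remaining variants keep their implicit discriminants in step with
+    // the TRANSITION_* serialization constants declared above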
TRANSITION_ATOM, + TRANSITION_ACTION, + TRANSITION_SET, + TRANSITION_NOTSET, + TRANSITION_WILDCARD, + TRANSITION_PRECEDENCE, +} + +// todo remove trait because it is too slow +/// Transition between ATNStates +pub trait Transition: Sync + Send + Debug + Any { + fn get_target(&self) -> ATNStateRef; + fn set_target(&mut self, s: ATNStateRef); + fn is_epsilon(&self) -> bool { + false + } + fn get_label(&self) -> Option> { + None + } + fn get_serialization_type(&self) -> TransitionType; + fn matches(&self, symbol: isize, min_vocab_symbol: isize, max_vocab_symbol: isize) -> bool; + fn get_predicate(&self) -> Option { + None + } + fn get_reachable_target(&self, symbol: isize) -> Option { + // println!("reachable target called on {:?}", self); + if self.matches(symbol, LEXER_MIN_CHAR_VALUE, LEXER_MAX_CHAR_VALUE) { + return Some(self.get_target()); + } + None + } +} + +impl dyn Transition { + #[inline] + pub fn cast(&self) -> &T { + assert_eq!(self.type_id(), TypeId::of::()); + unsafe { &*(self as *const dyn Transition as *const T) } + } +} + +#[derive(Debug)] +pub struct AtomTransition { + pub target: ATNStateRef, + pub label: isize, +} + +impl Transition for AtomTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn get_label(&self) -> Option> { + let mut r = IntervalSet::new(); + r.add_one(self.label); + Some(Cow::Owned(r)) + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_ATOM + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + _symbol == self.label + } +} + +#[derive(Debug)] +pub struct RuleTransition { + pub target: ATNStateRef, + pub follow_state: ATNStateRef, + pub rule_index: isize, + pub precedence: isize, +} + +impl Transition for RuleTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn is_epsilon(&self) -> bool { + true + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_RULE + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + unimplemented!() + } +} + +#[derive(Debug)] +pub struct EpsilonTransition { + pub target: ATNStateRef, + pub outermost_precedence_return: isize, +} + +impl Transition for EpsilonTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn is_epsilon(&self) -> bool { + true + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_EPSILON + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + false + } +} + +#[derive(Debug)] +pub struct RangeTransition { + pub target: ATNStateRef, + pub start: isize, + pub stop: isize, +} + +impl Transition for RangeTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn get_label(&self) -> Option> { + let mut r = IntervalSet::new(); + r.add_range(self.start, self.stop); + Some(Cow::Owned(r)) + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_RANGE + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + _symbol >= self.start && _symbol <= self.stop + } +} + +#[derive(Debug)] +pub struct ActionTransition { + pub target: 
ATNStateRef, + pub is_ctx_dependent: bool, + pub rule_index: isize, + pub action_index: isize, + pub pred_index: isize, +} + +impl Transition for ActionTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn is_epsilon(&self) -> bool { + true + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_ACTION + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + false + } +} + +#[derive(Debug)] +pub struct SetTransition { + pub target: ATNStateRef, + pub set: IntervalSet, +} + +impl Transition for SetTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn get_label(&self) -> Option> { + Some(Cow::Borrowed(&self.set)) + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_SET + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + self.set.contains(_symbol) + } +} + +#[derive(Debug)] +pub struct NotSetTransition { + pub target: ATNStateRef, + pub set: IntervalSet, +} + +impl Transition for NotSetTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn get_label(&self) -> Option> { + Some(Cow::Borrowed(&self.set)) + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_NOTSET + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + _symbol >= _min_vocab_symbol && _symbol <= _max_vocab_symbol && !self.set.contains(_symbol) + } +} + +#[derive(Debug)] +pub struct WildcardTransition { + pub target: ATNStateRef, +} + +impl Transition for WildcardTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_WILDCARD + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + _symbol < _max_vocab_symbol && _symbol > _min_vocab_symbol + } +} + +#[derive(Debug)] +pub struct PredicateTransition { + pub target: ATNStateRef, + pub is_ctx_dependent: bool, + pub rule_index: isize, + pub pred_index: isize, +} + +impl Transition for PredicateTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn is_epsilon(&self) -> bool { + true + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_PREDICATE + } + + fn matches(&self, _symbol: isize, _min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + false + } + + fn get_predicate(&self) -> Option { + Some(SemanticContext::Predicate { + rule_index: self.rule_index, + pred_index: self.pred_index, + is_ctx_dependent: self.is_ctx_dependent, + }) + } +} + +#[derive(Debug)] +pub struct PrecedencePredicateTransition { + pub target: ATNStateRef, + pub precedence: isize, +} + +impl Transition for PrecedencePredicateTransition { + fn get_target(&self) -> ATNStateRef { + self.target + } + fn set_target(&mut self, s: ATNStateRef) { + self.target = s + } + + fn is_epsilon(&self) -> bool { + true + } + + fn get_serialization_type(&self) -> TransitionType { + TransitionType::TRANSITION_PRECEDENCE + } + + fn matches(&self, _symbol: isize, 
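+        // like PredicateTransition, a precedence transition is an epsilon
+        // edge that never matches a symbol directly; prediction consults
+        // get_predicate() instead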
_min_vocab_symbol: isize, _max_vocab_symbol: isize) -> bool { + false + } + + fn get_predicate(&self) -> Option { + Some(SemanticContext::Precedence(self.precedence)) + } +} diff --git a/runtime/Rust/src/tree.rs b/runtime/Rust/src/tree.rs new file mode 100644 index 0000000000..bbcf02c332 --- /dev/null +++ b/runtime/Rust/src/tree.rs @@ -0,0 +1,462 @@ +//! General AST +use std::any::Any; +use std::borrow::Borrow; + +use std::fmt::{Debug, Formatter}; +use std::iter::from_fn; +use std::marker::PhantomData; +use std::ops::Deref; +use std::rc::Rc; + +use crate::char_stream::InputData; +use crate::int_stream::EOF; +use crate::interval_set::Interval; +use crate::parser::ParserNodeType; +use crate::parser_rule_context::ParserRuleContext; +use crate::recognizer::Recognizer; +use crate::rule_context::{CustomRuleContext, RuleContext}; +use crate::token::Token; +use crate::token_factory::TokenFactory; +use crate::{interval_set, trees, CoerceTo}; +use std::mem; + +//todo try to make in more generic +#[allow(missing_docs)] +pub trait Tree<'input>: RuleContext<'input> { + fn get_parent(&self) -> Option>::Type>> { + None + } + fn has_parent(&self) -> bool { + false + } + fn get_payload(&self) -> Box { + unimplemented!() + } + fn get_child(&self, _i: usize) -> Option>::Type>> { + None + } + fn get_child_count(&self) -> usize { + 0 + } + fn get_children<'a>( + &'a self, + ) -> Box>::Type>> + 'a> + where + 'input: 'a, + { + let mut index = 0; + let iter = from_fn(move || { + if index < self.get_child_count() { + index += 1; + self.get_child(index - 1) + } else { + None + } + }); + + Box::new(iter) + } + // fn get_children_full(&self) -> &RefCell>::Type>>> { unimplemented!() } +} + +/// Tree that knows about underlying text +pub trait ParseTree<'input>: Tree<'input> { + /// Return an {@link Interval} indicating the index in the + /// {@link TokenStream} of the first and last token associated with this + /// subtree. If this node is a leaf, then the interval represents a single + /// token and has interval i..i for token index i. + fn get_source_interval(&self) -> Interval { + interval_set::INVALID + } + + /// Return combined text of this AST node. + /// To create resulting string it does traverse whole subtree, + /// also it includes only tokens added to the parse tree + /// + /// Since tokens on hidden channels (e.g. whitespace or comments) are not + /// added to the parse trees, they will not appear in the output of this + /// method. + fn get_text(&self) -> String { + String::new() + } + + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// We have to know the recognizer so we can get rule names. + fn to_string_tree( + &self, + r: &dyn Recognizer<'input, TF = Self::TF, Node = Self::Ctx>, + ) -> String { + trees::string_tree(self, r.get_rule_names()) + } +} + +/// text of the node. 
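+
+// `to_string_tree` above delegates to `trees::string_tree`, which prints the
+// LISP-style `(root child1 .. childN)` form. Reduced to a self-contained
+// miniature over a simplified stand-in trait (illustration only; the real
+// traits are generic over the node type):
+//
+// trait MiniTree {
+//     fn text(&self) -> String;
+//     fn children(&self) -> &[Box<dyn MiniTree>];
+// }
+//
+// fn lisp_string(t: &dyn MiniTree) -> String {
+//     if t.children().is_empty() {
+//         return t.text();
+//     }
+//     let mut out = format!("({}", t.text());
+//     for child in t.children() {
+//         out.push(' ');
+//         out.push_str(&lisp_string(child.as_ref()));
+//     }
+//     out.push(')');
+//     out
+// }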
+/// Already implemented for all rule contexts +// pub trait NodeText { +// fn get_node_text(&self, rule_names: &[&str]) -> String; +// } +// +// impl NodeText for T { +// default fn get_node_text(&self, _rule_names: &[&str]) -> String { "".to_owned() } +// } +// +// impl<'input, T: CustomRuleContext<'input>> NodeText for T { +// default fn get_node_text(&self, rule_names: &[&str]) -> String { +// let rule_index = self.get_rule_index(); +// let rule_name = rule_names[rule_index]; +// let alt_number = self.get_alt_number(); +// if alt_number != INVALID_ALT { +// return format!("{}:{}", rule_name, alt_number); +// } +// return rule_name.to_owned(); +// } +// } + +#[doc(hidden)] +#[derive(Debug)] +pub struct NoError; + +#[doc(hidden)] +#[derive(Debug)] +pub struct IsError; + +/// Generic leaf AST node +pub struct LeafNode<'input, Node: ParserNodeType<'input>, T: 'static> { + /// Token, this leaf consist of + pub symbol: >::Tok, + iserror: PhantomData, +} +better_any::tid! { impl <'input, Node, T:'static> TidAble<'input> for LeafNode<'input, Node, T> where Node:ParserNodeType<'input> } + +impl<'input, Node: ParserNodeType<'input>, T: 'static> CustomRuleContext<'input> + for LeafNode<'input, Node, T> +{ + type TF = Node::TF; + type Ctx = Node; + + fn get_rule_index(&self) -> usize { + usize::max_value() + } + + fn get_node_text(&self, _rule_names: &[&str]) -> String { + self.symbol.borrow().get_text().to_display() + } +} + +impl<'input, Node: ParserNodeType<'input>, T: 'static> ParserRuleContext<'input> + for LeafNode<'input, Node, T> +{ +} + +impl<'input, Node: ParserNodeType<'input>, T: 'static> Tree<'input> for LeafNode<'input, Node, T> {} + +impl<'input, Node: ParserNodeType<'input>, T: 'static> RuleContext<'input> + for LeafNode<'input, Node, T> +{ +} + +// impl<'input, Node: ParserNodeType<'input>, T: 'static> NodeText for LeafNode<'input, Node, T> { +// fn get_node_text(&self, _rule_names: &[&str]) -> String { +// self.symbol.borrow().get_text().to_display() +// } +// } + +impl<'input, Node: ParserNodeType<'input>, T: 'static> ParseTree<'input> + for LeafNode<'input, Node, T> +{ + fn get_source_interval(&self) -> Interval { + let i = self.symbol.borrow().get_token_index(); + Interval { a: i, b: i } + } + + fn get_text(&self) -> String { + self.symbol.borrow().get_text().to_display() + } +} + +impl<'input, Node: ParserNodeType<'input>, T: 'static> Debug for LeafNode<'input, Node, T> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if self.symbol.borrow().get_token_type() == EOF { + f.write_str("") + } else { + let a = self.symbol.borrow().get_text().to_display(); + f.write_str(&a) + } + } +} + +impl<'input, Node: ParserNodeType<'input>, T: 'static> LeafNode<'input, Node, T> { + /// creates new leaf node + pub fn new(symbol: >::Tok) -> Self { + Self { + symbol, + iserror: Default::default(), + } + } +} + +/// non-error AST leaf node +pub type TerminalNode<'input, NodeType> = LeafNode<'input, NodeType, NoError>; + +impl<'input, Node: ParserNodeType<'input>, Listener: ParseTreeListener<'input, Node> + ?Sized> + Listenable for TerminalNode<'input, Node> +{ + fn enter(&self, listener: &mut Listener) { + listener.visit_terminal(self) + } + + fn exit(&self, _listener: &mut Listener) { + // do nothing + } +} + +impl<'input, Node: ParserNodeType<'input>, Visitor: ParseTreeVisitor<'input, Node> + ?Sized> + Visitable for TerminalNode<'input, Node> +{ + fn accept(&self, visitor: &mut Visitor) { + visitor.visit_terminal(self) + } +} + +/// # Error Leaf +/// Created for each token 
created or consumed during recovery +pub type ErrorNode<'input, NodeType> = LeafNode<'input, NodeType, IsError>; + +impl<'input, Node: ParserNodeType<'input>, Listener: ParseTreeListener<'input, Node> + ?Sized> + Listenable for ErrorNode<'input, Node> +{ + fn enter(&self, listener: &mut Listener) { + listener.visit_error_node(self) + } + + fn exit(&self, _listener: &mut Listener) { + // do nothing + } +} + +impl<'input, Node: ParserNodeType<'input>, Visitor: ParseTreeVisitor<'input, Node> + ?Sized> + Visitable for ErrorNode<'input, Node> +{ + fn accept(&self, visitor: &mut Visitor) { + visitor.visit_error_node(self) + } +} + +pub trait ParseTreeVisitorCompat<'input>: VisitChildren<'input, Self::Node> { + type Node: ParserNodeType<'input>; + type Return: Default; + + /// Temporary storage for `ParseTreeVisitor` blanket implementation to work + /// + /// If you have `()` as a return value + /// either use `YourGrammarParseTreeVisitor` directly + /// or make + /// ```rust + /// Box::leak(Box::new(())) + /// # ; + /// ``` + /// as an implementation of that method so that there is no need to create dummy field in your visitor + fn temp_result(&mut self) -> &mut Self::Return; + + fn visit(&mut self, node: &>::Type) -> Self::Return { + self.visit_node(&node); + mem::take(self.temp_result()) + } + + /// Called on terminal(leaf) node + fn visit_terminal(&mut self, _node: &TerminalNode<'input, Self::Node>) -> Self::Return { + Self::Return::default() + } + /// Called on error node + fn visit_error_node(&mut self, _node: &ErrorNode<'input, Self::Node>) -> Self::Return { + Self::Return::default() + } + + fn visit_children( + &mut self, + node: &>::Type, + ) -> Self::Return { + let mut result = Self::Return::default(); + for node in node.get_children() { + if !self.should_visit_next_child(&node, &result) { + break; + } + + let child_result = self.visit(&node); + result = self.aggregate_results(result, child_result); + } + return result; + } + + fn aggregate_results(&self, _aggregate: Self::Return, next: Self::Return) -> Self::Return { + next + } + + fn should_visit_next_child( + &self, + _node: &>::Type, + _current: &Self::Return, + ) -> bool { + true + } +} + +// struct VisitorAdapter<'input, T: ParseTreeVisitorCompat<'input>> { +// visitor: T, +// pub curr_value: T::Return, +// _pd: PhantomData<&'input str>, +// } + +impl<'input, Node, T> ParseTreeVisitor<'input, Node> for T +where + Node: ParserNodeType<'input>, + Node::Type: VisitableDyn, + T: ParseTreeVisitorCompat<'input, Node = Node>, +{ + fn visit_terminal(&mut self, node: &TerminalNode<'input, Node>) { + let result = ::visit_terminal(self, node); + *::temp_result(self) = result; + } + + fn visit_error_node(&mut self, node: &ErrorNode<'input, Node>) { + let result = ::visit_error_node(self, node); + *::temp_result(self) = result; + } + + fn visit_children(&mut self, node: &Node::Type) { + let result = ::visit_children(self, node); + *::temp_result(self) = result; + } +} + +/// Base interface for visiting over syntax tree +pub trait ParseTreeVisitor<'input, Node: ParserNodeType<'input>>: + VisitChildren<'input, Node> +{ + /// Basically alias for `node.accept(self)` in visitor implementation + /// just to make api closer to java + + /// Called on terminal(leaf) node + fn visit_terminal(&mut self, _node: &TerminalNode<'input, Node>) {} + /// Called on error node + fn visit_error_node(&mut self, _node: &ErrorNode<'input, Node>) {} + /// Implement this only if you want to change children visiting algorithm + fn visit_children(&mut self, node: 
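+        // default: plain recursive descent; the VisitChildren blanket impl
+        // below routes each child through accept_dyn back into this visitor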
&Node::Type) { + node.get_children() + .for_each(|child| self.visit_node(&child)) + } +} + +/// Workaround for default recursive children visiting +/// +/// Already blanket implemented for all visitors. +/// To override it you would need to implement `ParseTreeVisitor::visit_children` +pub trait VisitChildren<'input, Node: ParserNodeType<'input>> { + // fn visit_children_inner(&mut self, node: &Node::Type); + fn visit_node(&mut self, node: &Node::Type); +} + +impl<'input, Node, T> VisitChildren<'input, Node> for T +where + Node: ParserNodeType<'input>, + T: ParseTreeVisitor<'input, Node> + ?Sized, + // for<'a> &'a mut Self: CoerceUnsized<&'a mut Node::Visitor>, + Node::Type: VisitableDyn, +{ + // #[inline(always)] + // fn visit_children_inner(&mut self, node: &Node::Type) { + // // node.accept_children(self) + // + // } + + fn visit_node(&mut self, node: &Node::Type) { + node.accept_dyn(self) + } +} + +/// Types that can accept particular visitor +/// ** Usually implemented only in generated parser ** +pub trait Visitable { + /// Calls corresponding visit callback on visitor`Vis` + fn accept(&self, _visitor: &mut Vis) { + unreachable!("should have been properly implemented by generated context when reachable") + } +} + +// workaround trait for accepting sized visitor on rule context trait object +#[doc(hidden)] +pub trait VisitableDyn { + fn accept_dyn(&self, _visitor: &mut Vis) { + unreachable!("should have been properly implemented by generated context when reachable") + } +} + +/// Base parse listener interface +pub trait ParseTreeListener<'input, Node: ParserNodeType<'input>> { + /// Called when parser creates terminal node + fn visit_terminal(&mut self, _node: &TerminalNode<'input, Node>) {} + /// Called when parser creates error node + fn visit_error_node(&mut self, _node: &ErrorNode<'input, Node>) {} + /// Called when parser enters any rule node + fn enter_every_rule(&mut self, _ctx: &Node::Type) {} + /// Called when parser exits any rule node + fn exit_every_rule(&mut self, _ctx: &Node::Type) {} +} + +/// Types that can accept particular listener +/// ** Usually implemented only in generated parser ** +pub trait Listenable { + /// Calls corresponding enter callback on listener `T` + fn enter(&self, _listener: &mut T) {} + /// Calls corresponding exit callback on listener `T` + fn exit(&self, _listener: &mut T) {} +} + +// #[inline] +// pub fn temp_to_trait(mut input: Z, f:impl FnOnce(&mut TraitObject)) -> Z where &mut Z:CoerceUnsized<&mut TraitObject>{ +// let a = &mut input as &mut TraitObject; +// f(a) +// } + +/// Helper struct to accept parse listener on already generated tree +#[derive(Debug)] +pub struct ParseTreeWalker<'input, 'a, Node, T = dyn ParseTreeListener<'input, Node> + 'a>( + PhantomData &'input Node::Type>, +) +where + Node: ParserNodeType<'input>, + T: ParseTreeListener<'input, Node> + ?Sized; + +impl<'input, 'a, Node, T> ParseTreeWalker<'input, 'a, Node, T> +where + Node: ParserNodeType<'input>, + T: ParseTreeListener<'input, Node> + 'a + ?Sized, + Node::Type: Listenable, +{ + /// Walks recursively over tree `t` with `listener` + pub fn walk(mut listener: Box, t: &Ctx) -> Box + where + // for<'x> &'x mut Listener: CoerceUnsized<&'x mut T>, + // for<'x> &'x Ctx: CoerceUnsized<&'x Node::Type>, + Listener: CoerceTo, + Ctx: CoerceTo, + { + // let mut listener = listener as Box; + Self::walk_inner(listener.as_mut().coerce_mut_to(), t.coerce_ref_to()); + + // just cast back + // unsafe { Box::::from_raw(Box::into_raw(listener) as *mut _) } + listener + } + + fn 
walk_inner(listener: &mut T, t: &Node::Type) { + t.enter(listener); + + for child in t.get_children() { + Self::walk_inner(listener, child.deref()) + } + + t.exit(listener); + } +} diff --git a/runtime/Rust/src/trees.rs b/runtime/Rust/src/trees.rs new file mode 100644 index 0000000000..67602ea4f9 --- /dev/null +++ b/runtime/Rust/src/trees.rs @@ -0,0 +1,50 @@ +/*! +A set of utility routines useful for all kinds of ANTLR trees. +*/ + +use std::ops::Deref; + +use crate::tree::Tree; +use crate::utils; + +/// Print out a whole tree, not just a node, in LISP format +/// {@code (root child1 .. childN)}. Print just a node if this is a leaf. +pub fn string_tree<'a, T: Tree<'a> + ?Sized>(tree: &T, rule_names: &[&str]) -> String { + let s = utils::escape_whitespaces(get_node_text(tree, rule_names), false); + if tree.get_child_count() == 0 { + return s; + } + let mut result = String::new(); + result.push('('); + result.extend(s.chars()); + result = tree + .get_children() + // .iter() + .map(|child| string_tree(child.deref(), rule_names)) + .fold(result, |mut acc, text| { + acc.push(' '); + acc.extend(text.chars()); + acc + }); + result.push(')'); + result +} + +/// Print out tree node text representation (rule name or token text) +pub fn get_node_text<'a>(t: &(impl Tree<'a> + ?Sized), rule_names: &[&str]) -> String { + t.get_node_text(rule_names) +} + +//pub fn get_children(t: impl Tree) -> Vec> { unimplemented!() } +// +//pub fn get_ancestors(t: impl Tree) -> Vec> { unimplemented!() } +// +//pub fn find_all_token_nodes(t: impl ParseTree, ttype: isize) -> Vec> { unimplemented!() } +// +//pub fn find_all_rule_nodes(t: impl ParseTree, rule_index: isize) -> Vec> { unimplemented!() } +// +//pub fn find_all_nodes(t: impl ParseTree, index: isize, find_tokens: bool) -> Vec> { unimplemented!() } +// +////fn trees_find_all_nodes(t: ParseTree, index: isize, findTokens: bool, nodes: * Vec) { unimplemented!() } +// +//pub fn descendants(t: impl ParseTree) -> Vec { unimplemented!() } diff --git a/runtime/Rust/src/utils.rs b/runtime/Rust/src/utils.rs new file mode 100644 index 0000000000..ac6e1f6cc3 --- /dev/null +++ b/runtime/Rust/src/utils.rs @@ -0,0 +1,58 @@ +use std::borrow::Borrow; +use std::cell::Cell; +// use crate::utils::Cow2::{Borrowed2, Owned2}; + +pub fn escape_whitespaces(data: impl Borrow, escape_spaces: bool) -> String { + let data = data.borrow(); + let mut res = String::with_capacity(data.len()); + data.chars().for_each(|ch| match ch { + ' ' if escape_spaces => res.extend("\u{00B7}".chars()), + '\t' => res.extend("\\t".chars()), + '\n' => res.extend("\\n".chars()), + '\r' => res.extend("\\r".chars()), + _ => res.push(ch), + }); + res +} + +pub trait Sealed {} + +pub fn cell_update(cell: &Cell, f: F) -> T +where + F: FnOnce(T) -> T, +{ + let old = cell.get(); + let new = f(old); + cell.set(new); + new +} + +// pub enum Cow2<'a,Ref,T:Borrow = Ref>{ +// Borrowed2(&'a Ref), +// Owned2(T) +// } +// +// impl<'a,Ref,T:Borrow + > Cow2<'a,Ref,T>{ +// fn to_owned(&self) -> T +// } +// +// impl> Borrow for Cow2<'_,Ref,T>{ +// fn borrow(&self) -> &Ref { +// match self{ +// Cow2::Borrowed2(x) => x, +// Cow2::Owned2(x) => x.borrow(), +// } +// } +// } +// +// impl<'a,Ref,T:Borrow> From<&'a Ref> for Cow2<'a,Ref,T>{ +// fn from(f: &'a Ref) -> Self { +// Borrowed2(f) +// } +// } +// +// impl<'a,Ref,T:Borrow> From for Cow2<'a,Ref,T>{ +// fn from(f: T) -> Self { +// Owned2(f) +// } +// } diff --git a/runtime/Rust/src/vocabulary.rs b/runtime/Rust/src/vocabulary.rs new file mode 100644 index 0000000000..cf734bf84d --- 
/dev/null +++ b/runtime/Rust/src/vocabulary.rs @@ -0,0 +1,141 @@ +#![allow(missing_docs)] +//! Mapping from symbol type to its string representation +use std::borrow::Borrow; +//use std::borrow::Cow; +use std::borrow::Cow::{self, Borrowed, Owned}; +use std::cmp::max; +use std::fmt::Debug; + +use crate::dfa::ScopeExt; +use crate::token::TOKEN_EOF; + +pub trait Vocabulary: Sync + Debug { + fn get_max_token_type(&self) -> isize; + fn get_literal_name(&self, token_type: isize) -> Option<&str>; + fn get_symbolic_name(&self, token_type: isize) -> Option<&str>; + fn get_display_name(&self, token_type: isize) -> Cow<'_, str>; +} + +#[derive(Debug)] +pub struct VocabularyImpl { + literal_names: Vec>, + symbolic_names: Vec>, + display_names: Vec>, + max_token_type: isize, +} + +fn collect_to_string<'b, T: Borrow + 'b>( + iter: impl IntoIterator>, +) -> Vec> { + iter.into_iter() + .map(|x| x.as_ref().map(|it| it.borrow().to_owned())) + .collect() +} + +impl VocabularyImpl { + pub fn new<'b, T: Borrow + 'b, Iter: IntoIterator>>( + literal_names: Iter, + symbolic_names: Iter, + display_names: Option, + ) -> VocabularyImpl { + // let display_names = display_names.unwrap_or(&[]); + VocabularyImpl { + literal_names: collect_to_string(literal_names), + symbolic_names: collect_to_string(symbolic_names), + display_names: collect_to_string(display_names.into_iter().flatten()), + max_token_type: 0, + } + .modify_with(|it| { + it.max_token_type = max( + it.literal_names.len(), + max(it.symbolic_names.len(), it.display_names.len()), + ) as isize + - 1 + }) + } + + pub fn from_token_names(token_names: &[Option<&str>]) -> VocabularyImpl { + let token_names = collect_to_string(token_names.iter()); + let mut literal_names = token_names.clone(); + let mut symbolic_names = token_names.clone(); + + for (i, tn) in token_names.iter().enumerate() { + match tn { + Some(tn) if !tn.is_empty() && tn.chars().next().unwrap() == '\'' => { + symbolic_names[i] = None; + continue; + } + Some(tn) if !tn.is_empty() && tn.chars().next().unwrap().is_uppercase() => { + literal_names[i] = None; + continue; + } + None => { + continue; + } + _ => {} + } + literal_names[i] = None; + symbolic_names[i] = None; + } + + Self::new( + literal_names.iter(), + symbolic_names.iter(), + Some(token_names.iter()), + ) + } +} + +impl Vocabulary for VocabularyImpl { + fn get_max_token_type(&self) -> isize { + self.max_token_type + } + + fn get_literal_name(&self, token_type: isize) -> Option<&str> { + self.literal_names + .get(token_type as usize) + .and_then(|x| x.as_deref()) + } + + fn get_symbolic_name(&self, token_type: isize) -> Option<&str> { + if token_type == TOKEN_EOF { + return Some("EOF"); + } + self.symbolic_names + .get(token_type as usize) + .and_then(|x| x.as_deref()) + } + + fn get_display_name(&self, token_type: isize) -> Cow<'_, str> { + self.display_names + .get(token_type as usize) + .and_then(|x| x.as_deref()) + .or_else(|| self.get_literal_name(token_type)) + .or_else(|| self.get_symbolic_name(token_type)) + .map(|x| Borrowed(x)) + .unwrap_or(Owned(token_type.to_string())) + } +} + +pub(crate) static DUMMY_VOCAB: DummyVocab = DummyVocab; + +#[derive(Debug)] +pub(crate) struct DummyVocab; + +impl Vocabulary for DummyVocab { + fn get_max_token_type(&self) -> isize { + unimplemented!() + } + + fn get_literal_name(&self, _token_type: isize) -> Option<&str> { + unimplemented!() + } + + fn get_symbolic_name(&self, _token_type: isize) -> Option<&str> { + unimplemented!() + } + + fn get_display_name(&self, token_type: isize) -> Cow<'_, str> 
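+        // even the dummy vocabulary can display something: unknown token
+        // types fall back to their numeric value rendered as a string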
{ + token_type.to_string().into() + } +} diff --git a/runtime/Rust/templates/BaseRustTest.java b/runtime/Rust/templates/BaseRustTest.java new file mode 100644 index 0000000000..4c0422becf --- /dev/null +++ b/runtime/Rust/templates/BaseRustTest.java @@ -0,0 +1,924 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.Tool; +import org.antlr.v4.analysis.AnalysisPipeline; +import org.antlr.v4.automata.ATNFactory; +import org.antlr.v4.automata.ATNPrinter; +import org.antlr.v4.automata.LexerATNFactory; +import org.antlr.v4.automata.ParserATNFactory; +import org.antlr.v4.codegen.CodeGenerator; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.atn.*; +import org.antlr.v4.runtime.dfa.DFA; +import org.antlr.v4.runtime.misc.IntegerList; +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.semantics.SemanticPipeline; +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.ErrorQueue; +import org.antlr.v4.test.runtime.RuntimeTestSupport; +import org.antlr.v4.test.runtime.StreamVacuum; +import org.antlr.v4.tool.*; +import org.stringtemplate.v4.ST; +import org.stringtemplate.v4.STGroup; +import org.stringtemplate.v4.STGroupString; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.URL; +import java.util.*; + +import static junit.framework.TestCase.*; +import static org.antlr.v4.test.runtime.BaseRuntimeTest.writeFile; +import static org.junit.Assert.assertArrayEquals; + +public class BaseRustTest implements RuntimeTestSupport { + public static final String newline = System.getProperty("line.separator"); + public static final String pathSep = System.getProperty("path.separator"); + + /** + * When the {@code antlr.preserve-test-dir} runtime property is set to + * {@code true}, the temporary directories created by the test run will not + * be removed at the end of the test run, even for tests that completed + * successfully. + *
+     * <p>
+ * The default behavior (used in all other cases) is removing the temporary + * directories for all tests which completed successfully, and preserving + * the directories for tests which failed.
+ */ + public static final boolean PRESERVE_TEST_DIR = Boolean.parseBoolean(System.getProperty("antlr.preserve-test-dir")); + + /** + * The base test directory is the directory where generated files get placed + * during unit test execution. + *
+     * <p>
+ * The default value for this property is the {@code java.io.tmpdir} system + * property, and can be overridden by setting the + * {@code antlr.java-test-dir} property to a custom location. Note that the + * {@code antlr.java-test-dir} property directly affects the + * {@link #CREATE_PER_TEST_DIRECTORIES} value as well.
+ */ + public static final String BASE_TEST_DIR; + + /** + * When {@code true}, a temporary directory will be created for each test + * executed during the test run. + *
+     * <p>
+ * This value is {@code true} when the {@code antlr.java-test-dir} system + * property is set, and otherwise {@code false}.
+ */ + public static final boolean CREATE_PER_TEST_DIRECTORIES; + + public String cargo_options = ""; + + static { + String baseTestDir = System.getProperty("antlr.java-test-dir"); + boolean perTestDirectories = false; + if (baseTestDir == null || baseTestDir.isEmpty()) { + baseTestDir = System.getProperty("java.io.tmpdir"); + perTestDirectories = true; + } + + if (!new File(baseTestDir).isDirectory()) { + throw new UnsupportedOperationException("The specified base test directory does not exist: " + baseTestDir); + } + + BASE_TEST_DIR = baseTestDir; + CREATE_PER_TEST_DIRECTORIES = perTestDirectories; + } + + public String tmpdir = null; + public String outputdir = null; + private String srcdir; + + /** + * If error during parser execution, store stderr here; can't return + * stdout and stderr. This doesn't trap errors from running antlr. + */ + protected String stderrDuringParse; + + /** + * Errors found while running antlr + */ + protected StringBuilder antlrToolErrors; + + @Override + public void testSetUp() throws Exception { +// STGroup.verbose = true; + if (CREATE_PER_TEST_DIRECTORIES) { + // new output dir for each test + String threadName = Thread.currentThread().getName(); + String testDirectory = getClass().getSimpleName() + "-" + threadName + "-" + System.nanoTime(); + tmpdir = new File(BASE_TEST_DIR, testDirectory).getAbsolutePath(); + + } else { + tmpdir = new File(BASE_TEST_DIR).getAbsolutePath(); + if (!PRESERVE_TEST_DIR && new File(tmpdir).exists()) { + eraseFiles(); + } + } + // single output dir for all test which allows to not rebuild runtime every time + outputdir = new File(BASE_TEST_DIR, "output").getAbsolutePath(); + srcdir = new File(tmpdir, "src").getAbsolutePath(); + +// System.out.println(tmpdir); + antlrToolErrors = new StringBuilder(); + } + + @Override + public void testTearDown() throws Exception { + } + + @Override + public String getTmpDir() { + return srcdir; + } + + @Override + public String getStdout() { + return null; + } + + @Override + public String getParseErrors() { + return stderrDuringParse; + } + + @Override + public String getANTLRToolErrors() { + if (antlrToolErrors.length() == 0) { + return null; + } + return antlrToolErrors.toString(); + } + + protected Tool newTool(String[] args) { + Tool tool = new Tool(args); + return tool; + } + + protected ATN createATN(Grammar g, boolean useSerializer) { + if (g.atn == null) { + semanticProcess(g); + assertEquals(0, g.tool.getNumErrors()); + + ParserATNFactory f; + if (g.isLexer()) { + f = new LexerATNFactory((LexerGrammar) g); + } else { + f = new ParserATNFactory(g); + } + + g.atn = f.createATN(); + assertEquals(0, g.tool.getNumErrors()); + } + + ATN atn = g.atn; + if (useSerializer) { + char[] serialized = ATNSerializer.getSerializedAsChars(atn); + return new ATNDeserializer().deserialize(serialized); + } + + return atn; + } + + protected void semanticProcess(Grammar g) { + if (g.ast != null && !g.ast.hasErrors) { +// System.out.println(g.ast.toStringTree()); + Tool antlr = new Tool(); + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); + if (g.getImportedGrammars() != null) { // process imported grammars (if any) + for (Grammar imp : g.getImportedGrammars()) { + antlr.processNonCombinedGrammar(imp, false); + } + } + } + } + + public DFA createDFA(Grammar g, DecisionState s) { +// PredictionDFAFactory conv = new PredictionDFAFactory(g, s); +// DFA dfa = conv.createDFA(); +// conv.issueAmbiguityWarnings(); +// System.out.print("DFA="+dfa); +// return dfa; + return null; + } + +// public 
void minimizeDFA(DFA dfa) { +// DFAMinimizer dmin = new DFAMinimizer(dfa); +// dfa.minimized = dmin.minimize(); +// } + + IntegerList getTypesFromString(Grammar g, String expecting) { + IntegerList expectingTokenTypes = new IntegerList(); + if (expecting != null && !expecting.trim().isEmpty()) { + for (String tname : expecting.replace(" ", "").split(",")) { + int ttype = g.getTokenType(tname); + expectingTokenTypes.add(ttype); + } + } + return expectingTokenTypes; + } + + public IntegerList getTokenTypesViaATN(String input, LexerATNSimulator lexerATN) { + ANTLRInputStream in = new ANTLRInputStream(input); + IntegerList tokenTypes = new IntegerList(); + int ttype; + do { + ttype = lexerATN.match(in, Lexer.DEFAULT_MODE); + tokenTypes.add(ttype); + } while (ttype != Token.EOF); + return tokenTypes; + } + + public List getTokenTypes(LexerGrammar lg, + ATN atn, + CharStream input) { + LexerATNSimulator interp = new LexerATNSimulator(atn, new DFA[]{new DFA(atn.modeToStartState.get(Lexer.DEFAULT_MODE))}, null); + List tokenTypes = new ArrayList(); + int ttype; + boolean hitEOF = false; + do { + if (hitEOF) { + tokenTypes.add("EOF"); + break; + } + int t = input.LA(1); + ttype = interp.match(input, Lexer.DEFAULT_MODE); + if (ttype == Token.EOF) { + tokenTypes.add("EOF"); + } else { + tokenTypes.add(lg.typeToTokenList.get(ttype)); + } + + if (t == IntStream.EOF) { + hitEOF = true; + } + } while (ttype != Token.EOF); + return tokenTypes; + } + + List checkRuleDFA(String gtext, String ruleName, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(gtext, equeue); + ATN atn = createATN(g, false); + ATNState s = atn.ruleToStartState[g.getRule(ruleName).index]; + if (s == null) { + System.err.println("no such rule: " + ruleName); + return null; + } + ATNState t = s.transition(0).target; + if (!(t instanceof DecisionState)) { + System.out.println(ruleName + " has no decision"); + return null; + } + DecisionState blk = (DecisionState) t; + checkRuleDFA(g, blk, expecting); + return equeue.all; + } + + List checkRuleDFA(String gtext, int decision, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(gtext, equeue); + ATN atn = createATN(g, false); + DecisionState blk = atn.decisionToState.get(decision); + checkRuleDFA(g, blk, expecting); + return equeue.all; + } + + void checkRuleDFA(Grammar g, DecisionState blk, String expecting) + throws Exception { + DFA dfa = createDFA(g, blk); + String result = null; + if (dfa != null) result = dfa.toString(); + assertEquals(expecting, result); + } + + List checkLexerDFA(String gtext, String expecting) + throws Exception { + return checkLexerDFA(gtext, LexerGrammar.DEFAULT_MODE_NAME, expecting); + } + + List checkLexerDFA(String gtext, String modeName, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + LexerGrammar g = new LexerGrammar(gtext, equeue); + g.atn = createATN(g, false); +// LexerATNToDFAConverter conv = new LexerATNToDFAConverter(g); +// DFA dfa = conv.createDFA(modeName); +// g.setLookaheadDFA(0, dfa); // only one decision to worry about +// +// String result = null; +// if ( dfa!=null ) result = dfa.toString(); +// assertEquals(expecting, result); +// +// return equeue.all; + return null; + } + + protected String execLexer(String grammarFileName, String grammarStr, + String lexerName, String input) { + return execLexer(grammarFileName, grammarStr, lexerName, input, false); + } + + @Override + public String execLexer(String 
grammarFileName, String grammarStr, + String lexerName, String input, boolean showDFA) { + boolean success = rawGenerateAndBuildRecognizer(grammarFileName, + grammarStr, null, lexerName, "-no-listener"); + assertTrue(success); + writeFile(tmpdir, "input", input); + writeLexerTestFile(lexerName, showDFA); + return executeRecognizer(); + } + + @Override + public String execParser(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + String listenerName, + String visitorName, + String startRuleName, + String input, + boolean showDiagnosticErrors) { + return execParser(grammarFileName, grammarStr, parserName, lexerName, + listenerName, visitorName, startRuleName, input, showDiagnosticErrors, false); + } + + /** + * ANTLR isn't thread-safe to process grammars so we use a global lock for testing + */ + public static final Object antlrLock = new Object(); + + public String execParser(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + String listenerName, + String visitorName, + String startRuleName, + String input, + boolean showDiagnosticErrors, + boolean profile) { + boolean success = rawGenerateAndBuildRecognizer(grammarFileName, + grammarStr, + parserName, + lexerName, + "-visitor"); + assertTrue(success); + writeFile(tmpdir, "input", input); + rawBuildRecognizerTestFile(parserName, lexerName, listenerName, + visitorName, startRuleName, showDiagnosticErrors); + return executeRecognizer(); + } + + protected void rawBuildRecognizerTestFile(String parserName, + String lexerName, String listenerName, String visitorName, + String parserStartRuleName, boolean debug) { + this.stderrDuringParse = null; + if (parserName == null) { + writeLexerTestFile(lexerName, false); + } else { + writeParserTestFile(parserName, lexerName, listenerName, + visitorName, parserStartRuleName, debug); + } + } + + /** + * Return true if all is well + */ + protected boolean rawGenerateAndBuildRecognizer(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + String... extraOptions) { + return rawGenerateAndBuildRecognizer(grammarFileName, grammarStr, parserName, lexerName, false, extraOptions); + } + + /** + * Return true if all is well + */ + protected boolean rawGenerateAndBuildRecognizer(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + boolean defaultListener, + String... 
extraOptions) { + ErrorQueue equeue = + BaseRuntimeTest.antlrOnString(getTmpDir(), "Rust", grammarFileName, grammarStr, defaultListener, extraOptions); + if (!equeue.errors.isEmpty()) { + System.out.println(equeue.errors); + return false; + } + return true; + } + + private String executeRecognizer() { +// System.out.println("dir: " + tmpdir); + writeFile(tmpdir, "Cargo.toml", + "[package]\n" + + "name = \"antlr-test\"\n" + + "version=\"0.1.0\"\n" + + "edition=\"2018\"\n" + + "\n" + + "[dependencies]\n" + + "antlr-rust = { path = \"" + locateRuntimeSrc() + "\" }\n" + + "[profile.release]\n" + + "opt-level=1\n"); + + cargo("build"); + this.stderrDuringParse = null; + + return cargo("run"); + } + + private String locateRuntimeSrc() { + ClassLoader loader = Thread.currentThread().getContextClassLoader(); + URL rustRuntime = loader.getResource("Rust"); + if (rustRuntime == null) { + throw new RuntimeException("Rust runtime file not found at:" + rustRuntime.getPath()); + } + File runtimeDir = new File(rustRuntime.getPath()); + if (!runtimeDir.exists()) { + throw new RuntimeException("Cannot find Rust ANTLR runtime"); + } + + return runtimeDir.getAbsolutePath(); + } + + private String cargo(String command) { + try { + ProcessBuilder builder = new ProcessBuilder("cargo", command, "--quiet"/*, "--offline"*/, cargo_options); + builder.environment().put("CARGO_TARGET_DIR", outputdir); + builder.environment().put("RUST_BACKTRACE", "1"); + builder.environment().put("RUSTFLAGS", "-Awarnings"); + builder.directory(new File(tmpdir)); + long time = System.currentTimeMillis(); + Process process = builder.start(); + StreamVacuum stdoutVacuum = new StreamVacuum(process.getInputStream()); + StreamVacuum stderrVacuum = new StreamVacuum(process.getErrorStream()); + stdoutVacuum.start(); + stderrVacuum.start(); + int rc = process.waitFor(); +// System.out.println("cargo " + command + ", exec time: " + (System.currentTimeMillis() - time)); + stdoutVacuum.join(); + stderrVacuum.join(); + String output = stdoutVacuum.toString(); + if (output.length() == 0) { + output = null; + } + if (stderrVacuum.toString().length() > 0) { + stderrDuringParse = stderrVacuum.toString(); + } + return output; + } catch (Exception e) { + System.err.println("can't exec recognizer"); + e.printStackTrace(System.err); + e.printStackTrace(System.out); + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + e.printStackTrace(pw); + return sw.toString(); + } +// return null; + } + + List getMessagesOfType(List msgs, Class c) { + List filtered = new ArrayList(); + for (ANTLRMessage m : msgs) { + if (m.getClass() == c) filtered.add(m); + } + return filtered; + } + + public void checkRuleATN(Grammar g, String ruleName, String expecting) { +// DOTGenerator dot = new DOTGenerator(g); +// System.out.println(dot.getDOT(g.atn.ruleToStartState[g.getRule(ruleName).index])); + + Rule r = g.getRule(ruleName); + ATNState startState = g.getATN().ruleToStartState[r.index]; + ATNPrinter serializer = new ATNPrinter(g, startState); + String result = serializer.asString(); + + //System.out.print(result); + assertEquals(expecting, result); + } + + public void testActions(String templates, String actionName, String action, String expected) throws org.antlr.runtime.RecognitionException { + int lp = templates.indexOf('('); + String name = templates.substring(0, lp); + STGroup group = new STGroupString(templates); + ST st = group.getInstanceOf(name); + st.add(actionName, action); + String grammar = st.render(); + ErrorQueue equeue = new 
ErrorQueue(); + Grammar g = new Grammar(grammar, equeue); + if (g.ast != null && !g.ast.hasErrors) { + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); + + ATNFactory factory = new ParserATNFactory(g); + if (g.isLexer()) factory = new LexerATNFactory((LexerGrammar) g); + g.atn = factory.createATN(); + + AnalysisPipeline anal = new AnalysisPipeline(g); + anal.process(); + + CodeGenerator gen = new CodeGenerator(g); + ST outputFileST = gen.generateParser(false); + String output = outputFileST.render(); + //System.out.println(output); + String b = "#" + actionName + "#"; + int start = output.indexOf(b); + String e = "#end-" + actionName + "#"; + int end = output.indexOf(e); + String snippet = output.substring(start + b.length(), end); + assertEquals(expected, snippet); + } + if (equeue.size() > 0) { +// System.err.println(equeue.toString()); + } + } + + protected void checkGrammarSemanticsError(ErrorQueue equeue, + GrammarSemanticsMessage expectedMessage) + throws Exception { + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.errors.size(); i++) { + ANTLRMessage m = equeue.errors.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertNotNull("no error; " + expectedMessage.getErrorType() + " expected", foundMsg); + assertTrue("error is not a GrammarSemanticsMessage", + foundMsg instanceof GrammarSemanticsMessage); + assertEquals(Arrays.toString(expectedMessage.getArgs()), Arrays.toString(foundMsg.getArgs())); + if (equeue.size() != 1) { + System.err.println(equeue); + } + } + + protected void checkGrammarSemanticsWarning(ErrorQueue equeue, + GrammarSemanticsMessage expectedMessage) + throws Exception { + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.warnings.size(); i++) { + ANTLRMessage m = equeue.warnings.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertNotNull("no error; " + expectedMessage.getErrorType() + " expected", foundMsg); + assertTrue("error is not a GrammarSemanticsMessage", + foundMsg instanceof GrammarSemanticsMessage); + assertEquals(Arrays.toString(expectedMessage.getArgs()), Arrays.toString(foundMsg.getArgs())); + if (equeue.size() != 1) { + System.err.println(equeue); + } + } + + protected void checkError(ErrorQueue equeue, + ANTLRMessage expectedMessage) + throws Exception { + //System.out.println("errors="+equeue); + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.errors.size(); i++) { + ANTLRMessage m = equeue.errors.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertTrue("no error; " + expectedMessage.getErrorType() + " expected", !equeue.errors.isEmpty()); + assertTrue("too many errors; " + equeue.errors, equeue.errors.size() <= 1); + assertNotNull("couldn't find expected error: " + expectedMessage.getErrorType(), foundMsg); + /* + assertTrue("error is not a GrammarSemanticsMessage", + foundMsg instanceof GrammarSemanticsMessage); + */ + assertArrayEquals(expectedMessage.getArgs(), foundMsg.getArgs()); + } + + public static class FilteringTokenStream extends CommonTokenStream { + public FilteringTokenStream(TokenSource src) { + super(src); + } + + Set hide = new HashSet(); + + @Override + protected boolean sync(int i) { + if (!super.sync(i)) { + return false; + } + + Token t = get(i); + if (hide.contains(t.getType())) { + ((WritableToken) t).setChannel(Token.HIDDEN_CHANNEL); + } + + return true; + } + + public void setTokenTypeChannel(int ttype, int channel) { + hide.add(ttype); 
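+            // the channel argument is effectively ignored: every type
+            // registered here is rerouted to Token.HIDDEN_CHANNEL by sync()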
+ } + } + + protected void writeParserTestFile(String parserName, String lexerName, + String listenerName, String visitorName, + String parserStartRuleName, boolean debug) { + ST outputFileST = new ST( +// "#![feature(try_blocks)]\n" + +// "#![feature(inner_deref)]\n" + +// "#![feature(specialization)]\n" + + "mod ;\n" + + "use ::*;\n" + + "mod ;\n" + + "use ::*;\n" + + "mod ;\n" + + "mod ;\n" + + "use antlr_rust::InputStream;\n" + + "use antlr_rust::token::OwningToken;\n" + + "use antlr_rust::token_stream::{UnbufferedTokenStream, TokenStream};\n" + + "use antlr_rust::common_token_stream::CommonTokenStream;\n" + + "use antlr_rust::parser::Parser;\n" + + "use antlr_rust::error_listener::DiagnosticErrorListener;\n" + + "\n" + + "fn main() -> std::io::Result\\<()>{\n" + + " let input = std::fs::read_to_string(std::env::current_dir()?.join(\"input\"))?;\n" + + " let input = input.chars().map(|x|x as u32).collect::\\ >();\n" + + " let mut lexer = ::new(InputStream::new(&*input));\n" + + " let mut token_source = CommonTokenStream::new(lexer);\n" + + "" + + " let result = parser.().unwrap();\n" + + " \n" + + " Ok(())" + + "}\n" + ); + ST createParserST = new ST(" let mut parser = ::with_dyn_strategy(token_source);\n"); + if (debug) { + createParserST = + new ST( + " let mut parser = ::with_dyn_strategy(token_source);\n" + + " parser.add_error_listener(Box::new(DiagnosticErrorListener::new(true)));\n"); + } + outputFileST.add("createParser", createParserST); + outputFileST.add("parserName", parserName); + outputFileST.add("importParser", parserName.toLowerCase()); + outputFileST.add("lexerName", lexerName); + outputFileST.add("importLexer", lexerName.toLowerCase()); + outputFileST.add("importListener", listenerName.toLowerCase()); + outputFileST.add("importVisitor", visitorName.toLowerCase()); + outputFileST.add("parserStartRuleName", parserStartRuleName); + writeFile(srcdir, "main.rs", outputFileST.render()); + } + + + protected void writeLexerTestFile(String lexerName, boolean showDFA) { + ST outputFileST = new ST("use antlr_rust::*;\n" + + "mod ;\n" + + "use ::*;\n" + + "use antlr_rust::InputStream;\n" + + "use antlr_rust::lexer::Lexer;\n" + + "use antlr_rust::token::OwningToken;\n" + + "use antlr_rust::token_stream::{UnbufferedTokenStream, TokenStream};\n" + + "\n" + + "fn main() -> std::io::Result\\<()>{\n" + + " let input = std::fs::read_to_string(std::env::current_dir()?.join(\"input\"))?;\n" + + " let input = input.chars().map(|x|x as u32).collect::\\ >();\n" + + " let mut _lexer = ::new(InputStream::new(&*input));\n" + + " {let mut token_source = UnbufferedTokenStream::new_unbuffered(&mut _lexer);\n" + + " let tokens = token_source.token_iter().collect::\\>();\n" + + " for token in tokens.iter(){\n" + + " println!(\"{}\",token);\n" + + " }\n" + + " }\n" + + (showDFA ? 
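+                        // with showDFA set, the generated main additionally
+                        // dumps the DFA accumulated for the default lexer mode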
+ "print!(\"{}\",_lexer.get_interpreter().unwrap()" + + ".get_dfa_for_mode(antlr_rust::lexer::LEXER_DEFAULT_MODE)" + + ".borrow().to_lexer_string());\n" + : "") + + " Ok(())" + + "}\n" + ); + + outputFileST.add("lexerName", lexerName); + if (lexerName.endsWith("Lexer")) { + outputFileST.add("importName", lexerName.toLowerCase()); + } else { + outputFileST.add("importName", lexerName.toLowerCase() + "lexer"); + } + writeFile(srcdir, "main.rs", outputFileST.render()); + } + + protected void eraseFiles(final String filesEndingWith) { + File tmpdirF = new File(tmpdir); + String[] files = tmpdirF.list(); + for (int i = 0; files != null && i < files.length; i++) { + if (files[i].endsWith(filesEndingWith)) { + new File(tmpdir + "/" + files[i]).delete(); + } + } + } + + protected void eraseFiles() { + if (tmpdir == null) { + return; + } + + File tmpdirF = new File(tmpdir); + String[] files = tmpdirF.list(); + for (int i = 0; files != null && i < files.length; i++) { + new File(tmpdir + "/" + files[i]).delete(); + } + } + + public void eraseTempDir() { + File tmpdirF = new File(tmpdir); + if (tmpdirF.exists()) { + eraseFiles(); + tmpdirF.delete(); + } + } + + public String getFirstLineOfException() { + if (this.stderrDuringParse == null) { + return null; + } + String[] lines = this.stderrDuringParse.split("\n"); + String prefix = "Exception in thread \"main\" "; + return lines[0].substring(prefix.length(), lines[0].length()); + } + + /** + * When looking at a result set that consists of a Map/HashTable + * we cannot rely on the output order, as the hashing algorithm or other aspects + * of the implementation may be different on differnt JDKs or platforms. Hence + * we take the Map, convert the keys to a List, sort them and Stringify the Map, which is a + * bit of a hack, but guarantees that we get the same order on all systems. We assume that + * the keys are strings. + * + * @param m The Map that contains keys we wish to return in sorted order + * @return A string that represents all the keys in sorted order. 
+     */
+    public String sortMapToString(Map<String, Object> m) {
+        // Pass in crap, and get nothing back
+        //
+        if (m == null) {
+            return null;
+        }
+
+        System.out.println("Map toString looks like: " + m.toString());
+
+        // Sort the keys in the Map
+        //
+        TreeMap<String, Object> nset = new TreeMap<String, Object>(m);
+
+        System.out.println("Tree map looks like: " + nset.toString());
+        return nset.toString();
+    }
+
+    public List<String> realElements(List<String> elements) {
+        return elements.subList(Token.MIN_USER_TOKEN_TYPE, elements.size());
+    }
+
+    public void assertNotNullOrEmpty(String message, String text) {
+        assertNotNull(message, text);
+        assertFalse(message, text.isEmpty());
+    }
+
+    public void assertNotNullOrEmpty(String text) {
+        assertNotNull(text);
+        assertFalse(text.isEmpty());
+    }
+
+    public static class IntTokenStream implements TokenStream {
+        public IntegerList types;
+        int p = 0;
+
+        public IntTokenStream(IntegerList types) {
+            this.types = types;
+        }
+
+        @Override
+        public void consume() {
+            p++;
+        }
+
+        @Override
+        public int LA(int i) {
+            return LT(i).getType();
+        }
+
+        @Override
+        public int mark() {
+            return index();
+        }
+
+        @Override
+        public int index() {
+            return p;
+        }
+
+        @Override
+        public void release(int marker) {
+            seek(marker);
+        }
+
+        @Override
+        public void seek(int index) {
+            p = index;
+        }
+
+        @Override
+        public int size() {
+            return types.size();
+        }
+
+        @Override
+        public String getSourceName() {
+            return UNKNOWN_SOURCE_NAME;
+        }
+
+        @Override
+        public Token LT(int i) {
+            CommonToken t;
+            int rawIndex = p + i - 1;
+            if (rawIndex >= types.size()) t = new CommonToken(Token.EOF);
+            else t = new CommonToken(types.get(rawIndex));
+            t.setTokenIndex(rawIndex);
+            return t;
+        }
+
+        @Override
+        public Token get(int i) {
+            return new CommonToken(types.get(i));
+        }
+
+        @Override
+        public TokenSource getTokenSource() {
+            return null;
+        }
+
+
+        @Override
+        public String getText() {
+            throw new UnsupportedOperationException("can't give strings");
+        }
+
+
+        @Override
+        public String getText(Interval interval) {
+            throw new UnsupportedOperationException("can't give strings");
+        }
+
+
+        @Override
+        public String getText(RuleContext ctx) {
+            throw new UnsupportedOperationException("can't give strings");
+        }
+
+
+        @Override
+        public String getText(Token start, Token stop) {
+            throw new UnsupportedOperationException("can't give strings");
+        }
+    }
+
+    /**
+     * Sort a list
+     */
+    public <T extends Comparable<? super T>> List<T> sort(List<T> data) {
+        List<T> dup = new ArrayList<T>();
+        dup.addAll(data);
+        Collections.sort(dup);
+        return dup;
+    }
+
+    /**
+     * Return map sorted by key
+     */
+    public <K extends Comparable<? super K>, V> LinkedHashMap<K, V> sort(Map<K, V> data) {
+        LinkedHashMap<K, V> dup = new LinkedHashMap<K, V>();
+        List<K> keys = new ArrayList<K>();
+        keys.addAll(data.keySet());
+        Collections.sort(keys);
+        for (K k : keys) {
+            dup.put(k, data.get(k));
+        }
+        return dup;
+    }
+}
diff --git a/runtime/Rust/templates/Rust.stg b/runtime/Rust/templates/Rust.stg
new file mode 100644
index 0000000000..63e02f9031
--- /dev/null
+++ b/runtime/Rust/templates/Rust.stg
@@ -0,0 +1,1473 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2012-2016 Terence Parr
+ * Copyright (c) 2012-2016 Sam Harwell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +rutsTypeInitMap ::= [ + "int":"0", + "long":"0", + "float":"0.0f", + "double":"0.0", + "boolean":"false", + "byte":"0", + "short":"0", + "char":"0", + "String":<>, + default:"null" // anything other than a primitive type is an object +] + +// args must be , + +ParserFile(file, parser, namedActions, contextSuperClass) ::= << + +#![allow(dead_code)] +#![allow(non_snake_case)] +#![allow(non_upper_case_globals)] +#![allow(nonstandard_style)] +#![allow(unused_imports)] +#![allow(unused_mut)] +#![allow(unused_braces)] + +use antlr_rust::PredictionContextCache; +use antlr_rust::error_listener::ErrorListener; +use antlr_rust::parser::{Parser, BaseParser, ParserRecog, ParserNodeType}; +use antlr_rust::token_stream::TokenStream; +use antlr_rust::TokenSource; +use antlr_rust::parser_atn_simulator::ParserATNSimulator; +use antlr_rust::errors::*; +use antlr_rust::rule_context::{BaseRuleContext, CustomRuleContext, RuleContext}; +use antlr_rust::recognizer::{Recognizer,Actions}; +use antlr_rust::atn_deserializer::ATNDeserializer; +use antlr_rust::dfa::DFA; +use antlr_rust::atn::{ATN, INVALID_ALT}; +use antlr_rust::error_strategy::{ErrorStrategy, DefaultErrorStrategy}; +use antlr_rust::parser_rule_context::{BaseParserRuleContext, ParserRuleContext,cast,cast_mut}; +use antlr_rust::tree::*; +use antlr_rust::token::{TOKEN_EOF,OwningToken,Token}; +use antlr_rust::int_stream::EOF; +use antlr_rust::vocabulary::{Vocabulary,VocabularyImpl}; +use antlr_rust::token_factory::{CommonTokenFactory,TokenFactory, TokenAware}; + +use super::listener::*; + + +use super::visitor::*; + + +use antlr_rust::{TidAble,TidExt}; + +use std::marker::PhantomData; +use std::rc::Rc; +use std::convert::TryFrom; +use std::cell::RefCell; +use std::ops::{DerefMut, Deref}; +use std::borrow::{Borrow,BorrowMut}; +use std::any::{Any,TypeId}; + + +>> + +ListenerFile(file, header, namedActions) ::= << +#![allow(nonstandard_style)] + +
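+// Generated listener trait: one enter_/exit_ pair per parser rule (and per labeled
+// alternative); all default implementations are no-ops.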
+use antlr_rust::tree::ParseTreeListener; +use super::::*; + +pub trait Listener\<'input> : ParseTreeListener\<'input,ContextType>{ + + * Enter a parse tree produced by the {@code \} + * labeled alternative in {@link #\}. + + * Enter a parse tree produced by {@link #\}. + + * @param ctx the parse tree + */ +fn enter_(&mut self, _ctx: &Context\<'input>) { \} +/** + + * Exit a parse tree produced by the {@code \} + * labeled alternative in {@link #\}. + + * Exit a parse tree produced by {@link #\}. + + * @param ctx the parse tree + */ +fn exit_(&mut self, _ctx: &Context\<'input>) { \}}; separator="\n"> + +} + +antlr_rust::coerce_from!{ 'input : Listener\<'input> } + + + +>> + +BaseListenerFile(file, header, namedActions) ::= <<>> + +VisitorFile(file, header, namedActions) ::= << +#![allow(nonstandard_style)] + +
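+// Generated visitor traits: Visitor stores results out-of-band via ParseTreeVisitor,
+// while VisitorCompat threads a typed Return value; a blanket impl below bridges the two.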
+use antlr_rust::tree::{ParseTreeVisitor,ParseTreeVisitorCompat}; +use super::::*; + +/** + * This interface defines a complete generic visitor for a parse tree produced + * by {@link }. + */ +pub trait Visitor\<'input>: ParseTreeVisitor\<'input,ContextType>{ + + * Visit a parse tree produced by the {@code \} + * labeled alternative in {@link #\}. + + * Visit a parse tree produced by {@link #\}. + + * @param ctx the parse tree + */ +fn visit_(&mut self, ctx: &Context\<'input>) { self.visit_children(ctx) \} +}; separator="\n"> +} + +pub trait VisitorCompat\<'input>:ParseTreeVisitorCompat\<'input, Node= ContextType>{ + + * Visit a parse tree produced by the {@code \} + * labeled alternative in {@link #\}. + + * Visit a parse tree produced by {@link #\}. + + * @param ctx the parse tree + */ + fn visit_(&mut self, ctx: &Context\<'input>) -> Self::Return { + self.visit_children(ctx) + \} +}; separator="\n"> +} + +impl\<'input,T> Visitor\<'input> for T +where + T: VisitorCompat\<'input> +{ +(&mut self, ctx: &Context\<'input>){ + let result = \VisitorCompat>::visit_(self, ctx); + *\::temp_result(self) = result; + \} +}; separator="\n"> +} +>> + +// no need for base visitor +BaseVisitorFile(file, header, namedActions) ::= <<>> + +fileHeader(grammarFileName, ANTLRVersion) ::= << +// Generated from by ANTLR +>> + +Parser(parser, funcs, atn, sempredFuncs, superClass) ::= << + +>> + +Parser_(parser, funcs, atn, sempredFuncs, ctor, superClass) ::= << + + :isize=;}; separator=" \n"> + + :usize = ;}; separator=" \n"> + pub const ruleNames: [&'static str; ] = [ + "}; separator=", ", wrap, anchor> + ]; + + + + + +type BaseParserType\<'input, I> = + BaseParser\<'input,Ext\<'input>, I, ContextType , dyn Listener\<'input> + 'input >; + +type TokenType\<'input> = \< as TokenFactory\<'input>\>::Tok; + + + +pub type LocalTokenFactory\<'input> = CommonTokenFactory; + + +pub type TreeWalker\<'input,'a> = + ParseTreeWalker\<'input, 'a, ContextType , dyn Listener\<'input> + 'a\>; + +/// Parser for grammar +pub struct \<'input,I,H> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, + H: ErrorStrategy\<'input,BaseParserType\<'input,I>\> +{ + base:BaseParserType\<'input,I>, + interpreter:Rc\, + _shared_context_cache: Box\, + pub err_handler: H, +} + +impl\<'input, I, H> \<'input, I, H> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, + H: ErrorStrategy\<'input,BaseParserType\<'input,I>\> +{ + pub fn get_serialized_atn() -> &'static str { _serializedATN } + + pub fn set_error_strategy(&mut self, strategy: H) { + self.err_handler = strategy + } + + pub fn with_strategy(input: I, strategy: H) -> Self { + antlr_rust::recognizer::check_version("0","3"); + let interpreter = Rc::new(ParserATNSimulator::new( + _ATN.with(|atn| atn.clone()), + _decision_to_DFA.with(|decision| decision.clone()), + _shared_context_cache.with(|ctx| ctx.clone()), + )); + Self { + base: BaseParser::new_base_parser( + input, + Rc::clone(&interpreter), + Ext{ + _pd: Default::default(), + + } + ), + interpreter, + _shared_context_cache: Box::new(PredictionContextCache::new()), + err_handler: strategy, + } + } + + pub fn add_error_listener(&mut self, listener: Box\<(dyn ErrorListener\<'input, BaseParser\<'input, Ext\<'input>, I, ContextType, (dyn Listener\<'input> + 'input)>\> + 'static)>) { + self.base.add_error_listener(listener) + } + + pub fn remove_error_listeners(&mut self) { + self.base.remove_error_listeners() + } +} + +type DynStrategy\<'input,I> = Box\\> + 'input>; + +impl\<'input, I> \<'input, I, DynStrategy\<'input,I>\> 
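+// Convenience impl: with_dyn_strategy boxes DefaultErrorStrategy behind the DynStrategy
+// trait-object alias so callers need not name the error-strategy type parameter.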
+where + I: TokenStream\<'input, TF = > + TidAble\<'input>, +{ + pub fn with_dyn_strategy(input: I) -> Self{ + Self::with_strategy(input,Box::new(DefaultErrorStrategy::new())) + } +} + +impl\<'input, I> \<'input, I, DefaultErrorStrategy\<'input,ContextType>\> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, +{ + pub fn new(input: I) -> Self{ + Self::with_strategy(input,DefaultErrorStrategy::new()) + } +} + +/// Trait for monomorphized trait object that corresponds to the nodes of parse tree generated for +pub trait Context\<'input>: + for\<'x> Listenable\Listener\<'input> + 'x > + + for\<'x> Visitable\Visitor\<'input> + 'x > + + ParserRuleContext\<'input, TF=, Ctx=ContextType> +{} + +antlr_rust::coerce_from!{ 'input : Context\<'input> } + + +impl\<'input, 'x, T> VisitableDyn\ for dyn Context\<'input> + 'input +where + T: Visitor\<'input> + 'x, +{ + fn accept_dyn(&self, visitor: &mut T) { + self.accept(visitor as &mut (dyn Visitor\<'input> + 'x)) + } +} + + +impl\<'input> Context\<'input> for TerminalNode\<'input,ContextType> {} +impl\<'input> Context\<'input> for ErrorNode\<'input,ContextType> {} + +antlr_rust::tid! { impl\<'input> TidAble\<'input> for dyn Context\<'input> + 'input } + +antlr_rust::tid! { impl\<'input> TidAble\<'input> for dyn Listener\<'input> + 'input } + +pub struct ContextType; +antlr_rust::tid!{ContextType} + +impl\<'input> ParserNodeType\<'input> for ContextType{ + type TF = ; + type Type = dyn Context\<'input> + 'input; + +} + +impl\<'input, I, H> Deref for \<'input, I, H> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, + H: ErrorStrategy\<'input,BaseParserType\<'input,I>\> +{ + type Target = BaseParserType\<'input,I>; + + fn deref(&self) -> &Self::Target { + &self.base + } +} + +impl\<'input, I, H> DerefMut for \<'input, I, H> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, + H: ErrorStrategy\<'input,BaseParserType\<'input,I>\> +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.base + } +} + +pub struct Ext\<'input>{ + _pd: PhantomData\<&'input str>, + +} + +impl\<'input> Ext\<'input>{ + +} +antlr_rust::tid! 
{ Ext\<'a> } + +impl\<'input> TokenAware\<'input> for Ext\<'input>{ + type TF = ; +} + +impl\<'input,I: TokenStream\<'input, TF = > + TidAble\<'input>\> ParserRecog\<'input, BaseParserType\<'input,I>\> for Ext\<'input>{} + +impl\<'input,I: TokenStream\<'input, TF = > + TidAble\<'input>\> Actions\<'input, BaseParserType\<'input,I>\> for Ext\<'input>{ + fn get_grammar_file_name(&self) -> & str{ ""} + + fn get_rule_names(&self) -> &[& str] {&ruleNames} + + fn get_vocabulary(&self) -> &dyn Vocabulary { VOCABULARY.with(|v| unsafe { std::mem::transmute(&**v) }) } + + fn sempred(_localctx: Option\<&(dyn Context\<'input> + 'input)>, rule_index: isize, pred_index: isize, + recog:&mut BaseParserType\<'input,I> + )->bool{ + match rule_index { + => ::\<'input,I,_>::_sempred(_localctx.and_then(|x|x.downcast_ref()), pred_index, recog),}; separator="\n"> + _ => true + } + } +} + +impl\<'input, I> \<'input, I, DefaultErrorStrategy\<'input,ContextType>\> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, +{ + + +} + + + +>> + +vocabulary(literalNames, symbolicNames) ::= << + +pub const _LITERAL_NAMES: [Option\<&'static str>;] = [ + )}; null="None", separator=", ", wrap, anchor> +]; +pub const _SYMBOLIC_NAMES: [Option\<&'static str>;] = [ + )}; null="None", separator=", ", wrap, anchor> +]; +thread_local!{ + static _shared_context_cache: Rc\ = Rc::new(PredictionContextCache::new()); + static VOCABULARY: Box\ = Box::new(VocabularyImpl::new(_LITERAL_NAMES.iter(), _SYMBOLIC_NAMES.iter(), None)); +} +>> + +dumpActions(recog, argFuncs, actionFuncs, sempredFuncs) ::= << + + +fn action(_localctx: Option\<&EmptyContext\<'input,\> >, rule_index: isize, action_index: isize, + recog:&mut BaseLexer\<'input,Actions,Input,\> + ){ + match rule_index { + => + ::\<'input>::_action(None, action_index, recog), }; separator="\n"> + _ => {} + } +} + + +fn sempred(_localctx: Option\<&EmptyContext\<'input,\> >, rule_index: isize, pred_index: isize, + recog:&mut BaseLexer\<'input,Actions,Input,\> + ) -> bool { + match rule_index { + => + ::\<'input>::_sempred(None, pred_index, recog), }; separator="\n"> + _ => true + } +} + + +} + +impl\<'input, Input:CharStream\ >\> \<'input,Input>{ + + + + + + + + +>> + +parser_ctor(p) ::= << +>> + +RuleActionFunction(r, actions) ::= << + +fn _action(_localctx: Option\<&\<'input>\>, action_index: isize, + recog:&mut \::Target + ) { + match action_index { + =>{ + + \}, + }; separator="\n"> + _ => {} + } +} +>> + +RuleSempredFunction(r, actions) ::= << +fn _sempred(_localctx: Option\<&\<'input>\>, pred_index:isize, + recog:&mut \::Target + ) -> bool { + match pred_index { + =>{ + + \}}; separator="\n"> + _ => true + } +} +>> + +RuleTypeForAlt(currentRule,ruleCtx,altLabelCtxs) ::= << + +#[derive(Debug)] +pub enum All\<'input>{ + (\<'input>), + }>Error(\<'input>) +} +antlr_rust::tid!{All\<'a>} + +impl\<'input> antlr_rust::parser_rule_context::DerefSeal for All\<'input>{} + +impl\<'input> ParserContext\<'input> for All\<'input>{} + +impl\<'input> Deref for All\<'input>{ + type Target = dyn Attrs\<'input> + 'input; + fn deref(&self) -> &Self::Target{ + use All::*; + match self{ + (inner) => inner, + }>Error(inner) => inner + } + } +} + +impl\<'input,'a> Visitable\Visitor\<'input> + 'a> for All\<'input>{ + fn accept(&self, visitor: &mut (dyn Visitor\<'input> + 'a)) { self.deref().accept(visitor) } +} + + +impl\<'input,'a> Listenable\Listener\<'input> + 'a> for All\<'input>{ + fn enter(&self, listener: &mut (dyn Listener\<'input> + 'a)) { self.deref().enter(listener) } + fn exit(&self, 
listener: &mut (dyn Listener\<'input> + 'a)) { self.deref().exit(listener) } +} + + + +pub type All\<'input> = \<'input>; + + +>> + +RuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAction,postamble,exceptions) ::= << +//------------------- ---------------- + + + +}; separator="\n"> + +impl\<'input, I, H> \<'input, I, H> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, + H: ErrorStrategy\<'input,BaseParserType\<'input,I>\> +{ + }>pub fn (&mut self,) + -> Result\All\<'input>\>,ANTLRError> { + let mut recog = self; + + let _parentctx = .ctx.take(); + let mut _localctx = Ext::new(_parentctx.clone(), .base.get_state()}>); + .base.enter_rule(_localctx.clone(), , RULE_); + let mut _localctx: Rc\<All> = _localctx; + + + let result: Result\<(), ANTLRError> = (|| { + + + let mut _alt: isize; + + + + + Ok(()) + })(); + match result { + Ok(_)=>{}, + + + + Err(e @ ANTLRError::FallThrough(_)) => return Err(e), + Err(ref re) => { + //_localctx.exception = re; + .err_handler.report_error(&mut .base, re); + .err_handler.recover(&mut .base, re)?; + } + } + + .base.exit_rule(); + + Ok(_localctx) + } +} +>> + +LeftRecursiveRuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs, + namedActions,finallyAction,postamble,exceptions) ::= +< ---------------- + + + +}; separator="\n"> + +impl\<'input, I, H> \<'input, I, H> +where + I: TokenStream\<'input, TF = > + TidAble\<'input>, + H: ErrorStrategy\<'input,BaseParserType\<'input,I>\> +{ + }>pub fn (&mut self,) + -> Result\All\<'input>\>,ANTLRError> { + self._rec(0}>) + } + + fn _rec(&mut self, _p: isize}>) + -> Result\All\<'input>\>,ANTLRError> { + let recog = self; + let _parentctx = .ctx.take(); + let _parentState = .base.get_state(); + let mut _localctx = Ext::new(_parentctx.clone(), .base.get_state()}>); + .base.enter_recursion_rule(_localctx.clone(), , RULE_, _p); + let mut _localctx: Rc\<All> = _localctx; + let mut _prevctx = _localctx.clone(); + let _startState = ; + + + let result: Result\<(), ANTLRError> = (|| { + + let mut _alt: isize; + + + + + Ok(()) + })(); + match result { + Ok(_) => {}, + + + + Err(e @ ANTLRError::FallThrough(_)) => return Err(e), + Err(ref re)=>{ + //_localctx.exception = re; + .err_handler.report_error(&mut .base, re); + .err_handler.recover(&mut .base, re)?;} + } + + .base.unroll_recursion_context(_parentctx); + + Ok(_localctx) + } +} +>> + +CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= << + +let tmp = ContextExt::new(&**_localctx); +.base.enter_outer_alt(Some(tmp.clone()), ); +_localctx = tmp; + +//.base.enter_outer_alt(_localctx.clone(), ); +.base.enter_outer_alt(None, ); + + +>> + +CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= << +{ + + + +} +>> + +LL1AltBlock(choice, preamble, alts, error) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; +.base.input.lt(1).cloned();})> + +match .base.input.la(1) { + + => { + + \} + }; separator="\n"> + _ => Err()? 
+} +>> + +LL1OptionalBlock(choice, alts, error) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; +match .base.input.la(1) { + + => { + + \} + }; separator="\n"> + _ => {} +} +>> + +LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; + +if { + +} +) ) !> +>> + +LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; + +while { + + .base.set_state(); + .err_handler.sync(&mut .base)?; + +} +>> + +LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; + +loop { + + .base.set_state(); + .err_handler.sync(&mut .base)?; + + if !() {break} +} +>> + +// LL(*) stuff + +AltBlock(choice, preamble, alts, error) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; +.base.input.lt(1).cloned();})> + +match .interpreter.adaptive_predict(,&mut .base)? { + =>{ + + \} + }; separator=",\n"> + _ => {} +} +>> + +OptionalBlock(choice, alts, error) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; +match .interpreter.adaptive_predict(,&mut .base)? { ++1=>{ + + \} + }; separator="\n"> + _ => {} +} +>> + +StarBlock(choice, alts, sync, iteration) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; +_alt = .interpreter.adaptive_predict(,&mut .base)?; +while { _alt!= && _alt!=INVALID_ALT } { + if _alt==1+1 { + + + } + .base.set_state(); + .err_handler.sync(&mut .base)?; + _alt = .interpreter.adaptive_predict(,&mut .base)?; +} +>> + +PlusBlock(choice, alts, error) ::= << +.base.set_state(); +.err_handler.sync(&mut .base)?; +_alt = 1+1; +loop { + match _alt { + +1=> + + }; separator=",\n"> + _ => Err()? + } + .base.set_state(); + .err_handler.sync(&mut .base)?; + _alt = .interpreter.adaptive_predict(,&mut .base)?; + if _alt== || _alt==INVALID_ALT { break } +} +>> + +Sync(s) ::= ".err_handler.sync()?;" + +ThrowNoViableAlt(t) ::= "ANTLRError::NoAltError(NoViableAltError::new(&mut .base))" + +TestSetInline(s) ::= << +}; separator=" || "> +>> + +// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... 
need range test +testShiftInRange(shiftAmount) ::= << +(() & !0x3f) == 0 +>> + +// produces smaller bytecode only when bits.ttypes contains more than two items +bitsetBitfieldComparison(s, bits) ::= <% +(})> && ((1usize \<\< ) & ()}; separator=" | ">)) != 0) +%> + +isZero ::= [ +"0":true, +default:false +] + +offsetShift(shiftAmount, offset) ::= <% +( - ) +%> + +// produces more efficient bytecode when bits.ttypes contains at most two items +bitsetInlineComparison(s, bits) ::= <% +==}; separator=" || "> +%> + +cases(ttypes) ::= << + }; separator="|",wrap> +>> + +InvokeRule(r, argExprsChunks) ::= << +/*InvokeRule */ +.base.set_state(); +._rec(,)?})> +>> + +MatchToken(m) ::= << +.base.set_state(); +.base.match_token(,&mut .err_handler)?})> +>> + +LabelsAssignCloned(labels,assign) ::= << + +let tmp = ; + };separator="\n"> + +; + +>> + +LabelsAssignOwned(labels,assign) ::= << + +let tmp = ; + };separator="\n"> + +; + +>> + +MatchSet(m, expr, capture) ::= "" + +MatchNotSet(m, expr, capture) ::= "" + +CommonSetStuff(m, expr, capture, invert) ::= << +.base.set_state(); +.base.input.lt(1).cloned();})>};separator="\n"> + +if { \<= 0 || !() } { + .err_handler.recover_inline(&mut .base)?})> +} +else { + if .base.input.la(1)==TOKEN_EOF { .base.matched_eof = true }; + .err_handler.report_match(&mut .base); + .base.consume(&mut .err_handler); +} +>> + +Wildcard(w) ::= << +.base.set_state(); +.base.match_wildcard(&mut .err_handler)?})> +>> + +// ACTION STUFF + +Action(a, foo, chunks) ::= << +let _localctx = _localctx.unwrap(); + +>> + +ArgAction(a, chunks) ::= "" + +SemPred(p, chunks, failChunks) ::= << +.base.set_state(); + +if !({}) { + Err(FailedPredicateError::new(&mut .base, Some(.to_owned()), Some(.to_owned()), Some(.to_owned()), None))?; +} +>> + +ExceptionClause(e, catchArg, catchAction) ::= << +Err() => {} +>> + +// lexer actions are not associated with model objects + +LexerSkipCommand() ::= "skip();" +LexerMoreCommand() ::= "more();" +LexerPopModeCommand() ::= "pop_mode();" + +LexerTypeCommand(arg, grammar) ::= "_type = ;" +LexerChannelCommand(arg, grammar) ::= "_channel = ;" +LexerModeCommand(arg, grammar) ::= "_mode = ;" +LexerPushModeCommand(arg, grammar) ::= "push_mode();" + +ActionText(t) ::= "" +ActionTemplate(t) ::= "" +ArgRef(a) ::= "*_localctx.get_()" +LocalRef(a) ::= "*_localctx.get_()" +RetValueRef(a) ::= "*_localctx.get_()" +QRetValueRef(a) ::= "..as_ref().unwrap().get_()" +/** How to translate $tokenLabel */ +TokenRef(t) ::= "..as_ref()" +LabelRef(t) ::= "..as_ref().unwrap()" +ListLabelRef(t) ::= "." +SetAttr(s,rhsChunks) ::= << +let tmp = {}.to_owned(); +(tmp);})> +>> + +//TokenLabelType() ::= "" +TokenFactory() ::= "LocalTokenFactory\<'input>" +TokenLabelType() ::= "TokenType\<'input>" +InputSymbolType() ::= "" + +TokenPropertyRef_text(t) ::= << if let Some(it) = &. { it.get_text() } else { "null" } >> +TokenPropertyRef_type(t) ::= << if let Some(it) = &. { it.get_token_type() } else { 0 } >> +TokenPropertyRef_line(t) ::= << if let Some(it) = &. { it.get_line() } else { 0 } >> +TokenPropertyRef_pos(t) ::= << if let Some(it) = &. { it.get_column() } else { 0 } >> +TokenPropertyRef_channel(t) ::= << if let Some(it) = &. { it.get_chanel() } else { 0 } >> +TokenPropertyRef_index(t) ::= << if let Some(it) = &. { it.get_token_index() } else { 0 } >> +TokenPropertyRef_int(t) ::= "if let Some(it) = &. 
{ isize::from_str_radix(it.get_text(),10).unwrap() } else { 0 }" + +RulePropertyRef_start(r) ::= "..as_ref().map(|it| it.start()) " +RulePropertyRef_stop(r) ::= "..as_ref().map(|it| it.stop()) " +RulePropertyRef_text(r) ::= <<(..as_ref().map(|it| .input.get_text_from_interval(it.start().get_token_index(),it.stop().get_token_index())).unwrap_or("null".to_owned()) )>> +RulePropertyRef_ctx(r) ::= "..as_ref().unwrap()" +RulePropertyRef_parser(r) ::= "" + +ThisRulePropertyRef_start(r) ::= "_localctx.start()" +ThisRulePropertyRef_stop(r) ::= "_localctx.stop()" +ThisRulePropertyRef_text(r) ::= "{let temp = .base.input.lt(-1).map(|it|it.get_token_index()).unwrap_or(-1); .input.get_text_from_interval(.get_parser_rule_context().start().get_token_index(), temp)}" +ThisRulePropertyRef_ctx(r) ::= "_localctx" +ThisRulePropertyRef_parser(r) ::= "" + +// not self because we need to access parser from sempred functions where it is a fn parameter so it can't be self +self() ::= "recog" + +NonLocalAttrRef(s) ::= "((\Context)getInvokingContext())." +SetNonLocalAttr(s, rhsChunks) ::= + "((Context)getInvokingContext()). = ;" + +AddToLabelList(a) ::= << +let temp = ; +.push(temp);})> >> + +TokenDecl(t) ::= ": Option\<>" +TokenTypeDecl(t) ::= "let mut : isize = -1;" +TokenListDecl(t) ::= ":Vec\<>" +RuleContextDecl(r) ::= ": Option\All\<'input>>>" +RuleContextListDecl(rdecl) ::= ":Vec\All\<'input>>>" + +ContextTokenGetterDecl(t) ::= << +/// Retrieves first TerminalNode corresponding to token +/// Returns `None` if there is no child corresponding to token +fn (&self) -> Option\ContextType>\>> where Self:Sized{ + self.get_token(, 0) +}>> +ContextTokenListGetterDecl(t) ::=<< +/// Retrieves all `TerminalNode`s corresponding to token in current rule +fn _all(&self) -> Vec\ContextType>\>> where Self:Sized{ + self.children_of_type() +}>> +ContextTokenListIndexedGetterDecl(t) ::= << +/// Retrieves 'i's TerminalNode corresponding to token , starting from 0. +/// Returns `None` if number of children corresponding to token is less or equal than `i`. +fn (&self, i: usize) -> Option\ContextType>\>> where Self:Sized{ + self.get_token(, i) +} +>> +ContextRuleGetterDecl(r) ::= << +fn (&self) -> Option\All\<'input>\>> where Self:Sized{ + self.child_of_type(0) +} +>> +ContextRuleListGetterDecl(r) ::= << +fn _all(&self) -> Vec\All\<'input>\>> where Self:Sized{ + self.children_of_type() +} +>> +ContextRuleListIndexedGetterDecl(r) ::= << +fn (&self, i: usize) -> Option\All\<'input>\>> where Self:Sized{ + self.child_of_type(i) +} +>> + +LexerRuleContext() ::= "LexerContext" + +/** The rule context name is the rule followed by a suffix; e.g., + * r becomes rContext. + */ +RuleContextNameSuffix() ::= "Context" + +ImplicitTokenLabel(tokenName) ::= "" +ImplicitRuleLabel(ruleName) ::= "" +ImplicitSetLabel(id) ::= "_tset" +ListLabelName(label) ::= "