+/// This error strategy is useful in the following scenarios.
+///
+/// - Two-stage parsing: This error strategy allows the first
+/// stage of two-stage parsing to terminate immediately if an error is
+/// encountered, and fall back to the second stage. In addition to
+/// avoiding the wasted work of recovering from errors here, the empty
+/// implementation of `sync` improves the performance of
+/// the first stage.
+/// - Silent validation: When syntax errors are not being
+/// reported or logged, and the parse result is simply ignored if errors occur,
+/// the `BailErrorStrategy` avoids wasting work on recovering from errors
+/// when the result will be ignored either way.
+///
+/// # Usage
+/// ```ignore
+/// use antlr_rust::error_strategy::BailErrorStrategy;
+/// myparser.err_handler = BailErrorStrategy::new();
+/// ```
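+///
+/// A rough sketch of the two-stage pattern described above (`myparser`,
+/// `reset` and `start_rule` are illustrative names, not a fixed API):
+/// ```ignore
+/// // stage 1: fast parse that bails out on the first syntax error
+/// myparser.err_handler = BailErrorStrategy::new();
+/// if myparser.start_rule().is_err() {
+///     // stage 2: reparse with full error recovery and reporting
+///     myparser.reset();
+///     myparser.err_handler = DefaultErrorStrategy::new();
+///     let _tree = myparser.start_rule();
+/// }
+/// ```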
+///
+/// [`ParserRuleContext.exception`]: todo
+#[derive(Default, Debug)]
+pub struct BailErrorStrategy<'input, Ctx: ParserNodeType<'input>>(
+ DefaultErrorStrategy<'input, Ctx>,
+);
+
+better_any::tid! {impl<'i,Ctx> TidAble<'i> for BailErrorStrategy<'i,Ctx> where Ctx:ParserNodeType<'i> }
+
+impl<'input, Ctx: ParserNodeType<'input>> BailErrorStrategy<'input, Ctx> {
+ /// Creates a new instance of `BailErrorStrategy`
+ pub fn new() -> Self {
+ Self(DefaultErrorStrategy::new())
+ }
+
+ fn process_error<T: Parser<'input, Node = Ctx>>(
+ &self,
+ recognizer: &mut T,
+ e: &ANTLRError,
+ ) -> ANTLRError {
+ let mut ctx = recognizer.get_parser_rule_context().clone();
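+ // walk the rule context chain upwards, attaching the exception to every
+ // context; the `?` inside the closure ends the loop at the root context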
+ let _: Option<()> = (|| loop {
+ ctx.set_exception(e.clone());
+ ctx = ctx.get_parent()?
+ })();
+ return ANTLRError::FallThrough(Rc::new(ParseCancelledError(e.clone())));
+ }
+}
+
+/// `ANTLRError::FallThrough` error returned by `BailErrorStrategy` to bail out of parsing
+#[derive(Debug)]
+pub struct ParseCancelledError(ANTLRError);
+
+impl Error for ParseCancelledError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ Some(&self.0)
+ }
+}
+
+impl Display for ParseCancelledError {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ f.write_str("ParseCancelledError, caused by ")?;
+ self.0.fmt(f)
+ }
+}
+
+impl<'a, T: Parser<'a>> ErrorStrategy<'a, T> for BailErrorStrategy<'a, T::Node> {
+ #[inline(always)]
+ fn reset(&mut self, recognizer: &mut T) {
+ self.0.reset(recognizer)
+ }
+
+ #[cold]
+ fn recover_inline(
+ &mut self,
+ recognizer: &mut T,
+ ) -> Result<<T::TF as TokenFactory<'a>>::Tok, ANTLRError> {
+ let err = ANTLRError::InputMismatchError(InputMisMatchError::new(recognizer));
+
+ Err(self.process_error(recognizer, &err))
+ }
+
+ #[cold]
+ fn recover(&mut self, recognizer: &mut T, e: &ANTLRError) -> Result<(), ANTLRError> {
+ Err(self.process_error(recognizer, e))
+ }
+
+ #[inline(always)]
+ fn sync(&mut self, _recognizer: &mut T) -> Result<(), ANTLRError> {
+ /* empty */
+ Ok(())
+ }
+
+ #[inline(always)]
+ fn in_error_recovery_mode(&mut self, recognizer: &mut T) -> bool {
+ self.0.in_error_recovery_mode(recognizer)
+ }
+
+ #[inline(always)]
+ fn report_error(&mut self, recognizer: &mut T, e: &ANTLRError) {
+ self.0.report_error(recognizer, e)
+ }
+
+ #[inline(always)]
+ fn report_match(&mut self, _recognizer: &mut T) {}
+}
diff --git a/runtime/Rust/src/errors.rs b/runtime/Rust/src/errors.rs
new file mode 100644
index 0000000000..83ca5312fd
--- /dev/null
+++ b/runtime/Rust/src/errors.rs
@@ -0,0 +1,264 @@
+//! Error types
+use std::borrow::Borrow;
+use std::error::Error;
+use std::fmt;
+use std::fmt::Formatter;
+use std::fmt::{Debug, Display};
+use std::ops::Deref;
+use std::rc::Rc;
+
+use crate::atn_simulator::IATNSimulator;
+use crate::interval_set::IntervalSet;
+use crate::parser::{Parser, ParserNodeType};
+use crate::rule_context::states_stack;
+use crate::token::{OwningToken, Token};
+use crate::transition::PredicateTransition;
+use crate::transition::TransitionType::TRANSITION_PREDICATE;
+
+/// Main ANTLR4 Rust runtime error
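+///
+/// A sketch of handling the variants after a failed parse (`parse_result` is
+/// an illustrative name for whatever a parse entry point returned):
+/// ```ignore
+/// match parse_result {
+///     Err(ANTLRError::NoAltError(e)) => eprintln!("no viable alternative from {:?}", e.start_token),
+///     Err(ANTLRError::InputMismatchError(e)) => eprintln!("mismatched input: {}", e.base.message),
+///     Err(other) => eprintln!("parse failed: {}", other),
+///     Ok(tree) => { /* use the parse tree */ }
+/// }
+/// ```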
+#[derive(Debug, Clone)]
+pub enum ANTLRError {
+ /// Returned from the lexer when it fails to find a matching token type for the current input
+ ///
+ /// Lexers usually contain a final rule that captures all invalid tokens, e.g.:
+ /// ```text
+ /// ERROR_TOKEN: . ;
+ /// ```
+ /// to prevent the lexer from throwing errors and keep all error handling in the parser.
+ LexerNoAltError {
+ /// Index at which error has happened
+ start_index: isize,
+ },
+
+ /// Indicates that the parser could not decide which of two or more paths
+ /// to take based upon the remaining input. It tracks the starting token
+ /// of the offending input and also knows where the parser was
+ /// in the various paths when the error occurred. Reported by reportNoViableAlternative()
+ NoAltError(NoViableAltError),
+
+ /// Signifies any kind of mismatched-input exception, such as
+ /// when the current input does not match the expected token.
+ InputMismatchError(InputMisMatchError),
+
+ /// A semantic predicate failed during validation. Validation of predicates
+ /// occurs when an alternative is parsed normally, just like matching a token.
+ /// Disambiguating predicate evaluation occurs when a predicate is tested
+ /// during prediction.
+ PredicateError(FailedPredicateError),
+
+ /// Internal error, or a user-provided type returned data that is
+ /// incompatible with the current parser state
+ IllegalStateError(String),
+
+ /// Unrecoverable error. Indicates that the error should not be processed by the parser
+ /// or error strategy; parsing should abort and immediately return to the caller.
+ FallThrough(Rc<dyn Error>),
+
+ /// Potentially recoverable error.
+ /// Allows users to emit their own errors from parser actions or from a custom error strategy.
+ /// The parser will try to recover using the provided `ErrorStrategy`.
+ OtherError(Rc<dyn Error>),
+}
+
+// impl Clone for ANTLRError {
+// fn clone(&self) -> Self {
+// match self {
+// ANTLRError::LexerNoAltError { start_index } => ANTLRError::LexerNoAltError {
+// start_index: *start_index,
+// },
+// ANTLRError::NoAltError(e) => ANTLRError::NoAltError(e.clone()),
+// ANTLRError::InputMismatchError(e) => ANTLRError::InputMismatchError(e.clone()),
+// ANTLRError::PredicateError(e) => ANTLRError::PredicateError(e.clone()),
+// ANTLRError::IllegalStateError(e) => ANTLRError::IllegalStateError(e.clone()),
+// ANTLRError::FallThrough(_) => panic!("clone not supported"),
+// ANTLRError::OtherError(_) => panic!("clone not supported"),
+// }
+// }
+// }
+
+impl Display for ANTLRError {
+ fn fmt(&self, _f: &mut Formatter<'_>) -> fmt::Result {
+ <Self as Debug>::fmt(self, _f)
+ }
+}
+
+impl Error for ANTLRError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ match self {
+ ANTLRError::FallThrough(x) => Some(x.as_ref()),
+ ANTLRError::OtherError(x) => Some(x.as_ref()),
+ _ => None,
+ }
+ }
+}
+
+impl ANTLRError {
+ /// Returns the first token that caused the parser to fail.
+ pub fn get_offending_token(&self) -> Option<&OwningToken> {
+ Some(match self {
+ ANTLRError::NoAltError(e) => &e.base.offending_token,
+ ANTLRError::InputMismatchError(e) => &e.base.offending_token,
+ ANTLRError::PredicateError(e) => &e.base.offending_token,
+ _ => return None,
+ })
+ }
+}
+
+//impl ANTLRError {
+// fn get_expected_tokens(&self, _atn: &ATN) -> IntervalSet {
+//// atn.get_expected_tokens(se)
+// unimplemented!()
+// }
+//}
+
+/// Common part of ANTLR parser errors
+#[derive(Debug, Clone)]
+#[allow(missing_docs)]
+pub struct BaseRecognitionError {
+ pub message: String,
+ // recognizer: Box,
+ pub offending_token: OwningToken,
+ pub offending_state: isize,
+ states_stack: Vec<isize>, // ctx: Rc
+ // input: Box
+}
+
+impl BaseRecognitionError {
+ /// Returns the tokens that the parser expected at the error position
+ pub fn get_expected_tokens<'a, T: Parser<'a>>(&self, recognizer: &T) -> IntervalSet {
+ recognizer
+ .get_interpreter()
+ .atn()
+ .get_expected_tokens(self.offending_state, self.states_stack.iter().copied())
+ }
+
+ fn new<'a, T: Parser<'a>>(recog: &mut T) -> BaseRecognitionError {
+ BaseRecognitionError {
+ message: "".to_string(),
+ offending_token: recog.get_current_token().borrow().to_owned(),
+ offending_state: recog.get_state(),
+ // ctx: recog.get_parser_rule_context().clone(),
+ states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(),
+ }
+ }
+}
+
+/// See `ANTLRError::NoAltError`
+#[derive(Debug, Clone)]
+#[allow(missing_docs)]
+pub struct NoViableAltError {
+ pub base: BaseRecognitionError,
+ pub start_token: OwningToken,
+ // ctx: Rc,
+ // dead_end_configs: BaseATNConfigSet,
+}
+
+#[allow(missing_docs)]
+impl NoViableAltError {
+ pub fn new<'a, T: Parser<'a>>(recog: &mut T) -> NoViableAltError {
+ Self {
+ base: BaseRecognitionError {
+ message: "".to_string(),
+ offending_token: recog.get_current_token().borrow().to_owned(),
+ offending_state: recog.get_state(),
+ // ctx: recog.get_parser_rule_context().clone(),
+ states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(),
+ },
+ start_token: recog.get_current_token().borrow().to_owned(),
+ // ctx: recog.get_parser_rule_context().clone()
+ }
+ }
+ pub fn new_full<'a, T: Parser<'a>>(
+ recog: &mut T,
+ start_token: OwningToken,
+ offending_token: OwningToken,
+ ) -> NoViableAltError {
+ Self {
+ base: BaseRecognitionError {
+ message: "".to_string(),
+ offending_token,
+ offending_state: recog.get_state(),
+ states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(), // ctx: recog.get_parser_rule_context().clone(),
+ },
+ start_token,
+ // ctx
+ }
+ }
+}
+
+/// See `ANTLRError::InputMismatchError`
+#[derive(Debug, Clone)]
+#[allow(missing_docs)]
+pub struct InputMisMatchError {
+ pub base: BaseRecognitionError,
+}
+
+#[allow(missing_docs)]
+impl InputMisMatchError {
+ pub fn new<'a, T: Parser<'a>>(recognizer: &mut T) -> InputMisMatchError {
+ InputMisMatchError {
+ base: BaseRecognitionError::new(recognizer),
+ }
+ }
+
+ pub fn with_state<'a, T: Parser<'a>>(
+ recognizer: &mut T,
+ offending_state: isize,
+ ctx: Rc<<T::Node as ParserNodeType<'a>>::Type>,
+ ) -> InputMisMatchError {
+ let mut a = Self::new(recognizer);
+ // a.base.ctx = ctx;
+ a.base.offending_state = offending_state;
+ a.base.states_stack = states_stack(ctx).collect();
+ a
+ }
+}
+
+//fn new_input_mis_match_exception(recognizer: Parser) -> InputMisMatchError { unimplemented!() }
+
+/// See `ANTLRError::PredicateError`
+#[derive(Debug, Clone)]
+#[allow(missing_docs, dead_code)]
+pub struct FailedPredicateError {
+ pub base: BaseRecognitionError,
+ pub rule_index: isize,
+ predicate_index: isize,
+ pub predicate: String,
+}
+
+#[allow(missing_docs)]
+impl FailedPredicateError {
+ pub fn new<'a, T: Parser<'a>>(
+ recog: &mut T,
+ predicate: Option<String>,
+ msg: Option<String>,
+ ) -> ANTLRError {
+ let tr = recog.get_interpreter().atn().states[recog.get_state() as usize]
+ .get_transitions()
+ .first()
+ .unwrap();
+ let (rule_index, predicate_index) = if tr.get_serialization_type() == TRANSITION_PREDICATE {
+ let pr = tr.deref().cast::<PredicateTransition>();
+ (pr.rule_index, pr.pred_index)
+ } else {
+ (0, 0)
+ };
+
+ ANTLRError::PredicateError(FailedPredicateError {
+ base: BaseRecognitionError {
+ message: msg.unwrap_or_else(|| {
+ format!(
+ "failed predicate: {}",
+ predicate.as_deref().unwrap_or("None")
+ )
+ }),
+ offending_token: recog.get_current_token().borrow().to_owned(),
+ offending_state: recog.get_state(),
+ states_stack: states_stack(recog.get_parser_rule_context().clone()).collect(), // ctx: recog.get_parser_rule_context().clone()
+ },
+ rule_index,
+ predicate_index,
+ predicate: predicate.unwrap_or_default(),
+ })
+ }
+}
diff --git a/runtime/Rust/src/file_stream.rs b/runtime/Rust/src/file_stream.rs
new file mode 100644
index 0000000000..bb3bf8611b
--- /dev/null
+++ b/runtime/Rust/src/file_stream.rs
@@ -0,0 +1,14 @@
+use std;
+
+pub struct FileStream {
+ base: InputStream<Box<str>>, // stub: the concrete data type is provisional
+
+ filename: String,
+}
+
+impl FileStream {
+ fn new(file_name: String) -> Result<FileStream, std::io::Error> { unimplemented!() }
+
+ fn get_source_name(&self) -> String { unimplemented!() }
+}
+
\ No newline at end of file
diff --git a/runtime/Rust/src/input_stream.rs b/runtime/Rust/src/input_stream.rs
new file mode 100644
index 0000000000..ecc44f7f20
--- /dev/null
+++ b/runtime/Rust/src/input_stream.rs
@@ -0,0 +1,262 @@
+//! Input to lexer
+use std::borrow::Cow;
+
+use crate::char_stream::{CharStream, InputData};
+use crate::int_stream::IntStream;
+use std::ops::Deref;
+
+/// Default Rust target input stream.
+///
+/// Since Rust uses UTF-8, which does not support indexing by char,
+/// `InputStream<&str>` has slightly different index behavior compared to the Java runtime
+/// when the input contains non-ASCII unicode characters.
+/// If you need exactly the same indexes as the Java runtime, use `CodePoint8/16/32BitCharStream`,
+/// which does not use Rust's native `str` type and thus performs additional conversions and allocations along the way.
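+///
+/// For example (mirroring the test at the bottom of this file), indexes into
+/// `InputStream<&str>` are byte offsets, so consuming the 3-byte `'は'`
+/// advances the index from 2 to 5:
+/// ```ignore
+/// let mut input = InputStream::new("V1は3");
+/// input.seek(2); // positioned at 'は'
+/// input.consume();
+/// assert_eq!(input.index(), 5);
+/// ```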
+#[derive(Debug)]
+pub struct InputStream<Data> {
+ name: String,
+ data_raw: Data,
+ index: isize,
+}
+
+// #[impl_tid]
+// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream> {}
+// #[impl_tid]
+// impl<'a, T: ?Sized + 'static> TidAble<'a> for InputStream<&'a T> {}
+better_any::tid! {impl<'a, T: 'static> TidAble<'a> for InputStream<&'a T> where T: ?Sized}
+better_any::tid! {impl<'a, T: 'static> TidAble<'a> for InputStream<Box<T>> where T: ?Sized}
+
+impl<'a, T: From<&'a str>> CharStream<T> for InputStream<&'a str> {
+ #[inline]
+ fn get_text(&self, start: isize, stop: isize) -> T {
+ self.get_text_inner(start, stop).into()
+ }
+}
+
+impl<T: From<D::Owned>, D: ?Sized + InputData> CharStream<T> for InputStream<Box<D>> {
+ #[inline]
+ fn get_text(&self, start: isize, stop: isize) -> T {
+ self.get_text_owned(start, stop).into()
+ }
+}
+/// `InputStream` over byte slice
+pub type ByteStream<'a> = InputStream<&'a [u8]>;
+/// InputStream which treats the input as a series of Unicode code points that fit into `u8`
+pub type CodePoint8BitCharStream<'a> = InputStream<&'a [u8]>;
+/// InputStream which treats the input as a series of Unicode code points that fit into `u16`
+pub type CodePoint16BitCharStream<'a> = InputStream<&'a [u16]>;
+/// InputStream which treats the input as a series of Unicode code points
+pub type CodePoint32BitCharStream<'a> = InputStream<&'a [u32]>;
+
+impl<'a, T> CharStream<Cow<'a, [T]>> for InputStream<&'a [T]>
+where
+ [T]: InputData,
+{
+ #[inline]
+ fn get_text(&self, a: isize, b: isize) -> Cow<'a, [T]> {
+ Cow::Borrowed(self.get_text_inner(a, b))
+ }
+}
+
+impl<'a, T> CharStream<String> for InputStream<&'a [T]>
+where
+ [T]: InputData,
+{
+ fn get_text(&self, a: isize, b: isize) -> String {
+ self.get_text_inner(a, b).to_display()
+ }
+}
+
+impl<'a, 'b, T> CharStream<Cow<'b, str>> for InputStream<&'a [T]>
+where
+ [T]: InputData,
+{
+ #[inline]
+ fn get_text(&self, a: isize, b: isize) -> Cow<'b, str> {
+ self.get_text_inner(a, b).to_display().into()
+ }
+}
+
+impl<'a, T> CharStream<&'a [T]> for InputStream<&'a [T]>
+where
+ [T]: InputData,
+{
+ #[inline]
+ fn get_text(&self, a: isize, b: isize) -> &'a [T] {
+ self.get_text_inner(a, b)
+ }
+}
+
+impl<Data: ?Sized + InputData> InputStream<Box<Data>> {
+ fn get_text_owned(&self, start: isize, stop: isize) -> Data::Owned {
+ let start = start as usize;
+ let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
+
+ if stop < self.data_raw.len() {
+ &self.data_raw[start..stop]
+ } else {
+ &self.data_raw[start..]
+ }
+ .to_owned()
+ }
+
+ /// Creates a new `InputStream` over owned data
+ pub fn new_owned(data: Box<Data>) -> Self {
+ Self {
+ name: "".to_string(),
+ data_raw: data.into(),
+ index: 0,
+ }
+ }
+}
+
+impl<'a, Data> InputStream<&'a Data>
+where
+ Data: ?Sized + InputData,
+{
+ fn get_text_inner(&self, start: isize, stop: isize) -> &'a Data {
+ // println!("get text {}..{} of {:?}",start,stop,self.data_raw.to_display());
+ let start = start as usize;
+ let stop = self.data_raw.offset(stop, 1).unwrap_or(stop) as usize;
+ // println!("justed range {}..{} ",start,stop);
+ // let start = self.data_raw.offset(0,start).unwrap() as usize;
+ // let stop = self.data_raw.offset(0,stop + 1).unwrap() as usize;
+
+ if stop < self.data_raw.len() {
+ &self.data_raw[start..stop]
+ } else {
+ &self.data_raw[start..]
+ }
+ }
+
+ /// Creates a new `InputStream` over borrowed data
+ pub fn new(data_raw: &'a Data) -> Self {
+ // let data_raw = data_raw.as_ref();
+ // let data = data_raw.to_indexed_vec();
+ Self {
+ name: "".to_string(),
+ data_raw,
+ index: 0,
+ // phantom: Default::default(),
+ }
+ }
+}
+impl<Data: Deref> InputStream<Data>
+where
+ Data::Target: InputData,
+{
+ /// Resets input stream to start from the beginning of this slice
+ #[inline]
+ pub fn reset(&mut self) {
+ self.index = 0
+ }
+}
+
+impl<Data: Deref> IntStream for InputStream<Data>
+where
+ Data::Target: InputData,
+{
+ #[inline]
+ fn consume(&mut self) {
+ if let Some(index) = self.data_raw.offset(self.index, 1) {
+ self.index = index;
+ // self.current = self.data_raw.deref().item(index).unwrap_or(TOKEN_EOF);
+ // Ok(())
+ } else {
+ panic!("cannot consume EOF");
+ }
+ }
+
+ #[inline]
+ fn la(&mut self, mut offset: isize) -> isize {
+ if offset == 1 {
+ return self
+ .data_raw
+ .item(self.index)
+ .unwrap_or(crate::int_stream::EOF);
+ }
+ if offset == 0 {
+ panic!("should not be called with offset 0");
+ }
+ if offset < 0 {
+ offset += 1; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
+ }
+
+ self.data_raw
+ .offset(self.index, offset - 1)
+ .and_then(|index| self.data_raw.item(index))
+ .unwrap_or(crate::int_stream::EOF)
+ }
+
+ #[inline]
+ fn mark(&mut self) -> isize {
+ -1
+ }
+
+ #[inline]
+ fn release(&mut self, _marker: isize) {}
+
+ #[inline]
+ fn index(&self) -> isize {
+ self.index
+ }
+
+ #[inline]
+ fn seek(&mut self, index: isize) {
+ self.index = index
+ }
+
+ #[inline]
+ fn size(&self) -> isize {
+ self.data_raw.len() as isize
+ }
+
+ fn get_source_name(&self) -> String {
+ self.name.clone()
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use std::ops::Deref;
+
+ use crate::char_stream::CharStream;
+ use crate::int_stream::{IntStream, EOF};
+
+ use super::InputStream;
+
+ #[test]
+ fn test_str_input_stream() {
+ let mut input = InputStream::new("V1は3");
+ let input = &mut input as &mut dyn CharStream<String>;
+ assert_eq!(input.la(1), 'V' as isize);
+ assert_eq!(input.index(), 0);
+ input.consume();
+ assert_eq!(input.la(1), '1' as isize);
+ assert_eq!(input.la(-1), 'V' as isize);
+ assert_eq!(input.index(), 1);
+ input.consume();
+ assert_eq!(input.la(1), 0x306F);
+ assert_eq!(input.index(), 2);
+ input.consume();
+ assert_eq!(input.index(), 5);
+ assert_eq!(input.la(-2), '1' as isize);
+ assert_eq!(input.la(2), EOF);
+ assert_eq!(input.get_text(1, 1).deref(), "1");
+ assert_eq!(input.get_text(1, 2).deref(), "1は");
+ assert_eq!(input.get_text(2, 2).deref(), "は");
+ assert_eq!(input.get_text(2, 5).deref(), "は3");
+ assert_eq!(input.get_text(5, 5).deref(), "3");
+ }
+
+ #[test]
+ fn test_byte_input_stream() {
+ let mut input = InputStream::new(&b"V\xaa\xbb"[..]);
+ assert_eq!(input.la(1), 'V' as isize);
+ input.seek(2);
+ assert_eq!(input.la(1), 0xBB);
+ assert_eq!(input.index(), 2);
+ let mut input = InputStream::new("は".as_bytes());
+ assert_eq!(input.la(1), 227);
+ }
+}
diff --git a/runtime/Rust/src/int_stream.rs b/runtime/Rust/src/int_stream.rs
new file mode 100644
index 0000000000..1ae40bcc83
--- /dev/null
+++ b/runtime/Rust/src/int_stream.rs
@@ -0,0 +1,85 @@
+//! Common abstraction for a stream of integer symbols
+use crate::token::TOKEN_EOF;
+
+/// Constant indicating that the end of the stream has been reached
+pub const EOF: isize = TOKEN_EOF;
+
+/// A stream of symbols whose values are represented as integers
+pub trait IntStream {
+ /// Consumes the current symbol and advances the stream
+ fn consume(&mut self);
+
+ /// Returns the symbol `offset` positions away from the current one without
+ /// consuming it, or `EOF` if out of range; `la(1)` is the next symbol and
+ /// `la(-1)` the previous one. Must not be called with offset 0.
+ fn la(&mut self, offset: isize) -> isize;
+
+ /// After this call, subsequent calls to `seek` must succeed if the seek index is greater than the mark index
+ ///
+ /// Returns a marker that should later be passed to `release` to release this stream from the mark
+ fn mark(&mut self) -> isize;
+
+ /// Releases `marker`
+ fn release(&mut self, marker: isize);
+
+ /// Returns current position of the input stream
+ ///
+ /// If there is active marker from `mark` then calling `seek` later with result of this call
+ /// should put stream in same state it is currently in.
+ fn index(&self) -> isize;
+ /// Puts the stream back into the state it was in at position `index`
+ ///
+ /// Allowed to panic if `index` does not belong to the marked region (via `mark`/`release` calls)
+ fn seek(&mut self, index: isize);
+
+ /// Returns the total number of symbols in the stream.
+ fn size(&self) -> isize;
+
+ /// Returns name of the source this stream operates over if any
+ fn get_source_name(&self) -> String;
+}
+
+/// Iterator over `IntStream`
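+///
+/// A sketch of draining the remaining symbols of any `IntStream`:
+/// ```ignore
+/// let rest: Vec<isize> = IterWrapper(&mut stream).collect();
+/// ```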
+#[derive(Debug)]
+pub struct IterWrapper<'a, T: IntStream>(pub &'a mut T);
+
+impl<'a, T: IntStream> Iterator for IterWrapper<'a, T> {
+ type Item = isize;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let result = self.0.la(1);
+ self.0.consume();
+ match result {
+ EOF => None,
+ x => Some(x),
+ }
+ }
+}
diff --git a/runtime/Rust/src/interval_set.rs b/runtime/Rust/src/interval_set.rs
new file mode 100644
index 0000000000..9707c1fabf
--- /dev/null
+++ b/runtime/Rust/src/interval_set.rs
@@ -0,0 +1,403 @@
+use std::borrow::Cow;
+use std::borrow::Cow::Borrowed;
+use std::cmp::{max, min, Ordering};
+
+use crate::token::{TOKEN_EOF, TOKEN_EPSILON};
+use crate::vocabulary::{Vocabulary, DUMMY_VOCAB};
+
+/// Represents interval equivalent to `a..=b`
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub struct Interval {
+ /// start
+ pub a: isize,
+ /// end >= start
+ pub b: isize,
+}
+
+pub(crate) const INVALID: Interval = Interval { a: -1, b: -2 };
+
+impl Interval {
+ /* both bounds are inclusive */
+ fn new(a: isize, b: isize) -> Interval {
+ Interval { a, b }
+ }
+
+ // fn contains(&self, _item: isize) -> bool { unimplemented!() }
+
+ fn length(&self) -> isize {
+ self.b - self.a
+ }
+
+ fn union(&self, another: &Interval) -> Interval {
+ Interval {
+ a: min(self.a, another.a),
+ b: max(self.b, another.b),
+ }
+ }
+
+ /** Does self start completely before other? Disjoint */
+ pub fn starts_before_disjoint(&self, other: &Interval) -> bool {
+ return self.a < other.a && self.b < other.a;
+ }
+
+ /** Does self start at or before other? Nondisjoint */
+ pub fn starts_before_non_disjoint(&self, other: &Interval) -> bool {
+ return self.a <= other.a && self.b >= other.a;
+ }
+
+ /** Does self start after other starts? May or may not be disjoint */
+ pub fn starts_after(&self, other: &Interval) -> bool {
+ return self.a > other.a;
+ }
+
+ /** Does self start completely after other? Disjoint */
+ pub fn starts_after_disjoint(&self, other: &Interval) -> bool {
+ return self.a > other.b;
+ }
+
+ /** Does self start after other? NonDisjoint */
+ pub fn starts_after_non_disjoint(&self, other: &Interval) -> bool {
+ return self.a > other.a && self.a <= other.b; // self.b>=other.b implied
+ }
+
+ /** Are both ranges disjoint? I.e., no overlap? */
+ pub fn disjoint(&self, other: &Interval) -> bool {
+ return self.starts_before_disjoint(other) || self.starts_after_disjoint(other);
+ }
+
+ /** Are two intervals adjacent such as 0..41 and 42..42? */
+ pub fn adjacent(&self, other: &Interval) -> bool {
+ return self.a == other.b + 1 || self.b == other.a - 1;
+ }
+
+ // public boolean properlyContains(Interval other) {
+ // return other.a >= self.a && other.b <= self.b;
+ // }
+ //
+ // /** Return the interval computed from combining self and other */
+ // public Interval union(Interval other) {
+ // return Interval.of(Math.min(a, other.a), Math.max(b, other.b));
+ // }
+ //
+ // /** Return the interval in common between self and o */
+ // public Interval intersection(Interval other) {
+ // return Interval.of(Math.max(a, other.a), Math.min(b, other.b));
+ // }
+}
+
+/// Set of disjoint intervals
+///
+/// Basically a set of integers but optimized for cases when it is sparse and created by adding
+/// intervals of integers.
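+///
+/// A small usage sketch:
+/// ```ignore
+/// let mut set = IntervalSet::new();
+/// set.add_range(1, 3); // {1..=3}
+/// set.add_one(5); // {1..=3, 5..=5}
+/// assert!(set.contains(2) && !set.contains(4));
+/// ```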
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub struct IntervalSet {
+ intervals: Vec<Interval>,
+ #[allow(missing_docs)]
+ pub read_only: bool,
+}
+
+#[allow(missing_docs)]
+impl IntervalSet {
+ pub fn new() -> IntervalSet {
+ IntervalSet {
+ intervals: Vec::new(),
+ read_only: false,
+ }
+ }
+
+ pub fn get_min(&self) -> Option<isize> {
+ self.intervals.first().map(|x| x.a)
+ }
+
+ pub fn add_one(&mut self, _v: isize) {
+ self.add_range(_v, _v)
+ }
+
+ pub fn add_range(&mut self, l: isize, h: isize) {
+ self.add_interval(Interval { a: l, b: h })
+ }
+
+ pub fn add_interval(&mut self, added: Interval) {
+ if added.length() < 0 {
+ return;
+ }
+
+ let mut i = 0;
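+ // scan for the first existing interval the new one touches: merge on
+ // overlap/adjacency (then keep merging forward), or insert before the
+ // first interval that starts past the new one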
+ while let Some(r) = self.intervals.get_mut(i) {
+ if *r == added {
+ return;
+ }
+
+ if added.adjacent(r) || !added.disjoint(r) {
+ // next to each other, make a single larger interval
+ let bigger = added.union(r);
+ *r = bigger;
+ // make sure we didn't just create an interval that
+ // should be merged with next interval in list
+ loop {
+ i += 1;
+ let next = match self.intervals.get(i) {
+ Some(v) => v,
+ None => break,
+ };
+ if !bigger.adjacent(next) && bigger.disjoint(next) {
+ break;
+ }
+
+ // if we bump up against or overlap next, merge
+ self.intervals[i - 1] = bigger.union(next); // set to 3 merged ones
+ self.intervals.remove(i);
+ }
+ return;
+ }
+ if added.starts_before_disjoint(r) {
+ // insert before r
+ self.intervals.insert(i, added);
+ return;
+ }
+ i += 1;
+ }
+
+ self.intervals.push(added);
+ }
+
+ pub fn add_set(&mut self, other: &IntervalSet) {
+ for i in &other.intervals {
+ self.add_interval(*i)
+ }
+ }
+
+ pub fn substract(&mut self, right: &IntervalSet) {
+ let result = self;
+ let mut result_i = 0usize;
+ let mut right_i = 0usize;
+
+ while result_i < result.intervals.len() && right_i < right.intervals.len() {
+ let result_interval = result.intervals[result_i];
+ let right_interval = right.intervals[right_i];
+
+ if right_interval.b < result_interval.a {
+ right_i += 1;
+ continue;
+ }
+
+ if right_interval.a > result_interval.b {
+ result_i += 1;
+ continue;
+ }
+
+ let before_curr = if right_interval.a > result_interval.a {
+ Some(Interval::new(result_interval.a, right_interval.a - 1))
+ } else {
+ None
+ };
+ let after_curr = if right_interval.b < result_interval.b {
+ Some(Interval::new(right_interval.b + 1, result_interval.b))
+ } else {
+ None
+ };
+
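+ // the surviving parts of the current interval before/after the
+ // subtracted one determine whether we shrink, split, or drop it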
+ match (before_curr, after_curr) {
+ (Some(before_curr), Some(after_curr)) => {
+ result.intervals[result_i] = before_curr;
+ result.intervals.insert(result_i + 1, after_curr);
+ result_i += 1;
+ right_i += 1;
+ }
+ (Some(before_curr), None) => {
+ result.intervals[result_i] = before_curr;
+ result_i += 1;
+ }
+ (None, Some(after_curr)) => {
+ result.intervals[result_i] = after_curr;
+ right_i += 1;
+ }
+ (None, None) => {
+ result.intervals.remove(result_i);
+ }
+ }
+ }
+
+ // return result;
+ }
+
+ pub fn complement(&self, start: isize, stop: isize) -> IntervalSet {
+ let mut vocabulary_is = IntervalSet::new();
+ vocabulary_is.add_range(start, stop);
+ vocabulary_is.substract(self);
+ return vocabulary_is;
+ }
+
+ pub fn contains(&self, item: isize) -> bool {
+ self.intervals
+ .binary_search_by(|x| {
+ if item < x.a {
+ return Ordering::Greater;
+ }
+ if item > x.b {
+ return Ordering::Less;
+ }
+ Ordering::Equal
+ })
+ .is_ok()
+ }
+
+ pub fn length(&self) -> isize {
+ self.intervals
+ .iter()
+ .fold(0, |acc, it| acc + it.b - it.a + 1)
+ }
+
+ // fn remove_range(&self, _v: &Interval) { unimplemented!() }
+
+ pub fn remove_one(&mut self, el: isize) {
+ if self.read_only {
+ panic!("can't alter readonly IntervalSet")
+ }
+
+ for i in 0..self.intervals.len() {
+ let int = &mut self.intervals[i];
+ if el < int.a {
+ break;
+ }
+
+ if el == int.a && el == int.b {
+ self.intervals.remove(i);
+ break;
+ }
+
+ if el == int.a {
+ int.a += 1;
+ break;
+ }
+
+ if el == int.b {
+ int.b -= 1;
+ break;
+ }
+
+ if el > int.a && el < int.b {
+ let old_b = int.b;
+ int.b = el - 1;
+ self.add_range(el + 1, old_b);
+ }
+ }
+ }
+
+ // fn String(&self) -> String {
+ // unimplemented!()
+ // }
+ //
+ // fn String_verbose(
+ // &self,
+ // _literalNames: Vec,
+ // _symbolicNames: Vec,
+ // _elemsAreChar: bool,
+ // ) -> String {
+ // unimplemented!()
+ // }
+ //
+ // fn to_char_String(&self) -> String {
+ // unimplemented!()
+ // }
+ //
+ pub fn to_index_string(&self) -> String {
+ self.to_token_string(&DUMMY_VOCAB)
+ }
+
+ pub fn to_token_string(&self, vocabulary: &dyn Vocabulary) -> String {
+ if self.intervals.is_empty() {
+ return "{}".to_owned();
+ }
+ let mut buf = String::new();
+ if self.length() > 1 {
+ buf += "{";
+ }
+ let mut iter = self.intervals.iter();
+ while let Some(int) = iter.next() {
+ if int.a == int.b {
+ buf += self.element_name(vocabulary, int.a).as_ref();
+ } else {
+ for i in int.a..(int.b + 1) {
+ if i > int.a {
+ buf += ", ";
+ }
+ buf += self.element_name(vocabulary, i).as_ref();
+ }
+ }
+ if iter.len() > 0 {
+ buf += ", ";
+ }
+ }
+
+ if self.length() > 1 {
+ buf += "}";
+ }
+
+ return buf;
+ }
+
+ fn element_name<'a>(&self, vocabulary: &'a dyn Vocabulary, a: isize) -> Cow<'a, str> {
+ if a == TOKEN_EOF {
+ Borrowed("<EOF>")
+ } else if a == TOKEN_EPSILON {
+ Borrowed("<EPSILON>")
+ } else {
+ vocabulary.get_display_name(a)
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_add_1() {
+ let mut set = IntervalSet::new();
+ set.add_range(1, 2);
+ assert_eq!(&set.intervals, &[Interval { a: 1, b: 2 }]);
+ set.add_range(2, 3);
+ assert_eq!(&set.intervals, &[Interval { a: 1, b: 3 }]);
+ set.add_range(1, 5);
+ assert_eq!(&set.intervals, &[Interval { a: 1, b: 5 }]);
+ }
+
+ #[test]
+ fn test_add_2() {
+ let mut set = IntervalSet::new();
+ set.add_range(1, 3);
+ set.add_range(5, 6);
+ assert_eq!(
+ &set.intervals,
+ &[Interval { a: 1, b: 3 }, Interval { a: 5, b: 6 }]
+ );
+ set.add_range(3, 4);
+ assert_eq!(&set.intervals, &[Interval { a: 1, b: 6 }]);
+ }
+
+ #[test]
+ fn test_remove() {
+ let mut set = IntervalSet::new();
+ set.add_range(1, 5);
+ set.remove_one(3);
+ assert_eq!(
+ &set.intervals,
+ &[Interval { a: 1, b: 2 }, Interval { a: 4, b: 5 }]
+ );
+ }
+
+ #[test]
+ fn test_substract() {
+ let mut set1 = IntervalSet::new();
+ set1.add_range(1, 2);
+ set1.add_range(4, 5);
+ let mut set2 = IntervalSet::new();
+ set2.add_range(2, 4);
+ set1.substract(&set2);
+ assert_eq!(
+ &set1.intervals,
+ &[Interval { a: 1, b: 1 }, Interval { a: 5, b: 5 }]
+ );
+ }
+}
diff --git a/runtime/Rust/src/lexer.rs b/runtime/Rust/src/lexer.rs
new file mode 100644
index 0000000000..bfc1e270a1
--- /dev/null
+++ b/runtime/Rust/src/lexer.rs
@@ -0,0 +1,540 @@
+//! Lexer implementation
+use std::borrow::Cow::Borrowed;
+use std::borrow::{Borrow, Cow};
+use std::cell::{Cell, RefCell};
+
+use std::rc::Rc;
+
+use crate::char_stream::{CharStream, InputData};
+use crate::error_listener::{ConsoleErrorListener, ErrorListener};
+use crate::errors::ANTLRError;
+use crate::int_stream::IntStream;
+use crate::lexer_atn_simulator::{ILexerATNSimulator, LexerATNSimulator};
+use crate::parser::ParserNodeType;
+
+use crate::recognizer::{Actions, Recognizer};
+use crate::rule_context::EmptyContextType;
+use crate::token::TOKEN_INVALID_TYPE;
+use crate::token_factory::{CommonTokenFactory, TokenAware, TokenFactory};
+use crate::token_source::TokenSource;
+use std::ops::{Deref, DerefMut};
+
+/// Lexer functionality required by `LexerATNSimulator` to work properly
+pub trait Lexer<'input>:
+ TokenSource<'input>
+ + Recognizer<'input, Node = EmptyContextType<'input, <Self as TokenSource<'input>>::TF>>
+{
+ /// Concrete input stream used by this parser
+ type Input: IntStream;
+ /// Same as `TokenStream::get_input_stream` but returns the concrete type instance,
+ /// which is important for proper inlining in the hot code of `LexerATNSimulator`
+ fn input(&mut self) -> &mut Self::Input;
+ /// Sets channel where current token will be pushed
+ ///
+ /// By default two channels are available:
+ /// - `LEXER_DEFAULT_TOKEN_CHANNEL`
+ /// - `LEXER_HIDDEN`
+ fn set_channel(&mut self, v: isize);
+
+ /// Pushes the current mode onto the internal mode stack and sets `m` as the current lexer mode;
+ /// `pop_mode` should be used to recover the previous mode
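+ ///
+ /// A sketch of how an embedded action might use the mode stack (the
+ /// `IN_STRING` mode id is illustrative):
+ /// ```ignore
+ /// lexer.push_mode(IN_STRING); // enter string mode at the opening quote
+ /// // ... string contents are lexed in IN_STRING mode ...
+ /// lexer.pop_mode(); // restore the previous mode at the closing quote
+ /// ```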
+ fn push_mode(&mut self, m: usize);
+
+ /// Pops mode from internal mode stack
+ fn pop_mode(&mut self) -> Option<usize>;
+
+ /// Sets type of the current token
+ /// Called from action to override token that will be emitted by lexer
+ fn set_type(&mut self, t: isize);
+
+ /// Sets lexer mode discarding current one
+ fn set_mode(&mut self, m: usize);
+
+ /// Informs the lexer that it should treat the next token as a continuation of the current one
+ fn more(&mut self);
+
+ /// Tells lexer to completely ignore and not emit current token.
+ fn skip(&mut self);
+
+ #[doc(hidden)]
+ fn reset(&mut self);
+
+ #[doc(hidden)]
+ fn get_interpreter(&self) -> Option<&LexerATNSimulator>;
+}
+
+/// **! Usually generated by ANTLR !**
+///
+/// This trait combines everything that can be used to extend Lexer behavior
+pub trait LexerRecog<'a, T: Recognizer<'a>>: Actions<'a, T> + Sized + 'static {
+ /// Callback to extend emit behavior
+ fn before_emit(_lexer: &mut T) {}
+}
+
+/// Default implementation of Lexer
+///
+/// Public fields in this struct are intended to be used by embedded actions
+#[allow(missing_docs)]
+pub struct BaseLexer<
+ 'input,
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input> = CommonTokenFactory,
+> {
+ /// `LexerATNSimulator` instance of this lexer
+ pub interpreter: Option<Box<LexerATNSimulator>>,
+ /// `CharStream` used by this lexer
+ pub input: Option<Input>,
+ recog: T,
+
+ factory: &'input TF,
+
+ error_listeners: RefCell<Vec<Box<dyn ErrorListener<'input, Self>>>>,
+
+ pub token_start_char_index: isize,
+ pub token_start_line: isize,
+ pub token_start_column: isize,
+ current_pos: Rc<LexerPosition>,
+ /// Overrides token type emitted by lexer for current token
+ pub token_type: isize,
+ /// Make it `Some` to override token that is currently being generated by lexer
+ pub token: Option<TF::Tok>,
+ hit_eof: bool,
+ /// Channel lexer is currently assigning tokens to
+ pub channel: isize,
+ /// stack of modes, which is used for pushMode,popMode lexer actions
+ pub mode_stack: Vec<usize>,
+ /// Mode lexer is currently in
+ pub mode: usize,
+ /// Make it `Some` to override text for token that is currently being generated by lexer
+ pub text: Option<<TF::Data as ToOwned>::Owned>,
+}
+
+#[derive(Debug)]
+pub(crate) struct LexerPosition {
+ pub(crate) line: Cell<isize>,
+ pub(crate) char_position_in_line: Cell<isize>,
+}
+
+impl<'input, T, Input, TF> core::fmt::Debug for BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("BaseLexer")
+ .field("interpreter", &self.interpreter)
+ // TODO: fix this
+ // .field("input", &self.input)
+ // .field("recog", &self.recog)
+ // .field("factory", &self.factory)
+ .field("error_listeners", &self.error_listeners)
+ .field("token_start_char_index", &self.token_start_char_index)
+ .field("token_start_line", &self.token_start_line)
+ .field("token_start_column", &self.token_start_column)
+ .field("current_pos", &self.current_pos)
+ .field("token_type", &self.token_type)
+ .field("token", &self.token)
+ .field("hit_eof", &self.hit_eof)
+ .field("channel", &self.channel)
+ .field("mode_stack", &self.mode_stack)
+ .field("mode", &self.mode)
+ .field(
+ "text",
+ match &self.text {
+ Some(_) => &"Some",
+ None => &"None",
+ },
+ )
+ .finish()
+ }
+}
+
+impl<'input, T, Input, TF> Deref for BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ &self.recog
+ }
+}
+
+impl<'input, T, Input, TF> DerefMut for BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.recog
+ }
+}
+
+impl<'input, T, Input, TF> Recognizer<'input> for BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ type Node = EmptyContextType<'input, TF>;
+
+ fn sempred(
+ &mut self,
+ _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
+ rule_index: isize,
+ action_index: isize,
+ ) -> bool {
+ <T as Actions<'input, Self>>::sempred(_localctx, rule_index, action_index, self)
+ }
+
+ fn action(
+ &mut self,
+ _localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
+ rule_index: isize,
+ action_index: isize,
+ ) {
+ <T as Actions<'input, Self>>::action(_localctx, rule_index, action_index, self)
+ }
+}
+
+/// Default lexer mode id
+pub const LEXER_DEFAULT_MODE: usize = 0;
+/// Special token type to indicate that lexer should continue current token on next iteration
+/// see `Lexer::more()`
+pub const LEXER_MORE: isize = -2;
+/// Special token type to indicate that lexer should not return current token
+/// usually used to skip whitespaces and comments
+/// see `Lexer::skip()`
+pub const LEXER_SKIP: isize = -3;
+
+#[doc(inline)]
+pub use super::token::TOKEN_DEFAULT_CHANNEL as LEXER_DEFAULT_TOKEN_CHANNEL;
+
+#[doc(inline)]
+pub use super::token::TOKEN_HIDDEN_CHANNEL as LEXER_HIDDEN;
+
+pub(crate) const LEXER_MIN_CHAR_VALUE: isize = 0x0000;
+pub(crate) const LEXER_MAX_CHAR_VALUE: isize = 0x10FFFF;
+
+impl<'input, T, Input, TF> BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ fn emit_token(&mut self, token: TF::Tok) {
+ self.token = Some(token);
+ }
+
+ fn emit(&mut self) {
+ <T as LexerRecog<'input, Self>>::before_emit(self);
+ let stop = self.get_char_index() - 1;
+ let token = self.factory.create(
+ Some(self.input.as_mut().unwrap()),
+ self.token_type,
+ self.text.take(),
+ self.channel,
+ self.token_start_char_index,
+ stop,
+ self.token_start_line,
+ self.token_start_column,
+ );
+ self.emit_token(token);
+ }
+
+ fn emit_eof(&mut self) {
+ let token = self.factory.create(
+ None::<&mut Input>,
+ super::int_stream::EOF,
+ None,
+ LEXER_DEFAULT_TOKEN_CHANNEL,
+ self.get_char_index(),
+ self.get_char_index() - 1,
+ self.get_line(),
+ self.get_char_position_in_line(),
+ );
+ self.emit_token(token)
+ }
+
+ /// Current position in input stream
+ pub fn get_char_index(&self) -> isize {
+ self.input.as_ref().unwrap().index()
+ }
+
+ /// Current token text
+ pub fn get_text<'a>(&'a self) -> Cow<'a, TF::Data>
+ where
+ 'input: 'a,
+ {
+ self.text
+ .as_ref()
+ .map(|it| Borrowed(it.borrow()))
+ // .unwrap_or("")
+ .unwrap_or_else(|| {
+ let text = self
+ .input
+ .as_ref()
+ .unwrap()
+ .get_text(self.token_start_char_index, self.get_char_index() - 1);
+ TF::get_data(text)
+ })
+ }
+
+ /// Used from lexer actions to override text of the token that will be emitted next
+ pub fn set_text(&mut self, _text: <TF::Data as ToOwned>::Owned) {
+ self.text = Some(_text);
+ }
+
+ // fn get_all_tokens(&mut self) -> Vec { unimplemented!() }
+
+ // fn get_char_error_display(&self, _c: char) -> String { unimplemented!() }
+
+ /// Add error listener
+ pub fn add_error_listener(&mut self, listener: Box<dyn ErrorListener<'input, Self>>) {
+ self.error_listeners.borrow_mut().push(listener);
+ }
+
+ /// Remove and drop all error listeners
+ pub fn remove_error_listeners(&mut self) {
+ self.error_listeners.borrow_mut().clear();
+ }
+
+ /// Creates a new lexer instance
+ pub fn new_base_lexer(
+ input: Input,
+ interpreter: LexerATNSimulator,
+ recog: T,
+ factory: &'input TF,
+ ) -> Self {
+ let mut lexer = Self {
+ interpreter: Some(Box::new(interpreter)),
+ input: Some(input),
+ recog,
+ factory,
+ error_listeners: RefCell::new(vec![Box::new(ConsoleErrorListener {})]),
+ token_start_char_index: 0,
+ token_start_line: 0,
+ token_start_column: 0,
+ current_pos: Rc::new(LexerPosition {
+ line: Cell::new(1),
+ char_position_in_line: Cell::new(0),
+ }),
+ token_type: super::token::TOKEN_INVALID_TYPE,
+ text: None,
+ token: None,
+ hit_eof: false,
+ channel: super::token::TOKEN_DEFAULT_CHANNEL,
+ // token_factory_source_pair: None,
+ mode_stack: Vec::new(),
+ mode: self::LEXER_DEFAULT_MODE,
+ };
+ let pos = lexer.current_pos.clone();
+ lexer.interpreter.as_mut().unwrap().current_pos = pos;
+ lexer
+ }
+}
+
+impl<'input, T, Input, TF> TokenAware<'input> for BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ type TF = TF;
+}
+
+impl<'input, T, Input, TF> TokenSource<'input> for BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ type TF = TF;
+ #[inline]
+ #[allow(unused_labels)]
+ fn next_token(&mut self) -> <Self::TF as TokenFactory<'input>>::Tok {
+ assert!(self.input.is_some());
+
+ let _marker = self.input().mark();
+ 'outer: loop {
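+ // the outer loop restarts token recognition whenever a token is skipped;
+ // the inner loop keeps matching while actions request `more()`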
+ if self.hit_eof {
+ self.emit_eof();
+ break;
+ }
+ self.token = None;
+ self.channel = LEXER_DEFAULT_TOKEN_CHANNEL;
+ self.token_start_column = self
+ .interpreter
+ .as_ref()
+ .unwrap()
+ .get_char_position_in_line();
+ self.token_start_line = self.interpreter.as_ref().unwrap().get_line();
+ self.text = None;
+ let index = self.input().index();
+ self.token_start_char_index = index;
+
+ 'inner: loop {
+ self.token_type = TOKEN_INVALID_TYPE;
+ // detach from self, to allow self to be passed deeper
+ let mut interpreter = self.interpreter.take().unwrap();
+ // let mut input = self.input.take().unwrap();
+ let result = interpreter.match_token(self.mode, self);
+ self.interpreter = Some(interpreter);
+
+ let ttype = result.unwrap_or_else(|err| {
+ // println!("error, recovering");
+ notify_listeners(&mut self.error_listeners.borrow_mut(), &err, self);
+ self.interpreter
+ .as_mut()
+ .unwrap()
+ .recover(err, self.input.as_mut().unwrap());
+ LEXER_SKIP
+ });
+ // self.input = Some(input)
+
+ if self.input().la(1) == super::int_stream::EOF {
+ self.hit_eof = true;
+ }
+
+ if self.token_type == TOKEN_INVALID_TYPE {
+ self.token_type = ttype;
+ }
+
+ if self.token_type == LEXER_SKIP {
+ continue 'outer;
+ }
+
+ if self.token_type != LEXER_MORE {
+ break;
+ }
+ }
+
+ if self.token.is_none() {
+ self.emit();
+ break;
+ }
+ }
+ self.input().release(_marker);
+ self.token.take().unwrap()
+ }
+
+ fn get_line(&self) -> isize {
+ self.current_pos.line.get()
+ }
+
+ fn get_char_position_in_line(&self) -> isize {
+ self.current_pos.char_position_in_line.get()
+ }
+
+ fn get_input_stream(&mut self) -> Option<&mut dyn IntStream> {
+ match &mut self.input {
+ None => None,
+ Some(x) => Some(x as _),
+ }
+ }
+
+ fn get_source_name(&self) -> String {
+ self.input
+ .as_ref()
+ .map(|it| it.get_source_name())
+ .unwrap_or("".to_string())
+ }
+
+ // fn set_token_factory<'c: 'b>(&mut self, f: &'c TokenFactory) {
+ // self.factory = f;
+ // }
+
+ fn get_token_factory(&self) -> &'input TF {
+ self.factory
+ }
+}
+
+#[cold]
+#[inline(never)]
+fn notify_listeners<'input, T, Input, TF>(
+ listeners: &mut Vec<Box<dyn ErrorListener<'input, BaseLexer<'input, T, Input, TF>>>>,
+ e: &ANTLRError,
+ lexer: &BaseLexer<'input, T, Input, TF>,
+) where
+ T: LexerRecog<'input, BaseLexer<'input, T, Input, TF>> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ let inner = lexer
+ .input
+ .as_ref()
+ .unwrap()
+ .get_text(lexer.token_start_char_index, lexer.get_char_index());
+ let text = format!(
+ "token recognition error at: '{}'",
+ TF::get_data(inner).to_display()
+ );
+ for listener in listeners.iter_mut() {
+ listener.syntax_error(
+ lexer,
+ None,
+ lexer.token_start_line,
+ lexer.token_start_column,
+ &text,
+ Some(e),
+ )
+ }
+}
+
+impl<'input, T, Input, TF> Lexer<'input> for BaseLexer<'input, T, Input, TF>
+where
+ T: LexerRecog<'input, Self> + 'static,
+ Input: CharStream<TF::From>,
+ TF: TokenFactory<'input>,
+{
+ type Input = Input;
+
+ fn input(&mut self) -> &mut Self::Input {
+ self.input.as_mut().unwrap()
+ }
+
+ fn set_channel(&mut self, v: isize) {
+ self.channel = v;
+ }
+
+ fn push_mode(&mut self, m: usize) {
+ self.mode_stack.push(self.mode);
+ self.mode = m;
+ }
+
+ fn pop_mode(&mut self) -> Option<usize> {
+ self.mode_stack.pop().map(|mode| {
+ self.mode = mode;
+ mode
+ })
+ }
+
+ fn set_type(&mut self, t: isize) {
+ self.token_type = t;
+ }
+
+ fn set_mode(&mut self, m: usize) {
+ self.mode = m;
+ }
+
+ fn more(&mut self) {
+ self.set_type(LEXER_MORE)
+ }
+
+ fn skip(&mut self) {
+ self.set_type(LEXER_SKIP)
+ }
+
+ fn reset(&mut self) {
+ unimplemented!()
+ }
+
+ fn get_interpreter(&self) -> Option<&LexerATNSimulator> {
+ self.interpreter.as_deref()
+ }
+}
diff --git a/runtime/Rust/src/lexer_action.rs b/runtime/Rust/src/lexer_action.rs
new file mode 100644
index 0000000000..01953e9016
--- /dev/null
+++ b/runtime/Rust/src/lexer_action.rs
@@ -0,0 +1,65 @@
+use std::hash::Hash;
+
+use crate::lexer::Lexer;
+
+pub(crate) const LEXER_ACTION_TYPE_CHANNEL: isize = 0;
+pub(crate) const LEXER_ACTION_TYPE_CUSTOM: isize = 1;
+pub(crate) const LEXER_ACTION_TYPE_MODE: isize = 2;
+pub(crate) const LEXER_ACTION_TYPE_MORE: isize = 3;
+pub(crate) const LEXER_ACTION_TYPE_POP_MODE: isize = 4;
+pub(crate) const LEXER_ACTION_TYPE_PUSH_MODE: isize = 5;
+pub(crate) const LEXER_ACTION_TYPE_SKIP: isize = 6;
+pub(crate) const LEXER_ACTION_TYPE_TYPE: isize = 7;
+
+#[derive(Clone, Eq, PartialEq, Debug, Hash)]
+pub(crate) enum LexerAction {
+ LexerChannelAction(isize),
+ LexerCustomAction {
+ rule_index: isize,
+ action_index: isize,
+ },
+ LexerModeAction(isize),
+ LexerMoreAction,
+ LexerPopModeAction,
+ LexerPushModeAction(isize),
+ LexerSkipAction,
+ LexerTypeAction(isize),
+ LexerIndexedCustomAction {
+ offset: isize,
+ action: Box<LexerAction>,
+ },
+}
+
+impl LexerAction {
+ // fn get_action_type(&self) -> isize {
+ // unimplemented!()
+ //// unsafe {discriminant_value(self)} as isize
+ // }
+ pub fn is_position_dependent(&self) -> bool {
+ match self {
+ LexerAction::LexerCustomAction { .. }
+ | LexerAction::LexerIndexedCustomAction { .. } => true,
+ _ => false,
+ }
+ }
+ pub(crate) fn execute<'input, T: Lexer<'input>>(&self, lexer: &mut T) {
+ match self {
+ &LexerAction::LexerChannelAction(channel) => lexer.set_channel(channel),
+ &LexerAction::LexerCustomAction {
+ rule_index,
+ action_index,
+ } => {
+ lexer.action(None, rule_index, action_index);
+ }
+ &LexerAction::LexerModeAction(mode) => lexer.set_mode(mode as usize),
+ &LexerAction::LexerMoreAction => lexer.more(),
+ &LexerAction::LexerPopModeAction => {
+ lexer.pop_mode();
+ }
+ &LexerAction::LexerPushModeAction(mode) => lexer.push_mode(mode as usize),
+ &LexerAction::LexerSkipAction => lexer.skip(),
+ &LexerAction::LexerTypeAction(ty) => lexer.set_type(ty),
+ &LexerAction::LexerIndexedCustomAction { ref action, .. } => action.execute(lexer),
+ }
+ }
+}
diff --git a/runtime/Rust/src/lexer_action_executor.rs b/runtime/Rust/src/lexer_action_executor.rs
new file mode 100644
index 0000000000..4bfaaa73e6
--- /dev/null
+++ b/runtime/Rust/src/lexer_action_executor.rs
@@ -0,0 +1,86 @@
+use std::hash::{Hash, Hasher};
+
+use murmur3::murmur3_32::MurmurHasher;
+
+use crate::int_stream::IntStream;
+use crate::lexer::Lexer;
+use crate::lexer_action::LexerAction;
+use crate::lexer_action::LexerAction::LexerIndexedCustomAction;
+
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub(crate) struct LexerActionExecutor {
+ cached_hash: u64,
+ lexer_actions: Vec,
+}
+
+impl Hash for LexerActionExecutor {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ state.write_u64(self.cached_hash)
+ }
+}
+
+impl LexerActionExecutor {
+ pub(crate) fn new(lexer_actions: Vec<LexerAction>) -> LexerActionExecutor {
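+ // the combined hash is computed once up front, since executors are hashed
+ // and compared frequently while lexer DFA states are being built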
+ let cached_hash = lexer_actions
+ .iter()
+ .fold(MurmurHasher::default(), |mut acc, x| {
+ x.hash(&mut acc);
+ acc
+ })
+ .finish();
+ LexerActionExecutor {
+ lexer_actions,
+ cached_hash,
+ }
+ }
+
+ pub(crate) fn new_copy_append(
+ old: Option<&Self>,
+ lexer_action: LexerAction,
+ ) -> LexerActionExecutor {
+ let mut new = old
+ .cloned()
+ .unwrap_or_else(|| LexerActionExecutor::new(Vec::new()));
+ new.lexer_actions.push(lexer_action);
+ new
+ }
+
+ pub fn fix_offset_before_match(mut self, offset: isize) -> LexerActionExecutor {
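+ // wrap position-dependent actions in an indexed action so that they are
+ // executed at the correct input offset even after lookahead has moved on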
+ for action in self.lexer_actions.iter_mut() {
+ match action {
+ LexerAction::LexerIndexedCustomAction { .. } => {}
+ _ => {
+ if action.is_position_dependent() {
+ *action = LexerIndexedCustomAction {
+ offset,
+ action: Box::new(action.clone()),
+ };
+ }
+ }
+ }
+ }
+ self
+ }
+
+ pub fn execute<'input>(&self, lexer: &mut impl Lexer<'input>, start_index: isize) {
+ let mut requires_seek = false;
+ let stop_index = lexer.input().index();
+ for action in self.lexer_actions.iter() {
+ //println!("executing action {:?}",action);
+ if let LexerAction::LexerIndexedCustomAction { offset, .. } = action {
+ lexer.input().seek(start_index + offset);
+ requires_seek = start_index + offset != stop_index;
+ } else if action.is_position_dependent() {
+ lexer.input().seek(stop_index);
+ requires_seek = false
+ }
+ action.execute(lexer);
+ }
+ if requires_seek {
+ lexer.input().seek(stop_index);
+ }
+ }
+
+ // fn hash(&self) -> int { unimplemented!() }
+}
diff --git a/runtime/Rust/src/lexer_atn_simulator.rs b/runtime/Rust/src/lexer_atn_simulator.rs
new file mode 100644
index 0000000000..2ab653e226
--- /dev/null
+++ b/runtime/Rust/src/lexer_atn_simulator.rs
@@ -0,0 +1,737 @@
+//! Implementation of the lexer automaton (DFA)
+use std::cell::{Cell, RefCell};
+
+use std::ops::Deref;
+use std::rc::Rc;
+use std::usize;
+
+use crate::atn::ATN;
+use crate::atn_config::{ATNConfig, ATNConfigType};
+use crate::atn_config_set::ATNConfigSet;
+use crate::atn_simulator::{BaseATNSimulator, IATNSimulator};
+use crate::atn_state::ATNStateType::RuleStopState;
+use crate::atn_state::{ATNState, ATNStateType};
+
+use crate::dfa::DFA;
+use crate::dfa_state::{DFAState, DFAStateRef};
+use crate::errors::ANTLRError;
+use crate::errors::ANTLRError::LexerNoAltError;
+use crate::int_stream::{IntStream, EOF};
+use crate::lexer::{Lexer, LexerPosition, LEXER_MAX_CHAR_VALUE, LEXER_MIN_CHAR_VALUE};
+use crate::lexer_action_executor::LexerActionExecutor;
+use crate::prediction_context::EMPTY_PREDICTION_CONTEXT;
+use crate::prediction_context::{
+ PredictionContext, PredictionContextCache, PREDICTION_CONTEXT_EMPTY_RETURN_STATE,
+};
+use crate::token::TOKEN_EOF;
+
+use crate::transition::{
+ ActionTransition, PredicateTransition, RuleTransition, Transition, TransitionType,
+};
+use crate::utils::cell_update;
+
+#[allow(missing_docs)]
+pub const ERROR_DFA_STATE_REF: DFAStateRef = usize::MAX;
+
+// todo rewrite this to be actually usable
+#[doc(hidden)]
+pub trait ILexerATNSimulator: IATNSimulator {
+ fn reset(&mut self);
+ fn match_token<'input>(
+ &mut self,
+ mode: usize,
+ lexer: &mut impl Lexer<'input>,
+ ) -> Result<isize, ANTLRError>;
+ fn get_char_position_in_line(&self) -> isize;
+ fn set_char_position_in_line(&mut self, column: isize);
+ fn get_line(&self) -> isize;
+ fn set_line(&mut self, line: isize);
+ fn consume<T: IntStream>(&self, input: &mut T);
+ #[cold]
+ fn recover(&mut self, _re: ANTLRError, input: &mut impl IntStream) {
+ if input.la(1) != EOF {
+ self.consume(input)
+ }
+ }
+}
+
+/// Simple DFA implementation, sufficient for the lexer.
+#[derive(Debug)]
+pub struct LexerATNSimulator {
+ base: BaseATNSimulator,
+
+ // merge_cache: DoubleDict,
+ start_index: isize,
+ pub(crate) current_pos: Rc<LexerPosition>,
+ mode: usize,
+ prev_accept: SimState,
+ // lexer_action_executor: Option>,
+}
+
+impl ILexerATNSimulator for LexerATNSimulator {
+ fn reset(&mut self) {
+ self.prev_accept.reset()
+ }
+
+ fn match_token<'input>(
+ &mut self,
+ mode: usize,
+ // input:&mut dyn CharStream,
+ lexer: &mut impl Lexer<'input>,
+ ) -> Result<isize, ANTLRError> {
+ self.mode = mode;
+ let mark = lexer.input().mark();
+ // println!("start matching on mode {}",mode);
+ let result = (|| {
+ self.start_index = lexer.input().index();
+ self.prev_accept.reset();
+ let temp = self.base.decision_to_dfa.clone();
+ let dfa = temp
+ .get(mode)
+ .ok_or_else(|| ANTLRError::IllegalStateError("invalid mode".into()))?;
+ let mut dfa = dfa.borrow_mut();
+
+ let s0 = dfa.s0;
+ match s0 {
+ None => self.match_atn(lexer, &mut dfa),
+ Some(s0) => self.exec_atn(s0, lexer, &mut dfa),
+ // Err(_) => panic!("dfa rwlock error")
+ }
+ })();
+ lexer.input().release(mark);
+ result
+ }
+
+ fn get_char_position_in_line(&self) -> isize {
+ self.current_pos.char_position_in_line.get()
+ }
+
+ fn set_char_position_in_line(&mut self, column: isize) {
+ self.current_pos.char_position_in_line.set(column)
+ }
+
+ fn get_line(&self) -> isize {
+ self.current_pos.line.get()
+ }
+
+ fn set_line(&mut self, line: isize) {
+ self.current_pos.line.set(line)
+ }
+
+ fn consume<T: IntStream>(&self, _input: &mut T) {
+ let ch = _input.la(1);
+ if ch == '\n' as isize {
+ cell_update(&self.current_pos.line, |x| x + 1);
+ self.current_pos.char_position_in_line.set(0);
+ } else {
+ cell_update(&self.current_pos.char_position_in_line, |x| x + 1);
+ }
+ _input.consume();
+ }
+
+ // fn get_recog(&self) -> Rc>>{
+ // Rc::clone(&self.recog)
+ // }
+}
+
+impl IATNSimulator for LexerATNSimulator {
+ fn shared_context_cache(&self) -> &PredictionContextCache {
+ self.base.shared_context_cache()
+ }
+
+ fn atn(&self) -> &ATN {
+ self.base.atn()
+ }
+
+ fn decision_to_dfa(&self) -> &Vec<RefCell<DFA>> {
+ self.base.decision_to_dfa()
+ }
+}
+
+#[allow(missing_docs)]
+pub const MIN_DFA_EDGE: isize = 0;
+#[allow(missing_docs)]
+pub const MAX_DFA_EDGE: isize = 127;
+
+impl LexerATNSimulator {
+ /// Creates a `LexerATNSimulator` instance which builds a DFA over `atn`
+ ///
+ /// Called from the generated lexer.
+ pub fn new_lexer_atnsimulator(
+ atn: Rc<ATN>,
+ decision_to_dfa: Rc<Vec<RefCell<DFA>>>,
+ shared_context_cache: Rc<PredictionContextCache>,
+ ) -> LexerATNSimulator {
+ LexerATNSimulator {
+ base: BaseATNSimulator::new_base_atnsimulator(
+ atn,
+ decision_to_dfa,
+ shared_context_cache,
+ ),
+ start_index: 0,
+ current_pos: Rc::new(LexerPosition {
+ line: Cell::new(0),
+ char_position_in_line: Cell::new(0),
+ }),
+ mode: 0,
+ prev_accept: SimState::new(),
+ // lexer_action_executor: None,
+ }
+ }
+
+ // fn copy_state(&self, _simulator: &mut LexerATNSimulator) {
+ // unimplemented!()
+ // }
+
+ #[cold]
+ fn match_atn<'input>(
+ &mut self,
+ lexer: &mut impl Lexer<'input>,
+ dfa: &mut DFA,
+ ) -> Result<isize, ANTLRError> {
+ // let start_state = self.atn().mode_to_start_state.get(self.mode as usize).ok_or(ANTLRError::IllegalStateError("invalid mode".into()))?;
+ let atn = self.atn();
+ let start_state = *atn
+ .mode_to_start_state
+ .get(self.mode)
+ .ok_or_else(|| ANTLRError::IllegalStateError("invalid mode".into()))?;
+
+ let _old_mode = self.mode;
+ let mut s0_closure = self.compute_start_state(atn.states[start_state].as_ref(), lexer);
+ let _supress_edge = s0_closure.has_semantic_context();
+ s0_closure.set_has_semantic_context(false);
+
+ let next_state = self.add_dfastate(dfa, s0_closure);
+ if !_supress_edge {
+ dfa.s0 = Some(next_state);
+ }
+
+ self.exec_atn(next_state, lexer, dfa)
+ }
+
+ fn exec_atn<'input>(
+ &mut self,
+ // input: &'a mut dyn CharStream,
+ ds0: DFAStateRef,
+ lexer: &mut impl Lexer<'input>,
+ dfa: &mut DFA,
+ ) -> Result<isize, ANTLRError> {
+ // if self.get_dfa().states.read().unwrap().get(ds0).unwrap().is_accept_state{
+ self.capture_sim_state(&dfa, lexer.input(), ds0);
+ // }
+
+ let mut symbol = lexer.input().la(1);
+ let mut s = ds0;
+ loop {
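+ // fast path: follow a cached DFA edge if one exists; fall back to full
+ // ATN simulation only for symbols not yet seen from this state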
+ let target = Self::get_existing_target_state(dfa, s, symbol);
+ let target = target.unwrap_or_else(|| self.compute_target_state(dfa, s, symbol, lexer));
+ // let target = dfastates.deref().get(s).unwrap() ;x
+
+ if target == ERROR_DFA_STATE_REF {
+ break;
+ }
+ // println!(" --- target computed {:?}", self.get_dfa().states.read().unwrap()[target].configs.configs.iter().map(|it|it.get_state()).collect::>());
+
+ if symbol != EOF {
+ self.consume(lexer.input());
+ }
+
+ if self.capture_sim_state(dfa, lexer.input(), target) {
+ if symbol == EOF {
+ break;
+ }
+ }
+
+ symbol = lexer.input().la(1);
+
+ s = target;
+ }
+ // let _last = self.get_dfa().states.read().get(s).unwrap();
+
+ self.fail_or_accept(symbol, lexer, dfa)
+ }
+
+ #[inline(always)]
+ fn get_existing_target_state(dfa: &DFA, _s: DFAStateRef, t: isize) -> Option<DFAStateRef> {
+ // if t < MIN_DFA_EDGE || t > MAX_DFA_EDGE {
+ // return None;
+ // }
+
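+ // an edge value of 0 means "not computed yet", so it maps to None rather
+ // than to DFA state 0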
+ dfa.states[_s]
+ .edges
+ .get((t - MIN_DFA_EDGE) as usize)
+ .and_then(|x| match x {
+ 0 => None,
+ x => Some(x),
+ })
+ .copied()
+ }
+
+ #[cold]
+ fn compute_target_state<'input>(
+ &self,
+ dfa: &mut DFA,
+ s: DFAStateRef,
+ _t: isize,
+ lexer: &mut impl Lexer<'input>,
+ ) -> DFAStateRef {
+ let mut reach = ATNConfigSet::new_ordered();
+ self.get_reachable_config_set(&dfa.states[s].configs, &mut reach, _t, lexer);
+ // println!(" --- target computed {:?}", reach.configs.iter().map(|it|it.get_state()).collect::>());
+
+ // let mut states = dfa_mut.states;
+ if reach.is_empty() {
+ if !reach.has_semantic_context() {
+ self.add_dfaedge(&mut dfa.states[s], _t, ERROR_DFA_STATE_REF);
+ }
+ return ERROR_DFA_STATE_REF;
+ }
+
+ let supress_edge = reach.has_semantic_context();
+ reach.set_has_semantic_context(false);
+ let to = self.add_dfastate(dfa, Box::new(reach));
+ if !supress_edge {
+ let from = &mut dfa.states[s];
+ self.add_dfaedge(from, _t, to);
+ }
+ // println!("target state computed from {:?} to {:?} on symbol {}", _s, to, char::try_from(_t as u32).unwrap());
+ to
+ // states.get(to).unwrap()
+ }
+
+ fn get_reachable_config_set<'input>(
+ &self,
+ // _states: &V,
+ // _input: &mut dyn CharStream,
+ _closure: &ATNConfigSet,
+ _reach: &mut ATNConfigSet,
+ _t: isize,
+ lexer: &mut impl Lexer<'input>,
+ ) {
+ let mut skip_alt = 0;
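+ // once one alternative has reached an accept state, remaining configs for
+ // that same alt that passed a non-greedy decision are skipped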
+ // println!(" --- source {:?}", _closure.configs.iter().map(|it|it.get_state()).collect::>());
+ for config in _closure.get_items() {
+ let current_alt_reached_accept_state = config.get_alt() == skip_alt;
+ if current_alt_reached_accept_state {
+ if let ATNConfigType::LexerATNConfig {
+ passed_through_non_greedy_decision: true,
+ ..
+ } = config.get_type()
+ {
+ continue;
+ }
+ }
+ let atn_state = self.atn().states[config.get_state()].as_ref();
+ for tr in atn_state.get_transitions() {
+ if let Some(target) = tr.get_reachable_target(_t) {
+ let exec = config.get_lexer_executor().map(|x| {
+ x.clone()
+ .fix_offset_before_match(lexer.input().index() - self.start_index)
+ });
+
+ let new = config.cloned_with_new_exec(self.atn().states[target].as_ref(), exec);
+ if self.closure(
+ new,
+ _reach,
+ current_alt_reached_accept_state,
+ true,
+ _t == EOF,
+ lexer,
+ ) {
+ skip_alt = config.get_alt();
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // fn get_reachable_target(&self, states: &T, _trans: &Transition, _t: isize) -> &ATNState
+ // where
+ // T: Deref>,
+ // {
+ // unimplemented!()
+ // }
+
+ fn fail_or_accept<'input>(
+ &mut self,
+ _t: isize,
+ lexer: &mut impl Lexer<'input>,
+ dfa: &DFA,
+ ) -> Result<isize, ANTLRError> {
+ // println!("fail_or_accept");
+ if let Some(state) = self.prev_accept.dfa_state {
+ // let lexer_action_executor;
+ self.accept(lexer.input());
+
+ let prediction = {
+ let dfa_state_prediction = &dfa.states[state];
+ // println!("accepted, prediction = {}, on dfastate {}", dfa_state_prediction.prediction, dfa_state_prediction.state_number);
+ // lexer_action_executor = dfa_state_prediction.lexer_action_executor.clone();
+ // let recog = self.recog.clone();
+ if let Some(x) = dfa_state_prediction.lexer_action_executor.as_ref() {
+ x.execute(lexer, self.start_index)
+ }
+
+ dfa_state_prediction.prediction
+ };
+
+ // self.lexer_action_executor = lexer_action_executor;
+ Ok(prediction)
+ } else {
+ if _t == EOF && lexer.input().index() == self.start_index {
+ return Ok(TOKEN_EOF);
+ }
+ Err(ANTLRError::LexerNoAltError {
+ start_index: self.start_index,
+ })
+ }
+ }
+
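+ /// Rolls the input stream and line/column tracking back to the most
+ /// recently recorded accepting position (see `capture_sim_state`).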
+ fn accept<'input>(&mut self, input: &mut impl IntStream) {
+ input.seek(self.prev_accept.index);
+ self.current_pos.line.set(self.prev_accept.line);
+ self.current_pos
+ .char_position_in_line
+ .set(self.prev_accept.column);
+ }
+
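+ /// Builds the initial configuration set for a mode start state by taking
+ /// the closure of each outgoing transition; transition `i` corresponds to
+ /// alternative `i + 1`.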
+ fn compute_start_state<'input>(
+ &self,
+ _p: &dyn ATNState,
+ lexer: &mut impl Lexer<'input>,
+ ) -> Box<ATNConfigSet> {
+ // let initial_context = &EMPTY_PREDICTION_CONTEXT;
+ let mut config_set = ATNConfigSet::new_ordered();
+
+ for (i, tr) in _p.get_transitions().iter().enumerate() {
+ let target = tr.get_target();
+ let atn_config = ATNConfig::new_lexer_atnconfig6(
+ target,
+ (i + 1) as isize,
+ EMPTY_PREDICTION_CONTEXT.with(|x| x.clone()),
+ );
+ self.closure(atn_config, &mut config_set, false, false, false, lexer);
+ }
+
+ Box::new(config_set)
+ }
+
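+ /// Recursively adds `config` and everything reachable from it through
+ /// epsilon transitions to `_configs`. Returns `true` if the current
+ /// alternative has reached an accept (rule stop) state.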
+ fn closure<'input>(
+ &self,
+ // _input: &mut dyn CharStream,
+ mut config: ATNConfig,
+ _configs: &mut ATNConfigSet,
+ mut _current_alt_reached_accept_state: bool,
+ _speculative: bool,
+ _treat_eofas_epsilon: bool,
+ lexer: &mut impl Lexer<'input>,
+ ) -> bool {
+ // let config = &config;
+ let atn = self.atn();
+ let state = atn.states[config.get_state()].as_ref();
+ // println!("closure called on state {} {:?}", state.get_state_number(), state.get_state_type());
+
+ if let ATNStateType::RuleStopState {} = state.get_state_type() {
+ // println!("reached rulestopstate {}",state.get_state_number());
+ if config.get_context().map(|x| x.has_empty_path()) != Some(false) {
+ if config.get_context().map(|x| x.is_empty()) != Some(false) {
+ _configs.add(Box::new(config));
+ return true;
+ } else {
+ _configs.add(Box::new(config.cloned_with_new_ctx(
+ state,
+ Some(EMPTY_PREDICTION_CONTEXT.with(|x| x.clone())),
+ )));
+ _current_alt_reached_accept_state = true
+ }
+ }
+
+ if config.get_context().map(|x| x.is_empty()) == Some(false) {
+ let ctx = config.take_context();
+ for i in 0..ctx.length() {
+ if ctx.get_return_state(i) != PREDICTION_CONTEXT_EMPTY_RETURN_STATE {
+ let new_ctx = ctx.get_parent(i).cloned();
+ let return_state =
+ self.atn().states[ctx.get_return_state(i) as usize].as_ref();
+ let next_config = config.cloned_with_new_ctx(return_state, new_ctx);
+ _current_alt_reached_accept_state = self.closure(
+ next_config,
+ _configs,
+ _current_alt_reached_accept_state,
+ _speculative,
+ _treat_eofas_epsilon,
+ lexer,
+ )
+ }
+ }
+ }
+
+ return _current_alt_reached_accept_state;
+ }
+
+ if !state.has_epsilon_only_transitions() {
+ if let ATNConfigType::LexerATNConfig {
+ passed_through_non_greedy_decision,
+ ..
+ } = config.config_type
+ {
+ if !_current_alt_reached_accept_state || !passed_through_non_greedy_decision {
+ _configs.add(Box::new(config.clone()));
+ }
+ }
+ }
+
+ let state = atn.states[config.get_state()].as_ref();
+
+ for tr in state.get_transitions() {
+ let c = self.get_epsilon_target(
+ &mut config,
+ tr.as_ref(),
+ _configs,
+ _speculative,
+ _treat_eofas_epsilon,
+ lexer,
+ );
+
+ if let Some(c) = c {
+ _current_alt_reached_accept_state = self.closure(
+ c,
+ _configs,
+ _current_alt_reached_accept_state,
+ _speculative,
+ _treat_eofas_epsilon,
+ lexer,
+ );
+ }
+ }
+
+ _current_alt_reached_accept_state
+ }
+
+ fn get_epsilon_target<'input>(
+ &self,
+ // _input: &mut dyn CharStream,
+ _config: &mut ATNConfig,
+ _trans: &dyn Transition,
+ _configs: &mut ATNConfigSet,
+ _speculative: bool,
+ _treat_eofas_epsilon: bool,
+ lexer: &mut impl Lexer<'input>,
+ ) -> Option<ATNConfig> {
+ let mut result = None;
+ let target = self.atn().states.get(_trans.get_target()).unwrap().as_ref();
+ // println!("epsilon target for {:?} is {:?}", _trans, target.get_state_type());
+ match _trans.get_serialization_type() {
+ TransitionType::TRANSITION_EPSILON => {
+ result = Some(_config.cloned(target));
+ }
+ TransitionType::TRANSITION_RULE => {
+ let rt = _trans.cast::<RuleTransition>();
+ //println!("rule transition follow state{}", rt.follow_state);
+ let pred_ctx = PredictionContext::new_singleton(
+ Some(_config.get_context().unwrap().clone()),
+ rt.follow_state as isize,
+ );
+ result = Some(_config.cloned_with_new_ctx(target, Some(pred_ctx.into())));
+ }
+ TransitionType::TRANSITION_PREDICATE => {
+ let tr = _trans.cast::<PredicateTransition>();
+ _configs.set_has_semantic_context(true);
+ if self.evaluate_predicate(tr.rule_index, tr.pred_index, _speculative, lexer) {
+ result = Some(_config.cloned(target));
+ }
+ }
+ TransitionType::TRANSITION_ACTION => {
+ //println!("action transition");
+ if _config.get_context().map(|x| x.has_empty_path()) != Some(false) {
+ if let ATNConfigType::LexerATNConfig {
+ lexer_action_executor,
+ ..
+ } = _config.get_type()
+ {
+ let tr = _trans.cast::<ActionTransition>();
+ let lexer_action =
+ self.atn().lexer_actions[tr.action_index as usize].clone();
+ //dbg!(&lexer_action);
+ let lexer_action_executor = LexerActionExecutor::new_copy_append(
+ lexer_action_executor.as_deref(),
+ lexer_action,
+ );
+ result =
+ Some(_config.cloned_with_new_exec(target, Some(lexer_action_executor)))
+ }
+ } else {
+ result = Some(_config.cloned(target));
+ }
+ }
+ TransitionType::TRANSITION_RANGE
+ | TransitionType::TRANSITION_SET
+ | TransitionType::TRANSITION_ATOM => {
+ if _treat_eofas_epsilon {
+ if _trans.matches(EOF, LEXER_MIN_CHAR_VALUE, LEXER_MAX_CHAR_VALUE) {
+ let target = self.atn().states[_trans.get_target()].as_ref();
+ result = Some(_config.cloned(target));
+ }
+ }
+ }
+ TransitionType::TRANSITION_WILDCARD => {}
+ TransitionType::TRANSITION_NOTSET => {}
+ TransitionType::TRANSITION_PRECEDENCE => {
+ panic!("precedence predicates are not supposed to be in lexer");
+ }
+ }
+
+ result
+ }
+
+ fn evaluate_predicate<'input, T: Lexer<'input>>(
+ &self,
+ // input: &mut dyn CharStream,
+ rule_index: isize,
+ pred_index: isize,
+ speculative: bool,
+ lexer: &mut T,
+ ) -> bool {
+ if !speculative {
+ return lexer.sempred(None, rule_index, pred_index);
+ }
+
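+ // Speculative evaluation: advance past the current symbol so the predicate
+ // observes the same input position as during normal matching, then restore
+ // the stream position and line/column state afterwards.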
+ let saved_column = self.current_pos.char_position_in_line.get();
+ let saved_line = self.current_pos.line.get();
+ let index = lexer.input().index();
+ let marker = lexer.input().mark();
+ self.consume(lexer.input());
+
+ let result = lexer.sempred(None, rule_index, pred_index);
+
+ self.current_pos.char_position_in_line.set(saved_column);
+ self.current_pos.line.set(saved_line);
+ lexer.input().seek(index);
+ lexer.input().release(marker);
+ return result;
+ }
+
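+ /// Remembers the most recent accepting DFA state and the input position at
+ /// which it was seen, so `fail_or_accept` can roll back to it later.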
+ fn capture_sim_state(
+ &mut self,
+ dfa: &DFA,
+ input: &impl IntStream,
+ dfa_state: DFAStateRef,
+ ) -> bool {
+ if dfa.states[dfa_state].is_accept_state {
+ self.prev_accept = SimState {
+ index: input.index(),
+ line: self.current_pos.line.get(),
+ column: self.current_pos.char_position_in_line.get(),
+ dfa_state: Some(dfa_state),
+ };
+ // self.prev_accept.index = input.index();
+ // self.prev_accept.dfa_state = Some(dfa_state);
+ return true;
+ }
+ false
+ }
+
+ fn add_dfaedge(&self, _from: &mut DFAState, t: isize, _to: DFAStateRef) {
+ if t < MIN_DFA_EDGE || t > MAX_DFA_EDGE {
+ return;
+ }
+
+ if _from.edges.len() < (MAX_DFA_EDGE - MIN_DFA_EDGE + 1) as usize {
+ _from
+ .edges
+ .resize((MAX_DFA_EDGE - MIN_DFA_EDGE + 1) as usize, 0);
+ }
+ _from.edges[(t - MIN_DFA_EDGE) as usize] = _to;
+ }
+
+ fn add_dfastate(&self, dfa: &mut DFA, _configs: Box<ATNConfigSet>) -> DFAStateRef
+// where
+ // V: DerefMut>,
+ {
+ assert!(!_configs.has_semantic_context());
+ let mut dfastate = DFAState::new_dfastate(usize::MAX, _configs);
+ let rule_index = dfastate
+ .configs //_configs
+ .get_items()
+ .find(|c| RuleStopState == *self.atn().states[c.get_state()].get_state_type())
+ .map(|c| {
+ let rule_index = self.atn().states[c.get_state()].get_rule_index();
+
+ //println!("accepted rule {} on state {}",rule_index,c.get_state());
+ (
+ self.atn().rule_to_token_type[rule_index],
+ c.get_lexer_executor()
+ .map(LexerActionExecutor::clone)
+ .map(Box::new),
+ )
+ });
+
+ if let Some((prediction, exec)) = rule_index {
+ dfastate.prediction = prediction;
+ dfastate.lexer_action_executor = exec;
+ dfastate.is_accept_state = true;
+ }
+
+ let states = &mut dfa.states;
+ let key = dfastate.default_hash();
+ let dfastate_index = *dfa
+ .states_map
+ .entry(key)
+ .or_insert_with(|| {
+ dfastate.state_number = states.deref().len();
+ dfastate.configs.set_read_only(true);
+ let i = dfastate.state_number;
+ //println!("inserting new DFA state {} with size {}", i, dfastate.configs.length());
+ states.push(dfastate);
+ vec![i]
+ })
+ .first()
+ .unwrap();
+
+ //println!("new DFA state {}", dfastate_index);
+
+ // dfa.states.write().unwrap().get_mut(*dfastate_index).unwrap()
+ dfastate_index
+ }
+
+ /// Returns current DFA that is currently used.
+ pub fn get_dfa(&self) -> &RefCell<DFA> {
+ &self.decision_to_dfa()[self.mode]
+ }
+
+ /// Returns current DFA for particular lexer mode
+ pub fn get_dfa_for_mode(&self, mode: usize) -> &RefCell<DFA> {
+ &self.decision_to_dfa()[mode]
+ }
+
+ // fn get_token_name(&self, _tt: isize) -> String { unimplemented!() }
+
+ // fn reset_sim_state(_sim: &mut SimState) { unimplemented!() }
+}
+
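+/// Tracks the last accepting state (and the input position at which it was
+/// seen) during a single lexer ATN simulation run.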
+#[derive(Debug)]
+pub(crate) struct SimState {
+ index: isize,
+ line: isize,
+ column: isize,
+ dfa_state: Option<DFAStateRef>,
+}
+
+impl SimState {
+ pub(crate) fn new() -> SimState {
+ SimState {
+ index: -1,
+ line: 0,
+ column: -1,
+ dfa_state: None,
+ }
+ }
+
+ pub(crate) fn reset(&mut self) {
+ // self.index = -1;
+ // self.line = 0;
+ // self.column = -1;
+ self.dfa_state = None;
+ }
+}
diff --git a/runtime/Rust/src/lib.rs b/runtime/Rust/src/lib.rs
new file mode 100644
index 0000000000..9b8f720d4f
--- /dev/null
+++ b/runtime/Rust/src/lib.rs
@@ -0,0 +1,242 @@
+#![crate_type = "lib"]
+// #![feature(try_blocks)]
+//#![feature(nll)]
+// #![feature(raw)]
+// #![feature(is_sorted)]
+// #![feature(cell_update)]
+// #![feature(get_mut_unchecked)]
+// #![feature(specialization)]
+// #![feature(coerce_unsized)]
+// #![feature(associated_type_defaults)]
+// #![feature(generic_associated_types)]
+// #![feature(crate_visibility_modifier)]
+// #![feature(generic_associated_types)]
+#![warn(rust_2018_idioms)]
+//#![warn(missing_docs)] // warn if docs are missing
+#![warn(missing_debug_implementations)]
+#![warn(trivial_numeric_casts)]
+// #![allow(incomplete_features)]
+
+//! # Antlr4 runtime
+//!
+//! This is a Rust runtime for the [ANTLR4] parser generator.
+//! It is required in order to use the parsers and lexers that [ANTLR4] generates.
+//!
+//! This documentation covers the particular API used by generated parsers, lexers, and syntax trees.
+//!
+//! For information on what [ANTLR4] is and how to generate a parser, please refer to:
+//! - [ANTLR4] main repository
+//! - [README] for Rust target
+//!
+//! [ANTLR4]: https://github.com/antlr/antlr4
+//! [README]: https://github.com/rrevenantt/antlr4rust/blob/master/README.md
+//!
+//! ### Customization
+//!
+//! All input and output can be customized and optimized for a particular use case by implementing
+//! the related trait. Each trait already has several implementations that should be enough for most cases.
+//! For more details, see the docs for the corresponding trait and its containing module.
+//!
+//! Currently available are:
+//! - [`CharStream`] - Lexer input, stream of char values with slicing support
+//! - [`TokenFactory`] - How lexer creates tokens.
+//! - [`Token`] - Element of [`TokenStream`]
+//! - [`TokenStream`] - Parser input, created from lexer or other token source.
+//! - [`ParserRuleContext`] - Node of created syntax tree.
+//!
+//! ### Zero-copy and lifetimes
+//!
+//! This library supports full zero-copy parsing. To allow this, the
+//! `'input` lifetime is used throughout to refer to data borrowed by the parser/lexer.
+//! Besides references to the input, borrowed data can also come from the [`TokenFactory`] if it returns references to tokens.
+//! See [`ArenaFactory`] as an example of such behavior:
+//! it allocates tokens in an [`Arena`](typed_arena::Arena) and returns references to them.
+//!
+//! When using the generated parse tree, be careful not to require a lifetime that outlives the parsing.
+//! Otherwise you will likely get a "does not live long enough" error on the input string,
+//! even though the actual lifetime conflict happens much later.
+//!
+//! If you need to generate an owned version of the parse tree, or you just want simpler usage,
+//! you can opt out of zero-copy by requiring `'input` to be `'static`. In that case it is easier to also use
+//! types that contain "owned" in their name or constructor function, like `OwningTokenFactory`
+//! or `InputStream::new_owned()`.
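+//!
+//! As a rough sketch (`MyLexer`, `MyParser`, `start_rule`, and `get_input` are
+//! hypothetical names standing in for your generated code; the exact
+//! constructors may differ):
+//!
+//! ```ignore
+//! use antlr_rust::common_token_stream::CommonTokenStream;
+//! use antlr_rust::InputStream;
+//!
+//! let data: String = get_input();
+//! // zero-copy: the resulting tree borrows from `data`,
+//! // so `data` must outlive the tree
+//! let lexer = MyLexer::new(InputStream::new(&*data));
+//! let mut parser = MyParser::new(CommonTokenStream::new(lexer));
+//! let tree = parser.start_rule();
+//!
+//! // owned variant: opts out of zero-copy
+//! let lexer = MyLexer::new(InputStream::new_owned(data.into_boxed_str()));
+//! ```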
+//!
+//! ### Visitors and Listeners
+//!
+//! Parse listeners must outlive `'input` because they have to be stored inside the parser.
+//! They can still retrieve borrowed data from the parse tree, which should cover 99% of use cases.
+//!
+//! `ParseTreeWalker` can accept listeners with an arbitrary lifetime.
+//!
+//! `Visitor`s can also have an arbitrary lifetime.
+//!
+//! ### Downcasting
+//!
+//! Rule context trait objects support downcasting even in the zero-copy case.
+//! Generic types (currently `H: ErrorStrategy` and `I: `[`TokenStream`]) that you can
+//! access in the generated parser from embedded actions can also be downcast to concrete types.
+//! To do so, use the `TidExt::downcast_*` extension methods.
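+//!
+//! A hypothetical sketch of an embedded action (`recog` stands in for the
+//! generated parser, and we assume its error strategy really is a
+//! `BailErrorStrategy`):
+//!
+//! ```ignore
+//! use antlr_rust::TidExt;
+//!
+//! // downcast the generic `H: ErrorStrategy` parameter to a concrete type
+//! if let Some(bail) = recog.err_handler.downcast_ref::<BailErrorStrategy<'_, _>>() {
+//!     // work with the concrete strategy here
+//! }
+//! ```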
+//!
+//! [`CharStream`]: crate::char_stream::CharStream
+//! [`TokenFactory`]: crate::token_factory::TokenFactory
+//! [`ArenaFactory`]: crate::token_factory::ArenaFactory
+//! [`Token`]: crate::token::Token
+//! [`TokenStream`]: crate::token_stream::TokenStream
+//! [`ParserRuleContext`]: crate::parser_rule_context::ParserRuleContext
+
+#[macro_use]
+extern crate lazy_static;
+
+#[doc(hidden)]
+pub use lazy_static::lazy_static;
+
+#[doc(hidden)]
+pub use better_any::{tid, Tid, TidAble, TidExt};
+
+#[doc(inline)]
+pub use error_strategy::{BailErrorStrategy, DefaultErrorStrategy, ErrorStrategy};
+
+pub use input_stream::InputStream;
+
+#[doc(inline)]
+pub use lexer::{BaseLexer, Lexer};
+#[doc(inline)]
+pub use parser::{BaseParser, ListenerId, Parser};
+#[doc(inline)]
+pub use token_source::TokenSource;
+//extern crate uuid;
+#[doc(hidden)]
+pub use prediction_context::PredictionContextCache;
+
+#[doc(inline)]
+pub use prediction_mode::PredictionMode;
+
+#[doc(hidden)]
+pub mod atn_config;
+#[doc(hidden)]
+pub mod atn_simulator;
+pub mod int_stream;
+mod lexer_action;
+mod ll1_analyzer;
+#[doc(hidden)]
+pub mod recognizer;
+pub mod token_factory;
+//pub mod tokenstream_rewriter;
+#[doc(hidden)]
+pub mod atn_deserialization_options;
+#[doc(hidden)]
+pub mod atn_state;
+pub mod char_stream;
+#[doc(hidden)]
+pub mod dfa_state;
+#[doc(hidden)]
+pub mod interval_set;
+pub mod parser_rule_context;
+mod prediction_context;
+#[doc(hidden)]
+pub mod semantic_context;
+mod token_source;
+pub mod token_stream;
+//pub mod trace_listener;
+#[doc(hidden)]
+pub mod dfa;
+#[doc(hidden)]
+pub mod transition;
+pub mod tree;
+//pub mod file_stream;
+#[doc(hidden)]
+pub mod atn;
+#[doc(hidden)]
+pub mod atn_config_set;
+#[doc(hidden)]
+pub mod atn_deserializer;
+pub mod common_token_stream;
+mod dfa_serializer;
+pub mod error_listener;
+pub mod error_strategy;
+pub mod errors;
+pub mod input_stream;
+pub mod lexer;
+#[doc(hidden)]
+pub mod lexer_action_executor;
+pub mod lexer_atn_simulator;
+pub mod parser;
+pub mod parser_atn_simulator;
+mod prediction_mode;
+pub mod token;
+pub mod trees;
+mod utils;
+//pub mod tokenstream_rewriter_test;
+mod atn_type;
+// mod context_factory;
+pub mod rule_context;
+pub mod vocabulary;
+//#[cfg(test)]
+// tests are either integration tests in the "tests" folder or unit tests in some modules
+
+use std::rc::Rc;
+/// Stable workaround for CoerceUnsized
+// #[doc(hidden)]
+pub trait CoerceFrom<T: ?Sized> {
+ fn coerce_rc(from: Rc<T>) -> Rc<Self>;
+ fn coerce_box(from: Box<T>) -> Box<Self>;
+ fn coerce_ref(from: &T) -> &Self;
+ fn coerce_mut(from: &mut T) -> &mut Self;
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! coerce_from {
+ ($lt:lifetime : $p:path) => {
+ const _: () = {
+ use std::rc::Rc;
+ impl<$lt, T> $crate::CoerceFrom<T> for dyn $p + $lt
+ where
+ T: $p + $lt,
+ {
+ fn coerce_rc(from: Rc<T>) -> Rc<Self> {
+ from as _
+ }
+ fn coerce_box(from: Box<T>) -> Box<Self> {
+ from as _
+ }
+ fn coerce_ref(from: &T) -> &Self {
+ from as _
+ }
+ fn coerce_mut(from: &mut T) -> &mut Self {
+ from as _
+ }
+ }
+ };
+ };
+}
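+
+// For illustration, with a hypothetical trait `MyRule`, the macro above would
+// be invoked as `coerce_from!('i : MyRule<'i>);`. After that, a concrete
+// `Rc<MyRuleImpl>` can be unsized to `Rc<dyn MyRule<'_>>` via `CoerceTo`:
+// `let erased: Rc<dyn MyRule<'_>> = concrete.coerce_rc_to();`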
+
+/// Stable workaround for CoerceUnsized
+// #[doc(hidden)]
+pub trait CoerceTo<T: ?Sized> {
+ fn coerce_rc_to(self: Rc<Self>) -> Rc<T>;
+ fn coerce_box_to(self: Box<Self>) -> Box<T>;
+ fn coerce_ref_to(&self) -> &T;
+ fn coerce_mut_to(&mut self) -> &mut T;
+}
+
+impl<T: ?Sized, X> CoerceTo<T> for X
+where
+ T: CoerceFrom<X>,
+{
+ fn coerce_rc_to(self: Rc<Self>) -> Rc<T> {
+ T::coerce_rc(self)
+ }
+ fn coerce_box_to(self: Box<Self>) -> Box<T> {
+ T::coerce_box(self)
+ }
+
+ fn coerce_ref_to(self: &Self) -> &T {
+ T::coerce_ref(self)
+ }
+
+ fn coerce_mut_to(self: &mut Self) -> &mut T {
+ T::coerce_mut(self)
+ }
+}
diff --git a/runtime/Rust/src/ll1_analyzer.rs b/runtime/Rust/src/ll1_analyzer.rs
new file mode 100644
index 0000000000..8093cb47d8
--- /dev/null
+++ b/runtime/Rust/src/ll1_analyzer.rs
@@ -0,0 +1,187 @@
+use std::collections::HashSet;
+use std::ops::Deref;
+use std::rc::Rc;
+
+use bit_set::BitSet;
+
+use crate::atn::ATN;
+use crate::atn_config::ATNConfig;
+use crate::atn_state::{ATNState, ATNStateType};
+use crate::interval_set::IntervalSet;
+use crate::parser::ParserNodeType;
+use crate::prediction_context::PredictionContext;
+use crate::prediction_context::EMPTY_PREDICTION_CONTEXT;
+use crate::token::{TOKEN_EOF, TOKEN_EPSILON, TOKEN_INVALID_TYPE, TOKEN_MIN_USER_TOKEN_TYPE};
+use crate::transition::TransitionType::TRANSITION_NOTSET;
+use crate::transition::{RuleTransition, TransitionType};
+
+pub struct LL1Analyzer<'a> {
+ atn: &'a ATN,
+}
+
+impl LL1Analyzer<'_> {
+ pub fn new(atn: &ATN) -> LL1Analyzer<'_> {
+ LL1Analyzer { atn }
+ }
+
+ // fn get_decision_lookahead(&self, _s: &dyn ATNState) -> &Vec { unimplemented!() }
+
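+ /// Computes the set of tokens that can follow `s` in the ATN in the
+ /// context described by `ctx`, stopping at `stop_state` if provided.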
+ pub fn look<'input, Ctx: ParserNodeType<'input>>(
+ &self,
+ s: &dyn ATNState,
+ stop_state: Option<&dyn ATNState>,
+ ctx: Option<&Ctx::Type>,
+ ) -> IntervalSet {
+ let mut r = IntervalSet::new();
+ let look_ctx = ctx.map(|x| PredictionContext::from_rule_context::<Ctx>(self.atn, x));
+ let mut looks_busy: HashSet<ATNConfig> = HashSet::new();
+ let mut called_rule_stack = BitSet::new();
+ self.look_work(
+ s,
+ stop_state,
+ look_ctx,
+ &mut r,
+ &mut looks_busy,
+ &mut called_rule_stack,
+ true,
+ true,
+ );
+ r
+ }
+
+ fn look_work(
+ &self,
+ // atn:&ATN,
+ s: &dyn ATNState,
+ stop_state: Option<&dyn ATNState>,
+ ctx: Option<Rc<PredictionContext>>,
+ look: &mut IntervalSet,
+ look_busy: &mut HashSet,
+ called_rule_stack: &mut BitSet,
+ see_thru_preds: bool,
+ add_eof: bool,
+ ) {
+ let c = ATNConfig::new(s.get_state_number(), 0, ctx.clone());
+ if !look_busy.insert(c) {
+ return;
+ }
+
+ if Some(s.get_state_number()) == stop_state.map(|x| x.get_state_number()) {
+ match ctx {
+ None => {
+ look.add_one(TOKEN_EPSILON);
+ return;
+ }
+ Some(x) if x.is_empty() && add_eof => {
+ look.add_one(TOKEN_EOF);
+ return;
+ }
+ _ => {}
+ }
+ }
+
+ if let ATNStateType::RuleStopState = s.get_state_type() {
+ match ctx {
+ None => {
+ look.add_one(TOKEN_EPSILON);
+ return;
+ }
+ Some(x) if x.is_empty() && add_eof => {
+ look.add_one(TOKEN_EOF);
+ return;
+ }
+ Some(ctx) if EMPTY_PREDICTION_CONTEXT.with(|x| &ctx != &*x) => {
+ let removed = called_rule_stack.contains(s.get_rule_index());
+ called_rule_stack.remove(s.get_rule_index());
+ for i in 0..ctx.length() {
+ self.look_work(
+ self.atn.states[ctx.get_return_state(i) as usize].as_ref(),
+ stop_state,
+ ctx.get_parent(i).cloned(),
+ look,
+ look_busy,
+ called_rule_stack,
+ see_thru_preds,
+ add_eof,
+ )
+ }
+ if removed {
+ called_rule_stack.insert(s.get_rule_index());
+ }
+
+ return;
+ }
+ _ => {}
+ }
+ }
+
+ for tr in s.get_transitions() {
+ let target = self.atn.states[tr.get_target()].as_ref();
+ match tr.get_serialization_type() {
+ TransitionType::TRANSITION_RULE => {
+ let rule_tr = tr.as_ref().cast::<RuleTransition>();
+ if called_rule_stack.contains(target.get_rule_index()) {
+ continue;
+ }
+
+ let new_ctx = Rc::new(PredictionContext::new_singleton(
+ ctx.clone(),
+ rule_tr.follow_state as isize,
+ ));
+
+ called_rule_stack.insert(target.get_rule_index());
+ self.look_work(
+ target,
+ stop_state,
+ Some(new_ctx),
+ look,
+ look_busy,
+ called_rule_stack,
+ see_thru_preds,
+ add_eof,
+ );
+ called_rule_stack.remove(target.get_rule_index());
+ }
+ TransitionType::TRANSITION_PREDICATE | TransitionType::TRANSITION_PRECEDENCE => {
+ if see_thru_preds {
+ self.look_work(
+ target,
+ stop_state,
+ ctx.clone(),
+ look,
+ look_busy,
+ called_rule_stack,
+ see_thru_preds,
+ add_eof,
+ )
+ } else {
+ look.add_one(TOKEN_INVALID_TYPE)
+ }
+ }
+ TransitionType::TRANSITION_WILDCARD => {
+ look.add_range(TOKEN_MIN_USER_TOKEN_TYPE, self.atn.max_token_type)
+ }
+ _ if tr.is_epsilon() => self.look_work(
+ target,
+ stop_state,
+ ctx.clone(),
+ look,
+ look_busy,
+ called_rule_stack,
+ see_thru_preds,
+ add_eof,
+ ),
+ _ => {
+ if let Some(mut set) = tr.get_label() {
+ if tr.get_serialization_type() == TRANSITION_NOTSET {
+ let complement =
+ set.complement(TOKEN_MIN_USER_TOKEN_TYPE, self.atn.max_token_type);
+ *set.to_mut() = complement;
+ }
+ look.add_set(set.deref())
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/runtime/Rust/src/parser.rs b/runtime/Rust/src/parser.rs
new file mode 100644
index 0000000000..8f9beea2fb
--- /dev/null
+++ b/runtime/Rust/src/parser.rs
@@ -0,0 +1,717 @@
+//! Base parser implementation
+use std::borrow::Borrow;
+use std::cell::{Cell, RefCell};
+use std::marker::PhantomData;
+use std::ops::{Deref, DerefMut};
+use std::rc::Rc;
+
+use crate::atn::ATN;
+use crate::atn_simulator::IATNSimulator;
+use crate::error_listener::{ConsoleErrorListener, ErrorListener, ProxyErrorListener};
+use crate::error_strategy::ErrorStrategy;
+use crate::errors::ANTLRError;
+use crate::interval_set::IntervalSet;
+use crate::parser_atn_simulator::ParserATNSimulator;
+use crate::parser_rule_context::ParserRuleContext;
+use crate::recognizer::{Actions, Recognizer};
+use crate::rule_context::{states_stack, CustomRuleContext, RuleContext};
+use crate::token::{Token, TOKEN_EOF};
+use crate::token_factory::{TokenAware, TokenFactory};
+use crate::token_stream::TokenStream;
+use crate::tree::{ErrorNode, Listenable, ParseTreeListener, TerminalNode};
+use crate::utils::cell_update;
+use crate::vocabulary::Vocabulary;
+use crate::{CoerceFrom, CoerceTo};
+use better_any::TidAble;
+
+/// parser functionality required for `ParserATNSimulator` to work
+#[allow(missing_docs)] // todo rewrite it so downstream crates actually could meaningfully implement it
+pub trait Parser<'input>: Recognizer<'input> {
+ fn get_interpreter(&self) -> &ParserATNSimulator;
+
+ fn get_token_factory(&self) -> &'input Self::TF;
+ fn get_parser_rule_context(&self) -> &Rc<<Self::Node as ParserNodeType<'input>>::Type>;
+ // fn set_parser_rule_context(&self, v: ParserRuleContext);
+ fn consume(&mut self, err_handler: &mut impl ErrorStrategy<'input, Self>)
+ where
+ Self: Sized;
+ // fn get_parse_listeners(&self) -> Vec;
+ //fn sempred(&mut self, _localctx: Option<&dyn ParserRuleContext>, rule_index: isize, action_index: isize) -> bool { true }
+
+ fn precpred(
+ &self,
+ localctx: Option<&<Self::Node as ParserNodeType<'input>>::Type>,
+ precedence: isize,
+ ) -> bool;
+
+ // fn get_error_handler(&self) -> ErrorStrategy;
+ // fn set_error_handler(&self, e: ErrorStrategy);
+ fn get_input_stream_mut(&mut self) -> &mut dyn TokenStream<'input, TF = Self::TF>;
+ fn get_input_stream(&self) -> &dyn TokenStream<'input, TF = Self::TF>;
+ fn get_current_token(&self) -> &<Self::TF as TokenFactory<'input>>::Tok;
+ fn get_expected_tokens(&self) -> IntervalSet;
+
+ fn add_error_listener(&mut self, listener: Box<dyn ErrorListener<'input, Self>>)
+ where
+ Self: Sized;
+ fn remove_error_listeners(&mut self);
+ fn notify_error_listeners(
+ &self,
+ msg: String,
+ offending_token: Option<isize>,
+ err: Option<&ANTLRError>,
+ );
+ fn get_error_lister_dispatch<'a>(&'a self) -> Box<dyn ErrorListener<'input, Self> + 'a>
+ where
+ Self: Sized;
+
+ fn is_expected_token(&self, symbol: isize) -> bool;
+ fn get_precedence(&self) -> isize;
+
+ fn get_state(&self) -> isize;
+ fn set_state(&mut self, v: isize);
+ fn get_rule_invocation_stack(&self) -> Vec<String>;
+}
+
+// trait CsvContext<'input>: for<'x> Listenable<'input, dyn CsvParseTreeListener<'input,CsvTreeNodeType> + 'x> + ParserRuleContext<'input,TF=CommonTokenFactory,Ctx=CsvTreeNodeType>{}
+//
+// struct CsvTreeNodeType;
+// impl<'a> ParserNodeType<'a> for CsvTreeNodeType{
+// type Type = dyn CsvContext<'a>;
+// }
+
+// workaround trait for rustc not being able to handle cycles in trait definitions yet, e.g. `trait A: Super{}`
+// whyyy rustc... whyyy... (╯°□°)╯︵ ┻━┻ It would have been so much cleaner.
+/// Workaround trait for rustc current limitations.
+///
+/// Basically you can consider it as if context trait for generated parser has been implemented as
+/// ```text
+/// trait GeneratedParserContext: ParserRuleContext { ... }
+/// ```
+/// which is not possible, hence this a bit ugly workaround.
+///
+/// Implemented by generated parser for the type that is going to carry information about
+/// parse tree node.
+pub trait ParserNodeType<'input>: TidAble<'input> + Sized {
+ /// Shortcut for `Type::TF`
+ type TF: TokenFactory<'input> + 'input;
+ /// Actual type of the parse tree node
+ type Type: ?Sized + ParserRuleContext<'input, Ctx = Self, TF = Self::TF> + 'input;
+ // type Visitor: ?Sized + ParseTreeVisitor<'input, Self>;
+}
+
+/// ### Main underlying Parser struct
+///
+/// It is a member of the generated parser struct, so
+/// you almost never need to create it yourself.
+/// The generated parser hides the complexity of this struct and exposes the required flexibility via generic parameters.
+#[derive(Debug)]
+pub struct BaseParser<
+ 'input,
+ Ext, //: 'static, //: ParserRecog<'input, Self> + 'static, // user provided behavior, such as semantic predicates
+ I: TokenStream<'input>, // input stream
+ Ctx: ParserNodeType<'input, TF = I::TF>, // Ctx::Type is trait object type for tree node of the parser
+ T: ParseTreeListener<'input, Ctx> + ?Sized = dyn ParseTreeListener<'input, Ctx>,
+> {
+ interp: Rc,
+ /// Rule context parser is currently processing
+ pub ctx: Option<Rc<Ctx::Type>>,
+
+ /// Track the `ParserRuleContext` objects during the parse and hook
+ /// them up using the `ParserRuleContext.children` list so that it
+ /// forms a parse tree. The `ParserRuleContext` returned from the start
+ /// rule represents the root of the parse tree.
+ ///
+ /// Note that if we are not building parse trees, rule contexts only point
+ /// upwards. When a rule exits, it returns the context, but that gets garbage
+ /// collected if nobody holds a reference. It points upwards but nobody
+ /// points at it.
+ ///
+ /// When we build parse trees, we are adding all of these contexts to the
+ /// `ParserRuleContext.children` list. Contexts are then not candidates
+ /// for garbage collection.
+/// The basic complexity of the adaptive strategy makes it harder to understand.
+/// We begin with ATN simulation to build paths in a DFA. Subsequent prediction
+/// requests go through the DFA first. If they reach a state without an edge for
+/// the current symbol, the algorithm fails over to the ATN simulation to
+/// complete the DFA path for the current input (until it finds a conflict state
+/// or uniquely predicting state).
+///
+///
+/// All of that is done without using the outer context because we want to create
+/// a DFA that is not dependent upon the rule invocation stack when we do a
+/// prediction. One DFA works in all contexts. We avoid using context not
+/// necessarily because it's slower, although it can be, but because of the DFA
+/// caching problem. The closure routine only considers the rule invocation stack
+/// created during prediction beginning in the decision rule. For example, if
+/// prediction occurs without invoking another rule's ATN, there are no context
+/// stacks in the configurations. When lack of context leads to a conflict, we
+/// don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
+/// strategy (versus full LL(*)).
+///
+///
+/// When SLL yields a configuration set with conflict, we rewind the input and
+/// retry the ATN simulation, this time using full outer context without adding
+/// to the DFA. Configuration context stacks will be the full invocation stacks
+/// from the start rule. If we get a conflict using full context, then we can
+/// definitively say we have a true ambiguity for that input sequence. If we
+/// don't get a conflict, it implies that the decision is sensitive to the outer
+/// context. (It is not context-sensitive in the sense of context-sensitive
+/// grammars.)
+///
+///
+/// The next time we reach this DFA state with an SLL conflict, through DFA
+/// simulation, we will again retry the ATN simulation using full context mode.
+/// This is slow because we can't save the results and have to "interpret" the
+/// ATN each time we get that input.
+///
+/// **For more info see Java version**
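+///
+/// A loose sketch of the flow described above (hand-written pseudocode, not
+/// this type's actual API):
+///
+/// ```ignore
+/// fn adaptive_predict(decision, input) -> Alt {
+///     // 1. fast path: walk the DFA cached for this decision
+///     if let Some(alt) = walk_dfa(decision, input) {
+///         return alt;
+///     }
+///     // 2. slow path: SLL ATN simulation, adding states/edges to the DFA
+///     match simulate_sll(decision, input) {
+///         Unique(alt) => alt,
+///         Conflict => {
+///             // 3. rewind and retry with the full outer context (LL),
+///             //    without adding the results to the shared DFA
+///             simulate_full_context(decision, input)
+///         }
+///     }
+/// }
+/// ```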
+#[derive(Debug)]
+pub struct ParserATNSimulator {
+ base: BaseATNSimulator,
+ prediction_mode: Cell<PredictionMode>,
+ start_index: Cell<isize>,
+ // pd:PhantomData