Skip to content

Commit 16c7380

Browse files
authored
feat: optimize expression parse (#18871)
* perf: optimize expression parse * chore: update ast bench comment * chore: fix 03_0016_insert_into_values.test * chore: codefmt
1 parent ba3f3e8 commit 16c7380

File tree

8 files changed

+191
-92
lines changed

8 files changed

+191
-92
lines changed

src/query/ast/benches/bench.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ fn main() {
1818

1919
// bench fastest │ slowest │ median │ mean │ samples │ iters
2020
// ╰─ dummy │ │ │ │ │
21-
// ├─ deep_function_call 1.238 ms │ 1.781 ms │ 1.355 ms │ 1.353 ms │ 100 │ 100
22-
// ├─ deep_query 285.6 µs │ 434.6 µs │ 306.8 µs │ 307 µs │ 100 │ 100
23-
// ├─ large_query 1.739 ms │ 1.893 ms │ 1.795 ms │ 1.801 ms │ 100 │ 100
24-
// ├─ large_statement 1.745 ms │ 1.885 ms │ 1.807 ms │ 1.806 ms │ 100 │ 100
25-
// ╰─ wide_expr 562 µs │ 651.9 µs │ 588.2 µs │ 590.5 µs │ 100 │ 100
21+
// ├─ deep_function_call 802.2 µs │ 1.207 ms │ 842 µs │ 850.6 µs │ 100 │ 100
22+
// ├─ deep_query 242.3 µs │ 426.3 µs │ 254.2 µs │ 257.3 µs │ 100 │ 100
23+
// ├─ large_query 1.104 ms │ 1.264 ms │ 1.14 ms │ 1.142 ms │ 100 │ 100
24+
// ├─ large_statement 1.097 ms │ 1.2 ms │ 1.15 ms │ 1.148 ms │ 100 │ 100
25+
// ╰─ wide_expr 282.4 µs │ 368.6 µs │ 298 µs │ 298.7 µs │ 100 │ 100
2626

2727
#[divan::bench_group(max_time = 0.5)]
2828
mod dummy {

src/query/ast/src/parser/common.rs

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::cell::RefCell;
16+
use std::rc::Rc;
17+
1518
use nom::branch::alt;
1619
use nom::combinator::consumed;
1720
use nom::combinator::map;
@@ -580,20 +583,61 @@ pub fn transform_span(tokens: &[Token]) -> Span {
580583
})
581584
}
582585

583-
pub fn run_pratt_parser<'a, I, P, E>(
586+
pub(crate) trait IterProvider<'a> {
587+
type Item;
588+
type Iter: Iterator<Item = Self::Item> + ExactSizeIterator;
589+
590+
fn create_iter(self, span: Rc<RefCell<Option<Input<'a>>>>) -> Self::Iter;
591+
}
592+
593+
impl<'a, T> IterProvider<'a> for Vec<WithSpan<'a, T>>
594+
where T: Clone
595+
{
596+
type Item = WithSpan<'a, T>;
597+
type Iter = ErrorSpan<'a, T, std::vec::IntoIter<WithSpan<'a, T>>>;
598+
599+
fn create_iter(self, span: Rc<RefCell<Option<Input<'a>>>>) -> Self::Iter {
600+
ErrorSpan::new(self.into_iter(), span)
601+
}
602+
}
603+
604+
pub(crate) struct ErrorSpan<'a, T, I: Iterator<Item = WithSpan<'a, T>>> {
605+
iter: I,
606+
span: Rc<RefCell<Option<Input<'a>>>>,
607+
}
608+
609+
impl<'a, T, I: Iterator<Item = WithSpan<'a, T>>> ErrorSpan<'a, T, I> {
610+
fn new(iter: I, span: Rc<RefCell<Option<Input<'a>>>>) -> Self {
611+
Self { iter, span }
612+
}
613+
}
614+
615+
impl<'a, T, I: Iterator<Item = WithSpan<'a, T>>> Iterator for ErrorSpan<'a, T, I> {
616+
type Item = WithSpan<'a, T>;
617+
618+
fn next(&mut self) -> Option<Self::Item> {
619+
self.iter
620+
.next()
621+
.inspect(|item| *self.span.borrow_mut() = Some(item.span))
622+
}
623+
}
624+
625+
impl<'a, T, I: Iterator<Item = WithSpan<'a, T>>> ExactSizeIterator for ErrorSpan<'a, T, I> {}
626+
627+
pub fn run_pratt_parser<'a, I, P, E, T>(
584628
mut parser: P,
585-
iter: &I,
629+
parsers: T,
586630
rest: Input<'a>,
587631
input: Input<'a>,
588632
) -> IResult<'a, P::Output>
589633
where
590634
E: std::fmt::Debug,
591635
P: PrattParser<I, Input = WithSpan<'a, E>, Error = &'static str>,
592-
I: Iterator<Item = P::Input> + ExactSizeIterator + Clone,
636+
I: Iterator<Item = P::Input> + ExactSizeIterator,
637+
T: IterProvider<'a, Item = P::Input, Iter = I>,
593638
{
594-
let mut iter_cloned = iter.clone();
595-
let mut iter = iter.clone().peekable();
596-
let len = iter.len();
639+
let span = Rc::new(RefCell::new(None));
640+
let mut iter = parsers.create_iter(span.clone()).peekable();
597641
let expr = parser
598642
.parse_input(&mut iter, Precedence(0))
599643
.map_err(|err| {
@@ -602,22 +646,27 @@ where
602646

603647
let err_kind = match err {
604648
PrattError::EmptyInput => ErrorKind::Other("expecting an operand"),
605-
PrattError::UnexpectedNilfix(_) => ErrorKind::Other("unable to parse the element"),
606-
PrattError::UnexpectedPrefix(_) => {
649+
PrattError::UnexpectedNilfix(i) => {
650+
*span.borrow_mut() = Some(i.span);
651+
ErrorKind::Other("unable to parse the element")
652+
}
653+
PrattError::UnexpectedPrefix(i) => {
654+
*span.borrow_mut() = Some(i.span);
607655
ErrorKind::Other("unable to parse the prefix operator")
608656
}
609-
PrattError::UnexpectedInfix(_) => {
657+
PrattError::UnexpectedInfix(i) => {
658+
*span.borrow_mut() = Some(i.span);
610659
ErrorKind::Other("missing lhs or rhs for the binary operator")
611660
}
612-
PrattError::UnexpectedPostfix(_) => {
661+
PrattError::UnexpectedPostfix(i) => {
662+
*span.borrow_mut() = Some(i.span);
613663
ErrorKind::Other("unable to parse the postfix operator")
614664
}
615665
PrattError::UserError(err) => ErrorKind::Other(err),
616666
};
617667

618-
let span = iter_cloned
619-
.nth(len - iter.len() - 1)
620-
.map(|elem| elem.span)
668+
let span = span
669+
.take()
621670
// It's safe to slice one more token because input must contain EOI.
622671
.unwrap_or_else(|| rest.slice(..1));
623672

src/query/ast/src/parser/expr.rs

Lines changed: 116 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use nom::combinator::consumed;
1919
use nom::combinator::map;
2020
use nom::combinator::value;
2121
use nom::error::context;
22+
use nom::Slice;
2223
use nom_rule::rule;
2324
use pratt::Affix;
2425
use pratt::Associativity;
@@ -126,7 +127,7 @@ pub fn subexpr(min_precedence: u32) -> impl FnMut(Input) -> IResult<Expr> {
126127
}
127128
}
128129

129-
run_pratt_parser(ExprParser, &expr_elements.into_iter(), rest, i)
130+
run_pratt_parser(ExprParser, expr_elements, rest, i)
130131
}
131132
}
132133

@@ -1567,76 +1568,127 @@ pub fn expr_element(i: Input) -> IResult<WithSpan<ExprElement>> {
15671568
)(i)
15681569
}
15691570

1571+
#[inline]
1572+
fn return_op<T>(i: Input, start: usize, op: T) -> IResult<T> {
1573+
Ok((i.slice(start..), op))
1574+
}
1575+
1576+
// Dispatches on the kind of an already-fetched token.
//
// For every `Kind => operator` pair, when `$token.kind` is `TokenKind::Kind`
// the enclosing function returns `return_op($input, 1, operator)`. When no
// pair matches, the macro evaluates to `()` and execution continues after it.
macro_rules! op_branch {
    ($input:ident, $token:ident, $($kind:ident => $op:expr),+ $(,)?) => {
        match $token.kind {
            $(TokenKind::$kind => return return_op($input, 1, $op),)+
            _ => {}
        }
    };
}
1586+
15701587
pub fn unary_op(i: Input) -> IResult<UnaryOperator> {
15711588
// Plus and Minus are parsed as binary op at first.
1572-
alt((
1573-
value(UnaryOperator::Not, rule! { NOT }),
1574-
value(UnaryOperator::Factorial, rule! { Factorial }),
1575-
value(UnaryOperator::SquareRoot, rule! { SquareRoot }),
1576-
value(UnaryOperator::BitwiseNot, rule! { BitWiseNot }),
1577-
value(UnaryOperator::CubeRoot, rule! { CubeRoot }),
1578-
value(UnaryOperator::Abs, rule! { Abs }),
1579-
))(i)
1589+
if let Some(token_0) = i.tokens.first() {
1590+
op_branch!(
1591+
i, token_0,
1592+
NOT => UnaryOperator::Not,
1593+
Factorial => UnaryOperator::Factorial,
1594+
SquareRoot => UnaryOperator::SquareRoot,
1595+
BitWiseNot => UnaryOperator::BitwiseNot,
1596+
CubeRoot => UnaryOperator::CubeRoot,
1597+
Abs => UnaryOperator::Abs,
1598+
);
1599+
}
1600+
Err(nom::Err::Error(Error::from_error_kind(
1601+
i,
1602+
ErrorKind::Other("expecting `NOT`, '!', '|/', '~', '||/', '@', or more ..."),
1603+
)))
15801604
}
15811605

15821606
pub fn binary_op(i: Input) -> IResult<BinaryOperator> {
1583-
alt((
1584-
alt((
1585-
value(BinaryOperator::Plus, rule! { "+" }),
1586-
value(BinaryOperator::Minus, rule! { "-" }),
1587-
value(BinaryOperator::Multiply, rule! { "*" }),
1588-
value(BinaryOperator::Divide, rule! { "/" }),
1589-
value(BinaryOperator::IntDiv, rule! { "//" }),
1590-
value(BinaryOperator::Div, rule! { DIV }),
1591-
value(BinaryOperator::Modulo, rule! { "%" }),
1592-
value(BinaryOperator::StringConcat, rule! { "||" }),
1593-
value(BinaryOperator::CosineDistance, rule! { "<=>" }),
1594-
value(BinaryOperator::L1Distance, rule! { "<+>" }),
1595-
value(BinaryOperator::L2Distance, rule! { "<->" }),
1596-
value(BinaryOperator::Gt, rule! { ">" }),
1597-
value(BinaryOperator::Lt, rule! { "<" }),
1598-
value(BinaryOperator::Gte, rule! { ">=" }),
1599-
value(BinaryOperator::Lte, rule! { "<=" }),
1600-
value(BinaryOperator::Eq, rule! { "=" }),
1601-
value(BinaryOperator::NotEq, rule! { "<>" | "!=" }),
1602-
value(BinaryOperator::Caret, rule! { "^" }),
1603-
)),
1604-
alt((
1605-
value(BinaryOperator::And, rule! { AND }),
1606-
value(BinaryOperator::Or, rule! { OR }),
1607-
value(BinaryOperator::Xor, rule! { XOR }),
1608-
value(BinaryOperator::LikeAny(None), rule! { LIKE ~ ANY }),
1609-
value(BinaryOperator::Like(None), rule! { LIKE }),
1610-
value(BinaryOperator::NotLike(None), rule! { NOT ~ LIKE }),
1611-
value(BinaryOperator::Regexp, rule! { REGEXP }),
1612-
value(BinaryOperator::NotRegexp, rule! { NOT ~ REGEXP }),
1613-
value(BinaryOperator::RLike, rule! { RLIKE }),
1614-
value(BinaryOperator::NotRLike, rule! { NOT ~ RLIKE }),
1615-
value(BinaryOperator::SoundsLike, rule! { SOUNDS ~ LIKE }),
1616-
value(BinaryOperator::BitwiseOr, rule! { BitWiseOr }),
1617-
value(BinaryOperator::BitwiseAnd, rule! { BitWiseAnd }),
1618-
value(BinaryOperator::BitwiseXor, rule! { BitWiseXor }),
1619-
value(BinaryOperator::BitwiseShiftLeft, rule! { ShiftLeft }),
1620-
value(BinaryOperator::BitwiseShiftRight, rule! { ShiftRight }),
1621-
)),
1622-
))(i)
1607+
if let Some(token_0) = i.tokens.first() {
1608+
op_branch!(
1609+
i, token_0,
1610+
Plus => BinaryOperator::Plus,
1611+
Minus => BinaryOperator::Minus,
1612+
Multiply => BinaryOperator::Multiply,
1613+
Divide => BinaryOperator::Divide,
1614+
IntDiv => BinaryOperator::IntDiv,
1615+
DIV => BinaryOperator::Div,
1616+
Modulo => BinaryOperator::Modulo,
1617+
StringConcat => BinaryOperator::StringConcat,
1618+
Spaceship => BinaryOperator::CosineDistance,
1619+
L1DISTANCE => BinaryOperator::L1Distance,
1620+
L2DISTANCE => BinaryOperator::L2Distance,
1621+
Gt => BinaryOperator::Gt,
1622+
Lt => BinaryOperator::Lt,
1623+
Gte => BinaryOperator::Gte,
1624+
Lte => BinaryOperator::Lte,
1625+
Eq => BinaryOperator::Eq,
1626+
NotEq => BinaryOperator::NotEq,
1627+
Caret => BinaryOperator::Caret,
1628+
AND => BinaryOperator::And,
1629+
OR => BinaryOperator::Or,
1630+
XOR => BinaryOperator::Xor,
1631+
REGEXP => BinaryOperator::Regexp,
1632+
RLIKE => BinaryOperator::RLike,
1633+
BitWiseOr => BinaryOperator::BitwiseOr,
1634+
BitWiseAnd => BinaryOperator::BitwiseAnd,
1635+
BitWiseXor => BinaryOperator::BitwiseXor,
1636+
ShiftLeft => BinaryOperator::BitwiseShiftLeft,
1637+
ShiftRight => BinaryOperator::BitwiseShiftRight,
1638+
);
1639+
match token_0.kind {
1640+
TokenKind::LIKE => {
1641+
return if matches!(
1642+
i.tokens.get(1).map(|first| first.kind == TokenKind::ANY),
1643+
Some(true)
1644+
) {
1645+
return_op(i, 2, BinaryOperator::LikeAny(None))
1646+
} else {
1647+
return_op(i, 1, BinaryOperator::Like(None))
1648+
}
1649+
}
1650+
TokenKind::NOT => match i.tokens.get(1).map(|first| first.kind) {
1651+
Some(TokenKind::LIKE) => {
1652+
return return_op(i, 2, BinaryOperator::NotLike(None));
1653+
}
1654+
Some(TokenKind::REGEXP) => {
1655+
return return_op(i, 2, BinaryOperator::NotRegexp);
1656+
}
1657+
Some(TokenKind::RLIKE) => {
1658+
return return_op(i, 2, BinaryOperator::NotRLike);
1659+
}
1660+
_ => (),
1661+
},
1662+
TokenKind::SOUNDS => {
1663+
if let Some(TokenKind::LIKE) = i.tokens.get(1).map(|first| first.kind) {
1664+
return return_op(i, 2, BinaryOperator::SoundsLike);
1665+
}
1666+
}
1667+
_ => (),
1668+
}
1669+
}
1670+
Err(nom::Err::Error(Error::from_error_kind(i, ErrorKind::Other("expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ..."))))
16231671
}
16241672

1625-
pub fn json_op(i: Input) -> IResult<JsonOperator> {
1626-
alt((
1627-
value(JsonOperator::Arrow, rule! { "->" }),
1628-
value(JsonOperator::LongArrow, rule! { "->>" }),
1629-
value(JsonOperator::HashArrow, rule! { "#>" }),
1630-
value(JsonOperator::HashLongArrow, rule! { "#>>" }),
1631-
value(JsonOperator::Question, rule! { "?" }),
1632-
value(JsonOperator::QuestionOr, rule! { "?|" }),
1633-
value(JsonOperator::QuestionAnd, rule! { "?&" }),
1634-
value(JsonOperator::AtArrow, rule! { "@>" }),
1635-
value(JsonOperator::ArrowAt, rule! { "<@" }),
1636-
value(JsonOperator::AtQuestion, rule! { "@?" }),
1637-
value(JsonOperator::AtAt, rule! { "@@" }),
1638-
value(JsonOperator::HashMinus, rule! { "#-" }),
1639-
))(i)
1673+
pub(crate) fn json_op(i: Input) -> IResult<JsonOperator> {
1674+
if let Some(token_0) = i.tokens.first() {
1675+
op_branch!(
1676+
i, token_0,
1677+
RArrow => JsonOperator::Arrow,
1678+
LongRArrow => JsonOperator::LongArrow,
1679+
HashRArrow => JsonOperator::HashArrow,
1680+
HashLongRArrow => JsonOperator::HashLongArrow,
1681+
Placeholder => JsonOperator::Question,
1682+
QuestionOr => JsonOperator::QuestionOr,
1683+
QuestionAnd => JsonOperator::QuestionAnd,
1684+
AtArrow => JsonOperator::AtArrow,
1685+
ArrowAt => JsonOperator::ArrowAt,
1686+
AtQuestion => JsonOperator::AtQuestion,
1687+
AtAt => JsonOperator::AtAt,
1688+
HashMinus => JsonOperator::HashMinus,
1689+
);
1690+
}
1691+
Err(nom::Err::Error(Error::from_error_kind(i, ErrorKind::Other("expecting `->`, '->>', '#>', '#>>', '?', '?|', '?&', '@>', '<@', '@?', '@@', '#-', or more ..."))))
16401692
}
16411693

16421694
pub fn literal(i: Input) -> IResult<Literal> {

src/query/ast/src/parser/query.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ pub fn query(i: Input) -> IResult<Query> {
4848

4949
pub fn set_operation(i: Input) -> IResult<SetExpr> {
5050
let (rest, set_operation_elements) = rule! { #set_operation_element+ }(i)?;
51-
let iter = &mut set_operation_elements.into_iter();
52-
run_pratt_parser(SetOperationParser, iter, rest, i)
51+
run_pratt_parser(SetOperationParser, set_operation_elements, rest, i)
5352
}
5453

5554
#[derive(Debug, Clone, PartialEq)]
@@ -711,8 +710,7 @@ pub fn order_by_expr(i: Input) -> IResult<OrderByExpr> {
711710

712711
pub fn table_reference(i: Input) -> IResult<TableReference> {
713712
let (rest, table_reference_elements) = rule! { #table_reference_element+ }(i)?;
714-
let iter = &mut table_reference_elements.into_iter();
715-
run_pratt_parser(TableReferenceParser, iter, rest, i)
713+
run_pratt_parser(TableReferenceParser, table_reference_elements, rest, i)
716714
}
717715

718716
#[derive(Debug, Clone, PartialEq)]

src/query/ast/tests/it/testdata/dialect.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ error:
237237
1 | "a\"b"
238238
| ^^^^^^
239239
| |
240-
| invalid identifier
240+
| expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ...
241241
| while parsing expression
242242

243243

src/query/ast/tests/it/testdata/expr-error.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ error:
5252
--> SQL:1:10
5353
|
5454
1 | CAST(col1)
55-
| ---- ^ unexpected `)`, expecting `AS`, `,`, `(`, `IS`, `NOT`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `::`, `POSITION`, or 49 more ...
55+
| ---- ^ expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ...
5656
| |
5757
| while parsing `CAST(... AS ...)`
5858
| while parsing expression
@@ -81,7 +81,7 @@ error:
8181
1 | $ abc + 3
8282
| ^
8383
| |
84-
| unexpected `$`, expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `::`, `POSITION`, `IdentVariable`, `DATEADD`, `DATE_ADD`, or 47 more ...
84+
| expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ...
8585
| while parsing expression
8686

8787

0 commit comments

Comments
 (0)