Skip to content

Commit

Permalink
Merge pull request #361 from mindsdb/staging
Browse files Browse the repository at this point in the history
Release 0.10.6
  • Loading branch information
ea-rus authored Mar 14, 2024
2 parents 0b1c3a1 + 727cd00 commit c766c87
Show file tree
Hide file tree
Showing 22 changed files with 459 additions and 124 deletions.
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,32 @@ SLY does not support inheritance, therefore every dialect is described completel
- get_string - to return object as sql expression (or sub-expression)
- copy - to copy AST-tree to new object

### Error handling

For a better user experience, a parsing error contains useful information about the location of the problem and a possible way to fix it.
1. It shows the location of the error if
- character isn't parsed (by lexer)
- token is unexpected (by parser)
2. It tries to propose a correct token to use instead of (or before) the error location. Possible options:
- A keyword will be shown as is.
- '[number]' - if a float or an integer is expected
- '[string]' - if a string is expected
- '[identifier]' - if the name of an object is expected. For example, these are the bold words here:
- "select **x** as **name** from **tbl1** where **col**=1"

How suggestions work:
It uses the next possible tokens defined by the syntax rules.
If this is the end of the query: it just shows these tokens.
Otherwise:
- it tries to replace the bad token with another token from the list of possible tokens
- it tries to parse the query once again; if there is no error:
- it adds this token to the suggestion list
- second iteration: it puts a possible token before the bad token (instead of replacing it) and repeats the same operation.

Example:
![image](https://github.com/mindsdb/mindsdb_sql/assets/8502631/c4707087-ca6e-47f6-aaba-db3a641947a6)


# Planner


Expand Down
2 changes: 1 addition & 1 deletion mindsdb_sql/__about__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__title__ = 'mindsdb_sql'
__package_name__ = 'mindsdb_sql'
__version__ = '0.10.5'
__version__ = '0.10.6'
__description__ = "Pure python SQL parser"
__email__ = "[email protected]"
__author__ = 'MindsDB Inc'
Expand Down
165 changes: 165 additions & 0 deletions mindsdb_sql/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,166 @@
import re
from collections import defaultdict

from sly.lex import Token

from mindsdb_sql.exceptions import ParsingException
from mindsdb_sql.parser.ast import *


class ErrorHandling:
    """Turn the parser's error info into a readable, multi-line error message.

    The message shows where parsing stopped (the reconstructed query text with
    a marker under the offending token) and, when possible, which tokens could
    be used at that position.
    """

    def __init__(self, lexer, parser):
        """Store the lexer (source of keyword patterns) and the parser (used to
        re-check candidate token streams)."""
        self.parser = parser
        self.lexer = lexer

    def process(self, error_info):
        """Build the error message.

        :param error_info: mapping with the keys ``'tokens'`` (tokens of the
            query, may contain ``None``), ``'bad_token'`` (the unexpected token
            or ``None`` for an unexpected end of query) and
            ``'expected_tokens'`` (names of token types accepted at the error).
        :return: the message as a single string with ``\\n`` separators.
        """
        self.tokens = [t for t in error_info['tokens'] if t is not None]
        self.bad_token = error_info['bad_token']
        self.expected_tokens = error_info['expected_tokens']

        if not self.tokens:
            return 'Empty input'

        # show error location
        msgs = self.error_location()

        # suggestion
        suggestions = self.make_suggestion()

        if suggestions:
            prefix = 'Possible inputs: ' if len(suggestions) > 1 else 'Expected symbol: '
            msgs.append(prefix + ', '.join([f'"{item}"' for item in suggestions]))
        return '\n'.join(msgs)

    def error_location(self):
        """Return message lines that show the query text and mark the error.

        The text is rebuilt from token values: every token is written at its
        ``index`` into the buffer of its ``lineno``. Up to three lines ending
        with the error line are shown (each prefixed with ``>``), followed by a
        ``---^^^`` marker line.
        """
        # restore query text
        lines_idx = defaultdict(str)

        # used + unused tokens
        for token in self.tokens:
            if token is None:
                continue
            line = lines_idx[token.lineno]

            # place the token value at its index: cut anything already written
            # past that point, or pad with spaces up to it
            if len(line) > token.index:
                line = line[: token.index]
            else:
                line = line.ljust(token.index)

            line += token.value
            lines_idx[token.lineno] = line

        msgs = []

        # error message and location
        if self.bad_token is None:
            msgs.append('Syntax error, unexpected end of query:')
            error_len = 1
            # last line
            error_line_num = list(lines_idx.keys())[-1]
            error_index = len(lines_idx[error_line_num])
        else:
            msgs.append('Syntax error, unknown input:')
            error_len = len(self.bad_token.value)
            error_line_num = self.bad_token.lineno
            error_index = self.bad_token.index

        # shift lines indexes (it removes spaces from beginnings of the lines):
        # every buffer was padded up to its tokens' indexes, so the part covered
        # by the previous line is cut off and the error index is moved with it
        lines = []
        shift = 0
        error_line = 0
        for i, line_num in enumerate(lines_idx.keys()):
            if line_num == error_line_num:
                error_index -= shift
                error_line = i

            line = lines_idx[line_num]
            lines.append(line[shift:])
            shift = len(line)

        # add source code
        first_line = error_line - 2 if error_line > 1 else 0
        for line in lines[first_line: error_line + 1]:
            msgs.append('>' + line)

        # error position ('+ 1' accounts for the '>' prefix of the source lines)
        msgs.append('-' * (error_index + 1) + '^' * error_len)
        return msgs

    def _expected_values(self):
        """Map expected token names to the text shown to the user.

        :return: dict ``{display value: token name}`` in the order of
            ``self.expected_tokens``.
        """
        expected = {}  # value: token

        for token_name in self.expected_tokens:
            if token_name == 'ID':
                # a lot of other tokens could be ID
                return {'[identifier]': token_name}

            if token_name in ('FLOAT', 'INTEGER'):
                expected['[number]'] = token_name
                continue

            if token_name in ('DQUOTE_STRING', 'QUOTE_STRING'):
                expected['[string]'] = token_name
                continue

            pattern = getattr(self.lexer, token_name, None)
            if not isinstance(pattern, str):
                continue

            # Skip patterns that are real regular expressions. The check has to
            # look at the raw pattern: once backslashes are stripped '\s' can no
            # longer be found and e.g. 'GROUP[s]+BY' would be suggested.
            if '\\s' in pattern or '|' in pattern:
                continue

            value = pattern.replace('\\b', '').replace('\\', '')
            expected[value] = token_name

        return expected

    def make_suggestion(self):
        """Return the list of inputs that could be used at the error location.

        A single expected value is returned as is. For 2..19 expected values:
        at the end of the query all of them are returned, otherwise only those
        for which a modified token stream is accepted by the parser. An empty
        list is returned when nothing can be suggested.
        """
        if len(self.expected_tokens) == 0:
            return []

        expected = self._expected_values()

        if len(expected) == 1:
            # use only it
            return list(expected)

        if not 1 < len(expected) < 20:
            # nothing usable, or too many options to check and to show
            return []

        if self.bad_token is None:
            # if this is the end of query, just show next expected keywords
            return list(expected)

        # find error index
        error_index = None
        for i, token in enumerate(self.tokens):
            if token is self.bad_token:
                error_index = i

        if error_index is None:
            # the bad token is not a part of the token list: candidate streams
            # can't be built, so there is nothing to verify and to suggest
            return []

        suggestions = []
        # not every suggestion satisfy the end of the query. we have to check if it works
        for value, token_name in expected.items():
            # make up a token
            token = Token()
            token.type = token_name
            token.value = value
            token.end = 0
            token.index = 0
            token.lineno = 0

            # try to add token
            candidates = [
                self.tokens[:error_index] + [token] + self.tokens[error_index:]
            ]

            # try to replace token
            # NOTE(review): this drops the token *before* the bad one and keeps
            # the bad token itself - confirm that this is the intended target.
            # It is skipped for the first token: there is nothing before it and
            # a negative slice bound would wrap around to the end of the list.
            if error_index > 0:
                candidates.append(
                    self.tokens[:error_index - 1] + [token] + self.tokens[error_index:]
                )

            if any(self.query_is_valid(tokens2) for tokens2 in candidates):
                suggestions.append(value)

        return suggestions

    def query_is_valid(self, tokens):
        """Return True if the parser produces a result for the list of tokens."""
        # try to parse list of tokens
        ast = self.parser.parse(iter(tokens))
        return ast is not None


def get_lexer_parser(dialect):
if dialect == 'sqlite':
from mindsdb_sql.parser.lexer import SQLLexer
Expand All @@ -29,4 +186,12 @@ def parse_sql(sql, dialect='mindsdb'):
lexer, parser = get_lexer_parser(dialect)
tokens = lexer.tokenize(sql)
ast = parser.parse(tokens)

if ast is None:

eh = ErrorHandling(lexer, parser)
message = eh.process(parser.error_info)

raise ParsingException(message)

return ast
11 changes: 8 additions & 3 deletions mindsdb_sql/parser/ast/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@


class TableColumn():
    """Description of a single column of a table definition."""

    def __init__(self, name, type='integer', length=None):
        """
        :param name: column name
        :param type: column type, 'integer' by default
        :param length: optional length of the type; None if it is not specified
        """
        self.name = name
        self.type = type
        # a new column is not a primary key and has no default value
        self.is_primary_key = False
        self.default = None
        self.length = length


class CreateTable(ASTNode):
Expand Down Expand Up @@ -72,14 +73,18 @@ def get_string(self, *args, **kwargs):
if self.columns is not None:
columns = []
for col in self.columns:
type = str(col.type)
if sa_types is not None:

if not isinstance(col.type, str) and sa_types is not None:
if issubclass(col.type, sa_types.Integer):
type = 'int'
elif issubclass(col.type, sa_types.Float):
type = 'float'
elif issubclass(col.type, sa_types.Text):
type = 'text'
else:
type = str(col.type)
if col.length is not None:
type = f'{type}({col.length})'
columns.append( f'{col.name} {type}')

columns_str = '({})'.format(', '.join(columns))
Expand Down
3 changes: 2 additions & 1 deletion mindsdb_sql/parser/ast/select/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from .join import Join
from .type_cast import TypeCast
from .tuple import Tuple
from .operation import Operation, BinaryOperation, UnaryOperation, BetweenOperation, Function, WindowFunction, Object
from .operation import (Operation, BinaryOperation, UnaryOperation, BetweenOperation,
Function, WindowFunction, Object, Interval)
from .order_by import OrderBy
from .parameter import Parameter
from .case import Case
Expand Down
16 changes: 16 additions & 0 deletions mindsdb_sql/parser/ast/select/operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,19 @@ def to_string(self, *args, **kwargs):

def __repr__(self):
return self.to_tree()


class Interval(Operation):
    """INTERVAL expression, kept as an 'interval' operation with one argument."""

    def __init__(self, info):
        # the interval description is the only argument of the operation
        super().__init__(op='interval', args=[info])

    def get_string(self, *args, **kwargs):
        """Render the node as ``INTERVAL`` followed by the repr of its argument."""
        info = self.args[0]
        return f'INTERVAL {info!r}'

    def to_tree(self, *args, level=0, **kwargs):
        """Tree form is the same text as get_string; ``level`` is not passed on."""
        return self.get_string(*args, **kwargs)

    def assert_arguments(self):
        """Raise ParsingException unless exactly one argument is present."""
        if len(self.args) == 1:
            return
        raise ParsingException(f'Expected one argument for operation "{self.op}"')
1 change: 0 additions & 1 deletion mindsdb_sql/parser/dialects/mindsdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from .drop_dataset import DropDataset
from .evaluate import Evaluate
from .latest import Latest
from .create_file import CreateFile
from .create_ml_engine import CreateMLEngine
from .drop_ml_engine import DropMLEngine
from .create_job import CreateJob
Expand Down
29 changes: 0 additions & 29 deletions mindsdb_sql/parser/dialects/mindsdb/create_file.py

This file was deleted.

32 changes: 28 additions & 4 deletions mindsdb_sql/parser/dialects/mindsdb/lexer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
from sly import Lexer
from sly.lex import LexError

"""
Unfortunately we can't inherit from base SQLLexer, because the order of rules is important.
Expand All @@ -14,7 +15,7 @@ class MindsDBLexer(Lexer):
ignore_line_comment = r'--[^\n]*'

tokens = {
USE, DROP, CREATE, DESCRIBE, RETRAIN,REPLACE,
USE, DROP, CREATE, DESCRIBE, RETRAIN, REPLACE,

# Misc
SET, START, TRANSACTION, COMMIT, ROLLBACK, ALTER, EXPLAIN,
Expand Down Expand Up @@ -72,7 +73,7 @@ class MindsDBLexer(Lexer):
EQUALS, NEQUALS, GREATER, GEQ, LESS, LEQ,
AND, OR, NOT, IS, IS_NOT,
IN, LIKE, NOT_LIKE, CONCAT, BETWEEN, WINDOW, OVER, PARTITION_BY,
JSON_GET, JSON_GET_STR,
JSON_GET, JSON_GET_STR, INTERVAL,

# Data types
CAST, ID, INTEGER, FLOAT, QUOTE_STRING, DQUOTE_STRING, NULL, TRUE, FALSE,
Expand Down Expand Up @@ -287,6 +288,7 @@ class MindsDBLexer(Lexer):
CAST = r'\bCAST\b'
CONCAT = r'\|\|'
BETWEEN = r'\bBETWEEN\b'
INTERVAL = r'\bINTERVAL\b'
WINDOW = r'\bWINDOW\b'
OVER = r'\bOVER\b'
PARTITION_BY = r'\bPARTITION BY\b'
Expand All @@ -308,12 +310,12 @@ def FLOAT(self, t):
def INTEGER(self, t):
return t

@_(r"'(?:[^\'\\]|\\.)*'")
@_(r"'(?:\\.|[^'])*'")
def QUOTE_STRING(self, t):
t.value = t.value.replace('\\"', '"').replace("\\'", "'")
return t

@_(r'"(?:[^\"\\]|\\.)*"')
@_(r'"(?:\\.|[^"])*"')
def DQUOTE_STRING(self, t):
t.value = t.value.replace('\\"', '"').replace("\\'", "'")
return t
Expand Down Expand Up @@ -354,3 +356,25 @@ def SYSTEM_VARIABLE(self, t):
t.value = t.value.strip('`')
return t

def error(self, t):
    """Report a character that can't be tokenized.

    Builds a message with the illegal character, the line where it was found
    (plus the line before it, if any) and a marker under its position.

    :param t: error token; ``t.index`` is used as the offset in ``self.text``
        and ``t.value`` as the remaining unparsed text
    :raises LexError: always
    """
    # convert to lines and find the line/column of the error
    lines = self.text.split('\n')
    error_line = 0
    error_index = 0
    shift = 0  # offset of the current line in the whole text
    for i, line in enumerate(lines):
        if 0 <= t.index - shift < len(line):
            error_line = i
            error_index = t.index - shift
        # '+ 1' is for the line break removed by split
        shift += len(line) + 1

    msgs = [f'Illegal character {t.value[0]!r}:']

    # show error code: the previous line and the error line.
    # The start is clamped to 0: for an error on the first line a bound of -1
    # would be counted from the end and select nothing in a multi-line query.
    first_line = max(error_line - 1, 0)
    for line in lines[first_line: error_line + 1]:
        msgs.append('>' + line)

    # '+ 1' accounts for the '>' prefix of the source lines
    msgs.append('-' * (error_index + 1) + '^')

    raise LexError('\n'.join(msgs), t.value, self.index)
Loading

0 comments on commit c766c87

Please sign in to comment.