Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for RAG queries to sql parser #324

Draft
wants to merge 10 commits into
base: staging
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mindsdb_sql/__about__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__title__ = 'mindsdb_sql'
__package_name__ = 'mindsdb_sql'
__version__ = '0.7.4'
__version__ = '0.7.5'
__description__ = "Pure python SQL parser"
__email__ = "[email protected]"
__author__ = 'MindsDB Inc'
Expand Down
1 change: 1 addition & 0 deletions mindsdb_sql/parser/dialects/mindsdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .chatbot import CreateChatBot, UpdateChatBot, DropChatBot
from .trigger import CreateTrigger, DropTrigger
from .knowledge_base import CreateKnowledgeBase, DropKnowledgeBase
from .rag import CreateRAG, DropRAG, UpdateRAG
from .skills import CreateSkill, DropSkill, UpdateSkill

# remove it in next release
Expand Down
10 changes: 5 additions & 5 deletions mindsdb_sql/parser/dialects/mindsdb/knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class CreateKnowledgeBase(ASTNode):
def __init__(
self,
name,
model,
model=None,
storage=None,
from_select=None,
params=None,
Expand Down Expand Up @@ -37,13 +37,13 @@ def __init__(
def to_tree(self, *args, level=0, **kwargs):
    """
    Render this CREATE KNOWLEDGE_BASE node as an indented debug tree.

    Args:
        level: nesting depth, passed to ``indent`` to build the line prefix.

    Returns:
        str -- multi-line tree representation, starting and ending with a newline.
    """
    ind = indent(level)
    from_query = self.from_query.to_tree(level=level + 1) if self.from_query else None

    lines = [
        "",
        f"{ind}CreateKnowledgeBase(",
        f"{ind} if_not_exists={self.if_not_exists},",
        f"{ind} name={self.name.to_string()},",
        f"{ind} from_query={from_query},",
    ]
    # model is optional (defaults to None), so it is rendered only when set;
    # calling self.model.to_string() unconditionally would fail on None.
    if self.model:
        lines.append(f"{ind} model={self.model.to_string()},")
    if self.storage:
        lines.append(f"{ind} storage={self.storage.to_string()},")
    lines.append(f"{ind} params={self.params}")
    lines.append(f"{ind})")
    lines.append("")
    return "\n".join(lines)
Expand All @@ -56,13 +56,13 @@ def get_string(self, *args, **kwargs):
f"FROM ({self.from_query.get_string()})" if self.from_query else ""
)
storage_str = f" STORAGE = {self.storage.to_string()}" if self.storage else ""
model_str = f" MODEL = {self.model.to_string()},\n" if self.model else ""

out_str = (
f"CREATE KNOWLEDGE_BASE {'IF NOT EXISTS' if self.if_not_exists else ''}{self.name.to_string()} "
f"{from_query_str} "
f"USING {using_str},"
f" MODEL = {self.model.to_string()}, "
f"{storage_str}"
f"{model_str}{storage_str}"
)

return out_str
Expand Down
3 changes: 3 additions & 0 deletions mindsdb_sql/parser/dialects/mindsdb/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class MindsDBLexer(Lexer):
ENGINE, TRAIN, PREDICT, PARAMETERS, JOB, CHATBOT, EVERY,PROJECT,
ANOMALY, DETECTION,
KNOWLEDGE_BASE, KNOWLEDGE_BASES,
RAG, RAGS,
SKILL,
AGENT,

Expand Down Expand Up @@ -121,6 +122,8 @@ class MindsDBLexer(Lexer):

KNOWLEDGE_BASE = r'\bKNOWLEDGE[_|\s]BASE\b'
KNOWLEDGE_BASES = r'\bKNOWLEDGE[_|\s]BASES\b'
RAG = r'\bRAG\b'
RAGS = r'\bRAGS\b'
SKILL = r'\bSKILL\b'
AGENT = r'\bAGENT\b'

Expand Down
39 changes: 39 additions & 0 deletions mindsdb_sql/parser/dialects/mindsdb/parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from mindsdb_sql.parser.dialects.mindsdb.rag import CreateRAG, DropRAG, UpdateRAG
from sly import Parser
from mindsdb_sql.parser.ast import *
from mindsdb_sql.parser.ast.drop import DropDatabase, DropView
Expand Down Expand Up @@ -87,6 +88,9 @@ class MindsDBParser(Parser):
'drop_trigger',
'create_kb',
'drop_kb',
'create_rag',
'drop_rag',
'update_rag',
'create_skill',
'drop_skill',
'update_skill',
Expand Down Expand Up @@ -138,6 +142,41 @@ def create_kb(self, p):
def drop_kb(self, p):
return DropKnowledgeBase(name=p.identifier, if_exists=p.if_exists_or_empty)

# -- RAG --
@_('CREATE RAG if_not_exists_or_empty identifier USING kw_parameter_list')
def create_rag(self, p):
    # Build a CreateRAG node from the USING parameters.
    # 'llm' and 'knowledge_base_store' are pulled out of the parameter dict;
    # whatever remains is passed through as generic params.
    using_params = p.kw_parameter_list

    llm_ref = using_params.pop('llm', None)
    kb_ref = using_params.pop('knowledge_base_store', None)

    # llm is mandatory
    if not llm_ref:
        raise ParsingException('Missing llm parameter')

    # plain string values are wrapped into Identifier nodes
    if isinstance(llm_ref, str):
        llm_ref = Identifier(llm_ref)
    if isinstance(kb_ref, str):
        kb_ref = Identifier(kb_ref)

    return CreateRAG(
        name=p.identifier,
        llm=llm_ref,
        knowledge_base_store=kb_ref,
        params=using_params,
        if_not_exists=p.if_not_exists_or_empty,
    )

@_('DROP RAG if_exists_or_empty identifier')
def drop_rag(self, p):
    # Build a DropRAG node from the parsed name and the IF EXISTS marker.
    rag_name = p.identifier
    if_exists_flag = p.if_exists_or_empty
    return DropRAG(name=rag_name, if_exists=if_exists_flag)

@_('UPDATE RAG identifier SET kw_parameter_list')
def update_rag(self, p):
    # Build an UpdateRAG node: the SET parameters are passed through unchanged.
    rag_name = p.identifier
    new_params = p.kw_parameter_list
    return UpdateRAG(name=rag_name, updated_params=new_params)

# -- Skills --
@_('CREATE SKILL if_not_exists_or_empty identifier USING kw_parameter_list')
def create_skill(self, p):
Expand Down
130 changes: 130 additions & 0 deletions mindsdb_sql/parser/dialects/mindsdb/rag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from mindsdb_sql.parser.ast.base import ASTNode
from mindsdb_sql.parser.utils import indent


class CreateRAG(ASTNode):
    """
    AST node for a ``CREATE RAG`` statement.
    """

    def __init__(
        self,
        name,
        llm,
        knowledge_base_store=None,
        from_select=None,
        params=None,
        if_not_exists=False,
        *args,
        **kwargs,
    ):
        """
        Args:
            name: Identifier -- name of the RAG
            llm: Identifier -- name of the LLM to use
            knowledge_base_store: Identifier -- name of the knowledge_base_store to use
            from_select: SelectStatement -- select statement to use as the source of the RAG
            params: dict -- additional parameters to pass to the RAG.
            if_not_exists: bool -- if True, do not raise an error if the RAG already exists
        """
        super().__init__(*args, **kwargs)
        self.name = name
        self.llm = llm
        self.knowledge_base_store = knowledge_base_store
        self.params = params
        self.if_not_exists = if_not_exists
        # stored under a different attribute name than the constructor argument
        self.from_query = from_select

    def to_tree(self, *args, level=0, **kwargs):
        """
        Render this node as an indented debug tree.

        Args:
            level: nesting depth, passed to ``indent`` to build the line prefix.

        Returns:
            str -- multi-line tree representation, starting and ending with a newline.
        """
        ind = indent(level)
        from_query = self.from_query.to_tree(level=level + 1) if self.from_query else None

        lines = [
            "",
            f"{ind}CreateRAG(",
            f"{ind} if_not_exists={self.if_not_exists},",
            f"{ind} name={self.name.to_string()},",
            f"{ind} from_query={from_query},",
            f"{ind} llm={self.llm.to_string()},",
        ]
        # the knowledge base store is optional and only shown when provided
        if self.knowledge_base_store:
            lines.append(
                f"{ind} knowledge_base_store={self.knowledge_base_store.to_string()},"
            )
        lines.append(f"{ind} params={self.params}")
        lines.append(f"{ind})")
        lines.append("")
        return "\n".join(lines)

    def get_string(self, *args, **kwargs):
        """
        Render this node back to a ``CREATE RAG`` SQL string.

        All USING entries (generic params, llm, and the optional knowledge
        base store) are joined with ", " so the output has no dangling or
        leading commas, and clauses are separated by single spaces.

        Returns:
            str -- the SQL text of the statement.
        """
        # params may legitimately be None (constructor default)
        using_items = [f"{k}={v!r}" for k, v in (self.params or {}).items()]
        # lowercase key, matching the 'llm' key the CREATE RAG grammar rule reads
        using_items.append(f"llm = {self.llm.to_string()}")
        # only add knowledge base if it is provided, else the default is used
        if self.knowledge_base_store:
            using_items.append(
                f"knowledge_base_store = {self.knowledge_base_store.to_string()}"
            )

        parts = ["CREATE RAG"]
        if self.if_not_exists:
            parts.append("IF NOT EXISTS")
        parts.append(self.name.to_string())
        if self.from_query:
            parts.append(f"FROM ({self.from_query.get_string()})")
        parts.append("USING " + ", ".join(using_items))
        return " ".join(parts)

    def __repr__(self) -> str:
        return self.to_tree()


class DropRAG(ASTNode):
    """
    AST node for a ``DROP RAG`` statement.
    """

    def __init__(self, name, if_exists=False, *args, **kwargs):
        """
        Args:
            name: Identifier -- name of the RAG
            if_exists: bool -- if True, do not raise an error if the RAG does not exist
        """
        super().__init__(*args, **kwargs)
        self.name = name
        self.if_exists = if_exists

    def to_tree(self, *args, level=0, **kwargs):
        """
        Render this node as a single-line debug tree.

        Args:
            level: nesting depth, passed to ``indent`` to build the prefix.

        Returns:
            str -- tree representation of the node.
        """
        ind = indent(level)
        out_str = (
            f"{ind}DropRAG("
            f"{ind} if_exists={self.if_exists},"
            f"name={self.name.to_string()})"
        )
        return out_str

    def get_string(self, *args, **kwargs):
        """
        Render this node back to a ``DROP RAG`` SQL string.

        Returns:
            str -- the SQL text of the statement.
        """
        # the trailing space keeps "IF EXISTS" separated from the RAG name
        if_exists_str = "IF EXISTS " if self.if_exists else ""
        return f"DROP RAG {if_exists_str}{self.name.to_string()}"


class UpdateRAG(ASTNode):
    """
    AST node for an ``UPDATE RAG ... SET ...`` statement.
    """

    def __init__(self, name, updated_params, *args, **kwargs):
        """
        Parameters:
            name (Identifier): name of the RAG to update
            updated_params (dict): new SET parameters of the RAG to update
        """
        super().__init__(*args, **kwargs)
        self.name = name
        # kept under the shorter attribute name ``params``
        self.params = updated_params

    def to_tree(self, level=0, *args, **kwargs):
        """
        Render this node as a single-line debug tree.

        Parameters:
            level (int): nesting depth, passed to ``indent`` to build the prefix.
        """
        prefix = indent(level)
        rag_name = self.name.to_string()
        return f'{prefix}UpdateRAG(name={rag_name}, updated_params={self.params})'

    def get_string(self, *args, **kwargs):
        """
        Render this node back to an ``UPDATE RAG`` SQL string.
        """
        assignments = []
        for key, value in self.params.items():
            assignments.append(f'{key}={value!r}')
        set_clause = ', '.join(assignments)

        return f'UPDATE RAG {self.name.to_string()} SET {set_clause}'
2 changes: 1 addition & 1 deletion tests/test_parser/test_mindsdb/test_knowledgebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
)


def test_create_knowledeg_base():
def test_create_knowledge_base():
# create without select
sql = """
CREATE KNOWLEDGE_BASE my_knowledge_base
Expand Down
Loading
Loading