Skip to content

Commit

Permalink
support insert from values
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhuishi-convect committed Sep 6, 2023
1 parent a536adf commit e3aab82
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 4 deletions.
45 changes: 44 additions & 1 deletion mindsdb_sql/planner/query_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1446,7 +1446,50 @@ def find_embeddings_field(node, **kwargs):

return self.plan_insert(query)
else:
raise NotImplementedError("Not implemented insert without select")
if not query.columns:
raise PlanningException("Columns list is empty when using values")

keys = [column.name for column in query.columns]
is_embeddings_field_present = EMBEDDINGS_FIELD in keys

query.table = Identifier(vector_database_table)
# directly dispatch to the underlying storage table
if is_embeddings_field_present:
return self.plan_insert(query)

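# If the embeddings field is not present in the columns list, we need to:
#   1. wrap the inserted values in ast.Data,
#   2. join that data with the model table in a select,
#   3. attach the select to the insert query via from_select
#      (and clear the literal values),
#   4. dispatch the rewritten insert to the underlying storage table.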

records = []
_unwrap_constant_or_self = lambda node: node.value if isinstance(node, Constant) else node
for row in query.values:
records.append(
dict(
zip(
keys,
map(_unwrap_constant_or_self, row)
)
)
)

data = ast.Data(records, alias=Identifier("data"))
predictor_select = Select(
targets=[Identifier(col.name) for col in query.columns] + [Identifier(EMBEDDINGS_FIELD)],
from_table=Join(
left=data,
right=Identifier(model_name),
join_type="JOIN"
)
)

query.columns += [ast.TableColumn(name=EMBEDDINGS_FIELD)]
query.from_select = predictor_select
query.values = None

return self.plan_insert(query)

# method for compatibility
def from_query(self, query=None):
Expand Down
5 changes: 2 additions & 3 deletions tests/test_planner/test_knowledege_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,8 @@ def test_insert_into_kb(planner_context):
(2, 'hello world', '{"a": 1, "b": 2}'),
(3, 'hello world', '{"a": 1, "b": 2}');
"""
# this will dispatch the underlying dataframes to the underlying model
# then it will dispatch the query to the underlying storage
# TODO: need to figure out what to do with this situation
plan = _plan_sql(sql)
assert len(plan.steps) > 0 # TODO: better to specify the details of the plan

# insert into kb with select
sql = """
Expand Down

0 comments on commit e3aab82

Please sign in to comment.