-
Notifications
You must be signed in to change notification settings - Fork 1
Enabling symbolic execution support for filters, joins and unions #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: michael/sym-eval
Are you sure you want to change the base?
Changes from all commits
93ee79e
a7740a7
c0dc1a4
0d8607c
54daa6c
86b37fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| use arrow_schema::DataType; | ||
|
|
||
| /// Literal values used in symbolic expressions | ||
| #[derive(Debug, Clone, PartialEq)] | ||
| pub enum ScalarValue { | ||
|
|
@@ -31,6 +33,34 @@ pub enum ScalarValue { | |
| Utf8(Option<String>), | ||
| } | ||
|
|
||
| #[derive(Debug, Copy, Clone, PartialEq)] | ||
| pub enum VarType { | ||
| /// A boolean variable | ||
| Boolean, | ||
| /// A string variable | ||
| String, | ||
| /// A int variable | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: An int |
||
| Int, | ||
| } | ||
|
|
||
| pub fn arrow_type_to_var_type(arrow_type: DataType) -> VarType { | ||
| match arrow_type { | ||
| DataType::Boolean => VarType::Boolean, | ||
| DataType::Utf8 | DataType::LargeUtf8 => VarType::String, | ||
| DataType::Int8 | ||
| | DataType::Int16 | ||
| | DataType::Int32 | ||
| | DataType::Int64 | ||
| | DataType::UInt8 | ||
| | DataType::UInt16 | ||
| | DataType::UInt32 | ||
| | DataType::UInt64 | ||
| | DataType::Decimal128(_, _) | ||
| | DataType::Decimal256(_, _) => VarType::Int, | ||
| _ => VarType::String, // Default to String for other types | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what are those other types and why is string ok? Is it safer to return an error in this case |
||
| } | ||
| } | ||
|
|
||
| /// Operators applied to symbolic expressions | ||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Hash)] | ||
| pub enum Operator { | ||
|
|
@@ -60,6 +90,10 @@ pub enum Operator { | |
| And, | ||
| /// Logical OR, like `||` | ||
| Or, | ||
| /// Logical NOT, like `!` | ||
| Not, | ||
| /// In, like `in` | ||
| In, | ||
| } | ||
|
|
||
| /// A symbolic expression for a columnar array. | ||
|
|
@@ -79,6 +113,29 @@ pub enum Expr { | |
| column: usize, | ||
| /// The position of the row | ||
| row: usize, | ||
| /// The variable type | ||
| var_type: VarType, | ||
| }, | ||
|
|
||
| /// A symbolic variable representing a row in a table | ||
| Row { | ||
| /// The name of the table | ||
| table: String, | ||
| /// The position of the row | ||
| row_id: usize, | ||
| /// The row constraint | ||
| constraint: Option<Vec<Expr>>, | ||
| }, | ||
|
|
||
| /// A list of symbolic expressions | ||
| List(Vec<Expr>), | ||
|
|
||
| /// A count symbolic expression | ||
| Count { | ||
| /// The column to count | ||
| expr: Box<Expr>, | ||
| /// Whether the count is distinct | ||
| is_distinct: bool, | ||
| }, | ||
|
|
||
| /// A binary symbolic expression | ||
|
|
@@ -108,8 +165,39 @@ impl Expr { | |
| } | ||
|
|
||
| /// Create a symbolic variable | ||
| pub fn variable(table: String, column: usize, row: usize) -> Self { | ||
| Expr::Variable { table, column, row } | ||
| pub fn variable(table: String, column: usize, row: usize, var_type: VarType) -> Self { | ||
| Expr::Variable { | ||
| table, | ||
| column, | ||
| row, | ||
| var_type, | ||
| } | ||
| } | ||
|
|
||
| /// Create a symbolic row | ||
| pub fn row(table: String, row_id: usize) -> Self { | ||
| Expr::Row { | ||
| table, | ||
| row_id, | ||
| constraint: None, | ||
| } | ||
| } | ||
|
|
||
| /// Create a symbolic row with a constraint | ||
| pub fn row_with_constraint(table: String, row_id: usize, constraint: Vec<Expr>) -> Self { | ||
| Expr::Row { | ||
| table, | ||
| row_id, | ||
| constraint: Some(constraint), | ||
| } | ||
| } | ||
|
|
||
| /// Create a count symbolic expression | ||
| pub fn count(expr: Expr, is_distinct: bool) -> Self { | ||
| Expr::Count { | ||
| expr: Box::new(expr), | ||
| is_distinct, | ||
| } | ||
| } | ||
|
|
||
| /// Create a binary symbolic expression | ||
|
|
@@ -138,14 +226,44 @@ impl Expr { | |
| } | ||
|
|
||
| /// Create a symbolic expression array for a column reference using table name and column position | ||
| pub fn make_colref_symbolic_expr_array(table: String, column: usize, len: usize) -> Vec<Expr> { | ||
| pub fn make_colref_symbolic_expr_array( | ||
| table: String, | ||
| column: usize, | ||
| len: usize, | ||
| row_offset: usize, | ||
| var_type: VarType, | ||
| ) -> Vec<Expr> { | ||
| let mut res = Vec::with_capacity(len); | ||
| for i in 0..len { | ||
| res.push(Expr::Variable { | ||
| table: table.clone(), | ||
| column, | ||
| row: i, | ||
| row: i + row_offset, | ||
| var_type, | ||
| }); | ||
| } | ||
| res | ||
| } | ||
|
|
||
| pub fn add_constraint_to_row(row: &mut Expr, constraint: Expr) { | ||
| if let Expr::Row { | ||
| constraint: Some(constraints), | ||
| .. | ||
| } = row | ||
| { | ||
| constraints.push(constraint); | ||
| } else if let Expr::Row { | ||
| table, | ||
| row_id, | ||
| constraint: None, | ||
| } = row | ||
| { | ||
| *row = Expr::Row { | ||
| table: table.clone(), | ||
| row_id: *row_id, | ||
| constraint: Some(vec![constraint]), | ||
| }; | ||
| } else { | ||
| panic!("Expr is not a symbolic row"); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,13 +73,15 @@ macro_rules! sym { | |
|
|
||
| let mut sym_res = vec![]; | ||
| for i in 0..syms1.len() { | ||
| let l = &syms1[i]; | ||
| let r = &syms2[i]; | ||
| sym_res.push(SymbolicExpr::binary( | ||
| l.clone(), | ||
| SymbolicOperator::$op, | ||
| r.clone(), | ||
| )); | ||
| for j in 0..syms2.len() { | ||
| let l = &syms1[i]; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this not outside the nested loop? |
||
| let r = &syms2[j]; | ||
| sym_res.push(SymbolicExpr::binary( | ||
| l.clone(), | ||
| SymbolicOperator::$op, | ||
| r.clone(), | ||
| )); | ||
| } | ||
| } | ||
| Ok(expr.with_symbolic_data(&sym_res)) | ||
| }}; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how did we end up with dbg lines in the first place? For other repos we have a protection, should we lint here as well?