From cae719574453894550b7088e46fdfdb65fb7cfc2 Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Thu, 21 Sep 2023 23:08:42 +0200 Subject: [PATCH 1/9] Further on blank nodes support --- arrow_python_utils/src/to_python.rs | 2 +- arrow_python_utils/src/to_rust.rs | 2 +- maplib/src/ast.rs | 45 +++++- maplib/src/document.rs | 6 +- maplib/src/mapping.rs | 133 ++++++++++++------ maplib/src/mapping/constant_terms.rs | 67 ++++++--- maplib/src/mapping/default.rs | 8 +- maplib/src/mapping/validation_inference.rs | 16 +-- maplib/src/parsing.rs | 2 +- maplib/src/parsing/nom_parsing.rs | 12 +- maplib/src/templates.rs | 107 +++++++------- maplib/tests/test_stottr.rs | 32 ++--- parquet_io/src/lib.rs | 8 +- py_maplib/tests/test_blank_nodes.py | 51 +++++++ representation/src/lib.rs | 6 +- representation/src/literals.rs | 2 +- triplestore/src/conversion.rs | 2 +- triplestore/src/io_funcs.rs | 8 +- triplestore/src/lib.rs | 26 ++-- triplestore/src/native_parquet_write.rs | 4 +- triplestore/src/ntriples_write.rs | 4 +- triplestore/src/sparql.rs | 18 +-- triplestore/src/sparql/lazy_aggregate.rs | 3 +- triplestore/src/sparql/lazy_expressions.rs | 54 ++++--- .../sparql/lazy_expressions/exists_helper.rs | 2 +- triplestore/src/sparql/lazy_graph_patterns.rs | 2 +- .../src/sparql/lazy_graph_patterns/extend.rs | 2 +- .../src/sparql/lazy_graph_patterns/filter.rs | 2 +- .../src/sparql/lazy_graph_patterns/group.rs | 2 +- .../sparql/lazy_graph_patterns/left_join.rs | 2 +- .../src/sparql/lazy_graph_patterns/path.rs | 25 ++-- .../src/sparql/lazy_graph_patterns/triple.rs | 12 +- .../src/sparql/lazy_graph_patterns/union.rs | 2 +- .../src/sparql/lazy_graph_patterns/values.rs | 10 +- triplestore/src/sparql/query_context.rs | 124 ++++++++-------- triplestore/src/sparql/solution_mapping.rs | 2 +- triplestore/src/sparql/sparql_to_polars.rs | 22 ++- 37 files changed, 490 insertions(+), 337 deletions(-) create mode 100644 py_maplib/tests/test_blank_nodes.py diff --git a/arrow_python_utils/src/to_python.rs b/arrow_python_utils/src/to_python.rs index 5be6d87..ceed9c1 100644 --- a/arrow_python_utils/src/to_python.rs +++ b/arrow_python_utils/src/to_python.rs @@ -76,7 +76,7 @@ fn to_py_df( pyarrow: &PyModule, polars: &PyModule, ) -> PyResult { - let py_rb = to_py_rb(rb, names, py.clone(), pyarrow)?; + let py_rb = to_py_rb(rb, names, py, pyarrow)?; let py_rb_list = PyList::empty(py); py_rb_list.append(py_rb)?; let py_table = pyarrow diff --git a/arrow_python_utils/src/to_rust.rs b/arrow_python_utils/src/to_rust.rs index a598535..933e61f 100644 --- a/arrow_python_utils/src/to_rust.rs +++ b/arrow_python_utils/src/to_rust.rs @@ -66,7 +66,7 @@ pub fn array_to_rust(obj: &PyAny) -> PyResult { unsafe { let field = ffi::import_field_from_c(schema.as_ref()).map_err(ToRustError::from)?; let array = ffi::import_array_from_c(*array, field.data_type).map_err(ToRustError::from)?; - Ok(array.into()) + Ok(array) } } diff --git a/maplib/src/ast.rs b/maplib/src/ast.rs index 49a919b..e451949 100644 --- a/maplib/src/ast.rs +++ b/maplib/src/ast.rs @@ -1,3 +1,4 @@ +use crate::constants::BLANK_NODE_IRI; #[cfg(test)] use crate::constants::OTTR_TRIPLE; use oxrdf::vocab::xsd; @@ -62,7 +63,7 @@ impl Display for Signature { write!(f, ", ")?; } } - if let Some(_) = self.annotation_list { + if self.annotation_list.is_some() { todo!(); } write!(f, " ]") @@ -111,6 +112,26 @@ pub enum PType { NEListType(Box), } +impl PType { + pub fn is_blank_node(&self) -> bool { + if let PType::BasicType(nn, _) = &self { + if nn.as_str() == 
BLANK_NODE_IRI { + return true; + } + } + false + } + + pub fn is_iri(&self) -> bool { + if let PType::BasicType(nn, _) = self { + if nn.as_ref() == xsd::ANY_URI { + return true; + } + } + true + } +} + impl Display for PType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { @@ -161,6 +182,22 @@ pub enum ConstantTerm { ConstantList(Vec), } +impl ConstantTerm { + pub fn has_blank_node(&self) -> bool { + match self { + ConstantTerm::Constant(c) => c.is_blank_node(), + ConstantTerm::ConstantList(l) => { + for c in l { + if c.has_blank_node() { + return true; + } + } + false + } + } + } +} + impl Display for ConstantTerm { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { @@ -187,6 +224,12 @@ pub enum ConstantLiteral { None, } +impl ConstantLiteral { + pub fn is_blank_node(&self) -> bool { + matches!(self, ConstantLiteral::BlankNode(_)) + } +} + impl Display for ConstantLiteral { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { diff --git a/maplib/src/document.rs b/maplib/src/document.rs index 7ab9aba..78caad0 100644 --- a/maplib/src/document.rs +++ b/maplib/src/document.rs @@ -6,11 +6,11 @@ use std::fs::read_to_string; use std::path::Path; pub fn document_from_str(s: &str) -> Result { - let unresolved = whole_stottr_doc(s).map_err(|x| TemplateError::ParsingError(x))?; - resolve_document(unresolved).map_err(|x| TemplateError::ResolutionError(x)) + let unresolved = whole_stottr_doc(s).map_err(TemplateError::ParsingError)?; + resolve_document(unresolved).map_err(TemplateError::ResolutionError) } pub fn document_from_file>(p: P) -> Result { - let s = read_to_string(p).map_err(|x| TemplateError::ReadTemplateFileError(x))?; + let s = read_to_string(p).map_err(TemplateError::ReadTemplateFileError)?; document_from_str(&s) } diff --git a/maplib/src/mapping.rs b/maplib/src/mapping.rs index d92e656..c283921 100644 --- a/maplib/src/mapping.rs +++ b/maplib/src/mapping.rs @@ -10,7 +10,7 @@ use crate::ast::{ use crate::constants::OTTR_TRIPLE; use crate::document::document_from_str; use crate::errors::MaplibError; -use crate::mapping::constant_terms::constant_to_expr; +use crate::mapping::constant_terms::{constant_blank_node_to_series, constant_to_expr}; use crate::mapping::errors::MappingError; use crate::templates::TemplateDataset; use log::debug; @@ -21,7 +21,7 @@ use polars_core::series::Series; use rayon::iter::ParallelDrainRange; use rayon::iter::ParallelIterator; use representation::RDFNodeType; -use std::cmp::min; +use std::cmp::{max, min}; use std::collections::{HashMap, HashSet}; use std::io::Write; use std::path::Path; @@ -33,9 +33,11 @@ pub struct Mapping { template_dataset: TemplateDataset, pub triplestore: Triplestore, use_caching: bool, + blank_node_counter: usize, } #[derive(Clone)] +#[derive(Default)] pub struct ExpandOptions { pub language_tags: Option>, pub unique_subsets: Option>>, @@ -54,14 +56,7 @@ struct StaticColumn { ptype: Option, } -impl Default for ExpandOptions { - fn default() -> Self { - ExpandOptions { - language_tags: None, - unique_subsets: None, - } - } -} + #[derive(Clone, Debug)] pub struct PrimitiveColumn { @@ -84,8 +79,9 @@ impl Mapping { Ok(Mapping { template_dataset: template_dataset.clone(), triplestore: Triplestore::new(caching_folder) - .map_err(|x| MappingError::TriplestoreError(x))?, + .map_err(MappingError::TriplestoreError)?, use_caching, + blank_node_counter: 0, }) } @@ -94,7 +90,7 @@ impl Mapping { caching_folder: Option, ) -> Result { let dataset = - 
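// Note on a pattern recurring throughout this patch: a tuple enum variant
// such as MaplibError::TemplateError is itself a function, so the closure
// form `.map_err(|x| MaplibError::TemplateError(x))` can be shortened to
// `.map_err(MaplibError::TemplateError)` (clippy's redundant_closure lint).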
TemplateDataset::from_folder(path).map_err(|x| MaplibError::TemplateError(x))?; + TemplateDataset::from_folder(path).map_err(MaplibError::TemplateError)?; Mapping::new(&dataset, caching_folder) } @@ -103,13 +99,13 @@ impl Mapping { caching_folder: Option, ) -> Result { let dataset = - TemplateDataset::from_file(path).map_err(|x| MaplibError::TemplateError(x))?; + TemplateDataset::from_file(path).map_err(MaplibError::TemplateError)?; Mapping::new(&dataset, caching_folder) } pub fn from_str(s: &str, caching_folder: Option) -> Result { - let doc = document_from_str(s.into())?; - let dataset = TemplateDataset::new(vec![doc]).map_err(|x| MaplibError::TemplateError(x))?; + let doc = document_from_str(s)?; + let dataset = TemplateDataset::new(vec![doc]).map_err(MaplibError::TemplateError)?; Mapping::new(&dataset, caching_folder) } @@ -119,10 +115,10 @@ impl Mapping { ) -> Result { let mut docs = vec![]; for s in ss { - let doc = document_from_str(s.into())?; + let doc = document_from_str(s)?; docs.push(doc); } - let dataset = TemplateDataset::new(docs).map_err(|x| MaplibError::TemplateError(x))?; + let dataset = TemplateDataset::new(docs).map_err(MaplibError::TemplateError)?; Mapping::new(&dataset, caching_folder) } @@ -136,20 +132,20 @@ impl Mapping { pub fn write_native_parquet(&mut self, path: &str) -> Result<(), MappingError> { self.triplestore .write_native_parquet(Path::new(path)) - .map_err(|x| MappingError::TriplestoreError(x)) + .map_err(MappingError::TriplestoreError) } pub fn export_oxrdf_triples(&mut self) -> Result, MappingError> { self.triplestore .export_oxrdf_triples() - .map_err(|x| MappingError::TriplestoreError(x)) + .map_err(MappingError::TriplestoreError) } fn resolve_template(&self, s: &str) -> Result<&Template, MappingError> { if let Some(t) = self.template_dataset.get(s) { return Ok(t); } else { - let mut split_colon = s.split(":"); + let mut split_colon = s.split(':'); let prefix_maybe = split_colon.next(); if let Some(prefix) = prefix_maybe { if let Some(nn) = self.template_dataset.prefix_map.get(prefix) { @@ -198,28 +194,32 @@ impl Mapping { let to_row = min(df.height(), offset as usize + chunk_size); let df_slice = df.slice_par(offset, to_row); offset += chunk_size as i64; - let result_vec = self._expand( + let (result_vec, new_blank_node_counter) = self._expand( + 0, + self.blank_node_counter, &target_template_name, df_slice, columns.clone(), HashMap::new(), unique_subsets.clone(), )?; - self.process_results(result_vec, &call_uuid)?; + self.process_results(result_vec, &call_uuid, new_blank_node_counter)?; debug!("Finished processing {} rows", to_row); if offset >= df.height() as i64 { break; } } } else { - let result_vec = self._expand( + let (result_vec, new_blank_node_counter) = self._expand( + 0, + self.blank_node_counter, &target_template_name, df, columns, HashMap::new(), unique_subsets, )?; - self.process_results(result_vec, &call_uuid)?; + self.process_results(result_vec, &call_uuid, new_blank_node_counter)?; debug!("Expansion took {} seconds", now.elapsed().as_secs_f32()); } Ok(MappingReport {}) @@ -227,24 +227,26 @@ impl Mapping { fn _expand( &self, + layer: usize, + mut blank_node_counter: usize, name: &str, df: DataFrame, dynamic_columns: HashMap, static_columns: HashMap, unique_subsets: Vec>, - ) -> Result, MappingError> { - //At this point, the lf should have columns with names appropriate for the template to be instantiated (named_node). 
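// `_expand` now also returns the highest blank node counter it produced.
// Each recursive call passes `layer + 1` downwards, and every blank node
// constant consumes one counter value per input row, so a minted label such
// as `_:person_l0_r1` (layer 0, row 1) cannot collide with labels from a
// nested template or from a later call to expand().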
+ ) -> Result<(Vec, usize), MappingError> { if let Some(template) = self.template_dataset.get(name) { if template.signature.template_name.as_str() == OTTR_TRIPLE { - Ok(vec![OTTRTripleInstance { - df, - dynamic_columns, - static_columns, - has_unique_subset: !unique_subsets.is_empty(), - }]) + Ok(( + vec![OTTRTripleInstance { + df, + dynamic_columns, + static_columns, + has_unique_subset: !unique_subsets.is_empty(), + }], + blank_node_counter, + )) } else { - let now = Instant::now(); - let mut expand_params_vec = vec![]; let colnames: HashSet<_> = df .get_column_names() @@ -262,9 +264,7 @@ impl Mapping { expand_params_vec.push((i, instance_series)); } - debug!("Cloning args took {} seconds", now.elapsed().as_secs_f64()); - - let results: Vec, MappingError>> = expand_params_vec + let results: Vec<_> = expand_params_vec .par_drain(..) .map(|(i, series_vec)| { let target_template = @@ -274,16 +274,22 @@ impl Mapping { instance_dynamic_columns, instance_static_columns, new_unique_subsets, + updated_blank_node_counter, ) = create_remapped( + self.blank_node_counter, + layer, i, &target_template.signature, series_vec, &dynamic_columns, &static_columns, &unique_subsets, + df.height(), )?; self._expand( + layer + 1, + updated_blank_node_counter, i.template_name.as_str(), instance_df, instance_dynamic_columns, @@ -294,10 +300,12 @@ impl Mapping { .collect(); let mut results_ok = vec![]; for r in results { - results_ok.push(r?) + let (r, new_counter) = r?; + results_ok.push(r); + blank_node_counter = max(blank_node_counter, new_counter); } - Ok(flatten(results_ok)) + Ok((flatten(results_ok), blank_node_counter)) } } else { Err(MappingError::TemplateNotFound(name.to_string())) @@ -308,13 +316,14 @@ impl Mapping { &mut self, mut result_vec: Vec, call_uuid: &String, + new_blank_node_counter: usize, ) -> Result<(), MappingError> { let now = Instant::now(); let triples: Vec< Result<(DataFrame, RDFNodeType, Option, Option, bool), MappingError>, > = result_vec .par_drain(..) 
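// The OTTR triple instances are converted to triples in parallel; each
// result carries the DataFrame together with its RDF node type, an optional
// language tag, an optional static verb column and the unique-subset flag.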
- .map(|i| create_triples(i)) + .map(create_triples) .collect(); let mut ok_triples = vec![]; for t in triples { @@ -332,8 +341,9 @@ impl Mapping { } self.triplestore .add_triples_vec(all_triples_to_add, call_uuid) - .map_err(|x| MappingError::TriplestoreError(x))?; + .map_err(MappingError::TriplestoreError)?; + self.blank_node_counter = new_blank_node_counter; debug!( "Result processing took {} seconds", now.elapsed().as_secs_f32() @@ -385,6 +395,7 @@ fn create_triples( } else { let (expr, mapped_column) = create_dynamic_expression_from_static(&k, &sc.constant_term, &sc.ptype)?; + expressions.push(expr.alias(&k)); dynamic_columns.insert(k, mapped_column); } @@ -421,25 +432,47 @@ fn create_dynamic_expression_from_static( Ok((expr, mapped_column)) } +fn create_series_from_blank_node_constant( + layer: usize, + blank_node_counter: usize, + column_name: &str, + constant_term: &ConstantTerm, + n_rows: usize, +) -> Result<(Series, PrimitiveColumn), MappingError> { + let (mut series, _, rdf_node_type) = + constant_blank_node_to_series(layer, blank_node_counter, constant_term, n_rows)?; + series.rename(column_name); + let mapped_column = PrimitiveColumn { + rdf_node_type, + language_tag: None, + }; + Ok((series, mapped_column)) +} + fn create_remapped( + mut blank_node_counter: usize, + layer: usize, instance: &Instance, signature: &Signature, mut series_vec: Vec, dynamic_columns: &HashMap, constant_columns: &HashMap, unique_subsets: &Vec>, + input_df_height: usize, ) -> Result< ( DataFrame, HashMap, HashMap, Vec>, + usize, ), MappingError, > { let now = Instant::now(); let mut new_dynamic_columns = HashMap::new(); let mut new_constant_columns = HashMap::new(); + let mut new_series = vec![]; let mut new_dynamic_from_constant = vec![]; let mut to_expand = vec![]; @@ -476,7 +509,19 @@ fn create_remapped( } } StottrTerm::ConstantTerm(ct) => { - if original.list_expand { + if ct.has_blank_node() { + let (series, primitive_column) = create_series_from_blank_node_constant( + layer, + blank_node_counter, + target_colname, + ct, + input_df_height, + )?; + new_series.push(series); + new_dynamic_columns.insert(target_colname.clone(), primitive_column); + new_dynamic_from_constant.push(target_colname); + blank_node_counter += input_df_height; + } else if original.list_expand { let (expr, primitive_column) = create_dynamic_expression_from_static(target_colname, ct, &target.ptype)?; expressions.push(expr); @@ -500,6 +545,9 @@ fn create_remapped( let sname = s.name().to_string(); s.rename(rename_map.get_mut(&sname).unwrap().pop().unwrap()); } + for s in new_series { + series_vec.push(s); + } let mut lf = DataFrame::new(series_vec).unwrap().lazy(); for expr in expressions { @@ -554,6 +602,7 @@ fn create_remapped( new_dynamic_columns, new_constant_columns, new_unique_subsets, + blank_node_counter, )) } diff --git a/maplib/src/mapping/constant_terms.rs b/maplib/src/mapping/constant_terms.rs index 86431ac..ca8bec6 100644 --- a/maplib/src/mapping/constant_terms.rs +++ b/maplib/src/mapping/constant_terms.rs @@ -7,9 +7,13 @@ use oxrdf::NamedNode; use polars::prelude::{concat_list, lit, Expr, LiteralValue, SpecialEq}; use polars_core::datatypes::DataType; use polars_core::prelude::{AnyValue, IntoSeries, ListChunked, Series}; +use rayon::iter::ParallelIterator; +use rayon::prelude::IntoParallelIterator; use representation::literals::sparql_literal_to_any_value; use std::ops::Deref; +const BLANK_NODE_SERIES_NAME: &str = "blank_node_series"; + pub fn constant_to_expr( constant_term: &ConstantTerm, ptype_opt: &Option, @@ 
-22,15 +26,9 @@ pub fn constant_to_expr( RDFNodeType::IRI, None, ), - ConstantLiteral::BlankNode(bn) => ( - Expr::Literal(LiteralValue::Utf8(bn.as_str().to_string())), - PType::BasicType( - NamedNode::new_unchecked(BLANK_NODE_IRI), - BLANK_NODE_IRI.to_string(), - ), - RDFNodeType::BlankNode, - None, - ), + ConstantLiteral::BlankNode(_) => { + panic!("Should never happen") + } ConstantLiteral::Literal(lit) => { let (mut any, dt) = sparql_literal_to_any_value(&lit.value, &lit.data_type_iri); let mut value_series = Series::new_empty("literal", &DataType::Utf8); @@ -41,11 +39,7 @@ pub fn constant_to_expr( } else { value_series = value_series.extend_constant(any, 1).unwrap(); } - let language_tag = if let Some(tag) = &lit.language { - Some(tag.clone()) - } else { - None - }; + let language_tag = lit.language.as_ref().cloned(); ( Expr::Literal(LiteralValue::Series(SpecialEq::new(value_series))), PType::BasicType( @@ -117,14 +111,51 @@ pub fn constant_to_expr( } } }; - if let Some(ptype_in) = ptype_opt { - if ptype_in != &ptype { + if let Some(ptype_inferred) = ptype_opt { + if ptype_inferred != &ptype { return Err(MappingError::ConstantDoesNotMatchDataType( constant_term.clone(), - ptype_in.clone(), - ptype.clone(), + ptype_inferred.clone(), + ptype, )); } } Ok((expr, ptype, rdf_node_type, language_tag)) } + +pub fn constant_blank_node_to_series( + layer: usize, + blank_node_counter: usize, + constant_term: &ConstantTerm, + n_rows: usize, +) -> Result<(Series, PType, RDFNodeType), MappingError> { + Ok(match constant_term { + ConstantTerm::Constant(ConstantLiteral::BlankNode(bl)) => { + let any_value_vec: Vec<_> = (blank_node_counter..(blank_node_counter + n_rows)) + .into_par_iter() + .map(|i| AnyValue::Utf8Owned(format!("_:{}_l{}_r{}", bl.as_str(), layer, i).into())) + .collect(); + + ( + Series::from_any_values_and_dtype( + BLANK_NODE_SERIES_NAME, + any_value_vec.as_slice(), + &DataType::Utf8, + false, + ) + .unwrap(), + PType::BasicType( + NamedNode::new_unchecked(BLANK_NODE_IRI), + BLANK_NODE_IRI.to_string(), + ), + RDFNodeType::BlankNode, + ) + } + ConstantTerm::ConstantList(_) => { + todo!("Not yet implemented support for lists of blank nodes") + } + _ => { + panic!("Should never happen") + } + }) +} diff --git a/maplib/src/mapping/default.rs b/maplib/src/mapping/default.rs index 78ceb42..ba0d1f2 100644 --- a/maplib/src/mapping/default.rs +++ b/maplib/src/mapping/default.rs @@ -34,7 +34,7 @@ impl Mapping { .map(|x| x.to_string()) .collect(); for c in &columns { - let dt = df.column(&c).unwrap().dtype().clone(); + let dt = df.column(c).unwrap().dtype().clone(); let has_null = df.column(c).unwrap().is_null().any(); if c == &pk_col { if let DataType::List(..) = dt { @@ -47,7 +47,7 @@ impl Mapping { ); df = df .lazy() - .with_column(col(&c).cast(DataType::Utf8)) + .with_column(col(c).cast(DataType::Utf8)) .collect() .unwrap(); } @@ -64,7 +64,7 @@ impl Mapping { }, default_value: None, }) - } else if fk_cols.contains(&c) { + } else if fk_cols.contains(c) { if let DataType::List(..) 
= dt {
                    todo!()
                }
@@ -76,7 +76,7 @@ impl Mapping {
                 );
                 df = df
                     .lazy()
-                    .with_column(col(&c).cast(DataType::Utf8))
+                    .with_column(col(c).cast(DataType::Utf8))
                     .collect()
                     .unwrap();
             }
diff --git a/maplib/src/mapping/validation_inference.rs b/maplib/src/mapping/validation_inference.rs
index 1a31ab2..789f01c 100644
--- a/maplib/src/mapping/validation_inference.rs
+++ b/maplib/src/mapping/validation_inference.rs
@@ -26,15 +26,15 @@ impl Mapping {
             if df_columns.contains(variable_name.as_str()) {
                 df_columns.remove(variable_name.as_str());
                 if !parameter.optional {
-                    validate_non_optional_parameter(&df, variable_name)?;
+                    validate_non_optional_parameter(df, variable_name)?;
                 }
                 if parameter.non_blank {
                     //TODO handle blanks;
-                    validate_non_blank_parameter(&df, variable_name)?;
+                    validate_non_blank_parameter(df, variable_name)?;
                 }
                 let column_data_type = validate_infer_column_data_type(
                     df,
-                    &parameter,
+                    parameter,
                     variable_name,
                     &options.language_tags,
                 )?;
@@ -67,16 +67,12 @@ fn validate_infer_column_data_type(
         validate_datatype(series.name(), dtype, ptype)?;
         ptype.clone()
     } else {
-        let target_ptype = polars_datatype_to_xsd_datatype(dtype);
-        target_ptype
+        polars_datatype_to_xsd_datatype(dtype)
     };
     let rdf_node_type = infer_rdf_node_type(&ptype);
     let language_tag = if let Some(map) = language_tag_map {
-        if let Some(tag) = map.get(column_name) {
-            Some(tag.clone())
-        } else {
-            None
-        }
+        map.get(column_name).cloned()
     } else {
         None
     };
diff --git a/maplib/src/parsing.rs b/maplib/src/parsing.rs
index 12aa6da..30d7722 100644
--- a/maplib/src/parsing.rs
+++ b/maplib/src/parsing.rs
@@ -12,7 +12,7 @@ pub fn whole_stottr_doc(s: &str) -> Result {
-            if rest != "" {
+            if !rest.is_empty() {
                 Err(ParsingError {
                     kind: ParsingErrorKind::CouldNotParseEverything(rest.to_string()),
                 })
diff --git a/maplib/src/parsing/nom_parsing.rs b/maplib/src/parsing/nom_parsing.rs
index c65cdff..e0ee451 100644
--- a/maplib/src/parsing/nom_parsing.rs
+++ b/maplib/src/parsing/nom_parsing.rs
@@ -269,10 +269,10 @@ fn parameter(p: &str) -> IResult<&str, UnresolvedParameter> {
     let mut optional = false;
     let mut non_blank = false;
     if let Some(mode) = opt_mode {
-        if mode.contains(&"!") {
+        if mode.contains('!') {
             non_blank = true;
         }
-        if mode.contains(&"?") {
+        if mode.contains('?') {
             optional = true;
         }
     }
@@ -401,7 +401,7 @@ fn blank_node_label(b: &str) -> IResult<&str, String> {
     ))(b)?;
     let mut out = startchar.to_string();
     if let Some(period) = opt_period {
-        out += &period.to_string();
+        out += period;
     }
     let stringvec: Vec<String> = period_sep_list
         .iter()
@@ -707,7 +707,7 @@ fn base(b: &str) -> IResult<&str, NamedNode> {
 }

 fn prefix_id(p: &str) -> IResult<&str, Prefix> {
-    let (p, (_, _, _, name, _, iri,_ , _)) = tuple((
+    let (p, (_, _, _, name, _, iri, _, _)) = tuple((
         multispace0,
         tag("@prefix"),
         multispace0,
@@ -862,7 +862,7 @@ fn pn_local(p: &str) -> IResult<&str, String> {
     ))(p)?;
     let mut out = s1.to_string();
     if let Some(period) = opt_period {
-        out += &period.to_string();
+ out += period; } let liststrings: Vec = s2.into_iter().map(|x| x.join("")).collect(); out += &liststrings.join("."); @@ -890,7 +890,7 @@ fn one_digit(d: &str) -> IResult<&str, char> { } fn pn_local_esc(s: &str) -> IResult<&str, String> { - let esc = r#"\(_~.-!$&\()*+,;=/?#@%"#; + let esc = r"\(_~.-!$&\()*+,;=/?#@%"; let (s, (_, c)) = tuple((tag("\\"), one_of(esc)))(s)?; Ok((s, c.to_string())) } diff --git a/maplib/src/templates.rs b/maplib/src/templates.rs index 1c7a628..f4913bd 100644 --- a/maplib/src/templates.rs +++ b/maplib/src/templates.rs @@ -115,9 +115,9 @@ impl TemplateDataset { pub fn from_folder>(path: P) -> Result { let mut docs = vec![]; let files_result = - read_dir(path).map_err(|e| TemplateError::ReadTemplateDirectoryError(e))?; + read_dir(path).map_err(TemplateError::ReadTemplateDirectoryError)?; for f in files_result { - let f = f.map_err(|x| TemplateError::ResolveDirectoryEntryError(x))?; + let f = f.map_err(TemplateError::ResolveDirectoryEntryError)?; if let Some(e) = f.path().extension() { if let Some(s) = e.to_str() { let extension = s.to_lowercase(); @@ -128,21 +128,16 @@ impl TemplateDataset { } } } - Ok(TemplateDataset::new(docs)?) + TemplateDataset::new(docs) } pub fn from_file>(path: P) -> Result { let doc = document_from_file(path)?; - Ok(TemplateDataset::new(vec![doc])?) + TemplateDataset::new(vec![doc]) } pub fn get(&self, template: &str) -> Option<&Template> { - for t in &self.templates { - if t.signature.template_name.as_str() == template { - return Some(t); - } - } - None + self.templates.iter().find(|&t| t.signature.template_name.as_str() == template) } fn infer_types(&mut self) -> Result<(), TemplateError> { @@ -155,7 +150,7 @@ impl TemplateDataset { inner_changed = inner_changed || infer_template_types( element.first_mut().unwrap(), - (&left).iter().chain((&right).iter()).collect(), + left.iter().chain(right.iter()).collect(), )?; } if !inner_changed { @@ -242,23 +237,21 @@ fn lub_update( if my_parameter.ptype.is_none() { my_parameter.ptype = Some(right.clone()); Ok(true) - } else { - if my_parameter.ptype.as_ref().unwrap() != right { - let ptype = lub( - template_name, - variable, - my_parameter.ptype.as_ref().unwrap(), - right, - )?; - if my_parameter.ptype.as_ref().unwrap() != &ptype { - my_parameter.ptype = Some(ptype); - Ok(true) - } else { - Ok(false) - } + } else if my_parameter.ptype.as_ref().unwrap() != right { + let ptype = lub( + template_name, + variable, + my_parameter.ptype.as_ref().unwrap(), + right, + )?; + if my_parameter.ptype.as_ref().unwrap() != &ptype { + my_parameter.ptype = Some(ptype); + Ok(true) } else { Ok(false) } + } else { + Ok(false) } } @@ -271,39 +264,37 @@ fn lub( ) -> Result { if left == right { return Ok(left.clone()); - } else { - if let PType::NEListType(left_inner) = left { - if let PType::ListType(right_inner) = right { - return Ok(PType::NEListType(Box::new(lub( - template_name, - variable, - left_inner, - right_inner, - )?))); - } else if let PType::NEListType(right_inner) = right { - return Ok(PType::NEListType(Box::new(lub( - template_name, - variable, - left_inner, - right_inner, - )?))); - } - } else if let PType::ListType(left_inner) = left { - if let PType::NEListType(right_inner) = right { - return Ok(PType::NEListType(Box::new(lub( - template_name, - variable, - left_inner, - right_inner, - )?))); - } else if let PType::ListType(right_inner) = right { - return Ok(PType::ListType(Box::new(lub( - template_name, - variable, - left_inner, - right_inner, - )?))); - } + } else if let 
PType::NEListType(left_inner) = left { + if let PType::ListType(right_inner) = right { + return Ok(PType::NEListType(Box::new(lub( + template_name, + variable, + left_inner, + right_inner, + )?))); + } else if let PType::NEListType(right_inner) = right { + return Ok(PType::NEListType(Box::new(lub( + template_name, + variable, + left_inner, + right_inner, + )?))); + } + } else if let PType::ListType(left_inner) = left { + if let PType::NEListType(right_inner) = right { + return Ok(PType::NEListType(Box::new(lub( + template_name, + variable, + left_inner, + right_inner, + )?))); + } else if let PType::ListType(right_inner) = right { + return Ok(PType::ListType(Box::new(lub( + template_name, + variable, + left_inner, + right_inner, + )?))); } } Err(TemplateError::IncompatibleTypes( diff --git a/maplib/tests/test_stottr.rs b/maplib/tests/test_stottr.rs index 6f1363b..4e82a86 100644 --- a/maplib/tests/test_stottr.rs +++ b/maplib/tests/test_stottr.rs @@ -46,7 +46,7 @@ fn test_maplib_easy_case(testdata_path: PathBuf) { let series = [v1, v2]; let df = DataFrame::from_iter(series); - let mut mapping = Mapping::from_str(&t_str, None).unwrap(); + let mut mapping = Mapping::from_str(t_str, None).unwrap(); let _report = mapping .expand( "http://example.net/ns#ExampleTemplate", @@ -88,7 +88,7 @@ fn test_all_iri_case() { let series = [v1]; let df = DataFrame::from_iter(series); - let mut mapping = Mapping::from_str(&t_str, None).unwrap(); + let mut mapping = Mapping::from_str(t_str, None).unwrap(); let _report = mapping .expand( "http://example.net/ns#ExampleTemplate", @@ -98,7 +98,7 @@ fn test_all_iri_case() { .expect(""); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#anObject")), @@ -134,7 +134,7 @@ fn test_string_language_tag_cases() { let series = [my_string]; let df = DataFrame::from_iter(series); - let mut mapping = Mapping::from_str(&t_str, None).unwrap(); + let mut mapping = Mapping::from_str(t_str, None).unwrap(); let _report = mapping .expand( "http://example.net/ns#ExampleTemplate", @@ -150,7 +150,7 @@ fn test_string_language_tag_cases() { .expect(""); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#anObject")), @@ -197,7 +197,7 @@ fn test_const_list_case() { let series = [v1]; let df = DataFrame::from_iter(series); - let mut mapping = Mapping::from_str(&t_str, None).unwrap(); + let mut mapping = Mapping::from_str(t_str, None).unwrap(); let _report = mapping .expand( "http://example.net/ns#ExampleTemplate", @@ -207,7 +207,7 @@ fn test_const_list_case() { .expect(""); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#OneThing")), @@ -262,7 +262,7 @@ ex:Nested [?myVar] :: { 
ottr:Triple(ex:anObject, ex:hasNumber, ?myVar) } . "#; - let mut mapping = Mapping::from_str(&stottr, None).unwrap(); + let mut mapping = Mapping::from_str(stottr, None).unwrap(); let mut v1 = Series::from_iter(&[1, 2i32]); v1.rename("myVar1"); let mut v2 = Series::from_iter(&[3, 4i32]); @@ -278,7 +278,7 @@ ex:Nested [?myVar] :: { .unwrap(); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#anObject")), @@ -350,7 +350,7 @@ ex:ExampleTemplate [ ottr:Triple(ex:yetAnotherObject, ex:hasDateTime, ?Datetime_ms) } . "#; - let mut mapping = Mapping::from_str(&stottr, None).unwrap(); + let mut mapping = Mapping::from_str(stottr, None).unwrap(); let mut boolean = Series::from_iter(&[true, false]); boolean.rename("Boolean"); let mut uint32 = Series::from_iter(&[5u32, 6u32]); @@ -612,7 +612,7 @@ ex:AnotherExampleTemplate [?object, ?predicate, ?myList] :: { cross | ottr:Triple(?object, ?predicate, ++?myList) } . "#; - let mut mapping = Mapping::from_str(&stottr, None).unwrap(); + let mut mapping = Mapping::from_str(stottr, None).unwrap(); let mut object = Series::from_iter([ "http://example.net/ns#obj1", "http://example.net/ns#obj1", @@ -647,7 +647,7 @@ ex:AnotherExampleTemplate [?object, ?predicate, ?myList] :: { .unwrap(); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#obj1")), @@ -698,7 +698,7 @@ ex:AnotherExampleTemplate [?subject, ?myList1, ?myList2] :: { ottr:Triple(?subject, ex:hasOtherNumber, ?myVar2) } . 
"#; - let mut mapping = Mapping::from_str(&stottr, None).unwrap(); + let mut mapping = Mapping::from_str(stottr, None).unwrap(); let mut subject = Series::from_iter([ "http://example.net/ns#obj1", "http://example.net/ns#obj1", @@ -730,7 +730,7 @@ ex:AnotherExampleTemplate [?subject, ?myList1, ?myList2] :: { .unwrap(); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#obj1")), @@ -838,7 +838,7 @@ fn test_default() { .unwrap(); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#obj1")), @@ -932,7 +932,7 @@ fn test_default_list() { .unwrap(); let triples = mapping.export_oxrdf_triples().unwrap(); //println!("{:?}", triples); - let actual_triples_set: HashSet = HashSet::from_iter(triples.into_iter()); + let actual_triples_set: HashSet = HashSet::from_iter(triples); let expected_triples_set = HashSet::from([ Triple { subject: Subject::NamedNode(NamedNode::new_unchecked("http://example.net/ns#obj1")), diff --git a/parquet_io/src/lib.rs b/parquet_io/src/lib.rs index c479b41..dfe1f3a 100644 --- a/parquet_io/src/lib.rs +++ b/parquet_io/src/lib.rs @@ -43,12 +43,12 @@ pub fn property_to_filename(property_name: &str) -> String { } pub fn write_parquet(df: &mut DataFrame, file_path: &Path) -> Result<(), ParquetIOError> { - let file = File::create(file_path).map_err(|x| ParquetIOError::FileCreateIOError(x))?; + let file = File::create(file_path).map_err(ParquetIOError::FileCreateIOError)?; let mut writer = ParquetWriter::new(file); writer = writer.with_row_group_size(Some(1_000)); writer .finish(df) - .map_err(|x| ParquetIOError::WriteParquetError(x))?; + .map_err(ParquetIOError::WriteParquetError)?; Ok(()) } @@ -65,7 +65,7 @@ pub fn read_parquet(file_path: &String) -> Result { ..Default::default() }, ) - .map_err(|x| ParquetIOError::ReadParquetError(x)) + .map_err(ParquetIOError::ReadParquetError) } pub fn split_write_tmp_df( @@ -80,7 +80,7 @@ pub fn split_write_tmp_df( loop { let to_row = min(df.height(), offset as usize + chunk_size); let mut df_slice = df.slice_par(offset, to_row); - let file_name = format!("tmp_{}_{}.parquet", predicate, Uuid::new_v4().to_string()); + let file_name = format!("tmp_{}_{}.parquet", predicate, Uuid::new_v4()); let path_buf: PathBuf = [caching_folder, &file_name].iter().collect(); let path = path_buf.as_path(); write_parquet(&mut df_slice, path)?; diff --git a/py_maplib/tests/test_blank_nodes.py b/py_maplib/tests/test_blank_nodes.py new file mode 100644 index 0000000..99e41b8 --- /dev/null +++ b/py_maplib/tests/test_blank_nodes.py @@ -0,0 +1,51 @@ +import polars as pl +import pytest +from polars.testing import assert_frame_equal + +from maplib import Mapping + +pl.Config.set_fmt_str_lengths(300) + + +@pytest.fixture(scope="function") +def blank_person_mapping(): + # The following example comes from https://primer.ottr.xyz/01-basics.html + + doc = """ + @prefix rdf: . + @prefix rdfs: . + @prefix owl: . + @prefix xsd: . + @prefix foaf: . + @prefix dbp: . + @prefix ex: . 
+ @prefix ottr: . + @prefix ax: . + @prefix rstr: . + ex:Person[ ?firstName, ?lastName, ?email ] :: { + ottr:Triple(_:person, rdf:type, foaf:Person ), + ottr:Triple(_:person, foaf:firstName, ?firstName ), + ottr:Triple(_:person, foaf:lastName, ?lastName ), + ottr:Triple(_:person, foaf:mbox, ?email ) + } . + """ + m = Mapping([doc]) + df = pl.DataFrame({"firstName": ["Ann", "Bob"], + "lastName": ["Strong", "Brite"], + "email": ["mailto:ann.strong@example.com", "mailto:bob.brite@example.com"]}) + m.expand("ex:Person", df) + return m + + +def test_simple_query_no_error(blank_person_mapping): + df = blank_person_mapping.query(""" + PREFIX foaf: + + SELECT ?p ?lastName WHERE { + ?p a foaf:Person . + ?p foaf:lastName ?lastName . + } ORDER BY ?p ?lastName + """) + expected_df = pl.DataFrame({"p": ["_:person_l0_r0", "_:person_l0_r1"], + "lastName": ["Strong", "Brite"]}) + assert_frame_equal(df, expected_df) diff --git a/representation/src/lib.rs b/representation/src/lib.rs index a5bba75..bdb47fe 100644 --- a/representation/src/lib.rs +++ b/representation/src/lib.rs @@ -38,7 +38,8 @@ impl RDFNodeType { } pub fn find_triple_type(&self) -> TripleType { - let triple_type = if let RDFNodeType::IRI = self { + + if let RDFNodeType::IRI = self { TripleType::ObjectProperty } else if let RDFNodeType::Literal(lit) = self { if lit.as_ref() == xsd::STRING { @@ -48,8 +49,7 @@ impl RDFNodeType { } } else { todo!("Triple type {:?} not supported", self) - }; - triple_type + } } pub fn polars_data_type(&self) -> DataType { diff --git a/representation/src/literals.rs b/representation/src/literals.rs index 11c1e35..6ec527c 100644 --- a/representation/src/literals.rs +++ b/representation/src/literals.rs @@ -64,5 +64,5 @@ pub fn sparql_literal_to_any_value( } else { (AnyValue::Utf8Owned(value.into()), xsd::STRING.into_owned()) }; - return (anyv.into_static().unwrap(), dt); + (anyv.into_static().unwrap(), dt) } diff --git a/triplestore/src/conversion.rs b/triplestore/src/conversion.rs index cf5b7f3..91906ae 100644 --- a/triplestore/src/conversion.rs +++ b/triplestore/src/conversion.rs @@ -79,6 +79,6 @@ fn hack_format_timestamp_with_timezone(series: &Series, tz: &mut TimeZone) -> Se datetime_strings } else { - panic!("Unknown timezone{}", tz.to_string()); + panic!("Unknown timezone{}", tz); } } diff --git a/triplestore/src/io_funcs.rs b/triplestore/src/io_funcs.rs index 9200e9d..8c206db 100644 --- a/triplestore/src/io_funcs.rs +++ b/triplestore/src/io_funcs.rs @@ -7,12 +7,12 @@ pub(crate) fn delete_tmp_parquets_in_caching_folder( caching_folder: &Path, ) -> Result<(), TriplestoreError> { let contents = - read_dir(caching_folder).map_err(|x| TriplestoreError::ReadCachingDirectoryError(x))?; + read_dir(caching_folder).map_err(TriplestoreError::ReadCachingDirectoryError)?; for f in contents { - let entry = f.map_err(|x| TriplestoreError::ReadCachingDirectoryEntryError(x))?; + let entry = f.map_err(TriplestoreError::ReadCachingDirectoryEntryError)?; let fname = entry.file_name().to_str().unwrap().to_string(); if fname.starts_with("tmp_") && fname.ends_with(".parquet") { - remove_file(entry.path()).map_err(|x| TriplestoreError::RemoveParquetFileError(x))?; + remove_file(entry.path()).map_err(TriplestoreError::RemoveParquetFileError)?; } } Ok(()) @@ -20,7 +20,7 @@ pub(crate) fn delete_tmp_parquets_in_caching_folder( pub(crate) fn create_folder_if_not_exists(path: &Path) -> Result<(), TriplestoreError> { if !path.exists() { - create_dir(path).map_err(|x| TriplestoreError::FolderCreateIOError(x))?; + 
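// The caching folder is only created when it does not already exist; an IO
// failure is wrapped in TriplestoreError::FolderCreateIOError.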
create_dir(path).map_err(TriplestoreError::FolderCreateIOError)?; } Ok(()) } diff --git a/triplestore/src/lib.rs b/triplestore/src/lib.rs index e44da44..21a2aef 100644 --- a/triplestore/src/lib.rs +++ b/triplestore/src/lib.rs @@ -62,7 +62,7 @@ impl TripleTable { Ok(dfs.get(idx).unwrap()) } else if let Some(paths) = &self.df_paths { let tmp_df = read_parquet(paths.get(idx).unwrap()) - .map_err(|x| TriplestoreError::ParquetIOError(x))? + .map_err(TriplestoreError::ParquetIOError)? .collect() .unwrap(); self.tmp_df = Some(tmp_df); @@ -81,10 +81,10 @@ impl TripleTable { Ok(vec![concat_df(dfs).unwrap().lazy()]) } else if let Some(paths) = &self.df_paths { let lf_results: Vec> = - paths.par_iter().map(|x| read_parquet(x)).collect(); + paths.par_iter().map(read_parquet).collect(); let mut lfs = vec![]; for lfr in lf_results { - lfs.push(lfr.map_err(|x| TriplestoreError::ParquetIOError(x))?); + lfs.push(lfr.map_err(TriplestoreError::ParquetIOError)?); } Ok(lfs) } else { @@ -132,11 +132,11 @@ impl Triplestore { .as_ref() .unwrap() .par_iter() - .map(|x| read_parquet(x)) + .map(read_parquet) .collect(); let mut lfs = vec![]; for lf_res in lf_results { - lfs.push(lf_res.map_err(|x| TriplestoreError::ParquetIOError(x))?); + lfs.push(lf_res.map_err(TriplestoreError::ParquetIOError)?); } let unique_df = concat(lfs, UnionArgs::default()) .unwrap() @@ -152,14 +152,14 @@ impl Triplestore { .map(|x| remove_file(Path::new(x))) .collect(); for r in removed { - r.map_err(|x| TriplestoreError::RemoveParquetFileError(x))? + r.map_err(TriplestoreError::RemoveParquetFileError)? } let paths = split_write_tmp_df( self.caching_folder.as_ref().unwrap(), unique_df, predicate, ) - .map_err(|x| TriplestoreError::ParquetIOError(x))?; + .map_err(TriplestoreError::ParquetIOError)?; v.df_paths = Some(paths); v.unique = true; } else { @@ -198,14 +198,14 @@ impl Triplestore { static_verb_column, has_unique_subset, } = t; - let prepared_triples = prepare_triples( + + prepare_triples( df, &object_type, &language_tag, static_verb_column, has_unique_subset, - ); - prepared_triples + ) }) .collect(); let dfs_to_add = flatten(df_vecs_to_add); @@ -218,7 +218,7 @@ impl Triplestore { triples_df: Vec, call_uuid: &String, ) -> Result<(), TriplestoreError> { - if let Some(_) = &self.caching_folder { + if self.caching_folder.is_some() { self.add_triples_df_with_caching_folder(triples_df, call_uuid)?; } else { self.add_triples_df_without_folder(triples_df, call_uuid); @@ -250,14 +250,14 @@ impl Triplestore { let file_path = file_path_buf.as_path(); ( file_path.to_str().unwrap().to_string(), - write_parquet(&mut df, &file_path), + write_parquet(&mut df, file_path), predicate, object_type, ) }) .collect(); for (file_path, res, predicate, object_type) in file_paths { - res.map_err(|x| TriplestoreError::ParquetIOError(x))?; + res.map_err(TriplestoreError::ParquetIOError)?; //Safe to assume everything is unique if let Some(m) = self.df_map.get_mut(&predicate) { if let Some(v) = m.get_mut(&object_type) { diff --git a/triplestore/src/native_parquet_write.rs b/triplestore/src/native_parquet_write.rs index f5041ff..48985ff 100644 --- a/triplestore/src/native_parquet_write.rs +++ b/triplestore/src/native_parquet_write.rs @@ -35,7 +35,7 @@ impl Triplestore { filename = format!("{}_object_property", property_to_filename(property),) } let file_path = path_buf.clone(); - if let Some(_) = &self.caching_folder { + if self.caching_folder.is_some() { } else { for (i, df) in tt.dfs.as_mut().unwrap().iter_mut().enumerate() { let filename = 
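// Each in-memory DataFrame held for a predicate is written as its own part
// file, named from the sanitized predicate IRI plus the part index.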
format!("{filename}_part_{i}.parquet"); @@ -52,7 +52,7 @@ impl Triplestore { .map(|(df, file_path)| write_parquet(df, file_path.as_path())) .collect(); for r in results { - r.map_err(|x| TriplestoreError::ParquetIOError(x))?; + r.map_err(TriplestoreError::ParquetIOError)?; } debug!( diff --git a/triplestore/src/ntriples_write.rs b/triplestore/src/ntriples_write.rs index a38964e..493c7b7 100644 --- a/triplestore/src/ntriples_write.rs +++ b/triplestore/src/ntriples_write.rs @@ -80,7 +80,7 @@ impl Triplestore { } else if let Some(paths) = &tt.df_paths { for p in paths { let df = read_parquet(p) - .map_err(|x| TriplestoreError::ParquetIOError(x))? + .map_err(TriplestoreError::ParquetIOError)? .collect() .unwrap(); write_ntriples_for_df( @@ -204,7 +204,7 @@ fn write_ntriples_for_df( for mut buf in result_buf.drain(..) { let _ = writer .write(&buf) - .map_err(|x| TriplestoreError::WriteNTriplesError(x)); + .map_err(TriplestoreError::WriteNTriplesError); buf.clear(); write_buffer_pool.set(buf); } diff --git a/triplestore/src/sparql.rs b/triplestore/src/sparql.rs index 2f5307a..c69068e 100644 --- a/triplestore/src/sparql.rs +++ b/triplestore/src/sparql.rs @@ -33,14 +33,14 @@ pub enum QueryResult { impl Triplestore { pub fn query(&mut self, query: &str) -> Result { - let query = Query::parse(query, None).map_err(|x| SparqlError::ParseError(x))?; + let query = Query::parse(query, None).map_err(SparqlError::ParseError)?; self.query_parsed(&query) } fn query_parsed(&mut self, query: &Query) -> Result { if !self.deduplicated { self.deduplicate() - .map_err(|x| SparqlError::DeduplicationError(x))?; + .map_err(SparqlError::DeduplicationError)?; } enable_string_cache(true); let context = Context::new(); @@ -54,7 +54,7 @@ impl Triplestore { mappings, columns: _, rdf_node_types: _, - } = self.lazy_graph_pattern(&pattern, None, &context)?; + } = self.lazy_graph_pattern(pattern, None, &context)?; let mut df = mappings.collect().unwrap(); df = cats_to_utf8s(df); @@ -70,7 +70,7 @@ impl Triplestore { mappings, columns: _, rdf_node_types, - } = self.lazy_graph_pattern(&pattern, None, &context)?; + } = self.lazy_graph_pattern(pattern, None, &context)?; let mut df = mappings.collect().unwrap(); df = cats_to_utf8s(df); let mut dfs = vec![]; @@ -85,7 +85,7 @@ impl Triplestore { pub fn insert(&mut self, query: &str) -> Result<(), SparqlError> { let call_uuid = Uuid::new_v4().to_string(); - let query = Query::parse(query, None).map_err(|x| SparqlError::ParseError(x))?; + let query = Query::parse(query, None).map_err(SparqlError::ParseError)?; if let Query::Construct { .. 
} = &query { let res = self.query_parsed(&query)?; match res { @@ -104,7 +104,7 @@ impl Triplestore { }); } self.add_triples_vec(all_triples_to_add, &call_uuid) - .map_err(|x| SparqlError::StoreTriplesError(x))?; + .map_err(SparqlError::StoreTriplesError)?; Ok(()) } } @@ -155,7 +155,7 @@ fn triple_has_variable(t: &TriplePattern) -> bool { if let TermPattern::Variable(_) = t.object { return true; } - return false; + false } fn term_pattern_series( @@ -206,7 +206,7 @@ fn named_node_pattern_series( } fn named_node_series(nn: &NamedNode, name: &str, len: usize) -> (Series, RDFNodeType) { - let nn_vec = vec![nn.as_str()].repeat(len); + let nn_vec = [nn.as_str()].repeat(len); let mut ser = Series::from_iter(nn_vec); ser.rename(name); (ser, RDFNodeType::IRI) @@ -234,5 +234,5 @@ fn cats_to_utf8s(df: DataFrame) -> DataFrame { for c in cats { lf = lf.with_column(col(&c).cast(DataType::Utf8)) } - return lf.collect().unwrap(); + lf.collect().unwrap() } diff --git a/triplestore/src/sparql/lazy_aggregate.rs b/triplestore/src/sparql/lazy_aggregate.rs index 96dd431..73037ba 100644 --- a/triplestore/src/sparql/lazy_aggregate.rs +++ b/triplestore/src/sparql/lazy_aggregate.rs @@ -47,8 +47,7 @@ impl Triplestore { column_context = None; let all_proper_column_names: Vec = output_solution_mappings .columns - .iter() - .map(|x| x.clone()) + .iter().cloned() .collect(); let columns_expr = Expr::Columns(all_proper_column_names); if *distinct { diff --git a/triplestore/src/sparql/lazy_expressions.rs b/triplestore/src/sparql/lazy_expressions.rs index 12c9cd2..42be17d 100644 --- a/triplestore/src/sparql/lazy_expressions.rs +++ b/triplestore/src/sparql/lazy_expressions.rs @@ -415,7 +415,7 @@ impl Triplestore { (Expr::BinaryExpr { left: Box::new(Expr::Literal(LiteralValue::Int32(0))), op: Operator::Plus, - right: Box::new(col(&plus_context.as_str())), + right: Box::new(col(plus_context.as_str())), }) .alias(context.as_str()), ) @@ -439,7 +439,7 @@ impl Triplestore { (Expr::BinaryExpr { left: Box::new(Expr::Literal(LiteralValue::Int32(0))), op: Operator::Minus, - right: Box::new(col(&minus_context.as_str())), + right: Box::new(col(minus_context.as_str())), }) .alias(context.as_str()), ) @@ -459,7 +459,7 @@ impl Triplestore { self.lazy_expression(inner, solution_mappings, ¬_context)?; output_solution_mappings.mappings = output_solution_mappings .mappings - .with_column(col(¬_context.as_str()).not().alias(context.as_str())) + .with_column(col(not_context.as_str()).not().alias(context.as_str())) .drop_columns([¬_context.as_str()]); output_solution_mappings.rdf_node_types.insert( context.as_str().to_string(), @@ -473,11 +473,11 @@ impl Triplestore { output_solution_mappings.mappings = output_solution_mappings .mappings .with_column( - Expr::Literal(LiteralValue::Int64(1)).alias(&exists_context.as_str()), + Expr::Literal(LiteralValue::Int64(1)).alias(exists_context.as_str()), ) - .with_column(col(&exists_context.as_str()).cumsum(false).keep_name()); + .with_column(col(exists_context.as_str()).cumsum(false).keep_name()); - let new_inner = rewrite_exists_graph_pattern(inner, &exists_context.as_str()); + let new_inner = rewrite_exists_graph_pattern(inner, exists_context.as_str()); let SolutionMappings { mappings: exists_lf, .. 
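// EXISTS is evaluated as a semi-join: the mappings first receive a unique
// running row id (the cumsum over a constant 1 column above), the EXISTS
// pattern is then run against a frame carrying that id, and a row satisfies
// EXISTS exactly when its id survives in the result (the is_in check below).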
@@ -493,19 +493,19 @@ impl Triplestore { } = output_solution_mappings; let mut df = mappings.collect().unwrap(); let exists_df = exists_lf - .select([col(&exists_context.as_str())]) + .select([col(exists_context.as_str())]) .unique(None, UniqueKeepStrategy::First) .collect() .expect("Collect lazy exists error"); let mut ser = Series::from( - df.column(&exists_context.as_str()) + df.column(exists_context.as_str()) .unwrap() - .is_in(exists_df.column(&exists_context.as_str()).unwrap()) + .is_in(exists_df.column(exists_context.as_str()).unwrap()) .unwrap(), ); ser.rename(context.as_str()); df.with_column(ser).unwrap(); - df = df.drop(&exists_context.as_str()).unwrap(); + df = df.drop(exists_context.as_str()).unwrap(); rdf_node_types.insert( context.as_str().to_string(), RDFNodeType::Literal(xsd::BOOLEAN.into_owned()), @@ -573,7 +573,7 @@ impl Triplestore { } let coalesced_context = inner_contexts.get(0).unwrap(); - let mut coalesced = col(&coalesced_context.as_str()); + let mut coalesced = col(coalesced_context.as_str()); for c in &inner_contexts[1..inner_contexts.len()] { coalesced = Expr::Ternary { predicate: Box::new(is_not_null(coalesced.clone())), @@ -626,7 +626,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .dt() .year() .alias(context.as_str()), @@ -641,7 +641,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .dt() .month() .alias(context.as_str()), @@ -656,7 +656,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .dt() .day() .alias(context.as_str()), @@ -671,7 +671,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .dt() .hour() .alias(context.as_str()), @@ -686,7 +686,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .dt() .minute() .alias(context.as_str()), @@ -701,7 +701,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .dt() .second() .alias(context.as_str()), @@ -716,7 +716,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()).abs().alias(context.as_str()), + col(first_context.as_str()).abs().alias(context.as_str()), ); let existing_type = output_solution_mappings .rdf_node_types @@ -731,7 +731,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()).ceil().alias(context.as_str()), + col(first_context.as_str()).ceil().alias(context.as_str()), ); output_solution_mappings.rdf_node_types.insert( 
context.as_str().to_string(), @@ -743,7 +743,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()).floor().alias(context.as_str()), + col(first_context.as_str()).floor().alias(context.as_str()), ); output_solution_mappings.rdf_node_types.insert( context.as_str().to_string(), @@ -782,7 +782,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .round(0) .alias(context.as_str()), ); @@ -802,7 +802,7 @@ impl Triplestore { if let Expression::Literal(l) = args.get(1).unwrap() { output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .str() .contains(lit(l.value()), false) .alias(context.as_str()), @@ -821,7 +821,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .cast(DataType::Int64) .alias(context.as_str()), ); @@ -834,7 +834,7 @@ impl Triplestore { let first_context = args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(&first_context.as_str()) + col(first_context.as_str()) .cast(DataType::Utf8) .alias(context.as_str()), ); @@ -851,9 +851,7 @@ impl Triplestore { } } output_solution_mappings.mappings = output_solution_mappings.mappings.drop_columns( - args_contexts - .iter() - .map(|(_, x)| x.as_str()) + args_contexts.values().map(|x| x.as_str()) .collect::>(), ); output_solution_mappings @@ -868,7 +866,7 @@ fn binop_type(left_type: &RDFNodeType, right_type: &RDFNodeType) -> RDFNodeType (left_type, right_type) { if left_lit.as_ref() == xsd::DOUBLE { - return left_type.clone(); + left_type.clone() } else if right_lit.as_ref() == xsd::DOUBLE { return right_type.clone(); } else if left_lit.as_ref() == xsd::FLOAT { diff --git a/triplestore/src/sparql/lazy_expressions/exists_helper.rs b/triplestore/src/sparql/lazy_expressions/exists_helper.rs index 7a2ca0a..9630a76 100644 --- a/triplestore/src/sparql/lazy_expressions/exists_helper.rs +++ b/triplestore/src/sparql/lazy_expressions/exists_helper.rs @@ -70,7 +70,7 @@ pub fn rewrite_exists_graph_pattern( } => GraphPattern::Slice { inner: Box::new(rewrite_exists_graph_pattern(inner, helper_column_name)), start: *start, - length: length.clone(), + length: *length, }, GraphPattern::Group { inner, diff --git a/triplestore/src/sparql/lazy_graph_patterns.rs b/triplestore/src/sparql/lazy_graph_patterns.rs index 95fee60..a6c242b 100644 --- a/triplestore/src/sparql/lazy_graph_patterns.rs +++ b/triplestore/src/sparql/lazy_graph_patterns.rs @@ -56,7 +56,7 @@ impl Triplestore { expression, } => self.lazy_left_join(left, right, expression, solution_mappings, context), GraphPattern::Filter { expr, inner } => { - self.lazy_filter(inner, expr, solution_mappings, &context) + self.lazy_filter(inner, expr, solution_mappings, context) } GraphPattern::Union { left, right } => { self.lazy_union(left, right, solution_mappings, context) diff --git a/triplestore/src/sparql/lazy_graph_patterns/extend.rs b/triplestore/src/sparql/lazy_graph_patterns/extend.rs index e056373..19dbd6d 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/extend.rs +++ 
b/triplestore/src/sparql/lazy_graph_patterns/extend.rs @@ -26,7 +26,7 @@ impl Triplestore { self.lazy_expression(expression, output_solution_mappings, &expression_context)?; output_solution_mappings.mappings = output_solution_mappings .mappings - .rename([expression_context.as_str()], &[variable.as_str()]); + .rename([expression_context.as_str()], [variable.as_str()]); let existing_rdf_node_type = output_solution_mappings .rdf_node_types .remove(expression_context.as_str()) diff --git a/triplestore/src/sparql/lazy_graph_patterns/filter.rs b/triplestore/src/sparql/lazy_graph_patterns/filter.rs index 5f8878b..02adad0 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/filter.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/filter.rs @@ -26,7 +26,7 @@ impl Triplestore { rdf_node_types: datatypes, } = self.lazy_expression(expression, output_solution_mappings, &expression_context)?; mappings = mappings - .filter(col(&expression_context.as_str())) + .filter(col(expression_context.as_str())) .drop_columns([&expression_context.as_str()]); Ok(SolutionMappings::new(mappings, columns, datatypes)) } diff --git a/triplestore/src/sparql/lazy_graph_patterns/group.rs b/triplestore/src/sparql/lazy_graph_patterns/group.rs index 11a5066..60da861 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/group.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/group.rs @@ -26,7 +26,7 @@ impl Triplestore { self.lazy_graph_pattern(inner, solution_mapping, &inner_context)?; let by: Vec; let dummy_varname = Uuid::new_v4().to_string(); - if variables.len() == 0 { + if variables.is_empty() { by = vec![col(&dummy_varname)]; output_solution_mappings.mappings = output_solution_mappings .mappings diff --git a/triplestore/src/sparql/lazy_graph_patterns/left_join.rs b/triplestore/src/sparql/lazy_graph_patterns/left_join.rs index b7a3ea0..abb2b84 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/left_join.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/left_join.rs @@ -32,7 +32,7 @@ impl Triplestore { self.lazy_expression(expr, right_solution_mappings, &expression_context)?; right_solution_mappings.mappings = right_solution_mappings .mappings - .filter(col(&expression_context.as_str())) + .filter(col(expression_context.as_str())) .drop_columns([&expression_context.as_str()]); } let SolutionMappings { diff --git a/triplestore/src/sparql/lazy_graph_patterns/path.rs b/triplestore/src/sparql/lazy_graph_patterns/path.rs index aa7f8a2..fe68f63 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/path.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/path.rs @@ -65,8 +65,11 @@ impl Triplestore { let cat_df_map = self.create_unique_cat_dfs(ppe, Some(subject), Some(object))?; let max_index = find_max_index(cat_df_map.values()); if create_sparse { - let SparsePathReturn { sparmat, soo: _, dt: _ } = - sparse_path(ppe, &cat_df_map, max_index as usize); + let SparsePathReturn { + sparmat, + soo: _, + dt: _, + } = sparse_path(ppe, &cat_df_map, max_index as usize); let mut subject_vec = vec![]; let mut object_vec = vec![]; for (i, row) in sparmat.outer_iterator().enumerate() { @@ -78,9 +81,9 @@ impl Triplestore { } } let mut lookup_df = find_lookup(&cat_df_map); - let mut subject_series = Series::from_iter(subject_vec.into_iter()); + let mut subject_series = Series::from_iter(subject_vec); subject_series.rename("subject_key"); - let mut object_series = Series::from_iter(object_vec.into_iter()); + let mut object_series = Series::from_iter(object_vec); object_series.rename("object_key"); out_df = 
DataFrame::new(vec![subject_series, object_series]).unwrap(); lookup_df.rename("value", "subject").unwrap(); @@ -182,7 +185,7 @@ impl Triplestore { .insert(v.as_str().to_string(), RDFNodeType::IRI); } - return Ok(mappings); + Ok(mappings) } else { let mut datatypes = HashMap::new(); if let TermPattern::Variable(v) = subject { @@ -191,11 +194,11 @@ impl Triplestore { if let TermPattern::Variable(v) = object { datatypes.insert(v.as_str().to_string(), RDFNodeType::IRI); } - return Ok(SolutionMappings { + Ok(SolutionMappings { mappings: out_df.lazy(), columns: var_cols.into_iter().map(|x| x.to_string()).collect(), rdf_node_types: datatypes, - }); + }) } } @@ -233,7 +236,7 @@ impl Triplestore { } PropertyPathExpression::Alternative(left, right) => { let mut left_df_map = - self.create_unique_cat_dfs(left, subject.clone(), object.clone())?; + self.create_unique_cat_dfs(left, subject, object)?; let right_df_map = self.create_unique_cat_dfs(right, subject, object)?; left_df_map.extend(right_df_map); Ok(left_df_map) @@ -259,7 +262,7 @@ impl Triplestore { } } let df; - if dfs.len() > 0 { + if !dfs.is_empty() { df = concat_df(dfs.as_slice()) .unwrap() .unique(None, UniqueKeepStrategy::First, None) @@ -293,7 +296,7 @@ impl Triplestore { assert!(tt.unique, "Should be deduplicated"); let mut lf = concat( tt.get_lazy_frames() - .map_err(|x| SparqlError::TripleTableReadError(x))?, + .map_err(SparqlError::TripleTableReadError)?, UnionArgs::default(), ) .unwrap() @@ -643,7 +646,7 @@ fn sparse_path( SparsePathReturn { sparmat: sparmat.transpose_into(), soo: soo.flip(), - dt: dt, + dt, } } PropertyPathExpression::Sequence(left, right) => { diff --git a/triplestore/src/sparql/lazy_graph_patterns/triple.rs b/triplestore/src/sparql/lazy_graph_patterns/triple.rs index c5957fc..605a359 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/triple.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/triple.rs @@ -11,11 +11,11 @@ use polars::prelude::{col, concat, lit, Expr}; use polars::prelude::{IntoLazy, UnionArgs}; use polars_core::datatypes::{AnyValue, DataType}; use polars_core::frame::DataFrame; -use polars_core::prelude::{JoinType, NamedFrom}; +use polars_core::prelude::JoinType; use polars_core::series::Series; use representation::RDFNodeType; use spargebra::term::{NamedNodePattern, TermPattern, TriplePattern}; -use std::collections::{HashMap}; +use std::collections::HashMap; impl Triplestore { pub fn lazy_triple_pattern( @@ -130,7 +130,7 @@ impl Triplestore { } } } else { - if overlap.len() > 0 { + if !overlap.is_empty() { //TODO: Introduce data type sensitivity here. 
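A recurring change in this patch is `map_err(|x| Variant(x))` becoming `map_err(Variant)`, as in `map_err(SparqlError::TripleTableReadError)` above. A tuple enum variant is itself a function of its fields, so the closure is redundant (clippy's `redundant_closure`). A minimal sketch with hypothetical names, not taken from this codebase:

    #[derive(Debug)]
    enum AppError {
        Io(std::io::Error),
    }

    fn read_config(path: &str) -> Result<String, AppError> {
        // `AppError::Io` has type `fn(std::io::Error) -> AppError`,
        // so it can be passed to `map_err` directly.
        std::fs::read_to_string(path).map_err(AppError::Io)
    }

    fn main() {
        match read_config("Cargo.toml") {
            Ok(s) => println!("read {} bytes", s.len()),
            Err(e) => eprintln!("{:?}", e),
        }
    }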
let join_on: Vec = overlap.iter().map(|x| col(x)).collect(); let mut strcol = vec![]; @@ -155,7 +155,7 @@ impl Triplestore { mappings = mappings.join(df.lazy(), [], [], JoinType::Cross.into()); } - columns.extend(colnames.into_iter()); + columns.extend(colnames); rdf_node_types.extend(dts); } solution_mappings = Some(SolutionMappings { @@ -209,7 +209,7 @@ impl Triplestore { let mut out_datatypes = HashMap::new(); let mut lf = concat( tt.get_lazy_frames() - .map_err(|x| SparqlError::TripleTableReadError(x))?, + .map_err(SparqlError::TripleTableReadError)?, UnionArgs::default(), ) .unwrap() @@ -295,7 +295,7 @@ impl Triplestore { lfs.push(df.lazy()); } } - Ok(if lfs.len() > 0 { + Ok(if !lfs.is_empty() { ( concat(lfs, UnionArgs::default()) .unwrap() diff --git a/triplestore/src/sparql/lazy_graph_patterns/union.rs b/triplestore/src/sparql/lazy_graph_patterns/union.rs index 9c25f74..1900c1c 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/union.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/union.rs @@ -22,7 +22,7 @@ impl Triplestore { mappings: left_mappings, columns: mut left_columns, rdf_node_types: mut left_datatypes, - } = self.lazy_graph_pattern(&left, solution_mappings.clone(), &left_context)?; + } = self.lazy_graph_pattern(left, solution_mappings.clone(), &left_context)?; let SolutionMappings { mappings: right_mappings, diff --git a/triplestore/src/sparql/lazy_graph_patterns/values.rs b/triplestore/src/sparql/lazy_graph_patterns/values.rs index 016d3cb..5b58fb4 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/values.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/values.rs @@ -34,10 +34,8 @@ impl Triplestore { GroundTerm::NamedNode(nn) => { if i == 0 { datatypes.insert(j, RDFNodeType::IRI); - } else { - if datatypes.get(&j).unwrap() != &RDFNodeType::IRI { - todo!("No support yet for values of same variables having different types") - } + } else if datatypes.get(&j).unwrap() != &RDFNodeType::IRI { + todo!("No support yet for values of same variables having different types") } col_vecs .get_mut(&j) @@ -71,9 +69,7 @@ impl Triplestore { } } if i + 1 == bindings.len() { - if !datatypes.contains_key(&j) { - datatypes.insert(j, RDFNodeType::None); - } + datatypes.entry(j).or_insert(RDFNodeType::None); } col_vecs.get_mut(&j).unwrap().push(AnyValue::Null); } diff --git a/triplestore/src/sparql/query_context.rs b/triplestore/src/sparql/query_context.rs index 455eb54..798a713 100644 --- a/triplestore/src/sparql/query_context.rs +++ b/triplestore/src/sparql/query_context.rs @@ -71,187 +71,187 @@ impl fmt::Display for PathEntry { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { PathEntry::BGP => { - write!(f, "{}", "BGP") + write!(f, "BGP") } PathEntry::UnionLeftSide => { - write!(f, "{}", "UnionLeftSide") + write!(f, "UnionLeftSide") } PathEntry::UnionRightSide => { - write!(f, "{}", "UnionRightSide") + write!(f, "UnionRightSide") } PathEntry::JoinLeftSide => { - write!(f, "{}", "JoinLeftSide") + write!(f, "JoinLeftSide") } PathEntry::JoinRightSide => { - write!(f, "{}", "JoinRightSide") + write!(f, "JoinRightSide") } PathEntry::LeftJoinLeftSide => { - write!(f, "{}", "LeftJoinLeftSide") + write!(f, "LeftJoinLeftSide") } PathEntry::LeftJoinRightSide => { - write!(f, "{}", "LeftJoinRightSide") + write!(f, "LeftJoinRightSide") } PathEntry::LeftJoinExpression => { - write!(f, "{}", "LeftJoinExpression") + write!(f, "LeftJoinExpression") } PathEntry::MinusLeftSide => { - write!(f, "{}", "MinusLeftSide") + write!(f, "MinusLeftSide") } PathEntry::MinusRightSide => { - 
write!(f, "{}", "MinusRightSide") + write!(f, "MinusRightSide") } PathEntry::FilterInner => { - write!(f, "{}", "FilterInner") + write!(f, "FilterInner") } PathEntry::FilterExpression => { - write!(f, "{}", "FilterExpression") + write!(f, "FilterExpression") } PathEntry::GraphInner => { - write!(f, "{}", "GraphInner") + write!(f, "GraphInner") } PathEntry::ExtendInner => { - write!(f, "{}", "ExtendInner") + write!(f, "ExtendInner") } PathEntry::ExtendExpression => { - write!(f, "{}", "ExtendExpression") + write!(f, "ExtendExpression") } PathEntry::OrderByInner => { - write!(f, "{}", "OrderByInner") + write!(f, "OrderByInner") } PathEntry::OrderByExpression(i) => { - write!(f, "{}({})", "OrderByExpression", i) + write!(f, "OrderByExpression({})", i) } PathEntry::ProjectInner => { - write!(f, "{}", "ProjectInner") + write!(f, "ProjectInner") } PathEntry::DistinctInner => { - write!(f, "{}", "DistinctInner") + write!(f, "DistinctInner") } PathEntry::ReducedInner => { - write!(f, "{}", "ReducedInner") + write!(f, "ReducedInner") } PathEntry::SliceInner => { - write!(f, "{}", "SliceInner") + write!(f, "SliceInner") } PathEntry::ServiceInner => { - write!(f, "{}", "ServiceInner") + write!(f, "ServiceInner") } PathEntry::GroupInner => { - write!(f, "{}", "GroupInner") + write!(f, "GroupInner") } PathEntry::GroupAggregation(i) => { - write!(f, "{}({})", "GroupAggregation", i) + write!(f, "GroupAggregation({})", i) } PathEntry::IfLeft => { - write!(f, "{}", "IfLeft") + write!(f, "IfLeft") } PathEntry::IfMiddle => { - write!(f, "{}", "IfMiddle") + write!(f, "IfMiddle") } PathEntry::IfRight => { - write!(f, "{}", "IfRight") + write!(f, "IfRight") } PathEntry::OrLeft => { - write!(f, "{}", "OrLeft") + write!(f, "OrLeft") } PathEntry::OrRight => { - write!(f, "{}", "OrRight") + write!(f, "OrRight") } PathEntry::AndLeft => { - write!(f, "{}", "AndLeft") + write!(f, "AndLeft") } PathEntry::AndRight => { - write!(f, "{}", "AndRight") + write!(f, "AndRight") } PathEntry::EqualLeft => { - write!(f, "{}", "EqualLeft") + write!(f, "EqualLeft") } PathEntry::EqualRight => { - write!(f, "{}", "EqualRight") + write!(f, "EqualRight") } PathEntry::SameTermLeft => { - write!(f, "{}", "SameTermLeft") + write!(f, "SameTermLeft") } PathEntry::SameTermRight => { - write!(f, "{}", "SameTermRight") + write!(f, "SameTermRight") } PathEntry::GreaterLeft => { - write!(f, "{}", "GreaterLeft") + write!(f, "GreaterLeft") } PathEntry::GreaterRight => { - write!(f, "{}", "GreaterRight") + write!(f, "GreaterRight") } PathEntry::GreaterOrEqualLeft => { - write!(f, "{}", "GreaterOrEqualLeft") + write!(f, "GreaterOrEqualLeft") } PathEntry::GreaterOrEqualRight => { - write!(f, "{}", "GreaterOrEqualRight") + write!(f, "GreaterOrEqualRight") } PathEntry::LessLeft => { - write!(f, "{}", "LessLeft") + write!(f, "LessLeft") } PathEntry::LessRight => { - write!(f, "{}", "LessRight") + write!(f, "LessRight") } PathEntry::LessOrEqualLeft => { - write!(f, "{}", "LessOrEqualLeft") + write!(f, "LessOrEqualLeft") } PathEntry::LessOrEqualRight => { - write!(f, "{}", "LessOrEqualRight") + write!(f, "LessOrEqualRight") } PathEntry::InLeft => { - write!(f, "{}", "InLeft") + write!(f, "InLeft") } PathEntry::InRight(i) => { - write!(f, "{}({})", "InRight", i) + write!(f, "InRight({})", i) } PathEntry::MultiplyLeft => { - write!(f, "{}", "MultiplyLeft") + write!(f, "MultiplyLeft") } PathEntry::MultiplyRight => { - write!(f, "{}", "MultiplyRight") + write!(f, "MultiplyRight") } PathEntry::AddLeft => { - write!(f, "{}", "AddLeft") + write!(f, "AddLeft") } 
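The long run of `Display` hunks above and below is one mechanical fix: clippy's `write_literal` flags a string literal routed through a `{}` placeholder, and the literal is folded into the format string instead. A reduced sketch over a hypothetical two-variant enum:

    use std::fmt::{self, Display, Formatter};

    enum PathEntryDemo {
        Bgp,
        OrderByExpression(usize),
    }

    impl Display for PathEntryDemo {
        fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
            match self {
                // Before: write!(f, "{}", "BGP")
                PathEntryDemo::Bgp => write!(f, "BGP"),
                // Before: write!(f, "{}({})", "OrderByExpression", i)
                PathEntryDemo::OrderByExpression(i) => write!(f, "OrderByExpression({})", i),
            }
        }
    }

    fn main() {
        println!("{}", PathEntryDemo::Bgp);
        assert_eq!(PathEntryDemo::OrderByExpression(2).to_string(), "OrderByExpression(2)");
    }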
PathEntry::AddRight => { - write!(f, "{}", "AddRight") + write!(f, "AddRight") } PathEntry::SubtractLeft => { - write!(f, "{}", "SubtractLeft") + write!(f, "SubtractLeft") } PathEntry::SubtractRight => { - write!(f, "{}", "SubtractRight") + write!(f, "SubtractRight") } PathEntry::DivideLeft => { - write!(f, "{}", "DivideLeft") + write!(f, "DivideLeft") } PathEntry::DivideRight => { - write!(f, "{}", "DivideRight") + write!(f, "DivideRight") } PathEntry::UnaryPlus => { - write!(f, "{}", "UnaryPlus") + write!(f, "UnaryPlus") } PathEntry::UnaryMinus => { - write!(f, "{}", "UnaryMinus") + write!(f, "UnaryMinus") } PathEntry::Not => { - write!(f, "{}", "Not") + write!(f, "Not") } PathEntry::Exists => { - write!(f, "{}", "Exists") + write!(f, "Exists") } PathEntry::Coalesce(i) => { - write!(f, "{}({})", "Coalesce", i) + write!(f, "Coalesce({})", i) } PathEntry::FunctionCall(i) => { - write!(f, "{}({})", "FunctionCall", i) + write!(f, "FunctionCall({})", i) } PathEntry::AggregationOperation => { - write!(f, "{}", "AggregationOperation") + write!(f, "AggregationOperation") } PathEntry::OrderingOperation => { - write!(f, "{}", "OrderingOperation") + write!(f, "OrderingOperation") } } } @@ -454,7 +454,7 @@ impl Context { pub fn extension_with(&self, p: PathEntry) -> Context { let mut path = self.path.clone(); let mut string_rep = self.string_rep.clone(); - if path.len() > 0 { + if !path.is_empty() { string_rep += "-"; } let entry_rep = p.to_string(); diff --git a/triplestore/src/sparql/solution_mapping.rs b/triplestore/src/sparql/solution_mapping.rs index 50e2026..0143cea 100644 --- a/triplestore/src/sparql/solution_mapping.rs +++ b/triplestore/src/sparql/solution_mapping.rs @@ -38,5 +38,5 @@ pub fn is_string_col(rdf_node_type: &RDFNodeType) -> bool { panic!("No support for datatype {:?}", rdf_node_type) } } - return false; + false } diff --git a/triplestore/src/sparql/sparql_to_polars.rs b/triplestore/src/sparql/sparql_to_polars.rs index 6521328..6882f49 100644 --- a/triplestore/src/sparql/sparql_to_polars.rs +++ b/triplestore/src/sparql/sparql_to_polars.rs @@ -61,20 +61,16 @@ pub(crate) fn sparql_literal_to_polars_literal_value(lit: &Literal) -> LiteralVa } } } else if datatype == xsd::DATE { - let ymd_string: Vec<&str> = value.split("-").collect(); + let ymd_string: Vec<&str> = value.split('-').collect(); if ymd_string.len() != 3 { todo!("Unsupported date format {}", value) } - let y = i32::from_str(ymd_string.get(0).unwrap()).expect(&format!( - "Year parsing error {}", - ymd_string.get(0).unwrap() - )); - let m = u32::from_str(ymd_string.get(1).unwrap()).expect(&format!( - "Month parsing error {}", - ymd_string.get(1).unwrap() - )); + let y = i32::from_str(ymd_string.first().unwrap()).unwrap_or_else(|_| panic!("Year parsing error {}", + ymd_string.first().unwrap())); + let m = u32::from_str(ymd_string.get(1).unwrap()).unwrap_or_else(|_| panic!("Month parsing error {}", + ymd_string.get(1).unwrap())); let d = u32::from_str(ymd_string.get(2).unwrap()) - .expect(&format!("Day parsing error {}", ymd_string.get(1).unwrap())); + .unwrap_or_else(|_| panic!("Day parsing error {}", ymd_string.get(1).unwrap())); let date = NaiveDate::from_ymd_opt(y, m, d).unwrap(); let dt = date.and_hms_opt(0, 0, 0).unwrap(); @@ -209,7 +205,8 @@ fn polars_literal_values_to_series(literal_values: Vec, name: &str LiteralValue::DateTime(_, t, None) => //TODO: Assert time unit lik?? 
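The date-parsing hunk above swaps `.expect(&format!(…))` for `.unwrap_or_else(|_| panic!(…))`, which is clippy's `expect_fun_call`: with `expect`, the message string is formatted even on success, while the closure defers formatting to the failure path. Note that the day branch still interpolates `ymd_string.get(1)` (the month) into its panic message, which reads like a copy-paste slip carried over from the original code. A self-contained sketch of the corrected pattern, with a hypothetical helper name:

    use std::str::FromStr;

    fn parse_ymd(value: &str) -> (i32, u32, u32) {
        let parts: Vec<&str> = value.split('-').collect();
        assert_eq!(parts.len(), 3, "Unsupported date format {}", value);
        let y = i32::from_str(parts[0])
            .unwrap_or_else(|_| panic!("Year parsing error {}", parts[0]));
        let m = u32::from_str(parts[1])
            .unwrap_or_else(|_| panic!("Month parsing error {}", parts[1]));
        // Report the day itself (parts[2]) in the day branch's message.
        let d = u32::from_str(parts[2])
            .unwrap_or_else(|_| panic!("Day parsing error {}", parts[2]));
        (y, m, d)
    }

    fn main() {
        assert_eq!(parse_ymd("2023-09-22"), (2023, 9, 22));
    }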
{ - let s = Series::new( + + Series::new( name, literal_values .into_iter() @@ -222,8 +219,7 @@ fn polars_literal_values_to_series(literal_values: Vec, name: &str } }) .collect::>(), - ); - s + ) } LiteralValue::Duration(_, _) => { todo!() From ae0497c91926ab7ed05f0277ec003888b6782010 Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 07:48:17 +0200 Subject: [PATCH 2/9] Further on blank nodes support --- maplib/src/mapping.rs | 47 +++++++++++-------- maplib/src/mapping/constant_terms.rs | 7 ++- maplib/src/mapping/validation_inference.rs | 1 - maplib/src/templates.rs | 7 +-- representation/src/lib.rs | 1 - triplestore/src/io_funcs.rs | 3 +- triplestore/src/lib.rs | 2 +- triplestore/src/sparql/lazy_aggregate.rs | 6 +-- triplestore/src/sparql/lazy_expressions.rs | 15 +++--- .../src/sparql/lazy_graph_patterns/path.rs | 3 +- triplestore/src/sparql/sparql_to_polars.rs | 9 ++-- 11 files changed, 54 insertions(+), 47 deletions(-) diff --git a/maplib/src/mapping.rs b/maplib/src/mapping.rs index c283921..1158a06 100644 --- a/maplib/src/mapping.rs +++ b/maplib/src/mapping.rs @@ -18,6 +18,7 @@ use oxrdf::Triple; use polars::lazy::prelude::{col, Expr}; use polars::prelude::{DataFrame, IntoLazy}; use polars_core::series::Series; +use rayon::iter::IndexedParallelIterator; use rayon::iter::ParallelDrainRange; use rayon::iter::ParallelIterator; use representation::RDFNodeType; @@ -36,8 +37,7 @@ pub struct Mapping { blank_node_counter: usize, } -#[derive(Clone)] -#[derive(Default)] +#[derive(Clone, Default)] pub struct ExpandOptions { pub language_tags: Option>, pub unique_subsets: Option>>, @@ -56,8 +56,6 @@ struct StaticColumn { ptype: Option, } - - #[derive(Clone, Debug)] pub struct PrimitiveColumn { pub rdf_node_type: RDFNodeType, @@ -89,8 +87,7 @@ impl Mapping { path: P, caching_folder: Option, ) -> Result { - let dataset = - TemplateDataset::from_folder(path).map_err(MaplibError::TemplateError)?; + let dataset = TemplateDataset::from_folder(path).map_err(MaplibError::TemplateError)?; Mapping::new(&dataset, caching_folder) } @@ -98,8 +95,7 @@ impl Mapping { path: P, caching_folder: Option, ) -> Result { - let dataset = - TemplateDataset::from_file(path).map_err(MaplibError::TemplateError)?; + let dataset = TemplateDataset::from_file(path).map_err(MaplibError::TemplateError)?; Mapping::new(&dataset, caching_folder) } @@ -195,6 +191,7 @@ impl Mapping { let df_slice = df.slice_par(offset, to_row); offset += chunk_size as i64; let (result_vec, new_blank_node_counter) = self._expand( + 0, 0, self.blank_node_counter, &target_template_name, @@ -211,6 +208,7 @@ impl Mapping { } } else { let (result_vec, new_blank_node_counter) = self._expand( + 0, 0, self.blank_node_counter, &target_template_name, @@ -228,6 +226,7 @@ impl Mapping { fn _expand( &self, layer: usize, + pattern_num: usize, mut blank_node_counter: usize, name: &str, df: DataFrame, @@ -266,9 +265,12 @@ impl Mapping { let results: Vec<_> = expand_params_vec .par_drain(..) 
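The `use rayon::iter::IndexedParallelIterator;` added above exists for the `.enumerate()` call introduced just below: `par_drain(..)` yields a parallel iterator, and `enumerate` on it is supplied by the `IndexedParallelIterator` trait, which must be in scope. The resulting index becomes the `pattern_num` threaded through `_expand`, so generated blank-node labels (`_:{name}_l{layer}_p{pattern}_r{row}`) stay unique across sibling instance patterns in the same layer. A self-contained sketch of the iterator shape, with hypothetical data:

    use rayon::iter::{IndexedParallelIterator, ParallelDrainRange, ParallelIterator};

    fn main() {
        let mut instances = vec!["t1", "t2", "t3"];
        // `par_drain(..)` empties the Vec in parallel; `enumerate` (from
        // IndexedParallelIterator) supplies the per-pattern index.
        let labeled: Vec<String> = instances
            .par_drain(..)
            .enumerate()
            .map(|(i, name)| format!("{}_p{}", name, i))
            .collect();
        assert_eq!(labeled.len(), 3);
    }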
- .map(|(i, series_vec)| { - let target_template = - self.template_dataset.get(i.template_name.as_str()).unwrap(); + .enumerate() + .map(|(i, (instance, series_vec))| { + let target_template = self + .template_dataset + .get(instance.template_name.as_str()) + .unwrap(); let ( instance_df, instance_dynamic_columns, @@ -278,7 +280,8 @@ impl Mapping { ) = create_remapped( self.blank_node_counter, layer, - i, + pattern_num, + instance, &target_template.signature, series_vec, &dynamic_columns, @@ -289,8 +292,9 @@ impl Mapping { self._expand( layer + 1, + i, updated_blank_node_counter, - i.template_name.as_str(), + instance.template_name.as_str(), instance_df, instance_dynamic_columns, instance_static_columns, @@ -321,10 +325,7 @@ impl Mapping { let now = Instant::now(); let triples: Vec< Result<(DataFrame, RDFNodeType, Option, Option, bool), MappingError>, - > = result_vec - .par_drain(..) - .map(create_triples) - .collect(); + > = result_vec.par_drain(..).map(create_triples).collect(); let mut ok_triples = vec![]; for t in triples { ok_triples.push(t?); @@ -434,13 +435,19 @@ fn create_dynamic_expression_from_static( fn create_series_from_blank_node_constant( layer: usize, + pattern_num: usize, blank_node_counter: usize, column_name: &str, constant_term: &ConstantTerm, n_rows: usize, ) -> Result<(Series, PrimitiveColumn), MappingError> { - let (mut series, _, rdf_node_type) = - constant_blank_node_to_series(layer, blank_node_counter, constant_term, n_rows)?; + let (mut series, _, rdf_node_type) = constant_blank_node_to_series( + layer, + pattern_num, + blank_node_counter, + constant_term, + n_rows, + )?; series.rename(column_name); let mapped_column = PrimitiveColumn { rdf_node_type, @@ -452,6 +459,7 @@ fn create_series_from_blank_node_constant( fn create_remapped( mut blank_node_counter: usize, layer: usize, + pattern_num: usize, instance: &Instance, signature: &Signature, mut series_vec: Vec, @@ -512,6 +520,7 @@ fn create_remapped( if ct.has_blank_node() { let (series, primitive_column) = create_series_from_blank_node_constant( layer, + pattern_num, blank_node_counter, target_colname, ct, diff --git a/maplib/src/mapping/constant_terms.rs b/maplib/src/mapping/constant_terms.rs index ca8bec6..e727655 100644 --- a/maplib/src/mapping/constant_terms.rs +++ b/maplib/src/mapping/constant_terms.rs @@ -125,6 +125,7 @@ pub fn constant_to_expr( pub fn constant_blank_node_to_series( layer: usize, + pattern_num: usize, blank_node_counter: usize, constant_term: &ConstantTerm, n_rows: usize, @@ -133,7 +134,11 @@ pub fn constant_blank_node_to_series( ConstantTerm::Constant(ConstantLiteral::BlankNode(bl)) => { let any_value_vec: Vec<_> = (blank_node_counter..(blank_node_counter + n_rows)) .into_par_iter() - .map(|i| AnyValue::Utf8Owned(format!("_:{}_l{}_r{}", bl.as_str(), layer, i).into())) + .map(|i| { + AnyValue::Utf8Owned( + format!("_:{}_l{}_p{}_r{}", bl.as_str(), layer, pattern_num, i).into(), + ) + }) .collect(); ( diff --git a/maplib/src/mapping/validation_inference.rs b/maplib/src/mapping/validation_inference.rs index 789f01c..42b4d02 100644 --- a/maplib/src/mapping/validation_inference.rs +++ b/maplib/src/mapping/validation_inference.rs @@ -67,7 +67,6 @@ fn validate_infer_column_data_type( validate_datatype(series.name(), dtype, ptype)?; ptype.clone() } else { - polars_datatype_to_xsd_datatype(dtype) }; let rdf_node_type = infer_rdf_node_type(&ptype); diff --git a/maplib/src/templates.rs b/maplib/src/templates.rs index f4913bd..1300070 100644 --- a/maplib/src/templates.rs +++ 
b/maplib/src/templates.rs @@ -114,8 +114,7 @@ impl TemplateDataset { pub fn from_folder>(path: P) -> Result { let mut docs = vec![]; - let files_result = - read_dir(path).map_err(TemplateError::ReadTemplateDirectoryError)?; + let files_result = read_dir(path).map_err(TemplateError::ReadTemplateDirectoryError)?; for f in files_result { let f = f.map_err(TemplateError::ResolveDirectoryEntryError)?; if let Some(e) = f.path().extension() { @@ -137,7 +136,9 @@ impl TemplateDataset { } pub fn get(&self, template: &str) -> Option<&Template> { - self.templates.iter().find(|&t| t.signature.template_name.as_str() == template) + self.templates + .iter() + .find(|&t| t.signature.template_name.as_str() == template) } fn infer_types(&mut self) -> Result<(), TemplateError> { diff --git a/representation/src/lib.rs b/representation/src/lib.rs index bdb47fe..4c6a624 100644 --- a/representation/src/lib.rs +++ b/representation/src/lib.rs @@ -38,7 +38,6 @@ impl RDFNodeType { } pub fn find_triple_type(&self) -> TripleType { - if let RDFNodeType::IRI = self { TripleType::ObjectProperty } else if let RDFNodeType::Literal(lit) = self { diff --git a/triplestore/src/io_funcs.rs b/triplestore/src/io_funcs.rs index 8c206db..0fdd2b4 100644 --- a/triplestore/src/io_funcs.rs +++ b/triplestore/src/io_funcs.rs @@ -6,8 +6,7 @@ use std::path::Path; pub(crate) fn delete_tmp_parquets_in_caching_folder( caching_folder: &Path, ) -> Result<(), TriplestoreError> { - let contents = - read_dir(caching_folder).map_err(TriplestoreError::ReadCachingDirectoryError)?; + let contents = read_dir(caching_folder).map_err(TriplestoreError::ReadCachingDirectoryError)?; for f in contents { let entry = f.map_err(TriplestoreError::ReadCachingDirectoryEntryError)?; let fname = entry.file_name().to_str().unwrap().to_string(); diff --git a/triplestore/src/lib.rs b/triplestore/src/lib.rs index 21a2aef..56e25ed 100644 --- a/triplestore/src/lib.rs +++ b/triplestore/src/lib.rs @@ -198,7 +198,7 @@ impl Triplestore { static_verb_column, has_unique_subset, } = t; - + prepare_triples( df, &object_type, diff --git a/triplestore/src/sparql/lazy_aggregate.rs b/triplestore/src/sparql/lazy_aggregate.rs index 73037ba..6994f1c 100644 --- a/triplestore/src/sparql/lazy_aggregate.rs +++ b/triplestore/src/sparql/lazy_aggregate.rs @@ -45,10 +45,8 @@ impl Triplestore { } else { output_solution_mappings = solution_mappings; column_context = None; - let all_proper_column_names: Vec = output_solution_mappings - .columns - .iter().cloned() - .collect(); + let all_proper_column_names: Vec = + output_solution_mappings.columns.iter().cloned().collect(); let columns_expr = Expr::Columns(all_proper_column_names); if *distinct { out_expr = columns_expr.n_unique(); diff --git a/triplestore/src/sparql/lazy_expressions.rs b/triplestore/src/sparql/lazy_expressions.rs index 42be17d..2216536 100644 --- a/triplestore/src/sparql/lazy_expressions.rs +++ b/triplestore/src/sparql/lazy_expressions.rs @@ -714,10 +714,9 @@ impl Triplestore { Function::Abs => { assert_eq!(args.len(), 1); let first_context = args_contexts.get(&0).unwrap(); - output_solution_mappings.mappings = - output_solution_mappings.mappings.with_column( - col(first_context.as_str()).abs().alias(context.as_str()), - ); + output_solution_mappings.mappings = output_solution_mappings + .mappings + .with_column(col(first_context.as_str()).abs().alias(context.as_str())); let existing_type = output_solution_mappings .rdf_node_types .get(first_context.as_str()) @@ -782,9 +781,7 @@ impl Triplestore { let first_context = 
args_contexts.get(&0).unwrap(); output_solution_mappings.mappings = output_solution_mappings.mappings.with_column( - col(first_context.as_str()) - .round(0) - .alias(context.as_str()), + col(first_context.as_str()).round(0).alias(context.as_str()), ); let existing_type = output_solution_mappings .rdf_node_types @@ -851,7 +848,9 @@ impl Triplestore { } } output_solution_mappings.mappings = output_solution_mappings.mappings.drop_columns( - args_contexts.values().map(|x| x.as_str()) + args_contexts + .values() + .map(|x| x.as_str()) .collect::>(), ); output_solution_mappings diff --git a/triplestore/src/sparql/lazy_graph_patterns/path.rs b/triplestore/src/sparql/lazy_graph_patterns/path.rs index fe68f63..5f2cfee 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/path.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/path.rs @@ -235,8 +235,7 @@ impl Triplestore { Ok(left_df_map) } PropertyPathExpression::Alternative(left, right) => { - let mut left_df_map = - self.create_unique_cat_dfs(left, subject, object)?; + let mut left_df_map = self.create_unique_cat_dfs(left, subject, object)?; let right_df_map = self.create_unique_cat_dfs(right, subject, object)?; left_df_map.extend(right_df_map); Ok(left_df_map) diff --git a/triplestore/src/sparql/sparql_to_polars.rs b/triplestore/src/sparql/sparql_to_polars.rs index 6882f49..c9ed8e6 100644 --- a/triplestore/src/sparql/sparql_to_polars.rs +++ b/triplestore/src/sparql/sparql_to_polars.rs @@ -65,10 +65,10 @@ pub(crate) fn sparql_literal_to_polars_literal_value(lit: &Literal) -> LiteralVa if ymd_string.len() != 3 { todo!("Unsupported date format {}", value) } - let y = i32::from_str(ymd_string.first().unwrap()).unwrap_or_else(|_| panic!("Year parsing error {}", - ymd_string.first().unwrap())); - let m = u32::from_str(ymd_string.get(1).unwrap()).unwrap_or_else(|_| panic!("Month parsing error {}", - ymd_string.get(1).unwrap())); + let y = i32::from_str(ymd_string.first().unwrap()) + .unwrap_or_else(|_| panic!("Year parsing error {}", ymd_string.first().unwrap())); + let m = u32::from_str(ymd_string.get(1).unwrap()) + .unwrap_or_else(|_| panic!("Month parsing error {}", ymd_string.get(1).unwrap())); let d = u32::from_str(ymd_string.get(2).unwrap()) .unwrap_or_else(|_| panic!("Day parsing error {}", ymd_string.get(1).unwrap())); let date = NaiveDate::from_ymd_opt(y, m, d).unwrap(); @@ -205,7 +205,6 @@ fn polars_literal_values_to_series(literal_values: Vec, name: &str LiteralValue::DateTime(_, t, None) => //TODO: Assert time unit lik?? 
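The `args_contexts.values()` call being reflowed in the `drop_columns` hunk below replaced an `.iter().map(|(_, x)| …)` chain in the previous patch: iterating a map and discarding the keys is clippy's `iter_kv_map`, and `values()` states the intent directly. A minimal sketch with hypothetical contents:

    use std::collections::HashMap;

    fn main() {
        let args_contexts: HashMap<usize, String> =
            HashMap::from([(0, "c0".to_string()), (1, "c1".to_string())]);
        // Before: args_contexts.iter().map(|(_, x)| x.as_str())
        // After:  iterate the values directly.
        let mut cols: Vec<&str> = args_contexts.values().map(|x| x.as_str()).collect();
        cols.sort();
        assert_eq!(cols, vec!["c0", "c1"]);
    }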
{ - Series::new( name, literal_values From 0d9e62bd1cf108b9e7096011821dd9cebd83c26e Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 09:52:41 +0200 Subject: [PATCH 3/9] Further on blank nodes support --- representation/src/literals.rs | 5 +---- triplestore/src/lib.rs | 2 +- triplestore/src/sparql/sparql_to_polars.rs | 5 +---- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/representation/src/literals.rs b/representation/src/literals.rs index 6ec527c..b1bc526 100644 --- a/representation/src/literals.rs +++ b/representation/src/literals.rs @@ -20,10 +20,7 @@ pub fn sparql_literal_to_any_value( } else if datatype == xsd::UNSIGNED_LONG { let u = u64::from_str(value).expect("Integer parsing error"); AnyValue::from(u) - } else if datatype == xsd::INTEGER { - let i = i64::from_str(value).expect("Integer parsing error"); - AnyValue::from(i) - } else if datatype == xsd::LONG { + } else if datatype == xsd::INTEGER || datatype == xsd::LONG { let i = i64::from_str(value).expect("Integer parsing error"); AnyValue::from(i) } else if datatype == xsd::INT { diff --git a/triplestore/src/lib.rs b/triplestore/src/lib.rs index 56e25ed..4880579 100644 --- a/triplestore/src/lib.rs +++ b/triplestore/src/lib.rs @@ -124,7 +124,7 @@ impl Triplestore { pub fn deduplicate(&mut self) -> Result<(), TriplestoreError> { let now = Instant::now(); for (predicate, map) in &mut self.df_map { - for (_, v) in map { + for v in map.values_mut() { if !v.unique { if self.caching_folder.is_some() { let lf_results: Vec> = v diff --git a/triplestore/src/sparql/sparql_to_polars.rs b/triplestore/src/sparql/sparql_to_polars.rs index c9ed8e6..46e7143 100644 --- a/triplestore/src/sparql/sparql_to_polars.rs +++ b/triplestore/src/sparql/sparql_to_polars.rs @@ -30,10 +30,7 @@ pub(crate) fn sparql_literal_to_polars_literal_value(lit: &Literal) -> LiteralVa } else if datatype == xsd::UNSIGNED_LONG { let u = u64::from_str(value).expect("Integer parsing error"); LiteralValue::UInt64(u) - } else if datatype == xsd::INTEGER { - let i = i64::from_str(value).expect("Integer parsing error"); - LiteralValue::Int64(i) - } else if datatype == xsd::LONG { + } else if datatype == xsd::INTEGER || datatype == xsd::LONG { let i = i64::from_str(value).expect("Integer parsing error"); LiteralValue::Int64(i) } else if datatype == xsd::INT { From a27f413a34223003bff83b0300de3b6ed278efdc Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:26:00 +0200 Subject: [PATCH 4/9] Further on blank nodes support --- maplib/Cargo.toml | 2 +- maplib/src/mapping.rs | 1 + triplestore/src/conversion.rs | 13 +- .../src/sparql/lazy_graph_patterns/triple.rs | 3 +- triplestore/src/sparql/sparql_to_polars.rs | 308 +----------------- 5 files changed, 16 insertions(+), 311 deletions(-) diff --git a/maplib/Cargo.toml b/maplib/Cargo.toml index c18217d..1fb6236 100644 --- a/maplib/Cargo.toml +++ b/maplib/Cargo.toml @@ -13,7 +13,7 @@ nom={version="7.1.1", features=["alloc"]} sprs = {version="0.11.0", features=["rayon"]} spargebra = "0.2.2" oxrdf = "0.1.0" -polars = {version="0.32.1", features=["semi_anti_join","abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "horizontal_concat", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant"] } +polars = {version="0.32.1", features=["semi_anti_join", "abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", 
"strings", "horizontal_concat", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant"] } unic-char-range = "0.9.0" log="0.4.19" rio_turtle = "0.7.1" diff --git a/maplib/src/mapping.rs b/maplib/src/mapping.rs index 1158a06..d71fe99 100644 --- a/maplib/src/mapping.rs +++ b/maplib/src/mapping.rs @@ -169,6 +169,7 @@ impl Mapping { let now = Instant::now(); let target_template = self.resolve_template(template)?.clone(); let target_template_name = target_template.signature.template_name.as_str().to_string(); + let columns = self.validate_infer_dataframe_columns(&target_template.signature, &df, &options)?; let ExpandOptions { diff --git a/triplestore/src/conversion.rs b/triplestore/src/conversion.rs index 91906ae..b5ac2cd 100644 --- a/triplestore/src/conversion.rs +++ b/triplestore/src/conversion.rs @@ -70,8 +70,17 @@ fn hack_format_timestamp_with_timezone(series: &Series, tz: &mut TimeZone) -> Se format!( "{}", timezone - .ymd(x.year(), x.month(), x.day()) - .and_hms_nano(x.hour(), x.minute(), x.second(), x.nanosecond()) + .with_ymd_and_hms( + x.year(), + x.month(), + x.day(), + x.hour(), + x.minute(), + x.second() + ) + .unwrap() + .with_nanosecond(x.nanosecond()) + .unwrap() .format(XSD_DATETIME_WITH_TZ_FORMAT) ) }), diff --git a/triplestore/src/sparql/lazy_graph_patterns/triple.rs b/triplestore/src/sparql/lazy_graph_patterns/triple.rs index 605a359..e631fd7 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/triple.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/triple.rs @@ -33,8 +33,7 @@ impl Triplestore { xsd::ANY_URI => Some(RDFNodeType::IRI), _ => Some(RDFNodeType::Literal(l.datatype().into_owned())), }, - TermPattern::Variable(_) => None, - _ => None, + TermPattern::Variable(_) => None }; let subject_rename = get_keep_rename_term_pattern(&triple_pattern.subject); let verb_rename = get_keep_rename_named_node_pattern(&triple_pattern.predicate); diff --git a/triplestore/src/sparql/sparql_to_polars.rs b/triplestore/src/sparql/sparql_to_polars.rs index 46e7143..fc989f0 100644 --- a/triplestore/src/sparql/sparql_to_polars.rs +++ b/triplestore/src/sparql/sparql_to_polars.rs @@ -1,20 +1,10 @@ use chrono::NaiveDate; use oxrdf::vocab::xsd; -use oxrdf::{Literal, NamedNode, Term}; +use oxrdf::{Literal, NamedNode}; use polars::export::chrono::{DateTime, NaiveDateTime, Utc}; -use polars::prelude::{LiteralValue, NamedFrom, Series, TimeUnit}; +use polars::prelude::{LiteralValue, TimeUnit}; use std::str::FromStr; -pub(crate) fn sparql_term_to_polars_literal_value(term: &Term) -> polars::prelude::LiteralValue { - match term { - Term::NamedNode(named_node) => sparql_named_node_to_polars_literal_value(named_node), - Term::Literal(lit) => sparql_literal_to_polars_literal_value(lit), - _ => { - panic!("Not supported") - } - } -} - pub(crate) fn sparql_named_node_to_polars_literal_value(named_node: &NamedNode) -> LiteralValue { LiteralValue::Utf8(named_node.as_str().to_string()) } @@ -80,297 +70,3 @@ pub(crate) fn sparql_literal_to_polars_literal_value(lit: &Literal) -> LiteralVa }; literal_value } - -fn polars_literal_values_to_series(literal_values: Vec, name: &str) -> Series { - let first_non_null_opt = literal_values - .iter() - .find(|x| &&LiteralValue::Null != x) - .cloned(); - let first_null_opt = literal_values - .iter() - .find(|x| &&LiteralValue::Null == x) - .cloned(); - if let (Some(first_non_null), None) = (&first_non_null_opt, &first_null_opt) { - match first_non_null { - LiteralValue::Boolean(_) => Series::new( - name, - 
literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Boolean(b) = x { - b - } else { - panic!("Not possible") - } - }) - .collect::>(), - ), - LiteralValue::Utf8(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Utf8(u) = x { - u - } else { - panic!("Not possible") - } - }) - .collect::>(), - ), - LiteralValue::UInt32(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::UInt32(i) = x { - i - } else { - panic!("Not possible") - } - }) - .collect::>(), - ), - LiteralValue::UInt64(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::UInt64(i) = x { - i - } else { - panic!("Not possible") - } - }) - .collect::>(), - ), - LiteralValue::Int32(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Int32(i) = x { - i - } else { - panic!("Not possible") - } - }) - .collect::>(), - ), - LiteralValue::Int64(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Int64(i) = x { - i - } else { - panic!("Not possible") - } - }) - .collect::>(), - ), - LiteralValue::Float32(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Float32(f) = x { - f - } else { - panic!("Not possible") - } - }) - .collect::>(), - ), - LiteralValue::Float64(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Float64(f) = x { - Some(f) - } else { - panic!("Not possible") - } - }) - .collect::>>(), - ), - LiteralValue::Range { .. } => { - todo!() - } - LiteralValue::DateTime(_, t, None) => - //TODO: Assert time unit lik?? - { - Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::DateTime(n, t_prime, None) = x { - assert_eq!(t, &t_prime); - n - } else { - panic!("Not possible") - } - }) - .collect::>(), - ) - } - LiteralValue::Duration(_, _) => { - todo!() - } - LiteralValue::Series(_) => { - todo!() - } - _ => { - todo!() - } - } - } else if let (Some(first_non_null), Some(_)) = (&first_non_null_opt, &first_null_opt) { - match first_non_null { - LiteralValue::Boolean(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Boolean(b) = x { - Some(b) - } else { - None - } - }) - .collect::>>(), - ), - LiteralValue::Utf8(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Utf8(u) = x { - Some(u) - } else { - None - } - }) - .collect::>>(), - ), - LiteralValue::UInt32(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::UInt32(i) = x { - Some(i) - } else { - None - } - }) - .collect::>>(), - ), - LiteralValue::UInt64(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::UInt64(i) = x { - Some(i) - } else { - None - } - }) - .collect::>>(), - ), - LiteralValue::Int32(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Int32(i) = x { - Some(i) - } else { - None - } - }) - .collect::>>(), - ), - LiteralValue::Int64(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Int64(i) = x { - Some(i) - } else { - None - } - }) - .collect::>>(), - ), - LiteralValue::Float32(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Float32(f) = x { - Some(f) - } else { - None - } - }) - .collect::>>(), - ), - 
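This whole helper is deleted in patch 4: two parallel match ladders (one for all-non-null input, one for nullable input) that each unwrap a homogeneous `Vec<LiteralValue>` into a typed `Series`, with one hand-written arm per variant. For reference, a single arm reduced to a sketch (hypothetical function name, i64 case only):

    use polars::prelude::{LiteralValue, NamedFrom, Series};

    fn int64_series(values: Vec<LiteralValue>, name: &str) -> Series {
        // Unwrap a homogeneous Vec<LiteralValue> into a typed Series;
        // the deleted helper repeated this shape for every variant, twice.
        let ints: Vec<i64> = values
            .into_iter()
            .map(|v| match v {
                LiteralValue::Int64(i) => i,
                _ => panic!("Not possible"),
            })
            .collect();
        Series::new(name, ints)
    }

    fn main() {
        let s = int64_series(vec![LiteralValue::Int64(1), LiteralValue::Int64(2)], "xs");
        assert_eq!(s.len(), 2);
    }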
LiteralValue::Float64(_) => Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::Float64(f) = x { - Some(f) - } else { - None - } - }) - .collect::>>(), - ), - LiteralValue::Range { .. } => { - todo!() - } - LiteralValue::DateTime(_, t, None) => - //TODO: Assert time unit lik?? - { - Series::new( - name, - literal_values - .into_iter() - .map(|x| { - if let LiteralValue::DateTime(n, t_prime, None) = x { - assert_eq!(t, &t_prime); - Some(n) - } else { - None - } - }) - .collect::>>(), - ) - } - LiteralValue::Duration(_, _) => { - todo!() - } - LiteralValue::Series(_) => { - todo!() - } - _ => { - todo!() - } - } - } else { - Series::new( - name, - literal_values - .iter() - .map(|_| None) - .collect::>>(), - ) - } -} From 243f4f38495513e17386dc19127a7f53f2d2453e Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:04:18 +0200 Subject: [PATCH 5/9] Fix a bunch of clippy warnings --- maplib/src/ast.rs | 26 +++++------ maplib/src/mapping.rs | 14 ++---- maplib/src/mapping/constant_terms.rs | 12 ++--- maplib/src/mapping/default.rs | 6 +-- maplib/src/mapping/validation_inference.rs | 20 ++++----- maplib/src/parsing/nom_parsing.rs | 12 ++--- maplib/src/parsing/parser_test.rs | 44 +++++++++---------- maplib/src/parsing/parsing_ast.rs | 10 ++--- maplib/src/resolver.rs | 18 ++++---- maplib/src/templates.rs | 42 +++++++++--------- triplestore/src/conversion.rs | 2 +- triplestore/src/ntriples_write.rs | 18 ++++---- triplestore/src/sparql/lazy_graph_patterns.rs | 2 +- .../src/sparql/lazy_graph_patterns/path.rs | 21 +++++---- triplestore/src/sparql/query_context.rs | 19 ++++---- 15 files changed, 131 insertions(+), 135 deletions(-) diff --git a/maplib/src/ast.rs b/maplib/src/ast.rs index e451949..19002bd 100644 --- a/maplib/src/ast.rs +++ b/maplib/src/ast.rs @@ -106,15 +106,15 @@ impl Display for Parameter { #[derive(PartialEq, Debug, Clone)] pub enum PType { - BasicType(NamedNode, String), - LUBType(Box), - ListType(Box), - NEListType(Box), + Basic(NamedNode, String), + Lub(Box), + List(Box), + NEList(Box), } impl PType { pub fn is_blank_node(&self) -> bool { - if let PType::BasicType(nn, _) = &self { + if let PType::Basic(nn, _) = &self { if nn.as_str() == BLANK_NODE_IRI { return true; } @@ -123,7 +123,7 @@ impl PType { } pub fn is_iri(&self) -> bool { - if let PType::BasicType(nn, _) = self { + if let PType::Basic(nn, _) = self { if nn.as_ref() == xsd::ANY_URI { return true; } @@ -135,18 +135,18 @@ impl PType { impl Display for PType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - PType::BasicType(_nn, s) => { + PType::Basic(_nn, s) => { write!(f, "{}", s) } - PType::LUBType(lt) => { + PType::Lub(lt) => { let s = lt.to_string(); write!(f, "LUBType({})", s) } - PType::ListType(lt) => { + PType::List(lt) => { let s = lt.to_string(); write!(f, "ListType({})", s) } - PType::NEListType(lt) => { + PType::NEList(lt) => { let s = lt.to_string(); write!(f, "NEListType({})", s) } @@ -218,7 +218,7 @@ impl Display for ConstantTerm { #[derive(PartialEq, Debug, Clone)] pub enum ConstantLiteral { - IRI(NamedNode), + Iri(NamedNode), BlankNode(BlankNode), Literal(StottrLiteral), None, @@ -233,7 +233,7 @@ impl ConstantLiteral { impl Display for ConstantLiteral { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - ConstantLiteral::IRI(i) => std::fmt::Display::fmt(i, f), + ConstantLiteral::Iri(i) => std::fmt::Display::fmt(i, f), ConstantLiteral::BlankNode(bn) => 
std::fmt::Display::fmt(bn, f), ConstantLiteral::Literal(lit) => std::fmt::Display::fmt(lit, f), ConstantLiteral::None => { @@ -389,7 +389,7 @@ fn test_display_easy_template() { parameter_list: vec![Parameter { optional: true, non_blank: true, - ptype: Some(PType::BasicType( + ptype: Some(PType::Basic( xsd::DOUBLE.into_owned(), "xsd:double".to_string(), )), diff --git a/maplib/src/mapping.rs b/maplib/src/mapping.rs index d71fe99..48174c1 100644 --- a/maplib/src/mapping.rs +++ b/maplib/src/mapping.rs @@ -70,9 +70,9 @@ impl Mapping { template_dataset: &TemplateDataset, caching_folder: Option, ) -> Result { - match env_logger::try_init() { - _ => {} - } + #[allow(clippy::match_single_binding)] + match env_logger::try_init() { _=>{}}; + let use_caching = caching_folder.is_some(); Ok(Mapping { template_dataset: template_dataset.clone(), @@ -381,14 +381,8 @@ fn create_triples( let mut verb = None; for (k, sc) in static_columns { if k == "verb" { - if let ConstantTerm::Constant(c) = &sc.constant_term { - if let ConstantLiteral::IRI(nn) = c { + if let ConstantTerm::Constant(ConstantLiteral::Iri(nn)) = &sc.constant_term { verb = Some(nn.as_str().to_string()); - } else { - return Err(MappingError::InvalidPredicateConstant( - sc.constant_term.clone(), - )); - } } else { return Err(MappingError::InvalidPredicateConstant( sc.constant_term.clone(), diff --git a/maplib/src/mapping/constant_terms.rs b/maplib/src/mapping/constant_terms.rs index e727655..71fd41f 100644 --- a/maplib/src/mapping/constant_terms.rs +++ b/maplib/src/mapping/constant_terms.rs @@ -20,9 +20,9 @@ pub fn constant_to_expr( ) -> Result<(Expr, PType, RDFNodeType, Option), MappingError> { let (expr, ptype, rdf_node_type, language_tag) = match constant_term { ConstantTerm::Constant(c) => match c { - ConstantLiteral::IRI(iri) => ( + ConstantLiteral::Iri(iri) => ( Expr::Literal(LiteralValue::Utf8(iri.as_str().to_string())), - PType::BasicType(xsd::ANY_URI.into_owned(), "xsd:anyURI".to_string()), + PType::Basic(xsd::ANY_URI.into_owned(), "xsd:anyURI".to_string()), RDFNodeType::IRI, None, ), @@ -42,7 +42,7 @@ pub fn constant_to_expr( let language_tag = lit.language.as_ref().cloned(); ( Expr::Literal(LiteralValue::Series(SpecialEq::new(value_series))), - PType::BasicType( + PType::Basic( lit.data_type_iri.as_ref().unwrap().clone(), lit.data_type_iri.as_ref().unwrap().to_string(), ), @@ -52,7 +52,7 @@ pub fn constant_to_expr( } ConstantLiteral::None => ( Expr::Literal(LiteralValue::Null), - PType::BasicType(NamedNode::new_unchecked(NONE_IRI), NONE_IRI.to_string()), + PType::Basic(NamedNode::new_unchecked(NONE_IRI), NONE_IRI.to_string()), RDFNodeType::None, None, ), @@ -79,7 +79,7 @@ pub fn constant_to_expr( last_rdf_node_type = Some(rdf_node_type); expressions.push(constant_expr); } - let out_ptype = PType::ListType(Box::new(last_ptype.unwrap())); + let out_ptype = PType::List(Box::new(last_ptype.unwrap())); let out_rdf_node_type = last_rdf_node_type.as_ref().unwrap().clone(); if let RDFNodeType::Literal(_lit) = last_rdf_node_type.as_ref().unwrap() { @@ -149,7 +149,7 @@ pub fn constant_blank_node_to_series( false, ) .unwrap(), - PType::BasicType( + PType::Basic( NamedNode::new_unchecked(BLANK_NODE_IRI), BLANK_NODE_IRI.to_string(), ), diff --git a/maplib/src/mapping/default.rs b/maplib/src/mapping/default.rs index ba0d1f2..9c660aa 100644 --- a/maplib/src/mapping/default.rs +++ b/maplib/src/mapping/default.rs @@ -55,7 +55,7 @@ impl Mapping { params.push(Parameter { optional: has_null, non_blank: false, - ptype: Some(PType::BasicType( + ptype: 
Some(PType::Basic( xsd::ANY_URI.into_owned(), "xsd:anyURI".to_string(), )), @@ -84,7 +84,7 @@ impl Mapping { params.push(Parameter { optional: has_null, non_blank: false, - ptype: Some(PType::BasicType( + ptype: Some(PType::Basic( xsd::ANY_URI.into_owned(), "xsd:anyURI".to_string(), )), @@ -129,7 +129,7 @@ impl Mapping { Argument { list_expand: false, term: StottrTerm::ConstantTerm(ConstantTerm::Constant( - ConstantLiteral::IRI( + ConstantLiteral::Iri( NamedNode::new(format!("{}{}", &use_predicate_uri_prefix, c)) .unwrap(), ), diff --git a/maplib/src/mapping/validation_inference.rs b/maplib/src/mapping/validation_inference.rs index 42b4d02..8ae9b85 100644 --- a/maplib/src/mapping/validation_inference.rs +++ b/maplib/src/mapping/validation_inference.rs @@ -83,16 +83,16 @@ fn validate_infer_column_data_type( fn infer_rdf_node_type(ptype: &PType) -> RDFNodeType { match ptype { - PType::BasicType(b, _) => { + PType::Basic(b, _) => { if b.as_str() == xsd::ANY_URI.as_str() { RDFNodeType::IRI } else { RDFNodeType::Literal(b.clone()) } } - PType::LUBType(l) => infer_rdf_node_type(l), - PType::ListType(l) => infer_rdf_node_type(l), - PType::NEListType(l) => infer_rdf_node_type(l), + PType::Lub(l) => infer_rdf_node_type(l), + PType::List(l) => infer_rdf_node_type(l), + PType::NEList(l) => infer_rdf_node_type(l), } } @@ -151,16 +151,16 @@ fn validate_datatype( } }; match target_ptype { - PType::BasicType(bt, _) => { + PType::Basic(bt, _) => { if let DataType::List(_) = datatype { mismatch_error() } else { Ok(validate_basic_datatype(column_name, datatype, bt)?) } } - PType::LUBType(inner) => validate_if_series_list(inner), - PType::ListType(inner) => validate_if_series_list(inner), - PType::NEListType(inner) => validate_if_series_list(inner), + PType::Lub(inner) => validate_if_series_list(inner), + PType::List(inner) => validate_if_series_list(inner), + PType::NEList(inner) => validate_if_series_list(inner), } } @@ -196,11 +196,11 @@ pub fn polars_datatype_to_xsd_datatype(datatype: &DataType) -> PType { DataType::Duration(_) => xsd::DURATION, DataType::Categorical(_) => xsd::STRING, DataType::List(inner) => { - return PType::ListType(Box::new(polars_datatype_to_xsd_datatype(inner))) + return PType::List(Box::new(polars_datatype_to_xsd_datatype(inner))) } _ => { panic!("Unsupported datatype:{}", datatype) } }; - PType::BasicType(xsd_nn_ref.into_owned(), "".to_string()) + PType::Basic(xsd_nn_ref.into_owned(), "".to_string()) } diff --git a/maplib/src/parsing/nom_parsing.rs b/maplib/src/parsing/nom_parsing.rs index e0ee451..8f49141 100644 --- a/maplib/src/parsing/nom_parsing.rs +++ b/maplib/src/parsing/nom_parsing.rs @@ -296,24 +296,24 @@ fn ptype(p: &str) -> IResult<&str, UnresolvedPType> { fn list_type(l: &str) -> IResult<&str, UnresolvedPType> { let (l, (_, t, _)) = tuple((tag("List<"), ptype, tag(">")))(l)?; - Ok((l, UnresolvedPType::ListType(Box::new(t)))) + Ok((l, UnresolvedPType::List(Box::new(t)))) } fn ne_list_type(l: &str) -> IResult<&str, UnresolvedPType> { let (l, (_, t, _)) = tuple((tag("NEList<"), ptype, tag(">")))(l)?; - Ok((l, UnresolvedPType::NEListType(Box::new(t)))) + Ok((l, UnresolvedPType::NEList(Box::new(t)))) } fn lub_type(l: &str) -> IResult<&str, UnresolvedPType> { let (l, (_, t, _)) = tuple((tag("LUB<"), basic_type, tag(">")))(l)?; - Ok((l, UnresolvedPType::LUBType(Box::new(t)))) + Ok((l, UnresolvedPType::Lub(Box::new(t)))) } fn basic_type(b: &str) -> IResult<&str, UnresolvedPType> { let (b, t) = prefixed_name(b)?; Ok(( b, - 
UnresolvedPType::BasicType(ResolvesToNamedNode::PrefixedName(t)), + UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName(t)), )) } @@ -374,7 +374,7 @@ fn literal_as_constant_literal(l: &str) -> IResult<&str, UnresolvedConstantLiter fn iri_as_constant_literal(i: &str) -> IResult<&str, UnresolvedConstantLiteral> { let (i, iri) = iri(i)?; - Ok((i, UnresolvedConstantLiteral::IRI(iri))) + Ok((i, UnresolvedConstantLiteral::Iri(iri))) } fn blank_node_as_constant_literal(b: &str) -> IResult<&str, UnresolvedConstantLiteral> { @@ -960,7 +960,7 @@ fn test_instance() { UnresolvedArgument { list_expand: false, term: UnresolvedStottrTerm::ConstantTerm(UnresolvedConstantTerm::Constant( - UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName( + UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "foaf".to_string(), name: "Person".to_string(), diff --git a/maplib/src/parsing/parser_test.rs b/maplib/src/parsing/parser_test.rs index 94f33e5..3094a37 100644 --- a/maplib/src/parsing/parser_test.rs +++ b/maplib/src/parsing/parser_test.rs @@ -87,7 +87,7 @@ fn test_easy_template() { UnresolvedArgument { list_expand: false, term: UnresolvedStottrTerm::ConstantTerm(UnresolvedConstantTerm::Constant( - UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName( + UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "foaf".to_string(), name: "Person".to_string(), @@ -175,7 +175,7 @@ fn test_easy_template_extra_comma() { UnresolvedArgument { list_expand: false, term: UnresolvedStottrTerm::ConstantTerm(UnresolvedConstantTerm::Constant( - UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName( + UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "foaf".to_string(), name: "Person".to_string(), @@ -352,7 +352,7 @@ fn test_spec_type_1() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -392,7 +392,7 @@ fn test_spec_type_2() { parameter_list: vec![UnresolvedParameter { optional: true, non_blank: false, - ptype: Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -432,7 +432,7 @@ fn test_spec_type_3() { parameter_list: vec![UnresolvedParameter { optional: true, non_blank: true, - ptype: Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -476,7 +476,7 @@ fn test_spec_default_value_1() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -487,7 +487,7 @@ fn test_spec_default_value_1() { }, default_value: Some(UnresolvedDefaultValue { constant_term: UnresolvedConstantTerm::Constant( - UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName( + UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "p".to_string(), name: "pizza".to_string(), @@ -529,7 +529,7 @@ fn test_spec_default_value_2() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: 
Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -583,7 +583,7 @@ fn test_spec_default_value_3() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -803,7 +803,7 @@ fn test_spec_more_complex_types() { UnresolvedParameter { optional: false, non_blank: true, - ptype: Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -817,7 +817,7 @@ fn test_spec_more_complex_types() { UnresolvedParameter { optional: true, non_blank: true, - ptype: Some(UnresolvedPType::BasicType( + ptype: Some(UnresolvedPType::Basic( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "NamedIndividual".to_string(), @@ -828,7 +828,7 @@ fn test_spec_more_complex_types() { }, default_value: Some(UnresolvedDefaultValue { constant_term: UnresolvedConstantTerm::Constant( - UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName( + UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "ex".to_string(), name: "Class".to_string(), @@ -840,9 +840,9 @@ fn test_spec_more_complex_types() { UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::NEListType(Box::new( - UnresolvedPType::ListType(Box::new(UnresolvedPType::ListType(Box::new( - UnresolvedPType::BasicType(ResolvesToNamedNode::PrefixedName( + ptype: Some(UnresolvedPType::NEList(Box::new( + UnresolvedPType::List(Box::new(UnresolvedPType::List(Box::new( + UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), @@ -900,7 +900,7 @@ fn test_spec_example_1() { list_expand: false, term: UnresolvedStottrTerm::ConstantTerm(UnresolvedConstantTerm::ConstantList( vec![UnresolvedConstantTerm::Constant( - UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName( + UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "ex".to_string(), name: "template".to_string(), @@ -962,7 +962,7 @@ fn test_spec_example_2() { term: UnresolvedStottrTerm::ConstantTerm(UnresolvedConstantTerm::ConstantList( vec![UnresolvedConstantTerm::ConstantList(vec![ UnresolvedConstantTerm::ConstantList(vec![ - UnresolvedConstantTerm::Constant(UnresolvedConstantLiteral::IRI( + UnresolvedConstantTerm::Constant(UnresolvedConstantLiteral::Iri( ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "ex".to_string(), name: "template".to_string(), @@ -1016,7 +1016,7 @@ fn test_spec_example_3() { list_expand: false, term: UnresolvedStottrTerm::ConstantTerm(UnresolvedConstantTerm::ConstantList( vec![UnresolvedConstantTerm::Constant( - UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName( + UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName( PrefixedName { prefix: "ex".to_string(), name: "template".to_string(), @@ -1073,13 +1073,13 @@ fn test_spec_example_4() { UnresolvedParameter { optional: false, non_blank: true, - ptype: Some(UnresolvedPType::BasicType(ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: 
"Class".to_string(), }))), stottr_variable: StottrVariable { name: "pizza".to_string() }, default_value: Some(UnresolvedDefaultValue { - constant_term: UnresolvedConstantTerm::Constant(UnresolvedConstantLiteral::IRI(ResolvesToNamedNode::PrefixedName(PrefixedName { + constant_term: UnresolvedConstantTerm::Constant(UnresolvedConstantLiteral::Iri(ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "p".to_string(), name: "Grandiosa".to_string(), }))), @@ -1088,7 +1088,7 @@ fn test_spec_example_4() { UnresolvedParameter { optional: true, non_blank: true, - ptype: Some(UnresolvedPType::LUBType(Box::new(UnresolvedPType::BasicType(ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Lub(Box::new(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "NamedIndividual".to_string(), }))))), @@ -1098,7 +1098,7 @@ fn test_spec_example_4() { UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::ListType(Box::new(UnresolvedPType::BasicType(ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::List(Box::new(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName(PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), }))))), diff --git a/maplib/src/parsing/parsing_ast.rs b/maplib/src/parsing/parsing_ast.rs index 2f6cbb3..1dfb2b1 100644 --- a/maplib/src/parsing/parsing_ast.rs +++ b/maplib/src/parsing/parsing_ast.rs @@ -38,10 +38,10 @@ pub struct UnresolvedParameter { #[derive(PartialEq, Debug)] pub enum UnresolvedPType { - BasicType(ResolvesToNamedNode), - LUBType(Box), - ListType(Box), - NEListType(Box), + Basic(ResolvesToNamedNode), + Lub(Box), + List(Box), + NEList(Box), } #[derive(PartialEq, Debug)] @@ -57,7 +57,7 @@ pub enum UnresolvedConstantTerm { #[derive(PartialEq, Debug)] pub enum UnresolvedConstantLiteral { - IRI(ResolvesToNamedNode), + Iri(ResolvesToNamedNode), BlankNode(BlankNode), Literal(UnresolvedStottrLiteral), None, diff --git a/maplib/src/resolver.rs b/maplib/src/resolver.rs index 14461c1..eebeefe 100644 --- a/maplib/src/resolver.rs +++ b/maplib/src/resolver.rs @@ -220,10 +220,10 @@ fn resolve_constant_term( fn resolve_constant_literal( unresolved_constant_literal: &UnresolvedConstantLiteral, - prefix_map: &mut HashMap, + prefix_map: &HashMap, ) -> Result { Ok(match unresolved_constant_literal { - UnresolvedConstantLiteral::IRI(iri) => ConstantLiteral::IRI(resolve(iri, prefix_map)?), + UnresolvedConstantLiteral::Iri(iri) => ConstantLiteral::Iri(resolve(iri, prefix_map)?), UnresolvedConstantLiteral::BlankNode(bn) => ConstantLiteral::BlankNode(bn.clone()), UnresolvedConstantLiteral::Literal(lit) => { ConstantLiteral::Literal(resolve_stottr_literal(lit, prefix_map)?) 
@@ -234,7 +234,7 @@ fn resolve_constant_literal( fn resolve_stottr_literal( unresolved_stottr_literal: &UnresolvedStottrLiteral, - prefix_map: &mut HashMap, + prefix_map: &HashMap, ) -> Result { Ok(StottrLiteral { value: unresolved_stottr_literal.value.clone(), @@ -284,18 +284,18 @@ fn resolve_ptype( prefix_map: &mut HashMap, ) -> Result { Ok(match unresolved_ptype { - UnresolvedPType::BasicType(b) => PType::BasicType(resolve(b, prefix_map)?, get_name(b)), - UnresolvedPType::LUBType(l) => PType::LUBType(Box::new(resolve_ptype(l, prefix_map)?)), - UnresolvedPType::ListType(l) => PType::ListType(Box::new(resolve_ptype(l, prefix_map)?)), - UnresolvedPType::NEListType(l) => { - PType::NEListType(Box::new(resolve_ptype(l, prefix_map)?)) + UnresolvedPType::Basic(b) => PType::Basic(resolve(b, prefix_map)?, get_name(b)), + UnresolvedPType::Lub(l) => PType::Lub(Box::new(resolve_ptype(l, prefix_map)?)), + UnresolvedPType::List(l) => PType::List(Box::new(resolve_ptype(l, prefix_map)?)), + UnresolvedPType::NEList(l) => { + PType::NEList(Box::new(resolve_ptype(l, prefix_map)?)) } }) } fn resolve( resolves_to_named_node: &ResolvesToNamedNode, - prefix_map: &mut HashMap, + prefix_map: &HashMap, ) -> Result { Ok(match resolves_to_named_node { ResolvesToNamedNode::PrefixedName(pn) => { diff --git a/maplib/src/templates.rs b/maplib/src/templates.rs index 1300070..d84cabe 100644 --- a/maplib/src/templates.rs +++ b/maplib/src/templates.rs @@ -65,7 +65,7 @@ impl TemplateDataset { let ottr_triple_subject = Parameter { optional: false, non_blank: false, - ptype: Some(PType::BasicType( + ptype: Some(PType::Basic( xsd::ANY_URI.into_owned(), "xsd:anyURI".to_string(), )), @@ -77,7 +77,7 @@ impl TemplateDataset { let ottr_triple_verb = Parameter { optional: false, non_blank: false, - ptype: Some(PType::BasicType( + ptype: Some(PType::Basic( xsd::ANY_URI.into_owned(), "xsd:anyURI".to_string(), )), @@ -170,7 +170,7 @@ fn infer_template_types( for i in &mut template.pattern_list { let other = *templates .iter() - .find(|t| &t.signature.template_name == &i.template_name) + .find(|t| t.signature.template_name == i.template_name) .unwrap(); if i.argument_list.len() != other.signature.parameter_list.len() { return Err(TemplateError::InconsistentNumberOfArguments( @@ -194,18 +194,18 @@ fn infer_template_types( if !other_parameter.optional { changed = changed || lub_update( - &template.signature.template_name, - v, - my_parameter, - &PType::NEListType(Box::new(other_ptype.clone())), + &template.signature.template_name, + v, + my_parameter, + &PType::NEList(Box::new(other_ptype.clone())), )?; } else { changed = changed || lub_update( - &template.signature.template_name, - v, - my_parameter, - &PType::ListType(Box::new(other_ptype.clone())), + &template.signature.template_name, + v, + my_parameter, + &PType::List(Box::new(other_ptype.clone())), )?; } } else { @@ -265,32 +265,32 @@ fn lub( ) -> Result { if left == right { return Ok(left.clone()); - } else if let PType::NEListType(left_inner) = left { - if let PType::ListType(right_inner) = right { - return Ok(PType::NEListType(Box::new(lub( + } else if let PType::NEList(left_inner) = left { + if let PType::List(right_inner) = right { + return Ok(PType::NEList(Box::new(lub( template_name, variable, left_inner, right_inner, )?))); - } else if let PType::NEListType(right_inner) = right { - return Ok(PType::NEListType(Box::new(lub( + } else if let PType::NEList(right_inner) = right { + return Ok(PType::NEList(Box::new(lub( template_name, variable, left_inner, right_inner, )?))); } - 
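The `lub` cases above and just below encode a small type lattice for the renamed list constructors: combining a non-empty list bound with a plain list bound yields a non-empty list of the inner least upper bound, since `NEList<T>` is the stricter constraint. A toy reduction of just that rule, over a hypothetical standalone type (the real code compares named-node types and reports errors on mismatched atoms):

    #[derive(Clone, Debug, PartialEq)]
    enum Ty {
        Atom(&'static str),
        List(Box<Ty>),
        NEList(Box<Ty>),
    }

    fn lub(a: &Ty, b: &Ty) -> Ty {
        match (a, b) {
            // NEList absorbs List: the combined bound stays non-empty.
            (Ty::NEList(x), Ty::List(y))
            | (Ty::List(x), Ty::NEList(y))
            | (Ty::NEList(x), Ty::NEList(y)) => Ty::NEList(Box::new(lub(x, y))),
            (Ty::List(x), Ty::List(y)) => Ty::List(Box::new(lub(x, y))),
            _ => a.clone(), // atoms: assumed equal here; real code errors on mismatch
        }
    }

    fn main() {
        let l = Ty::List(Box::new(Ty::Atom("owl:Class")));
        let ne = Ty::NEList(Box::new(Ty::Atom("owl:Class")));
        assert_eq!(lub(&l, &ne), Ty::NEList(Box::new(Ty::Atom("owl:Class"))));
    }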
} else if let PType::ListType(left_inner) = left { - if let PType::NEListType(right_inner) = right { - return Ok(PType::NEListType(Box::new(lub( + } else if let PType::List(left_inner) = left { + if let PType::NEList(right_inner) = right { + return Ok(PType::NEList(Box::new(lub( template_name, variable, left_inner, right_inner, )?))); - } else if let PType::ListType(right_inner) = right { - return Ok(PType::ListType(Box::new(lub( + } else if let PType::List(right_inner) = right { + return Ok(PType::List(Box::new(lub( template_name, variable, left_inner, diff --git a/triplestore/src/conversion.rs b/triplestore/src/conversion.rs index b5ac2cd..2961874 100644 --- a/triplestore/src/conversion.rs +++ b/triplestore/src/conversion.rs @@ -57,7 +57,7 @@ pub fn convert_to_string(series: &Series) -> Option { Some(series.cast(&DataType::Utf8).unwrap()) } -fn hack_format_timestamp_with_timezone(series: &Series, tz: &mut TimeZone) -> Series { +fn hack_format_timestamp_with_timezone(series: &Series, tz: &TimeZone) -> Series { let timezone_opt: Result = tz.parse(); if let Ok(timezone) = timezone_opt { let datetime_strings = Series::from_iter( diff --git a/triplestore/src/ntriples_write.rs b/triplestore/src/ntriples_write.rs index 493c7b7..ce74e67 100644 --- a/triplestore/src/ntriples_write.rs +++ b/triplestore/src/ntriples_write.rs @@ -51,8 +51,8 @@ impl Triplestore { ) -> Result<(), TriplestoreError> { self.deduplicate()?; let n_threads = POOL.current_num_threads(); - let mut any_value_iter_pool = LowContentionPool::>::new(n_threads); - let mut write_buffer_pool = LowContentionPool::>::new(n_threads); + let any_value_iter_pool = LowContentionPool::>::new(n_threads); + let write_buffer_pool = LowContentionPool::>::new(n_threads); for (property, map) in &mut self.df_map { for (rdf_node_type, tt) in map { @@ -73,8 +73,8 @@ impl Triplestore { chunk_size, triple_type.clone(), n_threads, - &mut any_value_iter_pool, - &mut write_buffer_pool, + &any_value_iter_pool, + &write_buffer_pool, )?; } } else if let Some(paths) = &tt.df_paths { @@ -91,8 +91,8 @@ impl Triplestore { chunk_size, triple_type.clone(), n_threads, - &mut any_value_iter_pool, - &mut write_buffer_pool, + &any_value_iter_pool, + &write_buffer_pool, )?; } } @@ -104,14 +104,14 @@ impl Triplestore { fn write_ntriples_for_df( df: &DataFrame, - verb: &String, + verb: &str, dt: &Option, writer: &mut W, chunk_size: usize, triple_type: TripleType, n_threads: usize, - any_value_iter_pool: &mut LowContentionPool>, - write_buffer_pool: &mut LowContentionPool>, + any_value_iter_pool: &LowContentionPool>, + write_buffer_pool: &LowContentionPool>, ) -> Result<(), TriplestoreError> { let dt_str = if triple_type == TripleType::NonStringProperty { if let Some(nn) = dt { diff --git a/triplestore/src/sparql/lazy_graph_patterns.rs b/triplestore/src/sparql/lazy_graph_patterns.rs index a6c242b..cde3c23 100644 --- a/triplestore/src/sparql/lazy_graph_patterns.rs +++ b/triplestore/src/sparql/lazy_graph_patterns.rs @@ -31,7 +31,7 @@ impl Triplestore { match graph_pattern { GraphPattern::Bgp { patterns } => { let mut updated_solution_mappings = solution_mappings; - let bgp_context = context.extension_with(PathEntry::BGP); + let bgp_context = context.extension_with(PathEntry::Bgp); for tp in patterns { updated_solution_mappings = Some(self.lazy_triple_pattern( updated_solution_mappings, diff --git a/triplestore/src/sparql/lazy_graph_patterns/path.rs b/triplestore/src/sparql/lazy_graph_patterns/path.rs index 5f2cfee..963409d 100644 --- 
a/triplestore/src/sparql/lazy_graph_patterns/path.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/path.rs @@ -41,9 +41,9 @@ struct DFPathReturn { impl SubjectOrObject { fn flip(&self) -> SubjectOrObject { - match self { - &SubjectOrObject::Subject => SubjectOrObject::Object, - &SubjectOrObject::Object => SubjectOrObject::Subject, + match *self { + SubjectOrObject::Subject => SubjectOrObject::Object, + SubjectOrObject::Object => SubjectOrObject::Subject, } } } @@ -260,19 +260,18 @@ impl Triplestore { } } } - let df; - if !dfs.is_empty() { - df = concat_df(dfs.as_slice()) + let df= if !dfs.is_empty() { + concat_df(dfs.as_slice()) .unwrap() .unique(None, UniqueKeepStrategy::First, None) - .unwrap(); + .unwrap() } else { - df = DataFrame::new(vec![ + DataFrame::new(vec![ Series::new_empty("subject", &DataType::Categorical(None)), Series::new_empty("object", &DataType::Categorical(None)), ]) - .unwrap(); - } + .unwrap() + }; Ok(HashMap::from([(nns_name(nns), df)])) } } @@ -336,7 +335,7 @@ impl Triplestore { fn find_lookup(map: &HashMap) -> DataFrame { let mut all_values = vec![]; - for (_k, v) in map { + for v in map.values() { let mut obj = v.column("object").unwrap().unique().unwrap(); obj.rename("value"); let mut sub = v.column("subject").unwrap().unique().unwrap(); diff --git a/triplestore/src/sparql/query_context.rs b/triplestore/src/sparql/query_context.rs index 798a713..31d3b10 100644 --- a/triplestore/src/sparql/query_context.rs +++ b/triplestore/src/sparql/query_context.rs @@ -4,7 +4,7 @@ use std::fmt::Formatter; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum PathEntry { - BGP, + Bgp, UnionLeftSide, UnionRightSide, JoinLeftSide, @@ -70,7 +70,7 @@ pub enum PathEntry { impl fmt::Display for PathEntry { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - PathEntry::BGP => { + PathEntry::Bgp => { write!(f, "BGP") } PathEntry::UnionLeftSide => { @@ -263,6 +263,12 @@ pub struct Context { pub path: Vec, } +impl Default for Context { + fn default() -> Self { + Self::new() + } +} + impl Context { pub fn in_scope(&self, other: &Context, partial_scope: bool) -> bool { let min_i = min(self.path.len(), other.path.len()); @@ -301,7 +307,7 @@ impl Context { fn exposes_variables(path_entry: &PathEntry) -> bool { match path_entry { - PathEntry::BGP => true, + PathEntry::Bgp => true, PathEntry::UnionLeftSide => true, PathEntry::UnionRightSide => true, PathEntry::JoinLeftSide => true, @@ -367,7 +373,7 @@ fn exposes_variables(path_entry: &PathEntry) -> bool { fn maintains_full_downward_scope(path_entry: &PathEntry) -> bool { match path_entry { - PathEntry::BGP => false, + PathEntry::Bgp => false, PathEntry::UnionLeftSide => false, PathEntry::UnionRightSide => false, PathEntry::JoinLeftSide => false, @@ -433,10 +439,7 @@ fn maintains_full_downward_scope(path_entry: &PathEntry) -> bool { impl Context { pub fn new() -> Context { - Context { - string_rep: "".to_string(), - path: vec![], - } + Context::default() } pub fn from_path(path: Vec) -> Context { From 37e4c3f16103ed65ac21468492ed56a4682b86ae Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:04:30 +0200 Subject: [PATCH 6/9] Fix a bunch of clippy warnings --- maplib/src/mapping.rs | 6 +- maplib/src/parsing/parser_test.rs | 80 +++++++++---------- maplib/src/resolver.rs | 4 +- maplib/src/templates.rs | 16 ++-- .../src/sparql/lazy_graph_patterns/path.rs | 2 +- .../src/sparql/lazy_graph_patterns/triple.rs | 2 +- 6 files changed, 54 insertions(+), 56 deletions(-) diff 
--git a/maplib/src/mapping.rs b/maplib/src/mapping.rs index 48174c1..61c1b31 100644 --- a/maplib/src/mapping.rs +++ b/maplib/src/mapping.rs @@ -71,7 +71,9 @@ impl Mapping { caching_folder: Option, ) -> Result { #[allow(clippy::match_single_binding)] - match env_logger::try_init() { _=>{}}; + match env_logger::try_init() { + _ => {} + }; let use_caching = caching_folder.is_some(); Ok(Mapping { @@ -382,7 +384,7 @@ fn create_triples( for (k, sc) in static_columns { if k == "verb" { if let ConstantTerm::Constant(ConstantLiteral::Iri(nn)) = &sc.constant_term { - verb = Some(nn.as_str().to_string()); + verb = Some(nn.as_str().to_string()); } else { return Err(MappingError::InvalidPredicateConstant( sc.constant_term.clone(), diff --git a/maplib/src/parsing/parser_test.rs b/maplib/src/parsing/parser_test.rs index 3094a37..feabce8 100644 --- a/maplib/src/parsing/parser_test.rs +++ b/maplib/src/parsing/parser_test.rs @@ -352,12 +352,12 @@ fn test_spec_type_1() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), - }), - )), + }, + ))), stottr_variable: StottrVariable { name: "pizza".to_string(), }, @@ -392,12 +392,12 @@ fn test_spec_type_2() { parameter_list: vec![UnresolvedParameter { optional: true, non_blank: false, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), - }), - )), + }, + ))), stottr_variable: StottrVariable { name: "pizza".to_string(), }, @@ -432,12 +432,12 @@ fn test_spec_type_3() { parameter_list: vec![UnresolvedParameter { optional: true, non_blank: true, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), - }), - )), + }, + ))), stottr_variable: StottrVariable { name: "pizza".to_string(), }, @@ -476,12 +476,12 @@ fn test_spec_default_value_1() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), - }), - )), + }, + ))), stottr_variable: StottrVariable { name: "pizza".to_string(), }, @@ -529,12 +529,12 @@ fn test_spec_default_value_2() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), - }), - )), + }, + ))), stottr_variable: StottrVariable { name: "pizza".to_string(), }, @@ -583,12 +583,12 @@ fn test_spec_default_value_3() { parameter_list: vec![UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), - }), - )), + }, + ))), 
stottr_variable: StottrVariable { name: "pizza".to_string(), }, @@ -803,12 +803,12 @@ fn test_spec_more_complex_types() { UnresolvedParameter { optional: false, non_blank: true, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "Class".to_string(), - }), - )), + }, + ))), stottr_variable: StottrVariable { name: "pizza".to_string(), }, @@ -817,12 +817,12 @@ fn test_spec_more_complex_types() { UnresolvedParameter { optional: true, non_blank: true, - ptype: Some(UnresolvedPType::Basic( - ResolvesToNamedNode::PrefixedName(PrefixedName { + ptype: Some(UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( + PrefixedName { prefix: "owl".to_string(), name: "NamedIndividual".to_string(), - }), - )), + }, + ))), stottr_variable: StottrVariable { name: "country".to_string(), }, @@ -840,16 +840,14 @@ fn test_spec_more_complex_types() { UnresolvedParameter { optional: false, non_blank: false, - ptype: Some(UnresolvedPType::NEList(Box::new( - UnresolvedPType::List(Box::new(UnresolvedPType::List(Box::new( - UnresolvedPType::Basic(ResolvesToNamedNode::PrefixedName( - PrefixedName { - prefix: "owl".to_string(), - name: "Class".to_string(), - }, - )), + ptype: Some(UnresolvedPType::NEList(Box::new(UnresolvedPType::List( + Box::new(UnresolvedPType::List(Box::new(UnresolvedPType::Basic( + ResolvesToNamedNode::PrefixedName(PrefixedName { + prefix: "owl".to_string(), + name: "Class".to_string(), + }), )))), - ))), + )))), stottr_variable: StottrVariable { name: "toppings".to_string(), }, diff --git a/maplib/src/resolver.rs b/maplib/src/resolver.rs index eebeefe..0d76eb1 100644 --- a/maplib/src/resolver.rs +++ b/maplib/src/resolver.rs @@ -287,9 +287,7 @@ fn resolve_ptype( UnresolvedPType::Basic(b) => PType::Basic(resolve(b, prefix_map)?, get_name(b)), UnresolvedPType::Lub(l) => PType::Lub(Box::new(resolve_ptype(l, prefix_map)?)), UnresolvedPType::List(l) => PType::List(Box::new(resolve_ptype(l, prefix_map)?)), - UnresolvedPType::NEList(l) => { - PType::NEList(Box::new(resolve_ptype(l, prefix_map)?)) - } + UnresolvedPType::NEList(l) => PType::NEList(Box::new(resolve_ptype(l, prefix_map)?)), }) } diff --git a/maplib/src/templates.rs b/maplib/src/templates.rs index d84cabe..4615cba 100644 --- a/maplib/src/templates.rs +++ b/maplib/src/templates.rs @@ -194,18 +194,18 @@ fn infer_template_types( if !other_parameter.optional { changed = changed || lub_update( - &template.signature.template_name, - v, - my_parameter, - &PType::NEList(Box::new(other_ptype.clone())), + &template.signature.template_name, + v, + my_parameter, + &PType::NEList(Box::new(other_ptype.clone())), )?; } else { changed = changed || lub_update( - &template.signature.template_name, - v, - my_parameter, - &PType::List(Box::new(other_ptype.clone())), + &template.signature.template_name, + v, + my_parameter, + &PType::List(Box::new(other_ptype.clone())), )?; } } else { diff --git a/triplestore/src/sparql/lazy_graph_patterns/path.rs b/triplestore/src/sparql/lazy_graph_patterns/path.rs index 963409d..d661d2e 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/path.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/path.rs @@ -260,7 +260,7 @@ impl Triplestore { } } } - let df= if !dfs.is_empty() { + let df = if !dfs.is_empty() { concat_df(dfs.as_slice()) .unwrap() .unique(None, UniqueKeepStrategy::First, None) diff --git a/triplestore/src/sparql/lazy_graph_patterns/triple.rs 
b/triplestore/src/sparql/lazy_graph_patterns/triple.rs index e631fd7..9fecc40 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/triple.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/triple.rs @@ -33,7 +33,7 @@ impl Triplestore { xsd::ANY_URI => Some(RDFNodeType::IRI), _ => Some(RDFNodeType::Literal(l.datatype().into_owned())), }, - TermPattern::Variable(_) => None + TermPattern::Variable(_) => None, }; let subject_rename = get_keep_rename_term_pattern(&triple_pattern.subject); let verb_rename = get_keep_rename_named_node_pattern(&triple_pattern.predicate); From 0307eed90b8d3a4a697e3013de14eba87932e9b9 Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:37:24 +0200 Subject: [PATCH 7/9] Green tests again --- .github/workflows/python_tests.yml | 2 +- arrow_python_utils/Cargo.toml | 2 +- maplib/Cargo.toml | 10 +- py_maplib/tests/test_blank_nodes.py | 5 +- representation/src/literals.rs | 4 +- triplestore/Cargo.toml | 10 +- triplestore/src/conversion.rs | 15 +- triplestore/src/ntriples_write.rs | 18 +- .../src/sparql/lazy_graph_patterns/path.rs | 21 +- .../src/sparql/lazy_graph_patterns/triple.rs | 1 + triplestore/src/sparql/query_context.rs | 11 +- triplestore/src/sparql/sparql_to_polars.rs | 308 +++++++++++++++++- 12 files changed, 351 insertions(+), 56 deletions(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index ff53cd0..affdb49 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -2,7 +2,7 @@ name: Python tests on: push: - branches: [ main ] + branches: [ main, "feature/*" ] pull_request: branches: [ main ] diff --git a/arrow_python_utils/Cargo.toml b/arrow_python_utils/Cargo.toml index e8ead39..79b00c3 100644 --- a/arrow_python_utils/Cargo.toml +++ b/arrow_python_utils/Cargo.toml @@ -8,4 +8,4 @@ pyo3 = {version = "0.19.2", features = ["extension-module"]} polars-core = {version="0.32.1", features=["dtype-array", "dtype-categorical", "dtype-date", "dtype-datetime", "dtype-decimal", "dtype-duration", "dtype-i8", "dtype-i16", "dtype-struct", "dtype-time", "dtype-u8", "dtype-u16"]} thiserror="1.0.31" -simple-error = "0.2.3" +simple-error = "0.3.0" diff --git a/maplib/Cargo.toml b/maplib/Cargo.toml index 1fb6236..30f3ef5 100644 --- a/maplib/Cargo.toml +++ b/maplib/Cargo.toml @@ -16,12 +16,12 @@ oxrdf = "0.1.0" polars = {version="0.32.1", features=["semi_anti_join", "abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "horizontal_concat", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant"] } unic-char-range = "0.9.0" log="0.4.19" -rio_turtle = "0.7.1" -rio_api = "0.7.1" +rio_turtle = "0.8.4" +rio_api = "0.8.4" polars-utils = "0.32.1" polars-core = "0.32.1" chrono = "0.4" -chrono-tz = "0.6" +chrono-tz = "0.8" uuid = {version = "1.1.2", features = [ "v4", # Lets you generate random UUIDs "fast-rng", # Use a faster (but still sufficiently random) RNG @@ -30,5 +30,5 @@ thiserror="1.0.31" env_logger = "0.10.0" [dev-dependencies] -rstest = "0.14.0" -serial_test = "0.8.0" \ No newline at end of file +rstest = "0.18.2" +serial_test = "2.0.0" \ No newline at end of file diff --git a/py_maplib/tests/test_blank_nodes.py b/py_maplib/tests/test_blank_nodes.py index 99e41b8..20103db 100644 --- a/py_maplib/tests/test_blank_nodes.py +++ b/py_maplib/tests/test_blank_nodes.py @@ -38,6 +38,7 @@ def blank_person_mapping(): def 
test_simple_query_no_error(blank_person_mapping): + print("Hello!") df = blank_person_mapping.query(""" PREFIX foaf: @@ -46,6 +47,6 @@ def test_simple_query_no_error(blank_person_mapping): ?p foaf:lastName ?lastName . } ORDER BY ?p ?lastName """) - expected_df = pl.DataFrame({"p": ["_:person_l0_r0", "_:person_l0_r1"], - "lastName": ["Strong", "Brite"]}) + expected_df = pl.DataFrame({"p": ["_:person_l0_p0_r0", "_:person_l0_p0_r1"], + "lastName": ["Strong", "Brite"]}) assert_frame_equal(df, expected_df) diff --git a/representation/src/literals.rs b/representation/src/literals.rs index b1bc526..bb530b5 100644 --- a/representation/src/literals.rs +++ b/representation/src/literals.rs @@ -38,12 +38,12 @@ pub fn sparql_literal_to_any_value( } else if datatype == xsd::DATE_TIME { let dt_without_tz = value.parse::(); if let Ok(dt) = dt_without_tz { - AnyValue::Datetime(dt.timestamp_nanos(), TimeUnit::Nanoseconds, &None) + AnyValue::Datetime(dt.timestamp_nanos_opt().unwrap(), TimeUnit::Nanoseconds, &None) } else { let dt_without_tz = value.parse::>(); if let Ok(dt) = dt_without_tz { AnyValue::Datetime( - dt.naive_utc().timestamp_nanos(), + dt.naive_utc().timestamp_nanos_opt().unwrap(), TimeUnit::Nanoseconds, &None, ) diff --git a/triplestore/Cargo.toml b/triplestore/Cargo.toml index b9fb64e..ebf73d8 100644 --- a/triplestore/Cargo.toml +++ b/triplestore/Cargo.toml @@ -13,12 +13,12 @@ spargebra = "0.2.2" oxrdf = "0.1.0" polars = {version="0.32.1", features=["performant", "semi_anti_join","abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "horizontal_concat", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "diagonal_concat", "cross_join", "cum_agg"] } log="0.4.19" -rio_turtle = "0.7.1" -rio_api = "0.7.1" +rio_turtle = "0.8.4" +rio_api = "0.8.4" polars-utils = "0.32.1" polars-core = "0.32.1" chrono = "0.4" -chrono-tz = "0.6" +chrono-tz = "0.8" uuid = {version = "1.1.2", features = [ "v4", # Lets you generate random UUIDs "fast-rng", # Use a faster (but still sufficiently random) RNG @@ -27,5 +27,5 @@ thiserror="1.0.31" env_logger = "0.10.0" [dev-dependencies] -rstest = "0.14.0" -serial_test = "0.8.0" \ No newline at end of file +rstest = "0.18.2" +serial_test = "2.0.0" \ No newline at end of file diff --git a/triplestore/src/conversion.rs b/triplestore/src/conversion.rs index 2961874..91906ae 100644 --- a/triplestore/src/conversion.rs +++ b/triplestore/src/conversion.rs @@ -57,7 +57,7 @@ pub fn convert_to_string(series: &Series) -> Option { Some(series.cast(&DataType::Utf8).unwrap()) } -fn hack_format_timestamp_with_timezone(series: &Series, tz: &TimeZone) -> Series { +fn hack_format_timestamp_with_timezone(series: &Series, tz: &mut TimeZone) -> Series { let timezone_opt: Result = tz.parse(); if let Ok(timezone) = timezone_opt { let datetime_strings = Series::from_iter( @@ -70,17 +70,8 @@ fn hack_format_timestamp_with_timezone(series: &Series, tz: &TimeZone) -> Series format!( "{}", timezone - .with_ymd_and_hms( - x.year(), - x.month(), - x.day(), - x.hour(), - x.minute(), - x.second() - ) - .unwrap() - .with_nanosecond(x.nanosecond()) - .unwrap() + .ymd(x.year(), x.month(), x.day()) + .and_hms_nano(x.hour(), x.minute(), x.second(), x.nanosecond()) .format(XSD_DATETIME_WITH_TZ_FORMAT) ) }), diff --git a/triplestore/src/ntriples_write.rs b/triplestore/src/ntriples_write.rs index ce74e67..493c7b7 100644 --- a/triplestore/src/ntriples_write.rs +++ b/triplestore/src/ntriples_write.rs @@ -51,8 +51,8 @@ impl Triplestore { ) -> Result<(), 
TriplestoreError> { self.deduplicate()?; let n_threads = POOL.current_num_threads(); - let any_value_iter_pool = LowContentionPool::>::new(n_threads); - let write_buffer_pool = LowContentionPool::>::new(n_threads); + let mut any_value_iter_pool = LowContentionPool::>::new(n_threads); + let mut write_buffer_pool = LowContentionPool::>::new(n_threads); for (property, map) in &mut self.df_map { for (rdf_node_type, tt) in map { @@ -73,8 +73,8 @@ impl Triplestore { chunk_size, triple_type.clone(), n_threads, - &any_value_iter_pool, - &write_buffer_pool, + &mut any_value_iter_pool, + &mut write_buffer_pool, )?; } } else if let Some(paths) = &tt.df_paths { @@ -91,8 +91,8 @@ impl Triplestore { chunk_size, triple_type.clone(), n_threads, - &any_value_iter_pool, - &write_buffer_pool, + &mut any_value_iter_pool, + &mut write_buffer_pool, )?; } } @@ -104,14 +104,14 @@ impl Triplestore { fn write_ntriples_for_df( df: &DataFrame, - verb: &str, + verb: &String, dt: &Option, writer: &mut W, chunk_size: usize, triple_type: TripleType, n_threads: usize, - any_value_iter_pool: &LowContentionPool>, - write_buffer_pool: &LowContentionPool>, + any_value_iter_pool: &mut LowContentionPool>, + write_buffer_pool: &mut LowContentionPool>, ) -> Result<(), TriplestoreError> { let dt_str = if triple_type == TripleType::NonStringProperty { if let Some(nn) = dt { diff --git a/triplestore/src/sparql/lazy_graph_patterns/path.rs b/triplestore/src/sparql/lazy_graph_patterns/path.rs index d661d2e..5f2cfee 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/path.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/path.rs @@ -41,9 +41,9 @@ struct DFPathReturn { impl SubjectOrObject { fn flip(&self) -> SubjectOrObject { - match *self { - SubjectOrObject::Subject => SubjectOrObject::Object, - SubjectOrObject::Object => SubjectOrObject::Subject, + match self { + &SubjectOrObject::Subject => SubjectOrObject::Object, + &SubjectOrObject::Object => SubjectOrObject::Subject, } } } @@ -260,18 +260,19 @@ impl Triplestore { } } } - let df = if !dfs.is_empty() { - concat_df(dfs.as_slice()) + let df; + if !dfs.is_empty() { + df = concat_df(dfs.as_slice()) .unwrap() .unique(None, UniqueKeepStrategy::First, None) - .unwrap() + .unwrap(); } else { - DataFrame::new(vec![ + df = DataFrame::new(vec![ Series::new_empty("subject", &DataType::Categorical(None)), Series::new_empty("object", &DataType::Categorical(None)), ]) - .unwrap() - }; + .unwrap(); + } Ok(HashMap::from([(nns_name(nns), df)])) } } @@ -335,7 +336,7 @@ impl Triplestore { fn find_lookup(map: &HashMap) -> DataFrame { let mut all_values = vec![]; - for v in map.values() { + for (_k, v) in map { let mut obj = v.column("object").unwrap().unique().unwrap(); obj.rename("value"); let mut sub = v.column("subject").unwrap().unique().unwrap(); diff --git a/triplestore/src/sparql/lazy_graph_patterns/triple.rs b/triplestore/src/sparql/lazy_graph_patterns/triple.rs index 9fecc40..605a359 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/triple.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/triple.rs @@ -34,6 +34,7 @@ impl Triplestore { _ => Some(RDFNodeType::Literal(l.datatype().into_owned())), }, TermPattern::Variable(_) => None, + _ => None, }; let subject_rename = get_keep_rename_term_pattern(&triple_pattern.subject); let verb_rename = get_keep_rename_named_node_pattern(&triple_pattern.predicate); diff --git a/triplestore/src/sparql/query_context.rs b/triplestore/src/sparql/query_context.rs index 31d3b10..42e94e6 100644 --- a/triplestore/src/sparql/query_context.rs +++ 
b/triplestore/src/sparql/query_context.rs @@ -263,12 +263,6 @@ pub struct Context { pub path: Vec, } -impl Default for Context { - fn default() -> Self { - Self::new() - } -} - impl Context { pub fn in_scope(&self, other: &Context, partial_scope: bool) -> bool { let min_i = min(self.path.len(), other.path.len()); @@ -439,7 +433,10 @@ fn maintains_full_downward_scope(path_entry: &PathEntry) -> bool { impl Context { pub fn new() -> Context { - Context::default() + Context { + string_rep: "".to_string(), + path: vec![], + } } pub fn from_path(path: Vec) -> Context { diff --git a/triplestore/src/sparql/sparql_to_polars.rs b/triplestore/src/sparql/sparql_to_polars.rs index fc989f0..46e7143 100644 --- a/triplestore/src/sparql/sparql_to_polars.rs +++ b/triplestore/src/sparql/sparql_to_polars.rs @@ -1,10 +1,20 @@ use chrono::NaiveDate; use oxrdf::vocab::xsd; -use oxrdf::{Literal, NamedNode}; +use oxrdf::{Literal, NamedNode, Term}; use polars::export::chrono::{DateTime, NaiveDateTime, Utc}; -use polars::prelude::{LiteralValue, TimeUnit}; +use polars::prelude::{LiteralValue, NamedFrom, Series, TimeUnit}; use std::str::FromStr; +pub(crate) fn sparql_term_to_polars_literal_value(term: &Term) -> polars::prelude::LiteralValue { + match term { + Term::NamedNode(named_node) => sparql_named_node_to_polars_literal_value(named_node), + Term::Literal(lit) => sparql_literal_to_polars_literal_value(lit), + _ => { + panic!("Not supported") + } + } +} + pub(crate) fn sparql_named_node_to_polars_literal_value(named_node: &NamedNode) -> LiteralValue { LiteralValue::Utf8(named_node.as_str().to_string()) } @@ -70,3 +80,297 @@ pub(crate) fn sparql_literal_to_polars_literal_value(lit: &Literal) -> LiteralVa }; literal_value } + +fn polars_literal_values_to_series(literal_values: Vec, name: &str) -> Series { + let first_non_null_opt = literal_values + .iter() + .find(|x| &&LiteralValue::Null != x) + .cloned(); + let first_null_opt = literal_values + .iter() + .find(|x| &&LiteralValue::Null == x) + .cloned(); + if let (Some(first_non_null), None) = (&first_non_null_opt, &first_null_opt) { + match first_non_null { + LiteralValue::Boolean(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Boolean(b) = x { + b + } else { + panic!("Not possible") + } + }) + .collect::>(), + ), + LiteralValue::Utf8(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Utf8(u) = x { + u + } else { + panic!("Not possible") + } + }) + .collect::>(), + ), + LiteralValue::UInt32(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::UInt32(i) = x { + i + } else { + panic!("Not possible") + } + }) + .collect::>(), + ), + LiteralValue::UInt64(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::UInt64(i) = x { + i + } else { + panic!("Not possible") + } + }) + .collect::>(), + ), + LiteralValue::Int32(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Int32(i) = x { + i + } else { + panic!("Not possible") + } + }) + .collect::>(), + ), + LiteralValue::Int64(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Int64(i) = x { + i + } else { + panic!("Not possible") + } + }) + .collect::>(), + ), + LiteralValue::Float32(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Float32(f) = x { + f + } else { + panic!("Not possible") + } + }) + .collect::>(), + ), + 
LiteralValue::Float64(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Float64(f) = x { + Some(f) + } else { + panic!("Not possible") + } + }) + .collect::>>(), + ), + LiteralValue::Range { .. } => { + todo!() + } + LiteralValue::DateTime(_, t, None) => + //TODO: Assert time unit lik?? + { + Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::DateTime(n, t_prime, None) = x { + assert_eq!(t, &t_prime); + n + } else { + panic!("Not possible") + } + }) + .collect::>(), + ) + } + LiteralValue::Duration(_, _) => { + todo!() + } + LiteralValue::Series(_) => { + todo!() + } + _ => { + todo!() + } + } + } else if let (Some(first_non_null), Some(_)) = (&first_non_null_opt, &first_null_opt) { + match first_non_null { + LiteralValue::Boolean(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Boolean(b) = x { + Some(b) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::Utf8(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Utf8(u) = x { + Some(u) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::UInt32(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::UInt32(i) = x { + Some(i) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::UInt64(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::UInt64(i) = x { + Some(i) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::Int32(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Int32(i) = x { + Some(i) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::Int64(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Int64(i) = x { + Some(i) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::Float32(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Float32(f) = x { + Some(f) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::Float64(_) => Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::Float64(f) = x { + Some(f) + } else { + None + } + }) + .collect::>>(), + ), + LiteralValue::Range { .. } => { + todo!() + } + LiteralValue::DateTime(_, t, None) => + //TODO: Assert time unit lik?? 
+ { + Series::new( + name, + literal_values + .into_iter() + .map(|x| { + if let LiteralValue::DateTime(n, t_prime, None) = x { + assert_eq!(t, &t_prime); + Some(n) + } else { + None + } + }) + .collect::>>(), + ) + } + LiteralValue::Duration(_, _) => { + todo!() + } + LiteralValue::Series(_) => { + todo!() + } + _ => { + todo!() + } + } + } else { + Series::new( + name, + literal_values + .iter() + .map(|_| None) + .collect::>>(), + ) + } +} From 6ebd55d00cbf17ab71c8f04d15b692d5afad09d6 Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:55:27 +0200 Subject: [PATCH 8/9] Fix some clippy warnings --- representation/src/literals.rs | 6 +++++- triplestore/src/ntriples_write.rs | 2 +- .../src/sparql/lazy_graph_patterns/path.rs | 21 +++++++++---------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/representation/src/literals.rs b/representation/src/literals.rs index bb530b5..194f4f1 100644 --- a/representation/src/literals.rs +++ b/representation/src/literals.rs @@ -38,7 +38,11 @@ pub fn sparql_literal_to_any_value( } else if datatype == xsd::DATE_TIME { let dt_without_tz = value.parse::(); if let Ok(dt) = dt_without_tz { - AnyValue::Datetime(dt.timestamp_nanos_opt().unwrap(), TimeUnit::Nanoseconds, &None) + AnyValue::Datetime( + dt.timestamp_nanos_opt().unwrap(), + TimeUnit::Nanoseconds, + &None, + ) } else { let dt_without_tz = value.parse::>(); if let Ok(dt) = dt_without_tz { diff --git a/triplestore/src/ntriples_write.rs b/triplestore/src/ntriples_write.rs index 493c7b7..67fec35 100644 --- a/triplestore/src/ntriples_write.rs +++ b/triplestore/src/ntriples_write.rs @@ -104,7 +104,7 @@ impl Triplestore { fn write_ntriples_for_df( df: &DataFrame, - verb: &String, + verb: &str, dt: &Option, writer: &mut W, chunk_size: usize, diff --git a/triplestore/src/sparql/lazy_graph_patterns/path.rs b/triplestore/src/sparql/lazy_graph_patterns/path.rs index 5f2cfee..d661d2e 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/path.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/path.rs @@ -41,9 +41,9 @@ struct DFPathReturn { impl SubjectOrObject { fn flip(&self) -> SubjectOrObject { - match self { - &SubjectOrObject::Subject => SubjectOrObject::Object, - &SubjectOrObject::Object => SubjectOrObject::Subject, + match *self { + SubjectOrObject::Subject => SubjectOrObject::Object, + SubjectOrObject::Object => SubjectOrObject::Subject, } } } @@ -260,19 +260,18 @@ impl Triplestore { } } } - let df; - if !dfs.is_empty() { - df = concat_df(dfs.as_slice()) + let df = if !dfs.is_empty() { + concat_df(dfs.as_slice()) .unwrap() .unique(None, UniqueKeepStrategy::First, None) - .unwrap(); + .unwrap() } else { - df = DataFrame::new(vec![ + DataFrame::new(vec![ Series::new_empty("subject", &DataType::Categorical(None)), Series::new_empty("object", &DataType::Categorical(None)), ]) - .unwrap(); - } + .unwrap() + }; Ok(HashMap::from([(nns_name(nns), df)])) } } @@ -336,7 +335,7 @@ impl Triplestore { fn find_lookup(map: &HashMap) -> DataFrame { let mut all_values = vec![]; - for (_k, v) in map { + for v in map.values() { let mut obj = v.column("object").unwrap().unique().unwrap(); obj.rename("value"); let mut sub = v.column("subject").unwrap().unique().unwrap(); From 985f993d51d9ea878c769ea79a5b0ecc309672e0 Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:21:26 +0200 Subject: [PATCH 9/9] Fix flaky test --- py_maplib/tests/test_blank_nodes.py | 10 ++++++---- 1 file changed, 6 
insertions(+), 4 deletions(-)

diff --git a/py_maplib/tests/test_blank_nodes.py b/py_maplib/tests/test_blank_nodes.py
index 20103db..b6416d7 100644
--- a/py_maplib/tests/test_blank_nodes.py
+++ b/py_maplib/tests/test_blank_nodes.py
@@ -38,15 +38,17 @@ def blank_person_mapping():
 
 
 def test_simple_query_no_error(blank_person_mapping):
-    print("Hello!")
     df = blank_person_mapping.query("""
     PREFIX foaf: <http://xmlns.com/foaf/0.1/>
 
-    SELECT ?p ?lastName WHERE {
+    SELECT ?firstName ?lastName WHERE {
         ?p a foaf:Person .
         ?p foaf:lastName ?lastName .
-    } ORDER BY ?p ?lastName
+        ?p foaf:firstName ?firstName .
+    } ORDER BY ?firstName ?lastName
     """)
-    expected_df = pl.DataFrame({"p": ["_:person_l0_p0_r0", "_:person_l0_p0_r1"],
+    expected_df = pl.DataFrame({"firstName": ["Ann", "Bob"],
                                 "lastName": ["Strong", "Brite"]})
+
+
    assert_frame_equal(df, expected_df)
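
A note on the flaky test fixed in PATCH 9/9: the expected labels in test_blank_nodes.py, such as _:person_l0_p0_r0, suggest blank node identifiers are built from the variable name plus list, pattern and row counters, so which row receives _r0 depends on processing order. Ordering and asserting on the literal firstName and lastName columns sidesteps that. The sketch below illustrates such a deterministic labelling scheme; blank_node_label and the exact counter layout are assumptions inferred from the test data, not the function maplib actually uses.

// Hypothetical reconstruction of the label scheme visible in test_blank_nodes.py.
// The real maplib implementation may differ; only the observed format is assumed.
fn blank_node_label(variable: &str, list: usize, pattern: usize, row: usize) -> String {
    format!("{}_l{}_p{}_r{}", variable, list, pattern, row)
}

fn main() {
    // Two instantiated rows get distinct but order-dependent row indices, which is
    // why the test now orders by literal columns instead of blank node labels.
    assert_eq!(blank_node_label("person", 0, 0, 0), "person_l0_p0_r0");
    assert_eq!(blank_node_label("person", 0, 0, 1), "person_l0_p0_r1");
}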
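
The query_context.rs churn between patches is worth a second look: an earlier patch in this series adds an impl Default for Context whose default() delegates to Self::new() while simultaneously rewriting new() to call Context::default(), so the two recurse without end on first use, and PATCH 7/9 removes the Default impl again to get tests green. A minimal sketch of the cycle-free arrangement that still satisfies clippy's new_without_default lint, shown on a stand-in struct with String standing in for PathEntry:

// Stand-in for triplestore's Context; deriving Default breaks the new/default cycle.
#[derive(Clone, Debug, Default)]
pub struct Context {
    pub string_rep: String,
    pub path: Vec<String>, // the real field is Vec<PathEntry>
}

impl Context {
    // new() stays as the conventional constructor and simply forwards to default().
    pub fn new() -> Context {
        Context::default()
    }
}

fn main() {
    assert!(Context::new().path.is_empty());
}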
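
PATCH 7/9 also moves representation/src/literals.rs onto chrono's timestamp_nanos_opt, which returns None instead of panicking when a datetime falls outside the roughly 1677 to 2262 range representable as i64 nanoseconds. A minimal sketch of that parse-then-convert pattern, assuming chrono 0.4.31 or later; xsd_datetime_to_nanos is an illustrative name, not an API from the patch:

use chrono::{DateTime, NaiveDateTime, Utc};

// Mirrors the fallback in literals.rs: try a timezone-free xsd:dateTime first,
// then an offset-aware one normalised to UTC.
fn xsd_datetime_to_nanos(value: &str) -> Option<i64> {
    if let Ok(dt) = value.parse::<NaiveDateTime>() {
        return dt.timestamp_nanos_opt();
    }
    if let Ok(dt) = value.parse::<DateTime<Utc>>() {
        return dt.naive_utc().timestamp_nanos_opt();
    }
    None
}

fn main() {
    assert!(xsd_datetime_to_nanos("2023-09-22T11:04:30").is_some());
    assert!(xsd_datetime_to_nanos("2023-09-22T11:04:30+02:00").is_some());
    assert!(xsd_datetime_to_nanos("not a datetime").is_none());
}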
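
Finally, the polars_literal_values_to_series function added in PATCH 7/9 writes one match arm per LiteralValue variant, twice over, once for the all-non-null case and once for the nullable case. A sketch of how a macro can fold both passes into one by always collecting Vec<Option<T>>; collect_variant and literal_values_to_series are hypothetical names, the all-null and datetime branches from the patch are left out for brevity, and polars 0.32 as pinned in the Cargo.toml above is assumed:

use polars::prelude::{LiteralValue, NamedFrom, Series};

// Collects one LiteralValue variant into a Series, tolerating nulls and
// panicking on mixed types, matching the behaviour of the patched code.
macro_rules! collect_variant {
    ($values:expr, $name:expr, $variant:ident) => {
        Series::new(
            $name,
            $values
                .into_iter()
                .map(|x| match x {
                    LiteralValue::$variant(v) => Some(v),
                    LiteralValue::Null => None,
                    _ => panic!("Not possible"),
                })
                .collect::<Vec<_>>(),
        )
    };
}

fn literal_values_to_series(values: Vec<LiteralValue>, name: &str) -> Option<Series> {
    // The first non-null value decides the target dtype, as in the original;
    // an empty or all-null input yields None here rather than a null Series.
    let first = values
        .iter()
        .find(|x| !matches!(x, LiteralValue::Null))?
        .clone();
    Some(match first {
        LiteralValue::Boolean(_) => collect_variant!(values, name, Boolean),
        LiteralValue::Utf8(_) => collect_variant!(values, name, Utf8),
        LiteralValue::UInt32(_) => collect_variant!(values, name, UInt32),
        LiteralValue::UInt64(_) => collect_variant!(values, name, UInt64),
        LiteralValue::Int32(_) => collect_variant!(values, name, Int32),
        LiteralValue::Int64(_) => collect_variant!(values, name, Int64),
        LiteralValue::Float32(_) => collect_variant!(values, name, Float32),
        LiteralValue::Float64(_) => collect_variant!(values, name, Float64),
        _ => return None, // datetimes carry extra arguments; handled separately
    })
}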