Skip to content

Commit bd76cbe

Browse files
authored
Merge pull request #6 from DataTreehouse/feature/new_multi
Feature/new multi
2 parents 882785f + d4a05a4 commit bd76cbe

38 files changed

+685
-632
lines changed

.github/workflows/python_release.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ permissions:
1313

1414
env:
1515
CARGO_TERM_COLOR: always
16-
RUST_TOOLCHAIN: nightly-2023-12-01
16+
RUST_TOOLCHAIN: nightly-2024-01-20
1717
MATURIN_VERSION: '1.2.3'
1818
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
1919

.github/workflows/python_tests.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ env:
1010
CARGO_TERM_COLOR: always
1111
RUST_LOG: debug
1212
MATURIN_VERSION: '1.2.3'
13-
RUST_TOOLCHAIN: nightly-2023-12-01
13+
RUST_TOOLCHAIN: nightly-2024-01-20
1414

1515
jobs:
1616
build_and_test:

.github/workflows/rust_tests.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ on:
99
env:
1010
CARGO_TERM_COLOR: always
1111
RUST_LOG: debug
12-
RUST_TOOLCHAIN: nightly-2023-12-01
12+
RUST_TOOLCHAIN: nightly-2024-01-20
1313

1414
jobs:
1515
build_and_test:

maplib/Cargo.toml

+3-3
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,13 @@ rayon = "1.6.0"
1414
nom={version="7.1.3", features=["alloc"]}
1515
spargebra = { git = "https://github.com/DataTreehouse/spargebra"}
1616
oxrdf = "0.1.0"
17-
polars = {version="0.35.4", features=["semi_anti_join", "abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "horizontal_concat", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant"] }
17+
polars = {version="0.37.0", features=["semi_anti_join", "abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant"] }
1818
unic-char-range = "0.9.0"
1919
log="0.4.19"
2020
rio_turtle = "0.8.4"
2121
rio_api = "0.8.4"
22-
polars-utils = "0.35.4"
23-
polars-core = "0.35.4"
22+
polars-utils = "0.37.0"
23+
polars-core = "0.37.0"
2424
chrono = "0.4"
2525
chrono-tz = "0.8"
2626
uuid = {version = "1.1.2", features = [

maplib/src/mapping.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,9 @@ use std::io::Write;
3131
use std::path::Path;
3232
use std::time::Instant;
3333
use triplestore::constants::{OBJECT_COL_NAME, SUBJECT_COL_NAME, VERB_COL_NAME};
34+
use triplestore::TripleFormat;
3435
use triplestore::{TriplesToAdd, Triplestore};
3536
use uuid::Uuid;
36-
use triplestore::TripleFormat;
3737

3838
pub struct Mapping {
3939
pub template_dataset: TemplateDataset,
@@ -392,9 +392,9 @@ impl Mapping {
392392
let mut fix_iris = vec![];
393393
for (coltype, colname) in coltypes_names {
394394
if coltype == &RDFNodeType::IRI {
395-
let nonnull = df.column(colname).unwrap().utf8().unwrap().first_non_null();
395+
let nonnull = df.column(colname).unwrap().str().unwrap().first_non_null();
396396
if let Some(i) = nonnull {
397-
let first_iri = df.column(colname).unwrap().utf8().unwrap().get(i).unwrap();
397+
let first_iri = df.column(colname).unwrap().str().unwrap().get(i).unwrap();
398398
{
399399
if !first_iri.starts_with('<') {
400400
fix_iris.push(colname);

maplib/src/mapping/constant_terms.rs

+6-6
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ pub fn constant_to_expr(
2121
let (expr, ptype, rdf_node_type) = match constant_term {
2222
ConstantTerm::Constant(c) => match c {
2323
ConstantLiteral::Iri(iri) => (
24-
Expr::Literal(LiteralValue::Utf8(iri.as_str().to_string())),
24+
Expr::Literal(LiteralValue::String(iri.as_str().to_string())),
2525
PType::Basic(NamedNode::new_unchecked(OTTR_IRI), "ottr:IRI".to_string()),
2626
RDFNodeType::IRI,
2727
),
@@ -33,9 +33,9 @@ pub fn constant_to_expr(
3333
let language = lit.language.as_deref();
3434
let (mut any, dt) = sparql_literal_to_any_value(&lit.value, language, &dt);
3535
//Workaround for owned utf 8..
36-
let value_series = if let AnyValue::Utf8Owned(s) = any {
37-
any = AnyValue::Utf8(&s);
38-
let mut value_series = Series::new_empty("literal", &DataType::Utf8);
36+
let value_series = if let AnyValue::StringOwned(s) = any {
37+
any = AnyValue::String(&s);
38+
let mut value_series = Series::new_empty("literal", &DataType::String);
3939
value_series = value_series.extend_constant(any, 1).unwrap();
4040
value_series
4141
} else {
@@ -129,7 +129,7 @@ pub fn constant_blank_node_to_series(
129129
let any_value_vec: Vec<_> = (blank_node_counter..(blank_node_counter + n_rows))
130130
.into_par_iter()
131131
.map(|i| {
132-
AnyValue::Utf8Owned(
132+
AnyValue::StringOwned(
133133
format!("_:{}_l{}_p{}_r{}", bl.as_str(), layer, pattern_num, i).into(),
134134
)
135135
})
@@ -139,7 +139,7 @@ pub fn constant_blank_node_to_series(
139139
Series::from_any_values_and_dtype(
140140
BLANK_NODE_SERIES_NAME,
141141
any_value_vec.as_slice(),
142-
&DataType::Utf8,
142+
&DataType::String,
143143
false,
144144
)
145145
.unwrap(),

maplib/src/mapping/default.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,14 @@ impl Mapping {
4242
if let DataType::List(..) = dt {
4343
todo!()
4444
}
45-
if dt != DataType::Utf8 {
45+
if dt != DataType::String {
4646
warn!(
4747
"Primary key column {} is not String but instead {}. Will be cast",
4848
&pk_col, dt
4949
);
5050
df = df
5151
.lazy()
52-
.with_column(col(c).cast(DataType::Utf8))
52+
.with_column(col(c).cast(DataType::String))
5353
.collect()
5454
.unwrap();
5555
}
@@ -71,14 +71,14 @@ impl Mapping {
7171
todo!()
7272
}
7373

74-
if dt != DataType::Utf8 {
74+
if dt != DataType::String {
7575
warn!(
7676
"Foreign key column {} is not String but instead {}. Will be cast",
7777
&c, dt
7878
);
7979
df = df
8080
.lazy()
81-
.with_column(col(c).cast(DataType::Utf8))
81+
.with_column(col(c).cast(DataType::String))
8282
.collect()
8383
.unwrap();
8484
}

maplib/src/resolver.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,22 @@ use crate::ast::{
22
Annotation, Argument, ConstantLiteral, ConstantTerm, DefaultValue, Directive, Instance, PType,
33
Parameter, Signature, Statement, StottrDocument, StottrLiteral, StottrTerm, Template,
44
};
5-
use crate::constants::{OTTR_PREFIX, OTTR_PREFIX_IRI, RDFS_PREFIX, RDFS_PREFIX_IRI, RDF_PREFIX, RDF_PREFIX_IRI, XSD_PREFIX, XSD_PREFIX_IRI, OTTR_IRI};
5+
use crate::constants::{
6+
OTTR_IRI, OTTR_PREFIX, OTTR_PREFIX_IRI, RDFS_PREFIX, RDFS_PREFIX_IRI, RDF_PREFIX,
7+
RDF_PREFIX_IRI, XSD_PREFIX, XSD_PREFIX_IRI,
8+
};
69
use crate::parsing::parsing_ast::{
710
ResolvesToNamedNode, UnresolvedAnnotation, UnresolvedArgument, UnresolvedBaseTemplate,
811
UnresolvedConstantLiteral, UnresolvedConstantTerm, UnresolvedDefaultValue, UnresolvedInstance,
912
UnresolvedPType, UnresolvedParameter, UnresolvedSignature, UnresolvedStatement,
1013
UnresolvedStottrDocument, UnresolvedStottrLiteral, UnresolvedStottrTerm, UnresolvedTemplate,
1114
};
1215
use log::warn;
16+
use oxrdf::vocab::xsd;
1317
use oxrdf::{IriParseError, NamedNode};
1418
use std::collections::HashMap;
1519
use std::error::Error;
1620
use std::fmt::{Debug, Display, Formatter};
17-
use oxrdf::vocab::xsd;
1821

1922
#[derive(Debug)]
2023
pub enum ResolutionError {
@@ -285,7 +288,7 @@ fn resolve_ptype(
285288
resolved = NamedNode::new_unchecked(OTTR_IRI);
286289
}
287290
PType::Basic(resolved, get_name(b))
288-
},
291+
}
289292
UnresolvedPType::Lub(l) => PType::Lub(Box::new(resolve_ptype(l, prefix_map)?)),
290293
UnresolvedPType::List(l) => PType::List(Box::new(resolve_ptype(l, prefix_map)?)),
291294
UnresolvedPType::NEList(l) => PType::NEList(Box::new(resolve_ptype(l, prefix_map)?)),

parquet_io/Cargo.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@ version = "0.5.0"
44
edition = "2021"
55

66
[dependencies]
7-
polars = {version="0.35.4", features=["parquet"] }
8-
polars-core = "0.35.4"
7+
polars = {version="0.37.0", features=["parquet"] }
8+
polars-core = "0.37.0"
99
thiserror="1.0.31"
1010
uuid = {version = "1.1.2", features = [
1111
"v4", # Lets you generate random UUIDs

parquet_io/src/lib.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -52,15 +52,14 @@ pub fn write_parquet(df: &mut DataFrame, file_path: &Path) -> Result<(), Parquet
5252
Ok(())
5353
}
5454

55-
pub fn read_parquet(file_path: &String) -> Result<LazyFrame, ParquetIOError> {
55+
pub fn scan_parquet(file_path: &String) -> Result<LazyFrame, ParquetIOError> {
5656
LazyFrame::scan_parquet(
5757
Path::new(file_path),
5858
ScanArgsParquet {
5959
n_rows: None,
6060
cache: false,
6161
parallel: ParallelStrategy::Auto,
6262
rechunk: true,
63-
row_count: None,
6463
low_memory: false,
6564
..Default::default()
6665
},

py_maplib/Cargo.toml

+4-2
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,15 @@ edition = "2021"
99
pyo3 = {version = "0.19.2", features = ["extension-module"] }
1010
maplib = {path="../maplib"}
1111
triplestore = {path="../triplestore"}
12+
#representation = {path="../../representation"}
1213
representation = { git = "https://github.com/DataTreehouse/representation"}
1314
shacl = {path="../shacl"}
1415
oxrdf = {version="0.1.7"}
16+
#pydf_io = {path = "../../pydf_io"}
1517
pydf_io = { git = "https://github.com/DataTreehouse/pydf_io"}
1618
thiserror="1.0.31"
17-
polars-lazy = "0.35.4"
18-
polars-core = {version="0.35.4", features=["dtype-array", "dtype-categorical", "dtype-date", "dtype-datetime",
19+
polars-lazy = "0.37.0"
20+
polars-core = {version="0.37.0", features=["dtype-array", "dtype-categorical", "dtype-date", "dtype-datetime",
1921
"dtype-decimal", "dtype-duration", "dtype-i8", "dtype-i16", "dtype-struct", "dtype-time", "dtype-u8", "dtype-u16"]}
2022
log ="0.4.19"
2123

py_maplib/pyproject.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "maplib"
33
description = "Dataframe-based interactive knowledge graph construction using stOTTR templates"
4-
dependencies = ["polars==0.20.2", "pyarrow>=7.0.0"]
4+
dependencies = ["polars==0.20.13", "pyarrow>=7.0.0"]
55
readme = "README.md"
66
authors = [{name = "Magnus Bakken", email = "[email protected]" }]
77
license = {file = "LICENSE"}

py_maplib/python/maplib/_maplib.pyi

+11-7
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@ from pathlib import Path
22
from typing import Union, List, Dict
33

44
from polars import DataFrame
5-
from .semantic_dataframe import SemanticDataFrame
65

76

87
class ValidationReport:
@@ -14,7 +13,7 @@ class ValidationReport:
1413
conforms: True if no violations were found.
1514
"""
1615

17-
def __init__(self, df: SemanticDataFrame, conforms: bool) -> ValidationReport:
16+
def __init__(self, df: DataFrame, conforms: bool) -> ValidationReport:
1817
self.df = df
1918
self.conforms = conforms
2019
...
@@ -82,24 +81,29 @@ class Mapping:
8281
:return: The generated template
8382
"""
8483

85-
def query(self, query: str, parameters: Dict[str, DataFrame] = None) -> Union[
86-
SemanticDataFrame, List[SemanticDataFrame], None]:
84+
def query(self, query: str, parameters: Dict[str, DataFrame] = None, include_datatypes=False, multi_as_strings=True) -> Union[
85+
DataFrame,
86+
Dict[str, Union[DataFrame, Dict[str, str]]],
87+
List[Union[DataFrame, Dict[str, Union[DataFrame, Dict[str, str]]]]],
88+
None]:
8789
"""
8890
Query the contained knowledge graph using SPARQL
8991
Currently, SELECT, CONSTRUCT and INSERT are supported.
9092
Usage:
9193
92-
>>> res = mapping.query('''
94+
>>> df = mapping.query('''
9395
... PREFIX ex:<http://example.net/ns#>
9496
... SELECT ?obj1 ?obj2 WHERE {
9597
... ?obj1 ex:hasObj ?obj2
9698
... }''')
97-
... print(res.df)
98-
... print(res.types)
99+
... print(df)
99100
100101
:param query: The SPARQL query string
101102
:param parameters: PVALUES Parameters, a DataFrame containing the value bindings in the custom PVALUES construction.
103+
:param multi_as_strings: Columns with multiple types are by default converted to their string representations, set to False to get the native Polars types in a struct.
104+
:param include_datatypes: Datatypes are not returned by default, set to true to return a dict with the solution mappings and the datatypes.
102105
:return: DataFrame (Select), list of DataFrames (Construct) containing results, or None for Insert-queries
106+
103107
"""
104108

105109
def insert(self, query: str, parameters: Dict[str, DataFrame] = None, transient: bool = False):

py_maplib/python/maplib/semantic_dataframe.py

-42
This file was deleted.

0 commit comments

Comments (0)