Feature/new multi #6
Merged 11 commits on Mar 7, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/python_release.yml
@@ -13,7 +13,7 @@ permissions:

env:
CARGO_TERM_COLOR: always
-RUST_TOOLCHAIN: nightly-2023-12-01
+RUST_TOOLCHAIN: nightly-2024-01-20
MATURIN_VERSION: '1.2.3'
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}

2 changes: 1 addition & 1 deletion .github/workflows/python_tests.yml
@@ -10,7 +10,7 @@ env:
CARGO_TERM_COLOR: always
RUST_LOG: debug
MATURIN_VERSION: '1.2.3'
-RUST_TOOLCHAIN: nightly-2023-12-01
+RUST_TOOLCHAIN: nightly-2024-01-20

jobs:
build_and_test:
2 changes: 1 addition & 1 deletion .github/workflows/rust_tests.yml
@@ -9,7 +9,7 @@ on:
env:
CARGO_TERM_COLOR: always
RUST_LOG: debug
-RUST_TOOLCHAIN: nightly-2023-12-01
+RUST_TOOLCHAIN: nightly-2024-01-20

jobs:
build_and_test:
6 changes: 3 additions & 3 deletions maplib/Cargo.toml
@@ -14,13 +14,13 @@ rayon = "1.6.0"
nom={version="7.1.3", features=["alloc"]}
spargebra = { git = "https://github.com/DataTreehouse/spargebra"}
oxrdf = "0.1.0"
polars = {version="0.35.4", features=["semi_anti_join", "abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "horizontal_concat", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant"] }
polars = {version="0.37.0", features=["semi_anti_join", "abs", "round_series", "lazy", "concat_str", "is_in", "dtype-full", "strings", "rows", "timezones", "polars-time", "temporal", "list_eval", "partition_by", "parquet", "cse", "nightly", "performant"] }
unic-char-range = "0.9.0"
log="0.4.19"
rio_turtle = "0.8.4"
rio_api = "0.8.4"
polars-utils = "0.35.4"
polars-core = "0.35.4"
polars-utils = "0.37.0"
polars-core = "0.37.0"
chrono = "0.4"
chrono-tz = "0.8"
uuid = {version = "1.1.2", features = [
6 changes: 3 additions & 3 deletions maplib/src/mapping.rs
@@ -31,9 +31,9 @@ use std::io::Write;
use std::path::Path;
use std::time::Instant;
use triplestore::constants::{OBJECT_COL_NAME, SUBJECT_COL_NAME, VERB_COL_NAME};
+use triplestore::TripleFormat;
use triplestore::{TriplesToAdd, Triplestore};
use uuid::Uuid;
-use triplestore::TripleFormat;

pub struct Mapping {
pub template_dataset: TemplateDataset,
@@ -392,9 +392,9 @@ impl Mapping {
let mut fix_iris = vec![];
for (coltype, colname) in coltypes_names {
if coltype == &RDFNodeType::IRI {
-let nonnull = df.column(colname).unwrap().utf8().unwrap().first_non_null();
+let nonnull = df.column(colname).unwrap().str().unwrap().first_non_null();
if let Some(i) = nonnull {
-let first_iri = df.column(colname).unwrap().utf8().unwrap().get(i).unwrap();
+let first_iri = df.column(colname).unwrap().str().unwrap().get(i).unwrap();
{
if !first_iri.starts_with('<') {
fix_iris.push(colname);
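Reviewer note on the hunk above: polars 0.37 renamed the string dtype, so DataType::Utf8 is now DataType::String and the Series accessor .utf8() is now .str(). A minimal sketch of the new accessor, not from this PR (the "iri" column name and values are made up):

use polars::prelude::*;

fn main() -> PolarsResult<()> {
    // Hypothetical frame with one IRI column, mirroring the check in mapping.rs.
    let df = df!("iri" => &["<http://example.net/a>", "<http://example.net/b>"])?;
    // .str() replaces the pre-0.37 .utf8() accessor; both yield a string ChunkedArray.
    let ca = df.column("iri")?.str()?;
    if let Some(i) = ca.first_non_null() {
        println!("first non-null IRI: {}", ca.get(i).unwrap());
    }
    Ok(())
}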
12 changes: 6 additions & 6 deletions maplib/src/mapping/constant_terms.rs
@@ -21,7 +21,7 @@ pub fn constant_to_expr(
let (expr, ptype, rdf_node_type) = match constant_term {
ConstantTerm::Constant(c) => match c {
ConstantLiteral::Iri(iri) => (
-Expr::Literal(LiteralValue::Utf8(iri.as_str().to_string())),
+Expr::Literal(LiteralValue::String(iri.as_str().to_string())),
PType::Basic(NamedNode::new_unchecked(OTTR_IRI), "ottr:IRI".to_string()),
RDFNodeType::IRI,
),
@@ -33,9 +33,9 @@
let language = lit.language.as_deref();
let (mut any, dt) = sparql_literal_to_any_value(&lit.value, language, &dt);
//Workaround for owned utf 8..
-let value_series = if let AnyValue::Utf8Owned(s) = any {
-any = AnyValue::Utf8(&s);
-let mut value_series = Series::new_empty("literal", &DataType::Utf8);
+let value_series = if let AnyValue::StringOwned(s) = any {
+any = AnyValue::String(&s);
+let mut value_series = Series::new_empty("literal", &DataType::String);
value_series = value_series.extend_constant(any, 1).unwrap();
value_series
} else {
@@ -129,7 +129,7 @@ pub fn constant_blank_node_to_series(
let any_value_vec: Vec<_> = (blank_node_counter..(blank_node_counter + n_rows))
.into_par_iter()
.map(|i| {
-AnyValue::Utf8Owned(
+AnyValue::StringOwned(
format!("_:{}_l{}_p{}_r{}", bl.as_str(), layer, pattern_num, i).into(),
)
})
@@ -139,7 +139,7 @@
Series::from_any_values_and_dtype(
BLANK_NODE_SERIES_NAME,
any_value_vec.as_slice(),
&DataType::Utf8,
&DataType::String,
false,
)
.unwrap(),
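The same rename applies on the value side: AnyValue::Utf8Owned and AnyValue::Utf8 became AnyValue::StringOwned and AnyValue::String. A short sketch of building a String-typed Series from owned values, loosely modeled on the blank-node code above (the series name and value format are illustrative only):

use polars::prelude::*;

fn main() -> PolarsResult<()> {
    // AnyValue::StringOwned replaces the pre-0.37 AnyValue::Utf8Owned.
    let values: Vec<AnyValue<'static>> = (0..3)
        .map(|i| AnyValue::StringOwned(format!("_:b_l0_p0_r{}", i).into()))
        .collect();
    // Assemble a String-typed Series from the owned values, as the blank-node code does.
    let s = Series::from_any_values_and_dtype("blank_node", &values, &DataType::String, false)?;
    println!("{}", s);
    Ok(())
}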
8 changes: 4 additions & 4 deletions maplib/src/mapping/default.rs
@@ -42,14 +42,14 @@ impl Mapping {
if let DataType::List(..) = dt {
todo!()
}
-if dt != DataType::Utf8 {
+if dt != DataType::String {
warn!(
"Primary key column {} is not String but instead {}. Will be cast",
&pk_col, dt
);
df = df
.lazy()
-.with_column(col(c).cast(DataType::Utf8))
+.with_column(col(c).cast(DataType::String))
.collect()
.unwrap();
}
@@ -71,14 +71,14 @@
todo!()
}

-if dt != DataType::Utf8 {
+if dt != DataType::String {
warn!(
"Foreign key column {} is not String but instead {}. Will be cast",
&c, dt
);
df = df
.lazy()
-.with_column(col(c).cast(DataType::Utf8))
+.with_column(col(c).cast(DataType::String))
.collect()
.unwrap();
}
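Cast targets change the same way: col(...).cast(DataType::Utf8) becomes col(...).cast(DataType::String). A minimal sketch of the lazy cast used in the warning path above, with a hypothetical integer key column (names not from this PR):

use polars::prelude::*;

fn main() -> PolarsResult<()> {
    // Hypothetical non-String key column that would trigger the cast warning above.
    let df = df!("my_pk" => &[1i64, 2, 3])?;
    let df = df
        .lazy()
        .with_column(col("my_pk").cast(DataType::String))
        .collect()?;
    assert_eq!(df.column("my_pk")?.dtype(), &DataType::String);
    Ok(())
}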
9 changes: 6 additions & 3 deletions maplib/src/resolver.rs
@@ -2,19 +2,22 @@ use crate::ast::{
Annotation, Argument, ConstantLiteral, ConstantTerm, DefaultValue, Directive, Instance, PType,
Parameter, Signature, Statement, StottrDocument, StottrLiteral, StottrTerm, Template,
};
-use crate::constants::{OTTR_PREFIX, OTTR_PREFIX_IRI, RDFS_PREFIX, RDFS_PREFIX_IRI, RDF_PREFIX, RDF_PREFIX_IRI, XSD_PREFIX, XSD_PREFIX_IRI, OTTR_IRI};
+use crate::constants::{
+OTTR_IRI, OTTR_PREFIX, OTTR_PREFIX_IRI, RDFS_PREFIX, RDFS_PREFIX_IRI, RDF_PREFIX,
+RDF_PREFIX_IRI, XSD_PREFIX, XSD_PREFIX_IRI,
+};
use crate::parsing::parsing_ast::{
ResolvesToNamedNode, UnresolvedAnnotation, UnresolvedArgument, UnresolvedBaseTemplate,
UnresolvedConstantLiteral, UnresolvedConstantTerm, UnresolvedDefaultValue, UnresolvedInstance,
UnresolvedPType, UnresolvedParameter, UnresolvedSignature, UnresolvedStatement,
UnresolvedStottrDocument, UnresolvedStottrLiteral, UnresolvedStottrTerm, UnresolvedTemplate,
};
use log::warn;
+use oxrdf::vocab::xsd;
use oxrdf::{IriParseError, NamedNode};
use std::collections::HashMap;
use std::error::Error;
use std::fmt::{Debug, Display, Formatter};
-use oxrdf::vocab::xsd;

#[derive(Debug)]
pub enum ResolutionError {
@@ -285,7 +288,7 @@ fn resolve_ptype(
resolved = NamedNode::new_unchecked(OTTR_IRI);
}
PType::Basic(resolved, get_name(b))
-},
+}
UnresolvedPType::Lub(l) => PType::Lub(Box::new(resolve_ptype(l, prefix_map)?)),
UnresolvedPType::List(l) => PType::List(Box::new(resolve_ptype(l, prefix_map)?)),
UnresolvedPType::NEList(l) => PType::NEList(Box::new(resolve_ptype(l, prefix_map)?)),
4 changes: 2 additions & 2 deletions parquet_io/Cargo.toml
Expand Up @@ -4,8 +4,8 @@ version = "0.5.0"
edition = "2021"

[dependencies]
polars = {version="0.35.4", features=["parquet"] }
polars-core = "0.35.4"
polars = {version="0.37.0", features=["parquet"] }
polars-core = "0.37.0"
thiserror="1.0.31"
uuid = {version = "1.1.2", features = [
"v4", # Lets you generate random UUIDs
3 changes: 1 addition & 2 deletions parquet_io/src/lib.rs
@@ -52,15 +52,14 @@ pub fn write_parquet(df: &mut DataFrame, file_path: &Path) -> Result<(), Parquet
Ok(())
}

-pub fn read_parquet(file_path: &String) -> Result<LazyFrame, ParquetIOError> {
+pub fn scan_parquet(file_path: &String) -> Result<LazyFrame, ParquetIOError> {
LazyFrame::scan_parquet(
Path::new(file_path),
ScanArgsParquet {
n_rows: None,
cache: false,
parallel: ParallelStrategy::Auto,
rechunk: true,
-row_count: None,
low_memory: false,
..Default::default()
},
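Renaming read_parquet to scan_parquet makes the lazy semantics explicit: the function builds a LazyFrame, so nothing is read from disk until the plan is collected. A rough usage sketch under that assumption ("data.parquet" is a placeholder path, not from this PR; the removed row_count field is covered by ..Default::default() in the hunk above):

use polars::prelude::*;
use std::path::Path;

fn main() -> PolarsResult<()> {
    // scan_parquet only plans the read; no I/O happens yet.
    let lf = LazyFrame::scan_parquet(Path::new("data.parquet"), ScanArgsParquet::default())?;
    // The file is actually read only when the plan is collected.
    let df = lf.collect()?;
    println!("{}", df);
    Ok(())
}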
6 changes: 4 additions & 2 deletions py_maplib/Cargo.toml
Expand Up @@ -9,13 +9,15 @@ edition = "2021"
pyo3 = {version = "0.19.2", features = ["extension-module"] }
maplib = {path="../maplib"}
triplestore = {path="../triplestore"}
#representation = {path="../../representation"}
+representation = { git = "https://github.com/DataTreehouse/representation"}
shacl = {path="../shacl"}
oxrdf = {version="0.1.7"}
#pydf_io = {path = "../../pydf_io"}
+pydf_io = { git = "https://github.com/DataTreehouse/pydf_io"}
thiserror="1.0.31"
polars-lazy = "0.35.4"
polars-core = {version="0.35.4", features=["dtype-array", "dtype-categorical", "dtype-date", "dtype-datetime",
polars-lazy = "0.37.0"
polars-core = {version="0.37.0", features=["dtype-array", "dtype-categorical", "dtype-date", "dtype-datetime",
"dtype-decimal", "dtype-duration", "dtype-i8", "dtype-i16", "dtype-struct", "dtype-time", "dtype-u8", "dtype-u16"]}
log ="0.4.19"

2 changes: 1 addition & 1 deletion py_maplib/pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "maplib"
description = "Dataframe-based interactive knowledge graph construction using stOTTR templates"
dependencies = ["polars==0.20.2", "pyarrow>=7.0.0"]
dependencies = ["polars==0.20.13", "pyarrow>=7.0.0"]
readme = "README.md"
authors = [{name = "Magnus Bakken", email = "[email protected]" }]
license = {file = "LICENSE"}
18 changes: 11 additions & 7 deletions py_maplib/python/maplib/_maplib.pyi
@@ -2,7 +2,6 @@ from pathlib import Path
from typing import Union, List, Dict

from polars import DataFrame
-from .semantic_dataframe import SemanticDataFrame


class ValidationReport:
@@ -14,7 +13,7 @@
conforms: True if no violations were found.
"""

-def __init__(self, df: SemanticDataFrame, conforms: bool) -> ValidationReport:
+def __init__(self, df: DataFrame, conforms: bool) -> ValidationReport:
self.df = df
self.conforms = conforms
...
@@ -82,24 +81,29 @@ class Mapping:
:return: The generated template
"""

-def query(self, query: str, parameters: Dict[str, DataFrame] = None) -> Union[
-SemanticDataFrame, List[SemanticDataFrame], None]:
+def query(self, query: str, parameters: Dict[str, DataFrame] = None, include_datatypes=False, multi_as_strings=True) -> Union[
+DataFrame,
+Dict[str, Union[DataFrame, Dict[str, str]]],
+List[Union[DataFrame, Dict[str, Union[DataFrame, Dict[str, str]]]]],
+None]:
"""
Query the contained knowledge graph using SPARQL
Currently, SELECT, CONSTRUCT and INSERT are supported.
Usage:

->>> res = mapping.query('''
+>>> df = mapping.query('''
... PREFIX ex:<http://example.net/ns#>
... SELECT ?obj1 ?obj2 WHERE {
... ?obj1 ex:hasObj ?obj2
... }''')
-... print(res.df)
-... print(res.types)
+... print(df)

:param query: The SPARQL query string
:param parameters: PVALUES Parameters, a DataFrame containing the value bindings in the custom PVALUES construction.
+:param multi_as_strings: Columns with multiple types are by default converted to their string representations, set to False to get the native Polars types in a struct.
+:param include_datatypes: Datatypes are not returned by default, set to true to return a dict with the solution mappings and the datatypes.
+:return: DataFrame (Select), list of DataFrames (Construct) containing results, or None for Insert-queries

"""

def insert(self, query: str, parameters: Dict[str, DataFrame] = None, transient: bool = False):
42 changes: 0 additions & 42 deletions py_maplib/python/maplib/semantic_dataframe.py

This file was deleted.
