diff --git a/lib/maplib/src/model/errors.rs b/lib/maplib/src/model/errors.rs index 58a00450..ac2173d6 100644 --- a/lib/maplib/src/model/errors.rs +++ b/lib/maplib/src/model/errors.rs @@ -1,7 +1,6 @@ use oxrdf::IriParseError; use polars::prelude::{DataFrame, DataType}; use representation::errors::RepresentationError; -use std::fmt::{Display, Formatter}; use templates::ast::{ConstantTermOrList, PType}; use templates::dataset::errors::TemplateError; use templates::MappingColumnType; @@ -10,141 +9,54 @@ use triplestore::errors::TriplestoreError; #[derive(Error, Debug)] pub enum MappingError { + #[error("Invalid template name {}", .0)] InvalidTemplateNameError(IriParseError), + #[error("Could not find template: {}", .0)] TemplateNotFound(String), + #[error("Column {} which is non-optional has null values for keys: {:?}", .0, .1)] NonOptionalColumnHasNull(String, DataFrame), + #[error("Expected column {} is missing", .0)] MissingParameterColumn(String), + #[error("Unexpected columns: {:?}", .0)] ContainsIrrelevantColumns(Vec), + #[error("Could not infer OTTR type for column {} with polars datatype {:?}", .0, .1)] CouldNotInferOTTRDatatypeForColumn(String, DataType), + #[error("Column {} had datatype {:?} which was incompatible with the OTTR datatype {:?}, which expects {:?}", .0, .1, .2, .3)] ColumnDataTypeMismatch(String, DataType, PType, Option), + #[error("Column {} had datatype {:?} which was incompatible with the OTTR datatype {:?}", .0, .1, .2)] + IncompatibleColumnDataType(String, DataType, PType), + #[error("Expected datatype: {:?}, but got: {:?}", .0, .1)] DefaultDataTypeMismatch(MappingColumnType, MappingColumnType), + #[error("Predicate constant {} is not valid, must be an IRI, e.g. prefix:predicate", .0)] InvalidPredicateConstant(ConstantTermOrList), + #[error("Found value {} with unsupported OTTR datatype {}", .0, .1)] PTypeNotSupported(String, PType), + #[error("Unknown time zone {}", .0)] UnknownTimeZoneError(String), + #[error("Could not find variable {}, is the OTTR template invalid?", .0)] UnknownVariableError(String), + #[error("Expected constant term {:?} to have data type {} but was {}", .0, .1, .2)] ConstantDoesNotMatchDataType(ConstantTermOrList, PType, PType), + #[error("Constant term {:?} has inconsistent data types {} and {}", .0, .1, .2)] ConstantListHasInconsistentPType(ConstantTermOrList, PType, PType), + #[error("Template name {} inferred from prefix could not be found", .0)] NoTemplateForTemplateNameFromPrefix(String), + #[error("Missing DataFrame argument, but signature is not empty")] MissingDataFrameForNonEmptySignature, + #[error("{}", .0)] TooDeeplyNestedError(String), + #[error("{}", .0)] DatatypeInferenceError(RepresentationError), + #[error("Column {} has at least {} invalid IRIs. Examples: {:?}", .0, .1, .2)] InvalidIRIError(String, usize, String), + #[error("Template error: {}", .0)] TemplateError(#[from] TemplateError), + #[error("Error storing model results in triplestore: {}", .0)] TriplestoreError(#[from] TriplestoreError), + #[error("IRI parse error: {}", .0)] IriParseError(IriParseError), + #[error("Reached maximum templates recursion limit {} ({})", .0, .1)] MaximumRecursionLimit(usize, String), + #[error("Literal parse error: {}", .0)] LiteralParseError(String), } - -impl Display for MappingError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - MappingError::TemplateNotFound(t) => { - write!(f, "Could not find template: {t}") - } - MappingError::NonOptionalColumnHasNull(col, nullkey) => { - write!( - f, - "Column {col} which is non-optional has null values for keys: {nullkey}" - ) - } - MappingError::MissingParameterColumn(c) => { - write!(f, "Expected column {c} is missing") - } - MappingError::ContainsIrrelevantColumns(irr) => { - write!(f, "Unexpected columns: {}", irr.join(",")) - } - MappingError::CouldNotInferOTTRDatatypeForColumn(col, dt) => { - write!( - f, - "Could not infer OTTR type for column {col} with polars datatype {dt}" - ) - } - MappingError::ColumnDataTypeMismatch(col, dt, ptype, expected) => { - if let Some(expected) = expected { - write!( - f, - "Column {col} had datatype {dt} which was incompatible with the OTTR datatype {ptype}, which expects {expected}" - ) - } else { - write!( - f, - "Column {col} had datatype {dt} which was incompatible with the OTTR datatype {ptype}" - ) - } - } - MappingError::PTypeNotSupported(name, ptype) => { - write!( - f, - "Found value {name} with unsupported OTTR datatype {ptype}" - ) - } - MappingError::UnknownTimeZoneError(tz) => { - write!(f, "Unknown time zone {tz}") - } - MappingError::UnknownVariableError(v) => { - write!( - f, - "Could not find variable {v}, is the OTTR template invalid?" - ) - } - MappingError::ConstantDoesNotMatchDataType(constant_term, expected, actual) => { - write!( - f, - "Expected constant term {constant_term:?} to have data type {expected} but was {actual}" - ) - } - MappingError::ConstantListHasInconsistentPType(constant_term, prev, next) => { - write!( - f, - "Constant term {constant_term:?} has inconsistent data types {prev} and {next}" - ) - } - MappingError::InvalidTemplateNameError(t) => { - write!(f, "Invalid template name {t}") - } - MappingError::NoTemplateForTemplateNameFromPrefix(prefix) => { - write!( - f, - "Template name {prefix} inferred from prefix could not be found" - ) - } - MappingError::InvalidPredicateConstant(constant_term) => { - write!( - f, - "Predicate constant {constant_term} is not valid, must be an IRI, e.g. prefix:predicate", - ) - } - MappingError::MissingDataFrameForNonEmptySignature => { - write!(f, "Missing DataFrame argument, but signature is not empty") - } - MappingError::TooDeeplyNestedError(s) => { - write!(f, "{s}") - } - MappingError::DatatypeInferenceError(d) => { - write!(f, "{d}") - } - MappingError::DefaultDataTypeMismatch(expected, actual) => { - write!(f, "Default value data type {actual:?} does not correspond to data type provided: {expected:?}") - } - MappingError::InvalidIRIError(colname, n_errors, examples) => { - write!(f, "Found at least {n_errors} invalid IRIs for column {colname}, examples: {examples}") - } - MappingError::TemplateError(x) => { - write!(f, "Template error: {x}") - } - MappingError::TriplestoreError(x) => { - write!(f, "Error storing model results in triplestore: {x}") - } - MappingError::IriParseError(x) => { - write!(f, "IRI parse error: {x}") - } - MappingError::MaximumRecursionLimit(x, s) => { - write!(f, "Reached maximum templates recursion limit {x} ({s})") - } - MappingError::LiteralParseError(x) => { - write!(f, "Literal parse error: {x}") - } - } - } -} diff --git a/lib/maplib/src/model/expansion/validation.rs b/lib/maplib/src/model/expansion/validation.rs index 3de48b21..0fee18ec 100644 --- a/lib/maplib/src/model/expansion/validation.rs +++ b/lib/maplib/src/model/expansion/validation.rs @@ -361,11 +361,10 @@ fn infer_validate_mapping_column_type_from_ptype( }; Ok((MappingColumnType::Nested(Box::new(res)), new_expr)) } else { - Err(MappingError::ColumnDataTypeMismatch( + Err(MappingError::IncompatibleColumnDataType( column_name.to_string(), datatype.clone(), ptype.clone(), - None, )) } } diff --git a/lib/query_processing/src/expressions/functions.rs b/lib/query_processing/src/expressions/functions.rs index fcf56ca2..d1c1059d 100644 --- a/lib/query_processing/src/expressions/functions.rs +++ b/lib/query_processing/src/expressions/functions.rs @@ -1,4 +1,6 @@ mod abs_; +mod cast_iri_to_xsd_literal; +mod cast_literal_; mod ceil_; mod concat_; mod create_regex_expr; @@ -11,14 +13,17 @@ mod encode_for_uri; mod eval_expression_to_string; mod eval_uuid_namespace; mod floor_; +pub mod func_expression; mod hours_; mod iri; mod is_blank_; mod is_iri; mod is_literal; +mod keep_field_; mod lang_; mod lang_matches; mod lower_upper_substr; +mod maybe_add_regex_feature_flags; mod md5_; mod minutes_; mod month_; @@ -31,558 +36,16 @@ mod sparql_regex; mod sparql_uuid; mod starts_ends_contains; mod str_; +mod str_after; +mod str_before; mod str_before_or_after; mod str_dt; +mod str_function; mod str_lang; mod str_len; +mod str_starts_ends_contains_; mod struuid; mod struuid_v5; mod uuid_v5; +mod xsd_cast_literal; mod year_; - -use crate::errors::QueryProcessingError; -use crate::expressions::functions::abs_::abs_; -use crate::expressions::functions::ceil_::ceil_; -use crate::expressions::functions::concat_::concat_; -use crate::expressions::functions::custom_function::custom_; -use crate::expressions::functions::datatype_::datatype_; -use crate::expressions::functions::day_::day_; -use crate::expressions::functions::encode_for_uri::encode_for_uri; -use crate::expressions::functions::floor_::floor_; -use crate::expressions::functions::hours_::hours_; -use crate::expressions::functions::iri::iri; -use crate::expressions::functions::is_blank_::is_blank_; -use crate::expressions::functions::is_iri::is_iri; -use crate::expressions::functions::is_literal::is_literal; -use crate::expressions::functions::lang_::lang_; -use crate::expressions::functions::lang_matches::lang_matches; -use crate::expressions::functions::lower_upper_substr::lower_upper_substr; -use crate::expressions::functions::md5_::md5_; -use crate::expressions::functions::minutes_::minutes_; -use crate::expressions::functions::month_::month_; -use crate::expressions::functions::now_::now_; -use crate::expressions::functions::replace::sparql_replace; -use crate::expressions::functions::round_::round_; -use crate::expressions::functions::seconds_::seconds_; -use crate::expressions::functions::sha1_::sha1_; -use crate::expressions::functions::sparql_regex::sparql_regex; -use crate::expressions::functions::sparql_uuid::uuid; -use crate::expressions::functions::starts_ends_contains::starts_ends_contains; -use crate::expressions::functions::str_::str_; -use crate::expressions::functions::str_before_or_after::str_before_or_after; -use crate::expressions::functions::str_dt::str_dt; -use crate::expressions::functions::str_lang::str_lang; -use crate::expressions::functions::str_len::str_len; -use crate::expressions::functions::struuid::struuid; -use crate::expressions::functions::year_::year_; -use crate::expressions::{cast_lang_string_to_string, drop_inner_contexts}; -use oxrdf::vocab::xsd; -use oxrdf::NamedNodeRef; -use polars::datatypes::{DataType, Field}; -use polars::error::PolarsError; -use polars::prelude::{ - coalesce, col, lit, Column, Expr, IntoColumn, LiteralValue, Schema, Series, StrptimeOptions, -}; -use representation::cats::{maybe_decode_expr, LockedCats}; -use representation::multitype::{MULTI_BLANK_DT, MULTI_IRI_DT}; -use representation::query_context::Context; -use representation::solution_mapping::{BaseCatState, SolutionMappings}; -use representation::{BaseRDFNodeType, RDFNodeState}; -use spargebra::algebra::{Expression, Function}; -use std::collections::HashMap; - -pub fn func_expression( - mut solution_mappings: SolutionMappings, - func: &Function, - args: &[Expression], - args_contexts: HashMap, - outer_context: &Context, - global_cats: LockedCats, -) -> Result { - match func { - Function::Year => { - solution_mappings = - year_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Month => { - solution_mappings = - month_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Day => { - solution_mappings = day_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Hours => { - solution_mappings = - hours_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Minutes => { - solution_mappings = - minutes_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Seconds => { - solution_mappings = - seconds_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Abs => { - solution_mappings = abs_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Ceil => { - solution_mappings = - ceil_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Floor => { - solution_mappings = - floor_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Concat => { - solution_mappings = concat_( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::Now => { - solution_mappings = now_(solution_mappings, outer_context)?; - } - Function::Round => { - solution_mappings = - round_(solution_mappings, func, args, &args_contexts, outer_context)?; - } - Function::Str => { - solution_mappings = str_( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::Lang => { - solution_mappings = lang_( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::LangMatches => { - solution_mappings = lang_matches( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::Regex => { - solution_mappings = sparql_regex( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::Uuid => { - solution_mappings = uuid(solution_mappings, func, args, outer_context)?; - } - Function::Iri => { - solution_mappings = iri( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::StrUuid => { - solution_mappings = struuid(solution_mappings, func, args, outer_context)?; - } - Function::Replace => { - solution_mappings = sparql_replace( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::Custom(nn) => { - solution_mappings = custom_( - nn, - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::StrDt => { - solution_mappings = str_dt( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::StrBefore | Function::StrAfter => { - solution_mappings = str_before_or_after( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::StrLang => { - solution_mappings = str_lang( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::StrLen => { - solution_mappings = str_len( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::LCase | Function::UCase | Function::SubStr => { - solution_mappings = lower_upper_substr( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::StrStarts | Function::StrEnds | Function::Contains => { - solution_mappings = starts_ends_contains( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::IsBlank => { - solution_mappings = is_blank_(solution_mappings, &args_contexts, outer_context)?; - } - Function::IsIri => { - solution_mappings = is_iri(solution_mappings, &args_contexts, outer_context)?; - } - Function::IsLiteral => { - solution_mappings = is_literal(solution_mappings, &args_contexts, outer_context)?; - } - Function::Datatype => { - solution_mappings = datatype_(solution_mappings, &args_contexts, outer_context)?; - } - Function::EncodeForUri => { - solution_mappings = encode_for_uri( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::Sha1 => { - solution_mappings = sha1_( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - Function::Md5 => { - solution_mappings = md5_( - solution_mappings, - func, - args, - &args_contexts, - outer_context, - global_cats, - )?; - } - _ => { - return Err(QueryProcessingError::UnimplementedFunction( - func.to_string(), - )) - } - } - solution_mappings = drop_inner_contexts(solution_mappings, &args_contexts.values().collect()); - Ok(solution_mappings) -} - -pub fn str_function(c: &str, t: &RDFNodeState, global_cats: LockedCats) -> Expr { - if t.is_multi() { - let mut to_coalesce = vec![]; - for (t, s) in &t.map { - to_coalesce.push(match t { - BaseRDFNodeType::IRI => maybe_decode_expr( - col(c).struct_().field_by_name(MULTI_IRI_DT), - t, - s, - global_cats.clone(), - ), - BaseRDFNodeType::BlankNode => maybe_decode_expr( - col(c).struct_().field_by_name(MULTI_BLANK_DT), - t, - s, - global_cats.clone(), - ), - BaseRDFNodeType::Literal(_) => { - if t.is_lang_string() { - cast_lang_string_to_string(c, t, s, global_cats.clone()) - } else { - maybe_decode_expr( - col(c).struct_().field_by_name(&t.field_col_name()), - t, - s, - global_cats.clone(), - ) - .cast(DataType::String) - } - } - BaseRDFNodeType::None => lit(LiteralValue::untyped_null()).cast(DataType::String), - }) - } - coalesce(to_coalesce.as_slice()).alias(c) - } else { - let b = t.get_base_type().unwrap(); - let s = t.get_base_state().unwrap(); - match b { - BaseRDFNodeType::IRI | BaseRDFNodeType::BlankNode => { - maybe_decode_expr(col(c), b, s, global_cats) - } - BaseRDFNodeType::Literal(_) => { - if t.is_lang_string() { - cast_lang_string_to_string(c, b, s, global_cats) - } else { - maybe_decode_expr(col(c), b, s, global_cats).cast(DataType::String) - } - } - BaseRDFNodeType::None => lit(LiteralValue::untyped_null()).cast(DataType::String), - } - } -} - -pub fn xsd_cast_literal( - c: &str, - src: &RDFNodeState, - trg: &BaseRDFNodeType, - global_cats: LockedCats, -) -> Result { - let trg_type = trg.default_input_polars_data_type(); - let trg_nn = if let BaseRDFNodeType::Literal(nn) = trg { - nn.as_ref() - } else { - panic!("Invalid state") - }; - if src.is_multi() { - let mut to_coalesce = vec![]; - for (t, s) in &src.map { - to_coalesce.push(match t { - BaseRDFNodeType::IRI => cast_iri_to_xsd_literal( - col(c).struct_().field_by_name(&t.field_col_name()), - t, - s, - trg_nn, - trg_type.clone(), - global_cats.clone(), - )?, - BaseRDFNodeType::BlankNode => { - return Err(QueryProcessingError::BadCastDatatype( - c.to_string(), - trg.clone(), - t.clone(), - )) - } - BaseRDFNodeType::Literal(src_nn) => cast_literal( - col(c).struct_().field_by_name(&t.field_col_name()), - t, - s, - global_cats.clone(), - src_nn.as_ref(), - trg_nn, - trg_type.clone(), - ), - BaseRDFNodeType::None => lit(LiteralValue::untyped_null()).cast(trg_type.clone()), - }); - } - Ok(coalesce(to_coalesce.as_slice()).alias(c)) - } else { - let t = src.get_base_type().unwrap(); - let s = src.get_base_state().unwrap(); - match &t { - BaseRDFNodeType::IRI => { - cast_iri_to_xsd_literal(col(c), t, s, trg_nn, trg_type.clone(), global_cats.clone()) - } - BaseRDFNodeType::BlankNode => Err(QueryProcessingError::BadCastDatatype( - c.to_string(), - trg.clone(), - t.clone(), - )), - BaseRDFNodeType::Literal(src_nn) => Ok(cast_literal( - col(c), - t, - s, - global_cats.clone(), - src_nn.as_ref(), - trg_nn, - trg_type.clone(), - )), - BaseRDFNodeType::None => Ok(lit(LiteralValue::untyped_null()).cast(trg_type)), - } - } -} - -fn cast_iri_to_xsd_literal( - e: Expr, - t: &BaseRDFNodeType, - s: &BaseCatState, - trg_nn: NamedNodeRef, - trg_type: DataType, - global_cats: LockedCats, -) -> Result { - if trg_nn == xsd::STRING { - Ok(maybe_decode_expr(e, t, s, global_cats)) - } else { - Ok(lit(LiteralValue::untyped_null()).cast(trg_type.clone())) - } -} - -fn cast_literal( - mut c: Expr, - src_bt: &BaseRDFNodeType, - src_bs: &BaseCatState, - global_cats: LockedCats, - src: NamedNodeRef, - trg: NamedNodeRef, - trg_type: DataType, -) -> Expr { - if src == xsd::STRING && trg != xsd::STRING { - c = maybe_decode_expr(c, src_bt, src_bs, global_cats); - } - if src == xsd::STRING && trg == xsd::BOOLEAN { - c.cast(DataType::String) - .str() - .to_lowercase() - .eq(lit("true")) - } else if src == xsd::STRING && trg == xsd::DATE_TIME { - c.cast(DataType::String).str().to_datetime( - None, - None, - StrptimeOptions { - format: None, - strict: true, - exact: false, - cache: false, - }, - lit("raise"), - ) - } else if src == xsd::STRING && trg == xsd::DATE { - c.cast(DataType::String).str().to_date(StrptimeOptions { - format: None, - strict: true, - exact: false, - cache: false, - }) - } else if src == xsd::STRING && trg == xsd::TIME { - c.cast(DataType::String).str().to_time(StrptimeOptions { - format: None, - strict: true, - exact: false, - cache: false, - }) - } else if src == xsd::STRING && trg == xsd::DURATION { - //Todo handle durations - c - } else { - c.cast(trg_type) - } -} - -pub fn maybe_add_regex_feature_flags(pattern: &str, flags: Option<&str>) -> String { - if let Some(flags) = flags { - //TODO: Validate flags.. - format!("(?{}){}", flags, pattern) - } else { - pattern.to_string() - } -} - -pub fn str_starts_ends_contains(expr_decoded: Expr, second_decoded: Expr, f: &Function) -> Expr { - match f { - Function::StrStarts => expr_decoded.str().starts_with(second_decoded), - Function::StrEnds => expr_decoded.str().ends_with(second_decoded), - Function::Contains => expr_decoded.str().contains_literal(second_decoded), - _ => unreachable!("Should never happen"), - } -} - -fn str_before(c: Column, s: String) -> Result { - let bef = c.str()?.iter().map(|x: Option<&str>| { - if let Some(x) = x { - let range_to = x.find(&s); - if let Some(range_to) = range_to { - Some(&x[0..range_to]) - } else { - Some(x) - } - } else { - None - } - }); - let mut ser = Series::from_iter(bef); - ser.rename(c.name().clone()); - Ok(ser.into_column()) -} - -fn str_after(c: Column, s: String) -> Result { - let bef = c.str()?.iter().map(|x: Option<&str>| { - if let Some(x) = x { - let range_to = x.find(&s); - if let Some(range_to) = range_to { - Some(&x[range_to + s.len()..]) - } else { - Some(x) - } - } else { - None - } - }); - let mut ser = Series::from_iter(bef); - ser.rename(c.name().clone()); - Ok(ser.into_column()) -} - -fn keep_field(_s: &Schema, f: &Field) -> Result { - Ok(f.clone()) -} diff --git a/lib/query_processing/src/expressions/functions/cast_iri_to_xsd_literal.rs b/lib/query_processing/src/expressions/functions/cast_iri_to_xsd_literal.rs new file mode 100644 index 00000000..a12b0a32 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/cast_iri_to_xsd_literal.rs @@ -0,0 +1,23 @@ +use crate::errors::QueryProcessingError; +use oxrdf::vocab::xsd; +use oxrdf::NamedNodeRef; +use polars::datatypes::DataType; +use polars::prelude::{lit, Expr, LiteralValue}; +use representation::cats::{maybe_decode_expr, LockedCats}; +use representation::solution_mapping::BaseCatState; +use representation::BaseRDFNodeType; + +pub fn cast_iri_to_xsd_literal( + e: Expr, + t: &BaseRDFNodeType, + s: &BaseCatState, + trg_nn: NamedNodeRef, + trg_type: DataType, + global_cats: LockedCats, +) -> Result { + if trg_nn == xsd::STRING { + Ok(maybe_decode_expr(e, t, s, global_cats)) + } else { + Ok(lit(LiteralValue::untyped_null()).cast(trg_type.clone())) + } +} diff --git a/lib/query_processing/src/expressions/functions/cast_literal_.rs b/lib/query_processing/src/expressions/functions/cast_literal_.rs new file mode 100644 index 00000000..84a63627 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/cast_literal_.rs @@ -0,0 +1,58 @@ +use oxrdf::vocab::xsd; +use oxrdf::NamedNodeRef; +use polars::datatypes::DataType; +use polars::prelude::{lit, Expr, StrptimeOptions}; +use representation::cats::{maybe_decode_expr, LockedCats}; +use representation::solution_mapping::BaseCatState; +use representation::BaseRDFNodeType; + +pub fn cast_literal( + mut c: Expr, + src_bt: &BaseRDFNodeType, + src_bs: &BaseCatState, + global_cats: LockedCats, + src: NamedNodeRef, + trg: NamedNodeRef, + trg_type: DataType, +) -> Expr { + if src == xsd::STRING && trg != xsd::STRING { + c = maybe_decode_expr(c, src_bt, src_bs, global_cats); + } + if src == xsd::STRING && trg == xsd::BOOLEAN { + c.cast(DataType::String) + .str() + .to_lowercase() + .eq(lit("true")) + } else if src == xsd::STRING && trg == xsd::DATE_TIME { + c.cast(DataType::String).str().to_datetime( + None, + None, + StrptimeOptions { + format: None, + strict: true, + exact: false, + cache: false, + }, + lit("raise"), + ) + } else if src == xsd::STRING && trg == xsd::DATE { + c.cast(DataType::String).str().to_date(StrptimeOptions { + format: None, + strict: true, + exact: false, + cache: false, + }) + } else if src == xsd::STRING && trg == xsd::TIME { + c.cast(DataType::String).str().to_time(StrptimeOptions { + format: None, + strict: true, + exact: false, + cache: false, + }) + } else if src == xsd::STRING && trg == xsd::DURATION { + //Todo handle durations + c + } else { + c.cast(trg_type) + } +} diff --git a/lib/query_processing/src/expressions/functions/create_regex_string.rs b/lib/query_processing/src/expressions/functions/create_regex_string.rs index a3693a28..4d05b6d0 100644 --- a/lib/query_processing/src/expressions/functions/create_regex_string.rs +++ b/lib/query_processing/src/expressions/functions/create_regex_string.rs @@ -1,6 +1,6 @@ use crate::errors::QueryProcessingError; use crate::expressions::functions::eval_expression_to_string::eval_expression_to_string; -use crate::expressions::functions::maybe_add_regex_feature_flags; +use crate::expressions::functions::maybe_add_regex_feature_flags::maybe_add_regex_feature_flags; use oxrdf::vocab::xsd; use representation::RDFNodeState; use spargebra::algebra::Expression; diff --git a/lib/query_processing/src/expressions/functions/custom_function.rs b/lib/query_processing/src/expressions/functions/custom_function.rs index d0e1b198..1a29703d 100644 --- a/lib/query_processing/src/expressions/functions/custom_function.rs +++ b/lib/query_processing/src/expressions/functions/custom_function.rs @@ -5,7 +5,7 @@ use crate::constants::{ use crate::errors::QueryProcessingError; use crate::expressions::functions::struuid_v5::struuid_v5; use crate::expressions::functions::uuid_v5::uuid_v5; -use crate::expressions::functions::xsd_cast_literal; +use crate::expressions::functions::xsd_cast_literal::xsd_cast_literal; use oxrdf::vocab::xsd; use oxrdf::NamedNode; use polars::datatypes::{DataType, TimeUnit}; diff --git a/lib/query_processing/src/expressions/functions/encode_for_uri.rs b/lib/query_processing/src/expressions/functions/encode_for_uri.rs index 0c5ba570..1676c2b7 100644 --- a/lib/query_processing/src/expressions/functions/encode_for_uri.rs +++ b/lib/query_processing/src/expressions/functions/encode_for_uri.rs @@ -1,5 +1,5 @@ use crate::errors::QueryProcessingError; -use crate::expressions::functions::str_function; +use crate::expressions::functions::str_function::str_function; use oxrdf::vocab::xsd; use polars::prelude::{IntoColumn, StringChunked}; use representation::cats::LockedCats; diff --git a/lib/query_processing/src/expressions/functions/func_expression.rs b/lib/query_processing/src/expressions/functions/func_expression.rs new file mode 100644 index 00000000..4647b4a8 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/func_expression.rs @@ -0,0 +1,290 @@ +use crate::errors::QueryProcessingError; +use crate::expressions::drop_inner_contexts; +use crate::expressions::functions::abs_::abs_; +use crate::expressions::functions::ceil_::ceil_; +use crate::expressions::functions::concat_::concat_; +use crate::expressions::functions::custom_function::custom_; +use crate::expressions::functions::datatype_::datatype_; +use crate::expressions::functions::day_::day_; +use crate::expressions::functions::encode_for_uri::encode_for_uri; +use crate::expressions::functions::floor_::floor_; +use crate::expressions::functions::hours_::hours_; +use crate::expressions::functions::iri::iri; +use crate::expressions::functions::is_blank_::is_blank_; +use crate::expressions::functions::is_iri::is_iri; +use crate::expressions::functions::is_literal::is_literal; +use crate::expressions::functions::lang_::lang_; +use crate::expressions::functions::lang_matches::lang_matches; +use crate::expressions::functions::lower_upper_substr::lower_upper_substr; +use crate::expressions::functions::md5_::md5_; +use crate::expressions::functions::minutes_::minutes_; +use crate::expressions::functions::month_::month_; +use crate::expressions::functions::now_::now_; +use crate::expressions::functions::replace::sparql_replace; +use crate::expressions::functions::round_::round_; +use crate::expressions::functions::seconds_::seconds_; +use crate::expressions::functions::sha1_::sha1_; +use crate::expressions::functions::sparql_regex::sparql_regex; +use crate::expressions::functions::sparql_uuid::uuid; +use crate::expressions::functions::starts_ends_contains::starts_ends_contains; +use crate::expressions::functions::str_::str_; +use crate::expressions::functions::str_before_or_after::str_before_or_after; +use crate::expressions::functions::str_dt::str_dt; +use crate::expressions::functions::str_lang::str_lang; +use crate::expressions::functions::str_len::str_len; +use crate::expressions::functions::struuid::struuid; +use crate::expressions::functions::year_::year_; +use representation::cats::LockedCats; +use representation::query_context::Context; +use representation::solution_mapping::SolutionMappings; +use spargebra::algebra::{Expression, Function}; +use std::collections::HashMap; + +pub fn func_expression( + mut solution_mappings: SolutionMappings, + func: &Function, + args: &[Expression], + args_contexts: HashMap, + outer_context: &Context, + global_cats: LockedCats, +) -> Result { + match func { + Function::Year => { + solution_mappings = + year_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Month => { + solution_mappings = + month_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Day => { + solution_mappings = day_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Hours => { + solution_mappings = + hours_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Minutes => { + solution_mappings = + minutes_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Seconds => { + solution_mappings = + seconds_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Abs => { + solution_mappings = abs_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Ceil => { + solution_mappings = + ceil_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Floor => { + solution_mappings = + floor_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Concat => { + solution_mappings = concat_( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::Now => { + solution_mappings = now_(solution_mappings, outer_context)?; + } + Function::Round => { + solution_mappings = + round_(solution_mappings, func, args, &args_contexts, outer_context)?; + } + Function::Str => { + solution_mappings = str_( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::Lang => { + solution_mappings = lang_( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::LangMatches => { + solution_mappings = lang_matches( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::Regex => { + solution_mappings = sparql_regex( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::Uuid => { + solution_mappings = uuid(solution_mappings, func, args, outer_context)?; + } + Function::Iri => { + solution_mappings = iri( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::StrUuid => { + solution_mappings = struuid(solution_mappings, func, args, outer_context)?; + } + Function::Replace => { + solution_mappings = sparql_replace( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::Custom(nn) => { + solution_mappings = custom_( + nn, + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::StrDt => { + solution_mappings = str_dt( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::StrBefore | Function::StrAfter => { + solution_mappings = str_before_or_after( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::StrLang => { + solution_mappings = str_lang( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::StrLen => { + solution_mappings = str_len( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::LCase | Function::UCase | Function::SubStr => { + solution_mappings = lower_upper_substr( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::StrStarts | Function::StrEnds | Function::Contains => { + solution_mappings = starts_ends_contains( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::IsBlank => { + solution_mappings = is_blank_(solution_mappings, &args_contexts, outer_context)?; + } + Function::IsIri => { + solution_mappings = is_iri(solution_mappings, &args_contexts, outer_context)?; + } + Function::IsLiteral => { + solution_mappings = is_literal(solution_mappings, &args_contexts, outer_context)?; + } + Function::Datatype => { + solution_mappings = datatype_(solution_mappings, &args_contexts, outer_context)?; + } + Function::EncodeForUri => { + solution_mappings = encode_for_uri( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::Sha1 => { + solution_mappings = sha1_( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + Function::Md5 => { + solution_mappings = md5_( + solution_mappings, + func, + args, + &args_contexts, + outer_context, + global_cats, + )?; + } + _ => { + return Err(QueryProcessingError::UnimplementedFunction( + func.to_string(), + )) + } + } + solution_mappings = drop_inner_contexts(solution_mappings, &args_contexts.values().collect()); + Ok(solution_mappings) +} diff --git a/lib/query_processing/src/expressions/functions/keep_field_.rs b/lib/query_processing/src/expressions/functions/keep_field_.rs new file mode 100644 index 00000000..7d53a629 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/keep_field_.rs @@ -0,0 +1,7 @@ +use polars::datatypes::Field; +use polars::error::PolarsError; +use polars::prelude::Schema; + +pub fn keep_field(_s: &Schema, f: &Field) -> Result { + Ok(f.clone()) +} diff --git a/lib/query_processing/src/expressions/functions/maybe_add_regex_feature_flags.rs b/lib/query_processing/src/expressions/functions/maybe_add_regex_feature_flags.rs new file mode 100644 index 00000000..22bb795b --- /dev/null +++ b/lib/query_processing/src/expressions/functions/maybe_add_regex_feature_flags.rs @@ -0,0 +1,8 @@ +pub fn maybe_add_regex_feature_flags(pattern: &str, flags: Option<&str>) -> String { + if let Some(flags) = flags { + //TODO: Validate flags.. + format!("(?{}){}", flags, pattern) + } else { + pattern.to_string() + } +} diff --git a/lib/query_processing/src/expressions/functions/md5_.rs b/lib/query_processing/src/expressions/functions/md5_.rs index 326ecfe5..2f411942 100644 --- a/lib/query_processing/src/expressions/functions/md5_.rs +++ b/lib/query_processing/src/expressions/functions/md5_.rs @@ -1,5 +1,5 @@ use crate::errors::QueryProcessingError; -use crate::expressions::functions::str_function; +use crate::expressions::functions::str_function::str_function; use md5::{Digest, Md5}; use oxrdf::vocab::xsd; use polars::prelude::{IntoColumn, StringChunked}; diff --git a/lib/query_processing/src/expressions/functions/sha1_.rs b/lib/query_processing/src/expressions/functions/sha1_.rs index 22e6589c..ab9864fb 100644 --- a/lib/query_processing/src/expressions/functions/sha1_.rs +++ b/lib/query_processing/src/expressions/functions/sha1_.rs @@ -1,5 +1,5 @@ use crate::errors::QueryProcessingError; -use crate::expressions::functions::str_function; +use crate::expressions::functions::str_function::str_function; use md5::Digest; use oxrdf::vocab::xsd; use polars::prelude::{IntoColumn, StringChunked}; diff --git a/lib/query_processing/src/expressions/functions/starts_ends_contains.rs b/lib/query_processing/src/expressions/functions/starts_ends_contains.rs index 0f19cc5b..3312e7be 100644 --- a/lib/query_processing/src/expressions/functions/starts_ends_contains.rs +++ b/lib/query_processing/src/expressions/functions/starts_ends_contains.rs @@ -1,5 +1,5 @@ use crate::errors::QueryProcessingError; -use crate::expressions::functions::str_starts_ends_contains; +use crate::expressions::functions::str_starts_ends_contains_::str_starts_ends_contains; use oxrdf::vocab::xsd; use polars::datatypes::DataType; use polars::prelude::{coalesce, col, lit, LiteralValue}; diff --git a/lib/query_processing/src/expressions/functions/str_.rs b/lib/query_processing/src/expressions/functions/str_.rs index ee833355..168d3221 100644 --- a/lib/query_processing/src/expressions/functions/str_.rs +++ b/lib/query_processing/src/expressions/functions/str_.rs @@ -1,5 +1,5 @@ use crate::errors::QueryProcessingError; -use crate::expressions::functions::str_function; +use crate::expressions::functions::str_function::str_function; use oxrdf::vocab::xsd; use representation::cats::LockedCats; use representation::query_context::Context; diff --git a/lib/query_processing/src/expressions/functions/str_after.rs b/lib/query_processing/src/expressions/functions/str_after.rs new file mode 100644 index 00000000..e48996f8 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/str_after.rs @@ -0,0 +1,20 @@ +use polars::error::PolarsError; +use polars::prelude::{Column, IntoColumn, Series}; + +pub fn str_after(c: Column, s: String) -> Result { + let bef = c.str()?.iter().map(|x: Option<&str>| { + if let Some(x) = x { + let range_to = x.find(&s); + if let Some(range_to) = range_to { + Some(&x[range_to + s.len()..]) + } else { + Some(x) + } + } else { + None + } + }); + let mut ser = Series::from_iter(bef); + ser.rename(c.name().clone()); + Ok(ser.into_column()) +} diff --git a/lib/query_processing/src/expressions/functions/str_before.rs b/lib/query_processing/src/expressions/functions/str_before.rs new file mode 100644 index 00000000..b1279082 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/str_before.rs @@ -0,0 +1,20 @@ +use polars::error::PolarsError; +use polars::prelude::{Column, IntoColumn, Series}; + +pub fn str_before(c: Column, s: String) -> Result { + let bef = c.str()?.iter().map(|x: Option<&str>| { + if let Some(x) = x { + let range_to = x.find(&s); + if let Some(range_to) = range_to { + Some(&x[0..range_to]) + } else { + Some(x) + } + } else { + None + } + }); + let mut ser = Series::from_iter(bef); + ser.rename(c.name().clone()); + Ok(ser.into_column()) +} diff --git a/lib/query_processing/src/expressions/functions/str_before_or_after.rs b/lib/query_processing/src/expressions/functions/str_before_or_after.rs index cf10fadc..484c2276 100644 --- a/lib/query_processing/src/expressions/functions/str_before_or_after.rs +++ b/lib/query_processing/src/expressions/functions/str_before_or_after.rs @@ -1,6 +1,8 @@ use crate::errors::QueryProcessingError; use crate::expressions::functions::eval_expression_to_string::eval_expression_to_string; -use crate::expressions::functions::{keep_field, str_after, str_before}; +use crate::expressions::functions::keep_field_::keep_field; +use crate::expressions::functions::str_after::str_after; +use crate::expressions::functions::str_before::str_before; use oxrdf::vocab::{rdf, xsd}; use polars::prelude::{as_struct, col, lit, LiteralValue}; use representation::cats::{maybe_decode_expr, LockedCats}; diff --git a/lib/query_processing/src/expressions/functions/str_function.rs b/lib/query_processing/src/expressions/functions/str_function.rs new file mode 100644 index 00000000..dae3aa70 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/str_function.rs @@ -0,0 +1,59 @@ +use crate::expressions::cast_lang_string_to_string; +use polars::datatypes::DataType; +use polars::prelude::{coalesce, col, lit, Expr, LiteralValue}; +use representation::cats::{maybe_decode_expr, LockedCats}; +use representation::multitype::{MULTI_BLANK_DT, MULTI_IRI_DT}; +use representation::{BaseRDFNodeType, RDFNodeState}; + +pub fn str_function(c: &str, t: &RDFNodeState, global_cats: LockedCats) -> Expr { + if t.is_multi() { + let mut to_coalesce = vec![]; + for (t, s) in &t.map { + to_coalesce.push(match t { + BaseRDFNodeType::IRI => maybe_decode_expr( + col(c).struct_().field_by_name(MULTI_IRI_DT), + t, + s, + global_cats.clone(), + ), + BaseRDFNodeType::BlankNode => maybe_decode_expr( + col(c).struct_().field_by_name(MULTI_BLANK_DT), + t, + s, + global_cats.clone(), + ), + BaseRDFNodeType::Literal(_) => { + if t.is_lang_string() { + cast_lang_string_to_string(c, t, s, global_cats.clone()) + } else { + maybe_decode_expr( + col(c).struct_().field_by_name(&t.field_col_name()), + t, + s, + global_cats.clone(), + ) + .cast(DataType::String) + } + } + BaseRDFNodeType::None => lit(LiteralValue::untyped_null()).cast(DataType::String), + }) + } + coalesce(to_coalesce.as_slice()).alias(c) + } else { + let b = t.get_base_type().unwrap(); + let s = t.get_base_state().unwrap(); + match b { + BaseRDFNodeType::IRI | BaseRDFNodeType::BlankNode => { + maybe_decode_expr(col(c), b, s, global_cats) + } + BaseRDFNodeType::Literal(_) => { + if t.is_lang_string() { + cast_lang_string_to_string(c, b, s, global_cats) + } else { + maybe_decode_expr(col(c), b, s, global_cats).cast(DataType::String) + } + } + BaseRDFNodeType::None => lit(LiteralValue::untyped_null()).cast(DataType::String), + } + } +} diff --git a/lib/query_processing/src/expressions/functions/str_len.rs b/lib/query_processing/src/expressions/functions/str_len.rs index 7c885661..72a69683 100644 --- a/lib/query_processing/src/expressions/functions/str_len.rs +++ b/lib/query_processing/src/expressions/functions/str_len.rs @@ -1,5 +1,5 @@ use crate::errors::QueryProcessingError; -use crate::expressions::functions::str_function; +use crate::expressions::functions::str_function::str_function; use oxrdf::vocab::xsd; use polars::datatypes::DataType; use representation::cats::LockedCats; diff --git a/lib/query_processing/src/expressions/functions/str_starts_ends_contains_.rs b/lib/query_processing/src/expressions/functions/str_starts_ends_contains_.rs new file mode 100644 index 00000000..44dd8482 --- /dev/null +++ b/lib/query_processing/src/expressions/functions/str_starts_ends_contains_.rs @@ -0,0 +1,11 @@ +use polars::prelude::Expr; +use spargebra::algebra::Function; + +pub fn str_starts_ends_contains(expr_decoded: Expr, second_decoded: Expr, f: &Function) -> Expr { + match f { + Function::StrStarts => expr_decoded.str().starts_with(second_decoded), + Function::StrEnds => expr_decoded.str().ends_with(second_decoded), + Function::Contains => expr_decoded.str().contains_literal(second_decoded), + _ => unreachable!("Should never happen"), + } +} diff --git a/lib/query_processing/src/expressions/functions/struuid_v5.rs b/lib/query_processing/src/expressions/functions/struuid_v5.rs index d338ec21..a95b5d83 100644 --- a/lib/query_processing/src/expressions/functions/struuid_v5.rs +++ b/lib/query_processing/src/expressions/functions/struuid_v5.rs @@ -1,6 +1,6 @@ use crate::errors::QueryProcessingError; use crate::expressions::functions::eval_uuid_namespace::eval_uuid_namespace; -use crate::expressions::functions::str_function; +use crate::expressions::functions::str_function::str_function; use oxrdf::vocab::xsd; use polars::datatypes::{DataType, Field, PlSmallStr}; use polars::prelude::{as_struct, by_name, IntoColumn, StringChunkedBuilder}; diff --git a/lib/query_processing/src/expressions/functions/uuid_v5.rs b/lib/query_processing/src/expressions/functions/uuid_v5.rs index d629c1c4..bc8b405a 100644 --- a/lib/query_processing/src/expressions/functions/uuid_v5.rs +++ b/lib/query_processing/src/expressions/functions/uuid_v5.rs @@ -1,6 +1,6 @@ use crate::errors::QueryProcessingError; use crate::expressions::functions::eval_uuid_namespace::eval_uuid_namespace; -use crate::expressions::functions::str_function; +use crate::expressions::functions::str_function::str_function; use polars::datatypes::{DataType, Field, PlSmallStr}; use polars::prelude::{as_struct, by_name, IntoColumn, StringChunkedBuilder}; use representation::cats::LockedCats; diff --git a/lib/query_processing/src/expressions/functions/xsd_cast_literal.rs b/lib/query_processing/src/expressions/functions/xsd_cast_literal.rs new file mode 100644 index 00000000..8241192b --- /dev/null +++ b/lib/query_processing/src/expressions/functions/xsd_cast_literal.rs @@ -0,0 +1,76 @@ +use crate::errors::QueryProcessingError; +use crate::expressions::functions::cast_iri_to_xsd_literal::cast_iri_to_xsd_literal; +use crate::expressions::functions::cast_literal_::cast_literal; +use polars::prelude::{coalesce, col, lit, Expr, LiteralValue}; +use representation::cats::LockedCats; +use representation::{BaseRDFNodeType, RDFNodeState}; + +pub fn xsd_cast_literal( + c: &str, + src: &RDFNodeState, + trg: &BaseRDFNodeType, + global_cats: LockedCats, +) -> Result { + let trg_type = trg.default_input_polars_data_type(); + let trg_nn = if let BaseRDFNodeType::Literal(nn) = trg { + nn.as_ref() + } else { + panic!("Invalid state") + }; + if src.is_multi() { + let mut to_coalesce = vec![]; + for (t, s) in &src.map { + to_coalesce.push(match t { + BaseRDFNodeType::IRI => cast_iri_to_xsd_literal( + col(c).struct_().field_by_name(&t.field_col_name()), + t, + s, + trg_nn, + trg_type.clone(), + global_cats.clone(), + )?, + BaseRDFNodeType::BlankNode => { + return Err(QueryProcessingError::BadCastDatatype( + c.to_string(), + trg.clone(), + t.clone(), + )) + } + BaseRDFNodeType::Literal(src_nn) => cast_literal( + col(c).struct_().field_by_name(&t.field_col_name()), + t, + s, + global_cats.clone(), + src_nn.as_ref(), + trg_nn, + trg_type.clone(), + ), + BaseRDFNodeType::None => lit(LiteralValue::untyped_null()).cast(trg_type.clone()), + }); + } + Ok(coalesce(to_coalesce.as_slice()).alias(c)) + } else { + let t = src.get_base_type().unwrap(); + let s = src.get_base_state().unwrap(); + match &t { + BaseRDFNodeType::IRI => { + cast_iri_to_xsd_literal(col(c), t, s, trg_nn, trg_type.clone(), global_cats.clone()) + } + BaseRDFNodeType::BlankNode => Err(QueryProcessingError::BadCastDatatype( + c.to_string(), + trg.clone(), + t.clone(), + )), + BaseRDFNodeType::Literal(src_nn) => Ok(cast_literal( + col(c), + t, + s, + global_cats.clone(), + src_nn.as_ref(), + trg_nn, + trg_type.clone(), + )), + BaseRDFNodeType::None => Ok(lit(LiteralValue::untyped_null()).cast(trg_type)), + } + } +} diff --git a/lib/triplestore/src/sparql/lazy_expressions.rs b/lib/triplestore/src/sparql/lazy_expressions.rs index 0ebb626b..f2d1df3f 100644 --- a/lib/triplestore/src/sparql/lazy_expressions.rs +++ b/lib/triplestore/src/sparql/lazy_expressions.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use crate::sparql::errors::SparqlError; use oxrdf::vocab::xsd; use polars::prelude::IntoLazy; -use query_processing::expressions::functions::func_expression; +use query_processing::expressions::functions::func_expression::func_expression; use query_processing::expressions::{ binary_expression, bound, coalesce_contexts, exists, if_expression, in_expression, literal, named_node, not_expression, unary_minus, unary_plus, variable, diff --git a/lib/triplestore/src/triples_write/hdt_write.rs b/lib/triplestore/src/triples_write/hdt_write.rs index da6d2fec..2a9b335a 100644 --- a/lib/triplestore/src/triples_write/hdt_write.rs +++ b/lib/triplestore/src/triples_write/hdt_write.rs @@ -159,7 +159,12 @@ fn statistics_header(dict: &FourSectDict, num_triples: usize) -> Header { insert_literal(&mut body, &base, RDF_TYPE, HDT_CONTAINER); insert_literal(&mut body, &base, RDF_TYPE, VOID_DATASET); insert_literal(&mut body, &base, VOID_TRIPLES, num_triples); - insert_literal(&mut body, &base, VOID_PROPERTIES, dict.predicates.num_strings()); + insert_literal( + &mut body, + &base, + VOID_PROPERTIES, + dict.predicates.num_strings(), + ); insert_literal(&mut body, &base, VOID_DISTINCT_SUBJECTS, distinct_subjects); insert_literal(&mut body, &base, VOID_DISTINCT_OBJECTS, distinct_objects); @@ -169,9 +174,19 @@ fn statistics_header(dict: &FourSectDict, num_triples: usize) -> Header { insert_id(&mut body, &format_id, HDT_DICTIONARY, &dict_id); insert_id(&mut body, &format_id, HDT_TRIPLES, &triples_id); - insert_literal(&mut body, &dict_id, HDT_DICT_SHARED_SO, dict.shared.num_strings()); + insert_literal( + &mut body, + &dict_id, + HDT_DICT_SHARED_SO, + dict.shared.num_strings(), + ); insert_literal(&mut body, &dict_id, HDT_DICT_MAPPING, "1"); - insert_literal(&mut body, &dict_id, HDT_DICT_SIZE_STRINGS, dict.size_in_bytes()); + insert_literal( + &mut body, + &dict_id, + HDT_DICT_SIZE_STRINGS, + dict.size_in_bytes(), + ); insert_literal(&mut body, &dict_id, HDT_DICT_BLOCK_SIZE, BLOCK_SIZE); insert_literal(&mut body, &triples_id, DC_TERMS_FORMAT, HDT_TYPE_BITMAP); @@ -198,7 +213,11 @@ fn insert_literal(body: &mut BTreeSet, s: &Id, p: &str, o: impl ToStr } fn insert_id(body: &mut BTreeSet, s: &Id, p: &str, o: &Id) { - body.insert(HdtTriple::new(s.clone(), p.to_owned(), HdtTerm::Id(o.clone()))); + body.insert(HdtTriple::new( + s.clone(), + p.to_owned(), + HdtTerm::Id(o.clone()), + )); } fn hdt_error(e: impl std::fmt::Display) -> TriplestoreError {