diff --git a/internal/server/spanner/dsutil.go b/internal/server/spanner/dsutil.go index 107fb5936..deaeb44fa 100644 --- a/internal/server/spanner/dsutil.go +++ b/internal/server/spanner/dsutil.go @@ -25,19 +25,28 @@ import ( "google.golang.org/protobuf/proto" ) -// nodePropsToNodeResponse converts a slice of properties to a NodeResponse proto. -func nodePropsToNodeResponse(props []*Property) *v3.NodeResponse { +const ( + // Indicates that all properties should be returned. + WILDCARD = "*" + // Indicates that recursive property paths should be returned. + CHAIN = "+" +) + +// nodePropsToNodeResponse converts a map from subject id to its properties to a NodeResponse proto. +func nodePropsToNodeResponse(propsBySubjectID map[string][]*Property) *v3.NodeResponse { nodeResponse := &v3.NodeResponse{ Data: make(map[string]*v2.LinkedGraph), } - for _, prop := range props { - linkedGraph, ok := nodeResponse.Data[prop.SubjectID] + for subjectID, props := range propsBySubjectID { + linkedGraph, ok := nodeResponse.Data[subjectID] if !ok { linkedGraph = &v2.LinkedGraph{} - nodeResponse.Data[prop.SubjectID] = linkedGraph + nodeResponse.Data[subjectID] = linkedGraph + } + for _, prop := range props { + linkedGraph.Properties = append(linkedGraph.Properties, prop.Predicate) } - linkedGraph.Properties = append(linkedGraph.Properties, prop.Predicate) } return nodeResponse @@ -73,12 +82,9 @@ func nodeEdgesToLinkedGraph(edges []*Edge) *v2.LinkedGraph { node := &pb.EntityInfo{ Name: edge.Name, Types: edge.Types, + Dcid: edge.ObjectID, ProvenanceId: edge.Provenance, - } - if edge.ObjectValue != "" { - node.Value = edge.ObjectValue - } else { - node.Dcid = edge.ObjectID + Value: edge.ObjectValue, } nodes.Nodes = append(nodes.Nodes, node) diff --git a/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json b/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json index 1d1433a6a..4a932e29b 100644 --- a/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json +++ b/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json @@ -36,7 +36,7 @@ { "SubjectID": "Aadhaar", "Predicate": "description", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "Aadhaar is a 12-digit unique identity number that can be obtained voluntarily by all residents of India", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -45,7 +45,7 @@ { "SubjectID": "Aadhaar", "Predicate": "name", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "Aadhaar", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -54,7 +54,7 @@ { "SubjectID": "Aadhaar", "Predicate": "localCuratorLevelId", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "dcid:Aadhaar", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -63,7 +63,7 @@ { "SubjectID": "Aadhaar", "Predicate": "extendedName", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "Aadhaar", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -174,7 +174,7 @@ { "SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential", "Predicate": "extendedName", - "ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential", + "ObjectID": "", "ObjectValue": "Average retail price of electricity, residential, monthly", "Provenance": "dc/base/HumanReadableStatVars", "Name": "", @@ -183,7 +183,7 @@ { "SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential", "Predicate": "name", - "ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential", + "ObjectID": "", "ObjectValue": "Average retail price of electricity, residential, monthly", "Provenance": "dc/base/HumanReadableStatVars", "Name": "", @@ -192,11 +192,12 @@ { "SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential", "Predicate": "localCuratorLevelId", - "ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential", + "ObjectID": "", "ObjectValue": "dcid:Monthly_Average_RetailPrice_Electricity_Residential", "Provenance": "dc/base/HumanReadableStatVars", "Name": "", "Types": [] } - ] + ], + "foo": [] } \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_bracket_props.json b/internal/server/spanner/golden/query/get_node_edges_in_bracket_props.json new file mode 100644 index 000000000..b1118249b --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_bracket_props.json @@ -0,0 +1,48 @@ +{ + "EarthquakeEvent": [ + { + "SubjectID": "EarthquakeEvent", + "Predicate": "naturalHazardType", + "ObjectID": "Annual_ExpectedLoss_NaturalHazardImpact_EarthquakeEvent", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "Annual Expected Loss from Natural Hazard Impact: Earthquake", + "Types": [ + "StatisticalVariable" + ] + }, + { + "SubjectID": "EarthquakeEvent", + "Predicate": "naturalHazardType", + "ObjectID": "FemaNaturalHazardRiskIndex_NaturalHazardImpact_EarthquakeEvent", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "FEMA National Risk Index for Natural Hazard Impact: Earthquake", + "Types": [ + "StatisticalVariable" + ] + }, + { + "SubjectID": "EarthquakeEvent", + "Predicate": "naturalHazardType", + "ObjectID": "NaturalHazardRiskScore_SuperfundSite_EarthquakeEvent", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "Natural Hazard Risk Score of Superfund Site: Earthquake Event", + "Types": [ + "StatisticalVariable" + ] + }, + { + "SubjectID": "EarthquakeEvent", + "Predicate": "domainIncludes", + "ObjectID": "affectedPlace", + "ObjectValue": "", + "Provenance": "dc/base/BaseSchema", + "Name": "affectedPlace", + "Types": [ + "Property" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_chain.json b/internal/server/spanner/golden/query/get_node_edges_in_chain.json new file mode 100644 index 000000000..3c4404076 --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_chain.json @@ -0,0 +1,22 @@ +{ + "dc/g/Farm_FarmInventoryStatus": [ + { + "SubjectID": "dc/g/Farm_FarmInventoryStatus", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Farm_FarmInventoryStatus-InventorySold", + "ObjectValue": "", + "Provenance": "", + "Name": "Farm With Farm Inventory Status = Inventory Sold", + "Types": [] + }, + { + "SubjectID": "dc/g/Farm_FarmInventoryStatus", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Farm_FarmInventoryStatus-InventorySold_FarmInventoryType", + "ObjectValue": "", + "Provenance": "", + "Name": "Farm With Farm Inventory Status = Inventory Sold, Farm Inventory Type", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_filter.json b/internal/server/spanner/golden/query/get_node_edges_in_filter.json new file mode 100644 index 000000000..9a7da79f8 --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_filter.json @@ -0,0 +1,15 @@ +{ + "Farm": [ + { + "SubjectID": "Farm", + "Predicate": "populationType", + "ObjectID": "Area_Farm_Melon", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "Area of Farm: Melon", + "Types": [ + "StatisticalVariable" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_single_prop.json b/internal/server/spanner/golden/query/get_node_edges_in_single_prop.json new file mode 100644 index 000000000..f9742b5ec --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_single_prop.json @@ -0,0 +1,15 @@ +{ + "EarthquakeEvent": [ + { + "SubjectID": "EarthquakeEvent", + "Predicate": "domainIncludes", + "ObjectID": "affectedPlace", + "ObjectValue": "", + "Provenance": "dc/base/BaseSchema", + "Name": "affectedPlace", + "Types": [ + "Property" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_malicious.json b/internal/server/spanner/golden/query/get_node_edges_malicious.json new file mode 100644 index 000000000..55e801876 --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_malicious.json @@ -0,0 +1,3 @@ +{ + "foo OR 1=1;": [] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_bracket_props.json b/internal/server/spanner/golden/query/get_node_edges_out_bracket_props.json new file mode 100644 index 000000000..53a645a2a --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_bracket_props.json @@ -0,0 +1,24 @@ +{ + "Person": [ + { + "SubjectID": "Person", + "Predicate": "subClassOf", + "ObjectID": "Thing", + "ObjectValue": "", + "Provenance": "dc/base/BaseSchema", + "Name": "Thing", + "Types": [ + "Class" + ] + }, + { + "SubjectID": "Person", + "Predicate": "source", + "ObjectID": "", + "ObjectValue": "https://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources", + "Provenance": "dc/base/BaseSchema", + "Name": "", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_chain.json b/internal/server/spanner/golden/query/get_node_edges_out_chain.json new file mode 100644 index 000000000..fef4624d0 --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_chain.json @@ -0,0 +1,49 @@ +{ + "dc/g/Person_Gender": [ + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Demographics", + "ObjectValue": "", + "Provenance": "", + "Name": "Demographics", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Economy", + "ObjectValue": "", + "Provenance": "", + "Name": "Economy", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Employment", + "ObjectValue": "", + "Provenance": "", + "Name": "Employment and Business", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Health", + "ObjectValue": "", + "Provenance": "", + "Name": "Health", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Root", + "ObjectValue": "", + "Provenance": "", + "Name": "Data Commons Variables", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_filter.json b/internal/server/spanner/golden/query/get_node_edges_out_filter.json new file mode 100644 index 000000000..243223c1e --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_filter.json @@ -0,0 +1,15 @@ +{ + "nuts/UKI1": [ + { + "SubjectID": "nuts/UKI1", + "Predicate": "typeOf", + "ObjectID": "AdministrativeArea2", + "ObjectValue": "", + "Provenance": "dc/base/EuroGeos", + "Name": "AdministrativeArea2", + "Types": [ + "Class" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_single_prop.json b/internal/server/spanner/golden/query/get_node_edges_out_single_prop.json new file mode 100644 index 000000000..0963f3fec --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_single_prop.json @@ -0,0 +1,13 @@ +{ + "Person": [ + { + "SubjectID": "Person", + "Predicate": "extendedName", + "ObjectID": "", + "ObjectValue": "Person", + "Provenance": "dc/base/BaseSchema", + "Name": "", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_props_by_object_id.json b/internal/server/spanner/golden/query/get_node_props_by_object_id.json index 3705af490..32fffeff0 100644 --- a/internal/server/spanner/golden/query/get_node_props_by_object_id.json +++ b/internal/server/spanner/golden/query/get_node_props_by_object_id.json @@ -1,30 +1,34 @@ -[ - { - "SubjectID": "Count_Person", - "Predicate": "measurementDenominator" - }, - { - "SubjectID": "Person", - "Predicate": "domainIncludes" - }, - { - "SubjectID": "Person", - "Predicate": "populationType" - }, - { - "SubjectID": "Person", - "Predicate": "rangeIncludes" - }, - { - "SubjectID": "Person", - "Predicate": "subClassOf" - }, - { - "SubjectID": "Person", - "Predicate": "unitOfMeasure" - }, - { - "SubjectID": "Person", - "Predicate": "victimType" - } -] \ No newline at end of file +{ + "Count_Person": [ + { + "SubjectID": "Count_Person", + "Predicate": "measurementDenominator" + } + ], + "Person": [ + { + "SubjectID": "Person", + "Predicate": "domainIncludes" + }, + { + "SubjectID": "Person", + "Predicate": "populationType" + }, + { + "SubjectID": "Person", + "Predicate": "rangeIncludes" + }, + { + "SubjectID": "Person", + "Predicate": "subClassOf" + }, + { + "SubjectID": "Person", + "Predicate": "unitOfMeasure" + }, + { + "SubjectID": "Person", + "Predicate": "victimType" + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_props_by_subject_id.json b/internal/server/spanner/golden/query/get_node_props_by_subject_id.json index 5e42a6133..89d485420 100644 --- a/internal/server/spanner/golden/query/get_node_props_by_subject_id.json +++ b/internal/server/spanner/golden/query/get_node_props_by_subject_id.json @@ -1,62 +1,67 @@ -[ - { - "SubjectID": "Count_Person", - "Predicate": "extendedName" - }, - { - "SubjectID": "Count_Person", - "Predicate": "localCuratorLevelId" - }, - { - "SubjectID": "Count_Person", - "Predicate": "measuredProp" - }, - { - "SubjectID": "Count_Person", - "Predicate": "memberOf" - }, - { - "SubjectID": "Count_Person", - "Predicate": "name" - }, - { - "SubjectID": "Count_Person", - "Predicate": "populationType" - }, - { - "SubjectID": "Count_Person", - "Predicate": "statType" - }, - { - "SubjectID": "Count_Person", - "Predicate": "typeOf" - }, - { - "SubjectID": "Person", - "Predicate": "equivalentClass" - }, - { - "SubjectID": "Person", - "Predicate": "extendedName" - }, - { - "SubjectID": "Person", - "Predicate": "localCuratorLevelId" - }, - { - "SubjectID": "Person", - "Predicate": "name" - }, - { - "SubjectID": "Person", - "Predicate": "source" - }, - { - "SubjectID": "Person", - "Predicate": "subClassOf" - }, - { - "SubjectID": "Person", - "Predicate": "typeOf" - } -] \ No newline at end of file +{ + "Count_Person": [ + { + "SubjectID": "Count_Person", + "Predicate": "extendedName" + }, + { + "SubjectID": "Count_Person", + "Predicate": "localCuratorLevelId" + }, + { + "SubjectID": "Count_Person", + "Predicate": "measuredProp" + }, + { + "SubjectID": "Count_Person", + "Predicate": "memberOf" + }, + { + "SubjectID": "Count_Person", + "Predicate": "name" + }, + { + "SubjectID": "Count_Person", + "Predicate": "populationType" + }, + { + "SubjectID": "Count_Person", + "Predicate": "statType" + }, + { + "SubjectID": "Count_Person", + "Predicate": "typeOf" + } + ], + "Person": [ + { + "SubjectID": "Person", + "Predicate": "equivalentClass" + }, + { + "SubjectID": "Person", + "Predicate": "extendedName" + }, + { + "SubjectID": "Person", + "Predicate": "localCuratorLevelId" + }, + { + "SubjectID": "Person", + "Predicate": "name" + }, + { + "SubjectID": "Person", + "Predicate": "source" + }, + { + "SubjectID": "Person", + "Predicate": "subClassOf" + }, + { + "SubjectID": "Person", + "Predicate": "typeOf" + } + ], + "foo": [] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query_test.go b/internal/server/spanner/golden/query_test.go index 81e7ce728..a5918cca5 100644 --- a/internal/server/spanner/golden/query_test.go +++ b/internal/server/spanner/golden/query_test.go @@ -42,7 +42,7 @@ func TestGetNodeProps(t *testing.T) { goldenFile string }{ { - ids: []string{"Count_Person", "Person"}, + ids: []string{"Count_Person", "Person", "foo"}, out: true, goldenFile: "get_node_props_by_subject_id.json", }, @@ -98,7 +98,7 @@ func TestGetNodeEdgesByID(t *testing.T) { goldenFile string }{ { - ids: []string{"Aadhaar", "Monthly_Average_RetailPrice_Electricity_Residential"}, + ids: []string{"Aadhaar", "Monthly_Average_RetailPrice_Electricity_Residential", "foo"}, arc: &v2.Arc{ Out: true, SingleProp: "*", @@ -113,6 +113,89 @@ func TestGetNodeEdgesByID(t *testing.T) { }, goldenFile: "get_node_edges_by_object_id.json", }, + { + ids: []string{"Person"}, + arc: &v2.Arc{ + Out: true, + SingleProp: "extendedName", + }, + goldenFile: "get_node_edges_out_single_prop.json", + }, + { + ids: []string{"Person"}, + arc: &v2.Arc{ + Out: true, + BracketProps: []string{"source", "subClassOf"}, + }, + goldenFile: "get_node_edges_out_bracket_props.json", + }, + { + ids: []string{"nuts/UKI1"}, + arc: &v2.Arc{ + Out: true, + Filter: map[string][]string{ + "subClassOf": {"AdministrativeArea"}, + "extendedName": {"AdministrativeArea2"}, + }, + }, + goldenFile: "get_node_edges_out_filter.json", + }, + { + ids: []string{"dc/g/Person_Gender"}, + arc: &v2.Arc{ + Out: true, + SingleProp: "specializationOf", + Decorator: "+", + }, + goldenFile: "get_node_edges_out_chain.json", + }, + { + ids: []string{"EarthquakeEvent"}, + arc: &v2.Arc{ + Out: false, + SingleProp: "domainIncludes", + }, + goldenFile: "get_node_edges_in_single_prop.json", + }, + { + ids: []string{"EarthquakeEvent"}, + arc: &v2.Arc{ + Out: false, + BracketProps: []string{"domainIncludes", "naturalHazardType"}, + }, + goldenFile: "get_node_edges_in_bracket_props.json", + }, + { + ids: []string{"Farm"}, + arc: &v2.Arc{ + Out: false, + Filter: map[string][]string{ + "farmInventoryType": {"Melon"}, + "extendedName": {"Area of Farm: Melon"}, + }, + }, + goldenFile: "get_node_edges_in_filter.json", + }, + { + ids: []string{"dc/g/Farm_FarmInventoryStatus"}, + arc: &v2.Arc{ + Out: false, + SingleProp: "specializationOf", + Decorator: "+", + }, + goldenFile: "get_node_edges_in_chain.json", + }, + { + ids: []string{"foo OR 1=1;"}, + arc: &v2.Arc{ + Out: false, + SingleProp: "foo OR 1=1;", + Filter: map[string][]string{ + "foo OR 1=1;": {"foo OR 1=1;"}, + }, + }, + goldenFile: "get_node_edges_malicious.json", + }, } { actual, err := client.GetNodeEdgesByID(ctx, c.ids, c.arc) if err != nil { diff --git a/internal/server/spanner/query.go b/internal/server/spanner/query.go index 85904331d..8a233659c 100644 --- a/internal/server/spanner/query.go +++ b/internal/server/spanner/query.go @@ -18,18 +18,37 @@ package spanner import ( "context" "fmt" + "strconv" "cloud.google.com/go/spanner" v2 "github.com/datacommonsorg/mixer/internal/server/v2" "google.golang.org/api/iterator" ) +const ( + // Maximum number of edge hops to traverse for chained properties. + MAX_HOPS = 10 +) + // SQL / GQL statements executed by the SpannerClient var statements = struct { - getPropsBySubjectID string - getPropsByObjectID string - getEdgesBySubjectID string - getEdgesByObjectID string + // Fetch Properties for out arcs + getPropsBySubjectID string + // Fetch Properties for in arcs + getPropsByObjectID string + // Fetch Edges for out arcs with a single hop + getEdgesBySubjectID string + // Fetch Edges for out arcs with chaining + getChainedEdgesBySubjectID string + // Fetch Edges for in arcs with a single hop + getEdgesByObjectID string + // Fetch Edges for in arcs with chaining + getChainedEdgesByObjectID string + // Subquery to filter edges by predicate + filterProps string + // Subquery to filter edges by object property-values + filterObjects string + // Fetch Observations for variable+entity. getObsByVariableAndEntity string }{ getPropsBySubjectID: ` @@ -53,38 +72,167 @@ var statements = struct { `, getEdgesBySubjectID: ` SELECT - edge.subject_id, - edge.predicate, - COALESCE(edge.object_id, '') AS object_id, - COALESCE(edge.object_value, '') AS object_value, - COALESCE(edge.provenance, '') AS provenance, - COALESCE(object.name, '') AS name, - COALESCE(object.types, []) AS types + result.subject_id, + result.predicate, + COALESCE(result.object_id, '') AS object_id, + COALESCE(result.object_value, '') AS object_value, + COALESCE(result.provenance, '') AS provenance, + COALESCE(result.name, '') AS name, + COALESCE(result.types, []) AS types + FROM ( + SELECT + * + FROM + GRAPH_TABLE ( + DCGRAPH MATCH -[e:Edge + WHERE + e.subject_id IN UNNEST(@ids) + AND e.object_value IS NULL + AND e.subject_id != e.object_id%[1]s]->(n:Node) + RETURN e.subject_id, + e.predicate, + e.object_id, + '' as object_value, + e.provenance, + n.name, + n.types + ) + UNION ALL + SELECT + * + FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.subject_id IN UNNEST(@ids) + AND e.object_value IS NOT NULL%[1]s]-> + RETURN e.subject_id, + e.predicate, + '' as object_id, + e.object_value, + e.provenance, + '' AS name, + ARRAY[] AS types + ) + )result + `, + getChainedEdgesBySubjectID: fmt.Sprintf(` + SELECT + result.subject_id, + @result_predicate AS predicate, + COALESCE(result.object_id, '') AS object_id, + COALESCE(result.object_value, '') AS object_value, + '' AS provenance, + COALESCE(result.name, '') AS name, + ARRAY[] AS types + FROM ( + SELECT + * + FROM + GRAPH_TABLE ( + DCGRAPH MATCH (m:Node + WHERE + m.subject_id IN UNNEST(@ids))-[e:Edge + WHERE + e.predicate = @predicate]->{1,%d}(n:Node) + WHERE + m != n + RETURN DISTINCT m.subject_id, + n.subject_id as object_id, + '' as object_value, + n.name + ) + UNION ALL + SELECT + * + FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.subject_id IN UNNEST(@ids) + AND e.object_value IS NOT NULL + AND e.predicate = @predicate]-> + RETURN e.subject_id, + '' AS object_id, + e.object_value, + '' AS name + ) + )result + `, MAX_HOPS), + getEdgesByObjectID: ` + SELECT + result.subject_id, + result.predicate, + result.object_id, + '' AS object_value, + COALESCE(result.provenance, '') AS provenance, + COALESCE(result.name, '') AS name, + COALESCE(result.types, []) AS types, + FROM + GRAPH_TABLE ( + DCGraph MATCH <-[e:Edge + WHERE + e.object_id IN UNNEST(@ids) + AND e.subject_id != e.object_id%s]-(n:Node) + RETURN e.object_id AS subject_id, + e.predicate, + e.subject_id AS object_id, + e.provenance, + n.name, + n.types + )result + `, + getChainedEdgesByObjectID: fmt.Sprintf(` + SELECT + result.subject_id, + @result_predicate AS predicate, + result.object_id, + '' AS object_value, + '' AS provenance, + COALESCE(result.name, '') AS name, + ARRAY[] AS types FROM - Edge edge - LEFT JOIN - GRAPH_TABLE( DCGraph MATCH -[e:Edge + GRAPH_TABLE ( + DCGraph MATCH (m:Node + WHERE m.subject_id IN UNNEST(@ids))<-[e:Edge WHERE - e.subject_id IN UNNEST(@ids) - AND e.object_value IS NULL]->(n:Node) RETURN n.subject_id, - n.name, - n.types) object - ON - edge.object_id = object.subject_id - WHERE - edge.subject_id IN UNNEST(@ids) + e.predicate = @predicate]-{1,%d}(n:Node) + WHERE + m!= n + RETURN DISTINCT m.subject_id, + n.subject_id AS object_id, + n.name + )result + `, MAX_HOPS), + filterProps: ` + AND e.predicate IN UNNEST(@props) `, - getEdgesByObjectID: ` - GRAPH DCGraph MATCH (n:Node)-[e:Edge - WHERE - e.object_id IN UNNEST(@ids) - AND e.subject_id != e.object_id]-> return e.object_id AS subject_id, - e.predicate, - n.subject_id AS object_id, - '' as object_value, - COALESCE(e.provenance, '') AS provenance, - COALESCE(n.name, '') AS name, - COALESCE(n.types, []) AS types + filterObjects: ` + INNER JOIN ( + SELECT + * + FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.predicate = @prop%[1]d + AND e.object_id IN UNNEST(@val%[1]d)]-> + RETURN e.subject_id + ) + UNION DISTINCT + SELECT + * + FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.predicate = @prop%[1]d + AND e.object_value IN UNNEST(@val%[1]d)]-> + RETURN e.subject_id + ) + )filter%[1]d + ON + result.object_id = filter%[1]d.subject_id `, getObsByVariableAndEntity: ` SELECT @@ -105,12 +253,15 @@ var statements = struct { `, } -// GetNodeProps retrieves node properties from Spanner given a list of IDs and a direction. -func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out bool) ([]*Property, error) { - props := []*Property{} +// GetNodeProps retrieves node properties from Spanner given a list of IDs and a direction and returns a map. +func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out bool) (map[string][]*Property, error) { + props := map[string][]*Property{} if len(ids) == 0 { return props, nil } + for _, id := range ids { + props[id] = []*Property{} + } var stmt spanner.Statement @@ -135,7 +286,8 @@ func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out boo }, func(rowStruct interface{}) { prop := rowStruct.(*Property) - props = append(props, prop) + subjectID := prop.SubjectID + props[subjectID] = append(props[subjectID], prop) }, ) if err != nil { @@ -147,27 +299,66 @@ func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out boo // GetNodeEdgesByID retrieves node edges from Spanner given a list of IDs and a property Arc and returns a map. func (sc *SpannerClient) GetNodeEdgesByID(ctx context.Context, ids []string, arc *v2.Arc) (map[string][]*Edge, error) { - // TODO: Support additional Node functionality (properties, pagination, etc). + // TODO: Support pagination. edges := make(map[string][]*Edge) if len(ids) == 0 { return edges, nil } + for _, id := range ids { + edges[id] = []*Edge{} + } - var stmt spanner.Statement + // Validate input. + if arc.Decorator != "" && (arc.SingleProp == "" || arc.SingleProp == WILDCARD || len(arc.BracketProps) > 0) { + return nil, fmt.Errorf("chain expressions are only supported for a single property") + } + params := map[string]interface{}{"ids": ids} + + // Attach property arcs. + filterProps := "" + if arc.SingleProp != "" && arc.SingleProp != WILDCARD { + filterProps = statements.filterProps + params["props"] = []string{arc.SingleProp} + } else if len(arc.BracketProps) > 0 { + filterProps = statements.filterProps + params["props"] = arc.BracketProps + } + + var template string switch arc.Out { case true: - stmt = spanner.Statement{ - SQL: statements.getEdgesBySubjectID, - Params: map[string]interface{}{"ids": ids}, + if arc.Decorator == CHAIN { + template = statements.getChainedEdgesBySubjectID + params["predicate"] = arc.SingleProp + params["result_predicate"] = arc.SingleProp + arc.Decorator + } else { + template = fmt.Sprintf(statements.getEdgesBySubjectID, filterProps) } case false: - stmt = spanner.Statement{ - SQL: statements.getEdgesByObjectID, - Params: map[string]interface{}{"ids": ids}, + if arc.Decorator == CHAIN { + template = statements.getChainedEdgesByObjectID + params["predicate"] = arc.SingleProp + params["result_predicate"] = arc.SingleProp + arc.Decorator + } else { + template = fmt.Sprintf(statements.getEdgesByObjectID, filterProps) } } + // Attach filters. + i := 0 + for prop, val := range arc.Filter { + template += fmt.Sprintf(statements.filterObjects, i) + params["prop"+strconv.Itoa(i)] = prop + params["val"+strconv.Itoa(i)] = val + i += 1 + } + + stmt := spanner.Statement{ + SQL: template, + Params: params, + } + err := sc.queryAndCollect( ctx, stmt, diff --git a/internal/server/v3/node/golden/node_test.go b/internal/server/v3/node/golden/node_test.go index 5a7022980..43954abaa 100644 --- a/internal/server/v3/node/golden/node_test.go +++ b/internal/server/v3/node/golden/node_test.go @@ -46,10 +46,21 @@ func TestV3Node(t *testing.T) { nextToken string goldenFile string }{ + { + "Out properties", + []string{ + "Count_Person_Female", + "foo", + }, + "->", + "", + "out_prop.json", + }, { "All out property-values", []string{ "Count_Person_Female", + "foo", }, "->*", "", diff --git a/internal/server/v3/node/golden/out_prop.json b/internal/server/v3/node/golden/out_prop.json new file mode 100644 index 000000000..c628b0ed6 --- /dev/null +++ b/internal/server/v3/node/golden/out_prop.json @@ -0,0 +1,19 @@ +{ + "data": { + "Count_Person_Female": { + "properties": [ + "constraintProperties", + "extendedName", + "gender", + "localCuratorLevelId", + "measuredProp", + "memberOf", + "name", + "populationType", + "statType", + "typeOf" + ] + }, + "foo": {} + } +} \ No newline at end of file diff --git a/internal/server/v3/node/golden/out_pv_all.json b/internal/server/v3/node/golden/out_pv_all.json index 887c3f04d..e0fa1a8c6 100644 --- a/internal/server/v3/node/golden/out_pv_all.json +++ b/internal/server/v3/node/golden/out_pv_all.json @@ -111,6 +111,7 @@ ] } } - } + }, + "foo": {} } } \ No newline at end of file