From c3477bc475bf7d5a6594864ab53bab1b4d222d2f Mon Sep 17 00:00:00 2001 From: natalie <77713883+n-h-diaz@users.noreply.github.com> Date: Fri, 22 Nov 2024 14:28:59 -0800 Subject: [PATCH] Update V3 node (#1465) Adds support for property filters, edge filters, and chaining to V3 node. Also fixes the property API to support empty responses. I ended up adding 6 SQL statements: - out arc, single hop - out arc, chained hops - in arc, single hop - in arc, chained hops - shared subquery for filtering properties - shared subquery for filtering edges I split up the single vs chained hops because it was taking too long to return the full response for chained hops. (This comes from processing the edge path, which can be very large and costly.) Instead, for chained hops I only return the name and dcid of the leaf node (and not provenance or types). This is consistent with the current v2 node behavior. I set the limit for chained hops to 10, which can take anywhere from a few hundred milliseconds to ~5 seconds for some of the examples I tested. Adding filters will increase the time too. 
Still TODO: support pagination, probably test this more --- internal/server/spanner/dsutil.go | 28 +- .../query/get_node_edges_by_subject_id.json | 17 +- .../get_node_edges_in_bracket_props.json | 48 +++ .../golden/query/get_node_edges_in_chain.json | 22 ++ .../query/get_node_edges_in_filter.json | 15 + .../query/get_node_edges_in_single_prop.json | 15 + .../query/get_node_edges_malicious.json | 3 + .../get_node_edges_out_bracket_props.json | 24 ++ .../query/get_node_edges_out_chain.json | 49 +++ .../query/get_node_edges_out_filter.json | 15 + .../query/get_node_edges_out_single_prop.json | 13 + .../query/get_node_props_by_object_id.json | 64 ++-- .../query/get_node_props_by_subject_id.json | 129 ++++---- internal/server/spanner/golden/query_test.go | 87 +++++- internal/server/spanner/query.go | 281 +++++++++++++++--- internal/server/v3/node/golden/node_test.go | 11 + internal/server/v3/node/golden/out_prop.json | 19 ++ .../server/v3/node/golden/out_pv_all.json | 3 +- 18 files changed, 684 insertions(+), 159 deletions(-) create mode 100644 internal/server/spanner/golden/query/get_node_edges_in_bracket_props.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_in_chain.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_in_filter.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_in_single_prop.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_malicious.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_out_bracket_props.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_out_chain.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_out_filter.json create mode 100644 internal/server/spanner/golden/query/get_node_edges_out_single_prop.json create mode 100644 internal/server/v3/node/golden/out_prop.json diff --git a/internal/server/spanner/dsutil.go b/internal/server/spanner/dsutil.go index 
107fb5936..deaeb44fa 100644 --- a/internal/server/spanner/dsutil.go +++ b/internal/server/spanner/dsutil.go @@ -25,19 +25,28 @@ import ( "google.golang.org/protobuf/proto" ) -// nodePropsToNodeResponse converts a slice of properties to a NodeResponse proto. -func nodePropsToNodeResponse(props []*Property) *v3.NodeResponse { +const ( + // Indicates that all properties should be returned. + WILDCARD = "*" + // Indicates that recursive property paths should be returned. + CHAIN = "+" +) + +// nodePropsToNodeResponse converts a map from subject id to its properties to a NodeResponse proto. +func nodePropsToNodeResponse(propsBySubjectID map[string][]*Property) *v3.NodeResponse { nodeResponse := &v3.NodeResponse{ Data: make(map[string]*v2.LinkedGraph), } - for _, prop := range props { - linkedGraph, ok := nodeResponse.Data[prop.SubjectID] + for subjectID, props := range propsBySubjectID { + linkedGraph, ok := nodeResponse.Data[subjectID] if !ok { linkedGraph = &v2.LinkedGraph{} - nodeResponse.Data[prop.SubjectID] = linkedGraph + nodeResponse.Data[subjectID] = linkedGraph + } + for _, prop := range props { + linkedGraph.Properties = append(linkedGraph.Properties, prop.Predicate) } - linkedGraph.Properties = append(linkedGraph.Properties, prop.Predicate) } return nodeResponse @@ -73,12 +82,9 @@ func nodeEdgesToLinkedGraph(edges []*Edge) *v2.LinkedGraph { node := &pb.EntityInfo{ Name: edge.Name, Types: edge.Types, + Dcid: edge.ObjectID, ProvenanceId: edge.Provenance, - } - if edge.ObjectValue != "" { - node.Value = edge.ObjectValue - } else { - node.Dcid = edge.ObjectID + Value: edge.ObjectValue, } nodes.Nodes = append(nodes.Nodes, node) diff --git a/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json b/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json index 1d1433a6a..4a932e29b 100644 --- a/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json +++ 
b/internal/server/spanner/golden/query/get_node_edges_by_subject_id.json @@ -36,7 +36,7 @@ { "SubjectID": "Aadhaar", "Predicate": "description", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "Aadhaar is a 12-digit unique identity number that can be obtained voluntarily by all residents of India", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -45,7 +45,7 @@ { "SubjectID": "Aadhaar", "Predicate": "name", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "Aadhaar", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -54,7 +54,7 @@ { "SubjectID": "Aadhaar", "Predicate": "localCuratorLevelId", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "dcid:Aadhaar", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -63,7 +63,7 @@ { "SubjectID": "Aadhaar", "Predicate": "extendedName", - "ObjectID": "Aadhaar", + "ObjectID": "", "ObjectValue": "Aadhaar", "Provenance": "dc/base/BaseSchema", "Name": "", @@ -174,7 +174,7 @@ { "SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential", "Predicate": "extendedName", - "ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential", + "ObjectID": "", "ObjectValue": "Average retail price of electricity, residential, monthly", "Provenance": "dc/base/HumanReadableStatVars", "Name": "", @@ -183,7 +183,7 @@ { "SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential", "Predicate": "name", - "ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential", + "ObjectID": "", "ObjectValue": "Average retail price of electricity, residential, monthly", "Provenance": "dc/base/HumanReadableStatVars", "Name": "", @@ -192,11 +192,12 @@ { "SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential", "Predicate": "localCuratorLevelId", - "ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential", + "ObjectID": "", "ObjectValue": "dcid:Monthly_Average_RetailPrice_Electricity_Residential", "Provenance": "dc/base/HumanReadableStatVars", "Name": "", "Types": [] } - ] + ], + "foo": [] } \ No 
newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_bracket_props.json b/internal/server/spanner/golden/query/get_node_edges_in_bracket_props.json new file mode 100644 index 000000000..b1118249b --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_bracket_props.json @@ -0,0 +1,48 @@ +{ + "EarthquakeEvent": [ + { + "SubjectID": "EarthquakeEvent", + "Predicate": "naturalHazardType", + "ObjectID": "Annual_ExpectedLoss_NaturalHazardImpact_EarthquakeEvent", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "Annual Expected Loss from Natural Hazard Impact: Earthquake", + "Types": [ + "StatisticalVariable" + ] + }, + { + "SubjectID": "EarthquakeEvent", + "Predicate": "naturalHazardType", + "ObjectID": "FemaNaturalHazardRiskIndex_NaturalHazardImpact_EarthquakeEvent", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "FEMA National Risk Index for Natural Hazard Impact: Earthquake", + "Types": [ + "StatisticalVariable" + ] + }, + { + "SubjectID": "EarthquakeEvent", + "Predicate": "naturalHazardType", + "ObjectID": "NaturalHazardRiskScore_SuperfundSite_EarthquakeEvent", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "Natural Hazard Risk Score of Superfund Site: Earthquake Event", + "Types": [ + "StatisticalVariable" + ] + }, + { + "SubjectID": "EarthquakeEvent", + "Predicate": "domainIncludes", + "ObjectID": "affectedPlace", + "ObjectValue": "", + "Provenance": "dc/base/BaseSchema", + "Name": "affectedPlace", + "Types": [ + "Property" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_chain.json b/internal/server/spanner/golden/query/get_node_edges_in_chain.json new file mode 100644 index 000000000..3c4404076 --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_chain.json @@ -0,0 +1,22 @@ +{ + "dc/g/Farm_FarmInventoryStatus": [ + { + 
"SubjectID": "dc/g/Farm_FarmInventoryStatus", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Farm_FarmInventoryStatus-InventorySold", + "ObjectValue": "", + "Provenance": "", + "Name": "Farm With Farm Inventory Status = Inventory Sold", + "Types": [] + }, + { + "SubjectID": "dc/g/Farm_FarmInventoryStatus", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Farm_FarmInventoryStatus-InventorySold_FarmInventoryType", + "ObjectValue": "", + "Provenance": "", + "Name": "Farm With Farm Inventory Status = Inventory Sold, Farm Inventory Type", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_filter.json b/internal/server/spanner/golden/query/get_node_edges_in_filter.json new file mode 100644 index 000000000..9a7da79f8 --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_filter.json @@ -0,0 +1,15 @@ +{ + "Farm": [ + { + "SubjectID": "Farm", + "Predicate": "populationType", + "ObjectID": "Area_Farm_Melon", + "ObjectValue": "", + "Provenance": "dc/base/HumanReadableStatVars", + "Name": "Area of Farm: Melon", + "Types": [ + "StatisticalVariable" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_in_single_prop.json b/internal/server/spanner/golden/query/get_node_edges_in_single_prop.json new file mode 100644 index 000000000..f9742b5ec --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_in_single_prop.json @@ -0,0 +1,15 @@ +{ + "EarthquakeEvent": [ + { + "SubjectID": "EarthquakeEvent", + "Predicate": "domainIncludes", + "ObjectID": "affectedPlace", + "ObjectValue": "", + "Provenance": "dc/base/BaseSchema", + "Name": "affectedPlace", + "Types": [ + "Property" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_malicious.json b/internal/server/spanner/golden/query/get_node_edges_malicious.json new file mode 100644 index 000000000..55e801876 
--- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_malicious.json @@ -0,0 +1,3 @@ +{ + "foo OR 1=1;": [] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_bracket_props.json b/internal/server/spanner/golden/query/get_node_edges_out_bracket_props.json new file mode 100644 index 000000000..53a645a2a --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_bracket_props.json @@ -0,0 +1,24 @@ +{ + "Person": [ + { + "SubjectID": "Person", + "Predicate": "subClassOf", + "ObjectID": "Thing", + "ObjectValue": "", + "Provenance": "dc/base/BaseSchema", + "Name": "Thing", + "Types": [ + "Class" + ] + }, + { + "SubjectID": "Person", + "Predicate": "source", + "ObjectID": "", + "ObjectValue": "https://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources", + "Provenance": "dc/base/BaseSchema", + "Name": "", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_chain.json b/internal/server/spanner/golden/query/get_node_edges_out_chain.json new file mode 100644 index 000000000..fef4624d0 --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_chain.json @@ -0,0 +1,49 @@ +{ + "dc/g/Person_Gender": [ + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Demographics", + "ObjectValue": "", + "Provenance": "", + "Name": "Demographics", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Economy", + "ObjectValue": "", + "Provenance": "", + "Name": "Economy", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Employment", + "ObjectValue": "", + "Provenance": "", + "Name": "Employment and Business", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Health", + "ObjectValue": "", + 
"Provenance": "", + "Name": "Health", + "Types": [] + }, + { + "SubjectID": "dc/g/Person_Gender", + "Predicate": "specializationOf+", + "ObjectID": "dc/g/Root", + "ObjectValue": "", + "Provenance": "", + "Name": "Data Commons Variables", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_filter.json b/internal/server/spanner/golden/query/get_node_edges_out_filter.json new file mode 100644 index 000000000..243223c1e --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_filter.json @@ -0,0 +1,15 @@ +{ + "nuts/UKI1": [ + { + "SubjectID": "nuts/UKI1", + "Predicate": "typeOf", + "ObjectID": "AdministrativeArea2", + "ObjectValue": "", + "Provenance": "dc/base/EuroGeos", + "Name": "AdministrativeArea2", + "Types": [ + "Class" + ] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_edges_out_single_prop.json b/internal/server/spanner/golden/query/get_node_edges_out_single_prop.json new file mode 100644 index 000000000..0963f3fec --- /dev/null +++ b/internal/server/spanner/golden/query/get_node_edges_out_single_prop.json @@ -0,0 +1,13 @@ +{ + "Person": [ + { + "SubjectID": "Person", + "Predicate": "extendedName", + "ObjectID": "", + "ObjectValue": "Person", + "Provenance": "dc/base/BaseSchema", + "Name": "", + "Types": [] + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_props_by_object_id.json b/internal/server/spanner/golden/query/get_node_props_by_object_id.json index 3705af490..32fffeff0 100644 --- a/internal/server/spanner/golden/query/get_node_props_by_object_id.json +++ b/internal/server/spanner/golden/query/get_node_props_by_object_id.json @@ -1,30 +1,34 @@ -[ - { - "SubjectID": "Count_Person", - "Predicate": "measurementDenominator" - }, - { - "SubjectID": "Person", - "Predicate": "domainIncludes" - }, - { - "SubjectID": "Person", - "Predicate": "populationType" - }, - { - 
"SubjectID": "Person", - "Predicate": "rangeIncludes" - }, - { - "SubjectID": "Person", - "Predicate": "subClassOf" - }, - { - "SubjectID": "Person", - "Predicate": "unitOfMeasure" - }, - { - "SubjectID": "Person", - "Predicate": "victimType" - } -] \ No newline at end of file +{ + "Count_Person": [ + { + "SubjectID": "Count_Person", + "Predicate": "measurementDenominator" + } + ], + "Person": [ + { + "SubjectID": "Person", + "Predicate": "domainIncludes" + }, + { + "SubjectID": "Person", + "Predicate": "populationType" + }, + { + "SubjectID": "Person", + "Predicate": "rangeIncludes" + }, + { + "SubjectID": "Person", + "Predicate": "subClassOf" + }, + { + "SubjectID": "Person", + "Predicate": "unitOfMeasure" + }, + { + "SubjectID": "Person", + "Predicate": "victimType" + } + ] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query/get_node_props_by_subject_id.json b/internal/server/spanner/golden/query/get_node_props_by_subject_id.json index 5e42a6133..89d485420 100644 --- a/internal/server/spanner/golden/query/get_node_props_by_subject_id.json +++ b/internal/server/spanner/golden/query/get_node_props_by_subject_id.json @@ -1,62 +1,67 @@ -[ - { - "SubjectID": "Count_Person", - "Predicate": "extendedName" - }, - { - "SubjectID": "Count_Person", - "Predicate": "localCuratorLevelId" - }, - { - "SubjectID": "Count_Person", - "Predicate": "measuredProp" - }, - { - "SubjectID": "Count_Person", - "Predicate": "memberOf" - }, - { - "SubjectID": "Count_Person", - "Predicate": "name" - }, - { - "SubjectID": "Count_Person", - "Predicate": "populationType" - }, - { - "SubjectID": "Count_Person", - "Predicate": "statType" - }, - { - "SubjectID": "Count_Person", - "Predicate": "typeOf" - }, - { - "SubjectID": "Person", - "Predicate": "equivalentClass" - }, - { - "SubjectID": "Person", - "Predicate": "extendedName" - }, - { - "SubjectID": "Person", - "Predicate": "localCuratorLevelId" - }, - { - "SubjectID": "Person", - "Predicate": "name" - }, - { - 
"SubjectID": "Person", - "Predicate": "source" - }, - { - "SubjectID": "Person", - "Predicate": "subClassOf" - }, - { - "SubjectID": "Person", - "Predicate": "typeOf" - } -] \ No newline at end of file +{ + "Count_Person": [ + { + "SubjectID": "Count_Person", + "Predicate": "extendedName" + }, + { + "SubjectID": "Count_Person", + "Predicate": "localCuratorLevelId" + }, + { + "SubjectID": "Count_Person", + "Predicate": "measuredProp" + }, + { + "SubjectID": "Count_Person", + "Predicate": "memberOf" + }, + { + "SubjectID": "Count_Person", + "Predicate": "name" + }, + { + "SubjectID": "Count_Person", + "Predicate": "populationType" + }, + { + "SubjectID": "Count_Person", + "Predicate": "statType" + }, + { + "SubjectID": "Count_Person", + "Predicate": "typeOf" + } + ], + "Person": [ + { + "SubjectID": "Person", + "Predicate": "equivalentClass" + }, + { + "SubjectID": "Person", + "Predicate": "extendedName" + }, + { + "SubjectID": "Person", + "Predicate": "localCuratorLevelId" + }, + { + "SubjectID": "Person", + "Predicate": "name" + }, + { + "SubjectID": "Person", + "Predicate": "source" + }, + { + "SubjectID": "Person", + "Predicate": "subClassOf" + }, + { + "SubjectID": "Person", + "Predicate": "typeOf" + } + ], + "foo": [] +} \ No newline at end of file diff --git a/internal/server/spanner/golden/query_test.go b/internal/server/spanner/golden/query_test.go index 81e7ce728..a5918cca5 100644 --- a/internal/server/spanner/golden/query_test.go +++ b/internal/server/spanner/golden/query_test.go @@ -42,7 +42,7 @@ func TestGetNodeProps(t *testing.T) { goldenFile string }{ { - ids: []string{"Count_Person", "Person"}, + ids: []string{"Count_Person", "Person", "foo"}, out: true, goldenFile: "get_node_props_by_subject_id.json", }, @@ -98,7 +98,7 @@ func TestGetNodeEdgesByID(t *testing.T) { goldenFile string }{ { - ids: []string{"Aadhaar", "Monthly_Average_RetailPrice_Electricity_Residential"}, + ids: []string{"Aadhaar", "Monthly_Average_RetailPrice_Electricity_Residential", 
"foo"}, arc: &v2.Arc{ Out: true, SingleProp: "*", @@ -113,6 +113,89 @@ func TestGetNodeEdgesByID(t *testing.T) { }, goldenFile: "get_node_edges_by_object_id.json", }, + { + ids: []string{"Person"}, + arc: &v2.Arc{ + Out: true, + SingleProp: "extendedName", + }, + goldenFile: "get_node_edges_out_single_prop.json", + }, + { + ids: []string{"Person"}, + arc: &v2.Arc{ + Out: true, + BracketProps: []string{"source", "subClassOf"}, + }, + goldenFile: "get_node_edges_out_bracket_props.json", + }, + { + ids: []string{"nuts/UKI1"}, + arc: &v2.Arc{ + Out: true, + Filter: map[string][]string{ + "subClassOf": {"AdministrativeArea"}, + "extendedName": {"AdministrativeArea2"}, + }, + }, + goldenFile: "get_node_edges_out_filter.json", + }, + { + ids: []string{"dc/g/Person_Gender"}, + arc: &v2.Arc{ + Out: true, + SingleProp: "specializationOf", + Decorator: "+", + }, + goldenFile: "get_node_edges_out_chain.json", + }, + { + ids: []string{"EarthquakeEvent"}, + arc: &v2.Arc{ + Out: false, + SingleProp: "domainIncludes", + }, + goldenFile: "get_node_edges_in_single_prop.json", + }, + { + ids: []string{"EarthquakeEvent"}, + arc: &v2.Arc{ + Out: false, + BracketProps: []string{"domainIncludes", "naturalHazardType"}, + }, + goldenFile: "get_node_edges_in_bracket_props.json", + }, + { + ids: []string{"Farm"}, + arc: &v2.Arc{ + Out: false, + Filter: map[string][]string{ + "farmInventoryType": {"Melon"}, + "extendedName": {"Area of Farm: Melon"}, + }, + }, + goldenFile: "get_node_edges_in_filter.json", + }, + { + ids: []string{"dc/g/Farm_FarmInventoryStatus"}, + arc: &v2.Arc{ + Out: false, + SingleProp: "specializationOf", + Decorator: "+", + }, + goldenFile: "get_node_edges_in_chain.json", + }, + { + ids: []string{"foo OR 1=1;"}, + arc: &v2.Arc{ + Out: false, + SingleProp: "foo OR 1=1;", + Filter: map[string][]string{ + "foo OR 1=1;": {"foo OR 1=1;"}, + }, + }, + goldenFile: "get_node_edges_malicious.json", + }, } { actual, err := client.GetNodeEdgesByID(ctx, c.ids, c.arc) if err != nil { 
diff --git a/internal/server/spanner/query.go b/internal/server/spanner/query.go index 85904331d..8a233659c 100644 --- a/internal/server/spanner/query.go +++ b/internal/server/spanner/query.go @@ -18,18 +18,37 @@ package spanner import ( "context" "fmt" + "strconv" "cloud.google.com/go/spanner" v2 "github.com/datacommonsorg/mixer/internal/server/v2" "google.golang.org/api/iterator" ) +const ( + // Maximum number of edge hops to traverse for chained properties. + MAX_HOPS = 10 +) + // SQL / GQL statements executed by the SpannerClient var statements = struct { - getPropsBySubjectID string - getPropsByObjectID string - getEdgesBySubjectID string - getEdgesByObjectID string + // Fetch Properties for out arcs + getPropsBySubjectID string + // Fetch Properties for in arcs + getPropsByObjectID string + // Fetch Edges for out arcs with a single hop + getEdgesBySubjectID string + // Fetch Edges for out arcs with chaining + getChainedEdgesBySubjectID string + // Fetch Edges for in arcs with a single hop + getEdgesByObjectID string + // Fetch Edges for in arcs with chaining + getChainedEdgesByObjectID string + // Subquery to filter edges by predicate + filterProps string + // Subquery to filter edges by object property-values + filterObjects string + // Fetch Observations for variable+entity. 
getObsByVariableAndEntity string }{ getPropsBySubjectID: ` @@ -53,38 +72,167 @@ var statements = struct { `, getEdgesBySubjectID: ` SELECT - edge.subject_id, - edge.predicate, - COALESCE(edge.object_id, '') AS object_id, - COALESCE(edge.object_value, '') AS object_value, - COALESCE(edge.provenance, '') AS provenance, - COALESCE(object.name, '') AS name, - COALESCE(object.types, []) AS types + result.subject_id, + result.predicate, + COALESCE(result.object_id, '') AS object_id, + COALESCE(result.object_value, '') AS object_value, + COALESCE(result.provenance, '') AS provenance, + COALESCE(result.name, '') AS name, + COALESCE(result.types, []) AS types + FROM ( + SELECT + * + FROM + GRAPH_TABLE ( + DCGRAPH MATCH -[e:Edge + WHERE + e.subject_id IN UNNEST(@ids) + AND e.object_value IS NULL + AND e.subject_id != e.object_id%[1]s]->(n:Node) + RETURN e.subject_id, + e.predicate, + e.object_id, + '' as object_value, + e.provenance, + n.name, + n.types + ) + UNION ALL + SELECT + * + FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.subject_id IN UNNEST(@ids) + AND e.object_value IS NOT NULL%[1]s]-> + RETURN e.subject_id, + e.predicate, + '' as object_id, + e.object_value, + e.provenance, + '' AS name, + ARRAY[] AS types + ) + )result + `, + getChainedEdgesBySubjectID: fmt.Sprintf(` + SELECT + result.subject_id, + @result_predicate AS predicate, + COALESCE(result.object_id, '') AS object_id, + COALESCE(result.object_value, '') AS object_value, + '' AS provenance, + COALESCE(result.name, '') AS name, + ARRAY[] AS types + FROM ( + SELECT + * + FROM + GRAPH_TABLE ( + DCGRAPH MATCH (m:Node + WHERE + m.subject_id IN UNNEST(@ids))-[e:Edge + WHERE + e.predicate = @predicate]->{1,%d}(n:Node) + WHERE + m != n + RETURN DISTINCT m.subject_id, + n.subject_id as object_id, + '' as object_value, + n.name + ) + UNION ALL + SELECT + * + FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.subject_id IN UNNEST(@ids) + AND e.object_value IS NOT NULL + AND e.predicate = 
@predicate]-> + RETURN e.subject_id, + '' AS object_id, + e.object_value, + '' AS name + ) + )result + `, MAX_HOPS), + getEdgesByObjectID: ` + SELECT + result.subject_id, + result.predicate, + result.object_id, + '' AS object_value, + COALESCE(result.provenance, '') AS provenance, + COALESCE(result.name, '') AS name, + COALESCE(result.types, []) AS types, + FROM + GRAPH_TABLE ( + DCGraph MATCH <-[e:Edge + WHERE + e.object_id IN UNNEST(@ids) + AND e.subject_id != e.object_id%s]-(n:Node) + RETURN e.object_id AS subject_id, + e.predicate, + e.subject_id AS object_id, + e.provenance, + n.name, + n.types + )result + `, + getChainedEdgesByObjectID: fmt.Sprintf(` + SELECT + result.subject_id, + @result_predicate AS predicate, + result.object_id, + '' AS object_value, + '' AS provenance, + COALESCE(result.name, '') AS name, + ARRAY[] AS types FROM - Edge edge - LEFT JOIN - GRAPH_TABLE( DCGraph MATCH -[e:Edge + GRAPH_TABLE ( + DCGraph MATCH (m:Node + WHERE m.subject_id IN UNNEST(@ids))<-[e:Edge WHERE - e.subject_id IN UNNEST(@ids) - AND e.object_value IS NULL]->(n:Node) RETURN n.subject_id, - n.name, - n.types) object - ON - edge.object_id = object.subject_id - WHERE - edge.subject_id IN UNNEST(@ids) + e.predicate = @predicate]-{1,%d}(n:Node) + WHERE + m!= n + RETURN DISTINCT m.subject_id, + n.subject_id AS object_id, + n.name + )result + `, MAX_HOPS), + filterProps: ` + AND e.predicate IN UNNEST(@props) `, - getEdgesByObjectID: ` - GRAPH DCGraph MATCH (n:Node)-[e:Edge - WHERE - e.object_id IN UNNEST(@ids) - AND e.subject_id != e.object_id]-> return e.object_id AS subject_id, - e.predicate, - n.subject_id AS object_id, - '' as object_value, - COALESCE(e.provenance, '') AS provenance, - COALESCE(n.name, '') AS name, - COALESCE(n.types, []) AS types + filterObjects: ` + INNER JOIN ( + SELECT + * + FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.predicate = @prop%[1]d + AND e.object_id IN UNNEST(@val%[1]d)]-> + RETURN e.subject_id + ) + UNION DISTINCT + SELECT + * + 
FROM + GRAPH_TABLE ( + DCGraph MATCH -[e:Edge + WHERE + e.predicate = @prop%[1]d + AND e.object_value IN UNNEST(@val%[1]d)]-> + RETURN e.subject_id + ) + )filter%[1]d + ON + result.object_id = filter%[1]d.subject_id `, getObsByVariableAndEntity: ` SELECT @@ -105,12 +253,15 @@ var statements = struct { `, } -// GetNodeProps retrieves node properties from Spanner given a list of IDs and a direction. -func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out bool) ([]*Property, error) { - props := []*Property{} +// GetNodeProps retrieves node properties from Spanner given a list of IDs and a direction and returns a map. +func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out bool) (map[string][]*Property, error) { + props := map[string][]*Property{} if len(ids) == 0 { return props, nil } + for _, id := range ids { + props[id] = []*Property{} + } var stmt spanner.Statement @@ -135,7 +286,8 @@ func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out boo }, func(rowStruct interface{}) { prop := rowStruct.(*Property) - props = append(props, prop) + subjectID := prop.SubjectID + props[subjectID] = append(props[subjectID], prop) }, ) if err != nil { @@ -147,27 +299,66 @@ func (sc *SpannerClient) GetNodeProps(ctx context.Context, ids []string, out boo // GetNodeEdgesByID retrieves node edges from Spanner given a list of IDs and a property Arc and returns a map. func (sc *SpannerClient) GetNodeEdgesByID(ctx context.Context, ids []string, arc *v2.Arc) (map[string][]*Edge, error) { - // TODO: Support additional Node functionality (properties, pagination, etc). + // TODO: Support pagination. edges := make(map[string][]*Edge) if len(ids) == 0 { return edges, nil } + for _, id := range ids { + edges[id] = []*Edge{} + } - var stmt spanner.Statement + // Validate input. 
+ if arc.Decorator != "" && (arc.SingleProp == "" || arc.SingleProp == WILDCARD || len(arc.BracketProps) > 0) { + return nil, fmt.Errorf("chain expressions are only supported for a single property") + } + params := map[string]interface{}{"ids": ids} + + // Attach property arcs. + filterProps := "" + if arc.SingleProp != "" && arc.SingleProp != WILDCARD { + filterProps = statements.filterProps + params["props"] = []string{arc.SingleProp} + } else if len(arc.BracketProps) > 0 { + filterProps = statements.filterProps + params["props"] = arc.BracketProps + } + + var template string switch arc.Out { case true: - stmt = spanner.Statement{ - SQL: statements.getEdgesBySubjectID, - Params: map[string]interface{}{"ids": ids}, + if arc.Decorator == CHAIN { + template = statements.getChainedEdgesBySubjectID + params["predicate"] = arc.SingleProp + params["result_predicate"] = arc.SingleProp + arc.Decorator + } else { + template = fmt.Sprintf(statements.getEdgesBySubjectID, filterProps) } case false: - stmt = spanner.Statement{ - SQL: statements.getEdgesByObjectID, - Params: map[string]interface{}{"ids": ids}, + if arc.Decorator == CHAIN { + template = statements.getChainedEdgesByObjectID + params["predicate"] = arc.SingleProp + params["result_predicate"] = arc.SingleProp + arc.Decorator + } else { + template = fmt.Sprintf(statements.getEdgesByObjectID, filterProps) } } + // Attach filters. 
+ i := 0 + for prop, val := range arc.Filter { + template += fmt.Sprintf(statements.filterObjects, i) + params["prop"+strconv.Itoa(i)] = prop + params["val"+strconv.Itoa(i)] = val + i += 1 + } + + stmt := spanner.Statement{ + SQL: template, + Params: params, + } + err := sc.queryAndCollect( ctx, stmt, diff --git a/internal/server/v3/node/golden/node_test.go b/internal/server/v3/node/golden/node_test.go index 5a7022980..43954abaa 100644 --- a/internal/server/v3/node/golden/node_test.go +++ b/internal/server/v3/node/golden/node_test.go @@ -46,10 +46,21 @@ func TestV3Node(t *testing.T) { nextToken string goldenFile string }{ + { + "Out properties", + []string{ + "Count_Person_Female", + "foo", + }, + "->", + "", + "out_prop.json", + }, { "All out property-values", []string{ "Count_Person_Female", + "foo", }, "->*", "", diff --git a/internal/server/v3/node/golden/out_prop.json b/internal/server/v3/node/golden/out_prop.json new file mode 100644 index 000000000..c628b0ed6 --- /dev/null +++ b/internal/server/v3/node/golden/out_prop.json @@ -0,0 +1,19 @@ +{ + "data": { + "Count_Person_Female": { + "properties": [ + "constraintProperties", + "extendedName", + "gender", + "localCuratorLevelId", + "measuredProp", + "memberOf", + "name", + "populationType", + "statType", + "typeOf" + ] + }, + "foo": {} + } +} \ No newline at end of file diff --git a/internal/server/v3/node/golden/out_pv_all.json b/internal/server/v3/node/golden/out_pv_all.json index 887c3f04d..e0fa1a8c6 100644 --- a/internal/server/v3/node/golden/out_pv_all.json +++ b/internal/server/v3/node/golden/out_pv_all.json @@ -111,6 +111,7 @@ ] } } - } + }, + "foo": {} } } \ No newline at end of file