Skip to content

Commit

Permalink
Add support for grouping by different properties by label in Gremlin (#…
Browse files Browse the repository at this point in the history
…115)

* Add support for grouping by different properties by label in Gremlin

* Update starter notebooks with details on usage

Co-authored-by: Michael Chin <[email protected]>
  • Loading branch information
michaelnchin and michaelnchin authored Apr 29, 2021
1 parent eef18ff commit a36e698
Show file tree
Hide file tree
Showing 5 changed files with 477 additions and 50 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook.

## Upcoming
- Add support for notebook variables in Sparql/Gremlin magic queries ([Link to PR](https://github.com/aws/graph-notebook/pull/113))
- Add support for grouping by different properties per label in Gremlin ([Link to PR](https://github.com/aws/graph-notebook/pull/115))
- Fix missing Boto3 dependency in setup.py ([Link to PR](https://github.com/aws/graph-notebook/pull/118))


## Release 2.1.1 (April 22, 2021)

Expand Down
2 changes: 1 addition & 1 deletion src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def gremlin(self, line, cell, local_ns: dict = None):
parser.add_argument('query_mode', nargs='?', default='query',
help='query mode (default=query) [query|explain|profile]')
parser.add_argument('-p', '--path-pattern', default='', help='path pattern')
parser.add_argument('-g', '--group-by', default='T.label',
parser.add_argument('-g', '--group-by', type=str, default='T.label',
help='Property used to group nodes (e.g. code, T.region) default is T.label')
parser.add_argument('--store-to', type=str, default='', help='store query result to this variable')
parser.add_argument('--ignore-groups', action='store_true', default=False, help="Ignore all grouping options")
Expand Down
67 changes: 49 additions & 18 deletions src/graph_notebook/network/gremlin/GremlinNetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def parse_pattern_list_str(pattern_str: str) -> list:


def generate_id_from_dict(data: dict) -> str:
# Handle cases where user requests '~label' in valueMap step, since json can't serialize non-string keys
if T.label in data.keys():
data['label'] = data[T.label]
del data[T.label]
data_str = json.dumps(data, default=str)
hashed = hashlib.md5(data_str.encode())
generate_id = hashed.hexdigest()
Expand Down Expand Up @@ -93,8 +97,11 @@ def __init__(self, graph: MultiDiGraph = None, callbacks=None, label_max_length=
if graph is None:
graph = MultiDiGraph()
self.label_max_length = label_max_length
self.group_by_property = group_by_property
self.ignore_groups=ignore_groups
try:
self.group_by_property = json.loads(group_by_property)
except ValueError:
self.group_by_property = group_by_property
self.ignore_groups = ignore_groups
super().__init__(graph, callbacks)

def add_results_with_pattern(self, results, pattern_list: list):
Expand Down Expand Up @@ -271,35 +278,59 @@ def add_vertex(self, v):
if type(v) is Vertex:
node_id = v.id
title = v.label
if self.group_by_property in [T_LABEL, 'label']:
# This sets the group key to the label if either "label" is passed in or
# T.label is set in order to handle the default case of grouping by label
# when no explicit key is specified
group = v.label
elif self.group_by_property == 'id':
group = v.id
else:
group = ''
vertex_dict = v.__dict__
if not isinstance(self.group_by_property, dict): # Handle string format group_by
if self.group_by_property in [T_LABEL, 'label']: # this handles if it's just a string
# This sets the group key to the label if either "label" is passed in or
# T.label is set in order to handle the default case of grouping by label
# when no explicit key is specified
group = v.label
elif self.group_by_property == 'id':
group = v.id
else:
group = ''
else: # handle dict format group_by
try:
if str(v.label) in self.group_by_property:
if self.group_by_property[str(v.label)]['groupby'] in [T_LABEL, 'label']:
group = v.label
else:
group = vertex_dict[self.group_by_property[str(v.label)]['groupby']]
elif str(v.id) in self.group_by_property:
group = vertex_dict[self.group_by_property[str(v.id)]['groupby']]
else:
group = ''
except KeyError:
group = ''

label = title if len(title) <= self.label_max_length else title[:self.label_max_length - 3] + '...'
data = {'label': label, 'title': title, 'group': group, 'properties': {'id': node_id, 'label': title}}
elif type(v) is dict:
properties = {}

title = ''
label = ''
group = ''
# Before looping through the properties, we first search for T.label in the vertex dict and set the title from it.
# Otherwise, we would hit a KeyError if T.label were not processed first to set the title,
# since the title is needed to look up the vertex label's desired grouping behavior in group_by_property.
if T.label in v.keys():
title = str(v[T.label])
label = title if len(title) <= self.label_max_length else title[:self.label_max_length - 3] + '...'
for k in v:
if str(k) == T_LABEL:
title = str(v[k])
label = title if len(title) <= self.label_max_length else title[:self.label_max_length - 3] + '...'
elif str(k) == T_ID:
if str(k) == T_ID:
node_id = str(v[k])
properties[k] = v[k]
if str(k) == self.group_by_property:
if isinstance(self.group_by_property, dict):
try:
if str(k) == self.group_by_property[title]['groupby']:
group = str(v[k])
except KeyError:
continue
elif str(k) == self.group_by_property:
group = str(v[k])

# handle when there is no id in a node. In this case, we will generate one which
# is consistently regenerated so that duplicate dicts will be dedubed to the same vertex.
# is consistently regenerated so that duplicate dicts will be reduced to the same vertex.
if node_id == '':
node_id = f'{generate_id_from_dict(v)}'

Expand Down
Loading

0 comments on commit a36e698

Please sign in to comment.