From 26b011a524f81043f0b51f94b0a06baaa6d777fe Mon Sep 17 00:00:00 2001 From: Desirree Adegunle <87389186+dess890@users.noreply.github.com> Date: Wed, 27 Dec 2023 15:59:02 -0500 Subject: [PATCH] fix(readme): added chain + fixed hop malformat --- docs/source/graphistry.compute.rst | 38 ++++++++++++++++++++++++++++++ graphistry/compute/hop.py | 28 +++++++++++----------- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/docs/source/graphistry.compute.rst b/docs/source/graphistry.compute.rst index 87811ed705..7b80c01772 100644 --- a/docs/source/graphistry.compute.rst +++ b/docs/source/graphistry.compute.rst @@ -18,6 +18,44 @@ ComputeMixin module Chain --------------- + +The ``chain`` module in Graphistry lets users run Cypher-style graph queries directly on dataframes. It requires neither a database nor Java, and it supports optional GPU acceleration for enhanced performance. + +Example Usage: + +.. code-block:: python + + from graphistry import n, e_undirected, is_in + + # Define a graph query chain + g2 = g1.chain([ + n({'user': 'Biden'}), + e_undirected(), + n(name='bridge'), + e_undirected(), + n({'user': is_in(['Trump', 'Obama'])}) + ]) + + # Display the result + print('# bridges', len(g2._nodes[g2._nodes.bridge])) + g2.plot() + +This example finds the nodes that connect the user 'Biden' to the users 'Trump' or 'Obama' through undirected edges; ``name='bridge'`` tags those intermediate nodes in a boolean ``bridge`` column, which the ``print`` call counts. The ``chain`` method takes a series of node and edge patterns that the graph must match. + +To enable GPU acceleration for faster processing: + +.. code-block:: python + + # Switch to RAPIDS GPU dataframes for performance + import cudf + g2 = g1.edges(lambda g: cudf.DataFrame(g._edges)) + + # Utilize the chain function with GPU acceleration + g3 = g2.chain([n(), e(hops=3), n()]) + g3.plot() + +In this example, the ``chain`` method is used with GPU-accelerated dataframes, demonstrating how Graphistry can efficiently process large-scale graph data. + .. 
automodule:: graphistry.compute.chain :members: :undoc-members: diff --git a/graphistry/compute/hop.py b/graphistry/compute/hop.py index 7d8425a690..9a750988e8 100644 --- a/graphistry/compute/hop.py +++ b/graphistry/compute/hop.py @@ -40,20 +40,20 @@ def hop(self: Plottable, See chain() examples for examples of many of the parameters - g: Plotter - nodes: dataframe with id column matching g._node. None signifies all nodes (default). - hops: consider paths of length 1 to 'hops' steps, if any (default 1). - to_fixed_point: keep hopping until no new nodes are found (ignores hops) - direction: 'forward', 'reverse', 'undirected' - edge_match: dict of kv-pairs to exact match (see also: filter_edges_by_dict) - source_node_match: dict of kv-pairs to match nodes before hopping (including intermediate) - destination_node_match: dict of kv-pairs to match nodes after hopping (including intermediate) - source_node_query: dataframe query to match nodes before hopping (including intermediate) - destination_node_query: dataframe query to match nodes after hopping (including intermediate) - edge_query: dataframe query to match edges before hopping (including intermediate) - return_as_wave_front: Only return the nodes/edges reached, ignoring past ones (primarily for internal use) - target_wave_front: Only consider these nodes for reachability, and for intermediate hops, also consider nodes (primarily for internal use by reverse pass) - engine: 'auto', 'pandas', 'cudf' (GPU) + :param self: Plotter object (referred to as g below). + :param nodes: Dataframe with an ID column whose name matches g._node. None signifies all nodes (default). + :param hops: Consider paths of length 1 to 'hops' steps, if any (default 1). + :param to_fixed_point: Keep hopping until no new nodes are found (ignores 'hops'). + :param direction: 'forward', 'reverse', 'undirected'. + :param edge_match: Dict of key-value pairs to match exactly (see also: filter_edges_by_dict). 
+ :param source_node_match: Dict of key-value pairs to match nodes before hopping (including intermediate). + :param destination_node_match: Dict of key-value pairs to match nodes after hopping (including intermediate). + :param source_node_query: Dataframe query to match nodes before hopping (including intermediate). + :param destination_node_query: Dataframe query to match nodes after hopping (including intermediate). + :param edge_query: Dataframe query to match edges before hopping (including intermediate). + :param return_as_wave_front: Only return the nodes/edges reached, ignoring past ones (primarily for internal use). + :param target_wave_front: Only consider these nodes for reachability, and for intermediate hops, also consider nodes (primarily for internal use by reverse pass). + :param engine: 'auto', 'pandas', 'cudf' (GPU). """ """