Merge pull request #2 from PhilipMathieu/graph_metrics

Integrate https://github.com/gavjmooney/graph_metrics/
PhilipMathieu · Apr 1, 2024 · 5011875 · 5011875
2 parents ec574f7 + c6ac35d
commit 5011875
Show file tree

Hide file tree

Showing 27 changed files with 3,042 additions and 358 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+  "cSpell.words": ["neighbourhood"]
+}
diff --git a/README.md b/README.md
@@ -53,6 +53,16 @@ Metric definitions are derived from:
 
 Initial inspiration was taken from [rpgove/greadability.js](https://github.com/rpgove/greadability/).
 
+Code in [`graphreadability/metrics/`](graphreadability/metrics/) is derived in part from code originally published at <https://github.com/gavjmooney/graph_metrics/>, which accompanies the following publication:
+```
+@Conference{citekey,
+  author       = "Gavin J. Mooney and Helen C. Purchase and Michael Wybrow and Stephen G. Kobourov",
+  title        = "The Multi-Dimensional Landscape of Graph Drawing Metrics",
+  booktitle    = "2024 IEEE 17th Pacific Visualization Symposium (PacificVis)",
+  year         = "2024",
+}
+```
+
 ## License
 
 All rights reserved for now (likely to be open sourced shortly).
diff --git a/environment.yml b/environment.yml
@@ -0,0 +1,11 @@
+name: gr-min
+channels:
+  - conda-forge
+dependencies:
+  - python=3.7
+  - networkx
+  - numpy
+  - matplotlib
+  - scipy
+  - pandas
+prefix: /home/philip/miniforge/envs/gr-min
diff --git a/graphreadability/__init__.py b/graphreadability/__init__.py
@@ -1,12 +1,9 @@
 # __init__.py for graphreadability module
-
-# Import the extended Graph object and any other utilities you want to expose
-from .core import graph
+from .core.metricssuite import MetricsSuite
 from .core.readabilitygraph import ReadabilityGraph
-from networkx import Graph
 
-# Optionally, set a version number
-# __version__ = '1.0.0'
+from .utils.helpers import *
+from .utils.crosses_promotion import *
 
-# You can also import subpackages if needed
-# from . import utils
+# Import tests
+from .tests import *
diff --git a/graphreadability/core/graph.py b/graphreadability/core/graph.py
diff --git a/graphreadability/core/metricssuite.py b/graphreadability/core/metricssuite.py
@@ -0,0 +1,175 @@
+import math
+import time
+from typing import Optional, Union, Sequence
+from collections import defaultdict
+import networkx as nx
+from ..metrics import metrics
+
+# Names of every callable attribute of the metrics module, excluding dunder names.
+# NOTE(review): callable() also matches classes and any callables the module
+# merely imports, and single-underscore helpers are not excluded - confirm that
+# everything collected here is really meant to be a metric.
+_metric_functions = [
+    name
+    for name in dir(metrics)
+    if callable(getattr(metrics, name)) and not name.startswith("__")
+]
+
+# Every discovered metric gets a weight of 1 by default.
+DEFAULT_WEIGHTS = dict.fromkeys(_metric_functions, 1)
+
+# Registry mapping each metric name to a record holding its implementation.
+METRICS = {name: {"func": getattr(metrics, name)} for name in _metric_functions}
+
+class MetricsSuite:
+    """Calculate graph drawing aesthetic metrics and combine them into one score.
+
+    Every callable registered in ``METRICS`` is available as a metric. Each
+    metric carries a weight; only metrics with a weight greater than zero take
+    part in ``combine_metrics()``. To prevent unnecessary calculations, omit
+    metrics you do not need or give them weight 0.
+
+    Args:
+        graph: A NetworkX Graph object, or a path to a GML or GraphML file.
+            When omitted, a built-in test graph with a random layout is used.
+        metric_weights: Dictionary of metric:weight key/values. Weights must
+            not be negative. ``None`` selects ``DEFAULT_WEIGHTS``. The
+            dictionary passed in is never modified.
+        metric_combination_strategy: ``"weighted_sum"`` or ``"weighted_prod"``.
+        sym_threshold: Non-negative number stored as ``self.sym_threshold``.
+        sym_tolerance: Non-negative number stored as ``self.sym_tolerance``.
+        file_type: ``"GraphML"`` or ``"GML"``; only used when ``graph`` is a path.
+
+    Raises:
+        KeyError: If ``metric_weights`` names a metric that is not registered.
+        ValueError: If a weight, ``sym_threshold`` or ``sym_tolerance`` is
+            negative, or the combination strategy or file type is unknown.
+        TypeError: If ``graph`` is neither a string nor a NetworkX graph.
+    """
+
+    def __init__(
+        self,
+        graph: Union[nx.Graph, str] = None,
+        metric_weights: Optional[dict] = DEFAULT_WEIGHTS,
+        metric_combination_strategy: str = "weighted_sum",
+        sym_threshold: Union[int, float] = 2,
+        sym_tolerance: Union[int, float] = 3,
+        file_type: str = "GraphML",
+    ):
+        # Dictionary mapping metric combination strategies to their functions
+        self.metric_combination_strategies = {
+            "weighted_sum": self.weighted_sum,
+            "weighted_prod": self.weighted_prod,
+        }
+        # Placeholder for version of graph with crosses promoted to nodes
+        self.graph_cross_promoted = None
+        # Dictionary mapping metric names to their functions, values, and weights.
+        # A fresh inner dict is built per metric: METRICS.copy() would be shallow,
+        # so updating its inner dicts would write this instance's state into the
+        # module-level METRICS and into every other MetricsSuite.
+        self.metrics = {
+            name: dict(entry, weight=0, value=None, is_calculated=False)
+            for name, entry in METRICS.items()
+        }
+
+        # Check all metrics given are valid and assign weights
+        self.initial_weights = self.set_weights(metric_weights)
+
+        # Check metric combination strategy is valid. Raised explicitly rather
+        # than asserted so the check survives running Python with -O.
+        if metric_combination_strategy not in self.metric_combination_strategies:
+            raise ValueError(
+                f"Unknown metric combination strategy: {metric_combination_strategy}. Available strategies: {list(self.metric_combination_strategies.keys())}"
+            )
+        self.metric_combination_strategy = metric_combination_strategy
+
+        if graph is None:
+            self._filename = ""
+            self._graph = self.load_graph_test()
+        elif isinstance(graph, str):
+            self._filename = graph
+            self._graph = self.load_graph(graph, file_type=file_type)
+        elif isinstance(graph, nx.Graph):
+            self._filename = ""
+            self._graph = graph
+        else:
+            raise TypeError(
+                f"'graph' must be a string representing a path to a GML or GraphML file, or a NetworkX Graph object, not {type(graph)}"
+            )
+
+        if sym_tolerance < 0:
+            raise ValueError("sym_tolerance must be non-negative.")
+
+        self.sym_tolerance = sym_tolerance
+
+        if sym_threshold < 0:
+            raise ValueError("sym_threshold must be non-negative.")
+
+        self.sym_threshold = sym_threshold
+
+    def set_weights(self, metric_weights: Optional[dict]):
+        """Assign the given weights and return the metrics that are switched on.
+
+        Args:
+            metric_weights: Dictionary of metric:weight key/values, or ``None``
+                to use ``DEFAULT_WEIGHTS``. It is read but never modified.
+
+        Returns:
+            A new dictionary holding only the entries whose weight is greater
+            than zero.
+
+        Raises:
+            ValueError: If any weight is negative.
+            KeyError: If a metric name is not registered in ``self.metrics``.
+
+        Note:
+            Only ``__init__`` stores the returned dictionary as
+            ``self.initial_weights``; calling this method later does not
+            change which metrics ``combine_metrics()`` uses.
+        """
+        if metric_weights is None:
+            metric_weights = DEFAULT_WEIGHTS
+
+        if any(weight < 0 for weight in metric_weights.values()):
+            raise ValueError("Metric weights must be non-negative.")
+
+        unknown = [metric for metric in metric_weights if metric not in self.metrics]
+        if unknown:
+            raise KeyError(
+                f"Unknown metric(s): {unknown}. Available metrics: {list(self.metrics)}"
+            )
+
+        # Zero weights are applied too, so an explicit 0 switches a metric off.
+        for metric, weight in metric_weights.items():
+            self.metrics[metric]["weight"] = weight
+
+        return {metric: weight for metric, weight in metric_weights.items() if weight > 0}
+
+    def weighted_prod(self):
+        """Returns the weighted product of all metrics. Should NOT be used as a cost function - may be useful for comparing graphs.
+
+        Returns 1 when no metric is switched on (empty product).
+        """
+        # Plain loop instead of math.prod, which only exists from Python 3.8 on;
+        # the project's environment.yml pins python=3.7.
+        product = 1
+        for metric in self.initial_weights:
+            product *= self.metrics[metric]["value"] * self.metrics[metric]["weight"]
+        return product
+
+    def weighted_sum(self):
+        """Returns the weighted sum of all metrics. Can be used as a cost function.
+
+        The sum of value * weight is divided by the total weight. Both run over
+        the metrics in ``self.initial_weights`` so they always cover the same
+        set. Returns 0.0 when the total weight is zero instead of dividing by zero.
+        """
+        total_weight = sum(
+            self.metrics[metric]["weight"] for metric in self.initial_weights
+        )
+        if total_weight == 0:
+            return 0.0
+        weighted_total = sum(
+            self.metrics[metric]["value"] * self.metrics[metric]["weight"]
+            for metric in self.initial_weights
+        )
+        return weighted_total / total_weight
+
+    def load_graph(self, filename: str, file_type: str = "GraphML"):
+        """Loads a graph from a GraphML or GML file.
+
+        Args:
+            filename: Path to the file.
+            file_type: ``"GraphML"`` or ``"GML"`` (case-insensitive).
+
+        Returns:
+            The graph as read by NetworkX; node attributes are returned as
+            stored in the file.
+
+        Raises:
+            ValueError: If ``file_type`` is not a supported format.
+        """
+        # NOTE(review): the metrics presumably expect numeric "x"/"y" node
+        # attributes, as load_graph_test() sets - confirm input files provide them.
+        readers = {"graphml": nx.read_graphml, "gml": nx.read_gml}
+        reader = readers.get(file_type.lower())
+        if reader is None:
+            raise ValueError(
+                f"Unsupported file_type: {file_type!r}. Expected 'GraphML' or 'GML'."
+            )
+        return reader(filename)
+
+    def load_graph_test(self, nxg=nx.sedgewick_maze_graph):
+        """Loads a test graph with a random layout.
+
+        Args:
+            nxg: Zero-argument callable returning a NetworkX graph.
+
+        Returns:
+            The graph, with "x" and "y" attributes set on every node.
+        """
+        G = nxg()
+        pos = nx.random_layout(G)
+        # Turn each coordinate pair into the attribute dict set_node_attributes expects
+        for k, v in pos.items():
+            pos[k] = {"x": v[0], "y": v[1]}
+
+        nx.set_node_attributes(G, pos)
+        return G
+
+    def reset_metrics(self):
+        """Clears every stored metric value so it is recalculated on next use. Weights are kept."""
+        for metric in self.metrics:
+            self.metrics[metric]["value"] = None
+            self.metrics[metric]["is_calculated"] = False
+
+    def calculate_metric(self, metric: str = None):
+        """Calculate the value of the given metric by calling the associated function.
+
+        The result is cached; a metric that is already calculated is skipped.
+
+        Raises:
+            ValueError: If no metric name is given.
+            KeyError: If the metric name is not registered.
+        """
+        if metric is None:
+            raise ValueError("No metric provided. Did you mean to call calculate_metrics()?")
+
+        record = self.metrics[metric]
+        if not record["is_calculated"]:
+            record["value"] = record["func"](self._graph)
+            record["is_calculated"] = True
+
+    def calculate_metrics(self):
+        """Calculates the values of all metrics with non-zero weights and prints the elapsed time."""
+        start_time = time.perf_counter()
+        n_metrics = 0
+        for metric in self.metrics:
+            if self.metrics[metric]["weight"] != 0:
+                self.calculate_metric(metric)
+                n_metrics += 1
+        end_time = time.perf_counter()
+        print(f"Calculated {n_metrics} metrics in {end_time - start_time:0.3f} seconds.")
+
+    def combine_metrics(self):
+        """Combine several metrics based on the given multiple criteria decision analysis technique."""
+        # Important to loop over initial weights to avoid checking the weight of all metrics when they are not needed
+        for metric in self.initial_weights:
+            self.calculate_metric(metric)
+        return self.metric_combination_strategies[self.metric_combination_strategy]()
+
+    def pretty_print_metrics(self):
+        """Prints all metrics and their values in an easily digestible view."""
+        combined = self.combine_metrics()
+        print("-" * 50)
+        print("{:<30s}Value\tWeight".format("Metric"))
+        print("-" * 50)
+        for k, v in self.metrics.items():
+            # Compare against None rather than truthiness so that a calculated
+            # value of 0 is still formatted as a number.
+            if v["value"] is not None:
+                val_str = f"{v['value']:.3f}"
+            else:
+                val_str = str(v["value"])
+            print(f"{k:<30s}{val_str:<5s}\t{v['weight']}")
+        print("-" * 50)
+        print(f"Evaluation using {self.metric_combination_strategy}: {combined:.5f}")
+        print("-" * 50)
+
+    def metric_table(self):
+        """Returns a dictionary of metrics and their values. Designed to work with pandas from_records() method.
+
+        Metrics that were not calculated map to ``None``; the combined score is
+        stored under the key "Combined".
+        """
+        combined = self.combine_metrics()
+        metrics = {k: v["value"] for k, v in self.metrics.items()}
+        metrics["Combined"] = combined
+        return metrics
diff --git a/graphreadability/core/readabilitygraph.py b/graphreadability/core/readabilitygraph.py
@@ -8,25 +8,7 @@
 
 import networkx as nx
 import numpy as np
-
-
-def calculate_angle_between_vectors(v1, v2):
-    """Calculate the angle between two vectors."""
-    unit_v1 = v1 / np.linalg.norm(v1)
-    unit_v2 = v2 / np.linalg.norm(v2)
-    dot_product = np.dot(unit_v1, unit_v2)
-    angle = np.arccos(np.clip(dot_product, -1.0, 1.0))
-    return np.degrees(angle)
-
-
-def divide_or_zero(a, b):
-    """Divide a by b, or return 0 if b is 0."""
-    return (
-        np.divide(a, b, out=np.zeros_like(a, dtype=float), where=b != 0.0)
-        if b != 0.0
-        else 0.0
-    )
-
+from graphreadability.utils.helpers import divide_or_zero, lines_intersect, calculate_angle_between_vectors
 
 class ReadabilityGraph(nx.Graph):
     def __init__(self, data=None, **attr):
@@ -44,34 +26,8 @@ def edge_vector(self, edge):
         """Calculate the vector of an edge given its nodes' positions."""
         pos1, pos2 = self.nodes[edge[0]]["pos"], self.nodes[edge[1]]["pos"]
         return np.array(pos2) - np.array(pos1)
-
+    
     def calculate_edge_crossings(self):
-        def lines_intersect(line1, line2):
-            """Check if two lines (each defined by two points) intersect."""
-            p1, p2, p3, p4 = line1[0], line1[1], line2[0], line2[1]
-            # Calculate parts of the determinants
-            det1 = (p1[0] - p2[0]) * (p3[1] - p4[1]) - (p1[1] - p2[1]) * (p3[0] - p4[0])
-            det2 = (p1[0] * p2[1] - p1[1] * p2[0]) * (p3[0] - p4[0]) - (
-                p1[0] - p2[0]
-            ) * (p3[0] * p4[1] - p3[1] * p4[0])
-            det3 = (p1[0] * p2[1] - p1[1] * p2[0]) * (p3[1] - p4[1]) - (
-                p1[1] - p2[1]
-            ) * (p3[0] * p4[1] - p3[1] * p4[0])
-            if det1 == 0:
-                return False  # Lines are parallel or coincident
-            x, y = det2 / det1, det3 / det1
-            # Check if intersection point is on both line segments
-            line1_x_range = sorted([p1[0], p2[0]])
-            line1_y_range = sorted([p1[1], p2[1]])
-            line2_x_range = sorted([p3[0], p4[0]])
-            line2_y_range = sorted([p3[1], p4[1]])
-            return (
-                line1_x_range[0] <= x <= line1_x_range[1]
-                and line2_x_range[0] <= x <= line2_x_range[1]
-                and line1_y_range[0] <= y <= line1_y_range[1]
-                and line2_y_range[0] <= y <= line2_y_range[1]
-            )
-
         positions = nx.get_node_attributes(
             self, "pos"
         )  # Assuming 'pos' contains node positions
@@ -170,7 +126,7 @@ def edge_crossings_global(self):
         m = len(self.edges)
         c_all = m * (m - 1) / 2
         degree = np.array([degree[1] for degree in self.degree()])
-        c_impossible = 0.5 * np.dot(degree, degree - 1)
+        c_impossible = np.dot(degree, degree - 1) / 2
         c_max = c_all - c_impossible
 
         return 1 - divide_or_zero(c, c_max)
@@ -255,25 +211,6 @@ def edge_crossing_angles_global(self, ideal_angle=70):
 
         return 1 - divide_or_zero(deviation, deviation_max)
 
-    def angular_resolution_min_node(self):
-        pass
-
-    def angular_resolution_min_global(self):
-        pass
-
-    def angular_resolution_dev_node(self):
-        pass
-
-    def angular_resolution_dev_global(self):
-        pass
-
-    def group_overlap(self):
-        pass
-
-    def visualization_coverage(self):
-        # Implement computation for visualization coverage metric
-        pass
-
     def compute_metrics(self):
         # Return a dictionary of all metrics
         metrics = {

diff --git a/graphreadability/io/cytoscape_api.py b/graphreadability/io/cytoscape_api.py
@@ -0,0 +1 @@
+# Connect to cytoscape