diff --git a/gunfolds/estimation/linear_model.py b/gunfolds/estimation/linear_model.py index f070eedf..0e7c0e73 100644 --- a/gunfolds/estimation/linear_model.py +++ b/gunfolds/estimation/linear_model.py @@ -5,6 +5,8 @@ import numpy as np from progressbar import ProgressBar, Percentage from scipy import linalg, optimize +import scipy.sparse as sp +from scipy.sparse.linalg import eigs from statsmodels.tsa.api import VAR from sympy.matrices import SparseMatrix @@ -418,11 +420,11 @@ def init(): if distribution == 'flat': x = np.ones(len(edges[0])) elif distribution == 'flatsigned': - x = np.sign(np.randn(len(edges[0])))*np.ones(len(edges[0])) + x = np.sign(np.random.randn(len(edges[0]))) * np.ones(len(edges[0])) elif distribution == 'beta': x = np.random.beta(0.5, 0.5, len(edges[0]))*3-1.5 elif distribution == 'normal': - x = np.randn(len(edges[0])) + x = np.random.randn(len(edges[0])) elif distribution == 'uniform': x = np.sign(np.randn(len(edges[0])))*np.rand(len(edges[0])) else: @@ -1083,4 +1085,92 @@ def randomSVARs(n, repeats=100, rate=2, density=0.1, th=0.09, 'bidirected': B } +def check_matrix_powers(W, powers, threshold): + """ + Check if the powers of a matrix W preserve a threshold for non-zero entries. + + :param W: The input matrix. + :type W: array_like + + :param powers: List of powers to check. + :type powers: iterable + + :param threshold: Threshold for non-zero entries. + :type threshold: float + + :return: True if all powers of the matrix preserve the threshold for non-zero entries, False otherwise. + :rtype: bool + """ + for n in powers: + W_n = np.linalg.matrix_power(W, n) + non_zero_indices = np.nonzero(W_n) + if (np.abs(W_n[non_zero_indices]) < threshold).any(): + return False + return True + + +def create_stable_weighted_matrix( + A, + threshold=0.1, + powers=[1, 2, 3, 4], + max_attempts=1000, + damping_factor=0.99, + random_state=None, +): + """ + Create a stable weighted matrix with a specified spectral radius. + + :param A: The input matrix. + :type A: array_like + + :param threshold: Threshold for non-zero entries preservation. Default is 0.1. + :type threshold: float, optional + + :param powers: List of powers to check in the stability condition. Default is [1, 2, 3, 4]. + :type powers: iterable, optional + + :param max_attempts: Maximum attempts to create a stable matrix. Default is 1000. + :type max_attempts: int, optional + + :param damping_factor: Damping factor for scaling the matrix. Default is 0.99. + :type damping_factor: float, optional + + :param random_state: Random seed for reproducibility. Default is None. + :type random_state: int or None, optional + + :return: A stable weighted matrix. + :rtype: array_like + + :raises ValueError: If unable to create a stable matrix after the maximum attempts. + """ + np.random.seed( + random_state + ) # Set random seed for reproducibility if provided + attempts = 0 + + while attempts < max_attempts: + # Generate a random matrix with the same sparsity pattern as A + random_weights = np.random.randn(*A.shape) + weighted_matrix = A * random_weights + + # Convert to sparse format for efficient eigenvalue computation + weighted_sparse = sp.csr_matrix(weighted_matrix) + + # Compute the largest eigenvalue in magnitude + eigenvalues, _ = eigs(weighted_sparse, k=1, which="LM") + max_eigenvalue = np.abs(eigenvalues[0]) + + # Scale the matrix so that the spectral radius is slightly less than 1 + if max_eigenvalue > 0: + weighted_matrix *= damping_factor / max_eigenvalue + # Check if the powers of the matrix preserve the threshold for non-zero entries of A + if check_matrix_powers(weighted_matrix, powers, threshold): + return weighted_matrix + + attempts += 1 + + raise ValueError( + f"Unable to create a matrix satisfying the condition after {max_attempts} attempts." + ) + # option #3 diff --git a/gunfolds/estimation/modifiedgc.py b/gunfolds/estimation/modifiedgc.py new file mode 100644 index 00000000..38260f6e --- /dev/null +++ b/gunfolds/estimation/modifiedgc.py @@ -0,0 +1,92 @@ +from gunfolds.utils import graphkit as gk +from gunfolds import conversions as conv +from itertools import combinations +import numpy as np +import statsmodels.api as sm +import copy + +# Written by John Cook +# https://github.com/neuroneural/Undersampled_Graph_Estimation/blob/master/dbnestimation/Tools/grangercausality.py +# Modified to return edge weights by Sergey Plis + + +def gc(data, pval=0.05, bidirected=True): + """ + :param data: time series data + :type data: numpy matrix + + :param pval: p-value for statistical significance + :type pval: float + + :param bidirected: flag to indicate bidirectionality + :type bidirected: boolean + + :returns: modified graph and matrices D and B + :rtype: tuple + """ + n = data.shape[0] + # Initialize D and B matrices with zeros + D = np.zeros((n, n)) + B = np.zeros((n, n)) + + data = np.asarray(np.r_[data[:, :-1], data[:, 1:]]) + + def FastPCtest(y, x): + y = y - 1 + x = x - 1 + nodelist = list(range(n)) + nodelist.remove(x) + nodelist.remove(y) + yd = data[n + int(y), :].T + Xalt = data[:n] + Xnull = np.vstack([Xalt, data[n + x, :]]).T + Xalt = Xalt.T + Xnull = sm.add_constant(Xnull) + Xalt = sm.add_constant(Xalt) + estnull = sm.OLS(yd, Xnull).fit() + estalt = sm.OLS(yd, Xalt).fit() + diff = sm.stats.anova_lm(estalt, estnull) + # Return F-statistic and condition with the correct indices + Fval = diff.iloc[1, 4] + return Fval, diff.iloc[1, 5] > pval + + def grangertest(y, x): + y = data[n + int(y) - 1, :].T + Xnull = data[:n, :].T + Xalt = np.vstack([data[: (x - 1), :], data[x:n, :]]).T + Xnull = sm.add_constant(Xnull) + Xalt = sm.add_constant(Xalt) + estnull = sm.OLS(y, Xnull).fit() + estalt = sm.OLS(y, Xalt).fit() + diff = sm.stats.anova_lm(estalt, estnull) + # Return F-statistic and condition with the correct indices + Fval = diff.iloc[1, 4] # F statistic comparing to previous model + return Fval, diff.iloc[1, 5] > pval # PR(>F) is the p-value + + # Assuming gk.superclique and conv.ian2g are defined elsewhere + new_g = gk.superclique(n) + g = conv.ian2g(new_g) + + for i in range(1, n + 1): + for j in range(1, n + 1): + Fval, condition = grangertest(j, i) + if condition: + sett = copy.deepcopy(g[str(i)][str(j)]) + sett.remove((0, 1)) + g[str(i)][str(j)] = sett + else: + D[i - 1, j - 1] = Fval + + biedges = combinations(range(1, n + 1), 2) + for i, j in biedges: + Fval, condition = FastPCtest(j, i) + if bidirected and condition: + sett = copy.deepcopy(g[str(i)][str(j)]) + sett.remove((2, 0)) + g[str(i)][str(j)] = sett + g[str(j)][str(i)] = sett + else: + B[i - 1, j - 1] = Fval + B[j - 1, i - 1] = Fval + + return conv.dict_format_converter(g), D, B diff --git a/gunfolds/scripts/gendata.py b/gunfolds/scripts/gendata.py new file mode 100644 index 00000000..4aac944b --- /dev/null +++ b/gunfolds/scripts/gendata.py @@ -0,0 +1,199 @@ +import numpy as np +import scipy.sparse as sp +from scipy.sparse.linalg import eigs +from gunfolds.utils import graphkit as gk +from gunfolds.conversions import graph2adj +import matplotlib.pyplot as plt +from matplotlib.animation import FuncAnimation +import uuid # Make sure to import the uuid module + + +'''def animate_matrix( + dd, window_size, interval, stride, figsize=(8, 6), aspect_ratio=None +): + fig, ax = plt.subplots(figsize=figsize) + lines = [ + ax.plot([], [], ".-", ms=0.5, lw=0.3)[0] for _ in range(dd.shape[0]) + ] + def init(): + ax.set_xlim(0, window_size) + ax.set_ylim(np.min(dd), np.max(dd)) + if aspect_ratio is not None: + ax.set_aspect(aspect_ratio) + return lines + def update(frame): + start_col = frame * stride + end_col = start_col + window_size + for i, line in enumerate(lines): + if end_col <= dd.shape[1]: + line.set_data(np.arange(window_size), dd[i, start_col:end_col]) + else: + # Avoid going beyond the number of columns in dd + padding = np.empty(end_col - dd.shape[1]) + padding.fill(np.nan) # Fill with NaNs + data = np.hstack((dd[i, start_col:], padding)) + line.set_data(np.arange(window_size), data) + return lines + frames_count = (dd.shape[1] - window_size) // stride + 1 + ani = FuncAnimation( + fig, + update, + frames=range(frames_count), + init_func=init, + blit=True, + interval=interval, + ) + plt.show() + return ani''' + + +def animate_matrix( + dd, + window_size, + interval, + stride, + figsize=(8, 6), + aspect_ratio=None, + save_animation=False, + save_duration=4, +): + fig, ax = plt.subplots(figsize=figsize) + lines = [ + ax.plot([], [], ".-", ms=0.5, lw=0.3)[0] for _ in range(dd.shape[0]) + ] + + def init(): + ax.set_xlim(0, window_size) + ax.set_ylim(np.min(dd), np.max(dd)) + if aspect_ratio is not None: + ax.set_aspect(aspect_ratio) + return lines + + def update(frame): + start_col = frame * stride + end_col = start_col + window_size + for i, line in enumerate(lines): + if end_col <= dd.shape[1]: + line.set_data(np.arange(window_size), dd[i, start_col:end_col]) + else: + # Avoid going beyond the number of columns in dd + padding = np.empty(end_col - dd.shape[1]) + padding.fill(np.nan) # Fill with NaNs + data = np.hstack((dd[i, start_col:], padding)) + line.set_data(np.arange(window_size), data) + return lines + + frames_count = (dd.shape[1] - window_size) // stride + 1 + frames_to_save = int((save_duration * 1000) / interval) + frames_range = range(min(frames_to_save, frames_count)) + + ani = FuncAnimation( + fig, + update, + frames=frames_range, + init_func=init, + blit=True, + interval=interval, + ) + + if save_animation: + # Generate a random filename using uuid + random_filename = f"animation_{uuid.uuid4()}.gif" + # Save the animation as an animated GIF + ani.save(random_filename, writer="imagemagick", fps=1000 / interval) + print(f"Saved animation to {random_filename}") + # Close the figure to prevent it from displaying after saving + plt.close(fig) + else: + # Display the animation if not saving + plt.show() + + return ani + + +def check_matrix_powers(W, A, powers, threshold): + for n in powers: + W_n = np.linalg.matrix_power(W, n) + non_zero_indices = np.nonzero(W_n) + if (np.abs(W_n[non_zero_indices]) < threshold).any(): + return False + return True + + +def create_stable_weighted_matrix( + A, + threshold=0.1, + powers=[1, 2, 3, 4], + max_attempts=1000, + damping_factor=0.99, + random_state=None, +): + np.random.seed( + random_state + ) # Set random seed for reproducibility if provided + attempts = 0 + + while attempts < max_attempts: + # Generate a random matrix with the same sparsity pattern as A + random_weights = np.random.randn(*A.shape) + weighted_matrix = A * random_weights + + # Convert to sparse format for efficient eigenvalue computation + weighted_sparse = sp.csr_matrix(weighted_matrix) + + # Compute the largest eigenvalue in magnitude + eigenvalues, _ = eigs(weighted_sparse, k=1, which="LM") + max_eigenvalue = np.abs(eigenvalues[0]) + + # Scale the matrix so that the spectral radius is slightly less than 1 + if max_eigenvalue > 0: + weighted_matrix *= damping_factor / max_eigenvalue + # Check if the powers of the matrix preserve the threshold for non-zero entries of A + if check_matrix_powers(weighted_matrix, A, powers, threshold): + return weighted_matrix + + attempts += 1 + + raise ValueError( + f"Unable to create a matrix satisfying the condition after {max_attempts} attempts." + ) + + +def drawsamplesLG(A, nstd=0.1, samples=100): + n = A.shape[0] + data = np.zeros([n, samples]) + data[:, 0] = nstd * np.random.randn(A.shape[0]) + for i in range(1, samples): + data[:, i] = A @ data[:, i - 1] + nstd * np.random.randn(A.shape[0]) + return data + + +def genData(A, rate=2, burnin=100, ssize=5000, noise=0.1, dist="normal"): + Agt = A + data = drawsamplesLG(Agt, samples=burnin + (ssize * rate), nstd=noise) + data = data[:, burnin:] + return data[:, ::rate] + + +u_rate = 1 +noise_svar = 0.1 +g = gk.ringmore(8, 2) +A = graph2adj(g) +W = create_stable_weighted_matrix(A, threshold=0.1, powers=[2, 3, 4]) + +dd = genData(W, rate=u_rate, ssize=8000, noise=noise_svar) + +shift = 2.5 +shift_values = shift * np.arange(dd.shape[0])[:, np.newaxis] +ddplot = dd + shift_values + +animation = animate_matrix( + ddplot[:, :2000], + window_size=1000, + interval=1, + stride=3, + figsize=(10, 3), + aspect_ratio="auto", + save_animation=True, # Set to True to save the animation + save_duration=4, # Duration of the saved animation in seconds +) \ No newline at end of file diff --git a/gunfolds/solvers/clingo_rasl.py b/gunfolds/solvers/clingo_rasl.py index aea08526..ddb65814 100644 --- a/gunfolds/solvers/clingo_rasl.py +++ b/gunfolds/solvers/clingo_rasl.py @@ -112,17 +112,24 @@ """ -def weighted_drasl_program(directed, bidirected): +def weighted_drasl_program(directed, bidirected, no_directed, no_bidirected): """ Adjusts the optimization code based on the directed and bidirected priority - :param directed: priority of directed edges in optimization + :param directed: priority of existence of directed edges in optimization :type directed: integer - :param bidirected: priority of bidirected edges in optimization + :param bidirected: priority of existence of bidirected edges in optimization graph :type bidirected: integer + :param no_directed: priority of absence of directed edges in optimization + :type no_directed: integer + + :param no_bidirected: priority of absence of bidirected edges in optimization + graph + :type no_bidirected: integer + :returns: optimization part of the ``clingo`` code :rtype: string """ @@ -136,6 +143,8 @@ def weighted_drasl_program(directed, bidirected): :~ not bidirected(X, Y, L), hbidirected(X, Y, W, K), node(X;Y), u(L, K), X < Y. [W@$bidirected,X,Y] :~ directed(X, Y, L), no_hdirected(X, Y, W, K), node(X;Y), u(L, K). [W@$directed,X,Y] :~ bidirected(X, Y, L), no_hbidirected(X, Y, W, K), node(X;Y), u(L, K), X < Y. [W@$bidirected,X,Y] + :~ not directed(X, Y, L), hdirected(X, Y, W, K), node(X;Y), u(L, K). [W@$no_directed,X,Y] + :~ not bidirected(X, Y, L), hbidirected(X, Y, W, K), node(X;Y), u(L, K), X < Y. [W@$no_bidirected,X,Y] pastfork(X,Y,L) :- directed(Z, X, K), directed(Z, Y, K), node(X;Y;Z), X < Y, K < L-1, uk(K), u(L, _). notequal(L-1,L) :- bidirected(X,Y,L), not pastfork(X,Y,L), node(X;Y), X < Y, u(L, _). @@ -146,7 +155,7 @@ def weighted_drasl_program(directed, bidirected): nonempty(L) :- bidirected(X, Y, L), u(L,_). :- not nonempty(L), u(L,_). """) - return t.substitute(directed=directed, bidirected=bidirected) + return t.substitute(directed=directed, bidirected=bidirected, no_directed=no_directed, no_bidirected=no_bidirected) def rate(u, uname='u'): @@ -253,7 +262,8 @@ def glist2str(g_list, weighted=False, dm=None, bdm=None): return s -def drasl_command(g_list, max_urate=0, weighted=False, scc=False, scc_members=None, dm=None, bdm=None, edge_weights=(1, 1)): +def drasl_command(g_list, max_urate=0, weighted=False, scc=False, scc_members=None, dm=None, bdm=None, + edge_weights=[1, 1, 1, 1, 1], GT_density=None): """ Given a list of graphs generates ``clingo`` codes @@ -272,7 +282,7 @@ def drasl_command(g_list, max_urate=0, weighted=False, scc=False, scc_members=No :param scc: whether to assume that each SCC in the input graph is either a singleton or have ``gcd=1``. If `True` a much more efficient algorithm is employed. - :type scc: (GUESS)boolean + :type scc: boolean :param scc_members: a list of sets for nodes in each SCC :type scc_members: list @@ -285,9 +295,17 @@ def drasl_command(g_list, max_urate=0, weighted=False, scc=False, scc_members=No weights for bidirected edges of each input n-node graph :type bdm: list of numpy arrays - :param edge_weights: a tuple of 2 values, the first is importance of matching - directed weights when solving optimization problem and the second is for bidirected. - :type edge_weights: tuple with 2 elements + :param edge_weights: a list of 5 values, the first is importance of matching the existence of + directed edges, second importance of matching the existence of + bidirected edges, third importance of matching the absence of + directed edges and forth importance of matching the absence of + bidirected edges. Fifth and last value is the importance of matching the expected + density of resulted graph when solving optimization problem. + :type edge_weights: list with 5 elements + + :param GT_density: desired density of the ground truth at causal + time-scale, multiplied by 1000 + :type GT_density: integer :returns: clingo code as a string :rtype: string @@ -296,20 +314,28 @@ def drasl_command(g_list, max_urate=0, weighted=False, scc=False, scc_members=No dm = [nd.astype('int') for nd in dm] if bdm is not None: bdm = [nd.astype('int') for nd in bdm] - assert len({len(g) for g in g_list}) == 1, "Input graphs have variable number of nodes!" - if not max_urate: max_urate = 1+3*len(g_list[0]) n = len(g_list) command = clingo_preamble(g_list[0]) + + if weighted and GT_density is not None: + command += f"#const d = {GT_density}. " + command += 'countedge1(C):- C = #count { edge1(X, Y): edge1(X, Y), node(X), node(Y)}. ' + command += 'countfull(C):- C = n*n. ' + command += 'hypoth_density(D) :- D = 1000*X/Y, countfull(Y), countedge1(X). ' + command += 'abs_diff(Diff) :- hypoth_density(D), Diff = |D - d|. ' + command += f':~ abs_diff(Diff). [Diff@{edge_weights[4]}]\n' + if scc: command += encode_list_sccs(g_list, scc_members) command += f"dagl({len(g_list[0])-1}). " command += glist2str(g_list, weighted=weighted, dm=dm, bdm=bdm) + ' ' # generate all graphs command += 'uk(1..'+str(max_urate)+').' + ' ' command += ' '.join([drate(max_urate, i+1, weighted=weighted) for i in range(n)]) + ' ' - command += weighted_drasl_program(edge_weights[0], edge_weights[1]) if weighted else drasl_program + command += weighted_drasl_program(edge_weights[0], edge_weights[1], edge_weights[2], + edge_weights[3]) if weighted else drasl_program command += f":- M = N, {{u(M, 1..{n}); u(N, 1..{n})}} == 2, u(M, _), u(N, _). " command += "#show edge1/2. " command += "#show u/2." @@ -318,7 +344,7 @@ def drasl_command(g_list, max_urate=0, weighted=False, scc=False, scc_members=No def drasl(glist, capsize=CAPSIZE, timeout=0, urate=0, weighted=False, scc=False, scc_members=None, dm=None, - bdm=None, pnum=PNUM, edge_weights=(1, 1), configuration="crafty", optim='optN'): + bdm=None, pnum=PNUM, GT_density= None, edge_weights=[1, 1, 1, 1, 1], configuration="crafty", optim='optN'): """ Compute all candidate causal time-scale graphs that could have generated all undersampled graphs at all possible undersampling @@ -363,9 +389,17 @@ def drasl(glist, capsize=CAPSIZE, timeout=0, urate=0, weighted=False, scc=False, :param pnum: number of parallel threads to run ``clingo`` on :type pnum: integer - :param edge_weights: a tuple of 2 values, the first is importance - of matching directed weights when solving optimization problem and the second is for bidirected. - :type edge_weights: tuple with 2 elements + :param edge_weights: a list of 5 values, the first is importance of matching the existence of + directed edges, second importance of matching the existence of + bidirected edges, third importance of matching the absence of + directed edges and forth importance of matching the absence of + bidirected edges. Fifth and last value is the importance of matching the expected + density of resulted graph when solving optimization problem. + :type edge_weights: list with 5 elements + + :param GT_density: desired density of the ground truth at causal + time-scale, multiplied by 1000 + :type GT_density: integer :param configuration: Select configuration based on problem type @@ -397,7 +431,8 @@ def drasl(glist, capsize=CAPSIZE, timeout=0, urate=0, weighted=False, scc=False, if not isinstance(glist, list): glist = [glist] return clingo(drasl_command(glist, max_urate=urate, weighted=weighted, - scc=scc, scc_members=scc_members, dm=dm, bdm=bdm, edge_weights=edge_weights), + scc=scc, scc_members=scc_members, dm=dm, + bdm=bdm, edge_weights=edge_weights,GT_density=GT_density), capsize=capsize, convert=drasl_jclingo2g, configuration=configuration, timeout=timeout, exact=not weighted, pnum=pnum, optim=optim) diff --git a/gunfolds/utils/graphkit.py b/gunfolds/utils/graphkit.py index 2c56d204..309af8a2 100644 --- a/gunfolds/utils/graphkit.py +++ b/gunfolds/utils/graphkit.py @@ -427,10 +427,10 @@ def _undirected_OCE(g1, g2): def _normed_undirected_OCE(g1, g2): """ - Return omission and comission errors for undirected edges. + Return omission and commission errors for undirected edges. Omission error is normalized by the number of edges present - in the ground truth. Commision error is normalized by the + in the ground truth. Commission error is normalized by the number of possible edges minus the number of edges present in the ground truth. @@ -775,8 +775,8 @@ def pow_degree_graph(node_num, degree): :type node_num: integer :param degree: degree - :type degree: integer - + :type degree: float + :returns: a graph constructed using the Havel-Hakimi algorithm. :rtype: dictionary(``gunfolds`` graph) """