-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpoly_op.py
153 lines (124 loc) · 5.88 KB
/
poly_op.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#/*******************************************************************************************/
#/* This file is part of the training material available at */
#/* https://github.com/jthies/PELS */
#/* You may redistribute it and/or modify it under the terms of the BSD-style licence */
#/* included in this software. */
#/* */
#/* Contact: Jonas Thies ([email protected]) */
#/* */
#/*******************************************************************************************/
import numpy as np
from scipy.sparse import *
from kernels import *
from kernels import have_RACE
import sys
class poly_op:
'''
Given a matrix A and an integer k, this operator constructs the splitting
(cf. Jacobi iteration):
D^{-1/2}AD^{-1/2} = I - (L+U)
The 'apply' function implements a preconditioned matrix-vector product
w = M1 A M2 v
where M1 and M2 approximately solve
(I-L)v=w and (I-U)v=w, respectively,
using the truncated Neumann series:
v_0 = 0
v \approx \sum_{i=0}^k L^{i} w.
For symmetric and positive (spd) matrices A, this preconditioned operator is spd,
which makes it suitable for CG.
If -use_RACE is given on the command-line, and the RACE library is found,
cache blocking is used to increase the performance of the operator. The
cache_size parameter can be used to fine-tune the performance with RACE.
If RACE is not available, it is ignored.
'''
def __init__(self, A, poly_k, use_RACE=False, cache_size=30):
self.k = poly_k
self.shape = A.shape
self.dtype = A.dtype
# store the inverse of the square-root of the diagonal
# s.t. isqD*A*isqD has ones on the diagonal.
self.isqD = spdiags([1.0/np.sqrt(A.diagonal())], [0], m=self.shape[0], n=self.shape[1]).tocsr()
self.A1 = self.isqD*A*self.isqD
self.L = -tril(self.A1,-1).tocsr()
self.U = -triu(self.A1,1).tocsr()
self.mpkHandle = None
self.use_RACE = False
if have_RACE and use_RACE:
self.use_RACE = True
self.permute =None
self.unpermute = None
#if we have RACE, use it
if self.use_RACE:
split=True
highestPower=2*poly_k+1
print("Using RACE for cache blocking: cache_size=", cache_size)
self.mpkHandle=mpk_setup(self.A1, highestPower, cache_size, split)
self.permute=mpk_get_perm(self.mpkHandle, self.shape[0])
self.unpermute = np.arange(self.shape[0])
self.unpermute[self.permute] = np.arange(self.shape[0])
#permuteute all the objects
#A1 is not permuted, because it is not used if RACE is used
#self.L=self.L[self.permute[:,None], self.permute]
self.L = permute_csr(self.L, self.permute)
#self.U=self.U[self.permute[:,None], self.permute]
self.U = permute_csr(self.U, self.permute)
#work-around for diagonal, since it is not subscriptable
#not needed diagonal is one, due to normalization
#A_prec.isqD = spdiags([1.0/np.sqrt(A.diagonal())], [0], m=A.shape[0], n=A.shape[1])
self.t1 = np.zeros(self.shape[0], dtype=self.dtype)
self.t2 = np.zeros(self.shape[0], dtype=self.dtype)
self.t3 = np.zeros(self.shape[0], dtype=self.dtype)
# in case A has CUDA arrays, also copy over our compnents:
self.isqD = to_device(self.isqD)
self.A1 = to_device(self.A1)
self.L = to_device(self.L)
self.U = to_device(self.U)
self.t1 = to_device(self.t1)
self.t2 = to_device(self.t2)
self.t3 = to_device(self.t3)
def prec_rhs(self, b, prec_b):
'''
Given the right-hand side b of a linear system
Ax=b, computes prec_b=M1\b s.t.(this op)(M2x)=M1b solves the
equivalent preconditioned linear system for the preconditioned
solution vector M1x
'''
diag_spmv(self.isqD, b, self.t1)
self._neumann(self.L, self.k, self.t1, prec_b)
def unprec_sol(self, prec_x, x):
'''
Given the right-preconditioned solution vector prec_x = M2^{-1}x,
returns x.
'''
self._neumann(self.U, self.k, prec_x, x)
diag_spmv(self.isqD, x, x)
def apply(self, w, v):
'''
Apply the complete (preconditioned) operator to a vector.
v = M1 A M2 w.
See class description for details.
'''
if self.mpkHandle is None: # RACE not there (i.e., no cache-blocked MPK)
self._neumann(self.U, self.k, w, self.t1)
spmv(self.A1, self.t1, self.t2)
self._neumann(self.L, self.k, self.t2, v)
else: # RACE is there (i.e., cache-blocked MPK)
mpk_neumann_apply(self, w, v)
def __del__(self):
if hasattr(self, 'mpkHandle') and self.mpkHandle is not None:
mpk_free(self.mpkHandle)
# protected
def _neumann(self, M, k, rhs, sol):
'''
Apply truncated Neumann-series preconditioner to rhs, computing sol.
If A = I-M, the Neumann series to approximate v=A^{-1}rhs is defined as
inv(I-M) = sum_{k=0}^{\inf} M^k.
It converges if ||M||<1
(analogous to geometric series for scalar x: 1/(1-x) = sum_{k=0}^\inf x^k)
'''
# This is the naive implementation with 'back-to-back' spmv's.
# Every intermediate vector M^jy is computed explicitly.
axpby(1.0, rhs, 0.0, sol)
for j in range(k):
spmv(M,sol,self.t3)
axpby(1.0,self.t3,1.0,sol)