Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.2.2"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Expand Down
18 changes: 18 additions & 0 deletions src/HMatrices.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ using RecipesBase
using Distributed
using Base.Threads
using AbstractTrees
using DataFlowTasks

"""
getblock!(block,K,irange,jrange)
Expand Down Expand Up @@ -65,6 +66,23 @@ include("multiplication.jl")
include("triangular.jl")
include("lu.jl")

# interface of DataFlowTasks
function DataFlowTasks.memory_overlap(H1::HMatrix, H2::HMatrix)
isempty(intersect(rowrange(H1), rowrange(H2))) && (return false)
isempty(intersect(colrange(H1), colrange(H2))) && (return false)
return true
end
function DataFlowTasks.memory_overlap(H1::HMatrix, pairs::Vector{<:NTuple{2,<:HMatrix}})
for (A, B) in pairs
DataFlowTasks.memory_overlap(H1, A) && (return true)
DataFlowTasks.memory_overlap(H1, B) && (return true)
end
return false
end
function DataFlowTasks.memory_overlap(pairs::Vector{<:NTuple{2,<:HMatrix}}, H::HMatrix)
return DataFlowTasks.memory_overlap(H, pairs)
end

export ClusterTree,
CardinalitySplitter,
DyadicSplitter,
Expand Down
6 changes: 4 additions & 2 deletions src/hmatrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,9 @@ function _getcol!(col, adjH::Adjoint{<:Any,<:HMatrix}, j, piv)
shift = pivot(adjH) .- 1
jl = j - shift[2]
irange = rowrange(adjH) .- (piv[1] - 1)
getcol!(view(col, irange), data(adjH), jl)
H = parent(adjH)
d = data(H)
getcol!(view(col, irange), adjoint(d), jl)
end
for child in children(adjH)
if j ∈ colrange(child)
Expand Down Expand Up @@ -421,7 +423,7 @@ function _assemble_dense_block!(hmat, K)
end

hasdata(adjH::Adjoint{<:Any,<:HMatrix}) = hasdata(adjH.parent)
data(adjH::Adjoint{<:Any,<:HMatrix}) = adjoint(data(adjH.parent))
data(adjH::Adjoint{<:Any,<:HMatrix}) = adjoint(adjH.parent.data)
children(adjH::Adjoint{<:Any,<:HMatrix}) = adjoint(children(adjH.parent))
pivot(adjH::Adjoint{<:Any,<:HMatrix}) = reverse(pivot(adjH.parent))
offset(adjH::Adjoint{<:Any,<:HMatrix}) = pivot(adjH) .- 1
Expand Down
9 changes: 4 additions & 5 deletions src/lu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ function LinearAlgebra.lu!(M::HMatrix, compressor; threads = use_threads())
buffers = [(VectorOfVectors(T), VectorOfVectors(T)) for _ in 1:Threads.nthreads()]
_lu!(M, compressor, threads, buffers)
# wrap the result in the LU structure
return LU(M, LinearAlgebra.BlasInt[], LinearAlgebra.BlasInt(0))
res = @dspawn LU(@R(M), LinearAlgebra.BlasInt[], LinearAlgebra.BlasInt(0)) label = "LU"
return fetch(res)
end

"""
Expand Down Expand Up @@ -59,10 +60,8 @@ LinearAlgebra.lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwa

function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
if isleaf(M)
d = data(M)
@assert d isa Matrix
lu!(d, NOPIVOT())
else
@dspawn lu!(data(@RW(M)), NOPIVOT()) label = "Dense LU"
else # recurse on children
@assert !hasdata(M)
chdM = children(M)
m, n = size(chdM)
Expand Down
150 changes: 119 additions & 31 deletions src/multiplication.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function hmul!(C::T, A::T, B::T, a, b, compressor, bufs_ = nothing) where {T<:HM
b == true || rmul!(C, b)
dict = IdDict{T,Vector{NTuple{2,T}}}()
_plan_dict!(dict, C, A, B)
_hmul!(C, compressor, dict, a, nothing, bufs)
_hmul!(C, compressor, dict, a, bufs)
return C
end

Expand All @@ -42,43 +42,131 @@ function _plan_dict!(dict, C::T, A::T, B::T) where {T<:HMatrix}
return dict
end

function _hmul!(C::HMatrix, compressor, dict, a, R, bufs)
execute_node!(C, compressor, dict, a, R, bufs)
shift = pivot(C) .- 1
# function _hmul!(C::HMatrix, compressor, dict, a, bufs)
# # recursive multiplication function. For the node C, if it is a leaf, you
# # add the contribution of all the pairs in the dictionary and P directly to
# # it. If it is not a leaf, build an approximation of P + all the pairs, and
# # pass it down to the children.
# T = typeof(C)
# S = eltype(C)
# pairs = get(dict, C, Tuple{T,T}[])
# if isleaf(C)
# if isadmissible(C)
# @dspawn begin
# D = data(C)::RkMatrix{S}
# L = MulLinearOp(D, P, pairs, a)
# id = Threads.threadid()
# R = compressor(L, axes(L, 1), axes(L, 2), bufs[id])
# C.data = R
# end
# else
# M = data(C)::Matrix{S}
# for (A, B) in pairs
# _mul_dense!(M, A, B, a)
# end
# isnothing(P) || mul!(M, P.A, adjoint(P.B), true, true)
# end
# else
# if isempty(pairs)
# Pnew = P
# else
# L = MulLinearOp(nothing, P, pairs, a)
# id = Threads.threadid()
# Pnew = compressor(L, axes(L, 1), axes(L, 2), bufs[id])
# end
# shift = pivot(C) .- 1
# for chd in children(C)
# if isnothing(Pnew)
# _hmul!(chd, compressor, dict, a, nothing, bufs)
# else
# irange = rowrange(chd) .- shift[1]
# jrange = colrange(chd) .- shift[2]
# # Rv = hasdata(C) ? view(Rp,irange,jrange) : nothing
# block = RkMatrix(Pnew.A[irange, :], Pnew.B[jrange, :])
# _hmul!(chd, compressor, dict, a, block, bufs)
# end
# end
# end
# return C
# end

function _hmul!(C::T, compressor, dict, a, bufs) where {T<:HMatrix}
pairs = get(dict, C, Tuple{T,T}[])
if isleaf(C) && !isempty(pairs)
if isadmissible(C)
@dspawn begin
@RW C
@R pairs
L = MulLinearOp(data(C), nothing, pairs, a)
id = Threads.threadid()
R = compressor(L, axes(L, 1), axes(L, 2), bufs[id])
setdata!(C, R)
end label = "admissible leaf $(size(C)) ($(length(pairs)))"
else
@dspawn begin
@RW C
@R pairs
d = data(C)
for (A, B) in pairs
_mul_dense!(d, A, B, a)
end
end label = "non-admissible leaf"
end
elseif !isempty(pairs)
# internal node: compute low rank approximation
Rtask = @dspawn begin
@R pairs
L = MulLinearOp(nothing, nothing, pairs, a)
id = Threads.threadid()
compressor(L, axes(L, 1), axes(L, 2), bufs[id])
end label = "internal node"
# move low rank data to leaves
add_to_leaves!(C, Rtask, compressor, bufs)
end
# continue with children of C
for chd in children(C)
irange = rowrange(chd) .- shift[1]
jrange = colrange(chd) .- shift[2]
Rp = data(C)
Rv = hasdata(C) ? RkMatrix(Rp.A[irange, :], Rp.B[jrange, :]) : nothing
# Rv = hasdata(C) ? view(Rp,irange,jrange) : nothing
_hmul!(chd, compressor, dict, a, Rv, bufs)
_hmul!(chd, compressor, dict, a, bufs)
end
isleaf(C) || (setdata!(C, nothing))
return C
end

# non-recursive execution
function execute_node!(C::HMatrix, compressor, dict, a, R, bufs)
T = typeof(C)
S = eltype(C)
pairs = get(dict, C, Tuple{T,T}[])
isnothing(R) && isempty(pairs) && (return C)
if isleaf(C) && !isadmissible(C)
d = data(C)::Matrix{S}
for (A, B) in pairs
_mul_dense!(d, A, B, a)
end
# isnothing(R) || axpy!(true, R, d)
isnothing(R) || mul!(d, R.A, adjoint(R.B), true, true)
"""
add_to_leaves(H::HMatrix,R,compressor)

Add `R` to the leaves of `H`.
"""
function add_to_leaves!(H::HMatrix, Rtask, compressor, bufs)
shift = pivot(H) .- 1
return _add_to_leaves!(H, Rtask, compressor, shift, bufs)
end
function _add_to_leaves!(node, Rtask, compressor, shift, bufs)
S = typeof(node)
T = eltype(node)
if isleaf(node)
@dspawn begin
@RW node
R = fetch(Rtask)::RkMatrix{T}
irange = rowrange(node) .- shift[1]
jrange = colrange(node) .- shift[2]
Rv = RkMatrix(R.A[irange, :], R.B[jrange, :])
# Rv = view(R,irange,jrange)
if isadmissible(node)
Rl = data(node)::RkMatrix{T}
L = MulLinearOp(Rl, Rv, Tuple{S,S}[], true)
id = Threads.threadid()
node.data = compressor(L, axes(L, 1), axes(L, 2), bufs[id])
else
M = data(node)::Matrix{T}
mul!(M, Rv.A, adjoint(Rv.B), true, true)
# axpy!(true, Rv, M)
end
end label = "add to leaves"
else
L = MulLinearOp(data(C), R, pairs, a)
id = Threads.threadid()
R = compressor(L, axes(L, 1), axes(L, 2), bufs[id])
id == Threads.threadid() ||
(@warn "thread id changed from $id to $(Threads.threadid())")
setdata!(C, R)
for child in children(node)
_add_to_leaves!(child, Rtask, compressor, shift, bufs)
end
end
return C
return node
end

"""
Expand Down
18 changes: 13 additions & 5 deletions src/triangular.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ function Base.show(io::IO, ::MIME"text/plain", L::HUnitLowerTriangular)
return println(io, "Unit lower triangular part of $H")
end

function LinearAlgebra.ldiv!(L::HUnitLowerTriangular, B::AbstractMatrix)
function LinearAlgebra.ldiv!(L::HUnitLowerTriangular, B::StridedMatrix)
H = parent(L)
if isleaf(H)
d = data(H)
Expand Down Expand Up @@ -51,8 +51,12 @@ function LinearAlgebra.ldiv!(
H = parent(L)
@assert isclean(H)
if isleaf(X)
d = data(X)
ldiv!(L, d)
@dspawn begin
@R H
@RW X
d = data(X)
ldiv!(L, d)
end label = "dense ldiv"
elseif isleaf(H) # X not a leaf, but L is a leaf. This should not happen.
error()
else
Expand Down Expand Up @@ -141,8 +145,12 @@ function LinearAlgebra.rdiv!(
)
H = parent(U)
if isleaf(X)
d = data(X)
rdiv!(d, U) # b <-- b/L
@dspawn begin
@RW X
@R H
d = data(X)
rdiv!(d, U) # b <-- b/L
end label = "dense rdiv"
elseif isleaf(H)
error()
else
Expand Down
36 changes: 18 additions & 18 deletions test/compressor_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ Random.seed!(1)
# test fast update of frobenius norm
m, n = 10000, 1000
r = 10
A = VectorOfVectors(T,m,r)
B = VectorOfVectors(T,n,r)
A.data .= rand(T, m*r)
B.data .= rand(T, n*r)
old_norm = norm(Matrix(A)*adjoint(Matrix(B)), 2)
A = VectorOfVectors(T, m, r)
B = VectorOfVectors(T, n, r)
A.data .= rand(T, m * r)
B.data .= rand(T, n * r)
old_norm = norm(Matrix(A) * adjoint(Matrix(B)), 2)
a = HMatrices.newcol!(A)
a .= rand(T,m)
a .= rand(T, m)
b = HMatrices.newcol!(B)
b .= rand(T,n)
new_norm = norm(Matrix(A)*adjoint(Matrix(B)), 2)
b .= rand(T, n)
new_norm = norm(Matrix(A) * adjoint(Matrix(B)), 2)
@test new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B)

# test simple case where things are not compressible
Expand All @@ -67,9 +67,9 @@ Random.seed!(1)
R = tsvd(M, irange, jrange)
@test rank(R) == r
# test recompression using QR-SVD
R2 = RkMatrix(hcat(R.A,R.A),hcat(R.B,R.B))
R2 = RkMatrix(hcat(R.A, R.A), hcat(R.B, R.B))
@test rank(R2) == 2r
HMatrices.compress!(R2,tsvd)
HMatrices.compress!(R2, tsvd)
@test rank(R2) == r
end
end
Expand Down Expand Up @@ -101,17 +101,17 @@ end

# test fast update of frobenius norm
T = SMatrix{3,3,Float64,9}
A = VectorOfVectors(T,m,r)
B = VectorOfVectors(T,n,r)
A.data .= rand(T, m*r)
B.data .= rand(T, n*r)
R = Matrix(A)*collect(adjoint(Matrix(B)))
A = VectorOfVectors(T, m, r)
B = VectorOfVectors(T, n, r)
A.data .= rand(T, m * r)
B.data .= rand(T, n * r)
R = Matrix(A) * collect(adjoint(Matrix(B)))
old_norm = norm(R, 2)
a = HMatrices.newcol!(A)
a .= rand(T,m)
a .= rand(T, m)
b = HMatrices.newcol!(B)
b .= rand(T,n)
R = Matrix(A)*collect(adjoint(Matrix(B)))
b .= rand(T, n)
R = Matrix(A) * collect(adjoint(Matrix(B)))
new_norm = norm(R, 2)
HMatrices._update_frob_norm(old_norm, A, B)
@test_broken new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B)
Expand Down