diff --git a/Project.toml b/Project.toml index 819831d..ce6eb39 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.2.2" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/src/HMatrices.jl b/src/HMatrices.jl index 72dec53..f4b338b 100644 --- a/src/HMatrices.jl +++ b/src/HMatrices.jl @@ -10,6 +10,7 @@ using RecipesBase using Distributed using Base.Threads using AbstractTrees +using DataFlowTasks """ getblock!(block,K,irange,jrange) @@ -65,6 +66,23 @@ include("multiplication.jl") include("triangular.jl") include("lu.jl") +# interface of DataFlowTasks +function DataFlowTasks.memory_overlap(H1::HMatrix, H2::HMatrix) + isempty(intersect(rowrange(H1), rowrange(H2))) && (return false) + isempty(intersect(colrange(H1), colrange(H2))) && (return false) + return true +end +function DataFlowTasks.memory_overlap(H1::HMatrix, pairs::Vector{<:NTuple{2,<:HMatrix}}) + for (A, B) in pairs + DataFlowTasks.memory_overlap(H1, A) && (return true) + DataFlowTasks.memory_overlap(H1, B) && (return true) + end + return false +end +function DataFlowTasks.memory_overlap(pairs::Vector{<:NTuple{2,<:HMatrix}}, H::HMatrix) + return DataFlowTasks.memory_overlap(H, pairs) +end + export ClusterTree, CardinalitySplitter, DyadicSplitter, diff --git a/src/hmatrix.jl b/src/hmatrix.jl index a3ad640..5429482 100644 --- a/src/hmatrix.jl +++ b/src/hmatrix.jl @@ -121,7 +121,9 @@ function _getcol!(col, adjH::Adjoint{<:Any,<:HMatrix}, j, piv) shift = pivot(adjH) .- 1 jl = j - shift[2] irange = rowrange(adjH) .- (piv[1] - 1) - getcol!(view(col, irange), data(adjH), jl) + H = parent(adjH) + d = data(H) + getcol!(view(col, irange), adjoint(d), jl) end for child in children(adjH) if j ∈ colrange(child) @@ -421,7 +423,7 @@ function _assemble_dense_block!(hmat, K) end hasdata(adjH::Adjoint{<:Any,<:HMatrix}) = hasdata(adjH.parent) -data(adjH::Adjoint{<:Any,<:HMatrix}) = adjoint(data(adjH.parent)) +data(adjH::Adjoint{<:Any,<:HMatrix}) = adjoint(adjH.parent.data) children(adjH::Adjoint{<:Any,<:HMatrix}) = adjoint(children(adjH.parent)) pivot(adjH::Adjoint{<:Any,<:HMatrix}) = reverse(pivot(adjH.parent)) offset(adjH::Adjoint{<:Any,<:HMatrix}) = pivot(adjH) .- 1 diff --git a/src/lu.jl b/src/lu.jl index f855b38..b49a5e5 100644 --- a/src/lu.jl +++ b/src/lu.jl @@ -30,7 +30,8 @@ function LinearAlgebra.lu!(M::HMatrix, compressor; threads = use_threads()) buffers = [(VectorOfVectors(T), VectorOfVectors(T)) for _ in 1:Threads.nthreads()] _lu!(M, compressor, threads, buffers) # wrap the result in the LU structure - return LU(M, LinearAlgebra.BlasInt[], LinearAlgebra.BlasInt(0)) + res = @dspawn LU(@R(M), LinearAlgebra.BlasInt[], LinearAlgebra.BlasInt(0)) label = "LU" + return fetch(res) end """ @@ -59,10 +60,8 @@ LinearAlgebra.lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwa function _lu!(M::HMatrix, compressor, threads, bufs = nothing) if isleaf(M) - d = data(M) - @assert d isa Matrix - lu!(d, NOPIVOT()) - else + @dspawn lu!(data(@RW(M)), NOPIVOT()) label = "Dense LU" + else # recurse on children @assert !hasdata(M) chdM = children(M) m, n = size(chdM) diff --git a/src/multiplication.jl b/src/multiplication.jl index f2e7f28..59e2308 100644 --- a/src/multiplication.jl +++ b/src/multiplication.jl @@ -17,7 +17,7 @@ function hmul!(C::T, A::T, B::T, a, b, compressor, bufs_ = nothing) where {T<:HM b == true || rmul!(C, b) dict = IdDict{T,Vector{NTuple{2,T}}}() _plan_dict!(dict, C, A, B) - _hmul!(C, compressor, dict, a, nothing, bufs) + _hmul!(C, compressor, dict, a, bufs) return C end @@ -42,43 +42,131 @@ function _plan_dict!(dict, C::T, A::T, B::T) where {T<:HMatrix} return dict end -function _hmul!(C::HMatrix, compressor, dict, a, R, bufs) - execute_node!(C, compressor, dict, a, R, bufs) - shift = pivot(C) .- 1 +# function _hmul!(C::HMatrix, compressor, dict, a, bufs) +# # recursive multiplication function. For the node C, if it is a leaf, you +# # add the contribution of all the pairs in the dictionary and P directly to +# # it. If it is not a leaf, build an approximation of P + all the pairs, and +# # pass it down to the children. +# T = typeof(C) +# S = eltype(C) +# pairs = get(dict, C, Tuple{T,T}[]) +# if isleaf(C) +# if isadmissible(C) +# @dspawn begin +# D = data(C)::RkMatrix{S} +# L = MulLinearOp(D, P, pairs, a) +# id = Threads.threadid() +# R = compressor(L, axes(L, 1), axes(L, 2), bufs[id]) +# C.data = R +# end +# else +# M = data(C)::Matrix{S} +# for (A, B) in pairs +# _mul_dense!(M, A, B, a) +# end +# isnothing(P) || mul!(M, P.A, adjoint(P.B), true, true) +# end +# else +# if isempty(pairs) +# Pnew = P +# else +# L = MulLinearOp(nothing, P, pairs, a) +# id = Threads.threadid() +# Pnew = compressor(L, axes(L, 1), axes(L, 2), bufs[id]) +# end +# shift = pivot(C) .- 1 +# for chd in children(C) +# if isnothing(Pnew) +# _hmul!(chd, compressor, dict, a, nothing, bufs) +# else +# irange = rowrange(chd) .- shift[1] +# jrange = colrange(chd) .- shift[2] +# # Rv = hasdata(C) ? view(Rp,irange,jrange) : nothing +# block = RkMatrix(Pnew.A[irange, :], Pnew.B[jrange, :]) +# _hmul!(chd, compressor, dict, a, block, bufs) +# end +# end +# end +# return C +# end + +function _hmul!(C::T, compressor, dict, a, bufs) where {T<:HMatrix} + pairs = get(dict, C, Tuple{T,T}[]) + if isleaf(C) && !isempty(pairs) + if isadmissible(C) + @dspawn begin + @RW C + @R pairs + L = MulLinearOp(data(C), nothing, pairs, a) + id = Threads.threadid() + R = compressor(L, axes(L, 1), axes(L, 2), bufs[id]) + setdata!(C, R) + end label = "admissible leaf $(size(C)) ($(length(pairs)))" + else + @dspawn begin + @RW C + @R pairs + d = data(C) + for (A, B) in pairs + _mul_dense!(d, A, B, a) + end + end label = "non-admissible leaf" + end + elseif !isempty(pairs) + # internal node: compute low rank approximation + Rtask = @dspawn begin + @R pairs + L = MulLinearOp(nothing, nothing, pairs, a) + id = Threads.threadid() + compressor(L, axes(L, 1), axes(L, 2), bufs[id]) + end label = "internal node" + # move low rank data to leaves + add_to_leaves!(C, Rtask, compressor, bufs) + end + # continue with children of C for chd in children(C) - irange = rowrange(chd) .- shift[1] - jrange = colrange(chd) .- shift[2] - Rp = data(C) - Rv = hasdata(C) ? RkMatrix(Rp.A[irange, :], Rp.B[jrange, :]) : nothing - # Rv = hasdata(C) ? view(Rp,irange,jrange) : nothing - _hmul!(chd, compressor, dict, a, Rv, bufs) + _hmul!(chd, compressor, dict, a, bufs) end - isleaf(C) || (setdata!(C, nothing)) return C end -# non-recursive execution -function execute_node!(C::HMatrix, compressor, dict, a, R, bufs) - T = typeof(C) - S = eltype(C) - pairs = get(dict, C, Tuple{T,T}[]) - isnothing(R) && isempty(pairs) && (return C) - if isleaf(C) && !isadmissible(C) - d = data(C)::Matrix{S} - for (A, B) in pairs - _mul_dense!(d, A, B, a) - end - # isnothing(R) || axpy!(true, R, d) - isnothing(R) || mul!(d, R.A, adjoint(R.B), true, true) +""" + add_to_leaves(H::HMatrix,R,compressor) + +Add `R` to the leaves of `H`. +""" +function add_to_leaves!(H::HMatrix, Rtask, compressor, bufs) + shift = pivot(H) .- 1 + return _add_to_leaves!(H, Rtask, compressor, shift, bufs) +end +function _add_to_leaves!(node, Rtask, compressor, shift, bufs) + S = typeof(node) + T = eltype(node) + if isleaf(node) + @dspawn begin + @RW node + R = fetch(Rtask)::RkMatrix{T} + irange = rowrange(node) .- shift[1] + jrange = colrange(node) .- shift[2] + Rv = RkMatrix(R.A[irange, :], R.B[jrange, :]) + # Rv = view(R,irange,jrange) + if isadmissible(node) + Rl = data(node)::RkMatrix{T} + L = MulLinearOp(Rl, Rv, Tuple{S,S}[], true) + id = Threads.threadid() + node.data = compressor(L, axes(L, 1), axes(L, 2), bufs[id]) + else + M = data(node)::Matrix{T} + mul!(M, Rv.A, adjoint(Rv.B), true, true) + # axpy!(true, Rv, M) + end + end label = "add to leaves" else - L = MulLinearOp(data(C), R, pairs, a) - id = Threads.threadid() - R = compressor(L, axes(L, 1), axes(L, 2), bufs[id]) - id == Threads.threadid() || - (@warn "thread id changed from $id to $(Threads.threadid())") - setdata!(C, R) + for child in children(node) + _add_to_leaves!(child, Rtask, compressor, shift, bufs) + end end - return C + return node end """ diff --git a/src/triangular.jl b/src/triangular.jl index a56f032..cbb7862 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -11,7 +11,7 @@ function Base.show(io::IO, ::MIME"text/plain", L::HUnitLowerTriangular) return println(io, "Unit lower triangular part of $H") end -function LinearAlgebra.ldiv!(L::HUnitLowerTriangular, B::AbstractMatrix) +function LinearAlgebra.ldiv!(L::HUnitLowerTriangular, B::StridedMatrix) H = parent(L) if isleaf(H) d = data(H) @@ -51,8 +51,12 @@ function LinearAlgebra.ldiv!( H = parent(L) @assert isclean(H) if isleaf(X) - d = data(X) - ldiv!(L, d) + @dspawn begin + @R H + @RW X + d = data(X) + ldiv!(L, d) + end label = "dense ldiv" elseif isleaf(H) # X not a leaf, but L is a leaf. This should not happen. error() else @@ -141,8 +145,12 @@ function LinearAlgebra.rdiv!( ) H = parent(U) if isleaf(X) - d = data(X) - rdiv!(d, U) # b <-- b/L + @dspawn begin + @RW X + @R H + d = data(X) + rdiv!(d, U) # b <-- b/L + end label = "dense rdiv" elseif isleaf(H) error() else diff --git a/test/compressor_test.jl b/test/compressor_test.jl index 82e686e..61b8717 100644 --- a/test/compressor_test.jl +++ b/test/compressor_test.jl @@ -34,16 +34,16 @@ Random.seed!(1) # test fast update of frobenius norm m, n = 10000, 1000 r = 10 - A = VectorOfVectors(T,m,r) - B = VectorOfVectors(T,n,r) - A.data .= rand(T, m*r) - B.data .= rand(T, n*r) - old_norm = norm(Matrix(A)*adjoint(Matrix(B)), 2) + A = VectorOfVectors(T, m, r) + B = VectorOfVectors(T, n, r) + A.data .= rand(T, m * r) + B.data .= rand(T, n * r) + old_norm = norm(Matrix(A) * adjoint(Matrix(B)), 2) a = HMatrices.newcol!(A) - a .= rand(T,m) + a .= rand(T, m) b = HMatrices.newcol!(B) - b .= rand(T,n) - new_norm = norm(Matrix(A)*adjoint(Matrix(B)), 2) + b .= rand(T, n) + new_norm = norm(Matrix(A) * adjoint(Matrix(B)), 2) @test new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B) # test simple case where things are not compressible @@ -67,9 +67,9 @@ Random.seed!(1) R = tsvd(M, irange, jrange) @test rank(R) == r # test recompression using QR-SVD - R2 = RkMatrix(hcat(R.A,R.A),hcat(R.B,R.B)) + R2 = RkMatrix(hcat(R.A, R.A), hcat(R.B, R.B)) @test rank(R2) == 2r - HMatrices.compress!(R2,tsvd) + HMatrices.compress!(R2, tsvd) @test rank(R2) == r end end @@ -101,17 +101,17 @@ end # test fast update of frobenius norm T = SMatrix{3,3,Float64,9} - A = VectorOfVectors(T,m,r) - B = VectorOfVectors(T,n,r) - A.data .= rand(T, m*r) - B.data .= rand(T, n*r) - R = Matrix(A)*collect(adjoint(Matrix(B))) + A = VectorOfVectors(T, m, r) + B = VectorOfVectors(T, n, r) + A.data .= rand(T, m * r) + B.data .= rand(T, n * r) + R = Matrix(A) * collect(adjoint(Matrix(B))) old_norm = norm(R, 2) a = HMatrices.newcol!(A) - a .= rand(T,m) + a .= rand(T, m) b = HMatrices.newcol!(B) - b .= rand(T,n) - R = Matrix(A)*collect(adjoint(Matrix(B))) + b .= rand(T, n) + R = Matrix(A) * collect(adjoint(Matrix(B))) new_norm = norm(R, 2) HMatrices._update_frob_norm(old_norm, A, B) @test_broken new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B)