Skip to content

Commit 524fcb8

Browse files
authored
for v0.16.2 (#200)
- bug fix in writeTableCF: columns with taxon name not restricted to be 1:4 - new: posterior_loghybridweight for the posterior probability of a trait passed by "gene flow"
1 parent e4cbb04 commit 524fcb8

File tree

4 files changed

+94
-36
lines changed

4 files changed

+94
-36
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "PhyloNetworks"
22
uuid = "33ad39ac-ed31-50eb-9b15-43d0656eaa72"
33
license = "MIT"
4-
version = "0.16.1"
4+
version = "0.16.2"
55

66
[deps]
77
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"

src/readData.jl

+34-33
Original file line numberDiff line numberDiff line change
@@ -162,28 +162,28 @@ end
162162

163163
function readTableCF!(df::DataFrames.DataFrame; summaryfile=""::AbstractString, kwargs...)
164164
@debug "assume the numbers for the taxon read from the observed CF table match the numbers given to the taxon when creating the object network"
165+
taxoncolnames = [[:t1, :tx1, :tax1, :taxon1], [:t2, :tx2, :tax2, :taxon2],
166+
[:t3, :tx3, :tax3, :taxon3], [:t4, :tx4, :tax4, :taxon4] ]
167+
taxoncol = [findfirst(x-> x taxoncolnames[1], DataFrames.propertynames(df)),
168+
findfirst(x-> x taxoncolnames[2], DataFrames.propertynames(df)),
169+
findfirst(x-> x taxoncolnames[3], DataFrames.propertynames(df)),
170+
findfirst(x-> x taxoncolnames[4], DataFrames.propertynames(df))]
165171
alternativecolnames = [ # obsCF12 is as exported by fittedQuartetCF()
166-
[:CF12_34, Symbol("CF12.34"), :obsCF12],
167-
[:CF13_24, Symbol("CF13.24"), :obsCF13],
168-
[:CF14_23, Symbol("CF14.23"), :obsCF14]
172+
[:CF12_34, Symbol("CF12.34"), :obsCF12, :CF1234],
173+
[:CF13_24, Symbol("CF13.24"), :obsCF13, :CF1324],
174+
[:CF14_23, Symbol("CF14.23"), :obsCF14, :CF1423]
169175
]
170176
obsCFcol = [findfirst(x-> x alternativecolnames[1], DataFrames.propertynames(df)),
171177
findfirst(x-> x alternativecolnames[2], DataFrames.propertynames(df)),
172178
findfirst(x-> x alternativecolnames[3], DataFrames.propertynames(df))]
173179
ngenecol = findfirst(isequal(:ngenes), DataFrames.propertynames(df))
174180
withngenes = ngenecol !== nothing
175-
if nothing in obsCFcol # one or more col names for CFs were not found
176-
size(df,2) == (withngenes ? 8 : 7) ||
177-
@warn """Column names for quartet concordance factors (CFs) were not recognized.
178-
Was expecting CF12_34, CF13_24 and CF14_23 for the columns with CF values,
179-
or CF12.34 or obsCF12, etc.
180-
Will assume that the first 4 columns give the taxon names, and that columns 5-7 give the CFs."""
181-
obsCFcol = [5,6,7] # assuming CFs are in columns 5,6,7, with colname mismatch
182-
end
183-
minimum(obsCFcol) > 4 ||
184-
error("CFs found in columns $obsCFcol, but taxon labels expected in columns 1-4")
185-
# fixit: what about columns giving the taxon names: always assumed to be columns 1-4? No warning if not?
186-
columns = [[1,2,3,4]; obsCFcol]
181+
nothing in taxoncol && error("columns for taxon names were not found")
182+
nothing in obsCFcol && error(
183+
"""Could not identify columns with quartet concordance factors (qCFs).
184+
Was expecting CF12_34, CF13_24 and CF14_23 for the columns with CF values,
185+
or CF12.34 or obsCF12, etc.""")
186+
columns = [taxoncol; obsCFcol]
187187
if withngenes push!(columns, ngenecol) end
188188

189189
d = readTableCF!(df, columns; kwargs...)
@@ -661,15 +661,16 @@ data: [1.0, 0.0, 0.0, 0.5]
661661
julia> df = writeTableCF(q,t); # to get a DataFrame that can be saved to a file later
662662
663663
julia> show(df, allcols=true)
664-
5×8 DataFrame
665-
Row │ t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
666-
│ String String String String Float64 Float64 Float64 Float64
667-
─────┼────────────────────────────────────────────────────────────────────
668-
1 │ A B D E 0.25 0.25 0.5 2.0
669-
2 │ A B D O 0.5 0.5 0.0 1.0
670-
3 │ A B E O 1.0 0.0 0.0 0.5
671-
4 │ A D E O 1.0 0.0 0.0 0.5
672-
5 │ B D E O 0.0 0.0 0.0 0.0
664+
5×9 DataFrame
665+
Row │ qind t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
666+
│ Int64 String String String String Float64 Float64 Float64 Float64
667+
─────┼───────────────────────────────────────────────────────────────────────────
668+
1 │ 1 A B D E 0.25 0.25 0.5 2.0
669+
2 │ 2 A B D O 0.5 0.5 0.0 1.0
670+
3 │ 3 A B E O 1.0 0.0 0.0 0.5
671+
4 │ 4 A D E O 1.0 0.0 0.0 0.5
672+
5 │ 5 B D E O 0.0 0.0 0.0 0.0
673+
673674
julia> # using CSV; CSV.write(df, "filename.csv");
674675
675676
julia> tree2 = readTopology("((A,(B,D)),E);");
@@ -680,15 +681,15 @@ Reading in trees, looking at 5 quartets in each...
680681
**
681682
682683
julia> show(writeTableCF(q,t), allcols=true)
683-
8 DataFrame
684-
Row │ t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
685-
│ String String String String Float64 Float64 Float64 Float64
686-
─────┼───────────────────────────────────────────────────────────────────────
687-
1 │ A B D E 0.333333 0.333333 0.333333 3.0
688-
2 │ A B D O 0.5 0.5 0.0 2.0
689-
3 │ A B E O 1.0 0.0 0.0 1.0
690-
4 │ A D E O 1.0 0.0 0.0 1.0
691-
5 │ B D E O 0.0 0.0 0.0 0.0
684+
9 DataFrame
685+
Row │ qind t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
686+
Int64 String String String String Float64 Float64 Float64 Float64
687+
─────┼──────────────────────────────────────────────────────────────────────────────
688+
1 │ 1 A B D E 0.333333 0.333333 0.333333 3.0
689+
2 │ 2 A B D O 0.5 0.5 0.0 2.0
690+
3 │ 3 A B E O 1.0 0.0 0.0 1.0
691+
4 │ 4 A D E O 1.0 0.0 0.0 1.0
692+
5 │ 5 B D E O 0.0 0.0 0.0 0.0
692693
```
693694
"""
694695
function countquartetsintrees(tree::Vector{HybridNetwork},

src/traitsLikDiscrete.jl

+58-2
Original file line numberDiff line numberDiff line change
@@ -791,11 +791,15 @@ end
791791
"""
792792
posterior_logtreeweight(obj::SSM, trait = 1)
793793
794-
Return an array A such that A[t] = log of P(tree `t` and trait `trait`)
795-
if a single `trait` is requested, or A[i,t]= log of P(tree `t` and trait `i`)
794+
Array A of log-posterior probabilities for each tree displayed in the network:
795+
such that A[t] = log of P(tree `t` | trait `trait`)
796+
if a single `trait` is requested, or A[t,i]= log of P(tree `t` | trait `i`)
796797
if `trait` is a vector or range (e.g. `trait = 1:obj.nsites`).
797798
These probabilities are conditional on the model parameters in `obj`.
798799
800+
Displayed trees are listed in the order in which they are stored in the fitted
801+
model object `obj`.
802+
799803
**Precondition**: `_loglikcache` updated by [`discrete_corelikelihood!`](@ref)
800804
801805
# examples
@@ -835,6 +839,58 @@ function posterior_logtreeweight(obj::SSM, trait = 1)
835839
return ts
836840
end
837841

842+
"""
843+
posterior_loghybridweight(obj::SSM, hybrid_name, trait = 1)
844+
posterior_loghybridweight(obj::SSM, edge_number, trait = 1)
845+
846+
Log-posterior probability for all trees displaying the minor parent edge
847+
of hybrid node named `hybrid_name`, or displaying the edge number `edge_number`.
848+
That is: log of P(hybrid minor parent | trait) if a single `trait` is requested,
849+
or A[i]= log of P(hybrid minor parent | trait `i`)
850+
if `trait` is a vector or range (e.g. `trait = 1:obj.nsites`).
851+
These probabilities are conditional on the model parameters in `obj`.
852+
853+
**Precondition**: `_loglikcache` updated by [`discrete_corelikelihood!`](@ref)
854+
855+
# examples
856+
857+
```jldoctest
858+
julia> net = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);");
859+
860+
julia> m1 = BinaryTraitSubstitutionModel([0.1, 0.1], ["lo", "hi"]); # arbitrary rates
861+
862+
julia> using DataFrames
863+
864+
julia> dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"]);
865+
866+
julia> fit = fitdiscrete(net, m1, dat); # optimized rates: α=0.27 and β=0.35
867+
868+
julia> plhw = PhyloNetworks.posterior_loghybridweight(fit, "H1");
869+
870+
julia> round(exp(plhw), digits=5) # posterior probability of going through minor hybrid edge
871+
0.08017
872+
873+
julia> hn = net.node[3]; getparentedgeminor(hn).gamma # prior probability
874+
0.1
875+
```
876+
"""
877+
function posterior_loghybridweight(obj::SSM, hybridname::String, trait = 1)
878+
hn_index = findfirst(n -> n.name == hybridname, obj.net.node)
879+
isnothing(hn_index) && error("node named $hybridname not found")
880+
hn = obj.net.node[hn_index]
881+
hn.hybrid || error("node named $hybridname is not a hybrid node")
882+
me = getparentedgeminor(hn)
883+
posterior_loghybridweight(obj, me.number, trait)
884+
end
885+
function posterior_loghybridweight(obj::SSM, edgenum::Integer, trait = 1)
886+
tpp = posterior_logtreeweight(obj, trait) # size: (ntree,) or (ntree,ntraits)
887+
hasedge = tree -> any(e.number == edgenum for e in tree.edge)
888+
tokeep = map(hasedge, obj.displayedtree)
889+
tppe = view(tpp, tokeep, :) # makes it a matrix
890+
epp = dropdims(mapslices(logsumexp, tppe, dims=1); dims=2)
891+
return (size(epp)==(1,) ? epp[1] : epp) # scalar or vector
892+
end
893+
838894
"""
839895
traitlabels2indices(data, model::SubstitutionModel)
840896

test/test_traitLikDiscrete.jl

+1
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ asr = ancestralStateReconstruction(fit1)
340340
pltw = [-0.08356534477069566, -2.5236181051014333]
341341
@test PhyloNetworks.posterior_logtreeweight(fit1) pltw atol=1e-5
342342
@test PhyloNetworks.posterior_logtreeweight(fit1, 1:1) reshape(pltw, (2,1)) atol=1e-5
343+
@test PhyloNetworks.posterior_loghybridweight(fit1, "H1") -2.5236227134322293
343344

344345
end # end of testset, fixed topology
345346

0 commit comments

Comments
 (0)