jingchengx
diff --git a/‎.gitignore
+5-1 b/‎.gitignore
+5-1
diff --git a/‎REQUIRE
+1 b/‎REQUIRE
+1
diff --git a/‎benchmark/README.md
+32 b/‎benchmark/README.md
+32
diff --git a/‎benchmark/REQUIRE
+3 b/‎benchmark/REQUIRE
+3
diff --git a/‎benchmark/benchmarks.jl
+56 b/‎benchmark/benchmarks.jl
+56
diff --git a/‎benchmark/compareCommits.sh
+18 b/‎benchmark/compareCommits.sh
+18
diff --git a/‎benchmark/runBenchmark.jl
+6 b/‎benchmark/runBenchmark.jl
+6
diff --git a/‎docs/Project.toml
+4-4 b/‎docs/Project.toml
+4-4
diff --git a/‎docs/make.jl
+1 b/‎docs/make.jl
+1
diff --git a/‎docs/readme.md
+6 b/‎docs/readme.md
+6
diff --git a/‎docs/src/index.md
+2-1 b/‎docs/src/index.md
+2-1
diff --git a/‎docs/src/lib/public.md
+8-2 b/‎docs/src/lib/public.md
+8-2
diff --git a/‎docs/src/man/fitDiscrete.md
+137 b/‎docs/src/man/fitDiscrete.md
+137
diff --git a/‎examples/Ae_bicornis_8_withrepeatingsites.aln
+12 b/‎examples/Ae_bicornis_8_withrepeatingsites.aln
+12
@@ -5,6 +5,10 @@ docs/build/
 docs/site/
 docs/*.toml
 
+# benchmark tuning parameters #
+benchmark/params.json
+benchmark/tune.json
+
 # Compiled source #
 ###################
 *.com
@@ -95,4 +99,4 @@ Backup[ ]of[ ]*.numbers/
 *.jpg
 *.jpeg
 *.png
-*.dot
+*.dot
@@ -5,6 +5,7 @@ Combinatorics 0.7
 CSV 0.4
 DataFrames 0.13
 DataStructures 0.9
+Distributions 0.15.0
 GLM 1.0
 NLopt 0.5.1
 SpecialFunctions 0.7
 
@@ -0,0 +1,32 @@
+# Using PkgBenchmark to Compare the Efficiency of Two Different Commits
+
+PkgBenchmark allows us to compare the performance of a package at different branches, commits, or tags. For full documentation, see the PkgBenchmark [documentation here](https://juliaci.github.io/PkgBenchmark.jl/stable/).
+
+# Comparing Two Commits on Speed using Benchmarks
+This benchmark compares the speed of your current version of PhyloNetworks to the
+version in a previous commit.
+
+To use, enter PhyloNetworks' benchmark directory and run:
+```bash
+    bash compareCommits.sh oldCommitNumber
+```
+For example, in .julia/dev/PhyloNetworks/benchmark, run:
+```bash
+    bash compareCommits.sh oldCommitNumber
+```
+variables:
+   oldCommitNumber: the hash (SHA) of the earlier git commit to compare against
+
+# Adding New Benchmarks
+
+To add new benchmarks, use the dictionary interface introduced by BenchmarkTools.jl ([docs here](https://github.com/JuliaCI/BenchmarkTools.jl/blob/master/doc/manual.md#defining-benchmark-suites)).
+
+First, open `<PKGROOT>/benchmark/benchmarks.jl`. In this file, create a new suite. For example, the suite below tests nucleic acid substitution models. It has two subparts, jc69 and hky85.
+```julia
+SUITE["nasm"] = BenchmarkGroup(["jc69", "hky85"])
+```
+We can then add to this suite:
+```julia
+SUITE["nasm"]["jc69"] = @benchmarkable JC69([0.5])
+```
+
@@ -0,0 +1,3 @@
+BenchmarkTools 0.2
+PkgBenchmark 0.2
+Logging
@@ -0,0 +1,56 @@
+using BenchmarkTools, PhyloNetworks, DataFrames, Logging
+
+# Hide @warn and @info output while this file is loaded: only messages at
+# `Logging.Error` level or above are printed to stderr. The previous global
+# logger is kept in `old_logger` and restored on the last line of this file.
+logger = SimpleLogger(stderr, Logging.Error);
+old_logger = global_logger(logger);
+
+# Define a parent BenchmarkGroup to contain our SUITE
+const SUITE = BenchmarkGroup()
+
+# One child group per family of benchmarks (the string vectors are the tags)
+SUITE["nasm"] = BenchmarkGroup(["JC69", "HKY85"])
+SUITE["fitDiscreteFixed"] = BenchmarkGroup(["ERSM", "BTSM", "JC69", "HKY85"])
+SUITE["fitDiscrete"] = BenchmarkGroup(["ERSM", "BTSM", "JC69", "HKY85"])
+
+# NOTE: every non-constant global used inside a benchmark expression below is
+# interpolated with `$`, as recommended by the BenchmarkTools manual, so that
+# the timings do not include the cost of accessing an untyped global variable.
+
+# Add benchmarks to nasm group
+SUITE["nasm"]["JC69"] = @benchmarkable JC69([0.5])
+m1 = HKY85([.5], [0.25, 0.25, 0.25, 0.25]);
+SUITE["nasm"]["HKY85"] = @benchmarkable P!(P($m1, 1.0), $m1, 3.0)
+
+# fitDiscreteFixed benchmarks: `optimizeQ` and `optimizeRVAS` are both false
+# small 4-taxon network with one hybrid node, and one trait for each taxon
+net_dat = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);")
+species_alone = ["C","A","B","D"]
+dat_alone = DataFrame(trait=["hi","lo","lo","hi"])
+SUITE["fitDiscreteFixed"]["ERSM"] = @benchmarkable fitDiscrete($net_dat, :ERSM, $species_alone, $dat_alone; optimizeQ=false, optimizeRVAS=false)
+SUITE["fitDiscreteFixed"]["BTSM"] = @benchmarkable fitDiscrete($net_dat, :BTSM, $species_alone, $dat_alone; optimizeQ=false, optimizeRVAS=false)
+
+# DNA alignment shipped in the package's examples folder, and a larger network
+fastafile = joinpath(@__DIR__, "..", "examples", "Ae_bicornis_Tr406_Contig10132.aln")
+dna_dat, dna_weights = readfastatodna(fastafile, true);
+net_dna = readTopology("((((((((((((((Ae_caudata_Tr275,Ae_caudata_Tr276),Ae_caudata_Tr139))#H1,#H2),(((Ae_umbellulata_Tr266,Ae_umbellulata_Tr257),Ae_umbellulata_Tr268),#H1)),((Ae_comosa_Tr271,Ae_comosa_Tr272),(((Ae_uniaristata_Tr403,Ae_uniaristata_Tr357),Ae_uniaristata_Tr402),Ae_uniaristata_Tr404))),(((Ae_tauschii_Tr352,Ae_tauschii_Tr351),(Ae_tauschii_Tr180,Ae_tauschii_Tr125)),(((((((Ae_longissima_Tr241,Ae_longissima_Tr242),Ae_longissima_Tr355),(Ae_sharonensis_Tr265,Ae_sharonensis_Tr264)),((Ae_bicornis_Tr408,Ae_bicornis_Tr407),Ae_bicornis_Tr406)),((Ae_searsii_Tr164,Ae_searsii_Tr165),Ae_searsii_Tr161)))#H2,#H4))),(((T_boeoticum_TS8,(T_boeoticum_TS10,T_boeoticum_TS3)),T_boeoticum_TS4),((T_urartu_Tr315,T_urartu_Tr232),(T_urartu_Tr317,T_urartu_Tr309)))),(((((Ae_speltoides_Tr320,Ae_speltoides_Tr323),Ae_speltoides_Tr223),Ae_speltoides_Tr251))H3,((((Ae_mutica_Tr237,Ae_mutica_Tr329),Ae_mutica_Tr244),Ae_mutica_Tr332))#H4))),Ta_caputMedusae_TB2),S_vavilovii_Tr279),Er_bonaepartis_TB1),H_vulgare_HVens23);");
+# The network string above carries no branch lengths or inheritance values:
+# give every edge length 1.0, and set gamma to 0.5 on edges whose gamma is
+# negative (presumably the marker for "gamma not set" -- TODO confirm).
+for edge in net_dna.edge #adds branch lengths
+    setLength!(edge,1.0)
+    if edge.gamma < 0
+        setGamma!(edge, 0.5)
+    end
+end
+SUITE["fitDiscreteFixed"]["JC69"] = @benchmarkable fitDiscrete($net_dna, :JC69, $dna_dat, $dna_weights; optimizeQ=false, optimizeRVAS=false)
+SUITE["fitDiscreteFixed"]["HKY85"] = @benchmarkable fitDiscrete($net_dna, :HKY85, $dna_dat, $dna_weights; optimizeQ=false, optimizeRVAS=false)
+
+## fitDiscrete benchmarks: same data, with `optimizeQ` and `optimizeRVAS` both true
+SUITE["fitDiscrete"]["ERSM"] = @benchmarkable fitDiscrete($net_dat, :ERSM, $species_alone, $dat_alone; optimizeQ=true, optimizeRVAS=true)
+SUITE["fitDiscrete"]["BTSM"] = @benchmarkable fitDiscrete($net_dat, :BTSM, $species_alone, $dat_alone; optimizeQ=true, optimizeRVAS=true)
+SUITE["fitDiscrete"]["JC69"] = @benchmarkable fitDiscrete($net_dna, :JC69, $dna_dat, $dna_weights; optimizeQ=true, optimizeRVAS=true)
+SUITE["fitDiscrete"]["HKY85"] = @benchmarkable fitDiscrete($net_dna, :HKY85, $dna_dat, $dna_weights; optimizeQ=true, optimizeRVAS=true)
+
+# If a cache of tuned parameters already exists, use it, otherwise, tune and cache
+# the benchmark parameters. Reusing cached parameters is faster and more reliable
+# than re-tuning `SUITE` every time the file is included.
+# The cache lives next to this file (same `@__DIR__` as used for `fastafile`).
+paramspath = joinpath(@__DIR__, "params.json")
+
+if isfile(paramspath)
+    # only the `evals` parameter is taken from the cache
+    loadparams!(SUITE, BenchmarkTools.load(paramspath)[1], :evals);
+else
+    tune!(SUITE)
+    BenchmarkTools.save(paramspath, params(SUITE));
+end
+
+# NOTE(review): the logger is restored as soon as this file has been loaded. If
+# the benchmark harness runs `SUITE` after including this file, @warn/@info
+# messages are no longer hidden during the timed runs -- confirm this is intended.
+global_logger(old_logger) #restores typical logging at end of benchmarks
@@ -0,0 +1,18 @@
+#!/bin/bash
+# This benchmark compares the speed of your current version of PhyloNetworks to a
+# version in a previous commit: it checks out the old commit, runs the
+# benchmarks, then returns to where you started and runs them again.
+
+# To use, enter PhyloNetworks' benchmark directory
+# (for example .julia/dev/PhyloNetworks/benchmark) and run:
+#     bash compareCommits.sh oldCommitNumber
+
+# variables:
+#    oldCommitNumber: the hash of the earlier git commit (anything that
+#                     `git checkout` accepts, e.g. a tag or branch, also works)
+
+if [ "$#" -ne 1 ]; then
+    echo "usage: bash compareCommits.sh oldCommitNumber" >&2
+    exit 1
+fi
+
+# Remember where to come back to: the current branch name, or the commit hash
+# when HEAD is detached (in that case there is no branch name to return to).
+currBranch=$(git symbolic-ref --quiet --short HEAD || git rev-parse HEAD) || exit 1
+
+# Stop if the old commit cannot be checked out (unknown commit, uncommitted
+# changes, ...): otherwise the current version would be benchmarked twice.
+git checkout "$1" || exit 1
+julia runBenchmark.jl
+git checkout "$currBranch" || exit 1
+julia runBenchmark.jl
@@ -0,0 +1,6 @@
+using Pkg
+# Sets up the development environment in which the benchmarks are run.
+# Set the PHYLONETWORKS_BENCHMARK_ENV environment variable to the path of your
+# own environment; if it is not set, the original hard-coded path is used.
+Pkg.activate(get(ENV, "PHYLONETWORKS_BENCHMARK_ENV",
+                 "/Users/cora/.julia/environments/net"))
+using PkgBenchmark
+using BenchmarkTools
+using PhyloNetworks
+# Runs the benchmark suite defined for the PhyloNetworks package.
+benchmarkpkg("PhyloNetworks")
@@ -1,16 +1,16 @@
 [deps]
+BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433"
 # will be added by Travis as being developed:
 # PhyloNetworks = "33ad39ac-ed31-50eb-9b15-43d0656eaa72"
-# packages used in the manual pages:
-CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 # will be added to track master version in make.jl:
 # PhyloPlots = "c0d5b6db-e3fc-52bc-a87d-1d050989ed3b"
 RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 
 [compat]
-Documenter = "~0.21"
+Documenter = "~0.22"
@@ -24,6 +24,7 @@ makedocs(
             "Multiple Alleles" => "man/multiplealleles.md",
             "Continuous Trait Evolution" => "man/trait_tree.md",
             "Parsimony on networks" => "man/parsimony.md",
+            "Discrete Trait Evolution" => "man/fitDiscrete.md",
         ],
         "Library" => [
             "Public" => "lib/public.md",
 
@@ -20,6 +20,12 @@
   3. run `deploydocs(...)` also from Documenter:
      to push the files on github, gh-pages branch.
 
+for now, docstrings are automatically used to build an entry for
+- each internal thing that has a docstring (e.g. not exported in `src/PhyloNetworks.jl`)
+- each public *type*
+Therefore: any public *function* needs to be manually listed in `docs/src/lib/public.md`,
+in a section to get a nice organization of all these manual entries.
+
 ## The "Documenter md" format
 
 ### Note on the format
 
@@ -47,7 +47,8 @@ Pages = [
     "man/bootstrap.md",
     "man/multiplealleles.md",
     "man/trait_tree.md",
-    "man/parsimony.md"
+    "man/parsimony.md",
+    "man/fitDiscrete.md"
 ]
 Depth = 3
 ```
 
@@ -8,6 +8,7 @@ See [Internal Documentation](@ref) for documentation on internal functions.
 DocTestSetup = quote
     using PhyloNetworks
 end
+DocTestFilters = r" PhyloNetworks .*:\d+"
 ```
 
 ```@contents
@@ -52,11 +53,14 @@ getNodeAges
 pairwiseTaxonDistanceMatrix
 biconnectedComponents
 blobDecomposition
+getlabels
+nparams
 ```
 
 ## data and topology read/write
 
 ```@docs
+readfastatodna
 readTopology
 readTopologyLevel1
 readInputTrees
@@ -130,15 +134,17 @@ vcv
 ```@docs
 parsimonySoftwired
 parsimonyGF
-nStates
 Q
-P
 randomTrait
 randomTrait!
 fitDiscrete
 maxParsimonyNet
+nstates
+setrates!
+setalpha!
 ```
 
 ```@meta
 DocTestSetup = nothing
+DocTestFilters = nothing
 ```
@@ -0,0 +1,137 @@
+# Discrete Trait Evolution
+
+With a phylogenetic network structure inferred, we can now estimate how quickly traits
+have evolved over time using a likelihood model. These traits should be discrete 
+characteristics of a species such as feather color, diet type, or DNA in aligned 
+genetic sequences.
+
+## Discrete Trait Data
+
+As with continuous trait evolution, we assume a fixed network, correctly rooted, 
+with branch lengths proportional to calendar time. We start with a network, then
+add data about the tips of this network. We allow data of two types.
+1. A vector of species names with a data frame of traits:
+
+```@setup fitDiscrete
+using PhyloNetworks, DataFrames
+mkpath("../assets/figures")
+```
+
+```@example fitDiscrete
+#read in network
+simple_net = readTopology("(A:3.0,(B:2.0,(C:1.0,D:1.0):1.0):1.0);");
+
+#read in trait data
+simple_species = ["C","A","B","D"]
+simple_dat = DataFrame(trait=["hi","lo","lo","hi"])
+```
+
+If your species names and trait data are in the same data frame, read in your data 
+frame then subset the data like this:
+```@example fitDiscrete
+dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"])
+simple_species = dat[:species]
+simple_dat = DataFrame(trait = dat[:trait])
+```
+
+2. To use DNA data, read in the network structure, then start with a fasta 
+file. Read the data from this file using the `readfastatodna` function. This 
+creates a data frame of DNA data and a vector of DNA pattern weights.
+
+```@example fitDiscrete
+#read in network
+dna_net = readTopology("((((((((((((((Ae_caudata_Tr275:1.0,Ae_caudata_Tr276:1.0):1.0,Ae_caudata_Tr139:1.0):1.0)#H1:1.0::0.6,((((((Ae_longissima_Tr241:1.0,Ae_longissima_Tr242:1.0):1.0,Ae_longissima_Tr355:1.0):1.0,(Ae_sharonensis_Tr265:1.0,Ae_sharonensis_Tr264:1.0):1.0):1.0,((Ae_bicornis_Tr408:1.0,Ae_bicornis_Tr407:1.0):1.0,Ae_bicornis_Tr406:1.0):1.0):1.0,((Ae_searsii_Tr164:1.0,Ae_searsii_Tr165:1.0):1.0,Ae_searsii_Tr161:1.0):1.0):1.0)#H2:1.0::0.6):1.0,(((Ae_umbellulata_Tr266:1.0,Ae_umbellulata_Tr257:1.0):1.0,Ae_umbellulata_Tr268:1.0):1.0,#H1:1.0::0.4):1.0):1.0,((Ae_comosa_Tr271:1.0,Ae_comosa_Tr272:1.0):1.0,(((Ae_uniaristata_Tr403:1.0,Ae_uniaristata_Tr357:1.0):1.0,Ae_uniaristata_Tr402:1.0):1.0,Ae_uniaristata_Tr404:1.0):1.0):1.0):1.0,(((Ae_tauschii_Tr352:1.0,Ae_tauschii_Tr351:1.0):1.0,(Ae_tauschii_Tr180:1.0,Ae_tauschii_Tr125:1.0):1.0):1.0,(#H2:1.0::0.4,((((Ae_mutica_Tr237:1.0,Ae_mutica_Tr329:1.0):1.0,Ae_mutica_Tr244:1.0):1.0,Ae_mutica_Tr332:1.0):1.0)#H4:1.0::0.6):1.0):1.0):1.0,(((T_boeoticum_TS8:1.0,(T_boeoticum_TS10:1.0,T_boeoticum_TS3:1.0):1.0):1.0,T_boeoticum_TS4:1.0):1.0,((T_urartu_Tr315:1.0,T_urartu_Tr232:1.0):1.0,(T_urartu_Tr317:1.0,T_urartu_Tr309:1.0):1.0):1.0):1.0):1.0,(((((Ae_speltoides_Tr320:1.0,Ae_speltoides_Tr323:1.0):1.0,Ae_speltoides_Tr223:1.0):1.0,Ae_speltoides_Tr251:1.0):1.0):1.0,#H4:1.0::0.4):1.0):1.0):1.0,Ta_caputMedusae_TB2:1.0):1.0,S_vavilovii_Tr279:1.0):1.0,Er_bonaepartis_TB1:1.0):1.0,H_vulgare_HVens23:1.0);");
+
+#read in dna data
+fastafile = joinpath(dirname(pathof(PhyloNetworks)), "..","examples",
+"Ae_bicornis_Tr406_Contig10132.aln")
+dna_dat, dna_weights = readfastatodna(fastafile, true);
+```
+
+## Choosing a Substitution Model
+
+After reading in your data, choose a model to describe how evolutionary changes 
+(or substitutions, in the case of DNA) happened over time. We offer a selection 
+of Markov substitution models to describe the evolutionary process.
+
+### Generic Trait Models  
+
+These models work well for any type of trait we may want to model. For general
+trait types, use one of these three models:    
+`:ERSM` Equal Rates Substitution Model  
+`:BTSM` Binary Trait Substitution Model    
+`:TBTSM` Two Binary Trait Substitution Model    
+
+### DNA-Specific Models  
+
+The DNA-specific models are optimized for aligned sequence data. We offer JC69 
+and HKY85 models in both relative and absolute versions. The JC69 model was 
+developed by Jukes and Cantor in 1969 and uses one rate for all types of substitutions. 
+The HKY85 model was developed in 1985 by Hasegawa, Kishino, & Yano. It treats 
+transitions differently from transversions.    
+`:JC69` Jukes Cantor 69 Model    
+`:HKY85` Hasegawa, Kishino and Yano 1985     
+
+## Running FitDiscrete
+
+To infer evolutionary rates, run the `fitDiscrete` function on the network and data. 
+It will calculate the maximum likelihood score of a network given one or more 
+discrete trait characters at the tips. Along each edge, evolutionary changes
+are modeled with a continuous time Markov model, with parameters estimated by 
+maximizing the likelihood. At each hybrid node, the trait is assumed to be 
+inherited from the immediate parent (or parents, in the case of a hybrid edge).
+If there is a hybrid edge, the trait is modeled according to the parents' weighted 
+average genetic contributions, as measured by inheritance gamma γ. The model 
+currently ignores incomplete lineage sorting.
+
+### General Trait Data
+
+```@example fitDiscrete
+s1 = fitDiscrete(simple_net, :ERSM, simple_species, simple_dat; optimizeQ=false, 
+optimizeRVAS=false)
+s2 = fitDiscrete(simple_net, :BTSM, simple_species, simple_dat; optimizeQ=false, 
+optimizeRVAS=false)
+```
+In this `fitDiscrete` call, we do not optimize rates or allow for rate variation
+across sites.
+
+If `optimizeQ = true`, the `fitDiscrete` function estimates the evolutionary rate
+or rates. Because we didn't allow for rate variation across sites in these models, 
+we do not optimize the way rates may vary across trait types.
+
+```@example fitDiscrete
+s3 = fitDiscrete(simple_net, :ERSM, simple_species, simple_dat; optimizeQ=true, 
+optimizeRVAS=true)
+s4 = fitDiscrete(simple_net, :BTSM, simple_species, simple_dat; optimizeQ=true, 
+optimizeRVAS=true)
+```
+
+### DNA Data
+
+For DNA data, use `:JC69` or `:HKY85` models. 
+```@example fitDiscrete
+d1 = fitDiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeQ=false, 
+optimizeRVAS=false)
+d2 = fitDiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV; optimizeQ=false, 
+optimizeRVAS=false)
+```
+In these `fitDiscrete` models, we do not optimize rates (`optimizeQ=false`), but
+we do allow for rate variation across sites.
+
+### Rate Variation Across Sites
+
+In its default version, `fitDiscrete` does not allow for rate variation across sites.
+To allow for rate variation across sites in your estimate of evolutionary rates 
+(or rate variation across trait types, in the case of general trait types), 
+include `:RV`. If you include `:RV` and `optimizeRVAS = true`, the model will 
+not only allow for rate variation, but it will also optimize how rates vary across 
+sites.
+
+Here we optimize the evolutionary rates for the DNA data. Rates are allowed to vary
+across sites (`:RV`), but how they vary is not optimized (`optimizeRVAS=false`):    
+```@example fitDiscrete
+d3 = fitDiscrete(dna_net, :JC69, dna_dat, dna_weights, :RV; optimizeQ=true, 
+optimizeRVAS=false)
+d4 = fitDiscrete(dna_net, :HKY85, dna_dat, dna_weights, :RV; optimizeQ=true, 
+optimizeRVAS=false)
+```
@@ -0,0 +1,12 @@
+>test_1
+GCGCCGAG
+>test_2
+GCACCGAG
+>test_3
+GCACCGAG
+>test_4
+GTACCGAG
+>test_5
+GTACCGAG
+>test_6
+GTACCGGG
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+BenchmarkTools 0.2`
	`2`	`+PkgBenchmark 0.2`
	`3`	`+Logging`