From 409084b4f659c8fc4572ce1f8b72bf6f6ba04f52 Mon Sep 17 00:00:00 2001 From: Carl Suster Date: Wed, 22 May 2024 15:24:26 +1000 Subject: [PATCH] Adjust vignette for recent changes --- vignettes/phylepic.Rmd | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/vignettes/phylepic.Rmd b/vignettes/phylepic.Rmd index 4cf1bf6..e3a9a2e 100644 --- a/vignettes/phylepic.Rmd +++ b/vignettes/phylepic.Rmd @@ -45,22 +45,23 @@ The tree comes with some metadata that has been manipulated for illustration. Th ```{r} metadata <- read.csv(system.file("enteric_metadata.csv", package = "phylepic")) -head(metadata) +str(metadata) ``` The only requirements here are that we have a column that corresponds to the tip labels from the tree (here called `name`), and one that has dates represented as `Date` objects. We'll also indicate the two columns with categorical data by converting them to factors: ```{r} -metadata <- mutate( - metadata, - across(c(source, cluster), factor), - collection_date = as.Date(collection_date) -) +metadata <- + metadata |> + mutate( + across(c(source, cluster), factor), + collection_date = as.Date(collection_date) + ) ``` ## Basic plotting -To start with, we can pull out one clade from the tree to make a small plot: +To start with, we should get the tree looking how we want it. You might want to root or re-root the tree (see `ape::root`), reorganise the tip order (`ape::ladderize`), or prune away parts of the tree that aren't relevant. For now, let's extract a single clade from our larger tree: ```{r} clade.parent <- ape::getMRCA(tree, c("NSW-0324", "NSW-0330")) @@ -68,7 +69,7 @@ clade <- ape::extract.clade(tree, clade.parent) plot(clade) ``` -The `phylepic()` function joins a tree with its metadata and does some consistency checks. The resulting `phylepic` object has a plot method that guesses sensible defaults: +The `phylepic()` function joins a tree with its metadata and does some consistency checks. 
It can help you to drop tips without metadata if desired. It's also where we identify which columns of our metadata correspond to the tip labels and dates. The resulting `phylepic` object has a plot method that guesses sensible defaults: ```{r} phylepic(clade, metadata, name, collection_date) |> plot() @@ -134,8 +135,6 @@ cluster_scale <- scale_colour_brewer( na.translate = FALSE, # we'll use this scale later for both fill and colour aesthetics aesthetics = c("fill", "colour"), - # make the dots on the legend bigger so we can see the colours - guide = guide_legend(override.aes = list(size = 5)) ) ``` @@ -144,13 +143,14 @@ To manipulate the tree, we use `plot_tree`, which creates the base plot to which ```{r} plot_tree(phydata) + # `filter = leaf` in ggraph geoms means that they only draw the tips - ggraph::geom_node_point(aes(filter = leaf, colour = cluster), size = 2) + + ggraph::geom_node_point(aes(filter = leaf, colour = cluster), size = 2, show.legend = FALSE) + cluster_scale ``` +Note that the guides from each panel will be merged in the final plot. If we don't put `show.legend = FALSE` here, the guide for the cluster aesthetic will show circles instead of squares for the legend keys. All of the columns from our metadata frame are available (for the tips) to use in ggraph's aesthetic mappings. -Next, we'll want to hide the redundant coloured tiles describing the cluster. To do this, we use `plot_bars` to override the default behaviour of creating a bar for each factor in the metadata frame. This helper takes arguments in the form ` = `: +Next, we'll want to hide the redundant column of coloured tiles corresponding to the cluster. Remember that this column was drawn because the default `plot.bars` panel makes a column of tiles for each factor column in our data frame. We can use `plot_bars` to override the default behaviour. 
This helper takes arguments in the form `<column name> = <scale definition>`: ```{r, fig.width=11, fig.height=9} plot( @@ -158,7 +158,7 @@ plot( plot.tree = function(x) { # this function will be called with x = phydata plot_tree(x) + - ggraph::geom_node_point(aes(filter = leaf, colour = cluster), size = 2) + + ggraph::geom_node_point(aes(filter = leaf, colour = cluster), size = 2, show.legend = FALSE) + cluster_scale }, plot.bars = function(x) { @@ -173,6 +173,7 @@ plot( drop = FALSE, na.translate = FALSE ), + # if we wanted more tile columns, we would add them here ) }, scale.date = date_scale, @@ -220,7 +221,7 @@ plot( phydata, plot.tree = function(x) { plot_tree(x) + - ggraph::geom_node_point(aes(filter = leaf, colour = cluster), size = 2) + + ggraph::geom_node_point(aes(filter = leaf, colour = cluster), size = 2, show.legend = FALSE) + cluster_scale }, plot.bars = plot_bars( @@ -243,3 +244,9 @@ plot( ``` Because the cluster colour scales are consistent across panels, the legend guides are automatically combined. + +The warning messages coming from ggplot are annotated to indicate which panel was responsible. Here the warnings are telling us that: + + * 11 tree tips couldn't be drawn because they have no cluster, + * 8 rows (a subset of those 11) have known dates that are outside the calendar axis range, but the arrows couldn't be drawn because they have no cluster and we configured the cluster scale with `na.translate = FALSE`, and + * 28 rows didn't contribute to the epidemic curve because their date was missing or out of range.