@@ -1371,6 +1371,207 @@ function readMultiTopology(file::AbstractString, fast::Bool=true)
1371
1371
return vnet
1372
1372
end
1373
1373
1374
+
1375
+ @doc raw """
1376
+ readnexus_treeblock(filename, treereader=readTopology, args...;
1377
+ reticulate=true, stringmodifier=[r"#(\d +)" => s"#H\1 "])
1378
+
1379
+ Read the *first* "trees" block of a nexus-formatted file, using the translate
1380
+ table if present, and return a vector of `HybridNetwork`s.
1381
+ Information inside `[&...]` are interpreted as comments and are discarded by the
1382
+ default tree reader. Optional arguments `args` are passed to the tree reader.
1383
+
1384
+ For the nexus format, see
1385
+ [Maddison, Swofford & Maddison (1997)](https://doi.org/10.1093/sysbio/46.4.590).
1386
+
1387
+ Unless `reticulate` is false, the following is done to read networks with reticulations.
1388
+
1389
+ Prior to reading each phylogeny, each instance of `#number` is replaced by
1390
+ `#Hnumber` to fit the standard extended Newick format at hybrid nodes.
1391
+ This behavior can be changed with option `stringmodifier`, which should be a
1392
+ vector of pairs accepted by `replace`.
1393
+
1394
+ Inheritance γ values are assumed to be given within "comment" blocks at *minor*
1395
+ hybrid edges (cut as tips to form the extended Newick) like this for example,
1396
+ as output by bacter ([Vaughan et al. 2017](http://dx.doi.org/10.1534/genetics.116.193425)):
1397
+
1398
+ #11[&conv=0, relSize=0.08, ...
1399
+
1400
+ or like this, as output by SpeciesNetwork
1401
+ ([Zhang et al. 2018](https://doi.org/10.1093/molbev/msx307)):
1402
+
1403
+ #H11[&gamma=0.08]
1404
+
1405
+ In this example, the corresponding edge to hybrid H11 has γ=0.08.
1406
+ """
1407
+ function readnexus_treeblock (file:: AbstractString , treereader= readTopology:: Function , args... ;
1408
+ reticulate= true , stringmodifier= [r" #(\d +)\b " => s " #H\1 " ]) # add H
1409
+ vnet = HybridNetwork[]
1410
+ rx_start = r" ^\s *begin\s +trees\s *;" i
1411
+ rx_end = r" ^\s *end\s *;" i
1412
+ rx_tree = r" ^\s *tree\s +[^(]+(\( [^;]*;)" i
1413
+ treeblock = false
1414
+ translate = false
1415
+ id2name = nothing
1416
+ open (file) do s
1417
+ numl = 0
1418
+ for line in eachline (s)
1419
+ numl += 1
1420
+ if treeblock
1421
+ occursin (rx_end, line) && break # exit if end of tree block
1422
+ elseif occursin (rx_start, line) # start reading tree block
1423
+ treeblock = true
1424
+ line, translate, id2name = readnexus_translatetable (s)
1425
+ else continue
1426
+ end
1427
+ # now we are inside the treeblock
1428
+ m = match (rx_tree, line)
1429
+ isnothing (m) && continue
1430
+ phy = m. captures[1 ]
1431
+ if reticulate # fix #Hn and extract γ from #Hn[&conv=n, relSize=γ]
1432
+ phy = replace (phy, stringmodifier... )
1433
+ id2gamma = readnexus_extractgamma (phy)
1434
+ end
1435
+ net = nothing
1436
+ try
1437
+ net = treereader (phy, args... )
1438
+ catch err
1439
+ warnmsg = " skipped phylogeny on line $(numl) of file\n $file \n " *
1440
+ (:msg in fieldnames (typeof (err)) ? err. msg : string (typeof (err)))
1441
+ @warn warnmsg
1442
+ continue # don't push to vnet
1443
+ end
1444
+ reticulate && readnexus_assigngammas! (net, id2gamma)
1445
+ if translate
1446
+ for tip in net. leaf
1447
+ id = parse (Int, tip. name)
1448
+ tip. name = id2name[id]
1449
+ end
1450
+ end
1451
+ push! (vnet, net)
1452
+ end
1453
+ end
1454
+ return vnet
1455
+ end
1456
+
1457
+ """
1458
+ readnexus_translatetable(io)
1459
+
1460
+ Read translate table from IO object `io`, whose first non-empty line should contain
1461
+ "translate". Then each line should have "number name" and the end of the table
1462
+ is indicated by a ;. Output tuple:
1463
+ - line that was last read, and is not part of the translate table, taken from `io`
1464
+ - translate: boolean, whether a table was successfully read
1465
+ - id2name: dictionary mapping number to name.
1466
+ """
1467
+ function readnexus_translatetable (io)
1468
+ rx_translate = r" ^\s *translate" i
1469
+ rx_emptyline = r" ^\s *$"
1470
+ line = readline (io)
1471
+ translate = false
1472
+ id2name = Dict {Int,String} ()
1473
+ while true
1474
+ if occursin (rx_translate, line)
1475
+ translate = true
1476
+ break
1477
+ elseif occursin (rx_emptyline, line)
1478
+ line = readline (io)
1479
+ else
1480
+ translate = false
1481
+ break
1482
+ end
1483
+ end
1484
+ if translate # then read the table
1485
+ rx_end = r" ^\s *;"
1486
+ rx_idname = r" \s *(\d +)\s +(\w +)\s *([,;]?)"
1487
+ while true
1488
+ line = readline (io)
1489
+ occursin (rx_emptyline, line) && continue
1490
+ if occursin (rx_end, line)
1491
+ line = readline (io)
1492
+ break
1493
+ end
1494
+ m = match (rx_idname, line)
1495
+ if isnothing (m)
1496
+ @warn " problem reading the translate table at line $line .\n numbers won't be translated to names"
1497
+ translate = false
1498
+ break
1499
+ end
1500
+ push! (id2name, parse (Int,m. captures[1 ]) => String (m. captures[2 ]))
1501
+ if m. captures[3 ] == " ;"
1502
+ line = readline (io)
1503
+ break
1504
+ end
1505
+ end
1506
+ end
1507
+ return line, translate, id2name
1508
+ end
1509
+
1510
+ """
1511
+ readnexus_extractgamma(nexus_string)
1512
+
1513
+ Extract γ from comments and return a dictionary hybrid number ID => γ, from
1514
+ one single phylogeny given as a string.
1515
+ The output from BEAST2 uses this format for reticulations at *minor* edges,
1516
+ as output by bacter ([Vaughan et al. 2017](http://dx.doi.org/10.1534/genetics.116.193425)):
1517
+
1518
+ #11[&conv=0, relSize=0.08, ...
1519
+
1520
+ or as output by SpeciesNetwork ([Zhang et al. 2018](https://doi.org/10.1093/molbev/msx307)):
1521
+
1522
+ #H11[&gamma=0.08]
1523
+
1524
+ The function below assumes that the "H" was already added back if not present
1525
+ already (from bacter), like this:
1526
+
1527
+ #H11[&conv=0, relSize=0.19, ...
1528
+
1529
+ The bacter format is tried first. If this format doesn't give any match,
1530
+ then the SpeciesNetwork format is tried next.
1531
+ See [`readnexus_assigngammas!`](@ref).
1532
+ """
1533
+ function readnexus_extractgamma (nexstring)
1534
+ rx_gamma_v1 = r" #H(\d +)\[ &conv=\d +,\s *relSize=(\d +\.\d +)"
1535
+ rx_gamma_v2 = r" #H(\d +)\[ &gamma=(\d +\.\d +)"
1536
+ id2gamma = Dict {Int,Float64} ()
1537
+ # first: try format v1
1538
+ for m in eachmatch (rx_gamma_v1, nexstring)
1539
+ push! (id2gamma, parse (Int, m. captures[1 ]) => parse (Float64,m. captures[2 ]))
1540
+ end
1541
+ if isempty (id2gamma) # then try format v2
1542
+ for m in eachmatch (rx_gamma_v2, nexstring)
1543
+ push! (id2gamma, parse (Int, m. captures[1 ]) => parse (Float64,m. captures[2 ]))
1544
+ end
1545
+ end
1546
+ return id2gamma
1547
+ end
1548
+
1549
+ """
1550
+ readnexus_assigngammas!(net, d::Dict)
1551
+
1552
+ Assign d[i] as the `.gamma` value of the minor parent edge of hybrid "Hi",
1553
+ if this hybrid node name is found, and if its minor parent doesn't already
1554
+ have a non-missing γ. See [`readnexus_extractgamma`](@ref)
1555
+ """
1556
+ function readnexus_assigngammas! (net:: HybridNetwork , id2gamma:: Dict )
1557
+ for (i,gam) in id2gamma
1558
+ nam = " H$i "
1559
+ j = findfirst (n -> n. name == nam, net. hybrid)
1560
+ if isnothing (j)
1561
+ @warn " didn't find any hybrid node named $nam ."
1562
+ continue
1563
+ end
1564
+ hn = net. hybrid[j]
1565
+ he = getparentedgeminor (hn)
1566
+ if he. gamma == - 1.0
1567
+ setGamma! (he, gam)
1568
+ else
1569
+ @warn " hybrid edge number $(he. number) has γ=$(he. gamma) . won't erase with $gam ."
1570
+ end
1571
+ end
1572
+ return net
1573
+ end
1574
+
1374
1575
"""
1375
1576
writeMultiTopology(nets, file_name; append=false)
1376
1577
writeMultiTopology(nets, IO)
0 commit comments