diff --git a/test/data/NC_001884-orfs.faa b/test/data/NC_001884-orfs.faa new file mode 100644 index 0000000..e69de29 diff --git a/test/findorfstest.jl b/test/findorfstest.jl index 6b95ad4..7e17f91 100644 --- a/test/findorfstest.jl +++ b/test/findorfstest.jl @@ -1,43 +1,57 @@ -@testitem "findorfs tests" begin +@testitem "findorfs tests" default_imports=false begin # cd(@__DIR__) # using GeneFinder: findorfs, NaiveFinder, ORF # A random seq to start - - using BioSequences: @dna_str - using GeneFinder: findorfs, NaiveFinder, ORF, source - # using GeneFinder: findorfs, NaiveFinder, ORF, Features, STRAND_POS, STRAND_NEG, source, sequence, groupname + using Test, BioSequences, GeneFinder seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA" - orf01 = ORF{4,NaiveFinder}("seq01", 1, 33, STRAND_POS, 1, Features((score = 0.0,)), nothing) # ORF{4,NaiveFinder}("phi", 7, 15, '+', 1, Features((score = 0,)), nothing) + orf01 = ORF{4,NaiveFinder}("seq01", 1, 33, STRAND_POS, 1, Features((score = 0.0,)), nothing) # ORF{4,NaiveFinder}("phi", 7, 15, STRAND_POS, 1, Features((score = 0,))) # source(orf01) # GeneFinder.source(orf01) - orfs = findorfs(seq01) - @view(seq01[begin:end]) - seq01 + orfs01 = findorfs(seq01) + # @view(seq01[begin:end]) + getindex(seq01, orf01.first:orf01.last) + + orfs01test = [ + ORF{4,NaiveFinder}("seq01", 1, 33, STRAND_POS, 1, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq01", 4, 33, STRAND_POS, 1, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq01", 8, 22, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq01", 12,29, STRAND_POS, 3, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq01", 16,33, STRAND_POS, 1, Features((score = 0.0,)), nothing) + ] + + @test string(orfs01) == string(orfs01test) # println returns nothing, so compare the printed text instead + @test length(orfs01) == 5 + + # known failures # seq01[orf01] # sequence(orf01) == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG" - # groupname(orf01) == "seq01" - # @test orfs01 == [ - # 
ORF{4,NaiveFinder}("seq01", 1, 33, '+', 1, seq01[1:33], Dict(:score => 0.0), nothing), - # ORF{4,NaiveFinder}("seq01", 4, 33, '+', 1, seq01[4:33], Dict(:score => 0.0), nothing), - # ORF{4,NaiveFinder}("seq01", 8, 22, '+', 2, seq01[8:22], Dict(:score => 0.0), nothing), - # ORF{4,NaiveFinder}("seq01", 12, 29, '+', 3, seq01[12:29], Dict(:score => 0.0), nothing), - # ORF{4,NaiveFinder}("seq01", 16, 33, '+', 1, seq01[16:33], Dict(:score => 0.0), nothing) - # ] - - # @test length(orfs01) == 5 -end - # > 180195.SAMN03785337.LFLS01000089 -> finds only 1 gene in Prodigal (from Pyrodigal tests) - # seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC" - # orfs02 = findorfs(seq02, NaiveFinder()) + seq02 = 
dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC" + orfs02 = findorfs(seq02, finder=NaiveFinder) + + @test length(orfs02) == 12 - # @test length(orfs02) == 12 - # @test orfs02 == [ORF{NaiveFinder}(29:40, '+', 2, nothing), ORF{NaiveFinder}(137:145, '+', 2, nothing), ORF{NaiveFinder}(164:184, '+', 2, nothing), ORF{NaiveFinder}(173:184, '+', 2, nothing), ORF{NaiveFinder}(236:241, '+', 2, nothing), ORF{NaiveFinder}(248:268, '+', 2, nothing), ORF{NaiveFinder}(362:373, '+', 2, nothing), ORF{NaiveFinder}(470:496, '+', 2, nothing), ORF{NaiveFinder}(551:574, '+', 2, nothing), ORF{NaiveFinder}(569:574, '+', 2, nothing), ORF{NaiveFinder}(581:601, '+', 2, nothing), ORF{NaiveFinder}(695:706, '+', 2, nothing)] + orfs02test = [ + ORF{4,NaiveFinder}("seq02", 29, 40, STRAND_POS, 2 , Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 137,145, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 164,184, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 173,184, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 236,241, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 248,268, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 362,373, STRAND_POS, 2, 
Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 470,496, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 551,574, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 569,574, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 581,601, STRAND_POS, 2, Features((score = 0.0,)), nothing), + ORF{4,NaiveFinder}("seq02", 695,706, STRAND_POS, 2, Features((score = 0.0,)), nothing) + ] + + @test string(orfs02) == string(orfs02test) # println returns nothing, so compare the printed text instead # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271 # Pyrodigal predicts 2 genes from this sequence: @@ -47,7 +61,7 @@ end # seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC" # orfs03 = findorfs(seq03, NaiveFinder(), minlen=75) # @test length(orfs03) == 9 - # @test orfs03 == [ORF{NaiveFinder}(37:156, '+', 1, nothing), ORF{NaiveFinder}(194:268, '-', 2, nothing), ORF{NaiveFinder}(194:283, '-', 2, 
nothing), ORF{NaiveFinder}(249:347, '+', 3, nothing), ORF{NaiveFinder}(426:590, '+', 3, nothing), ORF{NaiveFinder}(565:657, '+', 1, nothing), ORF{NaiveFinder}(650:727, '-', 2, nothing), ORF{NaiveFinder}(786:872, '+', 3, nothing), ORF{NaiveFinder}(887:976, '-', 2, nothing)] + # @test orfs03 == [ORF{NaiveFinder}(37:156, STRAND_POS, 1), ORF{NaiveFinder}(194:268, '-', 2), ORF{NaiveFinder}(194:283, '-', 2), ORF{NaiveFinder}(249:347, STRAND_POS, 3), ORF{NaiveFinder}(426:590, STRAND_POS, 3), ORF{NaiveFinder}(565:657, STRAND_POS, 1), ORF{NaiveFinder}(650:727, '-', 2), ORF{NaiveFinder}(786:872, STRAND_POS, 3), ORF{NaiveFinder}(887:976, '-', 2)] #|-> This occured in Pyrodigal # Lambda phage tests # Compare to https://github.com/jonas-fuchs/viral_orf_finder/blob/master/orf_finder.py @@ -60,27 +74,4 @@ end # NC_001416 = fasta2bioseq("data/NC_001416.1.fasta")[1] # NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), minlen=75) # @test length(NC_001416_orfs) == 885 -# end - -# @testitem "getorfs dna" default_imports=false begin - -# using BioSequences: @dna_str, DNAAlphabet -# using GeneFinder: NaiveFinder, getorfs, findorfs, ORF -# using Test: @test - -# seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA" -# orfseqs = getorfs(seq01, DNAAlphabet{4}(), NaiveFinder()) - -# # @test length(orfseqs) == 5 -# # @test orfseqs[1] == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG" -# end - -# @testitem "getorfs proteins" begin -# using BioSequences, GeneFinder, Test - -# seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA" -# aas = getorfs(seq01, AminoAcidAlphabet(), NaiveFinder()) - -# @test length(aas) == 5 -# @test aas[1] == aa"MMHACMLVTS*" -# end \ No newline at end of file +end \ No newline at end of file