Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving ORF as an AbstractGenomicInterval{T} #34

Merged
merged 40 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
16edd60
First steps towards ORFs as GenomicIntervals
camilogarciabotero May 18, 2024
d9cc396
Some io fixes, and tests
camilogarciabotero May 18, 2024
87b49c3
Some attempts to correct tests
camilogarciabotero May 21, 2024
9132e11
refactor: Remove unused code in runtests.jl
camilogarciabotero May 23, 2024
8e138ba
Relax some tests
camilogarciabotero May 23, 2024
b412355
Move get_variable_name outside loop
camilogarciabotero May 25, 2024
0aaf4e2
Major refactor of the ORF type and also add some groundwork for RBS
camilogarciabotero Jun 3, 2024
14fff61
Update deps
camilogarciabotero Jun 3, 2024
4dc47a6
Improve getindex with some types
camilogarciabotero Jun 3, 2024
61ac5c7
Up translate and other utils are now more stable
camilogarciabotero Jun 3, 2024
27ca3f8
Update the min_len into minlen
camilogarciabotero Jun 3, 2024
4336542
Remove unused function
camilogarciabotero Jun 3, 2024
0516508
Update deps
camilogarciabotero Jun 3, 2024
f17a1f5
Refactor finders with correct score calculation
camilogarciabotero Jun 5, 2024
41e509b
chore: Update Julia version to 1.10.4
camilogarciabotero Jun 5, 2024
140b672
Update README
camilogarciabotero Jun 8, 2024
8cbd961
Manifest update
camilogarciabotero Jun 8, 2024
56d3f1c
Create Features struct and include it in the ORF as the NamedTuple field
camilogarciabotero Jun 8, 2024
fd839c9
Refactor code to improve precompile file size and loading speed
camilogarciabotero Jun 8, 2024
d0cf754
Update io methods
camilogarciabotero Jun 8, 2024
2ac4ce9
Remove getorfs.jl
camilogarciabotero Jun 8, 2024
ba9e75e
Update finder methods with the Feature struct
camilogarciabotero Jun 8, 2024
80a0594
Update lors from BMC taking only one argument
camilogarciabotero Jun 8, 2024
44c791c
Update naivecolletor.jl to new BMC losr
camilogarciabotero Jun 8, 2024
7663ab6
Update iscoding method, so that it takes an ORF directly
camilogarciabotero Jun 9, 2024
b01af90
Update score method to handle directly the score field
camilogarciabotero Jun 9, 2024
784c1d5
Update finder methods kwargs to only use the scheme kwargs
camilogarciabotero Jun 9, 2024
01c3e3e
After fixing the lors from BMC package the lordr is also updated, req…
camilogarciabotero Jun 9, 2024
24b05b5
Update to BMC v0.10.1
camilogarciabotero Jun 9, 2024
2047656
Update some docstring in lordr
camilogarciabotero Jun 9, 2024
f21c4d9
Update deps
camilogarciabotero Jul 1, 2024
91cb0bb
Update getindex of ORFs
camilogarciabotero Jul 1, 2024
5235e69
reduce ORF type fields: get rid of seq
camilogarciabotero Jul 1, 2024
4607f96
Add new internal methods for calling var names and symbol
camilogarciabotero Jul 1, 2024
6419219
Update docs simple coding rule
camilogarciabotero Jul 1, 2024
91e2450
Up deps and extras
camilogarciabotero Jul 1, 2024
f7ae859
Add YAML extra
camilogarciabotero Jul 1, 2024
0ffa402
Update gitignore and delete Manifest
camilogarciabotero Jul 1, 2024
929f482
Update actions cache and cov
camilogarciabotero Jul 1, 2024
67e046d
refactor: Simplify NaiveCollector createorfs function
camilogarciabotero Jul 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 51 additions & 1 deletion Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@

julia_version = "1.10.3"
manifest_format = "2.0"
project_hash = "b881dd48d80d6b19dfa1756a0b2d6e2e42d93e33"
project_hash = "514456eec9673906fe896a3ae36685951a71c19f"

[[deps.Automa]]
deps = ["PrecompileTools", "TranscodingStreams"]
git-tree-sha1 = "588e0d680ad1d7201d4c6a804dcb1cd9cba79fbb"
uuid = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
version = "1.0.3"

[[deps.Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

[[deps.BioGenerics]]
deps = ["TranscodingStreams"]
git-tree-sha1 = "7bbc085aebc6faa615740b63756e4986c9e85a70"
Expand Down Expand Up @@ -42,6 +45,25 @@ git-tree-sha1 = "e32a61f028b823a172c75e26865637249bb30dff"
uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
version = "5.1.3"

[[deps.Compat]]
deps = ["TOML", "UUIDs"]
git-tree-sha1 = "b1c55339b7c6c350ee89f2c1604299660525b248"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "4.15.0"

[deps.Compat.extensions]
CompatLinearAlgebraExt = "LinearAlgebra"

[deps.Compat.weakdeps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[[deps.DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.20"

[[deps.Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand All @@ -56,11 +78,35 @@ weakdeps = ["BioSequences"]
[deps.FASTX.extensions]
BioSequencesExt = "BioSequences"

[[deps.GenomicFeatures]]
deps = ["BioGenerics", "DataStructures", "IntervalTrees"]
git-tree-sha1 = "720844f71f118dd2ab4898a89b2b4feca7730d81"
uuid = "899a7d2d-5c61-547b-bef9-6698a8d05446"
version = "3.0.0"

[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[deps.IntervalTrees]]
git-tree-sha1 = "dc3b97bb5c9cb7c437f74027309f2c2f09a82aaf"
uuid = "524e6230-43b7-53ae-be76-1e9e4d08d11b"
version = "1.1.0"

[[deps.IterTools]]
git-tree-sha1 = "42d5f897009e7ff2cf88db414a389e5ed1bdd023"
uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
version = "1.10.0"

[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"

[[deps.OrderedCollections]]
git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.6.3"

[[deps.PrecompileTools]]
deps = ["Preferences"]
git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f"
Expand Down Expand Up @@ -117,6 +163,10 @@ git-tree-sha1 = "29509c4862bfb5da9e76eb6937125ab93986270a"
uuid = "7200193e-83a8-5a55-b20d-5d36d44a0795"
version = "1.1.2"

[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[[deps.Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

Expand Down
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ version = "0.4.0"
BioMarkovChains = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"

[compat]
BioMarkovChains = "0.9"
BioSequences = "3"
FASTX = "2"
GenomicFeatures = "3"
IterTools = "1.4"
PrecompileTools = "1"
julia = "1"
Expand Down
1 change: 1 addition & 0 deletions src/GeneFinder.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ using BioMarkovChains: BioMarkovChain, dnaseqprobability, ECOLICDS, ECOLINOCDS,
using FASTX: FASTAReader, sequence
using IterTools: takewhile, iterated
using PrecompileTools: @setup_workload, @compile_workload
using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, leftposition, rightposition, strand, metadata, STRAND_POS

include("algorithms/naivefinder.jl")
include("types.jl")
Expand Down
20 changes: 12 additions & 8 deletions src/algorithms/naivefinder.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
export naivefinder, naivefinderscored, log_odds_ratio_decision_rule, lordr
export naivefinder, naivefinderscored, log_odds_ratio_decision_rule, lordr, lors

const lors = log_odds_ratio_score

"""
locationiterator(sequence::NucleicSeqOrView{DNAAlphabet{N}}; alternative_start::Bool=false) where {N}
Expand Down Expand Up @@ -48,20 +50,21 @@ function naivefinder(
sequence::NucleicSeqOrView{DNAAlphabet{N}};
alternative_start::Bool = false,
min_len::Int64 = 6,
scheme::Union{Nothing, Function} = nothing,
kwargs...
) where {N}
seqlen = length(sequence)
framedict = Dict(0 => 3, 1 => 1, 2 => 2)
orfs = Vector{ORF}()
seqname = get_variable_name(sequence)
for strand in ('+', '-')
seq = strand == '-' ? reverse_complement(sequence) : sequence

@inbounds for location in @views _locationiterator(seq; alternative_start)
if length(location) >= min_len
frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
start = strand == '+' ? location.start : seqlen - location.stop + 1
stop = start + length(location) - 1
push!(orfs, ORF(start:stop, strand, frame, 0.0))
push!(orfs, ORF(seqname, start:stop, strand, frame, scheme)) #NaiveFinder()
end
end
end
Expand Down Expand Up @@ -91,12 +94,13 @@ function naivefinderscored(
sequence::NucleicSeqOrView{DNAAlphabet{N}};
alternative_start::Bool = false,
min_len::Int64 = 6,
scoringscheme::BioMarkovChain = ECOLICDS, # ECOLINOCDS
model::BioMarkovChain = ECOLICDS,
kwargs...
) where {N}
seqlen = length(sequence)
framedict = Dict(0 => 3, 1 => 1, 2 => 2)
orfs = Vector{ORF}()
seqname = get_variable_name(sequence)
for strand in ('+', '-')
seq = strand == '-' ? reverse_complement(sequence) : sequence

Expand All @@ -106,8 +110,8 @@ function naivefinderscored(
start = strand == '+' ? location.start : seqlen - location.stop + 1
stop = start + length(location) - 1
# score = -10log10(dnaseqprobability(seq[start:stop], scoringscheme))
score = log_odds_ratio_score(seq[start:stop], scoringscheme)
push!(orfs, ORF(start:stop, strand, frame, score))
score = log_odds_ratio_score(seq[start:stop], model)
push!(orfs, ORF(seqname, start:stop, strand, frame, NaiveFinderScored(), lors, score))
end
end
end
Expand Down Expand Up @@ -154,7 +158,7 @@ sequence = dna"ATGGCATCTAG"
iscoding(sequence) # Returns: true or false
```
"""
function log_odds_ratio_decision_rule( #log_odds_ratio_decision lordr/cudr/kfdr/aadr
function lordr( #log_odds_ratio_decision, also lordr/cudr/kfdr/aadr
sequence::NucleicSeqOrView{DNAAlphabet{N}};
codingmodel::BioMarkovChain = ECOLICDS,
noncodingmodel::BioMarkovChain = ECOLINOCDS,
Expand All @@ -176,7 +180,7 @@ function log_odds_ratio_decision_rule( #log_odds_ratio_decision lordr/cudr/kfdr/
end
end

const lordr = log_odds_ratio_decision_rule # criteria
const log_odds_ratio_decision_rule = lordr # criteria

## Another alternative:

Expand Down
18 changes: 14 additions & 4 deletions src/extended.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import Base: isless, iterate, sort, getindex


Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
# Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
Base.isless(a::ORF, b::ORF) = isless(a.first:a.last, b.first:b.last)
# Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
# Base.isless(a::ORF, b::ORF) = isless(a.score, b.score)

# Base.sort(v::Vector{<:ORF}; kwargs...) = sort(v, by = _orf_sort_key)
Expand All @@ -12,11 +14,19 @@ Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
#TODOs: how to make the getindex method more robust? confronting the frames?
# Base.getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A} = orf.strand == '+' ? (@view sequence[orf.location]) : reverse_complement(@view sequence[orf.location])

# function getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A}
# if orf.strand == '+'
# return @view sequence[orf.location]
# else
# return reverse_complement(@view sequence[orf.location])
# end
# end

function getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A}
if orf.strand == '+'
return @view sequence[orf.location]
if orf.strand == '+' || orf.strand == STRAND_POS
return @view sequence[orf.first:orf.last]
else
return reverse_complement(@view sequence[orf.location])
return reverse_complement(@view sequence[orf.first:orf.last])
end
end

Expand Down
1 change: 1 addition & 0 deletions src/findorfs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ function findorfs(
end



### Some possible ideas:

# function findorfs(
Expand Down
Loading
Loading