Skip to content

Commit

Permalink
Enhanced loop speed of allele profile extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
infinity-a11y authored Mar 2, 2024
1 parent 3ae9a91 commit c63a80c
Showing 1 changed file with 8 additions and 17 deletions.
25 changes: 8 additions & 17 deletions execute/single_typing.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,21 @@ frag_data_list <- list()
allele_vector <- integer(length(frag_files))

if(sum(unname(base::sapply(frag_files, file.size)) <= 100) / length(frag_files) < 0.5) {
for (i in 1:length(frag_files)) {
for (i in seq_along(frag_files)) {
# Extract the base filename without extension
frag_filename <-
gsub(".frag", "", tools::file_path_sans_ext(basename(frag_files[i])))
frag_filename <- gsub(".frag", "", tools::file_path_sans_ext(basename(frag_files[i])))

# Check if the file is effectively empty (smaller than 100 bytes)
if (file.info(frag_files[i])$size < 100) {
# Handle empty file: Insert NA in the allele_vector and create an empty data frame
allele_vector[i] <- NA
# Handle empty file: Insert NA in the allele_vector
allele_vector[[i]] <- NA
} else {
# Read the .frag.gz file into a data table
frag_data <-
data.table::fread(frag_files[i], sep = "\t", header = FALSE)
# Read only the necessary columns (3rd and 7th) from the .frag.gz file into a data table
frag_data <- data.table::fread(frag_files[i], select = c(3, 7), sep = "\t", header = FALSE)

# Extract the third and seventh columns
frag_data <- frag_data[, .(V3, V7)]

# Find the row with the highest value in the third field
max_row <- which.max(frag_data$V3)

# Extract the value from the seventh field in the max row
allele_vector[i] <- frag_data$V7[max_row]
# Find the row with the highest value in the third field and extract the value from the seventh field
allele_vector[[i]] <- frag_data[which.max(frag_data$V3), V7]
}

}

allele_vector <- as.integer(allele_vector)
Expand Down

0 comments on commit c63a80c

Please sign in to comment.