diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf..db43d6a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,11 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^dev$ +^doc$ +^LICENSE\.md$ +^LICENSE$ +^revdep$ +^cran-comments\.md$ +^.github$ +^package-code\.Rmd$ +^docs$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..ff3c357 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,55 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, development] + pull_request: + branches: [main, development] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::rcmdcheck + any::covr + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + + - name: Test coverage + run: | + covr::codecov(token = "${{ secrets.CODECOV_TOKEN }}") + shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index bec28a2..cc83549 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,43 @@ Package: BIGr -Title: (B)reeding (I)nsight (G)enomics Functions for Polypoid and Diploid Species -Version: 0.4.2 -Author: Alexander M. 
Sandercock, Cristiane Taniguti, Josue Chinchilla-Vargas, Shufen Chen, Manoj Sapkota, Meng Lin, Dongyan Zhao, and Breeding Insight Team +Title: Breeding Insight Genomics Analysis Functions for Polyploid and Diploid Species +Version: 0.5.1 +Authors@R: c(person(given='Alexander', + family='Sandercock', + email='ams866@cornell.edu', + role=c('cre','aut')), + person(given='Cristiane', + family='Taniguti', + role = 'aut'), + person(given='Josue', + family='Chinchilla-Vargas', + role='aut'), + person(given='Shufen', + family='Chen', + role='ctb'), + person(given='Manoj', + family='Sapkota', + role='ctb'), + person(given='Meng', + family='Lin', + role='ctb'), + person(given='Dongyan', + family='Zhao', + role='ctb'), + person('Cornell', 'University', + role=c('cph'), + comment = "Breeding Insight")) Maintainer: Alexander M. Sandercock -Description: This package contains the functions developed within Breeding Insight to analyze diploid and polyploid breeding and genetic data. -License: Apache License 2.0 +Description: Functions developed within Breeding Insight to analyze + diploid and polyploid breeding and genetic data. 'BIGr' provides the + ability to filter VCF files, extract SNPs from the DArT MADC file, and + manipulate genotype data for both diploid and polyploid species. It + also serves as the core dependency for the 'BIGapp' Shiny app, which + provides a user-friendly interface for performing routine genotype + analysis tasks such as dosage calling, filtering, PCA, GWAS, and + Genomic Prediction. 
+License: Apache License (>= 2) +URL: https://github.com/Breeding-Insight/BIGr +BugReports: https://github.com/Breeding-Insight/BIGr/issues Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 @@ -20,6 +53,9 @@ Imports: tidyr (>= 1.3.1), vcfR (>= 1.15.0), Biostrings, - pwalign + pwalign, + janitor, + quadprog, + tibble Remotes: RdMacros: Rdpack diff --git a/NAMESPACE b/NAMESPACE index 4f433b5..19ba188 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,29 +1,29 @@ # Generated by roxygen2: do not edit by hand -export(add_ref_alt) +export(allele_freq_poly) export(calculate_Het) export(calculate_MAF) export(capture_diversity.Gmat) +export(check_homozygous_trios) export(check_ped) -export(compare) -export(create_VCF_body) +export(check_replicates) export(dosage2vcf) export(dosage_ratios) export(filterVCF) export(flip_dosage) export(get_OffTargets) export(get_countsMADC) -export(get_ref_alt_hap_seq) export(imputation_concordance) -export(loop_though_dartag_report) export(madc2vcf) export(merge_MADCs) -export(merge_counts) +export(solve_composition_poly) export(updog2vcf) import(doParallel) import(dplyr) import(foreach) +import(janitor) import(parallel) +import(quadprog) import(tibble) import(tidyr) import(vcfR) @@ -35,11 +35,16 @@ importFrom(pwalign,pairwiseAlignment) importFrom(readr,read_csv) importFrom(reshape2,dcast) importFrom(reshape2,melt) +importFrom(stats,cor) importFrom(stats,lm) importFrom(stats,qt) importFrom(stats,sd) importFrom(stats,setNames) +importFrom(utils,packageVersion) +importFrom(utils,read.csv) importFrom(utils,read.table) +importFrom(utils,tail) +importFrom(utils,write.csv) importFrom(utils,write.table) importFrom(vcfR,extract.gt) importFrom(vcfR,maf) diff --git a/NEWS.md b/NEWS.md index 6bc2865..db635bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,3 +7,17 @@ * updog2vcf function option to output compressed VCF (.vcf.gz) - set as default * remove need for defining ploidy * add metadata at the VCF header + + +# BIGr 0.5.0 + + * Add 
imputation_concordance function to estimate accuracy of imputed and original dataset + * Add get_OffTargets function to extract target and off-target SNPs from a MADC file + * Add merge_MADCs function to merge two or more MADC files together + * Improved documentation and examples for all functions + * Add tests for all functions + +# BIGr 0.5.1 + +* Improvements of testthat tests +* Add check_replicates and check_homozygous_trios for pedigree relationship quality check diff --git a/R/breedtools_functions.R b/R/breedtools_functions.R index 75c7b14..a7bd41b 100644 --- a/R/breedtools_functions.R +++ b/R/breedtools_functions.R @@ -1,12 +1,35 @@ #' Computes allele frequencies for specified populations given SNP array data #' -#' @param geno matrix of genotypes coded as the dosage of allele B {0, 1, 2, ..., ploidy} +#' @param geno matrix of genotypes coded as the dosage of allele B \code{{0, 1, 2, ..., ploidy}} #' with individuals in rows (named) and SNPs in columns (named) #' @param populations list of named populations. Each population has a vector of IDs #' that belong to the population. Allele frequencies will be derived from all animals #' @param ploidy integer indicating the ploidy level (default is 2 for diploid) #' @return data.frame consisting of allele_frequencies for populations (columns) for #' each SNP (rows) +#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific +#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44. 
+#' +#' @examples +#' # Example inputs +#' geno_matrix <- matrix( +#' c(4, 1, 4, 0, # S1 +#' 2, 2, 1, 3, # S2 +#' 0, 4, 0, 4, # S3 +#' 3, 3, 2, 2, # S4 +#' 1, 4, 2, 3),# S5 +#' nrow = 4, ncol = 5, byrow = FALSE, # individuals=rows, SNPs=cols +#' dimnames = list(paste0("Ind", 1:4), paste0("S", 1:5)) +#' ) +#' +#'pop_list <- list( +#' PopA = c("Ind1", "Ind2"), +#' PopB = c("Ind3", "Ind4") +#' ) +#' +#' allele_freqs <- allele_freq_poly(geno = geno_matrix, populations = pop_list, ploidy = 4) +#' print(allele_freqs) +#' #' @export allele_freq_poly <- function(geno, populations, ploidy = 2) { @@ -37,16 +60,20 @@ allele_freq_poly <- function(geno, populations, ploidy = 2) { } -# Performs whole genome breed composition prediction. -# -# @param Y numeric vector of genotypes (with names as SNPs) from a single animal. -# coded as dosage of allele B {0, 1, 2} -# @param X numeric matrix of allele frequencies from reference animals -# @param p numeric indicating number of breeds represented in X -# @param names character names of breeds -# @return data.frame of breed composition estimates -# @import quadprog -# @export +#' Performs whole genome breed composition prediction. +#' +#' @param Y numeric vector of genotypes (with names as SNPs) from a single animal. +#' coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}} +#' @param X numeric matrix of allele frequencies from reference animals +#' @param p numeric indicating number of breeds represented in X +#' @param names character names of breeds +#' @return data.frame of breed composition estimates +#' @import quadprog +#' @importFrom stats cor +#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific +#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44. +#' +#' @noRd QPsolve <- function(Y, X) { # Remove NAs from Y and remove corresponding @@ -90,7 +117,7 @@ QPsolve <- function(Y, X) { #' batch of animals. 
#' #' @param Y numeric matrix of genotypes (columns) from all animals (rows) in population -#' coded as dosage of allele B {0, 1, ..., ploidy} +#' coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}} #' @param X numeric matrix of allele frequencies (rows) from each reference panel (columns). Frequencies are #' relative to allele B. #' @param ped data.frame giving pedigree information. Must be formatted "ID", "Sire", "Dam" @@ -107,6 +134,37 @@ QPsolve <- function(Y, X) { #' @return A data.frame or list of data.frames (if groups is !NULL) with breed/ancestry composition #' results #' @import quadprog +#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific +#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44. +#' +#' @examples +#' # Example inputs for solve_composition_poly (ploidy = 4) +#' +#' # (This would typically be the output from allele_freq_poly) +#' allele_freqs_matrix <- matrix( +#' c(0.625, 0.500, +#' 0.500, 0.500, +#' 0.500, 0.500, +#' 0.750, 0.500, +#' 0.625, 0.625), +#' nrow = 5, ncol = 2, byrow = TRUE, +#' dimnames = list(paste0("SNP", 1:5), c("VarA", "VarB")) +#' ) +#' +#' # Validation Genotypes (individuals x SNPs) +#' val_geno_matrix <- matrix( +#' c(2, 1, 2, 3, 4, # Test1 dosages for SNP1-5 +#' 3, 4, 2, 3, 0), # Test2 dosages for SNP1-5 +#' nrow = 2, ncol = 5, byrow = TRUE, +#' dimnames = list(paste0("Test", 1:2), paste0("SNP", 1:5)) +#' ) +#' +#' # Calculate Breed Composition +#' composition <- solve_composition_poly(Y = val_geno_matrix, +#' X = allele_freqs_matrix, +#' ploidy = 4) +#' print(composition) +#' #' @export solve_composition_poly <- function(Y, X, diff --git a/R/check_ped.R b/R/check_ped.R index 7453a35..1830c29 100644 --- a/R/check_ped.R +++ b/R/check_ped.R @@ -129,11 +129,14 @@ check_ped <- function(ped.file) { missing_parents <- results$missing_parents messy_parents <- results$messy_parents errors <- results$dependencies + # Adding the dataframes as 
an output list + output.results <- list() #### Print errors and cycles #### # Print repeated ids if any if (nrow(repeated_ids) > 0) { cat("Repeated ids found:\n") print(repeated_ids) + output.results$repeated_ids <- repeated_ids } else { cat("No repeated ids found.\n") } @@ -141,6 +144,7 @@ check_ped <- function(ped.file) { if (nrow(messy_parents) > 0) { cat("Ids found as male and female parent:\n") print(messy_parents) + output.results$messy_parents <- messy_parents } else { cat("No ids found as male and female parent.\n") } @@ -148,6 +152,7 @@ check_ped <- function(ped.file) { if (nrow(missing_parents) > 0) { cat("Missing parents found:\n") print(missing_parents) + output.results$missing_parents <- missing_parents } else { cat("No missing parents found.\n") } @@ -160,5 +165,6 @@ check_ped <- function(ped.file) { } else { cat("No dependencies found.\n") } + return(results) } diff --git a/R/filterVCF.R b/R/filterVCF.R index b345477..348ef38 100644 --- a/R/filterVCF.R +++ b/R/filterVCF.R @@ -51,7 +51,7 @@ filterVCF <- function(vcf.file, #Should allow for any INFO field to be entered to be filtered # Import VCF (can be .vcf or .vcf.gz) - if (class(vcf.file) != "vcfR"){ + if (!inherits(vcf.file, "vcfR")) { vcf <- read.vcfR(vcf.file) } else { vcf <- vcf.file @@ -303,18 +303,18 @@ filterVCF <- function(vcf.file, } ### Export the modified VCF file (this exports as a .vcf.gz, so make sure to have the name end in .vcf.gz) cat("Exporting VCF\n") - if (!class(vcf.file) == "vcfR"){ - if (!is.null(output.file)){ - output_name <- paste0(output.file,".vcf.gz") + if (!inherits(vcf.file, "vcfR")) { + if (!is.null(output.file)) { + output_name <- paste0(output.file, ".vcf.gz") vcfR::write.vcf(vcf, file = output_name) - }else{ + } else { return(vcf) } - }else{ - if (!is.null(output.file)){ - output_name <- paste0(output.file,"_filtered.vcf.gz") + } else { + if (!is.null(output.file)) { + output_name <- paste0(output.file, "_filtered.vcf.gz") vcfR::write.vcf(vcf, file = output_name) 
- }else{ + } else { return(vcf) } } } diff --git a/R/get_OffTargets.R b/R/get_OffTargets.R index 4fa23d0..1d40513 100644 --- a/R/get_OffTargets.R +++ b/R/get_OffTargets.R @@ -4,13 +4,19 @@ #' @param botloci path to file containing the target IDs that were designed in the bottom strand #' @param hap_seq path to haplotype DB fasta file #' @param rm_multiallelic_SNP logical. If TRUE, SNP with more than one alternative base will be removed. If FALSE, check `multiallelic_SNP_dp_thr` specs -#' @param multiallelic_SNP_dp_thr nnumerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold `multiallelic_SNP_dp_thr` combined with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. -#' @param multiallelic_SNP_sample_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold combined with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. +#' @param multiallelic_SNP_dp_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold `multiallelic_SNP_dp_thr` combined +#' with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic +#' aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. +#' @param multiallelic_SNP_sample_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold combined with minimum number of +#' samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. 
If the threshold does not eliminate the multiallelic aspect of the marker, +#' the marker is discarded. This is likely to happen to paralogous sites. #' @param n.cores number of cores to be used in the parallelization #' @param out_vcf output VCF file name #' @param verbose print metrics on the console #' +#' @importFrom utils packageVersion read.csv #' @import vcfR +#' #' @export get_OffTargets <- function(madc = NULL, botloci = NULL, @@ -83,7 +89,7 @@ get_OffTargets <- function(madc = NULL, #' #' @import parallel #' -#' @export +#' @noRd loop_though_dartag_report <- function(report, botloci, hap_seq, n.cores=1, verbose = TRUE){ hap_seq <- get_ref_alt_hap_seq(hap_seq) @@ -139,7 +145,7 @@ loop_though_dartag_report <- function(report, botloci, hap_seq, n.cores=1, verbo #' @param hap_seq haplotype DB #' @param nsamples number of samples #' -#' @export +#' @noRd add_ref_alt <- function(one_tag, hap_seq, nsamples) { # Add ref and alt @@ -195,7 +201,7 @@ add_ref_alt <- function(one_tag, hap_seq, nsamples) { #' @importFrom Biostrings DNAString reverseComplement #' @importFrom pwalign pairwiseAlignment nucleotideSubstitutionMatrix #' -#' @export +#' @noRd compare <- function(one_tag, botloci){ cloneID <- one_tag$CloneID[1] @@ -293,7 +299,7 @@ compare <- function(one_tag, botloci){ #' #' @param hap_seq haplotype db #' -#' @export +#' @noRd get_ref_alt_hap_seq <- function(hap_seq){ headers <- hap_seq$V1[grep(">",hap_seq$V1)] headers <- gsub(">", "", headers) @@ -320,14 +326,19 @@ get_ref_alt_hap_seq <- function(hap_seq){ #' #' @param csv CSV file generated by loop_though_dartag_report #' @param rm_multiallelic_SNP logical. If TRUE, SNP with more than one alternative base will be removed. If FALSE, check `multiallelic_SNP_dp_thr` specs -#' @param multiallelic_SNP_dp_thr numerical. 
If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold `multiallelic_SNP_dp_thr` combined with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. -#' @param multiallelic_SNP_sample_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold `multiallelic_SNP_dp_thr` combined with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. +#' @param multiallelic_SNP_dp_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum +#' depth by tag threshold combined with minimum number of samples (`multiallelic_SNP_dp_thr` + `multiallelic_SNP_sample_thr`) +#' to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker +#' is discarded. This is likely to happen to paralogous sites. +#' @param multiallelic_SNP_sample_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold `multiallelic_SNP_dp_thr` combined +#' with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic +#' aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. #' @param n.cores number of cores to be used in the parallelization #' @param verbose print metrics on the console #' #' @import parallel #' -#' @export +#' @noRd create_VCF_body <- function(csv, rm_multiallelic_SNP = TRUE, multiallelic_SNP_dp_thr = 2, @@ -400,10 +411,14 @@ create_VCF_body <- function(csv, #' #' @param cloneID_unit one item of csv file split by cloneID #' @param rm_multiallelic_SNP logical. 
If TRUE, SNP with more than one alternative base will be removed. If FALSE, check `multiallelic_SNP_dp_thr` specs -#' @param multiallelic_SNP_dp_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold combined with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. -#' @param multiallelic_SNP_sample_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold `multiallelic_SNP_dp_thr` combined with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. +#' @param multiallelic_SNP_dp_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold combined with minimum number of samples +#' `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker +#' is discarded. This is likely to happen to paralogous sites. +#' @param multiallelic_SNP_sample_thr numerical. If `rm_multiallelic_SNP` is FALSE, set a minimum depth by tag threshold `multiallelic_SNP_dp_thr` combined +#' with minimum number of samples `multiallelic_SNP_sample_thr` to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic +#' aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites. 
#' -#' @export +#' @noRd merge_counts <- function(cloneID_unit, rm_multiallelic_SNP = FALSE, multiallelic_SNP_dp_thr = 0, multiallelic_SNP_sample_thr = 0){ #Get counts for target SNP diff --git a/R/imputation_concordance.R b/R/imputation_concordance.R index a52f7a3..f0f2f79 100644 --- a/R/imputation_concordance.R +++ b/R/imputation_concordance.R @@ -1,19 +1,32 @@ #' Calculate Concordance between Imputed and Reference Genotypes #' -#' This calculates the concordance between imputed and reference genotypes. It assumes that samples are rows and markers are columns. -#' It is recommended to use allele dosages (0,1,2) but will work with other formats. Missing data in reference or imputed genotypes -#' will not be considered for concordance if argument missing_code used. If a specific subset of markers should it can be provided as argument snps_2_exclude. +#' This function calculates the concordance between imputed and reference genotypes. It assumes that samples are rows and markers are columns. +#' It is recommended to use allele dosages (0, 1, 2) but will work with other formats. Missing data in reference or imputed genotypes +#' will not be considered for concordance if the `missing_code` argument is used. If a specific subset of markers should be excluded, +#' it can be provided using the `snps_2_exclude` argument. +#' +#' @param reference_genos A data frame containing reference genotype data, with rows as samples and columns as markers. Dosage format (0, 1, 2) is recommended. +#' @param imputed_genos A data frame containing imputed genotype data, with rows as samples and columns as markers. Dosage format (0, 1, 2) is recommended. +#' @param missing_code An optional value to specify missing data. If provided, loci with this value in either dataset will be excluded from the concordance calculation. +#' @param snps_2_exclude An optional vector of marker IDs to exclude from the concordance calculation. 
+#' @param verbose A logical value indicating whether to print a summary of the concordance results. Default is FALSE. +#' +#' @return A data frame with sample IDs and their concordance percentages. +#' The summary of the concordance percentages (minimum, quartiles, median, +#' mean, and maximum) is not returned; it is printed to the console only +#' when \code{verbose = TRUE}. +#' +#' +#' @details The function identifies common samples and markers between the reference and imputed genotype datasets. It calculates the percentage of matching genotypes for each sample, excluding missing data and specified markers. The concordance is reported as a percentage for each sample, along with a summary of the overall concordance distribution. #' -#' @param reference_genos Genotype data.frame with rows as samples and columns as markers. Dosage recommended. -#' @param imputed_genos Genotype data.frame with rows as samples and columns as markers. Dosage recommended. -#' @param missing_code Optional input to consider missing data to exclude in concordance calculation. -#' @param snps_2_exclude Optional input to exclude specific markers from concordance calculation. Single column of marker ids. -#' @param output Optional input to assign the output dataframe to a specific variable name. Default is "imputation_concordance" #' @import dplyr -#' @return 2 outputs: 1) A data frame with sample IDs and concordance percentages. 2) A summary of concordance percentages. 
#' @export #' -imputation_concordance <- function(reference_genos, imputed_genos, missing_code = NULL, snps_2_exclude = NULL, output = "imputation_concordance") { +imputation_concordance <- function(reference_genos, + imputed_genos, + missing_code = NULL, + snps_2_exclude = NULL, + verbose = FALSE) { # Find common IDs common_ids <- intersect(imputed_genos$ID, reference_genos$ID) @@ -48,16 +61,17 @@ imputation_concordance <- function(reference_genos, imputed_genos, missing_code Concordance = paste0(round(percentage_match * 100, 2), "%") ) - # Assign the result dataframe to the output variable - assign(output, result_df, envir = .GlobalEnv) - # Print mean concordance summary_concordance <- summary(percentage_match, na.rm = TRUE) * 100 names(summary_concordance) <- c("Min", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max") - cat("Concordance Summary:\n") - for (name in names(summary_concordance)) { - cat(name, ":", round(summary_concordance[name], 2), "%\n") + if (verbose) { + message("Concordance Summary:\n") + for (name in names(summary_concordance)) { + cat(name, ":", round(summary_concordance[name], 2), "%\n") + } } + + return(result_df) } diff --git a/R/merge_MADCs.R b/R/merge_MADCs.R index a0cd34c..3d0fa4d 100644 --- a/R/merge_MADCs.R +++ b/R/merge_MADCs.R @@ -15,6 +15,44 @@ ##' exist in different files. 
##' ##' @import dplyr +##' @importFrom utils tail write.csv +##' +##' @examples +##' # First generating example MADC files +##' temp_dir <- tempdir() +##' file1_path <- file.path(temp_dir, "madc1.csv") +##' file2_path <- file.path(temp_dir, "madc2.csv") +##' out_path <- file.path(temp_dir, "merged_madc.csv") +##' +##' # Data for file 1: Has SampleA and SampleB +##' df1 <- data.frame( +##' AlleleID = c("chr1.1_0001|Alt_0002", "chr1.1_0001|Ref_0001", "chr1.1_0001|AltMatch_0001"), +##' CloneID = c("chr1.1_0001", "chr1.1_0001", "chr1.1_0001"), +##' AlleleSequence = c("GGG", "AAA", "TTT"), +##' SampleA = c(10, 8, 0), +##' SampleB = c(5, 4, 9), +##' stringsAsFactors = FALSE, +##' check.names = FALSE +##' ) +##' write.csv(df1, file1_path, row.names = FALSE, quote = FALSE) +##' +##' # Data for file 2: Has SampleA (duplicate name) and SampleC, different rows +##' df2 <- data.frame( +##' AlleleID = c("chr1.1_0001|Alt_0002", "chr1.1_0001|Ref_0001", "chr1.1_0001|AltMatch_0001"), +##' CloneID = c("chr1.1_0001", "chr1.1_0001", "chr1.1_0001"), +##' AlleleSequence = c("GGG", "AAA", "TTT"), +##' SampleA = c(11, 7, 20), +##' SampleC = c(1, 2, 6), +##' stringsAsFactors = FALSE, +##' check.names = FALSE +##' ) +##' write.csv(df2, file2_path, row.names = FALSE, quote = FALSE) +##' +##' # 2. Run the merge function +##' # Use default suffixes (.x, .y) for the duplicated "SampleA" +##' merge_MADCs(madc_list = list(file1_path, file2_path), +##' out_madc = out_path) +##' ##' ##' @export merge_MADCs <- function(..., madc_list=NULL, out_madc=NULL, run_ids=NULL){ diff --git a/R/relationship_qc.R b/R/relationship_qc.R new file mode 100644 index 0000000..9cf5401 --- /dev/null +++ b/R/relationship_qc.R @@ -0,0 +1,177 @@ +#' Check homozygous loci in trios +#' +#' This function analyzes homozygous loci segregation in trios (parents and progeny) using genotype data from a VCF file. 
+#' It calculates the percentage of homozygous loci in the progeny that match the expected segregation patterns based on the tested parents. +#' +#' @param path.vcf A string specifying the path to the VCF file containing genotype data. +#' @param ploidy An integer specifying the ploidy level of the samples. Default is 4. +#' @param parents_candidates A character vector of parent sample names to be tested. Must be provided. +#' @param progeny_candidates A character vector of progeny sample names to be tested. Must be provided. +#' @param verbose A logical value indicating whether to print the number of combinations tested. Default is TRUE. +#' +#' @return A data frame with the following columns: +#' \itemize{ +#' \item \code{parent1}: The name of the first parent in the pair. +#' \item \code{parent2}: The name of the second parent in the pair. +#' \item \code{progeny}: The name of the progeny sample. +#' \item \code{homoRef_x_homoRef_n}: Number of loci where both parents are homozygous reference. +#' \item \code{homoRef_x_homoRef_match}: Percentage of matching loci in the progeny for homozygous reference parents. +#' \item \code{homoAlt_x_homoAlt_n}: Number of loci where both parents are homozygous alternate. +#' \item \code{homoAlt_x_homoAlt_match}: Percentage of matching loci in the progeny for homozygous alternate parents. +#' \item \code{homoRef_x_homoAlt_n}: Number of loci where one parent is homozygous reference and the other is homozygous alternate. +#' \item \code{homoRef_x_homoAlt_match}: Percentage of matching loci in the progeny for mixed homozygous parents. +#' \item \code{homoalt_x_homoRef_n}: Number of loci where one parent is homozygous alternate and the other is homozygous reference. +#' \item \code{homoalt_x_homoRef_match}: Percentage of matching loci in the progeny for mixed homozygous parents (alternate-reference). +#' \item \code{missing}: The number of loci with missing genotype data in the comparison. 
+#' }
+#'
+#' @details This function is designed to validate the segregation of homozygous loci in trios, ensuring that the progeny genotypes align with the expected patterns based on the parental genotypes. It requires both parent and progeny candidates to be specified. The function validates the ploidy level and ensures that all specified samples are present in the VCF file. The results include detailed statistics for each combination of parents and progeny. Reciprocal comparisons (e.g., A vs. B and B vs. A) and self-comparisons (e.g., A vs. A) are removed to avoid redundancy. Missing genotype data is also accounted for and reported in the results.
+#'
+#' @importFrom vcfR read.vcfR extract.gt
+#'
+#' @export
+check_homozygous_trios <- function(path.vcf, ploidy = 4, parents_candidates = NULL, progeny_candidates = NULL, verbose = TRUE) {
+
+  # Both candidate sets are required: every row of the result is a (parent1, parent2, progeny) trio
+  if (is.null(parents_candidates) || is.null(progeny_candidates)) {
+    stop("Please provide both parents and progeny candidates.")
+  }
+
+  # Load the VCF file
+  vcf <- read.vcfR(path.vcf, verbose = FALSE)
+
+  # Extract the genotype calls (character matrix: markers in rows, samples in columns)
+  GT <- extract.gt(vcf, element = "GT")
+  # Count alternate ("1") alleles per call; accepts "/" or "|" separators and keeps missing calls as NA
+  count_ones <- function(x) {
+    vapply(strsplit(x, "[/|]"), function(v) if (anyNA(v) || any(v == ".")) NA_integer_ else sum(v == "1"), integer(1))
+  }
+
+  # Alternate-allele dosage for every marker x sample; na.rm keeps missing calls from breaking the ploidy check
+  GT_counts <- apply(GT, c(1, 2), count_ones)
+  if (max(GT_counts, na.rm = TRUE) != ploidy) stop("Ploidy level is not correct, check the VCF file")
+
+  if (!all(parents_candidates %in% colnames(GT))) stop("Some parents are not in the VCF file")
+  if (!all(progeny_candidates %in% colnames(GT))) stop("Some progeny are not in the VCF file")
+
+  # Get all combinations of samples
+  combinations <- expand.grid(parents_candidates, parents_candidates)
+  filtered_combinations <- combinations[!duplicated(t(apply(combinations, 1, sort))), ] # remove reciprocal combinations
+  filtered_combinations <- filtered_combinations[filtered_combinations$Var1 != filtered_combinations$Var2, ] # remove self-comparisons
+
+  progeny_rep <- rep(progeny_candidates, each = nrow(filtered_combinations))
+  rownames(filtered_combinations) <- NULL
+  filtered_combinations <- cbind(filtered_combinations, progeny_rep)
+  colnames(filtered_combinations) <- c("parent1", "parent2", "progeny")
+  if(verbose) cat("Number of combinations tested: ", nrow(filtered_combinations), "\n")
+
+  # Dosages that define the two homozygous classes (all reference / all alternate)
+  homo1 <- 0
+  homo2 <- ploidy
+
+  matches <- mapply(function(parent1, parent2, progeny, homo1, homo2) {
+    # expand.grid() yields factors; index by name, because a factor index would be read as integer codes
+    gt1 <- GT_counts[, as.character(parent1)]
+    gt2 <- GT_counts[, as.character(parent2)]
+    gt3 <- GT_counts[, as.character(progeny)]
+
+    # Loci where both parents are homozygous, split by the four possible parental classes
+    homoRef_x_homoRef <- which(gt1 == homo1 & gt2 == homo1)
+    homoAlt_x_homoAlt <- which(gt1 == homo2 & gt2 == homo2)
+    homoRef_x_homoAlt <- which(gt1 == homo1 & gt2 == homo2)
+    homoalt_x_homoRef <- which(gt1 == homo2 & gt2 == homo1)
+    homoRef_x_homoRef_n <- length(homoRef_x_homoRef)
+    homoAlt_x_homoAlt_n <- length(homoAlt_x_homoAlt)
+    homoRef_x_homoAlt_n <- length(homoRef_x_homoAlt)
+    homoalt_x_homoRef_n <- length(homoalt_x_homoRef)
+    homoRef_x_homoRef_match <- ifelse(homoRef_x_homoRef_n > 0, round((sum(gt3[homoRef_x_homoRef] == homo1, na.rm = TRUE) / homoRef_x_homoRef_n) * 100, 2), NA)
+    homoAlt_x_homoAlt_match <- ifelse(homoAlt_x_homoAlt_n > 0, round((sum(gt3[homoAlt_x_homoAlt] == homo2, na.rm = TRUE) / homoAlt_x_homoAlt_n) * 100, 2), NA)
+    homoRef_x_homoAlt_match <- ifelse(homoRef_x_homoAlt_n > 0, round((sum(gt3[homoRef_x_homoAlt] == ploidy / 2, na.rm = TRUE) / homoRef_x_homoAlt_n) * 100, 2), NA)
+    homoalt_x_homoRef_match <- ifelse(homoalt_x_homoRef_n > 0, round((sum(gt3[homoalt_x_homoRef] == ploidy / 2, na.rm = TRUE) / homoalt_x_homoRef_n) * 100, 2), NA)
+
+    miss <- sum(is.na(gt1) | is.na(gt2) | is.na(gt3))
+
+    # Return the result
+    result <- c(
+      homoRef_x_homoRef_n, homoRef_x_homoRef_match, homoAlt_x_homoAlt_n,
+      homoAlt_x_homoAlt_match, homoRef_x_homoAlt_n, homoRef_x_homoAlt_match,
+      homoalt_x_homoRef_n, homoalt_x_homoRef_match, miss
+    )
+    return(result)
+  }, filtered_combinations$parent1, filtered_combinations$parent2, filtered_combinations$progeny, homo1, homo2)
+
+  all_comb <- cbind(filtered_combinations, t(matches))
+  # mapply() returns one column per trio, hence the transpose above; label the nine statistics
+  colnames(all_comb) <- c(
+    "parent1", "parent2", "progeny", "homoRef_x_homoRef_n", "homoRef_x_homoRef_match",
+    "homoAlt_x_homoAlt_n", "homoAlt_x_homoAlt_match", "homoRef_x_homoAlt_n",
+    "homoRef_x_homoAlt_match", "homoalt_x_homoRef_n", "homoalt_x_homoRef_match", "missing"
+  )
+
+  return(all_comb)
+}
+
+#' Compatibility between samples genotypes
+#'
+#' This function checks the compatibility between sample genotypes in a VCF file by comparing all pairs of samples.
+#'
+#' @param path.vcf A string specifying the path to the VCF file containing genotype data.
+#' @param select_samples An optional character vector of sample names to be selected for comparison. If NULL (default), all samples in the VCF file are used.
+#' @param verbose A logical value indicating whether to print the number of combinations tested. Default is TRUE.
+#'
+#' @return A data frame with four columns:
+#' \itemize{
+#' \item \code{sample1}: The name of the first sample in the pair.
+#' \item \code{sample2}: The name of the second sample in the pair.
+#' \item \code{\%_matching_genotypes}: The percentage of compatible genotypes between the two samples.
+#' \item \code{\%_missing_genotypes}: The percentage of missing genotypes in the comparison.
+#' }
+#'
+#' @details The function removes reciprocal comparisons (e.g., A vs. B and B vs. A) and self-comparisons (e.g., A vs. A) to avoid redundancy. Compatibility is calculated as the percentage of matching genotypes between two samples, excluding missing values. The percentage of missing genotypes is also reported for each pair.
#'
#' @importFrom vcfR read.vcfR extract.gt
#'
#' @export
check_replicates <- function(path.vcf, select_samples = NULL, verbose = TRUE) {
  # Load the VCF file
  vcf <- read.vcfR(path.vcf, verbose = FALSE)

  # Extract the genotype data
  GT <- extract.gt(vcf, element = "GT", convertNA = TRUE)

  # Mark fully missing calls as NA whatever the ploidy or phasing
  # ("./.", ".|.", "./././."). grepl() returns FALSE for NA, so this is safe
  # when the matrix already contains NA values.
  GT[grepl("^\\.([/|]\\.)*$", GT)] <- NA

  # Select samples
  if (is.null(select_samples)) {
    samples <- colnames(GT)
  } else {
    if (!all(select_samples %in% colnames(GT))) stop("Some samples are not in the VCF file")
    samples <- select_samples
  }

  # Get all combinations of samples
  combinations <- expand.grid(samples, samples)
  filtered_combinations <- combinations[!duplicated(t(apply(combinations, 1, sort))), ] # remove reciprocal combinations
  filtered_combinations <- filtered_combinations[filtered_combinations$Var1 != filtered_combinations$Var2, ] # remove self-comparisons

  # Nothing to compare with fewer than two distinct samples
  if (nrow(filtered_combinations) == 0) {
    stop("At least two distinct samples are required to compare replicates.")
  }

  if (verbose) cat("Number of combinations tested: ", nrow(filtered_combinations), "\n")

  # Percentage of identical calls and of missing calls for one pair. Both
  # percentages use the total number of variants as denominator, so sites
  # missing in either sample lower the matching percentage.
  compare_pair <- function(sample1, sample2) {
    gt1 <- GT[, sample1]
    gt2 <- GT[, sample2]

    compatible <- (sum(gt1 == gt2, na.rm = TRUE) / length(gt1)) * 100
    miss.perc <- (sum(is.na(gt1) | is.na(gt2)) / length(gt1)) * 100

    c(compatible = compatible, miss.perc = miss.perc)
  }

  # expand.grid() returns factors; convert to character before using the
  # values as column indices, otherwise R indexes by the factor's integer
  # codes and compares the wrong samples whenever select_samples is not the
  # leading columns of the VCF. USE.NAMES = FALSE keeps sample names out of
  # the result's dimnames so the row names of the pair table are retained.
  compatibility <- mapply(
    compare_pair,
    as.character(filtered_combinations$Var1),
    as.character(filtered_combinations$Var2),
    USE.NAMES = FALSE
  )

  result <- cbind(filtered_combinations, t(compatibility))
  colnames(result) <- c("sample1", "sample2", "%_matching_genotypes", "%_missing_genotypes")
  return(result)
}

#Internal Functions

globalVariables(c(
  "ALT", "AlleleID", "CHROM", "Data", "ID", "MarkerName", "POS",
  "QPseparate", "QPsolve_par", "REF", "Var1", "Variant", "geno",
  "ind", "ref", "row_name", "size", "snp"
))
#' Convert GT format to numeric dosage
#'
#' Splits each genotype string on the phased (`|`) or unphased (`/`) separator
#' and sums the numeric allele indices (for biallelic calls this is the number
#' of alternative alleles). A genotype that is `NA` or contains a missing
#' allele (`.`) yields `NA`.
#'
#' @param gt a genotype matrix with samples as columns and variants as rows
#' @return A numeric vector with one dosage per element of `gt`. Dimensions are
#'   not kept: a matrix input is returned as a plain vector in column-major
#'   order.
#' @noRd
convert_to_dosage <- function(gt) {
  # Split the genotype string
  alleles <- strsplit(gt, "[|/]")
  # Sum the alleles; vapply() guarantees a numeric result even for empty or
  # all-missing input
  vapply(alleles, function(x) {
    # "." alleles must be caught before as.numeric(): they would be coerced to
    # NA and dropped by na.rm, turning "./." into dosage 0
    if (anyNA(x) || any(x == ".")) {
      NA_real_
    } else {
      sum(as.numeric(x), na.rm = TRUE)
    }
  }, numeric(1))
}
b/inst/example_MADC_FixedAlleleID.csv @@ -0,0 +1,52 @@ +AlleleID,CloneID,AlleleSequence,Sample_1,Sample_2,Sample_3,Sample_4,Sample_5,Sample_6,Sample_7,Sample_8,Sample_9,Sample_10 +chr1.1_000194324|AltMatch_0001,chr1.1_000194324,CGAAATAATAACCCAAGTTCTGCCAGTTTATGTTAAAACTTTTTTTACAAGGTACAAGTTCGGTGACAAC,0,139,135,185,90,69,54,54,40,129 +chr1.1_000194324|Alt_0002,chr1.1_000194324,CGAAATAATAACCCAAGTTCTGCCAGTTTATGTTAAAACTTTTCTTACAAGGTACAAGTTCGGTGACAACTTAACAAGTAA,44,1,0,59,37,109,41,1,68,0 +chr1.1_000194324|Ref_0001,chr1.1_000194324,CGAAATAATAACCCAAGTTCTGCCAGTTTATGTTAAAACTTTTCTTACATGGTACAAGTTCGGTGACAACTTAACAAGTAA,152,171,160,0,71,60,95,147,49,163 +chr1.1_000309952|AltMatch_0001,chr1.1_000309952,TGGTATTGTATTAGATGAGATATTCTAACTTGTAAGGTAGAAATCTTGACCACTTATAAACACATATTCA,43,82,63,48,34,5,0,95,0,110 +chr1.1_000309952|Alt_0002,chr1.1_000309952,TGGTATTGTATTAGATGAGATATTCTAACTTGTAAGATTGAGATCTTGACCACTTATAAACACATATTCATATCATATGTA,0,0,0,0,0,0,0,0,0,0 +chr1.1_000309952|RefMatch_0001,chr1.1_000309952,TGGTATTGTATTAGATGAGATATTCTAACTTGTAAGGTGGAGATCTCGACGACTTATAAACACATATTCA,40,5,11,16,36,12,23,17,20,0 +chr1.1_000309952|Ref_0001,chr1.1_000309952,TGGTATTGTATTAGATGAGATATTCTAACTTGTAAGATTGAGATCTCGACCACTTATAAACACATATTCATATCATATGTA,0,0,0,0,0,0,0,0,0,0 +chr1.1_000452961|Alt_0002,chr1.1_000452961,TTACGAGATCGCGAAGTTCGTTCCTTTCTTTATCTTTCTTCTCTTTTACCCGACCGGCTCCCTGCAGACCAGAAAGCCCAA,0,0,0,0,0,0,0,0,0,0 +chr1.1_000452961|Ref_0001,chr1.1_000452961,TTACGAGATCGCGAAGCTCGTTCCTTTCTTTATCTTTCTTCTCTTTTACCCGACCGGCTCCCTGCAGACCAGAAAGCCCAA,0,0,0,0,0,0,0,0,0,0 +chr1.1_000532584|RefMatch_0001,chr1.1_000532584,CAACGGAACATATAAAGATATCCACTTCTCTTGGAGCTTGATAATACTTATAATGTTGGGGATTTGTGTT,0,0,0,157,0,197,0,0,0,0 +chr1.1_000532584|RefMatch_0002,chr1.1_000532584,CAACGGAACATATAAAGATATCCACTTCTCTTGGGGCTTGATAATACTTATAATGTTGGGGATTTGTGTT,0,0,0,0,0,0,0,0,143,0 +chr1.1_000532584|RefMatch_0003,chr1.1_000532584,CAACGGAACATATAAAGATATCCACTTTTCTCGGAGCTTGATAATACTTATAATGTTGGGGATTTGTGTT,0,0,0,0,0,0,0,0,1,0 
+chr1.1_000532584|Ref_0001,chr1.1_000532584,CAACGGAACATATAAAGATATCCACTTCTCTTGGAGCTTGATGATACTTATAATGTTGGGGATTTGTGTTTTGCAGGATTT,653,236,176,0,199,410,473,216,177,1 +chr1.1_000735393|Alt_0002,chr1.1_000735393,GACTCTTGGAAGGAAAATGGTTTTTCTAGGTAATTAAACTTCAATCAAAGTTACATATTTGACTCACTTCACTATTCTAAA,248,91,74,0,70,0,174,73,165,5 +chr1.1_000735393|Ref_0001,chr1.1_000735393,GACTCTTGGAAGGAAAATGGTTTTTCTAGGTAGTTAAACTTCAATCAAAGTTACATATTTGACTCACTTCACTATTCTAAA,76,278,164,188,168,202,53,197,170,225 +chr1.1_000837330|Alt_0002,chr1.1_000837330,CCTCTATCTAATAGAGAATATTGATTGGCTGAATGTTGACCATATTCCATGTACCCACTAGGGTTACCCCGTGGAGTCCAA,0,0,0,78,0,90,0,0,176,0 +chr1.1_000837330|Ref_0001,chr1.1_000837330,CCTCTATCTAATAGAGAATATTGATTGGCTGAATGTTGACCATATTCCCTGTACCCACTAGGGTTACCCCGTGGAGTCCAA,364,344,348,229,285,265,270,345,147,339 +chr1.1_000915014|Alt_0002,chr1.1_000915014,CCAGGCTTCTATATATACAATGATCAGATATGTTAAACCTAAGCTGCTCAGTGCTCCTTAACCCTAAGACACACAGAACCT,1,0,1,0,0,3,1,0,0,0 +chr1.1_000915014|Ref_0001,chr1.1_000915014,CCAGGCTTCTATATATACAATGATCAGATATGTTAAACCTAAGCTGCTCGGTGCTCCTTAACCCTAAGACACACAGAACCT,271,465,342,177,486,122,265,377,103,542 +chr1.1_001169609|AltMatch_0001,chr1.1_001169609,CAATGATCTCTGCGCAACTGCACCTTTAAAATCTTCCTGCCTACATAGTACTTTTGGTTTTTGGAACCC,304,292,218,149,254,314,247,148,127,0 +chr1.1_001169609|Alt_0002,chr1.1_001169609,CAATGATCTCTGCGCAACTGCACCTTTAAAATCTTCCTGCCTGACATAGTACTTTTGGTTTTTGGAACCCAAGTACCGTAT,2,0,0,0,1,0,0,0,1,0 +chr1.1_001169609|RefMatch_0002,chr1.1_001169609,CAATGATCTCTGCGCAACTGCACCTTTTAAATCTTCCTGCCTGACAAAGTACTTTTGGTTTTTGGAACCC,0,284,214,564,0,0,0,272,229,243 +chr1.1_001169609|Ref_0001,chr1.1_001169609,CAATGATCTCTGCGCAACTGCACCTTTTAAATCTTCCTGCCTGACATAGTACTTTTGGTTTTTGGAACCCAAGTACCGTAT,593,300,196,254,486,571,419,508,451,705 +chr1.1_001494903|Alt_0002,chr1.1_001494903,TTGGTGCAGTGTTGATAGTAGCTGGACTATACTTTGTGTTGTGGGGTAAAAGTGAAGAGAAGAAATTATTTGCAAAGGAAC,22,21,9,5,10,5,24,18,7,11 
+chr1.1_001494903|Ref_0001,chr1.1_001494903,TTGGTGCAGTGTTGATAGTAGCTGGACTATACTTTGTGCTGTGGGGTAAAAGTGAAGAGAAGAAATTATTTGCAAAGGAAC,5,17,7,18,13,14,3,6,22,12 +chr1.1_001590881|Alt_0002,chr1.1_001590881,CTGTGAGAAGCACTTCATCTGAATTAAGCAATCCTTTTCCCGTAAGTAAGAGTTTGTAATAGGTATTATCAAACATTCTTG,9,10,5,2,6,6,1,15,6,16 +chr1.1_001590881|Ref_0001,chr1.1_001590881,CTGTGAGAAGCACTTCATCTGAATTAAGCAATCCTTTTCCCCTAAGTAAGAGTTTGTAATAGGTATTATCAAACATTCTTG,27,6,12,3,8,9,21,5,11,0 +chr1.1_001938036|AltMatch_0003,chr1.1_001938036,CAGTGTTATCAGCCACACATGTAAATTGATGCTTTTATCTGGACTTGTTAAGTATACTGACAGCTTATC,0,0,0,27,0,25,0,0,2,0 +chr1.1_001938036|Alt_0002,chr1.1_001938036,CAGTGTTATCAGCCACACATGTAAATTGATGCTTTTCTCTGGACTTGTTTAAGTATACTGACAGCTTATCATGTCTGTTGG,0,48,39,82,30,71,0,0,32,0 +chr1.1_001938036|Ref_0001,chr1.1_001938036,CAGTGTTATCAGCCACACATGTAAATTGATTCTTTTCTCTGGACTTGTTTAAGTATACTGACAGCTTATCATGTCTGTTGG,149,101,122,0,98,38,92,178,99,153 +chr1.1_002111756|Alt_0002,chr1.1_002111756,GGTAGATAAATTTTACAGATGCTTAAAAGGTTTGCTAAATGGAATTCTGAGTATTGATCCTAAGAAAATCCATGTATAGAT,2,11,5,2,1,4,0,18,11,7 +chr1.1_002111756|Ref_0001,chr1.1_002111756,GGTAGATAAATTTTACAGATGCTTAAAAGGTTTGTTAAATGGAATTCTGAGTATTGATCCTAAGAAAATCCATGTATAGAT,115,176,163,149,179,242,136,61,167,56 +chr1.1_002341138|Alt_0002,chr1.1_002341138,GACTGTTGGAGTAATTTGCATATCAAAATATCTATATGTGATCACAGGGTTCTTTAAACAGAACATGGAGATTTTGACTTA,0,0,0,10,0,0,0,0,3,0 +chr1.1_002341138|Ref_0001,chr1.1_002341138,GACTGTTGGAGTAATTTGCATATCAAAATATCTATATGTGATCACAGGGTACTTTAAACAGAACATGGAGATTTTGACTTA,19,28,27,7,28,12,17,23,6,36 +chr1.1_002432574|Alt_0002,chr1.1_002432574,ATACATCCTTCCTATCCTGGATTATCACTGACCAGTTTTCAGGGATGTTTCATCAACAAAATCCTGTCTTATATTACATTC,519,411,331,668,420,405,462,247,216,0 +chr1.1_002432574|Ref_0001,chr1.1_002432574,ATACATCCTTCCTATCCTGGATTATCACTGACCACTTTTCAGGGATGTTTCATCAACAAAATCCTGTCTTATATTACATTC,289,332,203,92,268,223,230,368,448,479 
+chr1.1_002703089|Alt_0002,chr1.1_002703089,CAACCACTTTGCAACCTTGCATGAAACTTTTATTTTCATCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT,0,202,179,202,81,155,60,124,200,165 +chr1.1_002703089|Ref_0001,chr1.1_002703089,CAACCACTTTGCAACCTTGCATGAAACTTTTATCTTCATCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT,306,172,119,44,177,120,117,167,145,120 +chr1.1_002798325|Alt_0002,chr1.1_002798325,TAGAAGTGTATTATATATATCTAACCTAGTATGTTATTTGCATTTGGACAAATTTGTAAAGGCATGGAGAAATTGGAAGGA,0,12,8,0,9,9,8,0,4,8 +chr1.1_002798325|Ref_0001,chr1.1_002798325,TAGAAGTGTATTATATATATCTAACCTAGTATTTTATTTGCATTTGGACAAATTTGTAAAGGCATGGAGAAATTGGAAGGA,8,17,10,13,5,8,4,19,5,17 +chr1.1_003103125|Alt_0002,chr1.1_003103125,CAGTGATAGCTTATCTCAAGTGATGTAGTTAATTTTTGTTCTCAAACTTTAATAGTAATGGTATTTAAAGTTCTACTTTGA,34,34,62,94,38,26,39,8,0,0 +chr1.1_003103125|Ref_0001,chr1.1_003103125,CAGTGATAGCTTATCTCAAGTGATGTAGTTAATTTTTGTTCTCAAACTATAATAGTAATGGTATTTAAAGTTCTACTTTGA,48,79,99,35,80,171,34,162,138,183 +chr1.1_003243094|Alt_0002,chr1.1_003243094,AAGGTAAGAACACAACCATTAATGTTATGTTTTTCTGTTTTGTCTTAATGTTTTTATTGATTAGTTACATAATGTCCCATA,9,4,4,16,5,18,8,4,0,0 +chr1.1_003243094|Ref_0001,chr1.1_003243094,AAGGTAAGAACACAACCATTAATGTTATGTTATTCTGTTTTGTCTTAATGTTTTTATTGATTAGTTACATAATGTCCCATA,8,12,12,0,10,11,7,17,16,17 +chr1.1_003329439|Alt_0002,chr1.1_003329439,GAACTAAGACCAACGTTTAAATACTAAGTTTATACTAATTAGGGTTTATTTTCTGGTTTGTAACACTGCATGTAAAAGTTA,1,1,0,0,3,6,1,5,5,8 +chr1.1_003329439|Ref_0001,chr1.1_003329439,GAACTAAGACCAACGTTTAAATACTAAGTTTATCCTAATTAGGGTTTATTTTCTGGTTTGTAACACTGCATGTAAAAGTTA,14,18,18,10,14,10,7,14,0,2 +chr1.1_003491884|Alt_0002,chr1.1_003491884,GAGGAAATCGACACTTTAGTTGATTATCTCATTAGCCGTGGTTTTGGCAAGAACTAATTATTAAGAGGTGATATACGATCA,0,138,95,0,167,37,71,0,209,63 +chr1.1_003491884|RefMatch_0002,chr1.1_003491884,GAGGAAATCGACACTTTAGTTGATTATCTCCTTAGCCATGGTTTTGGCAAGAACTAATTATTAAGAGGTG,0,0,0,0,0,0,0,1,0,0 +chr1.1_003491884|Ref_0001,chr1.1_003491884,GAGGAAATCGACACTTTAGTTGATTATCTCCTTAGCCGTGGTTTTGGCAAGAACTAATTATTAAGAGGTGATATACGATCA,219,142,116,201,0,169,88,311,66,161 
+chr1.1_003613850|Alt_0002,chr1.1_003613850,CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGCGTGAACAGTGTTACAATGAGTGACCGTTCTGTA,2,0,0,636,0,0,0,2,330,0 +chr1.1_003613850|Ref_0001,chr1.1_003613850,CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGTGTGAACAGTGTTACAATGAGTGACCGTTCTGTA,1646,1541,992,593,1244,1149,1359,1970,846,1120 \ No newline at end of file diff --git a/inst/example_MADC_to_merge.csv b/inst/example_MADC_to_merge.csv new file mode 100644 index 0000000..1bb2e56 --- /dev/null +++ b/inst/example_MADC_to_merge.csv @@ -0,0 +1,36 @@ +AlleleID,CloneID,AlleleSequence,Sample_11,Sample_12,Sample_13,Sample_14,Sample_15,Sample_16,Sample_17,Sample_18,Sample_19,Sample_20 +chr1.1_001590881|Ref_0001,chr1.1_001590881,CTGTGAGAAGCACTTCATCTGAATTAAGCAATCCTTTTCCCCTAAGTAAGAGTTTGTAATAGGTATTATCAAACATTCTTG,0,0,0,20,9,17,16,14,6,5 +chr1.1_001938036|AltMatch_0003,chr1.1_001938036,CAGTGTTATCAGCCACACATGTAAATTGATGCTTTTATCTGGACTTGTTAAGTATACTGACAGCTTATC,48,0,0,34,36,33,0,26,0,0 +chr1.1_001938036|Alt_0002,chr1.1_001938036,CAGTGTTATCAGCCACACATGTAAATTGATGCTTTTCTCTGGACTTGTTTAAGTATACTGACAGCTTATCATGTCTGTTGG,39,0,0,0,75,42,83,0,76,35 +chr1.1_001938036|Ref_0001,chr1.1_001938036,CAGTGTTATCAGCCACACATGTAAATTGATTCTTTTCTCTGGACTTGTTTAAGTATACTGACAGCTTATCATGTCTGTTGG,35,149,134,145,37,75,52,106,66,95 +chr1.1_002111756|Alt_0002,chr1.1_002111756,GGTAGATAAATTTTACAGATGCTTAAAAGGTTTGCTAAATGGAATTCTGAGTATTGATCCTAAGAAAATCCATGTATAGAT,5,24,24,8,19,11,11,4,6,4 +chr1.1_002111756|Ref_0001,chr1.1_002111756,GGTAGATAAATTTTACAGATGCTTAAAAGGTTTGTTAAATGGAATTCTGAGTATTGATCCTAAGAAAATCCATGTATAGAT,55,62,53,167,68,89,164,124,106,113 +chr1.1_002341138|Alt_0002,chr1.1_002341138,GACTGTTGGAGTAATTTGCATATCAAAATATCTATATGTGATCACAGGGTTCTTTAAACAGAACATGGAGATTTTGACTTA,6,0,0,0,14,0,0,0,6,0 +chr1.1_002341138|Ref_0001,chr1.1_002341138,GACTGTTGGAGTAATTTGCATATCAAAATATCTATATGTGATCACAGGGTACTTTAAACAGAACATGGAGATTTTGACTTA,3,35,33,16,22,34,40,16,29,34 
+chr1.1_002432574|Alt_0002,chr1.1_002432574,ATACATCCTTCCTATCCTGGATTATCACTGACCAGTTTTCAGGGATGTTTCATCAACAAAATCCTGTCTTATATTACATTC,634,0,1,386,419,430,674,313,398,216 +chr1.1_002432574|Ref_0001,chr1.1_002432574,ATACATCCTTCCTATCCTGGATTATCACTGACCACTTTTCAGGGATGTTTCATCAACAAAATCCTGTCTTATATTACATTC,73,466,530,274,293,264,106,177,254,433 +chr1.1_002703089|Alt_0002,chr1.1_002703089,CAACCACTTTGCAACCTTGCATGAAACTTTTATTTTCATCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT,0,151,172,120,125,1,56,1,0,107 +chr1.1_002703089|Ref_0001,chr1.1_002703089,CAACCACTTTGCAACCTTGCATGAAACTTTTATCTTCATCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT,193,127,129,257,289,294,178,200,275,198 +chr1.1_002798325|Alt_0002,chr1.1_002798325,TAGAAGTGTATTATATATATCTAACCTAGTATGTTATTTGCATTTGGACAAATTTGTAAAGGCATGGAGAAATTGGAAGGA,9,2,0,9,8,0,0,0,6,2 +chr1.1_002798325|Ref_0001,chr1.1_002798325,TAGAAGTGTATTATATATATCTAACCTAGTATTTTATTTGCATTTGGACAAATTTGTAAAGGCATGGAGAAATTGGAAGGA,3,10,10,11,7,13,18,20,5,15 +chr1.1_003103125|Alt_0002,chr1.1_003103125,CAGTGATAGCTTATCTCAAGTGATGTAGTTAATTTTTGTTCTCAAACTTTAATAGTAATGGTATTTAAAGTTCTACTTTGA,35,0,0,29,60,40,40,39,15,22 +chr1.1_003103125|Ref_0001,chr1.1_003103125,CAGTGATAGCTTATCTCAAGTGATGTAGTTAATTTTTGTTCTCAAACTATAATAGTAATGGTATTTAAAGTTCTACTTTGA,82,188,163,73,85,91,109,118,109,90 +chr1.1_003243094|Alt_0002,chr1.1_003243094,AAGGTAAGAACACAACCATTAATGTTATGTTTTTCTGTTTTGTCTTAATGTTTTTATTGATTAGTTACATAATGTCCCATA,2,0,5,18,6,27,0,12,8,3 +chr1.1_003243094|Ref_0001,chr1.1_003243094,AAGGTAAGAACACAACCATTAATGTTATGTTATTCTGTTTTGTCTTAATGTTTTTATTGATTAGTTACATAATGTCCCATA,21,38,17,8,11,6,25,5,8,13 +chr1.1_003329439|Alt_0002,chr1.1_003329439,GAACTAAGACCAACGTTTAAATACTAAGTTTATACTAATTAGGGTTTATTTTCTGGTTTGTAACACTGCATGTAAAAGTTA,6,6,3,3,3,4,1,1,3,2 +chr1.1_003329439|Ref_0001,chr1.1_003329439,GAACTAAGACCAACGTTTAAATACTAAGTTTATCCTAATTAGGGTTTATTTTCTGGTTTGTAACACTGCATGTAAAAGTTA,1,5,3,16,15,13,21,22,7,5 
+chr1.1_003491884|Alt_0002,chr1.1_003491884,GAGGAAATCGACACTTTAGTTGATTATCTCATTAGCCGTGGTTTTGGCAAGAACTAATTATTAAGAGGTGATATACGATCA,108,69,56,113,81,59,151,39,88,85 +chr1.1_003491884|RefMatch_0002,chr1.1_003491884,GAGGAAATCGACACTTTAGTTGATTATCTCCTTAGCCATGGTTTTGGCAAGAACTAATTATTAAGAGGTG,0,0,0,0,0,0,0,1,1,1 +chr1.1_003491884|Ref_0001,chr1.1_003491884,GAGGAAATCGACACTTTAGTTGATTATCTCCTTAGCCGTGGTTTTGGCAAGAACTAATTATTAAGAGGTGATATACGATCA,50,178,217,135,151,143,57,165,128,131 +chr1.1_003613850|Alt_0002,chr1.1_003613850,CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGCGTGAACAGTGTTACAATGAGTGACCGTTCTGTA,496,1,946,0,0,1,0,0,319,749 +chr1.1_003613850|Ref_0001,chr1.1_003613850,CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGTGTGAACAGTGTTACAATGAGTGACCGTTCTGTA,555,1491,872,1422,1346,1464,1089,812,951,695 +chr1.1_003712477|Alt_0002,chr1.1_003712477,TGATTTTAGAGCTTACCACAAATTATAGCATGTGAATAAATTTCACTCATTTCGAATGCACAAACTTTCCTGTAATATCTA,47,43,28,63,49,66,0,49,37,25 +chr1.1_003712477|Ref_0001,chr1.1_003712477,TGATTTTAGAGCTTACCACAAATTATAGCATGTGAATAAATTTCACTCATTTTGAATGCACAAACTTTCCTGTAATATCTA,16,27,33,42,39,23,59,9,23,13 +chr1.1_003898103|Alt_0002,chr1.1_003898103,GTCCGAAAGAAGAAAAAGTGTCATGTAAAGCTTTGTGATCAATCGTCTTATCCAAATTCTGCACAACCAACAACAGCATAA,35,61,51,67,63,24,126,36,28,46 +chr1.1_003898103|Ref_0001,chr1.1_003898103,GTCCGAAAGAAGAAAAAGTGTCATGTAAAGCTTTGTGATCAATTGTCTTATCCAAATTCTGCACAACCAACAACAGCATAA,72,70,38,87,35,101,48,133,56,26 +chr1.1_004102347|AltMatch_0001,chr1.1_004102347,GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTTTGTTAGAATGTATTCCATTTG,37,22,71,0,80,30,53,0,36,32 +chr1.1_004102347|Alt_0002,chr1.1_004102347,GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTTTGTTTGAATGTATTCCATTTGTCTCTTGTCAG,0,0,0,0,0,0,0,0,0,0 +chr1.1_004102347|RefMatch_0002,chr1.1_004102347,GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTCTGCTTGAATGTATTCCATTTG,0,0,0,0,67,0,0,0,0,0 +chr1.1_004102347|RefMatch_0001,chr1.1_004102347,GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTCTGTTAGAATGTATTCCATTTG,20,0,37,0,1,55,0,0,35,57 
+chr1.1_004102347|Ref_0001,chr1.1_004102347,GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTCTGTTTGAATGTATTCCATTTGTCTCTTGTCAG,19,109,49,195,56,96,81,139,91,79 +chr1.1_004315961|Alt_0002,chr1.1_004315961,GATGGAGTATGGAGAAGAAGTTAATTAAGGGCACTTTGGCTTTCCCTTAGAGTTCTTCATGTCCCTATAGCAAGGGCACTC,0,0,0,0,0,0,0,0,0,0 \ No newline at end of file diff --git a/inst/example_SNPs_DArTag-probe-design_f180bp.botloci b/inst/example_SNPs_DArTag-probe-design_f180bp.botloci new file mode 100644 index 0000000..bd923fb --- /dev/null +++ b/inst/example_SNPs_DArTag-probe-design_f180bp.botloci @@ -0,0 +1,101 @@ +chr1.1_000194324 +chr1.1_000309952 +chr1.1_000452961 +chr1.1_001169609 +chr1.1_001590881 +chr1.1_001938036 +chr1.1_002111756 +chr1.1_002341138 +chr1.1_002432574 +chr1.1_002703089 +chr1.1_003103125 +chr1.1_003243094 +chr1.1_003329439 +chr1.1_003491884 +chr1.1_003613850 +chr1.1_004436755 +chr1.1_004538231 +chr1.1_004964967 +chr1.1_005027893 +chr1.1_005509457 +chr1.1_005850336 +chr1.1_006342913 +chr1.1_006491042 +chr1.1_006927231 +chr1.1_007233177 +chr1.1_007349949 +chr1.1_007830610 +chr1.1_008362856 +chr1.1_009175857 +chr1.1_009654615 +chr1.1_009788520 +chr1.1_010059811 +chr1.1_010182954 +chr1.1_010513693 +chr1.1_010654139 +chr1.1_010740435 +chr1.1_011053888 +chr1.1_011550587 +chr1.1_011602715 +chr1.1_012607158 +chr1.1_012952616 +chr1.1_013166244 +chr1.1_013536075 +chr1.1_014100127 +chr1.1_014207906 +chr1.1_014290088 +chr1.1_014788595 +chr1.1_015044468 +chr1.1_015424890 +chr1.1_016006885 +chr1.1_016221768 +chr1.1_016563045 +chr1.1_016976894 +chr1.1_017863839 +chr1.1_018083264 +chr1.1_018128065 +chr1.1_018362099 +chr1.1_018698343 +chr1.1_018789059 +chr1.1_019009023 +chr1.1_019232221 +chr1.1_019387175 +chr1.1_019780298 +chr1.1_020310833 +chr1.1_020548930 +chr1.1_020860458 +chr1.1_021161903 +chr1.1_023241568 +chr1.1_024489859 +chr1.1_024760634 +chr1.1_025688638 +chr1.1_025728959 +chr1.1_026050142 +chr1.1_026502181 +chr1.1_026940687 +chr1.1_027461652 +chr1.1_027966972 +chr1.1_028084165 
+chr1.1_028687110 +chr1.1_028857948 +chr1.1_028955804 +chr1.1_029326140 +chr1.1_029584457 +chr1.1_030458048 +chr1.1_030744456 +chr1.1_030969636 +chr1.1_031360007 +chr1.1_031442927 +chr1.1_031907509 +chr1.1_032107042 +chr1.1_032180745 +chr1.1_034284365 +chr1.1_034579746 +chr1.1_034663420 +chr1.1_035085053 +chr1.1_035253375 +chr1.1_035606739 +chr1.1_035758205 +chr1.1_036130046 +chr1.1_036517797 + diff --git a/inst/example_allele_db.fa b/inst/example_allele_db.fa new file mode 100644 index 0000000..6be6c1c --- /dev/null +++ b/inst/example_allele_db.fa @@ -0,0 +1,1184 @@ +>chr1.1_000194324|AltMatch_0001 +CGAAATAATAACCCAAGTTCTGCCAGTTTATGTTAAAACTTTTTTTACAAGGTACAAGTTCGGTGACAACTTAACAAGTAA +>chr1.1_000194324|Alt_0002 +CGAAATAATAACCCAAGTTCTGCCAGTTTATGTTAAAACTTTTCTTACAAGGTACAAGTTCGGTGACAACTTAACAAGTAA +>chr1.1_000194324|Ref_0001 +CGAAATAATAACCCAAGTTCTGCCAGTTTATGTTAAAACTTTTCTTACATGGTACAAGTTCGGTGACAACTTAACAAGTAA +>chr1.1_000309952|AltMatch_0001 +TGGTATTGTATTAGATGAGATATTCTAACTTGTAAGGTAGAAATCTTGACCACTTATAAACACATATTCATATCATATGTA +>chr1.1_000309952|Alt_0002 +TGGTATTGTATTAGATGAGATATTCTAACTTGTAAGATTGAGATCTTGACCACTTATAAACACATATTCATATCATATGTA +>chr1.1_000309952|Ref_0001 +TGGTATTGTATTAGATGAGATATTCTAACTTGTAAGATTGAGATCTCGACCACTTATAAACACATATTCATATCATATGTA +>chr1.1_000452961|Alt_0002 +TTACGAGATCGCGAAGTTCGTTCCTTTCTTTATCTTTCTTCTCTTTTACCCGACCGGCTCCCTGCAGACCAGAAAGCCCAA +>chr1.1_000452961|Ref_0001 +TTACGAGATCGCGAAGCTCGTTCCTTTCTTTATCTTTCTTCTCTTTTACCCGACCGGCTCCCTGCAGACCAGAAAGCCCAA +>chr1.1_000532584|Alt_0002 +CAACGGAACATATAAAGATATCCACTTCTCTTGGAGCTTGATGATACTTGTAATGTTGGGGATTTGTGTTTTGCAGGATTT +>chr1.1_000532584|RefMatch_0001 +CAACGGAACATATAAAGATATCCACTTCTCTTGGAGCTTGATAATACTTATAATGTTGGGGATTTGTGTTTTGCAGGATTT +>chr1.1_000532584|RefMatch_0002 +CAACGGAACATATAAAGATATCCACTTCTCTTGGGGCTTGATAATACTTATAATGTTGGGGATTTGTGTTTTGCAGGATTT +>chr1.1_000532584|RefMatch_0003 +CAACGGAACATATAAAGATATCCACTTTTCTCGGAGCTTGATAATACTTATAATGTTGGGGATTTGTGTTTTGCAGGATTT +>chr1.1_000532584|Ref_0001 
+CAACGGAACATATAAAGATATCCACTTCTCTTGGAGCTTGATGATACTTATAATGTTGGGGATTTGTGTTTTGCAGGATTT +>chr1.1_000735393|Alt_0002 +GACTCTTGGAAGGAAAATGGTTTTTCTAGGTAATTAAACTTCAATCAAAGTTACATATTTGACTCACTTCACTATTCTAAA +>chr1.1_000735393|Ref_0001 +GACTCTTGGAAGGAAAATGGTTTTTCTAGGTAGTTAAACTTCAATCAAAGTTACATATTTGACTCACTTCACTATTCTAAA +>chr1.1_000837330|Alt_0002 +CCTCTATCTAATAGAGAATATTGATTGGCTGAATGTTGACCATATTCCATGTACCCACTAGGGTTACCCCGTGGAGTCCAA +>chr1.1_000837330|RefMatch_0001 +CCTCTATCTAATAGAGAATATTGATTGGCTGAATGTTGACCATATTCCCTATACCCACTAGGGTTACCCCGTGGAGTCCAA +>chr1.1_000837330|Ref_0001 +CCTCTATCTAATAGAGAATATTGATTGGCTGAATGTTGACCATATTCCCTGTACCCACTAGGGTTACCCCGTGGAGTCCAA +>chr1.1_000915014|Alt_0002 +CCAGGCTTCTATATATACAATGATCAGATATGTTAAACCTAAGCTGCTCAGTGCTCCTTAACCCTAAGACACACAGAACCT +>chr1.1_000915014|Ref_0001 +CCAGGCTTCTATATATACAATGATCAGATATGTTAAACCTAAGCTGCTCGGTGCTCCTTAACCCTAAGACACACAGAACCT +>chr1.1_001169609|AltMatch_0001 +CAATGATCTCTGCGCAACTGCACCTTTAAAATCTTCCTGCCTACATAGTACTTTTGGTTTTTGGAACCCAAGTACCGTATT +>chr1.1_001169609|Alt_0002 +CAATGATCTCTGCGCAACTGCACCTTTAAAATCTTCCTGCCTGACATAGTACTTTTGGTTTTTGGAACCCAAGTACCGTAT +>chr1.1_001169609|RefMatch_0001 +CAATGATCTCTGCGCAACTGCACCTTTTAAATCTTCCTACCTGACATAGTACTTTTGGTTTTTGGAACCCAAGTACCGTAT +>chr1.1_001169609|RefMatch_0002 +CAATGATCTCTGCGCAACTGCACCTTTTAAATCTTCCTGCCTGACAAAGTACTTTTGGTTTTTGGAACCCAAGTACCGTAT +>chr1.1_001169609|RefMatch_0003 +CAATGATCTCTGCGCAACTGCACCTTTTAAATCTTCCTGCCTGACATAATACTTTTGGTTTTTGGAACCCAAGTACCGTAT +>chr1.1_001169609|Ref_0001 +CAATGATCTCTGCGCAACTGCACCTTTTAAATCTTCCTGCCTGACATAGTACTTTTGGTTTTTGGAACCCAAGTACCGTAT +>chr1.1_001494903|Alt_0002 +TTGGTGCAGTGTTGATAGTAGCTGGACTATACTTTGTGTTGTGGGGTAAAAGTGAAGAGAAGAAATTATTTGCAAAGGAAC +>chr1.1_001494903|Ref_0001 +TTGGTGCAGTGTTGATAGTAGCTGGACTATACTTTGTGCTGTGGGGTAAAAGTGAAGAGAAGAAATTATTTGCAAAGGAAC +>chr1.1_001590881|Alt_0002 +CTGTGAGAAGCACTTCATCTGAATTAAGCAATCCTTTTCCCGTAAGTAAGAGTTTGTAATAGGTATTATCAAACATTCTTG +>chr1.1_001590881|Ref_0001 +CTGTGAGAAGCACTTCATCTGAATTAAGCAATCCTTTTCCCCTAAGTAAGAGTTTGTAATAGGTATTATCAAACATTCTTG 
+>chr1.1_001938036|AltMatch_0001 +CAGTGTTATCAGCCACACATGTAAATTGATGCTTTTATCTGGACTTGTTAAGTAATACTGACAGCTTATCATGTCTGTTGG +>chr1.1_001938036|AltMatch_0002 +CAGTGTTATCAGCCACACATGTAAATTGATGCTTTTCTCTGGACTTGTTAAGTAATACTGACAGCTTATCATGTCTGTTGG +>chr1.1_001938036|Alt_0002 +CAGTGTTATCAGCCACACATGTAAATTGATGCTTTTCTCTGGACTTGTTTAAGTATACTGACAGCTTATCATGTCTGTTGG +>chr1.1_001938036|Ref_0001 +CAGTGTTATCAGCCACACATGTAAATTGATTCTTTTCTCTGGACTTGTTTAAGTATACTGACAGCTTATCATGTCTGTTGG +>chr1.1_002111756|Alt_0002 +GGTAGATAAATTTTACAGATGCTTAAAAGGTTTGCTAAATGGAATTCTGAGTATTGATCCTAAGAAAATCCATGTATAGAT +>chr1.1_002111756|Ref_0001 +GGTAGATAAATTTTACAGATGCTTAAAAGGTTTGTTAAATGGAATTCTGAGTATTGATCCTAAGAAAATCCATGTATAGAT +>chr1.1_002341138|Alt_0002 +GACTGTTGGAGTAATTTGCATATCAAAATATCTATATGTGATCACAGGGTTCTTTAAACAGAACATGGAGATTTTGACTTA +>chr1.1_002341138|Ref_0001 +GACTGTTGGAGTAATTTGCATATCAAAATATCTATATGTGATCACAGGGTACTTTAAACAGAACATGGAGATTTTGACTTA +>chr1.1_002432574|AltMatch_0001 +ATACATCCTTCCTATCCTGGATTATCACTGACCAGTTTTCGGGGATGTTTCATCAACAAAATCCTGTCTTATATTACATTC +>chr1.1_002432574|Alt_0002 +ATACATCCTTCCTATCCTGGATTATCACTGACCAGTTTTCAGGGATGTTTCATCAACAAAATCCTGTCTTATATTACATTC +>chr1.1_002432574|Ref_0001 +ATACATCCTTCCTATCCTGGATTATCACTGACCACTTTTCAGGGATGTTTCATCAACAAAATCCTGTCTTATATTACATTC +>chr1.1_002703089|Alt_0002 +CAACCACTTTGCAACCTTGCATGAAACTTTTATTTTCATCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT +>chr1.1_002703089|RefMatch_0001 +CAACCACTTTGCAACCTTGCATGAAACTTTTATCATCATCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT +>chr1.1_002703089|RefMatch_0002 +CAACCACTTTGCAACCTTGCATGAAACTTTTATCTTCACCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT +>chr1.1_002703089|Ref_0001 +CAACCACTTTGCAACCTTGCATGAAACTTTTATCTTCATCTGATATCCTAAACCCACCATTCGCACTGTAGAACCCACAAT +>chr1.1_002798325|Alt_0002 +TAGAAGTGTATTATATATATCTAACCTAGTATGTTATTTGCATTTGGACAAATTTGTAAAGGCATGGAGAAATTGGAAGGA +>chr1.1_002798325|Ref_0001 +TAGAAGTGTATTATATATATCTAACCTAGTATTTTATTTGCATTTGGACAAATTTGTAAAGGCATGGAGAAATTGGAAGGA +>chr1.1_003103125|Alt_0002 
+CAGTGATAGCTTATCTCAAGTGATGTAGTTAATTTTTGTTCTCAAACTTTAATAGTAATGGTATTTAAAGTTCTACTTTGA +>chr1.1_003103125|Ref_0001 +CAGTGATAGCTTATCTCAAGTGATGTAGTTAATTTTTGTTCTCAAACTATAATAGTAATGGTATTTAAAGTTCTACTTTGA +>chr1.1_003243094|Alt_0002 +AAGGTAAGAACACAACCATTAATGTTATGTTTTTCTGTTTTGTCTTAATGTTTTTATTGATTAGTTACATAATGTCCCATA +>chr1.1_003243094|Ref_0001 +AAGGTAAGAACACAACCATTAATGTTATGTTATTCTGTTTTGTCTTAATGTTTTTATTGATTAGTTACATAATGTCCCATA +>chr1.1_003329439|Alt_0002 +GAACTAAGACCAACGTTTAAATACTAAGTTTATACTAATTAGGGTTTATTTTCTGGTTTGTAACACTGCATGTAAAAGTTA +>chr1.1_003329439|Ref_0001 +GAACTAAGACCAACGTTTAAATACTAAGTTTATCCTAATTAGGGTTTATTTTCTGGTTTGTAACACTGCATGTAAAAGTTA +>chr1.1_003491884|Alt_0002 +GAGGAAATCGACACTTTAGTTGATTATCTCATTAGCCGTGGTTTTGGCAAGAACTAATTATTAAGAGGTGATATACGATCA +>chr1.1_003491884|RefMatch_0001 +GAGGAAATCGACACTTTAGTTGATTATCTCCTTACCCGTGGTTTTGGCAAGAACTAATTATTAAGAGGTGATATACGATCA +>chr1.1_003491884|Ref_0001 +GAGGAAATCGACACTTTAGTTGATTATCTCCTTAGCCGTGGTTTTGGCAAGAACTAATTATTAAGAGGTGATATACGATCA +>chr1.1_003613850|AltMatch_0001 +CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGCATGAACAGTGTTACAATGAGTGACCGTTCTGTA +>chr1.1_003613850|Alt_0002 +CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGCGTGAACAGTGTTACAATGAGTGACCGTTCTGTA +>chr1.1_003613850|RefMatch_0001 +CATGATCAACCACCAAGGAGGCAAACCCTCGATCAGATTGGATCTGGTGTGAACAGTGTTACAATGAGTGACCGTTCTGTA +>chr1.1_003613850|RefMatch_0002 +CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCCGGTGTGAACAGTGTTACAATGAGTGACCGTTCTGTA +>chr1.1_003613850|RefMatch_0003 +CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGTATGAACAGTGTTACAATGAGTGACCGTTCTGTA +>chr1.1_003613850|RefMatch_0004 +CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGTGTGAATAGTGTTACAATGAGTGACCGTTCTGTA +>chr1.1_003613850|Ref_0001 +CATGATCAACCACCAAGGAGGCAAACCCTTGATCAGATTGGATCTGGTGTGAACAGTGTTACAATGAGTGACCGTTCTGTA +>chr1.1_003712477|Alt_0002 +TGATTTTAGAGCTTACCACAAATTATAGCATGTGAATAAATTTCACTCATTTCGAATGCACAAACTTTCCTGTAATATCTA +>chr1.1_003712477|Ref_0001 +TGATTTTAGAGCTTACCACAAATTATAGCATGTGAATAAATTTCACTCATTTTGAATGCACAAACTTTCCTGTAATATCTA 
+>chr1.1_003898103|Alt_0002 +GTCCGAAAGAAGAAAAAGTGTCATGTAAAGCTTTGTGATCAATCGTCTTATCCAAATTCTGCACAACCAACAACAGCATAA +>chr1.1_003898103|Ref_0001 +GTCCGAAAGAAGAAAAAGTGTCATGTAAAGCTTTGTGATCAATTGTCTTATCCAAATTCTGCACAACCAACAACAGCATAA +>chr1.1_004102347|AltMatch_0001 +GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTTTGTTAGAATGTATTCCATTTGTCTCTTGTCAG +>chr1.1_004102347|Alt_0002 +GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTTTGTTTGAATGTATTCCATTTGTCTCTTGTCAG +>chr1.1_004102347|RefMatch_0001 +GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTCTGTTAGAATGTATTCCATTTGTCTCTTGTCAG +>chr1.1_004102347|Ref_0001 +GACTTTGCTTCACAAGATTGGTACATACTGTTATATTTCATGAACTTTCTGTTTGAATGTATTCCATTTGTCTCTTGTCAG +>chr1.1_004315961|Alt_0002 +GATGGAGTATGGAGAAGAAGTTAATTAAGGGCACTTTGGCTTTCCCTTAGAGTTCTTCATGTCCCTATAGCAAGGGCACTC +>chr1.1_004315961|Ref_0001 +GATGGAGTATGGAGAAGAAGTTAATTAAGGGCATTTTGGCTTTCCCTTAGAGTTCTTCATGTCCCTATAGCAAGGGCACTC +>chr1.1_004436755|Alt_0002 +GCACCAATGTTGGTGGTTTAATTACTGCATTTCTCCTGCTTTATTGTATCTTGATAGATATATTATTGTAATATTTTGGAA +>chr1.1_004436755|RefMatch_0001 +GCACCAATGTTGGTGGTTTAATTACTGCATTTCTCCTGCTTGATTCTAACTTGATAGATATATTATTGTAATATTTTGGAA +>chr1.1_004436755|Ref_0001 +GCACCAATGTTGGTGGTTTAATTACTGCATTTCTCCTGCTTTATTGTAACTTGATAGATATATTATTGTAATATTTTGGAA +>chr1.1_004538231|Alt_0002 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACGAGTCCAAATCTCCTGCCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0001 +TCTGATCAGAAGGTACTATTGCCGTGCCTCTGATACGAGTCCAAATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0002 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGACACGAGTCCAAATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0003 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATAAGAGTCCAAATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0004 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACAAGTCCAAATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0005 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACGAATCCAAATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0006 
+TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACGAGTCCAAATCTCTTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0007 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACGAGTCCAAATGTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0008 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACGAGTCCAGATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|RefMatch_0009 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACTAGTCCAAATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004538231|Ref_0001 +TCTGATCAGAAGGTACTATTGCCGTGCCTTTGATACGAGTCCAAATCTCCTACCTCCCAAGTTTAGCTTCAACACAAGTGC +>chr1.1_004614862|AltMatch_0001 +GATAGAGTAGACTAGTCATGATGTAATTAAACTATTCTATGTTTCACAAGTCTTGGAGCTCGAGTTCACAACTAATCTAGT +>chr1.1_004614862|Alt_0002 +GATAGAGTAGACTAGTCATGATGTAATTAAACTATTCTATGTTTCACGAGTCTTGGAGCTCGAGTTCACAACTAATCTAGT +>chr1.1_004614862|RefMatch_0001 +GATAGAGTAGACTAGTCATGATGTAATTAAACTATTCTATGTTTCTCAAGTCTTGGAGCTCGAGTTCACAACTAATCTAGT +>chr1.1_004614862|Ref_0001 +GATAGAGTAGACTAGTCATGATGTAATTAAACTATTCTATGTTTCTCGAGTCTTGGAGCTCGAGTTCACAACTAATCTAGT +>chr1.1_004757298|Alt_0002 +TCTTGCTCTTCTAGAAACTTTCAAACTCGCCGAGTCTCTCCAAAACAACCTTCTTAACCTCTCTTCAAAGCTATCAACAGA +>chr1.1_004757298|RefMatch_0001 +TCTTGCTCTTCTAGAAACTTTCAAACTCGCCGAATCTCTCCAAAACAATCTTCTTAACCTCTCTTCAAAGCTATCAACAGA +>chr1.1_004757298|RefMatch_0002 +TCTTGCTCTTCTAGAAACTTTCAAACTCGCCGAGTCTCTCCAAAACAATCTCCTTAACCTCTCTTCAAAGCTATCAACAGA +>chr1.1_004757298|RefMatch_0003 +TCTTGCTCTTCTAGAAACTTTCAAACTCGCCGAGTCTCTTCAAAACAATCTTCTTAACCTCTCTTCAAAGCTATCAACAGA +>chr1.1_004757298|Ref_0001 +TCTTGCTCTTCTAGAAACTTTCAAACTCGCCGAGTCTCTCCAAAACAATCTTCTTAACCTCTCTTCAAAGCTATCAACAGA +>chr1.1_004964967|AltMatch_0001 +GCAATTTCTGATTTTCAACAACAAGAAGGTCATTAATCTCTTCATTGTAAATCTCCATGTATGAAACTCGAATTAAAAACT +>chr1.1_004964967|Alt_0002 +GCAATTTCTGATTTTCAACAACAAGAAGGTCATTAATCTCTTCATTATAAATCTCCATGTATGAAACTCGAATTAAAAACT +>chr1.1_004964967|RefMatch_0001 +GCAATTTCTGATTTTCAACAACAAGAAGGTCATTAATATCTTCGTTATAAATCTCCATGTATGAAACTCGAATTAAAAACT +>chr1.1_004964967|Ref_0001 
+GCAATTTCTGATTTTCAACAACAAGAAGGTCATTAATCTCTTCGTTATAAATCTCCATGTATGAAACTCGAATTAAAAACT +>chr1.1_005027893|Alt_0002 +GATCTTGCAATGTTCGTGATAATAATTTGGTTAGGATCTCTCATTCTCCACAAAATATGGATTGAGATTGATATCTATGTT +>chr1.1_005027893|Ref_0001 +GATCTTGCAATGTTCGTGATAATAATTTGGTTAGGATCTCTCATTCTCCACAGAATATGGATTGAGATTGATATCTATGTT +>chr1.1_005278309|Alt_0002 +GTAATGATGTCGAGGAGACCTGGAACTTCTTCTAGAAGATTCGGTGATACCAAATCCAAATCGTCTCCAGTTTTGTCAATT +>chr1.1_005278309|Ref_0001 +GTAATGATGTCGAGGAGACCTGGAACTTCTTCTAGAAGATTCGGCGATACCAAATCCAAATCGTCTCCAGTTTTGTCAATT +>chr1.1_005509457|Alt_0002 +GATATCTACCATTCTAGTACTCATCATGCATATCATTTTTTCGCATAAATCTTAACATTATTGTTCCAGGAGGATTGGAAT +>chr1.1_005509457|RefMatch_0001 +GATATCTACCATTCTAGTACTCATCATGCATATCATTTTTTCTCATATATCTTAACATTATTGTTCCAGGAGGATTGGAAT +>chr1.1_005509457|Ref_0001 +GATATCTACCATTCTAGTACTCATCATGCATATCATTTTTTCTCATAAATCTTAACATTATTGTTCCAGGAGGATTGGAAT +>chr1.1_005618685|Alt_0002 +GATGTTCCTGGCTGATAAACACCTTCAATATTTAATTTTATACACATGTTTATCCACTTTATTTTTCTCTCATATAAACAG +>chr1.1_005618685|Ref_0001 +GATGTTCCTGGCTGATAAACACCTTCATTATTTAATTTTATACACATGTTTATCCACTTTATTTTTCTCTCATATAAACAG +>chr1.1_005850336|Alt_0002 +GGAGTGTGCATATGGTTATCTATCTTTGTAGTTAGTAATGCCTATCATTTGACCAACAGCAGAATTCACATTATATTAAAA +>chr1.1_005850336|RefMatch_0001 +GGAGTGTGCATATGGTTATCTATCTTTGTAGTTGGTAATGCCTATCATTTGTCCAACAGCAGAATTCACATTATATTAAAA +>chr1.1_005850336|Ref_0001 +GGAGTGTGCATATGGTTATCTATCTTTGTAGTTAGTAATGCCTATCATTTGTCCAACAGCAGAATTCACATTATATTAAAA +>chr1.1_006072575|AltMatch_0001 +CCTATAGATCAAAGCTATACCAAACTCTGACAAACAATCCATTTTAACTTCTTTATAACATCAACCATCCAATACTCCATA +>chr1.1_006072575|AltMatch_0002 +CCTATAGATCAAAGCTATACCAAACTCTGACAAACATTCCATTTTTAACTTCTTTATAACATCAACCATCCAATACTCCAT +>chr1.1_006072575|Alt_0002 +CCTATAGATCAAAGCTATACCAAACTCTGACAAACATTCCATTTTAACTTCTTTATAACATCAACCATCCAATACTCCATA +>chr1.1_006072575|Ref_0001 +CCTATAGATCAAAGCTATACCAAACTCTGGCAAACATTCCATTTTAACTTCTTTATAACATCAACCATCCAATACTCCATA +>chr1.1_006263297|AltMatch_0001 +CTACTTTCACTGTGCTGCTTAGTCCTTCGATGTTTTTCGGTATCAGTTAATGTCTAGTATAAACTGCATGTTATACTTTCT 
+>chr1.1_006263297|AltMatch_0002 +CTACTTTCACTGTGCTGCTTAGTCCTTTGATGTTTTTCGGTATCAGTTAATGTCTAGTATAAACTGCATGTTATACTTTCT +>chr1.1_006263297|Alt_0002 +CTACTTTCACTGTGCTGCTTAGTCCTTCGTTGTTTTTCGGTATCAGTTAATGTATAGTATAAACTGCATGTTATACTTTCT +>chr1.1_006263297|Ref_0001 +CTACTTTCACTGTGCTGCTTAGTCCTTCGTTGTTTTTCGGTATCAGTCAATGTATAGTATAAACTGCATGTTATACTTTCT +>chr1.1_006342913|Alt_0002 +CAATTTGTTCAATTTGCTTGTTTTGTTGATCTTCTGTAGGAATCTTTCGTCAAACAACCTTCAGGGTCCCATTCCAATTGA +>chr1.1_006342913|RefMatch_0001 +CAATTTGTTCAATTTGCTTGTTTTGTTGGTCTTCTGTAGGAATCTTTCATCAAACAACCTTCAGGGTCCCATTCCAATTGA +>chr1.1_006342913|Ref_0001 +CAATTTGTTCAATTTGCTTGTTTTGTTGGTCTTCTGTAGGAATCTTTCGTCAAACAACCTTCAGGGTCCCATTCCAATTGA +>chr1.1_006491042|Alt_0002 +CATGTTGGATGATATATATACCAAGTCTAATAATTTTTGGACGGTCTTTGCAGGGAAGATCAAGGAGACCTCAGCATTTTT +>chr1.1_006491042|RefMatch_0001 +CATGTTGGATGATATATATACCAAGTCTAATAATTTTTGGATGATCTTTGCAGGGAAGATCAAGGAGACCTCAGCATTTTT +>chr1.1_006491042|Ref_0001 +CATGTTGGATGATATATATACCAAGTCTAATAATTTTTGGACGATCTTTGCAGGGAAGATCAAGGAGACCTCAGCATTTTT +>chr1.1_006660525|AltMatch_0001 +ACAACATTTGGAATACCTGACATACCCTCAAACTCATTCTTAATTTTTCTCAAAGAAGCATCATTAGGCCATTGAAGATAC +>chr1.1_006660525|Alt_0002 +ACAACATTTGGAATACCTGACATACCCTCAAATTCATTCTTAATTTTTCTCAAAGAAGCATCATTAGGCCATTGAAGATAC +>chr1.1_006660525|RefMatch_0001 +ACAACATTTGGAATACCTGACATACCCTCAAACTCATTTTTAATTTTTCTCAAAGAAGCATCATTAGGCCATTGAAGATAC +>chr1.1_006660525|Ref_0001 +ACAACATTTGGAATACCTGACATACCCTCAAATTCATTTTTAATTTTTCTCAAAGAAGCATCATTAGGCCATTGAAGATAC +>chr1.1_006759713|AltMatch_0001 +GTGAGATGACTCCAACGCAATGCAGCTTCCAGACTCTAGTCATTGTTTCCTGAATCCTTATACCCTTTGTGTCATGCTCAC +>chr1.1_006759713|Alt_0002 +GTGAGATGACTCCAACGCAATGCAGCTTCCAGACTCTAGTCACTGTTTCCTGAATCCTTATACCCTTTGTGTCATGCTCAC +>chr1.1_006759713|RefMatch_0001 +GTGAGATGACTCCAACGCAATGCGGCTTCCAGACTCTAATCACTGTTTCCTGAATCCTTATACCCTTTGTGTCATGCTCAC +>chr1.1_006759713|Ref_0001 +GTGAGATGACTCCAACGCAATGCAGCTTCCAGACTCTAATCACTGTTTCCTGAATCCTTATACCCTTTGTGTCATGCTCAC +>chr1.1_006927231|AltMatch_0001 
+GGTGAATGGTGATGGAACAAATCTAAGCATGCCAAAAACTGGTTATGAATATGTTGGAAACACACCTGAATGTGCTTTATT +>chr1.1_006927231|Alt_0002 +GGTGAATGGTGATGGAACAAATCTAAGCATGCTAAAAACTGGTTATGAATATGTTGGAAACACACCTGAATGTGCTTTATT +>chr1.1_006927231|RefMatch_0001 +GGTGAATGGTGATGGAACAAATCTAAGCATACTAAATACTGGTTATGAATATGTTGGAAACACACCTGAATGTGCTTTATT +>chr1.1_006927231|RefMatch_0002 +GGTGAATGGTGATGGAACAAATCTAAGCATGCTAAATATTGGTTATGAATATGTTGGAAACACACCTGAATGTGCTTTATT +>chr1.1_006927231|Ref_0001 +GGTGAATGGTGATGGAACAAATCTAAGCATGCTAAATACTGGTTATGAATATGTTGGAAACACACCTGAATGTGCTTTATT +>chr1.1_007050147|Alt_0002 +GTCAGAGACATGAGGGAAATGGGAAGCAACTTTTCTCTCAGTAAGATGTTAACAATATCATCACCACCAATATGCAAATCA +>chr1.1_007050147|Ref_0001 +GTCAGAGACATGAGGGAAATGGGAAGCAACTTTTCTCTCAGCAAGATGTTAACAATATCATCACCACCAATATGCAAATCA +>chr1.1_007233177|AltMatch_0001 +GAATAGAAATTTAGTGCAGCGTATGCAATCATCTTTGGGAATCCCCTTTACTAGTGAAGATGATGATGCATTCACTAACTT +>chr1.1_007233177|Alt_0002 +GAATAGAAATTTAGTGCAGCGTATGCAATCATCTGTGGGAATCCCCTTTACTAGTGAAGATGATGATGCATTCACTAACTT +>chr1.1_007233177|RefMatch_0001 +GAATAGAAATTTAGTGCAGCGTATGCAATCATCTGCGGGAATCCCCGTTACTAGTGAAGATGATGATGCATTCACTAACTT +>chr1.1_007233177|RefMatch_0002 +GAATAGAAATTTAGTGCAGCGTATGCAATCATCTTTGGGAATCCCCGTTACTAGTGAAGATGATGATGCATTCACTAACTT +>chr1.1_007233177|Ref_0001 +GAATAGAAATTTAGTGCAGCGTATGCAATCATCTGTGGGAATCCCCGTTACTAGTGAAGATGATGATGCATTCACTAACTT +>chr1.1_007349949|Alt_0002 +CCCTATCTTAGCACAATCCAATACAATAGCATTTTCATCAATGCATGTTCCATAGATATTACTGAATTTGATGTTACTTAT +>chr1.1_007349949|RefMatch_0001 +CCCTATCTTAGCACAATCCAATACAATAGCATTTTCATCAATACATGTTCCACAGATATTACTGAATTTGATGTTACTTAT +>chr1.1_007349949|Ref_0001 +CCCTATCTTAGCACAATCCAATACAATAGCATTTTCATCAATACATGTTCCATAGATATTACTGAATTTGATGTTACTTAT +>chr1.1_007519233|AltMatch_0001 +GCAATATACTAACTAACATTCATGTAAAACACATCTCTATTGTTTTTACTTCCAACAAAGTCATCGAGTAGTTTTAATTTA +>chr1.1_007519233|Alt_0002 +GCAATATACTAACTAACATTCATGTAAAACACATCTCTATATGTTTTTACTTCCAACAAAGTCATCGAGTAGTTTTAATTT +>chr1.1_007519233|Ref_0001 
+GCAATATACTAACTAACATTCATGTAAAACACATCTCTATATGTTTTTACTTACAACAAAGTCATCGAGTAGTTTTAATTT +>chr1.1_007830610|Alt_0002 +CTCATTATAACTTGCATCATATAAAGTGCTGATAATTAGACTGAATCTGACATTAGAAACTCTCACAACAAACAAAAAAAA +>chr1.1_007830610|Ref_0001 +CTCATTATAACTTGCATCATATAAAGTGCTGATAATTAGACTGAATCCGACATTAGAAACTCTCACAACAAACAAAAAAAA +>chr1.1_008362856|AltMatch_0001 +TGCCATTGCAATTTGAAATTCTGATCAAGTGGTATCTTTTATTGGACATCATAGCATGCAGTGTGCTAATATCCAAGCTTT +>chr1.1_008362856|Alt_0002 +TGCCATTGCAATTTGAAATTCTGATCAAGTGGTATCTTTTATTGGACACCATAGCATGCAGTGTGCTAATATCCAAGCTTT +>chr1.1_008362856|Ref_0001 +TGCCATTGCAATTTGAAATTCTGATCAAGTGGTATCTTTTATTGGACACCATATCATGCAGTGTGCTAATATCCAAGCTTT +>chr1.1_008573540|Alt_0002 +CCTCCTATTGCAAATGTAGAACTATGCAGTTTTCTTTATCGCATAATGTATTATTGAGTATGAGGATGTGTATTTTTGTGG +>chr1.1_008573540|Ref_0001 +CCTCCTATTGCAAATGTAGAACTATGCAGTTTTCTTTATCACATAATGTATTATTGAGTATGAGGATGTGTATTTTTGTGG +>chr1.1_008671918|Alt_0002 +CCCTGTGCATGAATTTTAAGCGACAAATTCCTTTTGCCTCATAGGATAGTAACATATGGTATTTTGCTTTTTTGAAGTACA +>chr1.1_008671918|RefMatch_0001 +CCCTGTGCATGAATTTTAAGCGACAAATTCCTTTTGCATCATAAGATAGTAACATATGGTATTTTGCTTTTTTGAAGTACA +>chr1.1_008671918|Ref_0001 +CCCTGTGCATGAATTTTAAGCGACAAATTCCTTTTGCATCATAGGATAGTAACATATGGTATTTTGCTTTTTTGAAGTACA +>chr1.1_009082943|Alt_0002 +GCTCATCTGCAGTTATAAAAATATGCCTTGTTGAAAGTAGCTGTAGATAATTGAAGTTTATGGATATTTTATTGTGATCAA +>chr1.1_009082943|Ref_0001 +GCTCATCTGCAGTTATAAAAATATGCCTTGTTGAAAGTAGCTGTAGATAATAGAAGTTTATGGATATTTTATTGTGATCAA +>chr1.1_009175857|AltMatch_0001 +TGCAAGGTTTCCATGTAATTGGCTAATGGCCTCCTTTCACACTGGAAACATTTAAAGTTTCACATTATGCAACAATGTTAC +>chr1.1_009175857|Alt_0002 +TGCAAGGTTTCCATGTAATTGGCTAATGGCCTCCTTTCACACTGGAGACATTTAAAGTTTCACATTATGCAACAATGTTAC +>chr1.1_009175857|RefMatch_0001 +TGCAAGGTTTCCATGTAATTGGCTAATGGCCTTCTTTCACACTGGAGAGATTTAAAGTTTCACATTATGCAACAATGTTAC +>chr1.1_009175857|Ref_0001 +TGCAAGGTTTCCATGTAATTGGCTAATGGCCTCCTTTCACACTGGAGAGATTTAAAGTTTCACATTATGCAACAATGTTAC +>chr1.1_009325684|Alt_0002 +CCTTATTTCTTAGAAGCTTTGGTTACGCTTTCTTCTTTATGATGAATTGGAAATGAGTAATACTTGGGCTAATGAAATTAG 
+>chr1.1_009325684|Ref_0001 +CCTTATTTCTTAGAAGCTTTGGTTACGCTTTCTTCTTTATGATGAATTGGATATGAGTAATACTTGGGCTAATGAAATTAG +>chr1.1_009654615|Alt_0002 +GATTCATTTTTGGAAGTTGATGTGAAGTTGAATCTCACACTTGAGGTTGCTCTTATACTCAAACTAAAAATATAGCATATA +>chr1.1_009654615|Ref_0001 +GATTCATTTTTGGAAGTTGATGTGAAGTTGAATCTCACACTTGAGGTTACTCTTATACTCAAACTAAAAATATAGCATATA +>chr1.1_009788520|Alt_0002 +ATTACGTTACTCAACAACTCATTCAGGTGATTGTTCAATGCAATGATTGTTACATGGCTAATTGGTTTTCAATGTATCACC +>chr1.1_009788520|RefMatch_0001 +ATTACGTTACTCAACAACTCATTCAGGTGATTGTTCGACGCAATGATTGTTACATGGCTAATTGGTTTTCAATGTATCACC +>chr1.1_009788520|RefMatch_0002 +ATTACGTTACTCAACAACTCATTCAGGTGATTGTTTGATGCAATGATTGTTACATGGCTAATTGGTTTTCAATGTATCACC +>chr1.1_009788520|Ref_0001 +ATTACGTTACTCAACAACTCATTCAGGTGATTGTTCGATGCAATGATTGTTACATGGCTAATTGGTTTTCAATGTATCACC +>chr1.1_010059811|AltMatch_0001 +AGTGCCATGTAACTTCCTAGCACAACACCAGTTGCAAATATCTCCCTCAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010059811|Alt_0002 +AGTGCCATGTAACTTCCTAGCACAACACCAGTTGCAAATATCTCCTTCAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010059811|RefMatch_0001 +AGTGCCATGTAACTTCCTAGCACAACACCAGTAGCAAATATCTCCCTCAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010059811|RefMatch_0002 +AGTGCCATGTAACTTCCTAGCACAACACCAGTAGCAAATATCTCTCTCAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010059811|RefMatch_0003 +AGTGCCATGTAACTTCCTAGCACAACACCAGTAGCAAATATTTCCTTTAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010059811|RefMatch_0004 +AGTGCCATGTAACTTCCTAGCACAACACCAGTAGCGAATATCTCCCTCAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010059811|RefMatch_0005 +AGTGCCATGTAACTTCCTAGCACAACACCAGTAGTGAATATCTCCCTCAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010059811|Ref_0001 +AGTGCCATGTAACTTCCTAGCACAACACCAGTAGCAAATATCTCCTTCAGTTTCCAACTATCAGGTAGTGGAGATGGTTTC +>chr1.1_010182954|AltMatch_0001 +AATCACTCTATTAGTGCTTTCAGACAATCCTTTTGTTTCGTTAACTAATTGATGAATTTGTATTTTGTTCATCATATAATT +>chr1.1_010182954|Alt_0002 +AATCACTCTATTAGTGCTTTCAGACAATCCTTTTGTTTCATTAACTAATTGATGAATTTGTATTTTGTTCATCATATAATT +>chr1.1_010182954|Ref_0001 
+AATCACTCTATTAGTGCTTTCAGACAATCCTTTTGTTTCATTAAGTAATTGATGAATTTGTATTTTGTTCATCATATAATT +>chr1.1_010318074|Alt_0002 +GCTTCTGGCTGAATTAATCAATTTCAAGGTTGTTCTTTTGTGTTTATATATTGGATGAGGCTGTCTCCTTGCCCCTTGTTT +>chr1.1_010318074|Ref_0001 +GCTTCTGGCTGAATTAATCAATTTCAAGGTTGTTCTTTTGTGTTTTTATATTGGATGAGGCTGTCTCCTTGCCCCTTGTTT +>chr1.1_010513693|AltMatch_0001 +GACCTTGTCTCATTCAAAATGAAACTTTATTTTACTTGGAAAACACAGATCAGTCTATAAATATCAAGCGTTATACTTATC +>chr1.1_010513693|Alt_0002 +GACCTTGTCTCATTCAAAATGAAACTTTATTTTACTTGGAAAACACTGATCAGTCTATAAATATCAAGCGTTATACTTATC +>chr1.1_010513693|Ref_0001 +GACCTTGTCTCATTCAAAATGAAACTTTATTTTACTTGGAAAACACTTATCAGTCTATAAATATCAAGCGTTATACTTATC +>chr1.1_010654139|Alt_0002 +AAGACAAGCATATTTCATATTCTGAGCTTGCTTTTTGAATCTTTTTATAGACCTTAAAATTTAGTCCTACAATTATCTTAA +>chr1.1_010654139|Ref_0001 +AAGACAAGCATATTTCATATTCTGAGCTTGCTTTTTGAATCTTTTTATAGAGCTTAAAATTTAGTCCTACAATTATCTTAA +>chr1.1_010740435|AltMatch_0001 +GGACTCAAGAGTTCAAGAATGACAGAAATCATTTTTGTATTTTCTTAGGCAAAAATGTTGAAGGTAAGGTTTGAAATCAGT +>chr1.1_010740435|Alt_0002 +GGACTCAAGAGTTCAAGAATGACAGAAATCATTTTGGTATTTTCTTAGGCAAAAATGTTGAAGGTAAGGTTTGAAATCAGT +>chr1.1_010740435|RefMatch_0001 +GGACTCAAGAGTTCAAGAATGACAGAAATCATTTTTGTATATTCTTAGGCAAAAATGTTGAAGGTAAGGTTTGAAATCAGT +>chr1.1_010740435|Ref_0001 +GGACTCAAGAGTTCAAGAATGACAGAAATCATTTTGGTATATTCTTAGGCAAAAATGTTGAAGGTAAGGTTTGAAATCAGT +>chr1.1_011053888|AltMatch_0001 +GCCACAAAACAGATGCAACTAAAGTGTACTCTTTGCCTCTGGAAGGTGCTGATGGAAGTAGAGTTAAAGCTGCTGCTATTT +>chr1.1_011053888|Alt_0002 +GCCACAAAACAGATGCAACTAAAGTATACTATTTGCCTTTGGAAGGTGCTGATGGAAGTAGAGTTAAAGCTGCTGCTATTT +>chr1.1_011053888|RefMatch_0001 +GCCACAAAACAGATGCAACTAAAGTTTACTCTGTGCCTCTAGAAGGTGCTGATGGAAGTAGAGTTAAAGCTGCTGCTATTT +>chr1.1_011053888|RefMatch_0002 +GCCACAAAACAGATGCAACTAAAGTTTACTCTGTGCCTTTGGAAGGTGTTGATGGAAGTAGAGTTAAAGCTGCTGCTATTT +>chr1.1_011053888|Ref_0001 +GCCACAAAACAGATGCAACTAAAGTATACTATGTGCCTTTGGAAGGTGCTGATGGAAGTAGAGTTAAAGCTGCTGCTATTT +>chr1.1_011205612|Alt_0002 +ATGGGGTAATGAGTGTAAACTTTTAAGGTTATGATCCTTGTTTTTCTTCTTACAGCATGAAAGTAACCTTTTGATCTGATT 
+>chr1.1_011205612|Ref_0001 +ATGGGGTAATGAGTGTAAACTTTTAAGGTTATGATCCTTGTTTTTCTTTTTACAGCATGAAAGTAACCTTTTGATCTGATT +>chr1.1_011294632|AltMatch_0001 +ATGTCAAGTTGCATGTTTATTGCAAACTGTAAGTGAAAGTGTACTTACTTGGACAAGATTGCACCCACCGTTTAAGTATTT +>chr1.1_011294632|Alt_0002 +ATGTCAAGTTGCATGTTTATTGCAAACTGTAAGTTAAAGTGTACTTACTTGGACAAGATTGCACCCACCGTTTAAGTATTT +>chr1.1_011294632|Ref_0001 +ATGTCAAGTTGCATGTTTATTGCAAACTGTAAGTTATAGTGTACTTACTTGGACAAGATTGCACCCACCGTTTAAGTATTT +>chr1.1_011550587|Alt_0002 +GTGCTTACAAGGTAATGCTTTTGTACGAACTCTAGTATACCTGGTTAAATGTTTTGGATTGATAGTTGTACTATGACAAAA +>chr1.1_011550587|RefMatch_0001 +GTGCTTACAAGGTAATGCTTTTGTACGAACTCTAGTATGCCTGGATAAATGTTTTGGATTGATAGTTGTACTATGACAAAA +>chr1.1_011550587|Ref_0001 +GTGCTTACAAGGTAATGCTTTTGTACGAACTCTAGTATACCTGGATAAATGTTTTGGATTGATAGTTGTACTATGACAAAA +>chr1.1_011602715|Alt_0002 +CTGTAATATTTGCATAACCCAATATTTCTATTTGACATATTTGCTTGTATTTTTATCATCCAGATGTTTGATACATATAGA +>chr1.1_011602715|Ref_0001 +CTGTAATATTTGCATAACCCAATATTTCTATTTTACATATTTGCTTGTATTTTTATCATCCAGATGTTTGATACATATAGA +>chr1.1_011667046|AltMatch_0001 +CCAACAACGGTTTTGGAAGGTCCTTACGTTGGAAGAGCTGTGTCTTTTACGACGTCGTATAGGGATAGTAATAACAAGGGC +>chr1.1_011667046|Alt_0002 +CCAACAACGGTTTTGGAAGGTCCATACGTTGGACGAGCAGTGTCTTTTACGACGTCGTATAGGGATAGTAATAACAAGGGC +>chr1.1_011667046|Ref_0001 +CCAACAACGGTTTTGGAAGGTCCATACGTTGGACGTGCAGTGTCTTTTACGACGTCGTATAGGGATAGTAATAACAAGGGC +>chr1.1_011839955|Alt_0002 +ATTATACCTGTCCATCAGGCCGATAGCACACTGCGGTGACAATTTCCTTGACGTCAGTCCAATCAACAACATGACAATCAG +>chr1.1_011839955|RefMatch_0001 +ATTATACCTGTCCATCAGGCCGATAGCACACCGCGGTGACAATTTCCTTGACATCAGTCCAATCAACAACATGACAATCAG +>chr1.1_011839955|RefMatch_0002 +ATTATACCTGTCCATCAGGCCGATAGCACACTGCAGTGACAATTTCCTTGACATCAGTCCAATCAACAACATGACAATCAG +>chr1.1_011839955|RefMatch_0003 +ATTATACCTGTCCATCAGGCCGATAGCACACTGCGGTGACAATTTCCCTGACATCAGTCCAATCAACAACATGACAATCAG +>chr1.1_011839955|RefMatch_0004 +ATTATACCTGTCCATCAGGCCGATAGCACACTGCGGTGACAATTTCCTTTACATCAGTCCAATCAACAACATGACAATCAG +>chr1.1_011839955|Ref_0001 
+ATTATACCTGTCCATCAGGCCGATAGCACACTGCGGTGACAATTTCCTTGACATCAGTCCAATCAACAACATGACAATCAG +>chr1.1_012022200|Alt_0002 +GTGGAATTGATGAAAGTTTTAATTTAAGATGAATTGACTTTAGTGTAACTTGATTATTGTTTGCTGTGTTATATAACAGAT +>chr1.1_012022200|Ref_0001 +GTGGAATTGATGAAAGTTTTAATTTAAGATGAATTGACTTTAGGGTAACTTGATTATTGTTTGCTGTGTTATATAACAGAT +>chr1.1_012607158|AltMatch_0001 +CCCAAAGAAGGCTCATCTGTTTTACTTGCCATTCAGTTCTAGGATGCTAGAGGAAGCCTTGTATGTGAAGAATTCGCATAG +>chr1.1_012607158|Alt_0002 +CCCAAAGAAGGCTCATCTGTTTTACTTGCCTTTCAGTTCTAGGATGCTAGAGGAAGCCTTGTATGTGAAGAATTCGCATAG +>chr1.1_012607158|Ref_0001 +CCCAAAGAAGGCTCATCTGTTTTACTTACCTTTCAGTTCTAGGATGCTAGAGGAAGCCTTGTATGTGAAGAATTCGCATAG +>chr1.1_012673656|Alt_0002 +CTAACATAAGTGGTTGATGCGCTATACGTGTGTGTTGAAAAATATCAAATAATGAGTTTGATTTGCCCTTATAAAGAATAT +>chr1.1_012673656|RefMatch_0001 +CTAACATAAGTGGTTGATGCGCTATACGTGTGTGTTGAAAAATATCAGATATTGAGTTTGATTTGCCCTTATAAAGAATAT +>chr1.1_012673656|Ref_0001 +CTAACATAAGTGGTTGATGCGCTATACGTGTGTGTTGAAAAATATCAGATAATGAGTTTGATTTGCCCTTATAAAGAATAT +>chr1.1_012905400|Alt_0002 +TGTTTTTCGACATGTTCTAAAATTGTTGGCCTTTTGTTCCTCAAAACATAGTCTTCATGATTCAGACATGTTAAGAAAATA +>chr1.1_012905400|RefMatch_0001 +TGTTTTTCGACATGTTCTAAAATTGTTGGCCTTTCGTTCCTCAAAACATAGTTTTCATGATTCAGACATGTTAAGAAAATA +>chr1.1_012905400|RefMatch_0002 +TGTTTTTCGACATGTTCTAAAATTGTTGGCCTTTTGTTCCTCAACACATAGTTTTCATGATTCAGACATGTTAAGAAAATA +>chr1.1_012905400|Ref_0001 +TGTTTTTCGACATGTTCTAAAATTGTTGGCCTTTTGTTCCTCAAAACATAGTTTTCATGATTCAGACATGTTAAGAAAATA +>chr1.1_012952616|Alt_0002 +CTGAACATGATAGGAACGACCTTTCATTATACTCATCTGATCAAGTTGGTTTTAGAGACAGGGTTATAGGTTCTAGTGTTA +>chr1.1_012952616|Ref_0001 +CTGAACATGATAGGAACGACCTTTCATTATATTCATCTGATCAAGTTGGTTTTAGAGACAGGGTTATAGGTTCTAGTGTTA +>chr1.1_013166244|Alt_0002 +ATCGTGTCATTCCACTAAAGCGCATTAAAATCGTCTTAAGCTGGGTAGTATTTCCTTCGTGGTCATTCTAATTGCTTAAAG +>chr1.1_013166244|Ref_0001 +ATCGTGTCATTCCACTAAAGCGCAATAAAATCGTCTTAAGCTGGGTAGTATTTCCTTCGTGGTCATTCTAATTGCTTAAAG +>chr1.1_013536075|Alt_0002 +GAAATTGACAAAGGGGAACATAATTTGTTGTTCCCCTTATACTGTTATACATATGTTGGGAATCAGACTTCGTTTTGTGTC 
+>chr1.1_013536075|Ref_0001 +GAAATTGACAAAGGGGAACATAATTTGTTGTTCCCCTTATACTGTAATACATATGTTGGGAATCAGACTTCGTTTTGTGTC +>chr1.1_014007601|Alt_0002 +TTGAAGACTAGTGTCAGGAGTGTACATACATTGTTGCTTGCATCTAGAGTCAATTATTTTGATTTTGATTATTATTATTAT +>chr1.1_014007601|RefMatch_0001 +TTGAAGACTAGTGTCAGGAGTGTACATACATTGTTGCTTGTATCTAGTGTCAATTATTTTGATTTTGATTATTATTATTAT +>chr1.1_014007601|RefMatch_0002 +TTGAAGACTAGTGTCAGGAGTGTACATACGTTGTTGCTTGCATCTAGTGTCAATTATTTTGATTTTGATTATTATTATTAT +>chr1.1_014007601|Ref_0001 +TTGAAGACTAGTGTCAGGAGTGTACATACATTGTTGCTTGCATCTAGTGTCAATTATTTTGATTTTGATTATTATTATTAT +>chr1.1_014100127|Alt_0002 +ATTTTTTAACTTCTAACAATTGAAATTATGTTTGGATTATTTGATTTGCTCTTCTAAGGACATCAACAATCAAATTCATGG +>chr1.1_014100127|Ref_0001 +ATTTTTTAACTTCTAACAATTGAAATTATGTTTGGATTAGTTGATTTGCTCTTCTAAGGACATCAACAATCAAATTCATGG +>chr1.1_014207906|Alt_0002 +CCAAATGAATAATAGTTATGTGCATGAAAATTTTCTCATTATTTTATATTTCTTCCAAAACCAAGTGAAATGATTGGGTTG +>chr1.1_014207906|Ref_0001 +CCAAATGAATAATAGTTATGTGCATGAAAATTTTCTCATTATTTTATATATCTTCCAAAACCAAGTGAAATGATTGGGTTG +>chr1.1_014290088|Alt_0002 +CTCACAACAGCACCAACAGATGCATAATCTAGGCAATAATACTTTCAGGTAAAAAAGAGATCAATCTAACTTGATATTTTT +>chr1.1_014290088|Ref_0001 +CTCACAACAGCACCAACAGATGCATAATCTAGGCAATAATATTTTCAGGTAAAAAAGAGATCAATCTAACTTGATATTTTT +>chr1.1_014683124|Alt_0002 +CATTCATGGTATTAAATGATCATATATGTGTCTACATATTTGTAGTTATGCACCTAAGCTAAACATTGCCATCCCAAGACC +>chr1.1_014683124|Ref_0001 +CATTCATGGTATTAAATGATCATATATGTGTCTATATATTTGTAGTTATGCACCTAAGCTAAACATTGCCATCCCAAGACC +>chr1.1_014788595|Alt_0002 +AATGCTGAAAAGATAATTGTCACATTCACCTAATCAATTTAATGATCAGTCGCATGATTTCCTGCTTTTCAAAGGAAGGTA +>chr1.1_014788595|RefMatch_0001 +AATGCTGAAAAGATAATTGTCACATTCACCTACGCAATTTAATGATCAGTCGCATGATTTCCTGCTTTTCAAAGGAAGGTA +>chr1.1_014788595|RefMatch_0002 +AATGCTGAAAAGATAATTGTCACATTCACCTACTCAACTTAATGATCAGTCGCATGATTTCCTGCTTTTCAAAGGAAGGTA +>chr1.1_014788595|RefMatch_0003 +AATGCTGAAAAGATAATTGTCACATTCACCTACTCAATTTAATGATCAGTCACATGATTTCCTGCTTTTCAAAGGAAGGTA +>chr1.1_014788595|RefMatch_0004 
+AATGCTGAAAAGATAATTGTCACATTCACCTACTCAATTTAATGATTAGTCGCATGATTTCCTGCTTTTCAAAGGAAGGTA +>chr1.1_014788595|Ref_0001 +AATGCTGAAAAGATAATTGTCACATTCACCTACTCAATTTAATGATCAGTCGCATGATTTCCTGCTTTTCAAAGGAAGGTA +>chr1.1_014974968|Alt_0002 +CGGCAGTTGTTCGCAGGTAAGATATGCATGCCTTTTTCATGTCCAAATATATTTGTTTTCTTTTTACTGCTATACATTTTT +>chr1.1_014974968|RefMatch_0001 +CGGCAGTTGTTCGCAGGTAAGATATGCACGCCATTTTCATGTCCAAATATATTTGTTTTCTTTTTACTGCTATACATTTTT +>chr1.1_014974968|Ref_0001 +CGGCAGTTGTTCGCAGGTAAGATATGCACGCCTTTTTCATGTCCAAATATATTTGTTTTCTTTTTACTGCTATACATTTTT +>chr1.1_015044468|Alt_0002 +GATTAGCTTTTCGTACCATCAAACTCTGACATGACATGGCTCACATGGATTAGTCCATACCCATCTGCACAAGGCAGTTTA +>chr1.1_015044468|RefMatch_0001 +GATTAGCTTTTCGTACCATCAAACTCTGACATGATATGGCTCCCATGGATTAGTCCATACCCATCTGCACAAGGCAGTTTA +>chr1.1_015044468|Ref_0001 +GATTAGCTTTTCGTACCATCAAACTCTGACATGACATGGCTCCCATGGATTAGTCCATACCCATCTGCACAAGGCAGTTTA +>chr1.1_015282744|AltMatch_0001 +AGAATTAAATTTGTGTATGATGTTTGTCACTTCTTTAATCATTCTTGTTTGTTGTAAATTTTTAGCTGATTTTGTATTGAG +>chr1.1_015282744|Alt_0002 +AGAATTAAATTTGTGTATGATGTTTGTCACTTCTTTAATCATTCTTGTTTGTCGTAAATTTTTAGCTGATTTTGTATTGAG +>chr1.1_015282744|RefMatch_0001 +AGAATTAAATTTGTGTATGATGTTTGTCACTTCTTCAAGCATTCTTGTTTGTTGTAAATTTTTAGCTGATTTTGTATTGAG +>chr1.1_015282744|RefMatch_0002 +AGAATTAAATTTGTGTATGATGTTTGTCACTTCTTTAAGCATTATTGTTTGTTGTAAATTTTTAGCTGATTTTGTATTGAG +>chr1.1_015282744|RefMatch_0003 +AGAATTAAATTTGTGTATGATGTTTGTCACTTCTTTAAGCATTCTTGTTTGTTGTAAATTTTTAGCTGATTTTGTATTGAG +>chr1.1_015282744|RefMatch_0004 +AGAATTAAATTTGTGTATGATGTTTGTCACTTCTTTAGGCATTCTTGTTTGTTGTAAATTTTTAGCTGATTTTGTATTGAG +>chr1.1_015282744|Ref_0001 +AGAATTAAATTTGTGTATGATGTTTGTCACTTCTTTAAGCATTCTTGTTTGTCGTAAATTTTTAGCTGATTTTGTATTGAG +>chr1.1_015424890|Alt_0002 +CTATACTTATGCTCTCTTTCGGCAACTGCTTTTGCCACGTGTCCATGCTAACATTGCCTAATTTTCTTTCAAGTTCATTCA +>chr1.1_015424890|RefMatch_0001 +CTATACTTATGCTCTCTTTCGGCAACTGCTTTTGGCACGTGTCCATACTAACATTGCCTAATTTTCTTTCAAGTTCATTCA +>chr1.1_015424890|RefMatch_0002 
+CTATACTTATGCTCTCTTTCGGCAACTGCTTTTGGCATGTGTCCATGCTAACATTGCCTAATTTTCTTTCAAGTTCATTCA +>chr1.1_015424890|Ref_0001 +CTATACTTATGCTCTCTTTCGGCAACTGCTTTTGGCACGTGTCCATGCTAACATTGCCTAATTTTCTTTCAAGTTCATTCA +>chr1.1_015712918|Alt_0002 +GCACGGAATAGACTCTATTTGGTTGCATTTCATCCATAGTGTAGATATTTCAAGACTTACTAAGTAATAGTATATCATTTC +>chr1.1_015712918|RefMatch_0001 +GCACGGAATAGACTCTATTTGGTTGCATGTCATCCATAGTGTAGATAATTCAAGACTTACTAAGTAATAGTATATCATTTC +>chr1.1_015712918|RefMatch_0002 +GCACGGAATAGACTCTATTTGGTTGCATTTCATCCATAATGTAGATAATTCAAGACTTACTAAGTAATAGTATATCATTTC +>chr1.1_015712918|Ref_0001 +GCACGGAATAGACTCTATTTGGTTGCATTTCATCCATAGTGTAGATAATTCAAGACTTACTAAGTAATAGTATATCATTTC +>chr1.1_016006885|Alt_0002 +GGTTTTAGAGGTATCTTTATTTATAAATATGTTTTGGTTATTTATGCTTTCAACTGATTCATCTTTGTCACAATAATATTC +>chr1.1_016006885|Ref_0001 +GGTTTTAGAGGTATCTTTATTTATAAATATGTTTTGGTTATTGATGCTTTCAACTGATTCATCTTTGTCACAATAATATTC +>chr1.1_016136535|Alt_0002 +AGTTATGCATGTAATTTTGAAATTTCATAAATTTCTCTATACTTCCTTTTGTTTGTTATGTCTTCATCACATGATCAAGAA +>chr1.1_016136535|RefMatch_0001 +AGTTATGCATGTAATTTTGAAATTTCATAAAGTTATCTATACTTCCTTTTGTTTGTTATGTCTTCATCACATGATCAAGAA +>chr1.1_016136535|RefMatch_0002 +AGTTATGCATGTAATTTTGAAATTTCATAAAGTTCTCTATACTTCCTTCTGTTTGTTATGTCTTCATCACATGATCAAGAA +>chr1.1_016136535|Ref_0001 +AGTTATGCATGTAATTTTGAAATTTCATAAAGTTCTCTATACTTCCTTTTGTTTGTTATGTCTTCATCACATGATCAAGAA +>chr1.1_016221768|AltMatch_0001 +GATATTTCACCATGATGTTATCTCATAATATGATTTTCTAACACTATTCTGCTAGTAGTAGCCAGCTACATTGAGTGCTAA +>chr1.1_016221768|Alt_0002 +GATATTTCACCATGATGTTATCTCATAATATTATTTTCTAACACTATTCTGCTAGTAGTAGCCAGCTACATTGAGTGCTAA +>chr1.1_016221768|Ref_0001 +GATATTTCACCATGATGTTATCTCATAATATTATTTTCTAACTCTATTCTGCTAGTAGTAGCCAGCTACATTGAGTGCTAA +>chr1.1_016403559|Alt_0002 +CACTGGGAATATACAATAGGTAATTAAGGTTATTATGTTTCATAGTTTTATTTTATTTATTGCTTAAAGGTAATAGTAGTA +>chr1.1_016403559|Ref_0001 +CACTGGGAATATACAATAGGTAATTAAGGTTATTATGTTTCATATTTTTATTTTATTTATTGCTTAAAGGTAATAGTAGTA +>chr1.1_016522945|AltMatch_0001 +TGTGTGAAGTTTGAGAATAATTAACCAATCAAGTATTTGATAATAGACTGTTCCTAACTGTTAGGACTGAATAATGGGAAG 
+>chr1.1_016522945|Alt_0002 +TGTGTGAAGTTTGAGAATAATTAACCAATCAAGTATTTGATAATAGACCGTTCCTAACTGTTAGGACTGAATAATGGGAAG +>chr1.1_016522945|Ref_0001 +TGTGTGAAGTTTGAGAATAATTAACCAATCAAGTATTTGATAATAGATCGTTCCTAACTGTTAGGACTGAATAATGGGAAG +>chr1.1_016563045|Alt_0002 +TAGCCAGCGCTTCTAAAGGGATGTGTATTCTATCGTTGTTAAAGACATCGCTTATTATATCAGTTGATATAATTCAAAGTT +>chr1.1_016563045|RefMatch_0001 +TAGCCAGCGCTTCTAAAGGGATGTGTATTCTATCATTGTTAAAGACATCGCTTTTTATATCAGTTGATATAATTCAAAGTT +>chr1.1_016563045|RefMatch_0002 +TAGCCAGCGCTTCTAAAGGGATGTGTATTCTATCGTTATTAAAGACATCGCTTTTTATATCAGTTGATATAATTCAAAGTT +>chr1.1_016563045|RefMatch_0003 +TAGCCAGCGCTTCTAAAGGGATGTGTATTCTATCGTTGTTAAAGACATCGCGTTTTATATCAGTTGATATAATTCAAAGTT +>chr1.1_016563045|RefMatch_0004 +TAGCCAGCGCTTCTAAAGGGATGTGTATTCTATCGTTGTTAAAGGCACCGCTTTTTATATCAGTTGATATAATTCAAAGTT +>chr1.1_016563045|RefMatch_0005 +TAGCCAGCGCTTCTAAAGGGATGTGTATTCTATCGTTGTTAAAGGCATCGCTTTTTATATCAGTTGATATAATTCAAAGTT +>chr1.1_016563045|Ref_0001 +TAGCCAGCGCTTCTAAAGGGATGTGTATTCTATCGTTGTTAAAGACATCGCTTTTTATATCAGTTGATATAATTCAAAGTT +>chr1.1_016763798|AltMatch_0001 +CTTCTGGACTGTCTTTTAGCCTGAAAGGGTGGCTGATGTTATGAACACCACACCTTCTTGAACATGACTCTTTTTGCTCAT +>chr1.1_016763798|Alt_0002 +CTTCTGGACTGTCTTTTAGCCTGAAAGGGTGGCTGATGTTATGAACACCACACTTTCTTGAACATGACTCTTTTTGCTCAT +>chr1.1_016763798|RefMatch_0001 +CTTCTGGACTGTCTTTTAGCCTGAAAGGGTGACTGATGTTATGAACACCGCACCTTCTTGAACATGACTCTTTTTGCTCAT +>chr1.1_016763798|RefMatch_0002 +CTTCTGGACTGTCTTTTAGCCTGAAAGGGTGGCTGATGTTATGAACACCGCACCTTCTTGAACATGACTCTTTTTGCTCAT +>chr1.1_016763798|RefMatch_0003 +CTTCTGGACTGTCTTTTAGCCTGAAAGGGTGGCTGATGTTATGAACACCGTACTTTCTTGAACATGACTCTTTTTGCTCAT +>chr1.1_016763798|RefMatch_0004 +CTTCTGGACTGTCTTTTAGCCTGAAAGGGTGGCTGATGTTATGGACACCGCACCTTCTTGAACATGACTCTTTTTGCTCAT +>chr1.1_016763798|Ref_0001 +CTTCTGGACTGTCTTTTAGCCTGAAAGGGTGGCTGATGTTATGAACACCGCACTTTCTTGAACATGACTCTTTTTGCTCAT +>chr1.1_016976894|Alt_0002 +TGCATGTGAAACTCAGACTAGTAATTGGTCTTCAGATGAATTTTTATGCTATATTGTTTGGTGATCTCATATGGTTGTAAT +>chr1.1_016976894|RefMatch_0001 
+TGCATGTGAAACTCAGACTAGTAATTGGTTTTCAGATGAATTTTCATGCTATATTGTTTGGTGATCTCATATGGTTGTAAT +>chr1.1_016976894|Ref_0001 +TGCATGTGAAACTCAGACTAGTAATTGGTTTTCAGATGAATTTTTATGCTATATTGTTTGGTGATCTCATATGGTTGTAAT +>chr1.1_017030914|Alt_0002 +CTACCTCTTTAATTATTATTGCAGACCGTTTTGACTTAGAATCAAGCAATTCAACATCAGTCTTCCCTCTGACGACAACAC +>chr1.1_017030914|RefMatch_0001 +CTACCTCTTTAATTATTATTGCAGACCGTTTTGACTTAGAATCAAGCTGTTCAACATCAGTCTTCCCTCTGACGACAACAC +>chr1.1_017030914|Ref_0001 +CTACCTCTTTAATTATTATTGCAGACCGTTTTGACTTAGAATCAAGCAGTTCAACATCAGTCTTCCCTCTGACGACAACAC +>chr1.1_017368305|Alt_0002 +AACTGATCACTTTCGGTCACAATATCCTGTGTTAAAAGATTAGAGTGTAATCAATAGTTAATCTAAAAGGATTTGAATCAT +>chr1.1_017368305|RefMatch_0001 +AACTGATCACTTTCGGTCACAATATCCTGTGTTAAAAGATTAGAGTGCAATCAGTAGTTAATCTAAAAGGATTTGAATCAT +>chr1.1_017368305|Ref_0001 +AACTGATCACTTTCGGTCACAATATCCTGTGTTAAAAGATTAGAGTGTAATCAGTAGTTAATCTAAAAGGATTTGAATCAT +>chr1.1_017502654|Alt_0002 +GTGTCTCATTAAACCGAAATCACGGTACACAAGTCAATGCTTGTTTTTGAAGTTAACCATACTTGACTTAGAGGAGGGTTA +>chr1.1_017502654|RefMatch_0001 +GTGTCTCATTAAACCGAAATCACGGTACACAACTCAATGCTTGTTTTTGAACTTAACCATACTTGACTTAGAGGAGGGTTA +>chr1.1_017502654|RefMatch_0002 +GTGTCTCATTAAACCGAAATCACGGTACACAACTCAATGTTTGTTTTTGAAGTTAACCATACTTGACTTAGAGGAGGGTTA +>chr1.1_017502654|Ref_0001 +GTGTCTCATTAAACCGAAATCACGGTACACAACTCAATGCTTGTTTTTGAAGTTAACCATACTTGACTTAGAGGAGGGTTA +>chr1.1_017604907|Alt_0002 +TATGTTGCCTTTCATCTCATCATGTTCAGTAAGTTTTCATTCAGCATCATTTCTTACACAACAGTACAAGATTATACAGTG +>chr1.1_017604907|Ref_0001 +TATGTTGCCTTTCATCTCATCATGTTCAGTAAGTTTTCATTCAACATCATTTCTTACACAACAGTACAAGATTATACAGTG +>chr1.1_017863839|Alt_0002 +ATCTTGTAACTGTTTGCCACATTCTTCACAGATTCCACCAGCTTGCTTTTGAGATGTTGATGTCTTTGCTACTGATAGTCG +>chr1.1_017863839|RefMatch_0001 +ATCTTGTAACTGTTTGCCACATTCTTCACAAAATCCACCAGCTTGCTTTTGAGATGTTGATGTCTTTGCTACTGATAGTCG +>chr1.1_017863839|RefMatch_0002 +ATCTTGTAACTGTTTGCCACATTCTTCACAAATTCCACCAGCTTGTTTTTGAGATGTTGATGTCTTTGCTACTGATAGTCG +>chr1.1_017863839|RefMatch_0003 
+ATCTTGTAACTGTTTGCCACATTCTTCACAAATTCCACCTGCTTGCTTTTGAGATGTTGATGTCTTTGCTACTGATAGTCG +>chr1.1_017863839|RefMatch_0004 +ATCTTGTAACTGTTTGCCACATTCTTCACAAATTCCTCCAGCTTGCTTTTGAGATGTTGATGTCTTTGCTACTGATAGTCG +>chr1.1_017863839|Ref_0001 +ATCTTGTAACTGTTTGCCACATTCTTCACAAATTCCACCAGCTTGCTTTTGAGATGTTGATGTCTTTGCTACTGATAGTCG +>chr1.1_018083264|AltMatch_0001 +TGCTGCTGCTCTAGGTCGAGGGATTTCAGGTTTTATTACAGCACAAGGGAGAACAGGTCAAAATGATGCAAATGGTTTTGT +>chr1.1_018083264|AltMatch_0002 +TGCTGCTGCTCTAGGTCGAGGGATTTCGGGTTTTATTACAGCACAAGGGAGAACAGGTCAAAATGATGCAAATGGTTTTGT +>chr1.1_018083264|Alt_0002 +TGCTGCTGCTCTAGGTCGAGGGATTTCTGGTTTTATTACAGCACAAGGGAGAACAGGTCAAAATGATGCAAATGGTTTTGT +>chr1.1_018083264|Ref_0001 +TGCTGCTGCTCTAGGTCGAGGAATTTCTGGTTTTATTACAGCACAAGGGAGAACAGGTCAAAATGATGCAAATGGTTTTGT +>chr1.1_018128065|AltMatch_0001 +GTACCTTGGAGATGTTTCTTGTTGTGAGACTTTGCAGGATACTCCCAGTCATAGCATCTGAAGCTGTAGACTTTACCGACT +>chr1.1_018128065|Alt_0002 +GTACCTTGGAGATGTTTCTTGTTGTGTGACTTTGCTGGATACTCCCAGTCATAGCATCTGAAGCTGTAGACTTTACCGACT +>chr1.1_018128065|RefMatch_0001 +GTACCTTGGAGATGTTTCTTGTTATGAGACTTTGCAGGATACTCCCAGTCATAGCATCTGAAGCTGTAGACTTTACCGACT +>chr1.1_018128065|RefMatch_0002 +GTACCTTGGAGATGTTTCTTGTTATGTGACTTTGCAGGATACTCCCAGTCATAGCATCTGAAGCTGTAGACTTTACCGACT +>chr1.1_018128065|Ref_0001 +GTACCTTGGAGATGTTTCTTGTTATGTGACTTTGCTGGATACTCCCAGTCATAGCATCTGAAGCTGTAGACTTTACCGACT +>chr1.1_018362099|Alt_0002 +GAGCGTACCATAGAAGTCGCTGTTGTTATATGCTAAATTCGATTACATATATAATTCGTCCCAAAAACTCATTATTCTTAA +>chr1.1_018362099|Ref_0001 +GAGCGTACCATAGAAGTCGCTGTTGTTATATGCTAAATTCGATTAAATATATAATTCGTCCCAAAAACTCATTATTCTTAA +>chr1.1_018698343|AltMatch_0001 +GGCTGAAAGTCATACATCTCACCCAATATTTTATCTTGCTTAGCTGAGGAGTAGTCCCTAGATACTTCCATAATGGCCCTC +>chr1.1_018698343|Alt_0002 +GGCTGAAAGTCATACATCTCACCCAATATTTTATCTTGCTTATCTGAGGAGTAGTCCCTAGATACTTCCATAATGGCCCTC +>chr1.1_018698343|RefMatch_0001 +GGCTGAAAGTCATACATCTCACCCAATATTTTATCTTGCATATCTGGGGAGTAGTCCCTAGATACTTCCATAATGGCCCTC +>chr1.1_018698343|Ref_0001 
+GGCTGAAAGTCATACATCTCACCCAATATTTTATCTTGCATATCTGAGGAGTAGTCCCTAGATACTTCCATAATGGCCCTC +>chr1.1_018789059|AltMatch_0001 +CCATACCGACTTTTGAGTACACACCAACCATTATACCTGCCATAGCTTTATCGATCAGACCCTTATTGAGTCTATACTCAT +>chr1.1_018789059|AltMatch_0002 +CCATACCGACTTTTGAGTACACACCAACCATTGTACCTGCCATAGCTTTATCGATCAGACCCTTATTGAGTCTATACTCAT +>chr1.1_018789059|Alt_0002 +CCATACCGACTTTTGAGTACACACCAACCATTGTACCTGCCATAGCTTTATCAATCAGACCCTTATTGAGTCTATACTCAT +>chr1.1_018789059|RefMatch_0001 +CCATACCGACTTTTGAGTACACACCAACCATTGTACCTGCCATCACTTTATCAATCAGACCCTTATTGAGTCTATACTCAT +>chr1.1_018789059|Ref_0001 +CCATACCGACTTTTGAGTACACACCAACCATTGTACCTGCCATCGCTTTATCAATCAGACCCTTATTGAGTCTATACTCAT +>chr1.1_018934716|Alt_0002 +AACGGTTGTTCCATAGTTAACCACATCAACCCATGGTGTAGATGTTTTTAATACAACATGATTATGATCATAGATAGGAGC +>chr1.1_018934716|Ref_0001 +AACGGTTGTTCCATAGTTAACCACATCAACCCATGGTGTAGATCTTTTTAATACAACATGATTATGATCATAGATAGGAGC +>chr1.1_019009023|AltMatch_0001 +CAGGAGTTAACTCTCCCAGCTAATTTTTAGGTTGAGCTAACGTGGTGCTGCAAGTGAGGGTTCATCCTCTTTTAGACGAAA +>chr1.1_019009023|Alt_0002 +CAGGAGTTAACTCTCCCAGCTAATTTTTAGGTTGAGCTAACGCGGTGCTGCAAGTGAGGGTTCATCCTCTTTTAGACGAAA +>chr1.1_019009023|Ref_0001 +CAGGAGTTAACTCTCCCAGCTAATTTTTAGGTTGAGCTAACGCAGTGCTGCAAGTGAGGGTTCATCCTCTTTTAGACGAAA +>chr1.1_019232221|Alt_0002 +GAATATGCATTCGGAGCCCATGACTATCCATCAAGTGGTGTATTTGAGGTTGAACCTAGGCAGTGTCCTGGATTCAAGTTC +>chr1.1_019232221|Ref_0001 +GAATATGCATTCGGAGCCCATGACTATCCATCAAGTGGTGTATTTGAAGTTGAACCTAGGCAGTGTCCTGGATTCAAGTTC +>chr1.1_019281061|AltMatch_0001 +AGCAGAAGCCCATGGTGAAAGCTGACTAGCTATCTTCTCACCATCAACACCTTGTCTGAGTAAAACGTGAATTTCCTGAGA +>chr1.1_019281061|Alt_0002 +AGCAGAAGCCCATGGTGAAAGCTGACTAGCTATCTTCTCACTATCAACACCTTGTCTGAGTAAAACGTGAATTTCCTGAGA +>chr1.1_019281061|RefMatch_0001 +AGCAGAAGCCCATGGTGAAAGCTGACTAGCTATCTTCTCAGCATCAACACCTTGTCTGAGTAAAACGTGAATTTCCTGAGA +>chr1.1_019281061|RefMatch_0002 +AGCAGAAGCCCATGGTGAAAGCTGACTAGCTATCTTCTCAGCATTAACACCTTGTCTGAGTAAAACGTGAATTTCCTGAGA +>chr1.1_019281061|Ref_0001 
+AGCAGAAGCCCATGGTGAAAGCTGACTAGCTATCTTCTCAGTATCAACACCTTGTCTGAGTAAAACGTGAATTTCCTGAGA +>chr1.1_019387175|Alt_0002 +TACTCCTGGTGTTGCACCGGTAATAGTCCTTTCTGAACTCGTCATTCTTCAACAGCTCATGCATTCTCTCAGCCTGAAAGC +>chr1.1_019387175|Ref_0001 +TACTCCTGGTGTTGCACCGGTAATAGTCCTTTCTGAACTCATCATTCTTCAACAGCTCATGCATTCTCTCAGCCTGAAAGC +>chr1.1_019596141|Alt_0002 +CGCCGTAAAATAAATGGCCCTAATATAGACTTCATACGACTAGTTAAATCTTTATCTTCTGCACTAAGTAACTTTTTTAAG +>chr1.1_019596141|Ref_0001 +CGCCGTAAAATAAATGGCCCTAATATAGACTTCATACGACTAGTTAAATCTCTATCTTCTGCACTAAGTAACTTTTTTAAG +>chr1.1_019780298|Alt_0002 +GTGTACAATTCATGAAAATATCTTTAAACTATGTTTACTTTGTTCTTAATTACAGCAGATTCTGTAAGTCCGAATTTACAT +>chr1.1_019780298|RefMatch_0001 +GTGTACAATTCATGAAAATATCTTTAAACTATGTTTTCTTTGTTCTTCATTACAGCAGATTCTGTAAGTCCGAATTTACAT +>chr1.1_019780298|RefMatch_0002 +GTGTACAATTCATGAAAATATCTTTAAACTTTGTTTTCTTTGTTCTTAATTACAGCAGATTCTGTAAGTCCGAATTTACAT +>chr1.1_019780298|Ref_0001 +GTGTACAATTCATGAAAATATCTTTAAACTATGTTTTCTTTGTTCTTAATTACAGCAGATTCTGTAAGTCCGAATTTACAT +>chr1.1_020097917|Alt_0002 +CCTCACCACAAACGGGTGATGAAATGATGGCATTTGGTGATGGCATTTGTTGGGAGGTTGGTTATGTAGTAGTGGAATTTG +>chr1.1_020097917|RefMatch_0001 +CCTCACCACAAACGGGTGATGAAATGATGGCATTTGATGATGGCATATGTTGGGAGGTTGGTTATGTAGTAGTGGAATTTG +>chr1.1_020097917|RefMatch_0002 +CCTCACCACAAACGGGTGATGAAATGATGGCATTTGGTGATGGCATATGTTGGAAGGTTGGTTATGTAGTAGTGGAATTTG +>chr1.1_020097917|Ref_0001 +CCTCACCACAAACGGGTGATGAAATGATGGCATTTGGTGATGGCATATGTTGGGAGGTTGGTTATGTAGTAGTGGAATTTG +>chr1.1_020310833|Alt_0002 +GTTCAATAGACTCTAATCTGTTAATTTCGCTTTCGCTTTCGCTATATATCTCATCTTGATATAACTTCGCATTAGATTTTT +>chr1.1_020310833|Ref_0001 +GTTCAATAGACTCTAATCTGTTAATTTCGCTTTCGCTTTCGCTATATATCTTATCTTGATATAACTTCGCATTAGATTTTT +>chr1.1_020548930|Alt_0002 +GGCAACTTTGGATTTATTCCCGATGTGTTTAGAGTATTGTCTCTGCTACAGGGTAGAGTGGAAGGTTCTAAGTTATATGCA +>chr1.1_020548930|RefMatch_0001 +GGCAACTTTGGATTTATTCCCGATGTGTTCAGAGTATTGTCTCTGCAACAGGGTAGAGTGGAAGGTTCTAAGTTATATGCA +>chr1.1_020548930|RefMatch_0002 +GGCAACTTTGGATTTATTCCCGATGTGTTTAGAGTATTATCTCTGCAACAGGGTAGAGTGGAAGGTTCTAAGTTATATGCA 
+>chr1.1_020548930|Ref_0001 +GGCAACTTTGGATTTATTCCCGATGTGTTTAGAGTATTGTCTCTGCAACAGGGTAGAGTGGAAGGTTCTAAGTTATATGCA +>chr1.1_020860458|Alt_0002 +TGTCAATTTTTCCAACTTGTCACTTTAAACCTTTAATTGAGGCTTTAATATTGTTCCAATGCTTCATGGAAATTTAAATGA +>chr1.1_020860458|Ref_0001 +TGTCAATTTTTCCAACTTGTCACTTTAAACCTTTAATTGAGGCTTTAATATTCTTCCAATGCTTCATGGAAATTTAAATGA +>chr1.1_021161903|Alt_0002 +ACTTATTTCAATCCAAACCAGACATATATGAGGTCTTTTTATGAAGTGTTAACAATTATATTTTCAGATAAATTTAATTTC +>chr1.1_021161903|Ref_0001 +ACTTATTTCAATCCAAACCAGACATATATGAGGTCTTTTTATGAAGTGTTCACAATTATATTTTCAGATAAATTTAATTTC +>chr1.1_022487705|AltMatch_0001 +AGCTGAGCTTATCCGTCTATCTACAGTACAACAATAATTGTTTTTAATATTTAAACTGGTTTCGCACTTCAACACTACTCA +>chr1.1_022487705|Alt_0002 +AGCTGAGCTTATCCGTCTATCTACAGTACGACAATAATTGTTTTTAATATTTAAACTGGTTTCGCACTTCAACACTACTCA +>chr1.1_022487705|Ref_0001 +AGCTGAGCTTATCCGTCTATCTACAGTACGACAATAATTGTTTTTAATATTTGAACTGGTTTCGCACTTCAACACTACTCA +>chr1.1_022582932|Alt_0002 +TCATTCACATTTAGAGAAGCAGTTAGAGATGTGGAGCTGCAAGGTTACTCCATTCCAAAAGGTTGGAAAGTCCTTCCACTC +>chr1.1_022582932|RefMatch_0001 +TCATTCACATTTAGAGAAGCAGTTAGAGATGTGAAGCTACAAGGTTACTCCATTCCAAAAGGTTGGAAAGTCCTTCCACTC +>chr1.1_022582932|RefMatch_0002 +TCATTCACATTTAGAGAAGCAGTTAGAGATGTGGAACTACAAGGTTACTCCATTCCAAAAGGTTGGAAAGTCCTTCCACTC +>chr1.1_022582932|RefMatch_0003 +TCATTCACATTTAGAGAAGCAGTTAGAGATGTGGAGATACAAGGTTACTCCATTCCAAAAGGTTGGAAAGTCCTTCCACTC +>chr1.1_022582932|RefMatch_0004 +TCATTCACATTTAGAGAAGCAGTTAGAGATGTGGAGCTACAAGGTTACACCATTCCAAAAGGTTGGAAAGTCCTTCCACTC +>chr1.1_022582932|RefMatch_0005 +TCATTCACATTTAGAGAAGCAGTTAGAGATGTGGAGTTACAAGGTTACTCCATTCCAAAAGGTTGGAAAGTCCTTCCACTC +>chr1.1_022582932|Ref_0001 +TCATTCACATTTAGAGAAGCAGTTAGAGATGTGGAGCTACAAGGTTACTCCATTCCAAAAGGTTGGAAAGTCCTTCCACTC +>chr1.1_023060630|AltMatch_0001 +CAAGACAAAGAGGTGGAGAGGGTTGAGTTGTTTGTTAGAAATACTGGATGAGACTTGGGTAGTAGTATGGTTTTAAAGGGT +>chr1.1_023060630|Alt_0002 +CAAGACAAAGAGGTGGAGAGGGTTGAGTTGTTTGTTAGAAATATTGGATGAGACTTGGGTAGTAGTATGGTTTTAAAGGGT +>chr1.1_023060630|Ref_0001 
+CAAGACAAAGAGGTGGAGAGGGTTGACTTGTTTGTTAGAAATATTGGATGAGACTTGGGTAGTAGTATGGTTTTAAAGGGT +>chr1.1_023241568|Alt_0002 +GAAGAGACATAAAGCTTACATAAATCATATTTTGATTGTATCTATACTGTAAATCGTATAAAACTGACGGTTCGTTTAAAT +>chr1.1_023241568|Ref_0001 +GAAGAGACATAAAGCTTACATAAATCATATTTTGATTGTATCTATACTGTAGATCGTATAAAACTGACGGTTCGTTTAAAT +>chr1.1_023395952|Alt_0002 +CGAAGAAGATAGTCATGTAATGTTAAAAACTGTAAGTTCAGCTTTGGTAACGCTAAACAACCTTCTCCAGAATAATTTATG +>chr1.1_023395952|Ref_0001 +CGAAGAAGATAGTCATGTAATGTTAAAAATTGTAAGTTCAGCTTTGGTAACGCTAAACAACCTTCTCCAGAATAATTTATG +>chr1.1_023535207|AltMatch_0001 +TGTTTGAAGAAAGGCTGGTTAACTTCAAGCTGTGTATGATGGTTTTCCATAATACAATGTACAATATATGATGAATATATG +>chr1.1_023535207|Alt_0002 +TGTTTGAAGAAAGGCTGGTTAACTTCAAGCTGTGAATGATGGTTTTCCATAATACAATGTACAATATATGATGAATATATG +>chr1.1_023535207|Ref_0001 +TGTTTGAAGAAAGGCTGGTTAACTTCAAGCTGTGAATGATGGTTTTCCCTAATACAATGTACAATATATGATGAATATATG +>chr1.1_023709651|Alt_0002 +GAGGGTAGAGACATGACATATTTTGAATTTCTAAGGACATTTATATATGTGAAGTACAATATATGATGGCATCCATGTAAG +>chr1.1_023709651|RefMatch_0001 +GAGGGTAGAGACATGACATATTTTGAATTTCTAACGAGATTTATATATGTGAAGTACAATATATGATGGCATCCATGTAAG +>chr1.1_023709651|RefMatch_0002 +GAGGGTAGAGACATGACATGTTTTGAATTTCTAACGAGATTTATATATGTCAAGTACAATATATGATGGCATCCATGTAAG +>chr1.1_023709651|RefMatch_0003 +GAGGGTAGAGACATGACATGTTTTGAATTTCTAACGAGATTTATATATGTGAAGTACAATATATGATGGCATCCATGTAAG +>chr1.1_023709651|Ref_0001 +GAGGGTAGAGACATGACATATTTTGAATTTCTAAGGAGATTTATATATGTGAAGTACAATATATGATGGCATCCATGTAAG +>chr1.1_024489859|Alt_0002 +GCTAGAACCCTAACACCCGACTTAACAAGATTCCCTAGCATAGAGATGGTAGGAATCTCAAGGTTCTGAAAATCATATACC +>chr1.1_024489859|Ref_0001 +GCTAGAACCCTAACACCCGACTTAACAAGTTTCCCTAGCATAGAGATGGTAGGAATCTCAAGGTTCTGAAAATCATATACC +>chr1.1_024760634|AltMatch_0001 +CACCATCAAACCAAACGATACCTTTTTCCTTGTTTCAACTATAAACTTCCAAAACCTCACTACTTATCCTTCTGTTGAAGT +>chr1.1_024760634|Alt_0002 +CACCATCAAACCAAACGATACTTTTTTCCTTGTTTCAACTATAAACTTCCAAAACCTCACTACTTATCCTTCTGTTGAAGT +>chr1.1_024760634|RefMatch_0001 +CACCATCAAACCAAACGATACTTTCTTCCTAGTTTCAACTATAAACTTCCAAAACCTCACTACTTATCCTTCTGTTGAAGT 
+>chr1.1_024760634|RefMatch_0002 +CACCATCAAACCAAACGATACTTTCTTCCTTGTTTCAACTATAAACTTTCAAAACCTCACTACTTATCCTTCTGTTGAAGT +>chr1.1_024760634|RefMatch_0003 +CACCATCAAACCAAACGATACTTTCTTCCTTGTTTCAACTGTAAACTTCCAAAACCTCACTACTTATCCTTCTGTTGAAGT +>chr1.1_024760634|Ref_0001 +CACCATCAAACCAAACGATACTTTCTTCCTTGTTTCAACTATAAACTTCCAAAACCTCACTACTTATCCTTCTGTTGAAGT +>chr1.1_024951410|AltMatch_0001 +ACAGTTTATAATCAGACACAATAGAAACTTTCAGTGATGTTGCACATATAATTGGATGATAATGTGGAGAGAGAAAATACA +>chr1.1_024951410|Alt_0002 +ACAGTTTATAATCAGACACAATAGAAACTTTCAGTGATGTTGCACACATAATTGGATGATAATGTGGAGAGAGAAAATACA +>chr1.1_024951410|Ref_0001 +ACAGTTTATAATCAGACACAATAGAAACTTTCAGTGATGTTGCTCACATAATTGGATGATAATGTGGAGAGAGAAAATACA +>chr1.1_025281938|Alt_0002 +CATTATGGTACTGAGATTTAAAAGGTTGTTTAATTGTTAGAGTCTGGTTTGAGAAGAAAGTAGACTGGCCACGTTATATAT +>chr1.1_025281938|RefMatch_0001 +CATTATGGTACTGAGATTTAAAAGGTTGTTTAATTGTTAGAGTTTGGATTGAGAAGAAAGTAGACTGGCCACGTTATATAT +>chr1.1_025281938|Ref_0001 +CATTATGGTACTGAGATTTAAAAGGTTGTTTAATTGTTAGAGTCTGGATTGAGAAGAAAGTAGACTGGCCACGTTATATAT +>chr1.1_025474859|Alt_0002 +AACATTCGATCTCATATTCTCCACTTCTTGAGCTCGCTTGCTATCTTTAACTAGTTGGTTTATCTTCTTTATCTTTTTCAA +>chr1.1_025474859|RefMatch_0001 +AACATTCGATCTCATATTCTCCACTTCTTGAGCTCGCTTGCTATCTTTCACTAATTGGTTTATCTTCTTTATCTTTTTCAA +>chr1.1_025474859|RefMatch_0002 +AACATTCGATCTCATATTCTCCACTTCTTGAGCTCGTTTGCTATCTTTCACTAGTTGGTTTATCTTCTTTATCTTTTTCAA +>chr1.1_025474859|Ref_0001 +AACATTCGATCTCATATTCTCCACTTCTTGAGCTCGCTTGCTATCTTTCACTAGTTGGTTTATCTTCTTTATCTTTTTCAA +>chr1.1_025688638|Alt_0002 +GGTCAGTTGCATTTTGGGTATTTACACCTACATCTTACTTTGCCCAATATAGTTTGCATGAATAAAGACCAAATGAAAGTA +>chr1.1_025688638|RefMatch_0001 +GGTCAGTTGCATTTTGGGTATTTACACCTACATCTTGCTTTGCTCAATATAGTTTGCATGAATAAAGACCAAATGAAAGTA +>chr1.1_025688638|RefMatch_0002 +GGTCAGTTGCATTTTGGGTATTTACACCTACATCTTGCTTTTCCCAATATAGTTTGCATGAATAAAGACCAAATGAAAGTA +>chr1.1_025688638|RefMatch_0003 +GGTCAGTTGCATTTTGGGTATTTACACCTACATCTTGCTTTTCCCAATGTAGTTTGCATGAATAAAGACCAAATGAAAGTA +>chr1.1_025688638|Ref_0001 
+GGTCAGTTGCATTTTGGGTATTTACACCTACATCTTGCTTTGCCCAATATAGTTTGCATGAATAAAGACCAAATGAAAGTA +>chr1.1_025728959|Alt_0002 +GTTCCCCACATGTTCATTTTTATTGTCTTTTTCCAATTCATGGTTGTGATTCTGTACCTTGATATGGGCACTTTCTTTGGT +>chr1.1_025728959|RefMatch_0001 +GTTCCCCACATGTTCATTTTTATTGTCTCTTCCCAATTCATGGTTGTGATTCTGTACCTTGATATGGGCACTTTCTTTGGT +>chr1.1_025728959|RefMatch_0002 +GTTCCCCACATGTTCATTTTTATTGTCTCTTTCCAATTCATGGCTGTGATTCTGTACCTTGATATGGGCACTTTCTTTGGT +>chr1.1_025728959|RefMatch_0003 +GTTCCCCACATGTTCATTTTTATTGTCTCTTTCCAATTCATGGTTGTGATTCCGTACCTTGATATGGGCACTTTCTTTGGT +>chr1.1_025728959|RefMatch_0004 +GTTCCCCACATGTTCATTTTTATTGTCTCTTTCCAATTCATGGTTGTGATTCTATACCTTGATATGGGCACTTTCTTTGGT +>chr1.1_025728959|Ref_0001 +GTTCCCCACATGTTCATTTTTATTGTCTCTTTCCAATTCATGGTTGTGATTCTGTACCTTGATATGGGCACTTTCTTTGGT +>chr1.1_026050142|Alt_0002 +AGAGACCTATGCTGCAGTCCTTGCGCAGACCCATTCCTTCTTGAAAATGCCCCCACTGCACCAGATAATCTATCTCGAATT +>chr1.1_026050142|Ref_0001 +AGAGACCTATGCTGCAGTCCTTGCGCAGAGCCATTCCTTCTTGAAAATGCCCCCACTGCACCAGATAATCTATCTCGAATT +>chr1.1_026445102|AltMatch_0001 +CAGACTCCCCGATATAGAACCTAGCTACGTCTTCAGTCAGTATATCTTTCCGCATCAGCAACGTCATCATATCTCCACCGG +>chr1.1_026445102|AltMatch_0002 +CAGACTCCCCGATATAGAACCTAGCTTCATCTTCAGTCAGTACTTCTTTCCGCATCAGCAACGTCATCATATCTCCACCGG +>chr1.1_026445102|Alt_0002 +CAGACTCCCCGATATAGAACCTAGCTACATCTTCAGTCAGTATATCTTTCCGCATCAGCAACGTCATCATATCTCCACCGG +>chr1.1_026445102|Ref_0001 +CAGACTCCCCGATATAGAACCTAGCTACATCTTCAGTCAGTATATCTTTGCGCATCAGCAACGTCATCATATCTCCACCGG +>chr1.1_026502181|AltMatch_0001 +TCACCTTCAAGTACACACAAATTTATTACAAATTGGACCATGAATAATTGGTCACACAAACTCAAACCCTTTAGTTACTGC +>chr1.1_026502181|Alt_0002 +TCACCTTCAAGTACACACAAATTTATTACAAATTGGACCGTGAATAATTGGTCACACAAACTCAAACCCTTTAGTTACTGC +>chr1.1_026502181|Ref_0001 +TCACCTTCAAGTACACACAAATTTATTACAAAATGGACCGTGAATAATTGGTCACACAAACTCAAACCCTTTAGTTACTGC +>chr1.1_026672067|Alt_0002 +CTAAATAAAGTAACACAATCAACCTTTATTAGGGGGTTCTCTACTGTGCCTTTCAATCAACTCCATTGTGATACGATTATG +>chr1.1_026672067|Ref_0001 
+CTAAATAAAGTAACACAATCAACCTTTATTAGGGGGTTCTCTACTGTGTCTTTCAATCAACTCCATTGTGATACGATTATG +>chr1.1_026865008|Alt_0002 +CCTACGGTTAGTAGTGAATTATGTCTTAGGTTTAATTCAATAGTGATCAAGTAAAGAAAGTCATTTAGCGAAATAATTACT +>chr1.1_026865008|Ref_0001 +CCTACGGTTAGTAGTGAATTATGTCTTAGATTTAATTCAATAGTGATCAAGTAAAGAAAGTCATTTAGCGAAATAATTACT +>chr1.1_026940687|AltMatch_0001 +ACCGGTGTTTATTGAGAATGTTCAAACCCATATTTGTAACGTAGCAAAATCTTCAACTCTGAAGTCTCACTTAAAATTCCA +>chr1.1_026940687|Alt_0002 +ACCGGTGTTTATTGAGAATGTTCAAACCCATATTTGTGACGTAGCAAAATCTTCAACTCTGAAGTCTCACTTAAAATTCCA +>chr1.1_026940687|Ref_0001 +ACCGGTGTTTATTGAGAATGTTCAAACCCATATTTGTGATGTAGCAAAATCTTCAACTCTGAAGTCTCACTTAAAATTCCA +>chr1.1_027050989|AltMatch_0001 +GATCCTAGTAAAGATGTTCATGGTTGGGGAGTTAATGAACGTGGTGTTTCGTTTACTTTTGGTGCTAGTAAGATTCAAGAG +>chr1.1_027050989|Alt_0002 +GATCCTAGTAAAGATGTTCATGGTTGGGGAGTTAATGAACGTGGTGTTTCATTTACTTTTGGTGCTAGTAAGATTCAAGAG +>chr1.1_027050989|RefMatch_0001 +GATCCTAGTAAAGATGTTCATGGTTGGGGTGTTAATGAACGTGGTGTTTCGTTTACTTTTGGTGCTAGTAAGATTCAAGAG +>chr1.1_027050989|Ref_0001 +GATCCTAGTAAAGATGTTCATGGTTGGGGTGTTAATGAACGTGGTGTTTCATTTACTTTTGGTGCTAGTAAGATTCAAGAG +>chr1.1_027377103|Alt_0002 +ATCCAAACTATGGATATATCCCAATGTGGCATTATCTTCCTCAATCAGCTCGTGATACGTCCCAAGATCATGAGCTCAGGC +>chr1.1_027377103|Ref_0001 +ATCCAAACTATGGATATATCCCAATGTGGCATTATCTTCCTCAATCAGCTCGCGATACGTCCCAAGATCATGAGCTCAGGC +>chr1.1_027461652|Alt_0002 +GCTTCTTAATAACACTAAATAGATGGGTTCTTATTTGTCACATTAAATTATCAATGTTTCTGGAATCCTGAGGTAGAGATG +>chr1.1_027461652|Ref_0001 +GCTTCTTAATAACACTAAATAGATGGGTTCTTATTTGTCACATTAAGTTATCAATGTTTCTGGAATCCTGAGGTAGAGATG +>chr1.1_027966972|AltMatch_0001 +TTTCGGTTTCCAAAGTTAATCTTATTGTTCATTGTAACTGAAAGATATCCATGTGTATGTGATGGTCTTCAACTTGTCTCT +>chr1.1_027966972|AltMatch_0002 +TTTCGGTTTCCAAAGTTAATCTTATTGTTCATTGTAATTGAAAGATATCCATGTGTATGTGATGGTCTTCAACTTGTCTCT +>chr1.1_027966972|Alt_0002 +TTTCGGTTTCCAAAGTTAATCTTATTGTTCATTATAATTGAAAGATATCCATGTGTATGTGATGGTCTTCAACTTGTCTCT +>chr1.1_027966972|Ref_0001 +TTTCGGTTTCCAAAGTTAATCTTATTGTTCATTATAATTGAAATATATCCATGTGTATGTGATGGTCTTCAACTTGTCTCT 
+>chr1.1_028084165|Alt_0002 +GACTGGTGTCAATGGTTACACAGTATTCAGGCCTTCGAATGGTCAGTAGAGGGAAAGGATGGCAACGAATGTAATAATGTG +>chr1.1_028084165|RefMatch_0001 +GACTGGTGTCAATGGTTACACAGTATTCAGACCTTCAAATGGTCAGTAGAGGGAAAGGATGGCAACGAATGTAATAATGTG +>chr1.1_028084165|Ref_0001 +GACTGGTGTCAATGGTTACACAGTATTCAGGCCTTCAAATGGTCAGTAGAGGGAAAGGATGGCAACGAATGTAATAATGTG +>chr1.1_028189850|Alt_0002 +TGATGGATAATCATAGGCAAGCACTTGGGAGCTTTGCAATGGTAGCATTACTCTTTCTTGGTTTTATTTTCATTTACCATT +>chr1.1_028189850|Ref_0001 +TGATGGATAATCATAGGCAAGCACTTGGGAGTTTTGCAATGGTAGCATTACTCTTTCTTGGTTTTATTTTCATTTACCATT +>chr1.1_028465946|Alt_0002 +GTTGGCATGGACTGATTCTTGACTCCTGTTGGTAGTTATATATAATTTGTGCCTTTGGAATTTGTTGGGCGTTTCTTTGGT +>chr1.1_028465946|Ref_0001 +GTTGGCATGGACTGATTCTTGACTCCTGTTGGTAGTTATATATAGTTTGTGCCTTTGGAATTTGTTGGGCGTTTCTTTGGT +>chr1.1_028687110|Alt_0002 +TCCTAAGGCTTTCTTTGACCTATTGCTACCTTGTGAGTGATATGAATTGTATATTTTGCTGCCTCCTAATAATAAGACTGT +>chr1.1_028687110|RefMatch_0001 +TCCTAAGGCTTTCTTTGACCTATTGCTACCTTGTGAGTAATATGAATTATATATTTTGCTGCCTCCTAATAATAAGACTGT +>chr1.1_028687110|RefMatch_0002 +TCCTAAGGCTTTCTTTGACCTATTGCTACCTTGTGAGTAATATGAATTGTCTATTTTGCTGCCTCCTAATAATAAGACTGT +>chr1.1_028687110|RefMatch_0003 +TCCTAAGGCTTTCTTTGACCTATTGCTACCTTGTTAGTAATATGAATTGTATATTTTGCTGCCTCCTAATAATAAGACTGT +>chr1.1_028687110|Ref_0001 +TCCTAAGGCTTTCTTTGACCTATTGCTACCTTGTGAGTAATATGAATTGTATATTTTGCTGCCTCCTAATAATAAGACTGT +>chr1.1_028857948|AltMatch_0001 +GTTCCAAATGCTTATTTTCATAGTATAAATAATTAATTAATGTGTCTTTTTTCCTAAAAAATAGGTGGGGTGTGAGTGTTA +>chr1.1_028857948|Alt_0002 +GTTCCAAATGCTTATTTTCATAGTATAAATAATTAATTAATGTGTCTTTTCTCCTAAAAAATAGGTGGGGTGTGAGTGTTA +>chr1.1_028857948|Ref_0001 +GTTCCAAATGCTTATTTTCATAGTATAAATAAATAATTAATGTGTCTTTTCTCCTAAAAAATAGGTGGGGTGTGAGTGTTA +>chr1.1_028955804|Alt_0002 +GTCCTTTATTTTTCCAACATAATTGTTGTTCTTGACAAATTAGGTTATGGTTCTAATATGAAGAATCTCATGTATATAAGC +>chr1.1_028955804|RefMatch_0001 +GTCCTTTATTTTTCCAACATAATTGTTGTTCTTAACAAATTAGGTTGTGGTTCTAATATGAAGAATCTCATGTATATAAGC +>chr1.1_028955804|RefMatch_0002 
+GTCCTTTATTTTTCCAACATAATTGTTGTTCTTGACAAATTAGGTTGTAGTTCTAATATGAAGAATCTCATGTATATAAGC +>chr1.1_028955804|RefMatch_0003 +GTCCTTTATTTTTCCAACATAATTGTTGTTCTTGACAAATTTGGTTGTGGTTCTAATATGAAGAATCTCATGTATATAAGC +>chr1.1_028955804|RefMatch_0004 +GTCCTTTATTTTTCCAACATAATTGTTGTTCTTGGCAAATTAGGTTGTGGTTCTAATATGAAGAATCTCATGTATATAAGC +>chr1.1_028955804|Ref_0001 +GTCCTTTATTTTTCCAACATAATTGTTGTTCTTGACAAATTAGGTTGTGGTTCTAATATGAAGAATCTCATGTATATAAGC +>chr1.1_029222133|Alt_0002 +GAACTAACGGGTTTAATCTAGAGTCCACATATTTAGAAACCATATTTGAAGAACTTGTTTCTGTTTTTTTTAGTAAAAAAA +>chr1.1_029222133|Ref_0001 +GAACTAACGGGTTTAATCTAGAGTCCACACATTTAGAAACCATATTTGAAGAACTTGTTTCTGTTTTTTTTAGTAAAAAAA +>chr1.1_029326140|Alt_0002 +CACATTTCCATTCAACCTGACAATGACTTGGTGCTTTCTGCTGATGAAATTTGCACAATATATGATGTACTACAGGCTTCA +>chr1.1_029326140|RefMatch_0001 +CACATTTCCATTCAACCTGACAATGACTTAGTGCTCTCTGCTGATGAAATTTGCACAATATATGATGTACTACAGGCTTCA +>chr1.1_029326140|RefMatch_0002 +CACATTTCCATTCAACCTGACAATGACTTAGTGCTTTCTGCTGATGAAATTTGAACAATATATGATGTACTACAGGCTTCA +>chr1.1_029326140|Ref_0001 +CACATTTCCATTCAACCTGACAATGACTTAGTGCTTTCTGCTGATGAAATTTGCACAATATATGATGTACTACAGGCTTCA +>chr1.1_029362039|Alt_0002 +GGGAGCAATGTACTTTGCAATGTTATTCATCAGTTGGGATCTTAATAACTCAGCTAGAAAGTAAGGAACCTAATTTATCAC +>chr1.1_029362039|Ref_0001 +GGGAGCAATGTACTTTGCAATGTTATTCATCAGTTGGGATCTGAATAACTCAGCTAGAAAGTAAGGAACCTAATTTATCAC +>chr1.1_029584457|Alt_0002 +CAAATTAAAATACAAAGAATTAATCAGAGGTAATGACTTATACCAGTTTTTCTGAAATGTAGTTAGCAAGGAGTGATCCTC +>chr1.1_029584457|Ref_0001 +CAAATTAAAATACAAAGAATTAATCAGAGGTGATGACTTATACCAGTTTTTCTGAAATGTAGTTAGCAAGGAGTGATCCTC +>chr1.1_030458048|Alt_0002 +GAAACCAATACCACAGTCTGCTAGGCGCTTCCTGTTTTGTTGCCAATTGCGGTCACAACGCCAACAATCGTCAATAGGGTT +>chr1.1_030458048|RefMatch_0001 +GAAACCAATACCACAGTCTGCCAGACGCTTCCTGTTTTGTTGCCAATTGCGGTCACAACGCCAACAATCGTCAATAGGGTT +>chr1.1_030458048|RefMatch_0002 +GAAACCAATACCACAGTCTGCTAGACGCTTCCTGTTTTGTTGCCAATTACGGTCACAACGCCAACAATCGTCAATAGGGTT +>chr1.1_030458048|RefMatch_0003 
+GAAACCAATACCACAGTCTGCTAGACGCTTCCTGTTTTGTTGCCAATTGCAGTCACAACGCCAACAATCGTCAATAGGGTT +>chr1.1_030458048|Ref_0001 +GAAACCAATACCACAGTCTGCTAGACGCTTCCTGTTTTGTTGCCAATTGCGGTCACAACGCCAACAATCGTCAATAGGGTT +>chr1.1_030744456|Alt_0002 +CAAACGTGATTACTAGACCTAATGATATATATGCCTCCTGTTTCGTTAACATACAGTGCGTCTGATCAAGTCGTACGGAAT +>chr1.1_030744456|Ref_0001 +CAAACGTGATTACTAGACCTAATGATATATATGCCTCCTGTTTGGTTAACATACAGTGCGTCTGATCAAGTCGTACGGAAT +>chr1.1_030969636|AltMatch_0001 +GGTGAGTGCTCACAAAGCTCCTAGTCTTTTTACTTCTCCATCCCTTTCTGAATATTGTTCTTATTCTTGTATTTGCTTGTA +>chr1.1_030969636|Alt_0002 +GGTGAGTGCTCACAAAGCTCCTAGTCTTTTCAGTTCTCCATCCCTTTCTGAATATTGTTCTTATTCTTGTATTTGCTTGTA +>chr1.1_030969636|RefMatch_0001 +GGTGAGTGCTCACAAAGCTCCTAGTCTCTTCAGTTCTCCATCCTTTTCTGAATATTGTTCTTATTCTTGTATTTGCTTGTA +>chr1.1_030969636|RefMatch_0002 +GGTGAGTGCTCACAAAGCTCCTAGTCTCTTTACTTCTCCATCCCTTTCTGAATATTGTTCTTATTCTTGTATTTGCTTGTA +>chr1.1_030969636|Ref_0001 +GGTGAGTGCTCACAAAGCTCCTAGTCTCTTCAGTTCTCCATCCCTTTCTGAATATTGTTCTTATTCTTGTATTTGCTTGTA +>chr1.1_031180522|Alt_0002 +GGGGGACCAATATCTGATACCTGTCTTATTTTGGCTCTAGAATTGAAAACAGAACATTTTTACCATGATTTTCTTTTATAT +>chr1.1_031180522|Ref_0001 +GGGGGACCAATATCTGATACCTGTCTTATTTTGGCTCTATAATTGAAAACAGAACATTTTTACCATGATTTTCTTTTATAT +>chr1.1_031360007|Alt_0002 +CAATTTTACAAGATATAAATTCTGCTTTTATGTTTCAGTGTTATGTAATTTCTGTGGTGTAATAGAAGTATCAATCTCAAA +>chr1.1_031360007|Ref_0001 +CAATTTTACAAGATATAAATTCTGCTTTTATGTTTCAGTGTTATGTAATTTCTATGGTGTAATAGAAGTATCAATCTCAAA +>chr1.1_031442927|Alt_0002 +TAATATCAACAGCATTCTTTGCAGCAACTGTCTCCTCGGCATGAATGGTAATTGCCCCTTGATTTATTTGACTACTATTCC +>chr1.1_031442927|RefMatch_0001 +TAATATCAACAGCATTCTTTGCAGCAACTGTCTCCTCAACATGAATGGTAATTGCCCCTTGATTTATTTGACTACTATTCC +>chr1.1_031442927|Ref_0001 +TAATATCAACAGCATTCTTTGCAGCAACTGTCTCCTCAGCATGAATGGTAATTGCCCCTTGATTTATTTGACTACTATTCC +>chr1.1_031546487|Alt_0002 +AATGAATTACTATGACTATAATACTAAGTCCGATCCAATCATGTGTATGTTTTCTTCTTCTGGGTTATGAGAAGAATTAGT +>chr1.1_031546487|RefMatch_0001 +AATGAATTACTATGACTATAATACTAAGTCCTATCCAATCATGTTTATGTTTTCTTCTTCTGGGTTATGAGAAGAATTAGT 
+>chr1.1_031546487|Ref_0001 +AATGAATTACTATGACTATAATACTAAGTCCTATCCAATCATGTGTATGTTTTCTTCTTCTGGGTTATGAGAAGAATTAGT +>chr1.1_031816459|AltMatch_0001 +GGGACAAATTTGAGAATGGACAAGGAGCATTGGTATTCTTTTGGAATATGCATCAATTTGTCTTATAATGAATGTTTGACC +>chr1.1_031816459|Alt_0002 +GGGACAAATTTGAGAATGGACAAGGAGCATTGGTACTCTTTTGGAATATGCATCAATTTGTCTTATAATGAATGTTTGACC +>chr1.1_031816459|RefMatch_0001 +GGGACAAATTTGAGAATGGACAAGGAGCATTGATGCTCCTTTGGAATATGCATCAATTTGTCTTATAATGAATGTTTGACC +>chr1.1_031816459|RefMatch_0002 +GGGACAAATTTGAGAATGGACAAGGAGCATTGATGCTCTTTTGGAATATGCATCAATTTGTCTTATAATGAATGTTTGACC +>chr1.1_031816459|RefMatch_0003 +GGGACAAATTTGAGAATGGACAAGGAGCATTGGTGCTCTTTTAGAATATGCATCAATTTGTCTTATAATGAATGTTTGACC +>chr1.1_031816459|Ref_0001 +GGGACAAATTTGAGAATGGACAAGGAGCATTGGTGCTCTTTTGGAATATGCATCAATTTGTCTTATAATGAATGTTTGACC +>chr1.1_031907509|Alt_0002 +GAGCTGTGAGTTTCTCTCGAACATACAATTCTTTTGTTTTATTAAAGTCGTTTTATGTTAACTGTACCATATCTCTGGTAG +>chr1.1_031907509|Ref_0001 +GAGCTGTGAGTTTCTCTCGAACATACAATTCTTTTGTTTTATTAAAGTCATTTTATGTTAACTGTACCATATCTCTGGTAG +>chr1.1_032107042|AltMatch_0001 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAAAGTCAATAACACCATTTCGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|AltMatch_0002 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAAAGTCGATAACACCATTTCAGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|AltMatch_0003 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAAAGTCGATAACACCATTTCGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|AltMatch_0004 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAAAGTCGATAACACCATTTTGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|Alt_0002 +TTGTTTAAAGCCCTTAAGGTTGGGACTTTGAGTAAAGTCGATAACACCATTTTGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|RefMatch_0001 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGCAATGTCGATAACACCATTTCGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|RefMatch_0002 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAATGCCGATAACACCATTTCCGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|RefMatch_0003 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAATGTCGATAACACCATTTAGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|RefMatch_0004 
+TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAATGTCGATAACACCATTTCCGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|RefMatch_0005 +TTGTTTAAAGCCCTTAAGGTTGGGACTTGGAGTAATGTCGATAACACCATTTCGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|RefMatch_0006 +TTGTTTAAAGCCCTTAAGGTTGGGACTTTGAGTAATGTCGATAACACCATTTCGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032107042|Ref_0001 +TTGTTTAAAGCCCTTAAGGTTGGGACTTTGAGTAATGTCGATAACACCATTTTGGTTGATTCATTTTGTCACAAACCATTC +>chr1.1_032180745|AltMatch_0001 +TCTTGAGTTCTTGTCCTTCTTCTCATTCTTCTCTTTCGTTTTTGTCAAAAACTACTATTTCAAATGGATCATTTTTCTTCT +>chr1.1_032180745|Alt_0002 +TCTTGAGTTCTTGTCCTTCTTCTCATTCTTCTCTTTCGTTTTTGTCGAAAACTACTATTTCAAATGGATCATTTTTCTTCT +>chr1.1_032180745|RefMatch_0001 +TCTTGAGTTCTTGTCCTTCTTCTCATTCTTCTCTTTTGTTTTTGTCAAAAACTACTATTTCAAATGGATCATTTTTCTTCT +>chr1.1_032180745|RefMatch_0002 +TCTTGAGTTCTTGTCCTTCTTCTCATTCTTCTCTTTTGTTTTTGTCAAAACTACTATTTCAAATGGATCATTTTTCTTCTA +>chr1.1_032180745|RefMatch_0003 +TCTTGAGTTCTTGTCCTTCTTCTCATTCTTCTCTTTTGTTTTTGTTAAAAACTACTATTTCAAATGGATCATTTTTCTTCT +>chr1.1_032180745|Ref_0001 +TCTTGAGTTCTTGTCCTTCTTCTCATTCTTCTCTTTTGTTTTTGTCGAAAACTACTATTTCAAATGGATCATTTTTCTTCT +>chr1.1_032924623|AltMatch_0001 +TGGTATTGATATCATTGTGTTGGAGAGCATTATTCATCATTGATTGTGGAATGGTTGCTGTTATTATTGATAATAGCATGG +>chr1.1_032924623|Alt_0002 +TGGTATTGATATCATTGTGTTGGAGAGCATTATTCATCATTGGTTGTGGAATGGTTGCTGTTATTATTGATAATAGCATGG +>chr1.1_032924623|RefMatch_0001 +TGGTATTGATATCATTGTGTTGGAGAGCATTATTCATCATTGATTGTGGAGTGGTTGCTGTTATTATTGATAATAGCATGG +>chr1.1_032924623|RefMatch_0002 +TGGTATTGATATCATTGTGTTGGAGAGCATTATTCATCATTGGTTGTGGAGTGTTTGCTGTTATTATTGATAATAGCATGG +>chr1.1_032924623|Ref_0001 +TGGTATTGATATCATTGTGTTGGAGAGCATTATTCATCATTGGTTGTGGAGTGGTTGCTGTTATTATTGATAATAGCATGG +>chr1.1_033171704|Alt_0002 +GGTGCTTCTATTTTGGAGTTCTTAAAAAGTTTTCTTTCACATGAAGATCCAAATATGCGTGCAAAAGCTTGCAGTGCTCTT +>chr1.1_033171704|RefMatch_0001 +GGTGCTTCTATTTTGGAGTTCTTAAAAAGTTTTCTTTTGCATGAAGATCCAAATATGCGTGCAAAAGCTTGCAGTGCTCTT +>chr1.1_033171704|Ref_0001 
+GGTGCTTCTATTTTGGAGTTCTTAAAAAGTTTTCTTTCGCATGAAGATCCAAATATGCGTGCAAAAGCTTGCAGTGCTCTT +>chr1.1_034199458|Alt_0002 +CCTCATTAGAATCCTTTAGAAAGGAAAACATATTTTTTGCTGCAGGAGTTTTCTCCAATATACTGTTTTAACAAAACCAAT +>chr1.1_034199458|RefMatch_0001 +CCTCATTAGAATCCTTTAGAAAGGAAAACATATTTTTTGTTGCAGGTGTTTTCTCCAATATACTGTTTTAACAAAACCAAT +>chr1.1_034199458|Ref_0001 +CCTCATTAGAATCCTTTAGAAAGGAAAACATATTTTTTGCTGCAGGTGTTTTCTCCAATATACTGTTTTAACAAAACCAAT +>chr1.1_034284365|AltMatch_0001 +CCAATTTCCTTTTTTGGATTTGTGTAAATTCACTTTTCATATATTTATTGTGCTACACACCAGCTCTGGAGGAGATACTGT +>chr1.1_034284365|Alt_0002 +CCAATTTCCTTTTTTGGATTTGTGTAAATTCACTTTTCATATATATATTGTGCTACACACCAGCTCTGGAGGAGATACTGT +>chr1.1_034284365|RefMatch_0001 +CCAATTTCCTTTTTTGGATTTGTGTAAATACACTTTTCATATATATATTGTTCTACACACCAGCTCTGGAGGAGATACTGT +>chr1.1_034284365|RefMatch_0002 +CCAATTTCCTTTTTTGGATTTGTGTAAATTCACTTTTCATATATTTATTGTTCTACACACCAGCTCTGGAGGAGATACTGT +>chr1.1_034284365|Ref_0001 +CCAATTTCCTTTTTTGGATTTGTGTAAATTCACTTTTCATATATATATTGTTCTACACACCAGCTCTGGAGGAGATACTGT +>chr1.1_034579746|Alt_0002 +ACCAACTGGTAGAAAAAGGGTGACCTACCATGGACCAGCAATGGCCTTCCATTCAATGGTAATGAAGACATTCATAAAACA +>chr1.1_034579746|RefMatch_0001 +ACCAACTGGTAGAAAAAGGGTGACCTACCATGGGCCTGCAATGGCCTTCCATTCAATGGTAATGAAGACATTCATAAAACA +>chr1.1_034579746|Ref_0001 +ACCAACTGGTAGAAAAAGGGTGACCTACCATGGACCTGCAATGGCCTTCCATTCAATGGTAATGAAGACATTCATAAAACA +>chr1.1_034663420|Alt_0002 +CAATGACGATTCCCTCTCCCTTTCCTAGTATTGGTTAAAATGAATGTGATTGATGGTAACAAACAAACACATAATAAATTG +>chr1.1_034663420|Ref_0001 +CAATGACGATTCCCTCTCCCTTTCCTAGTATTGGTTAAAATGAATGTCATTGATGGTAACAAACAAACACATAATAAATTG +>chr1.1_034929778|AltMatch_0001 +GGAGGGGAAAGGTAGAAGCCTGAGGGGCACGGTTGTTTATTACGATGGCCAGATGAATGATGCACGACTTAATGTTGGGTT +>chr1.1_034929778|Alt_0002 +GGAGGGGAAAGGTAGAAGCTTGAGGGGCACGGTTGTTTATTACGATGGCCAGATGAATGATGCACGACTTAATGTTGGGTT +>chr1.1_034929778|Ref_0001 +GGAGGGGAAAGGTAGAAGCTTGAGGGGCACAGTTGTTTATTACGATGGCCAGATGAATGATGCACGACTTAATGTTGGGTT +>chr1.1_035085053|Alt_0002 +ACCATCCCTGGTAACATGAATAGATATCTTTTTCTTCTCTTTGTCCAAATCTAAGAATTGCAGAGATTGCTGATAGTCACT 
+>chr1.1_035085053|Ref_0001 +ACCATCCCTGGTAACATGAATAGAAATCTTTTTCTTCTCTTTGTCCAAATCTAAGAATTGCAGAGATTGCTGATAGTCACT +>chr1.1_035253375|Alt_0002 +ACTCCAAAATTAACTCCCAAGCACCTAACCTCACTTAACGTTCTTAAATCCTTCACCAACCTATCGACTTTATAACATCCA +>chr1.1_035253375|RefMatch_0001 +ACTCCAAAATTAACTCCCAAGCACCTAACCTCACCTCACGTTCTTAATTCCTTCACCAACCTATCGACTTTATAACATCCA +>chr1.1_035253375|RefMatch_0002 +ACTCCAAAATTAACTCCCAAGCACCTAACCTCACTACACGTTCTTAAATCCTTCACCAACCTATCGACTTTATAACATCCA +>chr1.1_035253375|RefMatch_0003 +ACTCCAAAATTAACTCCCAAGCACCTAACCTCACTCCACGTTCTTAAATCCTTCACCAACCTATCGACTTTATAACATCCA +>chr1.1_035253375|RefMatch_0004 +ACTCCAAAATTAACTCCCAAGCACCTAACCTCACTTCACATTCTTAAATCCTTTACCAACCTATCGACTTTATAACATCCA +>chr1.1_035253375|Ref_0001 +ACTCCAAAATTAACTCCCAAGCACCTAACCTCACTTCACGTTCTTAAATCCTTCACCAACCTATCGACTTTATAACATCCA +>chr1.1_035517292|Alt_0002 +GGGTCACTGTATTAATCGATAGGTGTATCTTTAATGTAAATGGGAAAATCTTGGAGGATTTGGTCTTGGCTGCCGAAGCAG +>chr1.1_035517292|Ref_0001 +GGGTCACTGTATTAATCGATAGGTGTATCTTTAATGGAAATGGGAAAATCTTGGAGGATTTGGTCTTGGCTGCCGAAGCAG +>chr1.1_035606739|Alt_0002 +GTCCCGAATCTGAAACTGCAGTGAAGTATTGAATAATTTTTAGCCAAGAATAAATAATGGGTGTACTTGTATATATTTATT +>chr1.1_035606739|RefMatch_0001 +GTCCCGAATCTGAAACTGCAGTGAAGTATTGAATGATTTTTAGCCAACAATAAATAATGGGTGTACTTGTATATATTTATT +>chr1.1_035606739|Ref_0001 +GTCCCGAATCTGAAACTGCAGTGAAGTATTGAATAATTTTTAGCCAACAATAAATAATGGGTGTACTTGTATATATTTATT +>chr1.1_035758205|Alt_0002 +CTCATCTCAAGATCTTTTGTATTCTTAATCTTAGCTTCTTGAATGACTCTCTGTTGCTTCTGTTCTTCCTTAAGCTCTGCC +>chr1.1_035758205|RefMatch_0001 +CTCATCTCAAGATCTTTTGTATTCTTAATCCGAGCTTCTTGAATGACTCTCTGTTGCTTCTGTTCTTCCTTAAGCTCTGCC +>chr1.1_035758205|RefMatch_0002 +CTCATCTCAAGATCTTTTGTATTCTTAATCTGAGCTTCTTGAATGACTCTATGTTGCTTCTGTTCTTCCTTAAGCTCTGCC +>chr1.1_035758205|RefMatch_0003 +CTCATCTCAAGATCTTTTGTATTCTTAATCTGAGCTTCTTGAATGACTCTTTGTTGCTTCTGTTCTTCCTTAAGCTCTGCC +>chr1.1_035758205|Ref_0001 +CTCATCTCAAGATCTTTTGTATTCTTAATCTGAGCTTCTTGAATGACTCTCTGTTGCTTCTGTTCTTCCTTAAGCTCTGCC +>chr1.1_035847212|Alt_0002 
+TACATGCACGGGAATCAGATGAACAACTTTTCAGAAGATTCATACAGTCAACTCAATTTCATAAAAAGGATATGAAGAAAA +>chr1.1_035847212|RefMatch_0001 +TACATGCACGGGAATCAGATGAACAACTTTCCAGAAGATTCATACAGTCGACTCAATTTCATAAAAAGGATATGAAGAAAA +>chr1.1_035847212|Ref_0001 +TACATGCACGGGAATCAGATGAACAACTTTTCAGAAGATTCATACAGTCGACTCAATTTCATAAAAAGGATATGAAGAAAA +>chr1.1_036130046|Alt_0002 +GAAGTTTTAATTTCATTTGAGTTTATAGAATCTTCAAGAGGGTCTCTTTGCCTTTACATATGTTGTGAATATTCCTGTATT +>chr1.1_036130046|Ref_0001 +GAAGTTTTAATTTCATTTGAGTTTATAGAATCTTCGAGAGGGTCTCTTTGCCTTTACATATGTTGTGAATATTCCTGTATT +>chr1.1_036517797|Alt_0002 +GGACCGCCGTCCTAAATAATATTAATATGATATCATCTTAAGCATATCGGTCCTAGTTTTCCATTTGTTTTGACAATAACC +>chr1.1_036517797|Ref_0001 +GGACCGCCGTCCTAAATAATATTAATATGATATCATCTTAAGCATATGGGTCCTAGTTTTCCATTTGTTTTGACAATAACC \ No newline at end of file diff --git a/inst/imputation_ignore.txt b/inst/imputation_ignore.txt new file mode 100644 index 0000000..907502d --- /dev/null +++ b/inst/imputation_ignore.txt @@ -0,0 +1,4 @@ +SNP_ID +SNP1 +SNP3 +SNP14 diff --git a/inst/imputation_reference.txt b/inst/imputation_reference.txt new file mode 100644 index 0000000..13875f1 --- /dev/null +++ b/inst/imputation_reference.txt @@ -0,0 +1,21 @@ +ID SNP1 SNP2 SNP3 SNP4 SNP5 SNP6 SNP7 SNP8 SNP9 SNP10 SNP11 SNP12 SNP13 SNP14 SNP15 SNP16 SNP17 SNP18 SNP19 SNP20 SNP21 SNP22 SNP23 SNP24 SNP25 SNP26 SNP27 SNP28 SNP29 SNP30 +ID1 5 1 0 1 1 0 5 1 5 0 0 0 0 5 5 1 0 0 2 0 0 0 0 1 2 0 0 2 0 0 +ID2 1 2 5 2 0 2 5 0 0 1 2 1 5 5 2 0 2 0 0 2 0 5 1 1 2 2 2 5 1 0 +ID3 2 2 1 1 0 1 1 5 0 2 0 2 1 1 0 1 1 0 0 5 2 1 5 2 0 5 2 0 1 0 +ID4 5 0 0 5 5 0 1 0 0 5 0 5 1 0 2 0 1 5 0 5 0 0 2 0 1 5 0 2 5 1 +ID5 5 0 2 2 5 0 0 2 1 2 2 2 5 1 5 2 2 2 1 5 5 1 1 1 5 2 1 1 0 0 +ID6 5 5 1 5 5 5 2 1 0 0 5 0 2 2 1 1 0 2 2 1 1 1 5 5 1 0 0 5 1 0 +ID7 2 0 5 2 1 1 0 1 1 0 5 1 5 0 0 1 1 2 0 5 1 0 1 2 5 0 2 1 0 5 +ID8 1 2 0 2 5 2 5 5 0 2 0 2 0 5 1 2 2 5 5 0 0 1 1 0 0 5 2 0 0 2 +ID9 1 5 5 1 2 5 0 1 1 0 2 2 1 2 2 1 1 2 5 5 5 5 0 2 5 1 1 5 5 5 +ID10 2 1 2 0 1 0 1 2 1 2 2 0 5 1 1 1 1 5 5 1 5 1 0 2 5 2 1 1 1 0 
+ID11 1 1 0 0 2 0 0 2 5 5 0 5 0 5 5 0 1 5 1 5 2 1 5 2 1 0 1 2 2 0 +ID12 5 0 1 1 2 1 2 5 2 1 2 5 0 5 2 1 5 5 5 1 0 5 0 0 1 2 2 5 5 1 +ID13 1 5 0 5 5 2 1 5 0 1 2 2 0 5 2 0 0 1 2 5 5 5 2 0 0 2 0 1 2 2 +ID14 5 0 0 1 0 5 1 0 0 1 1 2 1 5 2 5 2 1 5 0 0 0 1 0 2 0 5 0 5 0 +ID15 5 1 0 1 1 0 5 0 0 1 0 0 1 5 1 1 5 1 2 5 5 0 2 5 0 0 5 1 2 5 +ID16 0 1 5 1 2 2 0 5 1 1 5 1 0 0 2 2 5 1 1 1 2 0 2 2 5 2 5 0 5 2 +ID17 1 1 0 1 0 2 2 5 5 2 1 5 1 1 5 1 0 5 2 0 0 1 0 0 0 1 5 1 1 5 +ID18 1 1 2 2 2 2 0 5 5 1 0 0 2 0 1 0 5 2 2 2 2 0 5 5 2 5 5 0 5 1 +ID19 1 5 5 2 5 0 2 5 5 1 2 5 1 1 0 1 0 1 5 5 2 2 1 5 5 5 1 0 5 2 +ID20 2 5 1 1 2 5 2 1 0 0 0 1 5 5 2 1 2 5 2 5 1 5 5 0 5 0 5 5 1 1 \ No newline at end of file diff --git a/inst/imputation_test.txt b/inst/imputation_test.txt new file mode 100644 index 0000000..b137d31 --- /dev/null +++ b/inst/imputation_test.txt @@ -0,0 +1,29 @@ +ID SNP1 SNP2 SNP3 SNP4 SNP5 SNP6 SNP7 SNP8 SNP9 SNP10 SNP11 SNP12 SNP13 SNP14 SNP15 SNP16 SNP17 SNP18 SNP19 SNP20 SNP21 SNP22 SNP23 SNP24 SNP25 SNP26 SNP27 SNP28 SNP29 SNP30 SNP32 NSP42 +ID1 5 1 0 1 2 2 2 1 5 0 0 0 0 5 5 1 0 0 2 0 0 0 0 1 2 0 0 2 0 0 1 2 +ID2 1 2 5 2 0 2 5 1 0 5 2 1 5 5 2 0 2 0 0 2 0 5 1 1 2 2 2 5 1 0 5 5 +ID3 2 2 1 1 0 1 1 5 0 2 0 2 1 1 0 1 1 0 0 5 2 1 5 2 0 5 2 0 1 0 5 1 +ID4 5 0 0 5 5 0 1 0 0 5 0 5 1 0 2 0 1 5 0 5 0 0 2 0 1 5 0 2 5 1 5 0 +ID5 5 0 2 2 5 0 0 2 1 2 2 2 5 1 5 2 2 2 1 5 5 1 1 1 5 2 1 1 0 0 0 0 +ID6 5 5 1 5 5 5 2 1 0 2 2 2 2 2 2 2 2 2 2 1 1 1 5 5 1 0 0 5 1 0 5 5 +ID7 2 0 5 2 1 1 0 1 1 0 5 1 2 5 1 2 1 1 1 1 1 0 1 2 5 0 2 1 0 5 0 2 +ID8 1 2 0 2 5 2 5 5 0 2 0 2 0 5 1 2 2 5 5 0 0 1 1 0 0 5 2 0 0 2 5 5 +ID9 1 5 5 1 2 5 0 1 1 0 2 2 1 2 2 1 1 2 5 5 5 5 0 2 5 1 1 5 5 5 1 1 +ID10 2 1 2 0 1 0 0 2 1 2 2 0 5 1 1 1 1 5 5 1 5 1 0 2 5 2 1 1 1 0 1 0 +ID11 1 1 0 0 2 0 0 2 5 2 2 2 2 5 5 0 1 5 1 5 2 1 5 2 1 0 1 2 2 0 0 5 +ID12 5 0 1 1 2 1 2 5 2 1 2 5 0 1 2 2 0 5 5 1 0 5 0 0 1 2 2 5 5 1 2 5 +ID13 1 5 0 5 5 2 1 5 0 1 2 2 0 5 2 0 0 1 2 5 5 5 2 0 0 2 0 1 2 2 1 2 +ID14 5 0 0 1 0 5 1 0 0 1 1 2 1 5 2 5 2 1 5 0 0 0 1 
0 2 0 5 0 5 0 0 0 +ID15 5 1 0 1 1 0 5 0 0 1 0 0 1 5 1 1 5 1 2 5 5 0 2 5 0 0 5 1 2 5 2 0 +ID16 0 1 5 1 2 2 0 5 1 1 5 1 0 0 2 2 5 1 1 1 2 0 2 2 5 2 5 0 5 2 2 1 +ID17 1 1 0 1 0 2 2 5 5 2 1 5 1 1 5 1 0 5 2 0 0 1 0 0 0 1 5 1 1 5 2 5 +ID18 1 1 2 2 2 2 0 5 5 1 0 0 2 2 2 2 5 2 2 2 2 0 5 5 2 5 5 0 5 1 2 5 +ID19 1 5 5 2 5 0 2 5 2 1 1 2 1 1 0 1 0 1 5 5 2 2 1 5 5 5 1 0 5 2 1 1 +ID20 2 5 1 1 2 5 2 1 0 0 0 1 5 5 2 1 2 5 2 5 1 5 5 0 5 0 5 5 1 1 1 1 +ID21 2 5 1 0 5 0 0 5 2 5 1 2 2 5 5 1 0 0 2 5 2 2 5 0 2 5 2 0 0 0 1 0 +ID22 0 2 1 5 5 1 1 2 2 0 1 1 5 5 0 1 2 5 1 0 2 2 1 1 1 2 1 1 0 5 1 1 +ID23 0 0 5 2 0 0 5 5 5 0 1 5 0 5 0 2 1 1 1 1 1 2 1 0 5 0 2 2 0 5 2 1 +ID24 0 5 0 5 2 5 0 1 2 5 1 5 1 2 2 2 2 2 2 5 5 5 0 1 1 0 1 0 2 5 0 5 +ID25 5 1 0 0 2 2 2 1 0 1 0 1 2 2 2 1 1 5 0 1 0 0 0 5 5 1 0 1 5 5 1 2 +ID26 2 0 5 0 2 0 1 0 2 1 1 5 2 5 1 0 1 5 5 1 1 2 1 1 0 2 5 5 5 0 1 1 +ID27 0 0 1 2 2 5 1 0 0 0 2 5 2 5 1 0 5 5 1 0 5 5 5 0 0 2 1 5 1 5 2 1 +ID28 5 0 2 1 0 5 1 2 2 5 1 2 0 2 2 5 0 5 2 1 5 0 5 5 5 1 1 2 0 0 1 5 \ No newline at end of file diff --git a/inst/ref_ids.txt b/inst/ref_ids.txt new file mode 100644 index 0000000..4009c04 --- /dev/null +++ b/inst/ref_ids.txt @@ -0,0 +1,6 @@ +VarA VarB +Reference1_A Reference1_B +Reference2_A Reference2_B +Reference3_A Reference3_B +Reference4_A Reference4_B +Reference5_A Reference5_B \ No newline at end of file diff --git a/inst/test_ref.txt b/inst/test_ref.txt new file mode 100644 index 0000000..bae0e00 --- /dev/null +++ b/inst/test_ref.txt @@ -0,0 +1,11 @@ +ID SNP1 SNP2 SNP3 SNP4 SNP5 SNP6 SNP7 SNP8 SNP9 SNP10 SNP11 SNP12 SNP13 SNP14 SNP15 SNP16 SNP17 SNP18 SNP19 SNP20 SNP21 SNP22 SNP23 SNP24 SNP25 SNP26 SNP27 SNP28 SNP29 SNP30 SNP31 SNP32 SNP33 SNP34 SNP35 SNP36 +Reference1_A 4 3 4 4 4 4 4 4 4 4 3 4 4 4 3 4 4 4 4 1 4 4 4 4 4 4 4 4 4 1 4 4 3 4 4 4 +Reference2_A 4 1 4 3 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Reference3_A 4 1 4 3 4 4 4 4 4 2 3 4 2 4 1 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Reference4_A 4 2 4 3 4 4 4 4 4 2 
4 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Reference5_A 4 2 4 4 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Reference1_B 4 1 4 2 4 4 4 4 4 2 4 4 2 4 2 4 3 4 4 1 4 3 2 4 4 4 4 4 4 1 3 4 4 4 4 4 +Reference2_B 4 2 4 4 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Reference3_B 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Reference4_B 4 3 4 3 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Reference5_B 4 2 4 3 4 4 4 4 4 2 4 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 \ No newline at end of file diff --git a/inst/test_test.txt b/inst/test_test.txt new file mode 100644 index 0000000..073ffd6 --- /dev/null +++ b/inst/test_test.txt @@ -0,0 +1,176 @@ +ID SNP1 SNP2 SNP3 SNP4 SNP5 SNP6 SNP7 SNP8 SNP9 SNP10 SNP11 SNP12 SNP13 SNP14 SNP15 SNP16 SNP17 SNP18 SNP19 SNP20 SNP21 SNP22 SNP23 SNP24 SNP25 SNP26 SNP27 SNP28 SNP29 SNP30 SNP31 SNP32 SNP33 SNP34 SNP35 SNP36 +Test1 4 2 4 3 4 4 4 4 4 3 4 4 3 4 4 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test2 4 3 4 3 4 4 4 4 4 4 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test3 4 3 4 3 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test4 4 2 4 4 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test5 4 1 4 3 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test6 4 2 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test7 4 1 4 2 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test8 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test9 4 1 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test10 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test11 4 2 4 3 4 4 4 4 4 4 3 4 4 4 3 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test12 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test13 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test14 
4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 3 3 4 0 4 4 3 4 4 4 +Test15 4 4 4 4 4 4 3 4 4 2 4 4 2 4 2 4 3 4 4 1 4 3 2 4 4 4 4 3 4 1 4 4 2 4 4 4 +Test16 4 3 4 4 4 4 4 4 4 2 4 3 4 4 4 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 4 4 2 4 4 4 +Test17 4 2 4 4 4 4 3 4 4 4 3 4 3 3 1 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test18 4 3 4 3 4 4 4 4 4 3 4 4 4 4 4 4 2 4 4 0 4 2 4 4 4 4 4 4 4 1 3 4 2 4 4 4 +Test19 4 3 4 4 4 4 4 4 4 4 3 4 4 4 3 4 4 4 4 1 4 4 4 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test20 4 3 4 4 4 4 4 4 4 4 3 4 4 4 3 4 4 4 4 1 4 4 4 4 4 4 4 4 4 1 4 4 2 4 4 4 +Test21 4 3 3 3 4 4 3 4 4 3 4 4 2 4 1 4 4 4 4 1 4 4 1 4 4 4 4 3 4 0 4 4 4 4 4 4 +Test22 4 3 4 4 4 4 4 4 4 1 3 4 4 4 4 4 2 4 4 1 4 2 4 4 4 4 4 4 4 2 4 4 1 4 4 4 +Test23 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test24 4 1 4 2 4 4 4 4 4 2 4 4 2 4 3 4 3 4 4 1 4 3 2 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test25 4 0 4 0 4 4 4 4 4 0 4 4 0 3 1 4 4 4 4 0 4 4 1 4 4 4 4 4 3 0 4 4 3 3 4 4 +Test26 4 2 4 3 4 4 4 4 4 2 4 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test27 4 1 4 2 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test28 4 3 4 3 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test29 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 1 4 3 4 4 4 4 4 4 4 3 3 4 4 4 4 4 +Test30 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test31 4 2 4 3 4 4 4 4 4 2 4 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test32 4 1 4 3 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test33 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test34 4 1 4 2 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test35 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test36 4 2 4 4 4 4 4 4 4 3 3 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test37 4 2 4 2 4 4 4 4 4 3 4 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test38 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test39 
4 3 4 3 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test40 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test41 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test42 4 2 4 2 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test43 4 3 4 3 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test44 4 2 4 3 4 4 4 4 4 2 4 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test45 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 1 4 3 2 4 4 4 4 4 4 0 3 4 2 4 4 4 +Test46 4 1 4 3 4 4 4 4 4 3 3 4 3 4 4 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test47 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 3 4 0 4 4 3 4 4 4 +Test48 4 2 4 3 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test49 4 2 4 3 4 4 4 4 4 4 3 4 4 4 3 4 3 4 4 1 4 3 4 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test50 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 3 3 4 0 4 4 3 4 4 4 +Test51 4 2 4 3 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test52 4 2 4 2 4 4 4 4 4 3 4 4 3 4 4 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test53 4 2 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test54 4 2 4 3 4 4 4 4 4 4 3 4 4 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test55 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 2 3 4 4 4 4 4 +Test56 4 2 4 3 4 4 4 4 4 2 4 4 2 4 3 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test57 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test58 4 2 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test59 4 3 4 4 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test60 4 2 4 3 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test61 4 2 4 4 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test62 4 1 4 2 4 4 4 4 4 3 3 4 3 4 4 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test63 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test64 
4 1 4 2 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test65 4 2 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test66 4 1 4 2 4 4 4 4 4 4 3 4 4 4 4 4 2 4 4 1 4 2 4 4 4 4 4 4 4 3 3 4 4 4 4 4 +Test67 4 3 4 3 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test68 4 1 4 3 4 4 4 4 4 2 3 4 2 4 3 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 4 4 4 4 +Test69 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 4 4 4 4 +Test70 4 3 4 3 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test71 4 2 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test72 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test73 4 1 4 3 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test74 4 3 4 3 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test75 4 2 4 3 4 4 4 4 4 4 3 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 3 3 4 4 4 4 4 +Test76 4 1 4 3 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test77 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 4 4 4 4 +Test78 4 3 4 4 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test79 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test80 4 2 4 3 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test81 4 3 4 3 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test82 4 1 4 2 4 4 4 4 4 2 4 4 2 4 2 4 3 4 4 1 4 3 2 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test83 4 2 4 4 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test84 4 2 4 2 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test85 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test86 4 2 4 2 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test87 4 2 4 2 4 4 4 4 4 2 4 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test88 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test89 
4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test90 4 2 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test91 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 1 4 3 4 4 4 4 4 4 4 3 3 4 4 4 4 4 +Test92 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test93 4 2 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test94 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test95 4 3 4 3 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test96 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test97 4 2 4 3 4 4 4 4 4 4 3 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 3 3 4 4 4 4 4 +Test98 4 1 4 3 4 4 4 4 4 2 3 4 2 4 1 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test99 4 2 4 3 4 4 4 4 4 3 4 4 3 4 4 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test100 4 1 4 2 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test101 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 4 4 4 4 +Test102 4 2 4 2 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test103 4 3 4 4 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test104 4 2 4 3 4 4 4 4 4 2 4 4 2 4 3 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test105 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test106 4 2 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test107 4 3 4 3 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test108 4 1 4 2 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test109 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test110 4 2 4 4 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test111 4 2 4 3 4 4 4 4 4 4 3 4 4 4 4 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test112 4 2 4 2 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test113 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 2 3 4 3 
4 4 4 +Test114 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test115 4 2 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test116 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test117 4 3 4 4 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test118 4 1 4 3 4 4 4 4 4 2 3 4 2 4 3 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test119 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test120 4 1 4 4 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test121 4 2 4 2 4 4 4 4 4 3 4 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test122 4 1 4 3 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test123 4 2 4 4 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test124 4 3 4 3 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test125 4 2 4 3 4 4 4 4 4 2 4 4 2 4 2 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test126 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test127 4 2 4 3 4 4 4 4 4 3 3 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test128 4 2 4 3 4 4 4 4 4 3 4 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test129 4 3 4 3 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test130 4 3 4 3 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test131 4 2 4 2 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test132 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test133 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test134 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test135 4 2 4 4 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test136 4 2 4 2 4 4 4 4 4 3 4 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test137 4 1 4 3 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test138 4 1 4 3 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 
4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test139 4 1 4 3 4 4 4 4 4 2 3 4 2 4 3 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test140 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 1 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test141 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test142 4 2 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test143 4 2 4 3 4 4 4 4 4 3 4 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test144 4 1 4 2 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test145 4 2 4 4 4 4 4 4 4 3 3 4 3 4 2 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test146 4 2 4 3 4 4 4 4 4 3 3 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test147 4 1 4 3 4 4 4 4 4 2 2 4 2 4 2 4 4 4 4 0 4 4 3 4 4 4 3 3 4 2 4 4 4 4 4 4 +Test148 4 2 4 3 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test149 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 2 4 3 4 4 4 4 4 4 4 2 3 4 3 4 4 4 +Test150 4 2 4 3 4 4 4 4 4 4 3 4 4 4 3 4 3 4 4 1 4 3 4 4 4 4 4 4 4 3 3 4 4 4 4 4 +Test151 4 1 4 2 4 4 4 4 4 2 4 4 2 4 2 4 3 4 4 1 4 3 2 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test152 4 2 4 3 4 4 4 4 4 2 4 4 2 4 2 4 4 4 4 0 4 4 0 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test153 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 1 4 4 4 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test154 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 3 4 0 4 4 3 4 4 4 +Test155 4 1 4 2 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test156 4 1 4 3 4 4 4 4 4 2 3 4 2 4 1 4 4 4 4 1 4 4 3 4 4 4 4 4 4 2 4 4 4 4 4 4 +Test157 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test158 4 1 4 2 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test159 4 3 4 3 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 2 4 4 3 4 4 4 +Test160 4 3 4 4 4 4 4 4 4 3 4 4 3 4 2 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test161 4 1 4 3 4 4 4 4 4 3 3 4 3 4 4 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 4 4 4 4 +Test162 4 2 4 4 4 4 4 4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test163 4 2 4 4 4 4 4 
4 4 3 3 4 3 4 3 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test164 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 1 4 3 4 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test165 4 2 4 3 4 4 4 4 4 3 4 4 3 4 4 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test166 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 3 4 0 4 4 3 4 4 4 +Test167 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 4 4 4 0 4 4 3 4 4 4 +Test168 4 1 4 3 4 4 4 4 4 2 3 4 2 4 2 4 4 4 4 0 4 4 2 4 4 4 3 3 4 0 4 4 3 4 4 4 +Test169 4 2 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 1 3 4 3 4 4 4 +Test170 4 1 4 2 4 4 4 4 4 3 3 4 3 4 2 4 3 4 4 0 4 3 3 4 4 4 4 4 4 0 3 4 3 4 4 4 +Test171 4 1 4 3 4 4 4 4 4 2 3 4 2 4 1 4 4 4 4 1 4 4 3 4 4 4 4 4 4 1 4 4 4 4 4 4 +Test172 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 2 4 4 3 4 4 4 4 4 4 1 4 4 3 4 4 4 +Test173 4 2 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 4 0 4 3 2 4 4 4 4 4 4 1 4 4 2 4 4 4 +Test174 4 3 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 1 4 4 2 4 4 4 4 4 4 1 4 4 2 4 4 4 +Test175 4 1 4 2 4 4 4 4 4 3 3 4 3 4 3 4 3 4 4 1 4 3 3 4 4 4 4 4 4 2 3 4 4 4 4 4 \ No newline at end of file diff --git a/man/add_ref_alt.Rd b/man/add_ref_alt.Rd deleted file mode 100644 index c8a8fe6..0000000 --- a/man/add_ref_alt.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_OffTargets.R -\name{add_ref_alt} -\alias{add_ref_alt} -\title{Check if Ref_0001 and Alt_0002 tags are present, if not, add them from the hap_seq input. Function made for parallelization.} -\usage{ -add_ref_alt(one_tag, hap_seq, nsamples) -} -\arguments{ -\item{one_tag}{madc file split by tag} - -\item{hap_seq}{haplotype DB} - -\item{nsamples}{number of samples} -} -\description{ -Check if Ref_0001 and Alt_0002 tags are present, if not, add them from the hap_seq input. Function made for parallelization. 
-} diff --git a/man/allele_freq_poly.Rd b/man/allele_freq_poly.Rd new file mode 100644 index 0000000..6db4ee6 --- /dev/null +++ b/man/allele_freq_poly.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/breedtools_functions.R +\name{allele_freq_poly} +\alias{allele_freq_poly} +\title{Computes allele frequencies for specified populations given SNP array data} +\usage{ +allele_freq_poly(geno, populations, ploidy = 2) +} +\arguments{ +\item{geno}{matrix of genotypes coded as the dosage of allele B \code{{0, 1, 2, ..., ploidy}} +with individuals in rows (named) and SNPs in columns (named)} + +\item{populations}{list of named populations. Each population has a vector of IDs +that belong to the population. Allele frequencies will be derived from all animals} + +\item{ploidy}{integer indicating the ploidy level (default is 2 for diploid)} +} +\value{ +data.frame consisting of allele_frequencies for populations (columns) for +each SNP (rows) +} +\description{ +Computes allele frequencies for specified populations given SNP array data +} +\examples{ +# Example inputs +geno_matrix <- matrix( +c(4, 1, 4, 0, # S1 + 2, 2, 1, 3, # S2 + 0, 4, 0, 4, # S3 + 3, 3, 2, 2, # S4 + 1, 4, 2, 3),# S5 +nrow = 4, ncol = 5, byrow = FALSE, # individuals=rows, SNPs=cols +dimnames = list(paste0("Ind", 1:4), paste0("S", 1:5)) +) + +pop_list <- list( +PopA = c("Ind1", "Ind2"), +PopB = c("Ind3", "Ind4") +) + +allele_freqs <- allele_freq_poly(geno = geno_matrix, populations = pop_list, ploidy = 4) +print(allele_freqs) + +} +\references{ +Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific +breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44. 
+} diff --git a/man/check_homozygous_trios.Rd b/man/check_homozygous_trios.Rd new file mode 100644 index 0000000..865e806 --- /dev/null +++ b/man/check_homozygous_trios.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/relationship_qc.R +\name{check_homozygous_trios} +\alias{check_homozygous_trios} +\title{Check homozygous loci in trios} +\usage{ +check_homozygous_trios( + path.vcf, + ploidy = 4, + parents_candidates = NULL, + progeny_candidates = NULL, + verbose = TRUE +) +} +\arguments{ +\item{path.vcf}{A string specifying the path to the VCF file containing genotype data.} + +\item{ploidy}{An integer specifying the ploidy level of the samples. Default is 4.} + +\item{parents_candidates}{A character vector of parent sample names to be tested. Must be provided.} + +\item{progeny_candidates}{A character vector of progeny sample names to be tested. Must be provided.} + +\item{verbose}{A logical value indicating whether to print the number of combinations tested. Default is TRUE.} +} +\value{ +A data frame with the following columns: +\itemize{ +\item \code{parent1}: The name of the first parent in the pair. +\item \code{parent2}: The name of the second parent in the pair. +\item \code{progeny}: The name of the progeny sample. +\item \code{homoRef_x_homoRef_n}: Number of loci where both parents are homozygous reference. +\item \code{homoRef_x_homoRef_match}: Percentage of matching loci in the progeny for homozygous reference parents. +\item \code{homoAlt_x_homoAlt_n}: Number of loci where both parents are homozygous alternate. +\item \code{homoAlt_x_homoAlt_match}: Percentage of matching loci in the progeny for homozygous alternate parents. +\item \code{homoRef_x_homoAlt_n}: Number of loci where one parent is homozygous reference and the other is homozygous alternate. +\item \code{homoRef_x_homoAlt_match}: Percentage of matching loci in the progeny for mixed homozygous parents. 
+\item \code{homoalt_x_homoRef_n}: Number of loci where one parent is homozygous alternate and the other is homozygous reference. +\item \code{homoalt_x_homoRef_match}: Percentage of matching loci in the progeny for mixed homozygous parents (alternate-reference). +\item \code{missing}: The number of loci with missing genotype data in the comparison. +} +} +\description{ +This function analyzes homozygous loci segregation in trios (parents and progeny) using genotype data from a VCF file. +It calculates the percentage of homozygous loci in the progeny that match the expected segregation patterns based on the tested parents. +} +\details{ +This function is designed to validate the segregation of homozygous loci in trios, ensuring that the progeny genotypes align with the expected patterns based on the parental genotypes. It requires both parent and progeny candidates to be specified. The function validates the ploidy level and ensures that all specified samples are present in the VCF file. The results include detailed statistics for each combination of parents and progeny. Reciprocal comparisons (e.g., A vs. B and B vs. A) and self-comparisons (e.g., A vs. A) are removed to avoid redundancy. Missing genotype data is also accounted for and reported in the results. +} diff --git a/man/check_replicates.Rd b/man/check_replicates.Rd new file mode 100644 index 0000000..97037de --- /dev/null +++ b/man/check_replicates.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/relationship_qc.R +\name{check_replicates} +\alias{check_replicates} +\title{Compatibility between samples genotypes} +\usage{ +check_replicates(path.vcf, select_samples = NULL, verbose = TRUE) +} +\arguments{ +\item{path.vcf}{A string specifying the path to the VCF file containing genotype data.} + +\item{select_samples}{An optional character vector of sample names to be selected for comparison. 
If NULL (default), all samples in the VCF file are used.} + +\item{verbose}{A logical value indicating whether to print the number of combinations tested. Default is TRUE.} +} +\value{ +A data frame with four columns: +\itemize{ +\item \code{sample1}: The name of the first sample in the pair. +\item \code{sample2}: The name of the second sample in the pair. +\item \code{\%_matching_genotypes}: The percentage of compatible genotypes between the two samples. +\item \code{\%_missing_genotypes}: The percentage of missing genotypes in the comparison. +} +} +\description{ +This function checks the compatibility between sample genotypes in a VCF file by comparing all pairs of samples. +} +\details{ +The function removes reciprocal comparisons (e.g., A vs. B and B vs. A) and self-comparisons (e.g., A vs. A) to avoid redundancy. Compatibility is calculated as the percentage of matching genotypes between two samples, excluding missing values. The percentage of missing genotypes is also reported for each pair. 
+} diff --git a/man/compare.Rd b/man/compare.Rd deleted file mode 100644 index 929ada4..0000000 --- a/man/compare.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_OffTargets.R -\name{compare} -\alias{compare} -\title{Get SNP positions, reference and alternative alleles based on the reference -Align alternatives to reference and discard low score alignment tags -Discard tags if alternative in the target locus is N -Do the complement reverse if cloneID present in the botloci vector} -\usage{ -compare(one_tag, botloci) -} -\arguments{ -\item{one_tag}{madc file split by tag} - -\item{botloci}{file containing the target IDs that were designed in the bottom strand} -} -\description{ -Get SNP positions, reference and alternative alleles based on the reference -Align alternatives to reference and discard low score alignment tags -Discard tags if alternative in the target locus is N -Do the complement reverse if cloneID present in the botloci vector -} diff --git a/man/create_VCF_body.Rd b/man/create_VCF_body.Rd deleted file mode 100644 index f00315a..0000000 --- a/man/create_VCF_body.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_OffTargets.R -\name{create_VCF_body} -\alias{create_VCF_body} -\title{Creates VCF body from CSV generated by loop_though_dartag_report} -\usage{ -create_VCF_body( - csv, - rm_multiallelic_SNP = TRUE, - multiallelic_SNP_dp_thr = 2, - multiallelic_SNP_sample_thr = 10, - n.cores = 1, - verbose = TRUE -) -} -\arguments{ -\item{csv}{CSV file generated by loop_though_dartag_report} - -\item{rm_multiallelic_SNP}{logical. If TRUE, SNP with more than one alternative base will be removed. If FALSE, check \code{multiallelic_SNP_dp_thr} specs} - -\item{multiallelic_SNP_dp_thr}{numerical. 
If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold \code{multiallelic_SNP_dp_thr} combined with minimum number of samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites.} - -\item{multiallelic_SNP_sample_thr}{numerical. If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold \code{multiallelic_SNP_dp_thr} combined with minimum number of samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites.} - -\item{n.cores}{number of cores to be used in the parallelization} - -\item{verbose}{print metrics on the console} -} -\description{ -Creates VCF body from CSV generated by loop_though_dartag_report -} diff --git a/man/get_OffTargets.Rd b/man/get_OffTargets.Rd index 4b59548..96a071c 100644 --- a/man/get_OffTargets.Rd +++ b/man/get_OffTargets.Rd @@ -27,9 +27,13 @@ get_OffTargets( \item{rm_multiallelic_SNP}{logical. If TRUE, SNP with more than one alternative base will be removed. If FALSE, check \code{multiallelic_SNP_dp_thr} specs} -\item{multiallelic_SNP_dp_thr}{nnumerical. If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold \code{multiallelic_SNP_dp_thr} combined with minimum number of samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites.} +\item{multiallelic_SNP_dp_thr}{numerical.
If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold \code{multiallelic_SNP_dp_thr} combined +with minimum number of samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic +aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites.} -\item{multiallelic_SNP_sample_thr}{numerical. If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold combined with minimum number of samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites.} +\item{multiallelic_SNP_sample_thr}{numerical. If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold combined with minimum number of +samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, +the marker is discarded. 
This is likely to happen to paralogous sites.} \item{out_vcf}{output VCF file name} diff --git a/man/get_ref_alt_hap_seq.Rd b/man/get_ref_alt_hap_seq.Rd deleted file mode 100644 index 3045797..0000000 --- a/man/get_ref_alt_hap_seq.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_OffTargets.R -\name{get_ref_alt_hap_seq} -\alias{get_ref_alt_hap_seq} -\title{Converts the fasta to a data.frame with first column the AlleleID and and second the AlleleSequence -The function will work even if the sequence is split in multiple lines} -\usage{ -get_ref_alt_hap_seq(hap_seq) -} -\arguments{ -\item{hap_seq}{haplotype db} -} -\description{ -Converts the fasta to a data.frame with first column the AlleleID and and second the AlleleSequence -The function will work even if the sequence is split in multiple lines -} diff --git a/man/imputation_concordance.Rd b/man/imputation_concordance.Rd index a8e2d76..a46b10c 100644 --- a/man/imputation_concordance.Rd +++ b/man/imputation_concordance.Rd @@ -9,25 +9,33 @@ imputation_concordance( imputed_genos, missing_code = NULL, snps_2_exclude = NULL, - output_df_name = "imputation_concordance" + verbose = FALSE ) } \arguments{ -\item{reference_genos}{Genotype data.frame with rows as samples and columns as markers. Dosage recommended.} +\item{reference_genos}{A data frame containing reference genotype data, with rows as samples and columns as markers. Dosage format (0, 1, 2) is recommended.} -\item{imputed_genos}{Genotype data.frame with rows as samples and columns as markers. Dosage recommended.} +\item{imputed_genos}{A data frame containing imputed genotype data, with rows as samples and columns as markers. Dosage format (0, 1, 2) is recommended.} -\item{missing_code}{Optional input to consider missing data to exclude in concordance calculation.} +\item{missing_code}{An optional value to specify missing data. 
If provided, loci with this value in either dataset will be excluded from the concordance calculation.} -\item{snps_2_exclude}{Optional input to exclude specific markers from concordance calculation. Single column of marker ids.} +\item{snps_2_exclude}{An optional vector of marker IDs to exclude from the concordance calculation.} -\item{output_df_name}{Optional input to assign the output dataframe to a specific variable name. Default is "imputation_concordance"} +\item{verbose}{A logical value indicating whether to print a summary of the concordance results. Default is FALSE.} } \value{ -2 outputs: 1) A data frame with sample IDs and concordance percentages. 2) A summary of concordance percentages. +A list with two elements: +\itemize{ +\item \code{result_df}: A data frame with sample IDs and their concordance percentages. +\item \code{summary_concordance}: A summary of concordance percentages, including minimum, maximum, mean, and quartiles. +} } \description{ -This calculates the concordance between imputed and reference genotypes. It assumes that samples are rows and markers are columns. -It is recommended to use allele dosages (0,1,2) but will work with other formats. Missing data in reference or imputed genotypes -will not be considered for concordance if argument missing_code used. If a specific subset of markers should it can be provided as argument snps_2_exclude. +This function calculates the concordance between imputed and reference genotypes. It assumes that samples are rows and markers are columns. +It is recommended to use allele dosages (0, 1, 2) but will work with other formats. Missing data in reference or imputed genotypes +will not be considered for concordance if the \code{missing_code} argument is used. If a specific subset of markers should be excluded, +it can be provided using the \code{snps_2_exclude} argument. +} +\details{ +The function identifies common samples and markers between the reference and imputed genotype datasets. 
It calculates the percentage of matching genotypes for each sample, excluding missing data and specified markers. The concordance is reported as a percentage for each sample, along with a summary of the overall concordance distribution. } diff --git a/man/loop_though_dartag_report.Rd b/man/loop_though_dartag_report.Rd deleted file mode 100644 index ed7db3d..0000000 --- a/man/loop_though_dartag_report.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_OffTargets.R -\name{loop_though_dartag_report} -\alias{loop_though_dartag_report} -\title{Include SNP_position_in_Genome, Ref, and Alt information} -\usage{ -loop_though_dartag_report( - report, - botloci, - hap_seq, - n.cores = 1, - verbose = TRUE -) -} -\arguments{ -\item{report}{MADC file} - -\item{botloci}{file containing the target IDs that were designed in the bottom strand} - -\item{hap_seq}{haplotype DB fasta file} - -\item{n.cores}{number of cores to be used in the parallelization} - -\item{verbose}{print metrics on the console} -} -\description{ -Include SNP_position_in_Genome, Ref, and Alt information -} diff --git a/man/merge_MADCs.Rd b/man/merge_MADCs.Rd index cee847e..d88e045 100644 --- a/man/merge_MADCs.Rd +++ b/man/merge_MADCs.Rd @@ -24,3 +24,41 @@ they are used as suffix, if not, files will be identified from 1 to number of files, considering the order that was defined in the function. 
} +\examples{ +# First generating example MADC files +temp_dir <- tempdir() +file1_path <- file.path(temp_dir, "madc1.csv") +file2_path <- file.path(temp_dir, "madc2.csv") +out_path <- file.path(temp_dir, "merged_madc.csv") + +# Data for file 1: Has SampleA and SampleB +df1 <- data.frame( + AlleleID = c("chr1.1_0001|Alt_0002", "chr1.1_0001|Ref_0001", "chr1.1_0001|AltMatch_0001"), + CloneID = c("chr1.1_0001", "chr1.1_0001", "chr1.1_0001"), + AlleleSequence = c("GGG", "AAA", "TTT"), + SampleA = c(10, 8, 0), + SampleB = c(5, 4, 9), + stringsAsFactors = FALSE, + check.names = FALSE +) +write.csv(df1, file1_path, row.names = FALSE, quote = FALSE) + +# Data for file 2: Has SampleA (duplicate name) and SampleC, different rows +df2 <- data.frame( + AlleleID = c("chr1.1_0001|Alt_0002", "chr1.1_0001|Ref_0001", "chr1.1_0001|AltMatch_0001"), + CloneID = c("chr1.1_0001", "chr1.1_0001", "chr1.1_0001"), + AlleleSequence = c("GGG", "AAA", "TTT"), + SampleA = c(11, 7, 20), + SampleC = c(1, 2, 6), + stringsAsFactors = FALSE, + check.names = FALSE +) +write.csv(df2, file2_path, row.names = FALSE, quote = FALSE) + +# 2. Run the merge function +# Use default suffixes (.x, .y) for the duplicated "SampleA" +merge_MADCs(madc_list = list(file1_path, file2_path), + out_madc = out_path) + + +} diff --git a/man/merge_counts.Rd b/man/merge_counts.Rd deleted file mode 100644 index 3b27281..0000000 --- a/man/merge_counts.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_OffTargets.R -\name{merge_counts} -\alias{merge_counts} -\title{Function made for parallelization of create_VCF_body function} -\usage{ -merge_counts( - cloneID_unit, - rm_multiallelic_SNP = FALSE, - multiallelic_SNP_dp_thr = 0, - multiallelic_SNP_sample_thr = 0 -) -} -\arguments{ -\item{cloneID_unit}{one item of csv file split by cloneID} - -\item{rm_multiallelic_SNP}{logical. If TRUE, SNP with more than one alternative base will be removed. 
If FALSE, check \code{multiallelic_SNP_dp_thr} specs} - -\item{multiallelic_SNP_dp_thr}{numerical. If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold combined with minimum number of samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites.} - -\item{multiallelic_SNP_sample_thr}{numerical. If \code{rm_multiallelic_SNP} is FALSE, set a minimum depth by tag threshold \code{multiallelic_SNP_dp_thr} combined with minimum number of samples \code{multiallelic_SNP_sample_thr} to eliminate low frequency SNP allele. If the threshold does not eliminate the multiallelic aspect of the marker, the marker is discarded. This is likely to happen to paralogous sites.} -} -\description{ -Function made for parallelization of create_VCF_body function -} diff --git a/man/solve_composition_poly.Rd b/man/solve_composition_poly.Rd new file mode 100644 index 0000000..d95645f --- /dev/null +++ b/man/solve_composition_poly.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/breedtools_functions.R +\name{solve_composition_poly} +\alias{solve_composition_poly} +\title{Compute genome-wide breed composition} +\usage{ +solve_composition_poly( + Y, + X, + ped = NULL, + groups = NULL, + mia = FALSE, + sire = FALSE, + dam = FALSE, + ploidy = 2 +) +} +\arguments{ +\item{Y}{numeric matrix of genotypes (columns) from all animals (rows) in population +coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}} + +\item{X}{numeric matrix of allele frequencies (rows) from each reference panel (columns). Frequencies are +relative to allele B.} + +\item{ped}{data.frame giving pedigree information. Must be formatted "ID", "Sire", "Dam"} + +\item{groups}{list of IDs categorized by breed/population. 
If specified, output will be a list +of results categorized by breed/population.} + +\item{mia}{logical. Only applies if ped argument is supplied. If true, returns a data.frame +containing the inferred maternally inherited allele for each locus for each animal instead +of breed composition results.} + +\item{sire}{logical. Only applies if ped argument is supplied. If true, returns a data.frame +containing sire genotypes for each locus for each animal instead of breed composition results.} + +\item{dam}{logical. Only applies if ped argument is supplied. If true, returns a data.frame +containing dam genotypes for each locus for each animal instead of breed composition results.} + +\item{ploidy}{integer. The ploidy level of the species (e.g., 2 for diploid, 3 for triploid, etc.).} +} +\value{ +A data.frame or list of data.frames (if \code{groups} is not \code{NULL}) with breed/ancestry composition +results +} +\description{ +Computes genome-wide breed/ancestry composition using quadratic programming on a +batch of animals. +} +\examples{ +# Example inputs for solve_composition_poly (ploidy = 4) + +# (This would typically be the output from allele_freq_poly) +allele_freqs_matrix <- matrix( + c(0.625, 0.500, + 0.500, 0.500, + 0.500, 0.500, + 0.750, 0.500, + 0.625, 0.625), + nrow = 5, ncol = 2, byrow = TRUE, + dimnames = list(paste0("SNP", 1:5), c("VarA", "VarB")) +) + +# Validation Genotypes (individuals x SNPs) +val_geno_matrix <- matrix( + c(2, 1, 2, 3, 4, # Test1 dosages for SNP1-5 + 3, 4, 2, 3, 0), # Test2 dosages for SNP1-5 + nrow = 2, ncol = 5, byrow = TRUE, + dimnames = list(paste0("Test", 1:2), paste0("SNP", 1:5)) +) + +# Calculate Breed Composition +composition <- solve_composition_poly(Y = val_geno_matrix, + X = allele_freqs_matrix, + ploidy = 4) +print(composition) + +} +\references{ +Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific +breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
+} diff --git a/package-code.Rmd b/package-code.Rmd deleted file mode 100644 index d3c9bbd..0000000 --- a/package-code.Rmd +++ /dev/null @@ -1,104 +0,0 @@ ---- -title: "R Notebook" -output: html_notebook ---- - -This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. - -Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*. - -```{r} -#Tutorial for making R package using devtools - -#https://tinyheero.github.io/jekyll/update/2015/07/26/making-your-first-R-package.html -#https://r-pkgs.org/description.html -``` - - -```{r} -library(roxygen2) -library(devtools) -``` - -```{r} -#Create framework of the R package -#devtools::create("BIGr") - -#Will need to edit the DESCRIPTION file with the correct information -``` - -```{r} -##Function to extract read count data from MADC file -``` - -```{r} -#Functions are added either to individual .R files, or lumped together in .R files with common theme (such as a diversity .R file with all genomic diversity analyses) -``` - -```{r} -#Testing the R functions -#https://r-pkgs.org/testing-basics.html -devtools::load_all() -``` - - -```{r} -setwd("/Users/ams866/Library/CloudStorage/Box-Box/AS_Projects/Pipelines/R_packge/BIGr") -#Each time you add new documentation to your R function, you need to run devtools::document() again to re-generate the .Rd files. 
-devtools::document()
-```
-```{r}
-#Making the citation file
-usethis::use_citation()
-```
-
-
-```{r}
-#Make a vignette(s) to provide examples for using the package
-
-usethat::use_vignette("introduction")
-```
-
-```{r}
-#Make the pdf manual
-setwd("/Users/ams866/Library/CloudStorage/Box-Box/AS_Projects/Pipelines/R_packge/BIGr")
-devtools::build_manual()
-```
-
-```{r}
-#Adding needed packages to description
-usethis::use_package("dplyr") # Default is "Imports"
-usethis::use_package("foreach")
-usethis::use_package("doParallel")
-#usethis::use_package("kinship2", min_version = "1.9.6.1")
-
-#If wanting to add specific minimum versions
-# exact version
-usethis::use_package("Rdpack", min_version = "0.7")
-
-# min version = currently installed version
-usethis::use_package("tidyr", min_version = TRUE)
-usethis::use_package("readr", min_version = TRUE)
-```
-
-
-```{r}
-#Test install
-devtools::install_github("alex-sandercock/BIGr")
-```
-
-```{r}
-#library(BIGr)
-
-dosage2vcf(dart.report = "/Users/ams866/Library/CloudStorage/Box-Box/AS_Projects/Strawberry/Report_DSt21-8501/DSt23-8501_Allele_Dose_Report.csv",
- dart.counts = "/Users/ams866/Library/CloudStorage/Box-Box/AS_Projects/Strawberry/Report_DSt21-8501/Report_DSt23-8501_Counts.csv",
- ploidy=2,
- output.file = "Test")
-```
-```{r}
-install.packages("devtools") #If not already installed
-library(devtools)
-devtools::install_github("Breeding-Insight/BIGr")
-library("BIGr")
-```
-
diff --git a/tests/testthat/iris_updog.RData b/tests/testthat/iris_updog.RData
new file mode 100644
index 0000000..1d7024a
Binary files /dev/null and b/tests/testthat/iris_updog.RData differ
diff --git a/tests/testthat/test-breedtools_poly.R b/tests/testthat/test-breedtools_poly.R
new file mode 100644
index 0000000..91ed356
--- /dev/null
+++ b/tests/testthat/test-breedtools_poly.R
@@ -0,0 +1,31 @@
+context("BreedTools")
+
+
+test_that("test breedtools poly",{
+  # Locate the reference, validation and reference-ID fixtures shipped with the package
+  ref_file <- system.file("test_ref.txt", package="BIGr")
+  val_file <- system.file("test_test.txt", package="BIGr")
+  ref_ids <- system.file("ref_ids.txt", package="BIGr")
+
+  # Import the tab-separated fixtures (spell out TRUE: `T` is an ordinary, reassignable variable)
+  reference <- read.table(ref_file, header = TRUE, row.names = 1, sep = "\t")
+  validation <- read.table(val_file, header = TRUE, row.names = 1, sep = "\t")
+  reference_ids <- read.table(ref_ids, header = TRUE, sep = "\t")
+
+  # One character vector of IDs per column of the reference-ID table
+  ref_ids <- lapply(as.list(reference_ids), as.character)
+
+  freq <- allele_freq_poly(reference, ref_ids, ploidy = 4)
+
+  prediction <- as.data.frame(solve_composition_poly(validation, freq, ploidy = 4))
+
+  # Summarise both results to a single number so they can be pinned with a tolerance
+  freq_mean <- round(mean(as.numeric(freq)), 6)
+  pred_mean <- round(mean(as.numeric(prediction$R2)), 6)
+
+
+  expect_equal(freq_mean, 0.888889, tolerance = 0.01)
+  expect_equal(pred_mean, 0.841454, tolerance = 0.01)
+  expect_equal(nrow(prediction), 175)
+
+})
diff --git a/tests/testthat/test-check_ped.R b/tests/testthat/test-check_ped.R
new file mode 100644
index 0000000..f0fac82
--- /dev/null
+++ b/tests/testthat/test-check_ped.R
@@ -0,0 +1,20 @@
+context("Check Pedigree")
+
+
+test_that("test check_ped",{
+  # Pedigree fixture shipped with the package
+  ped_file <- system.file("check_ped_test.txt", package="BIGr")
+
+  #Calculations
+  output.list <- check_ped(ped_file, TRUE)
+
+  # The result is expected to hold exactly two tables: messy_parents and missing_parents
+  df_length <- length(output.list)
+  messy_parents <- output.list$messy_parents
+  missing_parents <- output.list$missing_parents
+
+  expect_equal(df_length, 2)
+  expect_true(all(messy_parents$id == c("grandfather2","grandfather3")))
+  expect_equal(nrow(missing_parents), 13)
+
+})
diff --git a/tests/testthat/test-filterVCF.R b/tests/testthat/test-filterVCF.R
new file mode 100644
index 0000000..03567aa
--- /dev/null
+++ b/tests/testthat/test-filterVCF.R
@@ -0,0 +1,74 @@
+context("Filtering")
+
+test_that("Filtering with Updog metrics",{
+
+  # Filter settings; the missing-data thresholds start as percentages and are rescaled below
+  filter_ploidy <- 2
+  filter_maf <- 0.05
+  size_depth <- 10
+  snp_miss <- 50
+  sample_miss <- 50
+  OD_filter <- 0.05
+  Bias <- c(0.5, 2)
+  Bias_min <- Bias[1]
+  Bias_max <- Bias[2]
+  Prop_mis <- 0.05
+  maxpostprob_filter <- 0.5
+  max_post <- maxpostprob_filter
+  output_name <- "out"
+  snp_miss <- snp_miss/100
+  sample_miss <- sample_miss/100
+  ploidy <- filter_ploidy
+  maf_filter <- filter_maf
+
+  input <- filtering_files <- list()
+  input$updog_rdata$datapath <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGr")
+
+  temp_file <- tempfile(fileext = ".vcf.gz")
+
+  #Input file
+  vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)
+
+  # Identify if have updog parameters
+  format_fields <- unique(vcf@gt[,1])
+  info_fields <- vcf@fix[1,8]
+
+  updog_par <- grepl("MPP", format_fields) & grepl("PMC", info_fields) & grepl("BIAS", info_fields)
+
+  #Starting SNPs
+  starting_snps <- nrow(vcf)
+  #export INFO dataframe
+  filtering_files$raw_vcf_df <- data.frame(vcf@fix)
+
+  # Filter the in-memory vcfR object; output.file = NULL so nothing is written at this step
+  vcf <- filterVCF(vcf.file = vcf,
+                   ploidy=ploidy,
+                   output.file=NULL,
+                   filter.OD = OD_filter,
+                   filter.BIAS.min = Bias_min,
+                   filter.BIAS.max = Bias_max,
+                   filter.DP = as.numeric(size_depth),
+                   filter.PMC = Prop_mis,
+                   filter.SAMPLE.miss = as.numeric(sample_miss),
+                   filter.SNP.miss = as.numeric(snp_miss),
+                   filter.MAF = as.numeric(maf_filter),
+                   filter.MPP = max_post)
+
+  #Getting missing data information
+  #Add support for genotype matrix filtering?
+  gt_matrix <- extract.gt(vcf, element = "GT", as.numeric = FALSE)
+  filtering_files$snp_miss_df <- rowMeans(is.na(gt_matrix)) #SNP missing values
+  filtering_files$sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values
+
+  expect_equal(as.vector(table(gt_matrix[,10])), c(20, 13, 8))
+
+  rm(gt_matrix) #Remove gt matrix
+
+  #Writing file
+  write.vcf(vcf, file = temp_file)
+
+  #Get final_snps
+  final_snps <- nrow(vcf)
+  expect_equal(final_snps, 43)
+
+})
diff --git a/tests/testthat/test-get_OffTargets.R b/tests/testthat/test-get_OffTargets.R
new file mode 100644
index 0000000..002d11d
--- /dev/null
+++ b/tests/testthat/test-get_OffTargets.R
@@ -0,0 +1,46 @@
+context("Get OffTargets")
+
+
+test_that("test madc offtargets",{
+  # MADC report, botloci list and allele database fixtures shipped with the package
+  madc_file <- system.file("example_MADC_FixedAlleleID.csv", package="BIGr")
+  bot_file <- system.file("example_SNPs_DArTag-probe-design_f180bp.botloci", package="BIGr")
+  db_file <- system.file("example_allele_db.fa", package="BIGr")
+
+  # One output VCF per run: `temp` keeps multiallelic SNPs, `temp_multi` removes them
+  temp <- tempfile(fileext = ".vcf")
+  temp_multi <- tempfile(fileext = ".vcf")
+
+  set.seed(123)
+  get_OffTargets(madc = madc_file,
+                 botloci = bot_file,
+                 hap_seq = db_file,
+                 n.cores = 2,
+                 rm_multiallelic_SNP = FALSE,
+                 multiallelic_SNP_dp_thr = 0,
+                 multiallelic_SNP_sample_thr = 0,
+                 out_vcf = temp,
+                 verbose = FALSE)
+
+  set.seed(456)
+  get_OffTargets(madc = madc_file,
+                 botloci = bot_file,
+                 hap_seq = db_file,
+                 n.cores = 2,
+                 rm_multiallelic_SNP = TRUE,
+                 multiallelic_SNP_dp_thr = 0,
+                 multiallelic_SNP_sample_thr = 0,
+                 out_vcf = temp_multi,
+                 verbose = FALSE)
+
+  vcf <- read.vcfR(temp, verbose = FALSE)
+  vcf_multi <- read.vcfR(temp_multi, verbose = FALSE)
+
+  # Compare dimensions as integers rather than relying on coercion to character
+  expect_equal(dim(vcf@gt), c(33L, 11L))
+  expect_equal(dim(vcf_multi@gt), c(32L, 11L))
+
+  rm(vcf)
+  rm(vcf_multi)
+
+})
diff --git a/tests/testthat/test-imputation_concordance.R b/tests/testthat/test-imputation_concordance.R
new file mode 100644
index 0000000..f1fb421
--- /dev/null
+++
b/tests/testthat/test-imputation_concordance.R
@@ -0,0 +1,24 @@
+context("Imputation Concordance")
+
+
+test_that("test imputation",{
+  # SNPs-to-exclude list plus reference and test genotype fixtures shipped with the package
+  ignore_file <- system.file("imputation_ignore.txt", package="BIGr")
+  ref_file <- system.file("imputation_reference.txt", package="BIGr")
+  test_file <- system.file("imputation_test.txt", package="BIGr")
+
+  #import files
+  snps <- read.table(ignore_file, header = TRUE)
+  ref <- read.table(ref_file, header = TRUE)
+  test <- read.table(test_file, header = TRUE)
+
+  #Calculations
+  result <- imputation_concordance(ref, test, snps_2_exclude = snps, missing_code = 5, verbose = FALSE)
+
+  # Concordance is reported as "<number>%" strings; strip the sign and sum for a single checksum
+  result2 <- sum(as.numeric(gsub("%","",result$Concordance)))
+
+  expect_equal(result2, 1910.51, tolerance = 0.01)
+  expect_equal(nrow(result), nrow(ref))
+
+})
diff --git a/tests/testthat/test-merge_MADCs.R b/tests/testthat/test-merge_MADCs.R
new file mode 100644
index 0000000..f4f41b7
--- /dev/null
+++ b/tests/testthat/test-merge_MADCs.R
@@ -0,0 +1,35 @@
+context("Merge MADCs")
+
+
+test_that("test merge madc",{
+  # Two MADC fixtures shipped with the package
+  madc_file <- system.file("example_MADC_FixedAlleleID.csv", package="BIGr")
+  madc2_file <- system.file("example_MADC_to_merge.csv", package="BIGr")
+
+  # `temp` receives the merge of two different files, `temp2` the merge of one file with itself
+  temp <- tempfile(fileext = ".csv")
+  temp2 <- tempfile(fileext = ".csv")
+
+  merge_MADCs(madc_list = list(madc_file,madc2_file),
+              out_madc=temp, run_ids=NULL)
+
+  merge_MADCs(madc_list = list(madc_file,madc_file), out_madc=temp2, run_ids=NULL)
+
+  merged_madc <- data.frame(read_csv(temp))
+  merged_madc2 <- data.frame(read_csv(temp2))
+
+  # Sum every column after the first three as a checksum of the merged table
+  count_sum <- sum(as.matrix(merged_madc[,-c(1,2,3)]))
+  df_dim <- dim(merged_madc)
+
+
+  expect_equal(df_dim, c(61L, 23L))
+  expect_equal(count_sum, 86845)
+  expect_error(merge_MADCs(madc_list = NULL,out_madc=temp, run_ids=NULL))
+  expect_error(merge_MADCs(madc_list = list(madc_file,madc2_file), out_madc=NULL, run_ids=NULL))
+  expect_error(merge_MADCs(madc_list = list(madc_file,madc2_file), out_madc=temp, run_ids="one"))
+  expect_true(all(merged_madc2[,4:13] == merged_madc2[,14:23]))
+
+  rm(count_sum,merged_madc,merged_madc2,df_dim)
+
+})
diff --git a/tests/testthat/test-relationship_qc.R b/tests/testthat/test-relationship_qc.R
new file mode 100644
index 0000000..d838b64
--- /dev/null
+++ b/tests/testthat/test-relationship_qc.R
@@ -0,0 +1,26 @@
+test_that("Checking replicates",{
+  example_vcf <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGr")
+
+  check_tab <- check_replicates(path.vcf = example_vcf, select_samples = NULL)
+  expect_equal(sum(check_tab$`%_matching_genotypes`), 799901)
+
+  check_tab <- check_replicates(example_vcf, select_samples = paste0("Sample_",1:10))
+  expect_equal(sum(check_tab$`%_matching_genotypes`), 3134.87, tolerance = 0.01)
+
+})
+
+test_that("Checking homozygous segregation by trios",{
+
+  example_vcf <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGr")
+
+  parents_candidates <- paste0("Sample_",1:10)
+  progeny_candidates <- paste0("Sample_",11:20)
+
+  check_tab <- check_homozygous_trios(path.vcf = example_vcf,
+                                      ploidy = 2,
+                                      parents_candidates = parents_candidates,
+                                      progeny_candidates = progeny_candidates)
+
+  expect_equal(sum(check_tab$homoRef_x_homoRef_match), 36562.35, tolerance = 0.01)
+
+})
diff --git a/tests/testthat/test-updog2vcf.R b/tests/testthat/test-updog2vcf.R
new file mode 100644
index 0000000..0faca49
--- /dev/null
+++ b/tests/testthat/test-updog2vcf.R
@@ -0,0 +1,28 @@
+context("Updog to VCF")
+
+
+test_that("test updog conversion",{
+  # Load the saved fixture; `mout` used below is presumably defined by this .RData file
+  load(testthat::test_path("iris_updog.RData"))
+
+  temp_file <- tempfile()
+
+  # Convert updog to VCF
+  updog2vcf(
+    multidog.object = mout,
+    output.file = temp_file,
+    updog_version = "0.0.0",
+    compress = TRUE
+  )
+
+  vcf_result <- read.vcfR(paste0(temp_file,".vcf.gz"), verbose = FALSE)
+
+  DP <- sum(as.numeric(extract.gt(vcf_result, "DP")))
+
+  expect_equal(DP, 23618990)
+
+  MPP <- sum(as.numeric(extract.gt(vcf_result, "MPP")))
+
+  expect_equal(MPP, 74519.94, tolerance = 0.01)
+
+})