Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
7e23962
breedtools updates
alex-sandercock Mar 27, 2025
fffff1a
documentation updates
alex-sandercock Mar 27, 2025
80be4b2
Added Tests
alex-sandercock Mar 27, 2025
a30a4a6
Update Dependencies
alex-sandercock Mar 27, 2025
a757128
Updated DESCRIPTION
alex-sandercock Mar 28, 2025
9d09d78
update citation
alex-sandercock Mar 28, 2025
71f7d5c
update examples
alex-sandercock Mar 28, 2025
d7b26be
added tests
alex-sandercock Mar 28, 2025
a8581f3
documentation updates
alex-sandercock Mar 28, 2025
f9aa79b
update merge_madc example
alex-sandercock Mar 28, 2025
cae3621
example updates
alex-sandercock Mar 28, 2025
f50a6d6
added examples
alex-sandercock Mar 28, 2025
bd28988
add function + tests
Cristianetaniguti Apr 29, 2025
104c83f
Merge pull request #25 from Breeding-Insight/test_updates
alex-sandercock Apr 29, 2025
eb3a144
Merge branch 'relationship_qc_functions' into development
Cristianetaniguti Apr 29, 2025
aec88cd
add codecov action
Cristianetaniguti May 2, 2025
3480a40
add missing % columns
Cristianetaniguti May 5, 2025
4586c4e
fix documentation
Cristianetaniguti May 5, 2025
5ce4792
add badge
Cristianetaniguti May 5, 2025
e770c1d
rm 2 notes
Cristianetaniguti May 5, 2025
506ec10
solve conflict
Cristianetaniguti May 5, 2025
2f333d7
replace class by inherits + fix documentation
Cristianetaniguti May 6, 2025
2e4333a
Delete package-code.Rmd
alex-sandercock May 6, 2025
1e74164
Update DESCRIPTION
alex-sandercock May 6, 2025
9a34c1c
Update NEWS.md
alex-sandercock May 6, 2025
1a7938b
Update DESCRIPTION
alex-sandercock May 6, 2025
69a1e4a
update authors format
Cristianetaniguti May 6, 2025
de8a76c
Merge branch 'development' of https://github.com/Breeding-Insight/BIG…
Cristianetaniguti May 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
^.*\.Rproj$
^\.Rproj\.user$
^dev$
^doc$
^LICENSE\.md$
^LICENSE$
^revdep$
^cran-comments\.md$
^.github$
^package-code\.Rmd$
^docs$
55 changes: 55 additions & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, development]
pull_request:
branches: [main, development]

name: R-CMD-check

jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})

strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: |
any::rcmdcheck
any::covr
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true

- name: Test coverage
run: |
covr::codecov(token = "${{ secrets.CODECOV_TOKEN }}")
shell: Rscript {0}
48 changes: 42 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
Package: BIGr
Title: (B)reeding (I)nsight (G)enomics Functions for Polyploid and Diploid Species
Version: 0.4.2
Author: Alexander M. Sandercock, Cristiane Taniguti, Josue Chinchilla-Vargas, Shufen Chen, Manoj Sapkota, Meng Lin, Dongyan Zhao, and Breeding Insight Team
Title: Breeding Insight Genomics Analysis Functions for Polyploid and Diploid Species
Version: 0.5.1
Authors@R: c(person(given='Alexander',
family='Sandercock',
email='ams866@cornell.edu',
role=c('cre','aut')),
person(given='Cristiane',
family='Taniguti',
role = 'aut'),
person(given='Josue',
family='Chinchilla-Vargas',
role='aut'),
person(given='Shufen',
family='Chen',
role='ctb'),
person(given='Manoj',
family='Sapkota',
role='ctb'),
person(given='Meng',
family='Lin',
role='ctb'),
person(given='Dongyan',
family='Zhao',
role='ctb'),
person('Cornell', 'University',
role=c('cph'),
comment = "Breeding Insight"))
Maintainer: Alexander M. Sandercock <ams866@cornell.edu>
Description: This package contains the functions developed within Breeding Insight to analyze diploid and polyploid breeding and genetic data.
License: Apache License 2.0
Description: Functions developed within Breeding Insight to analyze
diploid and polyploid breeding and genetic data. 'BIGr' provides the
ability to filter VCF files, extract SNPs from the DArT MADC file, and
manipulate genotype data for both diploid and polyploid species. It
also serves as the core dependency for the 'BIGapp' Shiny app, which
provides a user-friendly interface for performing routine genotype
analysis tasks such as dosage calling, filtering, PCA, GWAS, and
Genomic Prediction.
License: Apache License (>= 2)
URL: https://github.com/Breeding-Insight/BIGr
BugReports: https://github.com/Breeding-Insight/BIGr/issues
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
Expand All @@ -20,6 +53,9 @@ Imports:
tidyr (>= 1.3.1),
vcfR (>= 1.15.0),
Biostrings,
pwalign
pwalign,
janitor,
quadprog,
tibble
Remotes:
RdMacros: Rdpack
17 changes: 11 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
# Generated by roxygen2: do not edit by hand

export(add_ref_alt)
export(allele_freq_poly)
export(calculate_Het)
export(calculate_MAF)
export(capture_diversity.Gmat)
export(check_homozygous_trios)
export(check_ped)
export(compare)
export(create_VCF_body)
export(check_replicates)
export(dosage2vcf)
export(dosage_ratios)
export(filterVCF)
export(flip_dosage)
export(get_OffTargets)
export(get_countsMADC)
export(get_ref_alt_hap_seq)
export(imputation_concordance)
export(loop_though_dartag_report)
export(madc2vcf)
export(merge_MADCs)
export(merge_counts)
export(solve_composition_poly)
export(updog2vcf)
import(doParallel)
import(dplyr)
import(foreach)
import(janitor)
import(parallel)
import(quadprog)
import(tibble)
import(tidyr)
import(vcfR)
Expand All @@ -35,11 +35,16 @@ importFrom(pwalign,pairwiseAlignment)
importFrom(readr,read_csv)
importFrom(reshape2,dcast)
importFrom(reshape2,melt)
importFrom(stats,cor)
importFrom(stats,lm)
importFrom(stats,qt)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(utils,packageVersion)
importFrom(utils,read.csv)
importFrom(utils,read.table)
importFrom(utils,tail)
importFrom(utils,write.csv)
importFrom(utils,write.table)
importFrom(vcfR,extract.gt)
importFrom(vcfR,maf)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,17 @@
* updog2vcf function option to output compressed VCF (.vcf.gz) - set as default
* remove need for defining ploidy
* add metadata at the VCF header


# BIGr 0.5.0

* Add imputation_concordance function to estimate accuracy of imputed and original dataset
* Add get_OffTargets function to extract target and off-target SNPs from a MADC file
* Add merge_MADCs function to merge two or more MADC files together
* Improved documentation and examples for all functions
* Add tests for all functions

# BIGr 0.5.1

* Improvements of testthat tests
* Add check_replicates and check_homozygous_trios for pedigree relationship quality check
82 changes: 70 additions & 12 deletions R/breedtools_functions.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,35 @@
#' Computes allele frequencies for specified populations given SNP array data
#'
#' @param geno matrix of genotypes coded as the dosage of allele B {0, 1, 2, ..., ploidy}
#' @param geno matrix of genotypes coded as the dosage of allele B \code{{0, 1, 2, ..., ploidy}}
#' with individuals in rows (named) and SNPs in columns (named)
#' @param populations list of named populations. Each population has a vector of IDs
#' that belong to the population. Allele frequencies will be derived from all animals
#' @param ploidy integer indicating the ploidy level (default is 2 for diploid)
#' @return data.frame consisting of allele_frequencies for populations (columns) for
#' each SNP (rows)
#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
#'
#' @examples
#' # Example inputs
#' geno_matrix <- matrix(
#' c(4, 1, 4, 0, # S1
#' 2, 2, 1, 3, # S2
#' 0, 4, 0, 4, # S3
#' 3, 3, 2, 2, # S4
#' 1, 4, 2, 3),# S5
#' nrow = 4, ncol = 5, byrow = FALSE, # individuals=rows, SNPs=cols
#' dimnames = list(paste0("Ind", 1:4), paste0("S", 1:5))
#' )
#'
#' pop_list <- list(
#' PopA = c("Ind1", "Ind2"),
#' PopB = c("Ind3", "Ind4")
#' )
#'
#' allele_freqs <- allele_freq_poly(geno = geno_matrix, populations = pop_list, ploidy = 4)
#' print(allele_freqs)
#'
#' @export
allele_freq_poly <- function(geno, populations, ploidy = 2) {

Expand Down Expand Up @@ -37,16 +60,20 @@ allele_freq_poly <- function(geno, populations, ploidy = 2) {
}


# Performs whole genome breed composition prediction.
#
# @param Y numeric vector of genotypes (with names as SNPs) from a single animal.
# coded as dosage of allele B {0, 1, 2}
# @param X numeric matrix of allele frequencies from reference animals
# @param p numeric indicating number of breeds represented in X
# @param names character names of breeds
# @return data.frame of breed composition estimates
# @import quadprog
# @export
#' Performs whole genome breed composition prediction.
#'
#' @param Y numeric vector of genotypes (with names as SNPs) from a single animal.
#' coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}
#' @param X numeric matrix of allele frequencies from reference animals
#' @param p numeric indicating number of breeds represented in X
#' @param names character names of breeds
#' @return data.frame of breed composition estimates
#' @import quadprog
#' @importFrom stats cor
#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
#'
#' @noRd
QPsolve <- function(Y, X) {

# Remove NAs from Y and remove corresponding
Expand Down Expand Up @@ -90,7 +117,7 @@ QPsolve <- function(Y, X) {
#' batch of animals.
#'
#' @param Y numeric matrix of genotypes (columns) from all animals (rows) in population
#' coded as dosage of allele B {0, 1, ..., ploidy}
#' coded as dosage of allele B \code{{0, 1, 2, ..., ploidy}}
#' @param X numeric matrix of allele frequencies (rows) from each reference panel (columns). Frequencies are
#' relative to allele B.
#' @param ped data.frame giving pedigree information. Must be formatted "ID", "Sire", "Dam"
Expand All @@ -107,6 +134,37 @@ QPsolve <- function(Y, X) {
#' @return A data.frame or list of data.frames (if groups is !NULL) with breed/ancestry composition
#' results
#' @import quadprog
#' @references Funkhouser SA, Bates RO, Ernst CW, Newcom D, Steibel JP. Estimation of genome-wide and locus-specific
#' breed composition in pigs. Transl Anim Sci. 2017 Feb 1;1(1):36-44.
#'
#' @examples
#' # Example inputs for solve_composition_poly (ploidy = 4)
#'
#' # (This would typically be the output from allele_freq_poly)
#' allele_freqs_matrix <- matrix(
#' c(0.625, 0.500,
#' 0.500, 0.500,
#' 0.500, 0.500,
#' 0.750, 0.500,
#' 0.625, 0.625),
#' nrow = 5, ncol = 2, byrow = TRUE,
#' dimnames = list(paste0("SNP", 1:5), c("VarA", "VarB"))
#' )
#'
#' # Validation Genotypes (individuals x SNPs)
#' val_geno_matrix <- matrix(
#' c(2, 1, 2, 3, 4, # Test1 dosages for SNP1-5
#' 3, 4, 2, 3, 0), # Test2 dosages for SNP1-5
#' nrow = 2, ncol = 5, byrow = TRUE,
#' dimnames = list(paste0("Test", 1:2), paste0("SNP", 1:5))
#' )
#'
#' # Calculate Breed Composition
#' composition <- solve_composition_poly(Y = val_geno_matrix,
#' X = allele_freqs_matrix,
#' ploidy = 4)
#' print(composition)
#'
#' @export
solve_composition_poly <- function(Y,
X,
Expand Down
6 changes: 6 additions & 0 deletions R/check_ped.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,25 +129,30 @@ check_ped <- function(ped.file) {
missing_parents <- results$missing_parents
messy_parents <- results$messy_parents
errors <- results$dependencies
# Adding the dataframes as an output list
output.results <- list()
#### Print errors and cycles ####
# Print repeated ids if any
if (nrow(repeated_ids) > 0) {
cat("Repeated ids found:\n")
print(repeated_ids)
output.results$repeated_ids <- repeated_ids
} else {
cat("No repeated ids found.\n")
}
# Print parents that appear as both male and female
if (nrow(messy_parents) > 0) {
cat("Ids found as male and female parent:\n")
print(messy_parents)
output.results$messy_parents <- messy_parents
} else {
cat("No ids found as male and female parent.\n")
}
# Print missing parents if any
if (nrow(missing_parents) > 0) {
cat("Missing parents found:\n")
print(missing_parents)
output.results$missing_parents <- missing_parents
} else {
cat("No missing parents found.\n")
}
Expand All @@ -160,5 +165,6 @@ check_ped <- function(ped.file) {
} else {
cat("No dependencies found.\n")
}

return(results)
}
18 changes: 9 additions & 9 deletions R/filterVCF.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ filterVCF <- function(vcf.file,
#Should allow for any INFO field to be entered to be filtered

# Import VCF (can be .vcf or .vcf.gz)
if (class(vcf.file) != "vcfR"){
if (!inherits(vcf.file, "vcfR")) {
vcf <- read.vcfR(vcf.file)
} else {
vcf <- vcf.file
Expand Down Expand Up @@ -303,18 +303,18 @@ filterVCF <- function(vcf.file,
}
### Export the modified VCF file (written gzip-compressed; ".vcf.gz" or "_filtered.vcf.gz" is appended to output.file, so supply the name without an extension)
cat("Exporting VCF\n")
if (!class(vcf.file) == "vcfR"){
if (!is.null(output.file)){
output_name <- paste0(output.file,".vcf.gz")
if (!inherits(vcf.file, "vcfR")) {
if (!is.null(output.file)) {
output_name <- paste0(output.file, ".vcf.gz")
vcfR::write.vcf(vcf, file = output_name)
}else{
} else {
return(vcf)
}
}else{
if (!is.null(output.file)){
output_name <- paste0(output.file,"_filtered.vcf.gz")
} else {
if (!is.null(output.file)) {
output_name <- paste0(output.file, "_filtered.vcf.gz")
vcfR::write.vcf(vcf, file = output_name)
}else{
} else {
return(vcf)
}
}
Expand Down
Loading