diff --git a/.github/workflows/dockerhub-on-version.yml b/.github/workflows/dockerhub-on-version.yml new file mode 100644 index 0000000..5e89d89 --- /dev/null +++ b/.github/workflows/dockerhub-on-version.yml @@ -0,0 +1,139 @@ +name: Build & Push Docker image on package version change + +on: + push: + branches: [ main, vcf_sanity_check ] + paths: + - .github/workflows/dockerhub-on-version.yml + - Dockerfile + - DESCRIPTION + workflow_dispatch: + +concurrency: + group: docker-${{ github.ref }} + cancel-in-progress: true + +env: + IMAGE: docker.io/breedinginsight/bigapp + IMAGE_DEPS: docker.io/breedinginsight/bigapp-deps + DEPS_TAG: r4.5-bioc3.21-2025-08 + +jobs: + check-version: + runs-on: ubuntu-latest + outputs: + changed: ${{ steps.ver.outputs.changed }} + version: ${{ steps.ver.outputs.version }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - id: ver + shell: bash + run: | + set -euo pipefail + cur_ver=$(sed -n 's/^Version:[[:space:]]*//p' DESCRIPTION | tr -d '[:space:]') + prev_sha="${{ github.event.before }}" + if git show "${prev_sha}:DESCRIPTION" >/dev/null 2>&1; then + prev_ver=$(git show "${prev_sha}:DESCRIPTION" | sed -n 's/^Version:[[:space:]]*//p' | tr -d '[:space:]') + else + prev_ver="" + fi + changed=false + if [[ -z "${prev_ver}" || "${cur_ver}" != "${prev_ver}" ]]; then changed=true; fi + echo "version=${cur_ver}" >> "$GITHUB_OUTPUT" + echo "changed=${changed}" >> "$GITHUB_OUTPUT" + + build-amd64: + needs: check-version + if: needs['check-version'].outputs.changed == 'true' + runs-on: ubuntu-latest + timeout-minutes: 120 + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-qemu-action@v3 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + tags: | + type=raw,value=v${{ needs['check-version'].outputs.version }}-amd64 + 
type=raw,value=sha-${{ github.sha }}-amd64 + - name: Build & push app (amd64) + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile + platforms: linux/amd64 + push: true + build-args: | + BASE_IMAGE=${{ env.IMAGE_DEPS }}:${{ env.DEPS_TAG }} + tags: ${{ steps.meta.outputs.tags }} + cache-from: | + type=registry,ref=${{ env.IMAGE }}:buildcache-amd64 + cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache-amd64,mode=max,compression=zstd + + build-arm64: + needs: check-version + if: needs['check-version'].outputs.changed == 'true' + continue-on-error: false + runs-on: ubuntu-latest + timeout-minutes: 350 + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-qemu-action@v3 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + tags: | + type=raw,value=v${{ needs['check-version'].outputs.version }}-arm64 + type=raw,value=sha-${{ github.sha }}-arm64 + - name: Build & push app (arm64) + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Dockerfile + platforms: linux/arm64 + push: true + build-args: | + BASE_IMAGE=${{ env.IMAGE_DEPS }}:${{ env.DEPS_TAG }} + tags: ${{ steps.meta.outputs.tags }} + cache-from: | + type=registry,ref=${{ env.IMAGE }}:buildcache-arm64 + cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache-arm64,mode=max,compression=zstd + + manifest: + needs: [check-version, build-amd64, build-arm64] + if: > + needs['check-version'].outputs.changed == 'true' && + needs['build-amd64'].result == 'success' && + needs['build-arm64'].result == 'success' + runs-on: ubuntu-latest + steps: + - uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: Create multi-arch manifest + env: + IMAGE: ${{ env.IMAGE }} + PKG_VERSION: ${{ needs['check-version'].outputs.version }} + run: | + set -euo pipefail + docker buildx imagetools create \ + -t "$IMAGE:v${PKG_VERSION}" \ + -t "$IMAGE:latest" \ + "$IMAGE:v${PKG_VERSION}-amd64" \ + "$IMAGE:v${PKG_VERSION}-arm64" \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 47cd469..6fccfde 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BIGapp Title: Breeding Insight Genomics Shiny Application -Version: 1.3.0 +Version: 1.5.1 Authors@R: c( person(c("Alexander", "M."), "Sandercock", @@ -73,6 +73,6 @@ Imports: methods Remotes: github::jendelman/GWASpoly, - github::Breeding-Insight/BIGr, + bioc::Rsamtools Suggests: testthat diff --git a/Dockerfile b/Dockerfile index b09a0bc..d783a35 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,47 +1,25 @@ -FROM rocker/r-ver:4.4.2 -RUN apt-get update && apt-get install -y cmake libz-dev libcurl4-openssl-dev libssl-dev -RUN R -e 'install.packages("remotes")' -RUN Rscript -e 'remotes::install_version("adegenet",upgrade="never", version = "2.1.10")' -RUN Rscript -e 'remotes::install_version("curl",upgrade="never", version = "6.0.1")' -RUN Rscript -e 'remotes::install_version("DT",upgrade="never", version 
= "0.33")' -RUN Rscript -e 'remotes::install_version("dplyr",upgrade="never", version = "1.1.4")' -RUN Rscript -e 'remotes::install_version("vcfR",upgrade="never", version = "1.15.0")' -RUN Rscript -e 'remotes::install_version("ggplot2",upgrade="never", version = "3.5.1")' -RUN Rscript -e 'remotes::install_version("tidyr",upgrade="never", version = "1.3.1")' -RUN Rscript -e 'remotes::install_version("curl",upgrade="never", version = "6.0.1")' -RUN Rscript -e 'remotes::install_version("shiny",upgrade="never", version = "1.9.1")' -RUN Rscript -e 'remotes::install_version("config",upgrade="never", version = "0.3.2")' -RUN Rscript -e 'remotes::install_version("bs4Dash",upgrade="never", version = "2.3.4")' -RUN Rscript -e 'remotes::install_version("golem",upgrade="never", version = "0.5.1")' -RUN Rscript -e 'remotes::install_version("purrr",upgrade="never", version = "1.0.2")' -RUN Rscript -e 'remotes::install_version("markdown",upgrade="never", version = "1.13")' -RUN Rscript -e 'remotes::install_version("scales",upgrade="never", version = "1.3.0")' -RUN Rscript -e 'remotes::install_version("plotly",upgrade="never", version = "4.10.4")' -RUN Rscript -e 'remotes::install_version("shinyWidgets",upgrade="never", version = "0.8.7")' -RUN Rscript -e 'remotes::install_version("shinyjs",upgrade="never", version = "2.1.0")' -RUN Rscript -e 'remotes::install_version("shinydisconnect",upgrade="never", version = "0.1.1")' -RUN Rscript -e 'remotes::install_version("shinyalert",upgrade="never", version = "3.1.0")' -RUN Rscript -e 'remotes::install_version("stringr",upgrade="never", version = "1.5.1")' -RUN Rscript -e 'remotes::install_version("updog",upgrade="never", version = "2.1.5")' -RUN Rscript -e 'remotes::install_version("AGHmatrix",upgrade="never", version = "2.1.4")' -RUN Rscript -e 'remotes::install_version("factoextra",upgrade="never", version = "1.0.7")' -RUN Rscript -e 'remotes::install_version("httr",upgrade="never", version = "1.4.7")' -RUN Rscript -e 
'remotes::install_version("future",upgrade="never", version = "1.34.0")' -RUN Rscript -e 'remotes::install_version("shinycssloaders",upgrade="never", version = "1.1.0")' -RUN Rscript -e 'remotes::install_version("RColorBrewer",upgrade="never", version = "1.1.3")' -RUN Rscript -e 'remotes::install_version("tibble",upgrade="never", version = "3.2.1")' -RUN Rscript -e 'remotes::install_version("rrBLUP",upgrade="never", version = "4.6.3")' -RUN Rscript -e 'remotes::install_version("MASS",upgrade="never", version = "7.3.60.2")' -RUN Rscript -e 'remotes::install_version("Matrix",upgrade="never", version = "1.7.0")' -RUN Rscript -e 'remotes::install_version("matrixcalc",upgrade="never", version = "1.0.6")' -RUN Rscript -e 'remotes::install_github("Breeding-Insight/BIGr",upgrade="never")' -RUN Rscript -e 'remotes::install_github("jendelman/GWASpoly",upgrade="never")' +# syntax=docker/dockerfile:1.7 +# Buildx/Actions will pass BASE_IMAGE as a manifest tag that covers both arches +ARG BASE_IMAGE=docker.io/breedinginsight/bigapp-deps:r4.5-bioc3.21-2025-08 +FROM ${BASE_IMAGE} -RUN mkdir /build_zone -ADD . /build_zone -WORKDIR /build_zone -RUN R -e 'remotes::install_local(upgrade="never")' -RUN rm -rf /build_zone -EXPOSE 80 -CMD R -e "options('shiny.port'=80,shiny.host='0.0.0.0');BIGapp::run_app()" +SHELL ["/bin/bash","-eo","pipefail","-c"] +ENV DEBIAN_FRONTEND=noninteractive TZ=UTC +ENV MAKEFLAGS="-j2" R_PKG_INSTALL_ARGS="--no-build-vignettes --no-manual" + +# App install (only your code changes should rebuild this layer) +WORKDIR /app +COPY DESCRIPTION /app/ +# COPY NAMESPACE /app/ # if present, include for better cache hits +COPY . 
/app +RUN R -q -e "remotes::install_local('.', upgrade='never', dependencies=TRUE, \ + INSTALL_opts=c('--no-build-vignettes','--no-manual'))" +# Runtime +RUN useradd -m appuser +USER appuser +WORKDIR /app +EXPOSE 80 +HEALTHCHECK --interval=30s --timeout=5s --retries=5 \ + CMD wget -qO- http://localhost:${PORT:-80}/ || exit 1 +CMD ["R","-q","-e","options(shiny.port=as.integer(Sys.getenv('PORT','80')), shiny.host='0.0.0.0'); BIGapp::run_app()"] diff --git a/NAMESPACE b/NAMESPACE index 703183d..9399374 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,9 @@ # Generated by roxygen2: do not edit by hand +S3method(print,vcf_sanity_check) export(run_app) +export(vcf_sanity_check) +export(vcf_sanity_messages) import(BIGr) import(GWASpoly) import(dplyr) diff --git a/R/DosageCall_functions.R b/R/DosageCall_functions.R index 88e0b71..5cd18bf 100644 --- a/R/DosageCall_functions.R +++ b/R/DosageCall_functions.R @@ -22,77 +22,75 @@ polyRAD_dosage_call <- function(vcf, ploidy, model, p1 = NULL, p2 = NULL, backcross.gen = 0, intercross.gen = 0, selfing.gen = 0, contamRate = 0.001, min_ind_read = 1, min_ind_maf = 0, tol = 1e-05, session) { - + # Variables vcf_path <- vcf - ploidy <- ploidy - model <- model n.bx <- backcross.gen n.inter <- intercross.gen n.self <- selfing.gen - + # Having some issues formatting the output when multiallelic SNPs is input, so excluding for now temp_vcf <- vcfR::read.vcfR(vcf_path, verbose = FALSE) temp_vcf <- temp_vcf[is.biallelic(temp_vcf),] - + # Function to randomly assign non-matching bases for polyRAD assign_random_bases <- function(vcf) { # Get the number of rows in the VCF num_rows <- nrow(vcf@fix) - + # Generate random bases for REF and ALT bases <- c("A", "C", "G", "T") ref_bases <- sample(bases, num_rows, replace = TRUE) alt_bases <- character(num_rows) # Initialize an empty character vector - + # Ensure REF and ALT are different for each row for (i in 1:num_rows) { alt_bases[i] <- sample(bases[bases != ref_bases[i]], 1) } - + # Assign the 
new bases to the vcf object vcf@fix[, "REF"] <- ref_bases vcf@fix[, "ALT"] <- alt_bases - + return(vcf) } - + #Editing REF and ALT fields randomly temporarily if blank or AB ref_base <- temp_vcf@fix[1, "REF"] alt_base <- temp_vcf@fix[1, "ALT"] if (is.na(ref_base) || is.na(alt_base) || alt_base == "B") { - temp_vcf <- assign_random_bases(temp_vcf) + temp_vcf <- assign_random_bases(temp_vcf) } - + # Adding filtered VCF as a temp object temp_vcf_path <- tempfile(fileext = ".vcf.gz") vcfR::write.vcf(temp_vcf, file = temp_vcf_path) rm(temp_vcf) - + # vcfR gzipped the VCF, but polyRAD cannot accept gzipped, only unzipped or bgzipped. # Unzipping the VCF (see if there is a workaround because this is wasteful for memory) temp_unzipped_path <- tempfile(fileext = ".vcf") R.utils::gunzip(temp_vcf_path, destname = temp_unzipped_path, overwrite = TRUE) rm(temp_vcf_path) - + # Load the VCF file as RADobject # Retaining all markers with at least 1 individual with reads (minimal filtering at this stage) # Need to determine the best contamRate to use; currently using the default - polyRAD_obj <- polyRAD::VCF2RADdata(file = temp_unzipped_path, - min.ind.with.reads = min_ind_read, - min.ind.with.minor.allele = min_ind_maf, - taxaPloidy = ploidy, - contamRate = contamRate, - tol = tol, - phaseSNPs = FALSE - + polyRAD_obj <- VCF2RADdata(file = temp_unzipped_path, + min.ind.with.reads = min_ind_read, + min.ind.with.minor.allele = min_ind_maf, + taxaPloidy = ploidy, + contamRate = contamRate, + tol = tol, + phaseSNPs = FALSE + ) - + # Empty vcf from memory rm(temp_unzipped_path) - + # Perform QC - + # Test overdispersion # Need to see if we can adapt the to_test values based on the input VCF or RAD object values # Currently using a range of 2:20, but an error will be provbided if the optimal value is the max or min of this range @@ -106,10 +104,10 @@ polyRAD_dosage_call <- function(vcf, ploidy, model, p1 = NULL, p2 = NULL, to_test_max <- to_test_max + 20 OD <- 
TestOverdispersion(polyRAD_obj, to_test = to_test_min:to_test_max)$optimal } - + # Test HindHe myhindhe <- HindHe(polyRAD_obj) - + # Perform dosage calling # "If you expect that your species has high linkage disequilibrium, the functions IterateHWE_LD and IteratePopStructLD behave like IterateHWE and IteratePopStruct, respectively, but also update priors based on genotypes at linked loci." if (model == "IterateHWE") { @@ -126,7 +124,7 @@ polyRAD_dosage_call <- function(vcf, ploidy, model, p1 = NULL, p2 = NULL, if (!p1 %in% rownames(data.frame(polyRAD_obj$taxaPloidy))) { stop("Parent 1 is not a valid parent") } - + polyRAD_obj <- SetDonorParent(polyRAD_obj, p1) } if (!is.null(p2)) { @@ -134,27 +132,27 @@ polyRAD_dosage_call <- function(vcf, ploidy, model, p1 = NULL, p2 = NULL, if (!p2 %in% rownames(data.frame(polyRAD_obj$taxaPloidy))) { stop("Parent 2 is not a valid parent") } - + polyRAD_obj <- SetRecurrentParent(polyRAD_obj, p2) } - + rad <- PipelineMapping2Parents(polyRAD_obj, overdispersion = OD, n.gen.backcrossing = n.bx, n.gen.intermating = n.inter, n.gen.selfing = n.self) } - - + + # Get discrete genotypes pg <- GetProbableGenotypes(rad, naIfZeroReads = TRUE)$genotypes - + # Free up memory rm(rad) rm(polyRAD_obj) - + return(list(Genos = t(pg), RADHindHe = myhindhe)) - + } @@ -176,19 +174,20 @@ polyRAD_dosage_call <- function(vcf, ploidy, model, p1 = NULL, p2 = NULL, polyRAD2vcf <- function(geno, model, vcf_path, hindhe.obj, ploidy, output.file, session) { # Making the VCF # Appending the polyRAD info to the original VCF file and exporting - + #Format dosages gt <- data.frame(flip_dosage(geno, ploidy = ploidy), check.names = FALSE) - row.names(gt) <- sub("_[A,T,C,G,B]$", "", row.names(gt)) #Removing the appended allele to the SNP IDs - + #row.names(gt) <- sub("_[A,T,C,G,B]$", "", row.names(gt)) #Removing the appended allele to the SNP IDs + row.names(gt) <- sub("_[^_]+$", "", row.names(gt)) + #Format HindHe - colnames(hindhe.obj) <- sub("_[A,T,C,G,B]$", "", 
colnames(hindhe.obj)) #Removing the appended allele to the column + #colnames(hindhe.obj) <- sub("_[A,T,C,G,B]$", "", colnames(hindhe.obj)) #Removing the appended allele to the column hh <- data.frame(t(colMeans(hindhe.obj, na.rm = TRUE)), check.names = FALSE) hh <- hh[, row.names(gt)] - + #Getting information from VCF og_vcf <- read.vcfR(vcf_path, verbose = FALSE) - + # Updating Header # Extracting the header (meta) meta <- og_vcf@meta @@ -200,14 +199,14 @@ polyRAD2vcf <- function(geno, model, vcf_path, hindhe.obj, ploidy, output.file, ) # Custom FORMAT lines to add custom_format_lines <- c( - '##FORMAT=', + '##FORMAT=', '##FORMAT=' ) custom_version_lines <- c( paste0('##polyRAD','_',model,'=', packageVersion("polyRAD")), paste0('##BIGapp_polyRAD2vcf=', packageVersion("BIGapp")) ) - + #polyrad_meta <- paste0('##PolyRADCommandLine.multidog=') @@ -216,18 +215,18 @@ polyRAD2vcf <- function(geno, model, vcf_path, hindhe.obj, ploidy, output.file, # Sys.time(),'", CommandLine="> updog2vcf(',deparse(substitute(multidog.object)),',', # output.file, ',', # updog_version,')">') - + # Inserting the new header lines into the VCF header meta <- append(meta, custom_format_lines, after = FORMAT_index-1) #FORMAT INFO_index <- max(grep("^##INFO=", meta)) meta <- append(meta, custom_info_lines, after = INFO_index) #INFO FORMAT_index <- max(grep("^##FORMAT=", meta)) meta <- append(meta, custom_version_lines, after = FORMAT_index) - - + + #Make a header separate from the dataframe vcf_header <- meta - + #Make the VCF df vcf_df <- data.frame( CHROM = data.frame(og_vcf@fix)$CHROM, @@ -240,26 +239,26 @@ polyRAD2vcf <- function(geno, model, vcf_path, hindhe.obj, ploidy, output.file, INFO = data.frame(og_vcf@fix)$INFO, FORMAT = data.frame(og_vcf@gt)$FORMAT ) - + # Replace NA values in the data frame with "." vcf_df[is.na(vcf_df)] <- "." 
- + #Get FORMAT info format_df <- og_vcf@gt[,-1] - + # Add the IDs as row names for subsetting row.names(vcf_df) <- vcf_df$ID row.names(format_df) <- vcf_df$ID vcf_df <- vcf_df[row.names(gt),] format_df <- format_df[row.names(gt),] - + #Add the INFO column for each SNP vcf_df$INFO <- paste0(vcf_df$INFO,";", "HH=",t(hh)) - + #Add the FORMAT label for each SNP - vcf_df$FORMAT <- paste("GT","UD",og_vcf@gt[1, "FORMAT"],sep=":") - + vcf_df$FORMAT <- paste("GTP","UD",og_vcf@gt[1, "FORMAT"],sep=":") + #Convert genotypes from dosage to gt precompute_genotype_strings <- function(ploidy) { genotype_strings <- character(ploidy + 1) @@ -272,31 +271,31 @@ polyRAD2vcf <- function(geno, model, vcf_path, hindhe.obj, ploidy, output.file, } return(genotype_strings) } - + # Apply the precomputed genotype strings to the matrix convert_dosage2gt <- function(dosage_matrix, ploidy) { dosage_matrix <- as.matrix(dosage_matrix) genotype_strings <- precompute_genotype_strings(ploidy) - + # Create missing GT missing_gt_string <- paste(rep(".", ploidy), collapse = "/") - + # Handle missing values separately genotype_matrix <- matrix(genotype_strings[dosage_matrix + 1], nrow = nrow(dosage_matrix), ncol = ncol(dosage_matrix)) genotype_matrix[is.na(dosage_matrix)] <- missing_gt_string # Handle missing values - + # Retain row and column names rownames(genotype_matrix) <- rownames(dosage_matrix) colnames(genotype_matrix) <- colnames(dosage_matrix) - + return(genotype_matrix) } - + # Convert the dosage matrix to genotypes gt_df <- convert_dosage2gt(gt, ploidy) # Replace NA values in the data frame with "." gt[is.na(gt)] <- "." 
- + #Combine info from the matrices to form the VCF information for each sample # Combine the matrices into a single matrix with elements separated by ":" make_vcf_format <- function(matrix1, matrix2, matrix3) { @@ -304,66 +303,66 @@ polyRAD2vcf <- function(geno, model, vcf_path, hindhe.obj, ploidy, output.file, if (!is.matrix(matrix1) || !is.matrix(matrix2) || !is.matrix(matrix3)) { stop("All inputs must be matrices.") } - + # Check if matrices have the same dimensions (optional, but recommended) if (!all(dim(matrix1) == dim(matrix2), dim(matrix1) == dim(matrix3))) { stop("Input matrices must have the same dimensions.") } - + # Check if matrices have row and column names (optional, based on problem description) if (is.null(rownames(matrix1)) || is.null(colnames(matrix1)) || is.null(rownames(matrix2)) || is.null(colnames(matrix2)) || is.null(rownames(matrix3)) || is.null(colnames(matrix3))) { stop("Input matrices must have row and column names.") } - + # 1. Convert matrices to vectors vector1 <- as.vector(matrix1) vector2 <- as.vector(matrix2) vector3 <- as.vector(matrix3) - + # 2. Combine the vectors element-wise with ":" as separator combined_vector <- paste(vector1, vector2, vector3, sep = ":") - + # 3. Reshape the combined vector back into a matrix with the original dimensions combined_matrix <- matrix(combined_vector, nrow = nrow(matrix1), ncol = ncol(matrix1)) - + # 4. Convert the combined matrix to a dataframe combined_dataframe <- as.data.frame(combined_matrix) - + # 5. 
Set the row and column names for the dataframe rownames(combined_dataframe) <- rownames(matrix1) colnames(combined_dataframe) <- colnames(matrix1) - + return(combined_dataframe) } - + # Combine the matrices # First get each item that exists in the VCF FORMAT #format_fields <- strsplit(og_vcf@gt[1, "FORMAT"], ":")[[1]] geno_df <- make_vcf_format(gt_df, as.matrix(gt), format_df - ) - + ) + #Combine the dataframes together vcf_df <- cbind(vcf_df,geno_df) - + # Add # to the CHROM column name colnames(vcf_df)[1] <- "#CHROM" - + # Sort vcf_df <- vcf_df[order(vcf_df[,1],as.numeric(as.character(vcf_df[,2]))),] - + # Write the header to the file # Make sure that .vcf is at the end of the file name output.file <- paste0(output.file, ".vcf") writeLines(vcf_header, con = output.file) - + # Append the dataframe to the file in tab-separated format suppressWarnings( write.table(vcf_df, file = output.file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE, append = TRUE) ) - - + + } diff --git a/R/GS_functions.R b/R/GS_functions.R index 9893a24..ddd2340 100644 --- a/R/GS_functions.R +++ b/R/GS_functions.R @@ -200,8 +200,12 @@ get_relationship_mat <- function(geno_input, ped_file, type = c("Gmatrix", "Amat if (type == "Gmatrix") { #Convert normalized genotypes to relationship matrix #By default, it removes SNPs with more than 50% missing data and imputes using the mean - Geno.mat <- A.mat(t(geno_input)) # only diploids - #Geno.mat <- Gmatrix(t(geno_input), method = "VanRaden", ploidy = ploidy) + Geno.mat <- Gmatrix(t(geno_input), + method = "VanRaden", + ploidy = ploidy, + ploidy.correction=TRUE, + ratio = FALSE, + missingValue = "NA") return(Geno.mat) }else if (type == "Amatrix") { @@ -230,7 +234,11 @@ get_relationship_mat <- function(geno_input, ped_file, type = c("Gmatrix", "Amat #Using Gmatrix to get the Gmatrix instead of A.mat for consistency #Should I be using the raw dosage values or is it okay to use the scaled genotype data that is used for A.mat()? 
- G.mat <- Gmatrix(t(geno_input[ ,valid_ids]), method = "VanRaden", ploidy = as.numeric(ploidy), missingValue = "NA") + G.mat <- Gmatrix(t(geno_input[ ,valid_ids]), + method = "VanRaden", + ploidy = as.numeric(ploidy), + missingValue = "NA", + ploidy.correction = TRUE) G.mat <- round(G.mat,3) #to be easy to invert #Computing H matrix (Martini) - Using the name Geno.mat for consistency @@ -374,7 +382,7 @@ assign_colors <- function(color){ format_geno_matrix <- function(geno, model, pred_matrix = NULL, ploidy){ if(is.null(pred_matrix)) pred_matrix <- "none_selected" - if(model == "rrBLUP" | (model == "GBLUP" & pred_matrix == "Gmatrix")) { + if(model == "rrBLUP" & ploidy == 2 & pred_matrix == "Gmatrix") { #if(model == "rrBLUP") { geno_formated <- 2 * (geno / as.numeric(ploidy)) - 1 # codification -1 0 1 } else { diff --git a/R/app_ui.R b/R/app_ui.R index e32c5c6..55d43cb 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -103,8 +103,9 @@ app_ui <- function(request) { ) ), left = div( - style = "display: flex; align-items: center; height: 100%;", # Center the version text vertically - "v1.3.0") + style = "display: flex; align-items: center; height: 100%;", + sprintf("v%s", as.character(utils::packageVersion("BIGapp"))) + ) ), dashboardBody( disconnectMessage(), #Adds generic error message for any error if not already accounted for diff --git a/R/mod_DosageCall.R b/R/mod_DosageCall.R index ac107ea..67a1af4 100644 --- a/R/mod_DosageCall.R +++ b/R/mod_DosageCall.R @@ -265,6 +265,11 @@ mod_DosageCall_server <- function(input, output, session, parent_session){ tol = 1e-05 ) + #VCF items + vcf_items <- reactiveValues( + RefAlt_df = NULL + ) + #UI popup window for input observeEvent(input$advanced_options, { showModal(modalDialog( @@ -392,12 +397,40 @@ mod_DosageCall_server <- function(input, output, session, parent_session){ matrices <- list() #Import genotype information if in VCF format + #### VCF sanity check + checks <- vcf_sanity_check(madc_file, depth_support_fields = 
c("AD","RA"), max_markers = 10000) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", + "samples", "chrom_info", "pos_info", "allele_counts", "VCF_compressed" + ) + + error_if_true <- c( + "duplicated_samples", "duplicated_markers" + ) + warning_if_false <- c("ref_alt","max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = NULL) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + vcf <- read.vcfR(madc_file, verbose = FALSE) #Get items in FORMAT column info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT chrom <- vcf@fix[,1] pos <- vcf@fix[,2] + #Retain RefAlt df for later use + RefAlt_df <- data.frame(vcf@fix) + RefAlt_df <- RefAlt_df[,c(1,2,4,5)] + colnames(RefAlt_df) <- c("chr","pos","ref","alt") + vcf_items$RefAlt_df <- RefAlt_df info_ids <- extract_info_ids(info[1]) @@ -600,7 +633,7 @@ mod_DosageCall_server <- function(input, output, session, parent_session){ } updateProgressBar(session = session, id = "pb_madc", value = 35, title = "Performing Dosage Calling") - polyrad_items <- polyRAD_dosage_call(vcf = polyrad_items$vcf_path, + polyrad_results <- polyRAD_dosage_call(vcf = polyrad_items$vcf_path, ploidy = input$ploidy, model = input$polyRAD_model, p1 = input$parent1, @@ -613,7 +646,8 @@ mod_DosageCall_server <- function(input, output, session, parent_session){ min_ind_maf = advanced_options$min_ind_maf, tol=advanced_options$tol) updateProgressBar(session = session, id = "pb_madc", value = 100, title = "Finished") - polyrad_items + polyrad_results$vcf_path <- polyrad_items$vcf_path + polyrad_results } }) @@ -637,6 +671,7 @@ mod_DosageCall_server <- function(input, output, session, parent_session){ multidog.object = updog_out(), output.file = temp, updog_version = packageVersion("updog"), + RefAlt = vcf_items$RefAlt_df, compress = FALSE ) diff --git a/R/mod_Filtering.R 
b/R/mod_Filtering.R index 373ff2d..b040996 100644 --- a/R/mod_Filtering.R +++ b/R/mod_Filtering.R @@ -102,7 +102,9 @@ mod_Filtering_ui <- function(id){ ), box(title = "Status", width =12, collapsible = TRUE, status = "info", progressBar(id = ns("pb_filter"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ") - ) + ), + # A placeholder for the download button. It will be rendered in the shinyalert modal. + uiOutput(ns("download_ui_placeholder")) ) ) ) @@ -178,6 +180,35 @@ mod_Filtering_server <- function(input, output, session, parent_session){ #Get list of sample names from VCF file observeEvent(input$updog_rdata, { + #### VCF sanity check + checks <- vcf_sanity_check(input$updog_rdata$datapath, + max_markers = 16000, + depth_support_fields = c("DP", "AD", "RA")) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "chrom_info", "pos_info", "VCF_compressed", "allele_counts" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("ref_alt","max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL) + + print(checks) + print(checks_result) + if(checks_result) return() # Stop the analysis if checks fail + ######### + + #populate preview_data preview_vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE, nrows = 1) @@ -262,7 +293,8 @@ mod_Filtering_server <- function(input, output, session, parent_session){ raw_sample_miss_df = NULL, maf_df = NULL, raw_maf_df = NULL, - format_fields = NULL + format_fields = NULL, + removed_names = NULL ) @@ -343,6 +375,23 @@ mod_Filtering_server <- function(input, output, session, parent_session){ updog_par <- grepl("MPP", format_fields) & grepl("PMC", info_fields) & grepl("BIAS", info_fields) & grepl("OD", info_fields) filtering_files$format_fields <- updog_par + if(length(updog_par) 
> 1) { + shinyalert( + title = "Malformed VCF", + text = "Make sure all your markers have the same information in the FORMAT field.", + size = "s", + closeOnEsc = TRUE, + closeOnClickOutside = FALSE, + html = TRUE, + type = "error", + showConfirmButton = TRUE, + confirmButtonText = "OK", + confirmButtonCol = "#004192", + showCancelButton = FALSE, + animation = TRUE + ) + } + if (input$use_updog & updog_par) { # Use Updog filtering parameters OD_filter <- as.numeric(input$OD_filter) @@ -390,7 +439,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ #Filtering #Raw VCF info gt_matrix <- extract.gt(filterVCF(vcf, ploidy = ploidy,filter.DP = as.numeric(size_depth), output.file = NULL), element = "GT", as.numeric = FALSE) - starting_samples <- ncol(gt_matrix) + starting_samples <- colnames(gt_matrix) filtering_files$raw_snp_miss_df <- rowMeans(is.na(gt_matrix)) #SNP missing values filtering_files$raw_sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values @@ -454,7 +503,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ gt_matrix <- extract.gt(vcf, element = "GT", as.numeric = FALSE) filtering_files$snp_miss_df <- rowMeans(is.na(gt_matrix)) #SNP missing values filtering_files$sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values - final_samples <- ncol(gt_matrix) + final_samples <- colnames(gt_matrix) rm(gt_matrix) #Remove gt matrix #Pb @@ -473,23 +522,32 @@ mod_Filtering_server <- function(input, output, session, parent_session){ }) #User warning if samples were removed during filtering - sample_removed <- starting_samples - final_samples + sample_removed <- length(starting_samples) - length(final_samples) + removed_names <- setdiff(starting_samples, final_samples) + filtering_files$removed_names <- removed_names + + # Define the download handler + output$download_removed_samples <- downloadHandler( + filename = function() { + "removed_samples.txt" + }, + content = 
function(file) { + if (!is.null(filtering_files$removed_names)) { + writeLines(filtering_files$removed_names, file) + } + } + ) if (sample_removed > 0 && removed_samples == 0) { - shinyalert( + showModal(modalDialog( title = "Samples Filtered", - text = paste(sample_removed, "samples were removed during filtering."), - size = "s", - closeOnEsc = TRUE, - closeOnClickOutside = FALSE, - html = TRUE, - type = "info", - showConfirmButton = TRUE, - confirmButtonText = "OK", - confirmButtonCol = "#004192", - showCancelButton = FALSE, - animation = TRUE - ) + footer = tagList( + modalButton("OK"), + # Use a downloadButton instead of a manual tag + downloadButton(ns("download_removed_samples"), "Save Sample List") + ), + paste(sample_removed, "samples were removed during filtering.") + )) } else if (sample_removed > 0 && removed_samples > 0) { shinyalert( title = "Samples Filtered", @@ -523,8 +581,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ tabPanel("Results", p("Upload VCF file to access results in this section.")) ) } else { - - if (!is.null(filtering_files$format_fields) && filtering_files$format_fields == TRUE && input$vcf_type == "Unfiltered VCF") { + if (!any(is.null(filtering_files$format_fields)) && any(filtering_files$format_fields == TRUE) && input$vcf_type == "Unfiltered VCF") { # Include "Bias Histogram", "OD Histogram", and "Prop_mis Histogram" for Pre-Filtered VCF tabBox( width = 12, collapsible = FALSE, status = "info", @@ -944,6 +1001,7 @@ mod_Filtering_server <- function(input, output, session, parent_session){ writeLines(paste(capture.output(filtering_summary_info()), collapse = "\n"), file) } ) + } ## To be copied in the UI diff --git a/R/mod_GS.R b/R/mod_GS.R index c0b309c..7367186 100644 --- a/R/mod_GS.R +++ b/R/mod_GS.R @@ -15,7 +15,7 @@ #' @import shinydisconnect #' #' -mod_GS_ui <- function(id){ +mod_GS_ui <- function(id) { ns <- NS(id) tagList( fluidRow( @@ -28,106 +28,109 @@ mod_GS_ui <- function(id){ 
overlayOpacity = 0.3, refreshColour = "purple" ), - column(width = 3, - box(title="Inputs", width = 12, collapsible = TRUE, collapsed = FALSE, status = "info", solidHeader = TRUE, - "* Required", - fileInput(ns("pred_known_file"), "Choose VCF File*", accept = c(".csv",".vcf",".gz")), - fileInput(ns("pred_trait_file"), "Choose Trait File*", accept = ".csv"), - numericInput(ns("pred_est_ploidy"), "Species Ploidy*", min = 1, value = NULL), - virtualSelectInput( - inputId = ns("pred_trait_info2"), - label = "Select Trait*", - choices = NULL, - showValueAsTags = TRUE, - search = TRUE, - multiple = TRUE - ), - virtualSelectInput( - inputId = ns("pred_fixed_info2"), - label = span("Select Fixed Effects", bs4Badge("beta", position = "right", color = "success")), - choices = NULL, - showValueAsTags = TRUE, - search = TRUE, - multiple = TRUE - ), - conditionalPanel( - condition = "input.pred_fixed_info2.length > 0", ns = ns, - div( - "(unselected will be considered covariates)", - virtualSelectInput( - inputId = ns("pred_fixed_cat2"), - label = "Select Categorical Fixed Effects", - choices = NULL, - showValueAsTags = TRUE, - search = TRUE, - multiple = TRUE - ) - ) - ), - actionButton(ns("prediction_est_start"), "Run Analysis"), - div(style="display:inline-block; float:right",dropdownButton( - HTML("Input files"), - p(downloadButton(ns('download_vcf'),""), "VCF Example File"), - p(downloadButton(ns('download_pheno'),""), "Trait Example File"), - p(downloadButton(ns('download_vcfp'), ""), "Download Prediction VCF Example File"),hr(), - p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), - p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), - p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), - actionButton(ns("pred_summary"), "Summary"), - circle = FALSE, - status = 
"warning", - icon = icon("info"), width = "300px", - tooltip = tooltipOptions(title = "Click to see info!") - )), - tags$hr(style="border-color: #d3d3d3; margin-top: 20px; margin-bottom: 20px;"), # Lighter grey line - div(style="text-align: left; margin-top: 10px;", - actionButton(ns("advanced_options_pred"), - label = HTML(paste(icon("cog", style = "color: #007bff;"), "Advanced Options (beta)")), - style = "background-color: transparent; border: none; color: #007bff; font-size: smaller; text-decoration: underline; padding: 0;" - ) - ) - - ) + column( + width = 3, + box( + title = "Inputs", width = 12, collapsible = TRUE, collapsed = FALSE, status = "info", solidHeader = TRUE, + "* Required", + fileInput(ns("pred_known_file"), "Choose VCF File*", accept = c(".csv", ".vcf", ".gz")), + fileInput(ns("pred_trait_file"), "Choose Trait File*", accept = ".csv"), + numericInput(ns("pred_est_ploidy"), "Species Ploidy*", min = 1, value = NULL), + virtualSelectInput( + inputId = ns("pred_trait_info2"), + label = "Select Trait*", + choices = NULL, + showValueAsTags = TRUE, + search = TRUE, + multiple = TRUE + ), + virtualSelectInput( + inputId = ns("pred_fixed_info2"), + label = span("Select Fixed Effects", bs4Badge("beta", position = "right", color = "success")), + choices = NULL, + showValueAsTags = TRUE, + search = TRUE, + multiple = TRUE + ), + conditionalPanel( + condition = "input.pred_fixed_info2.length > 0", ns = ns, + div( + "(unselected will be considered covariates)", + virtualSelectInput( + inputId = ns("pred_fixed_cat2"), + label = "Select Categorical Fixed Effects", + choices = NULL, + showValueAsTags = TRUE, + search = TRUE, + multiple = TRUE + ) + ) + ), + actionButton(ns("prediction_est_start"), "Run Analysis"), + div(style = "display:inline-block; float:right", dropdownButton( + HTML("Input files"), + p(downloadButton(ns("download_vcf"), ""), "VCF Example File"), + p(downloadButton(ns("download_pheno"), ""), "Trait Example File"), + 
p(downloadButton(ns("download_vcfp"), ""), "Download Prediction VCF Example File"), hr(), + p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE))), hr(), + p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE))), hr(), + p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE))), hr(), + actionButton(ns("pred_summary"), "Summary"), + circle = FALSE, + status = "warning", + icon = icon("info"), width = "300px", + tooltip = tooltipOptions(title = "Click to see info!") + )), + tags$hr(style = "border-color: #d3d3d3; margin-top: 20px; margin-bottom: 20px;"), # Lighter grey line + div( + style = "text-align: left; margin-top: 10px;", + actionButton(ns("advanced_options_pred"), + label = HTML(paste(icon("cog", style = "color: #007bff;"), "Advanced Options (beta)")), + style = "background-color: transparent; border: none; color: #007bff; font-size: smaller; text-decoration: underline; padding: 0;" + ) + ) + ) ), - - column(width = 6, - box(title = "Results", status = "info", solidHeader = FALSE, width = 12, height = 600, maximizable = T, - bs4Dash::tabsetPanel( - tabPanel("Predicted Pheno Table", DTOutput(ns("pred_trait_table")), style = "overflow-y: auto; height: 500px"), - tabPanel("EBVs Table", DTOutput(ns("pred_gebvs_table2")),style = "overflow-y: auto; height: 500px") - - ) - ) + column( + width = 6, + box( + title = "Results", status = "info", solidHeader = FALSE, width = 12, height = 600, maximizable = T, + bs4Dash::tabsetPanel( + tabPanel("Predicted Pheno Table", DTOutput(ns("pred_trait_table")), style = "overflow-y: auto; height: 500px"), + tabPanel("EBVs Table", DTOutput(ns("pred_gebvs_table2")), style = "overflow-y: auto; height: 500px") + ) + ) ), - - column(width = 3, - valueBoxOutput("shared_snps", width = NULL), - box(title = "Status", width = 12, collapsible = TRUE, status = "info", - 
progressBar(id = ns("pb_gp"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ") - ), - box(title = "Plot Controls", status = "warning", solidHeader = TRUE, collapsible = TRUE, width = 12, - div(style="display:inline-block; float:left",dropdownButton( - tags$h3("Save Files"), - fluidRow( - downloadButton(ns("download_pred_results_file"), "Save Files")), - circle = FALSE, - status = "danger", - icon = icon("floppy-disk"), width = "300px", label = "Save", - tooltip = tooltipOptions(title = "Click to see inputs!") - )) - ) - + column( + width = 3, + valueBoxOutput(ns("shared_snps"), width = NULL), + box( + title = "Status", width = 12, collapsible = TRUE, status = "info", + progressBar(id = ns("pb_gp"), value = 0, status = "info", display_pct = TRUE, striped = TRUE, title = " ") + ), + box( + title = "Plot Controls", status = "warning", solidHeader = TRUE, collapsible = TRUE, width = 12, + div(style = "display:inline-block; float:left", dropdownButton( + tags$h3("Save Files"), + fluidRow( + downloadButton(ns("download_pred_results_file"), "Save Files") + ), + circle = FALSE, + status = "danger", + icon = icon("floppy-disk"), width = "300px", label = "Save", + tooltip = tooltipOptions(title = "Click to see inputs!") + )) + ) ) - ) - ) } #' GS Server Functions #' #' @importFrom vcfR read.vcfR extract.gt -#' @importFrom rrBLUP mixed.solve A.mat +#' @importFrom rrBLUP mixed.solve kin.blup +#' @importFrom AGHmatrix Gmatrix #' @importFrom stats cor #' @importFrom shinyalert shinyalert #' @import dplyr @@ -135,80 +138,93 @@ mod_GS_ui <- function(id){ #' @import tidyr #' @importFrom DT renderDT #' @noRd -mod_GS_server <- function(input, output, session, parent_session){ - +mod_GS_server <- function(input, output, session, parent_session) { ns <- session$ns - + # Help links observeEvent(input$goPar, { # change to help tab - updatebs4TabItems(session = parent_session, inputId = "MainMenu", - selected = "help") - + updatebs4TabItems( + session = parent_session, 
inputId = "MainMenu", + selected = "help" + ) + # select specific tab - updateTabsetPanel(session = parent_session, inputId = "Genomic_Prediction_tabset", - selected = "Genomic_Prediction_par") + updateTabsetPanel( + session = parent_session, inputId = "Genomic_Prediction_tabset", + selected = "Genomic_Prediction_par" + ) # expand specific box updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session) }) - + observeEvent(input$goRes, { # change to help tab - updatebs4TabItems(session = parent_session, inputId = "MainMenu", - selected = "help") - + updatebs4TabItems( + session = parent_session, inputId = "MainMenu", + selected = "help" + ) + # select specific tab - updateTabsetPanel(session = parent_session, inputId = "Genomic_Prediction_tabset", - selected = "Genomic_Prediction_results") + updateTabsetPanel( + session = parent_session, inputId = "Genomic_Prediction_tabset", + selected = "Genomic_Prediction_results" + ) # expand specific box updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session) }) - + observeEvent(input$goCite, { # change to help tab - updatebs4TabItems(session = parent_session, inputId = "MainMenu", - selected = "help") - + updatebs4TabItems( + session = parent_session, inputId = "MainMenu", + selected = "help" + ) + # select specific tab - updateTabsetPanel(session = parent_session, inputId = "Genomic_Prediction_tabset", - selected = "Genomic_Prediction_cite") + updateTabsetPanel( + session = parent_session, inputId = "Genomic_Prediction_tabset", + selected = "Genomic_Prediction_cite" + ) # expand specific box updateBox(id = "Genomic_Prediction_box", action = "toggle", session = parent_session) }) - - #Default model choices + + # Default model choices advanced_options_pred <- reactiveValues( pred_model = "GBLUP", pred_matrix = "Gmatrix", pred_est_file = NULL, ped_file = NULL ) - - pred_outputs <- reactiveValues(corr_output = NULL, - comb_output = NULL, - all_GEBVs = NULL, - avg_GEBVs = 
NULL) - - #List the ped file name if previously uploaded + + pred_outputs <- reactiveValues( + corr_output = NULL, + comb_output = NULL, + all_GEBVs = NULL, + avg_GEBVs = NULL + ) + + # List the ped file name if previously uploaded output$uploaded_file_name <- renderText({ if (!is.null(advanced_options_pred$ped_file)) { paste("Previously uploaded file:", advanced_options_pred$ped_file$name) } else { - "" # Return an empty string if no file has been uploaded + "" # Return an empty string if no file has been uploaded } }) - + output$uploaded_file_name_pred <- renderText({ if (!is.null(advanced_options_pred$pred_est_file)) { paste("Previously uploaded file:", advanced_options_pred$pred_est_file$name) } else { - "" # Return an empty string if no file has been uploaded + "" # Return an empty string if no file has been uploaded } }) ## Trait file modal window - #Default choices + # Default choices trait_options <- reactiveValues( missing_data = "NA", custom_missing = NULL, @@ -216,37 +232,39 @@ mod_GS_server <- function(input, output, session, parent_session){ file_type = NULL ) - #UI popup window for input + # UI popup window for input observeEvent(input$pred_trait_file, { req(input$pred_trait_file) - #Get the column names of the csv file - info_df <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrows=2) - info_df[,1] <- as.character(info_df[,1]) #Makes sure that the sample names are characters instead of numeric + # Get the column names of the csv file + info_df <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrows = 2) + info_df[, 1] <- as.character(info_df[, 1]) # Makes sure that the sample names are characters instead of numeric # Read first 5 rows for preview - preview_data <- tryCatch({ - head(read.csv(input$pred_trait_file$datapath, nrows = 5, na.strings=trait_options$missing_data),5) - }, error = function(e) { - NULL - }) + preview_data <- tryCatch( + { + 
head(read.csv(input$pred_trait_file$datapath, nrows = 5, na.strings = trait_options$missing_data), 5) + }, + error = function(e) { + NULL + } + ) showModal(modalDialog( title = "Trait File Options", - size= "l", - + size = "l", selectInput( - inputId = ns('missing_data'), - label = 'Missing Data Value', - choices = c("NA",".","-99","(blank)","Custom"), - selected = trait_options$missing_data # Initialize with stored value + inputId = ns("missing_data"), + label = "Missing Data Value", + choices = c("NA", ".", "-99", "(blank)", "Custom"), + selected = trait_options$missing_data # Initialize with stored value ), conditionalPanel( condition = "input.missing_data == 'Custom'", ns = ns, div( textInput( - inputId = ns('custom_missing'), - label = 'Custom Missing Value', - value = trait_options$custom_missing # Initialize with stored value + inputId = ns("custom_missing"), + label = "Custom Missing Value", + value = trait_options$custom_missing # Initialize with stored value ) ), div( @@ -256,11 +274,10 @@ mod_GS_server <- function(input, output, session, parent_session){ ) ), selectInput( - inputId = ns('sample_column'), - label = 'Sample ID Column', + inputId = ns("sample_column"), + label = "Sample ID Column", choices = colnames(info_df) ), - if (!is.null(preview_data)) { div( h4( @@ -280,7 +297,6 @@ mod_GS_server <- function(input, output, session, parent_session){ p("Could not load file preview.") ) }, - footer = tagList( actionButton(ns("save_trait_options"), "Save") ) @@ -291,7 +307,6 @@ mod_GS_server <- function(input, output, session, parent_session){ req(preview_data) preview_data }) - }) output$custom_missing_msg <- renderText({ @@ -303,12 +318,12 @@ mod_GS_server <- function(input, output, session, parent_session){ }) - #Close popup window when user "saves options" + # Close popup window when user "saves options" observeEvent(input$save_trait_options, { trait_options$missing_data <- input$missing_data trait_options$custom_missing <- input$custom_missing 
trait_options$sample_column <- input$sample_column - #trait_options$file_type + # trait_options$file_type # Save other inputs as needed if (input$missing_data == "Custom" && nchar(input$custom_missing) == 0) { @@ -321,38 +336,38 @@ mod_GS_server <- function(input, output, session, parent_session){ return() # Stop further execution and keep the modal open } - removeModal() # Close the modal after saving + removeModal() # Close the modal after saving }) - - #UI popup window for input + + # UI popup window for input observeEvent(input$advanced_options_pred, { showModal(modalDialog( title = "Advanced Options (beta)", selectInput( - inputId = ns('pred_model'), - label = 'Method Choice', + inputId = ns("pred_model"), + label = "Method Choice", choices = c("GBLUP"), - selected = advanced_options_pred$pred_model # Initialize with stored value + selected = advanced_options_pred$pred_model # Initialize with stored value ), conditionalPanel( condition = "input.pred_model == 'GBLUP'", ns = ns, div( selectInput( - inputId = ns('pred_matrix'), - label = 'Relationship Matrix Choice', - #choices = c("Gmatrix", "Amatrix", "Hmatrix"), + inputId = ns("pred_matrix"), + label = "Relationship Matrix Choice", + # choices = c("Gmatrix", "Amatrix", "Hmatrix"), choices = c("Gmatrix"), - selected = advanced_options_pred$pred_matrix # Initialize with stored value + selected = advanced_options_pred$pred_matrix # Initialize with stored value ) ) ), conditionalPanel( condition = "input.pred_matrix != 'Amatrix'", ns = ns, div( - fileInput(ns("pred_est_file"), "Choose Prediction VCF", accept = c(".vcf",".gz")), + fileInput(ns("pred_est_file"), "Choose Prediction VCF", accept = c(".vcf", ".gz")), conditionalPanel( condition = "output.uploaded_file_name_pred !== ''", # Show only if there's content - textOutput(ns("uploaded_file_name_pred")) # Display the uploaded file name + textOutput(ns("uploaded_file_name_pred")) # Display the uploaded file name ) ) ), @@ -362,7 +377,7 @@ mod_GS_server <- 
function(input, output, session, parent_session){ fileInput(ns("ped_file"), "Choose Pedigree File", accept = ".csv"), conditionalPanel( condition = "output.uploaded_file_name !== ''", # Show only if there's content - textOutput(ns("uploaded_file_name")) # Display the uploaded file name + textOutput(ns("uploaded_file_name")) # Display the uploaded file name ) ) ), @@ -372,28 +387,28 @@ mod_GS_server <- function(input, output, session, parent_session){ ) )) }) - - - - #Close popup window when user "saves options" + + + + # Close popup window when user "saves options" observeEvent(input$save_advanced_options_pred, { advanced_options_pred$pred_model <- input$pred_model advanced_options_pred$pred_matrix <- input$pred_matrix advanced_options_pred$pred_est_file <- input$pred_est_file advanced_options_pred$ped_file <- input$ped_file # Save other inputs as needed - - removeModal() # Close the modal after saving + + removeModal() # Close the modal after saving }) - - ###Genomic Prediction - #This tab involved 3 observeEvents - #1) to get the traits listed in the phenotype file - #2) to input and validate the input files - #3) to perform the genomic prediction - - - #1) Get traits + + ### Genomic Prediction + # This tab involved 3 observeEvents + # 1) to get the traits listed in the phenotype file + # 2) to input and validate the input files + # 3) to perform the genomic prediction + + + # 1) Get traits observeEvent(input$save_trait_options, { info_df2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, nrow = 0) sample_col_name <- input$sample_column @@ -401,12 +416,12 @@ mod_GS_server <- function(input, output, session, parent_session){ updateVirtualSelect("pred_fixed_info2", choices = trait_var2, session = session) updateVirtualSelect("pred_trait_info2", choices = trait_var2, session = session) }) - + observeEvent(input$pred_fixed_info2, { updateVirtualSelect("pred_fixed_cat2", choices = input$pred_fixed_info2, session = session) }) - - #2) 
Error check for prediction and save input files + + # 2) Error check for prediction and save input files continue_prediction2 <- reactiveVal(NULL) pred_inputs2 <- reactiveValues( pheno_input = NULL, @@ -417,7 +432,7 @@ mod_GS_server <- function(input, output, session, parent_session){ pred_geno_pheno = NULL, matrix = NULL ) - + pred_outputs2 <- reactiveValues( corr_output = NULL, box_plot = NULL, @@ -428,8 +443,8 @@ mod_GS_server <- function(input, output, session, parent_session){ colors = NULL, trait_output = NULL ) - - #Reactive boxes + + # Reactive boxes output$shared_snps <- renderValueBox({ valueBox( value = pred_inputs2$shared_snps, @@ -438,12 +453,12 @@ mod_GS_server <- function(input, output, session, parent_session){ color = "info" ) }) - + observeEvent(input$prediction_est_start, { - #req(pred_inputs$pheno_input, pred_inputs$geno_input) - + # req(pred_inputs$pheno_input, pred_inputs$geno_input) + toggleClass(id = "pred_est_ploidy", class = "borderred", condition = (is.na(input$pred_est_ploidy) | is.null(input$pred_est_ploidy))) - + if (is.null(input$pred_known_file$datapath) | is.null(input$pred_trait_file$datapath)) { shinyalert( title = "Missing input!", @@ -461,11 +476,11 @@ mod_GS_server <- function(input, output, session, parent_session){ ) } req(input$pred_known_file$datapath, input$pred_trait_file$datapath, input$pred_est_ploidy) - - #Status + + # Status updateProgressBar(session = session, id = "pb_gp", value = 5, title = "Checking input files") - - #Variables + + # Variables ploidy <- as.numeric(input$pred_est_ploidy) train_geno_path <- input$pred_known_file$datapath est_geno_path <- input$pred_est_file$datapath @@ -473,7 +488,7 @@ mod_GS_server <- function(input, output, session, parent_session){ # Phenotypic variables if (trait_options$missing_data == "(blank)") { - pheno2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, na.strings="") + pheno2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, 
check.names = FALSE, na.strings = "") } else if (trait_options$missing_data == "Custom") { pheno2 <- read.csv(input$pred_trait_file$datapath, header = TRUE, check.names = FALSE, na.strings = trait_options$custom_missing) } else { @@ -485,32 +500,35 @@ mod_GS_server <- function(input, output, session, parent_session){ pheno2 <- pheno2[, c(sample_col_name, setdiff(names(pheno2), sample_col_name))] # Add row names and catch errors - tryCatch({ - row.names(pheno2) <- pheno2[, 1] - }, warning = function(w) { - showNotification( - paste("Warning: Duplicate row names detected. Please ensure the sample ID column has unique values.", w$message), - type = "warning", - duration = NULL - ) - return(NULL) # Return NULL to prevent further processing - }, error = function(e) { - showNotification( - paste("Error: An error occurred while assigning row names. Please check your sample ID column.", e$message), - type = "error", - duration = NULL - ) - return(NULL) # Return NULL to prevent further processing - }) + tryCatch( + { + row.names(pheno2) <- pheno2[, 1] + }, + warning = function(w) { + showNotification( + paste("Warning: Duplicate row names detected. Please ensure the sample ID column has unique values.", w$message), + type = "warning", + duration = NULL + ) + return(NULL) # Return NULL to prevent further processing + }, + error = function(e) { + showNotification( + paste("Error: An error occurred while assigning row names. 
Please check your sample ID column.", e$message), + type = "error", + duration = NULL + ) + return(NULL) # Return NULL to prevent further processing + } + ) # Assigning the IDs based on user input for column 1 ids_pheno <- pheno2[, 1] - - - #Make sure at least one trait was input + + + # Make sure at least one trait was input if (length(traits) == 0) { - # If condition is met, show notification toast shinyalert( title = "Missing input!", @@ -527,30 +545,27 @@ mod_GS_server <- function(input, output, session, parent_session){ imageUrl = "", animation = TRUE, ) - - + + # Stop the observeEvent gracefully return() - } - - - #Getting genotype matrix - - #Geno file path + + + # Getting genotype matrix + + # Geno file path file_path <- train_geno_path - - #Geno.file conversion if needed + + # Geno.file conversion if needed if (grepl("\\.csv$", file_path)) { train_geno <- read.csv(train_geno_path, header = TRUE, row.names = 1, check.names = FALSE) est_geno <- read.csv(est_geno_path, header = TRUE, row.names = 1, check.names = FALSE) - - #Save number of SNPs - #pred_inputs$pred_snps <- nrow(geno) - + + # Save number of SNPs + # pred_inputs$pred_snps <- nrow(geno) } else if (grepl("\\.vcf$", file_path) || grepl("\\.gz$", file_path)) { - - #Function to convert GT to dosage calls (add to BIGr) + # Function to convert GT to dosage calls (add to BIGr) convert_to_dosage <- function(gt) { # Split the genotype string alleles <- strsplit(gt, "[|/]") @@ -563,19 +578,55 @@ mod_GS_server <- function(input, output, session, parent_session){ } }) } - - #Convert VCF file if submitted + + #### VCF sanity check + checks <- vcf_sanity_check(train_geno_path) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "VCF_compressed" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") + + checks_result <- 
vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(input$pred_est_ploidy)) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + + # Convert VCF file if submitted train_vcf <- vcfR::read.vcfR(train_geno_path, verbose = FALSE) - if (is.null(est_geno_path) || is.na(est_geno_path)){ + if (is.null(est_geno_path) || is.na(est_geno_path)) { est_vcf <- NULL } else { + ## Check VCF + checks <- vcf_sanity_check(est_geno_path) + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(input$pred_est_ploidy)) + + if(checks_result) return() # Stop the analysis if checks fail + est_vcf <- vcfR::read.vcfR(est_geno_path, verbose = FALSE) } - - #Get number of SNPs - #pred_inputs$pred_snps <- nrow(vcf) - - #Extract GT + + # Get number of SNPs + # pred_inputs$pred_snps <- nrow(vcf) + + # Extract GT if (is.null(est_vcf)) { est_geno <- NULL } else { @@ -588,9 +639,7 @@ mod_GS_server <- function(input, output, session, parent_session){ class(train_geno) <- "numeric" rm(train_vcf) rm(est_vcf) - } else { - - # If condition is met, show notification toast shinyalert( title = "File Error", @@ -607,17 +656,17 @@ mod_GS_server <- function(input, output, session, parent_session){ imageUrl = "", animation = TRUE, ) - - #Stop the analysis + + # Stop the analysis return() } - - #Check that the ploidy entered is correct + + # Check that the ploidy entered is correct if (ploidy != max(train_geno, na.rm = TRUE)) { # If condition is met, show notification toast shinyalert( title = "Ploidy Mismatch", - text = paste0("The maximum value in the genotype file (",max(train_geno, na.rm = TRUE),") does not equal the ploidy entered"), + text = paste0("The maximum value in the genotype file (", max(train_geno, na.rm = TRUE), ") does not equal the ploidy entered"), size = "xs", 
closeOnEsc = FALSE, closeOnClickOutside = FALSE, @@ -627,45 +676,48 @@ mod_GS_server <- function(input, output, session, parent_session){ confirmButtonText = "OK", confirmButtonCol = "#004192", showCancelButton = FALSE, - #closeOnConfirm = TRUE, - #closeOnCancel = TRUE, + # closeOnConfirm = TRUE, + # closeOnCancel = TRUE, imageUrl = "", animation = TRUE ) - - + + # Stop the observeEvent gracefully - #return() + # return() } - - + + # Function to convert genotype matrix according to ploidy - convert_genotype <- function(genotype_matrix, ploidy) { - normalized_matrix <- 2 * (genotype_matrix / ploidy) - 1 - return(normalized_matrix) - } + + #convert_genotype <- function(genotype_matrix, ploidy) { + # normalized_matrix <- 2 * (genotype_matrix / ploidy) - 1 + # return(normalized_matrix) + #} #tranforming genotypes - train_geno_adj_init <- convert_genotype(train_geno, as.numeric(ploidy)) + #train_geno_adj_init <- convert_genotype(train_geno, as.numeric(ploidy)) + train_geno_adj_init <- train_geno + if (is.null(est_geno)) { est_geno_adj_init <- NULL } else { - est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy)) + #est_geno_adj_init <- convert_genotype(est_geno, as.numeric(ploidy)) + est_geno_adj_init <- est_geno } - - #Make sure the trait file and genotype file are in the same order + + # Make sure the trait file and genotype file are in the same order # Column names for geno (assuming these are the individual IDs) colnames_geno <- colnames(train_geno) # Assuming the first column in Pheno contains the matching IDs ids_pheno <- pheno2[, 1] # Find common identifiers common_ids <- intersect(colnames_geno, ids_pheno) - #Get number of id + # Get number of id pred_inputs2$pred_geno_pheno <- length(common_ids) - - #Throw an error if there are less matching samples in the phenotype file than the genotype file + + # Throw an error if there are less matching samples in the phenotype file than the genotype file if (length(common_ids) == 0) { - # If condition is met, 
show notification toast shinyalert( title = "Data Mismatch", @@ -682,16 +734,15 @@ mod_GS_server <- function(input, output, session, parent_session){ imageUrl = "", animation = TRUE, ) - - + + # Stop the observeEvent gracefully return() - } else if (length(common_ids) < length(colnames_geno)) { # If condition is met, show notification toast shinyalert( title = "Data Check", - text = paste0((length(common_ids))," samples in VCF File have trait information"), + text = paste0((length(common_ids)), " samples in VCF File have trait information"), size = "xs", closeOnEsc = FALSE, closeOnClickOutside = FALSE, @@ -701,21 +752,21 @@ mod_GS_server <- function(input, output, session, parent_session){ confirmButtonText = "OK", confirmButtonCol = "#004192", showCancelButton = FALSE, - #closeOnConfirm = TRUE, - #closeOnCancel = TRUE, + # closeOnConfirm = TRUE, + # closeOnCancel = TRUE, imageUrl = "", animation = TRUE ) - - + + # Stop the observeEvent gracefully - #return() + # return() } - - - - - #Final check before performing analyses + + + + + # Final check before performing analyses shinyalert( title = "Ready?", text = "Inputs have been checked", @@ -728,8 +779,8 @@ mod_GS_server <- function(input, output, session, parent_session){ confirmButtonText = "Proceed", confirmButtonCol = "#004192", showCancelButton = TRUE, - #closeOnConfirm = TRUE, - #closeOnCancel = TRUE, + # closeOnConfirm = TRUE, + # closeOnCancel = TRUE, imageUrl = "", animation = TRUE, callbackR = function(value) { @@ -742,52 +793,63 @@ mod_GS_server <- function(input, output, session, parent_session){ } } ) - - #Status + + # Status updateProgressBar(session = session, id = "pb_gp", value = 40, title = "Generating Matrices") - - #Create relationship matrix depending on the input VCF files + + # Create relationship matrix depending on the input VCF files if (is.null(advanced_options_pred$pred_est_file)) { - #Subset phenotype file by common IDs + # Subset phenotype file by common IDs pheno2 <- pheno2[common_ids, 
] - + #Matrix - Kin_mat <- A.mat(t(train_geno_adj_init), max.missing=NULL,impute.method="mean",return.imputed=FALSE) + #Kin_mat <- A.mat(t(train_geno_adj_init), max.missing=NULL,impute.method="mean",return.imputed=FALSE) + Kin_mat <- Gmatrix(t(train_geno_adj_init), + method = "VanRaden", + ploidy = ploidy, + ploidy.correction=TRUE, + ratio = FALSE, + missingValue = NA) } else{ #Subset phenotype file and training file by common IDs pheno2 <- pheno2[common_ids, ] - - #Match training and testing genotype file SNPs + + # Match training and testing genotype file SNPs common_markers <- intersect(rownames(train_geno_adj_init), rownames(est_geno_adj_init)) - #first remove samples from training genotype matrix that are not in the phenotype file + # first remove samples from training genotype matrix that are not in the phenotype file train_geno_adj <- train_geno_adj_init[common_markers, common_ids] - #Merge the training and prediction genotype matrices + # Merge the training and prediction genotype matrices est_geno_adj_init <- est_geno_adj_init[common_markers, ] train_pred_geno <- cbind(train_geno_adj, est_geno_adj_init) #Matrix - Kin_mat <- A.mat(t(train_pred_geno), max.missing=NULL,impute.method="mean",return.imputed=FALSE) + #Kin_mat <- A.mat(t(train_pred_geno), max.missing=NULL,impute.method="mean",return.imputed=FALSE) + Kin_mat <- Gmatrix(t(train_pred_geno), + method = "VanRaden", + ploidy = ploidy, + ploidy.correction=TRUE, + ratio = FALSE, + missingValue = NA) } - - #Save to reactive values - #pred_inputs2$shared_snps <- length(common_markers) + + # Save to reactive values + # pred_inputs2$shared_snps <- length(common_markers) pred_inputs2$matrix <- Kin_mat pred_inputs2$pheno_input <- pheno2 }) - - #3) Analysis only proceeds once continue_prediction is converted to TRUE + + # 3) Analysis only proceeds once continue_prediction is converted to TRUE observe({ - - req(continue_prediction2(),pred_inputs2$pheno_input, pred_inputs2$matrix) - 
req(continue_prediction2(), pred_inputs2$pheno_input, pred_inputs2$matrix) + # Stop analysis if cancel was selected if (isFALSE(continue_prediction2())) { return() } - - #Variables + + # Variables ploidy <- as.numeric(input$pred_est_ploidy) gmat <- pred_inputs2$matrix pheno2 <- pred_inputs2$pheno_input @@ -806,89 +868,101 @@ mod_GS_server <- function(input, output, session, parent_session){ } else { fixed_cov <- setdiff(input$pred_fixed_info2, input$pred_fixed_cat2) } - #fixed_cov <- setdiff(input$pred_fixed_info2, input$pred_fixed_cat2) + # fixed_cov <- setdiff(input$pred_fixed_info2, input$pred_fixed_cat2) cores <- 1 total_population <- ncol(gmat) - #train_size <- floor(percentage / 100 * total_population) - - #Status + # train_size <- floor(percentage / 100 * total_population) + + # Status updateProgressBar(session = session, id = "pb_gp", value = 90, title = "Generating Results") - - #initialize dataframe + + # initialize dataframe results_GEBVs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1) results_TRAITs <- matrix(nrow = ncol(gmat), ncol = length(traits) + 1) - colnames(results_TRAITs) <- c("Sample",paste0(traits)) # Set the column names to be the traits - colnames(results_GEBVs) <- c("Sample",paste0(traits)) # Set the column names to be the traits - - #GBLUP for each trait + colnames(results_TRAITs) <- c("Sample", paste0(traits)) # Set the column names to be the traits + colnames(results_GEBVs) <- c("Sample", paste0(traits)) # Set the column names to be the traits + + # GBLUP for each trait for (trait_idx in 1:length(traits)) { - traitpred <- kin.blup(data = pheno2, - geno = colnames(pheno2)[1], - pheno = traits[trait_idx], - fixed = fixed_cat, - covariate = fixed_cov, - K=gmat, - n.core = cores) - - results_GEBVs[, (trait_idx+1)] <- traitpred$g - results_TRAITs[, (trait_idx+1)] <- traitpred$pred + traitpred <- kin.blup( + data = pheno2, + geno = colnames(pheno2)[1], + pheno = traits[trait_idx], + fixed = fixed_cat, + covariate = fixed_cov, + K = 
gmat, + n.core = cores + ) + + results_GEBVs[, (trait_idx + 1)] <- traitpred$g + results_TRAITs[, (trait_idx + 1)] <- traitpred$pred } - #Organize dataframes - results_GEBVs[,1] <- row.names(data.frame(traitpred$g)) - results_TRAITs[,1] <- row.names(data.frame(traitpred$pred)) - - #Label the samples that already had phenotype information + # Organize dataframes + results_GEBVs[, 1] <- row.names(data.frame(traitpred$g)) + results_TRAITs[, 1] <- row.names(data.frame(traitpred$pred)) + + # Label the samples that already had phenotype information results_GEBVs <- data.frame(results_GEBVs) results_TRAITs <- data.frame(results_TRAITs) exists_in_df <- results_GEBVs[[1]] %in% pheno2[[1]] results_GEBVs <- cbind(results_GEBVs[1], "w/Pheno" = exists_in_df, results_GEBVs[-1]) results_TRAITs <- cbind(results_TRAITs[1], "w/Pheno" = exists_in_df, results_TRAITs[-1]) - - #Status + + # Status updateProgressBar(session = session, id = "pb_gp", value = 100, title = "Finished!") - - ##Make output tables depending on 1 or 2 VCF/pedigree files used. - #GEBVs + + ## Make output tables depending on 1 or 2 VCF/pedigree files used. 
+ # GEBVs if (!is.null(advanced_options_pred$pred_est_file)) { # Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column pred_outputs2$all_GEBVs <- results_GEBVs[results_GEBVs$`w/Pheno` == FALSE, !names(results_GEBVs) %in% "w/Pheno"] - } else{ + } else { pred_outputs2$all_GEBVs <- results_GEBVs } - - #BLUPs of genetic values + + # BLUPs of genetic values if (!is.null(advanced_options_pred$pred_est_file)) { # Subset rows where 'w/Pheno' is FALSE and drop the 'w/Pheno' column pred_outputs2$trait_output <- results_TRAITs[results_TRAITs$`w/Pheno` == FALSE, !names(results_TRAITs) %in% "w/Pheno"] - } else{ + } else { pred_outputs2$trait_output <- results_TRAITs } - - #End the event + + # End the event continue_prediction2(NULL) }) - - #Output the prediction tables + + # Output the prediction tables all_GEBVs <- reactive({ validate( need(!is.null(pred_outputs2$all_GEBVs), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.") ) pred_outputs2$all_GEBVs }) - - #GEBVs from all iterations/folds - output$pred_gebvs_table2 <- renderDT({all_GEBVs()}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5)) - + + # GEBVs from all iterations/folds + output$pred_gebvs_table2 <- renderDT( + { + all_GEBVs() + }, + options = list(scrollX = TRUE, autoWidth = FALSE, pageLength = 5) + ) + trait_output <- reactive({ validate( need(!is.null(pred_outputs2$trait_output), "Upload the input files, set the parameters and click 'run analysis' to access results in this session.") ) pred_outputs2$trait_output }) - - #GEBVs from all iterations/folds - output$pred_trait_table <- renderDT({trait_output()}, options = list(scrollX = TRUE,autoWidth = FALSE, pageLength = 5)) - + + # GEBVs from all iterations/folds + output$pred_trait_table <- renderDT( + { + trait_output() + }, + options = list(scrollX = TRUE, autoWidth = FALSE, pageLength = 5) + ) + output$download_vcft <- downloadHandler( filename = function() { 
paste0("BIGapp_Training_VCF_Example_file.vcf.gz") @@ -896,8 +970,9 @@ mod_GS_server <- function(input, output, session, parent_session){ content = function(file) { ex <- system.file("test-dose.vcf.gz", package = "BIGapp") file.copy(ex, file) - }) - + } + ) + output$download_vcfp <- downloadHandler( filename = function() { paste0("BIGapp_Predict_VCF_Example_file.vcf") @@ -905,8 +980,9 @@ mod_GS_server <- function(input, output, session, parent_session){ content = function(file) { ex <- system.file("test-dose-use-for-prediction.vcf", package = "BIGapp") file.copy(ex, file) - }) - + } + ) + output$download_pheno <- downloadHandler( filename = function() { paste0("BIGapp_passport_Example_file.csv") @@ -914,9 +990,10 @@ mod_GS_server <- function(input, output, session, parent_session){ content = function(file) { ex <- system.file("iris_passport_file.csv", package = "BIGapp") file.copy(ex, file) - }) - - #Download files for GP + } + ) + + # Download files for GP output$download_pred_results_file <- downloadHandler( filename = function() { paste0("Prediction-results-", Sys.Date(), ".zip") @@ -925,34 +1002,34 @@ mod_GS_server <- function(input, output, session, parent_session){ # Temporary files list temp_dir <- tempdir() temp_files <- c() - + # Create a temporary file for data frames ebv_file <- file.path(temp_dir, paste0("GS-EBV-predictions-", Sys.Date(), ".csv")) write.csv(pred_outputs2$all_GEBVs, ebv_file, row.names = FALSE) temp_files <- c(temp_files, ebv_file) - + trait_file <- file.path(temp_dir, paste0("GS-Phenotype-predictions-", Sys.Date(), ".csv")) write.csv(pred_outputs2$trait_output, trait_file, row.names = FALSE) temp_files <- c(temp_files, trait_file) - + # Zip files only if there's something to zip if (length(temp_files) > 0) { zip(file, files = temp_files, extras = "-j") # Using -j to junk paths } - + # Optionally clean up file.remove(temp_files) } ) - - ##Summary Info + + ## Summary Info pred_summary_info <- function() { # Handle possible NULL values for 
inputs dosage_file_name <- if (!is.null(input$pred_known_file$name)) input$pred_known_file$name else "No file selected" est_file_name <- if (!is.null(input$pred_est_file$name)) input$pred_est_file$name else "No file selected" passport_file_name <- if (!is.null(input$pred_trait_file$name)) input$pred_trait_file$name else "No file selected" selected_ploidy <- if (!is.null(input$pred_est_ploidy)) as.character(input$pred_est_ploidy) else "Not selected" - + # Print the summary information cat( "BIGapp Selection Summary\n", @@ -971,8 +1048,8 @@ mod_GS_server <- function(input, output, session, parent_session){ paste("Selected Ploidy:", selected_ploidy), "\n", paste("Selected Trait(s):", input$pred_trait_info2), "\n", paste("Selected Fixed Effects:", input$pred_fixed_info2), "\n", - #paste("Selected Model:", input$pred_fixed_info2), "\n", - #paste("Selected Matrix:", input$pred_fixed_info2), "\n", + # paste("Selected Model:", input$pred_fixed_info2), "\n", + # paste("Selected Matrix:", input$pred_fixed_info2), "\n", "\n", "### R Packages Used ###\n", "\n", @@ -986,7 +1063,7 @@ mod_GS_server <- function(input, output, session, parent_session){ sep = "" ) } - + # Popup for analysis summary observeEvent(input$pred_summary, { showModal(modalDialog( @@ -1002,8 +1079,8 @@ mod_GS_server <- function(input, output, session, parent_session){ ) )) }) - - + + # Download Summary Info output$download_pred_info <- downloadHandler( filename = function() { diff --git a/R/mod_GSAcc.R b/R/mod_GSAcc.R index f433ab2..8f5a788 100644 --- a/R/mod_GSAcc.R +++ b/R/mod_GSAcc.R @@ -429,6 +429,32 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ toggleClass(id = "pred_ploidy", class = "borderred", condition = (is.na(input$pred_ploidy) | is.null(input$pred_ploidy))) + #### VCF sanity check + checks <- vcf_sanity_check(input$pred_file$datapath) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "VCF_compressed" + ) + + error_if_true <- c( 
+ "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(input$pred_ploidy)) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + + if(is.null(advanced_options$pred_matrix)) advanced_options$pred_matrix <- "none_selected" if (((is.null(input$pred_file$datapath) & advanced_options$pred_matrix != "Amatrix") | (is.null(advanced_options$ped_file$datapath) & advanced_options$pred_matrix == "Amatrix")) | @@ -521,35 +547,11 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ #Getting genotype matrix #Geno.file conversion if needed if(!is.null(input$pred_file$datapath)){ - geno_snps <- read_geno_file(input$pred_file$datapath, requires = "GT") + geno_snps <- read_geno_file(input$pred_file$datapath, requires = "GT", ploidy = as.numeric(input$pred_ploidy)) + if(is.null(geno_snps)) return() geno <- geno_snps[[1]] values_boxes$pred_snps <- geno_snps[[2]] - #Check that the ploidy entered is correct - if (input$pred_ploidy != max(geno, na.rm = TRUE)) { - # If condition is met, show notification toast - shinyalert( - title = "Ploidy Mismatch", - text = paste0("The maximum value in the genotype file (",max(geno, na.rm = TRUE),") does not equal the ploidy entered"), - size = "xs", - closeOnEsc = FALSE, - closeOnClickOutside = FALSE, - html = TRUE, - type = "warning", - showConfirmButton = TRUE, - confirmButtonText = "OK", - confirmButtonCol = "#004192", - showCancelButton = FALSE, - #closeOnConfirm = TRUE, - #closeOnCancel = TRUE, - imageUrl = "", - animation = TRUE - ) - - # Stop the observeEvent gracefully - #return() - } - #Make sure the trait file and genotype file are in the same order # Column names for geno (assuming these are the 
individual IDs) colnames_geno <- colnames(geno) diff --git a/R/mod_PCA.R b/R/mod_PCA.R index b315e76..cdacdbe 100644 --- a/R/mod_PCA.R +++ b/R/mod_PCA.R @@ -93,6 +93,7 @@ mod_PCA_ui <- function(id){ sliderInput(inputId = ns('pca_image_width'), label = 'Width', value = 10, min = 1, max = 20, step=0.5), sliderInput(inputId = ns('pca_image_height'), label = 'Height', value = 6, min = 1, max = 20, step = 0.5), downloadButton(ns("download_pca"), "Save Image"), + downloadButton(ns("download_pcs"), "Save PCs table"), circle = FALSE, status = "danger", icon = icon("floppy-disk"), width = "300px", label = "Save", @@ -363,6 +364,31 @@ mod_PCA_server <- function(input, output, session, parent_session){ genomat <- read.csv(geno, header = TRUE, row.names = 1, check.names = FALSE) } else{ + #### VCF sanity check + checks <- vcf_sanity_check(geno) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "VCF_compressed" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("chrom_info", "pos_info", "ref_alt", "max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(ploidy)) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + #Import genotype information if in VCF format vcf <- read.vcfR(geno, verbose = FALSE) @@ -447,25 +473,6 @@ mod_PCA_server <- function(input, output, session, parent_session){ # Print the modified dataframe row.names(info_df) <- info_df[,1] - # Check ploidy - if(input$pca_ploidy != max(genomat, na.rm = T)){ - shinyalert( - title = "Wrong ploidy", - text = "The input ploidy does not match the maximum dosage found in the genotype file", - size = "s", - closeOnEsc = TRUE, - closeOnClickOutside = FALSE, - html = TRUE, - type = "error", - showConfirmButton = TRUE, - 
confirmButtonText = "OK", - confirmButtonCol = "#004192", - showCancelButton = FALSE, - animation = TRUE - ) - req(input$pca_ploidy == max(genomat, na.rm = T)) - } - #Plotting #First build a relationship matrix using the genotype values G.mat.updog <- Gmatrix(t(genomat), method = "VanRaden", ploidy = as.numeric(ploidy), missingValue = "NA") @@ -909,6 +916,15 @@ mod_PCA_server <- function(input, output, session, parent_session){ ex <- system.file("iris_passport_file.csv", package = "BIGapp") file.copy(ex, file) }) + + output$download_pcs <- downloadHandler( + filename = function() { + paste0("PCs.csv") + }, + content = function(file) { + df <- pca_data$pc_df_pop + write.csv(df, file, row.names = FALSE, quote = FALSE) + }) } diff --git a/R/mod_dapc.R b/R/mod_dapc.R index 174ecc1..921bc39 100644 --- a/R/mod_dapc.R +++ b/R/mod_dapc.R @@ -203,6 +203,31 @@ mod_dapc_server <- function(input, output, session, parent_session){ ##Add in VCF with the vcfR package (input VCF, then convert to genlight using vcf2genlight function) #Import genotype information if in VCF format + #### VCF sanity check + checks <- vcf_sanity_check(geno) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("chrom_info", "pos_info", "ref_alt") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(ploidy)) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + vcf <- read.vcfR(geno) #Get items in FORMAT column @@ -270,6 +295,31 @@ mod_dapc_server <- function(input, output, session, parent_session){ selected_K <- as.numeric(input$dapc_k) #Import genotype information if in VCF format + #### VCF sanity check + checks <- vcf_sanity_check(geno) + + error_if_false <- c( + 
"VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "VCF_compressed" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(ploidy)) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + vcf <- read.vcfR(geno) #Get items in FORMAT column @@ -349,14 +399,16 @@ mod_dapc_server <- function(input, output, session, parent_session){ col = my_palette, pch = 20, # shapes cstar = 1, # 0 or 1, arrows from center of cluster - cell = 2, # size of elipse + cellipse = 2, # size of elipse scree.da = T, # plot da scree.pca = T, # plot pca posi.da = "topright", posi.pca="bottomright", mstree = F, # lines connecting clusters - lwd = 1, lty = 2, - leg = F, clab = 1) # legend and label of legend clusters. clab 0 or 1 + lwd = 1, + lty = 2, + legeng = F, + clabel = 1) # legend and label of legend clusters. clab 0 or 1 }) output$DAPC_plot <- renderPlot({ @@ -424,14 +476,14 @@ mod_dapc_server <- function(input, output, session, parent_session){ col = my_palette, pch = 20, # shapes cstar = 1, # 0 or 1, arrows from center of cluster - cell = 2, # size of elipse + cellipse = 2, # size of elipse scree.da = T, # plot da scree.pca = T, # plot pca posi.da = "topright", posi.pca="bottomright", mstree = F, # lines connecting clusters lwd = 1, lty = 2, - leg = F, clab = 1) # legend and label of legend clusters. clab 0 or 1 + legend = F, clabel = 1) # legend and label of legend clusters. 
clab 0 or 1 } else if (input$dapc_figure == "BIC Plot") { req(dapc_items$BIC, dapc_items$bestK) diff --git a/R/mod_diversity.R b/R/mod_diversity.R index ddf97a8..a10c089 100644 --- a/R/mod_diversity.R +++ b/R/mod_diversity.R @@ -224,6 +224,31 @@ mod_diversity_server <- function(input, output, session, parent_session){ updateProgressBar(session = session, id = "pb_diversity", value = 20, title = "Importing VCF") #Import genotype information if in VCF format + #### VCF sanity check + checks <- vcf_sanity_check(geno) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "chrom_info", "pos_info", "VCF_compressed" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("ref_alt","max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = ploidy) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + vcf <- read.vcfR(geno, verbose = FALSE) #Save position information diff --git a/R/mod_dosage2vcf.R b/R/mod_dosage2vcf.R index e7fa70c..b168d4f 100644 --- a/R/mod_dosage2vcf.R +++ b/R/mod_dosage2vcf.R @@ -326,8 +326,8 @@ mod_dosage2vcf_server <- function(input, output, session, parent_session){ updateProgressBar(session = session, id = "dosage2vcf_pb", value = 30, title = "Writing VCF") madc2vcf_all(madc = read_madc, - botloci = botloci, - hap_seq = input$hapDB_file$datapath, + botloci_file = botloci, + hap_seq_file = input$hapDB_file$datapath, n.cores= input$cores, rm_multiallelic_SNP = TRUE, out_vcf = output_name, diff --git a/R/mod_gwas.R b/R/mod_gwas.R index b569273..d308a9c 100644 --- a/R/mod_gwas.R +++ b/R/mod_gwas.R @@ -126,7 +126,8 @@ mod_gwas_ui <- function(id){ sliderInput(inputId = ns('gwas_image_height'), label = 'Height', value = 5, min = 1, max = 20, step = 0.5), fluidRow( 
downloadButton(ns("download_gwas_figure"), "Save Image"), - downloadButton(ns("download_gwas_file"), "Save Files")), + downloadButton(ns("download_gwas_file"), "Save Files"), + downloadButton(ns("download_viewpoly"), "VIEWpoly input")), circle = FALSE, status = "danger", icon = icon("floppy-disk"), width = "300px", label = "Save", @@ -455,6 +456,31 @@ mod_gwas_server <- function(input, output, session, parent_session){ temp_geno_file <- tempfile(fileext = ".csv") #Convert VCF file if submitted + #### VCF sanity check + checks <- vcf_sanity_check(input$gwas_file$datapath, max_markers = 10000) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "chrom_info", "pos_info", "VCF_compressed" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("ref_alt","max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = ploidy) + + if(checks_result) return() # Stop the analysis if checks fail + ######### + vcf <- read.vcfR(input$gwas_file$datapath, verbose = FALSE) #Extract GT @@ -872,6 +898,18 @@ mod_gwas_server <- function(input, output, session, parent_session){ ) }) + + + output$download_viewpoly <- downloadHandler( + filename = function() { + paste0("BIGapp_Viewpoly_GWASpoly.RData") + }, + content = function(file) { + temp <- gwas_data$data2 + save(temp, file = file) + } + ) + #Download files for GWAS output$download_gwas_file <- downloadHandler( filename = function() { diff --git a/R/run_app.R b/R/run_app.R index 7c41142..47b1cb9 100644 --- a/R/run_app.R +++ b/R/run_app.R @@ -17,7 +17,7 @@ run_app <- function( ) { #Uncomment the sink command to allow console output sink(file = tempfile()) - options(warn = -1) + #options(warn = -1) with_golem_options( app = shinyApp( diff --git a/R/utils.R b/R/utils.R index 
f1f7385..0696bcf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,7 +6,7 @@ get_counts <- function(madc_file, output_name) { # Read the madc file madc_df <- read.csv(madc_file, sep = ',', check.names = FALSE, header = FALSE) header <- grep("AlleleID", madc_df[,1]) - if(header > 1) madc_df <- madc_df[-c(1:(grep("AlleleID", madc_df[,1]))-1),] + if(header > 1) madc_df <- madc_df[-c(1:(grep("AlleleID", madc_df[,1]))-1), ] colnames(madc_df) <- madc_df[1,] madc_df <- madc_df[-1,] @@ -35,7 +35,7 @@ get_matrices <- function(result_df) { alt_df <- subset(update_df, grepl("Alt$", AlleleID)) #Ensure that each has the same SNPs and that they are in the same order - same <- identical(alt_df$CloneID,ref_df$CloneID) + same <- identical(alt_df$CloneID, ref_df$CloneID) ###Convert the ref and alt counts into matrices with the CloneID as the index #Set SNP names as index @@ -52,10 +52,10 @@ get_matrices <- function(result_df) { alt_df <- alt_df[common_ids, ] } - #Remove unwanted columns and convert to matrix + # Remove unwanted columns and convert to matrix rm.col <- c("AlleleID", "CloneID", "AlleleSequence", "ClusterConsensusSequence", - "CallRate", "OneRatioRef", "OneRatioSnp", "FreqHomRef", "FreqHomSnp", - "FreqHets", "PICRef", "PICSnp", "AvgPIC", "AvgCountRef", "AvgCountSnp","RatioAvgCountRefAvgCountSnp") + "CallRate", "OneRatioRef", "OneRatioSnp", "FreqHomRef", "FreqHomSnp", + "FreqHets", "PICRef", "PICSnp", "AvgPIC", "AvgCountRef", "AvgCountSnp", "RatioAvgCountRefAvgCountSnp") ref_matrix <- as.matrix(ref_df[, -which(colnames(ref_df) %in% rm.col)]) alt_matrix <- as.matrix(alt_df[, -which(colnames(alt_df) %in% rm.col)]) @@ -64,7 +64,7 @@ get_matrices <- function(result_df) { class(ref_matrix) <- "numeric" class(alt_matrix) <- "numeric" - #Make the size matrix by combining the two matrices + # Make the size matrix by combining the two matrices size_matrix <- (ref_matrix + alt_matrix) #Count the number of cells with 0 count to estimate missing data @@ -115,27 +115,29 @@ 
convert_to_dosage <- function(gt) { findK <- function(genotypeMatrix, maxK, ploidy) { # Convert the genotype matrix to a genlight object genlight_new <- new("genlight", t(genotypeMatrix), - ind.names = row.names(t(genotypeMatrix)), - loc.names = colnames(t(genotypeMatrix)), - ploidy = ploidy, - NA.char = NA) + ind.names = row.names(t(genotypeMatrix)), + loc.names = colnames(t(genotypeMatrix)), + ploidy = ploidy, + NA.char = NA + ) #Assign the populations as the sample names since there is no assumed populations pop(genlight_new) <- genlight_new@ind.names - #Estimate number of clusters - #Retain all pca for the find.clusters step. Retain as few as possible while maximizing variance captured for DAPC step. - #Choose is the option to allow adegenet to select the best cluster number based on the BIC minimum - #The default criterion is "diffNgroup, which is not necessarily the minimum BIC, but based on the sharp decrease of the BIC value. - #Either way, this is a suggestion, and the number of clusters to use should be made with biology considerations. - graphics.off() #Prevent plot from automatically displaying - grp <- find.clusters(genlight_new, max.n.clust = maxK, + # Estimate number of clusters + # Retain all pca for the find.clusters step. Retain as few as possible while maximizing variance captured for DAPC step. + # Choose is the option to allow adegenet to select the best cluster number based on the BIC minimum + # The default criterion is "diffNgroup, which is not necessarily the minimum BIC, but based on the sharp decrease of the BIC value. + # Either way, this is a suggestion, and the number of clusters to use should be made with biology considerations. 
+ graphics.off() # Prevent plot from automatically displaying + grp <- find.clusters(genlight_new, + max.n.clust = maxK, n.pca = nInd(genlight_new), stat = "BIC", criterion = "diffNgroup", parallel = FALSE, choose = FALSE) - + # Identify the best K based on lowest BIC bestK <- length(grp$size) @@ -143,40 +145,43 @@ findK <- function(genotypeMatrix, maxK, ploidy) { bicDF <- data.frame(K = 1:maxK, BIC = as.data.frame(grp$Kstat)$`grp$Kstat`) return(list(bestK = as.numeric(bestK), grp = grp, BIC = bicDF)) - + } #' @importFrom methods new performDAPC <- function(genotypeMatrix, selected_K, ploidy) { - + #Convert matrix to genlight genlight_new <- new("genlight", t(genotypeMatrix), ind.names = row.names(t(genotypeMatrix)), loc.names = colnames(t(genotypeMatrix)), ploidy = ploidy, NA.char = NA) - - #Get groups based on specified cluster number (K) - graphics.off() #Prevent plot from automatically displaying - grp <- find.clusters(genlight_new, n.clust = selected_K, - n.pca = nInd(genlight_new), - stat = "BIC", - criterion = "diffNgroup", - parallel = FALSE, - choose = FALSE) + + # Get groups based on specified cluster number (K) + graphics.off() # Prevent plot from automatically displaying + grp <- find.clusters(genlight_new, + n.clust = selected_K, + n.pca = nInd(genlight_new), + stat = "BIC", + criterion = "diffNgroup", + parallel = FALSE, + choose = FALSE + ) # Find the optimal number of principal components - #NOTE: The default n.da is K-1, but I have read previously to use #Samples - 1? + # NOTE: The default n.da is K-1, but I have read previously to use #Samples - 1? 
dapc1 <- dapc(genlight_new, grp$grp, - n.pca = nInd(genlight_new), - n.da = nInd(genlight_new)-1, - parallel = FALSE) + n.pca = nInd(genlight_new), + n.da = nInd(genlight_new) - 1, + parallel = FALSE + ) a.score <- optim.a.score(dapc1, plot = FALSE) n.pca <- a.score$best # Perform DAPC with the best K - finalDapc <- dapc(genlight_new, grp$grp, n.pca = n.pca, n.da = selected_K-1, parallel= FALSE) + finalDapc <- dapc(genlight_new, grp$grp, n.pca = n.pca, n.da = selected_K - 1, parallel = FALSE) # Extract the membership probabilities Q <- as.data.frame(finalDapc$posterior) @@ -184,14 +189,14 @@ performDAPC <- function(genotypeMatrix, selected_K, ploidy) { # Add cluster assignments to Q dataframe Q$Cluster_Assignment <- finalDapc$assign - #a data.frame giving the contributions of original variables (alleles in the case of genetic data) to the principal components of DAPC. - #dapc$var.contr + # a data.frame giving the contributions of original variables (alleles in the case of genetic data) to the principal components of DAPC. 
+ # dapc$var.contr # Return list containing BIC dataframe, Q dataframe w/ dapc assignments return(list(Q = Q, dapc = finalDapc)) } -#Heterozygosity function +# Heterozygosity function calculate_heterozygosity <- function(genotype_matrix, ploidy = 2) { # Determine the heterozygous values based on ploidy heterozygous_values <- seq(1, ploidy - 1) @@ -231,8 +236,8 @@ calculateMAF <- function(df, ploidy) { df <- as.data.frame(df) } - #Convert the elements to numeric if they are characters - df[] <- lapply(df, function(x) if(is.character(x)) as.numeric(as.character(x)) else x) + # Convert the elements to numeric if they are characters + df[] <- lapply(df, function(x) if (is.character(x)) as.numeric(as.character(x)) else x) allele_frequencies <- apply(df, 1, function(row) { non_na_count <- sum(!is.na(row)) @@ -249,9 +254,9 @@ calculateMAF <- function(df, ploidy) { df$AF <- allele_frequencies df$MAF <- maf - maf_df <- df[,c("AF", "MAF"), drop = FALSE] + maf_df <- df[, c("AF", "MAF"), drop = FALSE] - #Make the row names (SNP ID) the first column + # Make the row names (SNP ID) the first column maf_df <- maf_df %>% rownames_to_column(var = "SNP_ID") @@ -263,7 +268,7 @@ calculate_percentages <- function(matrix_data, ploidy) { apply(matrix_data, 2, function(col) { counts <- table(col) prop <- prop.table(counts) * 100 - prop[as.character(0:ploidy)] # Adjust the range based on the max value (consider entering the ploidy value explicitly for max_val) + prop[as.character(0:ploidy)] # Adjust the range based on the max value (consider entering the ploidy value explicitly for max_val) }) } @@ -285,10 +290,9 @@ convert_genotype_counts <- function(df, ploidy, is_reference = TRUE) { #' posdefmat <- function(mat) { if (is.positive.definite(round(mat, 18))) { - g = mat - } - else { - g <-nearPD(mat)$mat + g <- mat + } else { + g <- nearPD(mat)$mat warning("The matrix was adjusted for the nearest positive definite matrix") } return(g) @@ -300,8 +304,10 @@ split_info_column <- 
function(info) { info_split <- str_split(info, ";")[[1]] # Create a named list by splitting each element by equals sign - info_list <- set_names(map(info_split, ~ str_split(.x, "=")[[1]][2]), - map(info_split, ~ str_split(.x, "=")[[1]][1])) + info_list <- set_names( + map(info_split, ~ str_split(.x, "=")[[1]][2]), + map(info_split, ~ str_split(.x, "=")[[1]][1]) + ) return(info_list) } @@ -310,25 +316,51 @@ split_info_column <- function(info) { #' #' @param file_path character indicanting path to file #' @param requires which information is required from the VCF. Define the FORMAT or INFO letters. Example: c("GT", "DP", "PL") +#' @param check logical indicating whether to perform checks on the VCF file +#' @param ploidy numeric value indicating the ploidy of the species #' #' @importFrom vcfR read.vcfR #' @importFrom shinyalert shinyalert #' -read_geno_file <- function(file_path, requires = c("GT")){ +read_geno_file <- function(file_path, requires = c("GT"), ploidy, check=TRUE) { if (grepl("\\.csv$", file_path)) { geno <- read.csv(geno_path, header = TRUE, row.names = 1, check.names = FALSE) n_snps <- nrow(geno) return(list(geno, n_snps)) - } else if (grepl("\\.vcf$", file_path) || grepl("\\.gz$", file_path)) { - - #Convert VCF file if submitted + # Convert VCF file if submitted + #### VCF sanity check - Used in the GSAcc mod + if(check){ + checks <- vcf_sanity_check(file_path) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("chrom_info", "pos_info", "ref_alt") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(ploidy)) + + if(checks_result) return(NULL) # Stop the analysis if checks fail + ######### + } vcf <- read.vcfR(file_path, verbose = FALSE) 
all_requires <- vector() - for(i in 1:length(requires)) all_requires[i] <- grepl(requires[i], vcf@fix[1,8]) | grepl(requires[i], vcf@gt[1,1]) + for (i in 1:length(requires)) all_requires[i] <- grepl(requires[i], vcf@fix[1, 8]) | grepl(requires[i], vcf@gt[1, 1]) - if(!all(all_requires)) { + if (!all(all_requires)) { shinyalert( title = "Oops", text = paste("The VCF file does not contain required information:", requires[which(!all_requires)]), @@ -349,7 +381,7 @@ read_geno_file <- function(file_path, requires = c("GT")){ n_snps <- nrow(vcf@gt) - #Extract GT + # Extract GT geno <- extract.gt(vcf, element = "GT") geno <- apply(geno, 2, convert_to_dosage) class(geno) <- "numeric" @@ -384,7 +416,7 @@ read_geno_file <- function(file_path, requires = c("GT")){ ##' @param file location to be saved ##' @importFrom Rsamtools bgzip ##' -bgzip_compress <- function(output_name, file){ +bgzip_compress <- function(output_name, file) { # Check if the VCF file was created if (file.exists(output_name)) { # Compress the VCF file using gzip @@ -399,7 +431,6 @@ bgzip_compress <- function(output_name, file){ # Delete the temporary files unlink(bgzip_file) unlink(output_name) - } else { stop("Error: Failed to create the bgzip file.") } @@ -417,26 +448,24 @@ bgzip_compress <- function(output_name, file){ #' subset_vcf <- function(vcfR.object, remove.sample.list = NULL, remove.sample.file = NULL) { # Remove samples from the VCF object - + vcf <- vcfR.object - - if (!is.null(remove.sample.file)){ - #unwanted_samples <- read.csv(remove.sample.file, header = FALSE, stringsAsFactors = FALSE)$V1 + + if (!is.null(remove.sample.file)) { + # unwanted_samples <- read.csv(remove.sample.file, header = FALSE, stringsAsFactors = FALSE)$V1 unwanted_samples <- suppressWarnings(readLines(remove.sample.file)) - - }else{ + } else { unwanted_samples <- remove.sample.list } - - all_samples <- names(data.frame(vcf@gt, check.names=FALSE)) + + all_samples <- names(data.frame(vcf@gt, check.names = FALSE)) 
samples_to_keep <- all_samples[!all_samples %in% unwanted_samples] - vcf <- vcf[,samples_to_keep] - - #Get the number of samples removed to add to the filtering info popup + vcf <- vcf[, samples_to_keep] + + # Get the number of samples removed to add to the filtering info popup removed_number <- (length(all_samples) - length(samples_to_keep)) - + return(list(vcf = vcf, removed_number = removed_number)) - } #' Internal function @@ -454,30 +483,30 @@ subset_vcf <- function(vcfR.object, remove.sample.list = NULL, remove.sample.fil gmatrix2vcf <- function(Gmat.file, ploidy, output.file, dosageCount = "Reference") { # Convert a genotype matrix with numeric dosage values to a VCF file input_path <- Gmat.file - - #Import matrix + + # Import matrix dosage <- readr::read_csv(input_path) dosage <- data.frame(dosage, check.names = FALSE) - rownames(dosage) <- dosage[,1] + rownames(dosage) <- dosage[, 1] names(dosage)[1] <- "ID" - - ###Test that all values are integers within file - + + ### Test that all values are integers within file + # Header vcf_header <- c( - "##fileformat=VCFv4.3", - paste0("##BIGapp_gmatrix2vcf=",packageVersion("BIGapp")), - "##reference=NA", - "##contig=", - '##FORMAT=' + "##fileformat=VCFv4.3", + paste0("##BIGapp_gmatrix2vcf=", packageVersion("BIGapp")), + "##reference=NA", + "##contig=", + '##FORMAT=' ) - + # Assuming markers are in the "Chr_Pos" format df <- dosage %>% separate(ID, into = c("Chr", "Pos"), sep = "_") - - #Make the VCF df + + # Make the VCF df vcf_df <- data.frame( CHROM = df$Chr, POS = df$Pos, @@ -489,10 +518,10 @@ gmatrix2vcf <- function(Gmat.file, ploidy, output.file, dosageCount = "Reference INFO = ".", FORMAT = "GT" ) - + cat("Converting dosages to genotype format\n") - - ###Convert genotypes from dosage to gt + + ### Convert genotypes from dosage to gt # Precompute genotype strings for all possible dosage values to improve efficiency precompute_genotype_strings <- function(ploidy) { genotype_strings <- character(ploidy + 1) @@ 
-505,46 +534,74 @@ gmatrix2vcf <- function(Gmat.file, ploidy, output.file, dosageCount = "Reference } return(genotype_strings) } - + # Apply the precomputed genotype strings to the matrix convert_dosage2gt <- function(dosage_matrix, ploidy) { dosage_matrix <- as.matrix(dosage_matrix) genotype_strings <- precompute_genotype_strings(ploidy) - + # Handle missing values separately genotype_matrix <- matrix(genotype_strings[dosage_matrix + 1], nrow = nrow(dosage_matrix), ncol = ncol(dosage_matrix)) - #genotype_matrix[is.na(dosage_matrix)] <- "./." # Handle missing values + # genotype_matrix[is.na(dosage_matrix)] <- "./." # Handle missing values genotype_matrix[is.na(dosage_matrix)] <- paste(rep(".", ploidy), collapse = "/") - + # Retain row and column names rownames(genotype_matrix) <- rownames(dosage_matrix) colnames(genotype_matrix) <- colnames(dosage_matrix) - + return(genotype_matrix) } - + # Convert the dosage matrix to genotypes # Adjust dosage values depending on allele count dosage_check <- ifelse(dosageCount == "Reference", FALSE, TRUE) - dosage <- BIGr::flip_dosage(dosage[,-c(1)], ploidy, is.reference= dosage_check) + dosage <- BIGr::flip_dosage(dosage[, -c(1)], ploidy, is.reference = dosage_check) geno_df <- convert_dosage2gt(dosage, ploidy) - - #Combine info from the matrices to form the VCF information for each sample - #Combine the dataframes together - vcf_df <- cbind(vcf_df,geno_df) - + + # Combine info from the matrices to form the VCF information for each sample + # Combine the dataframes together + vcf_df <- cbind(vcf_df, geno_df) + # Add # to the CHROM column name colnames(vcf_df)[1] <- "#CHROM" - + # Write the header to the file writeLines(vcf_header, con = output.file) - + # Append the dataframe to the file in tab-separated format suppressWarnings( write.table(vcf_df, file = output.file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE, append = TRUE) ) - + # Unload all items from memory rm(vcf_df, geno_df, dosage, df) +} + +#' Check 
if a File is Compressed
+#'
+#' This function checks whether a given file is compressed by inspecting its
+#' magic number (first few bytes). It detects common compression formats such as
+#' gzip (`.gz`), bzip2 (`.bz2`), and xz (`.xz`).
+#'
+#' @param file_path A character string giving the path to the file to be checked.
+#'
+#' @return A character string indicating the compression type (`"gzip (.gz)"`,
+#' `"bzip2 (.bz2)"`, or `"xz (.xz)"`) if the file is compressed, or `FALSE` if
+#' the file is not recognized as compressed.
+#'
+is_compressed_file <- function(file_path) {
+  con <- file(file_path, "rb")
+  magic <- readBin(con, "raw", n = 6)
+  close(con)
+  # Known magic numbers for compressed formats (xz needs all 6 bytes; raw OOB reads as 00)
+  if (all(magic[1:2] == as.raw(c(0x1f, 0x8b)))) {
+    return("gzip (.gz)")
+  } else if (all(magic[1:3] == as.raw(c(0x42, 0x5a, 0x68)))) {
+    return("bzip2 (.bz2)")
+  } else if (all(magic[1:6] == as.raw(c(0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00)))) {
+    return("xz (.xz)")
+  } else {
+    return(FALSE) # Not a known compressed format
+  }
}
diff --git a/R/vcf_sanity_check.R b/R/vcf_sanity_check.R
new file mode 100644
index 0000000..5db2e35
--- /dev/null
+++ b/R/vcf_sanity_check.R
@@ -0,0 +1,602 @@
+#' Perform a Sanity Check on a VCF File
+#'
+#' This function performs a series of checks on a VCF file to ensure its validity and integrity. It verifies the presence of required headers, columns, and data fields, and checks for common issues such as missing or malformed data.
+#'
+#' @param vcf_path A character string specifying the path to the VCF file. The file can be plain text or gzipped.
+#' @param n_data_lines An integer specifying the number of data lines to sample for detailed checks. Default is 100.
+#' @param depth_support_fields (Optional) A character vector of fields that are expected to be present in the FORMAT column for allele counts. Default is `c("AD", "RA", "AO", "RO", "NR", "NV", "SB", "F1R2", "F2R1")`.
+#' @param max_markers An integer specifying the maximum number of markers allowed in the VCF file. Default is 10,000. +#' @param verbose A logical value indicating whether to print detailed messages during the checks. Default is FALSE. +#' +#' @return A list containing: +#' - `checks`: A named vector indicating the results of each check (TRUE or FALSE). +#' - `messages`: A data frame containing messages for each check, indicating success or failure. +#' - `duplicates`: A list containing any duplicated sample or marker IDs found in the VCF file. +#' - `ploidy_max`: The maximum ploidy detected from the genotype field, if applicable. +#' +#' @details The function performs the following checks: +#' - **VCF_header**: Verifies the presence of the `##fileformat` header. +#' - **VCF_compressed**: Checks if the VCF file is .gz compressed and if the extension is correct. +#' - **VCF_columns**: Ensures required columns (`#CHROM`, `POS`, `ID`, `REF`, `ALT`, `QUAL`, `FILTER`, `INFO`) are present. +#' - **max_markers**: Checks if the total number of markers exceeds the specified limit. +#' - **unique_FORMAT**: Ensures that the FORMAT fields are consistent across sampled markers. +#' - **GT**: Verifies the presence of the `GT` (genotype) field in the FORMAT column. +#' - **allele_counts**: Checks for allele-level count fields (e.g., `AD`, `RA`, `AO`, `RO`). +#' - **samples**: Ensures sample/genotype columns are present. +#' - **chrom_info** and **pos_info**: Verifies the presence of `CHROM` and `POS` columns. +#' - **ref_alt**: Ensures `REF` and `ALT` fields contain valid nucleotide codes. +#' - **multiallelics**: Identifies multiallelic sites (ALT field with commas). +#' - **phased_GT**: Checks for phased genotypes (presence of `|` in the `GT` field). +#' - **duplicated_samples**: Checks for duplicated sample IDs. +#' - **duplicated_markers**: Checks for duplicated marker IDs. 
+#'
+#' @importFrom stats setNames
+#'
+#' @export
+vcf_sanity_check <- function(
+    vcf_path,
+    n_data_lines = 100,
+    max_markers = 10000,
+    depth_support_fields = c("AD", "RA", "AO", "RO", "NR", "NV", "SB", "F1R2", "F2R1"),
+    verbose = FALSE) {
+
+  # --- Prepare result vector ---
+  checks_names <- c(
+    "VCF_header",
+    "VCF_compressed",
+    "VCF_columns",
+    "max_markers",
+    "unique_FORMAT",
+    "GT",
+    "allele_counts",
+    "samples",
+    "chrom_info",
+    "pos_info",
+    "ref_alt",
+    "multiallelics",
+    "phased_GT",
+    "duplicated_samples",
+    "duplicated_markers",
+    "mixed_ploidies"
+  )
+  checks <- setNames(rep(NA, length(checks_names)), checks_names)
+
+  if (!file.exists(vcf_path)) {
+    warning("File does not exist.")
+    return(structure(
+      list(
+        checks = NULL, messages = "File does not exist",
+        duplicates = NULL, ploidy_max = NULL
+      ),
+      class = "vcf_sanity_check"
+    ))
+  }
+
+  is_gz_ext <- grepl("\\.gz$", vcf_path)
+  is_gz <- is_compressed_file(vcf_path)
+
+  con <- if (is_gz == "gzip (.gz)") {
+    checks["VCF_compressed"] <- TRUE
+    if(!is_gz_ext) {
+      checks["VCF_compressed"] <- FALSE
+      if(verbose) warning("File is compressed with gzip (.gz), but does not have .gz extension.")
+    }
+    gzfile(vcf_path, open = "rt")
+  } else if(is_gz == "bzip2 (.bz2)") {
+    if (verbose) warning("File is compressed with bzip2 (.bz2), which is not supported.")
+    return(structure(list(checks = NULL, messages = "File is bzip2 (.bz2) compressed, which is not supported.", duplicates = NULL, ploidy_max = NULL), class = "vcf_sanity_check")) # fail fast: con would otherwise be a logical and readLines() would error cryptically
+  } else if (is_gz == "xz (.xz)") {
+    if (verbose) warning("File is compressed with xz (.xz), which is not supported.")
+    return(structure(list(checks = NULL, messages = "File is xz (.xz) compressed, which is not supported.", duplicates = NULL, ploidy_max = NULL), class = "vcf_sanity_check")) # fail fast: con would otherwise be a logical and readLines() would error cryptically
+  } else {
+    checks["VCF_compressed"] <- TRUE
+    file(vcf_path, open = "r")
+  }
+
+  lines <- readLines(con, warn = FALSE)
+  close(con)
+
+  # Container for duplicated IDs
+  duplicates <- list(
+    duplicated_samples = character(0),
+    duplicated_markers = character(0)
+  )
+
+  # --- Header checks ---
+  header_lines <- grep("^##", lines, value = TRUE)
+  if (!any(grepl("^##fileformat=VCFv", header_lines))) {
+    checks["VCF_header"] <- FALSE
+    if (verbose) 
warning("Missing ##fileformat header.") + } else { + checks["VCF_header"] <- TRUE + if (verbose) cat("VCF header is present.\n") + } + + # --- Column header line --- + column_header_line <- grep("^#CHROM", lines, value = TRUE) + if (length(column_header_line) > 1) { + return(structure( + list( + checks = NULL, messages = "There are more than 1 line in your VCF starting with the header #CHROM. Please check the file format.", + duplicates = NULL, ploidy_max = NULL + ), + class = "vcf_sanity_check" + )) + } else if (length(column_header_line) == 0) { + return(structure( + list( + checks = NULL, messages = "Malformed VCF file: missing column header line.", + duplicates = NULL, ploidy_max = NULL + ), + class = "vcf_sanity_check" + )) + } else { + column_names <- unlist(strsplit(column_header_line, "\t")) + has_genotypes <- length(column_names) > 8 + required_columns <- c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO") + checks["VCF_columns"] <- all(required_columns %in% column_names[1:8]) + if (checks["VCF_columns"]) { + if (verbose) cat("Required VCF columns are present.\n") + } else { + if (verbose) warning("Missing one or more required VCF columns.") + } + } + + # --- Total marker count --- + data_line_indices <- grep("^[^#]", lines) + total_markers <- length(data_line_indices) + if (verbose) cat(sprintf("Total markers (data rows): %d\n", total_markers)) + + checks["max_markers"] <- total_markers <= max_markers + if (!checks["max_markers"]) { + warning(sprintf("More than %d markers found. 
Consider subsampling.", max_markers))
+  }
+
+  # --- Check for duplicated marker IDs ---
+  if (total_markers > 0) {
+    marker_ids <- sapply(lines[data_line_indices], function(line) {
+      fields <- strsplit(line, "\t")[[1]]
+      if (length(fields) >= 3) fields[3] else NA
+    })
+    marker_ids <- marker_ids[!is.na(marker_ids)]
+    duplicated_markers <- marker_ids[duplicated(marker_ids)]
+    duplicates$duplicated_markers <- duplicated_markers
+    if (length(duplicated_markers) > 0) {
+      checks["duplicated_markers"] <- TRUE
+      if (verbose) warning("Duplicated marker IDs found: ", paste(head(duplicated_markers, 10), collapse = ", "), "...")
+    } else {
+      if (verbose) cat("No duplicated marker IDs.\n")
+      checks["duplicated_markers"] <- FALSE
+    }
+  }
+
+  # --- FORMAT field checks (GT, AD, etc.) ---
+  if (has_genotypes) {
+    checks["samples"] <- TRUE
+    if (n_data_lines >= length(data_line_indices)) sample_indices <- data_line_indices else
+      sample_indices <- sample(data_line_indices, n_data_lines)
+    format_fields <- character()
+
+    chrom_pos <- list() # this list will store the CHROM and POS for n_data_lines sample markers
+    format_keys <- list() # this list store the format field for n_data_lines sample markers
+    for (i in seq_along(sample_indices)) {
+      fields <- strsplit(lines[sample_indices[i]], "\t")[[1]]
+      if (length(fields) >= 9) {
+        format_keys[[i]] <- unlist(strsplit(fields[9], ":"))
+        chrom_pos[[i]] <- fields[1:2]
+      }
+    }
+
+    # Check if all markers sampled format are identical with the first
+    checks["unique_FORMAT"] <- all(sapply(format_keys[-1], function(x) identical(x, format_keys[[1]])))
+
+    # --- CHROM and POS column checks ---
+    # Conditions must be combined with & (AND): each value must be non-".", non-empty,
+    # and non-NA. With | the expression was a tautology and always passed.
+    chrom_pos <- do.call(rbind, chrom_pos)
+    checks["chrom_info"] <- all(chrom_pos[,1] != "." & chrom_pos[,1] != "" & !is.na(chrom_pos[,1]))
+    checks["pos_info"] <- all(chrom_pos[,2] != "." & chrom_pos[,2] != "" & !is.na(chrom_pos[,2]))
+
+    if (checks["chrom_info"] && checks["pos_info"]) {
+      if (verbose) cat("Both CHROM and POS information are present.\n")
+    } else {
+      if (!checks["chrom_info"]) warning(" 'CHROM' information is missing for at least one marker.")
+      if (!checks["pos_info"]) warning(" 'POS' information is missing for at least one marker.")
+    }
+
+    format_fields <- unique(unlist(format_keys))
+
+    # GT check
+    checks["GT"] <- "GT" %in% format_fields
+    if (verbose) {
+      if (checks["GT"]) cat("FORMAT field 'GT' (genotype) is present.\n") else warning("FORMAT field 'GT' is missing.")
+    }
+    # Allele counts check
+    checks["allele_counts"] <- any(depth_support_fields %in% format_fields)
+    if (checks["allele_counts"]) {
+      if (verbose) {
+        cat(sprintf(
+          "Allele count FORMAT field(s) found: %s\n",
+          paste(intersect(depth_support_fields, format_fields), collapse = ", ")
+        ))
+      }
+    } else {
+      warning(paste(" No required allele-level count fields found. e.g.", depth_support_fields))
+    }
+
+    # Optional: phased GT (presence of '|' instead of '/' in genotypes)
+    # this complicated regex avoids the 5 first columns
+    phased_lines <- grep('^(?:[^\\t]*\\t){5}.*\\|', lines[sample_indices], perl = TRUE)
+    checks["phased_GT"] <- length(phased_lines) > 0
+
+    # --- Check for duplicated sample names ---
+    sample_names <- column_names[10:length(column_names)]
+    duplicated_samples <- sample_names[duplicated(sample_names)]
+    duplicates$duplicated_samples <- duplicated_samples
+    if (length(duplicated_samples) > 0) {
+      if (verbose) warning("Duplicated sample names found: ", paste(duplicated_samples, collapse = ", "))
+      checks["duplicated_samples"] <- TRUE
+    } else {
+      if (verbose) cat("No duplicated sample names.\n")
+      checks["duplicated_samples"] <- FALSE
+    }
+  } else {
+    checks["samples"] <- FALSE
+    checks["GT"] <- FALSE
+    checks["allele_counts"] <- FALSE
+    checks["phased_GT"] <- FALSE
+    checks["duplicated_samples"] <- FALSE
+
+    warning("No sample/genotype columns 
found.") + } + + # --- Ploidy inference (based on GT) --- + ploidy_max <- NA # default if GT not found or no valid genotypes + + if (checks["GT"]) { + ploidy_values <- c() + + for (i in seq_along(sample_indices)) { + fields <- strsplit(lines[sample_indices[i]], "\t")[[1]] + + # Skip if not enough fields + if (length(fields) < 10) next + + format_keys <- unlist(strsplit(fields[9], ":")) + gt_index <- which(format_keys == "GT") + + # Loop through samples (from column 10 onward) + for (sample_field in fields[10:length(fields)]) { + sample_fields <- unlist(strsplit(sample_field, ":")) + if (length(sample_fields) >= gt_index) { + gt_raw <- sample_fields[gt_index] + if (grepl("[/|]", gt_raw)) { + ploidy <- length(unlist(strsplit(gt_raw, "[/|]"))) + ploidy_values <- c(ploidy_values, ploidy) + } + } + } + } + + if (length(ploidy_values) > 0) { + ploidy_max <- max(ploidy_values, na.rm = TRUE) + if (verbose) cat("Highest ploidy detected from GT field:", ploidy_max, "\n") + } + if (length(unique(ploidy_values)) > 1) { + checks["mixed_ploidies"] <- TRUE + if (verbose) cat("Mixed ploidies detected\n") + } else { + checks["mixed_ploidies"] <- FALSE + } + } + + # --- REF/ALT basic check on sample rows --- + sample_lines <- lines[head(data_line_indices, n_data_lines)] + ref_alt_valid <- sapply(sample_lines, function(line) { + fields <- strsplit(line, "\t")[[1]] + if (length(fields) >= 5) { + ref <- fields[4] + alt <- fields[5] + grepl("^[ACGTN]+$", ref) && grepl("^[ACGTN.,<>]+$", alt) + } else { + FALSE + } + }) + checks["ref_alt"] <- all(ref_alt_valid) + + # --- Multiallelic site check (ALT with ',' separator) --- + multiallelic_flags <- grepl(",", sapply(sample_lines, function(line) strsplit(line, "\t")[[1]][5])) + checks["multiallelics"] <- any(multiallelic_flags) + + # --- Compile messages --- + + # Messages in case of failure + messages <- data.frame( + "VCF_header" = c( + "VCF header is missing. 
Please check the file format\n", + "VCF header is present\n" + ), + "VCF_compressed" = c( + "VCF is compressed but filename doesn't have the extension or it has non-supported format\n", + "VCF is .gz compressed or uncompressed\n" + ), + "VCF_columns" = c( + "Required VCF columns are missing. Please check the file format\n", + "Required VCF columns are present\n" + ), + "max_markers" = c( + "More than 10,000 markers found. Consider subsampling or running in HPC\n", + "Less than maximum number of markers found\n" + ), + "unique_FORMAT" = c( + "FORMAT fields are not consistent across sampled markers\n", + "FORMAT fields are consistent across sampled markers\n" + ), + "GT" = c( + "Genotype information is not available in the VCF file\n", + "Genotype information is available in the VCF file\n" + ), + "allele_counts" = c( + "Required field for allele counts are not available in the VCF file\n", + "Required field for allele counts are available in the VCF file\n" + ), + "samples" = c( + "Sample information is not available in the VCF file\n", + "Sample information is available in the VCF file\n" + ), + "chrom_info" = c( + "Chromosome information is not available in the VCF file\n", + "Chromosome information is available in the VCF file\n" + ), + "pos_info" = c( + "Position information is not available in the VCF file\n", + "Position information is available in the VCF file\n" + ), + "ref_alt" = c( + "REF/ALT fields contain invalid nucleotide codes\n", + "REF/ALT fields are valid\n" + ), + "multiallelics" = c( + "Multiallelic sites not found in the VCF file\n", + "Multiallelic sites found in the VCF file\n" + ), + "phased_GT" = c( + "Phased genotypes (|) are not present in the VCF file\n", + "Phased genotypes (|) are present in the VCF file\n" + ), + "duplicated_samples" = c( + "No duplicated sample IDs found", + paste("Duplicated sample IDs found: ", paste(duplicates$duplicated_samples, collapse = ", ")) + ), + "duplicated_markers" = c( + "No duplicated marker IDs found", 
+ paste("Duplicated marker IDs found: ", paste(duplicates$duplicated_markers, collapse = ", ")) + ), + "mixed_ploidies" = c( + "No mixed ploidies detected", + "Mixed ploidies detected" + ) + ) + rownames(messages) <- c("false", "true") + + # --- Done --- + if (verbose) cat("Sanity check complete.\n") + return(structure( + list( + checks = checks, messages = messages, + duplicates = duplicates, ploidy_max = ploidy_max + ), + class = "vcf_sanity_check" + )) +} + +#' Generate Sanity Check Messages for VCF Files +#' +#' This function generates messages based on the results of a VCF sanity check. +#' +#' @param checks A `vcf_sanity_check` object containing the results of the sanity check. +#' @param error_if_false (Optional) A character vector of checks that must be TRUE for the VCF file to pass validation. +#' @param error_if_true (Optional) A character vector of checks that must be FALSE for the VCF file to pass validation. +#' @param warning_if_true (Optional) A character vector of checks that should trigger a warning if TRUE. +#' @param warning_if_false (Optional) A character vector of checks that should trigger a warning if FALSE. +#' @param input_ploidy (Optional) An integer specifying the expected ploidy of the VCF file. If provided, the function will compare it with the detected ploidy. +#' +#' @details This function uses the results of a VCF sanity check to display appropriate messages to the user. It checks for required conditions that must be met (TRUE or FALSE) and optionally validates the ploidy of the VCF file. If any issues are detected, the function displays alerts using `shinyalert`. +#' +#' @return A logical value indicating whether any of the specified checks failed (TRUE) or passed (FALSE). If any required checks are not met, an error alert is displayed. If any warnings are triggered, a warning alert is displayed. 
+#' +#' @importFrom shinyalert shinyalert +#' +#' @export +vcf_sanity_messages <- function( + checks, + error_if_false = c( + "VCF_header", + "VCF_columns", + "max_markers", + "GT", + "allele_counts", + "samples", + "chrom_info", + "pos_info", + "ref_alt", + "mixed_ploidies" + ), + error_if_true = c( "phased_GT", + "duplicated_samples", + "duplicated_markers"), + warning_if_true = c("multiallelics"), + warning_if_false = c("unique_FORMAT"), + input_ploidy = NULL) { + + # check must be from vcf_sanity_check + if (!inherits(checks, "vcf_sanity_check")) { + stop("check must be a vcf_sanity_check object.") + } + + if(is.null(checks$checks)){ + shinyalert( + title = "File Error", + text = checks$message, + size = "xs", + closeOnEsc = TRUE, + closeOnClickOutside = FALSE, + html = TRUE, + type = "error", + showConfirmButton = TRUE, + confirmButtonText = "OK", + confirmButtonCol = "#004192", + showCancelButton = FALSE, + imageUrl = "", + animation = TRUE, + ) + } + + it_should_brake <- vector() + # Check required TRUE + if (length(error_if_false) > 0 && !all(checks$checks[error_if_false])) { + it_should_brake <- c(it_should_brake, !checks$checks[error_if_false]) + shinyalert( + title = "File Error", + text = paste(checks$message[1, error_if_false][which(!checks$checks[error_if_false])], + collapse = ";" + ), + size = "xs", + closeOnEsc = TRUE, + closeOnClickOutside = FALSE, + html = TRUE, + type = "error", + showConfirmButton = TRUE, + confirmButtonText = "OK", + confirmButtonCol = "#004192", + showCancelButton = FALSE, + imageUrl = "", + animation = TRUE, + ) + } + + # Check warning TRUE + if (length(warning_if_true) > 0 && all(checks$checks[warning_if_true])) { + shinyalert( + title = "Warning", + text = paste(checks$message[2, warning_if_true][which(checks$checks[warning_if_true])], + collapse = ";" + ), + size = "xs", + closeOnEsc = TRUE, + closeOnClickOutside = FALSE, + html = TRUE, + type = "warning", + showConfirmButton = TRUE, + confirmButtonText = "OK", + 
confirmButtonCol = "#004192", + showCancelButton = FALSE, + imageUrl = "", + animation = TRUE, + ) + } + + # Check required FALSE + if (length(error_if_true) > 0 && any(checks$checks[error_if_true])) { + it_should_brake <- c(it_should_brake, checks$checks[error_if_true]) + + shinyalert( + title = "File Error", + text = paste(checks$message[2, error_if_true][which(checks$checks[error_if_true])], + collapse = ";" + ), + size = "xs", + closeOnEsc = TRUE, + closeOnClickOutside = FALSE, + html = TRUE, + type = "error", + showConfirmButton = TRUE, + confirmButtonText = "OK", + confirmButtonCol = "#004192", + showCancelButton = FALSE, + imageUrl = "", + animation = TRUE, + ) + } + + # Check warning FALSE + if (length(warning_if_false) > 0 && !all(checks$checks[warning_if_false])) { + shinyalert( + title = "Warning", + text = paste(checks$message[1, warning_if_false][which(!checks$checks[warning_if_false])], + collapse = ";" + ), + size = "xs", + closeOnEsc = TRUE, + closeOnClickOutside = FALSE, + html = TRUE, + type = "warning", + showConfirmButton = TRUE, + confirmButtonText = "OK", + confirmButtonCol = "#004192", + showCancelButton = FALSE, + imageUrl = "", + animation = TRUE, + ) + } + + if (!is.null(input_ploidy)) { + it_should_brake <- c(it_should_brake, checks$ploidy_max != input_ploidy) + + if (checks$ploidy_max != input_ploidy) { + shinyalert( + title = "File Error", + text = "Informed ploidy doesn't match genotypes in the VCF file", + size = "xs", + closeOnEsc = TRUE, + closeOnClickOutside = FALSE, + html = TRUE, + type = "info", + showConfirmButton = TRUE, + confirmButtonText = "OK", + confirmButtonCol = "#004192", + showCancelButton = FALSE, + imageUrl = "", + animation = TRUE, + ) + } + } + return(any(it_should_brake)) +} + +#' Print Method for vcf_sanity_check Objects +#' +#' Displays a summary table of the VCF sanity check results, showing the check name, result (TRUE/FALSE), and the corresponding message for each selected check. 
+#' +#' @param x A vcf_sanity_check object as returned by `vcf_sanity_check()`. +#' @param checks A character vector of check names to display. Defaults to all checks in the object. +#' @param ... Additional arguments (currently ignored). +#' +#' @return The input object, invisibly. Called for its side effect (printing to console). +#' +#' @method print vcf_sanity_check +#' +#' @export +print.vcf_sanity_check <- function(x, checks = names(x$checks), ...) { + if (!inherits(x, "vcf_sanity_check")) stop("Object must be of class 'vcf_sanity_check'.") + + if(is.null(x$checks)) { + warning(paste("Checks could not be performed. Because:", x$message)) + } + + # Prepare a data.frame with check name, result, and message + res <- data.frame( + Check = checks, + Result = as.logical(x$checks[checks]), + Message = mapply( + function(chk, val) { + # TRUE is row 'true', FALSE is row 'false' + row_idx <- ifelse(isTRUE(val), "true", "false") + as.character(x$messages[row_idx, chk]) + }, + chk = checks, + val = x$checks[checks], + SIMPLIFY = TRUE + ), + stringsAsFactors = FALSE + ) + print(res, row.names = FALSE) +} diff --git a/inst/Dockerfile.deps b/inst/Dockerfile.deps new file mode 100644 index 0000000..0c48941 --- /dev/null +++ b/inst/Dockerfile.deps @@ -0,0 +1,78 @@ +# syntax=docker/dockerfile:1.7 +FROM rocker/r2u:noble + +SHELL ["/bin/bash","-eo","pipefail","-c"] +ENV DEBIAN_FRONTEND=noninteractive TZ=UTC +# keep compiles gentle for CI/QEMU +ENV MAKEFLAGS="-j2" +ENV R_PKG_INSTALL_ARGS="--no-build-vignettes --no-manual" + +# System libs (no Bioc via apt to avoid ABI/arch mismatches) +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ + apt-get update && apt-get install -y --no-install-recommends \ + build-essential gfortran cmake git wget pkg-config \ + ccache \ + libhts-dev \ + libcurl4-gnutls-dev libssl-dev libxml2-dev \ + libpng-dev libjpeg-dev libfreetype6-dev libfontconfig1-dev \ + libbz2-dev 
liblzma-dev zlib1g-dev \ + r-cran-remotes r-cran-pak r-cran-biocmanager \ + && rm -rf /var/lib/apt/lists/* + +# ccache +ENV CCACHE_DIR=/root/.cache/ccache +RUN --mount=type=cache,target=/root/.cache/ccache ccache -M 2G && \ + { echo 'CC=ccache gcc'; echo 'CXX=ccache g++'; echo 'FC=ccache gfortran'; } >> /usr/lib/R/etc/Makevars.site + +# ---- CRAN via r2u binaries first (fast) ---- +ENV CRAN_PKGS="adegenet curl DT dplyr vcfR \ +ggplot2 tidyr shiny config bs4Dash \ +golem purrr markdown scales plotly \ +shinyWidgets shinyjs shinydisconnect shinyalert \ +stringr updog AGHmatrix factoextra \ +httr future shinycssloaders RColorBrewer \ +tibble rrBLUP MASS Matrix matrixcalc BIGr" +RUN install2.r --skipinstalled --ncpus 1 $CRAN_PKGS || true + +# ---- Bioconductor from source (R 4.5 -> Bioc 3.21) ---- +RUN --mount=type=cache,target=/root/.cache/R/src Rscript - <<'RS' +options(Ncpus = 2) +repos <- BiocManager::repositories(version = "3.21") +options(repos = repos) +bioc_pkgs <- c("Rsamtools","Biostrings","pwalign","VariantAnnotation") +for (p in bioc_pkgs) { + if (!requireNamespace(p, quietly = TRUE)) { + message("Installing Bioconductor (source): ", p) + install.packages(p, dependencies = TRUE, type = "source", + INSTALL_opts = c("--no-build-vignettes","--no-manual")) + } +} +RS + +# ---- Fallback for any missing CRAN pkgs (source, sequential) ---- +RUN --mount=type=cache,target=/root/.cache/R/src Rscript - <<'RS' +pkgs <- strsplit(Sys.getenv("CRAN_PKGS"), " +")[[1]] +miss <- setdiff(pkgs, rownames(installed.packages())) +if (length(miss)) { + message("Installing remaining CRAN from source: ", paste(miss, collapse=", ")) + for (p in miss) { + install.packages(p, repos="https://cloud.r-project.org", type="source", + INSTALL_opts=c("--no-build-vignettes","--no-manual")) + } +} +RS + +# ---- GitHub dep (pak-free to avoid QEMU helper issues) ---- +RUN R -q -e "remotes::install_github('jendelman/GWASpoly', upgrade='never', dependencies=TRUE, \ + 
INSTALL_opts=c('--no-build-vignettes','--no-manual'))" + +# Version snapshot (handy for audits) +RUN R -q -e "pkgs <- c('adegenet','curl','DT','dplyr','vcfR','ggplot2','tidyr','shiny','config','bs4Dash', \ + 'golem','purrr','markdown','scales','plotly','shinyWidgets','shinyjs','shinydisconnect','shinyalert', \ + 'stringr','updog','AGHmatrix','factoextra','httr','future','shinycssloaders','RColorBrewer', \ + 'tibble','rrBLUP','MASS','Matrix','matrixcalc','BIGr', \ + 'Rsamtools','VariantAnnotation','Biostrings','pwalign'); \ + ip <- installed.packages(); dir.create('/opt/pins', recursive=TRUE, showWarnings=FALSE); \ + write.table(data.frame(Package=pkgs, Version=ip[pkgs,'Version']), \ + file='/opt/pins/deps_versions.tsv', sep='\t', row.names=FALSE, quote=FALSE)" diff --git a/inst/help_files/DAPC_cite.Rmd b/inst/help_files/DAPC_cite.Rmd index c590227..3e97a14 100644 --- a/inst/help_files/DAPC_cite.Rmd +++ b/inst/help_files/DAPC_cite.Rmd @@ -4,9 +4,9 @@ output: html_document date: "2024-08-29" --- -* **BIGapp** +* **BIGapp** and **BIGr** -* **BIGr** +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 * **vcfR** diff --git a/inst/help_files/DArT_Report2VCF_cite.Rmd b/inst/help_files/DArT_Report2VCF_cite.Rmd index 7ff8c10..ffcc9c0 100644 --- a/inst/help_files/DArT_Report2VCF_cite.Rmd +++ b/inst/help_files/DArT_Report2VCF_cite.Rmd @@ -4,10 +4,9 @@ output: html_document date: "2024-08-29" --- -* **BIGapp** +* **BIGapp** and **BIGr** - -* **BIGr** +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). 
BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 * **vcfR** diff --git a/inst/help_files/GWAS_cite.Rmd b/inst/help_files/GWAS_cite.Rmd index 837ece6..bf4f98f 100644 --- a/inst/help_files/GWAS_cite.Rmd +++ b/inst/help_files/GWAS_cite.Rmd @@ -6,6 +6,7 @@ date: "2024-08-29" * BIGapp +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 * GWASpoly package diff --git a/inst/help_files/Genomic_Diversity_cite.Rmd b/inst/help_files/Genomic_Diversity_cite.Rmd index 1870dbc..523d0c8 100644 --- a/inst/help_files/Genomic_Diversity_cite.Rmd +++ b/inst/help_files/Genomic_Diversity_cite.Rmd @@ -4,9 +4,9 @@ output: html_document date: "2024-08-29" --- -* **BIGapp** +* **BIGapp** and **BIGr** -* **BIGr** +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 * **vcfR** diff --git a/inst/help_files/Genomic_Prediction_cite.Rmd b/inst/help_files/Genomic_Prediction_cite.Rmd index 1e59726..bb06dd0 100644 --- a/inst/help_files/Genomic_Prediction_cite.Rmd +++ b/inst/help_files/Genomic_Prediction_cite.Rmd @@ -6,6 +6,8 @@ date: "2024-08-29" * **BIGapp** +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). 
BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 + * **vcfR** Knaus BJ, Grünwald NJ (2017). “VCFR: a package to manipulate and visualize variant call format data in R.” Molecular Ecology Resources, 17(1), 44–53. ISSN 757, https://dx.doi.org/10.1111/1755-0998.12549. diff --git a/inst/help_files/PCA_cite.Rmd b/inst/help_files/PCA_cite.Rmd index d9e2272..a0c546e 100644 --- a/inst/help_files/PCA_cite.Rmd +++ b/inst/help_files/PCA_cite.Rmd @@ -4,9 +4,9 @@ output: html_document date: "2024-08-29" --- -- **BIGapp** +- **BIGapp** and **BIGr** -- **BIGr** +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 - **vcfR** - Knaus BJ, Grünwald NJ (2017). *VCFR: a package to manipulate and visualize variant call format data in R.* Molecular Ecology Resources, 17(1), 44–53. ISSN 757. [doi:10.1111/1755-0998.12549](https://dx.doi.org/10.1111/1755-0998.12549). diff --git a/inst/help_files/Predictive_Ability_cite.Rmd b/inst/help_files/Predictive_Ability_cite.Rmd index 2bdeda1..1dec9ef 100644 --- a/inst/help_files/Predictive_Ability_cite.Rmd +++ b/inst/help_files/Predictive_Ability_cite.Rmd @@ -4,7 +4,9 @@ output: html_document date: "2024-08-29" --- -* **BIGapp** +* **BIGapp** + +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. 
doi:https://doi.org/10.1002/tpg2.70067 * **vcfR** diff --git a/inst/help_files/Updog_Dosage_Calling_cite.Rmd b/inst/help_files/Updog_Dosage_Calling_cite.Rmd index e7ca342..0ba57e0 100644 --- a/inst/help_files/Updog_Dosage_Calling_cite.Rmd +++ b/inst/help_files/Updog_Dosage_Calling_cite.Rmd @@ -4,9 +4,9 @@ output: html_document date: "2024-08-29" --- -- **BIGapp** +- **BIGapp** and **BIGr** -- **BIGr** +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 - **Updog Package** diff --git a/inst/help_files/VCF_Filtering_cite.Rmd b/inst/help_files/VCF_Filtering_cite.Rmd index 390e900..1a9a0a9 100644 --- a/inst/help_files/VCF_Filtering_cite.Rmd +++ b/inst/help_files/VCF_Filtering_cite.Rmd @@ -5,9 +5,9 @@ date: "2024-08-29" --- -- **BIGapp** +- **BIGapp** and **BIGr** -- **BIGr** +Sandercock A.M., Peel M.D., Taniguti C.H., Chinchilla-Vargas J., Chen S., Sapkota M., Lin M., Zhao D., Ackerman A.J., Basnet B.R., Beil C.T., Sheehan M.J. (2025). BIGapp: A User-Friendly Genomic Tool Kit Identified Quantitative Trait Loci for Creeping Rootedness in Alfalfa (Medicago sativa L.)., The Plant Genome. doi:https://doi.org/10.1002/tpg2.70067 - **Updog** (if filtering parameters used) Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). *Genotyping Polyploids from Messy Sequencing Data*. Genetics, 210(3), 789-807. [doi: 10.1534/genetics.118.301468](https://doi.org/10.1534/genetics.118.301468). 
diff --git a/inst/iris_DArT_VCF.vcf.gz b/inst/iris_DArT_VCF.vcf.gz index 03de653..756e73f 100644 Binary files a/inst/iris_DArT_VCF.vcf.gz and b/inst/iris_DArT_VCF.vcf.gz differ diff --git a/inst/update_dep_image.sh b/inst/update_dep_image.sh new file mode 100644 index 0000000..094b96c --- /dev/null +++ b/inst/update_dep_image.sh @@ -0,0 +1,32 @@ +# The arm64 image is too large to load with Github actions +# So we build and push locally with the following commands + +# There is a file inside the image with all packages versions listed +# find it at /opt/pins/deps_versions.tsv + +# Make sure to login with BI user +IMAGE_DEPS=docker.io/breedinginsight/bigapp-deps +DEPS_TAG=r4.5-bioc3.21-2025-08 # Update version here + +# amd64 (load locally instead of pushing) +docker buildx build \ + -f Dockerfile.deps \ + --platform linux/amd64 \ + -t $IMAGE_DEPS:$DEPS_TAG-amd64 \ + --progress=plain \ + --load \ + . + +# arm64 (push or save to a tar; cannot load multi-arch to local daemon) +docker buildx build \ + -f Dockerfile.deps \ + --platform linux/arm64 \ + -t $IMAGE_DEPS:$DEPS_TAG-arm64 \ + --progress=plain \ + --push \ + . 
+ +docker buildx imagetools create \ + -t $IMAGE_DEPS:$DEPS_TAG \ + $IMAGE_DEPS:$DEPS_TAG-amd64 \ + $IMAGE_DEPS:$DEPS_TAG-arm64 \ No newline at end of file diff --git a/man/is_compressed_file.Rd b/man/is_compressed_file.Rd new file mode 100644 index 0000000..9a1b5e4 --- /dev/null +++ b/man/is_compressed_file.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{is_compressed_file} +\alias{is_compressed_file} +\title{Check if a File is Compressed} +\usage{ +is_compressed_file(file_path) +} +\arguments{ +\item{file_path}{A character string giving the path to the file to be checked.} +} +\value{ +A character string indicating the compression type (\code{"gzip (.gz)"}, +\code{"bzip2 (.bz2)"}, or \code{"xz (.xz)"}) if the file is compressed, or \code{FALSE} if +the file is not recognized as compressed. +} +\description{ +This function checks whether a given file is compressed by inspecting its +magic number (first few bytes). It detects common compression formats such as +gzip (\code{.gz}), bzip2 (\code{.bz2}), and xz (\code{.xz}). +} diff --git a/man/print.vcf_sanity_check.Rd b/man/print.vcf_sanity_check.Rd new file mode 100644 index 0000000..68ab2b9 --- /dev/null +++ b/man/print.vcf_sanity_check.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vcf_sanity_check.R +\name{print.vcf_sanity_check} +\alias{print.vcf_sanity_check} +\title{Print Method for vcf_sanity_check Objects} +\usage{ +\method{print}{vcf_sanity_check}(x, checks = names(x$checks), ...) +} +\arguments{ +\item{x}{A vcf_sanity_check object as returned by \code{vcf_sanity_check()}.} + +\item{checks}{A character vector of check names to display. Defaults to all checks in the object.} + +\item{...}{Additional arguments (currently ignored).} +} +\value{ +The input object, invisibly. Called for its side effect (printing to console). 
+} +\description{ +Displays a summary table of the VCF sanity check results, showing the check name, result (TRUE/FALSE), and the corresponding message for each selected check. +} diff --git a/man/read_geno_file.Rd b/man/read_geno_file.Rd index 8552c1d..cfb2d27 100644 --- a/man/read_geno_file.Rd +++ b/man/read_geno_file.Rd @@ -4,12 +4,16 @@ \alias{read_geno_file} \title{Read geno file} \usage{ -read_geno_file(file_path, requires = c("GT")) +read_geno_file(file_path, requires = c("GT"), ploidy, check = TRUE) } \arguments{ \item{file_path}{character indicanting path to file} \item{requires}{which information is required from the VCF. Define the FORMAT or INFO letters. Example: c("GT", "DP", "PL")} + +\item{ploidy}{numeric value indicating the ploidy of the species} + +\item{check}{logical indicating whether to perform checks on the VCF file} } \description{ Read geno file diff --git a/man/vcf_sanity_check.Rd b/man/vcf_sanity_check.Rd new file mode 100644 index 0000000..d6675d5 --- /dev/null +++ b/man/vcf_sanity_check.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vcf_sanity_check.R +\name{vcf_sanity_check} +\alias{vcf_sanity_check} +\title{Perform a Sanity Check on a VCF File} +\usage{ +vcf_sanity_check( + vcf_path, + n_data_lines = 100, + max_markers = 10000, + depth_support_fields = c("AD", "RA", "AO", "RO", "NR", "NV", "SB", "F1R2", "F2R1"), + verbose = FALSE +) +} +\arguments{ +\item{vcf_path}{A character string specifying the path to the VCF file. The file can be plain text or gzipped.} + +\item{n_data_lines}{An integer specifying the number of data lines to sample for detailed checks. Default is 100.} + +\item{max_markers}{An integer specifying the maximum number of markers allowed in the VCF file. Default is 10,000.} + +\item{depth_support_fields}{(Optional) A character vector of fields that are expected to be present in the FORMAT column for allele counts. 
Default is \code{c("AD", "RA", "AO", "RO", "NR", "NV", "SB", "F1R2", "F2R1")}.} + +\item{verbose}{A logical value indicating whether to print detailed messages during the checks. Default is FALSE.} +} +\value{ +A list containing: +\itemize{ +\item \code{checks}: A named vector indicating the results of each check (TRUE or FALSE). +\item \code{messages}: A data frame containing messages for each check, indicating success or failure. +\item \code{duplicates}: A list containing any duplicated sample or marker IDs found in the VCF file. +\item \code{ploidy_max}: The maximum ploidy detected from the genotype field, if applicable. +} +} +\description{ +This function performs a series of checks on a VCF file to ensure its validity and integrity. It verifies the presence of required headers, columns, and data fields, and checks for common issues such as missing or malformed data. +} +\details{ +The function performs the following checks: +\itemize{ +\item \strong{VCF_header}: Verifies the presence of the \verb{##fileformat} header. +\item \strong{VCF_compressed}: Checks if the VCF file is .gz compressed and if the extension is correct. +\item \strong{VCF_columns}: Ensures required columns (\verb{#CHROM}, \code{POS}, \code{ID}, \code{REF}, \code{ALT}, \code{QUAL}, \code{FILTER}, \code{INFO}) are present. +\item \strong{max_markers}: Checks if the total number of markers exceeds the specified limit. +\item \strong{unique_FORMAT}: Ensures that the FORMAT fields are consistent across sampled markers. +\item \strong{GT}: Verifies the presence of the \code{GT} (genotype) field in the FORMAT column. +\item \strong{allele_counts}: Checks for allele-level count fields (e.g., \code{AD}, \code{RA}, \code{AO}, \code{RO}). +\item \strong{samples}: Ensures sample/genotype columns are present. +\item \strong{chrom_info} and \strong{pos_info}: Verifies the presence of \code{CHROM} and \code{POS} columns. 
+\item \strong{ref_alt}: Ensures \code{REF} and \code{ALT} fields contain valid nucleotide codes. +\item \strong{multiallelics}: Identifies multiallelic sites (ALT field with commas). +\item \strong{phased_GT}: Checks for phased genotypes (presence of \code{|} in the \code{GT} field). +\item \strong{duplicated_samples}: Checks for duplicated sample IDs. +\item \strong{duplicated_markers}: Checks for duplicated marker IDs. +} +} diff --git a/man/vcf_sanity_messages.Rd b/man/vcf_sanity_messages.Rd new file mode 100644 index 0000000..1a0fb3e --- /dev/null +++ b/man/vcf_sanity_messages.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vcf_sanity_check.R +\name{vcf_sanity_messages} +\alias{vcf_sanity_messages} +\title{Generate Sanity Check Messages for VCF Files} +\usage{ +vcf_sanity_messages( + checks, + error_if_false = c("VCF_header", "VCF_columns", "max_markers", "GT", "allele_counts", + "samples", "chrom_info", "pos_info", "ref_alt", "mixed_ploidies"), + error_if_true = c("phased_GT", "duplicated_samples", "duplicated_markers"), + warning_if_true = c("multiallelics"), + warning_if_false = c("unique_FORMAT"), + input_ploidy = NULL +) +} +\arguments{ +\item{checks}{A \code{vcf_sanity_check} object containing the results of the sanity check.} + +\item{error_if_false}{(Optional) A character vector of checks that must be TRUE for the VCF file to pass validation.} + +\item{error_if_true}{(Optional) A character vector of checks that must be FALSE for the VCF file to pass validation.} + +\item{warning_if_true}{(Optional) A character vector of checks that should trigger a warning if TRUE.} + +\item{warning_if_false}{(Optional) A character vector of checks that should trigger a warning if FALSE.} + +\item{input_ploidy}{(Optional) An integer specifying the expected ploidy of the VCF file. 
If provided, the function will compare it with the detected ploidy.} +} +\value{ +A logical value indicating whether any of the specified checks failed (TRUE) or passed (FALSE). If any required checks are not met, an error alert is displayed. If any warnings are triggered, a warning alert is displayed. +} +\description{ +This function generates messages based on the results of a VCF sanity check. +} +\details{ +This function uses the results of a VCF sanity check to display appropriate messages to the user. It checks for required conditions that must be met (TRUE or FALSE) and optionally validates the ploidy of the VCF file. If any issues are detected, the function displays alerts using \code{shinyalert}. +} diff --git a/tests/testthat/test-DosageCall.R b/tests/testthat/test-DosageCall.R index 05ab2df..ef8a912 100644 --- a/tests/testthat/test-DosageCall.R +++ b/tests/testthat/test-DosageCall.R @@ -4,33 +4,34 @@ context("Dosage Calling") #library(vcfR) test_that("Dosage Calling from MADC file",{ - + future::plan(future::sequential) + madc_file <- system.file("iris_DArT_MADC.csv", package="BIGapp") - + output_name <- "output" ploidy <- 2 - cores <- 2 + cores <- 1 model_select <- "norm" # TODO: test for other models - + # Status #Import genotype info if genotype matrix format - + # Call the get_counts function with the specified MADC file path and output file path #Status result_df <- get_counts(madc_file, output_name) - + #Call the get_matrices function matrices <- get_matrices(result_df) - + mout <- updog::multidog(refmat = matrices$ref_matrix, sizemat = matrices$size_matrix, ploidy = as.numeric(ploidy), model = model_select, nc = cores) - + expect_equal(sum(mout$snpdf$bias), 402.7979, tolerance = 0.01) expect_equal(sum(mout$inddf$postmean), 95229.13, tolerance = 0.01) - + # Convert updog to VCF updog2vcf( multidog.object = mout, @@ -38,40 +39,40 @@ test_that("Dosage Calling from MADC file",{ updog_version = packageVersion("updog"), compress = TRUE ) - + vcf_result 
<- read.vcfR(paste0(output_name,".vcf.gz")) - + DP <- sum(as.numeric(extract.gt(vcf_result, "DP"))) - + expect_equal(DP, 23618990) - + MPP <- sum(as.numeric(extract.gt(vcf_result, "MPP"))) - + expect_equal(MPP, 74519.94, tolerance = 0.01) }) test_that("Dosage Calling from VCF file",{ - + madc_file <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGapp") ploidy <- 2 model_select <- "norm" - cores <- 2 + cores <- 1 output_name <- "out" - + #Initialize matrices list matrices <- list() - + #Import genotype information if in VCF format vcf <- read.vcfR(madc_file) - + #Get items in FORMAT column info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT - + info_ids <- extract_info_ids(info[1]) chrom <- vcf@fix[,1] pos <- vcf@fix[,2] - + if (("DP" %in% info_ids) && (("RA" %in% info_ids) | ("AD" %in% info_ids))) { #Extract DP and RA and convert to matrices matrices$size_matrix <- extract.gt(vcf, element = "DP") @@ -82,22 +83,22 @@ test_that("Dosage Calling from VCF file",{ matrices$ref_matrix <- matrix(sapply(strsplit(ad_matrix, ","), "[[", 1), nrow = nrow(matrices$size_matrix)) colnames(matrices$ref_matrix) <- colnames(matrices$size_matrix) } - + class(matrices$size_matrix) <- "numeric" class(matrices$ref_matrix) <- "numeric" rownames(matrices$size_matrix) <- rownames(matrices$ref_matrix) <- paste0(chrom, "_", pos) - + } - + mout <- updog::multidog(refmat = matrices$ref_matrix, sizemat = matrices$size_matrix, ploidy = as.numeric(ploidy), model = model_select, nc = cores) - + expect_equal(sum(mout$snpdf$bias), 402.79, tolerance = 0.01) expect_equal(sum(mout$inddf$postmean), 95229.13, tolerance = 0.01) - + # Convert updog to VCF BIGr::updog2vcf( multidog.object = mout, @@ -105,47 +106,68 @@ test_that("Dosage Calling from VCF file",{ updog_version = packageVersion("updog"), compress = TRUE ) - + vcf_result <- read.vcfR(paste0(output_name,".vcf.gz")) - + DP <- sum(as.numeric(extract.gt(vcf_result, "DP"))) expect_equal(DP, 23618990) - + MPP <- 
sum(as.numeric(extract.gt(vcf_result, "MPP"))) expect_equal(MPP, 74519.94, tolerance = 0.01) + +}) +test_that("polyRAD", { + #Variables + + madc_file <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGapp") + + polyrad_items <- polyRAD_dosage_call(vcf = madc_file, + ploidy = 2, + model = "IterateHWE") + + polyRAD2vcf(geno = polyrad_items$Genos, + model = "IterateHWE", + vcf_path = madc_file, + hindhe.obj = polyrad_items$RADHindHe, + ploidy = 2, + output.file = "temp") + + bgzip_compress("temp.vcf", file = ".") + }) -test_that("Dosage Calling from VCF file f1 and s1 model",{ +test_that("Dosage Calling from VCF file f1 and s1 model",{ + input <- list() madc_file <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGapp") ploidy <- 2 - + # input$updog_model <- "s1" # input$parent <- "Sample_1" - + input$updog_model <- "f1" input$parent1 <- "Sample_1" input$parent2 <- "Sample_2" - - cores <- 2 + + cores <- 1 output_name <- "out2" - + #Initialize matrices list matrices <- list() - + #Import genotype information if in VCF format vcf <- read.vcfR(madc_file) - + #Get items in FORMAT column info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT - + info_ids <- extract_info_ids(info[1]) chrom <- vcf@fix[,1] pos <- vcf@fix[,2] - + if (("DP" %in% info_ids) && (("RA" %in% info_ids) | ("AD" %in% info_ids))) { #Extract DP and RA and convert to matrices matrices$size_matrix <- extract.gt(vcf, element = "DP") @@ -156,13 +178,13 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{ matrices$ref_matrix <- matrix(sapply(strsplit(ad_matrix, ","), "[[", 1), nrow = nrow(matrices$size_matrix)) colnames(matrices$ref_matrix) <- colnames(matrices$size_matrix) } - + class(matrices$size_matrix) <- "numeric" class(matrices$ref_matrix) <- "numeric" rownames(matrices$size_matrix) <- rownames(matrices$ref_matrix) <- paste0(chrom, "_", pos) - + } - + # Select parents if(input$updog_model == "s1" | input$updog_model == "s1pp"){ parents <- c(input$parent, NULL) @@ -171,7 +193,7 @@ 
test_that("Dosage Calling from VCF file f1 and s1 model",{ } else { parents <- c(NULL, NULL) } - + if (!all(parents %in% colnames(matrices$size_matrix))) { shinyalert( title = "Data Warning!", @@ -187,10 +209,10 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{ showCancelButton = FALSE, animation = TRUE ) - + return() } - + mout <- updog::multidog(refmat = matrices$ref_matrix, sizemat = matrices$size_matrix, ploidy = as.numeric(ploidy), @@ -198,10 +220,10 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{ p2_id = if(is.na(parents[2])) NULL else parents[2], model = input$updog_model, nc = cores) - + expect_equal(sum(mout$snpdf$bias), 408.5401, tolerance = 0.01) expect_equal(sum(mout$inddf$postmean), 94249.05, tolerance = 0.01) - + # Convert updog to VCF updog2vcf( multidog.object = mout, @@ -209,13 +231,13 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{ updog_version = packageVersion("updog"), compress = TRUE ) - + vcf_result <- read.vcfR(paste0(output_name,".vcf.gz")) - + DP <- sum(as.numeric(extract.gt(vcf_result, "DP"))) expect_equal(DP, 23618990) - + MPP <- sum(as.numeric(extract.gt(vcf_result, "MPP"))) expect_equal(MPP, 74519.94, tolerance = 0.01) - + }) diff --git a/tests/testthat/test-GSAcc.R b/tests/testthat/test-GSAcc.R index 97051a1..3ed7dcb 100644 --- a/tests/testthat/test-GSAcc.R +++ b/tests/testthat/test-GSAcc.R @@ -64,7 +64,7 @@ test_that("test Predictive Ability iris",{ #Getting genotype matrix #Geno.file conversion if needed - geno_snps <- read_geno_file(input$pred_file$datapath, requires = "GT") + geno_snps <- BIGapp:::read_geno_file(input$pred_file$datapath, requires = "GT", ploidy = input$pred_ploidy, check = FALSE) geno <- geno_snps[[1]] pred_inputs$pred_snps <- geno_snps[[2]] # n markers pred_inputs$pred_genos <- ncol(geno) # n samples @@ -111,9 +111,9 @@ test_that("test Predictive Ability iris",{ input$pred_model <- "rrBLUP" # Convert genotype matrix according to ploidy and model used - geno_formated <- 
format_geno_matrix(pred_inputs$geno_input,input$pred_model, input$pred_matrix, input$pred_ploidy) + geno_formated <- BIGapp:::format_geno_matrix(pred_inputs$geno_input,input$pred_model, input$pred_matrix, input$pred_ploidy) - results <- run_predictive_model(geno = geno_formated, + results <- BIGapp:::run_predictive_model(geno = geno_formated, pheno = pred_inputs$pheno_input, selected_traits = input$pred_trait_info, predictive_model = input$pred_model, @@ -163,10 +163,10 @@ test_that("test Predictive Ability iris",{ advanced_options$pred_matrix <- input$pred_matrix # Convert genotype matrix according to ploidy and model used - geno_formated <- format_geno_matrix(pred_inputs$geno_input,input$pred_model, input$pred_matrix, input$pred_ploidy) + geno_formated <- BIGapp:::format_geno_matrix(pred_inputs$geno_input,input$pred_model, input$pred_matrix, input$pred_ploidy) # Main function - results <- run_predictive_model(geno = geno_formated, + results <- BIGapp:::run_predictive_model(geno = geno_formated, pheno = pred_inputs$pheno_input, selected_traits = input$pred_trait_info, predictive_model = input$pred_model, @@ -203,8 +203,8 @@ test_that("test Predictive Ability iris",{ pred_outputs_gBLUP$comb_output <- average_accuracy_df # Compare rrBLUP and GBLUP - expect_equal(pred_outputs_gBLUP$corr_output[,1], pred_outputs_rrBLUP$corr_output[,1], tolerance = 0.01) - expect_equal(pred_outputs_gBLUP$comb_output[,2], pred_outputs_rrBLUP$comb_output[,2], tolerance = 0.01) + expect_equal(pred_outputs_gBLUP$corr_output[,1], pred_outputs_rrBLUP$corr_output[,1], tolerance = 0.02) + expect_equal(pred_outputs_gBLUP$comb_output[,2], pred_outputs_rrBLUP$comb_output[,2], tolerance = 0.02) expect_equal(sum(pred_outputs_gBLUP$avg_GEBVs[,2]), -0.594, tolerance = 0.01) expect_equal(sum(as.numeric(pred_outputs_gBLUP$all_GEBVs[,1])), -2.971, tolerance = 0.1) }) diff --git a/tests/testthat/test-vcf_sanity_check.R b/tests/testthat/test-vcf_sanity_check.R new file mode 100644 index 0000000..298b647 
--- /dev/null +++ b/tests/testthat/test-vcf_sanity_check.R @@ -0,0 +1,18 @@ +# Test for vcf_sanity_check function + +test_that("vcf_sanity_check works on a valid VCF", { + # Use a small example VCF file from inst/ or create a minimal one + vcf_path <- system.file("iris_DArT_VCF.vcf.gz", package = "BIGapp") + + res <- vcf_sanity_check(vcf_path, n_data_lines = 100, max_markers = 10000, verbose = FALSE) + expect_s3_class(res, "vcf_sanity_check") + expect_true(res$checks["VCF_header"]) + expect_true(res$checks["VCF_columns"]) + expect_true(res$checks["max_markers"]) + expect_true(res$checks["samples"]) + expect_true(res$checks["chrom_info"]) + expect_true(res$checks["pos_info"]) + expect_false(res$checks["ref_alt"]) +}) + +