diff --git a/R/convert_rds_to_anndata.R b/R/convert_rds_to_anndata.R new file mode 100644 index 0000000..61aeda5 --- /dev/null +++ b/R/convert_rds_to_anndata.R @@ -0,0 +1,29 @@ +## Export data to be used as input for analysis with python +## packages such as scanpy + +# Libraries ---- + +stopifnot( + require(optparse), + require(Seurat), + require(SeuratDisk), + require(tenxutils) +) + +# Options ---- + +option_list <- list( + make_option(c("--seuratobject"), default="begin.Robj", + help="A seurat object after dimensionality reduction") +) + +opt <- parse_args(OptionParser(option_list=option_list)) + +cat("Running with options:\n") +print(opt) + +output_file = gsub(".h5seurat", ".h5ad", opt$seuratobject) + +Convert(opt$seuratobject, dest = output_file, overwrite = TRUE, verbose = TRUE) + +print("Completed") diff --git a/R/export_for_python.R b/R/export_for_python.R index c0b5d1e..e7986a1 100644 --- a/R/export_for_python.R +++ b/R/export_for_python.R @@ -53,10 +53,10 @@ exportMetaData <- function(seurat_object, outdir=NULL) { quote=FALSE, sep="\t", row.names = FALSE, col.names = TRUE) } - # Read RDS seurat object -message("readRDS") +message(sprintf("readRDS: %s", opt$seuratobject)) s <- readRDS(opt$seuratobject) + message("export_for_python running with default assay: ", DefaultAssay(s)) # Write out the cell and feature names diff --git a/R/plot_group_numbers.R b/R/plot_group_numbers.R index a410cc1..d406aba 100644 --- a/R/plot_group_numbers.R +++ b/R/plot_group_numbers.R @@ -179,8 +179,8 @@ if(opt$stat %in% c("total_UMI", "ngenes")) { require(Seurat) # add the statistic from the seurat object - s <- readRDS(opt$seuratobject) - + s <- loadSeurat(path=opt$seuratobject) + ## set the default assay message("Setting default assay to: ", opt$seuratassay) DefaultAssay(s) <- opt$seuratassay diff --git a/R/plot_rdims_cluster_genes.R b/R/plot_rdims_cluster_genes.R index cbaddfc..74f1349 100644 --- a/R/plot_rdims_cluster_genes.R +++ b/R/plot_rdims_cluster_genes.R @@ -117,7 
+117,7 @@ print(opt) ## read in the assay data #data <- read.table(opt$assaydata, sep="\t", header=T, as.is=T, check.names=F) message("reading in assay data") -data <- readRDS(opt$assaydata) +data <- loadSeurat(opt$assaydata) message("reading in the coordinates") plot_data <- read.table(opt$table,sep="\t",header=TRUE) diff --git a/R/plot_rdims_gene.R b/R/plot_rdims_gene.R index fbb9979..c3c4413 100644 --- a/R/plot_rdims_gene.R +++ b/R/plot_rdims_gene.R @@ -119,9 +119,7 @@ if(opt$pointpch != ".") { opt$pointpch <- as.numeric(opt$pointpch) } cat("Running with options:\n") print(opt) - -## read in the raw count matrix -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) ## set the default assay message("Setting default assay to: ", opt$seuratassay) diff --git a/R/plot_violins.R b/R/plot_violins.R index 202e9ab..46f9e66 100644 --- a/R/plot_violins.R +++ b/R/plot_violins.R @@ -59,7 +59,8 @@ genes <- read.table(opt$genetable, ## read in the raw count matrix -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) + Idents(s) <- readRDS(opt$clusterids) ## set the default assay diff --git a/R/scanpy_post_process_clusters.R b/R/scanpy_post_process_clusters.R index 3142f06..a3dbc7e 100644 --- a/R/scanpy_post_process_clusters.R +++ b/R/scanpy_post_process_clusters.R @@ -34,8 +34,7 @@ opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) -message(sprintf("readRDS: %s", opt$seuratobject)) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) message("seurat_cluster.R running with default assay: ", DefaultAssay(s)) @@ -71,16 +70,16 @@ names(cluster_ids) <- clusters$barcode unique_cluster_ids <- unique(cluster_ids) print(unique_cluster_ids) -message(sprintf("saving a unique list ofe cluster ids")) +print(sprintf("saving a unique list of cluster ids")) write.table(unique_cluster_ids, file=file.path(opt$outdir,"cluster_ids.tsv"), quote=FALSE, col.names = FALSE, row.names = FALSE) 
cluster_colors <- gg_color_hue(length(unique_cluster_ids)) -message(sprintf("saving the cluster colors (ggplot)")) +print(sprintf("saving the cluster colors (ggplot)")) write.table(cluster_colors, file=file.path(opt$outdir,"cluster_colors.tsv"), quote=FALSE, col.names = FALSE, row.names = FALSE) -message(sprintf("saveRDS")) +print(sprintf("saveRDS of cluster ids")) saveRDS(cluster_ids, file=file.path(opt$outdir,"cluster_ids.rds")) cluster_assignments <- data.frame(barcode=as.character(names(cluster_ids)), @@ -91,6 +90,6 @@ write.table(cluster_assignments, sep="\t", col.names=T, row.names=F, quote=F) -message("seurat_cluster.R final default assay: ", DefaultAssay(s)) +print(sprintf("seurat_cluster.R final default assay: %s", DefaultAssay(s))) -message("Completed") +print("Completed") diff --git a/R/seurat_FindMarkers.R b/R/seurat_FindMarkers.R index 1a2a94a..7d65edc 100644 --- a/R/seurat_FindMarkers.R +++ b/R/seurat_FindMarkers.R @@ -78,27 +78,43 @@ plan("multiprocess", workers = opt$numcores) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) cluster_ids <- readRDS(opt$clusterids) +if (class(s@misc) == "list"){ + cat("Convert the s@misc to dataframe") + s@misc <- as.data.frame(s@misc, stringsAsFactors=FALSE) +} + have_gene_ids <- "gene_id" %in% colnames(s@misc) if(!have_gene_ids) { - cat("Missing ensembl gene_id column in @misc, reading in annotation.\n") - + cat("Missing ensembl gene_id column in @misc, reading in annotation from other source.\n") + + if(endsWith(opt$seuratobject, ".h5seurat")){ + message("Using s@assays$RNA@meta.features in Seurat object for gene annotation. 
This slot contains unique gene names and is produced during conversion from scanpy anndata object.") + ann <- s@assays$RNA@meta.features + ann$seurat_id = rownames(ann) + ann <- unique(ann[,c("gene_ids", "seurat_id")]) + colnames(ann) <- c("ensembl_id", "gene_name") + } else { + message("Reading in annotation from annotation.dir") + message("Please note that only genes with unique gene names will be kept.") + message("The other ones will be lost due to Seurat's process of making gene names unique!") ann <- read.table(gzfile(opt$annotation), header=T, sep="\t", as.is=T) ## subset to columns of interest ann <- unique(ann[,c("ensembl_id", "gene_name")]) + } - ## the gene names have been made unique so we want only 1:1 mappings. - gn_tab <- table(ann$gene_name) + ## the gene names have been made unique so we want only 1:1 mappings. + gn_tab <- table(ann$gene_name) - unique_gn <- names(gn_tab)[gn_tab == 1] - ann <- ann[ann$gene_name %in% unique_gn,] + unique_gn <- names(gn_tab)[gn_tab == 1] + ann <- ann[ann$gene_name %in% unique_gn,] - rownames(ann) <- ann$gene_name + rownames(ann) <- ann$gene_name } xx <- names(cluster_ids) diff --git a/R/seurat_begin.R b/R/seurat_begin.R index 96035d6..796061e 100644 --- a/R/seurat_begin.R +++ b/R/seurat_begin.R @@ -42,7 +42,6 @@ option_list <- list( c("--matrixtype"), default="10X", help="either 10X or rds" ), - make_option( c("--project"), default="SeuratAnalysis", @@ -53,6 +52,11 @@ option_list <- list( default="seurat.out.dir", help="Location for outputs files. Must exist." 
), + make_option( + c("--file_type"), + default="rds", + help="Which type of file to use (rds or h5seurat)" + ), make_option( c("--metadata"), default="none", @@ -674,6 +678,11 @@ print( message("seurat_begin.R object final default assay: ", DefaultAssay(s)) # Save the R object -saveRDS(s, file=file.path(opt$outdir, "begin.rds")) +if (opt$file_type == "rds") { + outfile = file.path(opt$outdir, "begin.rds") +} else { + outfile = file.path(opt$outdir, "begin.h5seurat") +} +saveSeurat(path=outfile, format=opt$file_type) message("Completed") diff --git a/R/seurat_characteriseClusterDEGenes.R b/R/seurat_characteriseClusterDEGenes.R index 0e62490..a2f80ea 100644 --- a/R/seurat_characteriseClusterDEGenes.R +++ b/R/seurat_characteriseClusterDEGenes.R @@ -187,7 +187,8 @@ tex <- c(tex, getFigureTex(defn, deCaption,plot_dir_var=opt$plotdirvar)) ## enforce significance ## read in the seurat object -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) + cluster_ids <- readRDS(opt$clusterids) Idents(s) <- cluster_ids diff --git a/R/seurat_clusterStats.R b/R/seurat_clusterStats.R index 31984ac..e78ed97 100644 --- a/R/seurat_clusterStats.R +++ b/R/seurat_clusterStats.R @@ -52,7 +52,7 @@ cat("Running with options:\n") print(opt) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) cluster_ids <- readRDS(opt$clusterids) xx <- names(cluster_ids) diff --git a/R/seurat_compare_clusters.R b/R/seurat_compare_clusters.R index f73d4ff..737862b 100644 --- a/R/seurat_compare_clusters.R +++ b/R/seurat_compare_clusters.R @@ -36,8 +36,7 @@ opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) -message(sprintf("readRDS: %s", opt$seuratobject)) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) message("seurat_cluster.R running with default assay: ", DefaultAssay(s)) diff --git a/R/seurat_dm.R b/R/seurat_dm.R index a5f65d6..bcd76c1 100644 --- a/R/seurat_dm.R +++ b/R/seurat_dm.R @@ -48,9 +48,7 @@ 
opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) - -message("readRDS") -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) cluster_ids <- readRDS(opt$clusterids) Idents(s) <- cluster_ids diff --git a/R/seurat_explore_hvg_and_cell_cycle.R b/R/seurat_explore_hvg_and_cell_cycle.R index 58810f9..4b5c393 100644 --- a/R/seurat_explore_hvg_and_cell_cycle.R +++ b/R/seurat_explore_hvg_and_cell_cycle.R @@ -200,7 +200,7 @@ options(future.globals.maxSize = opt$memory * 1024^2) # Input data ---- -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) ## ######################################################################### ## ## # (i) Initial normalisation, variable gene identification and scaling ### ## diff --git a/R/seurat_find_neighbors.R b/R/seurat_find_neighbors.R index 27e7eaf..56dbb75 100644 --- a/R/seurat_find_neighbors.R +++ b/R/seurat_find_neighbors.R @@ -87,7 +87,7 @@ options(future.globals.maxSize = opt$memory * 1024^2) # Input data ---- -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) if(opt$usesigcomponents) { diff --git a/R/seurat_get_assay_data.R b/R/seurat_get_assay_data.R index 4a69a97..24a3351 100644 --- a/R/seurat_get_assay_data.R +++ b/R/seurat_get_assay_data.R @@ -63,8 +63,7 @@ cat("Running with options:\n") print(opt) -## read in the raw count matrix -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) ## set the default assay message("Setting default assay to: ", opt$seuratassay) diff --git a/R/seurat_jackstraw.R b/R/seurat_jackstraw.R index dea3139..463d932 100644 --- a/R/seurat_jackstraw.R +++ b/R/seurat_jackstraw.R @@ -30,7 +30,7 @@ opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) ## In Macosko et al, we implemented a resampling test inspired by the jackStraw procedure. 
## We randomly permute a subset of the data (1% by default) and rerun PCA, diff --git a/R/seurat_normalise_and_scale.R b/R/seurat_normalise_and_scale.R index c20de5a..58ca0cb 100644 --- a/R/seurat_normalise_and_scale.R +++ b/R/seurat_normalise_and_scale.R @@ -221,7 +221,7 @@ options(future.globals.maxSize = opt$memory * 1024^2) # Input data ---- -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) ## ######################################################################### ## ## # (i) Initial normalisation, variable gene identification and scaling ## ## @@ -351,6 +351,6 @@ if(opt$normalizationmethod=="sctransform") message("seurat_begin.R object final default assay: ", DefaultAssay(s)) # Save the R object -saveRDS(s, file=file.path(opt$outdir, "begin.rds")) +saveSeurat(path=opt$seuratobject) message("Completed") diff --git a/R/seurat_pca.R b/R/seurat_pca.R index 1027a91..2d95fa5 100644 --- a/R/seurat_pca.R +++ b/R/seurat_pca.R @@ -115,7 +115,7 @@ options(future.globals.maxSize = opt$memory * 1024^2) # Input data ---- -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) ## ######################################################################### ## ## ################### (vi) Dimension reduction (PCA) ###################### ## @@ -200,6 +200,6 @@ if(opt$normalizationmethod!="sctransform" & opt$jackstraw) message("seurat_begin.R object final default assay: ", DefaultAssay(s)) # Save the R object -saveRDS(s, file=file.path(opt$outdir, "begin.rds")) +saveSeurat(path=opt$seuratobject) message("Completed") diff --git a/R/seurat_summariseMarkers.R b/R/seurat_summariseMarkers.R index b493031..7eda7f2 100644 --- a/R/seurat_summariseMarkers.R +++ b/R/seurat_summariseMarkers.R @@ -40,7 +40,8 @@ if(!is.null(opt$subgroup)) { opt$subgroup <- strsplit(opt$subgroup,",")[[1]]} cat("Running with options:\n") print(opt) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) + cluster_ids <- readRDS(opt$clusterids) Idents(s) <- 
cluster_ids diff --git a/R/seurat_summariseMarkersBetween.R b/R/seurat_summariseMarkersBetween.R index 98b0449..4f25560 100644 --- a/R/seurat_summariseMarkersBetween.R +++ b/R/seurat_summariseMarkersBetween.R @@ -48,7 +48,7 @@ print(opt) # set the run specs run_specs <- paste(opt$numpcs,opt$resolution,opt$algorithm,opt$testuse,sep="_") -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) cluster_ids <- readRDS(opt$clusterids) message("Setting the default seurat assay to: ", opt$seuratassay) diff --git a/R/seurat_tsne.R b/R/seurat_tsne.R index c94a41a..a15b660 100644 --- a/R/seurat_tsne.R +++ b/R/seurat_tsne.R @@ -45,8 +45,8 @@ cat("Running with options:\n") print(opt) -message("readRDS") -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) + cluster_ids <- readRDS(opt$clusterids) Idents(s) <- cluster_ids print(head(Idents(s))) diff --git a/R/seurat_umap.R b/R/seurat_umap.R index 2df6391..7cdab7b 100644 --- a/R/seurat_umap.R +++ b/R/seurat_umap.R @@ -50,9 +50,8 @@ opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) +s <- loadSeurat(path=opt$seuratobject) -message("readRDS") -s <- readRDS(opt$seuratobject) s@graphs <- readRDS(opt$seuratgraphs) message("seurat_umap.R running with default assay: ", DefaultAssay(s)) diff --git a/R/singleR_extra_plots.R b/R/singleR_extra_plots.R index 98fb522..44b546a 100644 --- a/R/singleR_extra_plots.R +++ b/R/singleR_extra_plots.R @@ -35,8 +35,8 @@ opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) -message(sprintf("readRDS: %s", opt$seuratobject)) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) + a <- ifelse("SCT" %in% names(s), yes = "SCT", no = "RNA") sce <- as.SingleCellExperiment(s, assay = a) # Seems to be using SCT assay (so does it take the defualt?) 
diff --git a/R/singleR_plots.R b/R/singleR_plots.R index a132088..83537bd 100644 --- a/R/singleR_plots.R +++ b/R/singleR_plots.R @@ -38,8 +38,8 @@ opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) -message(sprintf("readRDS: %s", opt$seuratobject)) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) + a <- ifelse("SCT" %in% names(s), yes = "SCT", no = "RNA") sce <- as.SingleCellExperiment(s, assay = a) # Seems to be using SCT assay (so does it take the defualt?) diff --git a/R/singleR_run.R b/R/singleR_run.R index bc8364e..a95bc72 100644 --- a/R/singleR_run.R +++ b/R/singleR_run.R @@ -35,8 +35,8 @@ opt <- parse_args(OptionParser(option_list=option_list)) cat("Running with options:\n") print(opt) -message(sprintf("readRDS: %s", opt$seuratobject)) -s <- readRDS(opt$seuratobject) +s <- loadSeurat(path=opt$seuratobject) + a <- ifelse("SCT" %in% names(s), yes = "SCT", no = "RNA") sce <- as.SingleCellExperiment(s, assay = a) # Seems to be using SCT assay (so does it take the defualt?) 
diff --git a/pipelines/pipeline_seurat.py b/pipelines/pipeline_seurat.py index 0ddcc38..cd72bae 100644 --- a/pipelines/pipeline_seurat.py +++ b/pipelines/pipeline_seurat.py @@ -212,7 +212,6 @@ def taskSummary(infile, outfile): run.append(str(v)) tab = pd.DataFrame(list(zip(tasks,run)),columns=["task","run"]) - print(tab) tab.to_latex(buf=outfile, index=False) @@ -296,6 +295,7 @@ def createSeuratObject(infile, outfile): --matrixtype=%(matrix_type)s --project=%(sample_name)s --outdir=%(outdir)s + --file_type=%(input_format)s --groupby=%(qc_groupby)s --mingenes=%(qc_initial_mingenes)s --mincells=%(qc_mincells)s @@ -512,12 +512,13 @@ def seuratPCA(infile, outfile): # ############### Predict cell-types using singleR ################### # # #################################################################### # +file_suffix = str(PARAMS["input_format"]) @active_if(PARAMS["headstart_seurat_object"]) @transform(os.path.join(str(PARAMS["headstart_path"]), - "*.seurat.dir/begin.rds"), - regex(r".*/(.*).seurat.dir/begin.rds"), - r"\1.seurat.dir/begin.rds") + "*.seurat.dir/begin."+file_suffix), + regex(r".*/(.*).seurat.dir/begin."+file_suffix), + rf"\1.seurat.dir/begin."+file_suffix) def headstart(infile, outfile): ''' link in the begin.rds objects @@ -544,7 +545,10 @@ def headstart(infile, outfile): def genSingleRjobs(): '''generate the singleR jobs''' - seurat_objects = glob.glob("*.seurat.dir/begin.rds") + if PARAMS["input_format"] == "rds": + seurat_objects = glob.glob("*.seurat.dir/begin.rds") + else: + seurat_objects = glob.glob("*.seurat.dir/begin.h5seurat") references = [x.strip() for x in PARAMS["singleR_reference"].split(",")] @@ -610,7 +614,6 @@ def singleR(infile, outfile): spec, SPEC = TASK.get_vars(infile, outfile, PARAMS) - # set the job threads and memory job_threads, job_memory, r_memory = TASK.get_resources( memory=PARAMS["resources_memory_extreme"], @@ -693,10 +696,22 @@ def plotExtraSingleR(infile, outfile): # ############ Begin per-parameter combination 
analysis runs ################ # # ########################################################################### # +USE_RDS = 0 +USE_H5 = 0 +if PARAMS["input_format"] == "rds": + SEURAT_OBJECTS = "*.seurat.dir/begin.rds" + S_REGEX = regex(r"(.*)/begin.rds") + USE_RDS = 1 +else: + SEURAT_OBJECTS = "*.seurat.dir/begin.h5seurat" + S_REGEX = regex(r"(.*)/begin.h5seurat") + USE_H5 = 1 + +@active_if(USE_RDS) @follows(seuratPCA, headstart) -@transform("*.seurat.dir/begin.rds", - regex(r"(.*)/begin.rds"), +@transform(SEURAT_OBJECTS, + S_REGEX, r"\1/export_for_python.sentinel") def exportForPython(infile, outfile): ''' @@ -779,6 +794,58 @@ def exportForPython(infile, outfile): P.run(statement) IOTools.touch_file(outfile) +@active_if(USE_H5) +@follows(seuratPCA, headstart) +@transform(SEURAT_OBJECTS, + S_REGEX, + r"\1/export_for_python.sentinel") +def convertRDStoAnndata(infile, outfile): + '''Convert Seurat h5ad to anndata h5ad ''' + + spec, SPEC = TASK.get_vars(infile, outfile, PARAMS) + + if PARAMS["headstart_export_for_python"]: + + source_folder = os.path.join(PARAMS["headstart_path"], + spec.sample_dir) + + source_files = ["begin.h5ad", + "export_for_python.*"] + + source_paths = [ os.path.join(source_folder, x) + for x in source_files ] + + for source in source_paths: + if "*" in source: + files = glob.glob(source) + else: + files = [ source ] + + for sf in files: + if os.path.exists(sf): + os.symlink(os.path.relpath(sf, + start=spec.sample_dir), + os.path.join(spec.sample_dir, + os.path.basename(sf))) + else: + # set the job threads and memory + job_threads, job_memory, r_memory = TASK.get_resources( + memory=PARAMS["resources_memory_standard"]) + + statement = '''Rscript %(tenx_dir)s/R/convert_rds_to_anndata.R + --seuratobject=%(seurat_object)s + &> %(log_file)s + ''' % dict(PARAMS, **SPEC, **locals()) + + P.run(statement) + IOTools.touch_file(outfile) + + +if PARAMS["input_format"] == "rds": + TASK_EXPORT = exportForPython +else: + TASK_EXPORT = convertRDStoAnndata + 
def genAnndata(): @@ -790,7 +857,10 @@ def genAnndata(): for sample in samples: - infile = os.path.join(sample, "begin.rds") + if PARAMS["input_format"] == "rds": + infile = os.path.join(sample, "begin.rds") + else: + infile = os.path.join(sample, "begin.h5seurat") for comps in components: @@ -800,8 +870,7 @@ def genAnndata(): outfile = os.path.join(sample, outdir, subdir, outname) yield [infile, outfile] - -@follows(exportForPython) +@follows(TASK_EXPORT) @files(genAnndata) def anndata(infile, outfile): ''' @@ -812,8 +881,14 @@ def anndata(infile, outfile): reductiontype = PARAMS["dimreduction_method"] - reduced_dims_matrix_file = os.path.join(spec.sample_dir, - "embedding." + reductiontype + ".tsv.gz") + if PARAMS["input_format"] == "rds": + reduced_dims_matrix_file = os.path.join(spec.sample_dir, + "embedding." + reductiontype + ".tsv.gz") + input_files = '''--reduced_dims_matrix_file=%(reduced_dims_matrix_file)s ''' + else: + anndata_file = SPEC["seurat_object"].replace(".h5seurat", ".h5ad") + input_files = '''--anndata_file=%(anndata_file)s + --reduction_name=%(dimreduction_method)s ''' barcode_file = os.path.join(spec.sample_dir, "barcodes.tsv.gz") @@ -841,7 +916,7 @@ def anndata(infile, outfile): memory=PARAMS["resources_memory_extreme"]) statement = '''python %(tenx_dir)s/python/make_anndata.py - --reduced_dims_matrix_file=%(reduced_dims_matrix_file)s + %(input_files)s --barcode_file=%(barcode_file)s --outdir=%(outdir)s --comps=%(comps)s @@ -871,7 +946,10 @@ def genScanpyClusterJobs(): for sample in samples: - infile = os.path.join(sample, "begin.rds") + if PARAMS["input_format"] == "rds": + infile = os.path.join(sample, "begin.rds") + else: + infile = os.path.join(sample, "begin.h5seurat") for comps in components: @@ -932,7 +1010,7 @@ def cluster(infile, outfile): spec, SPEC = TASK.get_vars(infile, outfile, PARAMS) job_threads, job_memory, r_memory = TASK.get_resources( - memory=PARAMS["resources_memory_low"]) + memory=PARAMS["resources_memory_standard"]) 
scanpy_cluster_tsv = infile.replace(".sentinel",".tsv.gz") @@ -1083,7 +1161,7 @@ def paga(infile, outfile): @active_if(PARAMS["run_phate"]) -@follows(exportForPython,cluster) +@follows(TASK_EXPORT, cluster) @transform(anndata, regex(r"(.*)/(.*)/anndata.dir/anndata.sentinel"), r"\1/\2/phate.dir/phate.sentinel") @@ -1113,22 +1191,29 @@ def phate(infile, outfile): cluster_resolutions = ",".join(spec.resolutions) - barcode_file = os.path.join(spec.sample_dir, - "barcodes.tsv.gz") - - if PARAMS["phate_assay"] == "reduced.dimensions": - - embeddings = ".".join(["embedding", - PARAMS["dimreduction_method"], - "tsv.gz"]) - - assay_data = os.path.join(spec.sample_dir, embeddings) - - elif PARAMS["phate_assay"] == "scaled.data": - assay_data = os.path.join(spec.sample_dir, "assay.scale.data.tsv.gz") + if USE_H5: + # path to anndata object + assay_data = os.path.join(spec.sample_dir, "begin.h5ad") + if PARAMS["phate_assay"] == "reduced.dimensions": + # add red dimension name to extract from anndata + input_phate = '''--reduction_name=%(dimreduction_method)s + --input_type="anndata" ''' else: - raise ValueError("PHATE assay not recognised") + barcode_file = os.path.join(spec.sample_dir, + "barcodes.tsv.gz") + input_phate = ''' --barcode_file=%(barcode_file)s + --input_type="tsv" ''' + if PARAMS["phate_assay"] == "reduced.dimensions": + embeddings = ".".join(["embedding", + PARAMS["dimreduction_method"], + "tsv.gz"]) + assay_data = os.path.join(spec.sample_dir, embeddings) + + elif PARAMS["phate_assay"] == "scaled.data": + assay_data = os.path.join(spec.sample_dir, "assay.scale.data.tsv.gz") + else: + raise ValueError("PHATE assay not recognised") k = PARAMS["phate_k"] @@ -1140,13 +1225,13 @@ def phate(infile, outfile): statement = '''python %(tenx_dir)s/python/run_phate.py --data=%(assay_data)s --assay=%(phate_assay)s - --barcode_file=%(barcode_file)s --outdir=%(outdir)s --resolution=%(cluster_resolutions)s --cluster_assignments=%(cluster_assignment_files)s 
--cluster_colors=%(cluster_color_files)s --k=%(k)s --gif=%(phate_gif)s + %(input_phate)s &> %(log_file)s ''' % dict(PARAMS, **SPEC, **locals()) @@ -1201,9 +1286,9 @@ def UMAP(infile, outfile): # ########################################################################### # @active_if(PARAMS["run_diffusionmap"]) -@transform(cluster, - regex(r"(.*)/(.*)/(.*)/cluster.sentinel"), - r"\1/\2/\3/diffusionmap.dir/dm.sentinel") +@transform(anndata, + regex(r"(.*)/(.*)/anndata.dir/anndata.sentinel"), + r"\1/\2/diffusionmap.dir/dm.sentinel") def diffusionMap(infile, outfile): ''' Run the diffusion map analysis on a saved seurat object. @@ -1309,7 +1394,7 @@ def knownMarkerViolins(infile, outfile): # ########################################################################### # @active_if(PARAMS["run_velocity"]) -@follows(paga) +@follows(paga, phate) @transform(RDIMS_VIS_TASK, regex(r"(.*)/(.*)/(.*).sentinel"), r"\1/velocity.dir/scvelo.sentinel") @@ -1407,6 +1492,10 @@ def scvelo(infile, outfile): runs["phate"] = {"method": "phate", "table": phate_table, "rdim1": "PHATE1", "rdim2": "PHATE2"} + if USE_H5: + input_type = ''' --input_type="anndata" ''' + else: + input_type = ''' --input_type="tsv" ''' statements = [] @@ -1432,6 +1521,7 @@ def scvelo(infile, outfile): --rdims=%(r_tab)s --rdim1=%(r_1)s --rdim2=%(r_2)s + %(input_type)s &> %(this_log_file)s ''' % dict(PARAMS, **SPEC, **locals()) @@ -1447,7 +1537,7 @@ def scvelo(infile, outfile): @transform(RDIMS_VIS_TASK, regex(r"(.*)/(.*)/(.*)/(.*).sentinel"), - add_inputs(exportForPython), + add_inputs(TASK_EXPORT), r"\1/\2/rdims.visualisation.dir/plot.rdims.factor.sentinel") def plotRdimsFactors(infiles, outfile): ''' @@ -1602,7 +1692,6 @@ def plotRdimsClusters(infile, outfile): @active_if(PARAMS["run_diffusionmap"]) -@follows(diffusionMap) @transform(cluster, regex(r"(.*)/(.*).dir/(.*).dir/(.*).sentinel"), add_inputs(diffusionMap), @@ -1865,7 +1954,7 @@ def plotRdimsGenes(infile, outfile): # ################# plot per-cluster 
summary statistics ##################### # # ########################################################################### # -@follows(exportForPython) +@follows(TASK_EXPORT) @transform(cluster, regex(r"(.*)/cluster.sentinel"), r"\1/group.numbers.dir/plot.group.numbers.sentinel") @@ -2056,7 +2145,7 @@ def findMarkers(infile, outfile): # set the job threads and memory job_threads, job_memory, r_memory = TASK.get_resources( - memory=PARAMS["resources_memory_standard"], + memory=PARAMS["resources_memory_high"], cpu=PARAMS["findmarkers_numcores"]) for i in spec.clusters: @@ -2181,7 +2270,7 @@ def summariseMarkers(infile, outfile): # set the job threads and memory job_threads, job_memory, r_memory = TASK.get_resources( - memory=PARAMS["resources_memory_low"], + memory=PARAMS["resources_memory_high"], cpu=1) # make sumamary tables and plots of the differentially expressed genes @@ -3629,8 +3718,6 @@ def _add_figure(plot_file=None, \\input %(tenx_dir)s/latex/endmatter.tex' ''' - print(statement) - # Deliberately run twice - necessary for LaTeX compilation.. 
draft_mode = "-draftmode" P.run(statement % dict(PARAMS, **SPEC, **locals())) diff --git a/pipelines/pipeline_seurat/pipeline.yml b/pipelines/pipeline_seurat/pipeline.yml index 0401734..4dfb0c4 100644 --- a/pipelines/pipeline_seurat/pipeline.yml +++ b/pipelines/pipeline_seurat/pipeline.yml @@ -26,6 +26,15 @@ matrix: # type: 10X +input: + # currently supported are + # ----------------------- + # + # "rds", for which the pipeline expects a + # file called 'begin.rds' + # "h5seurat", for which the pipeline expects + # a file called 'begin.h5seurat' + format: rds # compute resource options # ------------------------ @@ -44,7 +53,8 @@ resources: memory_low: 10G # Memory_standard is used if not further specified memory_standard: 20G - # Memory_high is used for tasks: findMarkers, summariseMarkers, + # Memory_high is used for tasks: beginSeurat, compareClusters, + # paga, UMAP, topClusterMarkers, findMarkers, summariseMarkers # diffusionMap memory_high: 50G # Memory_extreme is used for the scaleing and PCA tasks diff --git a/pipelines/pipeline_utils/TASK.py b/pipelines/pipeline_utils/TASK.py index 8aedbf4..23b184b 100644 --- a/pipelines/pipeline_utils/TASK.py +++ b/pipelines/pipeline_utils/TASK.py @@ -36,12 +36,10 @@ def get_vars(infile, outfile, PARAMS, make_outdir=True): SPEC = { "sample_name": parts[0].split(".")[0], "sample_dir": parts[0], - "seurat_object": os.path.join(parts[0], "begin.rds"), "outdir": os.path.dirname(outfile), "outname": os.path.basename(outfile), } - if PARAMS["runspecs_cluster_resolutions"]: SPEC["resolutions"] = [x.strip() @@ -54,6 +52,10 @@ def get_vars(infile, outfile, PARAMS, make_outdir=True): if PARAMS["runspecs_predefined_clusters"]: SPEC["resolutions"].append("predefined") + if PARAMS["input_format"] == "rds": + SPEC["seurat_object"] = os.path.join(parts[0], "begin.rds") + else: + SPEC["seurat_object"] = os.path.join(parts[0], "begin.h5seurat") if not infile is None: infile = os.path.relpath(infile) diff --git 
a/pipelines/pipeline_utils/spec.py b/pipelines/pipeline_utils/spec.py index 87436c6..7ccb51d 100644 --- a/pipelines/pipeline_utils/spec.py +++ b/pipelines/pipeline_utils/spec.py @@ -11,13 +11,16 @@ def get(infile, outfile, PARAMS): nparts = len(parts) spec = { "sample_name": parts[0].split(".")[0], - "seurat_object": os.path.join(parts[0], "begin.rds"), "outdir": os.path.dirname(outfile), "indir": os.path.dirname(infile), "resolutions" : [x.strip() for x in PARAMS["runspecs_cluster_resolutions"].split(",")] } + if PARAMS["input_format"] == "rds": + spec["seurat_object"] = os.path.join(parts[0], "begin.rds") + else: + spec["seurat_object"] = os.path.join(parts[0], "begin.h5seurat") # take care of making the output directory. if not os.path.exists(spec["outdir"]): diff --git a/python/make_anndata.py b/python/make_anndata.py index 99d6a4a..4733423 100644 --- a/python/make_anndata.py +++ b/python/make_anndata.py @@ -19,7 +19,6 @@ L.addHandler(log_handler) L.setLevel(logging.INFO) - # ########################################################################### # # ######################## Parse the arguments ############################## # # ########################################################################### # @@ -27,8 +26,12 @@ L.info("parsing arguments") parser = argparse.ArgumentParser() -parser.add_argument("--reduced_dims_matrix_file", default="reduced_dims.tsv.gz", type=str, +parser.add_argument("--reduced_dims_matrix_file", default="none", type=str, help="File with reduced dimensions") +parser.add_argument("--anndata_file", default="none", type=str, + help="h5ad file with anndata") +parser.add_argument("--reduction_name", default="pca", type=str, + help="Name of the dimension reduction") parser.add_argument("--barcode_file", default="barcodes.tsv.gz", type=str, help="File with the cell barcodes") parser.add_argument("--outdir",default=1, type=str, @@ -61,29 +64,51 @@ # ########################################################################### # -# 
######################### Make the anndata object ######################### # +# ######################## Make anndata object ############################## # # ########################################################################### # -# Read matrix of reduced dimensions, create anndata and add dimensions -reduced_dims_mat = pd.read_csv(args.reduced_dims_matrix_file, sep="\t") -reduced_dims_mat.index = [x for x in pd.read_csv(args.barcode_file, header=None)[0]] +# select correct PCs +select_comps = args.comps.split(",") -adata = anndata.AnnData(obs=[x for x in reduced_dims_mat.index]) -adata.obs.index = reduced_dims_mat.index -adata.obs.rename(columns={0:'barcode'}, inplace=True) +if not args.anndata_file == "none": + L.info("Using converted h5ad from SeuratDisk") + adata_input = anndata.read(args.anndata_file) + adata_input.obs['barcode'] = pd.Series(adata_input.obs.index.copy(), + index=adata_input.obs.index.copy()) + + # get name for obsm + obsm_use = 'X_' + str(args.reduction_name) + indeces = [int(i)-1 for i in select_comps] + adata = anndata.AnnData(obs=adata_input.obs.copy()) + adata.obsm[obsm_use] = adata_input.obsm[obsm_use].copy()[:,indeces] + + rd_colnames = [str(args.reduction_name)+"_"+str(i) for i in select_comps] + L.info("Using comps " + ', '.join(rd_colnames)) -# Add dimensions to anndata -colnames = list(reduced_dims_mat.columns) -colname_prefix = list(set([re.sub(r'_[0-9]+', '', i) for i in colnames]))[0] + "_" -select_comps = args.comps.split(",") -select_comps = [ colname_prefix + item for item in select_comps] -reduced_dims_mat = reduced_dims_mat[select_comps] -n_pcs = len(select_comps) +elif not args.reduced_dims_matrix_file == "none": + # Read matrix of reduced dimensions, create anndata and add dimensions + reduced_dims_mat = pd.read_csv(args.reduced_dims_matrix_file, sep="\t") + reduced_dims_mat.index = [x for x in pd.read_csv(args.barcode_file, header=None)[0]] -L.info("Using comps " + ', '.join(list(reduced_dims_mat.columns))) + 
adata = anndata.AnnData(obs=[x for x in reduced_dims_mat.index]) + adata.obs.index = reduced_dims_mat.index + adata.obs.rename(columns={0:'barcode'}, inplace=True) -adata.obsm['X_rdims'] = reduced_dims_mat.to_numpy(dtype="float32") + # Add dimensions to anndata + colnames = list(reduced_dims_mat.columns) + colname_prefix = list(set([re.sub(r'_[0-9]+', '', i) for i in colnames]))[0] + "_" + select_comps = args.comps.split(",") + select_comps = [ colname_prefix + item for item in select_comps] + reduced_dims_mat = reduced_dims_mat[select_comps] + + L.info("Using comps " + ', '.join(list(reduced_dims_mat.columns))) + + adata.obsm['X_rdims'] = reduced_dims_mat.to_numpy(dtype="float32") + obsm_use = 'X_rdims' + +else: + L.info("No input file present.") # ########################################################################### # @@ -92,6 +117,7 @@ # Run neighbors L.info( "Using " + str(args.k) + " neighbors") +L.info( "Computing neighbors based on obsm: " + str(obsm_use)) if args.method == "scanpy": @@ -101,7 +127,7 @@ neighbors(adata, n_neighbors = args.k, metric = args.metric, - use_rep = 'X_rdims') + use_rep = obsm_use) elif args.method == "hnsw": @@ -117,7 +143,7 @@ neighbors(adata, n_neighbors = args.k, n_pcs = None, - use_rep = "X_rdims", + use_rep = obsm_use, knn = True, random_state = 0, method = 'hnsw', @@ -135,4 +161,11 @@ adata.write(os.path.join(args.outdir, "anndata.h5ad")) +if args.anndata_file != "none": + # write out metadata for other tasks + metadata_out = adata.obs.copy() + out_folder = os.path.dirname(args.anndata_file) + metadata_out.to_csv(os.path.join(out_folder, "metadata.tsv.gz"), + sep="\t", index=False) + L.info("Complete") diff --git a/python/run_cluster.py b/python/run_cluster.py index c681f87..8904530 100644 --- a/python/run_cluster.py +++ b/python/run_cluster.py @@ -84,7 +84,9 @@ else: raise ValueError("Clustering algorithm not recognised") -adata.obs.to_csv(os.path.join(args.outdir, - "scanpy.clusters.tsv.gz"), sep="\t", index=False) 
+select_cols = ['barcode', args.algorithm] +adata.obs[select_cols].to_csv(os.path.join(args.outdir, + "scanpy.clusters.tsv.gz"), + sep="\t", index=False) L.info("Complete") diff --git a/python/run_phate.py b/python/run_phate.py index f414e52..9d775b5 100644 --- a/python/run_phate.py +++ b/python/run_phate.py @@ -15,7 +15,7 @@ import sys import scprep import phate - +import anndata # ########################################################################### # @@ -53,32 +53,43 @@ help="number of neighbors") parser.add_argument("--gif", default="No", type=str, help="output a GIF") - +parser.add_argument("--reduction_name", default="pca", type=str, + help="Name of the reduced dimseions") +parser.add_argument("--input_type", default="tsv", type=str, + help="anndata or tsv") args = parser.parse_args() -# ########################################################################### # -# ############## Create outdir and set results file ######################### # -# ########################################################################### # - - +L.info("Running with arguments:") +print(args) # ########################################################################### # # ############################## Run PHATE ################################## # # ########################################################################### # -if args.assay == "reduced.dimensions": - # Read matrix of reduced dimensions, create anndata and add dimensions - data = pd.read_csv(args.data, sep="\t", header=0) - -if args.assay == "scaled.data": - # Read matrix of reduced dimensions, create anndata and add dimensions - data = pd.read_csv(args.data, sep="\t", header=None) - - # we need to transpose the data for PHATE - data = data.transpose() - +if args.input_type == "tsv": + if args.assay == "reduced.dimensions": + # Read matrix of reduced dimensions, create anndata and add dimensions + data = pd.read_csv(args.data, sep="\t", header=0) + + if args.assay == "scaled.data": + # Read matrix of 
reduced dimensions, create anndata and add dimensions + data = pd.read_csv(args.data, sep="\t", header=None) + # we need to transpose the data for PHATE + data = data.transpose() + +if args.input_type == "anndata": + # Read in anndata and extract correct data type + adata = anndata.read(args.data) + if args.assay == "reduced.dimensions": + obsm_use = 'X_' + str(args.reduction_name) + data = pd.DataFrame(adata.obsm[obsm_use].copy(), + index=adata.obs.index.copy()) + if args.assay == "scaled.data": + data = pd.DataFrame(adata.X.copy(), + index=adata.index.copy()) + data = data.transpose() phate_operator = phate.PHATE(n_jobs=-2, knn=args.k) x2 = phate_operator.fit_transform(data) @@ -113,7 +124,11 @@ rdims_phate = pd.DataFrame(x2, columns=["PHATE1","PHATE2"]) -rdims_phate["barcode"] = pd.read_csv(args.barcode_file, header=None)[0].values +if args.input_type == "tsv": + rdims_phate["barcode"] = pd.read_csv(args.barcode_file, header=None)[0].values +if args.input_type == "anndata": + rdims_phate["barcode"] = adata.obs.index.values + rdims_phate.to_csv(os.path.join(args.outdir,"phate.tsv.gz"), sep="\t") @@ -145,7 +160,10 @@ rdims_phate = pd.DataFrame(x3, columns=["PHATE1","PHATE2", "PHATE3"]) -rdims_phate["barcode"] = pd.read_csv(args.barcode_file, header=None)[0].values +if args.input_type == "tsv": + rdims_phate["barcode"] = pd.read_csv(args.barcode_file, header=None)[0].values +if args.input_type == "anndata": + rdims_phate["barcode"] = adata.obs.index.values rdims_phate.to_csv(os.path.join(args.outdir,"phate_3D.tsv.gz"), sep="\t") diff --git a/python/run_scvelo.py b/python/run_scvelo.py index 673c9b2..27cfdc1 100644 --- a/python/run_scvelo.py +++ b/python/run_scvelo.py @@ -56,6 +56,9 @@ help="reduced dimension 1") parser.add_argument("--rdim2", default="UMAP2", type=str, help="reduced dimension 2") +parser.add_argument("--input_type", default="tsv", type=str, + help="whether gene and dim reduction information are " + "read from tsv or anndata") args = 
parser.parse_args() @@ -92,17 +95,32 @@ else: raise ValueError("either a loom file or dropEst directory must be specified") -# Add the variable and observation information -feat = pd.read_csv(os.path.join(metadata_dir,"features.tsv.gz"), header=None) -feat.columns = ["gene_symbol"] -feat.index = feat["gene_symbol"] -samples = pd.read_csv(os.path.join(metadata_dir, "barcodes.tsv.gz"),header=None) -metadata = pd.read_csv(os.path.join(metadata_dir,"metadata.tsv.gz"), sep="\t") -metadata.index = metadata.barcode.values +if args.input_type == "tsv" or not args.dropest_dir == "none": + # Add the variable and observation information from flat files + # or from dropest input directory -adata.vars = feat -adata.obs = metadata.loc[adata.obs.index,] + feat = pd.read_csv(os.path.join(metadata_dir,"features.tsv.gz"), header=None) + feat.columns = ["gene_symbol"] + feat.index = feat["gene_symbol"] + samples = pd.read_csv(os.path.join(metadata_dir, "barcodes.tsv.gz"),header=None) + metadata = pd.read_csv(os.path.join(metadata_dir,"metadata.tsv.gz"), sep="\t") + metadata.index = metadata.barcode.values + + adata.vars = feat + adata.obs = metadata.loc[adata.obs.index,] + +else: + # This is used for loom files + h5ad runs + # Need to use .raw slot here to have all genes included! 
+ adata_info = ad.read(os.path.join(args.rdims.split("/")[0], "begin.h5ad")) + #adata_info.raw.var['gene_symbol'] = adata_info.raw.var.index.values + #adata.vars = adata_info.raw.var.copy() + #adata.obs = adata_info + #adata = scv.utils.merge(adata, adata_meta) + ## the merge is causing issues with a corrupted neighbor graph + metadata = adata_info.obs.copy() + adata.obs = metadata.loc[adata.obs.index,] L.info("AnnData initialised") diff --git a/python/untitled b/python/untitled deleted file mode 100644 index e69de29..0000000 diff --git a/tenxutils/R/Seurat.R b/tenxutils/R/Seurat.R index 2f9f012..8da0197 100644 --- a/tenxutils/R/Seurat.R +++ b/tenxutils/R/Seurat.R @@ -126,3 +126,50 @@ getCellCycleGenes <- function(sgenes_file = NULL, list(s.genes = sgenes, g2m.genes = g2mgenes) } + + +## Function to load seurat object from either rds or h5seurat +#' This function can load the Seurat object from either rds or +#' h5seurat format +#' +#' @param path The path to the stored object +#' @param format A string indicating whether it is 'rds' or 'h5seurat'. If left empty (defaults to "none"), the ending of the path is used to determine file format. +loadSeurat <- function(path, + format = "none") +{ + if (endsWith(path, ".rds") | format == "rds") { + message(sprintf("readRDS: %s", opt$seuratobject)) + s <- readRDS(path) + } else if (endsWith(path, ".h5seurat") | format == "h5seurat") { + message(sprintf("LoadH5Seurat: %s", opt$seuratobject)) + stopifnot(require(SeuratDisk)) + s <- LoadH5Seurat(path) + } else { + stop("Input format not supported. The format or path ending needs to be rds/.rds or h5seurat/.h5seurat.") + } +} + +## Function to save seurat objects to either rds or h5seurat +#' This function can save the Seurat object to either rds or +#' h5seurat format +#' +#' @param path The path where the object should be stored +#' @param format A string indicating whether it is 'rds' or 'h5seurat'. 
## Function to save seurat objects to either rds or h5seurat
#' Save a Seurat object to either rds or h5seurat format.
#'
#' The output format is chosen from the `format` argument or, when that is
#' left as "none", inferred from the ending of `path`.
#'
#' @param path The path where the object should be stored (must end in
#'   ".rds" or ".h5seurat" unless `format` is given explicitly).
#' @param format A string indicating whether it is 'rds' or 'h5seurat'.
#'   If left empty (defaults to "none"), the ending of the path is used to
#'   determine the file format.
#' @param object The object to save. Defaults to `s`, looked up in the
#'   calling environment, for backward compatibility with the previous
#'   behaviour that read the global variable `s` directly.
#' @return Invisibly, the path the object was written to.
saveSeurat <- function(path,
                       format = "none",
                       object = s)
{
    ## scalar conditions: use short-circuit || rather than vectorized |
    if (endsWith(path, ".rds") || format == "rds") {
        message(sprintf("saveRDS: %s", path))
        ## write to the requested path (previously this ignored `path`
        ## and always wrote to file.path(opt$outdir, "begin.rds"))
        saveRDS(object, file = path)
    } else if (endsWith(path, ".h5seurat") || format == "h5seurat") {
        message(sprintf("SaveH5Seurat: %s", path))
        ## SeuratDisk is only needed for the h5seurat branch
        stopifnot(require(SeuratDisk))
        SaveH5Seurat(object, file = path, overwrite = TRUE)
    } else {
        stop("Output format not supported. The format or path ending needs to be rds/.rds or h5seurat/.h5seurat.")
    }
    invisible(path)
}