diff --git a/kbase.yml b/kbase.yml index 4fda497..684f73d 100644 --- a/kbase.yml +++ b/kbase.yml @@ -8,8 +8,8 @@ service-language: python module-version: - 1.2.2 + 1.2.3 owners: - [landml] + [dakota, landml] diff --git a/lib/kb_ObjectInfo/CreateAssemblyReport.py b/lib/kb_ObjectInfo/CreateAssemblyReport.py index ff49722..d3eaee6 100644 --- a/lib/kb_ObjectInfo/CreateAssemblyReport.py +++ b/lib/kb_ObjectInfo/CreateAssemblyReport.py @@ -10,11 +10,11 @@ def __init__(self, config): self.callback_url = os.environ['SDK_CALLBACK_URL'] self.dfu = DataFileUtil(self.callback_url) - + def assembly_overview(self,obj_list): header = "OVERVIEW" this_list = [["Assembly Name","Type","Assembly Type"]] - + for assembly in obj_list['data']: name = "Assembly Data Object" type_assem = '' @@ -26,25 +26,26 @@ def assembly_overview(self,obj_list): type_assem = assembly['data']['type'] this_list.extend([[name,object_type,type_assem]]) - + return (header,this_list) - + def assembly_metadata(self,obj_list): header = "METADATA" this_list = [['Assembly Name','Assembly ID', 'DNA size', 'GC content', 'Number contigs', 'FastA handle reference', 'MD5', 'Type', 'Taxon reference','Original filename']] - - list = ['assembly_id', 'dna_size', 'gc_content', 'num_contigs', + + items = ['dna_size', 'gc_content', 'num_contigs', 'fasta_handle_ref', 'md5', 'type', 'taxon_ref'] - + + # DataFileUtil guarantees that 'info' contains object_info: + # https://github.com/kbaseapps/DataFileUtil/blob/master/DataFileUtil.spec#L499-L506 for assembly in obj_list['data']: - name = "Assembly Data Object" - if 'info' in assembly: - name = assembly['info'][1] + name = assembly['info'][1] + ref = f"{assembly['info'][6]}/{assembly['info'][0]}/{assembly['info'][4]}" # Create the row for the one assembly - assem_list = [name] - for item in list: + assem_list = [name, ref] + for item in items: if item in assembly['data']: assem_list.append(str(assembly['data'][item])) else: @@ -54,18 +55,18 @@ def assembly_metadata(self,obj_list): assem_list.append(assembly['data']['fasta_handle_info']['node_file_name']) else: assem_list.append(" ") - + # Add the row to the list that will be returned this_list.extend([assem_list]) - + return (header,this_list) - + def assembly_dnabases(self,obj_list): header = "DNA Composition" this_list = [["Assembly Name","Total DNA Bases", "A Count","A Percent","C Count","C Percent", "G Count","G Percent","T Count","T Percent"]] - + for assembly in obj_list['data']: name = "Assembly Data Object" if 'info' in assembly: @@ -74,32 +75,32 @@ def assembly_dnabases(self,obj_list): pct = 1.00 if 'dna_size' in assembly['data']: dna_size = assembly['data']['dna_size'] - + assem_list = [name,str(dna_size)] for base in ["A","C","G","T"]: pct = round(100 * assembly['data']['base_counts'][base] / dna_size,2) assem_list.append(str(assembly['data']['base_counts'][base])) assem_list.append(str(pct)) - + this_list.extend([assem_list]) - + return(header,this_list) - + def assembly_contigs(self,obj_list): header = "Contigs in the Assembly" this_list= [["Assembly Name","Contig Name","Length","GC content","Number of Ns","Contig ID","Description"]] - + for assembly in obj_list['data']: name = "Assembly Data Object" if 'info' in assembly: name = assembly['info'][1] - + if 'contigs' in assembly['data']: myContig = assembly['data']['contigs'] for ctg in myContig: list = ['length', 'gc_content', 'Ncount', 'contig_id', 'description'] ctg_list = [name,ctg] - + for item in list: if item in myContig[ctg]: ctg_list.append(format(myContig[ctg][item])) @@ -107,16 +108,16 @@ def assembly_contigs(self,obj_list): ctg_list.append("") this_list.append(ctg_list) - + return(header,this_list) - - + + def assembly_dna(self,obj_list,scratch): header = "Contig FastA files found in the download files." fasta_list = [] dna_string = "" cf = CreateFasta(self.config) - + for assembly in obj_list['data']: name = "Assembly Data Object" input_ref = '' @@ -126,7 +127,7 @@ def assembly_dna(self,obj_list,scratch): fasta_list = cf.get_assembly_sequence(input_ref) report_path = os.path.join(scratch, name + '.fna') - + # Write the DNA string out to a Fasta file report_txt = open(report_path, "w") for dna_seq in fasta_list: @@ -134,6 +135,5 @@ def assembly_dna(self,obj_list,scratch): report_txt.write(dna) dna_string += dna report_txt.close() - + return(header) - diff --git a/test/kb_ObjectInfo_test.py b/test/kb_ObjectInfo_test.py index cf23b96..0b2def0 100644 --- a/test/kb_ObjectInfo_test.py +++ b/test/kb_ObjectInfo_test.py @@ -82,7 +82,7 @@ def setUpClass(cls): "generate_ids_if_needed": 1, "generate_missing_genes": 1 })['genome_ref'] - + # Prepare the Genome from gbff File cls.genbank_file_name = 'Carsonella_ruddii_HT_isolate_Thao2000.gbff' # Set the path to file in scratch @@ -251,9 +251,9 @@ def mytest_assembly_metadata(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - - def mytest_assembly_set(self): - assemblyset_ref = '69870/28/2' + + def test_assembly_set(self): + assemblyset_ref = '72131/16/1' ret = self.getImpl().assemblyset_report(self.getContext(), {'workspace_name': self.ws_info[1], 'input_ref': assemblyset_ref, @@ -264,7 +264,7 @@ def mytest_assembly_set(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_genome_protein_list(self): genome_ref = self.genome_ref genome_ref = '40843/4/1' @@ -281,7 +281,7 @@ def mytest_genome_protein_list(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_genome_protein_fasta(self): ret = self.getImpl().genome_report(self.getContext(), {'workspace_name': self.ws_info[1], @@ -394,7 +394,7 @@ def mytest_featureSet_unordered(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_sequenceSet(self): featset_ref = '27092/23/1' ret = self.getImpl().featseq_report(self.getContext(), @@ -405,7 +405,7 @@ def mytest_sequenceSet(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_ProtComp(self): protcomp_ref = '29939/15/1' ret = self.getImpl().protcomp_report(self.getContext(), @@ -428,7 +428,7 @@ def mytest_genomeComp(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_MSA(self): msa_ref = '70362/27/1' ret = self.getImpl().msa_report(self.getContext(),