MNT: Standardize COLUMN_DTYPES global variable name

bioscan-ml · Oct 6, 2024 · ae85cfd
1 parent 2b6258b
commit ae85cfd
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/bioscan_dataset/bioscan1m.py b/bioscan_dataset/bioscan1m.py
@@ -14,7 +14,7 @@
 import PIL
 from torchvision.datasets.vision import VisionDataset
 
-column_dtypes = {
+COLUMN_DTYPES = {
     "sampleid": str,
     "processid": str,
     "uri": str,
@@ -167,7 +167,7 @@ def _load_metadata(self) -> pd.DataFrame:
         df = pd.read_csv(
             os.path.join(self.root, "BIOSCAN_Insect_Dataset_metadata.tsv"),
             sep="\t",
-            dtype=column_dtypes,
+            dtype=COLUMN_DTYPES,
             usecols=usecols,
         )
         # Convert missing values to NaN

diff --git a/bioscan_dataset/bioscan5m.py b/bioscan_dataset/bioscan5m.py
@@ -14,7 +14,7 @@
 import PIL
 from torchvision.datasets.vision import VisionDataset
 
-df_dtypes = {
+COLUMN_DTYPES = {
     "processid": str,
     "sampleid": str,
     "taxon": "category",
@@ -223,7 +223,7 @@ def _load_metadata(self) -> pd.DataFrame:
         pandas.DataFrame
             The metadata DataFrame.
         """
-        df = pd.read_csv(self.metadata_path, dtype=df_dtypes, usecols=df_usecols)
+        df = pd.read_csv(self.metadata_path, dtype=COLUMN_DTYPES, usecols=df_usecols)
         if self.max_nucleotides is not None:
             df["dna_barcode"] = df["dna_barcode"].str[: self.max_nucleotides]
         if self.reduce_repeated_barcodes: