Skip to content

Commit

Permalink
MNT: Standardize COLUMN_DTYPES global variable name
Browse files (browse the repository at this point in the history)
  • Loading branch information
scottclowe committed Oct 6, 2024
1 parent 2b6258b commit ae85cfd
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions bioscan_dataset/bioscan1m.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import PIL
from torchvision.datasets.vision import VisionDataset

- column_dtypes = {
+ COLUMN_DTYPES = {
"sampleid": str,
"processid": str,
"uri": str,
Expand Down Expand Up @@ -167,7 +167,7 @@ def _load_metadata(self) -> pd.DataFrame:
df = pd.read_csv(
os.path.join(self.root, "BIOSCAN_Insect_Dataset_metadata.tsv"),
sep="\t",
- dtype=column_dtypes,
+ dtype=COLUMN_DTYPES,
usecols=usecols,
)
# Convert missing values to NaN
Expand Down
4 changes: 2 additions & 2 deletions bioscan_dataset/bioscan5m.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import PIL
from torchvision.datasets.vision import VisionDataset

- df_dtypes = {
+ COLUMN_DTYPES = {
"processid": str,
"sampleid": str,
"taxon": "category",
Expand Down Expand Up @@ -223,7 +223,7 @@ def _load_metadata(self) -> pd.DataFrame:
pandas.DataFrame
The metadata DataFrame.
"""
- df = pd.read_csv(self.metadata_path, dtype=df_dtypes, usecols=df_usecols)
+ df = pd.read_csv(self.metadata_path, dtype=COLUMN_DTYPES, usecols=df_usecols)
if self.max_nucleotides is not None:
df["dna_barcode"] = df["dna_barcode"].str[: self.max_nucleotides]
if self.reduce_repeated_barcodes:
Expand Down

0 comments on commit ae85cfd

Please sign in to comment.