7 changes: 7 additions & 0 deletions arccnet/cli/main.py
@@ -8,6 +8,7 @@
 from collections import ChainMap, defaultdict
 from collections.abc import Mapping
 
+import arccnet
 from arccnet import load_config
 from arccnet.models.cutouts import config as config_module
 from arccnet.models.cutouts.inference import predict
@@ -239,6 +240,7 @@ def combine_args(args=None):
 
 def main(args=None):
     combined = combine_args(args)
+    arccnet.config = combined
     command = combined.get("command")
     if command == "catalog":
         catalog_commands(combined)
@@ -248,3 +250,8 @@ def main(args=None):
         inference_commands(combined)
     else:
         raise ValueError(f"Unknown command: {command}")
+
+
+if __name__ == "__main__":
+    main()
+    sys.exit(0)
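
Why the new lines matter: main() now publishes the merged CLI options as arccnet.config, so any module that does `import arccnet` sees the runtime configuration rather than only the defaults from load_config. A minimal, self-contained sketch of that pattern; every name below (pkg, combine_args) is an illustrative stand-in, not arccnet's real API:

    # Illustrative sketch only -- "pkg" stands in for the arccnet package namespace.
    from collections import ChainMap
    from types import SimpleNamespace

    pkg = SimpleNamespace(config={"command": None})  # module-level default config

    def combine_args(args=None):
        # CLI arguments take precedence over the package defaults.
        return ChainMap(args or {}, {"command": None})

    def main(args=None):
        combined = combine_args(args)
        pkg.config = combined          # downstream code reads pkg.config at runtime
        return combined.get("command")

    assert main({"command": "catalog"}) == "catalog"
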
41 changes: 26 additions & 15 deletions arccnet/data_generation/data_manager.py
@@ -220,7 +220,24 @@ def search(self, batch_frequency: int, merge_tolerance: timedelta) -> list[Query]:
             meta_datetime = (
                 meta[["datetime"]].drop_duplicates().dropna().sort_values("datetime").reset_index(drop=True)
             )  # adding sorting here... is this going to mess something up?
 
+            if len(meta_datetime) == 0:
+                results.append(
+                    Query(
+                        QTable(
+                            names=[
+                                "target_time",
+                                "datetime",
+                                "start_time",
+                                "end_time",
+                                "record",
+                                "filename",
+                                "url",
+                                "record_T_REC",
+                            ]
+                        )
+                    )
+                )
+                continue
             # generate a mapping between target_time to datetime with the specified tolerance.
             merged_time = pd.merge_asof(
                 left=pd_query[["target_time"]],
@@ -242,18 +259,9 @@ def search(self, batch_frequency: int, merge_tolerance: timedelta) -> list[Query]:
             # which there may be multiple for cutouts at the same full-disk time, and join
             matched_rows = meta[meta["datetime"].isin(merged_time["datetime"])]
 
-            # -- Bit hacky to stop H(T)ARPNUM becoming a float
-            # I think Shane may have found a better way to deal with this?
-            # Convert int64 columns to Int64
-            int64_columns = matched_rows.select_dtypes(include=["int64"]).columns
-            # Create a new DataFrame with Int64 data types
-            new_df = matched_rows.copy()
-            for col in int64_columns:
-                new_df[col] = matched_rows[col].astype("Int64")
-
-            # merged_time <- this is the times that match between the query and output
-            # new_df / matched_rows are the rows in the output at the same time as the query
-            merged_df = pd.merge(merged_time, new_df, on="datetime", how="left")
+            # matched_rows are the rows in the output at the same time as the query
+            merged_df = pd.merge(merged_time, matched_rows, on="datetime", how="left")
             # I hope this isn't nonsense, and keeps the `urls` as a masked column
             # how does this work with sharps/smarps where same datetime for multiple rows
 
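
How the two new steps fit together: the guard appends an empty Query (a zero-row QTable whose columns still exist) and skips the batch when no metadata rows survive de-duplication; otherwise pd.merge_asof maps each requested target_time to the nearest observed datetime within merge_tolerance. A runnable sketch with made-up data follows; the diff truncates the real pd.merge_asof call, so the direction and tolerance arguments shown here are assumptions:

    import pandas as pd
    from datetime import timedelta
    from astropy.table import QTable

    # Empty-result guard: QTable(names=[...]) builds a zero-row table whose
    # columns still exist, so downstream code can index them safely.
    empty = QTable(names=["target_time", "datetime", "url"])
    assert len(empty) == 0

    pd_query = pd.DataFrame({"target_time": pd.to_datetime(["2014-01-01 00:00", "2014-01-01 06:00"])})
    meta_datetime = pd.DataFrame({"datetime": pd.to_datetime(["2014-01-01 00:01", "2014-01-01 12:00"])})

    # Both sides must be sorted on their keys (hence the sort_values above).
    merged_time = pd.merge_asof(
        left=pd_query[["target_time"]],
        right=meta_datetime,
        left_on="target_time",
        right_on="datetime",
        direction="nearest",      # assumption: the actual call is not shown in full
        tolerance=timedelta(hours=1),
    )
    # The 06:00 target has no observation within tolerance, so its datetime is
    # NaT; the later how="left" merge then yields a row with missing values.
    print(merged_time)
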
@@ -305,9 +313,12 @@ def download(self, query_list: list[Query], path: Path = None, overwrite: bool =
 
             # !TODO a way of retrying missing would be good, but JSOC URLs are temporary.
             if new_query is not None:
-                downloaded_files = self._download(
-                    data_list=new_query[~new_query["url"].mask]["url"].data.data, path=path, overwrite=overwrite
-                )
+                if len(new_query) > 0:
+                    downloaded_files = self._download(
+                        data_list=new_query[~new_query.mask["url"]]["url"].data, path=path, overwrite=overwrite
+                    )
+                else:
+                    downloaded_files = []
                 results = self._match(results, downloaded_files)  # should return a results object.
             else:
                 raise NotImplementedError("new_query is none.")
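
Two things change in the download path: an explicit guard for a zero-length table, and how unmasked URLs are selected. On an astropy masked table, t.mask is itself a table of booleans, so t.mask["url"] is the mask for the url column; after filtering out masked rows, a single .data suffices (the old .data.data first fetched the numpy masked array, then unwrapped it to a plain ndarray). A toy sketch of the selection, assuming new_query behaves like an astropy masked Table as the diff suggests:

    # Toy stand-in for new_query; data values are made up.
    from astropy.table import Table

    t = Table({"url": ["http://a", "http://b", "missing"]}, masked=True)
    t["url"].mask = [False, False, True]

    # t.mask is a boolean table, so t.mask["url"] is the url column's mask;
    # ~mask keeps only rows where a URL is actually present.
    present = t[~t.mask["url"]]

    # .data on a MaskedColumn is a numpy masked array; the old .data.data
    # unwrapped it further, which is redundant once masked rows are filtered.
    urls = present["url"].data
    print(list(urls))  # ['http://a', 'http://b']
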