diff --git a/candidate_filter/candidate_filter.py b/candidate_filter/candidate_filter.py
index 3f912dd..77e0d6a 100755
--- a/candidate_filter/candidate_filter.py
+++ b/candidate_filter/candidate_filter.py
@@ -56,5 +56,13 @@ def main(args):
 
     # Read files into a single pandas DataFrame
     with open(args.input, "r") as f:
-        xml_list = [i.strip() for i in f.readlines() if i]
-        df_cands_ini, obs_meta_data = reading_cands.read_candidate_files(xml_list)
+        candidate_files_list = [i.strip() for i in f.readlines() if i]
+
+        # Dispatch on the extension of the first listed file: peasoup xml
+        # overviews and (FFA) csv files need different readers.
+        if candidate_files_list[0].endswith('.xml'):
+            df_cands_ini, obs_meta_data = reading_cands.read_xml_candidate_files(candidate_files_list)
+        elif candidate_files_list[0].endswith('.csv'):
+            df_cands_ini, obs_meta_data = reading_cands.read_csv_candidate_files(candidate_files_list)
+        else:
+            raise ValueError(f"Unrecognised candidate file type: {candidate_files_list[0]}")
diff --git a/candidate_filter/reading_cands.py b/candidate_filter/reading_cands.py
index 9af2ca1..7f69385 100644
--- a/candidate_filter/reading_cands.py
+++ b/candidate_filter/reading_cands.py
@@ -3,9 +3,10 @@
 import xml.etree.ElementTree as ET
 from astropy import units as u
 from astropy.coordinates import SkyCoord
+import numpy as np
 
 
-def read_candidate_files(files, verbose=True):
+def read_xml_candidate_files(files, verbose=True):
     # Reads candidates files and include the candidates in a single pandas DataFrame
 
     #files = glob.glob(path + '*/overview.xml')
@@ -57,5 +58,78 @@ def read_candidate_files(files, verbose=True):
     return df_candidates, obs_meta_data
 
 
+def read_csv_candidate_files(files, verbose=True):
+    # Reads candidate csv files and collects the candidates in a single pandas DataFrame
+
+    if verbose:
+        print(f"{len(files)} candidates files found.")
+
+    all_rows = []
+    obs_meta_data = {}
+    for file_index, file in enumerate(files):
+        # strip stray separators left over from comma-separated input lists
+        file = file.replace(',','')
+
+        # skipped header row holds the column names for the candidate info
+        candidates = np.genfromtxt(file,dtype='str',skip_header=1, delimiter=',')
+        # a file with a single data row comes back 1-D; force 2-D so row indexing works
+        candidates = np.atleast_2d(candidates)
+
+        # start with the last row of the candidate file, which is observation
+        # metadata, to get the beam coordinates for all candidates in that file
+        for candidate_number, candidate in reversed(list(enumerate(candidates))):
+
+            if candidate_number == np.size(candidates,0) - 1:
+                # read observation metadata from the last row of the candidate file
+                tsamp = float(candidate[3])
+                fft_size = 0.0 #this is handled correctly in candidate filter
+                obs_length = float(candidate[2])
+                nsamples = int(obs_length/tsamp)
+                speed_of_light = 299792458.0
+                obs_length_over_c = obs_length / speed_of_light
+                obs_meta_data = {"tsamp": tsamp,
+                                 "nsamples": nsamples,
+                                 "obs_length": obs_length,
+                                 "fft_size": fft_size,
+                                 'obs_length_over_c': obs_length_over_c}
+                src_raj = float(candidate[0])
+                src_dej = float(candidate[1])
+                src_rajd, src_dejd = convert_to_deg(src_raj, src_dej)
+
+            else:
+                new_dict = {}
+                new_dict['cand_id_in_file'] = candidate_number
+                new_dict['src_raj'] = src_raj #defined from last row of file
+                new_dict['src_rajd'] = src_rajd
+                new_dict['src_dej'] = src_dej
+                new_dict['src_dejd'] = src_dejd
+                new_dict['file_index'] = file_index
+                new_dict['period'] = float(candidate[0])
+                new_dict['dm'] = float(candidate[2])
+                new_dict['snr'] = float(candidate[5])
+                new_dict['acc'] = 0.0
+                new_dict['file'] = file
+                # set as the default minimum required nassoc to disable low nassoc
+                # filtering for FFA outputs; nh, is_adjacent, is_physical, ddm
+                # count/snr ratio and byte offset are not available as for peasoup
+                new_dict['nassoc'] = 3
+                all_rows.append(new_dict)
+
+    df_candidates = pd.DataFrame(all_rows)
+
+    # Additional type casting may be necessary or not necessary at all
+    df_candidates = df_candidates.astype({"snr": float, "dm": float, "period": float,
+                                          "acc": float, "nassoc": int})
+
+    if verbose:
+        print(f"{len(df_candidates)} candidates read.")
+
+    # sort by snr
+    df_candidates.sort_values('snr', inplace=True, ascending=False)
+    df_candidates.reset_index(inplace=True, drop=True)
+
+    return df_candidates, obs_meta_data
+
+
 def create_row(root, candidates, file, file_index):
     # Read a candidate file and creates data rows