import logging
import os
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


def _observation_paths(directory):
    """Return the (main CSV, candidate CSV, merged CSV) paths for one folder.

    The main CSV is named after the folder itself, the candidate CSV is
    ``cands/results_a.csv`` inside it, and the merged CSV carries a
    ``_merged.csv`` suffix.
    """
    # abspath drops trailing separators and resolves "." so the folder name
    # used for the file names is never empty.
    dirname = os.path.basename(os.path.abspath(directory))
    return (
        os.path.join(directory, f"{dirname}.csv"),
        os.path.join(directory, "cands", "results_a.csv"),
        os.path.join(directory, f"{dirname}_merged.csv"),
    )


def _parse_candidate_value(candidate, key):
    """Extract the float that follows ``<key>_`` in a candidate file name.

    The value ends at the next underscore or at the ``.h5`` extension.
    Zeros are deliberately NOT stripped: stripping turned "100" into "1"
    and "0.00000" into ".", and float() accepts padded zeros anyway.
    """
    token = candidate.split(f"{key}_")[1].split("_")[0].split(".h5")[0]
    return float(token)


def process(directory):
    """Merge candidate results into the main CSV of every sub-folder.

    Args:
        directory: Folder whose immediate sub-folders are each merged via
            :func:`merge`.
    """
    # Collect the paths first so the scandir handle is closed promptly.
    with os.scandir(directory) as entries:
        subdirs = [entry.path for entry in entries if entry.is_dir()]

    for subdir in subdirs:
        main_path, cand_path, out_path = _observation_paths(subdir)
        merge(pd.read_csv(main_path), pd.read_csv(cand_path), out_path)


def merge(mainDF: pd.DataFrame, candDF: pd.DataFrame, outname):
    """Copy candidate probability/label values into the main table and save it.

    Each row of ``candDF`` is matched to a row of ``mainDF`` by the DM and SNR
    encoded in its ``candidate`` name, compared after rounding to 2 decimals.
    Exactly one match is required; zero or multiple matches are logged as
    errors and the candidate is skipped.

    Args:
        mainDF: Table with ``dm`` and ``snr`` columns. Modified in place:
            ``probability`` and ``label`` columns are appended.
        candDF: Table with ``candidate``, ``probability`` and ``label`` columns.
        outname: Path the merged table is written to as CSV.
    """
    # Add the new columns to the main dataframe. probability starts as a
    # float so assigning float values does not rely on implicit upcasting.
    colNum = len(mainDF.columns)
    mainDF.insert(colNum, "probability", 0.0)
    mainDF.insert(colNum + 1, "label", 0)

    # The rounded key columns do not change inside the loop; compute them once.
    dm_rounded = mainDF["dm"].map(lambda d: round(d, 2))
    snr_rounded = mainDF["snr"].map(lambda s: round(s, 2))

    for props in candDF.itertuples(index=False):
        # DM and SNR are recovered by string-parsing the candidate name.
        dm = _parse_candidate_value(props.candidate, "dm")
        snr = _parse_candidate_value(props.candidate, "snr")

        is_match = (dm_rounded == round(dm, 2)) & (snr_rounded == round(snr, 2))
        hits = mainDF.index[is_match]

        if len(hits) > 1:
            logger.error(
                "%s: Multiple matches found for DM %s and SNR %s.", outname, dm, snr
            )
        elif len(hits) == 0:
            logger.error(
                "%s: No matches found for DM %s and SNR %s.", outname, dm, snr
            )
        else:
            # Use the index label as-is so non-integer indexes also work.
            mainDF.loc[hits[0], "probability"] = props.probability
            mainDF.loc[hits[0], "label"] = props.label

    mainDF.to_csv(outname)


def main():
    """Command-line entry point: merge one folder or every sub-folder of a path."""
    parser = ArgumentParser(
        description=(
            "Merge results_a.csv into each main CSV file. "
            "Merged CSV has a _merged.csv suffix."
        ),
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('-p', '--path', type=str,
                        help="Main folder containing all observation folders.")
    parser.add_argument('-d', '--directory', type=str,
                        help="Single folder to process.")
    parser.set_defaults(path=None, directory=None)
    values = parser.parse_args()

    if values.path is not None:
        # Was `process(path)`, which raised NameError: `path` is not defined.
        process(values.path)
    elif values.directory is not None:
        main_path, cand_path, out_path = _observation_paths(values.directory)
        logger.info(f"Working with {main_path} and {cand_path}")
        merge(pd.read_csv(main_path), pd.read_csv(cand_path), out_path)
    else:
        # Nothing to do without a target; show usage instead of exiting silently.
        parser.print_help()


if __name__ == "__main__":
    main()