67 lines
3 KiB
Python
67 lines
3 KiB
Python
import logging, os
|
|
|
|
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
# Configure the root logger so INFO-and-above messages are emitted.
logging.basicConfig(level=logging.INFO)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
|
|
|
|
def process(directory):
    """Merge candidate results into the main CSV of every sub-folder.

    For each immediate sub-directory ``<name>`` of *directory*, read
    ``<name>/<name>.csv`` and ``<name>/cands/results_a.csv`` and pass both
    frames to ``merge`` with ``<name>/<name>_merged.csv`` as the output path.
    """
    folders = [entry.path for entry in os.scandir(directory) if entry.is_dir()]

    for folder in folders:
        # If the last path component is empty, use the parent component.
        name = os.path.basename(folder) or os.path.basename(os.path.dirname(folder))

        main_path = os.path.join(folder, f"{name}.csv")
        cand_path = os.path.join(folder, "cands", "results_a.csv")
        merged_path = os.path.join(folder, f"{name}_merged.csv")

        main_frame = pd.read_csv(main_path)
        cand_frame = pd.read_csv(cand_path)
        merge(main_frame, cand_frame, merged_path)
|
|
|
|
def _candidate_value(candidate, key):
    """Return the number that follows ``<key>_`` in a candidate name.

    The value is the text after the first ``<key>_`` up to the next ``_``
    or ``.h5``. It is handed to ``float`` unchanged: ``float`` already
    accepts zero padding, whereas stripping ``"0"`` characters would turn
    ``"100"`` into ``1`` and ``"0.00000"`` into an unparsable ``"."``.

    Raises:
        IndexError: if ``<key>_`` does not occur in *candidate*.
        ValueError: if the extracted text is not a valid float.
    """
    token = candidate.split(f"{key}_")[1].split("_")[0].split(".h5")[0]
    return float(token)


def merge(mainDF: pd.DataFrame, candDF: pd.DataFrame, outname) -> None:
    """Copy candidate ``probability``/``label`` values into *mainDF* and save it.

    Two columns, ``probability`` and ``label``, are appended to *mainDF*
    (the frame is modified in place). For every row of *candDF* the DM and
    SNR are parsed from the ``candidate`` name and compared, rounded to two
    decimals, against the ``dm`` and ``snr`` columns of *mainDF*. When
    exactly one row matches, that row receives the candidate's probability
    and label; zero or multiple matches are logged as errors and skipped.
    The resulting frame is written to *outname* as CSV.

    Args:
        mainDF: frame with ``dm`` and ``snr`` columns; mutated in place.
        candDF: frame with ``candidate``, ``probability`` and ``label``
            columns.
        outname: destination passed to ``DataFrame.to_csv``.
    """
    # Add the new columns at the end of the main dataframe. "probability"
    # starts as a float so assigning fractional values needs no dtype upcast.
    col_count = len(mainDF.columns)
    mainDF.insert(col_count, "probability", 0.0)
    mainDF.insert(col_count + 1, "label", 0)

    # The main frame's dm/snr do not change in the loop: round them once.
    main_dm = mainDF["dm"].map(lambda d: round(d, 2))
    main_snr = mainDF["snr"].map(lambda s: round(s, 2))

    for props in candDF.itertuples(index=False):
        # DM and SNR are encoded in the candidate name.
        dm = _candidate_value(props.candidate, "dm")
        snr = _candidate_value(props.candidate, "snr")

        # Locate the main-frame rows carrying the same (rounded) DM and SNR.
        hits = mainDF.index[(main_dm == round(dm, 2)) & (main_snr == round(snr, 2))]

        if len(hits) > 1:
            logger.error("%s: Multiple matches found for DM %s and SNR %s.", outname, dm, snr)
        elif len(hits) == 0:
            logger.error("%s: No matches found for DM %s and SNR %s.", outname, dm, snr)
        else:
            target = hits[0]
            mainDF.loc[target, "probability"] = props.probability
            mainDF.loc[target, "label"] = props.label

    mainDF.to_csv(outname)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: merge every observation folder under --path,
    # or just the single folder given with --directory.
    parser = ArgumentParser(
        description="Merge results_a.csv into each main CSV file. Merged CSV has a _merged.csv suffix.",
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('-p', '--path', type=str, help="Main folder containing all observation folders.")
    parser.add_argument('-d', '--directory', type=str, help="Single folder to process.")
    parser.set_defaults(path=None, directory=None)
    values = parser.parse_args()

    if values.path is not None:
        # The parsed option lives on `values`; a bare `path` is undefined here.
        process(values.path)
    elif values.directory is not None:
        # If the directory was given with a trailing separator, its basename
        # is empty, so take the name from the parent component instead.
        dirname = os.path.basename(values.directory)
        if dirname == '':
            dirname = os.path.basename(os.path.dirname(values.directory))

        main_path = os.path.join(values.directory, f"{dirname}.csv")
        cand_path = os.path.join(values.directory, "cands", "results_a.csv")

        mainCSV = pd.read_csv(main_path)
        candCSV = pd.read_csv(cand_path)
        logger.info(f"Working with {main_path} and {cand_path}")
        merge(mainCSV, candCSV, os.path.join(values.directory, f"{dirname}_merged.csv"))
    else:
        # Neither option supplied: show usage instead of silently doing nothing.
        parser.print_help()