initial test of merging for susie
This commit is contained in:
parent
597e901f56
commit
2847289886
62
csv_merge.py
Normal file
62
csv_merge.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
import logging, os
|
||||||
|
|
||||||
|
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
def process(directory):
    """Merge candidate results into the main CSV of every sub-folder.

    For each immediate sub-directory ``<name>`` of ``directory`` this reads
    ``<name>/<name>.csv`` and ``<name>/cands/results_a.csv``, then hands both
    tables to ``merge`` with ``<name>/<name>_merged.csv`` as the output path.

    Args:
        directory: Folder whose immediate sub-directories are processed.
    """
    # Collect the sub-folder paths up front, before any file is read or written.
    folders = []
    for entry in os.scandir(directory):
        if entry.is_dir():
            folders.append(entry.path)

    for folder in folders:
        # The main CSV and the merged output are both named after the folder.
        stem = os.path.basename(folder)
        main_path = os.path.join(folder, f"{stem}.csv")
        cand_path = os.path.join(folder, "cands", "results_a.csv")
        merged_path = os.path.join(folder, f"{stem}_merged.csv")

        main_table = pd.read_csv(main_path)
        cand_table = pd.read_csv(cand_path)
        merge(main_table, cand_table, merged_path)
|
||||||
|
|
||||||
|
def _value_after(name, key):
    """Return the float written directly after ``key`` in a candidate name."""
    # The number ends at the next underscore or at the ".h5" extension.
    # float() already accepts zero-padded text such as "000100.50000", so no
    # stripping is needed (stripping "0" would turn "100" into "1").
    token = name.split(key)[1].split("_")[0].split(".h5")[0]
    return float(token)


def merge(mainDF:pd.DataFrame, candDF:pd.DataFrame, outname):
    """Copy candidate probabilities and labels into ``mainDF`` and save it.

    ``mainDF`` is modified in place: ``probability`` and ``label`` columns are
    appended (initialised to zero) and filled in for every candidate that
    matches exactly one row. A candidate matches a row when the DM and SNR
    parsed from its name (the numbers following ``dm_`` and ``snr_``) equal
    the row's ``dm`` and ``snr`` values after rounding to two decimals.
    Candidates with no match, or with more than one match, are reported via
    ``logger.error`` and skipped. The resulting table is written with
    ``DataFrame.to_csv``.

    Args:
        mainDF: Table with ``dm`` and ``snr`` columns; receives the new columns.
        candDF: Table whose rows each hold exactly three values, in order:
            candidate name, probability, label.
        outname: Destination passed to ``DataFrame.to_csv``; also used as the
            prefix of logged error messages.

    Raises:
        ValueError: If the text after ``dm_`` / ``snr_`` in a candidate name
            is not a number.
        IndexError: If a candidate name lacks ``dm_`` or ``snr_``.
    """
    # Append the new columns at the end of the main dataframe. The probability
    # column starts as float so that assigning fractional values later does not
    # force a dtype change on the column.
    colNum = len(mainDF.columns)
    mainDF.insert(colNum, "probability", 0.0)
    mainDF.insert(colNum + 1, "label", 0)

    for name, probability, label in candDF.itertuples(index=False):
        # DM and SNR are only available through the candidate's name.
        dm = _value_after(name, "dm_")
        snr = _value_after(name, "snr_")

        # Round the targets once instead of once per row.
        dmTarget = round(dm, 2)
        snrTarget = round(snr, 2)
        dmMatch = mainDF['dm'].map(lambda d: round(d, 2) == dmTarget)
        snrMatch = mainDF['snr'].map(lambda s: round(s, 2) == snrTarget)
        matches = mainDF.index[dmMatch & snrMatch]

        if len(matches) > 1:
            logger.error("%s: Multiple matches found for DM %s and SNR %s.", outname, dm, snr)
        elif len(matches) == 0:
            logger.error("%s: No matches found for DM %s and SNR %s.", outname, dm, snr)
        else:
            # Use the index label as-is so non-integer indexes work as well.
            index = matches[0]
            mainDF.loc[index, 'probability'] = probability
            mainDF.loc[index, 'label'] = label

    mainDF.to_csv(outname)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: merge either every observation folder under
    # --path, or the single folder given with --directory.
    parser = ArgumentParser(
        description="Merge results_a.csv into each main CSV file. Merged CSV has a _merged.csv suffix.",
        formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('-p','--path', type=str, help="Main folder containing all observation folders.")
    parser.add_argument('-d','--directory', type=str, help="Single folder to process.")
    parser.set_defaults(path=None, directory=None)
    values = parser.parse_args()

    if values.path is not None:
        # --path takes precedence when both options are supplied.
        process(values.path)
    elif values.directory is not None:
        # Normalise first: a trailing separator ("obs1/") or "." would
        # otherwise make basename() return an unusable folder name.
        directory = os.path.abspath(values.directory)
        dirname = os.path.basename(directory)
        mainCSV = pd.read_csv(os.path.join(directory, f"{dirname}.csv"))
        candCSV = pd.read_csv(os.path.join(directory, "cands", "results_a.csv"))
        merge(mainCSV, candCSV, os.path.join(directory, f"{dirname}_merged.csv"))
    else:
        # Nothing to do without an input location; report it rather than
        # exiting silently.
        parser.error("one of -p/--path or -d/--directory is required")
|
Loading…
Reference in a new issue