initial test of merging for susie
This commit is contained in:
parent
597e901f56
commit
2847289886
62
csv_merge.py
Normal file
62
csv_merge.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
import logging, os
|
||||
|
||||
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
def process(directory):
    """Merge candidate results into the main CSV of every sub-folder of *directory*.

    For each immediate sub-folder ``<name>`` the function reads
    ``<name>/<name>.csv`` and ``<name>/cands/results_a.csv``, then hands both
    to ``merge`` which writes ``<name>/<name>_merged.csv``.
    """
    # Snapshot the folder list up front so the listing is complete before
    # any merged files are written.
    folders = [entry for entry in os.scandir(directory) if entry.is_dir()]

    for folder in folders:
        obs_name = folder.name
        main_path = os.path.join(folder.path, f"{obs_name}.csv")
        cand_path = os.path.join(folder.path, "cands", "results_a.csv")
        out_path = os.path.join(folder.path, f"{obs_name}_merged.csv")

        main_frame = pd.read_csv(main_path)
        cand_frame = pd.read_csv(cand_path)
        merge(main_frame, cand_frame, out_path)
|
||||
|
||||
def _cand_value(name, tag):
    """Return the number that follows *tag* (e.g. ``"dm_"``) in a candidate name.

    The value runs up to the next underscore; a trailing ``.h5`` extension is
    dropped. ``float`` already accepts zero-padded text such as
    ``"00056.70000"``, so no manual zero stripping is done (stripping zeros
    would turn ``"10"`` into ``"1"`` and ``"0.0"`` into ``"."``).
    """
    return float(name.split(tag)[1].split("_")[0].split(".h5")[0])


def merge(mainDF:pd.DataFrame, candDF:pd.DataFrame, outname) -> None:
    """Copy candidate probabilities/labels onto the matching rows of *mainDF* and save it.

    Two columns, ``probability`` and ``label``, are appended to *mainDF*
    (in place) and initialised to zero. Each row of *candDF* must unpack into
    ``(name, probability, label)``; the DM and SNR are parsed out of ``name``
    (``..._dm_<value>_..._snr_<value>...``) and compared, rounded to two
    decimals, against the ``dm`` and ``snr`` columns of *mainDF*. When exactly
    one row matches, its ``probability`` and ``label`` are overwritten;
    zero or multiple matches are logged as errors and skipped.

    Args:
        mainDF: Frame with ``dm`` and ``snr`` columns; modified in place.
        candDF: Frame whose rows are ``(name, probability, label)``.
        outname: Path the merged frame is written to with ``DataFrame.to_csv``.
    """
    # add new columns to main dataframe
    colNum = len(mainDF.columns)
    # Float default: probabilities are fractional, and writing a float into an
    # integer column relies on silent upcasting that newer pandas rejects.
    mainDF.insert(colNum, "probability", 0.0)
    mainDF.insert(colNum + 1, "label", 0)

    # The rounded lookup columns do not depend on the candidate, so compute
    # them once instead of once per candidate.
    dmRounded = mainDF['dm'].map(lambda d: round(d, 2))
    snrRounded = mainDF['snr'].map(lambda s: round(s, 2))

    # iterate over candidates
    for name, probability, label in candDF.itertuples(index=False):
        # get dm and snr via string parsing the cand name
        dm = _cand_value(name, "dm_")
        snr = _cand_value(name, "snr_")

        # use those values to index the main dataframe and replace values
        matches = mainDF.index[(dmRounded == round(dm, 2)) & (snrRounded == round(snr, 2))]
        if len(matches) > 1:
            logger.error("%s: Multiple matches found for DM %s and SNR %s.", outname, dm, snr)
        elif len(matches) == 0:
            logger.error("%s: No matches found for DM %s and SNR %s.", outname, dm, snr)
        else:
            index = matches[0]
            mainDF.loc[index, 'probability'] = probability
            mainDF.loc[index, 'label'] = label

    mainDF.to_csv(outname)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: either batch-process every observation folder
    # under --path, or merge a single observation folder given by --directory.
    parser = ArgumentParser(
        description="Merge results_a.csv into each main CSV file. Merged CSV has a _merged.csv suffix.",
        formatter_class=ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('-p','--path', type=str, help="Main folder containing all observation folders.")
    parser.add_argument('-d','--directory', type=str, help="Single folder to process.")
    parser.set_defaults(path=None, directory=None)
    values = parser.parse_args()

    if values.path is not None:
        # --path takes precedence when both options are supplied.
        process(values.path)
    elif values.directory is not None:
        # abspath drops a trailing separator and resolves "." so that
        # basename always yields the real folder name.
        dirname = os.path.basename(os.path.abspath(values.directory))
        mainCSV = pd.read_csv(os.path.join(values.directory, f"{dirname}.csv"))
        candCSV = pd.read_csv(os.path.join(values.directory, "cands","results_a.csv"))
        merge(mainCSV, candCSV, os.path.join(values.directory, f"{dirname}_merged.csv"))
    else:
        # Nothing to do without a target; say so instead of exiting silently.
        parser.error("one of -p/--path or -d/--directory is required")
|
Loading…
Reference in a new issue