diff --git a/testanalysis.py b/testanalysis.py index cc36ad1..88fb6db 100644 --- a/testanalysis.py +++ b/testanalysis.py @@ -59,6 +59,7 @@ detections = pd.DataFrame(data=detectedDic) #this is our main object for detecti #define summary printing for multiple steps def summary(stage): print(f"Summary Stage {stage}") + print(injections.head()) print(f"Number of files injected: {len(Counter(injections['file']))}") print(f"Number of files with detections: {len(Counter(detections['file']))}") print("=========================") @@ -76,12 +77,49 @@ summary(1) minDM = (10**2.5) * 0.95 #as per signal generation, plus a bit of wiggle room print(f"Filtering out pulsars (DM below {int(minDM)}...)") detections = detections[detections['dm'] > minDM] +detections = detections.reset_index(drop=True) summary(2) #Let's do detection matching! Yaaaay! #What detections line up to which injections? This will determine which ones got missed entirely. -#Define some kind of epsilon for DM and pulse width; if detection is within epsilon in both DM and PW we can match it. -dmEps = 0.05 -pwEps = 0.2 -#and define an auxiliary array of bools for injections containing the "is matched?" information. -isMatched = [False] * len(injections['dm']) +#Define some kind of epsilon for DM and pulse width; if detection is within epsilon in DM we can match it. +dmEps = 5 +#and define an auxiliary array of 0s for injections. List of detection counts! 
#matchCount[k] is how many detections were matched to the k-th ROW of injections.
#It is positional (row order), so it stays valid whatever index labels injections carries.
matchCount = np.zeros(len(injections), dtype=int)
#also keep track of false positives (again positional, one entry per row of detections):
falsePositiveMask = [False] * len(detections)

#Pull the columns once; they do not change while we loop over the detections.
injFile = injections['file']
injDm = injections['dm']

#Match with plain boolean masks instead of a string-built query():
#a file name containing a quote (or a NaN dm) cannot break the expression,
#and nothing has to be re-parsed for every detection.
for detPos, detection in enumerate(detections.itertuples()):
    sameFile = injFile == detection.file
    #an injection matches when the detection's DM lies strictly inside (dm - dmEps, dm + dmEps)
    inDmWindow = ((injDm - dmEps) < detection.dm) & ((injDm + dmEps) > detection.dm)
    #row positions (not index labels) of the matching injections
    hitPositions = np.flatnonzero((sameFile & inDmWindow).to_numpy())
    matches = injections.iloc[hitPositions]

    if len(matches) > 0:
        print(f"Detection: DM {detection.dm} and PW {detection.pulseWidth}")
        print(matches)
        if len(matches) == 1:
            matchCount[hitPositions[0]] += 1
            print("======")
        else:
            #more than one injection in the same file inside the DM window: ambiguous, bail out
            raise ValueError("MULTIPLE MATCHES OHNO")
    else: #no matching injection...
        falsePositiveMask[detPos] = True
        print(f"NO MATCH FOR: DM {detection.dm} and PW {detection.pulseWidth}")
        print("Injections in file:")
        print(injections[sameFile])

#All masks are flat boolean sequences with one entry per row, so they can be used
#directly as injections[mask] / detections[mask].
#(Wrapping the comparison in [...] would give a one-element list holding the array, not a mask.)
matchMaskInj = matchCount > 0
matchMaskDet = np.logical_not(falsePositiveMask)
missedMask = matchCount == 0

#So where are we?
#We have multiple datasets.
#1. List of all injected pulses. [injections]
#2. List of detections with pulsars filtered out. [detections]
#3. Number of times each injection was detected [matchCount]
#4. A mask for only detected injections [matchMaskInj]
#5. A mask for only true positives [matchMaskDet]
#6. A mask for only missed injections [missedMask]
#7. A mask for false positives [falsePositiveMask]