good plots
This commit is contained in:
parent
488ea2d6e4
commit
54c128b45f
159
testanalysis.py
159
testanalysis.py
|
@ -1,10 +1,16 @@
|
||||||
import os
|
import os
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
from time import sleep
|
||||||
|
import logging
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib import rcParams
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logging.basicConfig(level=logging.INFO) #change for debug prints
|
||||||
|
|
||||||
def linesplit(line):
|
def linesplit(line):
|
||||||
"""
|
"""
|
||||||
Splits line of file into useful components.
|
Splits line of file into useful components.
|
||||||
|
@ -58,28 +64,43 @@ detections = pd.DataFrame(data=detectedDic) #this is our main object for detecti
|
||||||
|
|
||||||
#define summary printing for multiple steps
|
#define summary printing for multiple steps
|
||||||
def summary(stage):
|
def summary(stage):
|
||||||
print(f"Summary Stage {stage}")
|
logger.info(f"Summary Stage {stage}")
|
||||||
print(injections.head())
|
logger.info(f"Number of files injected: {len(Counter(injections['file']))}")
|
||||||
print(f"Number of files injected: {len(Counter(injections['file']))}")
|
logger.info(f"Number of files with detections: {len(Counter(detections['file']))}")
|
||||||
print(f"Number of files with detections: {len(Counter(detections['file']))}")
|
logger.info("=========================")
|
||||||
print("=========================")
|
logger.info(f"Number of pulses injected: {len(injections['dm'])}")
|
||||||
print(f"Number of pulses injected: {len(injections['dm'])}")
|
logger.info(f"Number of pulses detected: {len(detections['dm'])}")
|
||||||
print(f"Number of pulses detected: {len(detections['dm'])}")
|
logger.info("=========================")
|
||||||
print("=========================")
|
logger.info(f"File ratio: {round(len(Counter(detections['file']))/len(Counter(injections['file'])), 3)}")
|
||||||
print(f"File ratio: {round(len(Counter(detections['file']))/len(Counter(injections['file'])), 3)}")
|
logger.info("=========================")
|
||||||
print(f"Detection ratio: {round(len(detections['dm'])/len(injections['dm']), 3)}")
|
|
||||||
print("=========================")
|
|
||||||
|
|
||||||
#print initial summary
|
#print initial summary
|
||||||
summary(1)
|
summary(1)
|
||||||
|
|
||||||
|
#let's track how many detections get removed by filtering
|
||||||
|
preFilterCount = len(detections['dm'])
|
||||||
|
|
||||||
#many files contained pulsar bursts, so we filter those out via DM
|
#many files contained pulsar bursts, so we filter those out via DM
|
||||||
minDM = (10**2.5) * 0.95 #as per signal generation, plus a bit of wiggle room
|
minDM = (10**2.5) * 0.95 #as per signal generation, plus a bit of wiggle room
|
||||||
print(f"Filtering out pulsars (DM below {int(minDM)}...)")
|
logger.info(f"Filtering out pulsars (DM below {int(minDM)}...)")
|
||||||
detections = detections[detections['dm'] > minDM]
|
detections = detections[detections['dm'] > minDM]
|
||||||
detections = detections.reset_index(drop=True)
|
detections = detections.reset_index(drop=True)
|
||||||
summary(2)
|
summary(2)
|
||||||
|
|
||||||
|
#five files have SO MANY FALSE POSITIVES so get rid of them here?
|
||||||
|
remFiles = ["data_2025-04-30_07-53-07", "data_2025-05-01_07-47-34", "data_2025-04-24_07-36-04", "data_2025-04-29_07-50-16", "data_2025-04-30_08-18-17"]
|
||||||
|
logger.info("Filtering out the five problem files...")
|
||||||
|
remMask = [True] * len(detections['dm'])
|
||||||
|
for detection in detections.itertuples():
|
||||||
|
if detection.file in remFiles:
|
||||||
|
remMask[detection.Index] = False
|
||||||
|
detections = detections[remMask]
|
||||||
|
detections = detections.reset_index(drop=True)
|
||||||
|
summary(3)
|
||||||
|
|
||||||
|
postFilterCount = len(detections['dm'])
|
||||||
|
logger.info(f"Removed {preFilterCount-postFilterCount} detections by filtering.")
|
||||||
|
|
||||||
#Let's do detection matching! Yaaaay!
|
#Let's do detection matching! Yaaaay!
|
||||||
#What detections line up to which injections? This will determine which ones got missed entirely.
|
#What detections line up to which injections? This will determine which ones got missed entirely.
|
||||||
#Define some kind of epsilon for DM and pulse width; if detection is within epsilon in DM we can match it.
|
#Define some kind of epsilon for DM and pulse width; if detection is within epsilon in DM we can match it.
|
||||||
|
@ -97,29 +118,119 @@ for detection in detections.itertuples():
|
||||||
)
|
)
|
||||||
matches = injections.query(qstring)
|
matches = injections.query(qstring)
|
||||||
if len(matches) > 0:
|
if len(matches) > 0:
|
||||||
print(f"Detection: DM {detection.dm} and PW {detection.pulseWidth}")
|
logger.debug(f"Detection: DM {detection.dm} and PW {detection.pulseWidth}")
|
||||||
print(matches)
|
logger.debug(matches)
|
||||||
if len(matches) == 1:
|
if len(matches) == 1:
|
||||||
i = matches.index[0]
|
i = matches.index[0]
|
||||||
matchCount[i] += 1
|
matchCount[i] += 1
|
||||||
print("======")
|
logger.debug("======")
|
||||||
elif len(matches) > 1:
|
elif len(matches) > 1:
|
||||||
raise ValueError("MULTIPLE MATCHES OHNO")
|
raise ValueError("MULTIPLE MATCHES OHNO")
|
||||||
else: #no matching injection...
|
else: #no matching injection...
|
||||||
falsePositiveMask[detection.Index] = True
|
falsePositiveMask[detection.Index] = True
|
||||||
print(f"NO MATCH FOR: DM {detection.dm} and PW {detection.pulseWidth}")
|
logger.debug(f"NO MATCH FOR: DM {detection.dm} and PW {detection.pulseWidth}")
|
||||||
print("Injections in file:")
|
logger.debug("Injections in file:")
|
||||||
print(injections.query(f"(file == '{detection.file}')"))
|
logger.debug(injections.query(f"(file == '{detection.file}')"))
|
||||||
matchMaskInj = [matchCount > 0]
|
matchMaskInj = matchCount > 0
|
||||||
matchMaskDet = np.logical_not(falsePositiveMask)
|
matchMaskDet = np.logical_not(falsePositiveMask)
|
||||||
missedMask = [matchCount == 0]
|
missedMask = matchCount == 0
|
||||||
|
|
||||||
#So where are we?
|
#So where are we?
|
||||||
#We have multiple datasets.
|
#We have multiple datasets.
|
||||||
#1. List of all injected pulses. [injections]
|
#1. Dataframe of all injected pulses. [injections]
|
||||||
#2. List of detections with pulsars filtered out. [detections]
|
#2. Dataframe of detections with pulsars filtered out. [detections]
|
||||||
#3. Number of times each injection was detected [matchCount]
|
#3. List of number of times each injection was detected [matchCount]
|
||||||
#4. A mask for only detected injections [matchMaskInj]
|
#4. A mask for only detected injections [matchMaskInj]
|
||||||
#5. A mask for only true positives [matchMaskDet]
|
#5. A mask for only true positives [matchMaskDet]
|
||||||
#6. A mask for only missed injections [missedMask]
|
#6. A mask for only missed injections [missedMask]
|
||||||
#7. A mask for false positives [falsePositiveMask]
|
#7. A mask for false positives [falsePositiveMask]
|
||||||
|
|
||||||
|
logger.info(f"Successful detection ratio: {Counter(matchMaskInj)[True]/(len(injections['dm']))}")
|
||||||
|
|
||||||
|
#Let's try to figure out if certain files are responsible for the weird amount of false
|
||||||
|
#positives at around 10^2.7 pc/cc
|
||||||
|
|
||||||
|
fpFileCounts = Counter(detections[falsePositiveMask]['file'])
|
||||||
|
sortedFPCounts = [(k, v) for k,v in sorted(fpFileCounts.items(), key=lambda value: -value[1])]
|
||||||
|
logger.info("False positive counts:")
|
||||||
|
for f, c in sortedFPCounts:
|
||||||
|
logger.info(f"{f}: {c}")
|
||||||
|
|
||||||
|
|
||||||
|
#Let's set a matplotlib default to make figures a bit bigger cos i like them
|
||||||
|
rcParams["figure.figsize"] = [7,5]
|
||||||
|
plt.rcParams['figure.constrained_layout.use'] = True
|
||||||
|
|
||||||
|
# #Injected pulses
|
||||||
|
# plt.figure(figsize=(7,10))
|
||||||
|
# allAx = plt.subplot(311)
|
||||||
|
# _, bins, _ = allAx.hist(np.log10(injections['dm']), bins=15)
|
||||||
|
# allAx.set_title("All injected pulses")
|
||||||
|
# detAx = plt.subplot(312, sharex=allAx, sharey=allAx)
|
||||||
|
# detAx.hist(np.log10(injections['dm'][matchMaskInj]), bins=bins)
|
||||||
|
# detAx.set_title("Detected pulses")
|
||||||
|
# misAx = plt.subplot(313, sharex=allAx, sharey=allAx)
|
||||||
|
# misAx.hist(np.log10(injections['dm'][missedMask]), bins=bins)
|
||||||
|
# misAx.set_title("Missed pulses")
|
||||||
|
# plt.ylabel("Count")
|
||||||
|
# plt.xlabel(r"DM (log pc cm$^3$)")
|
||||||
|
# plt.draw()
|
||||||
|
|
||||||
|
# #Detected pulses unstacked
|
||||||
|
# plt.figure(figsize=(7,10))
|
||||||
|
# allAx = plt.subplot(311)
|
||||||
|
# _, bins, _ = allAx.hist(np.log10(detections['dm']), bins=15)
|
||||||
|
# allAx.set_title("All detections")
|
||||||
|
# detAx = plt.subplot(312, sharex=allAx, sharey=allAx)
|
||||||
|
# detAx.hist(np.log10(detections['dm'][matchMaskDet]), bins=bins)
|
||||||
|
# detAx.set_title("Detected pulses")
|
||||||
|
# misAx = plt.subplot(313, sharex=allAx, sharey=allAx)
|
||||||
|
# misAx.hist(np.log10(detections['dm'][falsePositiveMask]), bins=bins)
|
||||||
|
# misAx.set_title("False positives")
|
||||||
|
# plt.ylabel("Count")
|
||||||
|
# plt.xlabel(r"DM (log pc cm$^3$)")
|
||||||
|
# plt.draw()
|
||||||
|
|
||||||
|
#Stacked histogram of injections
|
||||||
|
plt.figure(figsize=(7,10))
|
||||||
|
ax = plt.subplot(17,1,(1,8))
|
||||||
|
ax.hist([np.log10(injections['dm'][matchMaskInj]), np.log10(injections['dm'][missedMask])],
|
||||||
|
stacked=True, label=['Detected', 'Missed'], bins=15)
|
||||||
|
ax.yaxis.get_major_locator().set_params(integer=True)
|
||||||
|
ax.legend(loc='upper center')
|
||||||
|
ax.set_xlabel(r"DM (log pc cm$^3$)")
|
||||||
|
ax.tick_params(labelbottom=True)
|
||||||
|
ax2 = plt.subplot(17,1,(9,16))
|
||||||
|
ax2.hist([np.log10(injections['pulseWidth'][matchMaskInj]), np.log10(injections['pulseWidth'][missedMask])],
|
||||||
|
stacked=True, label=['Detected', 'Missed'], bins=15)
|
||||||
|
ax2.yaxis.get_major_locator().set_params(integer=True)
|
||||||
|
ax2.set_xlabel(r"Pulse width (log s)")
|
||||||
|
plt.ylabel("Count")
|
||||||
|
plt.title("Injected pulses")
|
||||||
|
wordAx = plt.subplot(17,1,17)
|
||||||
|
wordAx.text(.3,.5,f"Overall detection rate: {round(Counter(matchMaskInj)[True]/(len(injections['dm']))*100,1)}%", size=14)
|
||||||
|
wordAx.set_axis_off()
|
||||||
|
plt.draw()
|
||||||
|
|
||||||
|
#Stacked histogram of detections
|
||||||
|
plt.figure(figsize=(7,10))
|
||||||
|
ax = plt.subplot(17,1,(1,8))
|
||||||
|
ax.hist([np.log10(detections['dm'][matchMaskDet]), np.log10(detections['dm'][falsePositiveMask])],
|
||||||
|
stacked=True, label=['True detection', 'False positive'], bins=15)
|
||||||
|
ax.yaxis.get_major_locator().set_params(integer=True)
|
||||||
|
ax.set_xlabel(r"DM (log pc cm$^3$)")
|
||||||
|
ax.legend(loc='upper center')
|
||||||
|
ax2 = plt.subplot(17,1,(9,16))
|
||||||
|
ax2.hist([np.log10(detections['pulseWidth'][matchMaskDet]), np.log10(detections['pulseWidth'][falsePositiveMask])],
|
||||||
|
stacked=True, label=['True detection', 'False positive'], bins=15)
|
||||||
|
ax2.yaxis.get_major_locator().set_params(integer=True)
|
||||||
|
ax2.set_xlabel("Pulse width (log s)")
|
||||||
|
plt.ylabel("Count")
|
||||||
|
plt.title("Detections")
|
||||||
|
wordAx = plt.subplot(17,1,17)
|
||||||
|
wordAx.text(.3,.5,f"False positive rate: {round(Counter(falsePositiveMask)[True]/(len(detections['dm']))*100,1)}%", size=14)
|
||||||
|
wordAx.set_axis_off()
|
||||||
|
plt.draw()
|
||||||
|
|
||||||
|
#block end of script
|
||||||
|
plt.show()
|
Loading…
Reference in a new issue