greenburstAux/singlepulse.py
2025-08-22 13:19:09 -04:00

365 lines
13 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Shawaiz Tabassum
This script uses code from YOUR.
Modified by Abyss Halley.
"""
import h5py
import numpy as np
import pylab as plt
import math
from scipy.signal import detrend
from your.utils.math import smad_plotter
import glob
import os
import pandas as pd
from scipy.signal import find_peaks
#import smplotlib #plot style, not needed; AH: commented out
from pygdsm import GlobalSkyModel
from astropy.coordinates import SkyCoord
from astropy.coordinates import ICRS, Galactic, FK4, FK5
from astropy.coordinates import Angle, Latitude, Longitude
import astropy.units as u
#AH: Modifying this to support multiple directories
main_dir = os.path.join("/ldata","trunk")
pulsar_dir_list = [os.path.join(main_dir, name) for name in
["data_2025-04-24_07-36-04", "data_2025-04-29_07-50-16", "data_2025-04-30_07-53-07", "data_2025-04-30_08-18-17", "data_2025-05-01_07-47-34"]]
#AH: old variable: pulsar_dir
pulsar_period = 1.34789947290 * 1000 #period of the pulsar. AH: in ms
#AH: Leaving these as default for now.
flux_threshold = 35 #minimum flux in mJy
distance_peaks = 6 #Number of samples/bins each pulse needs to be away from another pulse to be considered a detection.
time_threshold = 0.1 #error interval for matching pulses only at pulsar spin period apart. Set to 1 if don't want to apply this filter
#Set options
detrend_ft = True
mad_filter = True
save_path_list = [os.path.join(name, "calib-pngs", "") for name in pulsar_dir_list]
#AH: old variable: save_path
#%%
#Need to get radio sky temparature at the location of the pulsar. Get it using pygdsm.
t_sys = 18 #Fig 3 of the GBT document: https://greenbankobservatory.org/wp-content/uploads/2017/07/GBTpg.pdf)
freq_mhz = 1400
sky_coord = SkyCoord(339.492 * u.deg, 0.406 * u.deg, frame="galactic") #AH: For J1643-4522
gsm = GlobalSkyModel()
t_sky = gsm.get_sky_temperature(sky_coord, freq_mhz)
#t_sky = 4.9805965
t = t_sys + t_sky
gain = 2.0 #same doc as above
n_p = 2.0 #number of polarizations
bw = 960*10**6 #Hz
def radiometer(T, G, np, bandwidth, W, sigma):
"""
Parameters
----------
T : float
temperature of sky plus the system temperature for the telescope in K.
G : float
Gain of the telescope in K/Jy.
np : float
number of polarizations.
bw : float
bandwith of the system in hertz.
W : float
width of the pulse in seconds.
sigma : float
the pulse flux in terms of standard deviations. Pulse height/off_pulse_std
Returns
-------
flux_mjy : TYPE
DESCRIPTION.
"""
sqt_term = math.sqrt(np*bandwidth*W)
denom = G*sqt_term
S = T/denom
flux_mjy = sigma*S * 10**3 #mJy
return flux_mjy
def std_to_mjy(pulse):
"""
Parameters
----------
pulse : arr
The pulse timeseries you want to normalize.
Returns
-------
ts_mjy : arr
Returns normalized time series such that the maximum is the flux in mjy calculated for the brightest pulse and minimum is for the lowest value oberved trough.
"""
peaks, peaks_dict = find_peaks(pulse, width=1, rel_height=0.5)
sorted_peaks = sorted(peaks, key=lambda i: pulse[i], reverse=True)
flux_max = pulse[sorted_peaks[0]]
width_max = np.round((peaks_dict["widths"][np.where(peaks== sorted_peaks[0])]))[0]
width_max_s = width_max*0.000256
flipped_ts = -pulse
mins, mins_dict = find_peaks(flipped_ts, width=1)
sorted_mins = sorted(mins, key=lambda i: flipped_ts[i], reverse=True)
flux_min = pulse[sorted_mins[0]]
width_min = np.round((mins_dict["widths"][np.where(mins== sorted_mins[0])]))[0]
width_min_s = width_min*0.000256
exclude_fraction = width_max/256
N = len(pulse)
window_size = int(N * exclude_fraction / 2)
mask = np.ones(N, dtype=bool)
mask[max(0, sorted_peaks[0] - window_size):min(N, sorted_peaks[0] + window_size)] = False
off_pulse_std = np.std(pulse[mask])
std_max = flux_max /off_pulse_std
std_min = flux_min /off_pulse_std
min_mjy = radiometer(t, gain, n_p, bw, width_min_s, std_min)
max_mjy = radiometer(t, gain, n_p, bw, width_max_s, std_max)
ts_mjy = (((pulse - np.min(pulse)) / (np.max(pulse) - np.min(pulse))) * (max_mjy - min_mjy)) + min_mjy
return ts_mjy
def compute_snr_from_h5(h5_file, detrend_ft=True, mad_filter=True, save=False, show=False, save_path=None):
"""
Parameters
----------
h5_file : str
Path to the h5 file..
detrend_ft : boolean, optional
Choose whether to detrend or not. The default is True.
mad_filter : boolean, optional
Choose whether to apply the MAD RFI filter or not. The default is True.
Returns
-------
flux : float
Flux in units of standard deviations.
width_t : float
width of the pulse in units of seconds.
"""
fname = os.path.basename(h5_file)
parts = fname.split("_")
peak_mjd = np.float64(parts[2])
time_pulse = float(parts[4])
dm = float(parts[6])
dm_label = np.round(float(parts[6]), 2)
label_dict = {
"MJD": peak_mjd,
"Pulse Time (s)": time_pulse,
"DM": dm_label,
"Flux (mJy)": None,
"Flux P2 (mJy)": None,
"Flux P3 (mJy)": None,
}
with h5py.File(h5_file, "r") as f:
if detrend_ft:
freq_time = detrend(np.array(f["data_freq_time"])[:, ::-1].T)
else:
freq_time = np.array(f["data_freq_time"])[:, ::-1].T
freq_time[freq_time != freq_time] = 0
freq_time -= np.median(freq_time)
freq_time /= np.std(freq_time)
width, tsamp = (f.attrs["width"], f.attrs["tsamp"])
if mad_filter:
freq_time = smad_plotter(freq_time, float(mad_filter))
ts_y = freq_time.sum(0)
tlen = freq_time.shape[1]
l = np.linspace(-tlen // 2, tlen // 2, tlen)
ts = l * tsamp * (width * 1000 / 2 if width > 1 else 1000)
ts_norm = std_to_mjy(ts_y)
peaks, peaks_dict = find_peaks(ts_norm, width=(1,10), distance=distance_peaks, height=flux_threshold, rel_height=0.5,)
heights = peaks_dict["peak_heights"]
widths_all = peaks_dict["widths"]
sorted_indices = np.argsort(heights)[::-1] #descending
sorted_peaks = peaks[sorted_indices]
sorted_heights = heights[sorted_indices]
sorted_widths = np.round(widths_all[sorted_indices], decimals=0)
max_idx = np.where(ts_y == max(ts_y[124:133]))
ref_peak = int(max_idx[0])
if not (124 < sorted_peaks[0] < 133):
# Find the index of ref_peak
idx_cent = np.where(sorted_peaks == ref_peak)[0]
if idx_cent.size > 0:
idx_cent = idx_cent[0]
cent = sorted_peaks[idx_cent]
# Remove ref_peak and place it at the start
sorted_peaks = np.delete(sorted_peaks, idx_cent)
sorted_peaks = np.insert(sorted_peaks, 0, cent)
else:
print(f"Warning: ref_peak {ref_peak} not found in sorted_peaks")
#check if the peak is close to an integer multiple of the MSP period.
reference_peak = sorted_peaks[0]
final_peaks = [reference_peak]
for pk in sorted_peaks[1:]:
delta = (ts[pk] - ts[reference_peak]) / pulsar_period
if abs(delta - round(delta)) < time_threshold:
final_peaks.append(pk)
peaks_mask = np.isin(sorted_peaks, final_peaks)
selected_heights = sorted_heights[peaks_mask]
selected_widths = sorted_widths[peaks_mask]
exclude_fractions = np.array(selected_widths)/256
# Exclude a window around the pulse for off-pulse estimation
N = len(ts_y)
window_sizes = N * exclude_fractions/2
window_sizes = np.array(window_sizes.astype(int))
mask = np.ones(N, dtype=bool)
time_peaks = []
for i in range(0, (len(final_peaks))):
mask[max(0, final_peaks[i] - window_sizes[i]):min(N, final_peaks[i] + window_sizes[i])] = False
time_peaks.append(ts[final_peaks[i]])
# off_pulse_mean = np.mean(ts_y[mask])
off_pulse_std = np.std(ts_y[mask])
widths_t = np.array(selected_widths)*tsamp
flux_stds = ts_y[final_peaks] / off_pulse_std
fluxes_mjy =[]
for i in range(0, (len(final_peaks))):
wid = widths_t[i]
flx = flux_stds[i]
flux = radiometer(t, gain, n_p, bw, wid, flx)
fluxes_mjy.append(flux)
label_dict["Flux (mJy)"] = f"{fluxes_mjy[0]:.2f}"
if len(fluxes_mjy) >= 2:
label_dict["Flux P2 (mJy)"] = f"{fluxes_mjy[1]:.2f}"
if len(fluxes_mjy) >= 3:
label_dict["Flux P3 (mJy)"] = f"{fluxes_mjy[2]:.2f}"
plt.figure(figsize=(12, 7))
plt.plot(ts, ts_norm)
plt.scatter(ts[final_peaks[:]], ts_norm[final_peaks[:]], marker='o', label='Pulse Peak', facecolors='none', edgecolors='black', s=90)
# Add label text in top right
text_lines_all = [f"{k}: {v}" for k, v in label_dict.items()]
text_lines_r = text_lines_all[0:2]
label_text_r = "\n".join(text_lines_r)
if len(selected_heights) == 1:
text_lines_l = text_lines_all[2:4]
label_text_l = "\n".join(text_lines_l)
elif len(selected_heights) == 2:
text_lines_l = text_lines_all[2:5]
label_text_l = "\n".join(text_lines_l)
else:
text_lines_l = text_lines_all[2:6]
label_text_l = "\n".join(text_lines_l)
plt.gca().text(
0.98, 0.98, label_text_r,
transform=plt.gca().transAxes,
ha='right', va='top',
fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1)
)
plt.gca().text(
0.25, 0.98, label_text_l,
transform=plt.gca().transAxes,
ha='right', va='top',
fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1)
)
plt.xlabel("Time (ms)", fontsize=22)
plt.ylabel("Flux (mJy) ", fontsize=22)
plt.tick_params(axis='both', labelsize=18)
plt.grid(False)
plt.tight_layout()
savepath = save_path +"mjd_" + str(peak_mjd) + "_tcand_" + str(time_pulse) + "_dm_" + str(dm) + ".png"
# Save or display
if save:
plt.savefig(savepath, dpi=300)
if show:
plt.show()
else:
plt.close()
if show:
plt.show()
else:
plt.close()
peak_mjds = []
for i in range(0,len(selected_heights)):
pulse_mjd = peak_mjd + (time_pulse/86400) + (time_peaks[i] / 8.64e+7)
peak_mjds.append(pulse_mjd)
return fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm
#%%
for pulsar_dir in pulsar_dir_list:
save_path = os.path.join(pulsar_dir, "calib-pngs", "")
if not os.path.exists(save_path):
os.makedirs(save_path)
h5_dir = os.path.join(pulsar_dir,"cands","")
print(h5_dir)
h5_files = sorted(glob.glob(f"{h5_dir}*.h5"))
results = []
for filepath in h5_files:
try:
fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm = compute_snr_from_h5(filepath,
detrend_ft=True,
mad_filter=True,
save=True,
show=False,
save_path=save_path)
for i in range(0,(len(fluxes_mjy))):
results.append({
"mjd": peak_mjds[i],
"flux_std": flux_stds[i],
"flux_mJy": fluxes_mjy[i],
"width_s": widths_t[i],
"Pulse time (ms)": time_peaks[i],
"tcand": time_pulse,
"dm": dm,
"filename": os.path.basename(filepath)
})
except Exception as e:
print(f"Error processing {filepath}: {e}")
df = pd.DataFrame(results)
#%%
df['pulse_energy_jyus'] = (df['flux_mJy']/1000) * (df['width_s']*10**6)
df.to_csv(save_path+"flux.csv", index=False)