From 002fe62744a4ef4b910d7e3c9b160aa979302276 Mon Sep 17 00:00:00 2001 From: Abyss Halley Date: Thu, 21 Aug 2025 09:50:52 -0400 Subject: [PATCH] adding single pulse analysis from shawaiz --- singlepulse.py | 352 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100644 singlepulse.py diff --git a/singlepulse.py b/singlepulse.py new file mode 100644 index 0000000..68cc155 --- /dev/null +++ b/singlepulse.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@author: Shawaiz Tabassum +This script uses code from YOUR. +""" +import h5py +import numpy as np +import pylab as plt +import math +from scipy.signal import detrend +from your.utils.math import smad_plotter +import glob +import os +import pandas as pd +from scipy.signal import find_peaks +import smplotlib #plot style, not needed + +# from pygdsm import GlobalSkyModel +# from astropy.coordinates import SkyCoord +# from astropy.coordinates import ICRS, Galactic, FK4, FK5 +# from astropy.coordinates import Angle, Latitude, Longitude +# import astropy.units as u + +pulsar_dir = "/home/st3105/greenburst/J1944+0907/data_2025-02-27_17-41-33_composite/" +pulsar_period = 5.185201908798642 #period of the pulsar. + +flux_threshold = 35 #minimum flux in mJy +distance_peaks = 6 #Number of samples/bins each pulse needs to be away from another pulse to be considered a detection. +time_threshold = 0.1 #error interval for matching pulses only at pulsar spin period apart. Set to 1 if don't want to apply this filter + +#Set options +detrend_ft = True +mad_filter = True +save_path = "/home/st3105/greenburst/J1944+0907/data_2025-02-27_17-41-33_composite/calib-pngs/take3/" +#%% + +#Need to get radio sky temparature at the location of the pulsar. Get it using pygdsm. + +t_sys = 18 #Fig 3 of the GBT document: https://greenbankobservatory.org/wp-content/uploads/2017/07/GBTpg.pdf) +# freq_mhz = 1400 +# sky_coord = SkyCoord(47.160175 * u.deg, -7.357055 * u.deg, frame="galactic") #For J1944+0907 +# gsm = GlobalSkyModel() +# t_sky = gsm.get_sky_temperature(sky_coord, freq_mhz) +t_sky = 4.9805965 +t = t_sys + t_sky +gain = 2.0 #same doc as above +n_p = 2.0 #number of polarizations +bw = 800*10**6 #Hz + +def radiometer(T, G, np, bandwidth, W, sigma): + """ + Parameters + ---------- + T : float + temperature of sky plus the system temperature for the telescope in K. + G : float + Gain of the telescope in K/Jy. + np : float + number of polarizations. + bw : float + bandwith of the system in hertz. + W : float + width of the pulse in seconds. + sigma : float + the pulse flux in terms of standard deviations. Pulse height/off_pulse_std + + Returns + ------- + flux_mjy : TYPE + DESCRIPTION. + + """ + sqt_term = math.sqrt(np*bandwidth*W) + denom = G*sqt_term + + S = T/denom + flux_mjy = sigma*S * 10**3 #mJy + return flux_mjy + +def std_to_mjy(pulse): + """ + Parameters + ---------- + pulse : arr + The pulse timeseries you want to normalize. + + Returns + ------- + ts_mjy : arr + Returns normalized time series such that the maximum is the flux in mjy calculated for the brightest pulse and minimum is for the lowest value oberved trough. + + """ + peaks, peaks_dict = find_peaks(pulse, width=1, rel_height=0.5) + sorted_peaks = sorted(peaks, key=lambda i: pulse[i], reverse=True) + + flux_max = pulse[sorted_peaks[0]] + width_max = np.round((peaks_dict["widths"][np.where(peaks== sorted_peaks[0])]))[0] + width_max_s = width_max*0.000256 + + flipped_ts = -pulse + mins, mins_dict = find_peaks(flipped_ts, width=1) + sorted_mins = sorted(mins, key=lambda i: flipped_ts[i], reverse=True) + + flux_min = pulse[sorted_mins[0]] + width_min = np.round((mins_dict["widths"][np.where(mins== sorted_mins[0])]))[0] + width_min_s = width_min*0.000256 + + exclude_fraction = width_max/256 + N = len(pulse) + window_size = int(N * exclude_fraction / 2) + mask = np.ones(N, dtype=bool) + mask[max(0, sorted_peaks[0] - window_size):min(N, sorted_peaks[0] + window_size)] = False + off_pulse_std = np.std(pulse[mask]) + + std_max = flux_max /off_pulse_std + std_min = flux_min /off_pulse_std + + min_mjy = radiometer(t, gain, n_p, bw, width_min_s, std_min) + max_mjy = radiometer(t, gain, n_p, bw, width_max_s, std_max) + + ts_mjy = (((pulse - np.min(pulse)) / (np.max(pulse) - np.min(pulse))) * (max_mjy - min_mjy)) + min_mjy + return ts_mjy + + +def compute_snr_from_h5(h5_file, detrend_ft=True, mad_filter=True, save=False, show=False, save_path=None): + """ + Parameters + ---------- + h5_file : str + Path to the h5 file.. + detrend_ft : boolean, optional + Choose whether to detrend or not. The default is True. + mad_filter : boolean, optional + Choose whether to apply the MAD RFI filter or not. The default is True. + + Returns + ------- + flux : float + Flux in units of standard deviations. + width_t : float + width of the pulse in units of seconds. + """ + + fname = os.path.basename(h5_file) + parts = fname.split("_") + + peak_mjd = np.float64(parts[2]) + time_pulse = float(parts[4]) + dm = float(parts[6]) + dm_label = np.round(float(parts[6]), 2) + + label_dict = { + "MJD": peak_mjd, + "Pulse Time (s)": time_pulse, + "DM": dm_label, + "Flux (mJy)": None, + "Flux P2 (mJy)": None, + "Flux P3 (mJy)": None, + } + + with h5py.File(h5_file, "r") as f: + + if detrend_ft: + freq_time = detrend(np.array(f["data_freq_time"])[:, ::-1].T) + else: + freq_time = np.array(f["data_freq_time"])[:, ::-1].T + + freq_time[freq_time != freq_time] = 0 + freq_time -= np.median(freq_time) + freq_time /= np.std(freq_time) + + width, tsamp = (f.attrs["width"], f.attrs["tsamp"]) + + if mad_filter: + freq_time = smad_plotter(freq_time, float(mad_filter)) + + ts_y = freq_time.sum(0) + + tlen = freq_time.shape[1] + l = np.linspace(-tlen // 2, tlen // 2, tlen) + ts = l * tsamp * (width * 1000 / 2 if width > 1 else 1000) + + ts_norm = std_to_mjy(ts_y) + + peaks, peaks_dict = find_peaks(ts_norm, width=(1,10), distance=distance_peaks, height=flux_threshold, rel_height=0.5,) + + heights = peaks_dict["peak_heights"] + widths_all = peaks_dict["widths"] + + sorted_indices = np.argsort(heights)[::-1] #descending + sorted_peaks = peaks[sorted_indices] + sorted_heights = heights[sorted_indices] + sorted_widths = np.round(widths_all[sorted_indices], decimals=0) + + max_idx = np.where(ts_y == max(ts_y[124:133])) + ref_peak = int(max_idx[0]) + + if not (124 < sorted_peaks[0] < 133): + # Find the index of ref_peak + idx_cent = np.where(sorted_peaks == ref_peak)[0] + if idx_cent.size > 0: + idx_cent = idx_cent[0] + cent = sorted_peaks[idx_cent] + + # Remove ref_peak and place it at the start + sorted_peaks = np.delete(sorted_peaks, idx_cent) + sorted_peaks = np.insert(sorted_peaks, 0, cent) + else: + print(f"Warning: ref_peak {ref_peak} not found in sorted_peaks") + + #check if the peak is close to an integer multiple of the MSP period. + reference_peak = sorted_peaks[0] + final_peaks = [reference_peak] + for pk in sorted_peaks[1:]: + delta = (ts[pk] - ts[reference_peak]) / pulsar_period + if abs(delta - round(delta)) < time_threshold: + final_peaks.append(pk) + + peaks_mask = np.isin(sorted_peaks, final_peaks) + selected_heights = sorted_heights[peaks_mask] + selected_widths = sorted_widths[peaks_mask] + + exclude_fractions = np.array(selected_widths)/256 + + # Exclude a window around the pulse for off-pulse estimation + N = len(ts_y) + window_sizes = N * exclude_fractions/2 + window_sizes = np.array(window_sizes.astype(int)) + mask = np.ones(N, dtype=bool) + + time_peaks = [] + for i in range(0, (len(final_peaks))): + mask[max(0, final_peaks[i] - window_sizes[i]):min(N, final_peaks[i] + window_sizes[i])] = False + time_peaks.append(ts[final_peaks[i]]) + + # off_pulse_mean = np.mean(ts_y[mask]) + off_pulse_std = np.std(ts_y[mask]) + + widths_t = np.array(selected_widths)*tsamp + flux_stds = ts_y[final_peaks] / off_pulse_std + + fluxes_mjy =[] + for i in range(0, (len(final_peaks))): + wid = widths_t[i] + flx = flux_stds[i] + flux = radiometer(t, gain, n_p, bw, wid, flx) + fluxes_mjy.append(flux) + + label_dict["Flux (mJy)"] = f"{fluxes_mjy[0]:.2f}" + + if len(fluxes_mjy) >= 2: + label_dict["Flux P2 (mJy)"] = f"{fluxes_mjy[1]:.2f}" + if len(fluxes_mjy) >= 3: + label_dict["Flux P3 (mJy)"] = f"{fluxes_mjy[2]:.2f}" + + + plt.figure(figsize=(12, 7)) + plt.plot(ts, ts_norm) + plt.scatter(ts[final_peaks[:]], ts_norm[final_peaks[:]], marker='o', label='Pulse Peak', facecolors='none', edgecolors='black', s=90) + + # Add label text in top right + text_lines_all = [f"{k}: {v}" for k, v in label_dict.items()] + text_lines_r = text_lines_all[0:2] + label_text_r = "\n".join(text_lines_r) + if len(selected_heights) == 1: + text_lines_l = text_lines_all[2:4] + label_text_l = "\n".join(text_lines_l) + elif len(selected_heights) == 2: + text_lines_l = text_lines_all[2:5] + label_text_l = "\n".join(text_lines_l) + else: + text_lines_l = text_lines_all[2:6] + label_text_l = "\n".join(text_lines_l) + + plt.gca().text( + 0.98, 0.98, label_text_r, + transform=plt.gca().transAxes, + ha='right', va='top', + fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1) + ) + plt.gca().text( + 0.25, 0.98, label_text_l, + transform=plt.gca().transAxes, + ha='right', va='top', + fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1) + ) + + + plt.xlabel("Time (ms)", fontsize=22) + plt.ylabel("Flux (mJy) ", fontsize=22) + plt.tick_params(axis='both', labelsize=18) + plt.grid(False) + plt.tight_layout() + + savepath = save_path +"mjd_" + str(peak_mjd) + "_tcand_" + str(time_pulse) + "_dm_" + str(dm) + ".png" + + # Save or display + if save: + plt.savefig(savepath, dpi=300) + if show: + plt.show() + else: + plt.close() + if show: + plt.show() + else: + plt.close() + + peak_mjds = [] + + for i in range(0,len(selected_heights)): + pulse_mjd = peak_mjd + (time_pulse/86400) + (time_peaks[i] / 8.64e+7) + peak_mjds.append(pulse_mjd) + + return fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm + + +#%% +h5_dir = pulsar_dir + "h5/" +h5_files = sorted(glob.glob(f"{pulsar_dir}*.h5")) + +#%% +results = [] + +for filepath in h5_files: + try: + fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm = compute_snr_from_h5(filepath, + detrend_ft=True, + mad_filter=True, + save=True, + show=False, + save_path=save_path) + + for i in range(0,(len(fluxes_mjy))): + results.append({ + "mjd": peak_mjds[i], + "flux_std": flux_stds[i], + "flux_mJy": fluxes_mjy[i], + "width_s": widths_t[i], + "Pulse time (ms)": time_peaks[i], + "tcand": time_pulse, + "dm": dm, + "filename": os.path.basename(filepath) + }) + except Exception as e: + print(f"Error processing {filepath}: {e}") + +df = pd.DataFrame(results) +#%% +df['pulse_energy_jyus'] = (df['flux_mJy']/1000) * (df['width_s']*10**6) +df.to_csv(save_path+"flux.csv", index=False)