From 002fe62744a4ef4b910d7e3c9b160aa979302276 Mon Sep 17 00:00:00 2001
From: Abyss Halley <abyss@kobold60.com>
Date: Thu, 21 Aug 2025 09:50:52 -0400
Subject: [PATCH] adding single pulse analysis from shawaiz

---
 singlepulse.py | 352 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 352 insertions(+)
 create mode 100644 singlepulse.py

diff --git a/singlepulse.py b/singlepulse.py
new file mode 100644
index 0000000..68cc155
--- /dev/null
+++ b/singlepulse.py
@@ -0,0 +1,352 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+@author: Shawaiz Tabassum
+This script uses code from YOUR.
+"""
+import h5py
+import numpy as np
+import pylab as plt
+import math
+from scipy.signal import detrend
+from your.utils.math import smad_plotter
+import glob
+import os
+import pandas as pd
+from scipy.signal import find_peaks
+import smplotlib #plot style, not needed
+
+# from pygdsm import GlobalSkyModel
+# from astropy.coordinates import SkyCoord
+# from astropy.coordinates import ICRS, Galactic, FK4, FK5  
+# from astropy.coordinates import Angle, Latitude, Longitude
+# import astropy.units as u
+
+pulsar_dir = "/home/st3105/greenburst/J1944+0907/data_2025-02-27_17-41-33_composite/"
+pulsar_period = 5.185201908798642 #period of the pulsar.
+
+flux_threshold = 35 #minimum flux in mJy
+distance_peaks = 6 #Number of samples/bins each pulse needs to be away from another pulse to be considered a detection.
+time_threshold = 0.1 #error interval for matching pulses only at pulsar spin period apart. Set to 1 if don't want to apply this filter
+
+#Set options
+detrend_ft = True
+mad_filter = True
+save_path = "/home/st3105/greenburst/J1944+0907/data_2025-02-27_17-41-33_composite/calib-pngs/take3/"
+#%%
+
+#Need to get radio sky temparature at the location of the pulsar. Get it using pygdsm.
+
+t_sys = 18 #Fig 3 of the GBT document: https://greenbankobservatory.org/wp-content/uploads/2017/07/GBTpg.pdf)
+# freq_mhz = 1400
+# sky_coord = SkyCoord(47.160175 * u.deg, -7.357055 * u.deg, frame="galactic") #For J1944+0907
+# gsm = GlobalSkyModel()
+# t_sky = gsm.get_sky_temperature(sky_coord, freq_mhz)
+t_sky = 4.9805965
+t = t_sys + t_sky
+gain = 2.0 #same doc as above
+n_p = 2.0 #number of polarizations
+bw = 800*10**6 #Hz
+
+def radiometer(T, G, np, bandwidth, W, sigma):
+    """
+    Parameters
+    ----------
+    T : float
+        temperature of sky plus the system temperature for the telescope in K.    
+    G : float
+        Gain of the telescope in K/Jy.
+    np : float
+        number of polarizations.
+    bw : float
+        bandwith of the system in hertz.
+    W : float
+        width of the pulse in seconds.
+    sigma : float
+        the pulse flux in terms of standard deviations. Pulse height/off_pulse_std
+
+    Returns
+    -------
+    flux_mjy : TYPE
+        DESCRIPTION.
+
+    """
+    sqt_term = math.sqrt(np*bandwidth*W)
+    denom = G*sqt_term
+    
+    S = T/denom 
+    flux_mjy = sigma*S * 10**3 #mJy
+    return flux_mjy
+
+def std_to_mjy(pulse):
+    """
+    Parameters
+    ----------
+    pulse : arr
+        The pulse timeseries you want to normalize.
+
+    Returns
+    -------
+    ts_mjy : arr
+        Returns normalized time series such that the maximum is the flux in mjy calculated for the brightest pulse and minimum is for the lowest value oberved trough.
+
+    """
+    peaks, peaks_dict = find_peaks(pulse, width=1, rel_height=0.5)
+    sorted_peaks = sorted(peaks, key=lambda i: pulse[i], reverse=True)
+    
+    flux_max = pulse[sorted_peaks[0]]
+    width_max = np.round((peaks_dict["widths"][np.where(peaks== sorted_peaks[0])]))[0]
+    width_max_s = width_max*0.000256
+
+    flipped_ts = -pulse
+    mins, mins_dict = find_peaks(flipped_ts, width=1)
+    sorted_mins = sorted(mins, key=lambda i: flipped_ts[i], reverse=True)
+    
+    flux_min = pulse[sorted_mins[0]]
+    width_min = np.round((mins_dict["widths"][np.where(mins== sorted_mins[0])]))[0]
+    width_min_s = width_min*0.000256
+    
+    exclude_fraction = width_max/256
+    N = len(pulse)
+    window_size = int(N * exclude_fraction / 2)
+    mask = np.ones(N, dtype=bool)
+    mask[max(0, sorted_peaks[0] - window_size):min(N, sorted_peaks[0] + window_size)] = False
+    off_pulse_std = np.std(pulse[mask])
+    
+    std_max = flux_max /off_pulse_std
+    std_min = flux_min /off_pulse_std
+    
+    min_mjy = radiometer(t, gain, n_p, bw, width_min_s, std_min)
+    max_mjy = radiometer(t, gain, n_p, bw, width_max_s, std_max)
+    
+    ts_mjy = (((pulse - np.min(pulse)) / (np.max(pulse) - np.min(pulse))) * (max_mjy - min_mjy)) + min_mjy
+    return ts_mjy
+
+
+def compute_snr_from_h5(h5_file, detrend_ft=True, mad_filter=True, save=False, show=False, save_path=None):
+    """
+    Parameters
+    ----------
+    h5_file : str
+        Path to the h5 file..
+    detrend_ft : boolean, optional
+        Choose whether to detrend or not. The default is True.
+    mad_filter : boolean, optional
+        Choose whether to apply the MAD RFI filter or not. The default is True.
+
+    Returns
+    -------
+    flux : float
+        Flux in units of standard deviations.
+    width_t : float
+        width of the pulse in units of seconds.
+    """
+    
+    fname = os.path.basename(h5_file)
+    parts = fname.split("_")
+
+    peak_mjd = np.float64(parts[2])
+    time_pulse = float(parts[4])
+    dm = float(parts[6])
+    dm_label = np.round(float(parts[6]), 2)
+
+    label_dict = {
+        "MJD": peak_mjd,
+        "Pulse Time (s)": time_pulse,
+        "DM": dm_label,
+        "Flux (mJy)": None,
+        "Flux P2 (mJy)": None,
+        "Flux P3 (mJy)": None,
+        }
+        
+    with h5py.File(h5_file, "r") as f:
+    
+        if detrend_ft:
+            freq_time = detrend(np.array(f["data_freq_time"])[:, ::-1].T)
+        else:
+            freq_time = np.array(f["data_freq_time"])[:, ::-1].T
+    
+        freq_time[freq_time != freq_time] = 0
+        freq_time -= np.median(freq_time)
+        freq_time /= np.std(freq_time)
+        
+        width, tsamp = (f.attrs["width"], f.attrs["tsamp"])
+        
+        if mad_filter:
+            freq_time = smad_plotter(freq_time, float(mad_filter))
+    
+        ts_y = freq_time.sum(0)
+        
+        tlen = freq_time.shape[1]
+        l = np.linspace(-tlen // 2, tlen // 2, tlen)
+        ts = l * tsamp * (width * 1000 / 2 if width > 1 else 1000)
+        
+        ts_norm = std_to_mjy(ts_y)
+        
+        peaks, peaks_dict = find_peaks(ts_norm, width=(1,10), distance=distance_peaks, height=flux_threshold, rel_height=0.5,)
+    
+        heights = peaks_dict["peak_heights"]
+        widths_all = peaks_dict["widths"]
+        
+        sorted_indices = np.argsort(heights)[::-1]  #descending
+        sorted_peaks = peaks[sorted_indices]
+        sorted_heights = heights[sorted_indices]
+        sorted_widths = np.round(widths_all[sorted_indices], decimals=0)
+        
+        max_idx = np.where(ts_y == max(ts_y[124:133]))
+        ref_peak = int(max_idx[0])
+        
+        if not (124 < sorted_peaks[0] < 133):
+            # Find the index of ref_peak
+            idx_cent = np.where(sorted_peaks == ref_peak)[0]
+            if idx_cent.size > 0:
+                idx_cent = idx_cent[0]
+                cent = sorted_peaks[idx_cent]
+                
+                # Remove ref_peak and place it at the start
+                sorted_peaks = np.delete(sorted_peaks, idx_cent)
+                sorted_peaks = np.insert(sorted_peaks, 0, cent)
+            else:
+                print(f"Warning: ref_peak {ref_peak} not found in sorted_peaks")
+        
+        #check if the peak is close to an integer multiple of the MSP period.
+        reference_peak = sorted_peaks[0]
+        final_peaks = [reference_peak]
+        for pk in sorted_peaks[1:]:
+            delta = (ts[pk] - ts[reference_peak]) / pulsar_period
+            if abs(delta - round(delta)) < time_threshold:
+                final_peaks.append(pk)
+        
+        peaks_mask = np.isin(sorted_peaks, final_peaks)
+        selected_heights = sorted_heights[peaks_mask]
+        selected_widths = sorted_widths[peaks_mask]
+
+        exclude_fractions = np.array(selected_widths)/256
+    
+        # Exclude a window around the pulse for off-pulse estimation
+        N = len(ts_y)
+        window_sizes = N * exclude_fractions/2
+        window_sizes = np.array(window_sizes.astype(int))
+        mask = np.ones(N, dtype=bool)
+        
+        time_peaks = []
+        for i in range(0, (len(final_peaks))):
+            mask[max(0, final_peaks[i] - window_sizes[i]):min(N, final_peaks[i] + window_sizes[i])] = False
+            time_peaks.append(ts[final_peaks[i]])
+        
+        # off_pulse_mean = np.mean(ts_y[mask])
+        off_pulse_std = np.std(ts_y[mask])
+        
+        widths_t = np.array(selected_widths)*tsamp
+        flux_stds = ts_y[final_peaks] / off_pulse_std
+        
+        fluxes_mjy =[]
+        for i in range(0, (len(final_peaks))):
+            wid = widths_t[i]
+            flx = flux_stds[i]
+            flux = radiometer(t, gain, n_p, bw, wid, flx)
+            fluxes_mjy.append(flux)
+            
+        label_dict["Flux (mJy)"] = f"{fluxes_mjy[0]:.2f}"
+        
+        if len(fluxes_mjy) >= 2:
+            label_dict["Flux P2 (mJy)"] = f"{fluxes_mjy[1]:.2f}"
+        if len(fluxes_mjy) >= 3: 
+            label_dict["Flux P3 (mJy)"] = f"{fluxes_mjy[2]:.2f}"
+            
+        
+        plt.figure(figsize=(12, 7))
+        plt.plot(ts, ts_norm)
+        plt.scatter(ts[final_peaks[:]], ts_norm[final_peaks[:]], marker='o', label='Pulse Peak', facecolors='none', edgecolors='black', s=90)
+        
+        # Add label text in top right
+        text_lines_all = [f"{k}: {v}" for k, v in label_dict.items()]
+        text_lines_r = text_lines_all[0:2]
+        label_text_r = "\n".join(text_lines_r)
+        if len(selected_heights) == 1:
+            text_lines_l = text_lines_all[2:4]
+            label_text_l = "\n".join(text_lines_l)
+        elif len(selected_heights) == 2:
+            text_lines_l = text_lines_all[2:5]
+            label_text_l = "\n".join(text_lines_l)
+        else:
+            text_lines_l = text_lines_all[2:6]
+            label_text_l = "\n".join(text_lines_l)
+    
+        plt.gca().text(
+            0.98, 0.98, label_text_r,
+            transform=plt.gca().transAxes,
+            ha='right', va='top',
+            fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1)
+        )
+        plt.gca().text(
+            0.25, 0.98, label_text_l,
+            transform=plt.gca().transAxes,
+            ha='right', va='top',
+            fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1)
+        )
+    
+    
+        plt.xlabel("Time (ms)",  fontsize=22)
+        plt.ylabel("Flux (mJy) ",  fontsize=22)
+        plt.tick_params(axis='both', labelsize=18)
+        plt.grid(False)
+        plt.tight_layout()
+        
+        savepath = save_path +"mjd_" + str(peak_mjd) + "_tcand_" + str(time_pulse) + "_dm_" + str(dm) + ".png"
+    
+        # Save or display
+        if save:
+            plt.savefig(savepath, dpi=300)
+            if show:
+                plt.show()
+            else:
+                plt.close()
+        if show:
+            plt.show()
+        else:
+            plt.close()
+            
+        peak_mjds = []
+        
+        for i in range(0,len(selected_heights)):
+            pulse_mjd = peak_mjd + (time_pulse/86400) + (time_peaks[i] / 8.64e+7)
+            peak_mjds.append(pulse_mjd)
+            
+    return fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm
+    
+
+#%%
+h5_dir = pulsar_dir + "h5/"
+h5_files = sorted(glob.glob(f"{pulsar_dir}*.h5"))
+
+#%%
+results = []
+
+for filepath in h5_files:
+    try:
+        fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm = compute_snr_from_h5(filepath, 
+                                                                                                 detrend_ft=True, 
+                                                                                                 mad_filter=True, 
+                                                                                                 save=True, 
+                                                                                                 show=False, 
+                                                                                                 save_path=save_path)
+  
+        for i in range(0,(len(fluxes_mjy))):     
+            results.append({
+                "mjd": peak_mjds[i],
+                "flux_std": flux_stds[i],
+                "flux_mJy": fluxes_mjy[i],
+                "width_s": widths_t[i],
+                "Pulse time (ms)": time_peaks[i],
+                "tcand": time_pulse,
+                "dm": dm,
+                "filename": os.path.basename(filepath)
+            })                
+    except Exception as e:
+        print(f"Error processing {filepath}: {e}")
+
+df = pd.DataFrame(results)
+#%%
+df['pulse_energy_jyus'] = (df['flux_mJy']/1000) * (df['width_s']*10**6)
+df.to_csv(save_path+"flux.csv", index=False)