greenburstAux/singlepulse.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Shawaiz Tabassum
This script uses code from YOUR.
"""
import h5py
import numpy as np
import pylab as plt
import math
from scipy.signal import detrend
from your.utils.math import smad_plotter
import glob
import os
import pandas as pd
from scipy.signal import find_peaks
import smplotlib #plot style, not needed

# from pygdsm import GlobalSkyModel
# from astropy.coordinates import SkyCoord
# from astropy.coordinates import ICRS, Galactic, FK4, FK5
# from astropy.coordinates import Angle, Latitude, Longitude
# import astropy.units as u

pulsar_dir = "/home/st3105/greenburst/J1944+0907/data_2025-02-27_17-41-33_composite/"
pulsar_period = 5.185201908798642 #period of the pulsar.

flux_threshold = 35 #minimum flux in mJy
distance_peaks = 6 #Number of samples/bins each pulse needs to be away from another pulse to be considered a detection.
time_threshold = 0.1 #error interval for matching pulses only at pulsar spin period apart. Set to 1 if don't want to apply this filter

#Set options
detrend_ft = True
mad_filter = True
save_path = "/home/st3105/greenburst/J1944+0907/data_2025-02-27_17-41-33_composite/calib-pngs/take3/"
#%%

#Need to get radio sky temparature at the location of the pulsar. Get it using pygdsm.

t_sys = 18 #Fig 3 of the GBT document: https://greenbankobservatory.org/wp-content/uploads/2017/07/GBTpg.pdf)
# freq_mhz = 1400
# sky_coord = SkyCoord(47.160175 * u.deg, -7.357055 * u.deg, frame="galactic") #For J1944+0907
# gsm = GlobalSkyModel()
# t_sky = gsm.get_sky_temperature(sky_coord, freq_mhz)
t_sky = 4.9805965
t = t_sys + t_sky
gain = 2.0 #same doc as above
n_p = 2.0 #number of polarizations
bw = 800*10**6 #Hz

def radiometer(T, G, np, bandwidth, W, sigma):
    """
    Parameters
    ----------
    T : float
        temperature of sky plus the system temperature for the telescope in K.
    G : float
        Gain of the telescope in K/Jy.
    np : float
        number of polarizations.
    bw : float
        bandwith of the system in hertz.
    W : float
        width of the pulse in seconds.
    sigma : float
        the pulse flux in terms of standard deviations. Pulse height/off_pulse_std

    Returns
    -------
    flux_mjy : TYPE
        DESCRIPTION.

    """
    sqt_term = math.sqrt(np*bandwidth*W)
    denom = G*sqt_term

    S = T/denom
    flux_mjy = sigma*S * 10**3 #mJy
    return flux_mjy

def std_to_mjy(pulse):
    """
    Parameters
    ----------
    pulse : arr
        The pulse timeseries you want to normalize.

    Returns
    -------
    ts_mjy : arr
        Returns normalized time series such that the maximum is the flux in mjy calculated for the brightest pulse and minimum is for the lowest value oberved trough.

    """
    peaks, peaks_dict = find_peaks(pulse, width=1, rel_height=0.5)
    sorted_peaks = sorted(peaks, key=lambda i: pulse[i], reverse=True)

    flux_max = pulse[sorted_peaks[0]]
    width_max = np.round((peaks_dict["widths"][np.where(peaks== sorted_peaks[0])]))[0]
    width_max_s = width_max*0.000256

    flipped_ts = -pulse
    mins, mins_dict = find_peaks(flipped_ts, width=1)
    sorted_mins = sorted(mins, key=lambda i: flipped_ts[i], reverse=True)

    flux_min = pulse[sorted_mins[0]]
    width_min = np.round((mins_dict["widths"][np.where(mins== sorted_mins[0])]))[0]
    width_min_s = width_min*0.000256

    exclude_fraction = width_max/256
    N = len(pulse)
    window_size = int(N * exclude_fraction / 2)
    mask = np.ones(N, dtype=bool)
    mask[max(0, sorted_peaks[0] - window_size):min(N, sorted_peaks[0] + window_size)] = False
    off_pulse_std = np.std(pulse[mask])

    std_max = flux_max /off_pulse_std
    std_min = flux_min /off_pulse_std

    min_mjy = radiometer(t, gain, n_p, bw, width_min_s, std_min)
    max_mjy = radiometer(t, gain, n_p, bw, width_max_s, std_max)

    ts_mjy = (((pulse - np.min(pulse)) / (np.max(pulse) - np.min(pulse))) * (max_mjy - min_mjy)) + min_mjy
    return ts_mjy


def compute_snr_from_h5(h5_file, detrend_ft=True, mad_filter=True, save=False, show=False, save_path=None):
    """
    Parameters
    ----------
    h5_file : str
        Path to the h5 file..
    detrend_ft : boolean, optional
        Choose whether to detrend or not. The default is True.
    mad_filter : boolean, optional
        Choose whether to apply the MAD RFI filter or not. The default is True.

    Returns
    -------
    flux : float
        Flux in units of standard deviations.
    width_t : float
        width of the pulse in units of seconds.
    """

    fname = os.path.basename(h5_file)
    parts = fname.split("_")

    peak_mjd = np.float64(parts[2])
    time_pulse = float(parts[4])
    dm = float(parts[6])
    dm_label = np.round(float(parts[6]), 2)

    label_dict = {
        "MJD": peak_mjd,
        "Pulse Time (s)": time_pulse,
        "DM": dm_label,
        "Flux (mJy)": None,
        "Flux P2 (mJy)": None,
        "Flux P3 (mJy)": None,
        }

    with h5py.File(h5_file, "r") as f:

        if detrend_ft:
            freq_time = detrend(np.array(f["data_freq_time"])[:, ::-1].T)
        else:
            freq_time = np.array(f["data_freq_time"])[:, ::-1].T

        freq_time[freq_time != freq_time] = 0
        freq_time -= np.median(freq_time)
        freq_time /= np.std(freq_time)

        width, tsamp = (f.attrs["width"], f.attrs["tsamp"])

        if mad_filter:
            freq_time = smad_plotter(freq_time, float(mad_filter))

        ts_y = freq_time.sum(0)

        tlen = freq_time.shape[1]
        l = np.linspace(-tlen // 2, tlen // 2, tlen)
        ts = l * tsamp * (width * 1000 / 2 if width > 1 else 1000)

        ts_norm = std_to_mjy(ts_y)

        peaks, peaks_dict = find_peaks(ts_norm, width=(1,10), distance=distance_peaks, height=flux_threshold, rel_height=0.5,)

        heights = peaks_dict["peak_heights"]
        widths_all = peaks_dict["widths"]

        sorted_indices = np.argsort(heights)[::-1]  #descending
        sorted_peaks = peaks[sorted_indices]
        sorted_heights = heights[sorted_indices]
        sorted_widths = np.round(widths_all[sorted_indices], decimals=0)

        max_idx = np.where(ts_y == max(ts_y[124:133]))
        ref_peak = int(max_idx[0])

        if not (124 < sorted_peaks[0] < 133):
            # Find the index of ref_peak
            idx_cent = np.where(sorted_peaks == ref_peak)[0]
            if idx_cent.size > 0:
                idx_cent = idx_cent[0]
                cent = sorted_peaks[idx_cent]

                # Remove ref_peak and place it at the start
                sorted_peaks = np.delete(sorted_peaks, idx_cent)
                sorted_peaks = np.insert(sorted_peaks, 0, cent)
            else:
                print(f"Warning: ref_peak {ref_peak} not found in sorted_peaks")

        #check if the peak is close to an integer multiple of the MSP period.
        reference_peak = sorted_peaks[0]
        final_peaks = [reference_peak]
        for pk in sorted_peaks[1:]:
            delta = (ts[pk] - ts[reference_peak]) / pulsar_period
            if abs(delta - round(delta)) < time_threshold:
                final_peaks.append(pk)

        peaks_mask = np.isin(sorted_peaks, final_peaks)
        selected_heights = sorted_heights[peaks_mask]
        selected_widths = sorted_widths[peaks_mask]

        exclude_fractions = np.array(selected_widths)/256

        # Exclude a window around the pulse for off-pulse estimation
        N = len(ts_y)
        window_sizes = N * exclude_fractions/2
        window_sizes = np.array(window_sizes.astype(int))
        mask = np.ones(N, dtype=bool)

        time_peaks = []
        for i in range(0, (len(final_peaks))):
            mask[max(0, final_peaks[i] - window_sizes[i]):min(N, final_peaks[i] + window_sizes[i])] = False
            time_peaks.append(ts[final_peaks[i]])

        # off_pulse_mean = np.mean(ts_y[mask])
        off_pulse_std = np.std(ts_y[mask])

        widths_t = np.array(selected_widths)*tsamp
        flux_stds = ts_y[final_peaks] / off_pulse_std

        fluxes_mjy =[]
        for i in range(0, (len(final_peaks))):
            wid = widths_t[i]
            flx = flux_stds[i]
            flux = radiometer(t, gain, n_p, bw, wid, flx)
            fluxes_mjy.append(flux)

        label_dict["Flux (mJy)"] = f"{fluxes_mjy[0]:.2f}"

        if len(fluxes_mjy) >= 2:
            label_dict["Flux P2 (mJy)"] = f"{fluxes_mjy[1]:.2f}"
        if len(fluxes_mjy) >= 3:
            label_dict["Flux P3 (mJy)"] = f"{fluxes_mjy[2]:.2f}"


        plt.figure(figsize=(12, 7))
        plt.plot(ts, ts_norm)
        plt.scatter(ts[final_peaks[:]], ts_norm[final_peaks[:]], marker='o', label='Pulse Peak', facecolors='none', edgecolors='black', s=90)

        # Add label text in top right
        text_lines_all = [f"{k}: {v}" for k, v in label_dict.items()]
        text_lines_r = text_lines_all[0:2]
        label_text_r = "\n".join(text_lines_r)
        if len(selected_heights) == 1:
            text_lines_l = text_lines_all[2:4]
            label_text_l = "\n".join(text_lines_l)
        elif len(selected_heights) == 2:
            text_lines_l = text_lines_all[2:5]
            label_text_l = "\n".join(text_lines_l)
        else:
            text_lines_l = text_lines_all[2:6]
            label_text_l = "\n".join(text_lines_l)

        plt.gca().text(
            0.98, 0.98, label_text_r,
            transform=plt.gca().transAxes,
            ha='right', va='top',
            fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1)
        )
        plt.gca().text(
            0.25, 0.98, label_text_l,
            transform=plt.gca().transAxes,
            ha='right', va='top',
            fontsize=18, bbox=dict(boxstyle="round,pad=0.3", fc="white", alpha=0, ec="gray", lw=1)
        )


        plt.xlabel("Time (ms)",  fontsize=22)
        plt.ylabel("Flux (mJy) ",  fontsize=22)
        plt.tick_params(axis='both', labelsize=18)
        plt.grid(False)
        plt.tight_layout()

        savepath = save_path +"mjd_" + str(peak_mjd) + "_tcand_" + str(time_pulse) + "_dm_" + str(dm) + ".png"

        # Save or display
        if save:
            plt.savefig(savepath, dpi=300)
            if show:
                plt.show()
            else:
                plt.close()
        if show:
            plt.show()
        else:
            plt.close()

        peak_mjds = []

        for i in range(0,len(selected_heights)):
            pulse_mjd = peak_mjd + (time_pulse/86400) + (time_peaks[i] / 8.64e+7)
            peak_mjds.append(pulse_mjd)

    return fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm


#%%
h5_dir = pulsar_dir + "h5/"
h5_files = sorted(glob.glob(f"{pulsar_dir}*.h5"))

#%%
results = []

for filepath in h5_files:
    try:
        fluxes_mjy, flux_stds, widths_t, peak_mjds, time_peaks, time_pulse, dm = compute_snr_from_h5(filepath,
                                                                                                 detrend_ft=True,
                                                                                                 mad_filter=True,
                                                                                                 save=True,
                                                                                                 show=False,
                                                                                                 save_path=save_path)

        for i in range(0,(len(fluxes_mjy))):
            results.append({
                "mjd": peak_mjds[i],
                "flux_std": flux_stds[i],
                "flux_mJy": fluxes_mjy[i],
                "width_s": widths_t[i],
                "Pulse time (ms)": time_peaks[i],
                "tcand": time_pulse,
                "dm": dm,
                "filename": os.path.basename(filepath)
            })
    except Exception as e:
        print(f"Error processing {filepath}: {e}")

df = pd.DataFrame(results)
#%%
df['pulse_energy_jyus'] = (df['flux_mJy']/1000) * (df['width_s']*10**6)
df.to_csv(save_path+"flux.csv", index=False)