Source code for CorrHOD.weights

import numpy as np
import pandas as pd
import healpy as hp
from scipy.interpolate import InterpolatedUnivariateSpline

# Ignore the warning that appears when we are creating a new column in a dataframe
import warnings
try : 
    warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning) # Ignore the warning that appears when we are creating a new column in a dataframe
except AttributeError:
    warnings.filterwarnings('ignore', category=pd.core.common.SettingWithCopyWarning) # Old version pf pandas


[docs]def sky_fraction(ra, dec, nside=256):
    """
    Computes the fraction of the sky covered by the survey.
    
    Parameters
    ----------
    ra : array_like
        Right ascension of the galaxies in degrees.
    
    dec : array_like
        Declination of the galaxies in degrees.
        
    nside : int, optional
        The nside parameter of the HEALPix map. Defaults to `256`.
        
    Returns
    -------
    fsky : float
        The fraction of the sky covered by the survey.
    """
    
    npix = hp.nside2npix(nside)
    phi = np.radians(ra)
    theta = np.radians(90.0 - dec)
    
    pixel_indices = hp.ang2pix(nside, theta, phi)
    pixel_unique, counts = np.unique(pixel_indices, return_counts=True)
    fsky = len(pixel_unique)/npix   # fsky
    
    return fsky

[docs]def comoving_volume(cosmo, 
                    z_min, 
                    z_max, 
                    area: float = None, 
                    fsky: float = None):
    """ 
    Computes the comoving volume associated to the redshift bin [`z_min`, `z_max`].

    Parameters
    ----------
        cosmo
            Cosmology object that can be used to compute the comoving distance.
            It must have a method `comoving_radial_distance(z)` that returns the comoving distance at redshift `z`.
        
        z_min: float or array_like
            The minimum redshift of the redshift bin. (Can be an array, in which case the output is an array)
            Must be of the same format as `z_max` and same length if it is an array.
        
        z_max: float or array_like
            The maximum redshift of the redshift bin. (Can be an array, in which case the output is an array)
            Must be of the same format as `z_min` and same length if it is an array.
        
        area: float, optional
            The area of the survey in square degrees. Defaults to `None`.
        
        fsky: float, optional
            The fraction of the sky covered by the survey. Can be provided instead of area. Defaults to `None`.
        
    Returns
    -------
        comov_vol: float or array_like
            Comoving volume associated to the redshift bin [`z_min`, `z_max`].
            The output has the same format as `z_min` and `z_max`.
    """
    
    if fsky is None and area is None:
        raise ValueError("Either `fsky` or `area` must be specified.")
    elif fsky is not None and area is not None:
        warnings.warn("Both `fsky` and `area` are specified. `fsky` will be used.")
        area = fsky * 4*np.pi * (180.0/np.pi)**2
    elif area is None:
        area = fsky * 4*np.pi * (180.0/np.pi)**2

    # Compute the comoving distance associated to the redshift bin
    comov_dist_min = cosmo.comoving_radial_distance(z_min)
    comov_dist_max = cosmo.comoving_radial_distance(z_max)
    
    # Convert the area from square degrees to square radians
    area = area * (np.pi/180)**2

    # Compute the comoving volume associated to the redshift bin
    comov_vol = (area/3) * (comov_dist_max**3 - comov_dist_min**3)

    return comov_vol


[docs]def ScottsBinEdges(data) -> np.ndarray :
    """
    Computes the bin edges for a histogram using Scott's rule.
    Scott's rule is a rule of thumb for choosing the bin width of a histogram.
    It is based on the standard deviation of the data and is a function of the
    sample size. It is a good compromise when no other information is known
    about the data.
    
    Parameters
    ----------
    data : array_like
        The data we want to bin.
        
    Returns
    -------
    edges : ndarray
        The edges of the bins. Length `nbins + 1`.
    
    Notes
    -----
    Scott's rule defines the bin width as `dx = 3.5 * sigma / n**(1/3)`, where
    sigma is the standard deviation of the data and n is the sample size.
    (The factor of `3.5` comes from a `24*np.sqrt(np.pi)` factor at the power of 1/3).
    """
    
    # Convert the data to a numpy array
    data = np.asarray(data)
    
    # Extract the information needed from the data
    n = len(data) # Number of data points
    sigma = np.std(data) # Standard deviation of the data
    xmin = np.min(data) # Minimum of the data
    xmax = np.max(data) # Maximum of the data
    
    # Compute the bin width 
    factor = (24 * np.sqrt(np.pi))**(1/3) # A factor that appears in Scott's rule, approximated by 3.5
    dx = factor * sigma / n**(1/3) # The bin width according to Scott's rule
    
    # Compute the bin edges
    nbins = int(np.ceil((xmax - xmin) / dx)) # The number of bins
    nbins = max(nbins, 1) # Make sure that there is at least one bin
    edges = xmin + dx * np.arange(nbins + 1) # The edges of the bins
    
    return edges


[docs]def n_z(z, 
        cosmo, 
        edges: list = None, 
        area: float = None, 
        fsky:float=None) -> InterpolatedUnivariateSpline:
    """ 
    Computes the number density of galaxies in the data in the given redshift bin.

    Parameters
    ----------
        z: array_like
            The redshift of the galaxies in the data. 
            
        cosmo
            Cosmology object that can be used to compute the comoving distance.
            It must have a method `comoving_radial_distance(z)` that returns the comoving distance at redshift `z`.
            
        edges: list, optional
            The edges of the bins used to compute the number density. 
            If set to `None`, the edges are computed using Scott's rule. Defaults to `None`.
        
        area: float, optional
            The area of the survey in square degrees. Defaults to `None`.
        
        fsky: float, optional
            The fraction of the sky covered by the survey. Can be provided instead of area. Defaults to `None`.
        
    Returns
    -------
        n_func : InterpolatedUnivariateSpline
            The number density as a function of redshift. 
            It can be called as `n_func(z)` to get the number density at redshift `z`.
            
    Notes
    -----
    The number density is computed as `n(z) = N(z) / V(z)`, 
    where `N(z)` is the number of galaxies in the redshift bin [`z_min`, `z_max`] 
    and `V(z)` is the comoving volume associated to the redshift bin [`z_min`, `z_max`].
    """
    
    # Convert the z to a numpy array
    z = np.asarray(z)
    
    # Compute the number of galaxies in the redshift bin
    if edges is None:
        edges = ScottsBinEdges(z)
    bin_centers = 0.5*(edges[:-1] + edges[1:])
    
    # Compute the comoving volume associated to the redshift bin
    z_min = edges[:-1]
    z_max = edges[1:]
    V = comoving_volume(cosmo, z_min, z_max, area, fsky) # Comoving volume associated to the redshift bin
    
    # Compute the number of galaxies in the bins 
    N, _ = np.histogram(z, bins=edges) # Number of galaxies in the redshift bin
    nbar = N / V # Number density in the redshift bin
    
    # Interpolate the number density as a function of redshift
    n_func = InterpolatedUnivariateSpline(bin_centers, nbar, ext='zeros') # (ext='zeros' means that the number density is set to zero outside of the redshift range of the data)
    return n_func


[docs]def w_fkp(z_data, 
          z_random, 
          cosmo, 
          edges: list = None, 
          area: float = None, 
          fsky: float = None, 
          P0: float = 7000):
    """ 
    Computes the FKP weights for the data, and returns a column containing the FKP weights. (If cuts need to be applied, they should be applied before calling this function.)

    Parameters
    ----------
        z_data: array_like
            The redshift of the galaxies in the data.
        
        z_random: array_like
            The redshift of the galaxies in the randoms.
            
        cosmo
            Cosmology object that can be used to compute the comoving distance.
            It must have a method `comoving_radial_distance(z)` that returns the comoving distance at redshift `z`.
            
        edges: list, optional
            The edges of the bins used to compute the number density. 
            If set to `None`, the edges are computed using Scott's rule. Defaults to `None`.
        
        area: float, optional
            The area of the survey in square degrees. Defaults to `None`.
        
        fsky: float, optional
            The fraction of the sky covered by the survey. Can be provided instead of area. Defaults to `None`.
        
        P0: float, optional
            The power spectrum normalization (TODO : Check this definition). Defaults to `7000` for the BGS.
        
    Returns
    -------
        weight_data: array_like
            The FKP weights for the data for the respective galaxies in z_data.
            
        weight_random: array_like
            The FKP weights for the data for the respective galaxies in z_random.
    """

    # Convert the z to a numpy array
    z_data = np.asarray(z_data)
    z_random = np.asarray(z_random)
    
    # Compute the number density
    n_func = n_z(z_random, cosmo, edges=edges, area=area, fsky=fsky) # Number density as a function of redshift, computed from the randoms (the data should follow the same distribution as the randoms)
    
    # Re-normalize n(z) to the total size of the data catalog
    alpha = 1.0 * len(z_data) / len(z_random)
    
    n_data = n_func(z_data) * alpha # Number density at the redshift of the data
    n_random = n_func(z_random) * alpha # Number density at the redshift of the randoms

    # Compute the FKP weights
    weight_data = 1.0 / (1 + n_data*P0)
    weight_random = 1.0 / (1 + n_random*P0)
    
    return weight_data, weight_random



from pandas import qcut
[docs]def get_quantiles_weight(density, 
                         randoms_weights,
                         nquantiles=10):
    """
    Gets the weights of the quantiles of the density.

    Parameters
    ----------
    density : array_like
        The density of the data.
        
    randoms_weights : array_like
        The weights of the randoms.
        
    nquantiles : int, optional
        The number of quantiles to use. Defaults to `10`.
    """
    quantiles_idx = qcut(density, nquantiles, labels=False)
    quantiles_weights = []
    for i in range(nquantiles):
        quantiles_weights.append(randoms_weights[quantiles_idx == i])
    
    return quantiles_weights