This notebook presents the merging of the pristine CFHT Megacam catalogues. On XMM-LSS this has to be carried out as a separate step because of the large amount of memory required on this field.
from herschelhelp_internal import git_version
print("This notebook was run with herschelhelp_internal version: \n{}".format(git_version()))
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
import os
import time
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Column, Table
import numpy as np
from pymoc import MOC
from herschelhelp_internal.masterlist import merge_catalogues, nb_merge_dist_plot, specz_merge
from herschelhelp_internal.utils import coords_to_hpidx, ebv, gen_help_id, inMoc
TMP_DIR = os.environ.get('TMP_DIR', "./data_tmp")
OUT_DIR = os.environ.get('OUT_DIR', "./data")
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))
try:
os.makedirs(OUT_DIR)
except FileExistsError:
pass
#candels = Table.read("{}/CANDELS.fits".format(TMP_DIR)) # 1.1
#cfht_wirds = Table.read("{}/CFHT-WIRDS.fits".format(TMP_DIR)) # 1.3
cfhtls_wide = Table.read("{}/CFHTLS-WIDE.fits".format(TMP_DIR)) # 1.4a
cfhtls_deep = Table.read("{}/CFHTLS-DEEP.fits".format(TMP_DIR)) # 1.4b
#We no longer use CFHTLenS as it is the same raw data set as CFHTLS-WIDE
# cfhtlens = Table.read("{}/CFHTLENS.fits".format(TMP_DIR)) # 1.5
#decals = Table.read("{}/DECaLS.fits".format(TMP_DIR)) # 1.6
#servs = Table.read("{}/SERVS.fits".format(TMP_DIR)) # 1.8
#swire = Table.read("{}/SWIRE.fits".format(TMP_DIR)) # 1.7
#hsc_wide = Table.read("{}/HSC-WIDE.fits".format(TMP_DIR)) # 1.9a
#hsc_deep = Table.read("{}/HSC-DEEP.fits".format(TMP_DIR)) # 1.9b
#hsc_udeep = Table.read("{}/HSC-UDEEP.fits".format(TMP_DIR)) # 1.9c
#ps1 = Table.read("{}/PS1.fits".format(TMP_DIR)) # 1.10
#sxds = Table.read("{}/SXDS.fits".format(TMP_DIR)) # 1.11
sparcs = Table.read("{}/SpARCS.fits".format(TMP_DIR)) # 1.12
#dxs = Table.read("{}/UKIDSS-DXS.fits".format(TMP_DIR)) # 1.13
#uds = Table.read("{}/UKIDSS-UDS.fits".format(TMP_DIR)) # 1.14
#vipers = Table.read("{}/VIPERS.fits".format(TMP_DIR)) # 1.15
#vhs = Table.read("{}/VISTA-VHS.fits".format(TMP_DIR)) # 1.16
#video = Table.read("{}/VISTA-VIDEO.fits".format(TMP_DIR)) # 1.17
#viking = Table.read("{}/VISTA-VIKING.fits".format(TMP_DIR)) # 1.18
We merge the Megacam catalogues one at a time, starting with CFHTLS-WIDE as it has the widest coverage of the field.
At every step, we look at the distribution of the distances separating the sources from one catalogue to the other (within a maximum radius) to determine the best cross-matching radius.
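For reference, here is a minimal sketch of the kind of nearest-neighbour separation histogram that nb_merge_dist_plot displays, written with plain astropy and matplotlib; coords_a and coords_b are hypothetical SkyCoord objects standing in for two of the pristine catalogues (this is only an illustration, not the HELP implementation).
import matplotlib.pyplot as plt
from astropy import units as u

def separation_histogram(coords_a, coords_b, max_radius=5 * u.arcsec):
    """Histogram of coords_a -> coords_b nearest-neighbour separations within max_radius."""
    _, d2d, _ = coords_a.match_to_catalog_sky(coords_b)  # nearest match in coords_b
    separations = d2d[d2d < max_radius].arcsec
    plt.hist(separations, bins=50)
    plt.xlabel("Separation (arcsec)")
    plt.ylabel("Number of pairs")
    return separations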
master_catalogue = cfhtls_wide
master_catalogue['cfhtls-wide_ra'].name = 'ra'
master_catalogue['cfhtls-wide_dec'].name = 'dec'
del cfhtls_wide
nb_merge_dist_plot(
SkyCoord(master_catalogue['ra'], master_catalogue['dec']),
SkyCoord(cfhtls_deep['cfhtls-deep_ra'], cfhtls_deep['cfhtls-deep_dec'])
)
# Given the graph above, we use a 0.8 arc-second radius
master_catalogue = merge_catalogues(master_catalogue,
cfhtls_deep,
"cfhtls-deep_ra",
"cfhtls-deep_dec",
radius=0.8*u.arcsec)
nb_merge_dist_plot(
SkyCoord(master_catalogue['ra'], master_catalogue['dec']),
SkyCoord(sparcs['sparcs_ra'], sparcs['sparcs_dec'])
)
# Given the graph above, we use a 0.8 arc-second radius
master_catalogue = merge_catalogues(master_catalogue, sparcs, "sparcs_ra", "sparcs_dec", radius=0.8*u.arcsec)
When we merge the catalogues, Astropy masks the non-existent values (e.g. when a row comes from only one catalogue and has no counterpart in the others, the columns coming from the other catalogues are masked for that row). We fill the masked values with NaN for float columns, False for flag columns, and -1 for ID columns.
for col in master_catalogue.colnames:
if "m_" in col or "merr_" in col or "f_" in col or "ferr_" in col or "stellarity" in col:
master_catalogue[col] = master_catalogue[col].astype(float)
master_catalogue[col].fill_value = np.nan
elif "flag" in col:
master_catalogue[col].fill_value = 0
elif "id" in col:
master_catalogue[col].fill_value = -1
master_catalogue = master_catalogue.filled()
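As a standalone toy illustration of the filling behaviour described above (hypothetical column names, not part of the master catalogue):
import numpy as np
from astropy.table import MaskedColumn, Table

toy = Table()
toy['f_demo'] = MaskedColumn([1.5, 2.5], mask=[False, True])        # float column
toy['flag_demo'] = MaskedColumn([True, False], mask=[True, False])  # flag column
toy['demo_id'] = MaskedColumn([42, 43], mask=[False, True])         # ID column
toy['f_demo'].fill_value = np.nan
toy['flag_demo'].fill_value = 0
toy['demo_id'].fill_value = -1
print(toy.filled())  # masked entries become NaN, False and -1 respectively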
# Since this is not the final merged catalogue, we rename the columns to make them unique
master_catalogue['ra'].name = 'megacam_ra'
master_catalogue['dec'].name = 'megacam_dec'
master_catalogue['flag_merged'].name = 'megacam_flag_merged'
master_catalogue[:10].show_in_notebook()
master_catalogue.add_column(Column(data=(np.char.array(master_catalogue['cfhtls-wide_id'].astype(str))
+ np.char.array(master_catalogue['cfhtls-deep_id'].astype(str) )
+ np.char.array(master_catalogue['sparcs_intid'].astype(str))),
name="megacam_intid"))
id_names = []
for col in master_catalogue.colnames:
if '_id' in col:
id_names += [col]
if '_intid' in col:
id_names += [col]
print(id_names)
According to Mattia, CFHTLenS is built on the same data as CFHTLS-WIDE and should not be included. I have therefore excluded it from the merge above.
CFHTLS-DEEP is preferred to CFHTLS-WIDE, which is preferred to SpARCS. The band coverage of each catalogue is:
| Survey      | Bands            |
|-------------|------------------|
| CFHTLS-WIDE | u, g, r, i, z    |
| CFHTLS-DEEP | u, g, r, i, z, y |
| SpARCS      | u, g, r, z, y    |
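The long cell below applies this priority band by band to every flux, magnitude, error and flag column. As a compact sketch of the same selection rule (hypothetical flux arrays, not the actual catalogue columns):
import numpy as np

def priority_merge(f_deep, f_wide, f_sparcs):
    """Take CFHTLS-DEEP where available, else CFHTLS-WIDE, else SpARCS (NaN = no measurement)."""
    has_deep = ~np.isnan(f_deep)
    has_wide = ~np.isnan(f_wide)
    has_sparcs = ~np.isnan(f_sparcs)
    use_deep = has_deep
    use_wide = has_wide & ~has_deep
    use_sparcs = has_sparcs & ~has_wide & ~has_deep
    merged = np.full(len(f_deep), np.nan)
    merged[use_deep] = f_deep[use_deep]
    merged[use_wide] = f_wide[use_wide]
    merged[use_sparcs] = f_sparcs[use_sparcs]
    return merged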
megacam_origin = Table()
megacam_origin.add_column(master_catalogue['megacam_intid'])
megacam_stats = Table()
megacam_stats.add_column(Column(data=['u','g','r','i','z','y'], name="Band"))
for col in ["CFHTLS-DEEP", "CFHTLS-WIDE", "SpARCS"]:
megacam_stats.add_column(Column(data=np.full(6, 0), name="{}".format(col)))
megacam_stats.add_column(Column(data=np.full(6, 0), name="use {}".format(col)))
megacam_stats.add_column(Column(data=np.full(6, 0), name="{} ap".format(col)))
megacam_stats.add_column(Column(data=np.full(6, 0), name="use {} ap".format(col)))
megacam_bands = ['u', 'g', 'r', 'i', 'z', 'y'] # Lowercase band naming convention
for band in megacam_bands:
# Megacam total flux
has_cfhtls_deep = ~np.isnan(master_catalogue['f_cfhtls-deep_' + band])
if band == 'y':
has_cfhtls_wide = np.full(len(master_catalogue), False, dtype=bool)
else:
has_cfhtls_wide = ~np.isnan(master_catalogue['f_cfhtls-wide_' + band])
if band == 'i':
has_sparcs = np.full(len(master_catalogue), False, dtype=bool)
else:
has_sparcs = ~np.isnan(master_catalogue['f_sparcs_' + band])
use_cfhtls_deep = has_cfhtls_deep
use_cfhtls_wide = has_cfhtls_wide & ~has_cfhtls_deep
use_sparcs = has_sparcs & ~has_cfhtls_wide & ~has_cfhtls_deep
f_megacam = np.full(len(master_catalogue), np.nan)
f_megacam[use_cfhtls_deep] = master_catalogue['f_cfhtls-deep_' + band][use_cfhtls_deep]
if not (band == 'y'):
f_megacam[use_cfhtls_wide] = master_catalogue['f_cfhtls-wide_' + band][use_cfhtls_wide]
if not (band == 'i'):
f_megacam[use_sparcs] = master_catalogue['f_sparcs_' + band][use_sparcs]
ferr_megacam = np.full(len(master_catalogue), np.nan)
ferr_megacam[use_cfhtls_deep] = master_catalogue['ferr_cfhtls-deep_' + band][use_cfhtls_deep]
if not (band == 'y'):
ferr_megacam[use_cfhtls_wide] = master_catalogue['ferr_cfhtls-wide_' + band][use_cfhtls_wide]
if not (band == 'i'):
ferr_megacam[use_sparcs] = master_catalogue['ferr_sparcs_' + band][use_sparcs]
m_megacam = np.full(len(master_catalogue), np.nan)
m_megacam[use_cfhtls_deep] = master_catalogue['m_cfhtls-deep_' + band][use_cfhtls_deep]
if not (band == 'y'):
m_megacam[use_cfhtls_wide] = master_catalogue['m_cfhtls-wide_' + band][use_cfhtls_wide]
if not (band == 'i'):
m_megacam[use_sparcs] = master_catalogue['m_sparcs_' + band][use_sparcs]
merr_megacam = np.full(len(master_catalogue), np.nan)
merr_megacam[use_cfhtls_deep] = master_catalogue['merr_cfhtls-deep_' + band][use_cfhtls_deep]
if not (band == 'y'):
merr_megacam[use_cfhtls_wide] = master_catalogue['merr_cfhtls-wide_' + band][use_cfhtls_wide]
if not (band == 'i'):
merr_megacam[use_sparcs] = master_catalogue['merr_sparcs_' + band][use_sparcs]
flag_megacam = np.full(len(master_catalogue), False, dtype=bool)
flag_megacam[use_cfhtls_deep] = master_catalogue['flag_cfhtls-deep_' + band][use_cfhtls_deep]
if not (band == 'y'):
flag_megacam[use_cfhtls_wide] = master_catalogue['flag_cfhtls-wide_' + band][use_cfhtls_wide]
if not (band == 'i'):
flag_megacam[use_sparcs] = master_catalogue['flag_sparcs_' + band][use_sparcs]
master_catalogue.add_column(Column(data=f_megacam, name="f_megacam_" + band))
master_catalogue.add_column(Column(data=ferr_megacam, name="ferr_megacam_" + band))
master_catalogue.add_column(Column(data=m_megacam, name="m_megacam_" + band))
master_catalogue.add_column(Column(data=merr_megacam, name="merr_megacam_" + band))
master_catalogue.add_column(Column(data=flag_megacam, name="flag_megacam_" + band))
old_cfhtls_deep_columns = ['f_cfhtls-deep_' + band,
'ferr_cfhtls-deep_' + band,
'm_cfhtls-deep_' + band,
'merr_cfhtls-deep_' + band,
'flag_cfhtls-deep_' + band]
old_cfhtls_wide_columns = ['f_cfhtls-wide_' + band,
'ferr_cfhtls-wide_' + band,
'm_cfhtls-wide_' + band,
'merr_cfhtls-wide_' + band,
'flag_cfhtls-wide_' + band]
old_sparcs_columns = ['f_sparcs_' + band,
'ferr_sparcs_' + band,
'm_sparcs_' + band,
'merr_sparcs_' + band,
'flag_sparcs_' + band]
if (band == 'i'):
old_columns = old_cfhtls_deep_columns + old_cfhtls_wide_columns
elif (band == 'y'):
old_columns = old_cfhtls_deep_columns + old_sparcs_columns
else:
old_columns = old_cfhtls_deep_columns + old_cfhtls_wide_columns + old_sparcs_columns
master_catalogue.remove_columns(old_columns)
origin = np.full(len(master_catalogue), ' ', dtype='<U11')  # '<U5' would truncate "CFHTLS-DEEP"/"CFHTLS-WIDE"
origin[use_cfhtls_deep] = "CFHTLS-DEEP"
origin[use_cfhtls_wide] = "CFHTLS-WIDE"
origin[use_sparcs] = "SpARCS"
megacam_origin.add_column(Column(data=origin, name= 'f_megacam_' + band ))
# Megacam aperture flux
has_ap_cfhtls_deep = ~np.isnan(master_catalogue['f_ap_cfhtls-deep_' + band])
if band == 'y':
has_ap_cfhtls_wide = np.full(len(master_catalogue), False, dtype=bool)
else:
has_ap_cfhtls_wide = ~np.isnan(master_catalogue['f_ap_cfhtls-wide_' + band])
if band == 'i':
has_ap_sparcs = np.full(len(master_catalogue), False, dtype=bool)
else:
has_ap_sparcs = ~np.isnan(master_catalogue['f_ap_sparcs_' + band])
use_ap_cfhtls_deep = has_ap_cfhtls_deep
use_ap_cfhtls_wide = has_ap_cfhtls_wide & ~has_ap_cfhtls_deep
use_ap_sparcs = has_ap_sparcs & ~has_ap_cfhtls_wide & ~has_ap_cfhtls_deep
f_ap_megacam = np.full(len(master_catalogue), np.nan)
f_ap_megacam[use_ap_cfhtls_deep] = master_catalogue['f_ap_cfhtls-deep_' + band][use_ap_cfhtls_deep]
if not (band == 'y'):
f_ap_megacam[use_ap_cfhtls_wide] = master_catalogue['f_ap_cfhtls-wide_' + band][use_ap_cfhtls_wide]
if not (band == 'i'):
f_ap_megacam[use_ap_sparcs] = master_catalogue['f_ap_sparcs_' + band][use_ap_sparcs]
ferr_ap_megacam = np.full(len(master_catalogue), np.nan)
ferr_ap_megacam[use_ap_cfhtls_deep] = master_catalogue['ferr_ap_cfhtls-deep_' + band][use_ap_cfhtls_deep]
if not (band == 'y'):
ferr_ap_megacam[use_ap_cfhtls_wide] = master_catalogue['ferr_ap_cfhtls-wide_' + band][use_ap_cfhtls_wide]
if not (band == 'i'):
ferr_ap_megacam[use_ap_sparcs] = master_catalogue['ferr_ap_sparcs_' + band][use_ap_sparcs]
m_ap_megacam = np.full(len(master_catalogue), np.nan)
m_ap_megacam[use_ap_cfhtls_deep] = master_catalogue['m_ap_cfhtls-deep_' + band][use_ap_cfhtls_deep]
if not (band == 'y'):
m_ap_megacam[use_ap_cfhtls_wide] = master_catalogue['m_ap_cfhtls-wide_' + band][use_ap_cfhtls_wide]
if not (band == 'i'):
m_ap_megacam[use_ap_sparcs] = master_catalogue['m_ap_sparcs_' + band][use_ap_sparcs]
merr_ap_megacam = np.full(len(master_catalogue), np.nan)
merr_ap_megacam[use_ap_cfhtls_deep] = master_catalogue['merr_ap_cfhtls-deep_' + band][use_ap_cfhtls_deep]
if not (band == 'y'):
merr_ap_megacam[use_ap_cfhtls_wide] = master_catalogue['merr_ap_cfhtls-wide_' + band][use_ap_cfhtls_wide]
if not (band == 'i'):
merr_ap_megacam[use_ap_sparcs] = master_catalogue['merr_ap_sparcs_' + band][use_ap_sparcs]
master_catalogue.add_column(Column(data=f_ap_megacam, name="f_ap_megacam_" + band))
master_catalogue.add_column(Column(data=ferr_ap_megacam, name="ferr_ap_megacam_" + band))
master_catalogue.add_column(Column(data=m_ap_megacam, name="m_ap_megacam_" + band))
master_catalogue.add_column(Column(data=merr_ap_megacam, name="merr_ap_megacam_" + band))
old_ap_cfhtls_deep_columns = ['f_ap_cfhtls-deep_' + band,
'ferr_ap_cfhtls-deep_' + band,
'm_ap_cfhtls-deep_' + band,
'merr_ap_cfhtls-deep_' + band]
old_ap_cfhtls_wide_columns = ['f_ap_cfhtls-wide_' + band,
'ferr_ap_cfhtls-wide_' + band,
'm_ap_cfhtls-wide_' + band,
'merr_ap_cfhtls-wide_' + band]
old_ap_sparcs_columns = ['f_ap_sparcs_' + band,
'ferr_ap_sparcs_' + band,
'm_ap_sparcs_' + band,
'merr_ap_sparcs_' + band]
if (band == 'i'):
old_ap_columns = old_ap_cfhtls_deep_columns + old_ap_cfhtls_wide_columns
elif (band == 'y'):
old_ap_columns = old_ap_cfhtls_deep_columns + old_ap_sparcs_columns
else:
old_ap_columns = old_ap_cfhtls_deep_columns + old_ap_cfhtls_wide_columns + old_ap_sparcs_columns
master_catalogue.remove_columns(old_ap_columns)
origin_ap = np.full(len(master_catalogue), ' ', dtype='<U11')  # '<U5' would truncate the survey names
origin_ap[use_ap_cfhtls_deep] = "CFHTLS-DEEP"
origin_ap[use_ap_cfhtls_wide] = "CFHTLS-WIDE"
origin_ap[use_ap_sparcs] = "SpARCS"
megacam_origin.add_column(Column(data=origin_ap, name= 'f_ap_megacam_' + band ))
megacam_stats['CFHTLS-DEEP'][megacam_stats['Band'] == band] = np.sum(has_cfhtls_deep)
megacam_stats['CFHTLS-WIDE'][megacam_stats['Band'] == band] = np.sum(has_cfhtls_wide)
megacam_stats['SpARCS'][megacam_stats['Band'] == band] = np.sum(has_sparcs)
megacam_stats['use CFHTLS-DEEP'][megacam_stats['Band'] == band] = np.sum(use_cfhtls_deep)
megacam_stats['use CFHTLS-WIDE'][megacam_stats['Band'] == band] = np.sum(use_cfhtls_wide)
megacam_stats['use SpARCS'][megacam_stats['Band'] == band] = np.sum(use_sparcs)
megacam_stats['CFHTLS-DEEP ap'][megacam_stats['Band'] == band] = np.sum(has_ap_cfhtls_deep)
megacam_stats['CFHTLS-WIDE ap'][megacam_stats['Band'] == band] = np.sum(has_ap_cfhtls_wide)
megacam_stats['SpARCS ap'][megacam_stats['Band'] == band] = np.sum(has_ap_sparcs)
megacam_stats['use CFHTLS-DEEP ap'][megacam_stats['Band'] == band] = np.sum(use_ap_cfhtls_deep)
megacam_stats['use CFHTLS-WIDE ap'][megacam_stats['Band'] == band] = np.sum(use_ap_cfhtls_wide)
megacam_stats['use SpARCS ap'][megacam_stats['Band'] == band] = np.sum(use_ap_sparcs)
megacam_stats.show_in_notebook()
megacam_origin.write("{}/xmm-lss_megacam_fluxes_origins{}.fits".format(OUT_DIR, SUFFIX), overwrite=True)
master_catalogue.write("{}/megacam_merged_catalogue_xmm-lss.fits".format(TMP_DIR), overwrite=True)