This notebook presents the merge of the various pristine catalogues to produce the HELP master catalogue on XMM-LSS.
from herschelhelp_internal import git_version
print("This notebook was run with herschelhelp_internal version: \n{}".format(git_version()))
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
import os
import time
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Column, Table
import numpy as np
from pymoc import MOC
from herschelhelp_internal.masterlist import merge_catalogues, nb_merge_dist_plot, specz_merge
from herschelhelp_internal.utils import coords_to_hpidx, ebv, gen_help_id, inMoc
TMP_DIR = os.environ.get('TMP_DIR', "./data_tmp")
OUT_DIR = os.environ.get('OUT_DIR', "./data")
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))
try:
    os.makedirs(OUT_DIR)
except FileExistsError:
    pass
#candels = Table.read("{}/CANDELS.fits".format(TMP_DIR)) # 1.1
#cfht_wirds = Table.read("{}/CFHT-WIRDS.fits".format(TMP_DIR)) # 1.3
#cfhtls_wide = Table.read("{}/CFHTLS-WIDE.fits".format(TMP_DIR)) # 1.4a
#cfhtls_deep = Table.read("{}/CFHTLS-DEEP.fits".format(TMP_DIR)) # 1.4b
#We no longer use CFHTLenS as it is the same raw data set as CFHTLS-WIDE
# cfhtlens = Table.read("{}/CFHTLENS.fits".format(TMP_DIR)) # 1.5
#decals = Table.read("{}/DECaLS.fits".format(TMP_DIR)) # 1.6
#servs = Table.read("{}/SERVS.fits".format(TMP_DIR)) # 1.8
#swire = Table.read("{}/SWIRE.fits".format(TMP_DIR)) # 1.7
hsc_wide = Table.read("{}/HSC-WIDE.fits".format(TMP_DIR)) # 1.9a
hsc_deep = Table.read("{}/HSC-DEEP.fits".format(TMP_DIR)) # 1.9b
hsc_udeep = Table.read("{}/HSC-UDEEP.fits".format(TMP_DIR)) # 1.9c
#ps1 = Table.read("{}/PS1.fits".format(TMP_DIR)) # 1.10
#sxds = Table.read("{}/SXDS.fits".format(TMP_DIR)) # 1.11
#sparcs = Table.read("{}/SpARCS.fits".format(TMP_DIR)) # 1.12
#dxs = Table.read("{}/UKIDSS-DXS.fits".format(TMP_DIR)) # 1.13
#uds = Table.read("{}/UKIDSS-UDS.fits".format(TMP_DIR)) # 1.14
#vipers = Table.read("{}/VIPERS.fits".format(TMP_DIR)) # 1.15
#vhs = Table.read("{}/VISTA-VHS.fits".format(TMP_DIR)) # 1.16
#video = Table.read("{}/VISTA-VIDEO.fits".format(TMP_DIR)) # 1.17
#viking = Table.read("{}/VISTA-VIKING.fits".format(TMP_DIR)) # 1.18
We first merge the optical catalogues and then add the infrared ones. In this notebook we combine the three HSC layers, starting with HSC-WIDE because it covers the whole field.
At every step, we look at the distribution of the distances between the sources of one catalogue and those of the other (within a maximum radius) to determine the best cross-matching radius.
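The radius is chosen by eye from the separation histograms below. As a rough sanity check, one can also compare the real separation distribution with the one obtained against a deliberately offset copy of the second catalogue, which only produces chance alignments. The following is a minimal sketch, not part of herschelhelp_internal; the function name and the 1 arcmin offset are arbitrary illustrative choices.
import numpy as np
from astropy import units as u

def chance_match_fraction(coords_a, coords_b, radius=0.8 * u.arcsec, offset=1 * u.arcmin):
    """Fraction of coords_a sources with a coords_b neighbour within `radius`,
    for the real catalogue and for a copy shifted north by `offset` (chance matches only)."""
    _, sep_real, _ = coords_a.match_to_catalog_sky(coords_b)
    shifted = coords_b.directional_offset_by(0 * u.deg, offset)  # great-circle shift north
    _, sep_rand, _ = coords_a.match_to_catalog_sky(shifted)
    return np.mean(sep_real < radius), np.mean(sep_rand < radius)
It can be called with the same SkyCoord pairs that are passed to nb_merge_dist_plot below.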
master_catalogue = hsc_wide
master_catalogue['hsc-wide_ra'].name = 'ra'
master_catalogue['hsc-wide_dec'].name = 'dec'
nb_merge_dist_plot(
SkyCoord(master_catalogue['ra'], master_catalogue['dec']),
SkyCoord(hsc_deep['hsc-deep_ra'], hsc_deep['hsc-deep_dec'])
)
# Given the graph above, we use 0.8 arc-second radius
master_catalogue = merge_catalogues(master_catalogue, hsc_deep, "hsc-deep_ra", "hsc-deep_dec", radius=0.8*u.arcsec)
nb_merge_dist_plot(
SkyCoord(master_catalogue['ra'], master_catalogue['dec']),
SkyCoord(hsc_udeep['hsc-udeep_ra'], hsc_udeep['hsc-udeep_dec'])
)
master_catalogue = merge_catalogues(master_catalogue, hsc_udeep, "hsc-udeep_ra", "hsc-udeep_dec", radius=0.8*u.arcsec)
When we merge the catalogues, astropy masks the non-existent values (e.g. when a row comes from only one catalogue and has no counterpart in the other, the columns of the latter are masked for that row). We fill the masked values with NaN for float columns, False for flag columns and -1 for ID columns.
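As a toy illustration of the fill_value / filled() mechanism used in the next cell (the table and column names here are made up):
from astropy.table import Table, MaskedColumn
import numpy as np

toy = Table([MaskedColumn([1.2, 3.4], mask=[False, True], name='f_demo'),
             MaskedColumn([101, 102], mask=[True, False], name='demo_id')])
toy['f_demo'].fill_value = np.nan
toy['demo_id'].fill_value = -1
print(toy.filled())  # the masked flux becomes NaN, the masked id becomes -1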
for col in master_catalogue.colnames:
    if "m_" in col or "merr_" in col or "f_" in col or "ferr_" in col or "stellarity" in col:
        master_catalogue[col] = master_catalogue[col].astype(float)
        master_catalogue[col].fill_value = np.nan
    elif "flag" in col:
        master_catalogue[col].fill_value = 0
    elif "id" in col:
        master_catalogue[col].fill_value = -1
master_catalogue = master_catalogue.filled()
# Since this is not the final merged catalogue, we rename the columns to make them unique.
master_catalogue['ra'].name = 'hsc_ra'
master_catalogue['dec'].name = 'hsc_dec'
master_catalogue['flag_merged'].name = 'hsc_flag_merged'
master_catalogue[:10].show_in_notebook()
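# Build an internal HSC identifier by concatenating the three layer IDs as strings
# (IDs of layers in which the source is absent were filled with -1 above).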
master_catalogue.add_column(Column(data=(np.char.array(master_catalogue['hsc-wide_id'].astype(str))
+ np.char.array(master_catalogue['hsc-deep_id'].astype(str) )
+ np.char.array(master_catalogue['hsc-udeep_id'].astype(str))),
name="hsc_intid"))
id_names = []
for col in master_catalogue.colnames:
    if '_id' in col:
        id_names += [col]
    if '_intid' in col:
        id_names += [col]
print(id_names)
For each band, we straightforwardly take the photometry from the deepest HSC layer in which the source has a measurement, with priority HSC-UDEEP, then HSC-DEEP, then HSC-WIDE (a condensed sketch of this rule is given after the table).
Survey | Bands observed |
---|---|
HSC-WIDE | grizy |
HSC-DEEP | grizy |
HSC-UDEEP | grizy n921 n816 |
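A condensed sketch of the depth-priority rule implemented in the loop below, using toy arrays; the helper deepest_available is only for illustration and is not used in the pipeline.
import numpy as np

def deepest_available(*fluxes):
    """Return, per source, the first non-NaN value among the inputs (deepest layer first)."""
    result = np.full(len(fluxes[0]), np.nan)
    for layer in fluxes:                      # ordered from deepest to shallowest
        take = np.isnan(result) & ~np.isnan(layer)
        result[take] = layer[take]
    return result

f_udeep = np.array([1.0, np.nan, np.nan])
f_deep  = np.array([2.0, 2.5, np.nan])
f_wide  = np.array([3.0, 3.5, 4.0])
print(deepest_available(f_udeep, f_deep, f_wide))  # -> [1.  2.5 4. ]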
suprime_origin = Table()
suprime_origin.add_column(master_catalogue['hsc_intid'])
suprime_stats = Table()
suprime_stats.add_column(Column(data=['g','r','i','z','y'], name="Band"))
for col in ["HSC-UDEEP", "HSC-DEEP", "HSC-WIDE"]:
    suprime_stats.add_column(Column(data=np.full(5, 0), name="{}".format(col)))
    suprime_stats.add_column(Column(data=np.full(5, 0), name="use {}".format(col)))
    suprime_stats.add_column(Column(data=np.full(5, 0), name="{} ap".format(col)))
    suprime_stats.add_column(Column(data=np.full(5, 0), name="use {} ap".format(col)))
# n921 and n816 are only observed in HSC-UDEEP, so we can simply rename those columns
for col in master_catalogue.colnames:
    if ('n921' in col) or ('n816' in col):
        master_catalogue[col].name = col.replace('hsc-udeep', 'suprime')
suprime_bands = ['g','r','i','z','y'] # Lowercase naming convention
for band in suprime_bands:
    # Suprime total flux
    has_hsc_udeep = ~np.isnan(master_catalogue['f_hsc-udeep_' + band])
    has_hsc_deep = ~np.isnan(master_catalogue['f_hsc-deep_' + band])
    has_hsc_wide = ~np.isnan(master_catalogue['f_hsc-wide_' + band])
    use_hsc_udeep = has_hsc_udeep
    use_hsc_deep = has_hsc_deep & ~has_hsc_udeep
    use_hsc_wide = has_hsc_wide & ~has_hsc_deep & ~has_hsc_udeep
    f_suprime = np.full(len(master_catalogue), np.nan)
    f_suprime[use_hsc_udeep] = master_catalogue['f_hsc-udeep_' + band][use_hsc_udeep]
    f_suprime[use_hsc_deep] = master_catalogue['f_hsc-deep_' + band][use_hsc_deep]
    f_suprime[use_hsc_wide] = master_catalogue['f_hsc-wide_' + band][use_hsc_wide]
    ferr_suprime = np.full(len(master_catalogue), np.nan)
    ferr_suprime[use_hsc_udeep] = master_catalogue['ferr_hsc-udeep_' + band][use_hsc_udeep]
    ferr_suprime[use_hsc_deep] = master_catalogue['ferr_hsc-deep_' + band][use_hsc_deep]
    ferr_suprime[use_hsc_wide] = master_catalogue['ferr_hsc-wide_' + band][use_hsc_wide]
    m_suprime = np.full(len(master_catalogue), np.nan)
    m_suprime[use_hsc_udeep] = master_catalogue['m_hsc-udeep_' + band][use_hsc_udeep]
    m_suprime[use_hsc_deep] = master_catalogue['m_hsc-deep_' + band][use_hsc_deep]
    m_suprime[use_hsc_wide] = master_catalogue['m_hsc-wide_' + band][use_hsc_wide]
    merr_suprime = np.full(len(master_catalogue), np.nan)
    merr_suprime[use_hsc_udeep] = master_catalogue['merr_hsc-udeep_' + band][use_hsc_udeep]
    merr_suprime[use_hsc_deep] = master_catalogue['merr_hsc-deep_' + band][use_hsc_deep]
    merr_suprime[use_hsc_wide] = master_catalogue['merr_hsc-wide_' + band][use_hsc_wide]
    flag_suprime = np.full(len(master_catalogue), False, dtype=bool)
    flag_suprime[use_hsc_udeep] = master_catalogue['flag_hsc-udeep_' + band][use_hsc_udeep]
    flag_suprime[use_hsc_deep] = master_catalogue['flag_hsc-deep_' + band][use_hsc_deep]
    flag_suprime[use_hsc_wide] = master_catalogue['flag_hsc-wide_' + band][use_hsc_wide]
    master_catalogue.add_column(Column(data=f_suprime, name="f_suprime_" + band))
    master_catalogue.add_column(Column(data=ferr_suprime, name="ferr_suprime_" + band))
    master_catalogue.add_column(Column(data=m_suprime, name="m_suprime_" + band))
    master_catalogue.add_column(Column(data=merr_suprime, name="merr_suprime_" + band))
    master_catalogue.add_column(Column(data=flag_suprime, name="flag_suprime_" + band))
    old_hsc_udeep_columns = ['f_hsc-udeep_' + band,
                             'ferr_hsc-udeep_' + band,
                             'm_hsc-udeep_' + band,
                             'merr_hsc-udeep_' + band,
                             'flag_hsc-udeep_' + band]
    old_hsc_deep_columns = ['f_hsc-deep_' + band,
                            'ferr_hsc-deep_' + band,
                            'm_hsc-deep_' + band,
                            'merr_hsc-deep_' + band,
                            'flag_hsc-deep_' + band]
    old_hsc_wide_columns = ['f_hsc-wide_' + band,
                            'ferr_hsc-wide_' + band,
                            'm_hsc-wide_' + band,
                            'merr_hsc-wide_' + band,
                            'flag_hsc-wide_' + band]
    old_columns = old_hsc_udeep_columns + old_hsc_deep_columns + old_hsc_wide_columns
    master_catalogue.remove_columns(old_columns)
    origin = np.full(len(master_catalogue), ' ', dtype='<U9')
    origin[use_hsc_udeep] = "HSC-UDEEP"
    origin[use_hsc_deep] = "HSC-DEEP"
    origin[use_hsc_wide] = "HSC-WIDE"
    suprime_origin.add_column(Column(data=origin, name='f_suprime_' + band))
    # Suprime aperture flux
    has_ap_hsc_udeep = ~np.isnan(master_catalogue['f_ap_hsc-udeep_' + band])
    has_ap_hsc_deep = ~np.isnan(master_catalogue['f_ap_hsc-deep_' + band])
    has_ap_hsc_wide = ~np.isnan(master_catalogue['f_ap_hsc-wide_' + band])
    use_ap_hsc_udeep = has_ap_hsc_udeep
    use_ap_hsc_deep = has_ap_hsc_deep & ~has_ap_hsc_udeep
    use_ap_hsc_wide = has_ap_hsc_wide & ~has_ap_hsc_deep & ~has_ap_hsc_udeep
    f_ap_suprime = np.full(len(master_catalogue), np.nan)
    f_ap_suprime[use_ap_hsc_udeep] = master_catalogue['f_ap_hsc-udeep_' + band][use_ap_hsc_udeep]
    f_ap_suprime[use_ap_hsc_deep] = master_catalogue['f_ap_hsc-deep_' + band][use_ap_hsc_deep]
    f_ap_suprime[use_ap_hsc_wide] = master_catalogue['f_ap_hsc-wide_' + band][use_ap_hsc_wide]
    ferr_ap_suprime = np.full(len(master_catalogue), np.nan)
    ferr_ap_suprime[use_ap_hsc_udeep] = master_catalogue['ferr_ap_hsc-udeep_' + band][use_ap_hsc_udeep]
    ferr_ap_suprime[use_ap_hsc_deep] = master_catalogue['ferr_ap_hsc-deep_' + band][use_ap_hsc_deep]
    ferr_ap_suprime[use_ap_hsc_wide] = master_catalogue['ferr_ap_hsc-wide_' + band][use_ap_hsc_wide]
    m_ap_suprime = np.full(len(master_catalogue), np.nan)
    m_ap_suprime[use_ap_hsc_udeep] = master_catalogue['m_ap_hsc-udeep_' + band][use_ap_hsc_udeep]
    m_ap_suprime[use_ap_hsc_deep] = master_catalogue['m_ap_hsc-deep_' + band][use_ap_hsc_deep]
    m_ap_suprime[use_ap_hsc_wide] = master_catalogue['m_ap_hsc-wide_' + band][use_ap_hsc_wide]
    merr_ap_suprime = np.full(len(master_catalogue), np.nan)
    merr_ap_suprime[use_ap_hsc_udeep] = master_catalogue['merr_ap_hsc-udeep_' + band][use_ap_hsc_udeep]
    merr_ap_suprime[use_ap_hsc_deep] = master_catalogue['merr_ap_hsc-deep_' + band][use_ap_hsc_deep]
    merr_ap_suprime[use_ap_hsc_wide] = master_catalogue['merr_ap_hsc-wide_' + band][use_ap_hsc_wide]
    master_catalogue.add_column(Column(data=f_ap_suprime, name="f_ap_suprime_" + band))
    master_catalogue.add_column(Column(data=ferr_ap_suprime, name="ferr_ap_suprime_" + band))
    master_catalogue.add_column(Column(data=m_ap_suprime, name="m_ap_suprime_" + band))
    master_catalogue.add_column(Column(data=merr_ap_suprime, name="merr_ap_suprime_" + band))
    old_ap_hsc_udeep_columns = ['f_ap_hsc-udeep_' + band,
                                'ferr_ap_hsc-udeep_' + band,
                                'm_ap_hsc-udeep_' + band,
                                'merr_ap_hsc-udeep_' + band]
    old_ap_hsc_deep_columns = ['f_ap_hsc-deep_' + band,
                               'ferr_ap_hsc-deep_' + band,
                               'm_ap_hsc-deep_' + band,
                               'merr_ap_hsc-deep_' + band]
    old_ap_hsc_wide_columns = ['f_ap_hsc-wide_' + band,
                               'ferr_ap_hsc-wide_' + band,
                               'm_ap_hsc-wide_' + band,
                               'merr_ap_hsc-wide_' + band]
    old_ap_columns = old_ap_hsc_udeep_columns + old_ap_hsc_deep_columns + old_ap_hsc_wide_columns
    master_catalogue.remove_columns(old_ap_columns)
    origin_ap = np.full(len(master_catalogue), ' ', dtype='<U9')
    origin_ap[use_ap_hsc_udeep] = "HSC-UDEEP"
    origin_ap[use_ap_hsc_deep] = "HSC-DEEP"
    origin_ap[use_ap_hsc_wide] = "HSC-WIDE"
    suprime_origin.add_column(Column(data=origin_ap, name='f_ap_suprime_' + band))
    suprime_stats['HSC-UDEEP'][suprime_stats['Band'] == band] = np.sum(has_hsc_udeep)
    suprime_stats['HSC-DEEP'][suprime_stats['Band'] == band] = np.sum(has_hsc_deep)
    suprime_stats['HSC-WIDE'][suprime_stats['Band'] == band] = np.sum(has_hsc_wide)
    suprime_stats['use HSC-UDEEP'][suprime_stats['Band'] == band] = np.sum(use_hsc_udeep)
    suprime_stats['use HSC-DEEP'][suprime_stats['Band'] == band] = np.sum(use_hsc_deep)
    suprime_stats['use HSC-WIDE'][suprime_stats['Band'] == band] = np.sum(use_hsc_wide)
    suprime_stats['HSC-UDEEP ap'][suprime_stats['Band'] == band] = np.sum(has_ap_hsc_udeep)
    suprime_stats['HSC-DEEP ap'][suprime_stats['Band'] == band] = np.sum(has_ap_hsc_deep)
    suprime_stats['HSC-WIDE ap'][suprime_stats['Band'] == band] = np.sum(has_ap_hsc_wide)
    suprime_stats['use HSC-UDEEP ap'][suprime_stats['Band'] == band] = np.sum(use_ap_hsc_udeep)
    suprime_stats['use HSC-DEEP ap'][suprime_stats['Band'] == band] = np.sum(use_ap_hsc_deep)
    suprime_stats['use HSC-WIDE ap'][suprime_stats['Band'] == band] = np.sum(use_ap_hsc_wide)
suprime_stats.show_in_notebook()
suprime_origin.write("{}/xmm-lss_suprime_fluxes_origins{}.fits".format(OUT_DIR, SUFFIX), overwrite=True)
columns = ["help_id", "field", "ra", "dec", "hp_idx"]
bands = [column[5:] for column in master_catalogue.colnames if 'f_ap' in column]
for band in bands:
    columns += ["f_ap_{}".format(band), "ferr_ap_{}".format(band),
                "m_ap_{}".format(band), "merr_ap_{}".format(band),
                "f_{}".format(band), "ferr_{}".format(band),
                "m_{}".format(band), "merr_{}".format(band),
                "flag_{}".format(band)]
columns += ["stellarity", "stellarity_origin", "flag_cleaned",
"flag_merged", "flag_gaia", "flag_optnir_obs", "flag_optnir_det",
"zspec", "zspec_qual", "zspec_association_flag", "ebv"]
# We check for columns in the master catalogue that will not be saved to disk.
print("Columns that will not be kept: {}".format(set(master_catalogue.colnames) - set(columns)))
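The complementary check, i.e. columns expected in the final schema but absent from this intermediate catalogue, can be printed the same way:
print("Expected but absent: {}".format(set(columns) - set(master_catalogue.colnames)))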
master_catalogue.write("{}/hsc_merged_catalogue_xmm-lss.fits".format(TMP_DIR), overwrite=True)