Both SHELA and SpIES provide IRAC fluxes which have marginally overlapping coverage. We chose which to use here since in order to run in low memory mode we must have one catalogue per band before merging
from herschelhelp_internal import git_version
print("This notebook was run with herschelhelp_internal version: \n{}".format(git_version()))
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
import os
import time
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Column, Table
import numpy as np
from pymoc import MOC
from herschelhelp_internal.masterlist import merge_catalogues, nb_merge_dist_plot
from herschelhelp_internal.utils import coords_to_hpidx, ebv, gen_help_id, inMoc
TMP_DIR = os.environ.get('TMP_DIR', "./data_tmp")
OUT_DIR = os.environ.get('OUT_DIR', "./data")
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))
try:
os.makedirs(OUT_DIR)
except FileExistsError:
pass
shela = Table.read("{}/SHELA.fits".format(TMP_DIR))
spies= Table.read("{}/SpIES.fits".format(TMP_DIR))
We first merge the optical catalogues and then add the infrared ones: HSC, VHS, VICS82, UKIDSS-LAS, PanSTARRS, SHELA, SpIES.
At every step, we look at the distribution of the distances separating the sources from one catalogue to the other (within a maximum radius) to determine the best cross-matching radius.
master_catalogue = shela
master_catalogue['shela_ra'].name = 'ra'
master_catalogue['shela_dec'].name = 'dec'
nb_merge_dist_plot(
SkyCoord(master_catalogue['ra'], master_catalogue['dec']),
SkyCoord(spies['spies_ra'], spies['spies_dec'])
)
# Given the graph above, we use 1 arc-second radius
master_catalogue = merge_catalogues(master_catalogue, spies, "spies_ra", "spies_dec", radius=1.5*u.arcsec)
del spies
When we merge the catalogues, astropy masks the non-existent values (e.g. when a row comes only from a catalogue and has no counterparts in the other, the columns from the latest are masked for that row). We indicate to use NaN for masked values for floats columns, False for flag columns and -1 for ID columns.
for col in master_catalogue.colnames:
if "m_" in col or "merr_" in col or "f_" in col or "ferr_" in col or "stellarity" in col:
master_catalogue[col].fill_value = np.nan
elif "flag" in col:
master_catalogue[col].fill_value = 0
elif "id" in col:
master_catalogue[col].fill_value = -1
master_catalogue = master_catalogue.filled()
master_catalogue[:10].show_in_notebook()
Each pristine catalogue contains a flag indicating if the source was associated to a another nearby source that was removed during the cleaning process. We merge these flags in a single one.
flag_cleaned_columns = [column for column in master_catalogue.colnames
if 'flag_cleaned' in column]
flag_column = np.zeros(len(master_catalogue), dtype=bool)
for column in flag_cleaned_columns:
flag_column |= master_catalogue[column]
master_catalogue.add_column(Column(data=flag_column, name="irac_flag_cleaned"))
master_catalogue.remove_columns(flag_cleaned_columns)
Each pristine catalogue contains a flag indicating the probability of a source being a Gaia object (0: not a Gaia object, 1: possibly, 2: probably, 3: definitely). We merge these flags taking the highest value.
flag_gaia_columns = [column for column in master_catalogue.colnames
if 'flag_gaia' in column]
master_catalogue.add_column(Column(
data=np.max([master_catalogue[column] for column in flag_gaia_columns], axis=0),
name="irac_flag_gaia"
))
master_catalogue.remove_columns(flag_gaia_columns)
Each prisitine catalogue may contain one or several stellarity columns indicating the probability (0 to 1) of each source being a star. We merge these columns taking the highest value.
stellarity_columns = [column for column in master_catalogue.colnames
if 'stellarity' in column]
master_catalogue.add_column(Column(
data=np.nanmax([master_catalogue[column] for column in stellarity_columns], axis=0),
name="irac_stellarity"
))
master_catalogue.remove_columns(stellarity_columns)
We are producing a table associating to each HELP identifier, the identifiers of the sources in the pristine catalogue. This can be used to easily get additional information from them.
master_catalogue.add_column(Column(data=(np.char.array(master_catalogue['shela_intid'].astype(str))
+ np.char.array(master_catalogue['spies_intid'].astype(str) )),
name="irac_intid"))
id_names = []
for col in master_catalogue.colnames:
if '_id' in col:
id_names += [col]
if '_intid' in col:
id_names += [col]
print(id_names)
#master_catalogue[id_names].write(
# "{}/master_list_irac_cross_ident_herschel-stripe-82{}.fits".format(OUT_DIR, SUFFIX), overwrite=True)
#Lets leave the shela and spies ids in so that the final cross ident table is complete
#master_catalogue.remove_columns(['shela_intid', 'spies_intid'])
Both SHELA and SpIES provide IRAC1 and IRAC2 fluxes. SpIES seems to go deeper and neither apear to suffer from the bright drop off that affects both SERVS and SWIRE.
seip = Table.read("../../dmu0/dmu0_SEIP/data/SEIP_Herschel-Stripe-82.fits")
seip_coords = SkyCoord(seip['ra'], seip['dec'])
idx, d2d, _ = seip_coords.match_to_catalog_sky(SkyCoord(master_catalogue['ra'], master_catalogue['dec']))
mask = d2d <= 2 * u.arcsec
# servs -> shela and swire -> spies
fig, ax = plt.subplots()
ax.scatter(seip['i1_f_ap1'][mask], master_catalogue[idx[mask]]['f_ap_shela_irac1'], label="SHELA", s=2.)
ax.scatter(seip['i1_f_ap1'][mask], master_catalogue[idx[mask]]['f_ap_spies_irac1'], label="SpIES", s=2.)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("SEIP flux [μJy]")
ax.set_ylabel("SHELA/SpIES flux [μJy]")
ax.set_title("IRAC 1")
ax.legend()
ax.axvline(2000, color="black", linestyle="--", linewidth=1.)
ax.plot(seip['i1_f_ap1'][mask], seip['i1_f_ap1'][mask], linewidth=.1, color="black", alpha=.5);
fig, ax = plt.subplots()
ax.scatter(seip['i2_f_ap1'][mask], master_catalogue[idx[mask]]['f_ap_shela_irac2'], label="SHELA", s=2.)
ax.scatter(seip['i2_f_ap1'][mask], master_catalogue[idx[mask]]['f_ap_spies_irac2'], label="SpIES", s=2.)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("SEIP flux [μJy]")
ax.set_ylabel("SHELA/SpIES flux [μJy]")
ax.set_title("IRAC 2")
ax.legend()
ax.axvline(2000, color="black", linestyle="--", linewidth=1.)
ax.plot(seip['i1_f_ap2'][mask], seip['i1_f_ap2'][mask], linewidth=.1, color="black", alpha=.5);
When both SHELA and SpIES fluxes are provided, we use the SpIES flux.
We create a table indicating for each source the origin on the IRAC1 and IRAC2 fluxes that will be saved separately.
irac_origin = Table()
irac_origin.add_column(master_catalogue['irac_intid'])
# IRAC1 aperture flux and magnitudes
has_shela = ~np.isnan(master_catalogue['f_ap_shela_irac1'])
has_spies = ~np.isnan(master_catalogue['f_ap_spies_irac1'])
has_both = has_shela & has_spies
print("{} sources with SHELA flux".format(np.sum(has_shela)))
print("{} sources with SpIES flux".format(np.sum(has_spies)))
print("{} sources with SHELA and SpIES flux".format(np.sum(has_both)))
use_shela = has_shela
use_spies = (has_spies & ~has_shela)
print("{} sources for which we use SHELA".format(np.sum(use_shela)))
print("{} sources for which we use SpIES".format(np.sum(use_spies)))
f_ap_irac = np.full(len(master_catalogue), np.nan)
f_ap_irac[use_shela] = master_catalogue['f_ap_shela_irac1'][use_shela]
f_ap_irac[use_spies] = master_catalogue['f_ap_spies_irac1'][use_spies]
ferr_ap_irac = np.full(len(master_catalogue), np.nan)
ferr_ap_irac[use_shela] = master_catalogue['ferr_ap_shela_irac1'][use_shela]
ferr_ap_irac[use_spies] = master_catalogue['ferr_ap_spies_irac1'][use_spies]
m_ap_irac = np.full(len(master_catalogue), np.nan)
m_ap_irac[use_shela] = master_catalogue['m_ap_shela_irac1'][use_shela]
m_ap_irac[use_spies] = master_catalogue['m_ap_spies_irac1'][use_spies]
merr_ap_irac = np.full(len(master_catalogue), np.nan)
merr_ap_irac[use_shela] = master_catalogue['merr_ap_shela_irac1'][use_shela]
merr_ap_irac[use_spies] = master_catalogue['merr_ap_spies_irac1'][use_spies]
master_catalogue.add_column(Column(data=f_ap_irac, name="f_ap_irac_i1"))
master_catalogue.add_column(Column(data=ferr_ap_irac, name="ferr_ap_irac_i1"))
master_catalogue.add_column(Column(data=m_ap_irac, name="m_ap_irac_i1"))
master_catalogue.add_column(Column(data=merr_ap_irac, name="merr_ap_irac_i1"))
master_catalogue.remove_columns(['f_ap_shela_irac1', 'f_ap_spies_irac1',
'ferr_ap_shela_irac1', 'ferr_ap_spies_irac1',
'm_ap_shela_irac1', 'm_ap_spies_irac1',
'merr_ap_shela_irac1', 'merr_ap_spies_irac1'])
origin = np.full(len(master_catalogue), ' ', dtype='<U5')
origin[use_shela] = "SHELA"
origin[use_spies] = "SpIES"
irac_origin.add_column(Column(data=origin, name="IRAC1_ap"))
# IRAC1 total flux and magnitudes
has_shela = ~np.isnan(master_catalogue['f_shela_irac1'])
has_spies = ~np.isnan(master_catalogue['f_spies_irac1'])
has_both = has_shela & has_spies
print("{} sources with SHELA total flux".format(np.sum(has_shela)))
print("{} sources with SpIES total flux".format(np.sum(has_spies)))
print("{} sources with SHELA and SpIES total flux".format(np.sum(has_both)))
use_shela = has_shela
use_spies = (has_spies & ~has_shela)
print("{} sources for which we use SHELA".format(np.sum(use_shela)))
print("{} sources for which we use SpIES".format(np.sum(use_spies)))
f_ap_irac = np.full(len(master_catalogue), np.nan)
f_ap_irac[use_shela] = master_catalogue['f_shela_irac1'][use_shela]
f_ap_irac[use_spies] = master_catalogue['f_spies_irac1'][use_spies]
ferr_ap_irac = np.full(len(master_catalogue), np.nan)
ferr_ap_irac[use_shela] = master_catalogue['ferr_shela_irac1'][use_shela]
ferr_ap_irac[use_spies] = master_catalogue['ferr_spies_irac1'][use_spies]
flag_irac = np.full(len(master_catalogue), False, dtype=bool)
flag_irac[use_shela] = master_catalogue['flag_shela_irac1'][use_shela]
flag_irac[use_spies] = master_catalogue['flag_spies_irac1'][use_spies]
m_ap_irac = np.full(len(master_catalogue), np.nan)
m_ap_irac[use_shela] = master_catalogue['m_shela_irac1'][use_shela]
m_ap_irac[use_spies] = master_catalogue['m_spies_irac1'][use_spies]
merr_ap_irac = np.full(len(master_catalogue), np.nan)
merr_ap_irac[use_shela] = master_catalogue['merr_shela_irac1'][use_shela]
merr_ap_irac[use_spies] = master_catalogue['merr_spies_irac1'][use_spies]
master_catalogue.add_column(Column(data=f_ap_irac, name="f_irac_i1"))
master_catalogue.add_column(Column(data=ferr_ap_irac, name="ferr_irac_i1"))
master_catalogue.add_column(Column(data=m_ap_irac, name="m_irac_i1"))
master_catalogue.add_column(Column(data=merr_ap_irac, name="merr_irac_i1"))
master_catalogue.add_column(Column(data=flag_irac, name="flag_irac_i1"))
master_catalogue.remove_columns(['f_shela_irac1', 'f_spies_irac1',
'ferr_shela_irac1', 'ferr_spies_irac1',
'm_shela_irac1', 'm_spies_irac1',
'merr_shela_irac1', 'merr_spies_irac1',
'flag_shela_irac1', 'flag_spies_irac1'])
origin = np.full(len(master_catalogue), ' ', dtype='<U5')
origin[use_shela] = "SHELA"
origin[use_spies] = "SpIES"
irac_origin.add_column(Column(data=origin, name="IRAC1_total"))
# IRAC2 aperture flux and magnitudes
has_shela = ~np.isnan(master_catalogue['f_ap_shela_irac2'])
has_spies = ~np.isnan(master_catalogue['f_ap_spies_irac2'])
has_both = has_shela & has_spies
print("{} sources with SHELA flux".format(np.sum(has_shela)))
print("{} sources with SpIES flux".format(np.sum(has_spies)))
print("{} sources with SHELA and SpIES flux".format(np.sum(has_both)))
use_shela = has_shela
use_spies = (has_spies & ~has_shela)
print("{} sources for which we use SHELA".format(np.sum(use_shela)))
print("{} sources for which we use SpIES".format(np.sum(use_spies)))
f_ap_irac = np.full(len(master_catalogue), np.nan)
f_ap_irac[use_shela] = master_catalogue['f_ap_shela_irac2'][use_shela]
f_ap_irac[use_spies] = master_catalogue['f_ap_spies_irac2'][use_spies]
ferr_ap_irac = np.full(len(master_catalogue), np.nan)
ferr_ap_irac[use_shela] = master_catalogue['ferr_ap_shela_irac2'][use_shela]
ferr_ap_irac[use_spies] = master_catalogue['ferr_ap_spies_irac2'][use_spies]
m_ap_irac = np.full(len(master_catalogue), np.nan)
m_ap_irac[use_shela] = master_catalogue['m_ap_shela_irac2'][use_shela]
m_ap_irac[use_spies] = master_catalogue['m_ap_spies_irac2'][use_spies]
merr_ap_irac = np.full(len(master_catalogue), np.nan)
merr_ap_irac[use_shela] = master_catalogue['merr_ap_shela_irac2'][use_shela]
merr_ap_irac[use_spies] = master_catalogue['merr_ap_spies_irac2'][use_spies]
master_catalogue.add_column(Column(data=f_ap_irac, name="f_ap_irac_i2"))
master_catalogue.add_column(Column(data=ferr_ap_irac, name="ferr_ap_irac_i2"))
master_catalogue.add_column(Column(data=m_ap_irac, name="m_ap_irac_i2"))
master_catalogue.add_column(Column(data=merr_ap_irac, name="merr_ap_irac_i2"))
master_catalogue.remove_columns(['f_ap_shela_irac2', 'f_ap_spies_irac2',
'ferr_ap_shela_irac2', 'ferr_ap_spies_irac2',
'm_ap_shela_irac2', 'm_ap_spies_irac2',
'merr_ap_shela_irac2', 'merr_ap_spies_irac2'])
origin = np.full(len(master_catalogue), ' ', dtype='<U5')
origin[use_shela] = "SHELA"
origin[use_spies] = "SpIES"
irac_origin.add_column(Column(data=origin, name="IRAC2_ap"))
# IRAC2 total flux and magnitudes
has_shela = ~np.isnan(master_catalogue['f_shela_irac2'])
has_spies = ~np.isnan(master_catalogue['f_spies_irac2'])
has_both = has_shela & has_spies
print("{} sources with SHELA total flux".format(np.sum(has_shela)))
print("{} sources with SpIES total flux".format(np.sum(has_spies)))
print("{} sources with SHELA and SpIES total flux".format(np.sum(has_both)))
use_shela = has_shela
use_spies = (has_spies & ~has_shela)
print("{} sources for which we use SHELA".format(np.sum(use_shela)))
print("{} sources for which we use SpIES".format(np.sum(use_spies)))
f_ap_irac = np.full(len(master_catalogue), np.nan)
f_ap_irac[use_shela] = master_catalogue['f_shela_irac2'][use_shela]
f_ap_irac[use_spies] = master_catalogue['f_spies_irac2'][use_spies]
ferr_ap_irac = np.full(len(master_catalogue), np.nan)
ferr_ap_irac[use_shela] = master_catalogue['ferr_shela_irac2'][use_shela]
ferr_ap_irac[use_spies] = master_catalogue['ferr_spies_irac2'][use_spies]
flag_irac = np.full(len(master_catalogue), False, dtype=bool)
flag_irac[use_shela] = master_catalogue['flag_shela_irac2'][use_shela]
flag_irac[use_spies] = master_catalogue['flag_spies_irac2'][use_spies]
m_ap_irac = np.full(len(master_catalogue), np.nan)
m_ap_irac[use_shela] = master_catalogue['m_shela_irac2'][use_shela]
m_ap_irac[use_spies] = master_catalogue['m_spies_irac2'][use_spies]
merr_ap_irac = np.full(len(master_catalogue), np.nan)
merr_ap_irac[use_shela] = master_catalogue['merr_shela_irac2'][use_shela]
merr_ap_irac[use_spies] = master_catalogue['merr_spies_irac2'][use_spies]
master_catalogue.add_column(Column(data=f_ap_irac, name="f_irac_i2"))
master_catalogue.add_column(Column(data=ferr_ap_irac, name="ferr_irac_i2"))
master_catalogue.add_column(Column(data=m_ap_irac, name="m_irac_i2"))
master_catalogue.add_column(Column(data=merr_ap_irac, name="merr_irac_i2"))
master_catalogue.add_column(Column(data=flag_irac, name="flag_irac_i2"))
master_catalogue.remove_columns(['f_shela_irac2', 'f_spies_irac2',
'ferr_shela_irac2', 'ferr_spies_irac2',
'm_shela_irac2', 'm_spies_irac2',
'merr_shela_irac2', 'merr_spies_irac2',
'flag_shela_irac2', 'flag_spies_irac2'])
origin = np.full(len(master_catalogue), ' ', dtype='<U5')
origin[use_shela] = "SHELA"
origin[use_spies] = "SpIES"
irac_origin.add_column(Column(data=origin, name="IRAC2_total"))
irac_origin.write("{}/herschel-stripe-82_irac_fluxes_origins{}.fits".format(OUT_DIR, SUFFIX), overwrite = True)
master_catalogue.colnames
master_catalogue["ra"].name = "irac_ra"
master_catalogue["dec"].name = "irac_dec"
master_catalogue["flag_merged"].name = "irac_flag_merged"
columns = ["irac_intid", "shela_intid", "spies_intid",
'irac_ra', 'irac_dec', 'irac_flag_merged',
'irac_flag_cleaned', 'irac_flag_gaia', 'irac_stellarity']
bands = [column[5:] for column in master_catalogue.colnames if 'f_ap' in column]
for band in bands:
columns += ["f_ap_{}".format(band), "ferr_ap_{}".format(band),
"m_ap_{}".format(band), "merr_ap_{}".format(band),
"f_{}".format(band), "ferr_{}".format(band),
"m_{}".format(band), "merr_{}".format(band),
"flag_{}".format(band)]
# We check for columns in the master catalogue that we will not save to disk.
print("Missing columns: {}".format(set(master_catalogue.colnames) - set(columns)))
master_catalogue[:10].show_in_notebook()
master_catalogue[columns].write("{}/IRAC.fits".format(TMP_DIR, SUFFIX), overwrite=True)