This notebook presents the creation of the HELP master list on the Bootes field. This field was originally ingested into HeDAM based on a masterlist produced by Eduardo Gonzales-Solares. Ken Duncan subsequently retrieved Michael Brown's precompiled masterlist for the Bootes field and therefore this notebook only contains some simple naming changes and astrometry correction. The original catalogue is described in DMU_0.
from herschelhelp_internal import git_version
print("This notebook was run with herschelhelp_internal version: \n{}".format(git_version()))
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
from collections import OrderedDict, Counter
import os
import time
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Column, hstack, Table, vstack
import numpy as np
from herschelhelp_internal.flagging import gaia_flag_column
from herschelhelp_internal.masterlist import nb_astcor_diag_plot, remove_duplicates, nb_merge_dist_plot
from herschelhelp_internal.utils import astrometric_correction, mag_to_flux, gen_help_id
TMP_DIR = os.environ.get('TMP_DIR', "./data_tmp")
OUT_DIR = os.environ.get('OUT_DIR', "./data")
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))
try:
os.makedirs(OUT_DIR)
except FileExistsError:
pass
RA_COL = "bootes_ra"
DEC_COL = "botes_dec"
#imported_columns = OrderedDict({
# "objID": "ps1_id",
# "raMean": "ps1_ra",
# "decMean": "ps1_dec",
#
# })
i_catalogue = Table.read("../../dmu0/dmu0_Bootes_Brown/data/Bootes_merged_Icorr_2014a_all_ap2_07112017.fits") #[list(imported_columns)]
irac_i2_catalogue = Table.read("../../dmu0/dmu0_Bootes_Brown/data/bootes_merged_ch2corr_2014a_all_ap2_07112017.fits")
#for column in imported_columns:
# catalogue[column].name = imported_columns[column]
#epoch = 2012
# Clean table metadata
#catalogue.meta = None
i_catalogue['ALPHA_J2000'].name = "ra"
i_catalogue['DELTA_J2000'].name = "dec"
i_catalogue['ra'].unit = u.deg
i_catalogue['dec'].unit = u.deg
i_catalogue[:10].show_in_notebook()
irac_i2_catalogue['ALPHA_J2000'].unit = u.deg
irac_i2_catalogue['DELTA_J2000'].unit = u.deg
irac_i2_catalogue[:10].show_in_notebook()
We remove duplicated objects from the input catalogues.
"""SORT_COLS = ['merr_ap_gpc1_r', 'merr_ap_gpc1_g', 'merr_ap_gpc1_i', 'merr_ap_gpc1_z', 'merr_ap_gpc1_y']
FLAG_NAME = 'ps1_flag_cleaned'
nb_orig_sources = len(catalogue)
catalogue = remove_duplicates(catalogue, RA_COL, DEC_COL, sort_col=SORT_COLS, flag_name=FLAG_NAME)
nb_sources = len(catalogue)
print("The initial catalogue had {} sources.".format(nb_orig_sources))
print("The cleaned catalogue has {} sources ({} removed).".format(nb_sources, nb_orig_sources - nb_sources))
print("The cleaned catalogue has {} sources flagged as having been cleaned".format(np.sum(catalogue[FLAG_NAME])))"""
We match the astrometry to the Gaia one. We limit the Gaia catalogue to sources with a g band flux between the 30th and the 70th percentile. Some quick tests show that this give the lower dispersion in the results.
gaia = Table.read("../../dmu0/dmu0_GAIA/data/GAIA_Bootes.fits")
gaia_coords = SkyCoord(gaia['ra'], gaia['dec'])
nb_astcor_diag_plot(i_catalogue["ra"], i_catalogue["dec"],
gaia_coords.ra, gaia_coords.dec)
delta_ra, delta_dec = astrometric_correction(
SkyCoord(i_catalogue["ra"], i_catalogue["dec"]),
gaia_coords
)
print("RA correction: {}".format(delta_ra))
print("Dec correction: {}".format(delta_dec))
i_catalogue["ra"] = i_catalogue["ra"] + delta_ra.to(u.deg)
i_catalogue["dec"] = i_catalogue["dec"] + delta_dec.to(u.deg)
nb_astcor_diag_plot(i_catalogue["ra"], i_catalogue["dec"],
gaia_coords.ra, gaia_coords.dec)
We then correct the other catalogue
nb_astcor_diag_plot(irac_i2_catalogue["ALPHA_J2000"], irac_i2_catalogue["DELTA_J2000"],
gaia_coords.ra, gaia_coords.dec)
delta_ra, delta_dec = astrometric_correction(
SkyCoord(irac_i2_catalogue["ALPHA_J2000"], irac_i2_catalogue["DELTA_J2000"]),
gaia_coords
)
print("RA correction: {}".format(delta_ra))
print("Dec correction: {}".format(delta_dec))
irac_i2_catalogue["ALPHA_J2000"] = irac_i2_catalogue["ALPHA_J2000"] + delta_ra.to(u.deg)
irac_i2_catalogue["DELTA_J2000"] = irac_i2_catalogue["DELTA_J2000"] + delta_dec.to(u.deg)
nb_astcor_diag_plot(irac_i2_catalogue["ALPHA_J2000"], irac_i2_catalogue["DELTA_J2000"],
gaia_coords.ra, gaia_coords.dec)
nb_merge_dist_plot(
SkyCoord(i_catalogue['ra'], i_catalogue['dec']),
SkyCoord(irac_i2_catalogue["ALPHA_J2000"], irac_i2_catalogue["DELTA_J2000"] )
)
radius = 1.0*u.arcsec
coords_1 = SkyCoord(i_catalogue['ra'], i_catalogue['dec'])
coords_2 = SkyCoord(irac_i2_catalogue["ALPHA_J2000"], irac_i2_catalogue["DELTA_J2000"])
# Search for sources in second catalogue matching the sources in the first
# one.
idx_2, idx_1, d2d, _ = coords_1.search_around_sky(coords_2, radius)
# We want to flag the possible mis-associations, i.e. the sources in each
# catalogue that are associated to several sources in the other one, but
# also all the sources that are associated to a problematic source in the
# other catalogue (e.g. if two sources in the first catalogue are
# associated to the same source in the second catalogue, they must be
# flagged as potentially problematic).
#
# Search for duplicate associations
toflag_idx_1 = np.unique([item for item, count in Counter(idx_1).items()
if count > 1])
toflag_idx_2 = np.unique([item for item, count in Counter(idx_2).items()
if count > 1])
# Flagging the sources associated to duplicates
dup_associated_in_idx1 = np.in1d(idx_2, toflag_idx_2)
dup_associated_in_idx2 = np.in1d(idx_1, toflag_idx_1)
toflag_idx_1 = np.unique(np.concatenate(
(toflag_idx_1, idx_1[dup_associated_in_idx1])
))
toflag_idx_2 = np.unique(np.concatenate(
(toflag_idx_2, idx_2[dup_associated_in_idx2])
))
# Adding the flags to the catalogue. In the second catalogue, the column
# is named "flag_merged_2" and will be combined to the flag_merged column
# one the merge is done.
try:
i_catalogue["flag_merged"] |= np.in1d(np.arange(len(i_catalogue), dtype=int),
toflag_idx_1)
except KeyError:
i_catalogue.add_column(Column(
data=np.in1d(np.arange(len(i_catalogue), dtype=int), toflag_idx_1),
name="flag_merged"
))
irac_i2_catalogue.add_column(Column(
data=np.in1d(np.arange(len(irac_i2_catalogue), dtype=int), toflag_idx_2),
name="flag_merged_2"
))
# Now that we have flagged the maybe spurious associations, we want to
# associate each source of each catalogue to at most one source in the
# other one.
# We sort the indices by the distance to take the nearest counterparts in
# the following steps.
sort_idx = np.argsort(d2d)
idx_1 = idx_1[sort_idx]
idx_2 = idx_2[sort_idx]
# These array will contain the indexes of the matching sources in both
# catalogues.
match_idx_1 = np.array([], dtype=int)
match_idx_2 = np.array([], dtype=int)
while len(idx_1) > 0:
both_first_idx = np.sort(np.intersect1d(
np.unique(idx_1, return_index=True)[1],
np.unique(idx_2, return_index=True)[1],
))
new_match_idx_1 = idx_1[both_first_idx]
new_match_idx_2 = idx_2[both_first_idx]
match_idx_1 = np.concatenate((match_idx_1, new_match_idx_1))
match_idx_2 = np.concatenate((match_idx_2, new_match_idx_2))
# We remove the matching sources in both catalogues.
to_remove = (np.in1d(idx_1, new_match_idx_1) |
np.in1d(idx_2, new_match_idx_2))
idx_1 = idx_1[~to_remove]
idx_2 = idx_2[~to_remove]
# Indices of un-associated object in both catalogues.
unmatched_idx_1 = np.delete(np.arange(len(i_catalogue), dtype=int),match_idx_1)
unmatched_idx_2 = np.delete(np.arange(len(irac_i2_catalogue), dtype=int),match_idx_2)
# Sources only in cat_1
only_in_cat_1 = i_catalogue[unmatched_idx_1]
# Sources only in cat_2
only_in_cat_2 = irac_i2_catalogue[unmatched_idx_2]
# We are using the ra and dec columns from cat_2 for the position.
only_in_cat_2["ALPHA_J2000"].name = "ra"
only_in_cat_2["DELTA_J2000"].name = "dec"
# Where we have an association we take the values from the i selected catalogue.
both_in_cat_1_and_cat_2 = i_catalogue[match_idx_1] #hstack([cat_1[match_idx_1], cat_2[match_idx_2]])
# We don't need the positions from the second catalogue anymore.
#both_in_cat_1_and_cat_2.remove_columns([racol_2, decol_2])
# Logging the number of rows
print("There are {} sources only in the first catalogue".format(len(only_in_cat_1)))
print("There are {} sources only in the second catalogue".format(len(only_in_cat_2)))
print("There are {} sources in both catalogues".format(len(both_in_cat_1_and_cat_2)))
merged_catalogue = vstack([only_in_cat_1, both_in_cat_1_and_cat_2,
only_in_cat_2])
# When vertically stacking the catalogues, some values in the flag columns
# are masked because they did not exist in the catalogue some row originate
# from. We must set them to the appropriate value.
for colname in merged_catalogue.colnames:
if 'flag' in colname:
merged_catalogue[colname][merged_catalogue[colname].mask] = False
# We combined the flag_merged flags
merged_catalogue['flag_merged'] |= merged_catalogue['flag_merged_2']
merged_catalogue.remove_column('flag_merged_2')
# Replace -99 values
for col in merged_catalogue.colnames:
if col.startswith('m_'):
errcol = "merr{}".format(col[1:])
merged_catalogue[col][merged_catalogue[col] < -90.] = np.nan
merged_catalogue[errcol][merged_catalogue[errcol] < -90.] = np.nan
if col.startswith('f_'):
errcol = "ferr{}".format(col[1:])
merged_catalogue[col][merged_catalogue[col] < -90.] = np.nan
merged_catalogue[errcol][merged_catalogue[errcol] < -90.] = np.nan
merged_catalogue['CLASS_STAR'].name = 'stellarity'
merged_catalogue.remove_columns(['FLAG_DEEP', 'IMAFLAGS', 'SEGFLAGS'])
merged_catalogue[:10].show_in_notebook()
merged_catalogue.add_column(
gaia_flag_column(SkyCoord(merged_catalogue["ra"], merged_catalogue["dec"]), 2012, gaia)
)
GAIA_FLAG_NAME = "bootes_flag_gaia"
merged_catalogue['flag_gaia'].name = GAIA_FLAG_NAME
print("{} sources flagged.".format(np.sum(merged_catalogue[GAIA_FLAG_NAME] > 0)))
merged_catalogue.add_column(Column(gen_help_id(merged_catalogue['ra'], merged_catalogue['dec']),
name="help_id"))
merged_catalogue.add_column(Column(np.full(len(merged_catalogue), "Bootes", dtype='<U18'),
name="field"))
# Check that the HELP Ids are unique
if len(merged_catalogue) != len(np.unique(merged_catalogue['help_id'])):
print("The HELP IDs are not unique!!!")
else:
print("OK!")
#merged_catalogue['help_id', 'id'].write(
# "{}/master_list_cross_ident_bootes{}.fits".format(OUT_DIR, SUFFIX))
#merged_catalogue.remove_columns(['id'])
merged_catalogue.write("{}/master_catalogue_bootes{}.fits".format(OUT_DIR, SUFFIX), overwrite=True)