# Overview of database files

Since we do not store the data files on GitHub, it can be difficult to verify mirrors. Here we add a summary of all data files.

In [1]:
# Print the herschelhelp_internal version string reported by git_version().
from herschelhelp_internal import git_version
print("This notebook was run with herschelhelp_internal version: \n{}".format(git_version()))
# Print the wall-clock time at which this cell was executed.
import datetime
print("This notebook was executed on: \n{}".format(datetime.datetime.now()))

This notebook was run with herschelhelp_internal version: 
017bb1e (Mon Jun 18 14:58:59 2018 +0100)
This notebook was executed on: 
2018-06-25 13:41:17.916762


In [2]:
from astropy.table import Table, Column
from astropy import units as u
from astropy.io import fits
 
import numpy as np

import glob
import hashlib

import os
import time

import yaml


from pathlib import Path



In [3]:
# Suffix read from the SUFFIX environment variable; defaults to today's date
# formatted as "_YYYYMMDD".
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))
# When True, the last cell computes a SHA-256 digest for every listed file
# (slow); when False that cell is a no-op.
MAKE_HASHES = False

## List of FITS files

The information about the product is stored in a yml file which points to the various actual data files. Here we use a template.

In [4]:
#fits_file_list = glob.glob('../dmu*/*/data/*.fits')
#fits_file_list = glob.glob('../dmu*/*/data/**/*.fits', recursive=True)

In [5]:
# Recursively collect every .yml file found below the parent directory.
yml_files = glob.glob('../**/*.yml', recursive=True)

In [6]:
yml_files

['../dmu0/dmu0_NEP-Spitzer/meta_survey.yml',
 '../dmu0/dmu0_SSDF/meta_survey.yml',
 '../dmu0/dmu0_VICS82/meta_survey.yml',
 '../dmu0/dmu0_SHELA/meta_survey.yml',
 '../dmu0/dmu0_CFHTLS/meta_survey.yml',
 '../dmu0/dmu0_SDWFS/meta_survey.yml',
 '../dmu0/dmu0_UKIDSS-LAS/meta_survey.yml',
 '../dmu0/dmu0_UKIDSS-UDS/meta_survey.yml',
 '../dmu0/dmu0_CFHTLenS/meta_survey.yml',
 '../dmu0/dmu0_UHS/meta_survey.yml',
 '../dmu0/dmu0_GOODS-ACS/meta_survey.yml',
 '../dmu0/dmu0_VIPERS-MLS/meta_survey.yml',
 '../dmu0/dmu0_2MASS-extended-sources/meta_survey.yml',
 '../dmu0/dmu0_SpARCS/meta_survey.yml',
 '../dmu0/dmu0_2MASS-point-sources/meta_survey.yml',
 '../dmu0/dmu0_CANDELS-UDS/meta_survey.yml',
 '../dmu0/dmu0_IBIS/meta_survey.yml',
 '../dmu0/dmu0_IRAC-EGS/meta_survey.yml',
 '../dmu0/dmu0_DataFusion-Spitzer/meta_survey.yml',
 '../dmu0/dmu0_NDWFS/meta_survey.yml',
 '../dmu0/dmu0_DES/meta_survey.yml',
 '../dmu0/dmu0_Ultradeep-Ks-GOODS-N/meta_survey.yml',
 '../dmu0/dmu0_SPLASH-SXDF/meta_survey.yml',
 '..

In [7]:


# Root of the dmu_products tree scanned by this notebook. All reported paths
# are made relative to it. The default is the shared mirror; set the
# DMU_PRODUCTS_ROOT environment variable to point at another copy (for example
# a local checkout) instead of editing this cell.
ROOT = Path(os.environ.get("DMU_PRODUCTS_ROOT", "/mnt/hedam/dmu_products/")).absolute()


# Lookup table from lowercase name fragments (several spellings per field:
# hyphenated, underscored, abbreviated) to the canonical field name.
# get_field() returns the value of the first key, in the order written here,
# that occurs as a substring of a lowercased file path.
FIELDS = {
 "akari-nep": "AKARI-NEP",
 "akari_nep": "AKARI-NEP",
 "akari-sep": "AKARI-SEP",
 "akari_sep": "AKARI-SEP",
 "bootes": "Bootes",
 "cdfs-swire": "CDFS-SWIRE",
 "cosmos": "COSMOS",
 "egs": "EGS",
 "elais-n1": "ELAIS-N1",
 "elais_n1": "ELAIS-N1",
 "elais-n2": "ELAIS-N2",
 "elais_n2": "ELAIS-N2",
 "elais-s1": "ELAIS-S1",
 "elais_s1": "ELAIS-S1",
 "gama-09": "GAMA-09",
 "gama09": "GAMA-09",
 "gama_09": "GAMA-09",
 "gama-12": "GAMA-12",
 "gama12": "GAMA-12",
 "gama_12": "GAMA-12",
 "gama-15": "GAMA-15",
 "gama15": "GAMA-15",
 "gama_15": "GAMA-15",
 "hdf-n": "HDF-N",
 "hdfn": "HDF-N",
 "hdf_n": "HDF-N",
 "herschel-stripe-82": "Herschel-Stripe-82",
 "herschel_stripe_82": "Herschel-Stripe-82",
 "hs82": "Herschel-Stripe-82",
 "lockman-swire": "Lockman-SWIRE",
 "lockman_swire": "Lockman-SWIRE",
 "ngp": "HATLAS-NGP",
 "hatlas-ngp": "HATLAS-NGP",
 "hatlas_ngp": "HATLAS-NGP",
 "sa13": "SA13",
 "sgp": "HATLAS-SGP",
 "hatlas-sgp": "HATLAS-SGP",
 "hatlas_sgp": "HATLAS-SGP",
 "spire-nep": "SPIRE-NEP",
 "spire_nep": "SPIRE-NEP",
 "ssdf": "SSDF",
 "xfls": "xFLS",
 "xmm-13hr": "XMM-13hr",
 "xmm13hr": "XMM-13hr",
 "xmm-lss": "XMM-LSS",
 "xmmlss": "XMM-LSS",
}


def get_meta_main(filename):
    """Find the nearest ``meta_main.yml`` above a file.

    The parent directories of ``filename`` (made absolute first) are visited
    from the innermost outwards. The first directory containing a
    ``meta_main.yml`` wins.

    Returns:
        The path of that ``meta_main.yml`` relative to ``ROOT``, or ``None``
        if ``ROOT`` is reached (or the parents run out) without a match.
    """
    for parent in Path(filename).absolute().parents:
        candidate = parent / "meta_main.yml"
        if candidate.exists():
            return candidate.relative_to(ROOT)
        # Do not search above the products tree.
        if parent == ROOT:
            return None
    return None
def get_meta_surveys(filename):
    """Collect every ``meta_survey*.yml`` found above a file.

    The parent directories of ``filename`` (made absolute first) are visited
    from the innermost outwards, up to and including ``ROOT``. In each one,
    all entries matching ``meta_survey*.yml`` are recorded.

    Returns:
        A list of the matching paths, each relative to ``ROOT``, in the order
        they were encountered (possibly empty).
    """
    found = []
    for parent in Path(filename).absolute().parents:
        found.extend(
            survey_file.relative_to(ROOT)
            for survey_file in parent.glob("meta_survey*.yml")
        )
        # ROOT itself is searched, but nothing above it.
        if parent == ROOT:
            break
    return found

def get_field(filename):
    """Guess the field a file belongs to from its path.

    The path is lowercased and compared against the keys of ``FIELDS`` in
    their definition order. The first key that occurs as a substring decides
    the result.

    Returns:
        The canonical field name from ``FIELDS``, or the string ``"BROKEN"``
        when no key matches.
    """
    lowered = filename.lower()
    return next(
        (canonical for fragment, canonical in FIELDS.items() if fragment in lowered),
        "BROKEN",
    )

def map_or_cat(filename):
    """Classify a FITS file as a map or a catalogue from its HDU types.

    The first HDU is inspected. If it is a ``PrimaryHDU`` and the file holds
    nothing else, the file is a map. If more HDUs follow, the second HDU
    decides instead.

    Args:
        filename: Path of the FITS file to open.

    Returns:
        ``"map"`` for a lone primary HDU or an ``ImageHDU``, ``"catalogue"``
        for a ``TableHDU`` or ``BinTableHDU``, ``"unknown"`` for any other
        HDU type, and ``None`` (after printing a message) when the file
        cannot be opened or read (``OSError``).
    """
    try:
        # The context manager closes the file on every exit path. A bare
        # fits.open() here leaked one open handle per inspected file.
        with fits.open(filename) as hdu_list:
            hdu = hdu_list[0]
            if type(hdu) is fits.PrimaryHDU:
                if len(hdu_list) == 1:
                    return "map"
                hdu = hdu_list[1]

            # Exact type comparison on purpose: subclasses of the table and
            # image HDU classes are reported as "unknown".
            hdu_type = type(hdu)
            if hdu_type is fits.ImageHDU:
                return "map"
            if hdu_type in (fits.TableHDU, fits.BinTableHDU):
                return "catalogue"
            return "unknown"
    except OSError:
        print(f"Problem opening {filename}")
        return None

# Walk the whole tree below ROOT and keep every ".fits" file, except files
# whose name contains "HELP_J" and anything inside a directory whose path
# contains "timeline" (case-insensitive).
fits_file_list = []
for root, directories, filenames in os.walk(ROOT):
    if "timeline" in root.lower():
        continue
    for filename in filenames:
        if not filename.endswith(".fits") or "HELP_J" in filename:
            continue
        fits_file_list.append(os.path.join(root, filename))


def _read_filters(yml_path):
    """Return the ``Filters`` entry of one metadata YAML file.

    Returns ``''`` when the file has no ``Filters`` key or is empty.
    Raises ``yaml.YAMLError`` when the file cannot be parsed.
    """
    # safe_load replaces the Loader-less yaml.load(), which is unsafe on
    # untrusted input and is rejected outright by PyYAML >= 6. The with-block
    # also closes the file, which the previous bare open() never did.
    with open(yml_path, "r") as yml_file:
        meta = yaml.safe_load(yml_file)
    # An empty YAML document parses to None; treat it as having no entries.
    return (meta or {}).get("Filters", '')


# One dict per FITS file; becomes one row of the summary table.
files = []

for f in fits_file_list:
    d = {}
    d['filename'] = str(Path(f).relative_to(ROOT))
    d['field'] = get_field(f)

    # The first two components of the ROOT-relative path.
    path_parts = d['filename'].split("/")
    d['dmu'] = path_parts[0]
    d['dmu_folder'] = path_parts[1]

    meta_main = get_meta_main(f)
    d['meta_main'] = str(meta_main)

    meta_surveys = get_meta_surveys(f)
    d['meta_surveys'] = ", ".join(str(item) for item in meta_surveys)

    if "dmu0" not in f:
        # Outside dmu0 the bands come from the nearest meta_main.yml.
        if meta_main is not None:
            try:
                bands = _read_filters(ROOT / meta_main)
            except yaml.YAMLError:
                # Any YAML syntax problem is reported in the table rather
                # than aborting the whole scan.
                bands = "Broken meta_main.yml"
        else:
            bands = "No meta_main.yml"

    else:
        # Inside dmu0 the bands are gathered from every meta_survey*.yml
        # found above the file.
        bands = []
        for survey in meta_surveys:
            try:
                b = _read_filters(ROOT / survey)
                if b is not None:
                    bands.append(b)
            except yaml.YAMLError:
                bands.append("Broken meta_survey.yml")
        bands = ", ".join(bands)

    d['bands'] = bands

    d['type'] = map_or_cat(f)

    files.append(d)

# Write the summary with a fixed column order.
Table(files)['dmu',
             'dmu_folder',
             'filename',
             'field',
             'type',
             'bands',
             'meta_main',
             'meta_surveys'].write("fits_files.csv", format="ascii.csv")

Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/70101890.70101890-0.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/60095871.60095871-11.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/60095871.60095871-33.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/60095871.60095871-3.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/40041240.40041240-0.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/50061580.50061580-0.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/60095871.60095871-22.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XMM-LSS/mocs/40032010.40032010-0.MIPS.1.moc.fits
Problem opening /mnt/hedam/dmu_products/dmu17/dmu17_HELP-SEIP-maps/XM



Problem opening /mnt/hedam/dmu_products/dmu19/dmu19_HerMES/data/hers-helms-xmm_itermap_20160623_PSW.fits


In [8]:
fits_file_list

['/mnt/hedam/dmu_products/dmu23/dmu23_Lockman-SWIRE/data/Lockman_SWIRE-specz-v2.1.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_Herschel-Stripe-82/data/HELP-SPECZ_Herschel-Stripe-82_20170202.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_COSMOS/data/COSMOS-specz-v2.5-public_helpcoverage_helpid_20160512.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_SSDF/data/SSDF-specz-v2.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_ELAIS-S1/data/ELAIS-S1-specz-v2.2.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_ELAIS-S1/data/old/ELAIS-S1-specz-v2.1.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_ELAIS-S1/data/old/ELAIS-S1-specz-v2_hedam.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_GAMA-09/data/HELP-SPECZ_GAMA-09_20170202-2.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_ELAIS-N2/data/ELAIS-N2-specz-v2.1.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_ELAIS-N2/data/old/ELAIS-N2-specz-v2_hedam.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23_CDFS-SWIRE/data/CDFS_SWIRE-specz-v2.3.fits',
 '/mnt/hedam/dmu_products/dmu23/dmu23

## Add hashes (takes a very long time)

In [9]:


def hash_bytestr_iter(bytesiter, hasher, ashexstr=False):
    """Feed every block of an iterable into a hash object and return the digest.

    Args:
        bytesiter: Iterable yielding the blocks to hash, in order.
        hasher: Hash object exposing ``update``, ``digest`` and ``hexdigest``
            (it is updated in place).
        ashexstr: If true, return the hexadecimal string digest; otherwise
            return the raw digest.

    Returns:
        ``hasher.hexdigest()`` or ``hasher.digest()`` after all blocks have
        been consumed.
    """
    for chunk in bytesiter:
        hasher.update(chunk)
    if ashexstr:
        return hasher.hexdigest()
    return hasher.digest()

def file_as_blockiter(afile, blocksize=65536):
    """Yield the contents of an open file in blocks, then close it.

    Args:
        afile: Open file-like object; it is used as a context manager and is
            therefore closed once the generator finishes.
        blocksize: Maximum size passed to each ``read`` call.

    Yields:
        Successive non-empty results of ``afile.read(blocksize)`` until an
        empty read signals the end of the file.
    """
    with afile:
        while True:
            chunk = afile.read(blocksize)
            if len(chunk) == 0:
                return
            yield chunk
 

In [10]:
if MAKE_HASHES:
    # Table the digests are attached to. It was previously referenced without
    # ever being created, so enabling MAKE_HASHES raised a NameError.
    file_overview = Table(files)
    hashes = [
        (
            fname,
            hash_bytestr_iter(
                # 'filename' is stored relative to ROOT, so resolve it before
                # opening; file_as_blockiter closes the handle when done.
                file_as_blockiter(open(ROOT / str(fname), 'rb')),
                hashlib.sha256(),
                # Store hex text: raw digest bytes lose trailing NUL bytes
                # when held in a NumPy byte-string column.
                ashexstr=True,
            ),
        )
        for fname in file_overview['filename']
    ]
    file_overview.add_column(Column(data=[digest for _, digest in hashes], name='hashes'))