umap-playground

messing around with UMAP
git clone git://jb55.com/umap-playground
Log | Files | Refs | README

commit 4640e017f9c61d34cdedda2cc524ffb047f1d8d5
Author: William Casarin <jb55@jb55.com>
Date:   Wed, 28 Apr 2021 08:02:52 -0700

messing around with umap and harps

Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
A .envrc | 1+
A HARPS_RVBank_header.txt | 107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A default.nix | 6++++++
A harps_umap.py | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 202 insertions(+), 0 deletions(-)

diff --git a/.envrc b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/HARPS_RVBank_header.txt b/HARPS_RVBank_header.txt @@ -0,0 +1,107 @@ +J/A+A/???/??? Radial velocities for HARPS targets (Trifonov+, 2020) +================================================================================ +A public HARPS radial velocity database corrected for systematic errors + T. Trifonov et al. + <Astron. Astrophys. ???, ??? (2019)> + =2020A&A...???..???T +================================================================================ +ADC_Keywords: Radial velocities +Keywords: methods: data analysis - planetary systems + +Abstract: + +See https://arxiv.org/pdf/2001.05942.pdf + +Description: + Time series for radial velocities and activity indicators from HARPS + spectrograph are presented. See Trifonov et al. (2020) for a detailed + description of the parameters. + +File Summary: +-------------------------------------------------------------------------------- + FileName Lrecl Records Explanations +-------------------------------------------------------------------------------- +ReadMe 80 . 
This file +-------------------------------------------------------------------------------- + +Byte-by-byte Description of file: HARPS_RVBank.dat +-------------------------------------------------------------------------------- + Bytes Format Units Label Explanations +------------------------------------------------------------------------------ + 1-13 F13.5 d BJD Barycentric Julian date + 14-22 F8.3 m/s RV_mlc_nzp Radial velocity (from mlc.dat, sa, NZP and drift corrected, -pre and -post calculated separately) + 23-29 F6.3 m/s e_RV_mlc_nzp Radial velocity error + + 14-22 F8.3 m/s RV_drs_nzp Radial velocity (from drs.dat, sa, NZP and drift corrected -pre and -post calculated separately) + 23-29 F6.3 m/s e_RV_drs_nzp Radial velocity error + + 14-22 F8.3 m/s RV_mlc Radial velocity (from mlc.dat, sa and drift corrected, -pre and -post calculated separately) + 23-29 F6.3 m/s e_RV_mlc Radial velocity error + + 14-22 F8.3 m/s RV_drs Radial velocity (from drs.dat, sa and drift corrected, -pre and -post calculated separately) + 23-29 F6.3 m/s e_RV_drs Radial velocity error + + 14-22 F8.3 m/s RV_mlc_j Radial velocity (from mlc.dat, sa and drift corrected, -pre and -post calculated jointly) + 23-29 F6.3 m/s e_RV_mlc_j Radial velocity error + + 46-54 F8.3 m/s CRX Chromatic index + 55-63 F8.3 m/s e_CRX Chromatic index error + 64-72 F8.3 m/s*km/s dLW Differential line width + 73-81 F8.3 m/s*km/s e_dLW Differential line width error + 91-99 F8.4 --- Halpha Halpha index +100-108 F8.4 --- e_Halpha Halpha index error + 91-99 F8.4 --- NaD1 NaD1 index +100-108 F8.4 --- e_NaD1 NaD1 index error + 91-99 F8.4 --- NaD2 NaD2 index +100-108 F8.4 --- e_NaD2 NaD2 index error +109-112 I3 --- f_RV [0,15] Bitwise flag (1) +130-138 F8.3 km/s FWHM_DRS Full width half maximum +139-147 F8.3 --- CONTRAST_DRS +148-156 F8.3 m/s Bisector +157-166 F8.3 km/s RVGUESS User guess for RV +167-175 F8.3 m/s*km/s SNR_DRS Signal-to-noise ratio in order 55 +176-189 F13.5 km/s BJD_DRS Barycentric Julian date from 
DRS + 82-90 F8.3 km/s BERV Barycentric Earth radial velocity +190-198 F8.3 km/s BERV_DRS Barycentric Earth radial velocity from DRS +199-207 F8.3 m/s DRIFT drift measure +208-218 F8.3 m/s E_DRIFT drift measure error +219-227 F8.3 m/s SA Contribution from secular acceleration +228-239 F8.3 m/s NZP_mlc NZP for BJD +240-251 F8.3 m/s dNZP_mlc Contribution from intra-night systematics +252-260 F8.3 --- TMMEAN Flux weighted mean point +261-270 F8.3 s EXPTIME Exposure time +298-306 F8.3 m/s MLCRX ML Chromatic index (Slope over logarithmic wavelength) +307-315 F8.3 m/s E_MLCRX error for MLCRX (slope error) + A --- TIMEID Identifier for file (time stamp) + A --- DRIFT_LAMP FP,ThAr? + A --- MASK DRS MASK for CCF + A --- PROGID Prog-ID + A --- PROGID Prog-PI + F8.2 m/s*km/s AIRMASS Airmass + A --- OBJAB + A --- THAR_FP + A --- DPR_TYPE + + + +Comments: + +1) The formatting in this version is still not perfect !!! TBFixed +2) Some entries and details might change. + +Note (1): The flags are bitwise where: + 0 - normal (reliable) spectra, + 1 - nosci frame, e.g. calibration files, + 2 - spectra taken in I2 mode, + 4 - spectra taken in "eggs" mode, + 16 - coordinates too much off, + 32 - spectra not within a nautical twilight (daytime), + 64 - spectra with too low S/N, + 128 - spectra with too high S/N. +-------------------------------------------------------------------------------- + +Acknowledgements: + Trifon Trifonov <trifonov@mpia.de> + +================================================================================ +(End) Trifon Trifonov [MPIA, Germany] 15-August-2019 diff --git a/default.nix b/default.nix @@ -0,0 +1,6 @@ +{ pkgs ? 
import <nixpkgs> {} }: +with pkgs; +mkShell { + name = "umap_harps"; + buildInputs = with python3Packages; [ scikitlearn seaborn pandas matplotlib numpy umap-learn ]; +} diff --git a/harps_umap.py b/harps_umap.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +import numpy as np +from sklearn.datasets import load_digits +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +import matplotlib.pyplot as plt +import matplotlib.cm as cm +import matplotlib.colors as colors +import seaborn as sns +import pandas as pd +import umap + +#harps = pd.read_csv("harps-20.csv") +#harps = pd.read_csv("harps.csv") +harps = pd.read_csv("harps-all.csv") +harps.head() +harps = harps.dropna() +harps['target'] = harps['target'].astype('category') + +print(harps.RVGUESS.value_counts()) + +reducer = umap.UMAP() + +harps_data = harps[ + [ + #"RV_mlc_nzp" + #"e_RV_mlc_nzp" + #"RV_drs_nzp" + #,"e_RV_drs_nzp" + #,"RV_mlc" + #,"e_RV_mlc" + #,"RV_drs" + #,"e_RV_drs" + #,"RV_mlc_j" + #,"e_RV_mlc_j" + "CRX" + #,"e_CRX" + ,"dLW" + #,"e_dLW" + ,"Halpha" + #,"e_Halpha" + ,"NaD1" + #,"e_NaD1" + ,"NaD2" + #,"e_NaD2" + #,"f_RV" + #,"FWHM_DRS" + #,"CONTRAST_DRS" + #,"BIS" + #,"RVGUESS" + ,"SNR_DRS" + ,"BJD_DRS" + ,"BERV" + ,"BERV_DRS" + ,"DRIFT" + #,"e_DRIFT" + #,"SA" + #,"NZP_mlc" + #,"dNZP_mlc" + #,"NZP_drs" + ,"dNZP_drs" + ,"TMMEAN" + ,"EXPTIME" + ,"MLCRX" + #,"E_MLCRX" + ] +].values.astype(np.float32) + +embedding = reducer.fit_transform(harps_data) +print(embedding.shape) + +rv = harps.RV_mlc_nzp +rvmin = rv.min() + +plt.style.use('dark_background') + +plt.scatter( + embedding[:, 0], + embedding[:, 1], + norm=colors.SymLogNorm(linthresh=0.03, linscale=0.03, base=10, vmin=rv.min(), vmax=rv.max()), + c=rv, cmap=cm.cool, s=1 + ) + +plt.gca().set_aspect('equal', 'datalim') +plt.title('UMAP projection of the HARPS dataset', fontsize=24) + +plt.savefig('out.png', dpi=300)