harps_umap.py (2025B)
#!/usr/bin/env python3
"""Project a table of HARPS measurements to 2-D with UMAP and plot it.

The script reads ``harps-all.csv``, drops every row that has a missing
value in any column, embeds the selected feature columns with a
default-configured ``umap.UMAP`` and writes a scatter plot of the
embedding, coloured by the ``RV_mlc_nzp`` column, to ``out.png``.
"""

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import seaborn as sns
import pandas as pd
import umap

# Alternative inputs (swap in as needed): "harps-20.csv", "harps.csv".
INPUT_CSV = "harps-all.csv"
OUTPUT_PNG = "out.png"

# Columns fed to UMAP.  One entry per line with a trailing comma, so any
# commented-out candidate can be re-enabled without touching its neighbours.
FEATURE_COLUMNS = [
    # "RV_mlc_nzp",
    # "e_RV_mlc_nzp",
    # "RV_drs_nzp",
    # "e_RV_drs_nzp",
    # "RV_mlc",
    # "e_RV_mlc",
    # "RV_drs",
    # "e_RV_drs",
    # "RV_mlc_j",
    # "e_RV_mlc_j",
    "CRX",
    # "e_CRX",
    "dLW",
    # "e_dLW",
    "Halpha",
    # "e_Halpha",
    "NaD1",
    # "e_NaD1",
    "NaD2",
    # "e_NaD2",
    # "f_RV",
    # "FWHM_DRS",
    # "CONTRAST_DRS",
    # "BIS",
    # "RVGUESS",
    "SNR_DRS",
    "BJD_DRS",
    "BERV",
    "BERV_DRS",
    "DRIFT",
    # "e_DRIFT",
    # "SA",
    # "NZP_mlc",
    # "dNZP_mlc",
    # "NZP_drs",
    "dNZP_drs",
    "TMMEAN",
    "EXPTIME",
    "MLCRX",
    # "E_MLCRX",
]

# --- Load and clean -------------------------------------------------------
# dropna() with no arguments removes a row if ANY column is missing, not
# only the columns listed in FEATURE_COLUMNS.
harps = pd.read_csv(INPUT_CSV).dropna()
harps['target'] = harps['target'].astype('category')

print(harps.RVGUESS.value_counts())

# --- Embed ----------------------------------------------------------------
# NOTE(review): no random_state is passed, so the embedding may differ from
# run to run -- confirm whether reproducible output is wanted.
reducer = umap.UMAP()

# NOTE(review): the features go to UMAP unscaled although StandardScaler is
# imported above; columns with a large numeric range will presumably
# dominate the distances -- confirm this is intended.
harps_data = harps[FEATURE_COLUMNS].to_numpy(dtype=np.float32)

embedding = reducer.fit_transform(harps_data)
print(embedding.shape)

# --- Plot -----------------------------------------------------------------
# Colour scale limits come from the data itself; computed once and reused.
rv = harps.RV_mlc_nzp
rvmin = rv.min()
rvmax = rv.max()

plt.style.use('dark_background')

plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
    c=rv,
    cmap=cm.cool,
    # Symmetric-log colour scale: linear within +/-linthresh of zero and
    # logarithmic beyond it.
    norm=colors.SymLogNorm(
        linthresh=0.03,
        linscale=0.03,
        base=10,
        vmin=rvmin,
        vmax=rvmax,
    ),
    s=1,
)

# Equal scaling on both axes so distances in the embedding are not distorted.
plt.gca().set_aspect('equal', 'datalim')
plt.title('UMAP projection of the HARPS dataset', fontsize=24)

plt.savefig(OUTPUT_PNG, dpi=300)