umap-playground

messing around with UMAP
git clone git://jb55.com/umap-playground
Log | Files | Refs | README

harps_umap.py (2025B)


      1 #!/usr/bin/env python3
      2 
      3 import numpy as np
      4 from sklearn.datasets import load_digits
      5 from sklearn.model_selection import train_test_split
      6 from sklearn.preprocessing import StandardScaler
      7 import matplotlib.pyplot as plt
      8 import matplotlib.cm as cm
      9 import matplotlib.colors as colors
     10 import seaborn as sns
     11 import pandas as pd
     12 import umap
     13 
     14 #harps = pd.read_csv("harps-20.csv")
     15 #harps = pd.read_csv("harps.csv")
     16 harps = pd.read_csv("harps-all.csv")
     17 harps.head()
     18 harps = harps.dropna()
     19 harps['target'] = harps['target'].astype('category')
     20 
     21 print(harps.RVGUESS.value_counts())
     22 
     23 reducer = umap.UMAP()
     24 
     25 harps_data = harps[
     26         [
     27             #"RV_mlc_nzp"
     28             #"e_RV_mlc_nzp"
     29             #"RV_drs_nzp"
     30            #,"e_RV_drs_nzp"
     31            #,"RV_mlc"
     32            #,"e_RV_mlc"
     33            #,"RV_drs"
     34            #,"e_RV_drs"
     35            #,"RV_mlc_j"
     36            #,"e_RV_mlc_j"
     37            "CRX"
     38            #,"e_CRX"
     39            ,"dLW"
     40            #,"e_dLW"
     41            ,"Halpha"
     42            #,"e_Halpha"
     43            ,"NaD1"
     44            #,"e_NaD1"
     45            ,"NaD2"
     46            #,"e_NaD2"
     47            #,"f_RV"
     48            #,"FWHM_DRS"
     49            #,"CONTRAST_DRS"
     50            #,"BIS"
     51            #,"RVGUESS"
     52            ,"SNR_DRS"
     53            ,"BJD_DRS"
     54            ,"BERV"
     55            ,"BERV_DRS"
     56            ,"DRIFT"
     57            #,"e_DRIFT"
     58            #,"SA"
     59            #,"NZP_mlc"
     60            #,"dNZP_mlc"
     61            #,"NZP_drs"
     62            ,"dNZP_drs"
     63            ,"TMMEAN"
     64            ,"EXPTIME"
     65            ,"MLCRX"
     66            #,"E_MLCRX"
     67         ]
     68 ].values.astype(np.float32)
     69 
     70 embedding = reducer.fit_transform(harps_data)
     71 print(embedding.shape)
     72 
     73 rv = harps.RV_mlc_nzp
     74 rvmin = rv.min()
     75 
     76 plt.style.use('dark_background')
     77 
     78 plt.scatter(
     79     embedding[:, 0],
     80     embedding[:, 1],
     81     norm=colors.SymLogNorm(linthresh=0.03, linscale=0.03, base=10, vmin=rv.min(), vmax=rv.max()),
     82     c=rv, cmap=cm.cool, s=1
     83     )
     84 
     85 plt.gca().set_aspect('equal', 'datalim')
     86 plt.title('UMAP projection of the HARPS dataset', fontsize=24)
     87 
     88 plt.savefig('out.png', dpi=300)