├── .gitignore
├── LICENSE
├── README.md
├── doc
│   ├── slam_tutorial.html
│   ├── slam_tutorial.ipynb
│   ├── slam_tutorial.py
│   └── slam_tutorial_2020.08.ipynb
├── install.sh
├── setup.py
├── slam
│   ├── __init__.py
│   ├── analysis.py
│   ├── apogee.py
│   ├── binning.py
│   ├── config.py
│   ├── costfunction.py
│   ├── data
│   │   └── OpticalLineList.csv
│   ├── diagnostic.py
│   ├── extern
│   │   ├── __init__.py
│   │   ├── interpolate.py
│   │   └── polynomial.py
│   ├── hyperparameter.py
│   ├── lndi.py
│   ├── logger.py
│   ├── mcmc.py
│   ├── model.py
│   ├── normalization.py
│   ├── parallel.py
│   ├── plotting.py
│   ├── postprocessing.py
│   ├── predict.py
│   ├── slam.py
│   ├── slam2.py
│   ├── slam3.py
│   ├── standardization.py
│   ├── train.py
│   ├── train2.py
│   └── utils.py
└── upload.sh

/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.fits
3 | *.aux
4 | *.dvi
5 | *.log
6 | *.blg
7 | *.pyc
8 | *.pdf
9 | *.eps
10 | *.png
11 | .idea
12 | .ipynb_checkpoints
13 | *.ipynb
14 | MANIFEST
15 | dist/*
16 | build/
17 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2016 Bo Zhang @NAOC
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## SLAM [![DOI](https://zenodo.org/badge/161135292.svg)](https://zenodo.org/badge/latestdoi/161135292)
2 | 
3 | Stellar LAbel Machine (SLAM) is a forward model for estimating stellar labels (e.g., Teff, logg and chemical abundances) from spectra.
4 | It is based on Support Vector Regression (SVR), a non-parametric regression method.
5 | 
6 | For details of **SLAM**, see [Deriving the stellar labels of LAMOST spectra with Stellar LAbel Machine (SLAM)](https://ui.adsabs.harvard.edu/abs/2020ApJS..246....9Z/abstract).
7 | Related projects: click [here](https://ui.adsabs.harvard.edu/abs/2020ApJS..246....9Z/citations).
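Conceptually, SLAM trains one SVR per spectral pixel that maps (scaled) stellar labels to normalized flux; estimating labels for an observed spectrum then amounts to inverting this forward model via chi-square minimization. Below is a minimal conceptual sketch of that idea in plain scikit-learn on synthetic toy arrays; it is an illustration, not the SLAM API (the SVR hyper-parameters mirror those used in `doc/slam_tutorial.py`).

```python
# Conceptual sketch of SLAM's forward model (illustration only, not the SLAM API):
# one SVR per pixel maps stellar labels -> normalized flux.
import numpy as np
from sklearn.svm import SVR

n_star, n_pixel = 100, 50
labels = np.random.uniform(-1., 1., (n_star, 3))       # scaled Teff/logg/[M/H] (toy)
flux = np.random.uniform(0.8, 1.0, (n_star, n_pixel))  # normalized flux (toy)

# training: fit an independent SVR at every pixel
svrs = [SVR(C=2.0, epsilon=0.1, gamma=1.0).fit(labels, flux[:, i])
        for i in range(n_pixel)]

# prediction: evaluate the forward model at trial labels;
# label estimation then minimizes chi2(observed_flux, model_flux)
trial_labels = np.array([[0.5, 0.5, 0.5]])
model_flux = np.array([svr.predict(trial_labels)[0] for svr in svrs])
```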
8 | 
9 | ## Author
10 | 
11 | Bo Zhang (bozhang@nao.cas.cn)
12 | 
13 | ## Home page
14 | 
15 | - [https://github.com/hypergravity/astroslam](https://github.com/hypergravity/astroslam)
16 | - [https://pypi.org/project/astroslam/](https://pypi.org/project/astroslam/)
17 | 
18 | ## Install
19 | - for the latest **stable** version:
20 |   - `pip install -U astroslam`
21 | - for the latest **github** version:
22 |   - `pip install -U git+https://github.com/hypergravity/astroslam`
23 | - for the **Zenodo** version:
24 |   - [https://zenodo.org/record/3461504](https://zenodo.org/record/3461504)
25 | 
26 | ## Tutorial
27 | [updated on 2020-12-02]
28 | - A new SLAM tutorial can be found [here](https://nbviewer.jupyter.org/github/hypergravity/spectroscopy/blob/main/stellar_parameters/demo_slam/demo_slam.ipynb)
29 | - If you are interested in SLAM or have any related questions, do not hesitate to contact me.
30 | 
31 | ## Requirements
32 | - numpy
33 | - scipy
34 | - matplotlib
35 | - astropy
36 | - scikit-learn
37 | - joblib
38 | - pandas
39 | - emcee
40 | 
41 | ## How to cite
42 | Paper:
43 | ```
44 | @ARTICLE{2020ApJS..246....9Z,
45 |     author = {{Zhang}, Bo and {Liu}, Chao and {Deng}, Li-Cai},
46 |     title = "{Deriving the Stellar Labels of LAMOST Spectra with the Stellar LAbel Machine (SLAM)}",
47 |     journal = {\apjs},
48 |     keywords = {Astronomical methods, Astronomy data analysis, Bayesian statistics, Stellar abundances, Chemical abundances, Fundamental parameters of stars, Catalogs, Surveys, Astrophysics - Solar and Stellar Astrophysics, Astrophysics - Astrophysics of Galaxies, Astrophysics - Instrumentation and Methods for Astrophysics},
49 |     year = 2020,
50 |     month = jan,
51 |     volume = {246},
52 |     number = {1},
53 |     eid = {9},
54 |     pages = {9},
55 |     doi = {10.3847/1538-4365/ab55ef},
56 |     archivePrefix = {arXiv},
57 |     eprint = {1908.08677},
58 |     primaryClass = {astro-ph.SR},
59 |     adsurl = {https://ui.adsabs.harvard.edu/abs/2020ApJS..246....9Z},
60 |     adsnote = {Provided by the SAO/NASA Astrophysics Data System}
61 | }
62 | ```
63 | Code:
64 | ```
65 | @misc{https://doi.org/10.5281/zenodo.3461504,
66 |     author = {Zhang, Bo},
67 |     title = {hypergravity/astroslam: Stellar LAbel Machine},
68 |     doi = {10.5281/zenodo.3461504},
69 |     url = {https://zenodo.org/record/3461504},
70 |     publisher = {Zenodo},
71 |     year = {2019}
72 | }
73 | ```
74 | 
75 | For other formats, please go to [https://search.datacite.org/works/10.5281/zenodo.3461504](https://search.datacite.org/works/10.5281/zenodo.3461504).
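## Quick start (sketch)

A hedged quickstart distilled from `doc/slam_tutorial.py`. The toy arrays below stand in for real normalized spectra and training labels; see the tutorial for reading APOGEE spectra, interpolation, and continuum normalization. The hyper-parameters follow the tutorial, with `n_jobs` reduced.

```python
import numpy as np
from slam import Slam

# toy stand-ins: wave (n_pixel,), flux/ivar (n_star, n_pixel), labels (n_star, n_label)
n_star, n_pixel = 50, 100
wave = np.linspace(15200., 17000., n_pixel)
flux_norm = np.random.uniform(0.8, 1.0, (n_star, n_pixel))
ivar_norm = np.full_like(flux_norm, 1e4)
labels = np.random.uniform(-1., 1., (n_star, 3))

k = Slam(wave, flux_norm, ivar_norm, labels)
# train one SVR per pixel with user-defined hyper-parameters
k.train_pixels(method='simple', n_jobs=4, verbose=5, C=2.0, epsilon=0.1, gamma=1.)

# initial guess via template matching, then refined estimates
label_init = k.predict_labels_quick(flux_norm, ivar_norm, n_jobs=4)
label_best = k.predict_labels_multi(label_init, flux_norm, ivar_norm, n_jobs=4)
```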
76 | -------------------------------------------------------------------------------- /doc/slam_tutorial.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # # A simple tutorial to Stellar LAbel Machine (SLAM) 5 | # 6 | # **Bo Zhang** (), Created on Thu Jan 19 15:48:12 2017 7 | # 8 | # 9 | # 10 | 11 | # In[2]: 12 | 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | import os 16 | from slam.slam import Slam 17 | from slam.diagnostic import compare_labels 18 | from slam.apogee import aspcapStar_read, apStar_read 19 | from slam.normalization import normalize_spectrum, normalize_spectra_block 20 | from slam.binning import interp_pchip, interp_linear, interp_nearest 21 | from astropy.table import Table, Column 22 | from joblib import Parallel, delayed, dump, load 23 | 24 | os.chdir("/home/cham/PycharmProjects/slam/doc/example_DR10/Data") 25 | 26 | 27 | # In[3]: 28 | 29 | """ load catalog """ 30 | t = Table.read('../reference_labels.csv') 31 | label3 = np.array(t['Teff_{corr}', 'logg_{corr}', '[M/H]_{corr}'].to_pandas()) 32 | 33 | 34 | # In[4]: 35 | 36 | """ define functions """ 37 | def apStar_read_interp(fp, wave_interp, i_use=0): 38 | spec = apStar_read(fp, full=True, meta=True, verbose=False) 39 | wave_rest = spec['wave']#/(1+rv/299792.458) 40 | 41 | if spec.meta['NVISITS'] > 1: 42 | flux_interp = interp_linear(wave_rest, spec['flux'][:, i_use], wave_interp, 0) 43 | ivar_interp = (1./interp_linear(wave_rest, spec['flux_err'][:, i_use], wave_interp, 1E10))**2 44 | mask_interp = interp_nearest(wave_rest, spec['mask'][:, i_use], wave_interp, 1) 45 | else: 46 | flux_interp = interp_linear(wave_rest, spec['flux'], wave_interp, 0) 47 | ivar_interp = (1./interp_linear(wave_rest, spec['flux_err'], wave_interp, 1E10))**2 48 | mask_interp = interp_nearest(wave_rest, spec['mask'], wave_interp, 1) 49 | return flux_interp, ivar_interp, mask_interp 50 | 51 | def apStar_read_block(fps, wave_interp, n_jobs=1, verbose=False): 52 | r = Parallel(n_jobs=n_jobs, verbose=verbose)( 53 | delayed(apStar_read_interp)(fp,wave_interp,0) for fp in fps) 54 | flux_block = np.array([_[0] for _ in r]) 55 | ivar_block = np.array([_[1] for _ in r]) 56 | mask_block = np.array([_[2] for _ in r]) 57 | return flux_block, ivar_block, mask_block 58 | 59 | def aspcapStar_read_interp(fp, wave_interp, i_use=0): 60 | spec = aspcapStar_read(fp, meta=True) 61 | return spec['flux'], 1./spec['flux_err'].data**2. 
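# NB: aspcapStar spectra already share a common wavelength grid, so
# aspcapStar_read_interp() above performs no actual interpolation and ignores
# its `wave_interp` / `i_use` arguments. The returned weight is the inverse
# variance, ivar = 1 / flux_err**2; pixels with flux_err == 0 yield
# non-finite ivar, which aspcapStar_read_block() below resets to zero (i.e.,
# zero weight). aspcapStar_read_block() also picks up `wave_interp` from the
# module scope, so the wavelength grid must be defined before it is called.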
62 | 63 | def aspcapStar_read_block(fps, n_jobs=1, verbose=False): 64 | r = Parallel(n_jobs=n_jobs, verbose=verbose)( 65 | delayed(aspcapStar_read_interp)(fp,wave_interp,0) for fp in fps) 66 | flux_block = np.array([_[0] for _ in r]) 67 | ivar_block = np.array([_[1] for _ in r]) 68 | ivar_block = np.where(np.isfinite(ivar_block), ivar_block, np.zeros_like(ivar_block)) 69 | return flux_block, ivar_block 70 | 71 | 72 | # In[5]: 73 | 74 | """ define wavelength grid according to the first spectrum """ 75 | spec = aspcapStar_read(t['ID'][0], True, True) 76 | wave_interp = spec['wave'].data 77 | 78 | """ read all spectra """ 79 | flux_block, ivar_block = aspcapStar_read_block(t['ID'].data, n_jobs=-1) 80 | 81 | """ normalization """ 82 | r1 = normalize_spectra_block( 83 | wave_interp, flux_block, (15200., 17000.), 30., p=(1E-8, 1E-7), q=0.7, 84 | eps=1E-19, ivar_block=flux_block > 0, rsv_frac=1., n_jobs=10, verbose=5) 85 | flux_norm, flux_cont = r1 86 | ivar_norm = flux_cont**2*ivar_block 87 | 88 | 89 | # In[6]: 90 | 91 | """ training """ 92 | k = Slam(wave_interp, flux_norm, ivar_norm, label3) 93 | # train: using simple user-defined hyper-parameters 94 | # it takes ~2 min using 32 cores 95 | k.train_pixels(method='simple', n_jobs=24, verbose=5, C=2.0, epsilon=0.1, gamma=1.) 96 | 97 | 98 | # In[7]: 99 | 100 | """ test """ 101 | # here simply to test SLAM on training sample 102 | slc = slice(0, flux_norm.shape[0], 20) 103 | # 1. derive stellar parameters using template matching 104 | # it takes a few minutes 105 | label_init = k.predict_labels_quick(flux_norm[slc, :], ivar_norm[slc, :], n_jobs=24) 106 | # 2. re-fine the initial guess 107 | # it takes an hour ... 108 | label_refined = k.predict_labels_multi(label_init, flux_norm[slc, :], ivar_norm[slc, :], n_jobs=24) 109 | 110 | 111 | # In[10]: 112 | 113 | """ plot results """ 114 | fig = compare_labels(label3[slc, :], label_refined, 'ASPCAP', 'SLAM') 115 | 116 | 117 | # In[11]: 118 | 119 | fig 120 | 121 | 122 | # In[ ]: 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | rm -rf build 3 | rm -rf dist 4 | #python setup.py build_ext --inplace 5 | #python setup.py install 6 | python setup.py sdist 7 | pip install --force-reinstall dist/*.tar.gz -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name='astroslam', 8 | version="1.2022.1228.1", 9 | author='Bo Zhang', 10 | author_email='bozhang@nao.cas.cn', 11 | description=('A forward model using SVR to estimate stellar parameters' 12 | ' from spectra.'), # short description 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url='http://github.com/hypergravity/astroslam', 16 | packages=setuptools.find_packages(), 17 | license='MIT', 18 | classifiers=["Development Status :: 5 - Production/Stable", 19 | "Intended Audience :: Science/Research", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python :: 3.9", 23 | "Topic :: Scientific/Engineering :: Physics", 24 | "Topic :: Scientific/Engineering :: Astronomy"], 25 | package_dir={'slam': 'slam', 26 | 'extern': 'slam/extern'}, 
27 |     package_data={'slam': ['data/*.csv']},
28 |     # include_package_data=True,
29 |     install_requires=[
30 |         'numpy',
31 |         'scipy',
32 |         'matplotlib',
33 |         'astropy',
34 |         'laspec',
35 |         'scikit-learn==1.2.0',
36 |         'joblib==1.2.0',
37 |         'pandas',
38 |         'emcee',
39 |         'lmfit',
40 |         'ipyparallel'
41 |     ]
42 | )
43 | 
--------------------------------------------------------------------------------
/slam/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "1.2019.0109.4"
2 | from .slam3 import Slam3 as Slam
--------------------------------------------------------------------------------
/slam/analysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 
4 | Author
5 | ------
6 | Bo Zhang
7 | 
8 | Email
9 | -----
10 | bozhang@nao.cas.cn
11 | 
12 | Created on
13 | ----------
14 | - Tue Jun 20 10:00:00 2017
15 | 
16 | Modifications
17 | -------------
18 | - Tue Jun 20 10:00:00 2017
19 | 
20 | Aims
21 | ----
22 | - error analysis
23 | 
24 | """
25 | import warnings
26 | 
27 | import numpy as np
28 | from lmfit.models import GaussianModel
29 | from matplotlib import pyplot as plt
30 | from scipy.optimize import minimize, curve_fit
31 | from joblib import dump, load, Parallel, delayed
32 | 
33 | 
34 | # ################ #
35 | #  likelihood fit
36 | # ################ #
37 | 
38 | def lnprior(theta):
39 |     mu, sigma = theta
40 |     if sigma < 0:
41 |         return -np.inf
42 |     else:
43 |         return 0
44 | 
45 | 
46 | def lnlike(theta, data):
47 |     mu, sigma = theta
48 |     return np.sum(-(data - mu) ** 2 / sigma ** 2 / 2) - \
49 |            len(data) * np.log(sigma)
50 | 
51 | 
52 | def lnpost(theta, data):
53 |     lp = lnprior(theta)
54 |     if not np.isfinite(lp):
55 |         return lp
56 |     else:
57 |         return lnlike(theta, data) + lp
58 | 
59 | 
60 | def nlnpost(*args):
61 |     return -lnpost(*args)
62 | 
63 | 
64 | def gfit_mle(theta0, data):
65 |     return minimize(nlnpost, theta0, args=(data,))
66 | 
67 | 
68 | def label_diff_mle(label1, label2):
69 |     label1 = np.array(label1)
70 |     label2 = np.array(label2)
71 |     assert label1.shape == label2.shape
72 | 
73 |     n_obs, n_dim = label1.shape
74 |     bias = np.zeros((n_dim,), dtype=float)
75 |     scatter = np.zeros((n_dim,), dtype=float)
76 | 
77 |     for i_dim in range(n_dim):
78 |         data = label1[:, i_dim] - label2[:, i_dim]
79 |         theta0 = np.array([np.median(data), 2 * np.std(data)])
80 |         x = gfit_mle(theta0, data)
81 |         if x.success:
82 |             bias[i_dim], scatter[i_dim] = x['x']
83 |         else:
84 |             warnings.warn("@GFIT: not successful [i_dim={}]!".format(i_dim))
85 |             print("------- X [i_dim={}] -------".format(i_dim))
86 |             print(x)
87 |             bias[i_dim], scatter[i_dim] = x['x']
88 | 
89 |     return bias, scatter
90 | 
91 | 
92 | def test_gfit_mle():
93 |     data = np.random.randn(10000, )
94 |     theta0 = np.array([1., 1.])
95 |     print(gfit_mle(theta0, data))
96 | 
97 | 
98 | # ################ #
99 | #    binned fit
100 | # ################ #
101 | 
102 | def gauss1d(x, a, b, c):
103 |     return a / (np.sqrt(2. * np.pi) * c) * np.exp(
104 |         -(x - b) ** 2 / c ** 2 / 2)
105 | 
106 | 
107 | def gauss1d_cost(theta, x, y):
108 |     a, b, c = theta
109 |     if a <= 0 or c <= 0:
110 |         return -np.inf
111 |     return gauss1d(x, a, b, c) - y
112 | 
113 | 
114 | def gfit_bin(theta0, data):
115 |     # hist, bin_edges = np.histogram(data, bins='auto')
116 |     bins = np.arange(np.min(data), np.max(data), np.std(data)/3)
117 |     hist, bin_edges = np.histogram(data, bins=bins)
118 |     bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
119 |     # figure();
120 |     # plot(bin_centers, hist)
121 |     # return least_squares(gauss1d_cost, theta0, args=(bin_centers, hist))
122 |     return curve_fit(gauss1d, bin_centers, hist, p0=theta0,
123 |                      bounds=([0, -np.inf, 0], [np.inf, np.inf, np.inf]))
124 | 
125 | 
126 | def test_gfit_bin():
127 |     data = np.random.randn(10000, )
128 |     theta0 = np.array([1., np.median(data), 2 * np.std(data)])
129 |     theta = gfit_bin(theta0, data)[0]
130 |     print(theta)
131 | 
132 |     # hist, bin_edges = np.histogram(data, bins='auto')
133 |     # bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
134 |     # figure();
135 |     # plot(bin_centers, hist)
136 |     # plot(bin_centers, gauss1d(bin_centers, *theta))
137 | 
138 | 
139 | def label_diff_bin(label1, label2, plot=False):
140 |     label1 = np.array(label1)
141 |     label2 = np.array(label2)
142 |     assert label1.shape == label2.shape
143 | 
144 |     n_obs, n_dim = label1.shape
145 |     amp = np.zeros((n_dim,), dtype=float)
146 |     bias = np.zeros((n_dim,), dtype=float)
147 |     scatter = np.zeros((n_dim,), dtype=float)
148 | 
149 |     for i_dim in range(n_dim):
150 |         data = label1[:, i_dim] - label2[:, i_dim]
151 |         # data = data[np.logical_and(data > np.percentile(data, 0), data < np.percentile(data, 100))]
152 |         theta0 = np.array([1., np.median(data), 2 * np.std(data)])
153 |         amp[i_dim], bias[i_dim], scatter[i_dim] = gfit_bin(theta0, data)[0]
154 | 
155 |     if plot:
156 |         fig = plt.figure(figsize=(3 * n_dim, 4))
157 |         for i_dim in range(n_dim):
158 |             ax = fig.add_subplot(1, n_dim, i_dim + 1)
159 |             data = label1[:, i_dim] - label2[:, i_dim]
160 |             # data = data[np.logical_and(data > np.percentile(data, 0),
161 |             #                            data < np.percentile(data, 100))]
162 |             bins = np.arange(np.min(data), np.max(data), np.std(data) / 2)
163 |             hist, bin_edges = np.histogram(data, bins=bins)
164 |             bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
165 |             ax.step(bin_edges, np.hstack((hist, 0)), where='post')
166 |             ax.plot(bin_edges, gauss1d(bin_edges, amp[i_dim], bias[i_dim], scatter[i_dim]))
167 | 
168 |     return bias, scatter
169 | 
170 | 
171 | # ############################### #
172 | # binned gaussian fit using LMFIT
173 | # ############################### #
174 | 
175 | def gfit_bin_lmfit(data, bins='auto', bin_std=3, plot=False):
176 |     if bins == 'robust':
177 |         bins = np.arange(np.min(data), np.max(data), np.std(data)/bin_std)
178 | 
179 |     # binned statistics
180 |     hist, bin_edges = np.histogram(data, bins=bins)
181 |     bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
182 | 
183 |     # fit a gaussian model to data using LMFIT
184 |     gm = GaussianModel()
185 |     theta_guess = gm.guess(hist, x=bin_centers)
186 |     fr = gm.fit(hist, theta_guess, x=bin_centers, method="least_squares")
187 |     # fr.fit_report()
188 | 
189 |     return (fr.values['amplitude'], fr.values['center'], fr.values['sigma']), fr
190 | 
191 | 
192 | def run_mcmc(fr, **kwargs):
193 |     fr.mcmc = fr.emcee(**kwargs)
194 |     return fr
195 | 
196 | 
197 | def label_diff_lmfit(label1, label2, bins='auto', bin_std=3, plot=False,
198 |                      emcee=True):
199 |     label1 = np.array(label1)
200 |     label2 = np.array(label2)
201 |     assert label1.shape == label2.shape
202 | 
203 |     n_obs, n_dim = label1.shape
204 |     amp = np.zeros((n_dim,), dtype=float)
205 |     bias = np.zeros((n_dim,), dtype=float)
206 |     scatter = np.zeros((n_dim,), dtype=float)
207 |     frs = np.zeros((n_dim,), dtype=object)
208 | 
209 |     for i_dim in range(n_dim):
210 |         data = label2[:, i_dim] - label1[:, i_dim]
211 |         theta, frs[i_dim] = \
212 |             gfit_bin_lmfit(data,
bins=bins, bin_std=bin_std, plot=False) 213 | amp[i_dim], bias[i_dim], scatter[i_dim] = theta 214 | params = [fr.params for fr in frs] 215 | 216 | if emcee: 217 | frs = Parallel(n_jobs=-1)(delayed(run_mcmc)( 218 | fr, steps=1000, nwalkers=50, burn=300, workers=1) for fr in frs) 219 | for i_dim in range(n_dim): 220 | bias[i_dim] = np.median(frs[i_dim].mcmc.flatchain["center"]) 221 | scatter[i_dim] = np.median(frs[i_dim].mcmc.flatchain["sigma"]) 222 | params = [fr.mcmc.params for fr in frs] 223 | 224 | histdata = [] 225 | if plot: 226 | gm = GaussianModel() 227 | fig = plt.figure(figsize=(3 * n_dim, 4)) 228 | for i_dim in range(n_dim): 229 | ax = fig.add_subplot(1, n_dim, i_dim + 1) 230 | data = label2[:, i_dim] - label1[:, i_dim] 231 | 232 | # binned statistics 233 | if bins == 'robust': 234 | bins = np.arange( 235 | np.min(data), np.max(data), np.std(data) / bin_std) 236 | hist, bin_edges = np.histogram(data, bins=bins) 237 | histdata.append((hist, bin_edges, data)) 238 | # bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 239 | # bin_xx = np.linspace(bin_edges[0], bin_edges[-1], 100) 240 | ax.hist(data, bins=bin_edges, histtype='step') 241 | ax.plot(bin_edges, gm.eval(params[i_dim], x=bin_edges)) 242 | ax.set_title("{:5f}+-{:5f}".format(bias[i_dim], scatter[i_dim])) 243 | else: 244 | for i_dim in range(n_dim): 245 | data = label2[:, i_dim] - label1[:, i_dim] 246 | hist, bin_edges = np.histogram(data, bins=bins) 247 | histdata.append((hist, bin_edges, data)) 248 | 249 | return bias, scatter, frs, histdata 250 | 251 | 252 | if __name__ == "__main__": 253 | test_gfit_mle() 254 | test_gfit_bin() 255 | -------------------------------------------------------------------------------- /slam/apogee.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Sat Oct 31 12:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Sat Oct 31 12:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - APOGEE utils 23 | 24 | """ 25 | 26 | from __future__ import print_function 27 | 28 | import os 29 | import urllib 30 | import urllib.request 31 | from collections import OrderedDict 32 | 33 | import numpy as np 34 | from astropy.io import fits 35 | from astropy.table import Table, Column 36 | 37 | __all__ = ["apStar_url", "apStar_download", "mkdir_loop"] 38 | 39 | 40 | def reconstruct_wcs_coord_from_fits_header(hdr, dim=1): 41 | """ reconstruct wcs coordinates (e.g., wavelength array) """ 42 | # assert dim is not larger than limit 43 | assert dim <= hdr["NAXIS"] 44 | 45 | # get keywords 46 | crval = hdr["CRVAL%d" % dim] 47 | cdelt = hdr["CDELT%d" % dim] 48 | try: 49 | crpix = hdr["CRPIX%d" % dim] 50 | except KeyError: 51 | crpix = 1 52 | 53 | # length of the current dimension 54 | naxis_ = hdr["NAXIS%d" % dim] 55 | 56 | # reconstruct wcs coordinates 57 | coord = np.arange(1 - crpix, naxis_ + 1 - crpix) * cdelt + crval 58 | return coord 59 | 60 | 61 | def apStar_read(fp, full=False, meta=False, verbose=False): 62 | """ read apStar fits file 63 | 64 | Parameters 65 | ---------- 66 | fp: string 67 | file path 68 | full: bool 69 | if False, return a simple version of apStar spec. 70 | if True, return a full version of apStar spec. 71 | meta: bool 72 | if True, attach Primary HDU header as spec.meta (OrderedDict) 73 | verbose: bool: 74 | if True, verbose. 
75 | 76 | Returns 77 | ------- 78 | spec (astropy.table.Table instance) 79 | 80 | Notes 81 | ----- 82 | The url of the doc for apStar files: 83 | https://data.sdss.org/datamodel/files/APOGEE_REDUX/APRED_VERS/APSTAR_VERS/ 84 | TELESCOPE/LOCATION_ID/apStar.html 85 | 86 | HDU0: master header with target information 87 | HDU1: spectra: combined and individual 88 | HDU2: error spectra 89 | HDU3: mask spectra 90 | HDU4: sky spectra 91 | HDU5: sky error spectra 92 | HDU6: telluric spectra 93 | HDU7: telluric error spectra 94 | HDU8: table with LSF coefficients 95 | HDU9: table with RV/binary information 96 | 97 | """ 98 | # read apStar file 99 | hl = fits.open(fp) 100 | 101 | # construct Table instance 102 | if not full: 103 | # not full apStar info, [wave, flux, flux_err, mask] only 104 | spec = Table([ 105 | Column( 106 | 10. ** reconstruct_wcs_coord_from_fits_header(hl[1].header, 1), 107 | "wave"), 108 | Column(hl[1].data.T, "flux"), 109 | Column(hl[2].data.T, "flux_err"), 110 | Column(hl[3].data.T, "mask")]) 111 | else: 112 | # full apStar info 113 | spec = Table([ 114 | Column( 115 | 10. ** reconstruct_wcs_coord_from_fits_header(hl[1].header, 1), 116 | "wave"), 117 | Column(hl[1].data.T, "flux"), 118 | Column(hl[2].data.T, "flux_err"), 119 | Column(hl[3].data.T, "mask"), 120 | Column(hl[4].data.T, "sky"), 121 | Column(hl[5].data.T, "sky_err"), 122 | Column(hl[6].data.T, "telluric"), 123 | Column(hl[7].data.T, "telluric_err")]) 124 | 125 | # meta data 126 | if meta: 127 | spec.meta = OrderedDict(hl[0].header) 128 | 129 | if verbose: 130 | print("@Cham: successfully load %s ..." % fp) 131 | return spec 132 | 133 | 134 | def aspcapStar_read(fp, meta=False, verbose=False): 135 | """ read apStar fits file 136 | 137 | Parameters 138 | ---------- 139 | fp: string 140 | file path 141 | meta: bool 142 | if True, attach Primary HDU header as spec.meta (OrderedDict) 143 | verbose: bool: 144 | if True, verbose. 145 | 146 | Returns 147 | ------- 148 | spec (astropy.table.Table instance) 149 | 150 | Notes 151 | ----- 152 | The url of the doc for aspcapStar files: 153 | https://data.sdss.org/datamodel/files/APOGEE_REDUX/APRED_VERS/APSTAR_VERS/ 154 | ASPCAP_VERS/RESULTS_VERS/LOCATION_ID/aspcapStar.html 155 | 156 | HDU0: The Primary Header 157 | HDU1: Spectrum array 158 | HDU2: Error array 159 | HDU3: Best fit spectrum 160 | HDU4: ASPCAP data table 161 | 162 | """ 163 | # read apStar file 164 | hl = fits.open(fp) 165 | 166 | # construct Table instance 167 | spec = Table([ 168 | Column( 169 | 10. ** reconstruct_wcs_coord_from_fits_header(hl[1].header, 1), 170 | "wave"), 171 | Column(hl[1].data, "flux"), 172 | Column(hl[2].data, "flux_err"), 173 | Column(hl[3].data, "flux_fit")]) 174 | 175 | # meta data 176 | if meta: 177 | spec.meta = OrderedDict(hl[0].header) 178 | 179 | if verbose: 180 | print("@Cham: successfully load %s ..." 
% fp) 181 | return spec 182 | 183 | 184 | def test_aspcapStar_read(): 185 | fp = "/pool/sdss/apogee_dr13/aspcapStar-r6-l30e.2-2M07332578+2044059.fits" 186 | spec = aspcapStar_read(fp, True) 187 | spec.pprint() 188 | print(spec.meta) 189 | return spec 190 | # spec = test_aspcapStar_read() 191 | 192 | 193 | def test_apStar_read(): 194 | fp = "/pool/sdss/apogee_dr13/apStar-r6-VESTA.fits" 195 | spec = apStar_read(fp, True) 196 | spec.pprint() 197 | print(spec.meta) 198 | return spec 199 | # spec = test_apStar_read() 200 | 201 | 202 | def apStar_url(telescope, location_id, field, file_, 203 | url_header=None): 204 | """ apStar url generator 205 | which in principle is able to generate file path 206 | 207 | Parameters 208 | ---------- 209 | telescope: string 210 | TELESCOPE, {"apo1m', 'apo25m'} 211 | location_id: int 212 | for 'apo1m', it's 1 213 | for 'apo25m', it's like PLATE 214 | field: string 215 | for 'apo1m', it's 'hip'|'calibration'|... 216 | for 'apo25m', it's non-sense 217 | file_: string 218 | FILE 219 | url_header: string 220 | if None|'sas', it's set to be 221 | "https://data.sdss.org/sas/dr13/apogee/spectro/redux/%s/stars"%version 222 | 223 | Returns 224 | ------- 225 | url: string 226 | the url of apStar file 227 | 228 | Note 229 | ---- 230 | version: string 231 | currently it's 'r6' @20161031 232 | """ 233 | 234 | if url_header is None or url_header is "sas": 235 | url_header = ("https://data.sdss.org/sas/dr13/apogee" 236 | "/spectro/redux/r6/stars") 237 | 238 | url_header = url_header.strip() 239 | telescope = telescope.strip() 240 | field = field.strip() 241 | file_ = file_.strip() 242 | 243 | if telescope == "apo1m": 244 | # apo1m 245 | url = "%s/%s/%s/%s" % (url_header, telescope, field, file_) 246 | elif telescope == "apo25m": 247 | # apo25m 248 | url = "%s/%s/%s/%s" % (url_header, telescope, location_id, file_) 249 | else: 250 | raise(ValueError("@Cham: This is not an option!")) 251 | return url 252 | 253 | 254 | def aspcapStar_url(location_id, file_, url_header=None): 255 | 256 | """ aspcapStar url generator 257 | which in principle is able to generate file path 258 | 259 | Parameters 260 | ---------- 261 | location_id: int 262 | for 'apo1m', it's 1 263 | for 'apo25m', it's like PLATE 264 | file_: string 265 | FILE 266 | url_header: string 267 | if None|'sas', it's set to be 268 | "https://data.sdss.org/sas/dr13/apogee/spectro/redux/%s/stars/l30e/l30e.2"%version 269 | 270 | Returns 271 | ------- 272 | url: string 273 | the url of apStar file 274 | 275 | Note 276 | ---- 277 | version: string 278 | currently it's 'r6' @20161031 279 | """ 280 | 281 | if url_header is None or url_header is "sas": 282 | url_header = ("https://data.sdss.org/sas/dr13/apogee" 283 | "/spectro/redux/r6/stars/l30e/l30e.2") 284 | 285 | url_header = url_header.strip() 286 | file_ = file_.strip() 287 | 288 | try: 289 | url = "%s/%s/%s" % (url_header, location_id, file_) 290 | except: 291 | raise (ValueError("@Cham: This is not an option!")) 292 | 293 | return url 294 | 295 | 296 | def apStar_download(url, file_path, verbose=False, 297 | username="sdss", password="2.5-meters"): 298 | """ apStar file downloading utils 299 | which in principle is able to download everything from a valid url 300 | 301 | Parameters 302 | ---------- 303 | url: string 304 | the url of the target 305 | file_path: string 306 | the path of the file to be saved 307 | verbose: 308 | if True, print status 309 | 310 | Returns 311 | ------- 312 | status: bool 313 | True if success. 
314 | 315 | """ 316 | 317 | # # if exists, do nothing 318 | # if os.path.exists(file_path): 319 | # try: 320 | # fits.open(file_path) 321 | # return True 322 | # except Exception: 323 | # pass 324 | 325 | # credentials for sdss 326 | p = urllib.request.HTTPPasswordMgrWithDefaultRealm() 327 | p.add_password(None, url, username, password) 328 | handler = urllib.request.HTTPBasicAuthHandler(p) 329 | opener = urllib.request.build_opener(handler) 330 | urllib.request.install_opener(opener) 331 | 332 | # request data 333 | status = True 334 | try: 335 | local_file_path, header = urllib.request.urlretrieve(url, file_path) 336 | except Exception: 337 | status = False 338 | 339 | # verbose 340 | if verbose: 341 | if status: 342 | print("@Cham: success: {0}".format(url)) 343 | else: 344 | print("@Cham: failed: {0}".format(url)) 345 | 346 | return status 347 | 348 | 349 | def mkdir_loop(file_path, n_loop=3, verbose=True): 350 | """ a weak version of os.makedirs() 351 | which may avoid infinite loop 352 | 353 | Parameters 354 | ---------- 355 | file_path: string 356 | file path 357 | n_loop: int 358 | make n-th parent directory possible to be created 359 | verbose: bool 360 | if True, verbose 361 | 362 | Returns 363 | ------- 364 | bool 365 | 366 | """ 367 | dirname = os.path.dirname(file_path) 368 | if n_loop > 0: 369 | 370 | if os.path.exists(dirname): 371 | # if dirname exists 372 | if verbose: 373 | print("@Cham: [n_loop=%s] dir exists: %s " % (n_loop, dirname)) 374 | return True 375 | elif os.path.exists(os.path.dirname(dirname)): 376 | # if dirname doesn't exist, but dirname(dirname) exists --> mkdir 377 | if verbose: 378 | print("@Cham: [n_loop=%s] mkdir: %s ..." % (n_loop, dirname)) 379 | os.mkdir(dirname) 380 | return True 381 | else: 382 | # if dirname(dirname) doesn't exists 383 | if mkdir_loop(dirname, n_loop-1): 384 | return mkdir_loop(file_path, n_loop) 385 | else: 386 | return False 387 | else: 388 | if verbose: 389 | print("@Cham: [n_loop=%s] unable to mkdir %s ..." 
% (n_loop, dirname)) 390 | return False 391 | -------------------------------------------------------------------------------- /slam/binning.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Tue Oct 04 13:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Tue Oct 04 13:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - to implement functions for binning pixels 23 | 24 | """ 25 | 26 | from __future__ import division 27 | 28 | import matplotlib.pyplot as plt 29 | import numpy as np 30 | from scipy.interpolate import PchipInterpolator, interp1d 31 | 32 | 33 | def interp_pchip(wave, spec, wave_interp, extrapolate=False): 34 | """ interpolate for single spectrum (pchip) 35 | 36 | Parameters 37 | ---------- 38 | wave: ndarray 39 | wavelength array 40 | spec: ndarray 41 | spectrum array 42 | wave_interp: ndarray 43 | wavelength array to be interpolated to 44 | extrapolate: bool 45 | if True, extrapolate 46 | if False, return NaNs for out-of-bounds pixels 47 | 48 | Returns 49 | ------- 50 | spec_interp: ndarray 51 | interpolated spectrum 52 | 53 | """ 54 | P = PchipInterpolator(wave, spec, extrapolate=extrapolate) 55 | spec_interp = P(wave_interp) 56 | return spec_interp 57 | 58 | 59 | def interp_linear(wave, spec, wave_interp, fill_value=np.nan): 60 | """ interpolate for single spectrum (linear) 61 | 62 | Parameters 63 | ---------- 64 | wave: ndarray 65 | wavelength array 66 | spec: ndarray 67 | spectrum array 68 | wave_interp: ndarray 69 | wavelength array to be interpolated to 70 | fill_value: float/nan 71 | fill out-of-bounds pixels with fill_value 72 | 73 | Returns 74 | ------- 75 | spec_interp: ndarray 76 | interpolated spectrum 77 | 78 | """ 79 | I = interp1d(wave, spec, kind='linear', 80 | bounds_error=False, fill_value=fill_value) 81 | return I(wave_interp) 82 | 83 | 84 | def interp_cubic(wave, spec, wave_interp, fill_value=np.nan): 85 | """ interpolate for single spectrum (cubic) 86 | 87 | Parameters 88 | ---------- 89 | wave: ndarray 90 | wavelength array 91 | spec: ndarray 92 | spectrum array 93 | wave_interp: ndarray 94 | wavelength array to be interpolated to 95 | fill_value: float/nan 96 | fill out-of-bounds pixels with fill_value 97 | 98 | Returns 99 | ------- 100 | spec_interp: ndarray 101 | interpolated spectrum 102 | 103 | """ 104 | I = interp1d(wave, spec, kind='cubic', 105 | bounds_error=False, fill_value=fill_value) 106 | return I(wave_interp) 107 | 108 | 109 | def interp_nearest(wave, spec, wave_interp, fill_value=np.nan): 110 | """ interpolate for single spectrum (nearest) 111 | 112 | Parameters 113 | ---------- 114 | wave: ndarray 115 | wavelength array 116 | spec: ndarray 117 | spectrum array 118 | wave_interp: ndarray 119 | wavelength array to be interpolated to 120 | fill_value: float/nan 121 | fill out-of-bounds pixels with fill_value 122 | 123 | Returns 124 | ------- 125 | spec_interp: ndarray 126 | interpolated spectrum 127 | 128 | """ 129 | I = interp1d(wave, spec, kind='nearest', 130 | bounds_error=False, fill_value=fill_value) 131 | return I(wave_interp) 132 | 133 | 134 | def add_noise_normal(flux, snr): 135 | """ add normal random noise for flux (single spectrum) 136 | 137 | Parameters 138 | ---------- 139 | flux: ndarray 140 | flux array 141 | snr: float 142 | Signal-to-Noise Ratio 143 | 144 | Returns 145 | ------- 146 | flux: ndarray 147 | 148 | """ 149 | nsr = 
np.random.randn(*flux.shape) / snr
150 |     nsr = np.where((nsr < 1.) * (nsr > -1.), nsr, np.zeros_like(flux))
151 | 
152 |     return flux * (1. + nsr)
153 | 
154 | 
155 | def add_noise_gpoisson(flux, k=1.0):
156 |     """ add SCALED Poisson random noise for flux (single spectrum)
157 | 
158 |     Parameters
159 |     ----------
160 |     flux: ndarray
161 |         flux array
162 |     k: float
163 |         k times better Poisson noise, implemented in case Poisson is too noisy
164 |         default value is 1.
165 | 
166 |     Returns
167 |     -------
168 |     flux: ndarray
169 | 
170 |     """
171 |     nsr = np.random.randn(*flux.shape) / np.sqrt(np.abs(flux)) / k
172 |     nsr = np.where((nsr < 1.) * (nsr > -1.), nsr, np.zeros_like(flux))
173 | 
174 |     return flux * (1. + nsr)
175 | 
176 | 
177 | def add_noise_poisson(flux):
178 |     """ add Poisson random noise for flux (single/multi spectrum)
179 | 
180 |     Parameters
181 |     ----------
182 |     flux: ndarray
183 |         flux array
184 | 
185 |     Returns
186 |     -------
187 |     flux: ndarray
188 | 
189 |     """
190 |     return np.random.poisson(flux)
191 | 
192 | 
193 | def measure_poisson_snr(flux):
194 |     """ measure Poisson SNR for flux
195 | 
196 |     Parameters
197 |     ----------
198 |     flux: ndarray 2D
199 |         flux
200 | 
201 |     Returns
202 |     -------
203 |     snr_med: ndarray
204 |         the median Poisson SNR of flux
205 | 
206 |     """
207 |     # Poisson SNR
208 |     snr = np.sqrt(flux)
209 |     # median Poisson SNR
210 |     snr_med = np.median(snr, axis=1)
211 | 
212 |     return snr_med
213 | 
214 | 
215 | def shift_poisson_snr(flux, snr):
216 |     """ shift Poisson SNR for flux
217 | 
218 |     Parameters
219 |     ----------
220 |     flux: ndarray 1D/2D
221 |         flux
222 |     snr: float
223 |         target snr
224 | 
225 |     Returns
226 |     -------
227 |     flux_ : ndarray 1D/2D
228 |         flux scaled to median SNR = snr
229 | 
230 |     """
231 |     # remember the input dimensionality before any reshape
232 |     ndim_in = flux.ndim
233 |     if ndim_in == 1:
234 |         flux = flux.reshape(1, -1)  # promote 1d flux to (1, n_pixel)
235 |     elif ndim_in > 2:
236 |         # >2d
237 |         raise ValueError('The number of dimensions of input flux is larger than 2!')
238 | 
239 |     # measure poisson SNR for flux
240 |     snr_med = measure_poisson_snr(flux)[:, None]
241 |     # determine scale
242 |     scale_ = (snr_med/snr) ** 2.
243 |     # scale flux
244 |     flux_ = flux / scale_
245 | 
246 |     if ndim_in == 1:
247 |         flux_ = flux_.flatten()
248 | 
249 |     return flux_
250 | 
251 | 
252 | def binning_pixels(wave, flux, ivar=None, n_pixel=3):
253 |     """ bin spectral pixels in groups of n_pixel
254 | 
255 |     Parameters
256 |     ----------
257 |     wave: ndarray
258 |         wavelength array
259 |     flux: ndarray
260 |         flux array
261 |     ivar: ndarray
262 |         ivar array
263 |     n_pixel: int
264 |         number of pixels binned
265 | 
266 |     Returns
267 |     -------
268 |     binned_wave: ndarray
269 |         binned wavelength array
270 |     binned_flux: ndarray
271 |         binned flux array
272 |     binned_ivar: ndarray
273 |         binned ivar array
274 | 
275 |     """
276 |     assert n_pixel > 0
277 | 
278 |     # default ivar
279 |     if ivar is None:
280 |         ivar = np.ones_like(flux)
281 | 
282 |     # determine the number of binned pixels (integer; np.fix returns a float)
283 |     n_binned = len(flux) // n_pixel
284 | 
285 |     # initialization
286 |     binned_wave = np.ones(n_binned)
287 |     binned_flux = np.ones(n_binned)
288 |     binned_ivar = np.ones(n_binned)
289 | 
290 |     # iterate for each binned pixel [wave, flux, ivar]
291 |     for i_pix in range(n_binned):
292 |         binned_wave[i_pix] = np.mean(
293 |             wave[i_pix * n_pixel:(i_pix + 1) * n_pixel])
294 |         binned_flux[i_pix] = np.mean(
295 |             flux[i_pix * n_pixel:(i_pix + 1) * n_pixel])
296 |         this_ivar_array = ivar[i_pix * n_pixel:(i_pix + 1) * n_pixel]
297 |         if np.all((this_ivar_array > 0.)
* np.isfinite(this_ivar_array)): 298 | # all pixels are good 299 | # ################## binning method #################### # 300 | # (err1**2 + err2**2 + ... + errn**2) / n**2 = errbin**2 # 301 | # 1/ivar1 + 1/ivar2 + ... + 1/ivarn = n**2 /ivar # 302 | # --> binning n pixels with the same error # 303 | # --> improves SNR by a factor of sqrt(n) # 304 | # ###################################################### # 305 | binned_ivar[i_pix] = n_pixel ** 2. / np.sum(1. / this_ivar_array) 306 | else: 307 | # bad pixel exists 308 | binned_ivar[i_pix] = 0. 309 | 310 | return binned_wave, binned_flux, binned_ivar 311 | 312 | 313 | def test_interpolation(): 314 | x = np.arange(0., 10., 1.) 315 | y = np.sin(x) 316 | plt.plot(x, y, 'r') 317 | xx = np.arange(0., 10., 0.2) 318 | plt.plot(xx, interp_pchip(x, y, xx), 'b') 319 | plt.plot(xx, interp_linear(x, y, xx), 'g') 320 | plt.plot(xx, interp_cubic(x, y, xx), 'c') 321 | plt.plot(xx, interp_nearest(x, y, xx), 'm') 322 | 323 | 324 | if __name__ == "__main__": 325 | test_interpolation() 326 | -------------------------------------------------------------------------------- /slam/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | __all__ = ['PACKAGE_PATH', 'WELCOME', 'AUTHOR', 'PACKAGE_URL'] 5 | 6 | 7 | PACKAGE_PATH = os.path.dirname(__file__) 8 | 9 | WELCOME = """ 10 | Welcome to **TheKeenan**. Have fun with it! 11 | """ 12 | 13 | AUTHOR = """ 14 | Author: Bo Zhang (bozhang@nao.cas.cn) 15 | """ 16 | 17 | PACKAGE_URL = """ 18 | Bitbucket url: https://bitbucket.org/hypergravity/thekeenan # currently private 19 | Github url: https://github.com/hypergravity/thekeenan # in prep. 20 | PYPI url: https://pypi.python.org/pypi/thekeenan # in prep. 21 | """ 22 | -------------------------------------------------------------------------------- /slam/costfunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Sat Sep 03 12:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Sat Sep 03 12:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - define cost functions 23 | - temporally only chi2 is implemented 24 | 25 | """ 26 | 27 | from __future__ import division 28 | 29 | import numpy as np 30 | from scipy.stats import chisquare 31 | 32 | 33 | __all__ = ['chi2_simple_1d'] 34 | 35 | 36 | def chi2_simple_1d(spec_obs, spec_pred, ivar=None): 37 | """ Calculate ivar-weighted chi-square for two spectra 38 | 39 | Parameters 40 | ---------- 41 | spec_obs: array-like 42 | observed flux 43 | 44 | spec_pred: array-like 45 | predicted flux 46 | 47 | ivar: array-like or None 48 | inverse variance / weight of pixels 49 | if None, assume pixels are even-weighted 50 | 51 | """ 52 | if ivar is None: 53 | # ivar not specified 54 | chi2_value = np.nansum((np.array(spec_obs).flatten() - np.array( 55 | spec_pred).flatten()) ** 2.) 56 | else: 57 | # ivar specified 58 | chi2_value = np.nansum((np.array(spec_obs).flatten() - np.array( 59 | spec_pred).flatten()) ** 2. 
* np.array(ivar).flatten()) 60 | # print('chi2: ', chi2_value) 61 | return chi2_value 62 | 63 | 64 | def chi2(a, b): 65 | c = chisquare([1, 2, 3], [1, 1, 1]) 66 | return c.statistic 67 | -------------------------------------------------------------------------------- /slam/data/OpticalLineList.csv: -------------------------------------------------------------------------------- 1 | line,wave_air,wave_vac,comment 2 | CaIIK,3933.663,3934.777,IS absn line 3 | CaIIH,3968.468,3969.591,IS absn line 4 | OIIf3726,3726.03,3727.09, 5 | OIIf3729,3728.82,3729.88, 6 | NeIIIf1,3868.71,3869.81, 7 | NeIIIf2,3967.41,3968.53, 8 | H12,3750.15,3751.22, 9 | H11,3770.63,3771.7, 10 | H10,3797.9,3798.98, 11 | H9,3835.39,3836.48, 12 | H8,3889.05,3890.15, 13 | H_epsilon,3970.07,3971.19, 14 | H_delta,4101.76,4102.92, 15 | H_gamma,4340.47,4341.69, 16 | H_beta,4861.33,4862.69, 17 | H_alpha,6562.8,6564.61, 18 | OIIIf5007,5006.84,5008.24, 19 | OIIIf4959,4958.92,4960.3, 20 | OIIIf4363,4363.21,4364.44, 21 | NIIf6549,6548.03,6549.84, 22 | NIIf6583,6583.41,6585.23, 23 | NIIf5755,5754.64,5756.24, 24 | SIIf6717,6716.47,6718.32, 25 | SIIf6731,6730.85,6732.71, 26 | NaI1,5889.951,5891.583,IS absn line 27 | NaI2,5895.924,5897.558,IS absn line 28 | HeI1,3888.65,3889.75, 29 | HeI2,5875.67,5877.3, 30 | HeI3,6678.152,6679.996, 31 | OIf6300,6300.3,6302.04, 32 | SIIIf,6312.1,6313.8,Use with [SIII]9069 as T diag 33 | NeIIf1,6583.41,6585.23, 34 | NeIIf2,6548.03,6549.84, 35 | NeIIf3,5754.64,5756.24, 36 | OIf5577,5577.3387,5578.8874,Strong sky line 37 | Mgb1,5167.321,5168.761, 38 | Mgb1,5172.684,5174.125, 39 | Mgb1,5183.604,5185.048, 40 | ArIIIf1,7135.8,7137.8, 41 | ArIIIf2,7751.1,7753.2, 42 | CaII1,8498.03,8500.36,CaII triplet 43 | CaII2,8542.09,8544.44,CaII triplet 44 | CaII3,8662.14,8664.52,CaII triplet 45 | SIIIf1,9068.6,9071.1,Use with [SIII]6312 as Tdiag 46 | SIIIf2,9530.6,9533.2, 47 | P11,8862.783,8865.217, 48 | P10,9014.91,9017.385, 49 | P9,9229.014,9231.547, 50 | P8,9545.972,9548.59, 51 | P7,10049.373,10052.128, 52 | P_gamma,10938.095,10941.091, 53 | P_beta,12818.08,12821.59, 54 | P_alpha,18751.01,18756.13, 55 | DIB4430,4430,4431.2, 56 | DIB5780,5780,5781.6, 57 | DIB5797,5797,5798.6, 58 | DIB6284,6284,6285.7, 59 | -------------------------------------------------------------------------------- /slam/diagnostic.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Tue Sep 06 21:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Tue Sep 06 21:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - diagnostics of tr/test_labels 23 | - diagnostics of fitted spectra 24 | 25 | """ 26 | 27 | import numpy as np 28 | import matplotlib.pyplot as plt 29 | 30 | from .predict import predict_pixel_for_diagnostic 31 | 32 | 33 | __all__ = ['compare_labels', 'compare_spectra'] 34 | 35 | 36 | def compare_labels(label1, label2, labelname1='Label1', labelname2='Label2', 37 | figsize=None, figpath=None, ): 38 | """ compare two sets of labels 39 | 40 | Parameters 41 | ---------- 42 | label1 : ndarray (n_obs, n_dim) 43 | label set 1 (X) 44 | label2 : ndarray (n_obs, n_dim) 45 | label set 2 (Y) 46 | labelname1: string 47 | name of label1 48 | labelname2: string 49 | name of label2 50 | figsize : tuple of float 51 | figure size 52 | figpath : string 53 | filepath of this figure 54 | 55 | """ 56 | 57 | assert label1.shape == label2.shape 58 | n_obs, n_dim = 
label1.shape 59 | 60 | # default figsize 61 | if figsize is None: 62 | figsize = (3.2 * n_dim, 6) 63 | 64 | # draw figure 65 | fig = plt.figure(figsize=figsize) 66 | for i in range(n_dim): 67 | x, y = label1[:, i], label2[:, i] 68 | xy = np.stack([x, y]) 69 | xlim = (np.min(xy), np.max(xy)) 70 | 71 | # diagnal plot 72 | ax = fig.add_subplot(2, n_dim, i + 1) 73 | ax.plot(x, y, 'b.') 74 | ax.plot(xlim, xlim, 'k--') 75 | ax.set_xlim(xlim) 76 | ax.set_ylim(xlim) 77 | ax.set_xlabel('%s : %s' % (labelname1, i)) 78 | ax.set_ylabel('%s : %s' % (labelname2, i)) 79 | 80 | # diff plot 81 | ax = fig.add_subplot(2, n_dim, i + 1 + n_dim) 82 | ax.plot(x, y - x, 'b.') 83 | ax.plot(xlim, [0., 0.], 'k--') 84 | ax.set_xlim(xlim) 85 | ax.set_xlabel('%s : %s' % (labelname1, i)) 86 | ax.set_ylabel('%s : %s - %s : %s' % (labelname2, i, labelname1, i)) 87 | 88 | fig.tight_layout() 89 | 90 | if figpath is not None: 91 | fig.savefig(figpath) 92 | 93 | return fig 94 | 95 | 96 | def compare_spectra(spectra1, spectra2=None, ofst_step=0.2, wave=None, 97 | mediannorm=False, figsize=(10, 6), plt_max=100): 98 | """ compare one/two spectra set """ 99 | n_spec = spectra1.shape[0] 100 | 101 | # if mediannorm is a float, scale spectra to median* 102 | if isinstance(mediannorm, float): 103 | for i in range(n_spec): 104 | spectra1[i] /= np.nanmedian(spectra1[i]) * mediannorm 105 | if spectra2 is not None: 106 | for i in range(n_spec): 107 | spectra2[i] /= np.nanmedian(spectra2[i]) * mediannorm 108 | 109 | # plot the figure 110 | fig = plt.figure(figsize=figsize) 111 | fig.add_subplot(111) 112 | if wave is None: 113 | for i in range(n_spec): 114 | ofst = i * ofst_step 115 | plt.plot(spectra1[i] + ofst, 'b') 116 | if spectra2 is not None: 117 | plt.plot(spectra2[i] + ofst, 'r') 118 | else: 119 | for i in range(n_spec): 120 | ofst = i * ofst_step 121 | plt.plot(wave, spectra1[i] + ofst, 'b') 122 | if spectra2 is not None: 123 | plt.plot(wave, spectra2[i] + ofst, 'r') 124 | 125 | return fig 126 | 127 | 128 | def single_pixel_diagnostic(svrs, 129 | i_pixel, 130 | test_labels, 131 | diag_dim=(0,), 132 | labels_scaler=None, 133 | flux_scaler=None): 134 | """ diagnostic a single pixel in 1D/2D 135 | 136 | Parameters 137 | ---------- 138 | svrs: list of sklearn.smv.SVR instance 139 | k.svrs 140 | i_pixel: int 141 | No. of pixel that will be in diagnostic 142 | test_labels: ndarray ( n, ndim ) 143 | test labels 144 | diag_dim: tuple/list 145 | diagnostic dimensions, e.g., (0, 1) 146 | labels_scaler: 147 | scaler for labels, e.g., k.tr_labels_scaler 148 | flux_scaler: 149 | scaler for flux, e.g., k.tr_flux_scaler 150 | 151 | Returns 152 | ------- 153 | [X, (Y,) flux] 154 | 155 | """ 156 | # assertions 157 | assert 1 <= len(diag_dim) <= 2 158 | for dim_ in diag_dim: 159 | assert 0 <= dim_ <= test_labels.shape[1] 160 | 161 | # draw scaling parameters for this pixel 162 | if flux_scaler is None: 163 | flux_mean_ = 0. 164 | flux_scale_ = 1. 
165 | else: 166 | flux_mean_ = flux_scaler.mean_[i_pixel] 167 | flux_scale_ = flux_scaler.scale_[i_pixel] 168 | 169 | # prdict flux for this pixel 170 | pixel_flux = predict_pixel_for_diagnostic( 171 | svrs[i_pixel], test_labels, 172 | labels_scaler=labels_scaler, 173 | flux_mean_=flux_mean_, 174 | flux_scale_=flux_scale_) 175 | 176 | result = [] 177 | for dim_ in diag_dim: 178 | result.append(test_labels[:, dim_]) 179 | result.append(pixel_flux.flatten()) 180 | 181 | # return [X, (Y,) flux] 182 | return result 183 | -------------------------------------------------------------------------------- /slam/extern/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['interpolate', 'polynomial'] 2 | -------------------------------------------------------------------------------- /slam/extern/interpolate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import, division 3 | 4 | import numpy as np 5 | import scipy.signal 6 | 7 | # import scipy.sparse.linalg # @UnusedImport 8 | import scipy.sparse as sparse 9 | from numpy import ones, zeros, prod, sin, diff, pi, inf, vstack, linspace 10 | from scipy.interpolate import BPoly, interp1d 11 | 12 | # from wafo import polynomial as pl 13 | from . import polynomial as pl 14 | 15 | __all__ = [ 16 | 'PPform', 'savitzky_golay', 'savitzky_golay_piecewise', 'sgolay2d', 17 | 'SmoothSpline', 'pchip_slopes', 'slopes', 'stineman_interp', 'Pchip', 18 | 'StinemanInterp', 'CubicHermiteSpline'] 19 | 20 | 21 | def savitzky_golay(y, window_size, order, deriv=0): 22 | """Smooth (and optionally differentiate) data with a Savitzky-Golay filter. 23 | The Savitzky-Golay filter removes high frequency noise from data. 24 | It has the advantage of preserving the original shape and 25 | features of the signal better than other types of filtering 26 | approaches, such as moving averages techhniques. 27 | 28 | Parameters 29 | ---------- 30 | y : array_like, shape (N,) 31 | the values of the time history of the signal. 32 | window_size : int 33 | the length of the window. Must be an odd integer number. 34 | order : int 35 | the order of the polynomial used in the filtering. 36 | Must be less then `window_size` - 1. 37 | deriv: int 38 | order of the derivative to compute (default = 0 means only smoothing) 39 | 40 | Returns 41 | ------- 42 | ys : ndarray, shape (N) 43 | the smoothed signal (or it's n-th derivative). 44 | 45 | Notes 46 | ----- 47 | The Savitzky-Golay is a type of low-pass filter, particularly 48 | suited for smoothing noisy data. The test_doctstrings idea behind this 49 | approach is to make for each point a least-square fit with a 50 | polynomial of high order over a odd-sized window centered at 51 | the point. 52 | 53 | Examples 54 | -------- 55 | >>> t = np.linspace(-4, 4, 500) 56 | >>> noise = np.random.normal(0, 0.05, t.shape) 57 | >>> noise = 0.4*np.sin(100*t) 58 | >>> y = np.exp( -t**2 ) + noise 59 | >>> ysg = savitzky_golay(y, window_size=31, order=4) 60 | >>> np.allclose(ysg[:10], 61 | ... [-0.00127789, -0.02390299, -0.04444364, -0.01738837, 0.00585856, 62 | ... 
-0.01675704, -0.03140276, 0.00010455, 0.02099063, -0.00380031])
63 |     True
64 | 
65 |     import matplotlib.pyplot as plt
66 |     h=plt.plot(t, y, label='Noisy signal')
67 |     h=plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
68 |     h=plt.plot(t, ysg, 'r', label='Filtered signal')
69 |     h=plt.legend()
70 |     plt.show()
71 | 
72 |     References
73 |     ----------
74 |     .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
75 |        Data by Simplified Least Squares Procedures. Analytical
76 |        Chemistry, 1964, 36 (8), pp 1627-1639.
77 |     .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
78 |        W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
79 |        Cambridge University Press ISBN-13: 9780521880688
80 |     """
81 |     try:
82 |         window_size = abs(int(window_size))
83 |         order = abs(int(order))
84 |     except ValueError:
85 |         raise ValueError("window_size and order have to be of type int")
86 |     if window_size % 2 != 1 or window_size < 1:
87 |         raise TypeError("window_size must be a positive odd number")
88 |     if window_size < order + 2:
89 |         raise TypeError("window_size is too small for the polynomial's order")
90 |     order_range = range(order + 1)
91 |     half_window = (window_size - 1) // 2
92 |     # precompute coefficients
93 |     b = np.array([[k ** i for i in order_range]
94 |                   for k in range(-half_window, half_window + 1)])
95 |     m = np.linalg.pinv(b)[deriv]
96 |     # pad the signal at the extremes with
97 |     # values taken from the signal itself
98 |     firstvals = y[0] - np.abs(y[1:half_window + 1][::-1] - y[0])
99 |     lastvals = y[-1] + np.abs(y[-half_window - 1:-1][::-1] - y[-1])
100 |     y = np.concatenate((firstvals, y, lastvals))
101 |     return np.convolve(m, y, mode='valid')
102 | 
103 | 
104 | def savitzky_golay_piecewise(xvals, data, kernel=11, order=4):
105 |     '''
106 |     One of the most popular applications of S-G filter, apart from smoothing
107 |     UV-VIS and IR spectra, is smoothing of curves obtained in electroanalytical
108 |     experiments. In cyclic voltammetry, voltage (being the abscissa) changes
109 |     like a triangle wave. And in the signal there are cusps at the turning
110 |     points (at switching potentials) which should never be smoothed.
111 |     In this case, Savitzky-Golay smoothing should be
112 |     done piecewise, i.e. separately on pieces monotonic in x
113 | 
114 |     Example
115 |     -------
116 |     >>> import numpy as np
117 |     >>> import matplotlib.pyplot as plt
118 |     >>> n = 1000
119 |     >>> x = np.linspace(0, 25, n)
120 |     >>> y = np.round(sin(x))
121 |     >>> sig2 = linspace(0,0.5,50)
122 | 
123 |     # As an example, this figure shows the effect of an additive noise with a
124 |     # variance of 0.2 (original signal (black), noisy signal (red) and filtered
125 |     # signal (blue dots)).
126 |     >>> noise = np.sqrt(0.2)*np.random.randn(*x.shape)
127 |     >>> noise = np.sqrt(0.2)*np.sin(1000*x)
128 |     >>> yn = y + noise
129 |     >>> yr = savitzky_golay_piecewise(x, yn, kernel=11, order=4)
130 |     >>> np.allclose(yr[:10],
131 |     ...    [-0.02708216, -0.04295155, -0.08522043, -0.13995016, -0.1908162 ,
132 |     ...     -0.22938387, -0.26932722, -0.30614865, -0.33942134, -0.3687596 ])
133 |     True
134 | 
135 |     h=plt.plot(x, yn, 'r', x, y, 'k', x, yr, 'b.')
136 |     '''
137 |     turnpoint = 0
138 |     last = len(xvals)
139 |     if xvals[1] > xvals[0]:  # x is increasing?
140 | for i in range(1, last): # yes 141 | if xvals[i] < xvals[i - 1]: # search where x starts to fall 142 | turnpoint = i 143 | break 144 | else: # no, x is decreasing 145 | for i in range(1, last): # search where it starts to rise 146 | if xvals[i] > xvals[i - 1]: 147 | turnpoint = i 148 | break 149 | if turnpoint == 0: # no change in direction of x 150 | return savitzky_golay(data, kernel, order) 151 | else: 152 | # smooth the first piece 153 | firstpart = savitzky_golay(data[0:turnpoint], kernel, order) 154 | # recursively smooth the rest 155 | rest = savitzky_golay_piecewise( 156 | xvals[turnpoint:], data[turnpoint:], kernel, order) 157 | return np.concatenate((firstpart, rest)) 158 | 159 | 160 | def sgolay2d(z, window_size, order, derivative=None): 161 | """ 162 | Savitsky - Golay filters can also be used to smooth two dimensional data 163 | affected by noise. The algorithm is exactly the same as for the one 164 | dimensional case, only the math is a bit more tricky. The basic algorithm 165 | is as follow: for each point of the two dimensional matrix extract a sub 166 | - matrix, centered at that point and with a size equal to an odd number 167 | "window_size". for this sub - matrix compute a least - square fit of a 168 | polynomial surface, defined as 169 | p(x, y) = a0 + a1 * x + a2 * y + a3 * x2 + a4 * y2 + a5 * x * y + ... . 170 | 171 | Note that x and y are equal to zero at the central point. 172 | replace the initial central point with the value computed with the fit. 173 | Note that because the fit coefficients are linear with respect to the data 174 | spacing, they can pre - computed for efficiency. Moreover, it is important 175 | to appropriately pad the borders of the data, with a mirror image of the 176 | data itself, so that the evaluation of the fit at the borders of the data 177 | can happen smoothly. 178 | Here is the code for two dimensional filtering. 179 | 180 | Example 181 | ------- 182 | # create some sample twoD data 183 | >>> x = np.linspace(-3,3,100) 184 | >>> y = np.linspace(-3,3,100) 185 | >>> X, Y = np.meshgrid(x,y) 186 | >>> Z = np.exp( -(X**2+Y**2)) 187 | 188 | # add noise 189 | >>> noise = np.random.normal( 0, 0.2, Z.shape ) 190 | >>> noise = np.sqrt(0.2) * np.sin(100*X)*np.sin(100*Y) 191 | >>> Zn = Z + noise 192 | 193 | # filter it 194 | >>> Zf = sgolay2d( Zn, window_size=29, order=4) 195 | >>> np.allclose(Zf[:3,:5], 196 | ... [[ 0.29304073, 0.29749652, 0.29007645, 0.2695685 , 0.23541966], 197 | ... [ 0.29749652, 0.29819304, 0.28766723, 0.26524542, 0.23081572], 198 | ... [ 0.29007645, 0.28766723, 0.27483445, 0.25141198, 0.21769662]]) 199 | True 200 | 201 | # do some plotting 202 | import matplotlib.pyplot as plt 203 | h=plt.matshow(Z) 204 | h=plt.matshow(Zn) 205 | h=plt.matshow(Zf) 206 | """ 207 | # number of terms in the polynomial expression 208 | n_terms = (order + 1) * (order + 2) / 2.0 209 | 210 | if window_size % 2 == 0: 211 | raise ValueError('window_size must be odd') 212 | 213 | if window_size ** 2 < n_terms: 214 | raise ValueError('order is too high for the window size') 215 | 216 | half_size = window_size // 2 217 | 218 | # exponents of the polynomial. 219 | # p(x,y) = a0 + a1*x + a2*y + a3*x^2 + a4*y^2 + a5*x*y + ... 220 | # this line gives a list of two item tuple. Each tuple contains 221 | # the exponents of the k-th term. First element of tuple is for x 222 | # second element for y. 223 | # Ex. exps = [(0,0), (1,0), (0,1), (2,0), (1,1), (0,2), ...] 
224 |     exps = [(k - n, n) for k in range(order + 1) for n in range(k + 1)]
225 | 
226 |     # coordinates of points
227 |     ind = np.arange(-half_size, half_size + 1, dtype=float)
228 |     dx = np.repeat(ind, window_size)
229 |     dy = np.tile(ind, [window_size, 1]).reshape(window_size ** 2, )
230 | 
231 |     # build matrix of the system of equations
232 |     A = np.empty((window_size ** 2, len(exps)))
233 |     for i, exp in enumerate(exps):
234 |         A[:, i] = (dx ** exp[0]) * (dy ** exp[1])
235 | 
236 |     # pad input array with appropriate values at the four borders
237 |     new_shape = z.shape[0] + 2 * half_size, z.shape[1] + 2 * half_size
238 |     Z = np.zeros(new_shape)
239 |     # top band
240 |     band = z[0, :]
241 |     Z[:half_size, half_size:-half_size] = band - \
242 |         np.abs(np.flipud(
243 |             z[1:half_size + 1, :]) - band)
244 |     # bottom band
245 |     band = z[-1, :]
246 |     Z[-half_size:, half_size:-half_size] = band + \
247 |         np.abs(np.flipud(
248 |             z[-half_size - 1:-1, :]) - band)
249 |     # left band
250 |     band = np.tile(z[:, 0].reshape(-1, 1), [1, half_size])
251 |     Z[half_size:-half_size, :half_size] = band - \
252 |         np.abs(np.fliplr(
253 |             z[:, 1:half_size + 1]) - band)
254 |     # right band
255 |     band = np.tile(z[:, -1].reshape(-1, 1), [1, half_size])
256 |     Z[half_size:-half_size, -half_size:] = band + \
257 |         np.abs(np.fliplr(
258 |             z[:, -half_size - 1:-1]) - band)
259 |     # central band
260 |     Z[half_size:-half_size, half_size:-half_size] = z
261 | 
262 |     # top left corner
263 |     band = z[0, 0]
264 |     Z[:half_size, :half_size] = band - \
265 |         np.abs(
266 |             np.flipud(np.fliplr(z[1:half_size + 1,
267 |                                   1:half_size + 1])) - band)
268 |     # bottom right corner
269 |     band = z[-1, -1]
270 |     Z[-half_size:, -half_size:] = band + \
271 |         np.abs(np.flipud(np.fliplr(
272 |             z[-half_size - 1:-1,
273 |               -half_size - 1:-1])) -
274 |             band)
275 | 
276 |     # top right corner
277 |     band = Z[half_size, -half_size:]
278 |     Z[:half_size, -half_size:] = band - \
279 |         np.abs(
280 |             np.flipud(
281 |                 Z[half_size + 1:2 * half_size + 1,
282 |                   -half_size:]) - band)
283 |     # bottom left corner
284 |     band = Z[-half_size:, half_size].reshape(-1, 1)
285 |     Z[-half_size:, :half_size] = band - \
286 |         np.abs(
287 |             np.fliplr(Z[-half_size:,
288 |                         half_size + 1:2 * half_size + 1]) - band)
289 | 
290 |     # solve system and convolve
291 |     if derivative is None:
292 |         m = np.linalg.pinv(A)[0].reshape((window_size, -1))
293 |         return scipy.signal.fftconvolve(Z, m, mode='valid')
294 |     elif derivative == 'col':
295 |         c = np.linalg.pinv(A)[1].reshape((window_size, -1))
296 |         return scipy.signal.fftconvolve(Z, -c, mode='valid')
297 |     elif derivative == 'row':
298 |         r = np.linalg.pinv(A)[2].reshape((window_size, -1))
299 |         return scipy.signal.fftconvolve(Z, -r, mode='valid')
300 |     elif derivative == 'both':
301 |         c = np.linalg.pinv(A)[1].reshape((window_size, -1))
302 |         r = np.linalg.pinv(A)[2].reshape((window_size, -1))
303 |         return (scipy.signal.fftconvolve(Z, -r, mode='valid'),
304 |                 scipy.signal.fftconvolve(Z, -c, mode='valid'))
305 | 
306 | 
307 | class PPform(object):
308 |     """The ppform of the piecewise polynomials
309 |     is given in terms of coefficients and breaks.
310 |     The polynomial in the ith interval is
311 |     x_{i} <= x < x_{i+1}
312 | 
313 |     S_i = sum(coefs[m,i]*(x-breaks[i])^(k-m), m=0..k)
314 |     where k is the degree of the polynomial.
315 | 316 | Example 317 | ------- 318 | >>> import matplotlib.pyplot as plt 319 | >>> coef = np.array([[1,1]]) # unit step function 320 | >>> coef = np.array([[1,1],[0,1]]) # linear from 0 to 2 321 | >>> coef = np.array([[1,1],[1,1],[0,2]]) # linear from 0 to 2 322 | >>> breaks = [0,1,2] 323 | >>> self = PPform(coef, breaks) 324 | >>> x = linspace(-1, 3, 21) 325 | >>> y = self(x) 326 | >>> np.allclose(y, [ 0. , 0. , 0. , 0. , 0. , 0. , 0.24, 0.56, 327 | ... 0.96, 1.44, 2. , 2.24, 2.56, 2.96, 3.44, 4. , 0. , 0. , 328 | ... 0. , 0. , 0. ]) 329 | True 330 | 331 | h=plt.plot(x, y) 332 | """ 333 | 334 | def __init__(self, coeffs, breaks, fill=0.0, sort=False, a=None, b=None): 335 | if sort: 336 | self.breaks = np.sort(breaks) 337 | else: 338 | self.breaks = np.asarray(breaks) 339 | if a is None: 340 | a = self.breaks[0] 341 | if b is None: 342 | b = self.breaks[-1] 343 | self.coeffs = np.asarray(coeffs) 344 | self.order = self.coeffs.shape[0] 345 | self.fill = fill 346 | self.a = a 347 | self.b = b 348 | 349 | def __call__(self, xnew): 350 | saveshape = np.shape(xnew) 351 | xnew = np.ravel(xnew) 352 | res = np.empty_like(xnew) 353 | mask = (self.a <= xnew) & (xnew <= self.b) 354 | res[~mask] = self.fill 355 | xx = xnew.compress(mask) 356 | indxs = np.searchsorted(self.breaks[:-1], xx) - 1 357 | indxs = indxs.clip(0, len(self.breaks)) 358 | pp = self.coeffs 359 | dx = xx - self.breaks.take(indxs) 360 | 361 | v = pp[0, indxs] 362 | for i in range(1, self.order): 363 | v = dx * v + pp[i, indxs] 364 | values = v 365 | 366 | res[mask] = values 367 | res.shape = saveshape 368 | return res 369 | 370 | def linear_extrapolate(self, output=True): 371 | ''' 372 | Return 1D PPform which extrapolate linearly outside its basic interval 373 | ''' 374 | 375 | max_order = 2 376 | 377 | if self.order <= max_order: 378 | if output: 379 | return self 380 | else: 381 | return 382 | breaks = self.breaks.copy() 383 | coefs = self.coeffs.copy() 384 | # pieces = len(breaks) - 1 385 | 386 | # Add new breaks beyond each end 387 | breaks2add = breaks[[0, -1]] + np.array([-1, 1]) 388 | newbreaks = np.hstack([breaks2add[0], breaks, breaks2add[1]]) 389 | 390 | dx = newbreaks[[0, -2]] - breaks[[0, -2]] 391 | 392 | dx = dx.ravel() 393 | 394 | # Get coefficients for the new last polynomial piece (a_n) 395 | # by just relocate the previous last polynomial and 396 | # then set all terms of order > maxOrder to zero 397 | 398 | a_nn = coefs[:, -1] 399 | dxN = dx[-1] 400 | 401 | a_n = pl.polyreloc(a_nn, -dxN) # Relocate last polynomial 402 | # set to zero all terms of order > maxOrder 403 | a_n[0:self.order - max_order] = 0 404 | 405 | # Get the coefficients for the new first piece (a_1) 406 | # by first setting all terms of order > maxOrder to zero and then 407 | # relocate the polynomial. 
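        # in effect each new end piece keeps only the value and the slope of
        # the original end polynomial: beyond the last break b the spline is
        # continued by the tangent line p(b) + p'(b)*(x - b)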
408 | 409 | # Set to zero all terms of order > maxOrder, i.e., not using them 410 | a_11 = coefs[self.order - max_order::, 0] 411 | dx1 = dx[0] 412 | 413 | a_1 = pl.polyreloc(a_11, -dx1) # Relocate first polynomial 414 | a_1 = np.hstack([zeros(self.order - max_order), a_1]) 415 | 416 | newcoefs = np.hstack([a_1.reshape(-1, 1), coefs, a_n.reshape(-1, 1)]) 417 | if output: 418 | return PPform(newcoefs, newbreaks, a=-inf, b=inf) 419 | else: 420 | self.coeffs = newcoefs 421 | self.breaks = newbreaks 422 | self.a = -inf 423 | self.b = inf 424 | 425 | def derivative(self): 426 | """ 427 | Return first derivative of the piecewise polynomial 428 | """ 429 | 430 | cof = pl.polyder(self.coeffs) 431 | brks = self.breaks.copy() 432 | return PPform(cof, brks, fill=self.fill) 433 | 434 | def integrate(self): 435 | """ 436 | Return the indefinite integral of the piecewise polynomial 437 | """ 438 | cof = pl.polyint(self.coeffs) 439 | 440 | pieces = len(self.breaks) - 1 441 | if 1 < pieces: 442 | # evaluate each integrated polynomial at the right endpoint of its 443 | # interval 444 | xs = diff(self.breaks[:-1, ...], axis=0) 445 | index = np.arange(pieces - 1) 446 | 447 | vv = xs * cof[0, index] 448 | k = self.order 449 | for i in range(1, k): 450 | vv = xs * (vv + cof[i, index]) 451 | 452 | cof[-1] = np.hstack((0, vv)).cumsum() 453 | 454 | return PPform(cof, self.breaks, fill=self.fill) 455 | 456 | 457 | # def fromspline(self, xk, cvals, order, fill=0.0): 458 | # N = len(xk) - 1 459 | # sivals = np.empty((order + 1, N), dtype=float) 460 | # for m in range(order, -1, -1): 461 | # fact = spec.gamma(m + 1) 462 | # res = _fitpack._bspleval(xk[:-1], xk, cvals, order, m) 463 | # res /= fact 464 | # sivals[order - m, :] = res 465 | # return self(sivals, xk, fill=fill) 466 | 467 | 468 | class SmoothSpline(PPform): 469 | """ 470 | Cubic Smoothing Spline. 471 | 472 | Parameters 473 | ---------- 474 | x : array-like 475 | x-coordinates of data. (vector) 476 | y : array-like 477 | y-coordinates of data. (vector or matrix) 478 | p : real scalar 479 | smoothing parameter between 0 and 1: 480 | 0 -> LS-straight line 481 | 1 -> cubic spline interpolant 482 | lin_extrap : bool 483 | if False regular smoothing spline 484 | if True a smoothing spline with a constraint on the ends to 485 | ensure linear extrapolation outside the range of the data (default) 486 | var : array-like 487 | variance of each y(i) (default 1) 488 | 489 | Returns 490 | ------- 491 | pp : ppform 492 | If xx is not given, return self-form of the spline. 493 | 494 | Given the approximate values 495 | 496 | y(i) = g(x(i))+e(i) 497 | 498 | of some smooth function, g, where e(i) is the error. SMOOTH tries to 499 | recover g from y by constructing a function, f, which minimizes 500 | 501 | p * sum (Y(i) - f(X(i)))^2/d2(i) + (1-p) * int (f'')^2 502 | 503 | 504 | Example 505 | ------- 506 | >>> import numpy as np 507 | >>> import matplotlib.pyplot as plt 508 | >>> x = np.linspace(0, 1, 21) 509 | >>> noise = 1e-1*np.random.randn(x.size) 510 | >>> noise = np.array( 511 | ... [-0.03298601, -0.08164429, -0.06845745, -0.20718593, 0.08666282, 512 | ... 0.04702094, 0.08208645, -0.1017021 , -0.03031708, 0.22871709, 513 | ... -0.10302486, -0.17724316, -0.05885157, -0.03875947, -0.1102984 , 514 | ... -0.05542001, -0.12717549, 0.14337697, -0.02637848, -0.10353976, 515 | ... 
-0.0618834 ]) 516 | 517 | >>> y = np.exp(x) + noise 518 | >>> pp9 = SmoothSpline(x, y, p=.9) 519 | >>> pp99 = SmoothSpline(x, y, p=.99, var=0.01) 520 | 521 | >>> y99 = pp99(x); y9 = pp9(x) 522 | >>> np.allclose(y9, 523 | ... [ 0.8754795 , 0.95285289, 1.03033239, 1.10803792, 1.18606854, 524 | ... 1.26443234, 1.34321265, 1.42258227, 1.5027733 , 1.58394785, 525 | ... 1.66625727, 1.74998243, 1.8353173 , 1.92227431, 2.01076693, 526 | ... 2.10064087, 2.19164551, 2.28346334, 2.37573696, 2.46825194, 527 | ... 2.56087699]) 528 | True 529 | >>> np.allclose(y99, 530 | ... [ 0.95227461, 0.97317995, 1.01159244, 1.08726908, 1.21260587, 531 | ... 1.31545644, 1.37829108, 1.42719649, 1.51308685, 1.59669367, 532 | ... 1.61486217, 1.64481078, 1.72970022, 1.83208819, 1.93312796, 533 | ... 2.05164767, 2.19326122, 2.34608425, 2.45023567, 2.5357288 , 534 | ... 2.6357401 ]) 535 | True 536 | 537 | 538 | h=plt.plot(x,y, x,pp99(x),'g', x,pp9(x),'k', x,np.exp(x),'r') 539 | 540 | See also 541 | -------- 542 | lc2tr, dat2tr 543 | 544 | 545 | References 546 | ---------- 547 | Carl de Boor (1978) 548 | 'Practical Guide to Splines' 549 | Springer Verlag 550 | Uses EqXIV.6--9, self 239 551 | """ 552 | 553 | def __init__(self, xx, yy, p=None, lin_extrap=True, var=1): 554 | coefs, brks = self._compute_coefs(xx, yy, p, var) 555 | super(SmoothSpline, self).__init__(coefs, brks) 556 | if lin_extrap: 557 | self.linear_extrapolate(output=False) 558 | 559 | def _compute_coefs(self, xx, yy, p=None, var=1): 560 | x, y = np.atleast_1d(xx, yy) 561 | x = x.ravel() 562 | dx = np.diff(x) 563 | must_sort = (dx < 0).any() 564 | if must_sort: 565 | ind = x.argsort() 566 | x = x[ind] 567 | y = y[..., ind] 568 | dx = np.diff(x) 569 | 570 | n = len(x) 571 | 572 | # ndy = y.ndim 573 | szy = y.shape 574 | 575 | nd = np.int(prod(szy[:-1])) ## modified by Bo Zhang 576 | ny = szy[-1] 577 | 578 | if n < 2: 579 | raise ValueError('There must be >=2 data points.') 580 | elif (dx <= 0).any(): 581 | raise ValueError('Two consecutive values in x can not be equal.') 582 | elif n != ny: 583 | raise ValueError('x and y must have the same length.') 584 | 585 | dydx = np.diff(y) / dx 586 | 587 | if (n == 2): # % straight line 588 | coefs = np.vstack([dydx.ravel(), y[0, :]]) 589 | else: 590 | 591 | dx1 = 1. / dx 592 | D = sparse.spdiags(var * ones(n), 0, n, n) # The variance 593 | 594 | u, p = self._compute_u(p, D, dydx, dx, dx1, n) 595 | dx1.shape = (n - 1, -1) 596 | dx.shape = (n - 1, -1) 597 | zrs = zeros(nd) 598 | if p < 1: 599 | # faster than yi-6*(1-p)*Q*u 600 | Qu = D * diff(vstack([zrs, diff(vstack([zrs, u, zrs]), 601 | axis=0) * dx1, zrs]), axis=0) 602 | ai = (y - (6 * (1 - p) * Qu).T).T 603 | else: 604 | ai = y.reshape(n, -1) 605 | 606 | # The piecewise polynominals are written as 607 | # fi=ai+bi*(x-xi)+ci*(x-xi)^2+di*(x-xi)^3 608 | # where the derivatives in the knots according to Carl de Boor are: 609 | # ddfi = 6*p*[0;u] = 2*ci; 610 | # dddfi = 2*diff([ci;0])./dx = 6*di; 611 | # dfi = diff(ai)./dx-(ci+di.*dx).*dx = bi; 612 | 613 | ci = np.vstack([zrs, 3 * p * u]) 614 | di = (diff(vstack([ci, zrs]), axis=0) * dx1 / 3) 615 | bi = (diff(ai, axis=0) * dx1 - (ci + di * dx) * dx) 616 | ai = ai[:n - 1, ...] 
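            # at this point (ai, bi, ci, di) define each cubic piece as
            # f_i(x) = ai + bi*(x-xi) + ci*(x-xi)**2 + di*(x-xi)**3,
            # which is stacked below in ppform order [di; ci; bi; ai]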
617 | if nd > 1: 618 | di = di.T 619 | ci = ci.T 620 | ai = ai.T 621 | if not any(di): 622 | if not any(ci): 623 | coefs = vstack([bi.ravel(), ai.ravel()]) 624 | else: 625 | coefs = vstack([ci.ravel(), bi.ravel(), ai.ravel()]) 626 | else: 627 | coefs = vstack( 628 | [di.ravel(), ci.ravel(), bi.ravel(), ai.ravel()]) 629 | 630 | return coefs, x 631 | 632 | @staticmethod 633 | def _compute_u(p, D, dydx, dx, dx1, n): 634 | if p is None or p != 0: 635 | data = [dx[1:n - 1], 2 * (dx[:n - 2] + dx[1:n - 1]), dx[:n - 2]] 636 | R = sparse.spdiags(data, [-1, 0, 1], n - 2, n - 2) 637 | 638 | if p is None or p < 1: 639 | Q = sparse.spdiags( 640 | [dx1[:n - 2], -(dx1[:n - 2] + dx1[1:n - 1]), dx1[1:n - 1]], 641 | [0, -1, -2], n, n - 2) 642 | QDQ = (Q.T * D * Q) 643 | if p is None or p < 0: 644 | # Estimate p 645 | p = 1. / \ 646 | (1. + QDQ.diagonal().sum() / 647 | (100. * R.diagonal().sum() ** 2)) 648 | 649 | if p == 0: 650 | QQ = 6 * QDQ 651 | else: 652 | QQ = (6 * (1 - p)) * (QDQ) + p * R 653 | else: 654 | QQ = R 655 | 656 | # Make sure it uses symmetric matrix solver 657 | ddydx = diff(dydx, axis=0) 658 | # sp.linalg.use_solver(useUmfpack=True) 659 | u = 2 * sparse.linalg.spsolve((QQ + QQ.T), ddydx) # @UndefinedVariable 660 | return u.reshape(n - 2, -1), p 661 | 662 | 663 | def _edge_case(m0, d1): 664 | return np.where((d1 == 0) | (m0 == 0), 0.0, 1.0 / (1.0 / m0 + 1.0 / d1)) 665 | 666 | 667 | def pchip_slopes(x, y): 668 | # Determine the derivatives at the points y_k, d_k, by using 669 | # PCHIP algorithm is: 670 | # We choose the derivatives at the point x_k by 671 | # Let m_k be the slope of the kth segment (between k and k+1) 672 | # If m_k=0 or m_{k-1}=0 or sgn(m_k) != sgn(m_{k-1}) then d_k == 0 673 | # else use weighted harmonic mean: 674 | # w_1 = 2h_k + h_{k-1}, w_2 = h_k + 2h_{k-1} 675 | # 1/d_k = 1/(w_1 + w_2)*(w_1 / m_k + w_2 / m_{k-1}) 676 | # where h_k is the spacing between x_k and x_{k+1} 677 | 678 | hk = x[1:] - x[:-1] 679 | mk = (y[1:] - y[:-1]) / hk 680 | smk = np.sign(mk) 681 | condition = ((smk[1:] != smk[:-1]) | (mk[1:] == 0) | (mk[:-1] == 0)) 682 | 683 | w1 = 2 * hk[1:] + hk[:-1] 684 | w2 = hk[1:] + 2 * hk[:-1] 685 | whmean = 1.0 / (w1 + w2) * (w1 / mk[1:] + w2 / mk[:-1]) 686 | 687 | dk = np.zeros_like(y) 688 | dk[1:-1][condition] = 0.0 689 | dk[1:-1][~condition] = 1.0 / whmean[~condition] 690 | 691 | # For end-points choose d_0 so that 1/d_0 = 1/m_0 + 1/d_1 unless 692 | # one of d_1 or m_0 is 0, then choose d_0 = 0 693 | 694 | dk[0] = _edge_case(mk[0], dk[1]) 695 | dk[-1] = _edge_case(mk[-1], dk[-2]) 696 | return dk 697 | 698 | 699 | def slopes(x, y, method='parabola', tension=0, monotone=False): 700 | ''' 701 | Return estimated slopes y'(x) 702 | 703 | Parameters 704 | ---------- 705 | x, y : array-like 706 | array containing the x- and y-data, respectively. 707 | x must be sorted low to high... (no repeats) while 708 | y can have repeated values. 709 | method : string 710 | defining method of estimation for yp. Valid options are: 711 | 'Catmull-Rom' yp = (y[k+1]-y[k-1])/(x[k+1]-x[k-1]) 712 | 'Cardinal' yp = (1-tension) * (y[k+1]-y[k-1])/(x[k+1]-x[k-1]) 713 | 'parabola' 714 | 'secant' average secants 715 | yp = 0.5*((y[k+1]-y[k])/(x[k+1]-x[k]) + (y[k]-y[k-1])/(x[k]-x[k-1])) 716 | tension : real scalar between 0 and 1. 
717 | tension parameter used in Cardinal method 718 | monotone : bool 719 | If True modifies yp to preserve monoticity 720 | 721 | Returns 722 | ------- 723 | yp : ndarray 724 | estimated slope 725 | 726 | References: 727 | ----------- 728 | Wikipedia: Monotone cubic interpolation 729 | Cubic Hermite spline 730 | 731 | ''' 732 | x = np.asarray(x, np.float_) 733 | y = np.asarray(y, np.float_) 734 | yp = np.zeros(y.shape, np.float_) 735 | 736 | dx = x[1:] - x[:-1] 737 | # Compute the slopes of the secant lines between successive points 738 | dydx = (y[1:] - y[:-1]) / dx 739 | 740 | method = method.lower() 741 | if method.startswith('p'): # parabola'): 742 | yp[1:-1] = (dydx[:-1] * dx[1:] + dydx[1:] * dx[:-1]) / \ 743 | (dx[1:] + dx[:-1]) 744 | yp[0] = 2.0 * dydx[0] - yp[1] 745 | yp[-1] = 2.0 * dydx[-1] - yp[-2] 746 | else: 747 | # At the endpoints - use one-sided differences 748 | yp[0] = dydx[0] 749 | yp[-1] = dydx[-1] 750 | if method.startswith('s'): # secant'): 751 | # In the middle - use the average of the secants 752 | yp[1:-1] = (dydx[:-1] + dydx[1:]) / 2.0 753 | else: # Cardinal or Catmull-Rom method 754 | yp[1:-1] = (y[2:] - y[:-2]) / (x[2:] - x[:-2]) 755 | if method.startswith('car'): # cardinal'): 756 | yp = (1 - tension) * yp 757 | 758 | if monotone: 759 | # Special case: intervals where y[k] == y[k+1] 760 | # Setting these slopes to zero guarantees the spline connecting 761 | # these points will be flat which preserves monotonicity 762 | ii, = (dydx == 0.0).nonzero() 763 | yp[ii] = 0.0 764 | yp[ii + 1] = 0.0 765 | 766 | alpha = yp[:-1] / dydx 767 | beta = yp[1:] / dydx 768 | dist = alpha ** 2 + beta ** 2 769 | tau = 3.0 / np.sqrt(dist) 770 | 771 | # To prevent overshoot or undershoot, restrict the position vector 772 | # (alpha, beta) to a circle of radius 3. If (alpha**2 + beta**2)>9, 773 | # then set m[k] = tau[k]alpha[k]delta[k] and 774 | # m[k+1] = tau[k]beta[b]delta[k] 775 | # where tau = 3/sqrt(alpha**2 + beta**2). 776 | 777 | # Find the indices that need adjustment 778 | indices_to_fix, = (dist > 9.0).nonzero() 779 | for ii in indices_to_fix: 780 | yp[ii] = tau[ii] * alpha[ii] * dydx[ii] 781 | yp[ii + 1] = tau[ii] * beta[ii] * dydx[ii] 782 | 783 | return yp 784 | 785 | 786 | def stineman_interp(xi, x, y, yp=None): 787 | """ 788 | Given data vectors *x* and *y*, the slope vector *yp* and a new 789 | abscissa vector *xi*, the function :func:`stineman_interp` uses 790 | Stineman interpolation to calculate a vector *yi* corresponding to 791 | *xi*. 792 | 793 | Here's an example that generates a coarse sine curve, then 794 | interpolates over a finer abscissa:: 795 | 796 | x = linspace(0,2*pi,20); y = sin(x); yp = cos(x) 797 | xi = linspace(0,2*pi,40); 798 | yi = stineman_interp(xi,x,y,yp); 799 | plot(x,y,'o',xi,yi) 800 | 801 | The interpolation method is described in the article A 802 | CONSISTENTLY WELL BEHAVED METHOD OF INTERPOLATION by Russell 803 | W. Stineman. The article appeared in the July 1980 issue of 804 | Creative Computing with a note from the editor stating that while 805 | they were: 806 | 807 | not an academic journal but once in a while something serious 808 | and original comes in adding that this was 809 | "apparently a real solution" to a well known problem. 810 | 811 | For *yp* = *None*, the routine automatically determines the slopes 812 | using the :func:`slopes` routine. 813 | 814 | *x* is assumed to be sorted in increasing order. 815 | 816 | For values ``xi[j] < x[0]`` or ``xi[j] > x[-1]``, the routine 817 | tries an extrapolation. 
The relevance of the data obtained from 818 | this, of course, is questionable... 819 | 820 | Original implementation by Halldor Bjornsson, Icelandic 821 | Meteorolocial Office, March 2006 halldor at vedur.is 822 | 823 | Completely reworked and optimized for Python by Norbert Nemec, 824 | Institute of Theoretical Physics, University or Regensburg, April 825 | 2006 Norbert.Nemec at physik.uni-regensburg.de 826 | """ 827 | 828 | # Cast key variables as float. 829 | x = np.asarray(x, np.float_) 830 | y = np.asarray(y, np.float_) 831 | assert x.shape == y.shape 832 | # N = len(y) 833 | 834 | if yp is None: 835 | yp = slopes(x, y) 836 | else: 837 | yp = np.asarray(yp, np.float_) 838 | 839 | xi = np.asarray(xi, np.float_) 840 | # yi = np.zeros(xi.shape, np.float_) 841 | 842 | # calculate linear slopes 843 | dx = x[1:] - x[:-1] 844 | dy = y[1:] - y[:-1] 845 | s = dy / dx # note length of s is N-1 so last element is #N-2 846 | 847 | # find the segment each xi is in 848 | # this line actually is the key to the efficiency of this implementation 849 | idx = np.searchsorted(x[1:-1], xi) 850 | 851 | # now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1] 852 | # except at the boundaries, where it may be that xi[j] < x[0] or xi[j] > 853 | # x[-1] 854 | 855 | # the y-values that would come out from a linear interpolation: 856 | sidx = s.take(idx) 857 | xidx = x.take(idx) 858 | yidx = y.take(idx) 859 | xidxp1 = x.take(idx + 1) 860 | yo = yidx + sidx * (xi - xidx) 861 | 862 | # the difference that comes when using the slopes given in yp 863 | # using the yp slope of the left point 864 | dy1 = (yp.take(idx) - sidx) * (xi - xidx) 865 | # using the yp slope of the right point 866 | dy2 = (yp.take(idx + 1) - sidx) * (xi - xidxp1) 867 | 868 | dy1dy2 = dy1 * dy2 869 | # The following is optimized for Python. The solution actually 870 | # does more calculations than necessary but exploiting the power 871 | # of numpy, this is far more efficient than coding a loop by hand 872 | # in Python 873 | dy1mdy2 = np.where(dy1dy2, dy1 - dy2, np.inf) 874 | dy1pdy2 = np.where(dy1dy2, dy1 + dy2, np.inf) 875 | yi = yo + dy1dy2 * np.choose( 876 | np.array(np.sign(dy1dy2), np.int32) + 1, 877 | ((2 * xi - xidx - xidxp1) / ((dy1mdy2) * (xidxp1 - xidx)), 0.0, 878 | 1 / (dy1pdy2))) 879 | return yi 880 | 881 | 882 | class StinemanInterp(object): 883 | ''' 884 | Returns an interpolating function 885 | that runs through a set of points according to the algorithm of 886 | Stineman (1980). 887 | 888 | Parameters 889 | ---------- 890 | x,y : array-like 891 | coordinates of points defining the interpolating function. 892 | yp : array-like 893 | slopes of the interpolating function at x. 894 | Optional: only given if they are known, else the argument is not used. 895 | method : string 896 | method for computing the slope at the given points if the slope is not 897 | known. With method= "parabola" calculates the slopes from a parabola 898 | through every three points. 899 | 900 | Notes 901 | ----- 902 | The interpolation method is described by Russell W. Stineman (1980) 903 | 904 | According to Stineman, the interpolation procedure has "the following 905 | properties: 906 | 907 | If values of the ordinates of the specified points change monotonically, 908 | and the slopes of the line segments joining the points change 909 | monotonically, then the interpolating curve and its slope will change 910 | monotonically. 
If the slopes of the line segments joining the specified
911 |     points change monotonically, then the slopes of the interpolating curve
912 |     will change monotonically. Suppose that the conditions in (1) or (2) are
913 |     satisfied by a set of points, but a small change in the ordinate or slope
914 |     at one of the points will result in conditions (1) or (2) no longer being
915 |     satisfied. Then making this small change in the ordinate or slope at a
916 |     point will cause no more than a small change in the interpolating
917 |     curve." The method is based on rational interpolation with specially chosen
918 |     rational functions to satisfy the above three conditions.
919 | 
920 |     Slopes computed at the given points with the methods provided by the
921 |     `StinemanInterp' function satisfy Stineman's requirements.
922 |     The original method suggested by Stineman (method="scaledstineman", the
923 |     default, and "stineman") results in lower slopes near abrupt steps or spikes
924 |     in the point sequence, and therefore a smaller tendency for overshooting.
925 |     The method based on a second degree polynomial (method="parabola") provides
926 |     a better approximation to smooth functions, but it results in higher
927 |     slopes near abrupt steps or spikes and can lead to some overshooting where
928 |     Stineman's method does not. Both methods lead to much less tendency for
929 |     `spurious' oscillations than traditional interpolation methods based on
930 |     polynomials, such as splines
931 |     (see the examples section).
932 | 
933 |     Stineman states that "The complete assurance that the procedure will never
934 |     generate `wild' points makes it attractive as a general purpose procedure".
935 | 
936 |     This interpolation method has been implemented in Matlab and R in addition
937 |     to Python.
938 | 
939 |     Examples
940 |     --------
941 |     >>> import wafo.interpolate as wi
942 |     >>> import numpy as np
943 |     >>> import matplotlib.pyplot as plt
944 |     >>> x = np.linspace(0,2*pi,20)
945 |     >>> y = np.sin(x); yp = np.cos(x)
946 |     >>> xi = np.linspace(0,2*pi,40);
947 |     >>> yi = wi.StinemanInterp(x,y)(xi)
948 |     >>> np.allclose(yi[:10],
949 |     ...    [ 0., 0.16258231, 0.31681338, 0.46390886, 0.60091421,
950 |     ...      0.7206556 , 0.82314953, 0.90304148, 0.96059538, 0.99241945])
951 |     True
952 |     >>> yi1 = wi.CubicHermiteSpline(x,y, yp)(xi)
953 |     >>> yi2 = wi.Pchip(x,y, method='parabola')(xi)
954 | 
955 |     h=plt.subplot(211)
956 |     h=plt.plot(x,y,'o',xi,yi,'r', xi,yi1, 'g', xi,yi2, 'b')
957 |     h=plt.subplot(212)
958 |     h=plt.plot(xi,np.abs(sin(xi)-yi), 'r',
959 |                xi, np.abs(sin(xi)-yi1), 'g',
960 |                xi, np.abs(sin(xi)-yi2), 'b')
961 | 
962 |     References
963 |     ----------
964 |     Stineman, R. W. A Consistently Well Behaved Method of Interpolation.
965 |     Creative Computing (1980), volume 6, number 7, p. 54-57.
966 | 967 | See Also 968 | -------- 969 | slopes, Pchip 970 | ''' 971 | 972 | def __init__(self, x, y, yp=None, method='parabola', monotone=False): 973 | if yp is None: 974 | yp = slopes(x, y, method, monotone=monotone) 975 | self.x = np.asarray(x, np.float_) 976 | self.y = np.asarray(y, np.float_) 977 | self.yp = np.asarray(yp, np.float_) 978 | 979 | def __call__(self, xi): 980 | xi = np.asarray(xi, np.float_) 981 | x = self.x 982 | y = self.y 983 | yp = self.yp 984 | # calculate linear slopes 985 | dx = x[1:] - x[:-1] 986 | dy = y[1:] - y[:-1] 987 | s = dy / dx # note length of s is N-1 so last element is #N-2 988 | 989 | # find the segment each xi is in 990 | # this line actually is the key to the efficiency of this 991 | # implementation 992 | idx = np.searchsorted(x[1:-1], xi) 993 | 994 | # now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1] 995 | # except at the boundaries, where it may be that xi[j] < x[0] or xi[j] 996 | # > x[-1] 997 | 998 | # the y-values that would come out from a linear interpolation: 999 | sidx = s.take(idx) 1000 | xidx = x.take(idx) 1001 | yidx = y.take(idx) 1002 | xidxp1 = x.take(idx + 1) 1003 | yo = yidx + sidx * (xi - xidx) 1004 | 1005 | # the difference that comes when using the slopes given in yp 1006 | # using the yp slope of the left point 1007 | dy1 = (yp.take(idx) - sidx) * (xi - xidx) 1008 | # using the yp slope of the right point 1009 | dy2 = (yp.take(idx + 1) - sidx) * (xi - xidxp1) 1010 | 1011 | dy1dy2 = dy1 * dy2 1012 | # The following is optimized for Python. The solution actually 1013 | # does more calculations than necessary but exploiting the power 1014 | # of numpy, this is far more efficient than coding a loop by hand 1015 | # in Python 1016 | dy1mdy2 = np.where(dy1dy2, dy1 - dy2, np.inf) 1017 | dy1pdy2 = np.where(dy1dy2, dy1 + dy2, np.inf) 1018 | yi = yo + dy1dy2 * np.choose( 1019 | np.array(np.sign(dy1dy2), np.int32) + 1, 1020 | ((2 * xi - xidx - xidxp1) / ((dy1mdy2) * (xidxp1 - xidx)), 0.0, 1021 | 1 / (dy1pdy2))) 1022 | return yi 1023 | 1024 | 1025 | class StinemanInterp2(BPoly): 1026 | def __init__(self, x, y, yp=None, method='parabola', monotone=False): 1027 | if yp is None: 1028 | yp = slopes(x, y, method, monotone=monotone) 1029 | yyp = [z for z in zip(y, yp)] 1030 | bpoly = BPoly.from_derivatives(x, yyp) 1031 | super(StinemanInterp2, self).__init__(bpoly.c, x) 1032 | 1033 | 1034 | class CubicHermiteSpline(BPoly): 1035 | ''' 1036 | Piecewise Cubic Hermite Interpolation using Catmull-Rom 1037 | method for computing the slopes. 1038 | ''' 1039 | 1040 | def __init__(self, x, y, yp=None, method='Catmull-Rom'): 1041 | if yp is None: 1042 | yp = slopes(x, y, method, monotone=False) 1043 | yyp = [z for z in zip(y, yp)] 1044 | bpoly = BPoly.from_derivatives(x, yyp, orders=3) 1045 | super(CubicHermiteSpline, self).__init__(bpoly.c, x) 1046 | # super(CubicHermiteSpline, self).__init__(x, yyp, orders=3) 1047 | 1048 | 1049 | class Pchip(BPoly): 1050 | """PCHIP 1-d monotonic cubic interpolation 1051 | 1052 | Description 1053 | ----------- 1054 | x and y are arrays of values used to approximate some function f: 1055 | y = f(x) 1056 | This class factory function returns a callable class whose __call__ method 1057 | uses monotonic cubic, interpolation to find the value of new points. 1058 | 1059 | Parameters 1060 | ---------- 1061 | x : array 1062 | A 1D array of monotonically increasing real values. x cannot 1063 | include duplicate values (otherwise f is overspecified) 1064 | y : array 1065 | A 1-D array of real values. 
y's length along the interpolation 1066 | axis must be equal to the length of x. 1067 | yp : array 1068 | slopes of the interpolating function at x. 1069 | Optional: only given if they are known, else the argument is not used. 1070 | method : string 1071 | method for computing the slope at the given points if the slope is not 1072 | known. With method="parabola" calculates the slopes from a parabola 1073 | through every three points. 1074 | 1075 | Assumes x is sorted in monotonic order (e.g. x[1] > x[0]) 1076 | 1077 | Example 1078 | ------- 1079 | >>> import wafo.interpolate as wi 1080 | 1081 | # Create a step function (will demonstrate monotonicity) 1082 | >>> x = np.arange(7.0) - 3.0 1083 | >>> y = np.array([-1.0, -1,-1,0,1,1,1]) 1084 | 1085 | # Interpolate using monotonic piecewise Hermite cubic spline 1086 | >>> n = 20. 1087 | >>> xvec = np.arange(n)/10. - 1.0 1088 | >>> yvec = wi.Pchip(x, y)(xvec) 1089 | >>> np.allclose(yvec, [-1. , -0.981, -0.928, -0.847, -0.744, -0.625, 1090 | ... -0.496, -0.363, -0.232, -0.109, 0. , 0.109, 0.232, 0.363, 1091 | ... 0.496, 0.625, 0.744, 0.847, 0.928, 0.981]) 1092 | True 1093 | 1094 | # Call the Scipy cubic spline interpolator 1095 | >>> from scipy.interpolate import interpolate 1096 | >>> function = interpolate.interp1d(x, y, kind='cubic') 1097 | >>> yvec1 = function(xvec) 1098 | >>> np.allclose(yvec1, [-1.00000000e+00, -9.41911765e-01, -8.70588235e-01, 1099 | ... -7.87500000e-01, -6.94117647e-01, -5.91911765e-01, 1100 | ... -4.82352941e-01, -3.66911765e-01, -2.47058824e-01, 1101 | ... -1.24264706e-01, 2.49800181e-16, 1.24264706e-01, 1102 | ... 2.47058824e-01, 3.66911765e-01, 4.82352941e-01, 1103 | ... 5.91911765e-01, 6.94117647e-01, 7.87500000e-01, 1104 | ... 8.70588235e-01, 9.41911765e-01]) 1105 | True 1106 | 1107 | 1108 | # Non-montonic cubic Hermite spline interpolator using 1109 | # Catmul-Rom method for computing slopes... 1110 | >>> yvec2 = wi.CubicHermiteSpline(x,y)(xvec) 1111 | >>> yvec3 = wi.StinemanInterp(x, y)(xvec) 1112 | 1113 | >>> np.allclose(yvec2, [-1., -0.9405, -0.864 , -0.7735, -0.672 , -0.5625, 1114 | ... -0.448 , -0.3315, -0.216 , -0.1045, 0. , 0.1045, 0.216 , 1115 | ... 0.3315, 0.448 , 0.5625, 0.672 , 0.7735, 0.864 , 0.9405]) 1116 | True 1117 | 1118 | >>> np.allclose(yvec3, [-1. , -0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, 1119 | ... -0.2, -0.1, 0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) 1120 | True 1121 | 1122 | # Plot the results 1123 | import matplotlib.pyplot as plt 1124 | h=plt.plot(x, y, 'ro') 1125 | h=plt.plot(xvec, yvec, 'b') 1126 | h=plt.plot(xvec, yvec1, 'k') 1127 | h=plt.plot(xvec, yvec2, 'g') 1128 | h=plt.plot(xvec, yvec3, 'm') 1129 | h=plt.title("pchip() step function test") 1130 | 1131 | h=plt.xlabel("X") 1132 | h=plt.ylabel("Y") 1133 | txt = "Comparing pypchip() vs. Scipy interp1d() vs. non-monotonic CHS" 1134 | h=plt.title(txt) 1135 | legends = ["Data", "pypchip()", "interp1d","CHS", 'SI'] 1136 | h=plt.legend(legends, loc="upper left") 1137 | plt.show() 1138 | 1139 | """ 1140 | 1141 | def __init__(self, x, y, yp=None, method='secant'): 1142 | if yp is None: 1143 | yp = slopes(x, y, method=method, monotone=True) 1144 | yyp = [z for z in zip(y, yp)] 1145 | bpoly = BPoly.from_derivatives(x, yyp, orders=3) 1146 | super(Pchip, self).__init__(bpoly.c, x) 1147 | # super(Pchip, self).__init__(x, yyp, orders=3) 1148 | 1149 | 1150 | def interp3(x, y, z, v, xi, yi, zi, method='cubic'): 1151 | """Interpolation on 3-D. 
x, y, xi, yi should be 1-D 1152 | and z.shape == (len(x), len(y), len(z))""" 1153 | q = (x, y, z) 1154 | qi = (xi, yi, zi) 1155 | for j in range(3): 1156 | pp = interp1d(q[j], v, axis=j, kind=method) 1157 | v = pp(qi[j]) 1158 | return v 1159 | 1160 | 1161 | def somefunc(x, y, z): 1162 | return x ** 2 + y ** 2 - z ** 2 + x * y * z 1163 | 1164 | 1165 | def test_interp3(): 1166 | # some input data 1167 | x = np.linspace(0, 1, 5) 1168 | y = np.linspace(0, 2, 6) 1169 | z = np.linspace(0, 3, 7) 1170 | v = somefunc(x[:, None, None], y[None, :, None], z[None, None, :]) 1171 | 1172 | # interpolate 1173 | xi = np.linspace(0, 1, 45) 1174 | yi = np.linspace(0, 2, 46) 1175 | zi = np.linspace(0, 3, 47) 1176 | vi = interp3(x, y, z, v, xi, yi, zi) 1177 | 1178 | import matplotlib.pyplot as plt 1179 | X, Y = np.meshgrid(xi, yi) 1180 | plt.figure(1) 1181 | plt.subplot(1, 2, 1) 1182 | plt.pcolor(X, Y, vi[:, :, 12].T) 1183 | plt.title('interpolated') 1184 | plt.subplot(1, 2, 2) 1185 | plt.pcolor(X, Y, somefunc(xi[:, None], yi[None, :], zi[12]).T) 1186 | plt.title('exact') 1187 | plt.show('hold') 1188 | 1189 | 1190 | def test_smoothing_spline(): 1191 | x = linspace(0, 2 * pi + pi / 4, 20) 1192 | y = sin(x) # + np.random.randn(x.size) 1193 | pp = SmoothSpline(x, y, p=1) 1194 | x1 = linspace(-1, 2 * pi + pi / 4 + 1, 20) 1195 | y1 = pp(x1) 1196 | pp1 = pp.derivative() 1197 | pp0 = pp1.integrate() 1198 | dy1 = pp1(x1) 1199 | y01 = pp0(x1) 1200 | # dy = y-y1 1201 | import matplotlib.pyplot as plt 1202 | 1203 | plt.plot(x, y, x1, y1, '.', x1, dy1, 'ro', x1, y01, 'r-') 1204 | plt.show('hold') 1205 | pass 1206 | # tck = interpolate.splrep(x, y, s=len(x)) 1207 | 1208 | 1209 | def compare_methods(): 1210 | # 1211 | # Sine wave test 1212 | # 1213 | fun = np.sin 1214 | # Create a example vector containing a sine wave. 1215 | x = np.arange(30.0) / 10. 1216 | y = fun(x) 1217 | 1218 | # Interpolate the data above to the grid defined by "xvec" 1219 | xvec = np.arange(250.) / 100. 1220 | 1221 | # Initialize the interpolator slopes 1222 | # Create the pchip slopes 1223 | m = slopes(x, y, method='parabola', monotone=True) 1224 | m1 = slopes(x, y, method='parabola', monotone=False) 1225 | m2 = slopes(x, y, method='catmul', monotone=False) 1226 | m3 = pchip_slopes(x, y) 1227 | 1228 | # Call the monotonic piece-wise Hermite cubic interpolator 1229 | yvec = Pchip(x, y, m)(xvec) 1230 | yvec1 = Pchip(x, y, m1)(xvec) 1231 | yvec2 = Pchip(x, y, m2)(xvec) 1232 | yvec3 = Pchip(x, y, m3)(xvec) 1233 | 1234 | import matplotlib.pyplot as plt 1235 | 1236 | plt.figure() 1237 | plt.plot(x, y, 'ro', xvec, fun(xvec), 'r') 1238 | plt.title("pchip() Sin test code") 1239 | 1240 | # Plot the interpolated points 1241 | plt.plot(xvec, yvec, xvec, yvec1, xvec, yvec2, 'g.', xvec, yvec3) 1242 | plt.legend( 1243 | ['true', 'true', 'parbola_monoton', 'parabola', 'catmul', 'pchip'], 1244 | frameon=False, loc=0) 1245 | plt.ioff() 1246 | plt.show() 1247 | 1248 | 1249 | def demo_monoticity(): 1250 | # Step function test... 1251 | import matplotlib.pyplot as plt 1252 | plt.figure(2) 1253 | plt.title("pchip() step function test") 1254 | # Create a step function (will demonstrate monotonicity) 1255 | x = np.arange(7.0) - 3.0 1256 | y = np.array([-1.0, -1, -1, 0, 1, 1, 1]) 1257 | 1258 | # Interpolate using monotonic piecewise Hermite cubic spline 1259 | xvec = np.arange(599.) / 100. 
- 3.0 1260 | 1261 | # Create the pchip slopes 1262 | m = slopes(x, y, monotone=True) 1263 | # m1 = slopes(x, y, monotone=False) 1264 | # m2 = slopes(x,y,method='catmul',monotone=False) 1265 | m3 = pchip_slopes(x, y) 1266 | # Interpolate... 1267 | yvec = Pchip(x, y, m)(xvec) 1268 | 1269 | # Call the Scipy cubic spline interpolator 1270 | from scipy.interpolate import interpolate as ip 1271 | function = ip.interp1d(x, y, kind='cubic') 1272 | yvec2 = function(xvec) 1273 | 1274 | # Non-montonic cubic Hermite spline interpolator using 1275 | # Catmul-Rom method for computing slopes... 1276 | yvec3 = CubicHermiteSpline(x, y)(xvec) 1277 | yvec4 = StinemanInterp(x, y)(xvec) 1278 | yvec5 = Pchip(x, y, m3)(xvec) # @UnusedVariable 1279 | 1280 | # Plot the results 1281 | plt.plot(x, y, 'ro', label='Data') 1282 | plt.plot(xvec, yvec, 'b', label='Pchip') 1283 | plt.plot(xvec, yvec2, 'k', label='interp1d') 1284 | plt.plot(xvec, yvec3, 'g', label='CHS') 1285 | plt.plot(xvec, yvec4, 'm', label='Stineman') 1286 | # plt.plot(xvec, yvec5, 'yo', label='Pchip2') 1287 | plt.xlabel("X") 1288 | plt.ylabel("Y") 1289 | plt.title("Comparing Pchip() vs. Scipy interp1d() vs. non-monotonic CHS") 1290 | # legends = ["Data", "Pchip()", "interp1d","CHS", 'Stineman'] 1291 | plt.legend(loc="upper left", frameon=False) 1292 | plt.ioff() 1293 | plt.show() 1294 | 1295 | 1296 | def test_func(): 1297 | from scipy import interpolate 1298 | import matplotlib.pyplot as plt 1299 | import matplotlib 1300 | matplotlib.interactive(False) 1301 | 1302 | coef = np.array([[1, 1], [0, 1]]) # linear from 0 to 2 1303 | # coef = np.array([[1,1],[1,1],[0,2]]) # linear from 0 to 2 1304 | breaks = [0, 1, 2] 1305 | pp = PPform(coef, breaks, a=-100, b=100) 1306 | x = linspace(-1, 3, 20) 1307 | y = pp(x) # @UnusedVariable 1308 | 1309 | x = linspace(0, 2 * pi + pi / 4, 20) 1310 | y = sin(x) + np.random.randn(x.size) 1311 | tck = interpolate.splrep(x, y, s=len(x)) # @UndefinedVariable 1312 | xnew = linspace(0, 2 * pi, 100) 1313 | ynew = interpolate.splev(xnew, tck, der=0) # @UndefinedVariable 1314 | tck0 = interpolate.splmake( # @UndefinedVariable 1315 | xnew, ynew, order=3, kind='smoothest', conds=None) 1316 | pp = interpolate.ppform.fromspline(*tck0) # @UndefinedVariable 1317 | 1318 | plt.plot(x, y, "x", xnew, ynew, xnew, sin(xnew), x, y, "b", x, pp(x), 'g') 1319 | plt.legend(['Linear', 'Cubic Spline', 'True']) 1320 | plt.title('Cubic-spline interpolation') 1321 | plt.show() 1322 | 1323 | t = np.arange(0, 1.1, .1) 1324 | x = np.sin(2 * np.pi * t) 1325 | y = np.cos(2 * np.pi * t) 1326 | _tck1, _u = interpolate.splprep([t, y], s=0) # @UndefinedVariable 1327 | tck2 = interpolate.splrep(t, y, s=len(t), task=0) # @UndefinedVariable 1328 | # interpolate.spl 1329 | tck = interpolate.splmake(t, y, order=3, kind='smoothest', conds=None) 1330 | self = interpolate.ppform.fromspline(*tck2) # @UndefinedVariable 1331 | plt.plot(t, self(t)) 1332 | plt.show('hold') 1333 | pass 1334 | 1335 | 1336 | def test_pp(): 1337 | coef = np.array([[1, 1], [0, 0]]) # linear from 0 to 2 @UnusedVariable 1338 | 1339 | # quadratic from 0 to 1 and 1 to 2. 
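    # each column of coef below is one piece, highest order first:
    # piece 0 is (x-0)**2 + (x-0) + 0 on [0, 1] and piece 1 is
    # (x-1)**2 + (x-1) + 2 on [1, 2], so the two pieces meet at x = 1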
1340 |     coef = np.array([[1, 1], [1, 1], [0, 2]])
1341 |     dc = pl.polyder(coef, 1)
1342 |     c2 = pl.polyint(dc, 1)  # @UnusedVariable
1343 |     breaks = [0, 1, 2]
1344 |     pp = PPform(coef, breaks)
1345 |     pp(0.5)
1346 |     pp(1)
1347 |     pp(1.5)
1348 |     dpp = pp.derivative()
1349 |     import matplotlib.pyplot as plt
1350 |     x = np.linspace(-1, 3)
1351 |     plt.plot(x, pp(x), x, dpp(x), '.')
1352 |     plt.show()
1353 | 
1354 | 
1355 | def test_docstrings():
1356 |     import doctest
1357 |     print('Testing docstrings in {}'.format(__file__))
1358 |     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
1359 | 
1360 | 
1361 | if __name__ == '__main__':
1362 |     # test_func()
1363 |     test_docstrings()
1364 |     # test_smoothing_spline()
1365 |     # compare_methods()
1366 |     # demo_monoticity()
1367 |     # test_interp3()
1368 | 
--------------------------------------------------------------------------------
/slam/hyperparameter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 
4 | Author
5 | ------
6 | Bo Zhang
7 | 
8 | Email
9 | -----
10 | bozhang@nao.cas.cn
11 | 
12 | Created on
13 | ----------
14 | - Mon Sep 05 12:00:00 2016
15 | 
16 | Modifications
17 | -------------
18 | - Mon Sep 05 12:00:00 2016
19 | 
20 | Aims
21 | ----
22 | - SVR hyper-parameters
23 | 
24 | """
25 | 
26 | import numpy as np
27 | from astropy.table import Table
28 | from sklearn.svm import SVR
29 | from sklearn.model_selection import GridSearchCV
30 | from copy import deepcopy
31 | import pandas as pd
32 | from joblib import Parallel, delayed
33 | 
34 | 
35 | __all__ = ['summarize_hyperparameters_to_table', 'summarize_table']
36 | 
37 | 
38 | # ############################ #
39 | # to summarize grid parameters #
40 | # ############################ #
41 | def hyperparameter_grid_stats(svrs, pivot=("param_C", "param_gamma"),
42 |                               n_jobs=10, verbose=10):
43 |     """ statistics for GridSearchCV results """
44 |     stats_train = []
45 |     stats_test = []
46 |     r = Parallel(n_jobs=n_jobs, verbose=verbose)(
47 |         delayed(hyperparameter_grid_stats_)(svr, pivot=pivot) for svr in svrs)
48 |     for i in range(len(r)):
49 |         stats_train.append(r[i][0])
50 |         stats_test.append(r[i][1])
51 |     return stats_train, stats_test
52 | 
53 | 
54 | def hyperparameter_grid_stats_(svr, pivot=("param_C", "param_gamma")):
55 |     """ statistics for GridSearchCV results """
56 |     if isinstance(svr, GridSearchCV):
57 |         # yes, that's it
58 |         cvr = svr.cv_results_
59 |         stats_train_ = deepcopy(cvr)
60 |         stats_test_ = deepcopy(cvr)
61 |         for k in cvr.keys():
62 |             if k.find("test") > -1:
63 |                 stats_train_.pop(k)
64 |             elif k.find("train") > -1:
65 |                 stats_test_.pop(k)
66 | 
67 |         if pivot is not None:
68 |             return (
69 |                 pd.DataFrame(stats_train_).pivot(*pivot, "mean_train_score"),
70 |                 pd.DataFrame(stats_test_).pivot(*pivot, "mean_test_score"))
71 |         else:
72 |             return pd.DataFrame(stats_train_), pd.DataFrame(stats_test_)
73 |     else:
74 |         return pd.DataFrame(), pd.DataFrame()
75 | 
76 | 
77 | # ######################## #
78 | # summarize best estimator #
79 | # ######################## #
80 | 
81 | def summarize_hyperparameters_to_table(svrs):
82 |     """ summarize hyper-parameters as a Table
83 | 
84 |     Parameters
85 |     ----------
86 |     svrs: list of sklearn.svm.SVR objects
87 |         a list of fitted SVR objects
88 | 
89 |     """
90 |     hyperparams = []
91 |     for svr in svrs:
92 |         if isinstance(svr, SVR):
93 |             hyperparams.append((svr.C, svr.gamma, svr.epsilon))
94 |         elif isinstance(svr, GridSearchCV):
95 |             hyperparams.append((svr.best_estimator_.C,
96 |                                 svr.best_estimator_.gamma,
97
| svr.best_estimator_.epsilon)) 98 | hp_array = np.array(hyperparams) 99 | return Table(data=hp_array, names=['C', 'gamma', 'epsilon']) 100 | 101 | 102 | def summarize_table(hpt): 103 | """ summarize table data 104 | 105 | Parameters 106 | ---------- 107 | hpt: astropy.table.Table 108 | a table of parameter 109 | 110 | """ 111 | 112 | # simply use pandas.DataFrame.describe() 113 | print(hpt.to_pandas().describe()) 114 | 115 | return 116 | -------------------------------------------------------------------------------- /slam/lndi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Thu Nov 17 19:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Thu Nov 17 19:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - Lndi class: 23 | An interpolator making use of scipy.interpolate.LinearNDInterpolator. 24 | This is particularly designed for synthetic spectra. 25 | 26 | """ 27 | 28 | from __future__ import print_function 29 | 30 | import os 31 | 32 | import numpy as np 33 | from scipy.interpolate import LinearNDInterpolator 34 | from astropy.table import Table 35 | from joblib import load, dump, Parallel, delayed 36 | 37 | from .normalization import normalize_spectra_block 38 | from .hyperparameter import summarize_hyperparameters_to_table, summarize_table 39 | from .predict import predict_labels, predict_labels_chi2, predict_spectrum 40 | from .standardization import standardize, standardize_ivar 41 | from .train import train_multi_pixels, train_single_pixel 42 | from .mcmc import predict_label_mcmc 43 | from .diagnostic import compare_labels, single_pixel_diagnostic 44 | 45 | __all__ = ['Lndi'] 46 | 47 | 48 | class Lndi(object): 49 | """ 50 | An interpolator implemented based on scipy.interpolate.LinearNDInterpolator 51 | """ 52 | wave = None 53 | tr_flux = None 54 | tr_label = None 55 | lndi = None 56 | trained = False 57 | 58 | def __init__(self, wave, tr_flux, tr_label): 59 | """ Constructor of an Lndi instance """ 60 | self.wave = wave 61 | self.tr_label = tr_label 62 | self.tr_flux = tr_flux 63 | 64 | self.lndi = LinearNDInterpolator(tr_label, tr_flux) 65 | self.trained = True 66 | 67 | self.ntrain = tr_label.shape[0] 68 | self.ndim = tr_label.shape[1] 69 | 70 | def predict_spectra(self, test_label): 71 | """ predict spectra given labels """ 72 | return self.lndi(test_label) 73 | 74 | def generate_spectra_rand(self, X_init, X_std, n_rand): 75 | """ generate spectra randomly 76 | 77 | Parameters 78 | ---------- 79 | X_init: ndarray (1, ndim) 80 | the central X 81 | X_std: ndarray (1, ndim) 82 | the std of gaussian random numbers 83 | n_rand: int 84 | the central X 85 | 86 | """ 87 | X_rand = np.random.randn(n_rand, self.ndim) 88 | X_rand = X_init + X_rand * X_std.reshape(1, self.ndim) 89 | return self.predict_spectra(X_rand) 90 | 91 | def generate_label_rand(self, X_init, X_std, n_rand): 92 | """ generate labels randomly 93 | 94 | Parameters 95 | ---------- 96 | X_init: ndarray (1, ndim) 97 | the central X 98 | X_std: ndarray (1, ndim) 99 | the std of gaussian random numbers 100 | n_rand: int 101 | the central X 102 | 103 | """ 104 | X_rand = np.random.randn(n_rand, self.ndim) 105 | X_rand = X_init + X_rand * X_std.reshape(1, self.ndim) 106 | return X_rand 107 | 108 | def predict_label_rand(self, flux_goal, X_init, X_std, 109 | n_rand, frac_ext=4, n_0_th=2, 110 | verbose=False): 111 | n_loop = 0 
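        # n_loop counts search iterations; n_0 (below) counts consecutive
        # iterations in which no random neighbour beats the current centre
        # (i_min == 0 is X_init itself), which serves as the stopping rule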
112 |         n_0 = 0
113 |         while True:
114 |             n_loop += 1
115 | 
116 |             if verbose:
117 |                 print('@Cham: n_loop = %s ...' % n_loop)
118 | 
119 |             # generate random numbers
120 |             n_rand = np.int(n_rand)
121 |             frac_ext = np.int(frac_ext)
122 |             n_rand_2 = n_rand // frac_ext
123 |             n_rand_1 = n_rand - n_rand_2
124 |             X_rand_1 = self.generate_label_rand(X_init, X_std, n_rand_1)
125 |             X_rand_2 = self.generate_label_rand(X_init, X_std*3., n_rand_2)
126 |             X_rand = np.vstack((X_init, X_rand_1, X_rand_2))
127 |             flux_rand = self.predict_spectra(X_rand)
128 | 
129 |             # kick nan
130 |             ind_nan = np.any(np.isnan(flux_rand), axis=1)
131 |             X_rand = X_rand[~ind_nan]
132 |             flux_rand = flux_rand[~ind_nan]
133 |             # find chi2_min
134 |             i_min, chi2_min, flux_cont_min = best_chi2(
135 |                 self.wave, flux_rand, flux_goal)
136 | 
137 |             if verbose:
138 |                 print('@Cham: ', i_min, chi2_min)
139 | 
140 |             # if n_0 > n_0_th, but stuck in the same place --> end
141 |             if i_min == 0:
142 |                 n_0 += 1
143 |                 if n_0 > n_0_th:
144 |                     break
145 |             else:
146 |                 n_0 = 0
147 |                 X_init = X_rand[i_min]
148 | 
149 |         return (X_rand[i_min], chi2_min,
150 |                 flux_cont_min, flux_rand[i_min]*flux_cont_min)
151 | 
152 | 
153 | def best_chi2(wave, flux, flux_goal, ivar=None):
154 |     flux_cont = determine_continuum(wave, flux, flux_goal,
155 |                                     norm_range=(4000, 8000),
156 |                                     dwave=100, p=(1E-7, 1E-7))
157 |     if ivar is None:
158 |         chi2 = np.nansum((flux*flux_cont-flux_goal)**2., axis=1)
159 |     else:
160 |         chi2 = np.nansum((flux*flux_cont - flux_goal) ** 2. *
161 |                          ivar.reshape(1, -1), axis=1)
162 | 
163 |     i_min = np.argmin(chi2)
164 |     chi2_min = chi2[i_min]
165 |     return i_min, chi2_min, flux_cont[i_min]
166 | 
167 | 
168 | def determine_continuum(wave, flux, flux_goal,
169 |                         norm_range=(4000, 8000), dwave=100, p=(1E-7, 1E-7),
170 |                         q=.5, rsv_frac=3):
171 |     """ determine the best continuum that makes flux fit flux_goal """
172 |     # TODO: ivar should be considered for emission line stars
173 |     flux_cont = normalize_spectra_block(wave, flux_goal/flux,
174 |                                         norm_range=norm_range, dwave=dwave,
175 |                                         p=p, q=q, ivar_block=None,
176 |                                         rsv_frac=rsv_frac,
177 |                                         n_jobs=1, verbose=False)[1]
178 |     return flux_cont
179 | 
--------------------------------------------------------------------------------
/slam/logger.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 
4 | Author
5 | ------
6 | Bo Zhang
7 | 
8 | Email
9 | -----
10 | bozhang@nao.cas.cn
11 | 
12 | Created on
13 | ----------
14 | - Thu Jan 19 16:00:00 2017
15 | 
16 | Modifications
17 | -------------
18 | - Thu Jan 19 16:00:00 2017
19 | 
20 | Aims
21 | ----
22 | - a customized logger for SLAM
23 | 
24 | """
25 | 
26 | # verbose level:
27 | # 1 debug
28 | # 2 info
29 | # 3 warning
30 | # 4 error
31 | # 5 critical
32 | 
33 | 
34 | import logging
35 | 
36 | # configuration
37 | format_slam = "[%(asctime)s] [%(module)s] %(levelname)s: %(message)s"
38 | logging.basicConfig(format=format_slam, level=0)
39 | 
40 | # create a logger for SLAM
41 | logger = logging.getLogger('SLAM')
42 | 
43 | # how to use it in other modules:
44 | # from .logger import logger
45 | # logger.info("msg")
46 | 
--------------------------------------------------------------------------------
/slam/mcmc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 
4 | Author
5 | ------
6 | Bo Zhang
7 | 
8 | Email
9 | -----
10 | bozhang@nao.cas.cn
11 | 
12 | Created on
13 | ----------
14 | - Sat Sep 03 12:00:00 2016
15 | 
16 | Modifications
17 |
------------- 18 | - Sat Sep 03 12:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - Implement bayesian estimation of stars 23 | 24 | """ 25 | 26 | import numpy as np 27 | from emcee import EnsembleSampler 28 | 29 | from .predict import predict_spectrum 30 | 31 | 32 | __all__ = ['lnlike_gaussian', 'lnprior_uniform', 'lnprob', 33 | 'predict_label_mcmc', 'predict_spectrum', 34 | 'theta_between', 'check_chains', 'sampler_mcc'] 35 | 36 | eps = 1e-10 # Once flux_ivar < eps, these pixels are ignored 37 | stablechain_corrcoef_threshold = 0.4 38 | 39 | 40 | def lnlike_gaussian(theta, svrs, flux_obs, flux_ivar, mask): 41 | """ Gaussian likelihood function 42 | 43 | Parameters 44 | ---------- 45 | theta: ndarray 46 | the stellar labels 47 | svrs: list 48 | a list of sklearn.svm.SVR objects 49 | flux_obs: ndarray 50 | the observed stellar spectrum 51 | flux_ivar: ndarray 52 | the inverse variance of observed spectrum 53 | mask: 54 | label scaler, i.e., tr_labels_scaler 55 | 56 | 57 | Returns 58 | ------- 59 | Gaussian likelihood function 60 | 61 | NOTE 62 | ---- 63 | since flux_ivar appears in denominator, 0 values are ignored 64 | 65 | """ 66 | # determine good pixels 67 | # ind_good = flux_ivar > eps 68 | # flux_ivar[~ind_good] = eps 69 | 70 | # preprocessing is already done 71 | 72 | # predict spectrum 73 | flux_pred = predict_spectrum(svrs, theta, mask=mask) 74 | 75 | # Gaussian likelihood 76 | return - 0.5 * np.nansum((flux_obs - flux_pred) ** 2. * flux_ivar + 77 | np.log(2. * np.pi / flux_ivar)) 78 | 79 | 80 | def lnprior_uniform(theta, theta_lb, theta_ub): 81 | """ loose uniform prior for theta 82 | 83 | Parameters 84 | ---------- 85 | theta: ndarray 86 | the stellar labels 87 | theta_lb: 2-element ndarray 88 | the lower bound of theta 89 | theta_ub: 2-element ndarray 90 | the upper bound of theta 91 | 92 | """ 93 | theta = np.array(theta) 94 | 95 | # if np.all(-np.inf < theta) and np.all(theta < np.inf): 96 | if np.all(theta_lb < theta) and np.all(theta < theta_ub): 97 | # reasonable theta 98 | return 0. 99 | # unreasonable theta 100 | return -np.inf 101 | 102 | 103 | def lnprob(theta, svrs, flux_obs, flux_ivar, mask, theta_lb, theta_ub): 104 | """ posterior probability function 105 | 106 | Parameters 107 | ---------- 108 | theta : ndarray 109 | the stellar labels 110 | svrs: list 111 | a list of sklearn.svm.SVR objects 112 | flux_obs : ndarray 113 | the observed stellar spectrum 114 | flux_ivar : ndarray 115 | the inverse variance of observed spectrum 116 | mask: bool array 117 | 118 | 119 | 120 | Returns 121 | ------- 122 | Gaussian likelihood function 123 | 124 | NOTE 125 | ---- 126 | since flux_ivar appears in denominator, 0 values are ignored 127 | 128 | """ 129 | # calculate prior 130 | lp = lnprior_uniform(theta, theta_lb, theta_ub) 131 | 132 | if not np.isfinite(lp): 133 | # if prior is unreasonable (-inf), avoiding lnlike computing 134 | return -np.inf 135 | 136 | # if prior is reasonable 137 | lp += lnlike_gaussian(theta, svrs, flux_obs, flux_ivar, mask) 138 | # print("theta: ", theta, "lp: ", lp) 139 | return lp 140 | 141 | 142 | def predict_label_mcmc(theta0, svrs, flux_obs, flux_ivar, mask, 143 | theta_lb=None, theta_ub=None, 144 | n_walkers=10, n_burnin=200, n_run=500, threads=1, 145 | return_chain=False, mcmc_run_max_iter=5, mcc=0.4, 146 | prompt=None, **kwargs): 147 | """ predict labels using emcee MCMC """ 148 | # theta length 149 | n_dim = len(theta0) 150 | 151 | # default theta lower/upper bounds 152 | if theta_lb is None: 153 | theta_lb = np.ones_like(theta0) * -10. 
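    # NB: the wide default bounds assume the labels have been standardized
    # (cf. slam.standardization), so +/-10 is a very loose box around any
    # plausible solution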
154 | if theta_ub is None: 155 | theta_ub = np.ones_like(theta0) * 10. 156 | 157 | # instantiate EnsambleSampler 158 | sampler = EnsembleSampler(n_walkers, n_dim, lnprob, 159 | args=(svrs, flux_obs, flux_ivar, mask, 160 | theta_lb, theta_ub), 161 | threads=threads) # **kwargs? 162 | 163 | # burn in 164 | pos0 = [theta0 + np.random.uniform(-1, 1, size=(len(theta0),)) * 1.e-3 165 | for _ in range(n_walkers)] 166 | pos, prob, rstate = sampler.run_mcmc(pos0, n_burnin) 167 | 168 | # run mcmc 169 | for i_run in range(mcmc_run_max_iter): 170 | print("--------------------------------------------------------------") 171 | print(prompt, " i_run : ", i_run) 172 | print(prompt, " Current pos : \n", pos) 173 | 174 | # new position 175 | pos_new, state, pos_best = check_chains( 176 | sampler, pos, theta_lb, theta_ub, mode_list=['bounds']) 177 | print(prompt, " New pos : ", pos_new) 178 | print(prompt, " Best pos : ", pos_best) 179 | 180 | if np.any(np.logical_not(state)): 181 | print(prompt, " Chain states : ", state) 182 | print( 183 | prompt, " RESET chain : ", np.arange(0, len(state) + 1)[state]) 184 | 185 | # maximum correlation coefficients 186 | mcc_qtl, mcc_mat = sampler_mcc(sampler) 187 | # state_mcc = True --> not any out of threshold --> good chain 188 | state_mcc = ~np.any(np.abs(mcc_qtl) >= mcc) 189 | 190 | print(prompt, " *** MCC quantiles *** : ", mcc_qtl) 191 | # print(prompt, " MCC_MAT : -----------------------------------------") 192 | # for i in range(mcc_mat.shape[2]): 193 | # print(prompt, " MCC_MAT[:,:,%s]: " % i, mcc_mat[:, :, i]) 194 | 195 | # if chains are good, break and do statistics 196 | if state_mcc and i_run > 0: 197 | break 198 | 199 | # else continue running 200 | sampler.reset() 201 | pos, prob, rstate = sampler.run_mcmc(pos_new, n_run) 202 | 203 | print(prompt, ' state_mcc : ', state_mcc) 204 | 205 | # estimate percentiles 206 | theta_est_mcmc = np.nanpercentile(sampler.flatchain, [15., 50., 85.], axis=0) 207 | 208 | # format of theta_est_mcmc: 209 | # array([theta_p15, 210 | # theta_p50, 211 | # theta_p85]) 212 | # e.g.: 213 | # array([[ 3.21908185, 5.66655696, 8.99618546], 214 | # [ 3.22411158, 5.68827311, 9.08791289], 215 | # [ 3.22909087, 5.71157073, 9.17812294]]) 216 | 217 | # sampler is not returned, for saving memory 218 | if return_chain: 219 | result = {'theta': theta_est_mcmc, 220 | 'state_mcc': state_mcc, 221 | 'mcc_qtl': mcc_qtl, 222 | 'mcc_mat': mcc_mat, 223 | 'i_run': i_run, 224 | 'flatchain': sampler.flatchain} 225 | else: 226 | result = {'theta': theta_est_mcmc, 227 | 'state_mcc': state_mcc, 228 | 'mcc_qtl': mcc_qtl, 229 | 'mcc_mat': mcc_mat, 230 | 'i_run': i_run} 231 | 232 | return result 233 | # if not return_chain: 234 | # return theta_est_mcmc 235 | # else: 236 | # return theta_est_mcmc, sampler.flatchain 237 | 238 | 239 | def theta_between(theta, theta_lb, theta_ub): 240 | """ check if theta is between [theta_lb, theta_ub] """ 241 | state = np.all(theta.flatten() >= theta_lb.flatten()) and \ 242 | np.all(theta.flatten() <= theta_ub.flatten()) #and \ 243 | # np.all(np.isfinite()) 244 | return state 245 | 246 | 247 | def check_chains(sampler, pos, theta_lb, theta_ub, 248 | mode_list=['bounds']): 249 | """ check chains 250 | 251 | 1> reset out-of-bound chains 252 | 2> reset all chains to max likelihood neighbours 253 | """ 254 | mode_all = ['bounds', 'reset_all'] 255 | 256 | for mode in mode_list: 257 | assert mode in mode_all 258 | 259 | n_walkers, n_step, n_dim = sampler.chain.shape 260 | 261 | # state of each chain 262 | state = 
np.ones((n_walkers,), dtype=np.bool) 263 | 264 | # the best position 265 | pos_best = sampler.flatchain[np.argsort(sampler.flatlnprobability)[-1]] 266 | 267 | # 'bounds' : chain pos should be between theta_lb, theta_ub 268 | if 'bounds' in mode_list: 269 | state = np.logical_and(state, np.array( 270 | [theta_between(pos[i], theta_lb, theta_ub) for i in 271 | range(n_walkers)])) 272 | 273 | # 'reset_all' : reset all chains 274 | if 'reset_all' in mode_list: 275 | state = np.logical_and(state, 276 | np.zeros((n_walkers,), dtype=np.bool)) 277 | 278 | # determine new pos 279 | pos_new = [] 280 | for i, state_ in enumerate(state): 281 | if not state_: 282 | # state_ = False, reset 283 | pos_new.append(pos_best + 284 | np.random.uniform(-1, 1, 285 | size=pos_best.shape) * 1.e-3) 286 | else: 287 | pos_new.append(pos[i]) 288 | 289 | return np.array(pos_new), state, pos_best 290 | 291 | 292 | # IMPORTANT : this function is designed to implement "adaptive burn in length" 293 | def sampler_mcc(sampler): 294 | """ calculate correlation coefficient matrix of chains 295 | 296 | Parameters 297 | ---------- 298 | sampler : emcee.EnsembleSampler instance 299 | sampler 300 | 301 | Returns 302 | ------- 303 | mcc_qtl : ndarray [3,] 304 | the [25, 50, 75] th percentiles of coefs 305 | coefs : ndarray [n_chain, n_chain, n_dim] 306 | the corrcoef between each pair of chains 307 | 308 | """ 309 | n_chain = sampler.k 310 | 311 | # correlation coefficient matrix 312 | coefs = chain_corrcoef(sampler) 313 | # set diagonal to np.nan 314 | for idim in range(coefs.shape[2]): 315 | for ichain in range(n_chain): 316 | coefs[ichain, ichain, idim] = np.nan 317 | 318 | # correlation coefficient quantile 319 | mcc_qtl = np.nanpercentile(coefs, [25., 50., 75.]) 320 | 321 | # return quantiles 322 | return mcc_qtl, coefs 323 | 324 | 325 | def chain_corrcoef(sampler): 326 | """ calculate correlation coefficients of chains 327 | 328 | Parameters 329 | ---------- 330 | sampler: emcee.EnsembleSampler 331 | MCMC flatchain 332 | 333 | Returns 334 | ------- 335 | coefs : ndarray [n_chain, n_chain, n_dim] 336 | the corrcoef between each pair of chains 337 | 338 | """ 339 | n_chain = sampler.k 340 | n_dim = sampler.dim 341 | 342 | coefs = np.zeros((n_chain, n_chain, n_dim)) 343 | for i in range(n_chain): 344 | for j in range(n_chain): 345 | for k in range(n_dim): 346 | coefs[i, j, k] = np.corrcoef(sampler.chain[i, :, k], 347 | sampler.chain[j, :, k])[1, 0] 348 | return coefs 349 | 350 | 351 | # deprecated 352 | def flatchain_mean_std_check(fchain, fprob, n_step, theta_lb, theta_ub): 353 | """ calculate correlation coefficients of chains 354 | 355 | Parameters 356 | ---------- 357 | fchain : ndarray [n_step*n_chain, n_dim] 358 | MCMC flatchain 359 | n_step : int 360 | number of steps of each chain 361 | 362 | Returns 363 | ------- 364 | coefs : ndarray [n_chain, n_chain, n_dim] 365 | the corrcoef between each pair of chains 366 | 367 | """ 368 | n_chain = fchain.shape[0] / n_step 369 | # n_dim = fchain.shape[1] 370 | 371 | # mean & std 372 | m, s = flatchain_mean_std(fchain, n_step) 373 | mm = np.median(m, axis=0) # median of means of chains 374 | mbest = fchain[np.argsort(fprob)[-1]] # largest lnprob value 375 | 376 | # assume that only a few chains are bad 377 | # if theta between [theta_lb, theta_ub] 378 | # within 3sigma can hit median(mean) of all chains 379 | # => this is a good chain 380 | bad_chain_mask = np.zeros((n_chain,), dtype=bool) 381 | for i_chain, m_ in enumerate(m): 382 | if np.all(m_ > theta_lb) and np.all(m_ < 
theta_ub) \ 383 | and np.all(np.abs(m_ - mbest) < s * 3.): 384 | continue 385 | else: 386 | bad_chain_mask[i_chain] = True 387 | 388 | # return bad chain mask 389 | # suppose mm is a good position 390 | return bad_chain_mask, mm, mbest 391 | 392 | 393 | # deprecated 394 | def flatchain_mean_std(fchain, n_step): 395 | """ calculate correlation coefficients of chains 396 | 397 | Parameters 398 | ---------- 399 | fchain : ndarray [n_step*n_chain, n_dim] 400 | MCMC flatchain 401 | n_step : int 402 | number of steps of each chain 403 | 404 | Returns 405 | ------- 406 | coefs : ndarray [n_chain, n_chain, n_dim] 407 | the corrcoef between each pair of chains 408 | 409 | """ 410 | n_chain = fchain.shape[0] / n_step 411 | n_dim = fchain.shape[1] 412 | 413 | m = np.zeros((n_chain, n_dim)) 414 | s = np.zeros((n_chain, n_dim)) 415 | 416 | for i in range(n_chain): 417 | ind_i = np.arange(i * n_step, (i + 1) * n_step) 418 | m[i] = np.mean(fchain[ind_i], axis=0) 419 | s[i] = np.std(fchain[ind_i], axis=0) 420 | 421 | return m, s 422 | 423 | 424 | # deprecated 425 | def flatchain_corrcoef_mean(fchain, n_step): 426 | """ calculate correlation coefficients of chains 427 | 428 | Parameters 429 | ---------- 430 | fchain : ndarray [n_step*n_chain, n_dim] 431 | MCMC flatchain 432 | n_step : int 433 | number of steps of each chain 434 | 435 | Returns 436 | ------- 437 | coefs : ndarray [n_chain, n_chain, n_dim] 438 | the corrcoef between each pair of chains 439 | 440 | """ 441 | n_chain = fchain.shape[0] / n_step 442 | 443 | coefs = chain_corrcoef(fchain, n_step) 444 | 445 | return np.mean(coefs) - 1. / n_chain -------------------------------------------------------------------------------- /slam/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Thu Feb 16 17:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Thu Feb 16 17:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - SlamModel 23 | 24 | """ 25 | 26 | 27 | from abc import ABCMeta, abstractmethod 28 | 29 | import numpy as np 30 | from sklearn.neural_network import MLPRegressor 31 | from sklearn.svm import SVR, NuSVR 32 | from sklearn.tree import DecisionTreeRegressor 33 | from sklearn.model_selection import GridSearchCV 34 | from sklearn import ensemble 35 | from sklearn.model_selection import cross_val_score 36 | from sklearn.metrics import mean_squared_error 37 | 38 | 39 | METHOD_ALL = ["simple", "grid"] 40 | 41 | MODEL_ALL = ["svr", "nusvr", "nn", "dt"] 42 | MODEL_MAP = {'svr': SVR, 43 | 'nusvr': NuSVR, 44 | 'nn': MLPRegressor, 45 | # the three types above are recommended 46 | 'dt': DecisionTreeRegressor, 47 | 'abr': ensemble.AdaBoostRegressor, 48 | 'br': ensemble.BaggingRegressor, 49 | 'etr': ensemble.ExtraTreesRegressor, 50 | 'gbr': ensemble.GradientBoostingRegressor, 51 | 'rfr': ensemble.RandomForestRegressor 52 | } 53 | 54 | SCORING_ALL = ["neg_mean_absolute_error", 55 | "neg_mean_squared_error", 56 | "neg_median_absolute_error" 57 | "r2"] 58 | 59 | 60 | class SlamModel(object): 61 | 62 | regressor = None 63 | model = "svr" 64 | method = "" 65 | cv = 1 66 | scoring = "neg_mean_squared_error" 67 | 68 | # trained 69 | trained = False 70 | 71 | # GridSearchCV attributes 72 | cv_results_ = None 73 | 74 | def __init__(self, model="nn", method="grid", 75 | param_grid=None, cv=8, scoring="neg_mean_squared_error", 76 | **kwargs): 77 | 78 | try: 79 | 
assert model in MODEL_ALL 80 | except AssertionError as ae: 81 | print("@SlamModel: invalid model!") 82 | raise ae 83 | 84 | try: 85 | assert method in METHOD_ALL 86 | except AssertionError as ae: 87 | print("@SlamModel: invalid method!") 88 | raise ae 89 | 90 | self.model = model 91 | self.method = method 92 | self.param_grid = param_grid 93 | self.cv = int(cv) 94 | self.scoring = scoring 95 | self.score_ = 0 96 | 97 | if self.method == "simple": 98 | self.regressor = MODEL_MAP[model](**kwargs) 99 | elif self.method == "grid": 100 | assert param_grid is not None 101 | assert self.cv > 2 102 | assert self.scoring in SCORING_ALL 103 | 104 | self.regressor = GridSearchCV(MODEL_MAP[model](**kwargs), 105 | cv=self.cv, param_grid=param_grid, 106 | scoring=self.scoring) 107 | 108 | def update(self): 109 | sm = SlamModel() 110 | sm.regressor = self.regressor 111 | sm.model = self.model 112 | sm.method = self.method 113 | sm.cv = self.cv 114 | sm.scoring = self.scoring 115 | sm.trained = self.trained 116 | sm.cv_results_ = self.cv_results_ 117 | 118 | return sm 119 | 120 | def eval_score(self, X, y, sample_weight=None): 121 | if self.method == "grid": 122 | self.score_ = self.regressor.best_score_ 123 | else: 124 | if self.model == "nn": 125 | self.score_ = self.regressor.score(X, y) 126 | else: 127 | self.score_ = self.regressor.score(X, y, sample_weight) 128 | 129 | def score(self, *args, **kwargs): 130 | return self.regressor.score(*args, **kwargs) 131 | 132 | def cross_val_score(self, X, y): 133 | """ return NMSE if cv < 2 """ 134 | if self.cv < 2: 135 | return -mean_squared_error(y, self.predict(X)) 136 | else: 137 | return cross_val_score(self.regressor, X, y, cv=self.cv, 138 | scoring=self.scoring).mean() 139 | 140 | def fit(self, X, y, weight=None): 141 | mock = False 142 | if weight is None: 143 | # no weight given: the fit call is identical for all models 144 | if self.model == "nn": 145 | self.regressor.fit(X, y) 146 | else: 147 | self.regressor.fit(X, y) 148 | 149 | if self.method == "grid": 150 | self.score_ = self.regressor.best_score_ 151 | else: 152 | self.score_ = self.score(X, y) 153 | 154 | else: 155 | ind_weight = weight > 0 156 | X_ = X[ind_weight] 157 | y_ = y[ind_weight] 158 | weight_ = weight[ind_weight] 159 | 160 | if np.sum(ind_weight) < self.cv: 161 | X_ = np.zeros((self.cv, X.shape[1]), float) 162 | y_ = np.zeros((self.cv,), float) 163 | weight_ = np.zeros((self.cv,), float) 164 | mock = True 165 | 166 | # support weight 167 | if self.model == "nn": 168 | self.regressor.fit(X_, y_) 169 | if self.method == "grid": 170 | self.score_ = self.regressor.best_score_ 171 | else: 172 | self.score_ = self.cross_val_score(X_, y_) 173 | 174 | else: 175 | self.regressor.fit(X_, y_, sample_weight=weight_) 176 | 177 | if self.method == "grid": 178 | self.score_ = self.regressor.best_score_ 179 | else: 180 | self.score_ = self.cross_val_score(X_, y_) 181 | 182 | self.trained = True 183 | if mock: 184 | self.score_ = 1.0 185 | 186 | return 187 | 188 | def predict(self, X): 189 | return self.regressor.predict(X) 190 | 191 | @staticmethod 192 | def train(X, y, sample_weight=None, model="nn", method="grid", 193 | param_grid=None, cv=8, scoring="neg_mean_squared_error", 194 | **kwargs): 195 | """ train a single pixel using GridSearchCV 196 | 197 | Parameters 198 | ---------- 199 | X: ndarray with shape (n_obs x n_dim) 200 | X in sklearn notation 201 | y: ndarray with shape (n_obs, ) --> 1D 202 | y in sklearn notation 203 | sample_weight: ndarray with shape (n_obs, ) --> 1D 204 | weight for sample data 205 | model: 206 | model
type 207 | method: 208 | "simple" | "grid" 209 | param_grid: dict 210 | key, value pairs of hyper-parameter grids 211 | >>> param_grid = dict(C=2. ** np.arange(-5., 6.), 212 | >>> epsilon=[0.01, 0.05, 0.1, 0.15], 213 | >>> gamma=['auto', 0.2, 0.25, 0.3, 0.5]) 214 | cv: int / None 215 | if cv>=3, Cross-Validation will be performed to calculate MSE 216 | scoring: 217 | the scoring scheme of cross validation 218 | kwargs: 219 | extra kwargs will be passed to svm.SVR() method 220 | e.g., C=1.0, gamma='auto', epsilon=0.1 221 | 222 | Returns 223 | ------- 224 | svm.SVR() instance & best hyper-parameters & score 225 | if CV is not performed, score = np.nan 226 | 227 | """ 228 | sm = SlamModel(model=model, method=method, 229 | param_grid=param_grid, cv=cv, scoring=scoring, 230 | **kwargs) 231 | 232 | # fit data 233 | sm.fit(X, y, sample_weight) 234 | 235 | return sm, sm.score_ 236 | 237 | 238 | class Model(object): 239 | __metaclass__ = ABCMeta 240 | 241 | @abstractmethod 242 | def train(self): 243 | pass 244 | 245 | @abstractmethod 246 | def predict_single_spectrum(self): 247 | pass 248 | 249 | @abstractmethod 250 | def predict_multi_spectra(self): 251 | pass 252 | 253 | 254 | def nmse(model, X, y, sample_weight=None): 255 | """ return NMSE for svr, X, y and sample_weight """ 256 | if sample_weight is None: 257 | sample_weight = np.ones_like(y, int) 258 | 259 | ind_use = sample_weight > 0 260 | if np.sum(ind_use) > 0: 261 | X_ = X[ind_use] 262 | y_ = y[ind_use] 263 | # sample_weight_ = sample_weight[ind_use] 264 | return -np.mean(np.square(model.predict(X_) - y_)) 265 | else: 266 | return 0. -------------------------------------------------------------------------------- /slam/normalization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Sat Sep 03 12:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Sat Sep 03 12:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - normalization 23 | 24 | """ 25 | from __future__ import division 26 | 27 | import numpy as np 28 | from scipy.interpolate import interp1d 29 | from joblib import Parallel, delayed 30 | 31 | from .extern.interpolate import SmoothSpline 32 | 33 | print("slam.normalization module will be deprecated in future") 34 | 35 | 36 | def normalize_spectrum_null(wave): 37 | return np.ones_like(wave)*np.nan, np.ones_like(wave)*np.nan 38 | 39 | 40 | def normalize_spectrum(wave, flux, norm_range, dwave, 41 | p=(1E-6, 1E-6), q=0.5, ivar=None, eps=1e-10, 42 | rsv_frac=1.): 43 | """ A double smooth normalization of a spectrum 44 | 45 | Converted from Chao Liu's normSpectrum.m 46 | Updated by Bo Zhang 47 | 48 | Parameters 49 | ---------- 50 | wave: ndarray (n_pix, ) 51 | wavelegnth array 52 | flux: ndarray (n_pix, ) 53 | flux array 54 | norm_range: tuple 55 | a tuple consisting (wave_start, wave_stop) 56 | dwave: float 57 | binning width 58 | p: tuple of 2 ps 59 | smoothing parameter between 0 and 1: 60 | 0 -> LS-straight line 61 | 1 -> cubic spline interpolant 62 | q: float in range of [0, 100] 63 | percentile, between 0 and 1 64 | ivar: ndarray (n_pix, ) | None 65 | ivar array, default is None 66 | eps: float 67 | the ivar threshold 68 | rsv_frac: float 69 | the fraction of pixels reserved in terms of std. default is 3. 
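(note: in the signature above rsv_frac actually defaults to 1.; pass rsv_frac=3. explicitly if the documented default of 3. is desired)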
70 | 71 | Returns 72 | ------- 73 | flux_norm: ndarray 74 | normalized flux 75 | flux_cont: ndarray 76 | continuum flux 77 | 78 | Example 79 | ------- 80 | >>> flux_norm, flux_cont = normalize_spectrum( 81 | >>> wave, flux, (4000., 8000.), 100., p=(1E-8, 1E-7), q=0.5, 82 | >>> rsv_frac=2.0) 83 | 84 | """ 85 | if np.sum(np.logical_and(np.isfinite(flux), flux > 0)) <= 100: 86 | return normalize_spectrum_null(wave) 87 | 88 | if ivar is not None: 89 | # ivar is set 90 | ivar = np.where(np.logical_or(wave < norm_range[0], 91 | wave > norm_range[1]), 0, ivar) 92 | ivar = np.where(ivar <= eps, eps, ivar) 93 | # mask = ivar <= eps 94 | var = 1. / ivar 95 | else: 96 | # default config is even weight 97 | var = np.ones_like(flux) 98 | 99 | # wave = wave[~mask] 100 | # flux = flux[~mask] 101 | 102 | # check q region 103 | assert 0. < q < 1. 104 | 105 | # n_iter = len(p) 106 | n_bin = np.int(np.fix(np.diff(norm_range) / dwave) + 1) 107 | wave1 = norm_range[0] 108 | 109 | # SMOOTH 1 110 | # print(wave.shape, flux.shape, var.shape) 111 | if ivar is not None: 112 | ind_good_init = 1. * (ivar > 0.) * (flux > 0.) 113 | else: 114 | ind_good_init = 1. * (flux > 0.) 115 | ind_good_init = ind_good_init.astype(np.bool) 116 | # print("@Cham: sum(ind_good_init)", np.sum(ind_good_init)) 117 | 118 | flux_smoothed1 = SmoothSpline(wave[ind_good_init], flux[ind_good_init], 119 | p=p[0], var=var[ind_good_init])(wave) 120 | dflux = flux - flux_smoothed1 121 | 122 | # collecting continuum pixels --> ITERATION 1 123 | ind_good = np.zeros(wave.shape, dtype=np.bool) 124 | for i_bin in range(n_bin): 125 | ind_bin = np.logical_and(wave > wave1 + (i_bin - 0.5) * dwave, 126 | wave <= wave1 + (i_bin + 0.5) * dwave) 127 | if np.sum(ind_bin > 0): 128 | # median & sigma 129 | bin_median = np.median(dflux[ind_bin]) 130 | bin_std = np.median(np.abs(dflux - bin_median)) 131 | # within 1 sigma with q-percentile 132 | ind_good_ = ind_bin * ( 133 | np.abs(dflux - np.nanpercentile(dflux[ind_bin], q * 100.)) < ( 134 | rsv_frac * bin_std)) 135 | ind_good = np.logical_or(ind_good, ind_good_) 136 | 137 | ind_good = np.logical_and(ind_good, ind_good_init) 138 | # assert there is continuum pixels 139 | try: 140 | assert np.sum(ind_good) > 0 141 | except AssertionError: 142 | Warning("@Keenan.normalize_spectrum(): unable to find continuum!") 143 | ind_good = np.ones(wave.shape, dtype=np.bool) 144 | 145 | # SMOOTH 2 146 | # continuum flux 147 | flux_smoothed2 = SmoothSpline( 148 | wave[ind_good], flux[ind_good], p=p[1], var=var[ind_good])(wave) 149 | # normalized flux 150 | flux_norm = flux / flux_smoothed2 151 | 152 | return flux_norm, flux_smoothed2 153 | 154 | 155 | def normalize_spectrum_iter(wave, flux, p=1E-6, q=0.5, lu=(-1, 1), binwidth=30, 156 | niter=5): 157 | """ A double smooth normalization of a spectrum 158 | 159 | Converted from Chao Liu's normSpectrum.m 160 | Updated by Bo Zhang 161 | 162 | Parameters 163 | ---------- 164 | wave: ndarray (n_pix, ) 165 | wavelegnth array 166 | flux: ndarray (n_pix, ) 167 | flux array 168 | p: float 169 | smoothing parameter between 0 and 1: 170 | 0 -> LS-straight line 171 | 1 -> cubic spline interpolant 172 | q: float in range of [0, 100] 173 | percentile, between 0 and 1 174 | lu: float tuple 175 | the lower & upper exclusion limits 176 | binwidth: float 177 | width of each bin 178 | niter: int 179 | number of iterations 180 | Returns 181 | ------- 182 | flux_norm: ndarray 183 | normalized flux 184 | flux_cont: ndarray 185 | continuum flux 186 | 187 | Example 188 | ------- 189 | >>> 
fnorm, fcont = normalize_spectrum_iter( 190 | >>> wave, flux, p=1e-6, q=0.6, binwidth=200, lu=(-1, 5), niter=5) 191 | 192 | """ 193 | if np.sum(np.logical_and(np.isfinite(flux), flux > 0)) <= 10: 194 | return normalize_spectrum_null(wave) 195 | 196 | # default config is even weight 197 | var = np.ones_like(flux) 198 | 199 | # check q region 200 | # assert 0. <= q <= 1. 201 | 202 | nbins = int(np.ceil((wave[-1] - wave[0]) / binwidth) + 1) 203 | bincenters = np.linspace(wave[0], wave[-1], nbins) 204 | 205 | # iterative smoothing 206 | ind_good = np.isfinite(flux) 207 | for _ in range(niter): 208 | 209 | flux_smoothed1 = SmoothSpline(wave[ind_good], flux[ind_good], 210 | p=p, var=var[ind_good])(wave) 211 | # residual 212 | res = flux - flux_smoothed1 213 | 214 | # determine sigma 215 | stdres = np.zeros(nbins) 216 | for ibin in range(nbins): 217 | ind_this_bin = ind_good & (np.abs(wave-bincenters[ibin]) <= binwidth) 218 | if 0 <= q <= 1: 219 | stdres[ibin] = np.std( 220 | res[ind_this_bin] - np.percentile(res[ind_this_bin], 100 * q)) 221 | else: 222 | stdres[ibin] = np.std(res[ind_this_bin]) 223 | stdres_interp = interp1d(bincenters, stdres, kind="linear")(wave) 224 | if 0 <= q <= 1: 225 | res1 = (res - np.percentile(res, 100 * q)) / stdres_interp 226 | else: 227 | res1 = res / stdres_interp 228 | ind_good = ind_good & (res1 > lu[0]) & (res1 < lu[1]) 229 | 230 | # assert there are continuum pixels 231 | try: 232 | assert np.sum(ind_good) > 0 233 | except AssertionError: 234 | Warning("@normalize_spectrum_iter: unable to find continuum!") 235 | ind_good = np.ones(wave.shape, dtype=bool) 236 | 237 | # final smoothing 238 | flux_smoothed2 = SmoothSpline( 239 | wave[ind_good], flux[ind_good], p=p, var=var[ind_good])(wave) 240 | # normalized flux 241 | flux_norm = flux / flux_smoothed2 242 | 243 | return flux_norm, flux_smoothed2 244 | 245 | 246 | def normalize_spectra_block(wave, flux_block, norm_range, dwave, 247 | p=(1E-6, 1E-6), q=0.5, ivar_block=None, eps=1e-10, 248 | rsv_frac=3., n_jobs=1, verbose=10): 249 | """ normalize multiple spectra using the same configuration 250 | This is specially designed for TheKeenan 251 | 252 | Parameters 253 | ---------- 254 | wave: ndarray (n_pix, ) 255 | wavelength array 256 | flux_block: ndarray (n_obs, n_pix) 257 | flux array 258 | norm_range: tuple 259 | a tuple consisting of (wave_start, wave_stop) 260 | dwave: float 261 | binning width 262 | p: tuple of 2 ps 263 | smoothing parameter between 0 and 1: 264 | 0 -> LS-straight line 265 | 1 -> cubic spline interpolant 266 | q: float 267 | percentile, in the range [0, 1] 268 | ivar_block: ndarray (n_pix, ) | None 269 | ivar array, default is None 270 | eps: float 271 | the ivar threshold 272 | rsv_frac: float 273 | the fraction of pixels reserved in terms of std. default is 3.
274 | n_jobs: int 275 | number of processes launched by joblib 276 | verbose: int / bool 277 | verbose level 278 | 279 | Returns 280 | ------- 281 | flux_norm_block: ndarray 282 | normalized flux 283 | 284 | flux_cont_block: ndarray 285 | continuum flux 286 | 287 | """ 288 | if ivar_block is None: 289 | ivar_block = np.ones_like(flux_block) 290 | 291 | if flux_block.ndim == 1: 292 | flux_block.reshape(1, -1) 293 | n_spec = flux_block.shape[0] 294 | 295 | results = Parallel(n_jobs=n_jobs, verbose=verbose)( 296 | delayed(normalize_spectrum)( 297 | wave, flux_block[i], norm_range, dwave, p=p, q=q, 298 | ivar=ivar_block[i], eps=eps, rsv_frac=rsv_frac) 299 | for i in range(n_spec)) 300 | 301 | # unpack results 302 | flux_norm_block = [] 303 | flux_cont_block = [] 304 | for result in results: 305 | flux_norm_block.append(result[0]) 306 | flux_cont_block.append(result[1]) 307 | 308 | return np.array(flux_norm_block), np.array(flux_cont_block) 309 | 310 | 311 | def get_stable_pixels(pixel_disp, wave_arm=100, frac=0.20): 312 | """ 313 | 314 | Parameters 315 | ---------- 316 | pixel_disp: np.ndarray 317 | dispersion array 318 | wave_arm: int 319 | the arm length in terms of pixels 320 | frac: float 321 | the reserved fraction, between 0.00 and 1.00 322 | 323 | Returns 324 | ------- 325 | ind_stable 326 | 327 | """ 328 | ind_stable = np.zeros_like(pixel_disp, dtype=np.bool) 329 | 330 | for i in range(len(ind_stable)): 331 | edge_l = np.max([i - wave_arm, 0]) 332 | edge_r = np.min([i + wave_arm, len(pixel_disp)]) 333 | if pixel_disp[i] <= \ 334 | np.percentile(pixel_disp[edge_l:edge_r], frac * 100.): 335 | ind_stable[i] = True 336 | 337 | return ind_stable 338 | 339 | 340 | # TODO: this is a generalized version 341 | def normalize_spectra(wave_flux_tuple_list, norm_range, dwave, 342 | p=(1E-6, 1E-6), q=50, n_jobs=1, verbose=False): 343 | """ normalize multiple spectra using the same configuration 344 | 345 | Parameters 346 | ---------- 347 | wave_flux_tuple_list: list[n_obs] 348 | a list of (wave, flux) tuple 349 | norm_range: tuple 350 | a tuple consisting (wave_start, wave_stop) 351 | dwave: float 352 | binning width 353 | p: tuple of 2 ps 354 | smoothing parameter between 0 and 1: 355 | 0 -> LS-straight line 356 | 1 -> cubic spline interpolant 357 | q: float in range of [0, 100] 358 | percentile, between 0 and 1 359 | n_jobs: int 360 | number of processes launched by joblib 361 | verbose: int / bool 362 | verbose level 363 | 364 | Returns 365 | ------- 366 | flux_norm: ndarray 367 | normalized flux 368 | 369 | """ 370 | pass 371 | 372 | 373 | # def test_normaliza_spectra_block(): 374 | # import os 375 | # 376 | # os.chdir('/pool/projects/TheKeenan/data/TheCannonData') 377 | # 378 | # from TheCannon import apogee 379 | # import matplotlib.pyplot as plt 380 | # 381 | # tr_ID, wl, tr_flux, tr_ivar = apogee.load_spectra("example_DR10/Data") 382 | # tr_label = apogee.load_labels("example_DR10/reference_labels.csv") 383 | # 384 | # test_ID = tr_ID 385 | # test_flux = tr_flux 386 | # test_ivar = tr_ivar 387 | # 388 | # r = normalize_spectra_block(wl, tr_flux, (15200., 16900.), 30., q=0.9, 389 | # rsv_frac=0.5, 390 | # p=(1E-10, 1E-10), ivar_block=tr_ivar, 391 | # n_jobs=10, verbose=10) 392 | # 393 | # flux_norm, flux_cont = r 394 | # flux_norm = np.array(flux_norm) 395 | # flux_cont = np.array(flux_cont) 396 | # flux_ivar = tr_ivar * flux_cont ** 2 397 | # 398 | # fig = plt.figure() 399 | # ax = fig.add_subplot(111) 400 | # for i in range(10, 20): 401 | # ofst = i * 0.5 402 | # ax.plot(wl, tr_flux[i] + 
ofst, 'b') 403 | # ax.plot(wl, flux_cont[i] + ofst, 'r') 404 | # fig.tight_layout() 405 | # fig.savefig( 406 | # '/pool/projects/TheKeenan/data/TheCannonData/test_norm_spec_1.pdf') 407 | 408 | 409 | if __name__ == '__main__': 410 | pass 411 | # test_normaliza_spectra_block() 412 | -------------------------------------------------------------------------------- /slam/parallel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Sun Jun 25 13:00:00 2017 15 | 16 | Modifications 17 | ------------- 18 | - Sun Jun 25 13:00:00 2017 19 | 20 | Aims 21 | ---- 22 | - utils for computing in parallel 23 | 24 | """ 25 | 26 | from copy import deepcopy 27 | import numpy as np 28 | from ipyparallel import Client 29 | 30 | 31 | def launch_ipcluster_dv(profile="default", targets="all", block=True, max_engines=None): 32 | # initiate ipcluster 33 | rc = Client(profile=profile) 34 | 35 | # print ipcluster information 36 | n_proc = len(rc.ids) 37 | if targets == "all": 38 | targets = rc.ids 39 | 40 | dv = rc.direct_view(targets=targets) 41 | 42 | # check number of engines 43 | # print(rc.ids, dv.targets, targets, max_engines) 44 | if max_engines is not None: 45 | if len(dv.targets) > max_engines: 46 | targets = deepcopy(dv.targets) 47 | np.random.shuffle(targets) 48 | targets = targets[:max_engines] 49 | targets.sort() 50 | 51 | dv = rc.direct_view(targets=targets) 52 | 53 | print("===================================================") 54 | print("@Slam: ipcluster[{}, n_engines={}/{}]".format( 55 | profile, len(dv.targets), n_proc)) 56 | print("---------------------------------------------------") 57 | 58 | dv.block = block 59 | 60 | # import basic modules in ipcluster 61 | dv.execute("import os") 62 | dv.execute("import numpy as np") 63 | dv.execute("from joblib import Parallel, delayed, dump, load") 64 | 65 | # print host information 66 | dv.execute("host_names = os.uname()[1]").get() 67 | u_host_names, u_counts = np.unique( 68 | dv["host_names"], return_counts=True) 69 | for i in range(len(u_counts)): 70 | print("host: {} x {}".format(u_host_names[i], u_counts[i])) 71 | print("===================================================") 72 | 73 | return dv 74 | 75 | 76 | def reset_dv(dv): 77 | dv.execute("import IPython\n" 78 | "ipy=IPython.get_ipython()\n" 79 | "ipy.run_line_magic(\"reset\", \" -f\")\n") 80 | return 81 | 82 | 83 | def print_time_cost(dtime, unit_max="hour"): 84 | """ return string for delta_time """ 85 | if dtime <= 60 * 1.5: 86 | dtime_str = "{:.3f} sec".format(dtime) 87 | elif dtime <= 60 * 60 * 1.5: 88 | dtime_str = "{:.3f} min".format(dtime / 60.) 89 | elif dtime <= (60 * 60 * 24 * 3): 90 | dtime_str = "{:.3f} hours".format(dtime / 3600.) 91 | else: 92 | # even larger 93 | if unit_max == "hour": 94 | dtime <= (60 * 60 * 24 * 3) 95 | dtime_str = "{:.3f} hours".format(dtime / 3600.) 96 | else: 97 | dtime_str = "{:.3f} days".format(dtime / 86400.) 
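# A quick sanity check of the branches above (hypothetical values, for
# illustration only; note the function returns the string rather than
# printing it):
#   print_time_cost(42.)                  -> "42.000 sec"
#   print_time_cost(3600.)                -> "60.000 min"
#   print_time_cost(5e5, unit_max="day")  -> "5.787 days"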
98 | 99 | return dtime_str 100 | 101 | 102 | -------------------------------------------------------------------------------- /slam/plotting.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Sat Sep 03 16:00:00 2017 15 | 16 | Modifications 17 | ------------- 18 | - Sat Sep 03 12:00:00 2017 19 | 20 | Aims 21 | ---- 22 | - plotting tools 23 | 24 | """ 25 | 26 | 27 | import numpy as np 28 | import matplotlib.pyplot as plt 29 | from scipy.stats import binned_statistic_2d 30 | from matplotlib import cm 31 | from lmfit.models import GaussianModel 32 | from mpl_toolkits.mplot3d import Axes3D 33 | 34 | from .analysis import label_diff_lmfit 35 | 36 | 37 | def plot_mse(s): 38 | fig, ax = plt.subplots(1, 1, figsize=(8, 6), tight_layout=True) 39 | plt.hist(-s.nmse[s.nmse != 0], np.linspace(0, 1, 80), histtype='step', 40 | lw=2, label="MSE") 41 | plt.hist(-s.scores[s.nmse != 0], np.linspace(0, 1, 80), histtype='step', 42 | lw=2, label="CV MSE") 43 | ylim = plt.gca().get_ylim() 44 | plt.vlines(np.percentile(-s.nmse[s.nmse != 0], [14, 50, 86]), *ylim, 45 | linestyle='--', label="14, 50, 86 percentiles") 46 | plt.xlim(0, 1) 47 | plt.ylim(*ylim) 48 | plt.ylabel("Counts") 49 | plt.xlabel("MSE") 50 | fig.tight_layout() 51 | return fig 52 | 53 | 54 | # ################ # 55 | # image 56 | # ################ # 57 | def image(ax, x, y, xbins, ybins, log=True): 58 | plt.sca(ax) 59 | c, xe, ye, bn = binned_statistic_2d(x, y, x, statistic="count", 60 | bins=[xbins, ybins]) 61 | if log: 62 | c = np.log10(c) 63 | plt.imshow(c.T, origin="lower", extent=(*xbins[[0, -1]], *ybins[[0, -1]]), 64 | cmap=cm.viridis, aspect="auto") 65 | return 66 | 67 | 68 | def compare_labels(X_true, X_pred, 69 | xlabels=None, ylabels=None, reslabels=None, 70 | xlims=None, reslims=None, 71 | histlim=None, nxb=30, cornerlabel="", 72 | figsize=None): 73 | 74 | nlabel = X_true.shape[1] 75 | 76 | if xlabels is None: 77 | xlabels = ["$X_{{true}}:{}$".format(i) for i in range(nlabel)] 78 | if ylabels is None: 79 | ylabels = ["$X_{{pred}}:{}$".format(i) for i in range(nlabel)] 80 | if reslabels is None: 81 | reslabels = ["$X_{{res}}:{}$".format(i) for i in range(nlabel)] 82 | 83 | # default xlim 84 | if xlims is None: 85 | xlim1 = np.min(np.vstack((np.percentile(X_true, 1, axis=0), 86 | np.percentile(X_pred, 1, axis=0))), axis=0) 87 | xlim2 = np.min(np.vstack((np.percentile(X_true, 99, axis=0), 88 | np.percentile(X_pred, 99, axis=0))), axis=0) 89 | xlims = (xlim2 - xlim1).reshape(-1, 1) * 0.4 * np.array( 90 | [-1, 1]) + np.vstack((xlim1, xlim2)).T 91 | if reslims is None: 92 | reslims = np.repeat( 93 | np.max(np.abs(np.percentile(X_pred - X_true, [1, 99], axis=0).T), 94 | axis=1).reshape(-1, 1), 2, axis=1) * np.array([-1, 1]) 95 | reslims = np.abs(np.diff(reslims, axis=1)) * np.array( 96 | [-1, 1]) * 0.2 + reslims 97 | 98 | # run MCMC 99 | X_bias, X_scatter, frs, histdata = label_diff_lmfit( 100 | X_true, X_pred, bins="auto", plot=False, emcee=True) 101 | print("bias", X_bias) 102 | print("scatter", X_scatter) 103 | if histlim is None: 104 | histlim = (0, np.max([np.max(histdata_[0]) for histdata_ in histdata])) 105 | histlim = np.array(histlim) 106 | 107 | if figsize is None: 108 | figsize = (3 * nlabel, 3 * nlabel) 109 | 110 | # draw figure 111 | fig, axs2 = plt.subplots(nlabel+1, nlabel+1, figsize=figsize) 112 | 113 | # 1. 
Gaussian 114 | gm = GaussianModel() 115 | for i in range(nlabel): 116 | plt.sca(axs2[i + 1, -1]) 117 | fr = frs[i] 118 | hist_, bin_edge_, data_ = histdata[i] 119 | plt.hist(data_, bins=bin_edge_, histtype="step", 120 | orientation="horizontal") 121 | axs2[i + 1, -1].plot(gm.eval(fr.mcmc.params, x=bin_edge_), bin_edge_) 122 | axs2[i + 1, -1].tick_params(direction='in', pad=5) 123 | axs2[i + 1, -1].set_xlim(histlim) 124 | axs2[i + 1, -1].set_ylim(reslims[i]) 125 | axs2[i + 1, -1].set_ylim(reslims[i]) 126 | axs2[i + 1, -1].yaxis.tick_right() 127 | axs2[i + 1, -1].hlines(X_bias[i], *histlim, linestyle='--', color="k") 128 | 129 | pos_text_x = np.dot(np.array([[0.9, 0.1]]), histlim.reshape(-1, 1)) 130 | pos_text_y = np.dot(np.array([[0.15, 0.85]]), 131 | reslims[i].reshape(-1, 1)) 132 | axs2[i + 1, -1].text(pos_text_x, pos_text_y, 133 | "$bias={:.4f}$".format(X_bias[i])) 134 | pos_text_x = np.dot(np.array([[0.9, 0.1]]), histlim.reshape(-1, 1)) 135 | pos_text_y = np.dot(np.array([[0.30, 0.70]]), 136 | reslims[i].reshape(-1, 1)) 137 | axs2[i + 1, -1].text(pos_text_x, pos_text_y, 138 | "$\\sigma={:.4f}$".format(X_scatter[i])) 139 | 140 | axs2[i + 1, -1].yaxis.tick_right() 141 | 142 | if i < nlabel-1: 143 | axs2[i + 1, -1].set_xticklabels([]) 144 | 145 | axs2[-1, -1].set_xlabel("Counts") 146 | 147 | # 2. diagnal 148 | for i in range(nlabel): 149 | image(axs2[0, i], X_true[:, i], X_pred[:, i], 150 | np.linspace(xlims[i][0], xlims[i][1], nxb), 151 | np.linspace(xlims[i][0], xlims[i][1], nxb)) 152 | axs2[0, i].set_xlim(*xlims[i]) 153 | axs2[0, i].set_ylim(*xlims[i]) 154 | axs2[0, i].tick_params(direction='in', pad=5) 155 | axs2[0, i].set_xticklabels([]) 156 | axs2[0, i].set_ylabel(ylabels[i]) 157 | axs2[0, i].plot(xlims[i], xlims[i], 'k--') 158 | 159 | # 3. Xres vs X 160 | X_res = X_pred - X_true 161 | for i in range(nlabel): 162 | for j in range(nlabel): 163 | image(axs2[j + 1, i], X_true[:, i], X_res[:, j], 164 | np.linspace(xlims[i][0], xlims[i][1], nxb), 165 | np.linspace(reslims[j][0], reslims[j][1], nxb)) 166 | axs2[j + 1, i].set_xlim(*xlims[i]) 167 | axs2[j + 1, i].set_ylim(*reslims[j]) 168 | axs2[j + 1, i].tick_params(direction='in', pad=5) 169 | 170 | if j != nlabel - 1: 171 | axs2[j + 1, i].set_xticklabels([]) 172 | else: 173 | axs2[j + 1, i].set_xlabel(xlabels[i]) 174 | 175 | if i != 0: 176 | axs2[j + 1, i].set_yticklabels([]) 177 | else: 178 | axs2[j + 1, i].set_ylabel(reslabels[j]) 179 | 180 | axs2[0, -1].set_axis_off() 181 | axs2[0, -1].text(np.mean(axs2[0, -1].get_xlim()), 182 | np.mean(axs2[0, -1].get_ylim()), 183 | cornerlabel, horizontalalignment='center', 184 | verticalalignment='center') 185 | 186 | fig.tight_layout() 187 | plt.subplots_adjust(wspace=0., hspace=0.) 
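# A hypothetical call, assuming X_true and X_pred are (n_obs, n_label)
# arrays prepared elsewhere (a sketch, not executed here):
#   fig, frs = compare_labels(X_true, X_pred,
#                             xlabels=["$T_{eff}$", "$\\log{g}$", "$[Fe/H]$"])
#   fig.savefig("label_comparison.pdf")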
188 | 189 | return fig, frs 190 | 191 | # gs1 = plt.GridSpec(1, 6, left=0.05, bottom=0.85, right=0.95, top=0.95, hspace=0.2, wspace=0) 192 | # gs2 = plt.GridSpec(7, 7, left=0.05, bottom=0.1, right=0.95, top=0.8, hspace=0, wspace=0) 193 | 194 | # axs1 = np.array([fig.add_subplot(gs1[i]) for i in range(6)]) 195 | # axs2 = np.array([[fig.add_subplot(gs2[j, i]) for i in range(7)] for j in range(7)]) 196 | 197 | 198 | def visual3d(s, wave, diag_dim=()): 199 | """ single pixel diagnostic """ 200 | 201 | diag_dim = (0, 1) 202 | 203 | i_pixel = 5175 - 3900 204 | # i_pixel = 4861-3900 205 | x, y, flux = s.single_pixel_diagnostic(i_pixel, s.tr_labels, 206 | diag_dim=diag_dim) 207 | 208 | fig = plt.figure(figsize=(10, 10)) 209 | ax = fig.add_subplot(111, projection='3d') 210 | ax.scatter3D(s.tr_labels[:, 0], s.tr_labels[:, 1], s.tr_flux[:, i_pixel], 211 | s=10, c=s.tr_labels[:, 2], alpha=.5, cmap=cm.jet) 212 | plt.colorbar() 213 | # plt.plot(x,y, flux,'b.') 214 | ax.set_zlim(0., 2.) 215 | plt.xlabel('Teff') 216 | plt.ylabel('logg') 217 | plt.title('PIXEL: %s' % i_pixel) 218 | fig.tight_layout() 219 | 220 | # %% 221 | diag_dim = (0, 2) 222 | 223 | i_pixel = 6564 - 3900 224 | i_pixel = 4861 - 3900 225 | 226 | sdiag_teff = np.arange(4000., 8000., 100.) 227 | sdiag_logg = np.arange(1., 5., .2) 228 | sdiag_logg = np.arange(-2, 1., .2) 229 | msdiag_teff, msdiag_logg = np.meshgrid(sdiag_teff, sdiag_logg) 230 | msdiag_feh = np.zeros_like(msdiag_teff) 231 | msdiag_labels = np.array([msdiag_teff.flatten(), 232 | msdiag_logg.flatten(), 233 | msdiag_feh.flatten()]).T 234 | 235 | H, xedges, yedges = np.histogram2d(train_labels[:, 0], train_labels[:, 2], 236 | bins=(sdiag_teff, sdiag_logg)) 237 | xcenters = (xedges[:-1] + xedges[1:]) / 2. 238 | ycenters = (yedges[:-1] + yedges[1:]) / 2. 239 | xmesh, ymesh = np.meshgrid(xcenters, ycenters) 240 | 241 | x, y, flux = s.single_pixel_diagnostic(i_pixel, msdiag_labels, 242 | diag_dim=diag_dim) 243 | 244 | # flux[:20]=np.nan 245 | 246 | fig = plt.figure(figsize=(10, 10)) 247 | ax = fig.add_subplot(111, projection='3d') 248 | # ax.scatter(x,y,flux, c=msdiag_feh) 249 | surf = ax.plot_surface(msdiag_teff, msdiag_logg, 250 | flux.reshape(msdiag_teff.shape), 251 | vmin=np.min(flux), vmax=np.max(flux), cmap=cm.jet) 252 | ax.contour(xmesh, ymesh, np.log(H.T), extend3d=False, offset=1.20, 253 | color='k') 254 | # plt.plot(x,y, flux,'b.') 255 | ax.set_zlim(0., 2.) 
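# NOTE: `train_labels` in the histogram2d call above is not defined in this
# scope; `s.tr_labels` is presumably what was intended. Likewise, the bare
# `plt.colorbar()` after the first `scatter3D` needs an explicit mappable,
# e.g. `sc = ax.scatter3D(...); fig.colorbar(sc)`.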
256 | plt.xlabel('Teff') 257 | plt.ylabel('logg') 258 | plt.title('PIXEL: %s' % i_pixel) 259 | 260 | fig.colorbar(surf, shrink=.5, aspect=5) 261 | fig.tight_layout() 262 | # fig.savefig("../data/laap/figs/PIXEL6564_C8_E0P08_G1.svg") -------------------------------------------------------------------------------- /slam/postprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | 5 | define error processing functions 6 | 7 | """ 8 | 9 | 10 | # jacobian matrix to covariance 11 | def jac_to_cov(jac): 12 | from scipy.linalg import svd 13 | _, s, VT = svd(jac, full_matrices=False) 14 | threshold = np.finfo(float).eps * max(jac.shape) * s[0] 15 | s = s[s > threshold] 16 | VT = VT[:s.size] 17 | pcov = np.dot(VT.T / s**2, VT) 18 | return pcov 19 | 20 | 21 | # covariance to standard deviation 22 | def cov_to_err(cov): 23 | return np.sqrt(np.diag(cov)) 24 | 25 | 26 | # jacobian matrix to standard deviation 27 | def jac_to_err(jac): 28 | from scipy.linalg import svd 29 | _, s, VT = svd(jac, full_matrices=False) 30 | threshold = np.finfo(float).eps * max(jac.shape) * s[0] 31 | s = s[s > threshold] 32 | VT = VT[:s.size] 33 | pcov = np.dot(VT.T / s**2, VT) 34 | return np.sqrt(np.diag(pcov)) 35 | 36 | 37 | # inversed hessian matrix to standard deviation 38 | def hessinv_to_err(hess_inv): 39 | return np.sqrt(np.diag(hess_inv)) 40 | 41 | 42 | # least_squares[jac] --> jacobian matrix --> standard deviation 43 | def do_post(ls_result, label_scaler=None): 44 | """ 45 | 46 | Parameters 47 | ---------- 48 | ls_result: 49 | least_squares full output 50 | https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.least_squares.html 51 | 52 | label_scaler: 53 | SLAM label scale for training labels 54 | 55 | Returns 56 | ------- 57 | pp_result: 58 | post-processed result 59 | 60 | """ 61 | pp_result = dict() 62 | pp_result["cost"] = ls_result["cost"] 63 | pp_result["grad"] = ls_result["grad"] 64 | 65 | pp_result["pcov"] = jac_to_cov(ls_result["jac"]) * \ 66 | np.dot(label_scaler.scale_.reshape(-1, 1), label_scaler.scale_.reshape(1, -1)) 67 | pp_result["pstd"] = cov_to_err(pp_result["pcov"]) 68 | 69 | pp_result["message"] = ls_result["message"] 70 | pp_result["nfev"] = ls_result["nfev"] 71 | pp_result["optimality"] = ls_result["optimality"] 72 | pp_result["status"] = ls_result["status"] 73 | pp_result["success"] = ls_result["success"] 74 | 75 | pp_result["x"] = label_scaler.inverse_transform(ls_result["x"].reshape(1, -1)).flatten() 76 | 77 | return pp_result 78 | -------------------------------------------------------------------------------- /slam/predict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Sun Sep 04 16:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Sun Sep 04 16:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - utils for training SVRs 23 | 24 | """ 25 | 26 | import numpy as np 27 | from scipy.optimize import minimize 28 | from joblib import Parallel, delayed 29 | 30 | # from .mcmc import lnlike_gaussian 31 | from .costfunction import chi2_simple_1d 32 | from scipy.optimize import leastsq, least_squares 33 | from .postprocessing import do_post 34 | 35 | 36 | def predict_pixel(svr, X_, mask=True): 37 | """ predict single pixels for a given wavelength 38 | 39 | Parameters 40 | ---------- 41 | svr: sklearn.svm.SVR 
instance 42 | the pixel SVR to diagnostic 43 | X_: ndarray ( :, ndim ) 44 | test_labels that will be evaluated 45 | mask: bool 46 | if True, evaluate 47 | if False, pass 48 | 49 | Returns 50 | ------- 51 | y: ndarray 52 | predicted flux 53 | 54 | """ 55 | assert X_.ndim == 2 56 | 57 | # print('mask: ', mask) 58 | if mask: 59 | y = svr.predict(X_) # predicted y is a flatten array 60 | else: 61 | y = np.nan 62 | 63 | return y 64 | 65 | 66 | def predict_spectrum(svrs, X_, mask=None, scaler=None): 67 | """ predict a single spectrum given a list of svrs & mask 68 | 69 | Parameters 70 | ---------- 71 | svrs : list 72 | a list of svr objects 73 | X_ : ndarray 74 | the labels of predicted spectra 75 | mask : None | bool array 76 | predict the pixels where mask==True 77 | scaler : scaler object 78 | if not None, scale X_ before predictions using this scaler 79 | 80 | Returns 81 | ------- 82 | ys : ndarray 83 | predicted spectra 84 | 85 | """ 86 | if X_.ndim == 1: 87 | X_ = X_.reshape(1, -1) 88 | 89 | # scale X_ if necessary 90 | if scaler is not None: 91 | X_ = scaler.transform(X_) 92 | 93 | # default is to use all pixels 94 | if mask is None: 95 | mask = np.ones((len(svrs),), dtype=np.bool) 96 | 97 | # make predictions 98 | # print('number of true mask: ', np.sum(mask)) 99 | # print('mask len: ', mask.shape) 100 | ys = [predict_pixel(svr, X_, mask_) for svr, mask_ in zip(svrs, mask)] 101 | ys = np.array(ys, dtype=float).T 102 | 103 | return ys 104 | 105 | 106 | def predict_labels(X0, svrs, test_flux, test_ivar=None, mask=None, 107 | flux_scaler=None, ivar_scaler=None, labels_scaler=None, 108 | **kwargs): 109 | """ predict scaled labels for test_flux 110 | 111 | Parameters 112 | ---------- 113 | X0 : ndarray (n_test, n_dim) 114 | initial guess 115 | svrs: list 116 | a list of svr objects 117 | test_flux: ndarray 118 | test flux 119 | test_ivar: ndarray 120 | test ivar 121 | mask: None | bool array 122 | predict the pixels where mask==True 123 | flux_scaler: scaler object 124 | if not None, scale test_flux before predictions 125 | labels_scaler: scaler object 126 | if not None, scale predicted labels back to normal scale 127 | 128 | Returns 129 | ------- 130 | X_pred: ndarray 131 | predicted lables (scaled) 132 | 133 | """ 134 | # assert X0 is 2D array 135 | assert X0.ndim == 2 136 | 137 | # scale test_flux if necessary 138 | if flux_scaler is not None: 139 | test_flux = flux_scaler.transform(test_flux.reshape(1, -1)).flatten() 140 | 141 | # scale test_ivar if necessary 142 | if ivar_scaler is not None: 143 | test_ivar = ivar_scaler.transform(test_ivar.reshape(1, -1)).flatten() 144 | 145 | # print ("Xshape in predict_labels: ", X0.shape) 146 | # print costfun_for_label(X0, svrs, test_flux, test_ivar, mask) 147 | X_pred, ier = leastsq(costfun_for_label, X0, 148 | args=(svrs, test_flux, test_ivar, mask), **kwargs) 149 | # do minimization using Nelder-Mead method [tol=1.e-8 set by user!] 
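# For reference, leastsq above minimizes the sum of squared residuals
# returned by costfun_for_label, i.e. (a sketch, not executed here):
#   res = costfun_for_label(X0.flatten(), svrs, test_flux, test_ivar, mask)
#   chi2 = np.sum(res ** 2)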
150 | # X_pred = minimize(costfun_for_label, X0, 151 | # args=(svrs, test_flux, test_ivar, mask), 152 | # method='Nelder-Mead', **kwargs) 153 | # nll = lambda *args: -lnlike_gaussian(*args) 154 | # X_pred = minimize(nll, X0, 155 | # args=(svrs, test_flux, test_ivar, mask), 156 | # method='Nelder-Mead', **kwargs) 157 | print('@Cham: X_init=', X0, 'X_final=', X_pred, 'ier', ier) 158 | # , 'nit=', X_pred['nit'] 159 | 160 | # scale X_pred back if necessary 161 | if labels_scaler is not None: 162 | X_pred = labels_scaler.inverse_transform( 163 | X_pred.reshape(1, -1)).flatten() 164 | else: 165 | X_pred = X_pred.flatten() 166 | 167 | return X_pred 168 | 169 | 170 | def predict_labels3(X0, svrs, test_flux, test_ivar=None, mask=None, 171 | flux_scaler=None, ivar_scaler=None, labels_scaler=None, 172 | **kwargs): 173 | """ predict scaled labels for test_flux 174 | 175 | Parameters 176 | ---------- 177 | X0 : ndarray (n_test, n_dim) 178 | initial guess 179 | svrs: list 180 | a list of svr objects 181 | test_flux: ndarray 182 | test flux 183 | test_ivar: ndarray 184 | test ivar 185 | mask: None | bool array 186 | predict the pixels where mask==True 187 | flux_scaler: scaler object 188 | if not None, scale test_flux before predictions 189 | labels_scaler: scaler object 190 | if not None, scale predicted labels back to normal scale 191 | 192 | Returns 193 | ------- 194 | X_pred: ndarray 195 | predicted lables (scaled) 196 | 197 | """ 198 | # assert X0 is 2D array 199 | # assert X0.ndim == 2 200 | 201 | # scale test_flux if necessary 202 | if flux_scaler is not None: 203 | test_flux = flux_scaler.transform(test_flux.reshape(1, -1)).flatten() 204 | 205 | # scale test_ivar if necessary 206 | if ivar_scaler is not None: 207 | test_ivar = ivar_scaler.transform(test_ivar.reshape(1, -1)).flatten() 208 | 209 | ls_r = least_squares(costfun_for_label, X0, method="trf", loss="soft_l1", 210 | args=(svrs, test_flux, test_ivar, mask), **kwargs) 211 | pp_r = do_post(ls_r, labels_scaler) 212 | 213 | # verbose 214 | print("@SLAM3: nfev={}, status={}, pstd={}".format(pp_r["nfev"], pp_r["status"], pp_r["pstd"])) 215 | 216 | # print ("Xshape in predict_labels: ", X0.shape) 217 | # print costfun_for_label(X0, svrs, test_flux, test_ivar, mask) 218 | # X_pred, ier = leastsq(costfun_for_label, X0, 219 | # args=(svrs, test_flux, test_ivar, mask), **kwargs) 220 | # do minimization using Nelder-Mead method [tol=1.e-8 set by user!] 
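# The post-processed dict pp_r (built by do_post in slam/postprocessing.py)
# carries the fitted labels in pp_r["x"], inverse-transformed back to the
# physical label scale, and their 1-sigma uncertainties in pp_r["pstd"],
# derived from the least-squares Jacobian via SVD.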
221 | # X_pred = minimize(costfun_for_label, X0, 222 | # args=(svrs, test_flux, test_ivar, mask), 223 | # method='Nelder-Mead', **kwargs) 224 | # nll = lambda *args: -lnlike_gaussian(*args) 225 | # X_pred = minimize(nll, X0, 226 | # args=(svrs, test_flux, test_ivar, mask), 227 | # method='Nelder-Mead', **kwargs) 228 | # print('@Cham: X_init=', X0, 'X_final=', X_pred, 'ier', ier) 229 | # , 'nit=', X_pred['nit'] 230 | 231 | # scale X_pred back if necessary 232 | # if labels_scaler is not None: 233 | # X_pred = labels_scaler.inverse_transform( 234 | # X_pred.reshape(1, -1)).flatten() 235 | # else: 236 | # X_pred = X_pred.flatten() 237 | 238 | return pp_r 239 | 240 | 241 | def costfun_for_label(X_, svrs, test_flux, test_ivar, mask): 242 | """ calculate (ivar weighted) chi2 for a single spectrum 243 | 244 | Parameters 245 | ---------- 246 | svrs: list 247 | a list of svr objects 248 | test_flux: ndarray (n_pix, ) 249 | test flux 250 | test_ivar: ndarray (n_pix, ) 251 | test ivar 252 | mask: None | bool array 253 | predict the pixels where mask==True 254 | 255 | """ 256 | # print ("X_ in costfun_for_label: ", X_) 257 | X_.reshape(1, -1) 258 | # default is to use all pixels [True->will be used, False->deprecated] 259 | if mask is None: 260 | mask = np.ones((len(test_flux),), dtype=np.bool) 261 | 262 | # default ivar is all 1 263 | if test_ivar is None: 264 | test_ivar = np.ones_like(test_flux) 265 | else: 266 | test_ivar[test_ivar < 0] = 0. 267 | # kick more pixels using 0.01 ivar --> NON-PHYSICAL 268 | # mask = np.logical_and(mask, test_ivar > 0.01 * np.median(test_ivar)) 269 | 270 | # do prediction 271 | pred_flux = predict_spectrum(svrs, X_, mask).astype(np.float) 272 | # the pred_flux contains nan for mask=False pixels 273 | 274 | # print ("test_flux", test_flux, test_flux.shape) 275 | # print ("pred_flux", pred_flux, pred_flux.shape) 276 | # print ("test_ivar", test_ivar, test_ivar.shape) 277 | 278 | # calculate chi2 279 | # return chi2_simple_1d(test_flux, pred_flux, ivar=test_ivar) 280 | res = (test_flux.flatten()-pred_flux.flatten())*np.sqrt(test_ivar.flatten()) 281 | res[np.isnan(res)] = 0. 282 | return res 283 | 284 | 285 | def predict_labels_chi2(tplt_flux, tplt_ivar, tplt_labels, test_flux, test_ivar, 286 | n_jobs=1, verbose=False): 287 | """ a quick search for initial values of test_labels for test_flux 288 | 289 | NOTE 290 | ---- 291 | this is a nested function 292 | 293 | """ 294 | 295 | assert tplt_flux.ndim == 2 and tplt_labels.ndim == 2 296 | 297 | if test_flux.ndim == 1: 298 | # only one test_flux 299 | # n_test = 1 300 | assert tplt_flux.shape[1] == test_flux.shape[0] 301 | 302 | i_min = np.argsort( 303 | np.nanmean((tplt_flux - test_flux) ** 2. 
* test_ivar * np.where( 304 | tplt_ivar > 0, 1., np.nan), axis=1) 305 | ).flatten()[0] 306 | 307 | return tplt_labels[i_min, :] 308 | 309 | else: 310 | n_test = test_flux.shape[0] 311 | results = Parallel(n_jobs=n_jobs, verbose=verbose)( 312 | delayed(predict_labels_chi2)( 313 | tplt_flux, tplt_ivar, tplt_labels, test_flux[i, :], test_ivar[i, :]) 314 | for i in range(n_test) 315 | ) 316 | 317 | return np.array(results) 318 | 319 | 320 | def predict_pixel_for_diagnostic(svr, 321 | test_labels, 322 | labels_scaler=None, 323 | flux_mean_=0., 324 | flux_scale_=1.): 325 | """ 326 | 327 | Parameters 328 | ---------- 329 | svrs: list of sklearn.svm.SVR instance 330 | the pixel SVR to diagnostic 331 | test_labels: ndarray ( :, ndim ) 332 | test_labels that will be evaluated 333 | labels_scaler: sklearn.preprocessing.StandardScaler 334 | the scaler for labels 335 | flux_scaler: sklearn.preprocessing.StandardScaler 336 | the scaler for flux 337 | 338 | Returns 339 | ------- 340 | test_flux 341 | 342 | """ 343 | # transform test labels 344 | if labels_scaler is not None: 345 | test_labels = labels_scaler.transform(test_labels) 346 | 347 | # predict pixels 348 | test_flux = predict_pixel(svr, test_labels, mask=True)[:, None] 349 | 350 | # inverse transform predicted flux 351 | if flux_mean_ is not None and flux_scale_ is not None: 352 | test_flux = test_flux * flux_scale_ + flux_mean_ 353 | 354 | return test_flux 355 | -------------------------------------------------------------------------------- /slam/standardization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Fri Sep 02 13:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Fri Sep 02 13:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - standardization 23 | 24 | """ 25 | 26 | import numpy as np 27 | from copy import deepcopy 28 | 29 | from sklearn import preprocessing 30 | 31 | 32 | def standardize(X, weight=None, robust=False): 33 | """ Standardize X (flux / labels) 34 | 35 | Parameters 36 | ---------- 37 | X: ndarray 38 | data array 39 | 40 | Returns 41 | ------- 42 | scaler: sklearn.StandardScaler 43 | scaler 44 | 45 | X_scaled: ndarray 46 | scaled X 47 | 48 | """ 49 | if weight is None: 50 | weight = np.ones_like(X, int) 51 | weight = np.logical_and(weight > 0, np.isfinite(X)) 52 | 53 | ind_good = weight > 0 54 | n_good = np.sum(ind_good, axis=0) 55 | 56 | scaler = preprocessing.StandardScaler() 57 | scaler.fit(X) 58 | n_col = X.shape[1] 59 | 60 | scaler.scale_ = np.ones((n_col,), float) 61 | scaler.mean_ = np.zeros((n_col,), float) 62 | 63 | if robust: 64 | # estimate using percentiles 65 | for i_col in range(n_col): 66 | if n_good[i_col] > 0: 67 | # at least 1 good pixels 68 | scaler.mean_[i_col] = (np.nanpercentile( 69 | X[ind_good[:, i_col], i_col], 84) + np.nanpercentile( 70 | X[ind_good[:, i_col], i_col], 16)) / 2. 71 | scaler.scale_[i_col] = (np.nanpercentile( 72 | X[ind_good[:, i_col], i_col], 84) - np.nanpercentile( 73 | X[ind_good[:, i_col], i_col], 16)) / 2. 
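# (for Gaussian data the 16th and 84th percentiles fall at mean ± 1 sigma,
#  so (p84 + p16) / 2 and (p84 - p16) / 2 are outlier-robust estimates of
#  the mean and the standard deviation, respectively)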
74 | 75 | else: 76 | # estimate using mean and std 77 | for i_col in range(n_col): 78 | if n_good[i_col] > 0: 79 | # at least 1 good pixels 80 | scaler.scale_[i_col] = np.std(X[ind_good[:, i_col], i_col]) 81 | scaler.mean_[i_col] = np.mean(X[ind_good[:, i_col], i_col]) 82 | 83 | scaler.scale_ = np.where(scaler.scale_ < 1e-300, 1., scaler.scale_) 84 | scaler.robust = robust 85 | X_scaled = scaler.transform(X) 86 | return scaler, X_scaled 87 | 88 | 89 | def standardize_ivar(ivar, flux_scaler): 90 | """ ivar_scaler is copied from flux_scaler, but mean_ is set to be 0 91 | """ 92 | # copy flux_scaler & generate ivar_scaler 93 | ivar_scaler = deepcopy(flux_scaler) 94 | ivar_scaler.mean_ *= 0 95 | ivar_scaler.scale_ **= -2. # this is extremely important! 96 | # transform ivar data 97 | ivar_scaled = ivar_scaler.transform(ivar) 98 | return ivar_scaler, ivar_scaled 99 | 100 | 101 | if __name__ == '__main__': 102 | import numpy as np 103 | 104 | x = np.random.randn(10, 20) 105 | s, xs = standardize(x) 106 | print (x, xs) 107 | print (s, s.mean_, s.scale_) 108 | -------------------------------------------------------------------------------- /slam/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 | Bo Zhang 7 | 8 | Email 9 | ----- 10 | bozhang@nao.cas.cn 11 | 12 | Created on 13 | ---------- 14 | - Sat Sep 03 12:00:00 2016 15 | 16 | Modifications 17 | ------------- 18 | - Sat Sep 03 12:00:00 2016 19 | 20 | Aims 21 | ---- 22 | - utils for training SVRs 23 | 24 | """ 25 | 26 | import numpy as np 27 | from joblib import Parallel, delayed 28 | from scipy import stats 29 | from scipy.optimize import minimize 30 | from sklearn import svm, model_selection 31 | from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 32 | 33 | 34 | def train_single_pixel(X, y, sample_weight=None, cv=10, 35 | **kwargs): 36 | """ train a single pixel, simply CV 37 | 38 | Parameters 39 | ---------- 40 | X: ndarray with shape (n_obs x n_dim) 41 | X in sklearn notation 42 | y: ndarray with shape (n_obs, ) --> 1D 43 | y in sklearn notation 44 | sample_weight: ndarray with shape (n_obs, ) --> 1D 45 | weight for sample data 46 | cv: int / None 47 | if cv>=3, Cross-Validation will be performed to calculate MSE 48 | 49 | kwargs: 50 | extra kwargs will be passed to svm.SVR() method 51 | e.g., C=1.0, gamma='auto', epsilon=0.1 52 | 53 | Returns 54 | ------- 55 | svm.SVR() instance & score 56 | if CV is not performed, score = np.nan 57 | 58 | """ 59 | # instantiate SVR 60 | svr = svm.SVR(**kwargs) 61 | 62 | if sample_weight is None: 63 | sample_weight = np.ones_like(y, float) 64 | 65 | ind_use = sample_weight > 0 66 | X_ = X[ind_use] 67 | y_ = y[ind_use] 68 | sample_weight_ = sample_weight[ind_use] 69 | 70 | # fit data 71 | svr.fit(X_, y_, sample_weight=sample_weight_) 72 | 73 | # Cross-Validation 74 | if cv is None or cv < 2: 75 | # no cross-validation will be performed 76 | score = -np.mean(np.square(svr.predict(X_) - y_)) 77 | else: 78 | # cross-validation will be performed to calculate MSE 79 | assert isinstance(cv, int) and cv >= 2 80 | scores = model_selection.cross_val_score( 81 | svr, X_, y_, scoring='neg_mean_squared_error', 82 | cv=np.min((cv, len(y_)))) 83 | score = scores.mean() 84 | 85 | return svr, score 86 | 87 | 88 | def train_single_pixel_grid(X, y, sample_weight=None, cv=10, 89 | param_grid=None, **kwargs): 90 | """ train a single pixel using GridSearchCV 91 | 92 | Parameters 93 | ---------- 94 | X: 
ndarray with shape (n_obs x n_dim) 95 | X in sklearn notation 96 | y: ndarray with shape (n_obs, ) --> 1D 97 | y in sklearn notation 98 | sample_weight: ndarray with shape (n_obs, ) --> 1D 99 | weight for sample data 100 | cv: int / None 101 | if cv>=3, Cross-Validation will be performed to calculate MSE 102 | param_grid: dict 103 | key, value pairs of hyper-parameter grids 104 | >>> param_grid = dict(C=2. ** np.arange(-5., 6.), 105 | >>> epsilon=[0.01, 0.05, 0.1, 0.15], 106 | >>> gamma=['auto', 0.2, 0.25, 0.3, 0.5]) 107 | 108 | kwargs: 109 | extra kwargs will be passed to svm.SVR() method 110 | e.g., C=1.0, gamma='auto', epsilon=0.1 111 | 112 | Returns 113 | ------- 114 | svm.SVR() instance & best hyper-parameters & score 115 | if CV is not performed, score = np.nan 116 | 117 | """ 118 | if sample_weight is None: 119 | sample_weight = np.ones_like(y, float) 120 | 121 | # default param_grid 122 | if param_grid is None: 123 | param_grid = dict(C=2. ** np.arange(-5., 6.), 124 | epsilon=[0.01, 0.05, 0.1, 0.15], 125 | gamma=['auto', 0.2, 0.25, 0.3, 0.5]) 126 | # instantiate SVR 127 | svr = svm.SVR(**kwargs) 128 | 129 | ind_use = sample_weight > 0 130 | X_ = X[ind_use] 131 | y_ = y[ind_use] 132 | sample_weight_ = sample_weight[ind_use] 133 | 134 | # perform GridSearchCV 135 | if len(y_) >= 2: 136 | # at least 2 good samples: fit them 137 | grid = GridSearchCV(svr, param_grid, cv=np.min((cv, len(y_))), 138 | fit_params={'sample_weight': sample_weight_}, 139 | scoring='neg_mean_squared_error', n_jobs=1) 140 | # fit data 141 | grid.fit(X_, y_) 142 | 143 | # return (svr, score) 144 | return grid, grid.best_score_ 145 | 146 | else: 147 | # fewer than 2 good samples: fit mock data 148 | grid = GridSearchCV(svr, param_grid, cv=cv, 149 | fit_params={'sample_weight': sample_weight_}, 150 | scoring='neg_mean_squared_error', n_jobs=1) 151 | # fit mock data 152 | grid.fit(np.zeros((cv, X.shape[1]), float), np.zeros((cv,), float)) 153 | 154 | # return (svr, score) 155 | return grid, 1.0 156 | 157 | 158 | def train_single_pixel_rand(X, y, sample_weight=None, cv=10, 159 | n_iter=100, param_dist=None, **kwargs): 160 | """ train a single pixel using RandomizedSearchCV 161 | 162 | Parameters 163 | ---------- 164 | X: ndarray with shape (n_obs x n_dim) 165 | X in sklearn notation 166 | y: ndarray with shape (n_obs, ) --> 1D 167 | y in sklearn notation 168 | sample_weight: ndarray with shape (n_obs, ) --> 1D 169 | weight for sample data 170 | cv: int / None 171 | if cv>=3, Cross-Validation will be performed to calculate MSE 172 | n_iter: int 173 | the number of samples drawn from the hyper-parameter space 174 | param_dist: dict 175 | key, value pairs of hyper-parameter grids 176 | >>> param_dist = dict(C=stats.expon(scale=3), 177 | >>> gamma=stats.expon(scale=.1)) 178 | 179 | kwargs: 180 | extra kwargs will be passed to svm.SVR() method 181 | e.g., C=1.0, gamma='auto', epsilon=0.1 182 | 183 | Returns 184 | ------- 185 | svm.SVR() instance & best hyper-parameters & score 186 | if CV is not performed, score = np.nan 187 | 188 | """ 189 | 190 | if sample_weight is None: 191 | sample_weight = np.ones_like(y, float) 192 | 193 | # default param_grid 194 | if param_dist is None: 195 | param_dist = dict(C=stats.expon(scale=3), 196 | gamma=stats.expon(scale=.1)) 197 | # instantiate SVR 198 | svr = svm.SVR(**kwargs) 199 | # perform RandomizedSearchCV 200 | rand = RandomizedSearchCV(svr, param_dist, n_iter=n_iter, cv=cv, 201 | fit_params={'sample_weight': sample_weight}, 202 |
scoring='neg_mean_squared_error', n_jobs=1) 203 | # fit data 204 | rand.fit(X, y) 205 | 206 | # return (svr, score) 207 | return rand, rand.best_score_ 208 | 209 | 210 | def svr_mse(hyperparam, X, y, verbose=False): 211 | """ Cross-Validation MES for SVR """ 212 | gamma, C, epsilon = 10. ** np.array(hyperparam) 213 | 214 | # instantiate 215 | svr = svm.SVR(gamma=gamma, C=C, epsilon=epsilon) 216 | 217 | # MSE 218 | scores = model_selection.cross_val_score( 219 | svr, X, y, scoring='neg_mean_squared_error', cv=10, verbose=False) 220 | score = -scores.mean() 221 | 222 | # verbose 223 | if verbose: 224 | print(gamma, C, epsilon, score) 225 | 226 | return score 227 | 228 | 229 | def train_single_pixel_mini(X, y, sample_weight=None, cv=10, **kwargs): 230 | """ train a single pixel using minize 231 | 232 | Parameters 233 | ---------- 234 | X: ndarray with shape (n_obs x n_dim) 235 | X in sklearn notation 236 | y: ndarray with shape (n_obs, ) --> 1D 237 | y in sklearn notation 238 | sample_weight: ndarray with shape (n_obs, ) --> 1D 239 | weight for sample data 240 | cv: int / None 241 | if cv>=3, Cross-Validation will be performed to calculate MSE 242 | 243 | kwargs: 244 | extra parameters that will be passed to svm.SVR() 245 | 246 | 247 | Returns 248 | ------- 249 | svm.SVR() instance & best hyper-parameters & score 250 | if CV is not performed, score = np.nan 251 | 252 | """ 253 | # find optimized hyper-parameters 254 | hp0 = (-2., .7, -.15) 255 | hp = minimize(svr_mse, hp0, args=(X, y, sample_weight)) 256 | gamma, C, epsilon = 10. ** np.array(hp) 257 | 258 | # specify hyper-parameters directly 259 | return train_single_pixel(X, y, sample_weight=sample_weight, cv=cv, 260 | gamma=gamma, C=C, epsilon=epsilon, **kwargs) 261 | 262 | 263 | def train_multi_pixels(X, ys, sample_weights, cv=1, min_pix=10, 264 | method='simple', n_jobs=1, verbose=10, **kwargs): 265 | """ train multi pixels 266 | 267 | Parameters 268 | ---------- 269 | X: ndarray with shape (n_obs x n_dim) 270 | X in sklearn notation 271 | ys: ndarray with shape (n_obs x n_pix) --> 272 | y in sklearn notation 273 | sample_weights: ndarray 274 | weight of sample data 275 | cv: int 276 | number of fold in Cross-Validation 277 | method: string 278 | {'simple', 'grid', 'rand'} 279 | n_jobs: int 280 | number of processes that will be launched by joblib 281 | verbose: int 282 | the same as joblib.Parallel() parameter verbose 283 | kwargs: 284 | extra kwargs will be passed to svm.SVR() method 285 | 286 | Returns 287 | ------- 288 | svm.SVR() instance 289 | 290 | """ 291 | # determine method 292 | train_funcs = {'simple': train_single_pixel, 293 | 'grid': train_single_pixel_grid, 294 | 'rand': train_single_pixel_rand} 295 | train_func = train_funcs[method] 296 | 297 | # parallel run for SVR 298 | data = [] 299 | for y, sample_weight in zip(ys, sample_weights): 300 | this_X = np.asarray(X, float, order='C') 301 | this_y = np.asarray(y, float, order='C') 302 | this_sw = np.asarray(sample_weight, float, order='C') 303 | this_ind = this_sw > 0 304 | data.append((this_X[this_ind], this_y[this_ind], this_sw[this_ind])) 305 | 306 | results = Parallel(n_jobs=n_jobs, verbose=verbose)( 307 | delayed(train_func)(*this_data, cv=cv, **kwargs) for this_data in data) 308 | 309 | # return results 310 | return results 311 | -------------------------------------------------------------------------------- /slam/train2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Author 5 | ------ 6 
--------------------------------------------------------------------------------
/slam/train2.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""

Author
------
Bo Zhang

Email
-----
bozhang@nao.cas.cn

Created on
----------
- Sat Sep 03 12:00:00 2016

Modifications
-------------
- Sat Sep 03 12:00:00 2016

Aims
----
- utils for training SVRs

"""

import numpy as np
from joblib import Parallel, delayed
from scipy import stats
from scipy.optimize import minimize
from sklearn import svm, model_selection
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from .model import SlamModel


def train_single_pixel(X, y, sample_weight=None, cv=10,
                       **kwargs):
    """ train a single pixel (simple fit, optionally scored with CV)

    Parameters
    ----------
    X: ndarray with shape (n_obs x n_dim)
        X in sklearn notation
    y: ndarray with shape (n_obs, ) --> 1D
        y in sklearn notation
    sample_weight: ndarray with shape (n_obs, ) --> 1D
        weight for sample data
    cv: int / None
        if cv>=2, Cross-Validation will be performed to calculate MSE
    kwargs:
        extra kwargs will be passed to svm.SVR() method
        e.g., C=1.0, gamma='auto', epsilon=0.1

    Returns
    -------
    svm.SVR() instance & score
        if CV is not performed, score is the negative training-set MSE

    """
    # instantiate SVR
    svr = svm.SVR(**kwargs)

    # keep only positively weighted samples
    if sample_weight is None:
        sample_weight = np.ones_like(y, float)
    ind_use = sample_weight > 0
    X_ = X[ind_use]
    y_ = y[ind_use]
    sample_weight_ = sample_weight[ind_use]

    # fit data
    svr.fit(X_, y_, sample_weight=sample_weight_)

    # Cross-Validation
    if cv is None or cv < 2:
        # no cross-validation; score on the training set instead
        score = -np.mean(np.square(svr.predict(X_) - y_))
    else:
        # cross-validation will be performed to calculate MSE
        assert isinstance(cv, int) and cv >= 2
        scores = model_selection.cross_val_score(
            svr, X_, y_, scoring='neg_mean_squared_error', cv=cv)
        score = scores.mean()

    return svr, score
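

# Illustrative sketch: fit one pixel's flux against two labels and report a
# 5-fold CV score; the data and the demo name are invented for illustration.
def _demo_train_single_pixel():
    rng = np.random.RandomState(42)
    X_demo = rng.uniform(-1., 1., (80, 2))                     # (n_obs, n_dim)
    y_demo = np.sin(2. * X_demo[:, 0]) + 0.1 * rng.randn(80)   # one pixel
    svr, score = train_single_pixel(X_demo, y_demo, cv=5,
                                    C=2.0, gamma=0.25, epsilon=0.05)
    # score is the mean negative MSE over the 5 folds
    return svr, score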


def train_single_pixel_grid(X, y, sample_weight=None, cv=10,
                            param_grid=None, **kwargs):
    """ train a single pixel using GridSearchCV

    Parameters
    ----------
    X: ndarray with shape (n_obs x n_dim)
        X in sklearn notation
    y: ndarray with shape (n_obs, ) --> 1D
        y in sklearn notation
    sample_weight: ndarray with shape (n_obs, ) --> 1D
        weight for sample data
    cv: int / None
        if cv>=2, Cross-Validation will be performed to calculate MSE
    param_grid: dict
        key, value pairs of hyper-parameter grids
        >>> param_grid = dict(C=2. ** np.arange(-5., 6.),
        >>>                   epsilon=[0.01, 0.05, 0.1, 0.15],
        >>>                   gamma=['auto', 0.2, 0.25, 0.3, 0.5])
    kwargs:
        extra kwargs will be passed to svm.SVR() method
        e.g., C=1.0, gamma='auto', epsilon=0.1

    Returns
    -------
    fitted GridSearchCV instance & best score

    """

    # default param_grid
    if param_grid is None:
        param_grid = dict(C=2. ** np.arange(-5., 6.),
                          epsilon=[0.01, 0.05, 0.1, 0.15],
                          gamma=['auto', 0.2, 0.25, 0.3, 0.5])
    # instantiate SVR
    svr = svm.SVR(**kwargs)
    # perform GridSearchCV
    grid = GridSearchCV(svr, param_grid, cv=cv,
                        scoring='neg_mean_squared_error', n_jobs=1)
    # fit data (sample_weight is forwarded to SVR.fit)
    grid.fit(X, y, sample_weight=sample_weight)

    # return (search instance, best score)
    return grid, grid.best_score_


def train_single_pixel_rand(X, y, sample_weight=None, cv=10,
                            n_iter=100, param_dist=None, **kwargs):
    """ train a single pixel using RandomizedSearchCV

    Parameters
    ----------
    X: ndarray with shape (n_obs x n_dim)
        X in sklearn notation
    y: ndarray with shape (n_obs, ) --> 1D
        y in sklearn notation
    sample_weight: ndarray with shape (n_obs, ) --> 1D
        weight for sample data
    cv: int / None
        if cv>=2, Cross-Validation will be performed to calculate MSE
    n_iter: int
        the number of samples drawn from the hyper-parameter space
    param_dist: dict
        key, value pairs of hyper-parameter distributions
        >>> param_dist = dict(C=stats.expon(scale=3),
        >>>                   gamma=stats.expon(scale=.1))
    kwargs:
        extra kwargs will be passed to svm.SVR() method
        e.g., C=1.0, gamma='auto', epsilon=0.1

    Returns
    -------
    fitted RandomizedSearchCV instance & best score

    """

    # default param_dist
    if param_dist is None:
        param_dist = dict(C=stats.expon(scale=3),
                          gamma=stats.expon(scale=.1))
    # instantiate SVR
    svr = svm.SVR(**kwargs)
    # perform RandomizedSearchCV
    rand = RandomizedSearchCV(svr, param_dist, n_iter=n_iter, cv=cv,
                              scoring='neg_mean_squared_error', n_jobs=1)
    # fit data (sample_weight is forwarded to SVR.fit)
    rand.fit(X, y, sample_weight=sample_weight)

    # return (search instance, best score)
    return rand, rand.best_score_
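

# Illustrative sketch: both searches accept custom search spaces; the helper
# name and the values below are arbitrary assumptions, not recommended defaults.
def _demo_search(X, y):
    # coarse grid around the SVR defaults
    grid, grid_score = train_single_pixel_grid(
        X, y, cv=5,
        param_grid=dict(C=2. ** np.arange(-2., 3.),
                        epsilon=[0.05, 0.1],
                        gamma=['auto', 0.1, 0.3]))
    # randomized search draws C and gamma from exponential distributions
    rand, rand_score = train_single_pixel_rand(
        X, y, cv=5, n_iter=20,
        param_dist=dict(C=stats.expon(scale=3),
                        gamma=stats.expon(scale=.1)))
    return (grid, grid_score), (rand, rand_score)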


def svr_mse(hyperparam, X, y, verbose=False):
    """ Cross-Validation MSE for SVR """
    # hyper-parameters are optimized in log10 space
    gamma, C, epsilon = 10. ** np.array(hyperparam)

    # instantiate
    svr = svm.SVR(gamma=gamma, C=C, epsilon=epsilon)

    # MSE (cross_val_score returns negative MSE, so flip the sign)
    scores = model_selection.cross_val_score(
        svr, X, y, scoring='neg_mean_squared_error', cv=10, verbose=False)
    score = -scores.mean()

    # verbose
    if verbose:
        print(gamma, C, epsilon, score)

    return score


def train_single_pixel_mini(X, y, sample_weight=None, cv=10, **kwargs):
    """ train a single pixel using scipy.optimize.minimize

    Parameters
    ----------
    X: ndarray with shape (n_obs x n_dim)
        X in sklearn notation
    y: ndarray with shape (n_obs, ) --> 1D
        y in sklearn notation
    sample_weight: ndarray with shape (n_obs, ) --> 1D
        weight for sample data
    cv: int / None
        if cv>=2, Cross-Validation will be performed to calculate MSE
    kwargs:
        extra parameters that will be passed to svm.SVR()

    Returns
    -------
    svm.SVR() instance & score
        if CV is not performed, score is the negative training-set MSE

    """
    # find optimized hyper-parameters in log10 space;
    # note that svr_mse does not take sample_weight, so it is not passed here
    hp0 = (-2., .7, -.15)
    res = minimize(svr_mse, hp0, args=(X, y))
    gamma, C, epsilon = 10. ** np.array(res.x)

    # specify hyper-parameters directly
    return train_single_pixel(X, y, sample_weight=sample_weight, cv=cv,
                              gamma=gamma, C=C, epsilon=epsilon, **kwargs)
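

# Illustrative sketch: svr_mse is the objective the minimizer sees -- it maps
# log10 (gamma, C, epsilon) to a 10-fold CV MSE. The helper below is invented
# for demonstration.
def _demo_mini_objective(X, y):
    hp0 = (-2., .7, -.15)
    mse0 = svr_mse(hp0, X, y, verbose=True)      # objective at the start point
    res = minimize(svr_mse, hp0, args=(X, y),
                   method='Nelder-Mead')         # gradient-free refinement
    return mse0, 10. ** np.array(res.x)          # optimized (gamma, C, epsilon)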


def train_multi_pixels(X, ys, sample_weights, model="nn", method="grid",
                       param_grid=None, cv=8, scoring="neg_mean_squared_error",
                       n_jobs=1, verbose=10, backend="multiprocessing", **kwargs):
    """ train multiple pixels

    Parameters
    ----------
    X: ndarray with shape (n_obs x n_dim)
        X in sklearn notation
    ys: ndarray with shape (n_pix x n_obs)
        y in sklearn notation, one row per pixel
    sample_weights: ndarray with shape (n_pix x n_obs)
        weight of sample data, one row per pixel
    model: string
        model type, passed to SlamModel.train()
    method: string
        "simple" | "grid"
    param_grid: dict
        grid of hyper-parameters
    cv: int
        number of folds in Cross-Validation
    scoring: string
        scoring scheme
    n_jobs: int
        number of processes that will be launched by joblib
    verbose: int
        the same as joblib.Parallel() parameter verbose
    backend: string
        joblib backend
    kwargs:
        extra kwargs will be passed to SlamModel.train()

    Returns
    -------
    list of trained models, one per pixel

    """
    # keep only positively weighted samples for each pixel
    data = []
    for y, sample_weight in zip(ys, sample_weights):
        this_X = np.asarray(X, float, order='C')
        this_y = np.asarray(y, float, order='C')
        this_sw = np.asarray(sample_weight, float, order='C')
        this_ind = this_sw > 0
        data.append((this_X[this_ind], this_y[this_ind], this_sw[this_ind]))

    kwargs.update(dict(model=model,
                       method=method,
                       param_grid=param_grid,
                       cv=cv,
                       scoring=scoring))

    # parallel run, one model per pixel
    results = Parallel(n_jobs=n_jobs, verbose=verbose, backend=backend)(
        delayed(SlamModel.train)(*this_data, **kwargs) for this_data in data)

    # return results
    return results

--------------------------------------------------------------------------------
/slam/utils.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""

Author
------
Bo Zhang

Email
-----
bozhang@nao.cas.cn

Created on
----------
- Sun Jun 11 12:00:00 2017

Modifications
-------------
- Sun Jun 11 12:00:00 2017

Aims
----
- utils for processing spectra

"""
import sys
from collections import OrderedDict, deque
from collections.abc import Mapping, Set
from numbers import Number

import numpy as np


def convolve_mask(mask, kernel_size_coef=.25, kernel_size_limit=(2, 100),
                  sink_region=(200, .5)):
    """ convolve (widen) a pixel mask

    Parameters
    ----------
    mask: array like
        initial mask. True for good pixels, False for bad ones.
    kernel_size_coef: float
        the kernel_size / bad_chunk_length coefficient
    kernel_size_limit: tuple
        (lower limit, upper limit) of the kernel size in pixels
    sink_region: tuple
        (width, threshold fraction of good pixels in this region);
        if None, this step is skipped

    Returns
    -------
    convolved mask

    """
    # mask: True for good, False for bad
    # 1. kernel length: at least 2 pixels, at most 100 pixels
    mask0 = np.array(mask, bool)
    mask1 = np.array(np.hstack((True, mask0, True)), int)
    mask2 = np.copy(mask0)

    # locate bad chunks from the sign changes of the padded mask
    mask1_diff = np.diff(mask1)
    bad_chunks = np.vstack(
        (np.where(mask1_diff < 0)[0], np.where(mask1_diff > 0)[0])).T

    # pad each chunk by kernel_size_coef times its length, clipped to limits
    bad_chunks_len = np.round(np.diff(bad_chunks, axis=1) * kernel_size_coef)
    bad_chunks_len = np.where(bad_chunks_len < kernel_size_limit[0],
                              kernel_size_limit[0], bad_chunks_len)
    bad_chunks_len = np.where(bad_chunks_len > kernel_size_limit[1],
                              kernel_size_limit[1], bad_chunks_len)

    bad_chunks_convolved = np.array(
        bad_chunks_len.reshape(-1, 1) * np.array([-1, 1]) + bad_chunks, int)
    bad_chunks_convolved = np.where(
        bad_chunks_convolved < 0, 0, bad_chunks_convolved)
    bad_chunks_convolved = np.where(
        bad_chunks_convolved >= len(mask0), len(mask0), bad_chunks_convolved)

    for i_chunk in range(bad_chunks_convolved.shape[0]):
        mask2[bad_chunks_convolved[i_chunk, 0]:bad_chunks_convolved[
            i_chunk, 1]] = False

    # 2. sink_region: second round mask convolution
    if sink_region is not None:
        ind_min = 0
        ind_max = len(mask0)

        # mask pixels whose neighborhood has too low a fraction of good pixels
        good_frac = np.zeros_like(mask0, float)
        for i in range(ind_max):
            this_start = np.max((ind_min, i - sink_region[0]))
            this_stop = np.min((ind_max, i + sink_region[0]))
            if (this_stop - this_start) < (2 * sink_region[0]):
                if this_start == ind_min:
                    this_stop = this_start + sink_region[0]
                else:
                    this_start = this_stop - sink_region[0]
            good_frac[i] = np.sum(mask0[this_start:this_stop]) / \
                (this_stop - this_start)
        mask2 = np.where(good_frac < sink_region[1], False, mask2)

    return mask2
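

# Illustrative sketch: widen an 8-pixel bad chunk; the toy mask and helper name
# are invented (True = good pixel). With kernel_size_coef=.25 the chunk is
# padded by round(8 * .25) = 2 pixels on each side.
def _demo_convolve_mask():
    mask = np.ones(50, bool)
    mask[20:28] = False
    mask_c = convolve_mask(mask, kernel_size_coef=.25,
                           kernel_size_limit=(2, 100), sink_region=None)
    return mask, mask_c   # pixels 18..29 end up masked in mask_c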


def uniform(tr_labels, bins, n_pick=3, ignore_out=False, digits=8):
    """ make a uniform sample --> index stored in Slam.uniform_good

    Parameters
    ----------
    tr_labels: ndarray with shape (n_obs x n_dim)
        training labels
    bins: list of arrays
        bins in each dim
    n_pick: int
        how many to pick in each bin
    ignore_out: bool
        if True, silently drop stars that fall out of the bins;
        if False, raise an error if any star is out of the bins
    digits: int
        digits to form the bin ID string

    Examples
    --------
    >>> uniform(data, [np.arange(3000, 6000, 100), np.arange(-1, 5, .2),
    >>>                np.arange(-5, 1, .1)], n_pick=1, ignore_out=False)

    Returns
    -------
    dict of index arrays describing the selected sub-sample

    """

    n_obs, n_dim = tr_labels.shape
    try:
        assert len(bins) == n_dim
    except AssertionError:
        print("@utils.uniform: ", len(bins), n_dim, "don't match")
        raise

    # initiate arrays
    uniform_good = np.ones((n_obs,), bool)
    uniform_ind = np.ones_like(tr_labels, float) * np.nan

    # make IDs for bins
    for i_dim in range(n_dim):
        this_bins = bins[i_dim]
        for i_bin in range(len(this_bins) - 1):
            ind = np.logical_and(
                tr_labels[:, i_dim] > this_bins[i_bin],
                tr_labels[:, i_dim] < this_bins[i_bin + 1])
            uniform_ind[ind, i_dim] = i_bin

    # check that the bins cover all stars
    ind_not_in_bins = np.any(
        np.logical_not(np.isfinite(uniform_ind)), axis=1)
    if np.sum(ind_not_in_bins) > 0:
        if ignore_out:
            print("@utils.uniform: These stars are out of bins and ignored")
            print("i = ", np.where(ind_not_in_bins)[0])
            uniform_good &= np.logical_not(ind_not_in_bins)
        else:
            raise ValueError(
                "@utils.uniform: bins not wide enough to cover all stars")

    # make ID string for bins
    fmt = "{{:0{}.0f}}".format(digits)
    uniform_str = []
    for i_obs in range(n_obs):
        str_ = ""
        for i_dim in range(n_dim):
            str_ += fmt.format(uniform_ind[i_obs, i_dim])
        uniform_str.append(str_)
    uniform_str = np.array(uniform_str)

    # unique IDs
    u_str, u_inverse, u_counts = np.unique(
        uniform_str, return_inverse=True, return_counts=True)

    # pick stars from over-populated bins
    ind_bin_need_to_pick = np.where(u_counts > n_pick)[0]
    for _ in ind_bin_need_to_pick:
        ind_in_this_bin = np.where(u_inverse == _)[0]
        np.random.shuffle(ind_in_this_bin)
        uniform_good[ind_in_this_bin[n_pick:]] = False

    print("@utils.uniform: [{}/{}] stars chosen to make a uniform sample!"
          "".format(np.sum(uniform_good), n_obs))

    return dict(uniform_picked=uniform_good,
                uniform_unpicked=np.logical_not(uniform_good),
                uniform_ind=uniform_ind,
                uniform_str=uniform_str,
                uniform_bins=bins,
                n_pick=n_pick,
                digits=digits,
                ignore_out=ignore_out)


unit_scale_dict = dict(
    b=1.,
    kb=1024**-1,
    mb=1024**-2,
    gb=1024**-3,
)


# deprecated
def sizeof(obj, unit='mb', verbose=False, key_removed=None):
    """ total size of an object's attributes in the given unit """
    # get scale for unit
    try:
        scale = unit_scale_dict[unit]
    except KeyError:
        print("@sizeof: unit should be in ", unit_scale_dict.keys())
        raise

    # get size of each attribute
    v_dict = OrderedDict()
    for _ in dir(obj):
        v_dict[_] = getsize(obj.__getattribute__(_))

    v_dict['_total'] = np.sum([v_dict[_] for _ in v_dict.keys()])
    for k in v_dict.keys():
        v_dict[k] = int(scale * v_dict[k])

    v_dict['_unit'] = unit

    if verbose:
        print(v_dict)

    return v_dict


# ########################################################################### #
# adapted from https://stackoverflow.com/questions/449560/how-do-i-determine-th
# e-size-of-an-object-in-python
# ########################################################################### #

# try:  # Python 2
#     zero_depth_bases = (basestring, Number, xrange, bytearray)
#     iteritems = 'iteritems'
# except NameError:  # Python 3
zero_depth_bases = (str, bytes, Number, range, bytearray)
iteritems = 'items'


def getsize(obj_0):
    """Recursively iterate to sum size of object & members."""
    # the default set is created once per getsize() call and acts as the
    # visited-set, so shared objects are only counted once
    def inner(obj, _seen_ids=set()):
        obj_id = id(obj)
        if obj_id in _seen_ids:
            return 0
        _seen_ids.add(obj_id)
        size = sys.getsizeof(obj)
        if isinstance(obj, zero_depth_bases):
            pass  # bypass remaining control flow and return
        elif isinstance(obj, (tuple, list, Set, deque)):
            size += sum(inner(i) for i in obj)
        elif isinstance(obj, Mapping) or hasattr(obj, iteritems):
            size += sum(inner(k) + inner(v)
                        for k, v in getattr(obj, iteritems)())
        # Check for custom object instances - may subclass above too
        if hasattr(obj, '__dict__'):
            size += inner(vars(obj))
        if hasattr(obj, '__slots__'):  # can have __slots__ with __dict__
            size += sum(inner(getattr(obj, s))
                        for s in obj.__slots__ if hasattr(obj, s))
        return size
    return inner(obj_0)
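

# Illustrative sketch: unlike sys.getsizeof, which counts only the outer
# container, getsize recurses into members. The toy object and helper name
# are invented for demonstration.
def _demo_getsize():
    nested = {"a": list(range(1000)), "b": ("x" * 100, bytearray(50))}
    shallow = sys.getsizeof(nested)   # size of the dict object alone
    deep = getsize(nested)            # dict + keys + values, recursively
    return shallow, deep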

--------------------------------------------------------------------------------
/upload.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#python setup.py sdist upload
python setup.py sdist bdist_wheel
twine upload dist/*
--------------------------------------------------------------------------------