├── .gitignore ├── CHANGELOG.rst ├── INSTALL.rst ├── LICENSE.txt ├── README.rst ├── cuvarbase ├── __init__.py ├── bls.py ├── ce.py ├── core.py ├── cunfft.py ├── kernels │ ├── bls.cu │ ├── ce.cu │ ├── cunfft.cu │ ├── lomb.cu │ ├── pdm.cu │ └── wavelet.cu ├── lombscargle.py ├── pdm.py ├── tests │ ├── __init__.py │ ├── test_bls.py │ ├── test_ce.py │ ├── test_lombscargle.py │ ├── test_nfft.py │ └── test_pdm.py └── utils.py ├── docs ├── Makefile ├── requirements.txt └── source │ ├── bls.rst │ ├── ce.rst │ ├── conf.py │ ├── cuvarbase.rst │ ├── cuvarbase.tests.rst │ ├── index.rst │ ├── install.rst │ ├── logo.png │ ├── lomb.rst │ ├── modules.rst │ ├── plots │ ├── benchmarks.py │ ├── bls_example.py │ ├── bls_example_transit.py │ ├── bls_transit_diagram.py │ ├── ce_example.py │ ├── logo.py │ └── planet_transit_diagram.py │ └── whatsnew.rst ├── notebooks ├── Conditional entropy.ipynb ├── Lomb Scargle.ipynb ├── PDM2_bin.jpg ├── PDM2_binless_gauss.jpg ├── PDM2_binless_tophat.jpg ├── PDM_bin.jpg └── Phase Dispersion Minimization.ipynb ├── publish_docs.sh ├── requirements.txt ├── setup.cfg ├── setup.py └── test_python_versions.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | .pytest_cache/ 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | # emacs backups 61 | *~ 62 | \#*\# 63 | 64 | .ipynb_checkpoints 65 | .idea/* 66 | tools/repos 67 | Untitled*.ipynb 68 | 69 | # vim backups 70 | *.swp 71 | 72 | # LaTeX 73 | *.aux 74 | *.pdf 75 | 76 | # misc 77 | scripts/saved_results 78 | .DS_Store 79 | work/ 80 | *.png 81 | *.gif 82 | *HAT*txt 83 | testing/* 84 | custom_test_ce.py 85 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | What's new in cuvarbase 2 | *********************** 3 | * **0.2.5** 4 | * swap out pycuda.autoinit for pycuda.autoprimaryctx to handle "cuFuncSetBlockShape" error 5 | 6 | * **0.2.4** 7 | * bugfix for pytest (broke b/c of incorrect fixture usage) 8 | * added ``ignore_negative_delta_sols`` option to BLS to ignore inverted dips in the lightcurve 9 | 10 | * **0.2.1** 11 | * bugfix for memory leak in BLS 12 | * contact email changed in setup 13 | 14 | * **0.2.0** 15 | * Many more unit tests for BLS and CE. 16 | * BLS 17 | * Now several orders of magnitude faster! Use ``use_fast=True`` in ``eebls_transit_gpu`` or use ``eebls_gpu_fast``. 18 | * Bug-fix for boost-python error when calling ``eebls_gpu_fast``. 
19 |     * CE
20 |         * New ``use_fast`` parameter in ``ConditionalEntropyAsyncProcess``; if selected, will use a kernel that should be substantially more efficient and that requires no memory overhead. If selected, you should use the ``run`` function and not the ``large_run`` function. Currently the ``weighted`` option is not supported when ``use_fast`` is ``True``.
21 |         * Bug-fix for ``mag_overlap > 0``.
22 | 
23 | * **0.1.9**
24 |     * Added Sphinx documentation
25 |     * **Now Python 3 compatible!**
26 |     * Miscellaneous bug fixes
27 |     * CE
28 |         * Run functions for ``ConditionalEntropyAsyncProcess`` now allow a ``balanced_magbins`` argument that sets the magnitude bins to have widths that vary with the distribution of magnitude values. This is more robust to outliers, but performance comparisons with the usual CE algorithm indicate that you should use care.
29 |         * Added a ``precompute`` function to ``ConditionalEntropyAsyncProcess`` that allows you to speed up computations without resorting to the ``batched_run_constant_nfreq`` function. Currently it still assumes that the frequencies used will be the same for all lightcurves.
30 |     * GLS
31 |         * Added a ``precompute`` function to ``LombScargleAsyncProcess``.
32 |         * Avoids allocating GPU memory for the NFFT when ``use_fft`` is ``False``.
33 |         * ``LombScargleAsyncProcess.memory_requirement`` is now implemented.
34 |     * BLS
35 |         * ``eebls_gpu``, ``eebls_transit_gpu``, and ``eebls_custom_gpu`` now have a ``max_memory`` option that lets you automatically set the ``batch_size`` without worrying about memory allocation errors.
36 |         * ``eebls_transit_gpu`` now allows a ``freqs`` argument and a ``qvals`` argument for customizing the frequencies and the fiducial ``q`` values.
37 |         * Fixed a small bug in ``fmin_transit`` that miscalculated the minimum frequency.
38 | 
39 | * **0.1.8**
40 |     * Removed gamma function usage from the Baluev 2008 false alarm probability (``use_gamma=True`` will override this)
41 |     * Fixed a bug in the GLS notebook
42 | 
43 | * **0.1.6/0.1.7**
44 |     * Some bug fixes for GLS
45 |     * ``large_run`` function for the Conditional Entropy period finder allows large frequency grids
46 |       without raising memory allocation errors.
47 |     * More unit tests for conditional entropy
48 |     * Conditional entropy now supports double precision with the ``use_double`` argument
49 | 
50 | * **0.1.5**
51 |     * Conditional Entropy period finder now unit tested
52 |     * Weighted variant also implemented -- accounts for heteroskedasticity if
53 |       that's important
54 |     * BLS
55 |         * New unit tests
56 |         * A new transiting exoplanet BLS function: ``eebls_transit_gpu``
57 |             * Only searches the plausible parameter space for a Keplerian orbit
58 |     * GLS
59 |         * False alarm probability: ``fap_baluev``
60 |             * Implements the `Baluev 2008 `_ false alarm probability measure based on extreme value theory
61 | 
62 | 

--------------------------------------------------------------------------------
/INSTALL.rst:
--------------------------------------------------------------------------------
1 | Install instructions
2 | ********************
3 | 
4 | These installation instructions are for Linux/BSD-based systems (OS X/macOS, Ubuntu, etc.). Windows users, your suggestions and feedback are welcome if we can make your life easier!
5 | 
6 | Installing the Nvidia Toolkit
7 | -----------------------------
8 | 
9 | ``cuvarbase`` requires PyCUDA and scikit-cuda, both of which require the Nvidia toolkit for access to the Nvidia compiler, drivers, and runtime libraries.
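Before going further, it can help to know whether a CUDA toolkit is already installed and visible. A quick check (an illustrative snippet using only the Python standard library; it is not part of ``cuvarbase``):

.. code:: python

    import shutil

    # nvcc ships with the CUDA toolkit; None here means the toolkit is
    # either missing or not on your $PATH (see the warning below).
    print(shutil.which("nvcc"))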
10 | 
11 | Go to the `NVIDIA Download page `_ and select the distribution for your operating system. Everything has been developed and tested using **version 8.0**, so it may be best to stick with that version for now until we verify that later versions are OK.
12 | 
13 | .. warning::
14 | 
15 |     Make sure that your ``$PATH`` environment variable contains the location of the ``CUDA`` binaries. You can test this by trying
16 |     ``which nvcc`` from your terminal. If nothing is printed, you'll have to amend your ``~/.bashrc`` file:
17 | 
18 |     ``echo "export PATH=/usr/local/cuda/bin:${PATH}" >> ~/.bashrc && . ~/.bashrc``
19 | 
20 |     The ``>>`` is not a typo -- using one ``>`` will *overwrite* the ``~/.bashrc`` file. Make sure you change ``/usr/local/cuda`` to the appropriate location of your Nvidia install.
21 | 
22 |     **Also important**
23 | 
24 |     Make sure your ``$LD_LIBRARY_PATH`` and ``$DYLD_LIBRARY_PATH`` are also similarly modified to include the ``/lib`` directory of the CUDA install:
25 | 
26 |     ``echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib:${LD_LIBRARY_PATH}" >> ~/.bashrc && . ~/.bashrc``
27 |     ``echo "export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:${DYLD_LIBRARY_PATH}" >> ~/.bashrc && . ~/.bashrc``
28 | 
29 | 
30 | Using conda
31 | -----------
32 | 
33 | `Conda `_ is a great way to do this in a safe, isolated environment.
34 | 
35 | First create a new conda environment (named ``pycu`` here) that will use Python 2.7 (Python 2.7, 3.4, 3.5, and 3.6
36 | have been tested), with the numpy library installed.
37 | 
38 | .. code:: bash
39 | 
40 |     conda create -n pycu python=2.7 numpy
41 | 
42 | .. note::
43 | 
44 |     The numpy library *has* to be installed *before* PyCUDA is installed with pip.
45 |     The PyCUDA setup needs access to the numpy library in order to build against it. You can do this with
46 |     the above command, or alternatively just do ``pip install numpy && pip install cuvarbase``
47 | 
48 | Then activate the virtual environment
49 | 
50 | .. code:: bash
51 | 
52 |     source activate pycu
53 | 
54 | and then use ``pip`` to install ``cuvarbase``
55 | 
56 | .. code:: bash
57 | 
58 |     pip install cuvarbase
59 | 
60 | 
61 | Installing with just ``pip``
62 | ----------------------------
63 | 
64 | **If you don't want to use conda**, the following should work with just pip:
65 | 
66 | .. code:: bash
67 | 
68 |     pip install numpy
69 |     pip install cuvarbase
70 | 
71 | 
72 | Troubleshooting PyCUDA installation problems
73 | --------------------------------------------
74 | 
75 | The ``PyCUDA`` installation step may be a hiccup in this otherwise orderly process. If you run into problems installing ``PyCUDA`` with pip, you may have to install PyCUDA from source yourself. It's not too bad, but if you experience any problems, please submit an `Issue `_ at the ``cuvarbase`` Github page and I'll amend this documentation.
76 | 
77 | Below is a small bash script that (hopefully) automates the process of installing PyCUDA in the event of any problems you've encountered at this point.
78 | 
79 | .. code-block:: bash
80 | 
81 |     PYCUDA="pycuda-2017.1.1"
82 |     PYCUDA_URL="https://pypi.python.org/packages/b3/30/9e1c0a4c10e90b4c59ca7aa3c518e96f37aabcac73ffe6b5d9658f6ef843/pycuda-2017.1.1.tar.gz#md5=9e509f53a23e062b31049eb8220b2e3d"
83 |     CUDA_ROOT=/usr/local/cuda
84 | 
85 |     # Download
86 |     wget $PYCUDA_URL
87 | 
88 |     # Unpack
89 |     tar xvf ${PYCUDA}.tar.gz
90 |     cd $PYCUDA
91 | 
92 |     # Configure with the current python executable
93 |     ./configure.py --python-exe=`which python` --cuda-root=$CUDA_ROOT
94 |     python setup.py build
95 |     python setup.py install
96 | 
97 | If everything goes smoothly, you should now test whether ``pycuda`` is working correctly.
98 | 
99 | .. code:: bash
100 | 
101 |     python -c "import pycuda.autoinit; print('Hurray!')"
102 | 
103 | If everything has worked up to this point, you should be ready to install ``cuvarbase``:
104 | 
105 | .. code:: bash
106 | 
107 |     pip install cuvarbase
108 | 
109 | Installing from source
110 | ----------------------
111 | 
112 | You can also install directly from the repository. Clone the ``git`` repository on your machine:
113 | 
114 | .. code:: bash
115 | 
116 |     git clone https://github.com/johnh2o2/cuvarbase
117 | 
118 | Then install!
119 | 
120 | .. code:: bash
121 | 
122 |     cd cuvarbase
123 |     python setup.py install
124 | 
125 | The last command can also be done with pip:
126 | 
127 | .. code:: bash
128 | 
129 |     pip install -e .
130 | 
131 | 
132 | 
133 | Troubleshooting on a Mac
134 | ------------------------
135 | 
136 | Nvidia offers `CUDA for Mac OSX `_. After installing the
137 | package by downloading and running the ``.dmg`` file, you'll have to make a couple of edits to your
138 | ``~/.bash_profile``:
139 | 
140 | .. code:: sh
141 | 
142 |     export DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}:/usr/local/cuda/lib"
143 |     export PATH="/usr/local/cuda/bin:${PATH}"
144 | 
145 | and then source these changes in your current shell by running ``. ~/.bash_profile``.
146 | 
147 | Another important note: **nvcc (8.0.61) does not appear to support the latest clang compiler**. If this is
148 | the case, running ``python example.py`` should produce the following error:
149 | 
150 | .. code:: bash
151 | 
152 |     nvcc fatal : The version ('80100') of the host compiler ('Apple clang') is not supported
153 | 
154 | You can fix this problem by temporarily downgrading your clang compiler. To do this:
155 | 
156 | - `Download Xcode command line tools 7.3.1 `_
157 | - Install.
158 | - Run ``sudo xcode-select --switch /Library/Developer/CommandLineTools`` and verify that ``clang --version`` now reports ``7.3``.

--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | cuvarbase
2 | =========
3 | 
4 | .. image:: https://badge.fury.io/py/cuvarbase.svg
5 |     :target: https://badge.fury.io/py/cuvarbase
6 | 
7 | John Hoffman
8 | (c) 2017
9 | 
10 | ``cuvarbase`` is a Python library that uses `PyCUDA `_ to implement several time series tools used in astronomy on GPUs.
11 | 
12 | See the `documentation `_.
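To give a flavor of the API, here is a minimal Lomb-Scargle sketch. It follows the same ``run``/``finish`` pattern as the ``NFFTAsyncProcess`` docstring example later in this repository; treat the exact argument handling as illustrative and defer to the documentation:

.. code:: python

    import numpy as np
    from cuvarbase.lombscargle import LombScargleAsyncProcess

    # simulated data: a noisy 10-cycle sinusoid with uncertainties
    t = np.sort(np.random.rand(300))
    y = np.cos(2 * np.pi * 10 * t) + 0.1 * np.random.randn(len(t))
    dy = 0.1 * np.ones_like(y)

    proc = LombScargleAsyncProcess()
    results = proc.run([(t, y, dy)])
    proc.finish()

    freqs, powers = results[0]
    print(freqs[np.argmax(powers)])  # should be close to 10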
13 | 
14 | This project is under active development, and currently includes implementations of
15 | 
16 | - Generalized `Lomb Scargle `_ periodogram
17 | - Box-least squares (`BLS `_ )
18 | - Non-equispaced fast Fourier transform (adjoint operation) (`NFFT paper `_)
19 | - Conditional entropy period finder (`CE `_)
20 | - Phase dispersion minimization (`PDM2 `_)
21 |     - Currently operational, but with minimal unit testing and documentation so far
22 | 
23 | Hopefully future developments will include
24 | 
25 | - (Weighted) wavelet transforms
26 | - Spectrograms (for PDM and GLS)
27 | - Multiharmonic extensions for GLS
28 | 
29 | 
30 | Dependencies
31 | ------------
32 | 
33 | - `PyCUDA `_ **<-essential**
34 | - `scikit-cuda `_ **<-also essential**
35 |     - used for access to the CUDA FFT runtime library
36 | - `matplotlib `_ (for plotting utilities)
37 | - `nfft `_ (for unit testing)
38 | - `astropy `_ (for unit testing)
39 | 
40 | 
41 | Using multiple GPUs
42 | -------------------
43 | 
44 | If you have more than one GPU, you can choose which one to
45 | use in a given script by setting the ``CUDA_DEVICE`` environment
46 | variable:
47 | 
48 | .. code:: sh
49 | 
50 |     CUDA_DEVICE=1 python script.py
51 | 
52 | If anyone is interested in implementing a multi-device load-balancing
53 | solution, they are encouraged to do so! At some point this may
54 | become important, but for the time being manually splitting up the
55 | jobs across different GPUs will have to suffice.
56 | 

--------------------------------------------------------------------------------
/cuvarbase/__init__.py:
--------------------------------------------------------------------------------
1 | # import pycuda.autoinit causes problems when running e.g. FFT
2 | import pycuda.autoprimaryctx
3 | __version__ = "0.2.6"
4 | 

--------------------------------------------------------------------------------
/cuvarbase/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from builtins import range
6 | from builtins import object
7 | import numpy as np
8 | from .utils import gaussian_window, tophat_window, get_autofreqs
9 | import pycuda.driver as cuda
10 | from pycuda.compiler import SourceModule
11 | 
12 | 
13 | class GPUAsyncProcess(object):
14 |     def __init__(self, *args, **kwargs):
15 |         self.reader = kwargs.get('reader', None)
16 |         self.nstreams = kwargs.get('nstreams', None)
17 |         self.function_kwargs = kwargs.get('function_kwargs', {})
18 |         self.device = kwargs.get('device', 0)
19 |         self.streams = []
20 |         self.gpu_data = []
21 |         self.results = []
22 |         self._adjust_nstreams = self.nstreams is None
23 |         if self.nstreams is not None:
24 |             self._create_streams(self.nstreams)
25 |         self.prepared_functions = {}
26 | 
27 |     def _create_streams(self, n):
28 |         for i in range(n):
29 |             self.streams.append(cuda.Stream())
30 | 
31 |     def _compile_and_prepare_functions(self):
32 |         raise NotImplementedError()
33 | 
34 |     def run(self, *args, **kwargs):
35 |         raise NotImplementedError()
36 | 
37 |     def finish(self):
38 |         """ synchronize all active streams """
39 |         for i, stream in enumerate(self.streams):
40 |             stream.synchronize()
41 | 
42 |     def batched_run(self, data, batch_size=10, **kwargs):
43 |         """ Run your data in batches (avoids memory problems) """
44 |         nsubmit = 0
45 |         results = []
46 |         while nsubmit < len(data):
47 |             batch = []
48 |             while len(batch) < batch_size and nsubmit < len(data):
49 |                 batch.append(data[nsubmit])
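                # each append consumes one item of `data`; nsubmit counts the
                # total consumed so both while loops eventually terminate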
50 | nsubmit += 1 51 | 52 | res = self.run(batch, **kwargs) 53 | self.finish() 54 | results.extend(res) 55 | 56 | return results 57 | -------------------------------------------------------------------------------- /cuvarbase/cunfft.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | from builtins import object 7 | 8 | import sys 9 | import resource 10 | import numpy as np 11 | 12 | import pycuda.driver as cuda 13 | import pycuda.gpuarray as gpuarray 14 | from pycuda.compiler import SourceModule 15 | # import pycuda.autoinit 16 | 17 | import skcuda.fft as cufft 18 | 19 | from .core import GPUAsyncProcess 20 | from .utils import find_kernel, _module_reader 21 | 22 | 23 | class NFFTMemory(object): 24 | def __init__(self, sigma, stream, m, use_double=False, 25 | precomp_psi=True, **kwargs): 26 | 27 | self.sigma = sigma 28 | self.stream = stream 29 | self.m = m 30 | self.use_double = use_double 31 | self.precomp_psi = precomp_psi 32 | 33 | # set datatypes 34 | self.real_type = np.float32 if not self.use_double \ 35 | else np.float64 36 | self.complex_type = np.complex64 if not self.use_double \ 37 | else np.complex128 38 | 39 | self.other_settings = {} 40 | self.other_settings.update(kwargs) 41 | 42 | self.t = kwargs.get('t', None) 43 | self.y = kwargs.get('y', None) 44 | self.f0 = kwargs.get('f0', 0.) 45 | self.n0 = kwargs.get('n0', None) 46 | self.nf = kwargs.get('nf', None) 47 | self.t_g = kwargs.get('t_g', None) 48 | self.y_g = kwargs.get('y_g', None) 49 | self.ghat_g = kwargs.get('ghat_g', None) 50 | self.ghat_c = kwargs.get('ghat_c', None) 51 | self.q1 = kwargs.get('q1', None) 52 | self.q2 = kwargs.get('q2', None) 53 | self.q3 = kwargs.get('q3', None) 54 | self.cu_plan = kwargs.get('cu_plan', None) 55 | 56 | D = (2 * self.sigma - 1) * np.pi 57 | self.b = float(2 * self.sigma * self.m) / D 58 | 59 | def allocate_data(self, **kwargs): 60 | self.n0 = kwargs.get('n0', self.n0) 61 | self.nf = kwargs.get('nf', self.nf) 62 | 63 | assert(self.n0 is not None) 64 | assert(self.nf is not None) 65 | 66 | self.t_g = gpuarray.zeros(self.n0, dtype=self.real_type) 67 | self.y_g = gpuarray.zeros(self.n0, dtype=self.real_type) 68 | 69 | return self 70 | 71 | def allocate_precomp_psi(self, **kwargs): 72 | self.n0 = kwargs.get('n0', self.n0) 73 | 74 | assert(self.n0 is not None) 75 | 76 | self.q1 = gpuarray.zeros(self.n0, dtype=self.real_type) 77 | self.q2 = gpuarray.zeros(self.n0, dtype=self.real_type) 78 | self.q3 = gpuarray.zeros(2 * self.m + 1, dtype=self.real_type) 79 | 80 | return self 81 | 82 | def allocate_grid(self, **kwargs): 83 | self.nf = kwargs.get('nf', self.nf) 84 | 85 | assert(self.nf is not None) 86 | 87 | self.n = int(self.sigma * self.nf) 88 | self.ghat_g = gpuarray.zeros(self.n, 89 | dtype=self.complex_type) 90 | self.cu_plan = cufft.Plan(self.n, self.complex_type, self.complex_type, 91 | stream=self.stream) 92 | return self 93 | 94 | def allocate_pinned_cpu(self, **kwargs): 95 | self.nf = kwargs.get('nf', self.nf) 96 | 97 | assert(self.nf is not None) 98 | self.ghat_c = cuda.aligned_zeros(shape=(self.nf,), 99 | dtype=self.complex_type, 100 | alignment=resource.getpagesize()) 101 | self.ghat_c = cuda.register_host_memory(self.ghat_c) 102 | 103 | return self 104 | 105 | def is_ready(self): 106 | assert(self.n0 == len(self.t_g)) 107 | assert(self.n0 == len(self.y_g)) 108 | assert(self.n == 
len(self.ghat_g)) 109 | 110 | if self.ghat_c is not None: 111 | assert(self.nf == len(self.ghat_c)) 112 | 113 | if self.precomp_psi: 114 | assert(self.n0 == len(self.q1)) 115 | assert(self.n0 == len(self.q2)) 116 | assert(2 * self.m + 1 == len(self.q3)) 117 | 118 | def allocate(self, **kwargs): 119 | self.n0 = kwargs.get('n0', self.n0) 120 | self.nf = kwargs.get('nf', self.nf) 121 | 122 | assert(self.n0 is not None) 123 | assert(self.nf is not None) 124 | self.n = int(self.sigma * self.nf) 125 | 126 | self.allocate_data(**kwargs) 127 | self.allocate_grid(**kwargs) 128 | self.allocate_pinned_cpu(**kwargs) 129 | if self.precomp_psi: 130 | self.allocate_precomp_psi(**kwargs) 131 | 132 | return self 133 | 134 | def transfer_data_to_gpu(self, **kwargs): 135 | t = kwargs.get('t', self.t) 136 | y = kwargs.get('y', self.y) 137 | 138 | assert(t is not None) 139 | assert(y is not None) 140 | 141 | self.t_g.set_async(t, stream=self.stream) 142 | self.y_g.set_async(y, stream=self.stream) 143 | 144 | def transfer_nfft_to_cpu(self, **kwargs): 145 | cuda.memcpy_dtoh_async(self.ghat_c, self.ghat_g.ptr, 146 | stream=self.stream) 147 | 148 | def fromdata(self, t, y, allocate=True, **kwargs): 149 | self.tmin = min(t) 150 | self.tmax = max(t) 151 | 152 | self.t = np.asarray(t).astype(self.real_type) 153 | self.y = np.asarray(y).astype(self.real_type) 154 | 155 | self.n0 = kwargs.get('n0', len(t)) 156 | self.nf = kwargs.get('nf', self.nf) 157 | 158 | if self.nf is not None and allocate: 159 | self.allocate(**kwargs) 160 | 161 | return self 162 | 163 | 164 | def nfft_adjoint_async(memory, functions, 165 | minimum_frequency=0., block_size=256, 166 | just_return_gridded_data=False, use_grid=None, 167 | fast_grid=True, transfer_to_device=True, 168 | transfer_to_host=True, precomp_psi=True, 169 | samples_per_peak=1, **kwargs): 170 | """ 171 | Asynchronous NFFT adjoint operation. 172 | 173 | Use the ``NFFTAsyncProcess`` class and related subroutines when possible. 174 | 175 | Parameters 176 | ---------- 177 | memory: ``NFFTMemory`` 178 | Allocated memory, must have data already set (see, e.g., 179 | ``NFFTAsyncProcess.allocate()``) 180 | functions: tuple, length 5 181 | Tuple of compiled functions from `SourceModule`. Must be prepared with 182 | their appropriate dtype. 183 | minimum_frequency: float, optional (default: 0) 184 | First frequency of transform 185 | block_size: int, optional 186 | Number of CUDA threads per block 187 | just_return_gridded_data: bool, optional 188 | If True, returns grid via `grid_g.get()` after gridding 189 | use_grid: ``GPUArray``, optional 190 | If specified, will skip gridding procedure and use the `GPUArray` 191 | provided 192 | fast_grid: bool, optional, default: True 193 | Whether or not to use the "fast" gridding procedure 194 | transfer_to_device: bool, optional, (default: True) 195 | If the data is already on the gpu, set as False 196 | transfer_to_host: bool, optional, (default: True) 197 | If False, will not transfer the resulting nfft to CPU memory 198 | precomp_psi: bool, optional, (default: True) 199 | Only relevant if ``fast`` is True. Will precompute values for the 200 | fast gridding procedure. 
201 | samples_per_peak: float, optional (default: 1) 202 | Frequency spacing is reduced by this factor, but number of frequencies 203 | is kept the same 204 | 205 | Returns 206 | ------- 207 | ghat_cpu: ``np.array`` 208 | The resulting NFFT 209 | """ 210 | 211 | precompute_psi, fast_gaussian_grid, slow_gaussian_grid, \ 212 | nfft_shift, normalize = functions 213 | 214 | stream = memory.stream 215 | 216 | block = (block_size, 1, 1) 217 | 218 | batch_size = 1 219 | 220 | def grid_size(nthreads): 221 | return int(np.ceil(float(nthreads) / block_size)) 222 | 223 | minimum_frequency = memory.real_type(minimum_frequency) 224 | 225 | # transfer data -> gpu 226 | if transfer_to_device: 227 | memory.transfer_data_to_gpu() 228 | 229 | # smooth data onto uniform grid 230 | if fast_grid: 231 | if memory.precomp_psi: 232 | grid = (grid_size(memory.n0 + 2 * memory.m + 1), 1) 233 | args = (grid, block, stream) 234 | args += (memory.t_g.ptr,) 235 | args += (memory.q1.ptr, memory.q2.ptr, memory.q3.ptr) 236 | args += (np.int32(memory.n0), np.int32(memory.n), 237 | np.int32(memory.m), memory.real_type(memory.b)) 238 | args += (memory.real_type(memory.tmin), 239 | memory.real_type(memory.tmax), 240 | memory.real_type(samples_per_peak)) 241 | precompute_psi.prepared_async_call(*args) 242 | 243 | grid = (grid_size(memory.n0), 1) 244 | args = (grid, block, stream) 245 | args += (memory.t_g.ptr, memory.y_g.ptr, memory.ghat_g.ptr) 246 | args += (memory.q1.ptr, memory.q2.ptr, memory.q3.ptr) 247 | args += (np.int32(memory.n0), np.int32(memory.n), 248 | np.int32(batch_size), np.int32(memory.m)) 249 | args += (memory.real_type(memory.tmin), 250 | memory.real_type(memory.tmax), 251 | memory.real_type(samples_per_peak)) 252 | fast_gaussian_grid.prepared_async_call(*args) 253 | 254 | else: 255 | grid = (grid_size(memory.n), 1) 256 | args = (grid, block, stream) 257 | args += (memory.t_g.ptr, memory.y_g.ptr, memory.ghat_g.ptr) 258 | args += (np.int32(memory.n0), np.int32(memory.n), 259 | np.int32(batch_size), np.int32(memory.m), 260 | memory.real_type(memory.b)) 261 | args += (memory.real_type(memory.tmin), 262 | memory.real_type(memory.tmax), 263 | memory.real_type(samples_per_peak)) 264 | slow_gaussian_grid.prepared_async_call(*args) 265 | 266 | # Stop if user wants the grid 267 | if just_return_gridded_data: 268 | stream.synchronize() 269 | return np.real(memory.ghat_g.get()) 270 | 271 | # Set the grid manually if the user wants to 272 | # (only for debugging) 273 | if use_grid is not None: 274 | memory.ghat_g.set(use_grid) 275 | 276 | # for a non-zero minimum frequency, do a shift 277 | if abs(minimum_frequency) > 1E-9: 278 | grid = (grid_size(memory.n), 1) 279 | args = (grid, block, stream) 280 | args += (memory.ghat_g.ptr, memory.ghat_g.ptr) 281 | args += (np.int32(memory.n), np.int32(batch_size)) 282 | args += (memory.real_type(memory.tmin), 283 | memory.real_type(memory.tmax), 284 | memory.real_type(samples_per_peak), 285 | memory.real_type(minimum_frequency)) 286 | nfft_shift.prepared_async_call(*args) 287 | 288 | # Run IFFT on grid 289 | cufft.ifft(memory.ghat_g, memory.ghat_g, memory.cu_plan) 290 | 291 | # Normalize result (deconvolve smoothing kernel) 292 | grid = (grid_size(memory.nf), 1) 293 | args = (grid, block, stream) 294 | args += (memory.ghat_g.ptr, memory.ghat_g.ptr) 295 | args += (np.int32(memory.n), 296 | np.int32(memory.nf), 297 | np.int32(batch_size), 298 | memory.real_type(memory.b)) 299 | args += (memory.real_type(memory.tmin), 300 | memory.real_type(memory.tmax), 301 | 
memory.real_type(samples_per_peak), 302 | memory.real_type(minimum_frequency)) 303 | normalize.prepared_async_call(*args) 304 | 305 | # Transfer result! 306 | if transfer_to_host: 307 | memory.transfer_nfft_to_cpu() 308 | 309 | return memory.ghat_c 310 | 311 | 312 | class NFFTAsyncProcess(GPUAsyncProcess): 313 | """ 314 | `GPUAsyncProcess` for the adjoint NFFT. 315 | 316 | Parameters 317 | ---------- 318 | sigma: float, optional (default: 2) 319 | Size of NFFT grid will be NFFT_SIZE * sigma 320 | m: int, optional (default: 8) 321 | Maximum radius for grid contributions (by default, 322 | this value will automatically be set based on a specified 323 | error tolerance) 324 | autoset_m: bool, optional (default: True) 325 | Automatically set the ``m`` parameter based on the 326 | error tolerance given by the ``m_tol`` parameter 327 | tol: float, optional (default: 1E-8) 328 | Error tolerance for the NFFT (used to auto set ``m``) 329 | block_size: int, optional (default: 256) 330 | CUDA block size. 331 | use_double: bool, optional (default: False) 332 | Use double precision. On non-Tesla cards this will 333 | make things ~24 times slower. 334 | use_fast_math: bool, optional (default: True) 335 | Compile kernel with the ``--use_fast_math`` option 336 | supplied to ``nvcc``. 337 | 338 | Example 339 | ------- 340 | 341 | >>> import numpy as np 342 | >>> t = np.random.rand(100) 343 | >>> y = np.cos(10 * t - 0.4) + 0.1 * np.random.randn(len(t)) 344 | >>> proc = NFFTAsyncProcess() 345 | >>> data = [(t, y, 2 * len(t))] 346 | >>> nfft_adjoint = proc.run(data) 347 | 348 | """ 349 | 350 | def __init__(self, *args, **kwargs): 351 | super(NFFTAsyncProcess, self).__init__(*args, **kwargs) 352 | 353 | self.sigma = kwargs.get('sigma', 4) 354 | self.m = kwargs.get('m', 8) 355 | self.autoset_m = kwargs.get('autoset_m', False) 356 | self.block_size = kwargs.get('block_size', 256) 357 | self.use_double = kwargs.get('use_double', False) 358 | self.m_tol = kwargs.get('tol', 1E-8) 359 | self.module_options = [] 360 | if kwargs.get('use_fast_math', True): 361 | self.module_options.append('--use_fast_math') 362 | 363 | self.real_type = np.float64 if self.use_double \ 364 | else np.float32 365 | self.complex_type = np.complex128 if self.use_double \ 366 | else np.complex64 367 | 368 | self._cpp_defs = dict(BLOCK_SIZE=self.block_size) 369 | if self.use_double: 370 | self._cpp_defs['DOUBLE_PRECISION'] = None 371 | 372 | self.function_names = ['precompute_psi', 373 | 'fast_gaussian_grid', 374 | 'slow_gaussian_grid', 'nfft_shift', 375 | 'normalize'] 376 | 377 | self.allocated_memory = [] 378 | 379 | def m_from_C(self, C, sigma): 380 | """ 381 | Returns an estimate for what ``m`` value to use from ``C``, 382 | where ``C`` is something like ``err_tolerance/N_freq``. 383 | 384 | Pulled from _ 385 | """ 386 | D = (np.pi * (1. - 1. / (2. * sigma - 1.))) 387 | return int(np.ceil(-np.log(0.25 * C) / D)) 388 | 389 | def estimate_m(self, N): 390 | """ 391 | Estimate ``m`` based on an error tolerance of ``self.tol``. 392 | 393 | Parameters 394 | ---------- 395 | N: int 396 | size of NFFT 397 | 398 | Returns 399 | ------- 400 | m: int 401 | Maximum grid radius 402 | 403 | Notes 404 | ----- 405 | Pulled from _. 406 | 407 | """ 408 | 409 | # TODO: this should be computed in terms of the L1-norm of the true 410 | # Fourier coefficients... see p. 
11 of 411 | # https://www-user.tu-chemnitz.de/~potts/nfft/guide/nfft3.pdf 412 | # Need to think about how to estimate the value of m more accurately 413 | return self.m_from_C(self.m_tol / N, self.sigma) 414 | 415 | def get_m(self, N=None): 416 | """ 417 | Returns the ``m`` value for ``N`` frequencies. 418 | 419 | Parameters 420 | ---------- 421 | N: int 422 | Number of frequencies, only needed if ``autoset_m`` is ``False``. 423 | 424 | Returns 425 | ------- 426 | m: int 427 | The filter radius (in grid points) 428 | """ 429 | if self.autoset_m: 430 | return self.estimate_m(N) 431 | else: 432 | return self.m 433 | 434 | def _compile_and_prepare_functions(self, **kwargs): 435 | module_txt = _module_reader(find_kernel('cunfft'), self._cpp_defs) 436 | 437 | self.module = SourceModule(module_txt, options=self.module_options) 438 | 439 | self.dtypes = dict( 440 | precompute_psi=[np.intp, np.intp, np.intp, np.intp, np.int32, 441 | np.int32, np.int32, self.real_type, 442 | self.real_type, self.real_type, self.real_type], 443 | 444 | fast_gaussian_grid=[np.intp, np.intp, np.intp, np.intp, 445 | np.intp, np.intp, np.int32, np.int32, 446 | np.int32, np.int32, self.real_type, 447 | self.real_type, self.real_type], 448 | 449 | slow_gaussian_grid=[np.intp, np.intp, np.intp, np.int32, 450 | np.int32, np.int32, np.int32, self.real_type, 451 | self.real_type, self.real_type, 452 | self.real_type], 453 | 454 | normalize=[np.intp, np.intp, np.int32, np.int32, np.int32, 455 | self.real_type, self.real_type, self.real_type, 456 | self.real_type, self.real_type], 457 | 458 | nfft_shift=[np.intp, np.intp, np.int32, np.int32, self.real_type, 459 | self.real_type, self.real_type, self.real_type] 460 | ) 461 | 462 | for function, dtype in self.dtypes.items(): 463 | func = self.module.get_function(function) 464 | self.prepared_functions[function] = func.prepare(dtype) 465 | 466 | self.function_tuple = tuple([self.prepared_functions[f] 467 | for f in self.function_names]) 468 | 469 | def allocate(self, data, **kwargs): 470 | """ 471 | Allocate GPU memory for NFFT-related computations 472 | 473 | Parameters 474 | ---------- 475 | data: list of (t, y, N) tuples 476 | List of data, ``[(t_1, y_1, N_1), ...]`` 477 | * ``t``: Observation times. 478 | * ``y``: Observations. 479 | * ``nf``: int, FFT size 480 | **kwargs 481 | 482 | Returns 483 | ------- 484 | allocated_memory: list of ``NFFTMemory`` objects 485 | List of allocated memory for each dataset 486 | 487 | """ 488 | 489 | # Purge any previously allocated memory 490 | allocated_memory = [] 491 | 492 | if len(data) > len(self.streams): 493 | self._create_streams(len(data) - len(self.streams)) 494 | 495 | for i, (t, y, nf) in enumerate(data): 496 | 497 | m = self.get_m(nf) 498 | 499 | mem = NFFTMemory(self.sigma, self.streams[i], m, 500 | use_double=self.use_double, **kwargs) 501 | 502 | allocated_memory.append(mem.fromdata(t, y, nf=nf, 503 | allocate=True, 504 | **kwargs)) 505 | 506 | return allocated_memory 507 | 508 | def run(self, data, memory=None, **kwargs): 509 | """ 510 | Run the adjoint NFFT on a batch of data 511 | 512 | Parameters 513 | ---------- 514 | data: list of tuples 515 | list of [(t, y, w), ...] 
containing 516 | * ``t``: observation times 517 | * ``y``: observations 518 | * ``nf``: int, size of NFFT 519 | memory: 520 | **kwargs 521 | 522 | Returns 523 | ------- 524 | powers: list of np.ndarrays 525 | List of adjoint NFFTs 526 | 527 | """ 528 | if not hasattr(self, 'prepared_functions') or \ 529 | not all([func in self.prepared_functions 530 | for func in self.function_names]): 531 | self._compile_and_prepare_functions(**kwargs) 532 | 533 | if memory is None: 534 | memory = self.allocate(data, **kwargs) 535 | 536 | nfft_kwargs = dict(block_size=self.block_size) 537 | nfft_kwargs.update(kwargs) 538 | 539 | results = [nfft_adjoint_async(mem, self.function_tuple, 540 | **nfft_kwargs) 541 | for mem in memory] 542 | 543 | return results 544 | -------------------------------------------------------------------------------- /cuvarbase/kernels/bls.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #define RESTRICT __restrict__ 3 | #define CONSTANT const 4 | #define MIN_W 1E-3 5 | //{CPP_DEFS} 6 | 7 | __device__ unsigned int get_id(){ 8 | return blockIdx.x * blockDim.x + threadIdx.x; 9 | } 10 | 11 | __device__ int mod(int a, int b){ 12 | int r = a % b; 13 | return (r < 0) ? r + b : r; 14 | } 15 | 16 | __device__ float mod1(float a){ 17 | return a - floorf(a); 18 | } 19 | 20 | __device__ float bls_value(float ybar, float w, unsigned int ignore_negative_delta_sols){ 21 | // if ignore negative delta sols is turned on, that means only solutions where 22 | // the mean amplitude within the transit is _lower_ than the mean amplitude of the source 23 | // are considered: it will ignore "inverted dips" 24 | float bls = (w > 1e-10 && w < 1.f - 1e-10) ? ybar * ybar / (w * (1.f - w)) : 0.f; 25 | return ((ignore_negative_delta_sols == 1) & (ybar > 0)) ? 0.f : bls; 26 | } 27 | 28 | __global__ void binned_bls_bst(float *yw, float *w, float *bls, unsigned int n, unsigned int ignore_negative_delta_sols){ 29 | unsigned int i = get_id(); 30 | 31 | if (i < n){ 32 | bls[i] = bls_value(yw[i], w[i], ignore_negative_delta_sols); 33 | } 34 | } 35 | 36 | 37 | __device__ unsigned int dnbins(unsigned int nbins, float dlogq){ 38 | 39 | if (dlogq < 0) 40 | return 1; 41 | 42 | unsigned int n = (unsigned int) floorf(dlogq * nbins); 43 | 44 | return (n == 0) ? 1 : n; 45 | } 46 | 47 | __device__ unsigned int nbins_iter(unsigned int i, unsigned int nb0, float dlogq){ 48 | 49 | 50 | if (i == 0) 51 | return nb0; 52 | 53 | unsigned int nb = nb0; 54 | for(int j = 0; j < i; j++) 55 | nb += dnbins(nb, dlogq); 56 | 57 | return nb; 58 | } 59 | 60 | __device__ unsigned int count_tot_nbins(unsigned int nbins0, unsigned int nbinsf, float dlogq){ 61 | unsigned int ntot = 0; 62 | 63 | for(int i = 0; nbins_iter(i, nbins0, dlogq) <= nbinsf; i++) 64 | ntot += nbins_iter(i, nbins0, dlogq); 65 | return ntot; 66 | } 67 | 68 | 69 | 70 | __global__ void store_best_sols_custom(unsigned int *argmaxes, float *best_phi, 71 | float *best_q, float *q_values, 72 | float *phi_values, unsigned int nq, unsigned int nphi, 73 | unsigned int nfreq, unsigned int freq_offset){ 74 | 75 | unsigned int i = get_id(); 76 | 77 | if (i < nfreq){ 78 | unsigned int imax = argmaxes[i + freq_offset]; 79 | 80 | best_phi[i + freq_offset] = phi_values[imax / nq]; 81 | best_q[i + freq_offset] = q_values[imax % nq]; 82 | } 83 | } 84 | 85 | 86 | __device__ int divrndup(int a, int b){ 87 | return (a % b > 0) ? 
a/b + 1 : a/b; 88 | } 89 | 90 | 91 | 92 | 93 | __global__ void store_best_sols(unsigned int *argmaxes, float *best_phi, 94 | float *best_q, 95 | unsigned int nbins0, unsigned int nbinsf, 96 | unsigned int noverlap, 97 | float dlogq, unsigned int nfreq, unsigned int freq_offset){ 98 | 99 | unsigned int i = get_id(); 100 | 101 | if (i < nfreq){ 102 | unsigned int imax = argmaxes[i + freq_offset]; 103 | float dphi = 1. / noverlap; 104 | 105 | unsigned int nb = nbins0; 106 | unsigned int bin_offset = 0; 107 | unsigned int i_iter = 0; 108 | while ((bin_offset + nb) * noverlap <= imax){ 109 | bin_offset += nb; 110 | nb = nbins_iter(++i_iter, nbins0, dlogq); 111 | } 112 | 113 | float q = 1. / nb; 114 | int s = (((int) imax) - ((int) (bin_offset * noverlap))) / nb; 115 | int jphi = (((int) imax) - ((int) (bin_offset * noverlap))) % nb; 116 | 117 | float phi = mod1((float) (((double) q) * (((double) jphi) + ((double) s) * ((double) dphi)))); 118 | 119 | best_phi[i + freq_offset] = phi; 120 | best_q[i + freq_offset] = q; 121 | } 122 | } 123 | 124 | // needs ndata * nfreq threads 125 | // noverlap -- number of overlapped bins (noverlap * (1 / q) total bins) 126 | // Note: this thread heavily utilizes global atomic operations, and could 127 | // likely be improved by 1-2 orders of magnitude for large Ndata (10^4) 128 | // if shared memory atomics were utilized. 129 | __global__ void bin_and_phase_fold_bst_multifreq( 130 | float *t, float *yw, float *w, 131 | float *yw_bin, float *w_bin, float *freqs, 132 | unsigned int ndata, unsigned int nfreq, unsigned int nbins0, unsigned int nbinsf, 133 | unsigned int freq_offset, unsigned int noverlap, float dlogq, 134 | unsigned int nbins_tot){ 135 | unsigned int i = get_id(); 136 | 137 | if (i < ndata * nfreq){ 138 | unsigned int i_data = i % ndata; 139 | unsigned int i_freq = i / ndata; 140 | 141 | unsigned int offset = i_freq * nbins_tot * noverlap; 142 | 143 | float W = w[i_data]; 144 | float YW = yw[i_data]; 145 | 146 | // get phase [0, 1) 147 | float phi = mod1(t[i_data] * freqs[i_freq + freq_offset]); 148 | 149 | float dphi = 1.f / noverlap; 150 | unsigned int nbtot = 0; 151 | unsigned int nb, b; 152 | 153 | // iterate through bins (logarithmically spaced) 154 | for(int j = 0; nbins_iter(j, nbins0, dlogq) <= nbinsf; j++){ 155 | nb = nbins_iter(j, nbins0, dlogq); 156 | 157 | // iterate through offsets [ 0, 1./sigma, ..., 158 | // (sigma - 1) / sigma ] 159 | for (int s = 0; s < noverlap; s++){ 160 | b = (unsigned int) mod((int) floorf(nb * phi - s * dphi), nb); 161 | b += offset + s * nb + noverlap * nbtot; 162 | 163 | atomicAdd(&(yw_bin[b]), YW); 164 | atomicAdd(&(w_bin[b]), W); 165 | } 166 | nbtot += nb; 167 | } 168 | } 169 | } 170 | 171 | 172 | __global__ void full_bls_no_sol( 173 | const float* __restrict__ t, 174 | const float* __restrict__ yw, 175 | const float* __restrict__ w, 176 | float* __restrict__ bls, 177 | const float* __restrict__ freqs, 178 | const unsigned int * __restrict__ nbins0, 179 | const unsigned int * __restrict__ nbinsf, 180 | unsigned int ndata, 181 | unsigned int nfreq, 182 | unsigned int freq_offset, 183 | unsigned int hist_size, 184 | unsigned int noverlap, 185 | float dlogq, 186 | float dphi, 187 | unsigned int ignore_negative_delta_sols){ 188 | unsigned int i = get_id(); 189 | 190 | extern __shared__ float sh[]; 191 | 192 | float *block_bins = sh; 193 | float *best_bls = (float *)&sh[2 * hist_size]; 194 | 195 | __shared__ float f0; 196 | __shared__ int nb0, nbf, max_bin_width; 197 | 198 | #ifdef USE_LOG_BIN_SPACING 199 | 
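    // total number of bins summed over all log-spaced bin widths for this
    // frequency; computed once per frequency via count_tot_nbins()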
__shared__ int tot_nbins; 200 | #endif 201 | 202 | unsigned int s; 203 | int b; 204 | float phi, bls1, bls2, thread_max_bls, thread_yw, thread_w; 205 | 206 | // this will be inefficient for block sizes >> number of bins per frequency 207 | unsigned int i_freq = blockIdx.x; 208 | while (i_freq < nfreq){ 209 | 210 | thread_max_bls = 0.f; 211 | 212 | if (threadIdx.x == 0){ 213 | // read frequency from global memory 214 | f0 = freqs[i_freq + freq_offset]; 215 | 216 | // read nbins from global memory 217 | nb0 = nbins0[i_freq + freq_offset]; 218 | nbf = nbinsf[i_freq + freq_offset]; 219 | 220 | max_bin_width = divrndup(nbf, nb0); 221 | 222 | #ifdef USE_LOG_BIN_SPACING 223 | tot_nbins = count_tot_nbins(nb0, nbf, dlogq); 224 | #endif 225 | } 226 | 227 | // wait for broadcasting to finish 228 | __syncthreads(); 229 | 230 | // intialize bins to 0 (synchronization is necessary here...) 231 | for(unsigned int k = threadIdx.x; k < nbf; k += blockDim.x){ 232 | block_bins[2 * k] = 0.f; 233 | block_bins[2 * k + 1] = 0.f; 234 | } 235 | 236 | // wait for initialization to finish 237 | __syncthreads(); 238 | 239 | // histogram the data 240 | for (unsigned int k = threadIdx.x; k < ndata; k += blockDim.x){ 241 | phi = mod1(t[k] * f0); 242 | 243 | b = mod((int) floorf(((float) nbf) * phi - dphi), (int) nbf); 244 | 245 | // shared memory atomics should (hopefully) be faster. 246 | atomicAdd(&(block_bins[2 * b]), yw[k]); 247 | atomicAdd(&(block_bins[2 * b + 1]), w[k]); 248 | } 249 | 250 | // wait for everyone to finish adding data to the histogram 251 | __syncthreads(); 252 | 253 | // get max bls for this THREAD 254 | #ifdef USE_LOG_BIN_SPACING 255 | for (unsigned int n = threadIdx.x; n < tot_nbins; n += blockDim.x){ 256 | 257 | unsigned int bin_offset = 0; 258 | unsigned int nb = nb0; 259 | while ((bin_offset + nb) * noverlap < n){ 260 | bin_offset += nb; 261 | nb += dnbins(nb, dlogq); 262 | } 263 | 264 | b = (((int) n) - ((int) (bin_offset * noverlap))) % nb; 265 | s = (((int) n) - ((int) (bin_offset * noverlap))) / nb; 266 | 267 | thread_yw = 0.f; 268 | thread_w = 0.f; 269 | unsigned int m0 = 0; 270 | 271 | for (unsigned int m = b; m < b + nb; m ++){ 272 | thread_yw += block_bins[2 * (m % nbf)]; 273 | thread_w += block_bins[2 * (m % nbf) + 1]; 274 | } 275 | 276 | bls1 = bls_value(thread_yw, thread_w, ignore_negative_delta_sols); 277 | if (bls1 > thread_max_bls) 278 | thread_max_bls = bls1; 279 | } 280 | 281 | #else 282 | for (unsigned int n = threadIdx.x; n < nbf; n += blockDim.x){ 283 | 284 | thread_yw = 0.f; 285 | thread_w = 0.f; 286 | unsigned int m0 = 0; 287 | 288 | for (unsigned int m = 1; m < max_bin_width; m += dnbins(m, dlogq)){ 289 | for (s = m0; s < m; s++){ 290 | thread_yw += block_bins[2 * ((n + s) % nbf)]; 291 | thread_w += block_bins[2 * ((n + s) % nbf) + 1]; 292 | } 293 | m0 = m; 294 | 295 | bls1 = bls_value(thread_yw, thread_w, ignore_negative_delta_sols); 296 | if (bls1 > thread_max_bls) 297 | thread_max_bls = bls1; 298 | } 299 | } 300 | #endif 301 | 302 | best_bls[threadIdx.x] = thread_max_bls; 303 | 304 | // wait for everyone to finish 305 | __syncthreads(); 306 | 307 | // get max bls for this BLOCK 308 | for(unsigned int k = (blockDim.x / 2); k > 0; k /= 2){ 309 | if(threadIdx.x < k){ 310 | bls1 = best_bls[threadIdx.x]; 311 | bls2 = best_bls[threadIdx.x + k]; 312 | 313 | best_bls[threadIdx.x] = (bls1 > bls2) ? 
bls1 : bls2; 314 | } 315 | __syncthreads(); 316 | } 317 | 318 | // store block max to global memory 319 | if (threadIdx.x == 0) 320 | bls[i_freq + freq_offset] = best_bls[0]; 321 | 322 | // increment frequency 323 | i_freq += gridDim.x; 324 | } 325 | } 326 | 327 | 328 | // needs ndata * nfreq threads 329 | // noverlap -- number of overlapped bins (noverlap * (1 / q) total bins) 330 | __global__ void bin_and_phase_fold_custom( 331 | float *t, float *yw, float *w, 332 | float *yw_bin, float *w_bin, float *freqs, 333 | float *q_values, float *phi_values, 334 | unsigned int nq, unsigned int nphi, unsigned int ndata, 335 | unsigned int nfreq, unsigned int freq_offset){ 336 | unsigned int i = get_id(); 337 | 338 | if (i < ndata * nfreq){ 339 | unsigned int i_data = i % ndata; 340 | unsigned int i_freq = i / ndata; 341 | 342 | unsigned int offset = i_freq * nq * nphi; 343 | 344 | float W = w[i_data]; 345 | float YW = yw[i_data]; 346 | 347 | // get phase [0, 1) 348 | float phi = mod1(t[i_data] * freqs[i_freq + freq_offset]); 349 | 350 | for(int pb = 0; pb < nphi; pb++){ 351 | float dphi = phi - phi_values[pb]; 352 | dphi -= floorf(dphi); 353 | 354 | for(int qb = 0; qb < nq; qb++){ 355 | if (dphi < q_values[qb]){ 356 | atomicAdd(&(yw_bin[pb * nq + qb + offset]), YW); 357 | atomicAdd(&(w_bin[pb * nq + qb + offset]), W); 358 | } 359 | } 360 | } 361 | } 362 | } 363 | 364 | 365 | 366 | 367 | __global__ void reduction_max(float *arr, unsigned int *arr_args, unsigned int nfreq, 368 | unsigned int nbins, unsigned int stride, 369 | float *block_max, unsigned int *block_arg_max, 370 | unsigned int offset, unsigned int init){ 371 | 372 | __shared__ float partial_max[BLOCK_SIZE]; 373 | __shared__ unsigned int partial_arg_max[BLOCK_SIZE]; 374 | 375 | unsigned int id = blockIdx.x * blockDim.x + threadIdx.x; 376 | 377 | unsigned int nblocks_per_freq = gridDim.x / nfreq; 378 | unsigned int nthreads_per_freq = blockDim.x * nblocks_per_freq; 379 | 380 | 381 | 382 | 383 | // freq_no / b 384 | // ----block 1 ----- ----- block N ------------------------ 385 | // 0 | 0 1 2 .. B - 1 | ... | (N - 1)B, ... , ndata, ..., N * B - 1| 386 | // 387 | // ---block N + 1--- ---- block 2N ------------------------ 388 | // 1 | 0 1 2 .. B - 1 | ... | (N - 1)B, ... , ndata, ..., N * B - 1| 389 | // ... 390 | // 391 | // ---(nf - 1)N ---- --- nf * N --- 392 | // nf - 1 | .. | ... | | 393 | 394 | unsigned int fno = id / nthreads_per_freq; 395 | unsigned int b = id % nthreads_per_freq; 396 | 397 | // read part of array from global memory into shared memory 398 | partial_max[threadIdx.x] = (fno < nfreq && b < nbins) ? 399 | arr[fno * stride + b] : -1.f; 400 | 401 | partial_arg_max[threadIdx.x] = (fno < nfreq && b < nbins) ? 402 | ( 403 | (init == 1) ? 404 | b : arr_args[fno * stride + b] 405 | ) : 0; 406 | 407 | __syncthreads(); 408 | 409 | float m1, m2; 410 | 411 | // reduce to find max of shared memory array 412 | for(int s = blockDim.x / 2; s > 0; s /= 2){ 413 | if(threadIdx.x < s){ 414 | m1 = partial_max[threadIdx.x]; 415 | m2 = partial_max[threadIdx.x + s]; 416 | 417 | partial_max[threadIdx.x] = (m1 > m2) ? m1 : m2; 418 | 419 | partial_arg_max[threadIdx.x] = (m1 > m2) ? 420 | partial_arg_max[threadIdx.x] : 421 | partial_arg_max[threadIdx.x + s]; 422 | } 423 | 424 | __syncthreads(); 425 | } 426 | 427 | // store partial max back into global memory 428 | if (threadIdx.x == 0 && fno < nfreq){ 429 | unsigned int i = (gridDim.x == nfreq) ? 
0 : 430 | fno * stride - fno * nblocks_per_freq; 431 | 432 | i += blockIdx.x + offset; 433 | 434 | block_max[i] = partial_max[0]; 435 | block_arg_max[i] = partial_arg_max[0]; 436 | } 437 | } 438 | -------------------------------------------------------------------------------- /cuvarbase/kernels/ce.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | //{CPP_DEFS} 4 | 5 | #ifndef MAX_SHARED_MEM_SIZE 6 | #define MAX_SHARED_MEM_SIZE 48000 7 | #endif 8 | 9 | #ifdef DOUBLE_PRECISION 10 | #define ATOMIC_ADD atomicAddDouble 11 | #define FLT double 12 | #else 13 | #define ATOMIC_ADD atomicAdd 14 | #define FLT float 15 | #endif 16 | 17 | 18 | __device__ double atomicAddDouble(double* address, double val) 19 | { 20 | unsigned long long int* address_as_ull = 21 | (unsigned long long int*)address; 22 | unsigned long long int old = *address_as_ull, assumed; 23 | do { 24 | assumed = old; 25 | old = atomicCAS(address_as_ull, assumed, 26 | __double_as_longlong(val + 27 | __longlong_as_double(assumed))); 28 | } while (assumed != old); 29 | return __longlong_as_double(old); 30 | } 31 | 32 | 33 | __device__ FLT mod1(FLT x){ 34 | return x - floor(x); 35 | } 36 | 37 | __device__ int phase_ind(FLT ft){ 38 | int n = (int) (mod1(ft) * NPHASE); 39 | return n % NPHASE; 40 | } 41 | 42 | __device__ int posmod(int n, int N){ 43 | return (n < 0) ? n + N : n % N; 44 | } 45 | 46 | 47 | 48 | __global__ void histogram_data_weighted(FLT *t, FLT *y, FLT *dy, 49 | FLT *bin, FLT *freqs, 50 | unsigned int nfreq, unsigned int ndata, 51 | FLT max_phi){ 52 | 53 | unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; 54 | 55 | unsigned int i_freq = i / ndata; 56 | unsigned int j_data = i % ndata; 57 | 58 | if (i_freq < nfreq){ 59 | FLT Y = y[j_data]; 60 | FLT DY = dy[j_data]; 61 | 62 | int n0 = phase_ind(freqs[i_freq] * t[j_data]); 63 | unsigned int offset = i_freq * (NMAG * NPHASE); 64 | 65 | int m0 = (int) (Y * NMAG); 66 | 67 | for(int m = 0; m < NMAG; m++){ 68 | FLT z = (((FLT) m) / NMAG - Y); 69 | if (abs(z) > max_phi * DY && m != m0) 70 | continue; 71 | FLT zmax = z + (1 + MAG_OVERLAP) / ((FLT) NMAG); 72 | FLT wtot = normcdf(zmax / DY) - normcdf(z / DY); 73 | 74 | for(int n = n0; n >= n0 - PHASE_OVERLAP; n--) 75 | ATOMIC_ADD(&(bin[offset + posmod(n, NPHASE) * NMAG + m]), wtot); 76 | 77 | } 78 | } 79 | 80 | } 81 | 82 | __global__ void histogram_data_count(FLT *t, unsigned int *y, 83 | unsigned int *bin, 84 | FLT *freqs, unsigned int nfreq, 85 | unsigned int ndata){ 86 | 87 | unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; 88 | 89 | unsigned int i_freq = i / ndata; 90 | unsigned int j_data = i % ndata; 91 | if (i_freq < nfreq){ 92 | unsigned int offset = i_freq * (NMAG * NPHASE); 93 | unsigned int m0 = y[j_data]; 94 | int n0 = phase_ind(freqs[i_freq] * t[j_data]); 95 | 96 | for (int n = (int) n0; n >= (((int) n0) - PHASE_OVERLAP); n--){ 97 | for (int m = (int) m0; m >= 0 && m >= (((int) m0) - MAG_OVERLAP); m--) { 98 | atomicInc(&(bin[offset + posmod(n, NPHASE) * NMAG + m]), 99 | (PHASE_OVERLAP + 1) * (MAG_OVERLAP + 1) * ndata); 100 | } 101 | } 102 | } 103 | } 104 | 105 | __device__ unsigned int rnduppow2(unsigned int u){ 106 | unsigned int v = u; 107 | v--; 108 | v |= v >> 1; 109 | v |= v >> 2; 110 | v |= v >> 4; 111 | v |= v >> 8; 112 | v |= v >> 16; 113 | v++; 114 | 115 | return v; 116 | } 117 | 118 | 119 | 120 | 121 | __global__ void ce_classical_fast(const FLT * __restrict__ t, 122 | const unsigned int * __restrict__ y, 123 | const FLT * __restrict__ freqs, 124 | 
FLT * __restrict__ ce, 125 | unsigned int nfreq, 126 | unsigned int freq_offset, 127 | unsigned int ndata, 128 | unsigned int nphase, 129 | unsigned int nmag, 130 | unsigned int phase_overlap, 131 | unsigned int mag_overlap){ 132 | 133 | extern __shared__ unsigned int sh[]; 134 | 135 | // (unsigned int + FLT) * nmag * nphase + nphase * (unsigned int) 136 | //__shared__ float *t_sh = sh; 137 | //__shared__ unsigned int *y_sh = (unsigned int *)&t_sh[ndata]; 138 | //__shared__ unsigned int *bin = (unsigned int *)&y_sh[ndata]; 139 | 140 | unsigned int * block_bin = (unsigned int *)sh; 141 | unsigned int * block_bin_phi = (unsigned int *)&block_bin[nmag * nphase]; 142 | 143 | // align! 144 | unsigned int r = ((nmag * nphase + nphase) * sizeof(unsigned int)) % sizeof(FLT); 145 | FLT * Hc = (FLT *)&block_bin_phi[nphase + r]; 146 | __shared__ FLT f0; 147 | 148 | // each block works on a single frequency. 149 | unsigned int i_freq = blockIdx.x; 150 | 151 | unsigned int i, N, Nphi; 152 | unsigned int ntot_2 = rnduppow2(nmag * nphase); 153 | unsigned int nphase_2 = rnduppow2(nphase); 154 | int m, n, m0, n0; 155 | 156 | FLT dm0 = ((FLT) (mag_overlap + 1.f)) / nmag; 157 | FLT dm; 158 | while (i_freq < nfreq){ 159 | 160 | // read frequency from global data 161 | if (threadIdx.x == 0){ 162 | f0 = freqs[i_freq + freq_offset]; 163 | } 164 | 165 | // initialise blocks to zero 166 | for(i = threadIdx.x; i < nmag * nphase; i += blockDim.x){ 167 | if (i < nphase) 168 | block_bin_phi[i] = 0; 169 | 170 | block_bin[i] = 0; 171 | Hc[i] = 0.f; 172 | } 173 | 174 | __syncthreads(); 175 | 176 | // make 2d histogram 177 | for(i = threadIdx.x; i < ndata; i += blockDim.x){ 178 | m0 = (int) (y[i]); 179 | n0 = ((int) floor(nphase * mod1(t[i] * f0))) % nphase; 180 | 181 | for (n = n0; n >= (((int) n0) - ((int) phase_overlap)); n--){ 182 | for (m = m0; m >= 0 && m >= (((int) m0) - ((int) mag_overlap)); m--) 183 | atomicInc(&(block_bin[posmod(n, nphase) * nmag + m]), 184 | (phase_overlap + 1) * (mag_overlap + 1) * ndata); 185 | 186 | } 187 | } 188 | 189 | __syncthreads(); 190 | 191 | // Get the total number of data points across phi bins 192 | for(n=threadIdx.x; n < nmag * nphase; n+=blockDim.x) 193 | atomicAdd(&(block_bin_phi[n / nmag]), block_bin[n]); 194 | 195 | __syncthreads(); 196 | 197 | // Convert to dH 198 | for(n=threadIdx.x; n < nmag * nphase; n+=blockDim.x){ 199 | m0 = n % nmag; 200 | n0 = n / nmag; 201 | 202 | N = block_bin[n]; 203 | Nphi = block_bin_phi[n0]; 204 | 205 | if (Nphi * N == 0) 206 | continue; 207 | 208 | // adjust mag bin width for overlapping mag bins (phase bins are periodic) 209 | dm = (m0 + mag_overlap + 1 > nmag) ? 
(((int) nmag) - m0) * dm0 / (1.f + mag_overlap) : dm0; 210 | 211 | Hc[n] = ((FLT) N) * log((dm * ((FLT) Nphi)) / ((FLT) N)); 212 | } 213 | 214 | __syncthreads(); 215 | 216 | //add up contributions 217 | for(n = ntot_2 / 2; n > 0; n/=2){ 218 | for (m = threadIdx.x; m < n && m + n < nmag * nphase; m += blockDim.x) 219 | Hc[m] += Hc[m + n]; 220 | __syncthreads(); 221 | } 222 | 223 | // add up total bin counts 224 | for(n = nphase_2 / 2; n > 0; n/=2){ 225 | for (m = threadIdx.x; m < n && m + n < nphase; m += blockDim.x) 226 | block_bin_phi[m] += block_bin_phi[m + n]; 227 | __syncthreads(); 228 | } 229 | 230 | // write result to global memory 231 | if (threadIdx.x == 0) 232 | ce[i_freq + freq_offset] = Hc[0] / block_bin_phi[0]; 233 | 234 | i_freq += gridDim.x; 235 | } 236 | } 237 | 238 | 239 | 240 | 241 | __global__ void ce_classical_faster(const FLT * __restrict__ t, 242 | const unsigned int * __restrict__ y, 243 | const FLT * __restrict__ freqs, 244 | FLT * __restrict__ ce, 245 | unsigned int nfreq, 246 | unsigned int freq_offset, 247 | unsigned int ndata, 248 | unsigned int nphase, 249 | unsigned int nmag, 250 | unsigned int phase_overlap, 251 | unsigned int mag_overlap){ 252 | 253 | extern __shared__ unsigned int sh[]; 254 | 255 | // (unsigned int + FLT) * nmag * nphase + nphase * (unsigned int) 256 | unsigned int * block_bin = (unsigned int *)sh; 257 | unsigned int * block_bin_phi = (unsigned int *)&block_bin[nmag * nphase]; 258 | 259 | // align! 260 | unsigned int r = ((nmag * nphase + nphase) * sizeof(unsigned int)) % sizeof(FLT); 261 | FLT * Hc = (FLT *)&block_bin_phi[nphase + r]; 262 | FLT * t_sh = (FLT *)&Hc[nmag * nphase]; 263 | unsigned int * y_sh = (unsigned int *)&t_sh[ndata]; 264 | __shared__ FLT f0; 265 | 266 | unsigned int i, N, Nphi; 267 | // each block works on a single frequency. 
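    // blocks also stride through frequencies (i_freq += gridDim.x at the end
    // of the loop), so a grid smaller than nfreq still covers every frequency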
268 | unsigned int i_freq = blockIdx.x; 269 | unsigned int ntot_2 = rnduppow2(nmag * nphase); 270 | unsigned int nphase_2 = rnduppow2(nphase); 271 | int m, n, m0, n0; 272 | 273 | // load data into shared memory 274 | for (int i = threadIdx.x; i < ndata; i += blockDim.x){ 275 | t_sh[i] = t[i]; 276 | y_sh[i] = y[i]; 277 | } 278 | 279 | __syncthreads(); 280 | 281 | FLT dm0 = ((FLT) (mag_overlap + 1.f)) / nmag; 282 | FLT dm; 283 | while (i_freq < nfreq){ 284 | 285 | // read frequency from global data 286 | if (threadIdx.x == 0){ 287 | f0 = freqs[i_freq + freq_offset]; 288 | } 289 | 290 | 291 | // initialise blocks to zero 292 | for(i = threadIdx.x; i < nmag * nphase; i += blockDim.x){ 293 | if (i < nphase) 294 | block_bin_phi[i] = 0; 295 | 296 | block_bin[i] = 0; 297 | Hc[i] = 0.f; 298 | } 299 | 300 | __syncthreads(); 301 | 302 | // make 2d histogram 303 | for(i = threadIdx.x; i < ndata; i += blockDim.x){ 304 | m0 = (int) (y[i]); 305 | n0 = ((int) floor(nphase * mod1(t_sh[i] * f0))) % nphase; 306 | 307 | for (n = n0; n >= (((int) n0) - ((int) phase_overlap)); n--){ 308 | for (m = m0; m >= 0 && m >= (((int) m0) - ((int) mag_overlap)); m--) 309 | atomicInc(&(block_bin[posmod(n, nphase) * nmag + m]), 310 | (phase_overlap + 1) * (mag_overlap + 1) * ndata); 311 | } 312 | 313 | } 314 | 315 | __syncthreads(); 316 | 317 | // Get the total number of data points across phi bins 318 | for(n=threadIdx.x; n < nmag * nphase; n+=blockDim.x) 319 | atomicAdd(&(block_bin_phi[n / nmag]), block_bin[n]); 320 | 321 | __syncthreads(); 322 | 323 | // Convert to dH 324 | for(n=threadIdx.x; n < nmag * nphase; n+=blockDim.x){ 325 | m0 = n % nmag; 326 | n0 = n / nmag; 327 | 328 | Nphi = block_bin_phi[n0]; 329 | N = block_bin[n]; 330 | if (Nphi*N == 0) 331 | continue; 332 | 333 | // adjust mag bin width for overlapping mag bins (phase bins are periodic) 334 | dm = (m0 + mag_overlap + 1 > ((int) nmag)) ? 
(((int) nmag) - m0) * dm0 / (1.f + mag_overlap) : dm0; 335 | 336 | Hc[n] = ((FLT) N) * log((dm * ((FLT) Nphi)) / ((FLT) N)); 337 | } 338 | 339 | __syncthreads(); 340 | 341 | //add up contributions 342 | for(n = ntot_2 / 2; n > 0; n/=2){ 343 | for (m = threadIdx.x; (m < n) && ((m + n) < nmag * nphase); m += blockDim.x) 344 | Hc[m] += Hc[m + n]; 345 | __syncthreads(); 346 | } 347 | 348 | // add up total bin counts 349 | for(n = nphase_2 / 2; n > 0; n/=2){ 350 | for (m = threadIdx.x; (m < n) && ((m + n) < nphase); m += blockDim.x) 351 | block_bin_phi[m] += block_bin_phi[m + n]; 352 | __syncthreads(); 353 | } 354 | 355 | // write result to global memory 356 | if (threadIdx.x == 0) 357 | ce[i_freq + freq_offset] = Hc[0] / ((FLT) (block_bin_phi[0])); 358 | 359 | i_freq += gridDim.x; 360 | } 361 | } 362 | 363 | 364 | 365 | 366 | __global__ void weighted_ce(FLT *bins, unsigned int nfreq, FLT *ce){ 367 | unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; 368 | 369 | if (i < nfreq){ 370 | FLT Hc = 0.f; 371 | FLT bin_tot = 0.f; 372 | FLT dm = ((FLT)(MAG_OVERLAP + 1)) / NMAG; 373 | for(int n=0; n < NPHASE; n++){ 374 | unsigned int offset = i * (NMAG * NPHASE) + n * NMAG; 375 | 376 | FLT p_phi_n = 0.f; 377 | for (int m=0; m < NMAG; m++) 378 | p_phi_n += bins[offset + m]; 379 | 380 | for (int m=0; m < NMAG; m++){ 381 | FLT pmn = bins[offset + m]; 382 | bin_tot += pmn; 383 | 384 | if (pmn > 0.f && p_phi_n > 1E-10) 385 | Hc += pmn * log((dm * p_phi_n) / pmn); 386 | } 387 | } 388 | ce[i] = Hc / bin_tot; 389 | } 390 | } 391 | 392 | __global__ void standard_ce(unsigned int *bins, unsigned int nfreq, 393 | FLT *ce){ 394 | unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; 395 | FLT dm, dm0, Hc; 396 | unsigned int bin_tot, offset, Nphi, N; 397 | 398 | if (i < nfreq){ 399 | Hc = 0.f; 400 | dm0 = ((FLT)(MAG_OVERLAP + 1)) / NMAG; 401 | bin_tot = 0; 402 | for(int n=0; n < NPHASE; n++){ 403 | offset = i * (NMAG * NPHASE) + n * NMAG; 404 | 405 | Nphi = 0; 406 | for (int m=0; m < NMAG; m++) 407 | Nphi += bins[offset + m]; 408 | 409 | if (Nphi == 0) 410 | continue; 411 | 412 | for (int m=0; m < NMAG; m++){ 413 | N = bins[offset + m]; 414 | 415 | if (N == 0) 416 | continue; 417 | 418 | bin_tot += N; 419 | 420 | // adjust mag bin width for overlapping bins 421 | dm = (m + MAG_OVERLAP + 1 > NMAG) ? 
(((FLT) NMAG) - ((FLT) m)) * dm0 / (1.f + MAG_OVERLAP) : dm0;
422 |                 Hc += N * log((dm * Nphi) / N);
423 |             }
424 |         }
425 |
426 |         ce[i] = Hc / bin_tot;
427 |     }
428 | }
429 |
430 | __global__ void constdpdm_ce(unsigned int *bins, unsigned int nfreq,
431 |                              FLT *ce, FLT *mag_bwf){
432 |     unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
433 |
434 |     if (i < nfreq){
435 |         FLT Hc = 0.f;
436 |         unsigned int bin_tot = 0;
437 |         for(int n=0; n < NPHASE; n++){
438 |             unsigned int offset = i * (NMAG * NPHASE) + n * NMAG;
439 |
440 |             unsigned int Nphi = 0;
441 |             for (int m=0; m < NMAG; m++)
442 |                 Nphi += bins[offset + m];
443 |
444 |             if (Nphi == 0)
445 |                 continue;
446 |
447 |             for (int m=0; m < NMAG; m++){
448 |                 unsigned int N = bins[offset + m];
449 |
450 |                 if (N == 0)
451 |                     continue;
452 |
453 |                 bin_tot += N;
454 |                 Hc += N * log((mag_bwf[m] * Nphi) / N);
455 |             }
456 |         }
457 |
458 |         ce[i] = Hc / bin_tot;
459 |     }
460 | }
461 |
462 | __global__ void log_prob(unsigned int *bins, unsigned int nfreq,
463 |                          FLT *log_proba, FLT *mag_bin_fracs){
464 |     unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
465 |
466 |     if (i < nfreq){
467 |         FLT logP = 0.f;
468 |         for(int n=0; n < NPHASE; n++){
469 |             unsigned int offset = i * (NMAG * NPHASE) + n * NMAG;
470 |
471 |             unsigned int Nphi = 0;
472 |             for (int m=0; m < NMAG; m++)
473 |                 Nphi += bins[offset + m];
474 |
475 |             if (Nphi == 0)
476 |                 continue;
477 |
478 |             for (int m=0; m < NMAG; m++){
479 |                 FLT N = (FLT) (bins[offset + m]);
480 |
481 |                 FLT Nexp = Nphi * mag_bin_fracs[m];
482 |
483 |                 if (Nexp < 1e-9)
484 |                     continue;
485 |
486 |                 logP += N * log(Nexp) - Nexp - lgamma(N + 1.f);
487 |             }
488 |         }
489 |
490 |         log_proba[i] = logP / (PHASE_OVERLAP + 1.f);
491 |     }
492 | }
493 |
494 |
--------------------------------------------------------------------------------
/cuvarbase/kernels/cunfft.cu:
--------------------------------------------------------------------------------
1 | #include <pycuda-complex.hpp>
2 | #include <stdio.h>
3 |
4 | #define RESTRICT __restrict__
5 | #define CONSTANT const
6 | #define PI 3.14159265358979323846264338327950288f
7 | #define FILTER gauss_filter
8 | //{CPP_DEFS}
9 |
10 | #ifdef DOUBLE_PRECISION
11 | #define ATOMIC_ADD atomicAddDouble
12 | #define FLT double
13 |
14 | #else
15 | #define ATOMIC_ADD atomicAdd
16 | #define FLT float
17 | #endif
18 |
19 | #define CMPLX pycuda::complex<FLT>
20 |
21 | __device__ double atomicAddDouble(double* address, double val)
22 | {
23 |     unsigned long long int* address_as_ull =
24 |                             (unsigned long long int*)address;
25 |     unsigned long long int old = *address_as_ull, assumed;
26 |     do {
27 |         assumed = old;
28 |         old = atomicCAS(address_as_ull, assumed,
29 |                         __double_as_longlong(val +
30 |                         __longlong_as_double(assumed)));
31 |     } while (assumed != old);
32 |     return __longlong_as_double(old);
33 | }
34 |
35 |
36 | __device__ FLT gauss_filter(CONSTANT FLT x, CONSTANT FLT b) {
37 |     return exp(-(x*x) / b) / sqrt(PI * b);
38 | }
39 |
40 | __device__ int mod(CONSTANT int a, CONSTANT int b) {
41 |     int ret = a % b;
42 |     return (ret < 0) ?
ret + b : ret; 43 | } 44 | 45 | __device__ float modflt(CONSTANT FLT a, CONSTANT FLT b){ 46 | return a - floor(a / b) * b; 47 | } 48 | 49 | __device__ FLT diffmod(CONSTANT FLT a, CONSTANT FLT b, CONSTANT FLT M) { 50 | FLT ret = a - b; 51 | if (fabsf(ret) > M/2){ 52 | if (ret > 0) 53 | return ret - M; 54 | return M + ret; 55 | } 56 | return ret; 57 | } 58 | 59 | __global__ void nfft_shift( 60 | CMPLX *in, 61 | CMPLX *out, 62 | CONSTANT int ng, 63 | CONSTANT int nbatch, 64 | CONSTANT FLT x0, 65 | CONSTANT FLT xf, 66 | CONSTANT FLT spp, 67 | CONSTANT FLT f0){ 68 | 69 | int i = blockIdx.x *blockDim.x + threadIdx.x; 70 | 71 | int batch = i / ng; 72 | 73 | if (batch < nbatch) { 74 | FLT k0 = f0 * spp * (xf - x0); 75 | 76 | FLT phi = (2.f * PI * (i % ng) * k0) / ng; 77 | 78 | CMPLX shift = CMPLX(cos(phi), sin(phi)); 79 | 80 | out[i] = shift * in[i]; 81 | } 82 | } 83 | 84 | __global__ void precompute_psi( 85 | FLT *RESTRICT x, // observation times 86 | FLT * q1, // precomputed filter values (length n0) 87 | FLT * q2, // precomputed filter values (length n0) 88 | FLT * q3, // precomputed filter values (length 2 * m + 1) 89 | CONSTANT int n0, // data size 90 | CONSTANT int ng, // grid size 91 | CONSTANT int m, // max filter radius 92 | CONSTANT FLT b, // filter scaling 93 | CONSTANT FLT x0, // min(x) 94 | CONSTANT FLT xf, // max(x) 95 | CONSTANT FLT spp) // samples per peak 96 | { 97 | int i = blockIdx.x *blockDim.x + threadIdx.x; 98 | 99 | FLT binv = 1.f/b; 100 | if (i < n0){ 101 | FLT xg = (x[i] - x0) / (spp * (xf - x0)); 102 | 103 | xg = m + modflt(ng * xg, 1.f); 104 | 105 | q1[i] = exp(-xg * (xg * binv)) / sqrt(b * PI); 106 | q2[i] = exp( 2.f * xg * binv); 107 | 108 | } else if (i - n0 < 2 * m + 1) { 109 | int l = i - n0; 110 | q3[l] = exp(-l * l * binv); 111 | } 112 | 113 | } 114 | 115 | __global__ void fast_gaussian_grid( 116 | FLT *RESTRICT x, // data (observation times), length n0 117 | FLT *RESTRICT y, // data (observations), length (nbatch * n0) 118 | CMPLX * grid, // grid, length n * nbatch 119 | FLT *RESTRICT q1, // precomputed filter values 120 | FLT *RESTRICT q2, // precomputed filter values 121 | FLT *RESTRICT q3, // precomputed filter values 122 | CONSTANT int n0, // data size 123 | CONSTANT int ng, // grid size 124 | CONSTANT int nbatch, // number of grids/datasets 125 | CONSTANT int m, // max filter radius 126 | CONSTANT FLT x0, // min(x) 127 | CONSTANT FLT xf, // max(x) 128 | CONSTANT FLT spp) // samples per peak 129 | { 130 | int i = blockIdx.x * blockDim.x + threadIdx.x; 131 | 132 | int batch = i / n0; 133 | 134 | if (batch < nbatch){ 135 | 136 | // datapoint 137 | int di = i % n0; 138 | 139 | // scale 140 | FLT xval = (x[di] - x0) / (spp * (xf - x0)); 141 | 142 | // observation 143 | FLT yi = y[i]; 144 | 145 | // nearest gridpoint (rounding down) 146 | int u = (int) floorf(ng * xval - m); 147 | 148 | // precomputed filter values 149 | FLT Q = q1[di]; 150 | FLT Q2 = q2[di]; 151 | 152 | // add datapoint to grid 153 | for(int k = 0; k < 2 * m + 1; k++){ 154 | FLT dg = Q * q3[k] * yi; 155 | if (!(isnan(dg) || isinf(dg))) 156 | ATOMIC_ADD(&(grid[mod(k + u, ng) + batch * ng]._M_re), 157 | dg); 158 | else 159 | break; 160 | Q *= Q2; 161 | } 162 | } 163 | } 164 | 165 | 166 | 167 | __global__ void slow_gaussian_grid( 168 | FLT *RESTRICT x, // data (observation times) 169 | FLT *RESTRICT y, // data (observations) 170 | CMPLX * grid, // grid 171 | CONSTANT int n0, // data size 172 | CONSTANT int ng, // grid size 173 | CONSTANT int nbatch, // number of grids 174 | CONSTANT int m, // max 
filter radius
175 |     CONSTANT FLT b,       // filter scaling
176 |     CONSTANT FLT x0,      // min(x)
177 |     CONSTANT FLT xf,      // max(x)
178 |     CONSTANT FLT spp)     // samples per peak
179 | {
180 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
181 |
182 |     int batch = i / ng;
183 |
184 |     if (batch < nbatch){
185 |         FLT dx, dgi;
186 |
187 |
188 |
189 |         // grid index for this thread
190 |         int grid_index = i - ng * batch;
191 |
192 |         // iterate through data
193 |         for(int di = 0; di < n0; di ++){
194 |
195 |             // scale
196 |             FLT xval = (x[di] - x0) / (spp * (xf - x0));
197 |
198 |             // grid index of datapoint (float)
199 |             dgi = ng * xval;
200 |
201 |             // "distance" between grid_index and datapoint
202 |             dx = diffmod(dgi, grid_index, ng);
203 |
204 |             // skip if datapoint too far away
205 |             if (dx > m)
206 |                 continue;
207 |
208 |             // add (weighted) datapoint to grid
209 |             grid[i] += FILTER(dx, b) * y[di + n0 * batch];
210 |         }
211 |     }
212 | }
213 |
214 | __global__ void normalize(
215 |     CMPLX *gin,
216 |     CMPLX *gout,
217 |     CONSTANT int ng,      // sigma * nf
218 |     CONSTANT int nf,      // number of desired frequency samples
219 |     CONSTANT int nbatch,  // number of transforms
220 |     CONSTANT FLT b,       // filter scaling
221 |     CONSTANT FLT x0,      // min(x)
222 |     CONSTANT FLT xf,      // max(x)
223 |     CONSTANT FLT spp,     // samples per peak
224 |     CONSTANT FLT f0)      // first frequency
225 | {
226 |     int i = blockIdx.x *blockDim.x + threadIdx.x;
227 |
228 |     int batch = i / nf;
229 |
230 |     if (batch < nbatch){
231 |         int k = i % nf;
232 |
233 |         FLT sT = spp * (xf - x0);
234 |         FLT n0 = (x0 / sT) * ng;
235 |         FLT k0 = f0 * sT;
236 |         CMPLX G = gin[batch * ng + k];
237 |
238 |         // *= exp(2pi i (k0 + k) * n0 / n)
239 |         FLT theta_k = (2.f * PI * n0 * (k0 + k)) / ng;
240 |
241 |         G *= CMPLX(cos(theta_k), sin(theta_k));
242 |
243 |         // normalization factor from gridding kernel (gaussian)
244 |         FLT khat = PI * (k0 + k) / ng;
245 |         gout[i] = G * exp(b * khat * khat);
246 |     }
247 |
248 | }
249 |
250 |
--------------------------------------------------------------------------------
/cuvarbase/kernels/lomb.cu:
--------------------------------------------------------------------------------
1 | #include <pycuda-complex.hpp>
2 | #include <stdio.h>
3 | //{CPP_DEFS}
4 |
5 | #define EPSILON 1E-8
6 | #define PI 3.141592653589793238462643383279502884f
7 | #ifdef DOUBLE_PRECISION
8 | #define FLT double
9 | #else
10 | #define FLT float
11 | #endif
12 |
13 | #define STANDARD 0
14 | #define FLOATING_MEAN 1
15 | #define WINDOW 2
16 |
17 |
18 |
19 | __device__ FLT cossum(FLT *t, FLT *y, int n, FLT freq){
20 |     FLT C = 0;
21 |     for(int i = 0; i < n; i++)
22 |         C += y[i] * cos((t[i] + 0.5f) * freq * 2.f * PI);
23 |
24 |     return C;
25 | }
26 |
27 |
28 | __device__ FLT sinsum(FLT *t, FLT *y, int n, FLT freq){
29 |     FLT S = 0;
30 |     for(int i = 0; i < n; i++)
31 |         S += y[i] * sin((t[i] + 0.5f) * freq * 2.f * PI);
32 |
33 |     return S;
34 | }
35 |
36 | __device__ FLT lspow_flmean(FLT C, FLT S,
37 |                             FLT C2, FLT S2,
38 |                             FLT YCh, FLT YSh,
39 |                             FLT YY, FLT Y,
40 |                             FLT *reg){
41 |     FLT r0 = 0.f, r1 = 0.f, r2 = 0.f;
42 |     if (reg != NULL){
43 |         r0 = reg[0];
44 |         r1 = reg[1];
45 |         r2 = reg[2];
46 |     }
47 |     FLT tan_2omega_tau = (S2 - 2 * S * C) / (C2 - (C * C - S * S));
48 |
49 |     FLT C2wInv2 = 1.f + tan_2omega_tau * tan_2omega_tau;
50 |
51 |     FLT C2w = 1.f / sqrt(C2wInv2);
52 |     FLT S2w = tan_2omega_tau * C2w;
53 |
54 |     FLT Cw = sqrt(0.5f * (1.f + C2w));
55 |     FLT Sw = sqrt(0.5f * (1.f - C2w));
56 |
57 |     if (S2w < 0.f)
58 |         Sw *= -1.f;
59 |
60 |     FLT Cshft = C * Cw + S * Sw;
61 |     FLT Sshft = S * Cw - C * Sw;
62 |
63 |     FLT CC = 0.5f * (1.f + C2 * C2w + S2 * S2w);
64 |     FLT SS = 0.5f * (1.f - C2 * C2w - S2 * S2w);
65 |
66 |     CC -= Cshft * Cshft;
67 |     SS -= Sshft * Sshft;
68 |
69 |     FLT xreg = r2 / (1.f + r2);
70 |
71 |     CC += Cshft * Cshft * xreg + r0;
72 |     SS += Sshft * Sshft * xreg + r1;
73 |
74 |     FLT YC = (YCh + Y * C * xreg) * Cw + (YSh + Y * S * xreg) * Sw;
75 |     FLT YS = (YSh + Y * S * xreg) * Cw - (YCh + Y * C * xreg) * Sw;
76 |
77 |     FLT P = ((YC * YC) / CC + (YS * YS) / SS) / YY;
78 |
79 |     if (isnan(P) || isinf(P) || P < 0.f)
80 |         P = -1.;
81 |
82 |     return P;
83 | }
84 |
85 | __device__ FLT lspow0(FLT C, FLT S,
86 |                       FLT C2, FLT S2,
87 |                       FLT YCh, FLT YSh,
88 |                       FLT YY, FLT Y,
89 |                       FLT *reg){
90 |
91 |     FLT tan_2omega_tau = S2 / C2;
92 |     FLT r0 = 0.f, r1 = 0.f;
93 |     if (reg != NULL){
94 |         r0 = reg[0];
95 |         r1 = reg[1];
96 |     }
97 |
98 |     FLT C2wInv2 = 1.f + tan_2omega_tau * tan_2omega_tau;
99 |
100 |     FLT C2w = 1.f / sqrt(C2wInv2);
101 |     FLT S2w = tan_2omega_tau * C2w;
102 |
103 |     FLT Cw = sqrt(0.5f * (1.f + C2w));
104 |     FLT Sw = sqrt(0.5f * (1.f - C2w));
105 |
106 |     if (S2w < 0)
107 |         Sw *= -1.f;
108 |
109 |     FLT YC = (YCh + Y * C) * Cw + (YSh + Y * S) * Sw;
110 |     FLT YS = (YSh + Y * S) * Cw - (YCh + Y * C) * Sw;
111 |
112 |     FLT CC = 0.5f * (1.f + C2 * C2w + S2 * S2w) + r0;
113 |     FLT SS = 0.5f * (1.f - C2 * C2w - S2 * S2w) + r1;
114 |
115 |     FLT P = ((YC * YC) / CC + (YS * YS) / SS) / (YY + Y * Y);
116 |
117 |     if (isnan(P) || isinf(P) || P < 0.f){
118 |         //printf("%e, %e, %e, %e, %e: %e\n", C, S, CC, SS, YY + Y*Y, P);
119 |         P = -1.f;
120 |     }
121 |
122 |     return P;
123 | }
124 |
125 |
126 | __device__ FLT lspow(FLT C, FLT S,
127 |                      FLT C2, FLT S2,
128 |                      FLT YCh, FLT YSh,
129 |                      FLT YY, FLT Y,
130 |                      FLT *reg, int mode){
131 |     switch(mode){
132 |         case STANDARD:
133 |             return lspow0(C, S, C2, S2, YCh, YSh, YY, Y, reg);
134 |         case FLOATING_MEAN:
135 |             return lspow_flmean(C, S, C2, S2, YCh, YSh, YY, Y, reg);
136 |         case WINDOW:
137 |             return lspow0(C, S, C2, S2, C, S, 0.f, 1.f, NULL);
138 |         default:
139 |             return -1.f;
140 |     }
141 | }
142 |
143 |
144 | __global__ void lomb_dirsum(FLT *t, FLT *yw, FLT *w,
145 |                             FLT *lsp, FLT *reg,
146 |                             int nfreq, int n, FLT YY, FLT Y, FLT df,
147 |                             FLT fmin, int mode){
148 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
149 |     // reg = (lambda_a, lambda_b, lambda_c)
150 |     if (i < nfreq){
151 |
152 |         FLT frq = fmin + i * df;
153 |
154 |         FLT C = cossum(t, w, n, frq);
155 |         FLT S = sinsum(t, w, n, frq);
156 |
157 |         FLT C2 = cossum(t, w, n, 2.f * frq);
158 |         FLT S2 = sinsum(t, w, n, 2.f * frq);
159 |
160 |         FLT YCh = cossum(t, yw, n, frq);
161 |         FLT YSh = sinsum(t, yw, n, frq);
162 |
163 |         lsp[i] = lspow(C, S, C2, S2, YCh, YSh, YY, Y, reg, mode);
164 |     }
165 | }
166 |
167 | __global__ void lomb_dirsum_custom_frq(FLT *t, FLT *w, FLT *yw, FLT *freqs,
168 |                                        FLT *lsp, FLT *reg,
169 |                                        int nfreq, int n, FLT YY, FLT Y, int mode){
170 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
171 |     // reg = (lambda_a, lambda_b, lambda_c)
172 |     if (i < nfreq){
173 |
174 |         FLT frq = freqs[i];
175 |
176 |         FLT C = cossum(t, w, n, frq);
177 |         FLT S = sinsum(t, w, n, frq);
178 |
179 |         FLT C2 = cossum(t, w, n, 2.f * frq);
180 |         FLT S2 = sinsum(t, w, n, 2.f * frq);
181 |
182 |         FLT YCh = cossum(t, yw, n, frq);
183 |         FLT YSh = sinsum(t, yw, n, frq);
184 |
185 |         lsp[i] = lspow(C, S, C2, S2, YCh, YSh, YY, Y, reg, mode);
186 |     }
187 | }
188 |
189 | __global__ void lomb(pycuda::complex<FLT> *sw,
190 |                      pycuda::complex<FLT> *syw,
191 |                      FLT *lsp,
192 |                      FLT *reg,
193 |                      int nfreq,
194 |                      FLT YY,
195 |                      FLT Y,
196 |                      int k0,
197 |                      int mode){
198 |
199 |     // least squares (lomb scargle with floating mean)
200 |
201 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
202 |     // reg = (lambda_a, lambda_b, lambda_c)
203 |     if (i < nfreq){
204 |         pycuda::complex<FLT> SW, SW2, SYW;
205 |         SW = sw[i];
206 |         SW2 = sw[2 * i + k0];
207 |         SYW = syw[i];
208 |
209 |         FLT C = SW.real();
210 |         FLT S = SW.imag();
211 |
212 |         FLT C2 = SW2.real();
213 |         FLT S2 = SW2.imag();
214 |
215 |         FLT YCh = SYW.real();
216 |         FLT YSh = SYW.imag();
217 |
218 |         lsp[i] = lspow(C, S, C2, S2, YCh, YSh, YY, Y, reg, mode);
219 |     }
220 | }
221 |
222 |
223 | __global__ void lomb_mh(pycuda::complex<FLT> *sw,
224 |                         pycuda::complex<FLT> *syw,
225 |                         FLT *lsp,
226 |                         FLT *reg,
227 |                         int nfreq,
228 |                         int nharmonics,
229 |                         FLT YY,
230 |                         FLT Y,
231 |                         int k0,
232 |                         int mode){
233 |
234 |     // least squares (lomb scargle with floating mean)
235 |
236 |     unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
237 |     // reg = (lambda_a, lambda_b, lambda_c)
238 |     if (i < nfreq){
239 |         pycuda::complex<FLT> SW, SW2, SYW;
240 |         SW = sw[i];
241 |         SW2 = sw[2 * i + k0];
242 |         SYW = syw[i];
243 |
244 |         FLT C = SW.real();
245 |         FLT S = SW.imag();
246 |
247 |         FLT C2 = SW2.real();
248 |         FLT S2 = SW2.imag();
249 |
250 |         FLT YCh = SYW.real();
251 |         FLT YSh = SYW.imag();
252 |
253 |         lsp[i] = lspow(C, S, C2, S2, YCh, YSh, YY, Y, reg, mode);
254 |     }
255 | }
--------------------------------------------------------------------------------
/cuvarbase/kernels/pdm.cu:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #define WEIGHT(k) (w==NULL ? 1.0f : w[k])
3 | #define GAUSSIAN(x) expf(-0.5f *x*x)
4 | #define WEIGHTED_LININTERP true
5 | #define SKIP_BIN(i) (bin_wtots[i] * NBINS < 0.01f)
6 | //INSERT_NBINS_HERE
7 | #define PHASE(x,f) (x * f - floorf(x * f))
8 |
9 | #define RESTRICT __restrict__
10 | #define CONSTANT const
11 |
12 |
13 | __device__ float phase_diff(
14 |         CONSTANT float dt,
15 |         CONSTANT float freq){
16 |     float dphi = dt * freq - floorf(dt * freq);
17 |     return ((dphi > 0.5f) ?
1.0f - dphi : dphi); 18 | } 19 | 20 | __device__ float var_step_function( 21 | float *RESTRICT t, 22 | float *RESTRICT y, 23 | float *RESTRICT w, 24 | CONSTANT float freq, 25 | CONSTANT int ndata){ 26 | float bin_means[NBINS]; 27 | float bin_wtots[NBINS]; 28 | int bin; 29 | float var_tot = 0.f; 30 | for (int i = 0; i < NBINS; i++){ 31 | bin_wtots[i] = 0.f; 32 | bin_means[i] = 0.f; 33 | } 34 | for(int i = 0; i < ndata; i++){ 35 | bin = (int) (PHASE(t[i], freq) * NBINS); 36 | bin = bin % NBINS; 37 | bin_wtots[bin] += w[i]; 38 | bin_means[bin] += y[i] * w[i]; 39 | } 40 | 41 | for(int i = 0; i < NBINS; i++){ 42 | if (bin_wtots[i] == 0.f) 43 | continue; 44 | bin_means[i] /= bin_wtots[i]; 45 | } 46 | 47 | for(int i = 0; i < ndata; i++){ 48 | bin = (int) (PHASE(t[i], freq) * NBINS); 49 | var_tot += w[i] * (y[i] - bin_means[bin]) * (y[i] - bin_means[bin]); 50 | } 51 | 52 | return var_tot; 53 | } 54 | 55 | __device__ float var_linear_interp( 56 | float *RESTRICT t, 57 | float *RESTRICT y, 58 | float *RESTRICT w, 59 | CONSTANT float freq, 60 | CONSTANT int ndata){ 61 | 62 | float bin_means[NBINS]; 63 | float bin_wtots[NBINS]; 64 | int bin, bin0, bin1; 65 | float var_tot = 0.f; 66 | float phase, y0, alpha; 67 | for(int i = 0; i < NBINS; i++){ 68 | bin_wtots[i] = 0.f; 69 | bin_means[i] = 0.f; 70 | } 71 | 72 | for(int i = 0; i < ndata; i++){ 73 | bin = (int) (PHASE(t[i], freq) * NBINS); 74 | bin = bin % NBINS; 75 | bin_wtots[bin] += w[i]; 76 | bin_means[bin] += w[i] * y[i]; 77 | } 78 | 79 | for (int i = 0; i < NBINS; i++){ 80 | if (bin_wtots[i] == 0.f) 81 | continue; 82 | bin_means[i] /= bin_wtots[i]; 83 | } 84 | 85 | 86 | for (int i = 0; i < ndata; i++){ 87 | phase = PHASE(t[i], freq); 88 | bin = (int) (phase * NBINS); 89 | bin = bin % NBINS; 90 | 91 | alpha = phase * NBINS - floorf(phase * NBINS) - 0.5f; 92 | bin0 = (alpha < 0) ? bin - 1 : bin; 93 | bin1 = (alpha < 0) ? bin : bin + 1; 94 | 95 | if (bin0 < 0) 96 | bin0 += NBINS; 97 | if (bin1 >= NBINS) 98 | bin1 -= NBINS; 99 | 100 | alpha += (alpha < 0) ? 1.f : 0.f; 101 | y0 = (1.f - alpha) * bin_means[bin0] + alpha * bin_means[bin1]; 102 | var_tot += w[i] * (y[i] - y0) * (y[i] - y0); 103 | } 104 | 105 | return var_tot; 106 | } 107 | 108 | 109 | __device__ float var_binless_tophat( 110 | float *RESTRICT t, 111 | float *RESTRICT y, 112 | float *RESTRICT w, 113 | CONSTANT float freq, 114 | CONSTANT int ndata, 115 | CONSTANT float dphi){ 116 | float mbar, tj, wtot, var; 117 | bool in_bin; 118 | var = 0.f; 119 | for(int j = 0; j < ndata; j++){ 120 | mbar = 0.f; 121 | wtot = 0.f; 122 | tj = t[j]; 123 | for(int k = 0; k < ndata; k++){ 124 | in_bin = phase_diff(fabsf(t[k] - tj), freq) < dphi; 125 | wtot += in_bin ? w[k] : 0.f; 126 | mbar += in_bin ? 
w[k] * y[k] : 0.f;
127 |         }
128 |         mbar /= wtot;
129 |         var += w[j] * (y[j] - mbar) * (y[j] - mbar);
130 |     }
131 |     return var;
132 | }
133 | __device__ float var_binless_gauss(
134 |         float *RESTRICT t,
135 |         float *RESTRICT y,
136 |         float *RESTRICT w,
137 |         CONSTANT float freq,
138 |         CONSTANT int ndata,
139 |         CONSTANT float dphi){
140 |     float mbar, tj, wtot, var, wgt;
141 |     var = 0.f;
142 |     for(int j = 0; j < ndata; j++){
143 |         mbar = 0.f;
144 |         wtot = 0.f;
145 |         tj = t[j];
146 |         for(int k = 0; k < ndata; k++){
147 |             float dphase = phase_diff(fabsf(t[k] - tj), freq);
148 |             wgt = w[k] * GAUSSIAN(dphase / dphi);
149 |             mbar += wgt * y[k];
150 |             wtot += wgt;
151 |         }
152 |         mbar /= wtot;
153 |         var += w[j] * (y[j] - mbar) * (y[j] - mbar);
154 |     }
155 |     return var;
156 | }
157 | __global__ void pdm_binless_tophat(
158 |         float *RESTRICT t,
159 |         float *RESTRICT y,
160 |         float *RESTRICT w,
161 |         float *RESTRICT freqs,
162 |         float *power,
163 |         CONSTANT int ndata,
164 |         CONSTANT int nfreqs,
165 |         CONSTANT float dphi,
166 |         CONSTANT float var){
167 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
168 |     if (i < nfreqs){
169 |         power[i] = 1.f - var_binless_tophat(t, y, w, freqs[i], ndata, dphi) / var;
170 |     }
171 | }
172 |
173 | __global__ void pdm_binless_gauss(
174 |         float *RESTRICT t,
175 |         float *RESTRICT y,
176 |         float *RESTRICT w,
177 |         float *RESTRICT freqs,
178 |         float *power,
179 |         CONSTANT int ndata,
180 |         CONSTANT int nfreqs,
181 |         CONSTANT float dphi,
182 |         CONSTANT float var){
183 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
184 |     if (i < nfreqs){
185 |         power[i] = 1.f - var_binless_gauss(t, y, w, freqs[i], ndata, dphi) / var;
186 |     }
187 | }
188 |
189 | __global__ void pdm_binned_linterp(
190 |         float *RESTRICT t,
191 |         float *RESTRICT y,
192 |         float *RESTRICT w,
193 |         float *RESTRICT freqs,
194 |         float *power,
195 |         CONSTANT int ndata,
196 |         CONSTANT int nfreqs,
197 |         CONSTANT float dphi,
198 |         CONSTANT float var){
199 |
200 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
201 |     if (i < nfreqs){
202 |         power[i] = 1.f - var_linear_interp(t, y, w, freqs[i], ndata) / var;
203 |     }
204 | }
205 | __global__ void pdm_binned_step(
206 |         float *RESTRICT t,
207 |         float *RESTRICT y,
208 |         float *RESTRICT w,
209 |         float *RESTRICT freqs,
210 |         float *power,
211 |         CONSTANT int ndata,
212 |         CONSTANT int nfreqs,
213 |         CONSTANT float dphi,
214 |         CONSTANT float var){
215 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
216 |     if (i < nfreqs){
217 |         power[i] = 1.f - var_step_function(t, y, w, freqs[i], ndata) / var;
218 |     }
219 | }
220 |
--------------------------------------------------------------------------------
/cuvarbase/kernels/wavelet.cu:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #define WEIGHT(k) (w==NULL ? 1.0f : w[k])
3 | #define GAUSSIAN(x) expf(-0.5f *x*x)
4 | #define WEIGHTED_LININTERP true
5 | #define SKIP_BIN(i) (bin_wtots[i] * NBINS < 0.01f)
6 | //INSERT_NBINS_HERE
7 | #define PHASE(x,f) (x * f - floorf(x * f))
8 | #define TWOPI 6.28318530718f
9 | #define RESTRICT __restrict__
10 | #define CONSTANT const
11 | #define MIN_NOBS 10
12 | #define wavelet full_wavelet
13 |
14 |
15 | __device__ float fast_wavelet(float dt, float sigma, float freq){
16 |     float a = fabs(TWOPI * sigma * freq * dt);
17 |
18 |     return a < 1.f ? 1.f - 3.f * a * a + 2.f * a * a * a : 0.f;
19 | }
20 |
21 | __device__ float full_wavelet(float dt, float sigma, float freq){
22 |     float a = fabs(TWOPI * sigma * freq * dt);
23 |
24 |     return expf(-a*a);
25 | }
26 |
27 | __device__ float cosine_wtransform(float *t, float *y, float *w, float freq, float tau, float sigma,
28 |                                    int imin, int imax){
29 |     float pow = 0.f;
30 |     float weight = 0.f;
31 |     float tot_weight = 0.f;
32 |     for(int i = imin; i <= imax; i++){
33 |         weight = wavelet(t[i] - tau, sigma, freq) * (w == NULL ? 1.f : w[i]);
34 |         tot_weight += weight;
35 |         pow += y[i] * weight * cos(TWOPI * freq * t[i]);
36 |     }
37 |     return pow / tot_weight;
38 | }
39 |
40 | __device__ float sine_wtransform(float *t, float *y, float *w, float freq, float tau, float sigma,
41 |                                  int imin, int imax){
42 |     float pow = 0.f;
43 |     float weight = 0.f;
44 |     float tot_weight = 0.f;
45 |     for(int i = imin; i <= imax; i++){
46 |         weight = wavelet(t[i] - tau, sigma, freq) * (w == NULL ? 1.f : w[i]);
47 |         tot_weight += weight;
48 |         pow += y[i] * weight * sin(TWOPI * freq * t[i]);
49 |     }
50 |     return pow / tot_weight;
51 | }
52 |
53 | __device__ float weighted_mean(float *t, float *y, float *w, float freq, float tau,
54 |                                float sigma, int imin, int imax){
55 |     float s = 0.f;
56 |     float weight = 0.f;
57 |     float total_weight = 0.f;
58 |     for(int i = imin; i <= imax; i++){
59 |         weight = wavelet(t[i] - tau, sigma, freq) * (w == NULL ? 1.f : w[i]);
60 |         s += y[i] * weight;
61 |         total_weight += weight;
62 |     }
63 |     return s / total_weight;
64 | }
65 |
66 | __device__ float weighted_var(float *t, float *y, float *w, float freq, float tau,
67 |                               float sigma, int imin, int imax){
68 |     float s = 0.f;
69 |     float weight = 0.f;
70 |     float total_weight = 0.f;
71 |     for(int i = imin; i <= imax; i++){
72 |         weight = wavelet(t[i] - tau, sigma, freq) * (w == NULL ? 1.f : w[i]);
73 |         s += y[i] * y[i] * weight;
74 |         total_weight += weight;
75 |     }
76 |     return s / total_weight;
77 | }
78 |
79 | __device__ float power(float *t, float *y, float *w, float freq, float tau,
80 |                        float prec, float sigma, int nobs){
81 |
82 |     // least squares (lomb scargle with floating mean)
83 |
84 |     int imin = 0;
85 |     int imax = nobs - 1;
86 |
87 |     float wmin = pow(10.f, -prec);
88 |
89 |     while( imin < nobs && wavelet(t[imin] - tau, sigma, freq) < wmin) imin ++;
90 |     while( imax > 0 && wavelet(t[imax] - tau, sigma, freq) < wmin) imax --;
91 |
92 |     if (imax - imin < MIN_NOBS) return 0.f;
93 |
94 |     float Y = weighted_mean(t, y, w, freq, tau, sigma, imin, imax);
95 |     float YY = weighted_var(t, y, w, freq, tau, sigma, imin, imax) - Y*Y;
96 |
97 |     float C = cosine_wtransform(t, w, NULL, freq, tau, sigma, imin, imax);
98 |     float S = sine_wtransform(t, w, NULL, freq, tau, sigma, imin, imax);
99 |
100 |     float C2 = cosine_wtransform(t, w, NULL, 2 * freq, tau, sigma, imin, imax);
101 |     float S2 = sine_wtransform(t, w, NULL, 2 * freq, tau, sigma, imin, imax);
102 |
103 |     float YC = cosine_wtransform(t, y, w, freq, tau, sigma, imin, imax) - Y * C;
104 |     float YS = sine_wtransform(t, y, w, freq, tau, sigma, imin, imax) - Y * S;
105 |
106 |     float CC = 0.5f * ( 1.f + C2 ) - C * C;
107 |     float CS = 0.5f * S2 - C * S;
108 |     float SS = 0.5f * ( 1.f - C2 ) - S * S;
109 |
110 |     float D = CC * SS - CS * CS;
111 |
112 |     float p = (SS * YC * YC + CC * YS * YS - 2 * CS * YC * YS) / (YY * D);
113 |
114 |     // force 0 < p < 1
115 |     return p < 0.f ? 0.f : (p > 1.f ? 0.f : p);
116 | }
117 |
118 |
119 | __device__ int sumint(int *arr, int len){
120 |     int s = 0;
121 |     for(int i = 0; i < len; i++)
122 |         s += arr[i];
123 |     return s;
124 | }
125 |
126 |
127 | __global__ void wavelet_spectrogram(float *t, float *y, float *w, float *spectrogram,
128 |                                     float *freqs, float *taus, int *ntaus, int nfreqs,
129 |                                     int nobs, float sigma, float prec){
130 |
131 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
132 |
133 |     int tot_ntaus = sumint(ntaus, nfreqs);
134 |     if (i < tot_ntaus){
135 |         int fno = 0;
136 |         int s = ntaus[0];
137 |         while(s <= i){
138 |             fno ++;
139 |             s += ntaus[fno];
140 |         }
141 |
142 |         float tau = taus[i];
143 |         float freq = freqs[fno];
144 |
145 |         spectrogram[i] = power(t, y, w, freq, tau, prec, sigma, nobs);
146 |     }
147 | }
--------------------------------------------------------------------------------
/cuvarbase/pdm.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from builtins import zip
6 | from builtins import range
7 |
8 | import numpy as np
9 | import resource
10 | import warnings
11 |
12 | import pycuda.driver as cuda
13 | import pycuda.gpuarray as gpuarray
14 | from pycuda.compiler import SourceModule
15 | # import pycuda.autoinit
16 |
17 | from .core import GPUAsyncProcess
18 | from .utils import weights, find_kernel, dphase
19 |
20 | def var_tophat(t, y, w, freq, dphi):
21 |     var = 0.
22 |     for i, (T, Y, W) in enumerate(zip(t, y, w)):
23 |         mbar = 0.
24 |         wtot = 0.
25 |         for j, (T2, Y2, W2) in enumerate(zip(t, y, w)):
26 |             dph = dphase(abs(T2 - T), freq)
27 |             if dph < dphi:
28 |                 mbar += W2 * Y2
29 |                 wtot += W2
30 |
31 |         var += W * (Y - mbar / wtot)**2
32 |
33 |     return var
34 |
35 | def var_gauss(t, y, w, freq, dphi):
36 |     gaussian = lambda x: np.exp(-0.5 *x**2)
37 |     var = 0.
38 |     for i, (T, Y, W) in enumerate(zip(t, y, w)):
39 |         mbar = 0.
40 |         wtot = 0.
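        # Same structure as var_tophat above, but each neighbor enters the
        # local mean with a smooth weight W2 * exp(-0.5 * (dph / dphi)**2)
        # rather than a hard phase-window cut.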
41 | 42 | for j, (T2, Y2, W2) in enumerate(zip(t, y, w)): 43 | dph = dphase(abs(T2 - T), freq) 44 | wgt = W2 * gaussian(dph / dphi) 45 | mbar += wgt * Y2 46 | wtot += wgt 47 | 48 | var += W * (Y - mbar / wtot)**2 49 | 50 | return var 51 | 52 | def binned_pdm_model(t, y, w, freq, nbins, linterp=True): 53 | 54 | if len(t) == 0: 55 | return lambda p, **kwargs: np.zeros_like(p) 56 | 57 | bin_means = np.zeros(nbins) 58 | phase = (t * freq) % 1.0 59 | bins = [int(p * nbins) % nbins for p in phase] 60 | 61 | for i in range(nbins): 62 | wtot = max([sum([W for j, W in enumerate(w) if bins[j] == i]), 1E-10]) 63 | bin_means[i] = sum([W * Y for j, (Y, W) in enumerate(zip(y, w)) 64 | if bins[j] == i]) / wtot 65 | 66 | def pred_y(p, nbins=nbins, linterp=linterp, bin_means=bin_means): 67 | bs = np.array([int(P * nbins) % nbins for P in p]) 68 | if not linterp: 69 | return bin_means[bs] 70 | alphas = p * nbins - np.floor(p * nbins) - 0.5 71 | di = np.floor(alphas).astype(np.int32) 72 | bins0 = bs + di 73 | bins1 = bins0 + 1 74 | 75 | alphas[alphas < 0] += 1 76 | bins0[bins0 < 0] += nbins 77 | bins1[bins1 >= nbins] -= nbins 78 | 79 | return (1 - alphas) * bin_means[bins0] + alphas * bin_means[bins1] 80 | 81 | return pred_y 82 | 83 | 84 | def var_binned(t, y, w, freq, nbins, linterp=True): 85 | ypred = binned_pdm_model(t, y, w, freq, nbins, linterp=linterp)((t * freq) % 1.0) 86 | return np.dot(w, np.power(y - ypred, 2)) 87 | 88 | 89 | def binless_pdm_cpu(t, y, w, freqs, dphi=0.05, tophat=True): 90 | # Prepare data 91 | t -= np.mean(t) 92 | y -= np.mean(y) 93 | 94 | ybar = np.dot(w, y) 95 | var = np.dot(w, np.power(y - ybar, 2)) 96 | if tophat: 97 | return [1 - var_tophat(t, y, w, freq, dphi) / var for freq in freqs] 98 | else: 99 | return [1 - var_gauss(t, y, w, freq, dphi) / var for freq in freqs] 100 | 101 | def pdm2_cpu(t, y, w, freqs, nbins=30, linterp=True): 102 | # Prepare data 103 | t -= np.mean(t) 104 | y -= np.mean(y) 105 | 106 | ybar = np.dot(w, y) 107 | var = np.dot(w, np.power(y - ybar, 2)) 108 | return [1 - var_binned(t, y, w, freq, 109 | nbins=nbins, linterp=linterp) / var 110 | for freq in freqs] 111 | 112 | 113 | def pdm2_single_freq(t, y, w, freq, nbins=30, linterp=True): 114 | # Prepare data 115 | t -= np.mean(t) 116 | y -= np.mean(y) 117 | 118 | ybar = np.dot(w, y) 119 | var = np.dot(w, np.power(y - ybar, 2)) 120 | return 1 - var_binned(t, y, w, freq, nbins=nbins, linterp=linterp) / var 121 | 122 | 123 | def pdm_async(stream, data_cpu, data_gpu, pow_cpu, function, 124 | dphi=0.05, block_size=256): 125 | t, y, w, freqs = data_cpu 126 | t_g, y_g, w_g, freqs_g, pow_g = data_gpu 127 | 128 | if t_g is None: 129 | return pow_cpu 130 | 131 | # constants 132 | nfreqs = np.int32(len(freqs)) 133 | ndata = np.int32(len(t)) 134 | dphi = np.float32(dphi) 135 | 136 | # kernel size 137 | grid_size = int(np.ceil(float(nfreqs) / block_size)) 138 | grid = (grid_size, 1) 139 | block = (block_size, 1, 1) 140 | 141 | # weights + weighted variance 142 | ybar = np.dot(w, y) 143 | var = np.float32(np.dot(w, np.power(y - ybar, 2))) 144 | 145 | # transfer data 146 | w_g.set_async(np.asarray(w).astype(np.float32), stream=stream) 147 | t_g.set_async(np.asarray(t).astype(np.float32), stream=stream) 148 | y_g.set_async(np.asarray(y).astype(np.float32), stream=stream) 149 | 150 | function.prepared_async_call(grid, block, stream, 151 | t_g.ptr, y_g.ptr, w_g.ptr, 152 | freqs_g.ptr, pow_g.ptr, 153 | ndata, nfreqs, dphi, var) 154 | 155 | pow_g.get_async(stream=stream, ary=pow_cpu) 156 | 157 | return pow_cpu 158 | 159 | 160 
| class PDMAsyncProcess(GPUAsyncProcess): 161 | 162 | def __init__(self, *args, **kwargs): 163 | super(PDMAsyncProcess, self).__init__(*args, **kwargs) 164 | warnings.warn("PDM is experimental at this point. " 165 | "Use with great caution.") 166 | 167 | def _compile_and_prepare_functions(self, nbins=10): 168 | pdm2_txt = open(find_kernel('pdm'), 'r').read() 169 | pdm2_txt = pdm2_txt.replace('//INSERT_NBINS_HERE', 170 | '#define NBINS %d' % (nbins)) 171 | 172 | self.module = SourceModule(pdm2_txt, options=['--use_fast_math']) 173 | 174 | self.dtypes = [np.intp, np.intp, np.intp, np.intp, np.intp, 175 | np.int32, np.int32, np.float32, np.float32] 176 | for function in ['pdm_binless_tophat', 'pdm_binless_gauss', 177 | 'pdm_binned_linterp_%dbins' % (nbins), 178 | 'pdm_binned_step_%dbins' % (nbins)]: 179 | func = function.replace('_%dbins' % (nbins), '') 180 | func = self.module.get_function(func).prepare(self.dtypes) 181 | self.prepared_functions[function] = func 182 | 183 | def allocate(self, data): 184 | if len(data) > len(self.streams): 185 | self._create_streams(len(data) - len(self.streams)) 186 | 187 | gpu_data, pow_cpus = [], [] 188 | 189 | for t, y, w, freqs in data: 190 | 191 | pow_cpu = cuda.aligned_zeros(shape=(len(freqs),), 192 | dtype=np.float32, 193 | alignment=resource.getpagesize()) 194 | 195 | pow_cpu = cuda.register_host_memory(pow_cpu) 196 | 197 | t_g, y_g, w_g = None, None, None 198 | if len(t) > 0: 199 | t_g, y_g, w_g = tuple([gpuarray.zeros(len(t), dtype=np.float32) 200 | for i in range(3)]) 201 | 202 | pow_g = gpuarray.zeros(len(pow_cpu), dtype=pow_cpu.dtype) 203 | freqs_g = gpuarray.to_gpu(np.asarray(freqs).astype(np.float32)) 204 | 205 | gpu_data.append((t_g, y_g, w_g, freqs_g, pow_g)) 206 | pow_cpus.append(pow_cpu) 207 | return gpu_data, pow_cpus 208 | 209 | def run(self, data, gpu_data=None, pow_cpus=None, 210 | kind='binned_linterp', nbins=10, dphi=0.05, **pdm_kwargs): 211 | 212 | if kind in ['binless_tophat', 'binless_gauss']: 213 | function = 'pdm_%s' % (kind) 214 | elif kind in ['binned_linterp','binned_step']: 215 | function = 'pdm_%s_%dbins' % (kind, nbins) 216 | else: 217 | raise KeyError('Function not available. 
Please use one of the following: ' + \
218 |                            'binless_tophat, binless_gauss, binned_linterp, binned_step')
219 |
220 |         if function not in self.prepared_functions:
221 |             self._compile_and_prepare_functions(nbins=nbins)
222 |
223 |         # Prepare data
224 |         for i,(t, y, w, freqs) in enumerate(data):
225 |             t, y, w, freqs = t.copy(), y.copy(), w.copy(), freqs.copy()
226 |             t -= np.mean(t)
227 |             y -= np.mean(y)
228 |             data[i] = t, y, w, freqs
229 |
230 |         if pow_cpus is None or gpu_data is None:
231 |             gpu_data, pow_cpus = self.allocate(data)
232 |         streams = [s for i, s in enumerate(self.streams) if i < len(data)]
233 |         func = self.prepared_functions[function]
234 |         results = [pdm_async(stream, cdat, gdat, pcpu, func, dphi=dphi, **pdm_kwargs)
235 |                    for stream, cdat, gdat, pcpu in
236 |                    zip(streams, data, gpu_data, pow_cpus)]
237 |
238 |         return results
239 |
--------------------------------------------------------------------------------
/cuvarbase/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/johnh2o2/cuvarbase/0d97ae11bea01fdfb71cfbe15059979ebfe37373/cuvarbase/tests/__init__.py
--------------------------------------------------------------------------------
/cuvarbase/tests/test_bls.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from builtins import zip
6 | from builtins import range
7 | from builtins import object
8 | from itertools import product
9 | import pytest
10 | import numpy as np
11 | from numpy.testing import assert_allclose
12 | from pycuda.tools import mark_cuda_test
13 | from ..bls import eebls_gpu, eebls_transit_gpu, \
14 |     q_transit, compile_bls, hone_solution,\
15 |     single_bls, eebls_gpu_custom, eebls_gpu_fast
16 |
17 |
18 | def transit_model(phi0, q, delta, q1=0.):
19 |     def model(t, freq, q=q, phi0=phi0, delta=delta):
20 |
21 |         phi = t * freq - phi0
22 |         phi -= np.floor(phi)
23 |
24 |         if not hasattr(t, '__iter__'):
25 |             return -delta if np.absolute(phi) < q else 0
26 |         y = np.zeros(len(t))
27 |         y[np.absolute(phi) < q] -= delta
28 |
29 |         return y
30 |     return model
31 |
32 |
33 | def plot_bls_sol(t, y, dy, freq, q, phi0):
34 |
35 |     w = np.power(dy, -2)
36 |     w /= sum(w)
37 |
38 |     phi_plot = np.linspace(0, 1, int(50. / q))
39 |
40 |     phi = (t * freq)
41 |     phi -= np.floor(phi)
42 |
43 |     dphi = phi - phi0 - np.floor(phi - phi0)
44 |     mask = dphi < q
45 |
46 |     ybt = np.dot(w[mask], y[mask]) / sum(w[mask])
47 |     yb0 = np.dot(w[~mask], y[~mask]) / sum(w[~mask])
48 |
49 |     delta = yb0 - ybt
50 |
51 |     model = transit_model(phi0, q, delta)
52 |
53 |     ym = model(phi_plot, 1.) + yb0
54 |
55 |     import matplotlib.pyplot as plt
56 |
57 |     f, ax = plt.subplots()
58 |
59 |     ax.scatter(phi[~mask], y[~mask], c='k', s=1, alpha=0.1)
60 |     ax.scatter(phi[mask], y[mask], c='g', s=1, alpha=0.8)
61 |     ax.plot(phi_plot, ym, color='r')
62 |     ax.axvline(phi0, color='k', ls=':')
63 |     ax.axvline(phi0 + q, color='k', ls=':')
64 |
65 |     plt.show()
66 |
67 |
68 | def data(seed=100, sigma=0.1, ybar=12., snr=10, ndata=200, freq=10.,
69 |          q=0.01, phi0=None, baseline=1., negative_delta=False):
70 |
71 |     rand = np.random.RandomState(seed)
72 |
73 |     if phi0 is None:
74 |         phi0 = rand.rand()
75 |
76 |     delta = snr * sigma / np.sqrt(ndata * q * (1 - q))
77 |
78 |     if negative_delta:
79 |         delta *= -1
80 |
81 |     model = transit_model(phi0, q, delta)
82 |
83 |     t = baseline * np.sort(rand.rand(ndata))
84 |     y = model(t, freq) + sigma * rand.randn(len(t))
85 |     y += ybar - np.mean(y)
86 |     err = sigma * np.ones_like(y)
87 |
88 |     return t, y, err
89 |
90 |
91 | def get_total_nbins(nbins0, nbinsf, dlogq):
92 |     nbins_tot, x = 0, 1.
93 |     while (int(x * nbins0) <= nbinsf):
94 |         nb = int(x * nbins0)
95 |         x *= 1 + dlogq
96 |
97 |         nbins_tot += nb
98 |
99 |     return nbins_tot
100 |
101 |
102 | def mod1(x):
103 |     return x - np.floor(x)
104 |
105 |
106 | def manual_binning(t, y, dy, freqs, nbins0, nbinsf, dlogq,
107 |                    phi_min, phi_max, noverlap):
108 |     """
109 |     for possible tests of the binning procedure. this
110 |     method has *not* been tested!
111 |     """
112 |
113 |     w = np.power(dy, -2)
114 |     w /= sum(w)
115 |
116 |     yw = np.multiply(y, w)
117 |
118 |     nbins_tot = get_total_nbins(nbins0, nbinsf, dlogq)
119 |
120 |     yw_bins = np.zeros(nbins_tot * len(freqs) * noverlap)
121 |     w_bins = np.zeros(nbins_tot * len(freqs) * noverlap)
122 |
123 |     dphi = 1. / noverlap
124 |     for i, freq in enumerate(freqs):
125 |         nb = nbins0
126 |         nbtot = 0
127 |         x = 1.
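        # Bin counts grow geometrically below: each pass of the while-loop
        # uses nb = int(x * nbins0) bins, inflating x by (1 + dlogq) until
        # nb would exceed nbinsf (mirroring get_total_nbins above).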
128 |         while (int(x * nbins0) <= nbinsf):
129 |             nb = int(x * nbins0)
130 |             x *= 1 + dlogq
131 |
132 |             q = 1./nb
133 |
134 |             for s in range(noverlap):
135 |                 phi = t * freq
136 |                 bf = np.floor(nb * mod1(phi - s * q * dphi))
137 |                 mask = (mod1(phi) >= phi_min) & (mod1(phi) < phi_max)  # assumed; mask was undefined
138 |                 bf += i * nbins_tot * noverlap + s * nb + noverlap * nbtot
139 |                 for b, YW, W in zip(bf[mask].astype(int), yw[mask], w[mask]):
140 |                     yw_bins[b] += YW
141 |                     w_bins[b] += W
142 |
143 |             nbtot += nb
144 |     return yw_bins, w_bins
145 |
146 |
147 | class TestBLS(object):
148 |     seed = 100
149 |     rand = np.random.RandomState(seed)
150 |     plot = False
151 |     rtol = 1e-3
152 |     atol = 1e-5
153 |
154 |     # TODO: tests that have specific bls values; test single_bls function returns
155 |     # what you expect it to for several example problems
156 |     class SolutionParams(object):
157 |         def __init__(self, freq, phi0, q, baseline, ybar, snr, negative_delta):
158 |             self.freq = freq
159 |             self.phi0 = phi0
160 |             self.q = q
161 |             self.baseline = baseline
162 |             self.ybar = ybar
163 |             self.snr = snr
164 |             self.negative_delta = negative_delta
165 |
166 |     @pytest.mark.parametrize("args", [(
167 |         SolutionParams(freq=0.3, phi0=0.5, q=0.2, baseline=365., ybar=0., snr=50.,
168 |                        negative_delta=True),
169 |         {'bls0': 0.8902446483898836, 'bls_ignore': 0}
170 |     )
171 |     ])
172 |     def test_ignore_positive_sols(self, args):
173 |         solution, bls_values = args
174 |         t, y_neg, dy = data(snr=solution.snr,
175 |                             q=solution.q,
176 |                             phi0=solution.phi0,
177 |                             freq=solution.freq,
178 |                             baseline=solution.baseline,
179 |                             ybar=solution.ybar,
180 |                             negative_delta=solution.negative_delta)
181 |
182 |         freq, q, phi0 = solution.freq, solution.q, solution.phi0
183 |
184 |         bls_default = single_bls(t, y_neg, dy, freq, q, phi0)
185 |         bls0 = single_bls(t, y_neg, dy, freq, q, phi0, ignore_negative_delta_sols=False)
186 |         bls_ignore = single_bls(t, y_neg, dy, freq, q, phi0,
187 |                                 ignore_negative_delta_sols=True)
188 |         assert np.allclose(bls_values['bls0'], bls0)
189 |         assert bls_values['bls_ignore'] == bls_ignore
190 |         assert (bls0 == bls_default)
191 |
192 |     @pytest.mark.parametrize("freq", [0.3])
193 |     @pytest.mark.parametrize("phi0", [0.0, 0.5])
194 |     @pytest.mark.parametrize("dlogq", [0.2, -1])
195 |     @pytest.mark.parametrize("nstreams", [1, 3])
196 |     @pytest.mark.parametrize("freq_batch_size", [1, 3, None])
197 |     @pytest.mark.parametrize("ignore_negative_delta_sols", [True, False])
198 |     def test_transit_parameter_consistency(self, freq, phi0, dlogq, nstreams,
199 |                                            freq_batch_size, ignore_negative_delta_sols):
200 |         q = q_transit(freq)
201 |
202 |         t, y, dy = data(snr=30, q=q, phi0=phi0, freq=freq, baseline=365.)
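        # Consistency check: re-evaluating the best (q, phi0) solution the GPU
        # returns for each frequency with the CPU single_bls should reproduce
        # the GPU power to within the tolerances below.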
203 | 204 | freqs, power, sols = eebls_transit_gpu(t, y, dy, 205 | samples_per_peak=2, 206 | freq_batch_size=freq_batch_size, 207 | nstreams=nstreams, 208 | dlogq=dlogq, 209 | ignore_negative_delta_sols=ignore_negative_delta_sols, 210 | fmin=freq * 0.99, 211 | fmax=freq * 1.01) 212 | pcpu = [single_bls(t, y, dy, x[0], *x[1], ignore_negative_delta_sols=ignore_negative_delta_sols) 213 | for x in zip(freqs, sols)] 214 | pcpu = np.asarray(pcpu) 215 | 216 | if self.plot: 217 | import matplotlib.pyplot as plt 218 | f, ax = plt.subplots() 219 | ax.plot(freqs, pcpu) 220 | ax.plot(freqs, power) 221 | plt.show() 222 | 223 | sorted_results = sorted(zip(pcpu, power, freqs, sols), 224 | key=lambda x: -abs(x[1] - x[0])) 225 | 226 | for i, (pcs, pgs, freq, (qs, phs)) in enumerate(sorted_results): 227 | if i > 10: 228 | break 229 | print(pcs, pgs, (qs, phs)) 230 | if self.plot: 231 | plot_bls_sol(t, y, dy, freq, qs, phs) 232 | 233 | pows, diffs = list(zip(*sorted(zip(pcpu, 234 | np.absolute(power - pcpu)), 235 | key=lambda x: -x[1]))) 236 | 237 | upper_bound = self.rtol * np.array(pows) + self.atol 238 | mostly_ok = sum(np.array(diffs) > upper_bound) / len(pows) < 1e-2 239 | not_too_bad = max(diffs) < 1e-1 240 | 241 | print(max(diffs)) 242 | assert mostly_ok and not_too_bad 243 | 244 | @pytest.mark.parametrize("freq", [1.0]) 245 | @pytest.mark.parametrize("phi_index", [0, 10]) 246 | @pytest.mark.parametrize("q_index", [0, 5]) 247 | @pytest.mark.parametrize("nstreams", [1, 3]) 248 | @pytest.mark.parametrize("freq_batch_size", [1, 3, None]) 249 | @pytest.mark.parametrize("ignore_negative_delta_sols", [True, False]) 250 | def test_custom(self, freq, q_index, phi_index, freq_batch_size, nstreams, 251 | ignore_negative_delta_sols): 252 | q_values = np.logspace(-1.1, -0.8, num=10) 253 | phi_values = np.linspace(0, 1, int(np.ceil(2./min(q_values)))) 254 | 255 | q = q_values[q_index] 256 | phi = phi_values[phi_index] 257 | 258 | t, y, dy = data(snr=10, q=q, phi0=phi, freq=freq, 259 | baseline=365., ndata=500) 260 | 261 | df = min(q_values) / (10 * (max(t) - min(t))) 262 | freqs = np.linspace(freq - 10 * df, freq + 10 * df, 20) 263 | 264 | power, gsols = eebls_gpu_custom(t, y, dy, freqs, 265 | q_values, phi_values, 266 | ignore_negative_delta_sols=ignore_negative_delta_sols, 267 | freq_batch_size=freq_batch_size, 268 | nstreams=nstreams) 269 | 270 | for freq, (qg, phg), gpower in zip(freqs, gsols, power): 271 | q_and_phis = product(q_values, phi_values) 272 | 273 | best_q, best_phi, best_p = None, None, None 274 | for Q, PHI in q_and_phis: 275 | p = single_bls(t, y, dy, freq, Q, PHI, 276 | ignore_negative_delta_sols=ignore_negative_delta_sols) 277 | if best_p is None or p > best_p: 278 | best_p = p 279 | best_q = Q 280 | best_phi = PHI 281 | 282 | assert np.abs(best_p - gpower) < 1e-5 283 | 284 | @pytest.mark.parametrize("freq", [1.0]) 285 | @pytest.mark.parametrize("phi_index", [0, 10, -1]) 286 | @pytest.mark.parametrize("q_index", [0, 5, -1]) 287 | @pytest.mark.parametrize("nstreams", [1, 3]) 288 | @pytest.mark.parametrize("freq_batch_size", [1, 3, None]) 289 | @pytest.mark.parametrize("ignore_negative_delta_sols", [True, False]) 290 | def test_standard(self, freq, q_index, phi_index, nstreams, freq_batch_size, 291 | ignore_negative_delta_sols): 292 | 293 | q_values = np.logspace(-1.5, np.log10(0.1), num=100) 294 | phi_values = np.linspace(0, 1, int(np.ceil(2./min(q_values)))) 295 | 296 | q = q_values[q_index] 297 | phi = phi_values[phi_index] 298 | 299 | t, y, dy = data(snr=10, q=q, phi0=phi, freq=freq, 300 | 
baseline=365.) 301 | 302 | df = min(q_values) / (10 * (max(t) - min(t))) 303 | 304 | delta_f = 5 * df / freq 305 | freqs = np.linspace(freq * (1 - delta_f), 306 | (1 + delta_f) * freq, 307 | int(5. * 2 * delta_f * freq / df)) 308 | power, gsols = eebls_gpu(t, y, dy, freqs, 309 | qmin=0.1 * q, qmax=2.0 * q, 310 | nstreams=nstreams, noverlap=2, dlogq=0.5, 311 | freq_batch_size=freq_batch_size, 312 | ignore_negative_delta_sols=ignore_negative_delta_sols) 313 | 314 | bls_c = [single_bls(t, y, dy, x[0], *x[1], 315 | ignore_negative_delta_sols=ignore_negative_delta_sols) 316 | for x in zip(freqs, gsols)] 317 | if self.plot: 318 | import matplotlib.pyplot as plt 319 | f, ax = plt.subplots() 320 | 321 | ax.plot(freqs, bls_c) 322 | ax.plot(freqs, power) 323 | 324 | plt.show() 325 | 326 | inds = sorted(np.arange(len(power)), 327 | key=lambda i: -abs(power[i] - bls_c[i])) 328 | 329 | all_qs, all_phis = zip(*gsols) 330 | 331 | for i in inds[:100]: 332 | qs, phis = gsols[i] 333 | print(power[i], bls_c[i], abs(power[i] - bls_c[i]), 334 | qs, phis) 335 | #plot_bls_sol(t, y, dy, freqs[i], qs, phis) 336 | 337 | pows, diffs = list(zip(*sorted(zip(bls_c, np.absolute(power - bls_c)), 338 | key=lambda x: -x[1]))) 339 | 340 | upper_bound = self.rtol * np.array(pows) + self.atol 341 | mostly_ok = sum(np.array(diffs) > upper_bound) / len(pows) < 1e-2 342 | not_too_bad = max(diffs) < 1e-1 343 | 344 | print(diffs[0], pows[0]) 345 | assert mostly_ok and not_too_bad 346 | # assert_allclose(bls_c, power, rtol=1e-3, atol=1e-5) 347 | 348 | @pytest.mark.parametrize("freq", [1.0]) 349 | @pytest.mark.parametrize("dlogq", [0.5, -1.0]) 350 | @pytest.mark.parametrize("freq_batch_size", [1, 10, None]) 351 | @pytest.mark.parametrize("phi0", [0.0]) 352 | @pytest.mark.parametrize("use_fast", [True, False]) 353 | @pytest.mark.parametrize("nstreams", [1, 4]) 354 | @pytest.mark.parametrize("ignore_negative_delta_sols", [True, False]) 355 | def test_transit(self, freq, use_fast, freq_batch_size, nstreams, phi0, dlogq, 356 | ignore_negative_delta_sols): 357 | q = q_transit(freq) 358 | samples_per_peak = 2 359 | noverlap = 2 360 | 361 | t, y, err = data(snr=10, q=q, phi0=phi0, freq=freq, 362 | baseline=365.) 
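        # With use_fast=True, eebls_transit_gpu returns only (freqs, power), so
        # the fast kernel is validated by comparing its peak frequency against
        # the slow path rather than against per-frequency CPU values.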
363 | 364 | kw = dict(samples_per_peak=samples_per_peak, 365 | freq_batch_size=freq_batch_size, dlogq=dlogq, 366 | ignore_negative_delta_sols=ignore_negative_delta_sols, 367 | nstreams=nstreams, noverlap=noverlap, 368 | fmin=0.9 * freq, fmax=1.1 * freq, 369 | use_fast=use_fast) 370 | 371 | if use_fast: 372 | freqs, power = eebls_transit_gpu(t, y, err, **kw) 373 | 374 | kw['use_fast'] = False 375 | freqs, power_slow, sols = eebls_transit_gpu(t, y, err, **kw) 376 | kw['use_fast'] = True 377 | dfsol = freqs[np.argmax(power)] - freqs[np.argmax(power_slow)] 378 | close_enough = abs(dfsol) * (max(t) - min(t)) / q < 3 379 | if not close_enough and self.plot: 380 | import matplotlib.pyplot as plt 381 | plt.plot(freqs, power, alpha=0.5) 382 | plt.plot(freqs, power_slow, alpha=0.5) 383 | plt.show() 384 | 385 | assert(close_enough) 386 | return 387 | 388 | freqs, power, sols = eebls_transit_gpu(t, y, err, **kw) 389 | power_cpu = np.array([single_bls(t, y, err, x[0], *x[1], 390 | ignore_negative_delta_sols=ignore_negative_delta_sols) 391 | for x in zip(freqs, sols)]) 392 | 393 | if self.plot: 394 | import matplotlib.pyplot as plt 395 | f, ax = plt.subplots() 396 | 397 | ax.plot(freqs, power_cpu) 398 | ax.plot(freqs, power) 399 | 400 | pows, diffs = list(zip(*sorted(zip(power_cpu, power - power_cpu), 401 | key=lambda x: -abs(x[1])))) 402 | print(list(zip(pows[:10], diffs[:10]))) 403 | plt.show() 404 | 405 | diffs = np.absolute(power - power_cpu) 406 | upper_bound = 1e-3 * np.array(power_cpu) + 1e-5 407 | mostly_ok = sum(np.array(diffs) > upper_bound) / len(diffs) < 1e-2 408 | not_too_bad = max(diffs) < 1e-1 409 | 410 | print(max(diffs)) 411 | assert mostly_ok and not_too_bad 412 | 413 | @pytest.mark.parametrize("freq", [1.0]) 414 | @pytest.mark.parametrize("q", [0.1]) 415 | @pytest.mark.parametrize("phi0", [0.0]) 416 | @pytest.mark.parametrize("dphi", [0.0, 1.0]) 417 | @pytest.mark.parametrize("freq_batch_size", [None, 100]) 418 | @pytest.mark.parametrize("dlogq", [0.5, -1.0]) 419 | @pytest.mark.parametrize("ignore_negative_delta_sols", [True, False]) 420 | def test_fast_eebls(self, freq, q, phi0, freq_batch_size, dlogq, dphi, 421 | ignore_negative_delta_sols, **kwargs): 422 | t, y, err = data(snr=50, q=q, phi0=phi0, freq=freq, 423 | baseline=365.) 424 | 425 | df = 0.25 * q / (max(t) - min(t)) 426 | fmin = 0.9 * freq 427 | fmax = 1.1 * freq 428 | nf = int(np.ceil((fmax - fmin) / df)) 429 | freqs = fmin + df * np.arange(nf) 430 | 431 | kw = dict(qmin=1e-2, qmax=0.5, dphi=dphi, 432 | ignore_negative_delta_sols=ignore_negative_delta_sols, 433 | freq_batch_size=freq_batch_size, dlogq=dlogq) 434 | 435 | kw.update(kwargs) 436 | 437 | power = eebls_gpu_fast(t, y, err, freqs, **kw) 438 | 439 | power0, sols = eebls_gpu(t, y, err, freqs, **kw) 440 | if self.plot: 441 | import matplotlib.pyplot as plt 442 | f, ax = plt.subplots() 443 | ax.plot(freqs, power, alpha=0.5) 444 | ax.axvline(freq, ls=':', color='k') 445 | ax.plot(freqs, power0, alpha=0.5) 446 | ax.set_yscale('log') 447 | plt.show() 448 | 449 | # this is janky. 
Need better test 450 | # to ensure we're getting the best results, 451 | # but no apples-to-apples comparison is 452 | # possible for eebls_gpu and eebls_gpu_fast 453 | fmax_fast = freqs[np.argmax(power)] 454 | fmax_regular = freqs[np.argmax(power0)] 455 | assert(abs(fmax_fast - fmax_regular) * (max(t) - min(t)) / q < 3) 456 | -------------------------------------------------------------------------------- /cuvarbase/tests/test_ce.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from builtins import zip 6 | from builtins import range 7 | from builtins import object 8 | import pytest 9 | from pycuda.tools import mark_cuda_test 10 | import numpy as np 11 | from numpy.testing import assert_allclose 12 | from ..ce import ConditionalEntropyAsyncProcess 13 | lsrtol = 1E-2 14 | lsatol = 1E-5 15 | seed = 100 16 | 17 | rand = np.random.RandomState(seed) 18 | 19 | 20 | def data(sigma=0.1, ndata=500, freq=3., snr=1000, t0=0.): 21 | 22 | t = np.sort(rand.rand(ndata)) + t0 23 | y = snr * sigma * np.cos(2 * np.pi * freq * t) / np.sqrt(len(t)) 24 | 25 | y += sigma * rand.randn(len(t)) 26 | 27 | err = sigma * np.ones_like(y) 28 | 29 | return t, y, err 30 | 31 | 32 | def assert_similar(pdg0, pdg, top=5): 33 | inds = (np.argsort(pdg0)[::-1])[:top] 34 | 35 | p0 = np.asarray(pdg0)[inds] 36 | p = np.asarray(pdg)[inds] 37 | diff = np.absolute(p - p0) 38 | 39 | assert(all(diff < lsrtol * 0.5 * (p + p0) + lsatol)) 40 | 41 | 42 | class TestCE(object): 43 | plot = False 44 | 45 | @pytest.mark.parametrize('ndatas', [1, 5, 10]) 46 | def test_multiple_datasets(self, ndatas, **kwargs): 47 | datas = [data() for i in range(ndatas)] 48 | proc = ConditionalEntropyAsyncProcess(**kwargs) 49 | 50 | df = 0.02 51 | max_freq = 1.1 52 | min_freq = 0.9 53 | nf = int((max_freq - min_freq) / df) 54 | freqs = min_freq + df * np.arange(nf) 55 | 56 | mult_results = proc.run(datas, freqs=freqs) 57 | proc.finish() 58 | 59 | sing_results = [] 60 | 61 | for d in datas: 62 | sing_results.extend(proc.run([d], freqs=freqs)) 63 | proc.finish() 64 | 65 | for rb, rnb in zip(mult_results, sing_results): 66 | fb, pb = rb 67 | fnb, pnb = rnb 68 | 69 | assert(not any(np.isnan(pb))) 70 | assert(not any(np.isnan(pnb))) 71 | 72 | assert_allclose(pnb, pb, rtol=lsrtol, atol=lsatol) 73 | assert_allclose(fnb, fb, rtol=lsrtol, atol=lsatol) 74 | 75 | @pytest.mark.parametrize('ndatas', [1, 7]) 76 | @pytest.mark.parametrize('batch_size', [1, 3]) 77 | @pytest.mark.parametrize('use_double', [True, False]) 78 | @pytest.mark.parametrize('use_fast,weighted,shmem_lc,freq_batch_size', 79 | [(True, False, False, 1), 80 | (True, False, True, None), 81 | (False, True, False, None), 82 | (False, False, False, None)]) 83 | @pytest.mark.parametrize('phase_bins,phase_overlap', 84 | [(10, 1)]) 85 | @pytest.mark.parametrize('mag_bins,mag_overlap', 86 | [(5, 0)]) 87 | def test_batched_run(self, ndatas, batch_size, use_double, 88 | mag_bins, phase_bins, mag_overlap, 89 | phase_overlap, use_fast, 90 | shmem_lc, weighted, 91 | freq_batch_size): 92 | 93 | datas = [data(ndata=rand.randint(50, 100)) 94 | for i in range(ndatas)] 95 | kwargs = dict(use_double=use_double, 96 | mag_bins=mag_bins, 97 | phase_bins=phase_bins, 98 | phase_overlap=phase_overlap, 99 | mag_overlap=mag_overlap, 100 | use_fast=use_fast, 101 | weighted=weighted) 102 | proc = ConditionalEntropyAsyncProcess(**kwargs) 103 | df = 0.02 104 | max_freq = 1.1 
105 | min_freq = 0.9 106 | nf = int((max_freq - min_freq) / df) 107 | freqs = min_freq + df * np.arange(nf) 108 | 109 | run_kw = dict(shmem_lc=shmem_lc, freqs=freqs, 110 | freq_batch_size=freq_batch_size) 111 | batched_results = proc.batched_run(datas, **run_kw) 112 | proc.finish() 113 | 114 | non_batched_results = [] 115 | for d in datas: 116 | r = proc.run([d], freqs=freqs) 117 | proc.finish() 118 | non_batched_results.extend(r) 119 | 120 | for rb, rnb in zip(batched_results, non_batched_results): 121 | fb, pb = rb 122 | fnb, pnb = rnb 123 | 124 | assert(not any(np.isnan(pb))) 125 | assert(not any(np.isnan(pnb))) 126 | 127 | assert_allclose(pnb, pb, rtol=lsrtol, atol=lsatol) 128 | assert_allclose(fnb, fb, rtol=lsrtol, atol=lsatol) 129 | 130 | @pytest.mark.parametrize('ndatas', [1, 7]) 131 | @pytest.mark.parametrize('batch_size', [1, 3]) 132 | @pytest.mark.parametrize('use_double', [True, False]) 133 | @pytest.mark.parametrize('use_fast,weighted,shmem_lc,freq_batch_size', 134 | [(True, False, False, 1), 135 | (True, False, True, None), 136 | (False, True, False, None), 137 | (False, False, False, None)]) 138 | @pytest.mark.parametrize('phase_bins,phase_overlap', 139 | [(10, 1)]) 140 | @pytest.mark.parametrize('mag_bins,mag_overlap', 141 | [(5, 0)]) 142 | def test_batched_run_const_nfreq(self, ndatas, batch_size, use_double, 143 | mag_bins, phase_bins, mag_overlap, 144 | phase_overlap, use_fast, weighted, 145 | shmem_lc, freq_batch_size): 146 | frequencies = np.sort(10 + rand.rand(ndatas) * 100.) 147 | datas = [data(ndata=rand.randint(50, 100), 148 | freq=freq) 149 | for i, freq in enumerate(frequencies)] 150 | 151 | kwargs = dict(use_double=use_double, 152 | mag_bins=mag_bins, 153 | phase_bins=phase_bins, 154 | phase_overlap=phase_overlap, 155 | mag_overlap=mag_overlap, 156 | use_fast=use_fast) 157 | proc = ConditionalEntropyAsyncProcess(**kwargs) 158 | 159 | df = 0.02 160 | max_freq = 1.1 161 | min_freq = 0.9 162 | nf = int((max_freq - min_freq) / df) 163 | freqs = min_freq + df * np.arange(nf) 164 | 165 | run_kw = dict(shmem_lc=shmem_lc, freqs=freqs, 166 | freq_batch_size=freq_batch_size) 167 | batched_results = proc.batched_run_const_nfreq(datas, **run_kw) 168 | proc.finish() 169 | 170 | procnb = ConditionalEntropyAsyncProcess(**kwargs) 171 | 172 | non_batched_results = [] 173 | for d, (frq, p) in zip(datas, batched_results): 174 | r = procnb.run([d], **run_kw) 175 | procnb.finish() 176 | non_batched_results.extend(r) 177 | 178 | for f0, (fb, pb), (fnb, pnb) in zip(frequencies, batched_results, 179 | non_batched_results): 180 | 181 | if self.plot: 182 | import matplotlib.pyplot as plt 183 | plt.plot(fnb, pnb, color='k', lw=3) 184 | plt.plot(fb, pb, color='r') 185 | plt.axvline(f0) 186 | plt.show() 187 | assert(not any(np.isnan(pb))) 188 | assert(not any(np.isnan(pnb))) 189 | 190 | assert_allclose(pnb, pb, rtol=lsrtol, atol=lsatol) 191 | assert_allclose(fnb, fb, rtol=lsrtol, atol=lsatol) 192 | 193 | @pytest.mark.parametrize('use_double', [True, False]) 194 | @pytest.mark.parametrize('use_fast,weighted,shmem_lc,freq_batch_size', 195 | [(True, False, False, 1), 196 | (True, False, True, None), 197 | (False, True, False, None), 198 | (False, False, False, None)]) 199 | @pytest.mark.parametrize('phase_bins,phase_overlap', 200 | [(10, 1)]) 201 | @pytest.mark.parametrize('mag_bins,mag_overlap', 202 | [(5, 0)]) 203 | @pytest.mark.parametrize('freq', [10.0]) 204 | @pytest.mark.parametrize('t0', [0.0]) 205 | @pytest.mark.parametrize('balanced_magbins', [True, False]) 206 | def 
test_inject_and_recover(self, freq, 207 | use_double, mag_bins, phase_bins, mag_overlap, 208 | phase_overlap, use_fast, t0, balanced_magbins, 209 | weighted, shmem_lc, freq_batch_size): 210 | 211 | kwargs = dict(use_double=use_double, 212 | mag_bins=mag_bins, 213 | phase_bins=phase_bins, 214 | phase_overlap=phase_overlap, 215 | mag_overlap=mag_overlap, 216 | use_fast=use_fast, 217 | balanced_magbins=balanced_magbins, 218 | weighted=weighted) 219 | proc = ConditionalEntropyAsyncProcess(**kwargs) 220 | t, y, err = data(freq=freq, t0=t0) 221 | 222 | df = 1. / (max(t) - min(t)) / 10 223 | max_freq = 1.1 * freq 224 | min_freq = 0.9 * freq 225 | nf = int((max_freq - min_freq) / df) 226 | freqs = min_freq + df * np.arange(nf) 227 | 228 | run_kw = dict(shmem_lc=shmem_lc, freq_batch_size=freq_batch_size) 229 | results = proc.large_run([(t, y, err)], 230 | freqs=freqs, **run_kw) 231 | proc.finish() 232 | frq, p = results[0] 233 | best_freq = frq[np.argmin(p)] 234 | 235 | if self.plot: 236 | import matplotlib.pyplot as plt 237 | f, ax = plt.subplots() 238 | ax.plot(frq, p) 239 | ax.axvline(freq, ls='-', color='k') 240 | ax.axvline(best_freq, ls=':', color='r') 241 | plt.show() 242 | 243 | # print best_freq, freq, abs(best_freq - freq) / freq 244 | assert(not any(np.isnan(p))) 245 | assert(abs(best_freq - freq) / freq < 3E-2) 246 | 247 | def test_large_run(self, make_plot=False, **kwargs): 248 | proc = ConditionalEntropyAsyncProcess(**kwargs) 249 | t, y, dy = data(sigma=0.01, ndata=100, freq=4.) 250 | df = 0.001 251 | max_freq = 100. 252 | min_freq = df 253 | nf = int((max_freq - min_freq) / df) 254 | freqs = min_freq + df * np.arange(nf) 255 | 256 | r0 = proc.run([(t, y, dy)], freqs=freqs) 257 | r1 = proc.large_run([(t, y, dy)], freqs=freqs, max_memory=1e7) 258 | 259 | f0, p0 = r0[0] 260 | f1, p1 = r1[0] 261 | 262 | rel_err = max(np.absolute(p0 - p1)) / np.median(np.absolute(p0)) 263 | print(max(np.absolute(p0 - p1)), rel_err) 264 | assert_allclose(p0, p1, rtol=1e-4, atol=1e-2) 265 | 266 | @pytest.mark.parametrize('use_double', [True, False]) 267 | @pytest.mark.parametrize('use_fast,weighted,shmem_lc,freq_batch_size', 268 | [(True, False, False, 1)]) 269 | @pytest.mark.parametrize('phase_bins,phase_overlap', 270 | [(10, 1)]) 271 | @pytest.mark.parametrize('mag_bins,mag_overlap', 272 | [(5, 0)]) 273 | @pytest.mark.parametrize('freq', [10.0]) 274 | @pytest.mark.parametrize('balanced_magbins', [True, False]) 275 | def test_time_shift_invariance(self, freq, 276 | use_double, mag_bins, phase_bins, 277 | mag_overlap, phase_overlap, use_fast, 278 | balanced_magbins, weighted, 279 | shmem_lc, freq_batch_size): 280 | 281 | kwargs = dict(use_double=use_double, 282 | mag_bins=mag_bins, 283 | phase_bins=phase_bins, 284 | phase_overlap=phase_overlap, 285 | mag_overlap=mag_overlap, 286 | use_fast=use_fast, 287 | balanced_magbins=balanced_magbins, 288 | weighted=weighted) 289 | proc = ConditionalEntropyAsyncProcess(**kwargs) 290 | 291 | run_kw = dict(shmem_lc=shmem_lc, freq_batch_size=freq_batch_size) 292 | for t0 in [-1e4, 1e4]: 293 | t, y, err = data(freq=freq) 294 | 295 | df = 1. 
/ (max(t) - min(t)) / 10 296 | max_freq = 1.1 * freq 297 | min_freq = 0.9 * freq 298 | nf = int((max_freq - min_freq) / df) 299 | 300 | freqs = min_freq + df * np.arange(nf) 301 | 302 | results = proc.run([(t, y, err)], freqs=freqs, **run_kw) 303 | proc.finish() 304 | frq, p = results[0] 305 | 306 | results_shift = proc.run([(t + t0, y, err)], freqs=freqs, **run_kw) 307 | frq_shft, p_shft = results_shift[0] 308 | 309 | best_freq = frq[np.argmin(p)] 310 | best_freq_shft = frq_shft[np.argmin(p_shft)] 311 | 312 | if self.plot: 313 | import matplotlib.pyplot as plt 314 | f, ax = plt.subplots() 315 | ax.plot(frq, p) 316 | ax.plot(frq_shft, p_shft) 317 | ax.axvline(freq, ls='-', color='k') 318 | ax.axvline(best_freq, ls=':', color='r') 319 | plt.show() 320 | 321 | assert(not any(np.isnan(p))) 322 | assert(not any(np.isnan(p_shft))) 323 | 324 | baseline = max(t) - min(t) 325 | delta_f = abs(best_freq - best_freq_shft) 326 | top_freq_is_close = delta_f * baseline < 1 327 | 328 | diffs = np.absolute(p - p_shft) 329 | atol, rtol = 1e-1 * max(np.absolute(p)), 2e-1 330 | upper_limit = atol + rtol * np.absolute(p) 331 | 332 | pct_out_of_bounds = sum(diffs > upper_limit) / len(diffs) 333 | 334 | print(pct_out_of_bounds, delta_f * baseline) 335 | assert(top_freq_is_close and pct_out_of_bounds < 5e-2) 336 | 337 | @pytest.mark.parametrize('use_double', [True, False]) 338 | @pytest.mark.parametrize('shmem_lc', [True, False]) 339 | @pytest.mark.parametrize('freq_batch_size', [1, None]) 340 | @pytest.mark.parametrize('phase_bins,phase_overlap,mag_bins,mag_overlap', 341 | [(10, 0, 5, 0), (10, 1, 5, 1)]) 342 | @pytest.mark.parametrize('freq', [12.0]) 343 | @pytest.mark.parametrize('t0', [0.0]) 344 | #@pytest.mark.parametrize('balanced_magbins', [True, False]) 345 | @pytest.mark.parametrize('balanced_magbins', [False]) 346 | @pytest.mark.parametrize('weighted', [False]) 347 | @pytest.mark.parametrize('force_nblocks', [1, None]) 348 | @pytest.mark.parametrize('ndata', [300]) 349 | def test_fast(self, freq, use_double, mag_bins, phase_bins, mag_overlap, 350 | phase_overlap, t0, balanced_magbins, weighted, 351 | shmem_lc, freq_batch_size, force_nblocks, ndata): 352 | 353 | kwargs = dict(use_double=use_double, 354 | mag_bins=mag_bins, 355 | phase_bins=phase_bins, 356 | phase_overlap=phase_overlap, 357 | mag_overlap=mag_overlap, 358 | balanced_magbins=balanced_magbins, 359 | weighted=weighted) 360 | proc_fast = ConditionalEntropyAsyncProcess(use_fast=True, **kwargs) 361 | proc_slow = ConditionalEntropyAsyncProcess(use_fast=False, **kwargs) 362 | t, y, err = data(freq=freq, t0=t0, ndata=ndata) 363 | 364 | df = 1. 
/ (max(t) - min(t)) / 10 365 | max_freq = 1.1 * freq 366 | min_freq = 0.9 * freq 367 | nf = int((max_freq - min_freq) / df) 368 | freqs = min_freq + df * np.arange(nf) 369 | 370 | run_kw = dict(shmem_lc=shmem_lc, 371 | freq_batch_size=freq_batch_size, 372 | force_nblocks=force_nblocks) 373 | results_fast = proc_fast.run([(t + t0, y, err)], freqs=freqs, 374 | **run_kw) 375 | proc_fast.finish() 376 | frq_fast, p_fast = results_fast[0] 377 | 378 | results_slow = proc_slow.run([(t + t0, y, err)], freqs=freqs) 379 | proc_slow.finish() 380 | frq_slow, p_slow = results_slow[0] 381 | 382 | max_diff = 2e-2 * max(np.absolute(p_slow)) 383 | if self.plot and \ 384 | not all(np.absolute(p_slow - p_fast) < max_diff): 385 | import matplotlib.pyplot as plt 386 | 387 | f, ax = plt.subplots() 388 | ax.plot(frq_slow, p_slow, alpha=0.5) 389 | ax.plot(frq_fast, p_fast, alpha=0.5) 390 | ax.axvline(freq, ls='-', color='k') 391 | plt.show() 392 | 393 | f, ax = plt.subplots() 394 | ax.plot(frq_slow, (p_slow - p_fast) / max(np.absolute(p_slow))) 395 | ax.axvline(freq, ls='-', color='k') 396 | plt.show() 397 | # print best_freq, freq, abs(best_freq - freq) / freq 398 | assert(not any(np.isnan(p_slow))) 399 | assert(not any(np.isnan(p_fast))) 400 | assert_allclose(p_slow, p_fast, atol=2e-2 * max(np.absolute(p_slow))) 401 | -------------------------------------------------------------------------------- /cuvarbase/tests/test_lombscargle.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from builtins import zip 6 | from builtins import range 7 | from builtins import object 8 | import numpy as np 9 | import pytest 10 | 11 | from numpy.testing import assert_allclose 12 | from astropy.timeseries import LombScargle 13 | 14 | from ..lombscargle import LombScargleAsyncProcess 15 | from pycuda.tools import mark_cuda_test 16 | #import pycuda.autoinit 17 | import pycuda.autoprimaryctx 18 | spp = 3 19 | nfac = 3 20 | lsrtol = 1E-2 21 | lsatol = 1E-2 22 | nfft_sigma = 5 23 | 24 | rand = np.random.RandomState(100) 25 | 26 | 27 | def data(seed=100, sigma=0.1, ndata=100, freq=3.): 28 | t = np.sort(rand.rand(ndata)) 29 | y = np.cos(2 * np.pi * freq * t) 30 | 31 | y += sigma * rand.randn(len(t)) 32 | 33 | err = sigma * np.ones_like(y) 34 | 35 | return t, y, err 36 | 37 | 38 | def assert_similar(pdg0, pdg, top=5): 39 | inds = (np.argsort(pdg0)[::-1])[:top] 40 | 41 | p0 = np.asarray(pdg0)[inds] 42 | p = np.asarray(pdg)[inds] 43 | diff = np.absolute(p - p0) 44 | 45 | res = sorted(zip(p0, p, diff), key=lambda x: -x[2]) 46 | 47 | for p0v, pv, dv in res: 48 | if dv > 1e-3: 49 | print(p0v, pv, dv) 50 | 51 | assert_allclose(p, p0, atol=lsatol, rtol=lsrtol) 52 | assert(all(diff < lsrtol * 0.5 * (p + p0) + lsatol)) 53 | 54 | 55 | class TestLombScargle(object): 56 | def test_against_astropy_double(self): 57 | t, y, err = data() 58 | ls_proc = LombScargleAsyncProcess(use_double=True, 59 | sigma=nfft_sigma) 60 | 61 | results = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 62 | use_fft=True, 63 | samples_per_peak=spp) 64 | ls_proc.finish() 65 | 66 | fgpu, pgpu = results[0] 67 | 68 | power = LombScargle(t, y, err).power(fgpu) 69 | 70 | assert_similar(power, pgpu) 71 | 72 | def test_against_astropy_single(self): 73 | t, y, err = data() 74 | ls_proc = LombScargleAsyncProcess(use_double=False, 75 | sigma=nfft_sigma) 76 | 77 | results = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 78 | 
samples_per_peak=spp) 79 | ls_proc.finish() 80 | fgpu, pgpu = results[0] 81 | 82 | power = LombScargle(t, y, err).power(fgpu) 83 | 84 | assert_similar(power, pgpu) 85 | 86 | def test_ls_kernel(self): 87 | t, y, err = data() 88 | ls_proc = LombScargleAsyncProcess(use_double=False, 89 | sigma=nfft_sigma) 90 | 91 | results = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 92 | samples_per_peak=spp) 93 | ls_proc.finish() 94 | fgpu, pgpu = results[0] 95 | 96 | ls = LombScargle(t, y, err, fit_mean=True, center_data=False) 97 | power = ls.power(fgpu) 98 | 99 | assert_similar(power, pgpu) 100 | 101 | def test_ls_kernel_direct_sums(self): 102 | t, y, err = data() 103 | ls_proc = LombScargleAsyncProcess(use_double=True, 104 | sigma=nfft_sigma) 105 | 106 | results = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 107 | samples_per_peak=spp, use_fft=False) 108 | ls_proc.finish() 109 | fgpu, pgpu = results[0] 110 | 111 | ls = LombScargle(t, y, err, fit_mean=True, center_data=True) 112 | power = ls.power(fgpu) 113 | 114 | assert_similar(power, pgpu) 115 | 116 | def test_ls_kernel_direct_sums_is_consistent(self): 117 | t, y, err = data() 118 | ls_proc = LombScargleAsyncProcess(use_double=False, 119 | sigma=nfft_sigma) 120 | 121 | results_ds = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 122 | samples_per_peak=spp, use_fft=False) 123 | ls_proc.finish() 124 | 125 | fgpu_ds, pgpu_ds = results_ds[0] 126 | 127 | results_reg = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 128 | samples_per_peak=spp, use_cpu_nfft=True) 129 | ls_proc.finish() 130 | 131 | fgpu_reg, pgpu_reg = results_reg[0] 132 | 133 | assert_similar(pgpu_reg, pgpu_ds) 134 | 135 | def test_ls_kernel_direct_sums_against_python(self): 136 | 137 | t, y, err = data() 138 | ls_proc = LombScargleAsyncProcess(use_double=False, sigma=nfft_sigma) 139 | 140 | result_ds = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 141 | samples_per_peak=spp, use_fft=False) 142 | ls_proc.finish() 143 | 144 | fgpu_ds, pgpu_ds = result_ds[0] 145 | 146 | result_reg = ls_proc.run([(t, y, err)], nyquist_factor=nfac, 147 | samples_per_peak=spp, 148 | use_fft=False, 149 | python_dir_sums=True) 150 | ls_proc.finish() 151 | fgpu_reg, pgpu_reg = result_reg[0] 152 | 153 | assert_similar(pgpu_reg, pgpu_ds) 154 | 155 | def test_multiple_datasets(self, ndatas=5): 156 | datas = [data() for i in range(ndatas)] 157 | ls_proc = LombScargleAsyncProcess(sigma=nfft_sigma) 158 | 159 | mult_results = ls_proc.run(datas, nyquist_factor=nfac, 160 | samples_per_peak=spp) 161 | ls_proc.finish() 162 | 163 | sing_results = [] 164 | 165 | for d in datas: 166 | sing_results.extend(ls_proc.run([d], nyquist_factor=nfac, 167 | samples_per_peak=spp)) 168 | ls_proc.finish() 169 | 170 | for rb, rnb in zip(mult_results, sing_results): 171 | fb, pb = rb 172 | fnb, pnb = rnb 173 | 174 | assert_allclose(pnb, pb, rtol=lsrtol, atol=lsatol) 175 | assert_allclose(fnb, fb, rtol=lsrtol, atol=lsatol) 176 | 177 | def test_batched_run(self, ndatas=5, batch_size=5, sigma=nfft_sigma, 178 | samples_per_peak=spp, nyquist_factor=nfac, 179 | **kwargs): 180 | 181 | datas = [data(ndata=rand.randint(50, 100)) 182 | for i in range(ndatas)] 183 | ls_proc = LombScargleAsyncProcess(sigma=sigma, **kwargs) 184 | 185 | kw = dict(nyquist_factor=nyquist_factor, 186 | samples_per_peak=samples_per_peak) 187 | 188 | batched_results = ls_proc.batched_run(datas, **kw) 189 | ls_proc.finish() 190 | 191 | non_batched_results = [] 192 | for d in datas: 193 | r = ls_proc.run([d], nyquist_factor=nyquist_factor, 194 | 
samples_per_peak=samples_per_peak) 195 | ls_proc.finish() 196 | non_batched_results.extend(r) 197 | 198 | for rb, rnb in zip(batched_results, non_batched_results): 199 | fb, pb = rb 200 | fnb, pnb = rnb 201 | 202 | assert_allclose(pnb, pb, rtol=lsrtol, atol=lsatol) 203 | assert_allclose(fnb, fb, rtol=lsrtol, atol=lsatol) 204 | 205 | def test_batched_run_const_nfreq(self, make_plot=False, ndatas=27, 206 | batch_size=5, sigma=nfft_sigma, 207 | samples_per_peak=spp, 208 | nyquist_factor=nfac, 209 | **kwargs): 210 | 211 | frequencies = 10 + rand.rand(ndatas) * 100. 212 | datas = [data(ndata=rand.randint(50, 100), 213 | freq=freq) 214 | for i, freq in enumerate(frequencies)] 215 | ls_proc = LombScargleAsyncProcess(sigma=sigma, **kwargs) 216 | 217 | kw = dict(samples_per_peak=spp, 218 | batch_size=batch_size) 219 | kw.update(kwargs) 220 | batched_results = ls_proc.batched_run_const_nfreq(datas, **kw) 221 | ls_proc.finish() 222 | 223 | ls_procnb = LombScargleAsyncProcess(sigma=nfft_sigma, 224 | use_double=False, **kwargs) 225 | 226 | non_batched_results = [] 227 | for d, (frq, p) in zip(datas, batched_results): 228 | r = ls_procnb.run([d], freqs=frq, **kwargs) 229 | ls_procnb.finish() 230 | non_batched_results.extend(r) 231 | 232 | # for f0, (fb, pb), (fnb, pnb) in zip(frequencies, batched_results, 233 | # non_batched_results): 234 | # print f0, fb[np.argmax(pb)], fnb[np.argmax(pnb)] 235 | 236 | for f0, (fb, pb), (fnb, pnb) in zip(frequencies, batched_results, 237 | non_batched_results): 238 | 239 | if make_plot: 240 | import matplotlib.pyplot as plt 241 | plt.plot(fnb, pnb, color='k', lw=3) 242 | plt.plot(fb, pb, color='r') 243 | plt.axvline(f0) 244 | plt.show() 245 | 246 | assert_allclose(pnb, pb, rtol=lsrtol, atol=lsatol) 247 | assert_allclose(fnb, fb, rtol=lsrtol, atol=lsatol) 248 | -------------------------------------------------------------------------------- /cuvarbase/tests/test_nfft.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from builtins import zip 6 | from builtins import range 7 | from builtins import object 8 | import pytest 9 | import numpy as np 10 | from numpy.testing import assert_allclose 11 | from scipy import fftpack 12 | 13 | from pycuda.tools import mark_cuda_test 14 | from pycuda import gpuarray 15 | 16 | import skcuda.fft as cufft 17 | 18 | from nfft import nfft_adjoint as nfft_adjoint_cpu 19 | from nfft.utils import nfft_matrix 20 | from nfft.kernels import KERNELS 21 | 22 | from ..cunfft import NFFTAsyncProcess 23 | 24 | nfft_sigma = 5 25 | nfft_m = 8 26 | nfft_rtol = 5E-3 27 | nfft_atol = 5E-3 28 | spp = 1 29 | 30 | 31 | def direct_sums(t, y, freqs): 32 | def sfunc(func): 33 | return [np.sum(y * func(2 * np.pi * t * f)) for f in freqs] 34 | return np.asarray(sfunc(np.cos)) + 1j * np.asarray(sfunc(np.sin)) 35 | 36 | 37 | def scale_time(t, samples_per_peak): 38 | return (t - min(t)) / (samples_per_peak * (max(t) - min(t))) - 0.5 39 | 40 | 41 | def data(seed=100, sigma=0.1, ndata=100, samples_per_peak=spp): 42 | 43 | rand = np.random.RandomState(seed) 44 | 45 | t = np.sort(rand.rand(ndata)) 46 | y = np.cos(2 * np.pi * (3./(max(t) - min(t))) * t) 47 | 48 | tscl = scale_time(t, samples_per_peak=samples_per_peak) 49 | 50 | y += sigma * rand.randn(len(t)) 51 | 52 | err = sigma * np.ones_like(y) 53 | 54 | return t, tscl, y, err 55 | 56 | 57 | def get_b(sigma, m): 58 | return (2. 
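            # b is the shape parameter of the Gaussian gridding kernel;
            # this is the standard choice for oversampling factor ``sigma``
            # and kernel half-width ``m`` in the Gaussian-kernel NFFT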
* sigma * m) / ((2 * sigma - 1) * np.pi) 59 | 60 | 61 | def precomp_psi(t, b, n, m): 62 | xg = m + n * t - np.floor(n * t) 63 | 64 | q1 = np.exp(-xg ** 2 / b) / np.sqrt(np.pi * b) 65 | q2 = np.exp(2 * xg / b) 66 | q3 = np.exp(-np.arange(2 * m + 1) ** 2 / b) 67 | 68 | return q1, q2, q3 69 | 70 | 71 | def gpu_grid_scalar(t, y, sigma, m, N): 72 | b = get_b(sigma, m) 73 | 74 | n = int(sigma * N) 75 | 76 | q1, q2, q3 = precomp_psi(t, b, n, m) 77 | 78 | u = (np.floor(n * (t + 0.5) - m)).astype(np.int) 79 | 80 | grid = np.zeros(n) 81 | 82 | inds = np.arange(2 * m + 1) 83 | for i, (U, Y) in enumerate(zip(u, y)): 84 | q2vals = np.array([pow(q2[i], j) for j in inds]) 85 | grid[(U + inds) % len(grid)] += Y * q1[i] * q2vals * q3 86 | 87 | return grid 88 | 89 | 90 | def simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, use_double=False, 91 | m=nfft_m, samples_per_peak=spp, **kwargs): 92 | proc = NFFTAsyncProcess(sigma=sigma, m=m, autoset_m=False, 93 | use_double=use_double) 94 | 95 | for stream in proc.streams: 96 | stream.synchronize() 97 | 98 | nfft_kwargs = dict(samples_per_peak=samples_per_peak) 99 | nfft_kwargs.update(kwargs) 100 | results = proc.run([(t, y, nf)], **nfft_kwargs) 101 | 102 | proc.finish() 103 | return results[0] 104 | 105 | 106 | def get_cpu_grid(t, y, nf, sigma=nfft_sigma, m=nfft_m): 107 | kernel = KERNELS.get('gaussian', 'gaussian') 108 | mat = nfft_matrix(t, int(nf * sigma), m, sigma, kernel, truncated=True) 109 | return mat.T.dot(y) 110 | 111 | 112 | #@mark_cuda_test 113 | class TestNFFT(object): 114 | 115 | def test_fast_gridding_with_jvdp_nfft(self): 116 | t, tsc, y, err = data() 117 | 118 | nf = int(nfft_sigma * len(t)) 119 | gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m, 120 | just_return_gridded_data=True, 121 | fast_grid=True, 122 | minimum_frequency=-int(nf/2), 123 | samples_per_peak=spp) 124 | 125 | # get CPU grid 126 | cpu_grid = get_cpu_grid(tsc, y, nf, sigma=nfft_sigma, m=nfft_m) 127 | 128 | assert_allclose(gpu_grid, cpu_grid, atol=1E-4, rtol=0) 129 | 130 | def test_fast_gridding_against_scalar_version(self): 131 | t, tsc, y, err = data() 132 | 133 | nf = int(nfft_sigma * len(t)) 134 | gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m, 135 | just_return_gridded_data=True, 136 | fast_grid=True, 137 | minimum_frequency=-int(nf/2), 138 | samples_per_peak=spp) 139 | 140 | # get python version of gpu grid calculation 141 | cpu_grid = gpu_grid_scalar(tsc, y, nfft_sigma, nfft_m, nf) 142 | 143 | tols = dict(rtol=nfft_rtol, atol=nfft_atol) 144 | assert_allclose(gpu_grid, cpu_grid, **tols) 145 | 146 | def test_slow_gridding_against_scalar_fast_gridding(self): 147 | t, tsc, y, err = data() 148 | 149 | nf = int(nfft_sigma * len(t)) 150 | gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m, 151 | just_return_gridded_data=True, 152 | fast_grid=False, 153 | minimum_frequency=-int(nf/2), 154 | samples_per_peak=spp) 155 | 156 | # get python version of gpu grid calculation 157 | cpu_grid = gpu_grid_scalar(tsc, y, nfft_sigma, nfft_m, nf) 158 | 159 | tols = dict(rtol=nfft_rtol, atol=nfft_atol) 160 | assert_allclose(gpu_grid, cpu_grid, **tols) 161 | 162 | def test_slow_gridding_against_jvdp_nfft(self): 163 | t, tsc, y, err = data() 164 | 165 | nf = int(nfft_sigma * len(t)) 166 | gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m, 167 | just_return_gridded_data=True, 168 | fast_grid=False, 169 | minimum_frequency=-int(nf/2), 170 | samples_per_peak=spp) 171 | 172 | # get CPU grid 173 | cpu_grid = get_cpu_grid(tsc, y, nf, sigma=nfft_sigma, 
m=nfft_m) 174 | 175 | diffs = np.absolute(gpu_grid - cpu_grid) 176 | inds = (np.argsort(diffs)[::-1])[:10] 177 | 178 | for i, gpug, cpug, d in zip(inds, gpu_grid[inds], 179 | cpu_grid[inds], 180 | diffs[inds]): 181 | print(i, gpug, cpug, d) 182 | 183 | tols = dict(rtol=nfft_rtol, atol=nfft_atol) 184 | assert_allclose(gpu_grid, cpu_grid, **tols) 185 | 186 | def test_ffts(self): 187 | t, tsc, y, err = data() 188 | 189 | yhat = np.empty(len(y)) 190 | 191 | yg = gpuarray.to_gpu(y.astype(np.complex128)) 192 | yghat = gpuarray.to_gpu(yhat.astype(np.complex128)) 193 | 194 | plan = cufft.Plan(len(y), np.complex128, np.complex128) 195 | cufft.ifft(yg, yghat, plan) 196 | 197 | yhat = fftpack.ifft(y) * len(y) 198 | 199 | tols = dict(rtol=nfft_rtol, atol=nfft_atol) 200 | assert_allclose(yhat, yghat.get(), **tols) 201 | 202 | def nfft_against_direct_sums(self, samples_per_peak=spp, 203 | f0=None, scaled=True): 204 | t, tsc, y, err = data(samples_per_peak=samples_per_peak) 205 | 206 | nf = int(nfft_sigma * len(t)) 207 | 208 | df = 1./(samples_per_peak * (max(t) - min(t))) 209 | if f0 is None: 210 | f0 = -0.5 * nf * df 211 | k0 = int(f0 / df) 212 | 213 | f0 = k0 if scaled else k0 * df 214 | tg = tsc if scaled else t 215 | sppg = samples_per_peak 216 | 217 | gpu_nfft = simple_gpu_nfft(tg, y, nf, sigma=nfft_sigma, m=nfft_m, 218 | minimum_frequency=f0, 219 | samples_per_peak=sppg) 220 | 221 | freqs = (float(k0) + np.arange(nf)) 222 | if not scaled: 223 | freqs *= df 224 | direct_dft = direct_sums(tg, y, freqs) 225 | 226 | tols = dict(rtol=nfft_rtol, atol=nfft_atol) 227 | 228 | def dsort(arr0, arr): 229 | d = np.absolute(arr0 - arr) 230 | return np.argsort(-d) 231 | 232 | inds = dsort(np.real(direct_dft), np.real(gpu_nfft)) 233 | 234 | npr = 5 235 | q = list(zip(inds[:npr], direct_dft[inds[:npr]], gpu_nfft[inds[:npr]])) 236 | for i, dft, gnfft in q: 237 | print(i, dft, gnfft) 238 | assert_allclose(np.real(direct_dft), np.real(gpu_nfft), **tols) 239 | assert_allclose(np.imag(direct_dft), np.imag(gpu_nfft), **tols) 240 | 241 | def test_nfft_against_existing_impl_scaled_centered_spp1(self): 242 | self.nfft_against_direct_sums(samples_per_peak=1, scaled=True, f0=None) 243 | 244 | def test_nfft_against_existing_impl_scaled_centered_spp5(self): 245 | self.nfft_against_direct_sums(samples_per_peak=5, scaled=True, f0=None) 246 | 247 | def test_nfft_against_existing_impl_scaled_uncentered_spp1(self): 248 | self.nfft_against_direct_sums(samples_per_peak=1, scaled=True, f0=10.) 249 | 250 | def test_nfft_against_existing_impl_unscaled_centered_spp1(self): 251 | self.nfft_against_direct_sums(samples_per_peak=1, scaled=False, 252 | f0=None) 253 | 254 | def test_nfft_against_existing_impl_unscaled_uncentered_spp5(self): 255 | self.nfft_against_direct_sums(samples_per_peak=5, scaled=False, f0=0.) 
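
    # Note: the adjoint NFFT evaluates sum_j y_j exp(2*pi*i*f_k*t_j) on a
    # uniform grid of trial frequencies f_k; ``direct_sums`` at the top of
    # this module computes the same sums naively and serves as the reference.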
256 | 257 | def test_nfft_adjoint_async(self, f0=0., ndata=10, 258 | batch_size=3, use_double=False): 259 | datas = [] 260 | for i in range(ndata): 261 | t, tsc, y, err = data() 262 | nf = int(nfft_sigma * len(t)) 263 | 264 | datas.append((t, y, nf)) 265 | 266 | kwargs = dict(minimum_frequency=f0, samples_per_peak=spp) 267 | 268 | proc = NFFTAsyncProcess(sigma=nfft_sigma, m=nfft_m, autoset_m=False, 269 | use_double=use_double) 270 | 271 | single_nffts = [] 272 | for t, y, nf in datas: 273 | nfft = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m, 274 | use_double=use_double, **kwargs) 275 | single_nffts.append(nfft) 276 | 277 | multi_nffts = proc.run(datas, **kwargs) 278 | 279 | batch_nffts = proc.batched_run(datas, batch_size=batch_size, **kwargs) 280 | proc.finish() 281 | 282 | tols = dict(rtol=nfft_rtol, atol=nfft_atol) 283 | for ghat_m, ghat_s, ghat_b in zip(multi_nffts, single_nffts, 284 | batch_nffts): 285 | assert_allclose(ghat_s.real, ghat_m.real, **tols) 286 | assert_allclose(ghat_s.imag, ghat_m.imag, **tols) 287 | 288 | assert_allclose(ghat_s.real, ghat_b.real, **tols) 289 | assert_allclose(ghat_s.imag, ghat_b.imag, **tols) 290 | -------------------------------------------------------------------------------- /cuvarbase/tests/test_pdm.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from numpy.testing import assert_allclose 7 | import pytest 8 | from ..utils import weights 9 | from ..pdm import pdm2_cpu, binless_pdm_cpu, PDMAsyncProcess 10 | from pycuda.tools import mark_cuda_test 11 | 12 | pytest.nbins = 10 13 | pytest.seed = 100 14 | pytest.nfreqs = 100 15 | pytest.ndata = 10 16 | pytest.sigma = 0.1 17 | 18 | @pytest.fixture(scope="function") 19 | def pow_cpu(request): 20 | rand = np.random.RandomState(pytest.seed) 21 | 22 | t = np.sort(rand.rand(pytest.ndata)) 23 | y = np.cos(2 * np.pi * (10./(max(t) - min(t))) * t) 24 | 25 | y += pytest.sigma * rand.randn(len(t)) 26 | 27 | err = pytest.sigma * np.ones_like(y) 28 | 29 | w = weights(err) 30 | freqs = np.linspace(0, 100./(max(t) - min(t)), pytest.nfreqs) 31 | freqs += 0.5 * (freqs[1] - freqs[0]) 32 | 33 | pow_cpu = pdm2_cpu(t, y, w, freqs, 34 | linterp=(request.param == 'binned_linterp'), 35 | nbins=pytest.nbins) 36 | 37 | return pow_cpu 38 | 39 | @pytest.fixture(scope="function") 40 | def binless_pow_cpu(request): 41 | rand = np.random.RandomState(pytest.seed) 42 | 43 | t = np.sort(rand.rand(pytest.ndata)) 44 | y = np.cos(2 * np.pi * (10./(max(t) - min(t))) * t) 45 | 46 | y += pytest.sigma * rand.randn(len(t)) 47 | 48 | err = pytest.sigma * np.ones_like(y) 49 | 50 | w = weights(err) 51 | freqs = np.linspace(0, 100./(max(t) - min(t)), pytest.nfreqs) 52 | freqs += 0.5 * (freqs[1] - freqs[0]) 53 | 54 | pow_cpu = binless_pdm_cpu(t, y, w, freqs, tophat=(request.param == 'binless_tophat')) 55 | 56 | return pow_cpu 57 | 58 | @pytest.fixture(scope="function") 59 | def pow_gpu(request): 60 | rand = np.random.RandomState(pytest.seed) 61 | 62 | t = np.sort(rand.rand(pytest.ndata)) 63 | y = np.cos(2 * np.pi * (10./(max(t) - min(t))) * t) 64 | 65 | y += pytest.sigma * rand.randn(len(t)) 66 | 67 | err = pytest.sigma * np.ones_like(y) 68 | 69 | w = weights(err) 70 | freqs = np.linspace(0, 100./(max(t) - min(t)), pytest.nfreqs) 71 | freqs += 0.5 * (freqs[1] - freqs[0]) 72 | 73 | pdm_proc = PDMAsyncProcess() 74 | results = pdm_proc.run([(t, y, w, freqs)], 
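                           # ``kind`` selects the PDM variant; the tests below
                           # exercise 'binned_linterp', 'binned_step',
                           # 'binless_gauss' and 'binless_tophat'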
kind=request.param, nbins=pytest.nbins) 75 | pdm_proc.finish() 76 | 77 | return results[0] 78 | 79 | @pytest.mark.parametrize(["pow_cpu","pow_gpu"], [("binned_linterp","binned_linterp")], indirect=True) 80 | def test_cuda_pdm_binned_linterp(pow_cpu,pow_gpu): 81 | assert_allclose(pow_cpu, pow_gpu, atol=1E-2, rtol=0) 82 | 83 | @pytest.mark.parametrize(["pow_cpu","pow_gpu"], [("binned_step","binned_step")], indirect=True) 84 | def test_cuda_pdm_binned_step(pow_cpu,pow_gpu): 85 | assert_allclose(pow_cpu, pow_gpu, atol=1E-2, rtol=0) 86 | 87 | 88 | @pytest.mark.parametrize(["binless_pow_cpu","pow_gpu"], [("binless_gauss","binless_gauss")], indirect=True) 89 | def test_cuda_pdm_binless_gauss(binless_pow_cpu,pow_gpu): 90 | assert_allclose(binless_pow_cpu, pow_gpu, atol=1E-2, rtol=0) 91 | 92 | 93 | @pytest.mark.parametrize(["binless_pow_cpu","pow_gpu"], [("binless_tophat","binless_tophat")], indirect=True) 94 | def test_cuda_pdm_binless_tophat(binless_pow_cpu,pow_gpu): 95 | assert_allclose(binless_pow_cpu, pow_gpu, atol=1E-2, rtol=0) 96 | -------------------------------------------------------------------------------- /cuvarbase/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import pkg_resources 7 | 8 | 9 | def weights(err): 10 | """ generate observation weights from uncertainties """ 11 | w = np.power(err, -2) 12 | return w/sum(w) 13 | 14 | 15 | def find_kernel(name): 16 | return pkg_resources.resource_filename('cuvarbase', 17 | 'kernels/%s.cu' % (name)) 18 | 19 | 20 | def _module_reader(fname, cpp_defs=None): 21 | txt = open(fname, 'r').read() 22 | 23 | if cpp_defs is None: 24 | return txt 25 | 26 | preamble = ['#define {key} {value}'.format(key=key, 27 | value=('' if value is None 28 | else value)) 29 | for key, value in cpp_defs.items()] 30 | txt = txt.replace('//{CPP_DEFS}', '\n'.join(preamble)) 31 | 32 | return txt 33 | 34 | 35 | def tophat_window(t, t0, d): 36 | w_window = np.zeros_like(t) 37 | w_window[np.absolute(t - t0) < d] += 1. 38 | return w_window / max(w_window) 39 | 40 | 41 | def gaussian_window(t, t0, d): 42 | w_window = np.exp(-0.5 * np.power(t - t0, 2) / (d * d)) 43 | return w_window / (1. if len(w_window) == 0 else max(w_window)) 44 | 45 | 46 | def autofrequency(t, nyquist_factor=5, samples_per_peak=5, 47 | minimum_frequency=None, 48 | maximum_frequency=None, **kwargs): 49 | """ 50 | Determine a suitable frequency grid for data. 51 | 52 | Note that this assumes the peak width is driven by the observational 53 | baseline, which is generally a good assumption when the baseline is 54 | much larger than the oscillation period. 55 | If you are searching for periods longer than the baseline of your 56 | observations, this may not perform well. 57 | 58 | Even with a large baseline, be aware that the maximum frequency 59 | returned is based on the concept of "average Nyquist frequency", which 60 | may not be useful for irregularly-sampled data. The maximum frequency 61 | can be adjusted via the nyquist_factor argument, or through the 62 | maximum_frequency argument. 
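
    The returned grid is uniform: ``df * (nf0 + arange(Nf))``, with
    ``df = 1 / (samples_per_peak * baseline)``, as in the implementation
    below.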
63 | 64 | Parameters 65 | ---------- 66 | samples_per_peak : float (optional, default=5) 67 | The approximate number of desired samples across the typical peak 68 | nyquist_factor : float (optional, default=5) 69 | The multiple of the average nyquist frequency used to choose the 70 | maximum frequency if maximum_frequency is not provided. 71 | minimum_frequency : float (optional) 72 | If specified, then use this minimum frequency rather than one 73 | chosen based on the size of the baseline. 74 | maximum_frequency : float (optional) 75 | If specified, then use this maximum frequency rather than one 76 | chosen based on the average nyquist frequency. 77 | 78 | Returns 79 | ------- 80 | frequency : ndarray or Quantity 81 | The heuristically-determined optimal frequency bin 82 | """ 83 | baseline = max(t) - min(t) 84 | n_samples = len(t) 85 | 86 | df = 1. / (baseline * samples_per_peak) 87 | 88 | nf0 = 1 89 | if minimum_frequency is not None: 90 | nf0 = max([nf0, int(minimum_frequency / df)]) 91 | 92 | if maximum_frequency is not None: 93 | Nf = int(maximum_frequency / df) - nf0 94 | else: 95 | Nf = int(0.5 * samples_per_peak * nyquist_factor * n_samples) 96 | 97 | return df * (nf0 + np.arange(Nf)) 98 | 99 | 100 | def dphase(dt, freq): 101 | dph = dt * freq - np.floor(dt * freq) 102 | dph_final = dph if dph < 0.5 else 1 - dph 103 | return dph_final 104 | 105 | 106 | def get_autofreqs(t, **kwargs): 107 | autofreqs_kwargs = {var: value for var, value in kwargs.items() 108 | if var in ['minimum_frequency', 'maximum_frequency', 109 | 'nyquist_factor', 'samples_per_peak']} 110 | return autofrequency(t, **autofreqs_kwargs) 111 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = -E -a 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = cuvarbase 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | astropy 3 | astrobase 4 | numpy 5 | matplotlib -------------------------------------------------------------------------------- /docs/source/bls.rst: -------------------------------------------------------------------------------- 1 | Box least squares (BLS) periodogram 2 | *********************************** 3 | 4 | The box-least squares periodogram [BLS]_ searches for the periodic dips in brightness that occur when, e.g., a planet passes in front of its host star. The algorithm fits 5 | a `boxcar function `_ to the data. 
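
In symbols, the fitted model is a piecewise-constant (boxcar) function; a sketch of its form, using the parameter names defined just below, is

.. math::

    \hat{y}(t\,|\,q, \phi_0, \delta, y_0) =
    \begin{cases}
        y_0 - \delta, & \phi_0 \leq \phi(t) < \phi_0 + q\\
        y_0, & {\rm otherwise,}
    \end{cases}

where :math:`\phi(t) \in [0, 1)` is the phase of observation time :math:`t` at the trial period.
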
The parameters used are
6 | 
7 | - ``q``: the transit duration as a fraction of the period :math:`t_{\rm trans} / P`
8 | - ``phi0``: the phase offset of the transit (from 0)
9 | - ``delta``: the difference between the out-of-transit brightness and the brightness during transit
10 | - ``y0``: the out-of-transit brightness
11 | 
12 | 
13 | .. plot:: plots/bls_transit_diagram.py
14 | 
15 | 
16 | Using ``cuvarbase`` BLS
17 | -----------------------
18 | 
19 | 
20 | .. plot:: plots/bls_example.py
21 |     :include-source:
22 | 
23 | 
24 | A shortcut: assuming orbital mechanics
25 | --------------------------------------
26 | 
27 | If you assume :math:`R_p\ll R_{\star}`, :math:`M_p\ll M_{\star}`, :math:`L_p\ll L_{\star}`, and :math:`e\ll 1`, where :math:`e` is the ellipticity of the planetary orbit, :math:`L` is the luminosity, :math:`R` is the radius, and :math:`M` is the mass, you can eliminate a free parameter.
28 | 
29 | This is because the orbital period obeys `Kepler's third law `_,
30 | 
31 | .. math::
32 |     P^2 \approx \frac{4\pi^2a^3}{G(M_p + M_{\star})}
33 | 
34 | .. plot:: plots/planet_transit_diagram.py
35 | 
36 | 
37 | The angle of the transit is
38 | 
39 | .. math::
40 | 
41 |     \theta = 2{\rm arcsin}\left(\frac{R_p + R_{\star}}{a}\right)
42 | 
43 | and :math:`q` is therefore :math:`\theta / (2\pi)`. Thus we have a relation between :math:`q` and the period :math:`P`
44 | 
45 | .. math::
46 | 
47 |     \sin{\pi q} = (R_p + R_{\star})\left(\frac{4\pi^2}{P^2 G(M_p + M_{\star})}\right)^{1/3}
48 | 
49 | By incorporating the fact that
50 | 
51 | .. math::
52 | 
53 |     R_{\star} = \left(\frac{3}{4\pi\rho_{\star}}\right)^{1/3}M_{\star}^{1/3}
54 | 
55 | where :math:`\rho_{\star}` is the average stellar density of the host star, we can write
56 | 
57 | .. math::
58 | 
59 |     \sin{\pi q} = \frac{(1 + r)}{(1 + m)^{1/3}} \left(\frac{3\pi}{G\rho_{\star}}\right)^{1/3} P^{-2/3}
60 | 
61 | where :math:`r = R_p / R_{\star}` and :math:`m = M_p / M_{\star}`. We can get rid of the constant factors and convert this to more intuitive units to obtain
62 | 
63 | .. math::
64 | 
65 |     \sin{\pi q} \approx 0.238 \left(1 + r - \frac{m}{3} + \dots{}\right) \left(\frac{\rho_{\star}}{\rho_{\odot}}\right)^{-1/3} \left(\frac{P}{\rm day}\right)^{-2/3}
66 | 
67 | where here we've expanded :math:`(1 + r) / (1 + m)^{1/3}` to first order in :math:`r` and :math:`m`.
68 | 
69 | 
70 | Using the Keplerian assumption in ``cuvarbase``
71 | -----------------------------------------------
72 | 
73 | .. plot:: plots/bls_example_transit.py
74 |     :include-source:
75 | 
76 | 
77 | Period spacing considerations
78 | -----------------------------
79 | 
80 | The frequency spacing :math:`\delta f` needed to resolve a BLS signal with width :math:`q` is
81 | 
82 | .. math::
83 |     \delta f \lesssim \frac{q}{T}
84 | 
85 | where :math:`T` is the baseline of the observations (:math:`T = {\rm max}(t) - {\rm min}(t)`). This can be especially problematic if no assumptions are made about the nature of the signal (e.g., a Keplerian assumption). If you want to resolve a transit signal with a few observations, the minimum :math:`q` value that you would need to search is :math:`\propto 1/N`, where :math:`N` is the number of observations.
86 | 
87 | For a typical Lomb-Scargle periodogram, the frequency spacing is :math:`\delta f \lesssim 1/T`, so running a BLS spectrum with an adequate frequency spacing over the same frequency range requires a factor of :math:`\mathcal{O}(N)` more trial frequencies, each of which requires :math:`\mathcal{O}(N)` computations to estimate the best-fit BLS parameters. 
That means that BLS scales as :math:`\mathcal{O}(N^2N_f)`, while Lomb-Scargle only scales as :math:`\mathcal{O}(N_f\log N_f)`.
88 | 
89 | However, if you assume that the dip is caused by the edge-on transit of a planet on a circular Keplerian orbit, you not only eliminate a degree of freedom, but also obtain (assuming :math:`\sin{\pi q}\approx \pi q`)
90 | 
91 | .. math::
92 | 
93 |     \delta f \propto q \propto f^{2/3}
94 | 
95 | The minimum frequency at which you could hope to measure a transit period is :math:`f_{\rm min} \approx 2/T`, and the maximum frequency is set by the requirement :math:`\sin{\pi q} < 1`, which implies
96 | 
97 | .. math::
98 | 
99 |     f_{\rm max} = 8.612~{\rm c/day}~\times \left(1 - \frac{3r}{2} + \frac{m}{2} -\dots{}\right) \sqrt{\frac{\rho_{\star}}{\rho_{\odot}}}
100 | 
101 | 
102 | For a 10 year baseline, this translates to :math:`2.7\times 10^5` trial frequencies. The number of trial frequencies needed to perform Lomb-Scargle over the same frequency range is only about :math:`3.1\times 10^4`, roughly 8-10 times fewer. However, if we were to search the *entire* range of possible :math:`q` values at each trial frequency instead of making a Keplerian assumption, we would instead require :math:`5.35\times 10^8` trial frequencies, so the Keplerian assumption reduces the number of trial frequencies by a factor of more than 1,000.
103 | 
104 | 
105 | .. [BLS] `Kovacs et al. 2002 `_
--------------------------------------------------------------------------------
/docs/source/ce.rst:
--------------------------------------------------------------------------------
1 | Conditional Entropy
2 | ===================
3 | 
4 | The conditional entropy period finder [G2013]_ phase-folds the data at each trial frequency and estimates
5 | the conditional entropy :math:`H(m|\phi)` of the folded data. The idea is that the fold with the least entropy (intuitively: the greatest "structure" or "non-randomness") should correspond to the correct frequency of a stationary signal.
6 | 
7 | Here,
8 | 
9 | .. math::
10 |     H(m|\phi) = H(m, \phi) - H(\phi) = \sum_{m,\phi}p(m, \phi)\log\left(\frac{p(\phi)}{p(m, \phi)}\right)
11 | 
12 | 
13 | where :math:`p(m, \phi)` is the density of points that fall within the bin located at phase :math:`\phi` and magnitude :math:`m`, and :math:`p(\phi) = \sum_m p(m, \phi)` is the density of points that fall within the corresponding phase bin, summed over all magnitude bins.
14 | 
15 | .. plot:: plots/ce_example.py
16 | 
17 | 
18 | An example with ``cuvarbase``
19 | -----------------------------
20 | 
21 | .. code-block:: python
22 | 
23 |     import cuvarbase.ce as ce
24 |     import numpy as np
25 | 
26 |     # make some fake data
27 |     t = np.sort(np.random.rand(100))
28 |     y = np.cos(2 * np.pi * 10 * t)
29 |     y += np.random.randn(len(t))
30 |     dy = np.ones_like(t)
31 | 
32 |     # start a conditional entropy process
33 |     proc = ce.ConditionalEntropyAsyncProcess(phase_bins=10, mag_bins=5)
34 | 
35 |     # format your data as a list of lightcurves (t, y, dy)
36 |     data = [(t, y, dy)]
37 | 
38 |     # run the CE process with your data
39 |     results = proc.run(data)
40 | 
41 |     # finish the process (probably not necessary but ensures
42 |     # all data has been transferred)
43 |     proc.finish()
44 | 
45 |     # Results is a list of [(freqs, CE), ...] for each lightcurve
46 |     # in ``data``.
47 |     freqs, ce_spectrum = results[0]
48 | 
49 | 
50 | If you want to run CE on large datasets, you can do
51 | 
52 | .. code-block:: python
53 | 
54 |     proc.large_run(data, max_memory=1e9)
55 | 
56 | instead of ``run``, which will ensure that the memory limit (1 GB in this case) is not exceeded on the GPU (unless of course you have other processes running). 
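
To make the statistic itself concrete, here is a minimal CPU sketch of the conditional entropy computation in plain NumPy. This is an illustration of the formula above, not the GPU kernel: it ignores the bin-overlap options, the ``weighted`` variant, and ``balanced_magbins``.

.. code-block:: python

    import numpy as np

    def conditional_entropy(t, y, freq, phase_bins=10, mag_bins=5):
        phi = (t * freq) % 1.0                   # phase-fold the times
        m = (y - y.min()) / (y.max() - y.min())  # scale mags to [0, 1]

        # joint bin occupation p(m, phi) from a 2D histogram
        p_mphi, _, _ = np.histogram2d(phi, m, bins=[phase_bins, mag_bins],
                                      range=[[0, 1], [0, 1]])
        p_mphi /= len(t)

        # marginal p(phi), broadcast back over the magnitude bins
        p_phi = p_mphi.sum(axis=1, keepdims=True) * np.ones_like(p_mphi)

        # H(m|phi) = sum_{m,phi} p(m, phi) * log(p(phi) / p(m, phi))
        nz = p_mphi > 0
        return np.sum(p_mphi[nz] * np.log(p_phi[nz] / p_mphi[nz]))

The best candidate frequency is the one that *minimizes* this quantity over the trial grid (e.g., ``freqs[np.argmin(ce_spectrum)]``).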
57 | 58 | 59 | .. [G2013] `Graham et al. 2013 `_ 60 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # cuvarbase documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Sep 22 21:34:29 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | import ctypes 22 | import io 23 | import re 24 | 25 | cuda_dir = "/Developer/NVIDIA/CUDA-8.0/lib/" 26 | sys.path.insert(0, os.path.abspath('../..')) 27 | sys.path.insert(0, cuda_dir) 28 | 29 | # Set DYLD and LD library paths 30 | dyld_lpath = os.environ.get('DYLD_LIBRARY_PATH', '') 31 | ld_lpath = os.environ.get('LD_LIBRARY_PATH', '') 32 | 33 | 34 | def lpath_insert(p, lpath): 35 | return '%s:%s' % (p, lpath) 36 | 37 | dyld_lpath = lpath_insert(cuda_dir, dyld_lpath) 38 | ld_lpath = lpath_insert(cuda_dir, ld_lpath) 39 | 40 | 41 | os.environ['DYLD_LIBRARY_PATH'] = dyld_lpath 42 | os.environ['LD_LIBRARY_PATH'] = ld_lpath 43 | 44 | 45 | def read(path, encoding='utf-8'): 46 | path = os.path.join(os.path.dirname(__file__), path) 47 | with io.open(path, encoding=encoding) as fp: 48 | return fp.read() 49 | 50 | 51 | def version(path): 52 | """Obtain the packge version from a python file e.g. pkg/__init__.py 53 | 54 | See . 55 | """ 56 | version_file = read(path) 57 | version_match = re.search(r"""^__version__ = ['"]([^'"]*)['"]""", 58 | version_file, re.M) 59 | if version_match: 60 | return version_match.group(1) 61 | raise RuntimeError("Unable to find version string.") 62 | 63 | 64 | VERSION = version('../../cuvarbase/__init__.py') 65 | 66 | 67 | # -- General configuration ------------------------------------------------ 68 | 69 | # If your documentation needs a minimal Sphinx version, state it here. 70 | # 71 | # needs_sphinx = '1.0' 72 | 73 | # Add any Sphinx extension module names here, as strings. They can be 74 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 75 | # ones. 76 | extensions = ['sphinx.ext.autodoc', 77 | 'sphinx.ext.doctest', 78 | 'sphinx.ext.todo', 79 | 'sphinx.ext.coverage', 80 | 'sphinx.ext.mathjax', 81 | 'sphinx.ext.ifconfig', 82 | 'sphinx.ext.viewcode', 83 | 'sphinx.ext.githubpages', 84 | 'sphinx.ext.napoleon', 85 | 'matplotlib.sphinxext.only_directives', 86 | 'matplotlib.sphinxext.plot_directive'] 87 | 88 | # Add any paths that contain templates here, relative to this directory. 89 | templates_path = ['.templates'] 90 | 91 | # The suffix(es) of source filenames. 92 | # You can specify multiple suffix as a list of string: 93 | # 94 | # source_suffix = ['.rst', '.md'] 95 | source_suffix = '.rst' 96 | 97 | # The master toctree document. 98 | master_doc = 'index' 99 | 100 | # General information about the project. 
101 | project = u'cuvarbase' 102 | copyright = u'2017, John Hoffman' 103 | author = u'John Hoffman' 104 | 105 | # The version info for the project you're documenting, acts as replacement for 106 | # |version| and |release|, also used in various other places throughout the 107 | # built documents. 108 | # 109 | # The short X.Y version. 110 | version = VERSION 111 | # The full version, including alpha/beta/rc tags. 112 | release = VERSION 113 | 114 | # The language for content autogenerated by Sphinx. Refer to documentation 115 | # for a list of supported languages. 116 | # 117 | # This is also used if you do content translation via gettext catalogs. 118 | # Usually you set "language" from the command line for these cases. 119 | language = None 120 | 121 | # List of patterns, relative to source directory, that match files and 122 | # directories to ignore when looking for source files. 123 | # This patterns also effect to html_static_path and html_extra_path 124 | exclude_patterns = [] 125 | 126 | # The name of the Pygments (syntax highlighting) style to use. 127 | pygments_style = 'sphinx' 128 | 129 | # If true, `todo` and `todoList` produce output, else they produce nothing. 130 | todo_include_todos = True 131 | 132 | 133 | # -- Options for HTML output ---------------------------------------------- 134 | 135 | # The theme to use for HTML and HTML Help pages. See the documentation for 136 | # a list of builtin themes. 137 | # 138 | html_theme = 'alabaster' 139 | 140 | html_logo = './logo.png' 141 | # Theme options are theme-specific and customize the look and feel of a theme 142 | # further. For a list of options available for each theme, see the 143 | # documentation. 144 | # 145 | # html_theme_options = {} 146 | 147 | # Add any paths that contain custom static files (such as style sheets) here, 148 | # relative to this directory. They are copied after the builtin static files, 149 | # so a file named "default.css" will overwrite the builtin "default.css". 150 | html_static_path = ['.static'] 151 | 152 | # Custom sidebar templates, must be a dictionary that maps document names 153 | # to template names. 154 | # 155 | # This is required for the alabaster theme 156 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 157 | html_sidebars = { 158 | '**': [ 159 | 'about.html', 160 | 'navigation.html', 161 | 'relations.html', # needs 'show_related': True theme option to display 162 | 'searchbox.html', 163 | 'donate.html', 164 | ] 165 | } 166 | 167 | 168 | # -- Options for HTMLHelp output ------------------------------------------ 169 | 170 | # Output file base name for HTML help builder. 171 | htmlhelp_basename = 'cuvarbasedoc' 172 | 173 | 174 | # -- Options for LaTeX output --------------------------------------------- 175 | 176 | latex_elements = { 177 | # The paper size ('letterpaper' or 'a4paper'). 178 | # 179 | # 'papersize': 'letterpaper', 180 | 181 | # The font size ('10pt', '11pt' or '12pt'). 182 | # 183 | # 'pointsize': '10pt', 184 | 185 | # Additional stuff for the LaTeX preamble. 186 | # 187 | # 'preamble': '', 188 | 189 | # Latex figure (float) alignment 190 | # 191 | # 'figure_align': 'htbp', 192 | } 193 | 194 | # Grouping the document tree into LaTeX files. List of tuples 195 | # (source start file, target name, title, 196 | # author, documentclass [howto, manual, or own class]). 
197 | latex_documents = [ 198 | (master_doc, 'cuvarbase.tex', u'cuvarbase Documentation', 199 | u'John Hoffman', 'manual'), 200 | ] 201 | 202 | 203 | # -- Options for manual page output --------------------------------------- 204 | 205 | # One entry per manual page. List of tuples 206 | # (source start file, name, description, authors, manual section). 207 | man_pages = [ 208 | (master_doc, 'cuvarbase', u'cuvarbase Documentation', 209 | [author], 1) 210 | ] 211 | 212 | 213 | # -- Options for Texinfo output ------------------------------------------- 214 | 215 | # Grouping the document tree into Texinfo files. List of tuples 216 | # (source start file, target name, title, author, 217 | # dir menu entry, description, category) 218 | texinfo_documents = [ 219 | (master_doc, 'cuvarbase', u'cuvarbase Documentation', 220 | author, 'cuvarbase', 'One line description of project.', 221 | 'Miscellaneous'), 222 | ] 223 | -------------------------------------------------------------------------------- /docs/source/cuvarbase.rst: -------------------------------------------------------------------------------- 1 | cuvarbase package 2 | ================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | cuvarbase.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | cuvarbase\.bls module 15 | --------------------- 16 | 17 | .. automodule:: cuvarbase.bls 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | cuvarbase\.ce module 23 | -------------------- 24 | 25 | .. automodule:: cuvarbase.ce 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | cuvarbase\.core module 31 | ---------------------- 32 | 33 | .. automodule:: cuvarbase.core 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | cuvarbase\.cunfft module 39 | ------------------------ 40 | 41 | .. automodule:: cuvarbase.cunfft 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | cuvarbase\.lombscargle module 47 | ----------------------------- 48 | 49 | .. automodule:: cuvarbase.lombscargle 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | cuvarbase\.pdm module 55 | --------------------- 56 | 57 | .. automodule:: cuvarbase.pdm 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | 63 | cuvarbase\.utils module 64 | ----------------------- 65 | 66 | .. automodule:: cuvarbase.utils 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | 72 | Module contents 73 | --------------- 74 | 75 | .. automodule:: cuvarbase 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | -------------------------------------------------------------------------------- /docs/source/cuvarbase.tests.rst: -------------------------------------------------------------------------------- 1 | cuvarbase\.tests package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | cuvarbase\.tests\.test\_bls module 8 | ---------------------------------- 9 | 10 | .. automodule:: cuvarbase.tests.test_bls 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | cuvarbase\.tests\.test\_ce module 16 | --------------------------------- 17 | 18 | .. automodule:: cuvarbase.tests.test_ce 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | cuvarbase\.tests\.test\_lombscargle module 24 | ------------------------------------------ 25 | 26 | .. 
automodule:: cuvarbase.tests.test_lombscargle
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | cuvarbase\.tests\.test\_nfft module
32 | -----------------------------------
33 | 
34 | .. automodule:: cuvarbase.tests.test_nfft
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | cuvarbase\.tests\.test\_pdm module
40 | ----------------------------------
41 | 
42 | .. automodule:: cuvarbase.tests.test_pdm
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | 
48 | Module contents
49 | ---------------
50 | 
51 | .. automodule:: cuvarbase.tests
52 |     :members:
53 |     :undoc-members:
54 |     :show-inheritance:
55 | 
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. cuvarbase documentation master file, created by
2 |    sphinx-quickstart on Fri Sep 22 21:34:29 2017.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | 
7 | 
8 | .. include:: ../../README.rst
9 | 
10 | 
11 | .. toctree::
12 |     :maxdepth: 2
13 |     :caption: Contents:
14 | 
15 |     whatsnew
16 |     install
17 |     ce
18 |     lomb
19 |     bls
20 |     modules
21 | 
22 | Indices and tables
23 | ==================
24 | 
25 | * :ref:`genindex`
26 | * :ref:`modindex`
27 | * :ref:`search`
28 | 
--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../INSTALL.rst
--------------------------------------------------------------------------------
/docs/source/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/johnh2o2/cuvarbase/0d97ae11bea01fdfb71cfbe15059979ebfe37373/docs/source/logo.png
--------------------------------------------------------------------------------
/docs/source/lomb.rst:
--------------------------------------------------------------------------------
1 | Lomb-Scargle periodogram
2 | ************************
3 | 
4 | The Lomb-Scargle periodogram ([Barning1963]_, [Vanicek1969]_, [Scargle1982]_, [Lomb1976]_) is one of the best known and most popular period finding algorithms used in astronomy. If you would like to learn more about least-squares methods for periodic signals, see the review article by [VanderPlas2017]_.
5 | 
6 | The LS periodogram is a least-squares estimator for the following model
7 | 
8 | .. math::
9 | 
10 |     \hat{y}(t|\omega, \theta) = \theta_1\cos{\omega t} + \theta_2\sin{\omega t}
11 | 
12 | and it is equivalent to the Discrete Fourier Transform in the regularly-sampled limit. For irregularly sampled data, LS is a maximum likelihood estimator for the parameters :math:`\theta` in the case where the noise is Gaussian. The periodogram has many normalizations in the literature, but ``cuvarbase`` adopts
13 | 
14 | .. math::
15 | 
16 |     P(\omega) = \frac{\chi^2_0 - \chi^2(\omega)}{\chi^2_0}
17 | 
18 | where
19 | 
20 | .. math::
21 | 
22 |     \chi^2(\omega) = \sum_i \left(\frac{y_i - \hat{y}(t_i|\omega, \theta)}{\sigma_i}\right)^2
23 | 
24 | is the goodness-of-fit statistic for the optimal parameters :math:`\theta` and
25 | 
26 | .. math::
27 | 
28 |     \chi^2_0 = \sum_i \left(\frac{y_i - \bar{y}}{\sigma_i}\right)^2
29 | 
30 | is the goodness-of-fit statistic for a constant fit, and :math:`\bar{y}` is the weighted mean,
31 | 
32 | 
33 | ..
math::
34 | 
35 |     \bar{y} = \sum_i w_i y_i
36 | 
37 | where :math:`w_i \propto 1/\sigma_i^2` and :math:`\sum_i w_i = 1`.
38 | 
39 | The closed form of the periodogram is given by
40 | 
41 | .. math::
42 | 
43 |     P(\omega) = \frac{1}{\chi^2_0}\left(\frac{YC_{\tau}^2}{CC_{\tau}} + \frac{YS_{\tau}^2}{SS_{\tau}}\right)
44 | 
45 | where
46 | 
47 | .. math::
48 | 
49 |     YC_{\tau} &= \sum_i w_iy_i\cos{\omega (t_i - \tau)}\\
50 | 
51 |     YS_{\tau} &= \sum_i w_iy_i\sin{\omega (t_i - \tau)}\\
52 | 
53 |     CC_{\tau} &= \sum_i w_i\cos^2{\omega (t_i - \tau)}\\
54 | 
55 |     SS_{\tau} &= \sum_i w_i\sin^2{\omega (t_i - \tau)}\\
56 | 
57 |     \tan{2\omega\tau} &= \frac{\sum_i w_i \sin{2\omega t_i}}{\sum_i w_i \cos{2\omega t_i}}
58 | 
59 | These are the sums as they appear in the original formulation of the Lomb-Scargle periodogram, i.e. without the constant offset term.
60 | 
61 | Adding a constant offset
62 | ------------------------
63 | 
64 | Lomb-Scargle can be extended in many ways, most commonly to include a constant offset [ZK2009]_.
65 | 
66 | .. math::
67 | 
68 |     \hat{y}^{\rm GLS}(t|\omega, \theta) = \theta_1\cos{\omega t} + \theta_2\sin{\omega t} + \theta_3
69 | 
70 | This protects against cases where the mean of the data does not correspond with the mean of the underlying
71 | signal, as is usually the case with sparsely sampled data or for signals with large amplitudes that become
72 | too bright or dim to be observed during part of the signal phase.
73 | 
74 | With the constant offset term, the closed-form solution for :math:`P(\omega)` is the same, but the terms
75 | are slightly different. Derivations are given in [ZK2009]_.
76 | 
77 | Getting :math:`\mathcal{O}(N\log N)` performance
78 | ------------------------------------------------
79 | 
80 | The secret to Lomb-Scargle's speed lies in the fact that computing it requires evaluating sums that, for regularly-spaced data, can be evaluated with the fast Fourier transform (FFT), which scales as :math:`\mathcal{O}(N_f\log N_f)`, where :math:`N_f` is the number of frequencies. For *irregularly* spaced data, however, we can employ tricks to achieve the same scaling:
81 | 
82 | 1. We can "extirpolate" the data with Legendre polynomials onto a regular grid and then perform the FFT [PressRybicki1989]_, or
83 | 2. We can use the non-equispaced fast Fourier transform (NFFT) [DuttRokhlin1993]_, which is tailor-made for this exact problem.
84 | 
85 | The latter was shown by [Leroy2012]_ to give roughly an order-of-magnitude speed improvement over the [PressRybicki1989]_ method, with the added benefit that the NFFT is a rigorous extension of the FFT and has proven error bounds.
86 | 
87 | It is worth mentioning the [Townsend2010]_ CUDA implementation of Lomb-Scargle; however, it uses the "naive" :math:`\mathcal{O}(N_{\rm obs}N_f)` implementation
88 | of LS without any FFTs.
89 | 
90 | Estimating significance
91 | -----------------------
92 | 
93 | See [Baluev2008]_ for more information (TODO).
94 | 
95 | 
96 | Example: Basic
97 | --------------
98 | 
99 | .. plot::
100 |     :include-source:
101 | 
102 |     import skcuda.fft
103 |     import cuvarbase.lombscargle as gls
104 |     import numpy as np
105 |     import matplotlib.pyplot as plt
106 | 
107 | 
108 |     t = np.sort(np.random.rand(300))
109 |     y = 1 + np.cos(2 * np.pi * 100 * t - 0.1)
110 |     dy = 0.1 * np.ones_like(y)
111 |     y += dy * np.random.randn(len(t))
112 | 
113 |     # Set up LombScargleAsyncProcess (compilation, etc.)
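    # (instantiating the process object compiles the CUDA kernels and sets
    #  up the streams, so it pays to reuse one instance for many lightcurves)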
114 | proc = gls.LombScargleAsyncProcess() 115 | 116 | # Run on single lightcurve 117 | result = proc.run([(t, y, dy)]) 118 | 119 | # Synchronize all cuda streams 120 | proc.finish() 121 | 122 | # Read result! 123 | freqs, ls_power = result[0] 124 | 125 | ############ 126 | # Plotting # 127 | ############ 128 | 129 | f, ax = plt.subplots() 130 | ax.set_xscale('log') 131 | 132 | ax.plot(freqs, ls_power) 133 | ax.set_xlabel('Frequency') 134 | ax.set_ylabel('Lomb-Scargle') 135 | plt.show() 136 | 137 | Example: Batches of lightcurves 138 | ------------------------------- 139 | 140 | 141 | .. plot:: 142 | :include-source: 143 | 144 | import skcuda.fft 145 | import cuvarbase.lombscargle as gls 146 | import numpy as np 147 | import matplotlib.pyplot as plt 148 | 149 | nlcs = 9 150 | 151 | def lightcurve(freq=100, ndata=300): 152 | t = np.sort(np.random.rand(ndata)) 153 | y = 1 + np.cos(2 * np.pi * freq * t - 0.1) 154 | dy = 0.1 * np.ones_like(y) 155 | y += dy * np.random.randn(len(t)) 156 | return t, y, dy 157 | 158 | freqs = 200 * np.random.rand(nlcs) 159 | data = [lightcurve(freq=freq) for freq in freqs] 160 | 161 | # Set up LombScargleAsyncProcess (compilation, etc.) 162 | proc = gls.LombScargleAsyncProcess() 163 | 164 | # Run on batch of lightcurves 165 | results = proc.batched_run_const_nfreq(data) 166 | 167 | # Synchronize all cuda streams 168 | proc.finish() 169 | 170 | ############ 171 | # Plotting # 172 | ############ 173 | max_n_cols = 4 174 | ncols = max([1, min([int(np.sqrt(nlcs)), max_n_cols])]) 175 | nrows = int(np.ceil(float(nlcs) / ncols)) 176 | f, axes = plt.subplots(nrows, ncols, 177 | figsize=(3 * ncols, 3 * nrows)) 178 | 179 | for (frqs, ls_power), ax, freq in zip(results, 180 | np.ravel(axes), 181 | freqs): 182 | ax.set_xscale('log') 183 | ax.plot(frqs, ls_power) 184 | ax.axvline(freq, ls=':', color='r') 185 | 186 | f.text(0.05, 0.5, "Lomb-Scargle", rotation=90, 187 | va='center', ha='right', fontsize=20) 188 | f.text(0.5, 0.05, "Frequency", 189 | va='top', ha='center', fontsize=20) 190 | 191 | 192 | for i, ax in enumerate(np.ravel(axes)): 193 | if i >= nlcs: 194 | ax.axis('off') 195 | f.tight_layout() 196 | f.subplots_adjust(left=0.1, bottom=0.1) 197 | plt.show() 198 | 199 | 200 | .. [DuttRokhlin1993] `Dutt, A., & Rokhlin, V. 1993, SIAM J. Sci. Comput., 14(6), 1368–1393. `_ 201 | .. [PressRybicki1989] `Press, W. H., & Rybicki, G. B. 1989, ApJ, 338, 277 `_ 202 | .. [Baluev2008] `Baluev, R. V. 2008, MNRAS, 385, 1279 `_ 203 | .. [ZK2009] `Zechmeister, M., & Kürster, M. 2009, AAP, 496, 577 `_ 204 | .. [VanderPlas2017] `VanderPlas, J. T. 2017, arXiv:1703.09824 `_ 205 | .. [Leroy2012] `Leroy, B. 2012, AAP, 545, A50 `_ 206 | .. [Townsend2010] `Townsend, R. H. D. 2010, ApJS, 191, 247 `_ 207 | .. [Barning1963] `Barning, F. J. M. 1963, BAN, 17, 22 `_ 208 | .. [Vanicek1969] `Vaníček, P. 1969, APSS, 4, 387 `_ 209 | .. [Scargle1982] `Scargle, J. D. 1982, ApJ, 263, 835 `_ 210 | .. [Lomb1976] `Lomb, N. R. 1976, APSS, 39, 447 `_ -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | API documentation 2 | ================= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | cuvarbase 8 | -------------------------------------------------------------------------------- /docs/source/plots/benchmarks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from __future__ import print_function 4 | 5 | import sys 6 | import numpy as np 7 | from time import time 8 | import copy 9 | import matplotlib 10 | matplotlib.use('Agg') 11 | import matplotlib.pyplot as plt 12 | import pycuda.autoinit 13 | import pycuda.driver as cuda 14 | 15 | import cuvarbase.bls as bls 16 | import cuvarbase.ce as ce 17 | import cuvarbase.lombscargle as ls 18 | from astrobase.periodbase.kbls import _bls_runner as astrobase_bls 19 | from astropy.stats.lombscargle import LombScargle as AstropyLombScargle 20 | from tqdm import tqdm 21 | 22 | 23 | def get_freqs(baseline=5 * 365., fmin=None, 24 | fmax=(24 * 60.) / 30., samples_per_peak=5): 25 | 26 | df = 1. / baseline / samples_per_peak 27 | if fmin is None: 28 | fmin = 2./baseline 29 | 30 | nf = int(np.ceil((fmax - fmin) / df)) 31 | 32 | return fmin + df * np.arange(nf) 33 | 34 | 35 | def data(ndata, baseline=5 * 365.): 36 | t = baseline * np.sort(np.random.rand(ndata)) 37 | y = np.cos(2 * np.pi * t) 38 | dy = 0.1 * np.ones_like(t) 39 | 40 | y += dy * np.random.randn(len(t)) 41 | 42 | return t, y, dy 43 | 44 | def profile(func): 45 | def profiled_func(*args, **kwargs): 46 | cuda.start_profiler() 47 | func(*args, **kwargs) 48 | cuda.stop_profiler() 49 | #pycuda.autoinit.context.detach() 50 | sys.exit() 51 | return profiled_func 52 | 53 | def function_timer(func, nreps=3): 54 | def timed_func(*args, **kwargs): 55 | dts = [] 56 | for n in range(nreps): 57 | t0 = time() 58 | func(*args, **kwargs) 59 | dt = time() - t0 60 | dts.append(dt) 61 | return min(dts) 62 | 63 | return timed_func 64 | 65 | 66 | eebls_gpu = function_timer(bls.eebls_gpu) 67 | eebls_transit_gpu = function_timer(bls.eebls_transit_gpu) 68 | eebls_gpu_fast = function_timer(bls.eebls_gpu_fast) 69 | astrobase_bls = function_timer(astrobase_bls) 70 | 71 | _eebls_defaults = dict(qmin_fac=0.5, qmax_fac=2.0, dlogq=0.25, 72 | samples_per_peak=4, noverlap=2) 73 | 74 | 75 | def profile_cuvarbase_ce(t, y, dy, freqs, **kwargs): 76 | 77 | proc = ce.ConditionalEntropyAsyncProcess(**kwargs) 78 | proc.preallocate(len(t), freqs, **kwargs) 79 | run = profile(proc.run) 80 | 81 | run([(t, y, None)], freqs=freqs, **kwargs) 82 | 83 | return True 84 | 85 | def time_cuvarbase_ce_run(t, y, dy, freqs, **kwargs): 86 | proc = ce.ConditionalEntropyAsyncProcess(**kwargs) 87 | proc.preallocate(len(t), freqs, **kwargs) 88 | run = function_timer(proc.run) 89 | 90 | return run([(t, y, None)], freqs=freqs, **kwargs) 91 | 92 | 93 | def time_cuvarbase_bls(t, y, dy, freqs, qmin=1e-2, qmax=0.5, 94 | memory=None, pre_transfer=False, transit=False, 95 | use_fast=True, **kwargs): 96 | 97 | kw = copy.deepcopy(_eebls_defaults) 98 | kw.update(kwargs) 99 | kw['use_fast'] = use_fast 100 | 101 | if memory is None and not transit: 102 | memory = bls.BLSMemory.fromdata(t, y, dy, freqs=freqs, 103 | transfer=pre_transfer, 104 | qmin=qmin, qmax=qmax) 105 | 106 | if not transit and use_fast: 107 | return eebls_gpu_fast(t, y, dy, freqs, memory=memory, 108 | qmin=qmin, qmax=qmax, 109 | transfer_to_device=(not pre_transfer), 110 | **kw) 111 | if not transit: 112 | return eebls_gpu(t, y, dy, freqs, qmin=qmin, qmax=qmax, 113 | **kw) 114 | 115 | qvals = kwargs.get('qvals', None) 116 | if freqs is None: 117 | freqs, qvals = bls.transit_autofreq(t, 
**kw)
118 |     elif qvals is None:
119 |         qvals = bls.q_transit(freqs, **kw)
120 | 
121 |     return eebls_transit_gpu(t, y, dy, freqs=freqs, qvals=qvals, **kw)
122 | 
123 | 
124 | def time_astrobase_bls(t, y, dy, freqs, qmin=1e-2, qmax=0.5,
125 |                        **kwargs):
126 | 
127 |     nfreqs = len(freqs)
128 |     minfreq = min(freqs)
129 |     stepsize = freqs[1] - freqs[0]
130 |     nphasebins = int(np.ceil(1./qmin))
131 | 
132 |     args = (t, y)
133 |     args += (nfreqs, minfreq, stepsize, nphasebins, qmin, qmax)
134 |     return astrobase_bls(*args)
135 | 
136 | 
137 | def subset_data(t, y, dy, ndata):
138 |     inds = np.arange(1, len(t) - 1)
139 |     np.random.shuffle(inds)
140 | 
141 |     # keep both endpoints plus a random, time-ordered subset of the interior
142 |     subinds = np.concatenate(([0], np.sort(inds[:ndata - 2]), [len(t) - 1]))
143 |     return (arr[subinds] for arr in (t, y, dy))
144 | 
145 | 
146 | def time_group(task_dict, group_func, values):
147 |     times = {}
148 |     for name in task_dict.keys():
149 |         print(name)
150 |         dts = []
151 |         for v in tqdm(values):
152 |             dts.append((v, group_func(task_dict[name], v)))
153 |         times[name] = dts
154 |     return times
155 | 
156 | n0 = 1000
157 | ndatas = np.floor(np.logspace(1, 4.5, num=8)).astype(int)
158 | #nblocks = np.arange(1, 25)
159 | #nblocks = np.concatenate((nblocks, np.arange(nblocks[-1], 3000, 50)))
160 | nblocks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 50, 100, 200, 500, 1000, 2000, 5000]
161 | freq_batch_sizes = [1, 5, 10, 50, 100, 500, 1000, 2000, 5000]
162 | 
163 | t, y, dy = data(max(ndatas), baseline=10. * 365)
164 | freqs_t, qvals_t = bls.transit_autofreq(t, fmin=0.01, **_eebls_defaults)
165 | t0, y0, dy0 = subset_data(t, y, dy, n0)
166 | 
167 | qmin = min(qvals_t)
168 | qmax = max(qvals_t)
169 | freqs = get_freqs(baseline=(max(t) - min(t)), samples_per_peak=4, fmin=0.01)
170 | 
171 | print(qmin, qmax, len(freqs_t), len(freqs))
172 | # profile_cuvarbase_ce(t0, y0, dy0, freqs=freqs, use_fast=True, force_nblocks=200)
173 | 
174 | 
175 | tasks = {
176 |     'BLS: cuvarbase (0.2.0)': lambda T, Y, DY, FREQS=freqs,
177 |     force_nblocks=1000, **kwargs:
178 |     time_cuvarbase_bls(T, Y, DY, FREQS, use_fast=True,
179 |                        force_nblocks=force_nblocks, **kwargs),
180 | 
181 |     'BLS: cuvarbase (0.2.0) -- transit': lambda T, Y, DY, FREQS=freqs,
182 |     force_nblocks=1000, **kwargs:
183 |     time_cuvarbase_bls(T, Y, DY, None, use_fast=True,
184 |                        force_nblocks=force_nblocks, transit=True,
185 |                        **kwargs),
186 | 
187 |     'BLS: cuvarbase (0.1.9)': lambda T, Y, DY, FREQS=freqs, **kwargs:
188 |     time_cuvarbase_bls(T, Y, DY, FREQS, use_fast=False, **kwargs),
189 | 
190 |     'BLS: astrobase': lambda T, Y, DY, FREQS=freqs, **kwargs:
191 |     time_astrobase_bls(T, Y, DY, FREQS, **kwargs),
192 | 
193 |     'CE: cuvarbase (0.1.9) 25-2-10-1': lambda T, Y, DY, FREQS=freqs,
194 |     use_fast=False, phase_bins=25, phase_overlap=2, mag_bins=10,
195 |     mag_overlap=1, use_double=False, **kwargs:
196 |     time_cuvarbase_ce_run(T, Y, DY, FREQS, use_fast=use_fast, **kwargs),
197 | 
198 |     'CE: cuvarbase (0.2.0) 25-2-10-1': lambda T, Y, DY, FREQS=freqs,
199 |     use_fast=True, phase_bins=25, phase_overlap=2, mag_bins=10,
200 |     mag_overlap=1, use_double=False, **kwargs:
201 |     time_cuvarbase_ce_run(T, Y, DY, FREQS, use_fast=use_fast, **kwargs)
202 | 
203 | 
204 | }
205 | 
206 | 
207 | 
208 | tasks_nblocks = {name: tasks[name] for name in ['BLS: cuvarbase (0.2.0)',
209 |                                                 'CE: cuvarbase (0.2.0) '
210 |                                                 '25-2-10-1']}
211 | 
212 | 
213 | def nblock_group_func(func, nblock):
214 |     return func(t0, y0, dy0, freqs, force_nblocks=nblock)
215 | 
216 | 
217 | def ndata_group_func(func, ndata):
218 |     T, Y, DY = subset_data(t, y, dy, ndata)
219 |     return func(T, Y, DY, freqs)
220 | 
221 | 
222 | def freq_batch_size_group_func(func, fbs):
223 |     return func(t0, y0, dy0, freqs, freq_batch_size=fbs)
224 | 
225 | 
226 | groups = {
227 |     'N observations': (tasks, ndata_group_func, ndatas),
228 |     'Grid size': (tasks_nblocks, nblock_group_func, nblocks),
229 |     'Frequencies per kernel call': (tasks_nblocks,
230 |                                     freq_batch_size_group_func,
231 |                                     freq_batch_sizes)
232 | }
233 | 
234 | dev = pycuda.autoinit.device
235 | attrs = dev.get_attributes()
236 | device_name = dev.name()
237 | 
238 | print(device_name)
239 | #print(len(freqs))
240 | #for attr in attrs.keys():
241 | #    print("{attr}: {value}".format(attr=attr, value=attrs[attr]))
242 | 
243 | group_times = {}
244 | for group in groups.keys():
245 |     print("="*len(group))
246 |     print(group)
247 |     print("="*len(group))
248 |     group_times[group] = time_group(*groups[group])
249 | 
250 | for group in group_times:
251 |     times = group_times[group]
252 | 
253 |     f, ax = plt.subplots()
254 |     for taskname in sorted(list(times.keys())):
255 |         values, dts = zip(*times[taskname])
256 |         ax.plot(values, dts, label=taskname)
257 | 
258 |     f.suptitle(device_name)
259 |     ax.set_xlabel(group)
260 |     ax.legend(loc='best')
261 |     ax.set_yscale('log')
262 |     ax.set_xscale('log')
263 | 
264 |     dev_str = device_name.replace(' ', '_')
265 |     group_str = group.replace(' ', '_')
266 |     fname = '{dev}-{group}.png'.format(dev=dev_str,
267 |                                        group=group_str)
268 | 
269 |     f.savefig(fname)
270 | 
271 | # plt.show()
272 | 
--------------------------------------------------------------------------------
/docs/source/plots/bls_example.py:
--------------------------------------------------------------------------------
1 | import cuvarbase.bls as bls
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | 
5 | 
6 | def phase(t, freq, phi0=0.):
7 |     phi = (t * freq - phi0)
8 |     phi -= np.floor(phi)
9 | 
10 |     return phi
11 | 
12 | 
13 | def transit_model(t, freq, y0=0.0, delta=1., q=0.01, phi0=0.5):
14 |     phi = phase(t, freq, phi0=phi0)
15 |     transit = phi < q
16 | 
17 |     y = y0 * np.ones_like(t)
18 |     y[transit] -= delta
19 |     return y
20 | 
21 | 
22 | def data(ndata=100, baseline=1, freq=10, sigma=1., **kwargs):
23 |     t = baseline * np.sort(np.random.rand(ndata))
24 |     y = transit_model(t, freq, **kwargs)
25 |     dy = sigma * np.ones_like(t)
26 | 
27 |     y += dy * np.random.randn(len(t))
28 | 
29 |     return t, y, dy
30 | 
31 | 
32 | def plot_bls_model(ax, y0, delta, q, phi0, **kwargs):
33 |     phi_plot = np.linspace(0, 1, int(np.ceil(50. / q)))  # num must be an integer
34 |     y_plot = transit_model(phi_plot, 1., y0=y0,
35 |                            delta=delta, q=q, phi0=phi0)
36 | 
37 |     ax.plot(phi_plot, y_plot, **kwargs)
38 | 
39 | 
40 | def plot_bls_sol(ax, t, y, dy, freq, q, phi0, **kwargs):
41 |     w = np.power(dy, -2)
42 |     w /= sum(w)
43 | 
44 |     phi = phase(t, freq, phi0=phi0)
45 |     transit = phi < q
46 | 
47 |     def ybar(mask):
48 |         return np.dot(w[mask], y[mask]) / sum(w[mask])
49 | 
50 |     y0 = ybar(~transit)
51 |     delta = y0 - ybar(transit)
52 | 
53 |     ax.scatter((phi[~transit] + phi0) % 1.0, y[~transit],
54 |                c='k', s=1, alpha=0.5)
55 |     ax.scatter((phi[transit] + phi0) % 1.0, y[transit],
56 |                c='r', s=1, alpha=0.5)
57 |     plot_bls_model(ax, y0, delta, q, phi0, **kwargs)
58 | 
59 |     ax.set_xlim(0, 1)
60 |     ax.set_xlabel('$\\phi$ ($f = %.3f$)' % (freq))
61 |     ax.set_ylabel('$y$')
62 | 
63 | # set the transit parameters
64 | transit_kwargs = dict(freq=0.1,
65 |                       q=0.1,
66 |                       y0=10.,
67 |                       sigma=0.002,
68 |                       delta=0.05,
69 |                       phi0=0.5)
70 | 
71 | # generate data with a transit
72 | t, y, dy = data(ndata=300,
73 |                 baseline=365.,
74 |                 **transit_kwargs)
75 | 
76 | # set up search parameters
77 | search_params = dict(qmin=1e-2,
78 |                      qmax=0.5,
79 | 
80 |                      # The logarithmic spacing of q
81 |                      dlogq=0.1,
82 | 
83 |                      # Number of overlapping phase bins
84 |                      # to use for finding the best phi0
85 |                      noverlap=3)
86 | 
87 | # derive baseline from the data for consistency
88 | baseline = max(t) - min(t)
89 | 
90 | # df ~ qmin / baseline
91 | df = search_params['qmin'] / baseline
92 | fmin = 2. / baseline
93 | fmax = 2.
94 | 
95 | nf = int(np.ceil((fmax - fmin) / df))
96 | freqs = fmin + df * np.arange(nf)
97 | 
98 | bls_power, sols = bls.eebls_gpu(t, y, dy, freqs,
99 |                                 **search_params)
100 | 
101 | # best BLS fit
102 | q_best, phi0_best = sols[np.argmax(bls_power)]
103 | f_best = freqs[np.argmax(bls_power)]
104 | 
105 | # Plot results
106 | f, (ax_bls, ax_true, ax_best) = plt.subplots(1, 3, figsize=(9, 3))
107 | 
108 | # Periodogram
109 | ax_bls.plot(freqs, bls_power)
110 | ax_bls.axvline(transit_kwargs['freq'],
111 |                ls=':', color='k', label="$f_0$")
112 | ax_bls.axvline(f_best, ls=':', color='r',
113 |                label='BLS $f_{\\rm best}$')
114 | ax_bls.set_xlabel('freq.')
115 | ax_bls.set_ylabel('BLS power')
116 | 
117 | # True solution
118 | plot_bls_sol(ax_true, t, y, dy,
119 |              transit_kwargs['freq'],
120 |              transit_kwargs['q'],
121 |              transit_kwargs['phi0'])
122 | 
123 | # Best-fit solution
124 | plot_bls_sol(ax_best, t, y, dy,
125 |              f_best, q_best, phi0_best)
126 | 
127 | 
128 | ax_true.set_title("True parameters")
129 | ax_best.set_title("Best BLS parameters")
130 | 
131 | f.tight_layout()
132 | plt.show()
133 | 
--------------------------------------------------------------------------------
/docs/source/plots/bls_example_transit.py:
--------------------------------------------------------------------------------
1 | import cuvarbase.bls as bls
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | 
5 | 
6 | def phase(t, freq, phi0=0.):
7 |     phi = (t * freq - phi0)
8 |     phi -= np.floor(phi)
9 | 
10 |     return phi
11 | 
12 | 
13 | def transit_model(t, freq, y0=0.0, delta=1., q=0.01, phi0=0.5):
14 |     phi = phase(t, freq, phi0=phi0)
15 |     transit = phi < q
16 | 
17 |     y = y0 * np.ones_like(t)
18 |     y[transit] -= delta
19 |     return y
20 | 
21 | 
22 | def data(ndata=100, baseline=1, freq=10, sigma=1., **kwargs):
23 |     t = baseline * np.sort(np.random.rand(ndata))
24 |     y = transit_model(t, freq, **kwargs)
25 |     dy = sigma * np.ones_like(t)
26 | 
27 |     y += dy * np.random.randn(len(t))
28 | 
29 |     return t, y, dy
30 | 
31 | 
32 | def plot_bls_model(ax, y0, delta, q, phi0, **kwargs):
33 |     phi_plot = np.linspace(0, 1, int(np.ceil(50. / q)))  # num must be an integer
34 |     y_plot = transit_model(phi_plot, 1., y0=y0,
35 |                            delta=delta, q=q, phi0=phi0)
36 | 
37 |     ax.plot(phi_plot, y_plot, **kwargs)
38 | 
39 | 
40 | def plot_bls_sol(ax, t, y, dy, freq, q, phi0, **kwargs):
41 |     w = np.power(dy, -2)
42 |     w /= sum(w)
43 | 
44 |     phi = phase(t, freq, phi0=phi0)
45 |     transit = phi < q
46 | 
47 |     def ybar(mask):
48 |         return np.dot(w[mask], y[mask]) / sum(w[mask])
49 | 
50 |     y0 = ybar(~transit)
51 |     delta = y0 - ybar(transit)
52 | 
53 |     ax.scatter((phi[~transit] + phi0) % 1.0, y[~transit],
54 |                c='k', s=1, alpha=0.5)
55 |     ax.scatter((phi[transit] + phi0) % 1.0, y[transit],
56 |                c='r', s=1, alpha=0.5)
57 |     plot_bls_model(ax, y0, delta, q, phi0, **kwargs)
58 | 
59 |     ax.set_xlim(0, 1)
60 |     ax.set_xlabel('$\\phi$ ($f = %.3f$)' % (freq))
61 |     ax.set_ylabel('$y$')
62 | 
63 | # the mean density of the host star in solar units
64 | # i.e. rho = rho_star / rho_sun
65 | rho = 1.
66 | 
67 | # set the transit parameters
68 | transit_kwargs = dict(freq=2.,
69 |                       q=bls.q_transit(2., rho=rho),
70 |                       y0=10.,
71 |                       sigma=0.005,
72 |                       delta=0.01,
73 |                       phi0=0.5)
74 | 
75 | # generate data with a transit
76 | t, y, dy = data(ndata=300,
77 |                 baseline=365.,
78 |                 **transit_kwargs)
79 | 
80 | # set up search parameters
81 | search_params = dict(
82 |     # Searches q values in the range
83 |     # (q0 * qmin_fac, q0 * qmax_fac)
84 |     # where q0 = q0(f, rho) is the fiducial
85 |     # q value for Keplerian transit around
86 |     # star with mean density rho
87 |     qmin_fac=0.5,
88 |     qmax_fac=2.0,
89 | 
90 |     # Assumed mean stellar density
91 |     rho=1.0,
92 | 
93 |     # The min/max frequencies as a fraction
94 |     # of their autoset values
95 |     fmin_fac=1.0,
96 |     fmax_fac=1.5,
97 | 
98 |     # oversampling factor; frequency spacing
99 |     # is multiplied by 1/samples_per_peak
100 |     samples_per_peak=2,
101 | 
102 |     # The logarithmic spacing of q
103 |     dlogq=0.1,
104 | 
105 |     # Number of overlapping phase bins
106 |     # to use for finding the best phi0
107 |     noverlap=3)
108 | 
109 | # Run Keplerian BLS; frequencies are automatically set!
110 | freqs, bls_power, sols = bls.eebls_transit_gpu(t, y, dy,
111 |                                                **search_params)
112 | 
113 | # best BLS fit
114 | q_best, phi0_best = sols[np.argmax(bls_power)]
115 | f_best = freqs[np.argmax(bls_power)]
116 | 
117 | # Plot results
118 | f, (ax_bls, ax_true, ax_best) = plt.subplots(1, 3, figsize=(9, 3))
119 | 
120 | # Periodogram
121 | ax_bls.plot(freqs, bls_power)
122 | ax_bls.axvline(transit_kwargs['freq'],
123 |                ls=':', color='k', label="$f_0$")
124 | ax_bls.axvline(f_best, ls=':', color='r',
125 |                label='BLS $f_{\\rm best}$')
126 | ax_bls.set_xlabel('freq.')
127 | ax_bls.set_ylabel('BLS power')
128 | ax_bls.set_xscale('log')
129 | 
130 | # True solution
131 | label_true = '$q=%.3f$, ' % (transit_kwargs['q'])
132 | label_true += '$\\phi_0=%.3f$' % (transit_kwargs['phi0'])
133 | plot_bls_sol(ax_true, t, y, dy,
134 |              transit_kwargs['freq'],
135 |              transit_kwargs['q'],
136 |              transit_kwargs['phi0'],
137 |              label=label_true)
138 | ax_true.legend(loc='best')
139 | 
140 | label_best = '$q=%.3f$, ' % (q_best)
141 | label_best += '$\\phi_0=%.3f$' % (phi0_best)
142 | # Best-fit solution
143 | plot_bls_sol(ax_best, t, y, dy,
144 |              f_best, q_best, phi0_best,
145 |              label=label_best)
146 | ax_best.legend(loc='best')
147 | 
148 | ax_true.set_title("True parameters")
149 | ax_best.set_title("Best BLS parameters")
150 | 
151 | f.tight_layout()
152 | plt.show()
153 | 
--------------------------------------------------------------------------------
/docs/source/plots/bls_transit_diagram.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import cuvarbase.bls as bls
4 | 
5 | 
6 | def transit_model(phi0, q, delta, q1=0.):
7 |     def model(t, freq, q=q, phi0=phi0, delta=delta):
8 | 
9 |         phi = t * freq - phi0
10 |         phi -= np.floor(phi)
11 | 
12 |         if not hasattr(t, '__iter__'):
13 |             return -delta if np.absolute(phi) < q else 0
14 |         y = np.zeros(len(t))
15 |         y[np.absolute(phi) < q] -= delta
16 | 
17 |         return y
18 |     return model
19 | 
20 | 
21 | def plot_bls_sol(t, y, dy, freq, q, phi0):
22 | 
23 |     w = np.power(dy, -2)
24 |     w /= sum(w)
25 | 
26 |     phi_plot = np.linspace(0, 1, int(np.ceil(50. / q)))  # num must be an integer
27 | 
28 |     phi = (t * freq)
29 |     phi -= np.floor(phi)
30 | 
31 |     dphi = phi - phi0 - np.floor(phi - phi0)
32 |     mask = dphi < q
33 | 
34 |     ybt = np.dot(w[mask], y[mask]) / sum(w[mask])
35 |     yb0 = np.dot(w[~mask], y[~mask]) / sum(w[~mask])
36 | 
37 |     delta = yb0 - ybt
38 | 
39 |     model = transit_model(phi0, q, delta)
40 | 
41 |     ym = model(phi_plot, 1.) + yb0
42 | 
43 |     f, ax = plt.subplots()
44 | 
45 |     ax.scatter(phi[~mask], y[~mask], c='k', s=1, alpha=0.4)
46 |     ax.scatter(phi[mask], y[mask], c='g', s=1, alpha=0.8)
47 |     ax.plot(phi_plot, ym, color='r')
48 |     ax.axvline(phi0, color='k', ls=':')
49 |     # ax.axvline(phi0 + q, color='k', ls=':')
50 | 
51 |     ax.axis('off')
52 | 
53 |     ax.annotate('$\\delta$', xy=(phi0 - 0.03, -0.5 * delta), xytext=(-5, 0),
54 |                 textcoords='offset points', ha='right', va='center',
55 |                 fontsize=20)
56 | 
57 |     ax.plot([phi0 - 0.03, phi0 - 0.03], [-delta, -0.03 * delta], ls='--',
58 |             color='k')
59 | 
60 |     ax.plot([phi0, phi0 + q], [-1.03 * delta, -1.03 * delta], ls='--',
61 |             color='k')
62 |     ax.annotate('$q$', xy=(phi0 + 0.5 * q, -1.03 * delta), xytext=(0, -5),
63 |                 textcoords='offset points', ha='center', va='top',
64 |                 fontsize=20, transform=ax.transData)
65 | 
66 |     ax.annotate('$\\phi_0$', xy=(phi0, 0), xytext=(5, 5),
67 |                 textcoords='offset points', ha='left', va='bottom',
68 |                 fontsize=20, transform=ax.transData)
69 | 
70 |     ax.annotate('$y_0$', xy=(0.05, 0), xytext=(5, 5),
71 |                 textcoords='offset points', ha='left', va='bottom',
72 |                 fontsize=20, transform=ax.transData)
73 |     plt.show()
74 | 
75 | model = transit_model(0.5, 0.1, 0.1)
76 | t = np.sort(np.random.rand(200))
77 | y = model(t, 10.)
78 | dy = 0.01 * np.ones_like(y)
79 | 
80 | y += dy * np.random.randn(len(t))
81 | 
82 | plot_bls_sol(t, y, dy, 10., 0.1, 0.5)
83 | 
--------------------------------------------------------------------------------
/docs/source/plots/ce_example.py:
--------------------------------------------------------------------------------
1 | import cuvarbase.ce as ce
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from copy import copy
5 | 
6 | 
7 | def phase(t, freq, phi0=0.):
8 |     phi = (t * freq - phi0)
9 |     phi -= np.floor(phi)
10 | 
11 |     return phi
12 | 
13 | 
14 | def sine_model(t, freq, amp=1., y0=0.0, phi0=0.5):
15 |     return y0 + amp * np.sin((t * freq - phi0) * 2 * np.pi)
16 | 
17 | 
18 | def transit_model(t, freq, y0=0.0, delta=1., q=0.01, phi0=0.5):
19 |     phi = phase(t, freq, phi0=phi0)
20 |     transit = phi < q
21 | 
22 |     y = y0 * np.ones_like(t)
23 |     y[transit] -= delta
24 |     return y
25 | 
26 | 
27 | def data(ndata=100, baseline=1, freq=10, sigma=1.,
28 |          model=transit_model, **kwargs):
29 |     t = baseline * np.sort(np.random.rand(ndata))
30 |     y = model(t, freq, **kwargs)
31 |     dy = sigma * np.ones_like(t)
32 | 
33 |     y += dy * np.random.randn(len(t))
34 | 
35 |     return t, y, dy
36 | 
37 | 
38 | def plot_ce_bins(ax, t, y, dy, freq, ce_proc):
39 |     ax.set_xlim(0, 1)
40 | 
41 |     y0 = min(y)
42 |     yrange = max(y) - y0
43 | 
44 |     # Phase-fold the data at the trial frequency
45 |     phi = phase(t, freq)
46 | 
47 |     # Bin the data
48 |     phi_bins = np.floor(phi * ce_proc.phase_bins).astype(int)
49 | 
50 |     yi = ce_proc.mag_bins * (y - y0) / yrange
51 |     mag_bins = np.floor(yi).astype(int)
52 | 
53 |     bins = [[sum((phi_bins == i) & (mag_bins == j))
54 |              for j in range(ce_proc.mag_bins)]
55 |             for i in range(ce_proc.phase_bins)]
56 |     bins = np.array(bins).astype(float)
57 | 
58 |     # Convert to N(bin) / Ntotal
59 |     bins /= np.sum(bins.ravel())
60 | 
61 |     # The fraction of data that fall within a given phase bin
62 |     p_phi = [np.sum(bins[i]) for i in range(ce_proc.phase_bins)]
63 | 
64 |     # fractional width of the (magnitude) bins
65 |     dm = float(1 + ce_proc.mag_overlap) / ce_proc.mag_bins
66 |     dphi = float(1 + ce_proc.phase_overlap) / ce_proc.phase_bins
67 |     dY = yrange * dm
68 | 
69 |     # Compute conditional entropy contribution from each of the bins
70 |     dH = [[bins[i][j] * np.log(dm * p_phi[i] / bins[i][j])
71 |            if bins[i][j] > 0 else 0.
72 |            for j in range(ce_proc.mag_bins)]
73 |           for i in range(ce_proc.phase_bins)]
74 | 
75 |     dH = np.array(dH)
76 | 
77 |     extent = [0, 1, min(y), max(y)]
78 | 
79 |     # Mask out the unoccupied bins
80 |     dH = np.ma.masked_where(dH == 0, dH)
81 | 
82 |     palette = copy(plt.cm.GnBu_r)
83 |     palette.set_bad('w', 0.)
84 | 
85 |     # Draw gridlines
86 |     for i in range(ce_proc.phase_bins + 1):
87 |         ax.axvline(0 + i * dphi, ls=':', color='k',
88 |                    alpha=0.5, zorder=95)
89 | 
90 |     for i in range(ce_proc.mag_bins + 1):
91 |         ax.axhline(min(y) + i * dY, ls=':', color='k',
92 |                    alpha=0.5, zorder=95)
93 | 
94 |     # Plot the conditional entropy
95 |     cplot = ax.imshow(dH.T, cmap=palette, extent=extent,
96 |                       aspect='auto', origin='lower',
97 |                       alpha=0.5, zorder=90)
98 | 
99 |     # Plot the data
100 |     ax.scatter(phi, y, c='k', s=1, alpha=1, zorder=100)
101 | 
102 |     return cplot
103 | 
104 | # Set up the signal parameters
105 | freq = 0.1
106 | signal_params = dict(y0=10.,
107 |                      freq=freq,
108 |                      sigma=0.01,
109 |                      ndata=100,
110 |                      baseline=365.,
111 |                      amp=0.1,
112 |                      phi0=0.,
113 |                      model=sine_model)
114 | 
115 | # Generate data
116 | t, y, dy = data(**signal_params)
117 | 
118 | # Start GPU process for conditional entropy
119 | # (this does things like compiling the kernel,
120 | # setting parameter values, etc.)
121 | proc = ce.ConditionalEntropyAsyncProcess()
122 | 
123 | # Set frequencies
124 | df = 1. / (2 * signal_params['baseline'])
125 | fmin = 2. / signal_params['baseline']
126 | fmax = 50 * len(t) * df
127 | 
128 | nf = int(np.ceil((fmax - fmin) / df))
129 | freqs = fmin + df * np.arange(nf)
130 | 
131 | ####################
132 | # Run the process! #
133 | ####################
134 | 
135 | # Data is sent in list of tuples (in case we want
136 | # to run CE on more than one lightcurve)
137 | data = [(t, y, dy)]
138 | 
139 | # The large_run function is an alternative to the run
140 | # function if the frequency grid & binning array is too
141 | # large to fit in GPU memory.
142 | try:
143 |     results = proc.run(data, freqs=freqs)
144 | except Exception:  # e.g. a GPU memory allocation error
145 |     results = proc.large_run(data, freqs=freqs, max_memory=1e8)
146 | 
147 | proc.finish()
148 | 
149 | # The results come back as [(freqs, CE), ...] for
150 | # each element of the data list. In this case, there is only
151 | # one lightcurve.
152 | frq, p = results[0]
153 | 
154 | # Find the best frequency (that *minimizes* the conditional entropy)
155 | f_best = frq[np.argmin(p)]
156 | 
157 | 
158 | #####################
159 | # Plot the results!
# 160 | ##################### 161 | 162 | f, (ax_ce, ax_bin) = plt.subplots(1, 2, figsize=(8, 4)) 163 | ax_ce.plot(frq, p) 164 | ax_ce.set_xlabel('freq.', fontsize=15) 165 | ax_ce.set_ylabel('Conditional Entropy ($H(f)$)', fontsize=15) 166 | ax_ce.set_xscale('log') 167 | ax_ce.axvline(freq, color='k', ls=':') 168 | ax_ce.axvline(f_best, color='r', ls=':') 169 | 170 | cplot = plot_ce_bins(ax_bin, t, y, dy, freq, proc) 171 | cbar = f.colorbar(cplot) 172 | cbar.ax.set_title('$H(\\phi, m)$') 173 | ax_bin.set_xlabel('$\\phi$', fontsize=15) 174 | ax_bin.set_ylabel('$m$', fontsize=15) 175 | ax_bin.set_title('$f = {\\rm argmin}_{f}(H(f))$') 176 | 177 | f.tight_layout() 178 | plt.show() 179 | -------------------------------------------------------------------------------- /docs/source/plots/logo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import cuvarbase.lombscargle as ls 4 | 5 | rand = np.random.RandomState(100) 6 | freq = 40 7 | def data(ndata=100, freq=freq, sigma=0.4): 8 | t = np.sort(rand.rand(ndata)) 9 | y = sum([np.cos(2 * np.pi * n * freq * t - n) / np.sqrt(abs(n - 2) + 1) for n in range(4)]) 10 | dy = sigma * np.ones_like(t) 11 | 12 | y += dy * rand.randn(ndata) 13 | 14 | return t, y, dy 15 | 16 | t, y, dy = data() 17 | data = [(t, y, dy)] 18 | proc = ls.LombScargleAsyncProcess() 19 | result = proc.run(data, minimum_frequency=10, maximum_frequency=150) 20 | proc.finish() 21 | 22 | frq, p = result[0] 23 | 24 | mask = np.absolute(frq - freq) / freq < 0.02 25 | 26 | f, ax = plt.subplots(figsize=(3, 3)) 27 | 28 | phi = (t * freq) % 2.0 29 | 30 | #ax.plot(frq[~mask], p[~mask], color='k', lw=2, zorder=10) 31 | #ax.plot(frq[mask], p[mask], color='r', lw=2, zorder=11) 32 | ax.plot(frq, p, color='0.6', lw=2) 33 | 34 | for n in range(1, 4): 35 | mask = np.absolute(frq - n * freq) / freq < 1e-1 36 | ax.plot(frq[mask], p[mask]) 37 | 38 | ax.set_xlim(min(frq), max(frq)) 39 | xmin, xmax = ax.get_xlim() 40 | ymin, ymax = ax.get_ylim() 41 | yrange = max(y) - min(y) 42 | ys = (ymax - ymin) * (y - min(y)) / yrange 43 | 44 | #ax.scatter(0.5 * phi * (xmax - xmin), ys, s=2, c='k', alpha=0.2) 45 | ax.axis('off') 46 | #ax.axvline(freq, ls=':', color='r') 47 | f.subplots_adjust(left=0, top=1, bottom=0, right=1) 48 | f.savefig('../logo.png') 49 | #plt.show() 50 | -------------------------------------------------------------------------------- /docs/source/plots/planet_transit_diagram.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.patches as mpatches 3 | import numpy as np 4 | 5 | 6 | def theta_entry(a_rs=3., rp_rs=0.5): 7 | return -np.arcsin((1 + rp_rs) / a_rs) 8 | 9 | 10 | def circ_pos(center, theta, r): 11 | x, y = center 12 | y1 = y - r * np.cos(theta) 13 | x1 = x + r * np.sin(theta) 14 | 15 | return (x1, y1) 16 | 17 | 18 | def draw_system(ax, a_rs=3., rp_rs=0.5, theta=None, 19 | rs=0.1, xs=(0.5, 0.5), label_radii=True, 20 | draw_planet=True, draw_sun=True, 21 | planet_circle_kwargs=dict(color='0.7'), 22 | sun_circle_kwargs=dict(color='r', alpha=0.5), 23 | draw_a=True, a_kwargs=dict(color='k', ls='-'), 24 | label_a=True): 25 | if theta is None: 26 | theta = theta_entry(a_rs=a_rs, rp_rs=rp_rs) 27 | 28 | xp = circ_pos(xs, theta, a_rs * rs) 29 | 30 | if draw_planet: 31 | planet = plt.Circle(xp, rp_rs * rs, **planet_circle_kwargs) 32 | ax.add_artist(planet) 33 | if draw_sun: 34 | star = plt.Circle(xs, rs, 
**sun_circle_kwargs) 35 | ax.add_artist(star) 36 | 37 | ax.plot([xs[0] - rs, xs[0] - rs], [0, xs[1]], color='k', ls=':') 38 | ax.plot([xs[0] + rs, xs[0] + rs], [0, xs[1]], color='k', ls=':') 39 | 40 | if draw_a: 41 | ax.plot(*zip(xs, xp), **a_kwargs) 42 | 43 | if draw_a and label_a: 44 | atext_xy = tuple(0.5 * (np.array(xs) + np.array(xp))) 45 | 46 | acoords = (-5 * np.cos(theta), -5 * np.sin(theta)) 47 | ax.annotate("$a$", xy=atext_xy, xytext=acoords, 48 | textcoords='offset points', xycoords='data', 49 | ha='right', va='bottom' if theta < 0 else 'top', 50 | fontsize=20) 51 | 52 | if label_radii: 53 | ax.plot([xs[0], xs[0] + rs], [xs[1], xs[1]], ls='--', color='k') 54 | ax.annotate("$R_{\\star}$", xy=(xs[0] + 0.5 * rs, xs[1]), 55 | xytext=(0, 3), 56 | textcoords='offset points', xycoords='data', 57 | ha='center', va='bottom', fontsize=20) 58 | 59 | ax.plot([xp[0], xp[0] - rs * rp_rs], 60 | [xp[1], xp[1]], ls='--', color='k') 61 | ax.annotate("$R_p$", xy=(xp[0] - 0.5 * rs * rp_rs, xp[1]), 62 | xytext=(-5, -5), 63 | textcoords='offset points', xycoords='data', 64 | ha='right', va='top', fontsize=20) 65 | 66 | f, ax = plt.subplots() 67 | 68 | x0 = (0.5, 0.8) 69 | theta = -theta_entry() 70 | rs = 0.2 71 | 72 | draw_system(ax, theta=-theta, rs=rs, xs=x0) 73 | 74 | draw_system(ax, theta=theta, rs=rs, xs=x0, 75 | draw_sun=False, label_radii=False, label_a=False) 76 | 77 | arc_rad = 1.2 * rs 78 | arc = mpatches.Arc(x0, 2 * arc_rad, 2 * arc_rad, 79 | theta1=np.degrees(-np.pi/2 - theta), 80 | theta2=np.degrees(-np.pi/2 + theta)) 81 | 82 | arc2 = mpatches.Arc(x0, 6 * rs, 6 * rs, 83 | theta1=np.degrees(-np.pi/2 - 2 * theta), 84 | theta2=np.degrees(-np.pi/2 + 2 * theta), 85 | ls='--', color='k') 86 | 87 | ax.add_patch(arc) 88 | ax.add_patch(arc2) 89 | ax.annotate('$\\theta$', xy=(x0[0], x0[1] - arc_rad), xytext=(0, -5), 90 | textcoords='offset points', fontsize=20, va='top', ha='center') 91 | 92 | 93 | ax.axis('off') 94 | ax.set_aspect('equal', 'datalim') 95 | 96 | ax.set_xlim(0, 1) 97 | ax.set_ylim(0, 1) 98 | plt.show() 99 | -------------------------------------------------------------------------------- /docs/source/whatsnew.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../../CHANGELOG.rst 2 | -------------------------------------------------------------------------------- /notebooks/Conditional entropy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Conditional Entropy period finder\n", 8 | "\n" 9 | ] 10 | } 11 | ], 12 | "metadata": { 13 | "kernelspec": { 14 | "display_name": "Python 2", 15 | "language": "python", 16 | "name": "python2" 17 | }, 18 | "language_info": { 19 | "codemirror_mode": { 20 | "name": "ipython", 21 | "version": 2 22 | }, 23 | "file_extension": ".py", 24 | "mimetype": "text/x-python", 25 | "name": "python", 26 | "nbconvert_exporter": "python", 27 | "pygments_lexer": "ipython2", 28 | "version": "2.7.13" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 2 33 | } 34 | -------------------------------------------------------------------------------- /notebooks/PDM2_bin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnh2o2/cuvarbase/0d97ae11bea01fdfb71cfbe15059979ebfe37373/notebooks/PDM2_bin.jpg -------------------------------------------------------------------------------- /notebooks/PDM2_binless_gauss.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnh2o2/cuvarbase/0d97ae11bea01fdfb71cfbe15059979ebfe37373/notebooks/PDM2_binless_gauss.jpg -------------------------------------------------------------------------------- /notebooks/PDM2_binless_tophat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnh2o2/cuvarbase/0d97ae11bea01fdfb71cfbe15059979ebfe37373/notebooks/PDM2_binless_tophat.jpg -------------------------------------------------------------------------------- /notebooks/PDM_bin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnh2o2/cuvarbase/0d97ae11bea01fdfb71cfbe15059979ebfe37373/notebooks/PDM_bin.jpg -------------------------------------------------------------------------------- /publish_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # A hack-ish way to automate the document publishing process for 4 | # github pages. 5 | # 6 | # This won't work if you're not @johnh2o2 on Github. 7 | # 8 | # To build docs locally 9 | # --------------------- 10 | # Just ``cd docs && make html``. Then open docs/build/html/index.html. 11 | set -x 12 | 13 | DOC_BRANCH=master 14 | NEEDED="cuvarbase docs/Makefile docs/source README.rst INSTALL.rst CHANGELOG.rst" 15 | 16 | # We need to grab hidden files with mv... 17 | shopt -s dotglob nullglob 18 | 19 | # Create gh-pages branch if one doesn't already exist. 20 | HAS_GH_BRANCH=`git branch | grep gh-pages` 21 | if [ "$HAS_GH_BRANCH" == "" ]; then 22 | echo "Did not detect gh-pages branch. Creating now." 23 | git checkout -b gh-pages || exit 1 24 | else 25 | git checkout gh-pages || exit 1 26 | fi 27 | 28 | # update 29 | git pull origin gh-pages 30 | 31 | # clean out 32 | git rm -rf . 33 | 34 | # checkout the files we need for the documentation 35 | git checkout $DOC_BRANCH $NEEDED 36 | git reset HEAD 37 | 38 | # make docs 39 | cd docs 40 | make html || exit 1 41 | cd .. 
42 | 
43 | # move content to parent directory
44 | mv docs/build/html/* ./
45 | 
46 | # remove unneeded files
47 | rm -rf $NEEDED docs
48 | 
49 | # update the repo
50 | git add --all
51 | git commit -m "Updating docs"
52 | git push -u origin gh-pages
53 | 
54 | # go home
55 | git checkout $DOC_BRANCH
56 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | future
2 | numpy >= 1.6
3 | scipy
4 | pycuda >= 2017.1.1, != 2024.1.2
5 | scikit-cuda
6 | -e .
7 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 | 
4 | [aliases]
5 | test=pytest
6 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import io
4 | import os
5 | import re
6 | 
7 | try:
8 |     from setuptools import setup
9 | except ImportError:
10 |     from distutils.core import setup
11 | 
12 | 
13 | def read(path, encoding='utf-8'):
14 |     path = os.path.join(os.path.dirname(__file__), path)
15 |     with io.open(path, encoding=encoding) as fp:
16 |         return fp.read()
17 | 
18 | 
19 | def version(path):
20 |     """Obtain the package version from a Python file, e.g. pkg/__init__.py
21 | 
22 |     See .
23 |     """
24 |     version_file = read(path)
25 |     version_match = re.search(r"""^__version__ = ['"]([^'"]*)['"]""",
26 |                               version_file, re.M)
27 |     if version_match:
28 |         return version_match.group(1)
29 |     raise RuntimeError("Unable to find version string.")
30 | 
31 | 
32 | VERSION = version('cuvarbase/__init__.py')
33 | 
34 | setup(name='cuvarbase',
35 |       version=VERSION,
36 |       description="Period-finding and variability on the GPU",
37 |       author='John Hoffman',
38 |       author_email='johnh2o2@gmail.com',
39 |       packages=['cuvarbase',
40 |                 'cuvarbase.tests'],
41 |       package_data={'cuvarbase': ['kernels/*cu']},
42 |       url='https://github.com/johnh2o2/cuvarbase',
43 |       setup_requires=['pytest-runner', 'future'],
44 |       install_requires=['future',
45 |                         'numpy>=1.6',
46 |                         'scipy',
47 |                         'pycuda>=2017.1.1,!=2024.1.2',
48 |                         'scikit-cuda'],
49 |       tests_require=['pytest',
50 |                      'future',
51 |                      'nfft',
52 |                      'matplotlib',
53 |                      'astropy'],
54 |       classifiers=[
55 |           'Development Status :: 4 - Beta',
56 |           'Environment :: Console',
57 |           'Intended Audience :: Science/Research',
58 |           'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
59 |           'Natural Language :: English',
60 |           'Programming Language :: Python :: 2.7',
61 |           'Programming Language :: Python :: 3.4',
62 |           'Programming Language :: Python :: 3.5',
63 |           'Programming Language :: Python :: 3.6',
64 |           'Programming Language :: C',
65 |           'Programming Language :: C++'])
66 | 
--------------------------------------------------------------------------------
/test_python_versions.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Very rough script for testing cuvarbase compatibility across python
4 | # versions
5 | #
6 | # (c) John Hoffman
7 | #
8 | # Run this from the top-level cuvarbase directory
9 | 
10 | 
11 | # Print everything you do.
12 | set -x 13 | 14 | # Decide which python version to test 15 | PYTHON_VERSION=2.7 16 | 17 | # Put your cuda installation directory here 18 | export CUDA_ROOT=/usr/local/cuda 19 | 20 | ######################################################################## 21 | CONDA_ENVIRONMENT_NAME=cuvar 22 | CUVARBASE_DIR=$PWD 23 | 24 | # Export the library paths 25 | export LD_LIBRARY_PATH="${CUDA_ROOT}/lib:${LD_LIBRARY_PATH}" 26 | export DYLD_LIBRARY_PATH="${CUDA_ROOT}/lib:${DYLD_LIBRARY_PATH}" 27 | export PATH="${CUDA_ROOT}/bin:${PATH}" 28 | 29 | # Erase the testing conda environment if it already exists 30 | test_str=`conda info --envs | grep ${CONDA_ENVIRONMENT_NAME}` 31 | if [ "$test_str" != "" ]; then 32 | echo "removing conda environment ${CONDA_ENVIRONMENT_NAME}" 33 | conda remove -y --name ${CONDA_ENVIRONMENT_NAME} --all 34 | fi 35 | 36 | # Create the conda environment for testing with the right Python version 37 | conda create -y -n $CONDA_ENVIRONMENT_NAME python=$PYTHON_VERSION numpy 38 | 39 | # Activate the conda environment 40 | source activate $CONDA_ENVIRONMENT_NAME 41 | 42 | cd $CUVARBASE_DIR 43 | 44 | # Install from the present directory, ignoring caches 45 | pip install --no-cache-dir -e . 46 | 47 | # test 48 | python setup.py test 49 | 50 | # (optionally) clean up conda environment 51 | #source deactivate 52 | #conda remove -y --name $CONDA_ENVIRONMENT_NAME --all 53 | --------------------------------------------------------------------------------