├── clustering ├── __init__.py ├── Makefile ├── sdml.py ├── cneighbors.c ├── radfriendsregion.py └── neighbors.py ├── .gitignore ├── pres ├── massivens.pdf ├── massivens2.pdf ├── massivens3.pdf ├── mnras.layout ├── plotjointcontour.py ├── plotcontour.py └── mnras_template.tex ├── gennothing.py ├── Makefile ├── LICENSE ├── plotmuseposterior.py ├── plotscaling.py ├── plotevidences.py ├── checkoutput.py ├── TODO.rst ├── README.rst ├── plotposterior.py ├── adaptive_progress.py ├── gensimple_horns.py ├── gensimple_bright.py ├── gen.py ├── cmuselike.c ├── gensimple.py ├── gen_realistic.py ├── gensimple_faint.py ├── clike.c ├── elldrawer.py ├── cachedconstrainer.py ├── profile_generate_subsets.py ├── musefuse_postprocess.py ├── multi_nested_integrator.py ├── hiermetriclearn.py ├── sample.py ├── whitenedmcmc.py ├── friends.py ├── multi_nested_sampler.py └── musefuse.py /clustering/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.hdf5 2 | *.pyc 3 | *.npz 4 | *.so 5 | *.json 6 | *.pdf 7 | *.png 8 | prof* 9 | -------------------------------------------------------------------------------- /pres/massivens.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/massivedatans/master/pres/massivens.pdf -------------------------------------------------------------------------------- /pres/massivens2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/massivedatans/master/pres/massivens2.pdf -------------------------------------------------------------------------------- /pres/massivens3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/massivedatans/master/pres/massivens3.pdf -------------------------------------------------------------------------------- /pres/mnras.layout: -------------------------------------------------------------------------------- 1 | #% Do not delete the line below; configure depends on this 2 | # \DeclareLaTeXClass[mnras]{article (mnras)} 3 | # Input general definitions 4 | Input stdclass.inc 5 | Style Abstract 6 | InTitle 1 7 | End 8 | -------------------------------------------------------------------------------- /clustering/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC := gcc 3 | CFLAGS += -fPIC -std=c99 -Wall -lm -Wextra 4 | CFLAGS += -O3 5 | 6 | all: cneighbors.so cneighbors-parallel.so 7 | 8 | %-parallel.so: %.c 9 | ${CC} ${CFLAGS} -fopenmp -DPARALLEL=1 $< -o $@ -shared 10 | 11 | %.so: %.c 12 | ${CC} ${CFLAGS} $< -o $@ -shared 13 | clean: 14 | rm *.so 15 | 16 | .PHONY: all clean 17 | 18 | -------------------------------------------------------------------------------- /gennothing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | from numpy import exp 6 | import sys 7 | x = numpy.linspace(400, 800, 200) 8 | 9 | N = int(sys.argv[1]) 10 | noise_level = 0.01 11 | numpy.random.seed(N) 12 | y = numpy.random.normal(0, noise_level, size=(len(x),N)) 13 | 14 | with h5py.File('data_nothing_%s.hdf5' % 
sys.argv[1], 'w') as f: 15 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 16 | f.create_dataset('y', data=y, compression='gzip', shuffle=True) 17 | 18 | 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC := gcc 3 | CFLAGS += -fPIC -std=c99 -Wall -lm -Wextra -pedantic 4 | #CFLAGS += -Wduplicated-cond -Wduplicated-branches -Wrestrict -Wnull-dereference 5 | CFLAGS += -Wlogical-op -Wjump-misses-init -Wdouble-promotion -Wshadow -Wformat=2 6 | CFLAGS += -O3 7 | 8 | all: clike.so clike-parallel.so cmuselike.so cmuselike-parallel.so clustering 9 | 10 | clustering: 11 | $(MAKE) -C clustering/ 12 | 13 | %-parallel.so: %.c 14 | ${CC} ${CFLAGS} -fopenmp -DPARALLEL=1 $< -o $@ -shared 15 | 16 | %.so: %.c 17 | ${CC} ${CFLAGS} $< -o $@ -shared 18 | clean: 19 | rm *.so 20 | 21 | .PHONY: all clean clustering 22 | 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Johannes Buchner 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | -------------------------------------------------------------------------------- /plotmuseposterior.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import json 3 | import numpy 4 | from numpy import log, log10, arctan, pi, exp 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import h5py 8 | import scipy.stats 9 | import corner 10 | 11 | filename = sys.argv[1] 12 | with h5py.File(filename, 'r') as f: 13 | logZ = f['logZ'].value 14 | for i in range(len(logZ)): 15 | print(' %d ...' 
% i) 16 | w = f['w'][:,i] + f['L'][:,i] 17 | mask = numpy.isfinite(w) 18 | if mask.sum() < 4000: 19 | continue 20 | jparent = numpy.where(mask)[0] 21 | w = w[jparent] 22 | #print w, w.min(), w.max() 23 | w = numpy.exp(w - w.max()) 24 | w = w / w.sum() 25 | j = numpy.random.choice(jparent, size=100000, p=w) 26 | 27 | O = numpy.log10(f['x'][:,i,0][j]) 28 | Z = f['x'][:,i,1][j] 29 | SFtau = f['x'][:,i,2][j] 30 | SFage = numpy.log10(f['x'][:,i,3][j]) 31 | EBV = f['x'][:,i,4][j] 32 | print(w.shape, O.shape, Z.shape, SFtau.shape, SFage.shape, EBV.shape) 33 | data = numpy.transpose([O, Z, SFtau, SFage, EBV]) 34 | 35 | # make marginal plots 36 | 37 | figure = corner.corner(data, 38 | labels=[r"Continuum", r"logZ", r"SFtau", r"SFage", r'EBV'], 39 | quantiles=[0.16, 0.5, 0.84], 40 | show_titles=True, title_kwargs={"fontsize": 12}) 41 | figure.savefig('museposterior_%d.pdf' % (i+1), bbox_inches='tight') 42 | plt.close() 43 | 44 | -------------------------------------------------------------------------------- /plotscaling.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import json 3 | import numpy 4 | from numpy import log 5 | import sys 6 | import matplotlib.pyplot as plt 7 | 8 | xx = [] 9 | yy = [] 10 | 11 | for filename in sys.argv[1:]: 12 | data = json.load(open(filename)) 13 | if 'ndata' in data: 14 | x = data['ndata'] 15 | else: 16 | x = int(filename.split('.')[0].split('_')[-1]) 17 | #y = json.load(open(filename))['ndraws'] 18 | #if 'duration' not in data: 19 | # continue 20 | #y = data['duration'] 21 | y = data['ndraws'] 22 | xx.append(x) 23 | yy.append(y) 24 | 25 | i = numpy.argsort(xx) 26 | xx = numpy.array(xx)[i] 27 | yy = numpy.array(yy)[i] 28 | 29 | plt.figure(figsize=(5,5)) 30 | plt.plot(xx, xx * max(yy/xx), '-', label='linear cost', color='k') 31 | plt.plot(xx, numpy.sqrt(xx) * numpy.nanmax(yy / numpy.sqrt(xx)), ':', label='sqrt cost', color='gray') 32 | #plt.plot(xx, xx**0.333 * numpy.nanmax(yy / xx**0.333), '--', label='cubic root cost') 33 | #plt.plot(xx, log(xx) * numpy.nanmax(yy / log(xx)), '-.', label='log cost') 34 | plt.ylabel('Model Evaluations') 35 | plt.xlabel('Data Sets') 36 | plt.yscale('log') 37 | plt.xscale('log') 38 | #plt.xlim(0.9, 10000) 39 | plt.xlim(0.8, max(xx)*1.5) 40 | plt.plot(xx, yy, 'o ', label='our algorithm', color='r') 41 | plt.legend(loc='upper left', numpoints=1, prop=dict(size=10)) 42 | plt.savefig('plotscaling.pdf', bbox_inches='tight') 43 | plt.close() 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /plotevidences.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import json 3 | import numpy 4 | from numpy import log, log10 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import h5py 8 | import scipy.stats 9 | 10 | xx = [] 11 | yy = [] 12 | 13 | filename_in = sys.argv[1] 14 | filename = sys.argv[2] 15 | plt.figure(figsize=(6,4)) 16 | f = h5py.File(filename_in, 'r') 17 | logZ0 = numpy.sum(-0.5 * (f['y'].value/0.01)**2, axis=0) 18 | f = h5py.File(filename, 'r') 19 | logZ1 = f['logZ'].value 20 | B = numpy.log10(numpy.exp(logZ1 - logZ0)) 21 | B[B > 4] = 4 22 | bins = numpy.linspace(B.min(), 10, 40) 23 | plt.hist(B, bins=bins, color='k', histtype='step', normed=True) 24 | 25 | filename_in = sys.argv[3] 26 | filename = sys.argv[4] 27 | f = h5py.File(filename_in, 'r') 28 | logZ0 = numpy.sum(-0.5 * 
(f['y'].value/0.01)**2, axis=0) 29 | f = h5py.File(filename, 'r') 30 | logZ1 = f['logZ'].value 31 | B = numpy.log10(numpy.exp(logZ1 - logZ0)) 32 | Blim = sorted(B)[int(len(B)*0.999)] 33 | Blim = B.max() 34 | print(10**Blim) 35 | bins = numpy.linspace(-5, 5, 100) 36 | plt.hist(B, bins=bins, color='r', histtype='step', normed=True) 37 | x = list(range(-1, 5)) 38 | plt.vlines(Blim, 0, 4, color='green', linestyles=[':']) 39 | plt.ylim(0, 4) 40 | plt.yticks([0, 1, 2, 3, 4]) 41 | y = ['${10}^{%d}$' % xi for xi in x] 42 | plt.xticks(x, y) 43 | plt.xlim(-2, 4.5) 44 | plt.xlabel('Bayes factor B') 45 | plt.ylabel('Frequency') 46 | plt.savefig('plotevidences.pdf', bbox_inches='tight') 47 | plt.close() 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /checkoutput.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import h5py 3 | import sys 4 | import numpy 5 | from numpy import log, log10, exp, pi 6 | import matplotlib.pyplot as plt 7 | 8 | for filename in sys.argv[1:]: 9 | with h5py.File(filename) as f: 10 | print(filename) 11 | logZ = f['logZ'].value 12 | logZerr = f['logZerr'].value 13 | L = f['L'].value 14 | 15 | if len(logZ.shape) > 0: 16 | logZ = logZ[0] 17 | logZerr = logZerr[0] 18 | L = L[:,0] 19 | #print f['x'][-1,0] 20 | else: 21 | #print f['x'][-1] 22 | pass 23 | ndraws = f['ndraws'].value 24 | print('logZ = %.1f +- %.1f' % (logZ, logZerr)) 25 | print('ndraws:', ndraws) 26 | #plt.plot(L) 27 | ndata = f['w'].shape[1] 28 | for d in range(ndata): 29 | w = f['w'][:,d] 30 | w = exp(w - w.max()) 31 | w.sort() 32 | w /= w.sum() 33 | i = numpy.random.choice(numpy.arange(len(w)), size=1000, replace=True, p=w) 34 | A, mu, logsigma = f['x'][:,d,:].transpose() 35 | print(numpy.isfinite(A).all(), A[~numpy.isfinite(A)]) 36 | A = log10(A[i]) 37 | #A = A[i] 38 | mu = mu[i] 39 | logsigma = logsigma[i] 40 | print('A', A.mean(), A.std()) 41 | print('mu', mu.mean(), mu.std()) 42 | print('logsigma', logsigma.mean(), logsigma.std()) 43 | plt.subplot(3, 1, 1) 44 | plt.plot(A, mu, 'x ') 45 | plt.xlabel('A') 46 | plt.ylabel('mu') 47 | plt.subplot(3, 1, 2) 48 | plt.plot(logsigma, mu, 'x ') 49 | plt.xlabel('logsigma') 50 | plt.ylabel('mu') 51 | plt.subplot(3, 1, 3) 52 | L = f['L'][:,d] 53 | L = L[numpy.isfinite(L)] 54 | plt.plot(L, '-') 55 | plt.show() 56 | print(f['w'].shape, f['x'].shape) 57 | 58 | 59 | -------------------------------------------------------------------------------- /TODO.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | TODO 3 | ============ 4 | 5 | Non-performance 6 | ----------------- 7 | 8 | * Change prints to logging 9 | 10 | Performance (single-threaded) 11 | ------------------------------ 12 | 13 | Here we discuss wall-clock time, not number of model evaluations. 14 | If the model is slow enough, there is no issue. 15 | 16 | Currently, the execution speed is limited by two functions: 17 | 18 | 1. Building the RadFriends region draw_constrained -> maxdistance 19 | 20 | maxdistance could be optimized by calling it less often. This is 21 | what sample.CachedConstrainer tries to do. The checks there could be more 22 | generous. -> Done, but maybe more optimisation possible? 23 | 24 | One could also increase the rebuild_every parameters 25 | 26 | One could modify MetricLearningFriendsConstrainer to rebuild not every n calls, 27 | but every n likelihood evaluations. 
This would improve performance when drawing 28 | is already quite efficient. See nestle, which does this. 29 | -> Done! 30 | 31 | 2. Building the graph to find independent data sets, multi_nested_sampler.generate_subsets_graph 32 | 33 | igraph could be replaced with graphtool, which supports parallelisation. 34 | 35 | One could further explore when to use 36 | generate_subsets_graph vs generate_subsets_nograph (controlled by use_graph) 37 | 38 | 39 | Performance (parallelisation) 40 | ------------------------------ 41 | 42 | * The subsets could be sampled in parallel. 43 | 44 | * The entire framework could be set up in a MapReduce/MPI way, with the 45 | MetricLearningFriendsConstrainer proposing (multiple) points, 46 | passing to multiple machines for evaluating the model, 47 | then using MapReduce to evaluate the likelihood over the Big Data set, 48 | and returning this to MetricLearningFriendsConstrainer. 49 | See MultiNest, which already parallelises the likelihood evaluations. 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========================================================================= 2 | Big Data vs. complex physical models - a scalable inference algorithm 3 | ========================================================================= 4 | 5 | A algorithm for fitting models against many data sets, giving parameter probability distributions. 6 | The key is that model evaluations are efficiently re-used between data sets, 7 | making the algorithm scale sub-linearly. 8 | 9 | See paper for details: https://arxiv.org/abs/1707.04476 10 | 11 | How to run 12 | ============ 13 | 14 | You need to install 15 | 16 | * python-igraph 17 | * numpy, scipy 18 | * h5py 19 | * progressbar 20 | * gcc 21 | 22 | Then run:: 23 | 24 | $ # build 25 | $ make 26 | $ # simulate data set 27 | $ python gensimple_horns.py 10000 28 | $ # analyse 29 | $ OMP_NUM_THREADS=4 python sample.py data_widths_10000.hdf5 100 30 | $ # simulate no-signal data set 31 | $ python gennothing.py 10000 # simulate no-signal data set 32 | $ # analyse 33 | $ OMP_NUM_THREADS=4 python sample.py data_nothing_10000.hdf5 10000 34 | 35 | See paper draft for details. 36 | 37 | Improving Performance 38 | ======================= 39 | 40 | See TODO. 41 | 42 | Implementation notes and Code organisation 43 | ============================================ 44 | 45 | * sample.py sets up everything 46 | * Set your problem definition (parameters, model, likelihood) in sample.py 47 | * Integrator: multi_nested_integrator.py . Calls sampler repeatedly. 48 | * Joint Sampler: multi_nested_sampler.py . This deals with managing the graph and the queues and which live points to use for a new draw. Calls draw_constrained 49 | * The queues (paper) are called shelves in the code. 50 | * RadFriends: hiermetriclearn.py: Suggests new samples from live points and filters with likelihood function to return a higher point. 51 | * clustering/: Fast C implementations for checking if a point is in the neighbourhood and computing safe distances. 
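For orientation, here is a minimal NumPy sketch (not part of the code base; the function name and vectorised form are illustrative only) of the shared-evaluation idea behind clike.c: each proposed parameter set is turned into a single model prediction, which is then compared against every data set that is still active, assuming independent Gaussian noise with a known noise level::

    import numpy

    def loglike_per_dataset(x, y, data_mask, A, mu, sig, noise_level=0.01):
        # one model evaluation, shared by every active data set
        ypred = A * numpy.exp(-0.5 * ((mu - x) / sig)**2)          # shape (nx,)
        resid = (y[:, data_mask] - ypred[:, None]) / noise_level   # shape (nx, nactive)
        return -0.5 * (resid**2).sum(axis=0)                       # one logL per active data set

Note that clike.c accumulates the raw chi-square and leaves the -0.5 factor to the caller; the sketch above directly returns the Gaussian log-likelihood up to an additive constant.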
52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /plotposterior.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import json 3 | import numpy 4 | from numpy import log, log10 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import h5py 8 | import scipy.stats 9 | 10 | xx = [] 11 | yy = [] 12 | 13 | filename = sys.argv[1] 14 | colors = ['yellow', 'pink', 'cyan', 'magenta'] 15 | cmap = plt.cm.gray 16 | zs = [] 17 | plt.figure(figsize=(6,4)) 18 | with h5py.File(filename, 'r') as f: 19 | logZ = f['logZ'].value 20 | for i in range(len(logZ)): 21 | w = f['w'][:,i] + f['L'][:,i] 22 | mask = numpy.isfinite(w) 23 | jparent = numpy.where(mask)[0] 24 | w = w[jparent] 25 | #print w, w.min(), w.max() 26 | w = numpy.exp(w - w.max()) 27 | w = w / w.sum() 28 | j = numpy.random.choice(jparent, size=1000, p=w) 29 | mu = f['x'][:,i,1][j] 30 | if mu.std() < 50: 31 | zs.append(mu.mean() / 440 - 1) 32 | #if mu.std() > 40: 33 | # print 'skipping unconstrained: %.1f' % mu.std() 34 | # continue 35 | #A = log10(f['x'][:,i,0][j]) 36 | A = f['x'][:,i,0][j] * 100 37 | #if i < 4: 38 | # plt.plot(mu[:100], A[:100], '. ', color='r', alpha=0.2) 39 | if i < 4: 40 | color = colors[i] 41 | else: 42 | color = cmap(0.8 * min(50, mu.std())/50.) 43 | plt.errorbar(x=numpy.mean(mu), xerr=mu.std(), 44 | y=A.mean(), yerr=A.std(), 45 | capsize=0, color=color, 46 | elinewidth=4 if i < 4 else 1) 47 | plt.xlabel('Wavelength [nm]') 48 | plt.ylabel('Line amplitude') 49 | plt.xlim(400, 800) 50 | plt.ylim(1, 20) 51 | plt.yticks([1,2,10], [1,2,10]) 52 | plt.yscale('log') 53 | plt.savefig('plotposterior.pdf', bbox_inches='tight') 54 | plt.close() 55 | 56 | plt.figure(figsize=(5,1.5)) 57 | plt.hist(zs, bins=10, histtype='step', label='Well-constrained lines', normed=True) 58 | alpha, beta, scale = 2., 7., 1 59 | x = numpy.linspace(0, 2, 1000) 60 | plt.plot(x, scipy.stats.beta(alpha, beta).pdf(x), '-', color='k', label='Input redshift distribution') 61 | plt.ylabel('Frequency') 62 | plt.xlabel('Redshift') 63 | plt.xlim(0, 1) 64 | plt.savefig('plotposteriorz.pdf', bbox_inches='tight') 65 | plt.close() 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /adaptive_progress.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import progressbar 3 | 4 | """ 5 | From 2.3-dev of progressbar, not in release yet. 6 | """ 7 | 8 | class AdaptiveETA(progressbar.Timer): 9 | """Widget which attempts to estimate the time of arrival. 10 | 11 | Uses a weighted average of two estimates: 12 | 1) ETA based on the total progress and time elapsed so far 13 | 2) ETA based on the progress as per tha last 10 update reports 14 | 15 | The weight depends on the current progress so that to begin with the 16 | total progress is used and at the end only the most recent progress is 17 | used. 
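For example, at 25% progress the weight is sqrt(0.25) = 0.5, so both estimates contribute equally; close to completion the recent-progress estimate dominates.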
18 | """ 19 | 20 | TIME_SENSITIVE = True 21 | NUM_SAMPLES = 10 22 | 23 | def _update_samples(self, currval, elapsed): 24 | sample = (currval, elapsed) 25 | if not hasattr(self, 'samples'): 26 | self.samples = [sample] * (self.NUM_SAMPLES + 1) 27 | else: 28 | self.samples.append(sample) 29 | return self.samples.pop(0) 30 | 31 | def _eta(self, maxval, currval, elapsed): 32 | return elapsed * maxval / float(currval) - elapsed 33 | 34 | def update(self, pbar): 35 | """Updates the widget to show the ETA or total time when finished.""" 36 | if pbar.currval == 0: 37 | return 'ETA: --:--:--' 38 | elif pbar.finished: 39 | return 'Time: %s' % self.format_time(pbar.seconds_elapsed) 40 | else: 41 | elapsed = pbar.seconds_elapsed 42 | currval1, elapsed1 = self._update_samples(pbar.currval, elapsed) 43 | eta = self._eta(pbar.maxval, pbar.currval, elapsed) 44 | if pbar.currval > currval1: 45 | etasamp = self._eta(pbar.maxval - currval1, 46 | pbar.currval - currval1, 47 | elapsed - elapsed1) 48 | weight = (pbar.currval / float(pbar.maxval)) ** 0.5 49 | eta = (1 - weight) * eta + weight * etasamp 50 | return 'ETA: %s' % self.format_time(eta) 51 | 52 | 53 | -------------------------------------------------------------------------------- /pres/plotjointcontour.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | 5 | CX = [2, 2.2] 6 | CSX = [0.5, 0.5] 7 | CSY = [0.2, 0.2] 8 | CY = [1.1, 1.2] 9 | 10 | def likelihood(x, y): 11 | l = 0 12 | for cx, cy, csx, csy in zip(CX, CY, CSX, CSY): 13 | l = -0.5 * (((cx - x)/csx)**2 + ((cy - y)/csy)**2) 14 | yield l 15 | 16 | x = numpy.linspace(-2.5, 6.5, 100) 17 | y = numpy.linspace(-2.5, 6.5, 100) 18 | X, Y = numpy.meshgrid(x, y) 19 | XY = numpy.array(numpy.transpose([X.flatten(), Y.flatten()]), order='C') 20 | L1, L2 = likelihood(X, Y) 21 | Lsorted = L1[30:-30,30:-30].flatten() 22 | Lsorted.sort() 23 | levels = Lsorted[::Lsorted.size/7-1].tolist() # + [L.max()] 24 | levels = levels[2:] 25 | #levels = L.max() - numpy.arange(5) * 4 - 2 26 | plt.figure(figsize=(6, 3), frameon=False) 27 | plt.axis('off') 28 | plt.contour(X, Y, L1, levels) 29 | plt.contour(X, Y, L2, levels) 30 | plt.savefig('plotjointcontour.png', bbox_inches='tight') 31 | plt.savefig('plotjointcontour.pdf', bbox_inches='tight') 32 | plt.close() 33 | 34 | numpy.random.seed(1) 35 | N = 10000 36 | x = numpy.random.uniform(-2, 6, size=N) 37 | y = numpy.random.uniform(-2, 6, size=N) 38 | l1, l2 = likelihood(x, y) 39 | Nlive = 100 40 | for i in range(len(levels)): 41 | plt.figure(figsize=(6, 2.2), frameon=False) 42 | plt.axis('off') 43 | #plt.text(-2, 4, 'Iteration %d' % (i*100)) 44 | #plt.text(-2, 4, '(%d)' % (i+1)) 45 | mask1 = l1 > levels[i] 46 | mask2 = l2 > levels[i] 47 | maskboth = numpy.logical_and(mask1, mask2) 48 | maskone = numpy.logical_or(mask1, mask2) 49 | N1 = 0 50 | N2 = 0 51 | for j in range(N): 52 | if mask1[j] and mask2[j]: # joint 53 | plt.plot(x[j], y[j], '.', color='k') 54 | N1 += 1 55 | N2 += 1 56 | elif mask1[j] and N1 < Nlive: 57 | plt.plot(x[j], y[j], 'x', color='cyan') 58 | N1 += 1 59 | elif mask2[j] and N2 < Nlive: 60 | plt.plot(x[j], y[j], '+', color='magenta') 61 | N2 += 1 62 | else: 63 | pass 64 | if N1 >= Nlive and N2 >= Nlive: 65 | break 66 | plt.contour(X, Y, L1, levels[i:i+1], colors=['cyan'], linestyles=[':']) 67 | plt.contour(X, Y, L2, levels[i:i+1], colors=['magenta'], linestyles=[':']) 68 | plt.ylim(-2.5, 6.2) 69 | plt.xlim(-3, 7) 70 | 
plt.savefig('plotjointcontour_%d.png' % (i+1), bbox_inches='tight') 71 | plt.savefig('plotjointcontour_%d.pdf' % (i+1), bbox_inches='tight') 72 | plt.close() 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /gensimple_horns.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | from numpy import exp, pi, arctan 6 | import sys 7 | 8 | def gauss(x, A, mu, sig): 9 | xT = x.reshape((1,-1)) 10 | AT = A.reshape((-1,1)) 11 | muT = mu.reshape((-1,1)) 12 | sigT = sig.reshape((-1,1)) 13 | return AT * exp(-0.5 * ((muT - xT)/sigT)**2) 14 | 15 | x = numpy.linspace(400, 800, 200) 16 | 17 | N = 40 18 | N = int(sys.argv[1]) 19 | numpy.random.seed(N) 20 | z = arctan(numpy.random.uniform(-pi, pi, size=N)) * 0.1 21 | rest_wave = 656 22 | print('generating parameters ...') 23 | width_narrow = 5.0 * numpy.ones(N) 24 | mean_narrow = rest_wave * (1 + z) 25 | width_narrow = width_narrow 26 | noise_level = 0.01 27 | signal_level = 0.02 / numpy.random.power(3, size=N) 28 | height_narrow = signal_level 29 | 30 | print('generating signal ...') 31 | ym = gauss(A=height_narrow, mu=mean_narrow, x=x, sig=width_narrow) 32 | ym = numpy.transpose(ym) 33 | print(ym.shape) 34 | 35 | # add noise 36 | print('adding noise...') 37 | y = ym.copy() 38 | for i in range(N): 39 | y[:,i] += numpy.random.normal(0, noise_level, size=len(x)) 40 | 41 | print('plotting ...') 42 | #for i in range(min(N, 20)): 43 | # #plt.plot(x, y[:,i], '.-') 44 | # plt.plot(x, y[:,i], '-') 45 | #plt.savefig('gen_widths.pdf', bbox_inches='tight') 46 | #plt.close() 47 | colors = ['yellow', 'pink', 'cyan', 'magenta'] 48 | colors = ['magenta', 'cyan', 'pink', 'yellow'] 49 | for i in range(min(N, 4)): 50 | #plt.plot(x, y[:,i], '.-') 51 | plt.plot(rest_wave * (1 + z[i]), 1.1 * y[:,i].max() / noise_level, 'v', color=colors[i], ms=12, mew=0.5, mec='k') 52 | #plt.plot(rest_wave * (1 + z[i]), 4, 'v', color=colors[i], ms=12) 53 | plt.plot(x, y[:,i] / noise_level, '-', color=colors[i], lw=1) 54 | plt.ylabel('Detector signal') 55 | plt.xlabel('Wavelength [nm]') 56 | plt.savefig('genhorns.pdf', bbox_inches='tight') 57 | plt.close() 58 | 59 | 60 | #print x.shape, y.shape, z.shape 61 | with h5py.File('data_widths_%s.hdf5' % sys.argv[1], 'w') as f: 62 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 63 | f.create_dataset('y', data=y, compression='gzip', shuffle=True) 64 | f.create_dataset('z', data=z, compression='gzip', shuffle=True) 65 | f.create_dataset('mean_narrow', data=mean_narrow, compression='gzip', shuffle=True) 66 | f.create_dataset('width_narrow', data=width_narrow, compression='gzip', shuffle=True) 67 | f.create_dataset('height_narrow', data=height_narrow, compression='gzip', shuffle=True) 68 | 69 | 70 | -------------------------------------------------------------------------------- /gensimple_bright.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | from numpy import exp 6 | import sys 7 | 8 | def gauss(x, z, A, mu, sig): 9 | xT = x.reshape((1,-1)) 10 | zT = z.reshape((-1,1)) 11 | AT = A.reshape((-1,1)) 12 | muT = mu.reshape((-1,1)) 13 | sigT = sig.reshape((-1,1)) 14 | return AT * exp(-0.5 * ((muT - xT / (1. 
+ zT))/sigT)**2) 15 | 16 | x = numpy.linspace(400, 800, 200) 17 | 18 | N = 40 19 | N = int(sys.argv[1]) 20 | numpy.random.seed(N) 21 | z = numpy.zeros(N) + 0.01 22 | rest_wave = 440 23 | print('generating parameters ...') 24 | # in km/s 25 | width_broad = 4000 * rest_wave / 300000 * numpy.ones(N) 26 | width_narrow = 400 * rest_wave / 300000 * numpy.ones(N) 27 | # convert to nm 28 | mean_broad = rest_wave * numpy.ones(N) 29 | mean_narrow = rest_wave * numpy.ones(N) 30 | width_broad = width_broad 31 | width_narrow = width_narrow 32 | noise_level = 0.01 33 | #signal_level = numpy.random.exponential(size=N) * 0.4 34 | signal_level = numpy.ones(N) * 0.2 35 | #signal_level = numpy.random.uniform(size=N) * 0.5 36 | #is_type1 = numpy.random.uniform(size=N) < 0.5 37 | height_broad = 10**-1 * signal_level 38 | height_narrow = signal_level 39 | 40 | #X = numpy.array([x]) 41 | 42 | print('generating signal ...') 43 | ym = gauss(A=height_broad, mu=mean_broad, x=x, z=z, sig=width_broad) 44 | ym += gauss(A=height_narrow, mu=mean_narrow, x=x, z=z, sig=width_narrow) 45 | ym = numpy.transpose(ym) 46 | print(ym.shape) 47 | 48 | # add noise 49 | print('adding noise...') 50 | y = ym.copy() 51 | for i in range(N): 52 | y[:,i] += numpy.random.normal(0, noise_level, size=len(x)) 53 | 54 | print('plotting ...') 55 | for i in range(min(N, 20)): 56 | #plt.plot(x, y[:,i], '.-') 57 | plt.plot(x, y[:,i], '-') 58 | plt.savefig('gen_bright.pdf', bbox_inches='tight') 59 | plt.close() 60 | 61 | #print x.shape, y.shape, z.shape 62 | with h5py.File('data_bright_%s.hdf5' % sys.argv[1], 'w') as f: 63 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 64 | f.create_dataset('y', data=y, compression='gzip', shuffle=True) 65 | f.create_dataset('z', data=z, compression='gzip', shuffle=True) 66 | f.create_dataset('mean_broad', data=mean_broad, compression='gzip', shuffle=True) 67 | f.create_dataset('width_broad', data=width_broad, compression='gzip', shuffle=True) 68 | f.create_dataset('height_broad', data=height_broad, compression='gzip', shuffle=True) 69 | f.create_dataset('mean_narrow', data=mean_narrow, compression='gzip', shuffle=True) 70 | f.create_dataset('width_narrow', data=width_narrow, compression='gzip', shuffle=True) 71 | f.create_dataset('height_narrow', data=height_narrow, compression='gzip', shuffle=True) 72 | 73 | 74 | -------------------------------------------------------------------------------- /pres/plotcontour.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | #from nested_sampling.clustering.neighbors import find_rdistance, is_within_distance_of, count_within_distance_of, any_within_distance_of 5 | from nested_sampling.samplers.hiermetriclearn import ClusterResult, RadFriendsRegion 6 | 7 | 8 | CX = [0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4] 9 | CS = [0.2, 0.2, 0.2, 0.2, 0.15, 0.2, 0.15, 0.2, 0.2] 10 | CY = [0.2, 0, 0, 0, 0.1, 0.3, 1, 1.4, 2] 11 | CW = [1, 2, 2, 2, 2, 2, 20, 2, 2] 12 | 13 | CX = numpy.linspace(0, 4, 20) 14 | CY = CX*-0.2 + CX**2*0.3 15 | #plt.plot(x, x*-0.2 + x**2*0.2) 16 | CW = CX * 0 + 2 + 10*CY**2 17 | CW = 1./CW 18 | CW[0] = 0.5 19 | CW[1] = 1 20 | #CW[-5] = 20 21 | CS = CX * 0 + 0.2 22 | #CS[-5] = 0.12 23 | 24 | 25 | def likelihood(x, y): 26 | l = 0 27 | for cx, cy, cw, cs in zip(CX, CY, CW, CS): 28 | l += cw * numpy.exp(-0.5 * (((cx - x)/cs)**2 + ((cy - y)/cs)**2)) 29 | return numpy.log(l) 30 | 31 | 32 | x = numpy.linspace(-2.5, 6.5, 100) 
33 | y = numpy.linspace(-2.5, 6.5, 100) 34 | X, Y = numpy.meshgrid(x, y) 35 | XY = numpy.array(numpy.transpose([X.flatten(), Y.flatten()]), order='C') 36 | print(XY.dtype) 37 | L = likelihood(X, Y) 38 | Lsorted = L[30:-30,30:-30].flatten() 39 | Lsorted.sort() 40 | levels = Lsorted[::Lsorted.size/7-1].tolist() # + [L.max()] 41 | levels = levels[2:] 42 | #levels = L.max() - numpy.arange(5) * 4 - 2 43 | plt.figure(figsize=(6, 3), frameon=False) 44 | plt.axis('off') 45 | plt.contour(X, Y, L, levels) 46 | plt.savefig('plotcontour.png', bbox_inches='tight') 47 | plt.savefig('plotcontour.pdf', bbox_inches='tight') 48 | plt.close() 49 | 50 | numpy.random.seed(1) 51 | N = 10000 52 | x = numpy.random.uniform(-2, 6, size=N) 53 | y = numpy.random.uniform(-2, 6, size=N) 54 | l = likelihood(x, y) 55 | Nlive = 100 56 | for i in range(len(levels)): 57 | plt.figure(figsize=(6, 2.2), frameon=False) 58 | plt.axis('off') 59 | plt.text(-2, 4, 'Iteration %d' % (i*100)) 60 | #plt.text(-2, 4, '(%d)' % (i+1)) 61 | mask = l > levels[i] 62 | xlevel = x[mask][:Nlive] 63 | ylevel = y[mask][:Nlive] 64 | live_points = numpy.array(numpy.transpose([xlevel, ylevel]), order='C') 65 | plt.contour(X, Y, L, levels[i:i+1], colors=['k'], linestyles=[':']) 66 | plt.plot(xlevel, ylevel, '.', color='k') 67 | # do radfriends with these points 68 | region = RadFriendsRegion(live_points) 69 | mask = region.are_inside(XY) 70 | maskregion = mask.reshape(X.shape) 71 | plt.contour(X, Y, maskregion*1., [0.5], colors=['orange'], linestyles=['-']) 72 | 73 | plt.ylim(-2.5, 6.2) 74 | plt.xlim(-3, 7) 75 | plt.savefig('plotcontour_%d.png' % (i+1), bbox_inches='tight') 76 | plt.savefig('plotcontour_%d.pdf' % (i+1), bbox_inches='tight') 77 | plt.close() 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /gen.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | from numpy import exp 6 | 7 | def gauss(x, z, A, mu, sig): 8 | xT = x.reshape((1,-1)) 9 | zT = z.reshape((-1,1)) 10 | AT = A.reshape((-1,1)) 11 | muT = mu.reshape((-1,1)) 12 | sigT = sig.reshape((-1,1)) 13 | return AT * exp(-0.5 * ((muT - xT / (1. 
+ zT))/sigT)**2) 14 | 15 | x = numpy.linspace(400, 800, 200) 16 | 17 | N = 1000 18 | N = int(sys.argv[1]) 19 | numpy.random.seed(1) 20 | z = numpy.random.beta(2, 30, size=N) * 2 21 | #z = numpy.zeros(N) + 0.01 22 | rest_wave = 440 23 | # in km/s 24 | width_broad = 10**numpy.random.normal(3, 0.2, size=N) * rest_wave / 300000 25 | width_narrow = 10**numpy.random.normal(1, 0.2, size=N) * rest_wave / 300000 26 | print(width_narrow.min()) 27 | print(width_broad.min()) 28 | # convert to nm 29 | mean_broad = rest_wave * numpy.ones(N) 30 | mean_narrow = rest_wave * numpy.ones(N) 31 | width_broad = width_broad 32 | width_narrow = width_narrow 33 | noise_level = 0.01 34 | signal_level = numpy.random.exponential(size=N) * 10 35 | #signal_level = numpy.ones(N) * 10 36 | is_type1 = numpy.random.uniform(size=N) < 0.5 37 | #is_type1 = numpy.random.uniform(size=N) > 0 38 | height_broad = numpy.where(is_type1, 10**numpy.random.normal(0, 0.2, size=N), 10**numpy.random.normal(-2, 0.2, size=N)) * signal_level 39 | height_narrow = signal_level 40 | 41 | #X = numpy.array([x]) 42 | 43 | ym = gauss(A=height_broad, mu=mean_broad, x=x, z=z, sig=width_broad) 44 | ym += gauss(A=height_narrow, mu=mean_narrow, x=x, z=z, sig=width_narrow) 45 | ym = numpy.transpose(ym) 46 | print(ym.shape) 47 | 48 | # add noise 49 | print('adding noise') 50 | y = numpy.random.normal(0, noise_level, size=ym.shape) + ym 51 | print('plotting ...') 52 | for i in range(min(N, 20)): 53 | #plt.plot(x, y[:,i], '.-') 54 | plt.plot(x, y[:,i], '-') 55 | plt.savefig('gen.pdf', bbox_inches='tight') 56 | plt.close() 57 | 58 | print(x.shape, y.shape, z.shape) 59 | with h5py.File('data.hdf5', 'w') as f: 60 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 61 | f.create_dataset('y', data=y, compression='gzip', shuffle=True) 62 | f.create_dataset('z', data=z, compression='gzip', shuffle=True) 63 | f.create_dataset('mean_broad', data=mean_broad, compression='gzip', shuffle=True) 64 | f.create_dataset('mean_narrow', data=mean_narrow, compression='gzip', shuffle=True) 65 | f.create_dataset('width_broad', data=width_broad, compression='gzip', shuffle=True) 66 | f.create_dataset('height_broad', data=height_broad, compression='gzip', shuffle=True) 67 | f.create_dataset('width_broad', data=width_broad, compression='gzip', shuffle=True) 68 | f.create_dataset('height_broad', data=height_broad, compression='gzip', shuffle=True) 69 | 70 | 71 | -------------------------------------------------------------------------------- /cmuselike.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Likelihood implementation in C 4 | -------------------------------- 5 | 6 | Copyright (c) 2017 Johannes Buchner 7 | 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 15 | 16 | ***/ 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #ifdef PARALLEL 23 | #include 24 | #endif 25 | 26 | #define IFVERBOSE if(0) 27 | #define IFDEBUG if(0) 28 | #define adouble double 29 | #define bdouble double 30 | #define sqr(x) (pow(x,2)) 31 | 32 | // Parallelisation does not work at the moment, you are welcome to fix it 33 | // ret = lib.like(yd, vd, ypred, data_mask, ndata, nspec, Lout) 34 | int like( 35 | const void * yyp, const void * vvp, const void * ypredp, const void * data_maskp, 36 | const int ndata, const int nx, 37 | void * Loutp 38 | ) { 39 | const adouble * yy = (const adouble*) yyp; 40 | const adouble * vv = (const adouble*) vvp; 41 | const adouble * ypred = (const adouble*) ypredp; 42 | const bool * data_mask = (const bool*) data_maskp; 43 | adouble * Lout = (adouble*) Loutp; 44 | 45 | #ifdef PARALLEL 46 | #pragma omp parallel for 47 | #endif 48 | for (int i = 0; i < ndata; i++) { 49 | if (data_mask[i]) { 50 | // compute s 51 | double s1 = 0.; 52 | double s2 = 1e-10; 53 | for (int j = 0; j < nx; j++) { 54 | s1 += yy[i+j*ndata] * ypred[j] / vv[i+j*ndata]; 55 | s2 += pow(ypred[j], 2) / vv[i+j*ndata]; 56 | } 57 | double s = s1/s2; 58 | double chi = 0.; 59 | for (int j = 0; j < nx; j++) { 60 | chi += pow(yy[i+j*ndata] - s * ypred[j], 2) / vv[i+j*ndata]; 61 | } 62 | Lout[i] = -0.5 * chi; 63 | } 64 | } 65 | return 0; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /gensimple.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | from numpy import exp 6 | import sys 7 | 8 | def gauss(x, z, A, mu, sig): 9 | xT = x.reshape((1,-1)) 10 | zT = z.reshape((-1,1)) 11 | AT = A.reshape((-1,1)) 12 | muT = mu.reshape((-1,1)) 13 | sigT = sig.reshape((-1,1)) 14 | return AT * exp(-0.5 * ((muT - xT / (1. 
+ zT))/sigT)**2) 15 | 16 | x = numpy.linspace(400, 800, 200) 17 | 18 | N = 40 19 | N = int(sys.argv[1]) 20 | numpy.random.seed(N) 21 | alpha, beta, scale = 2., 7., 1 22 | z = numpy.random.beta(alpha, beta, size=N) * scale 23 | #z = numpy.zeros(N) + 0.01 24 | rest_wave = 440 25 | print('generating parameters ...') 26 | # in km/s 27 | width_broad = 4000 * rest_wave / 300000 * numpy.ones(N) 28 | width_narrow = 400 * rest_wave / 300000 * numpy.ones(N) 29 | # convert to nm 30 | mean_broad = rest_wave * numpy.ones(N) 31 | mean_narrow = rest_wave * numpy.ones(N) 32 | width_broad = width_broad 33 | width_narrow = width_narrow 34 | noise_level = 0.01 35 | #signal_level = numpy.random.exponential(size=N) * 0.4 36 | #signal_level = numpy.ones(N) * 0.04 37 | signal_level = numpy.random.normal(0.5, 0.5, size=10*N) 38 | signal_level = signal_level[signal_level>0.2][:N] 39 | #signal_level = numpy.random.uniform(size=N) * 0.5 40 | #is_type1 = numpy.random.uniform(size=N) < 0.5 41 | height_broad = 10**-1 * signal_level 42 | height_narrow = signal_level 43 | 44 | #X = numpy.array([x]) 45 | 46 | print('generating signal ...') 47 | ym = gauss(A=height_broad, mu=mean_broad, x=x, z=z, sig=width_broad) 48 | ym += gauss(A=height_narrow, mu=mean_narrow, x=x, z=z, sig=width_narrow) 49 | ym = numpy.transpose(ym) 50 | print(ym.shape) 51 | 52 | # add noise 53 | print('adding noise...') 54 | y = ym.copy() 55 | for i in range(N): 56 | y[:,i] += numpy.random.normal(0, noise_level, size=len(x)) 57 | 58 | print('plotting ...') 59 | for i in range(min(N, 20)): 60 | #plt.plot(x, y[:,i], '.-') 61 | plt.plot(x, y[:,i], '-') 62 | plt.savefig('gen.pdf', bbox_inches='tight') 63 | plt.close() 64 | 65 | #print x.shape, y.shape, z.shape 66 | with h5py.File('data_%s.hdf5' % sys.argv[1], 'w') as f: 67 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 68 | f.create_dataset('y', data=y, compression='gzip', shuffle=True) 69 | f.create_dataset('z', data=z, compression='gzip', shuffle=True) 70 | f.create_dataset('mean_broad', data=mean_broad, compression='gzip', shuffle=True) 71 | f.create_dataset('width_broad', data=width_broad, compression='gzip', shuffle=True) 72 | f.create_dataset('height_broad', data=height_broad, compression='gzip', shuffle=True) 73 | f.create_dataset('mean_narrow', data=mean_narrow, compression='gzip', shuffle=True) 74 | f.create_dataset('width_narrow', data=width_narrow, compression='gzip', shuffle=True) 75 | f.create_dataset('height_narrow', data=height_narrow, compression='gzip', shuffle=True) 76 | 77 | 78 | -------------------------------------------------------------------------------- /gen_realistic.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import sys 3 | import numpy 4 | import matplotlib.pyplot as plt 5 | import h5py 6 | from numpy import exp 7 | 8 | def gauss(x, z, A, mu, sig): 9 | xT = x.reshape((1,-1)) 10 | zT = z.reshape((-1,1)) 11 | AT = A.reshape((-1,1)) 12 | muT = mu.reshape((-1,1)) 13 | sigT = sig.reshape((-1,1)) 14 | return AT * exp(-0.5 * ((muT - xT / (1. 
+ zT))/sigT)**2) 15 | 16 | x = numpy.linspace(400, 800, 1000) 17 | 18 | N = 10000 19 | numpy.random.seed(1) 20 | z = numpy.random.beta(2, 30, size=N) * 2 21 | #z = numpy.zeros(N) + 0.01 22 | rest_wave = 440 23 | # in km/s 24 | width_broad = 10**numpy.random.normal(3, 0.2, size=N) * rest_wave / 300000 25 | width_narrow = 10**numpy.random.normal(1, 0.2, size=N) * rest_wave / 300000 26 | # convert to nm 27 | mean_broad = rest_wave * numpy.ones(N) 28 | mean_narrow = rest_wave * numpy.ones(N) 29 | width_broad = width_broad 30 | width_narrow = width_narrow 31 | noise_level = 0.01 32 | #signal_level = numpy.random.exponential(size=N) * 10 33 | signal_level = 1./(numpy.random.power(1, size=N)*100 + 2) # bright 34 | #signal_level = 1./(numpy.random.power(1, size=N)*200 + 20) # faint, up to SNR of 5 35 | #signal_level = numpy.ones(N) * 10 36 | is_type1 = numpy.random.uniform(size=N) < 0.5 37 | #is_type1 = numpy.random.uniform(size=N) > 0 38 | height_broad = numpy.where(is_type1, 10**numpy.random.normal(0, 0.2, size=N), 10**numpy.random.normal(-2, 0.2, size=N)) * signal_level 39 | height_narrow = signal_level 40 | 41 | #X = numpy.array([x]) 42 | 43 | ym = gauss(A=height_broad, mu=mean_broad, x=x, z=z, sig=width_broad) 44 | ym += gauss(A=height_narrow, mu=mean_narrow, x=x, z=z, sig=width_narrow) 45 | ym = numpy.transpose(ym) 46 | print(ym.shape) 47 | 48 | # add noise 49 | print('adding noise') 50 | y = numpy.random.normal(0, noise_level, size=ym.shape) + ym 51 | print('truncating ...') 52 | N = int(sys.argv[1]) 53 | y = y[:,:N] 54 | print('plotting ...') 55 | for i in range(min(N, 20)): 56 | #plt.plot(x, y[:,i], '.-') 57 | plt.plot(x, y[:,i], '-') 58 | plt.savefig('gen_realistic.pdf', bbox_inches='tight') 59 | plt.close() 60 | 61 | print(x.shape, y.shape, z.shape) 62 | with h5py.File('data_realistic_%d.hdf5' % N, 'w') as f: 63 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 64 | f.create_dataset('y', data=y, compression='gzip', shuffle=True) 65 | f.create_dataset('z', data=z, compression='gzip', shuffle=True) 66 | f.create_dataset('mean_broad', data=mean_broad, compression='gzip', shuffle=True) 67 | f.create_dataset('width_broad', data=width_broad, compression='gzip', shuffle=True) 68 | f.create_dataset('height_broad', data=height_broad, compression='gzip', shuffle=True) 69 | f.create_dataset('width_narrow', data=width_narrow, compression='gzip', shuffle=True) 70 | f.create_dataset('height_narrow', data=height_narrow, compression='gzip', shuffle=True) 71 | f.create_dataset('mean_narrow', data=mean_narrow, compression='gzip', shuffle=True) 72 | 73 | 74 | -------------------------------------------------------------------------------- /gensimple_faint.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | from numpy import exp 6 | import sys 7 | 8 | def gauss(x, z, A, mu, sig): 9 | xT = x.reshape((1,-1)) 10 | zT = z.reshape((-1,1)) 11 | AT = A.reshape((-1,1)) 12 | muT = mu.reshape((-1,1)) 13 | sigT = sig.reshape((-1,1)) 14 | return AT * exp(-0.5 * ((muT - xT / (1. 
+ zT))/sigT)**2) 15 | 16 | x = numpy.linspace(400, 800, 200) 17 | 18 | N = 40 19 | N = int(sys.argv[1]) 20 | numpy.random.seed(N) 21 | alpha, beta, scale = 2., 7., 1 22 | z = numpy.random.beta(alpha, beta, size=N) * scale 23 | #z = numpy.zeros(N) + 0.01 24 | rest_wave = 440 25 | print('generating parameters ...') 26 | # in km/s 27 | width_broad = 4000 * rest_wave / 300000 * numpy.ones(N) 28 | width_narrow = 400 * rest_wave / 300000 * numpy.ones(N) 29 | # convert to nm 30 | mean_broad = rest_wave * numpy.ones(N) 31 | mean_narrow = rest_wave * numpy.ones(N) 32 | width_broad = width_broad 33 | width_narrow = width_narrow 34 | noise_level = 0.01 35 | #signal_level = numpy.random.exponential(size=N) * 0.4 36 | #signal_level = numpy.ones(N) * 0.04 37 | signal_level = numpy.random.normal(0.2, 0.2, size=10*N) 38 | signal_level = signal_level[signal_level>0.1][:N] 39 | #signal_level = numpy.random.uniform(size=N) * 0.5 40 | #is_type1 = numpy.random.uniform(size=N) < 0.5 41 | height_broad = 10**-1 * signal_level 42 | height_narrow = signal_level 43 | 44 | #X = numpy.array([x]) 45 | 46 | print('generating signal ...') 47 | ym = gauss(A=height_broad, mu=mean_broad, x=x, z=z, sig=width_broad) 48 | ym += gauss(A=height_narrow, mu=mean_narrow, x=x, z=z, sig=width_narrow) 49 | ym = numpy.transpose(ym) 50 | print(ym.shape) 51 | 52 | # add noise 53 | print('adding noise...') 54 | y = ym.copy() 55 | for i in range(N): 56 | y[:,i] += numpy.random.normal(0, noise_level, size=len(x)) 57 | 58 | print('plotting ...') 59 | colors = ['yellow', 'pink', 'cyan', 'magenta'] 60 | for i in range(min(N, 4)): 61 | #plt.plot(x, y[:,i], '.-') 62 | plt.plot( rest_wave * (1+z[i]), 0.15 * height_narrow[i] / noise_level, 'v', color=colors[i], ms=12) 63 | plt.plot(x, y[:,i] / noise_level, '-', color=colors[i]) 64 | plt.ylabel('Detector signal') 65 | plt.xlabel('Wavelength [nm]') 66 | plt.savefig('genfaint.pdf', bbox_inches='tight') 67 | plt.close() 68 | 69 | #print x.shape, y.shape, z.shape 70 | with h5py.File('data_faint_%s.hdf5' % sys.argv[1], 'w') as f: 71 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 72 | f.create_dataset('y', data=y, compression='gzip', shuffle=True) 73 | f.create_dataset('z', data=z, compression='gzip', shuffle=True) 74 | f.create_dataset('mean_broad', data=mean_broad, compression='gzip', shuffle=True) 75 | f.create_dataset('width_broad', data=width_broad, compression='gzip', shuffle=True) 76 | f.create_dataset('height_broad', data=height_broad, compression='gzip', shuffle=True) 77 | f.create_dataset('mean_narrow', data=mean_narrow, compression='gzip', shuffle=True) 78 | f.create_dataset('width_narrow', data=width_narrow, compression='gzip', shuffle=True) 79 | f.create_dataset('height_narrow', data=height_narrow, compression='gzip', shuffle=True) 80 | 81 | 82 | -------------------------------------------------------------------------------- /clike.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Likelihood implementation in C 4 | -------------------------------- 5 | 6 | Copyright (c) 2017 Johannes Buchner 7 | 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 15 | 16 | ***/ 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #ifdef PARALLEL 23 | #include 24 | #endif 25 | 26 | #define IFVERBOSE if(0) 27 | #define IFDEBUG if(0) 28 | #define adouble double 29 | #define bdouble double 30 | #define sqr(x) (pow(x,2)) 31 | 32 | // Parallelisation does not work at the moment, you are welcome to fix it 33 | 34 | int like( 35 | const void * xp, const void * yyp, const int ndata, const int nx, 36 | const double A, const double mu, const double sig, 37 | const double noise_level, 38 | const void * data_maskp, 39 | void * Loutp 40 | ) { 41 | const adouble * x = (const adouble*) xp; 42 | const adouble * yy = (const adouble*) yyp; 43 | const bool * data_mask = (const bool*) data_maskp; 44 | adouble * Lout = (adouble*) Loutp; 45 | 46 | { 47 | #ifdef PARALLEL 48 | int k = 0; 49 | #pragma omp parallel for 50 | // this is stupid because it does not actually safe model evaluations, 51 | // but at least it should run faster for our testing purposes. 52 | for (int i = 0; i < ndata; i++) { 53 | if (data_mask[i]) { 54 | Lout[k] = 0; 55 | for (int j = 0; j < nx; j++) { 56 | const double ypred = A * exp(-0.5 * sqr((mu - x[j])/sig)); 57 | IFVERBOSE printf("y %d %d: %f %f\n", i, j, yy[i + j*ndata], ypred); 58 | Lout[k] += sqr((ypred - yy[i + j*ndata]) / noise_level); 59 | } 60 | k++; 61 | } 62 | } 63 | #else 64 | for (int j = 0; j < nx; j++) { 65 | const double ypred = A * exp(-0.5 * sqr((mu - x[j])/sig)); 66 | 67 | int k = 0; 68 | for (int i = 0; i < ndata; i++) { 69 | IFVERBOSE printf("data_mask %d: %d\n", i, data_mask[i]); 70 | if (data_mask[i]) { 71 | IFVERBOSE printf("y %d %d: %f %f\n", i, j, yy[i + j*ndata], ypred); 72 | Lout[k] += sqr((ypred - yy[i + j*ndata]) / noise_level); 73 | k++; 74 | } 75 | } 76 | } 77 | #endif 78 | } 79 | IFVERBOSE { 80 | int k = 0; 81 | for (int i = 0; i < ndata; i++) { 82 | if (data_mask[i]) { 83 | printf("L %d: %f\n", k, Lout[k]); 84 | k++; 85 | } 86 | } 87 | } 88 | return 0; 89 | } 90 | 91 | -------------------------------------------------------------------------------- /clustering/sdml.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Geometry learning algorithms 5 | ------------------------------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 16 | 17 | """ 18 | 19 | 20 | import numpy as np 21 | import numpy 22 | from numpy import exp 23 | import scipy.linalg 24 | 25 | class IdentityMetric(object): 26 | """ 27 | Input is output. 28 | """ 29 | def fit(self, x): 30 | pass 31 | def transform(self, x): 32 | return x 33 | def untransform(self, y): 34 | return y 35 | def __eq__(self, other): 36 | return self.__dict__ == other.__dict__ 37 | 38 | class SimpleScaling(object): 39 | """ 40 | Whitens by subtracting the mean and scaling by the 41 | standard deviation of each axis. 42 | """ 43 | def __init__(self, verbose=False): 44 | self.verbose = verbose 45 | 46 | def fit(self, X, W=None): 47 | self.mean = numpy.mean(X, axis=0) 48 | X = X - self.mean 49 | self.scale = numpy.std(X, axis=0) 50 | if self.verbose: 'Scaling metric:', self.scale 51 | def transform(self, x): 52 | return (x - self.mean) / self.scale 53 | 54 | def untransform(self, y): 55 | return y * self.scale + self.mean 56 | 57 | def __eq__(self, other): 58 | return self.__dict__ == other.__dict__ 59 | 60 | class TruncatedScaling(object): 61 | """ 62 | Whitens by subtracting the mean and scaling by the 63 | standard deviation of each axis. The scaling is discretized on 64 | a log axis onto integers. 
65 | """ 66 | def __init__(self, verbose=False): 67 | self.verbose = verbose 68 | def fit(self, X, W=None): 69 | self.mean = numpy.mean(X, axis=0) 70 | X = X - self.mean 71 | #scale = numpy.max(X, axis=0) - numpy.min(X, axis=0) 72 | scale = numpy.std(X, axis=0) 73 | scalemax = scale.max() * 1.001 74 | scalemin = scale.min() 75 | # round onto discrete log scale to avoid random walk 76 | logscale = (-numpy.log2(scale / scalemax)).astype(int) 77 | self.scale = 2**(logscale.astype(float)) 78 | #print 'Scaling metric:', self.scale, '(from', scale, ')' 79 | if self.verbose: 'Discretized scaling metric:\n', logscale 80 | 81 | def transform(self, x): 82 | return (x - self.mean) / self.scale 83 | 84 | def untransform(self, y): 85 | return y * self.scale + self.mean 86 | 87 | def __eq__(self, other): 88 | return self.__dict__ == other.__dict__ 89 | 90 | -------------------------------------------------------------------------------- /elldrawer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Implementation of MultiEllipsoidal sampling via nestle 5 | 6 | Copyright (c) 2017 Johannes Buchner 7 | 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 15 | 16 | 17 | 18 | """ 19 | 20 | import numpy 21 | from numpy import exp, log, log10, pi 22 | from nestle import bounding_ellipsoid, bounding_ellipsoids, sample_ellipsoids 23 | from collections import defaultdict 24 | 25 | class MultiEllipsoidalConstrainer(object): 26 | def __init__(self, rebuild_every = 1000, verbose = False, enlarge=3.): 27 | self.iter = 0 28 | self.ndraws_since_rebuild = 0 29 | self.rebuild_every = int(rebuild_every) 30 | self.enlarge = enlarge 31 | self.verbose = verbose 32 | self.ells = None 33 | self.last_cluster_points = None 34 | 35 | def update(self, points): 36 | # volume is larger than standard Ellipsoid computation 37 | # because we have a superset of various likelihood contours 38 | # increase proportional to number of points 39 | pointvol = exp(-self.iter / self.nlive_points) * (len(points) * 1. 
/ self.nlive_points) / self.nlive_points 40 | self.ells = bounding_ellipsoids(numpy.asarray(points), pointvol=pointvol) 41 | for ell in self.ells: 42 | ell.scale_to_vol(ell.vol * self.enlarge) 43 | 44 | def generate(self, ndim): 45 | ntotal = 0 46 | N = 10000 47 | while True: 48 | u = sample_ellipsoids(self.ells, rstate=numpy.random) 49 | if not (numpy.all(u > 0.) and numpy.all(u < 1.)): 50 | continue 51 | yield u, ntotal 52 | 53 | def rebuild(self, u, ndim): 54 | if self.last_cluster_points is not None and \ 55 | len(self.last_cluster_points) == len(u) and \ 56 | numpy.all(self.last_cluster_points == u): 57 | # do nothing if everything stayed the same 58 | return 59 | 60 | self.update(points=u) 61 | self.last_cluster_points = u 62 | 63 | self.generator = self.generate(ndim) 64 | 65 | def _draw_constrained_prepare(self, Lmins, priortransform, loglikelihood, live_pointsu, ndim, **kwargs): 66 | rebuild = self.ndraws_since_rebuild > self.rebuild_every or self.ells is None 67 | if rebuild: 68 | print('rebuild triggered at call') 69 | self.rebuild(numpy.asarray(live_pointsu), ndim) 70 | self.ndraws_since_rebuild = 0 71 | assert self.generator is not None 72 | return rebuild 73 | 74 | def draw_constrained(self, Lmins, priortransform, loglikelihood, live_pointsu, ndim, iter, nlive_points, **kwargs): 75 | ntoaccept = 0 76 | self.iter = iter 77 | self.nlive_points = nlive_points 78 | #print 'MLFriends trying to replace', Lmins 79 | rebuild = self._draw_constrained_prepare(Lmins, priortransform, loglikelihood, live_pointsu, ndim, **kwargs) 80 | while True: 81 | #print ' starting generator ...' 82 | for u, ntotal in self.generator: 83 | assert (u >= 0).all() and (u <= 1).all(), u 84 | x = priortransform(u) 85 | L = loglikelihood(x) 86 | ntoaccept += 1 87 | self.ndraws_since_rebuild += 1 88 | 89 | if numpy.any(L > Lmins): 90 | # yay, we win 91 | #print 'accept after %d tries' % ntoaccept 92 | return u, x, L, ntoaccept 93 | 94 | # if running very inefficient, optimize clustering 95 | # if we haven't done so at the start 96 | if not rebuild and self.ndraws_since_rebuild > self.rebuild_every: 97 | rebuild = True 98 | print('Ellipsoid rebuild triggered after %d draws' % self.ndraws_since_rebuild) 99 | self.rebuild(numpy.asarray(live_pointsu), ndim) 100 | self.ndraws_since_rebuild = 0 101 | break 102 | 103 | -------------------------------------------------------------------------------- /cachedconstrainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | from hiermetriclearn import MetricLearningFriendsConstrainer 4 | from elldrawer import MultiEllipsoidalConstrainer 5 | 6 | # use this for MLFriends (RadFriends, but with standardized Euclidean metric) 7 | def generate_fresh_constrainer_mlfriends(): 8 | return MetricLearningFriendsConstrainer( 9 | metriclearner = 'truncatedscaling', force_shrink=True, 10 | rebuild_every=1000, metric_rebuild_every=20, 11 | verbose=False) 12 | 13 | # use this for Ellipsoidal Sampling, like MultiNest 14 | def generate_fresh_constrainer_multiellipsoidal(): 15 | return MultiEllipsoidalConstrainer(rebuild_every=1000, enlarge=3.) 16 | 17 | generate_fresh_constrainer = generate_fresh_constrainer_multiellipsoidal 18 | 19 | class CachedConstrainer(object): 20 | """ 21 | This keeps metric learners if they are used (in the last three iterations). 22 | Otherwise, constructs a fresh one. 
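A minimal usage sketch (hypothetical call site; argument meanings are inferred from the code below, and the caller is assumed to be the multi-data-set sampler):

    cc = CachedConstrainer()
    # mask/realmask identify the current subset of data sets, points are the ids of the
    # live points they share, it is the current sampler iteration (hypothetical variables)
    draw_constrained = cc.get(mask, realmask, points, it)
    # the returned callable has the draw_constrained() signature of the underlying constrainer

A region built for the same subset within the last three iterations is reused; otherwise a fresh constrainer is created.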
23 | """ 24 | def __init__(self, sampler=None): 25 | self.iter = -1 26 | self.prev_prev_prev_generation = {} 27 | self.prev_prev_generation = {} 28 | self.prev_generation = {} 29 | self.curr_generation = {} 30 | self.last_mask = [] 31 | self.last_points = [] 32 | self.last_realmask = None 33 | self.sampler = sampler 34 | 35 | def get(self, mask, realmask, points, it): 36 | while self.iter < it: 37 | # new generation 38 | self.prev_prev_prev_generation = self.prev_prev_generation 39 | self.prev_prev_generation = self.prev_generation 40 | self.prev_generation = self.curr_generation 41 | self.curr_generation = {} 42 | self.last_mask = [] 43 | self.last_realmask = None 44 | self.last_points = [] 45 | self.iter += 1 46 | 47 | # if we only dropped a single (or a few) data sets 48 | # compared to the call just before, lets reuse the same 49 | # this happens in the focussed draw with 1000s of data sets 50 | # where a single data set can accept a point; 51 | # not worth to recompute the region. 52 | if self.last_realmask is not None and len(mask) < len(self.last_mask) and \ 53 | len(mask) > 0.80 * len(self.last_mask) and \ 54 | len(points) <= len(self.last_points) and \ 55 | len(points) > 0.90 * len(self.last_points) and \ 56 | numpy.mean(self.last_realmask == realmask) > 0.80 and \ 57 | numpy.in1d(points, self.last_points).all(): 58 | print('re-using previous, similar region (%.1f%% data set overlap, %.1f%% points overlap)' % (numpy.mean(self.last_realmask == realmask) * 100., len(points) * 100. / len(self.last_points), )) 59 | k = tuple(self.last_mask.tolist()) 60 | return self.curr_generation[k].draw_constrained 61 | print('not re-using region', (len(mask), len(self.last_mask), len(points), len(self.last_points), (len(mask) < len(self.last_mask), len(mask) > 0.80 * len(self.last_mask), len(points) > 0.90 * len(self.last_points), numpy.mean(self.last_realmask == realmask) ) )) 62 | 63 | # normal operation: 64 | k = tuple(mask.tolist()) 65 | self.last_realmask = realmask 66 | self.last_mask = mask 67 | self.last_points = points 68 | 69 | # try to recycle 70 | if k in self.curr_generation: 71 | pass 72 | elif k in self.prev_generation: 73 | print('re-using previous1 region') 74 | self.curr_generation[k] = self.prev_generation[k] 75 | elif k in self.prev_prev_generation: 76 | print('re-using previous2 region') 77 | self.curr_generation[k] = self.prev_prev_generation[k] 78 | elif k in self.prev_prev_prev_generation: 79 | print('re-using previous3 region') 80 | self.curr_generation[k] = self.prev_prev_prev_generation[k] 81 | else: 82 | # nothing found, so start from scratch 83 | self.curr_generation[k] = generate_fresh_constrainer() 84 | #self.curr_generation[k] = MetricLearningFriendsConstrainer( 85 | # metriclearner = 'truncatedscaling', force_shrink=True, 86 | # rebuild_every=1000, metric_rebuild_every=20, 87 | # verbose=False) 88 | self.curr_generation[k].sampler = self.sampler 89 | 90 | return self.curr_generation[k].draw_constrained 91 | 92 | def generate_individual_constrainer(rebuild_every=1000, metric_rebuild_every=20): 93 | individual_constrainers = {} 94 | individual_constrainers_lastiter = {} 95 | def individual_draw_constrained(i, it, sampler): 96 | if i not in individual_constrainers: 97 | #individual_constrainers[i] = MetricLearningFriendsConstrainer( 98 | # metriclearner = 'truncatedscaling', force_shrink=True, 99 | # rebuild_every=rebuild_every, metric_rebuild_every=metric_rebuild_every, 100 | # verbose=False) 101 | individual_constrainers[i] = generate_fresh_constrainer() 102 | 
individual_constrainers[i].sampler = sampler 103 | individual_constrainers_lastiter[i] = it 104 | if it > individual_constrainers_lastiter[i] + 5: 105 | # force rebuild 106 | individual_constrainers[i].region = None 107 | individual_constrainers_lastiter[i] = it 108 | return individual_constrainers[i].draw_constrained 109 | return individual_constrainers, individual_constrainers_lastiter, individual_draw_constrained 110 | 111 | def generate_superset_constrainer(): 112 | return generate_fresh_constrainer() 113 | #return MetricLearningFriendsConstrainer(metriclearner = 'truncatedscaling', 114 | # rebuild_every=1000, metric_rebuild_every=20, verbose=False, force_shrink=True) 115 | 116 | 117 | -------------------------------------------------------------------------------- /clustering/cneighbors.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Neighbourhood helper functions accelerated with parallelised C 4 | --------------------------------------------------------------- 5 | 6 | Copyright (c) 2017 Johannes Buchner 7 | 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
15 | 16 | ***/ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #ifdef PARALLEL 23 | #include 24 | #endif 25 | 26 | #define IFVERBOSE if(0) 27 | #define IFDEBUG if(0) 28 | #define adouble double 29 | #define bdouble double 30 | #define sqr(x) (pow(x,2)) 31 | 32 | double most_distant_nearest_neighbor( 33 | const void * xxp, int nsamples, int ndim 34 | ) { 35 | const adouble * xx = (const adouble*) xxp; 36 | double nearest_ds[nsamples]; 37 | 38 | IFVERBOSE { 39 | for (int i = 0; i < nsamples; i++) { // one sample at a time 40 | printf("%d: ", i); 41 | for (int k = 0; k < ndim; k++) { 42 | printf("%e\t", xx[i*ndim + k]); 43 | } 44 | printf("\n"); 45 | } 46 | } 47 | #ifdef PARALLEL 48 | #pragma omp parallel for 49 | #endif 50 | for (int i = 0; i < nsamples; i++) { // one sample at a time 51 | // consider all other samples before i 52 | double nearest_d = 1e300; 53 | for (int j = 0; j < nsamples; j++) { 54 | if (j != i) { 55 | double d = 0; 56 | for (int k = 0; k < ndim; k++) { 57 | d += sqr(xx[i*ndim + k] - xx[j*ndim + k]); 58 | } 59 | if (d < nearest_d) { 60 | nearest_d = d; 61 | } 62 | } 63 | } 64 | IFVERBOSE printf("%d: %f\n", i, sqrt(nearest_d)); 65 | nearest_ds[i] = sqrt(nearest_d); 66 | } 67 | double furthest_d = nearest_ds[0]; 68 | 69 | for (int i = 1; i < nsamples; i++) { 70 | if (nearest_ds[i] > furthest_d) 71 | furthest_d = nearest_ds[i]; 72 | } 73 | IFVERBOSE printf("result: %f\n", furthest_d); 74 | return furthest_d; 75 | } 76 | 77 | int is_within_distance_of( 78 | const void * xxp, int nsamples, int ndim, double maxdistance, const void * yp 79 | ) { 80 | const adouble * xx = (const adouble*) xxp; 81 | const adouble * y = (const adouble*) yp; 82 | 83 | for (int i = 0; i < nsamples; i++) { // one sample at a time 84 | double d = 0; 85 | for (int k = 0; k < ndim; k++) { 86 | d += sqr(xx[i*ndim + k] - y[k]); 87 | } 88 | if (sqrt(d) < maxdistance) 89 | return 1; 90 | } 91 | return 0; 92 | } 93 | 94 | 95 | int count_within_distance_of( 96 | const void * xxp, int nsamples, int ndim, double maxdistance, 97 | const void * yyp, int nothers, void * outp, const int countmax 98 | ) { 99 | const adouble * xx = (const adouble*) xxp; 100 | const adouble * yy = (const adouble*) yyp; 101 | double * out = (double*) outp; 102 | 103 | for (int j = 0; j < nothers; j++) { // one sample at a time 104 | for (int i = 0; i < nsamples; i++) { // one sample at a time 105 | double d = 0; 106 | for (int k = 0; k < ndim; k++) { 107 | d += sqr(xx[i*ndim + k] - yy[j*ndim + k]); 108 | } 109 | if (sqrt(d) < maxdistance) { 110 | out[j]++; 111 | // printf("%d: %f\n", j, out[j]); 112 | if (countmax > 0 && out[j] >= countmax) { 113 | break; 114 | } 115 | } 116 | } 117 | } 118 | return 0; 119 | } 120 | 121 | /** 122 | * xxp are double points (nsamples x ndim) 123 | * choicep is whether the point is selected in the bootstrap round (nsamples x nbootstraps) 124 | */ 125 | double bootstrapped_maxdistance( 126 | const void * xxp, 127 | int nsamples, int ndim, 128 | const void * choicep, 129 | int nbootstraps 130 | ) { 131 | const adouble * xx = (const adouble*) xxp; 132 | const adouble * chosen = (const adouble*) choicep; 133 | 134 | double furthest_ds[nbootstraps]; 135 | double furthest_d_bs; 136 | 137 | #ifdef PARALLEL 138 | #pragma omp parallel for 139 | #endif 140 | for(int b = 0; b < nbootstraps; b++) { 141 | double nearest_ds[nsamples]; 142 | double furthest_d = 0; 143 | //printf("bootstrap round %d\n", b); 144 | // find one that was not chosen 145 | for (int i = 0; i < nsamples; i++) { 146 | if 
(chosen[i*nbootstraps + b] != 0) continue; 147 | //printf(" considering %d\n", i); 148 | double nearest_d = 1e300; 149 | for (int j = 0; j < nsamples; j++) { 150 | if (chosen[j*nbootstraps + b] == 0) continue; 151 | double d = 0; 152 | for (int k = 0; k < ndim; k++) { 153 | d += sqr(xx[i*ndim + k] - xx[j*ndim + k]); 154 | } 155 | if (d < nearest_d) { 156 | nearest_d = d; 157 | } 158 | } 159 | //printf(" %d: %f\n", i, sqrt(nearest_d)); 160 | nearest_ds[i] = sqrt(nearest_d); 161 | } 162 | for (int i = 1; i < nsamples; i++) { 163 | if (chosen[i*nbootstraps + b] != 0) continue; 164 | if (nearest_ds[i] > furthest_d) 165 | furthest_d = nearest_ds[i]; 166 | } 167 | //printf("bootstrap round %d gave %f\n", b, furthest_d); 168 | furthest_ds[b] = furthest_d; 169 | } 170 | 171 | furthest_d_bs = furthest_ds[0]; 172 | for (int i = 1; i < nbootstraps; i++) { 173 | if (furthest_ds[i] > furthest_d_bs) 174 | furthest_d_bs = furthest_ds[i]; 175 | } 176 | 177 | IFVERBOSE printf("result: %f\n", furthest_d_bs); 178 | return furthest_d_bs; 179 | } 180 | -------------------------------------------------------------------------------- /profile_generate_subsets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import sys 4 | import time 5 | import networkx 6 | import igraph 7 | 8 | def generate_subsets_reference(data_mask, live_pointsp, graph, _): 9 | # generate data subsets which share points. 10 | firstmember = numpy.where(data_mask)[0][0] 11 | if len(live_pointsp[:,firstmember]) == len(numpy.unique(live_pointsp[:,data_mask].flatten())): 12 | # trivial case: all live points are the same across data sets 13 | yield data_mask, live_pointsp[:,firstmember] 14 | return 15 | 16 | to_handle = data_mask.copy() 17 | while to_handle.any(): 18 | firstmember = numpy.where(to_handle)[0][0] 19 | to_handle[firstmember] = False 20 | members = [firstmember] 21 | # get live points of this member 22 | member_live_pointsp = live_pointsp[:,firstmember].tolist() 23 | # look through to_handle for entries and check if they have the points 24 | i = 0 25 | while True: 26 | if i >= len(member_live_pointsp) or not to_handle.any(): 27 | break 28 | p = member_live_pointsp[i] 29 | sharing = (live_pointsp[:,to_handle] == p).any(axis=0) 30 | #assert len(sharing) == to_handle.sum() 31 | newmembers = numpy.where(to_handle)[0][sharing] 32 | #assert numpy.all(newmembers == numpy.arange(len(to_handle))[to_handle][sharing]) 33 | 34 | #print 'new members:', newmembers 35 | members += newmembers.tolist() 36 | for newp in numpy.unique(live_pointsp[:,newmembers]): 37 | if newp not in member_live_pointsp: 38 | member_live_pointsp.append(newp) 39 | to_handle[newmembers] = False 40 | i = i + 1 41 | 42 | # now we have our members and live points 43 | member_data_mask = numpy.zeros(len(data_mask), dtype=bool) 44 | member_data_mask[members] = True 45 | #print 'returning:', member_data_mask, member_live_pointsp 46 | yield member_data_mask, member_live_pointsp 47 | 48 | def generate_subsets_graph_simple(data_mask, live_pointsp, graph, _): 49 | # generate data subsets which share points. 
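	# the graph argument is the bipartite graph built in the loading loop at the bottom of this
	# script: one node per data set (node type 0), one node per live point (node type 1), and an
	# edge wherever a data set currently holds that live point, so every connected component
	# corresponds to a group of data sets that share live points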
50 | firstmember = numpy.where(data_mask)[0][0] 51 | # then identify disjoint subgraphs 52 | for subgraph in networkx.connected_component_subgraphs(graph, copy=False): 53 | member_data_mask = numpy.zeros(len(data_mask), dtype=bool) 54 | member_live_pointsp = [] 55 | for nodetype, i in subgraph.nodes(): 56 | if nodetype == 0: 57 | member_data_mask[i] = True 58 | else: 59 | member_live_pointsp.append(i) 60 | yield member_data_mask, member_live_pointsp 61 | 62 | def generate_subsets_graph(data_mask, live_pointsp, graph, _): 63 | # generate data subsets which share points. 64 | firstmember = numpy.where(data_mask)[0][0] 65 | allp = numpy.unique(live_pointsp[:,data_mask].flatten()) 66 | if len(live_pointsp[:,firstmember]) == len(allp): 67 | # trivial case: all live points are the same across data sets 68 | yield data_mask, live_pointsp[:,firstmember] 69 | return 70 | 71 | subgraphs = list(networkx.connected_component_subgraphs(graph, copy=False)) 72 | if len(subgraphs) == 1: 73 | yield data_mask, allp 74 | return 75 | 76 | # then identify disjoint subgraphs 77 | for subgraph in subgraphs: 78 | print('networkx subgraph:', subgraph.nodes()) 79 | member_data_mask = numpy.zeros(len(data_mask), dtype=bool) 80 | member_live_pointsp = [] 81 | for nodetype, i in subgraph.nodes(): 82 | if nodetype == 0: 83 | member_data_mask[i] = True 84 | else: 85 | member_live_pointsp.append(i) 86 | yield member_data_mask, member_live_pointsp 87 | 88 | def generate_subsets_igraph(data_mask, live_pointsp, _graph, graph): 89 | # generate data subsets which share points. 90 | firstmember = numpy.where(data_mask)[0][0] 91 | allp = numpy.unique(live_pointsp[:,data_mask].flatten()) 92 | if len(live_pointsp[:,firstmember]) == len(allp): 93 | # trivial case: all live points are the same across data sets 94 | yield data_mask, live_pointsp[:,firstmember] 95 | return 96 | 97 | subgraphs = graph.clusters() 98 | if len(subgraphs) == 1: 99 | yield data_mask, allp 100 | return 101 | # then identify disjoint subgraphs 102 | for subgraph in subgraphs: 103 | #print 'igraph subgraph:', subgraph 104 | member_data_mask = numpy.zeros(len(data_mask), dtype=bool) 105 | member_live_pointsp = [] 106 | for vi in subgraph: 107 | att = graph.vs[vi].attributes() 108 | #print ' ', att 109 | if att['vtype'] == 0: 110 | i = att['nodeid'] 111 | member_data_mask[i] = True 112 | else: 113 | p = att['pointid'] 114 | member_live_pointsp.append(p) 115 | if member_data_mask.any(): 116 | yield member_data_mask, member_live_pointsp 117 | 118 | data_sets = [] 119 | 120 | t0 = time.time() 121 | for filename in sys.argv[1:]: 122 | data = numpy.load(filename) 123 | data_mask, live_pointsp = data['data_mask'], data['live_pointsp'] 124 | # create graph 125 | graph = networkx.Graph() 126 | graph2 = igraph.Graph() 127 | # pointing from live_point to member 128 | 129 | for p in numpy.unique(live_pointsp): 130 | graph2.add_vertex("p%d" % p, pointid=p, vtype=1) 131 | for i in numpy.where(data_mask)[0]: 132 | graph.add_edges_from((((0, i), (1, p)) for p in live_pointsp[:,i])) 133 | graph2.add_vertex("n%d" % i, nodeid=i, vtype=0) 134 | graph2.add_edges([("n%d" % i, "p%d" % p) for p in live_pointsp[:,i]]) 135 | 136 | data_sets.append((data_mask, live_pointsp, graph, graph2)) 137 | t1 = time.time() 138 | print('loading took %fs' % (t1 - t0)) 139 | 140 | prev_output = [] 141 | for implementation in [generate_subsets_reference, generate_subsets_graph_simple, generate_subsets_graph, generate_subsets_igraph]: 142 | print('running', implementation) 143 | output = [] 144 | t0 = 
time.time() 145 | for a, b, graph, graph2 in data_sets: 146 | out = list(implementation(a, b, graph, graph2)) 147 | output.append(out) 148 | t1 = time.time() 149 | print(' took %fs' % (t1 - t0)) 150 | #for a, b in zip(output, 151 | if prev_output != []: 152 | print('checking for correctness...') 153 | for memberlist1, memberlist2 in zip(output, prev_output): 154 | memberlist1 = sorted(memberlist1, key=lambda ml: (len(ml), ml[0][0])) 155 | memberlist2 = sorted(memberlist2, key=lambda ml: (len(ml), ml[0][0])) 156 | for (md, ml), (md2, ml2) in zip(memberlist1, memberlist2): 157 | #print len(md), md.sum(), len(md2), md2.sum(), len(ml), len(ml2) 158 | assert numpy.all(md == md2), (numpy.where(md), numpy.where(md2)) 159 | assert sorted(ml) == sorted(ml2) 160 | assert len(memberlist1) == len(memberlist2), (len(memberlist1), len(memberlist2)) 161 | prev_output = output 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /musefuse_postprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Main program 5 | --------------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
16 | 17 | """ 18 | 19 | import numpy 20 | from numpy import exp 21 | import h5py 22 | import sys 23 | import json 24 | import os 25 | import time 26 | import astropy.io.fits as pyfits 27 | import matplotlib.pyplot as plt 28 | 29 | print('loading data...') 30 | f = pyfits.open(sys.argv[1]) 31 | datasection = f['DATA'] 32 | y = datasection.data # values 33 | y = y[:3600,:,:] 34 | noise_level = f['STAT'].data # variance 35 | noise_level = noise_level[:3600,:,:] 36 | nspec, npixx, npixy = y.shape 37 | 38 | print('applying subselection ...') 39 | ## replaced by mask 40 | #y = y[:,80:200,170:240] 41 | #noise_level = noise_level[:,80:200,170:240] 42 | #y = y[:,70:80,35:45] 43 | #noise_level = noise_level[:,70:80,35:45] 44 | 45 | regionfile = sys.argv[2] 46 | import pyregion 47 | region = pyregion.parse(open(regionfile).read()) 48 | mask = region.get_mask(shape=(npixx, npixy)) 49 | ymask = numpy.array([mask] * len(y)) 50 | xids, yids = numpy.where(mask) 51 | y = y[ymask] 52 | y = y.reshape((nspec, -1)) 53 | noise_level = noise_level[ymask] 54 | noise_level = noise_level.reshape((nspec, -1)) 55 | 56 | #nspec, npixx, npixy = y.shape 57 | print((y.shape)) 58 | outputimg = numpy.zeros((npixx, npixy)) * numpy.nan 59 | 60 | #y = y.reshape((nspec, -1)) 61 | #noise_level = noise_level.reshape((nspec, -1)) 62 | outputimg_flat = outputimg #.reshape((-1)) 63 | x = datasection.header['CD3_3'] * numpy.arange(nspec) + datasection.header['CRVAL3'] 64 | print(' finding NaNs...') 65 | good = numpy.isfinite(noise_level).all(axis=0) 66 | #assert good.shape == (npixx*npixy,), good.shape 67 | #goodids = numpy.where(good)[0] 68 | goodids = list(zip(xids[good], yids[good])) 69 | print((len(good), len(goodids))) 70 | 71 | y = y[:,good] 72 | noise_level = noise_level[:,good] 73 | ndata = os.environ.get('MAXDATA', len(goodids)) 74 | print(' truncating data to %d sets...' 
% ndata, goodids[:ndata]) 75 | ## truncate data 76 | y = y[:,:ndata] 77 | noise_level = noise_level[:,:ndata] 78 | goodids = goodids[:ndata] 79 | 80 | print((y.shape)) 81 | 82 | prefix = sys.argv[1] 83 | modelname = os.environ.get('MODEL', 'FULL') 84 | if modelname == 'ZSOL': 85 | paramnames = ['logSFtau', 'SFage', 'z', 'EBV'] 86 | prefix = prefix + '_zsol_' 87 | elif modelname == 'FULL': 88 | paramnames = ['Z', 'logSFtau', 'SFage', 'z', 'EBV'] 89 | prefix = prefix + '_full_' 90 | else: 91 | assert False 92 | 93 | filename = prefix + '.out_%d.hdf5' % ndata 94 | f = h5py.File(filename, 'r') 95 | 96 | nsamplesmax, nids, nparams = f['x'].shape 97 | assert nids == len(goodids), (nids, goodids) 98 | 99 | output_Z = outputimg_flat.copy() 100 | output_Zerr = outputimg_flat.copy() 101 | output_means = {} 102 | output_errs = {} 103 | for pi in range(nparams): 104 | output_means[pi] = outputimg_flat.copy() 105 | output_errs[pi] = outputimg_flat.copy() 106 | 107 | weights = numpy.transpose(f['w'].value + f['L'].value) 108 | print(weights.shape) 109 | 110 | #def pointfactory(): 111 | # x = f['x'].value 112 | # for i in range(nids): 113 | # yield x[:,i,:] 114 | points = numpy.swapaxes(f['x'].value, 0, 1) 115 | 116 | 117 | 118 | for i, (w, logZ, logZerr, x) in enumerate(zip(weights, f['logZ'].value, f['logZerr'].value, points)): 119 | xi, yi = goodids[i] 120 | mask = numpy.isfinite(w) 121 | jparent = numpy.where(mask)[0] 122 | w = w[jparent] 123 | w = numpy.exp(w - w.max()) 124 | w = w / w.sum() 125 | j = numpy.random.choice(jparent, size=4000, p=w) 126 | print(' %d/%d: spaxel %s: from %d samples drew %d unique posterior points' % (i+1, nids, (xi, yi), len(jparent), len(numpy.unique(j)))) 127 | 128 | print(' logZ = %.1f +- %.1f' % (logZ, logZerr)) 129 | output_Z[xi, yi] = logZ 130 | output_Zerr[xi, yi] = logZerr 131 | #x = f['x'][:,i,:] 132 | xequal = x[j,:] 133 | for k in range(nparams): 134 | v = xequal[:,k] 135 | output_means[k][xi, yi] = v.mean() 136 | output_errs[k][xi, yi] = v.std() 137 | print(' param %d = %.3f +- %.3f (%s)' % (k, v.mean(), v.std(), paramnames[k])) 138 | if i < 5: 139 | numpy.savetxt(prefix + '.outsamples_%d.txt' % i, xequal) 140 | #if i > 1000: break 141 | 142 | output_Z = output_Z.reshape((npixx, npixy)) 143 | output_Zerr = output_Zerr.reshape((npixx, npixy)) 144 | for pi in range(nparams): 145 | output_means[pi] = output_means[pi].reshape((npixx, npixy)) 146 | output_errs[pi] = output_errs[pi].reshape((npixx, npixy)) 147 | 148 | filename = prefix + '.outimg_%d.hdf5' % ndata 149 | print('writing image files ...') 150 | def makeimg(name, img, title=None): 151 | outfilename = prefix + '.outimg_%d_%s.pdf' % (ndata, name) 152 | print('creating %s ...' 
% outfilename) 153 | plt.figure() 154 | if title is None: 155 | title = name 156 | plt.title(title) 157 | plt.imshow(img, cmap=plt.cm.RdBu) 158 | plt.colorbar() 159 | plt.savefig(outfilename, bbox_inches='tight') 160 | plt.close() 161 | 162 | # store results 163 | with h5py.File(filename, 'w') as fimg: 164 | fimg.create_dataset('logZ', data=output_Z, compression='gzip', shuffle=True) 165 | makeimg('logZ', output_Z) 166 | fimg.create_dataset('logZerr', data=output_Zerr, compression='gzip', shuffle=True) 167 | makeimg('logZerr', output_Zerr) 168 | for k in range(nparams): 169 | fimg.create_dataset('param%d' % k, data=output_means[k], compression='gzip', shuffle=True) 170 | makeimg('param%d' % k, output_means[k], title=paramnames[k]) 171 | fimg.create_dataset('param%derr' % k, data=output_errs[k], compression='gzip', shuffle=True) 172 | makeimg('param%derr' % k, output_errs[k], title=paramnames[k] + ' errors') 173 | fimg.attrs['nparams'] = nparams 174 | 175 | 176 | -------------------------------------------------------------------------------- /clustering/radfriendsregion.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | RadFriends region with transforms 5 | ---------------------------------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
16 | 17 | """ 18 | 19 | import numpy 20 | import scipy.spatial, scipy.cluster 21 | from .neighbors import find_rdistance, is_within_distance_of, count_within_distance_of, any_within_distance_of 22 | from collections import defaultdict 23 | 24 | class ClusterResult(object): 25 | def __init__(self, points, clusters, metric, verbose=False): 26 | self.ws = points 27 | self.clusters = clusters 28 | self.metric = metric 29 | if verbose: 30 | print('CLUSTERS:') 31 | for cluster in clusters: 32 | clusterpoints = metric.untransform(points[cluster,:]) 33 | print('CLUSTER:', clusterpoints.mean(axis=0), clusterpoints.std(axis=0)) 34 | 35 | def get_cluster_id(self, point): 36 | w = self.metric.transform(point) 37 | dists = scipy.spatial.distance.cdist(self.ws, [w], metric='euclidean') 38 | i = numpy.argmin(dists) 39 | for j, cluster in enumerate(self.clusters): 40 | if i in cluster: 41 | return j 42 | 43 | def get_cluster_ids(self, points): 44 | ws = self.metric.transform(points) 45 | dists = scipy.spatial.distance.cdist(self.ws, ws, metric='euclidean') 46 | i = numpy.argmin(dists, axis=0) 47 | assert len(i) == len(points) 48 | results = [] 49 | for ii in i: 50 | for j, cluster in enumerate(self.clusters): 51 | if ii in cluster: 52 | results.append(j) 53 | return results 54 | 55 | def get_n_clusters(self): 56 | return len(self.clusters) 57 | 58 | class RadFriendsRegion(object): 59 | def __init__(self, members, maxdistance=None, metric='euclidean', nbootstraps=10, verbose=False): 60 | self.members = members 61 | assert metric == 'euclidean' 62 | if maxdistance is None: 63 | maxdistance = find_rdistance(members, nbootstraps=nbootstraps, 64 | metric=metric, verbose=verbose) 65 | # print 'new RadFriendsRegion with r=', maxdistance 66 | self.maxdistance = maxdistance 67 | self.metric = metric 68 | self.verbose = verbose 69 | self.lo = numpy.min(self.members, axis=0) - self.maxdistance 70 | self.hi = numpy.max(self.members, axis=0) + self.maxdistance 71 | 72 | def add_members(self, us): 73 | self.members = numpy.vstack((self.members, us)) 74 | self.lo = numpy.min(self.members, axis=0) - self.maxdistance 75 | self.hi = numpy.max(self.members, axis=0) + self.maxdistance 76 | 77 | def are_near_members(self, us): 78 | dists = scipy.spatial.distance.cdist(self.members, us, metric=self.metric) 79 | dist_criterion = dists < self.maxdistance 80 | return dist_criterion 81 | 82 | def count_nearby_members(self, us): 83 | return count_within_distance_of(self.members, self.maxdistance, us) 84 | 85 | def get_nearby_member_ids(self, u): 86 | return numpy.where(self.are_near_members([u]))[0] 87 | 88 | def is_inside(self, u): 89 | # is it true for at least one? 90 | if not ((u >= self.lo).all() and (u <= self.hi).all()): 91 | return False 92 | return is_within_distance_of(self.members, self.maxdistance, u) 93 | #return self.are_near_members([u]).any() 94 | 95 | def are_inside(self, us): 96 | # is it true for at least one? 
97 | #return self.are_near_members(us).any(axis=0) 98 | return any_within_distance_of(self.members, self.maxdistance, us) 99 | 100 | def get_clusters(self): 101 | # agglomerate clustering of members 102 | dists = scipy.spatial.distance.cdist(self.members, self.members, metric=self.metric) 103 | connected = dists < self.maxdistance 104 | nmembers = len(self.members) 105 | cluster = dict([(i,i) for i in range(nmembers)]) 106 | for i in range(nmembers): 107 | neighbors = numpy.where(connected[i,:])[0] #[i+1:] 108 | for j in neighbors: 109 | cluster[j] = cluster[i] 110 | result = defaultdict(list) 111 | for element, cluster_nro in list(cluster.items()): 112 | result[cluster_nro].append(element) 113 | #print 'RadFriends: %d clusters' % len(result) 114 | return result 115 | 116 | 117 | def generate(self, nmax=0): 118 | members = self.members 119 | maxdistance = self.maxdistance 120 | nmembers, ndim = numpy.shape(self.members) 121 | # how many points to try to generate 122 | # if too small, many function calls, inefficient 123 | # if too large, large cdist matrices, spikes in memory use 124 | N = 1000 125 | verbose = self.verbose 126 | nall = 0 127 | ntotal = 0 128 | #print 'draw from radfriends' 129 | while nmax == 0 or nall < nmax: 130 | #print 'drew %d/%d so far' % (N, nmax) 131 | # draw from box 132 | # this can be efficient if there are a lot of points 133 | ntotal = ntotal + N 134 | nall += N 135 | us = numpy.random.uniform(self.lo, self.hi, size=(N, ndim)) 136 | mask = self.are_inside(us) 137 | #print 'accepted %d/%d [box draw]' % (mask.sum(), N) 138 | if mask.any(): 139 | yield us[mask,:], ntotal 140 | #for u in us[mask,:]: 141 | # #print 'box draw success:', ntotal 142 | # yield u, ntotal 143 | ntotal = 0 144 | 145 | # draw from points 146 | # this can be efficient in higher dimensions 147 | us = members[numpy.random.randint(0, len(members), N),:] 148 | ntotal = ntotal + N 149 | nall += N 150 | if verbose: print('chosen point', us) 151 | # draw direction around it 152 | direction = numpy.random.normal(0, 1, size=(N, ndim)) 153 | direction = direction / ((direction**2).sum(axis=1)**0.5).reshape((-1,1)) 154 | if verbose: print('chosen direction', direction) 155 | # choose radius: volume gets larger towards the outside 156 | # so give the correct weight with dimensionality 157 | radius = maxdistance * numpy.random.uniform(0, 1, size=(N,1))**(1./ndim) 158 | us = us + direction * radius 159 | #mask = numpy.logical_and((u >= self.lo).all(axis=0), (u <= self.hi).all(axis=0)) 160 | #if not mask.any(): 161 | # if verbose: print 'rejection because outside' 162 | # continue 163 | #us = us[mask,:] 164 | #if verbose: print 'using point', us 165 | # count the number of points this is close to 166 | nnear = self.count_nearby_members(us) 167 | if verbose: print('near', nnear) 168 | # accept with probability 1./nnear 169 | coin = numpy.random.uniform(size=len(us)) 170 | 171 | accept = coin < 1. 
/ nnear 172 | #print 'accepted %d/%d [point draw]' % (accept.sum(), N) 173 | if not accept.any(): 174 | if verbose: print('probabilistic rejection due to overlaps') 175 | continue 176 | #print ' overlaps accepted %d of %d, typically %.2f neighbours' % (accept.sum(), N, nnear.mean()) 177 | us = us[accept,:] 178 | yield us, ntotal 179 | #for u in us: 180 | # #print 'ball draw success:', ntotal 181 | # yield u, ntotal 182 | ntotal = 0 183 | 184 | -------------------------------------------------------------------------------- /multi_nested_integrator.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Integrator 5 | ---------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
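Overview (a plain-language summary of the code below): multi_nested_integrator() repeatedly draws replacement points from the multi-data-set sampler, accumulates logZ and the information H per data set while shrinking the log prior volume, estimates the remaining evidence roughly every 50 iterations with integrate_remainder(), and terminates (and cuts the sampler down to) those data sets whose evidence uncertainty has dropped below the requested tolerance.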
16 | 17 | """ 18 | 19 | import numpy 20 | from numpy import exp, log, log10, pi 21 | import progressbar 22 | from adaptive_progress import AdaptiveETA 23 | from numpy import logaddexp 24 | import sys 25 | 26 | def integrate_remainder(sampler, logwidth, logVolremaining, logZ, H, globalLmax): 27 | # logwidth remains the same now for each sample 28 | remainder = list(sampler.remainder()) 29 | logV = logwidth 30 | L0 = remainder[-1][2] 31 | L0 = globalLmax 32 | logLs = [Li - L0 for ui, xi, Li in remainder] 33 | Ls = numpy.exp(logLs) 34 | LsMax = Ls.copy() 35 | LsMax[-1] = numpy.exp(globalLmax - L0) 36 | Lmax = LsMax[1:].sum(axis=0) + LsMax[-1] 37 | #Lmax = Ls[1:].sum(axis=0) + Ls[-1] 38 | Lmin = Ls[:-1].sum(axis=0) + Ls[0] 39 | logLmid = log(Ls.sum(axis=0)) + L0 40 | logZmid = logaddexp(logZ, logV + logLmid) 41 | logZup = logaddexp(logZ, logV + log(Lmax) + L0) 42 | logZlo = logaddexp(logZ, logV + log(Lmin) + L0) 43 | logZerr = logZup - logZlo 44 | assert numpy.isfinite(H).all() 45 | assert numpy.isfinite(logZerr).all(), logZerr 46 | 47 | for i in range(len(remainder)): 48 | ui, xi, Li = remainder[i] 49 | wi = logwidth + Li 50 | logZnew = logaddexp(logZ, wi) 51 | #Hprev = H 52 | H = exp(wi - logZnew) * Li + exp(logZ - logZnew) * (H + logZ) - logZnew 53 | H[H < 0] = 0 54 | #assert (H>0).all(), (H, Hprev, wi, Li, logZ, logZnew) 55 | logZ = logZnew 56 | 57 | #assert numpy.isfinite(logZerr + (H / sampler.nlive_points)**0.5), (H, sampler.nlive_points, logZerr) 58 | 59 | return logV + logLmid, logZerr, logZmid, logZerr + (H / sampler.nlive_points)**0.5, logZerr + (H / sampler.nlive_points)**0.5 60 | 61 | """ 62 | Performs the Nested Sampling integration by calling the *sampler* multiple times 63 | until the *tolerance* is reached, or the maximum number of likelihood evaluations 64 | is exceeded. 65 | 66 | :param sampler: Sampler 67 | :param tolerance: uncertainty in log Z to compute to 68 | :param max_samples: maximum number of likelihood evaluations (None for no limit) 69 | 70 | @return dictionary containing the keys 71 | 72 | logZ, logZerr: log evidence and uncertainty, 73 | samples: all obtained samples, 74 | weights: posterior samples: 75 | list of prior coordinates, transformed coordinates, likelihood value 76 | and weight 77 | information: information H 78 | niterations: number of nested sampling iterations 79 | """ 80 | def multi_nested_integrator(multi_sampler, tolerance = 0.01, max_samples=None, min_samples = 0, need_robust_remainder_error=True): 81 | sampler = multi_sampler 82 | logVolremaining = 0 83 | logwidth = log(1 - exp(-1. / sampler.nlive_points)) 84 | weights = [] #[-1e300, 1]] 85 | 86 | widgets = ["|...|", 87 | progressbar.Bar(), progressbar.Percentage(), AdaptiveETA()] 88 | pbar = progressbar.ProgressBar(widgets = widgets, maxval=sampler.nlive_points) 89 | 90 | i = 0 91 | ndata = multi_sampler.ndata 92 | running = numpy.ones(ndata, dtype=bool) 93 | last_logwidth = numpy.zeros(ndata) 94 | last_logVolremaining = numpy.zeros(ndata) 95 | last_remainderZ = numpy.zeros(ndata) 96 | last_remainderZerr = numpy.zeros(ndata) 97 | logZerr = numpy.zeros(ndata) 98 | ui, xi, Li = next(sampler) 99 | wi = logwidth + Li 100 | logZ = wi 101 | H = Li - logZ 102 | remainder_tails = [[]] * ndata 103 | pbar.currval = i 104 | pbar.start() 105 | while True: 106 | i = i + 1 107 | logwidth = log(1 - exp(-1. / sampler.nlive_points)) + logVolremaining 108 | last_logwidth[running] = logwidth 109 | last_logVolremaining[running] = logwidth 110 | logVolremaining -= 1. 
/ sampler.nlive_points 111 | 112 | # fill up, otherwise set weight to zero 113 | Lifull = numpy.zeros(ndata) 114 | Lifull[:] = -numpy.inf 115 | Lifull[running] = Li 116 | uifull = numpy.zeros((ndata, ui.shape[1])) 117 | uifull[running,:] = ui 118 | xifull = numpy.zeros((ndata, ui.shape[1])) 119 | xifull[running,:] = xi 120 | weights.append([uifull, xifull, Lifull, numpy.where(running, logwidth, -numpy.inf), running]) 121 | 122 | logZerr[running] = (H[running] / sampler.nlive_points)**0.5 123 | 124 | sys.stdout.flush() 125 | pbar.update(i) 126 | 127 | # expected number of iterations: 128 | i_final = -sampler.nlive_points * (-sampler.Lmax + log(exp(numpy.max([tolerance - logZerr[running], logZerr[running] / 100.], axis=0) + logZ[running]) - exp(logZ[running]))) 129 | i_final = numpy.where(i_final < i+1, i+1, numpy.where(i_final > i+100000, i+100000, i_final)) 130 | max_value = max(i+1, i_final.max()) 131 | if hasattr(pbar, 'max_value'): 132 | pbar.max_value = max_value 133 | elif hasattr(pbar, 'maxval'): 134 | pbar.maxval = max_value 135 | 136 | if i > min_samples and i % 50 == 1 or (max_samples and i > max_samples): 137 | remainderZ, remainderZerr, totalZ, totalZerr, totalZerr_bootstrapped = integrate_remainder(sampler, logwidth, logVolremaining, logZ[running], H[running], sampler.Lmax) 138 | print('checking for termination:', remainderZ, remainderZerr, totalZ, totalZerr) 139 | # tolerance 140 | last_remainderZ[running] = remainderZ 141 | last_remainderZerr[running] = remainderZerr 142 | terminating = totalZerr < tolerance 143 | if max_samples and i > max_samples: 144 | terminating[:] = True 145 | widgets[0] = '|%d/%d samples+%d/%d|lnZ = %.2f +- %.3f + %.3f|L=%.2f^%.2f ' % ( 146 | i + 1, max_value, sampler.nlive_points, sampler.ndraws, logaddexp(logZ[running][0], remainderZ[0]), max(logZerr[running]), max(remainderZerr), Li[0], sampler.Lmax[0]) 147 | if terminating.any(): 148 | print('terminating %d, namely:' % terminating.sum(), list(numpy.where(terminating)[0])) 149 | for j, k in enumerate(numpy.where(running)[0]): 150 | if terminating[j]: 151 | remainder_tails[k] = [[ui, xi, Li, logwidth] for ui, xi, Li in sampler.remainder(j)] 152 | sampler.cut_down(~terminating) 153 | running[running] = ~terminating 154 | if not running.any(): 155 | break 156 | print(widgets[0]) 157 | ui, xi, Li = next(sampler) 158 | wi = logwidth + Li 159 | logZnew = logaddexp(logZ[running], wi) 160 | H[running] = exp(wi - logZnew) * Li + exp(logZ[running] - logZnew) * (H[running] + logZ[running]) - logZnew 161 | logZ[running] = logZnew 162 | 163 | # add tail 164 | # not needed for integral, but for posterior samples, otherwise there 165 | # is a hole in the most likely parameter ranges. 
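	# each of the remaining live points is appended below with the final logwidth as its
	# weight (the usual treatment of the surviving live points at termination); the remainder
	# evidence and its error from integrate_remainder() are then folded into logZ and logZerr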
166 | all_tails = numpy.ones(ndata, dtype=bool) 167 | for i in range(sampler.nlive_points): 168 | u, x, L, logwidth = list(zip(*[tail[i] for tail in remainder_tails])) 169 | weights.append([u, x, L, logwidth, all_tails]) 170 | logZerr = logZerr + last_remainderZerr 171 | logZ = logaddexp(logZ, last_remainderZ) 172 | 173 | return dict(logZ=logZ, logZerr=logZerr, 174 | weights=weights, information=H, 175 | niterations=i) 176 | 177 | __all__ = [multi_nested_integrator] 178 | 179 | -------------------------------------------------------------------------------- /pres/mnras_template.tex: -------------------------------------------------------------------------------- 1 | % mnras_template.tex 2 | % 3 | % LaTeX template for creating an MNRAS paper 4 | % 5 | % v3.0 released 14 May 2015 6 | % (version numbers match those of mnras.cls) 7 | % 8 | % Copyright (C) Royal Astronomical Society 2015 9 | % Authors: 10 | % Keith T. Smith (Royal Astronomical Society) 11 | 12 | % Change log 13 | % 14 | % v3.0 May 2015 15 | % Renamed to match the new package name 16 | % Version number matches mnras.cls 17 | % A few minor tweaks to wording 18 | % v1.0 September 2013 19 | % Beta testing only - never publicly released 20 | % First version: a simple (ish) template for creating an MNRAS paper 21 | 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 23 | % Basic setup. Most papers should leave these options alone. 24 | \documentclass[a4paper,fleqn,usenatbib]{mnras} 25 | 26 | % MNRAS is set in Times font. If you don't have this installed (most LaTeX 27 | % installations will be fine) or prefer the old Computer Modern fonts, comment 28 | % out the following line 29 | \usepackage{newtxtext,newtxmath} 30 | % Depending on your LaTeX fonts installation, you might get better results with one of these: 31 | %\usepackage{mathptmx} 32 | %\usepackage{txfonts} 33 | 34 | % Use vector fonts, so it zooms properly in on-screen viewing software 35 | % Don't change these lines unless you know what you are doing 36 | \usepackage[T1]{fontenc} 37 | \usepackage{ae,aecompl} 38 | 39 | 40 | %%%%% AUTHORS - PLACE YOUR OWN PACKAGES HERE %%%%% 41 | 42 | % Only include extra packages if you really need them. Common packages are: 43 | \usepackage{graphicx} % Including figure files 44 | \usepackage{amsmath} % Advanced maths commands 45 | \usepackage{amssymb} % Extra maths symbols 46 | 47 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 48 | 49 | %%%%% AUTHORS - PLACE YOUR OWN COMMANDS HERE %%%%% 50 | 51 | % Please keep new commands to a minimum, and use \newcommand not \def to avoid 52 | % overwriting existing commands. Example: 53 | %\newcommand{\pcm}{\,cm$^{-2}$} % per cm-squared 54 | 55 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 56 | 57 | %%%%%%%%%%%%%%%%%%% TITLE PAGE %%%%%%%%%%%%%%%%%%% 58 | 59 | % Title of the paper, and the short title which is used in the headers. 60 | % Keep the title short and informative. 61 | \title[Short title, max. 45 characters]{MNRAS \LaTeXe\ template -- title goes here} 62 | 63 | % The list of authors, and the short list which is used in the headers. 64 | % If you need two or more lines of authors, add an extra line using \newauthor 65 | \author[K. T. Smith et al.]{ 66 | Keith T. Smith,$^{1}$\thanks{E-mail: mn@ras.org.uk (KTS)} 67 | A. N. 
Other,$^{2}$ 68 | Third Author$^{2,3}$ 69 | and Fourth Author$^{3}$ 70 | \\ 71 | % List of institutions 72 | $^{1}$Royal Astronomical Society, Burlington House, Piccadilly, London W1J 0BQ, UK\\ 73 | $^{2}$Department, Institution, Street Address, City Postal Code, Country\\ 74 | $^{3}$Another Department, Different Institution, Street Address, City Postal Code, Country 75 | } 76 | 77 | % These dates will be filled out by the publisher 78 | \date{Accepted XXX. Received YYY; in original form ZZZ} 79 | 80 | % Enter the current year, for the copyright statements etc. 81 | \pubyear{2015} 82 | 83 | % Don't change these lines 84 | \begin{document} 85 | \label{firstpage} 86 | \pagerange{\pageref{firstpage}--\pageref{lastpage}} 87 | \maketitle 88 | 89 | % Abstract of the paper 90 | \begin{abstract} 91 | This is a simple template for authors to write new MNRAS papers. 92 | The abstract should briefly describe the aims, methods, and main results of the paper. 93 | It should be a single paragraph not more than 250 words (200 words for Letters). 94 | No references should appear in the abstract. 95 | \end{abstract} 96 | 97 | % Select between one and six entries from the list of approved keywords. 98 | % Don't make up new ones. 99 | \begin{keywords} 100 | keyword1 -- keyword2 -- keyword3 101 | \end{keywords} 102 | 103 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 104 | 105 | %%%%%%%%%%%%%%%%% BODY OF PAPER %%%%%%%%%%%%%%%%%% 106 | 107 | \section{Introduction} 108 | 109 | This is a simple template for authors to write new MNRAS papers. 110 | See \texttt{mnras\_sample.tex} for a more complex example, and \texttt{mnras\_guide.tex} 111 | for a full user guide. 112 | 113 | All papers should start with an Introduction section, which sets the work 114 | in context, cites relevant earlier studies in the field by \citet{Others2013}, 115 | and describes the problem the authors aim to solve \citep[e.g.][]{Author2012}. 116 | 117 | \section{Methods, Observations, Simulations etc.} 118 | 119 | Normally the next section describes the techniques the authors used. 120 | It is frequently split into subsections, such as Section~\ref{sec:maths} below. 121 | 122 | \subsection{Maths} 123 | \label{sec:maths} % used for referring to this section from elsewhere 124 | 125 | Simple mathematics can be inserted into the flow of the text e.g. $2\times3=6$ 126 | or $v=220$\,km\,s$^{-1}$, but more complicated expressions should be entered 127 | as a numbered equation: 128 | 129 | \begin{equation} 130 | x=\frac{-b\pm\sqrt{b^2-4ac}}{2a}. 131 | \label{eq:quadratic} 132 | \end{equation} 133 | 134 | Refer back to them as e.g. equation~(\ref{eq:quadratic}). 135 | 136 | \subsection{Figures and tables} 137 | 138 | Figures and tables should be placed at logical positions in the text. Don't 139 | worry about the exact layout, which will be handled by the publishers. 140 | 141 | Figures are referred to as e.g. Fig.~\ref{fig:example_figure}, and tables as 142 | e.g. Table~\ref{tab:example_table}. 143 | 144 | % Example figure 145 | \begin{figure} 146 | % To include a figure from a file named example.* 147 | % Allowable file formats are eps or ps if compiling using latex 148 | % or pdf, png, jpg if compiling using pdflatex 149 | \includegraphics[width=\columnwidth]{example} 150 | \caption{This is an example figure. Captions appear below each figure. 
151 | Give enough detail for the reader to understand what they're looking at, 152 | but leave detailed discussion to the main body of the text.} 153 | \label{fig:example_figure} 154 | \end{figure} 155 | 156 | % Example table 157 | \begin{table} 158 | \centering 159 | \caption{This is an example table. Captions appear above each table. 160 | Remember to define the quantities, symbols and units used.} 161 | \label{tab:example_table} 162 | \begin{tabular}{lccr} % four columns, alignment for each 163 | \hline 164 | A & B & C & D\\ 165 | \hline 166 | 1 & 2 & 3 & 4\\ 167 | 2 & 4 & 6 & 8\\ 168 | 3 & 5 & 7 & 9\\ 169 | \hline 170 | \end{tabular} 171 | \end{table} 172 | 173 | 174 | \section{Conclusions} 175 | 176 | The last numbered section should briefly summarise what has been done, and describe 177 | the final conclusions which the authors draw from their work. 178 | 179 | \section*{Acknowledgements} 180 | 181 | The Acknowledgements section is not numbered. Here you can thank helpful 182 | colleagues, acknowledge funding agencies, telescopes and facilities used etc. 183 | Try to keep it short. 184 | 185 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 186 | 187 | %%%%%%%%%%%%%%%%%%%% REFERENCES %%%%%%%%%%%%%%%%%% 188 | 189 | % The best way to enter references is to use BibTeX: 190 | 191 | %\bibliographystyle{mnras} 192 | %\bibliography{example} % if your bibtex file is called example.bib 193 | 194 | 195 | % Alternatively you could enter them by hand, like this: 196 | % This method is tedious and prone to error if you have lots of references 197 | \begin{thebibliography}{99} 198 | \bibitem[\protect\citeauthoryear{Author}{2012}]{Author2012} 199 | Author A.~N., 2013, Journal of Improbable Astronomy, 1, 1 200 | \bibitem[\protect\citeauthoryear{Others}{2013}]{Others2013} 201 | Others S., 2012, Journal of Interesting Stuff, 17, 198 202 | \end{thebibliography} 203 | 204 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 205 | 206 | %%%%%%%%%%%%%%%%% APPENDICES %%%%%%%%%%%%%%%%%%%%% 207 | 208 | \appendix 209 | 210 | \section{Some extra material} 211 | 212 | If you want to present additional material which would interrupt the flow of the main paper, 213 | it can be placed in an Appendix which appears after the list of references. 214 | 215 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 216 | 217 | 218 | % Don't change these lines 219 | \bsp % typesetting comment 220 | \label{lastpage} 221 | \end{document} 222 | 223 | % End of mnras_template.tex -------------------------------------------------------------------------------- /hiermetriclearn.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Implementation of RadFriends 5 | https://arxiv.org/abs/1407.5459 6 | Uses standardised euclidean distance, which makes it fast. 7 | 8 | Copyright (c) 2017 Johannes Buchner 9 | 10 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 11 | 12 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 13 | 14 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 17 | 18 | 19 | 20 | """ 21 | 22 | import numpy 23 | import scipy.spatial, scipy.cluster 24 | import matplotlib.pyplot as plt 25 | from clustering.neighbors import find_rdistance, is_within_distance_of, count_within_distance_of, any_within_distance_of 26 | from clustering.sdml import IdentityMetric, SimpleScaling, TruncatedScaling 27 | from collections import defaultdict 28 | from clustering.radfriendsregion import ClusterResult, RadFriendsRegion 29 | 30 | class MetricLearningFriendsConstrainer(object): 31 | def __init__(self, metriclearner, rebuild_every = 50, metric_rebuild_every = 50, verbose = False, 32 | keep_phantom_points=False, optimize_phantom_points=False, 33 | force_shrink=False): 34 | self.iter_since_metric_rebuild = 0 35 | self.ndraws_since_rebuild = 0 36 | self.region = None 37 | self.rebuild_every = int(rebuild_every) 38 | self.metric_rebuild_every = int(metric_rebuild_every) 39 | self.verbose = verbose 40 | self.force_shrink = force_shrink 41 | self.metriclearner = metriclearner 42 | self.metric = IdentityMetric() 43 | self.clusters = None 44 | self.direct_draws_efficient = True 45 | self.last_cluster_points = None 46 | self.prev_maxdistance = None 47 | 48 | def cluster(self, u, ndim, keepMetric=False): 49 | w = self.metric.transform(u) 50 | prev_region = self.region 51 | if keepMetric: 52 | self.region = RadFriendsRegion(members=w) 53 | if self.force_shrink and self.region.maxdistance > self.prev_maxdistance: 54 | self.region = RadFriendsRegion(members=w, maxdistance=self.prev_maxdistance) 55 | self.prev_maxdistance = self.region.maxdistance 56 | print('keeping metric, not reclustering.') 57 | return 58 | 59 | metric_updated = False 60 | clustermetric = self.metric 61 | print('computing distances for clustering...') 62 | # Overlay all clusters (shift by cluster mean) 63 | print('Metric update ...') 64 | cluster_mean = numpy.mean(u, axis=0) 65 | shifted_cluster_members = u - cluster_mean 66 | 67 | # Using original points and new metric, compute RadFriends bootstrapped distance and store 68 | if self.metriclearner == 'none': 69 | metric = self.metric # stay with identity matrix 70 | metric_updated = False 71 | elif self.metriclearner == 'simplescaling': 72 | metric = SimpleScaling() 73 | metric.fit(shifted_cluster_members) 74 | metric_updated = True 75 | elif self.metriclearner == 'truncatedscaling': 76 | metric = TruncatedScaling() 77 | metric.fit(shifted_cluster_members) 78 | metric_updated = self.metric == IdentityMetric() or not numpy.all(self.metric.scale == metric.scale) 79 | else: 80 | assert False, self.metriclearner 81 | 82 | self.metric = metric 83 | 84 | wnew = self.metric.transform(u) 85 | print('Region update ...') 86 | 87 | self.region = RadFriendsRegion(members=wnew) #, maxdistance=shifted_region.maxdistance) 
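		# force_shrink (next block): if the metric was not updated, the new RadFriends radius is
		# not allowed to exceed the previous region's maxdistance, which guards against occasional
		# over-estimates of the bootstrapped distance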
88 | if not metric_updated and self.force_shrink and self.prev_maxdistance is not None: 89 | if self.region.maxdistance > self.prev_maxdistance: 90 | self.region = RadFriendsRegion(members=w, maxdistance=self.prev_maxdistance) 91 | self.prev_maxdistance = self.region.maxdistance 92 | print('done.') 93 | 94 | def are_inside_cluster(self, points): 95 | w = self.metric.transform(points) 96 | return self.region.are_inside(w) 97 | 98 | def is_inside(self, point): 99 | if not ((point >= 0).all() and (point <= 1).all()): 100 | return False 101 | w = self.metric.transform(point) 102 | return self.region.is_inside(w) 103 | 104 | def generate(self, ndim): 105 | ntotal = 0 106 | N = 10000 107 | while True: 108 | #if numpy.random.uniform() < 0.01: 109 | if ndim < 40: 110 | # draw from radfriends directly 111 | for ws, n in self.region.generate(N): 112 | us = self.metric.untransform(ws) 113 | assert us.shape[1] == ndim, us.shape 114 | ntotal = ntotal + n 115 | mask = numpy.logical_and(us < 1, us > 0).all(axis=1) 116 | assert mask.shape == (len(us),), (mask.shape, us.shape) 117 | if mask.any(): 118 | #print 'radfriends draw in unit cube:', mask.sum(), ntotal 119 | for u in us[mask,:]: 120 | assert u.shape == (us[0].shape), (u.shape, us.shape, mask.shape) 121 | yield u, ntotal 122 | ntotal = 0 123 | #if all([0 <= ui <= 1 for ui in u]): 124 | # yield u, ntotal 125 | # ntotal = 0 126 | if numpy.random.uniform() < 0.1: 127 | # draw from unit cube 128 | # this can be efficient if volume still large 129 | ntotal = ntotal + N 130 | us = numpy.random.uniform(size=(N, ndim)) 131 | ws = self.metric.transform(us) 132 | nnear = self.region.are_inside(ws) 133 | #print ' %d of %d accepted' % (nnear.sum(), N) 134 | for u in us[nnear,:]: 135 | #print 'unit cube draw success:', ntotal 136 | yield u, ntotal 137 | ntotal = 0 138 | 139 | def rebuild(self, u, ndim, keepMetric=False): 140 | if self.last_cluster_points is not None and \ 141 | len(self.last_cluster_points) == len(u) and \ 142 | numpy.all(self.last_cluster_points == u): 143 | # do nothing if everything stayed the same 144 | return 145 | 146 | self.cluster(u=u, ndim=ndim, keepMetric=keepMetric) 147 | self.last_cluster_points = u 148 | 149 | print('maxdistance:', self.region.maxdistance) 150 | self.generator = self.generate(ndim) 151 | 152 | def _draw_constrained_prepare(self, Lmins, priortransform, loglikelihood, live_pointsu, ndim, **kwargs): 153 | rebuild = self.ndraws_since_rebuild > self.rebuild_every or self.region is None 154 | rebuild_metric = self.iter_since_metric_rebuild > self.metric_rebuild_every 155 | keepMetric = not rebuild_metric 156 | if rebuild: 157 | print('rebuild triggered at call') 158 | self.rebuild(numpy.asarray(live_pointsu), ndim, keepMetric=keepMetric) 159 | self.ndraws_since_rebuild = 0 160 | if rebuild_metric: 161 | self.iter_since_metric_rebuild = 0 162 | else: 163 | #print 'no rebuild: %d %d' % (self.iter_since_metric_rebuild, self.ndraws_since_rebuild) 164 | rebuild_metric = False 165 | assert self.generator is not None 166 | return rebuild, rebuild_metric 167 | 168 | def get_Lmax(self): 169 | if len(self.phantom_points_Ls) == 0: 170 | return None 171 | return max(self.phantom_points_Ls) 172 | 173 | def draw_constrained(self, Lmins, priortransform, loglikelihood, live_pointsu, ndim, **kwargs): 174 | ntoaccept = 0 175 | ntotalsum = 0 176 | self.iter_since_metric_rebuild += 1 177 | #print 'MLFriends trying to replace', Lmins 178 | rebuild, rebuild_metric = self._draw_constrained_prepare(Lmins, priortransform, loglikelihood, 
live_pointsu, ndim, **kwargs) 179 | while True: 180 | #print ' starting generator ...' 181 | for u, ntotal in self.generator: 182 | assert (u >= 0).all() and (u <= 1).all(), u 183 | ntotalsum += ntotal 184 | x = priortransform(u) 185 | L = loglikelihood(x) 186 | ntoaccept += 1 187 | self.ndraws_since_rebuild += 1 188 | 189 | #print 'ntotal:', ntotal 190 | if ntotal > 100000: 191 | self.direct_draws_efficient = False 192 | 193 | if numpy.any(L > Lmins): 194 | # yay, we win 195 | #print 'accept after %d tries' % ntoaccept 196 | return u, x, L, ntoaccept 197 | 198 | # if running very inefficient, optimize clustering 199 | # if we haven't done so at the start 200 | if not rebuild and self.ndraws_since_rebuild > self.rebuild_every: 201 | rebuild = True 202 | print('RadFriends rebuild triggered after %d draws' % self.ndraws_since_rebuild) 203 | self.rebuild(numpy.asarray(live_pointsu), ndim, keepMetric=True) 204 | self.ndraws_since_rebuild = 0 205 | break 206 | if not rebuild_metric and ntoaccept > 200: 207 | rebuild_metric = True 208 | print('RadFriends metric rebuild triggered after %d draws' % self.ndraws_since_rebuild) 209 | self.rebuild(numpy.asarray(live_pointsu), ndim, keepMetric=False) 210 | self.iter_since_metric_rebuild = 0 211 | break 212 | 213 | -------------------------------------------------------------------------------- /sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Main program 5 | --------------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
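Typical invocation (a sketch: the script reads an HDF5 file containing 'x' and 'y'
datasets as its first argument, the number of data sets to analyse jointly as its
second, and the environment variables listed further below are optional)::

    CONSTRAINER=MLFRIENDS NLIVE_POINTS=400 python sample.py <datafile.hdf5> <ndata>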
16 | 17 | """ 18 | 19 | import numpy 20 | from numpy import exp 21 | import h5py 22 | import sys 23 | import json 24 | import os 25 | import time 26 | 27 | print('loading data...') 28 | ndata = int(sys.argv[2]) 29 | with h5py.File(sys.argv[1], 'r') as f: 30 | x = numpy.array(f['x'].value) 31 | y = numpy.array(f['y'][:,:ndata]) 32 | 33 | 34 | """ 35 | 36 | Definition of the problem 37 | - parameter space (here: 3d) 38 | - likelihood function which consists of 39 | - model function ("slow predicting function") 40 | - data comparison 41 | 42 | """ 43 | 44 | nx, ndata = y.shape 45 | noise_level = 0.01 46 | params = ['A', 'mu', 'sig'] #, 'noise_level'] 47 | nparams = len(params) 48 | 49 | def gauss(x, z, A, mu, sig): 50 | return A * exp(-0.5 * ((mu - x / (1. + z))/sig)**2) 51 | 52 | def priortransform(cube): 53 | # definition of the parameter width, by transforming from a unit cube 54 | cube = cube.copy() 55 | cube[0] = 10**(cube[0] * 2 - 2) 56 | cube[1] = cube[1] * 400 + 400 57 | cube[2] = cube[2] * 2 58 | return cube 59 | 60 | # the following is a python-only implementation of the likelihood 61 | # @ params are the parameters (as transformed by priortransform) 62 | # @ data_mask is which data sets to consider. 63 | # returns a likelihood vector 64 | def multi_loglikelihood(params, data_mask): 65 | A, mu, log_sig_kms = params 66 | # predict the model 67 | sig = 10**log_sig_kms 68 | ypred = A * exp(-0.5 * ((mu - x)/sig)**2) 69 | # do the data comparison 70 | L = -0.5 * (((ypred.reshape((-1,1)) - y[:,data_mask])/noise_level)**2).sum(axis=0) 71 | return L 72 | 73 | #print multi_loglikelihood([0.88091237, 444.44207558, 2.77671952], numpy.ones(ndata)==1) 74 | #print multi_loglikelihood([1.65758829e-01, 4.45518543e+02, 3.25894638e+00], numpy.ones(ndata)==1) 75 | #print multi_loglikelihood([0.95572931, 443.99407818, 2.95764509], numpy.ones(ndata)==1) 76 | 77 | # The following is a C implementation of the likelihood 78 | from ctypes import * 79 | from numpy.ctypeslib import ndpointer 80 | 81 | if int(os.environ.get('OMP_NUM_THREADS', '1')) > 1 and False: # does not work correctly yet 82 | lib = cdll.LoadLibrary('./clike-parallel.so') 83 | else: 84 | lib = cdll.LoadLibrary('./clike.so') 85 | lib.like.argtypes = [ 86 | ndpointer(dtype=numpy.float64, ndim=1, flags='C_CONTIGUOUS'), 87 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 88 | c_int, 89 | c_int, 90 | c_double, 91 | c_double, 92 | c_double, 93 | c_double, 94 | ndpointer(dtype=numpy.bool, ndim=1, flags='C_CONTIGUOUS'), 95 | ndpointer(dtype=numpy.float64, ndim=1, flags='C_CONTIGUOUS'), 96 | ] 97 | 98 | # @ params are the parameters (as transformed by priortransform) 99 | # @ data_mask is which data sets to consider. 
100 | # returns a likelihood vector 101 | def multi_loglikelihood(params, data_mask): 102 | A, mu, log_sig_kms = params 103 | sig = 10**log_sig_kms 104 | Lout = numpy.zeros(data_mask.sum()) 105 | # do everything in C and return the resulting likelihood vector 106 | ret = lib.like(x, y, ndata, nx, A, mu, sig, noise_level, data_mask, Lout) 107 | #assert numpy.isfinite(Lout).all(), (Lout, params) 108 | return -0.5 * Lout 109 | 110 | #print multi_loglikelihood([0.88091237, 444.44207558, 2.77671952], numpy.ones(ndata)==1) 111 | #print multi_loglikelihood([1.65758829e-01, 4.45518543e+02, 3.25894638e+00], numpy.ones(ndata)==1) 112 | #print multi_loglikelihood([0.95572931, 443.99407818, 2.95764509], numpy.ones(ndata)==1) 113 | 114 | """ 115 | 116 | After defining the problem, we use generic code to set up 117 | - Nested Sampling (Multi)Integrator 118 | - Our special sampler 119 | - RadFriends (constrained region draw) 120 | 121 | We start with the latter. 122 | """ 123 | 124 | 125 | from multi_nested_integrator import multi_nested_integrator 126 | from multi_nested_sampler import MultiNestedSampler 127 | 128 | import cachedconstrainer 129 | from cachedconstrainer import CachedConstrainer, generate_individual_constrainer, generate_superset_constrainer, MultiEllipsoidalConstrainer, MetricLearningFriendsConstrainer, generate_fresh_constrainer 130 | 131 | constrainer_type = os.environ.get('CONSTRAINER', 'MLFRIENDS') 132 | if constrainer_type == 'MLFRIENDS': 133 | def generate_fresh_constrainer(): 134 | return MetricLearningFriendsConstrainer( 135 | metriclearner = 'truncatedscaling', force_shrink=True, 136 | rebuild_every=1000, metric_rebuild_every=20, 137 | verbose=False) 138 | 139 | superset_constrainer = MetricLearningFriendsConstrainer( 140 | metriclearner = 'truncatedscaling', force_shrink=True, 141 | rebuild_every=1000, metric_rebuild_every=20, 142 | verbose=False) 143 | elif constrainer_type == 'MULTIELLIPSOIDS': 144 | def generate_fresh_constrainer(): 145 | return MultiEllipsoidalConstrainer(rebuild_every=1000) 146 | 147 | superset_constrainer = generate_fresh_constrainer() 148 | elif constrainer_type == 'SLICE': 149 | #from whitenedmcmc import FilteredMCMCConstrainer, HybridMLMultiEllipsoidConstrainer 150 | from whitenedmcmc import SliceConstrainer, FilteredMahalanobisHARMProposal, FilteredUnitIterateSliceProposal 151 | def generate_fresh_constrainer(): 152 | return SliceConstrainer(proposer=FilteredUnitIterateSliceProposal(), nsteps=nparams*5) 153 | superset_constrainer = generate_fresh_constrainer() 154 | else: 155 | assert False, constrainer_type 156 | 157 | cachedconstrainer.generate_fresh_constrainer = generate_fresh_constrainer 158 | 159 | cc = CachedConstrainer() 160 | focusset_constrainer = cc.get 161 | _, _, individual_draw_constrained = generate_individual_constrainer() 162 | numpy.random.seed(1) 163 | start_time = time.time() 164 | print('setting up integrator ...') 165 | nlive_points = int(os.environ.get('NLIVE_POINTS','400')) 166 | 167 | # constrained region draw functions 168 | # we try hard to keep information about current regions and subselected regions 169 | # because recomputing the regions is expensive if the likelihood is very fast. 
170 | # There are three constrainers: 171 | # - the one of the superset (all data sets) 172 | # - one for each data set if need a individual draw (focussed draw with only one) 173 | # - a memory for recent clusterings, because they might recur in the next iteration(s) 174 | # Note that this does caching not improve the algorithms efficiency 175 | # in fact, not recomputing regions keeps the regions larger, 176 | # leading potentially to slightly more rejections. 177 | # However, there is substantial execution speedup. 178 | 179 | 180 | # now set up sampler and pass the three constrainers 181 | 182 | sampler = MultiNestedSampler(nlive_points = nlive_points, 183 | priortransform=priortransform, multi_loglikelihood=multi_loglikelihood, 184 | ndim=nparams, ndata=ndata, 185 | superset_draw_constrained = superset_constrainer.draw_constrained, 186 | individual_draw_constrained = individual_draw_constrained, 187 | draw_constrained = focusset_constrainer, 188 | nsuperset_draws = int(os.environ.get('SUPERSET_DRAWS', '10')), 189 | use_graph = os.environ.get('USE_GRAPH', '1') == '1' 190 | ) 191 | 192 | superset_constrainer.sampler = sampler 193 | cc.sampler = sampler 194 | print('integrating ...') 195 | max_samples = int(os.environ.get('MAXSAMPLES', 0)) 196 | min_samples = int(os.environ.get('MINSAMPLES', 0)) 197 | results = multi_nested_integrator(tolerance=0.5, multi_sampler=sampler, min_samples=min_samples, max_samples=max_samples) 198 | duration = time.time() - start_time 199 | print('writing output files ...') 200 | prefix = '%s_%s_nlive%d_%d.out8' % (sys.argv[1], constrainer_type, nlive_points, ndata) 201 | # store results 202 | with h5py.File(prefix + '.hdf5', 'w') as f: 203 | f.create_dataset('logZ', data=results['logZ'], compression='gzip', shuffle=True) 204 | f.create_dataset('logZerr', data=results['logZerr'], compression='gzip', shuffle=True) 205 | u, x, L, w, mask = list(zip(*results['weights'])) 206 | f.create_dataset('u', data=u, compression='gzip', shuffle=True) 207 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 208 | f.create_dataset('L', data=L, compression='gzip', shuffle=True) 209 | f.create_dataset('w', data=w, compression='gzip', shuffle=True) 210 | f.create_dataset('mask', data=mask, compression='gzip', shuffle=True) 211 | f.create_dataset('ndraws', data=sampler.ndraws) 212 | print('logZ = %.1f +- %.1f' % (results['logZ'][0], results['logZerr'][0])) 213 | print('ndraws:', sampler.ndraws, 'niter:', len(w)) 214 | 215 | print('writing statistic ...') 216 | json.dump(dict(ndraws=sampler.ndraws, duration=duration, ndata=ndata, niter=len(w)), 217 | open(prefix + '.stats.json', 'w'), indent=4) 218 | print('done.') 219 | 220 | 221 | -------------------------------------------------------------------------------- /clustering/neighbors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Neighbourhood helper functions 5 | ------------------------------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 16 | 17 | """ 18 | 19 | import numpy 20 | import scipy.spatial 21 | 22 | def initial_maxdistance_guess(u): 23 | n = len(u) 24 | distances = scipy.spatial.distance.cdist(u, u) 25 | nearest = [distances[i,:].argsort()[1] for i in range(n)] 26 | nearest = [numpy.abs(u[k,:] - u[i,:]) for i, k in enumerate(nearest)] 27 | # compute distance maximum 28 | maxdistance = numpy.max(nearest, axis=0) 29 | return maxdistance 30 | 31 | def update_maxdistance(u, ibootstrap, maxdistance, verbose = False): 32 | n, ndim = u.shape 33 | 34 | # bootstrap to find smallest maxdistance which includes 35 | # all points 36 | choice = list(set(numpy.random.choice(numpy.arange(n), size=n))) 37 | notchosen = set(range(n)) - set(choice) 38 | # check if included with our starting criterion 39 | for i in notchosen: 40 | dists = numpy.abs(u[i,:] - u[choice,:]) 41 | close = numpy.all(dists < maxdistance.reshape((1,-1)), axis=1) 42 | assert close.shape == (len(choice),), (close.shape, len(choice)) 43 | # find the point where we have to increase the least 44 | if not close.any(): 45 | # compute maxdists -- we already did that 46 | # compute extension to maxdistance 47 | #maxdistance_suggest = [numpy.max([maxdistance, d], axis=0) for d in dists] 48 | maxdistance_suggest = numpy.where(maxdistance > dists, dists, maxdistance) 49 | assert maxdistance_suggest.shape == (len(dists), ndim) 50 | # compute volume increase in comparison to maxdistance 51 | #increase = [(numpy.log(m) - numpy.log(maxdistance)).sum() for m in maxdistance_suggest] 52 | increase = numpy.log(maxdistance_suggest).sum(axis=1) - numpy.log(maxdistance).sum() 53 | 54 | # choose smallest 55 | nearest = numpy.argmin(increase) 56 | if verbose: print(ibootstrap, 'nearest:', u[i], u[nearest], increase[nearest]) 57 | # update maxdistance 58 | maxdistance = numpy.where(dists[nearest] > maxdistance, dists[nearest], maxdistance) 59 | if verbose: print(ibootstrap, 'extending:', maxdistance) 60 | else: 61 | # we got this one, everything is fine 62 | pass 63 | return maxdistance 64 | 65 | def find_maxdistance(u, verbose=False, nbootstraps=15): 66 | # find nearest point for every point 67 | if verbose: print('finding nearest neighbors:') 68 | maxdistance = initial_maxdistance_guess(u) 69 | #maxdistance = numpy.zeros(ndim) 70 | if verbose: print('initial:', maxdistance) 71 | for ibootstrap in range(nbootstraps): 72 | maxdistance = update_maxdistance(u, ibootstrap, maxdistance, verbose=verbose) 73 | return maxdistance 74 | 75 | def is_within_distance_of(members, maxdistance, u, metric='euclidean'): 76 | dists = 
scipy.spatial.distance.cdist(members, us, metric=metric) 77 | return (dists < maxdistance).any() 78 | 79 | def count_within_distance_of(members, maxdistance, us, metric='euclidean'): 80 | dists = scipy.spatial.distance.cdist(members, us, metric=metric) 81 | return (dists < maxdistance).sum(axis=0) 82 | 83 | def any_within_distance_of(members, maxdistance, us, metric='euclidean'): 84 | dists = scipy.spatial.distance.cdist(members, us, metric=metric) 85 | return (dists < maxdistance).any(axis=0) 86 | 87 | most_distant_nearest_neighbor = None 88 | bootstrapped_maxdistance = None 89 | try: 90 | import os 91 | from ctypes import * 92 | from numpy.ctypeslib import ndpointer 93 | 94 | if int(os.environ.get('OMP_NUM_THREADS', '1')) > 1: 95 | libname = 'cneighbors-parallel.so' 96 | else: 97 | libname = 'cneighbors.so' 98 | libfilename = os.path.join(os.path.dirname(os.path.abspath(__file__)), libname) 99 | lib = cdll.LoadLibrary(libfilename) 100 | lib.most_distant_nearest_neighbor.argtypes = [ 101 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 102 | c_int, 103 | c_int, 104 | ] 105 | lib.most_distant_nearest_neighbor.restype = c_double 106 | 107 | def most_distant_nearest_neighbor(xx): 108 | i, m = xx.shape 109 | r = lib.most_distant_nearest_neighbor(xx, i, m) 110 | return r 111 | 112 | lib.is_within_distance_of.argtypes = [ 113 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 114 | c_int, 115 | c_int, 116 | c_double, 117 | ndpointer(dtype=numpy.float64, ndim=1, flags='C_CONTIGUOUS'), 118 | ] 119 | lib.is_within_distance_of.restype = c_int 120 | 121 | def is_within_distance_of(xx, maxdistance, y): 122 | i, m = xx.shape 123 | r = lib.is_within_distance_of(xx, i, m, maxdistance, y) 124 | return r == 1 125 | 126 | lib.count_within_distance_of.argtypes = [ 127 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 128 | c_int, 129 | c_int, 130 | c_double, 131 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 132 | c_int, 133 | ndpointer(dtype=numpy.float64, ndim=1, flags='C_CONTIGUOUS'), 134 | c_int, 135 | ] 136 | 137 | def count_within_distance_of(xx, maxdistance, yy): 138 | i, m = xx.shape 139 | j = len(yy) 140 | counts = numpy.zeros(len(yy)) 141 | r = lib.count_within_distance_of(xx, i, m, maxdistance, yy, j, counts, 0) 142 | counts = counts.astype(int) 143 | # check 144 | #dists = scipy.spatial.distance.cdist(xx, yy, metric='euclidean') 145 | #counts_true = (dists < maxdistance).sum(axis=0) 146 | #assert (counts == counts_true).all(), (counts, counts_true) 147 | return counts 148 | 149 | def any_within_distance_of(xx, maxdistance, yy): 150 | i, m = xx.shape 151 | j = len(yy) 152 | counts = numpy.zeros(len(yy)) 153 | r = lib.count_within_distance_of(xx, i, m, maxdistance, yy, j, counts, 1) 154 | counts = counts > 0 155 | # check 156 | #dists = scipy.spatial.distance.cdist(xx, yy, metric='euclidean') 157 | #counts_true = (dists < maxdistance).any(axis=0) 158 | #assert (counts == counts_true).all(), (counts, counts_true) 159 | return counts 160 | 161 | lib.bootstrapped_maxdistance.argtypes = [ 162 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 163 | c_int, 164 | c_int, 165 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 166 | c_int, 167 | ] 168 | lib.bootstrapped_maxdistance.restype = c_double 169 | 170 | def bootstrapped_maxdistance(xx, nbootstraps): 171 | nsamples, ndim = xx.shape 172 | chosen = numpy.zeros((nsamples, nbootstraps)) 173 | for b in range(nbootstraps): 174 | 
chosen[numpy.random.choice(numpy.arange(nsamples), size=nsamples, replace=True),b] = 1. 175 | 176 | maxdistance = lib.bootstrapped_maxdistance(xx, nsamples, ndim, chosen, nbootstraps) 177 | return maxdistance 178 | 179 | except ImportError as e: 180 | print('Using slow, high-memory neighborhood function nearest_rdistance_guess because import failed:', e) 181 | except Exception as e: 182 | print('Using slow, high-memory neighborhood function nearest_rdistance_guess because:', e) 183 | 184 | 185 | def nearest_rdistance_guess(u, metric='euclidean'): 186 | if metric == 'euclidean' and most_distant_nearest_neighbor is not None: 187 | return most_distant_nearest_neighbor(u) 188 | n = len(u) 189 | distances = scipy.spatial.distance.cdist(u, u, metric=metric) 190 | numpy.fill_diagonal(distances, 1e300) 191 | nearest_neighbor_distance = numpy.min(distances, axis = 1) 192 | rdistance = numpy.max(nearest_neighbor_distance) 193 | #print 'distance to nearest:', rdistance, nearest_neighbor_distance 194 | return rdistance 195 | 196 | def initial_rdistance_guess(u, metric='euclidean', k = 10): 197 | n = len(u) 198 | distances = scipy.spatial.distance.cdist(u, u, metric=metric) 199 | if k == 1: 200 | # numpy.diag(distances) 201 | # nearest = [distances[i,:])[1:k] for i in range(n)] 202 | distances2 = distances + numpy.diag(1e100 * numpy.ones(len(distances))) 203 | nearest = distances2.min(axis=0) 204 | else: 205 | assert False, k 206 | nearest = [numpy.sort(distances[i,:])[1:k+1] for i in range(n)] 207 | # compute distance maximum 208 | rdistance = numpy.max(nearest) 209 | return rdistance 210 | 211 | def update_rdistance(u, ibootstrap, rdistance, verbose = False, metric='euclidean'): 212 | n, ndim = u.shape 213 | 214 | # bootstrap to find smallest rdistance which includes 215 | # all points 216 | choice = set(numpy.random.choice(numpy.arange(n), size=n)) 217 | mask = numpy.array([c in choice for c in numpy.arange(n)]) 218 | 219 | distances = scipy.spatial.distance.cdist(u[mask], u[-mask], metric=metric) 220 | assert distances.shape == (mask.sum(), (-mask).sum()) 221 | nearest_distance_to_members = distances.min(axis=0) 222 | if verbose: 223 | print('nearest distances:', nearest_distance_to_members.max(), nearest_distance_to_members) 224 | newrdistance = max(rdistance, nearest_distance_to_members.max()) 225 | if newrdistance > rdistance and verbose: 226 | print(ibootstrap, 'extending:', newrdistance) 227 | return newrdistance 228 | 229 | def find_rdistance(u, verbose=False, nbootstraps=15, metric='euclidean'): 230 | if metric == 'euclidean' and bootstrapped_maxdistance is not None: 231 | return bootstrapped_maxdistance(u, nbootstraps) 232 | # find nearest point for every point 233 | if verbose: print('finding nearest neighbors:') 234 | rdistance = 0 #initial_rdistance_guess(u) 235 | if verbose: print('initial:', rdistance) 236 | for ibootstrap in range(nbootstraps): 237 | rdistance = update_rdistance(u, ibootstrap, rdistance, verbose=verbose, metric=metric) 238 | return rdistance 239 | 240 | if __name__ == '__main__': 241 | nbootstraps = 10 242 | numpy.random.seed(1) 243 | u = numpy.random.uniform(size=(200,2)) 244 | for i in range(100): 245 | numpy.random.seed(i) 246 | a = bootstrapped_maxdistance(u, nbootstraps) 247 | numpy.random.seed(i) 248 | b = find_rdistance(u, nbootstraps=nbootstraps, metric='euclidean', verbose=False) 249 | print(a, b) 250 | assert numpy.allclose(a, b) 251 | 252 | -------------------------------------------------------------------------------- /whitenedmcmc.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | 3 | Implementation of MultiEllipsoidal sampling via nestle 4 | 5 | Copyright (c) 2017 Johannes Buchner 6 | 7 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | 16 | 17 | """ 18 | 19 | import numpy 20 | from numpy import exp, log, log10, pi 21 | from nestle import bounding_ellipsoid, bounding_ellipsoids, sample_ellipsoids 22 | from collections import defaultdict 23 | import scipy.spatial, scipy.cluster 24 | import matplotlib.pyplot as plt 25 | import numpy 26 | from numpy import exp, log, log10, pi, cos, sin 27 | from nestle import bounding_ellipsoid, bounding_ellipsoids, sample_ellipsoids 28 | 29 | def is_inside_unit_filter(u): 30 | return numpy.all(u >= 0) and numpy.all(u <= 1) 31 | 32 | 33 | class BaseProposal(object): 34 | """ 35 | Base class for proposal function. 36 | 37 | :param scale: Scale of proposal 38 | :param adapt: Adaptation rule to use for scale, when new_chain is called. 39 | 40 | If adapt is False, no adaptation is done. If adapt is 'Sivia', the rule 41 | of Sivia & Skilling (2006) is used. If adapt is something else, 42 | a crude thresholding adaptation is used to gain ~50% acceptance. 43 | """ 44 | def __init__(self, adapt = False, scale = 1.): 45 | self.accepts = [] 46 | self.adapt = adapt 47 | self.scale = scale 48 | """ 49 | Proposal function (to be overwritten) 50 | """ 51 | def propose(self, u, ndim, live_pointsu=None, is_inside_filter=None): 52 | return u 53 | """ 54 | Reset accept counters and adapt proposal (if activated). 55 | """ 56 | def new_chain(self, live_pointsu=None, is_inside_filter=None): 57 | if self.adapt and len(self.accepts) > 0: 58 | # adjust future scale based on acceptance rate 59 | m = numpy.mean(self.accepts) 60 | assert 0 <= m <= 1, (m, self.accepts) 61 | if self.adapt == 'sivia': 62 | if m > 0.5: self.scale *= exp(1./numpy.sum(self.accepts)) 63 | else: self.scale /= exp(1./(len(self.accepts) - numpy.sum(self.accepts))) 64 | elif self.adapt == 'sivia-neg-binom': 65 | # negative binomial rate estimator 66 | m = (sum(self.accepts) - 1) / (len(self.accepts) - 1.) 
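            # (k-1)/(n-1) is the standard unbiased estimate of a success
            # probability when sampling continues until a success (negative
            # binomial stopping). The update below then applies the same
            # Sivia & Skilling rule as the 'sivia' branch above: grow the scale
            # by exp(1/naccepted) if more than half were accepted, otherwise
            # shrink it by exp(1/nrejected).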
67 | if m > 0.5: self.scale *= exp(1./numpy.sum(self.accepts)) 68 | else: self.scale /= exp(1./(len(self.accepts) - numpy.sum(self.accepts))) 69 | elif self.adapt == 'step': 70 | #print 'adaptation:', m 71 | if m <= 0.1: 72 | self.scale /= 1.1 73 | elif m <= 0.3: 74 | self.scale /= 1.01 75 | elif m >= 0.9: 76 | self.scale *= 1.1 77 | elif m >= 0.7: 78 | self.scale *= 1.01 79 | else: 80 | assert False, self.adapt 81 | assert numpy.all(numpy.isfinite(self.scale)), self.scale 82 | self.accepts = [] 83 | 84 | """ 85 | Add a point to the record. 86 | :param accepted: True if accepted, False if rejected. 87 | """ 88 | def accept(self, accepted): 89 | self.accepts.append(accepted) 90 | 91 | """ 92 | Print some stats on the acceptance rate 93 | """ 94 | def stats(self): 95 | print('Proposal %s stats: %.2f%% accepts' % (repr(self), 96 | numpy.mean(self.accepts) * 100.)) 97 | 98 | class MultiScaleProposal(BaseProposal): 99 | """Proposal over multiple scales, inspired by DNest. 100 | Uses the formula 101 | 102 | :math:`x + n * 10^{l - s * u}` 103 | 104 | where l is the location, s is the scale and u is a uniform variate, 105 | and n is a normal variate. 106 | 107 | @see MultiScaleProposal 108 | """ 109 | def __init__(self, loc = -4.5, scale=1.5, adapt=False): 110 | # 10**(1.5 - 6 * u) (inspired by DNest) 111 | # a + (b - a) * u 112 | # a = 1.5, b = -4.5 113 | # a should increase for larger scales, decrease for smaller 114 | 115 | self.loc = loc 116 | BaseProposal.__init__(self, adapt=adapt, scale=scale) 117 | def __repr__(self): 118 | return 'MultiScaleProposal(loc=%s, scale=%s, adapt=%s)' % (self.loc, self.scale, self.adapt) 119 | def propose(self, u, ndim, live_pointsu=None, is_inside_filter=None): 120 | p = u + numpy.random.normal() * 10**(self.scale + (self.loc - self.scale) * numpy.random.uniform()) 121 | p[p > 1] = 1 122 | p[p < 0] = 0 123 | #p = p - numpy.floor(p) 124 | return p 125 | 126 | 127 | class FilteredUnitHARMProposal(BaseProposal): 128 | """ 129 | Unit HARM proposal. 130 | 131 | @see BaseProposal 132 | """ 133 | def __init__(self, adapt = False, scale = 1.): 134 | BaseProposal.__init__(self, adapt=False, scale=float(scale)) 135 | 136 | def generate_direction(self, u, ndim, points): 137 | # generate unit direction 138 | x = numpy.random.normal(size=ndim) 139 | d = x / (x**2).sum()**0.5 140 | return d 141 | def new_chain(self, u, ndim, points, is_inside_filter): 142 | BaseProposal.new_chain(self) 143 | self.new_direction(u, ndim, points, is_inside_filter) 144 | def new_direction(self, u, ndim, points, is_inside_filter): 145 | d = self.generate_direction(u, ndim, points) 146 | #print('initial scale:', self.scale) 147 | # find end points 148 | forward_scale = self.scale 149 | # find a scale that is too large 150 | while True: 151 | assert forward_scale > 0 152 | p_for = u + d * forward_scale 153 | if is_inside_filter(p_for): 154 | # we are proposing too small. We should be outside 155 | forward_scale *= 2 156 | #print('too small, stepping further', forward_scale) 157 | else: 158 | break 159 | 160 | backward_scale = self.scale 161 | # find a scale that is too large 162 | while True: 163 | assert backward_scale > 0 164 | p_rev = u - d * backward_scale 165 | if is_inside_filter(p_rev): 166 | # we are proposing too small. 
We should be outside 167 | #print('too small, stepping back', backward_scale) 168 | backward_scale *= 2 169 | else: 170 | break 171 | # remember scale for next time: 172 | self.backward_scale = -backward_scale 173 | self.forward_scale = forward_scale 174 | self.direction = d 175 | 176 | def propose(self, u, ndim, points, is_inside_filter): 177 | # generate a random point between the two points. 178 | while True: 179 | #print('slice range:', (self.backward_scale, self.forward_scale)) 180 | x = numpy.random.uniform(self.backward_scale, self.forward_scale) 181 | p = u + self.direction * x 182 | #assert self.forward_scale - self.backward_scale > 1e-100 183 | if x < 0: 184 | self.backward_scale = x 185 | else: 186 | self.forward_scale = x 187 | if is_inside_filter(p): 188 | if self.adapt: 189 | self.scale = self.forward_scale - self.backward_scale 190 | #print('adapting scale to', self.scale) 191 | return p 192 | 193 | def accept(self, accepted): 194 | # scale should not be modified 195 | pass 196 | 197 | def __repr__(self): 198 | return 'FilteredUnitHARMProposal(scale=%s, adapt=%s)' % (self.scale, self.adapt) 199 | 200 | class FilteredMahalanobisHARMProposal(FilteredUnitHARMProposal): 201 | """ 202 | Mahalanobis HARM proposal. 203 | 204 | @see BaseProposal 205 | """ 206 | 207 | def generate_direction(self, u, ndim, points): 208 | # generate direction from mahalanobis metric 209 | metric = numpy.cov(numpy.transpose(points)) 210 | assert metric.shape == (ndim,ndim), metric.shape 211 | x = numpy.random.multivariate_normal(numpy.zeros(ndim), metric) 212 | d = x / (x**2).sum()**0.5 213 | return d 214 | def __repr__(self): 215 | return 'FilteredMahalanobisHARMProposal(scale=%s, adapt=%s)' % (self.scale, self.adapt) 216 | 217 | class FilteredUnitRandomSliceProposal(FilteredUnitHARMProposal): 218 | """ 219 | Unit Slice sampling proposal, random component-wise. 220 | 221 | @see BaseProposal 222 | """ 223 | def generate_direction(self, u, ndim, points): 224 | # choose a random base vector 225 | d = numpy.zeros(ndim) 226 | i = numpy.random.randint(ndim) 227 | d[i] = 1 228 | return d 229 | def __repr__(self): 230 | return 'FilteredUnitRandomSliceProposal(scale=%s, adapt=%s)' % (self.scale, self.adapt) 231 | 232 | class FilteredUnitIterateSliceProposal(FilteredUnitHARMProposal): 233 | """ 234 | Unit Slice sampling proposal, iterative component-wise. 235 | 236 | @see BaseProposal 237 | """ 238 | def __init__(self, adapt = False, scale = 1.): 239 | BaseProposal.__init__(self, adapt=False, scale=float(scale)) 240 | self.curindex = 0 241 | 242 | def generate_direction(self, u, ndim, points): 243 | # choose next base vector 244 | d = numpy.zeros(ndim) 245 | self.curindex = (self.curindex + 1) % ndim 246 | d[self.curindex] = 1 247 | return d 248 | def __repr__(self): 249 | return 'FilteredUnitIterateSliceProposal(scale=%s, adapt=%s)' % (self.scale, self.adapt) 250 | 251 | class FilteredUnitRandomSliceProposal(FilteredUnitHARMProposal): 252 | """ 253 | Unit Slice sampling proposal, random component-wise. 
254 | 255 | @see BaseProposal 256 | """ 257 | def generate_direction(self, u, ndim, points): 258 | # choose a random base vector 259 | d = numpy.zeros(ndim) 260 | i = numpy.random.randint(ndim) 261 | d[i] = 1 262 | return d 263 | def __repr__(self): 264 | return 'FilteredUnitRandomSliceProposal(scale=%s, adapt=%s)' % (self.scale, self.adapt) 265 | 266 | class SliceConstrainer(object): 267 | """ 268 | Markov chain Monte Carlo proposals using the Metropolis update: 269 | Do a number of steps, while adhering to boundary. 270 | """ 271 | def __init__(self, proposer = MultiScaleProposal(), nsteps = 10, nmaxsteps = 10000): 272 | self.proposer = proposer 273 | self.sampler = None 274 | # number of new directions 275 | self.nsteps = nsteps 276 | # number of narrowings 277 | self.nmaxsteps = nmaxsteps 278 | 279 | def draw_constrained(self, Lmins, priortransform, loglikelihood, ndim, 280 | live_pointsu, **kwargs): 281 | i = numpy.random.randint(len(live_pointsu)) 282 | ui = live_pointsu[i] 283 | xi = None 284 | naccepts = 0 285 | nevals = 0 286 | # new direction 287 | for i in range(self.nsteps): 288 | self.proposer.new_chain(ui, ndim, live_pointsu, is_inside_unit_filter) 289 | # narrow in until we get an accept 290 | for n in range(self.nmaxsteps): 291 | u = self.proposer.propose(ui, ndim, live_pointsu, is_inside_unit_filter) 292 | x = priortransform(u) 293 | L = loglikelihood(x) 294 | nevals += 1 295 | # MH accept rule 296 | # accept = L > Li or numpy.random.uniform() < exp(L - Li) 297 | # Likelihood-difference independent, because we do 298 | # exploration of the prior (full diffusion). 299 | # but only accept in constrained region, because that 300 | # is what we are exploring now. 301 | # accept = L >= Lmin 302 | #### 303 | # For collaborative nested sampling it is sampling 304 | # from the super-contour, so only one needs to work: 305 | accept = numpy.any(L >= Lmins) 306 | 307 | # tell proposer so it can scale 308 | self.proposer.accept(accept) 309 | if accept: 310 | ui, xi, Li = u, x, L 311 | naccepts += 1 312 | break 313 | if numpy.all(Li < Lmins): 314 | print() 315 | print('ERROR: SliceConstrainer could not find a point matching constraint!') 316 | print('ERROR: Proposer stats:') 317 | self.proposer.stats() 318 | assert numpy.all(Li < Lmins), (Li, Lmins, self.nmaxsteps, numpy.mean(self.proposer.accepts), len(self.proposer.accepts)) 319 | if xi is None: 320 | xi = priortransform(ui) 321 | return ui, xi, Li, nevals 322 | 323 | def stats(self): 324 | return self.proposer.stats() 325 | 326 | -------------------------------------------------------------------------------- /friends.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy 3 | import scipy.spatial, scipy.cluster 4 | import matplotlib.pyplot as plt 5 | from nested_sampling.clustering import clusterdetect 6 | from nested_sampling.clustering.neighbors import find_maxdistance, find_rdistance, initial_rdistance_guess, nearest_rdistance_guess 7 | 8 | class FriendsConstrainer(object): 9 | """ 10 | Rejection sampling pre-filtering method based on neighborhood to live points. 11 | 12 | "Distant" means in this implementation that the distance to a cluster member 13 | is large. 14 | The maximum distance to a cluster is computed by considering each 15 | cluster member and its k nearest neighbors in turn, and 16 | computing the maximum distance. 
17 | 18 | :param rebuild_every: After how many iterations should the clustering 19 | distance be re-computed? 20 | 21 | :param radial: 22 | if radial = True, then the normal euclidean distance is used. 23 | otherwise, the absolute coordinate difference in each dimension is used. 24 | 25 | :param metric: 26 | metric to use. Use 'chebyshev' for SupFriends, in which case then 27 | the supremum norm is used. Use 'euclidean' for RadFriends, via 28 | the euclidean norm. 29 | 30 | :param jackknife: 31 | if True, instead of leaving out a group of live points in 32 | the distance estimate, only one is left out in turn (jackknife resampling 33 | instead of bootstrap resampling). 34 | 35 | :param force_shrink: 36 | if True, the distance can only decrease between sampling steps. 37 | 38 | """ 39 | def __init__(self, rebuild_every = 50, radial = True, metric = 'euclidean', jackknife = False, 40 | force_shrink = False, 41 | hinter = None, verbose = False, 42 | keep_phantom_points=False, optimize_phantom_points=False): 43 | self.maxima = [] 44 | self.iter = 0 45 | self.region = None 46 | self.rebuild_every = rebuild_every 47 | self.radial = radial 48 | self.metric = metric 49 | self.file = None 50 | self.jackknife = jackknife 51 | self.force_shrink = force_shrink 52 | self.hinter = hinter 53 | self.verbose = verbose 54 | if keep_phantom_points: 55 | assert self.force_shrink, 'keep_phantom_points needs force_shrink=True' 56 | self.keep_phantom_points = keep_phantom_points 57 | self.optimize_phantom_points = optimize_phantom_points 58 | self.phantom_points = [] 59 | self.phantom_points_Ls = [] 60 | self.last_cluster_points = None 61 | 62 | def cluster(self, u, ndim, keepRadius=False): 63 | """ 64 | 65 | """ 66 | if self.verbose: print('building region ...') 67 | if len(u) > 10: 68 | if keepRadius and self.region is not None and 'maxdistance' in self.region: 69 | maxdistance = self.region['maxdistance'] 70 | else: 71 | if self.radial: 72 | if self.jackknife: 73 | #maxdistance = initial_rdistance_guess(u, k=1, metric=self.metric) 74 | maxdistance = nearest_rdistance_guess(u, metric=self.metric) 75 | else: 76 | maxdistance = find_rdistance(u, nbootstraps=20, metric=self.metric, verbose=self.verbose) 77 | else: 78 | maxdistance = find_maxdistance(u) 79 | if self.force_shrink and self.region is not None and 'maxdistance' in self.region: 80 | maxdistance = min(maxdistance, self.region['maxdistance']) 81 | if self.keep_phantom_points and len(self.phantom_points) > 0: 82 | # add phantoms to u now 83 | print('including phantom points in cluster members', self.phantom_points) 84 | u = numpy.vstack((u, self.phantom_points)) 85 | ulow = numpy.max([u.min(axis=0) - maxdistance, numpy.zeros(ndim)], axis=0) 86 | uhigh = numpy.min([u.max(axis=0) + maxdistance, numpy.ones(ndim)], axis=0) 87 | else: 88 | maxdistance = None 89 | ulow = numpy.zeros(ndim) 90 | uhigh = numpy.ones(ndim) 91 | if self.verbose: print('setting sampling region:', (ulow, uhigh), maxdistance) 92 | self.region = dict(members=u, maxdistance=maxdistance, ulow=ulow, uhigh=uhigh) 93 | self.generator = None 94 | 95 | def is_inside(self, u): 96 | """ 97 | Check if this new point is near or inside one of our clusters 98 | """ 99 | ndim = len(u) 100 | ulow = self.region['ulow'] 101 | uhigh = self.region['uhigh'] 102 | if not ((ulow <= u).all() and (uhigh >= u).all()): 103 | # does not even lie in our primitive rectangle 104 | # do not even need to compute the distances 105 | return False 106 | 107 | members = self.region['members'] 108 | maxdistance = 
self.region['maxdistance'] 109 | 110 | # if not initialized: no prefiltering 111 | if maxdistance is None: 112 | return True 113 | 114 | # compute distance to each member in each dimension 115 | if self.radial: 116 | dists = scipy.spatial.distance.cdist(members, [u], metric=self.metric) 117 | assert dists.shape == (len(members), 1) 118 | dist_criterion = dists < maxdistance 119 | else: 120 | dists = numpy.abs(u - members) 121 | assert dists.shape == (len(members), ndim), (dists.shape, ndim, len(members)) 122 | # nearer than maxdistance in all dimensions 123 | dist_criterion = numpy.all(dists < maxdistance, axis=1) 124 | assert dist_criterion.shape == (len(members),), (dist_criterion.shape, len(members)) 125 | # is it true for at least one? 126 | closeby = dist_criterion.any() 127 | if closeby: 128 | return True 129 | return False 130 | 131 | def are_inside_rect(self, u): 132 | """ 133 | Check if the new points are near or inside one of our clusters 134 | """ 135 | ulow = self.region['ulow'] 136 | uhigh = self.region['uhigh'] 137 | mask = numpy.logical_and(((ulow <= u).all(axis=1), (uhigh >= u).all(axis=1))) 138 | def are_inside_cluster(self, u, ndim): 139 | members = self.region['members'] 140 | maxdistance = self.region['maxdistance'] 141 | 142 | # if not initialized: no prefiltering 143 | if maxdistance is None: 144 | return numpy.ones(len(u), dtype=bool) 145 | 146 | # compute distance to each member in each dimension 147 | if self.radial: 148 | dists = scipy.spatial.distance.cdist(members, u, metric=self.metric) 149 | assert dists.shape == (len(members), len(u)) 150 | dist_criterion = dists < maxdistance 151 | else: 152 | raise NotImplementedError() 153 | # is it true for at least one? 154 | closeby = dist_criterion.any(axis=0) 155 | return closeby 156 | 157 | def generate(self, ndim): 158 | it = True 159 | verbose = False and self.verbose 160 | ntotal = 0 161 | # largest maxdistance where generating from full space makes sense 162 | full_maxdistance = 0.5 * (0.01)**(1./ndim) 163 | while True: 164 | maxdistance = self.region['maxdistance'] 165 | if maxdistance is None: 166 | # do a prefiltering rejection sampling first 167 | u = numpy.random.uniform(self.region['ulow'], self.region['uhigh'], size=ndim) 168 | yield u, ntotal 169 | ntotal = 0 170 | continue 171 | members = self.region['members'] 172 | it = numpy.random.uniform() < 0.01 173 | # depending on the region size compared to 174 | # the total space, one of the two methods will 175 | # be more efficient 176 | if it or not self.radial or maxdistance > full_maxdistance: 177 | it = True 178 | # for large regions 179 | # do a prefiltering rejection sampling first 180 | us = numpy.random.uniform(self.region['ulow'], self.region['uhigh'], size=(100, ndim)) 181 | ntotal += 100 182 | mask = self.are_inside_cluster(self.transform_points(us), ndim) 183 | if not mask.any(): 184 | continue 185 | us = us[mask] 186 | #indices = numpy.arange(len(mask))[mask] 187 | #for i in indices: 188 | # u = us[indices[i],:] 189 | for u in us: 190 | yield u, ntotal 191 | ntotal = 0 192 | else: 193 | # for small regions 194 | # draw from points 195 | us = members[numpy.random.randint(0, len(members), 100),:] 196 | ntotal += 100 197 | if verbose: print('chosen point', us) 198 | if self.metric == 'euclidean': 199 | # draw direction around it 200 | direction = numpy.random.normal(0, 1, size=(100, ndim)) 201 | direction = direction / ((direction**2).sum(axis=1)**0.5).reshape((-1,1)) 202 | if verbose: print('chosen direction', direction) 203 | # choose radius: 
volume gets larger towards the outside 204 | # so give the correct weight with dimensionality 205 | radius = maxdistance * numpy.random.uniform(0, 1, size=(100,1))**(1./ndim) 206 | us = us + direction * radius 207 | else: 208 | assert self.metric == 'chebyshev' 209 | us = us + numpy.random.uniform(-maxdistance, maxdistance, size=(100, ndim)) 210 | if verbose: print('using point', u) 211 | inside = numpy.logical_and((us >= 0).all(axis=1), (us <= 1).all(axis=1)) 212 | if not inside.any(): 213 | if verbose: print('outside boundaries', us, direction, maxdistance) 214 | continue 215 | us = us[inside] 216 | # count the number of points this is close to 217 | dists = scipy.spatial.distance.cdist(members, us, metric=self.metric) 218 | assert dists.shape == (len(members), len(us)) 219 | nnear = (dists < maxdistance).sum(axis=0) 220 | if verbose: print('near', nnear) 221 | #ntotal += 1 222 | # accept with probability 1./nnear 223 | coin = numpy.random.uniform(size=len(us)) 224 | 225 | accept = coin < 1. / nnear 226 | if not accept.any(): 227 | if verbose: print('probabilistic rejection due to overlaps') 228 | continue 229 | us = us[accept] 230 | for u in us: 231 | yield u, ntotal 232 | ntotal = 0 233 | 234 | def transform_new_points(self, us): 235 | return us 236 | def transform_points(self, us): 237 | return us 238 | def transform_point(self, u): 239 | return u 240 | 241 | def rebuild(self, u, ndim, keepRadius=False): 242 | if self.last_cluster_points is None or \ 243 | len(self.last_cluster_points) != len(u) or \ 244 | numpy.any(self.last_cluster_points != u): 245 | self.cluster(u=self.transform_new_points(u), ndim=ndim, keepRadius=keepRadius) 246 | self.last_cluster_points = u 247 | 248 | # reset generator 249 | self.generator = self.generate(ndim=ndim) 250 | def debug(self, ndim): 251 | if self.file is None: 252 | #self.file = open("friends_debug.txt", "a") 253 | import tempfile 254 | filename = tempfile.mktemp(dir='', 255 | prefix='friends%s-%s_' % ( 256 | '1' if self.jackknife else '', 257 | self.metric)) 258 | self.file = open(filename, 'w') 259 | self.file.write("{} {} {}\n".format(self.iter, self.region['maxdistance'], len(self.region['members']))) 260 | self.file.write("{} {} {} {}\n".format(self.iter, self.region['maxdistance'], len(self.region['members']), ndim)) 261 | def debugplot(self, u = None): 262 | print('creating plot...') 263 | n = len(self.region['members'][0]) / 2 264 | plt.figure(figsize=(6, n/2*4+1)) 265 | m = self.region['members'] 266 | d = self.region['maxdistance'] 267 | for i in range(n): 268 | plt.subplot(numpy.ceil(n / 2.), 2, 1+i) 269 | j = i * 2 270 | k = i * 2 + 1 271 | plt.plot(m[:,j], m[:,k], 'x', color='b', ms=1) 272 | plt.gca().add_artist(plt.Circle((m[0,j], m[0,k]), d, color='g', alpha=0.3)) 273 | if u is not None: 274 | plt.plot(u[j], u[k], 's', color='r') 275 | plt.gca().add_artist(plt.Circle((u[j], u[k]), d, color='r', alpha=0.3)) 276 | prefix='friends%s-%s_' % ('1' if self.jackknife else '', self.metric) 277 | plt.savefig(prefix + 'cluster.pdf') 278 | plt.close() 279 | print('creating plot... done') 280 | 281 | def draw_constrained(self, Lmins, priortransform, loglikelihood, live_pointsu, ndim, max_draws=None, **kwargs): 282 | # previous is [[u, x, L], ...] 
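        # Flow of this method: bump the iteration counter and rebuild the friends
        # region from the current live points every rebuild_every iterations; then
        # pull candidates from self.generator (which only yields unit-cube points
        # near the region members), optionally adjust them with the hinter, and
        # evaluate priortransform + loglikelihood. The first candidate whose
        # likelihood exceeds any of the Lmins thresholds (or any candidate, once
        # max_draws is exceeded) is returned; if more than ~1000 evaluations pass
        # without success, the region is rebuilt and sampling continues.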
283 | self.iter += 1 284 | rebuild = self.iter % self.rebuild_every == 1 285 | if rebuild or self.region is None: 286 | self.rebuild(numpy.asarray(live_pointsu), ndim, keepRadius=False) 287 | if self.generator is None: 288 | self.generator = self.generate(ndim=ndim) 289 | ntoaccept = 0 290 | ntotalsum = 0 291 | while True: 292 | for u, ntotal in self.generator: 293 | assert (u >= 0).all() and (u <= 1).all(), u 294 | ntotalsum += ntotal 295 | 296 | if self.hinter is not None: 297 | hints = self.hinter(u) 298 | if len(hints) == 0: 299 | # no way 300 | continue 301 | if len(hints) > 1: 302 | # choose a random solution, by size 303 | raise NotImplementedError("multiple solutions not implemented") 304 | hints = hints[numpy.random.randInt(len(hints))] 305 | else: 306 | hints = hints[0] 307 | 308 | for i, lo, hi in hints: 309 | u[i] = numpy.random.uniform(lo, hi) 310 | if not is_inside(self.transform_point(u)): 311 | # not sure if this is a good idea 312 | # it means we dont completely trust 313 | # the hinting function 314 | continue 315 | 316 | x = priortransform(u) 317 | L = loglikelihood(x) 318 | ntoaccept += 1 319 | 320 | if numpy.any(L > Lmins) or (max_draws is not None and ntotalsum > max_draws): 321 | # yay, we win 322 | if ntotalsum > 10000: 323 | if self.verbose: 324 | print('sampled %d points, evaluated %d ' % (ntotalsum, ntoaccept)) 325 | #self.debugplot(u) 326 | return u, x, L, ntoaccept 327 | 328 | # if running very inefficient, optimize clustering 329 | # if we haven't done so at the start 330 | if not rebuild and ntoaccept > 1000: 331 | #self.debugplot(u) 332 | break 333 | rebuild = True 334 | self.rebuild(numpy.asarray(live_pointsu), ndim, keepRadius=False) 335 | 336 | if __name__ == '__main__': 337 | friends = FriendsConstrainer(radial = True) 338 | 339 | u = numpy.random.uniform(0.45, 0.55, size=1000).reshape((-1, 2)) 340 | ndim = 2 341 | friends.cluster(u, ndim=ndim) 342 | Lmin = -1 343 | rv = scipy.stats.norm(0.515, 0.03) 344 | def priortransform(x): return x 345 | def loglikelihood(x): return rv.logpdf(x).sum() 346 | previous = [] 347 | colors = ['r', 'g', 'orange'] 348 | plt.figure("dists", figsize=(7,4)) 349 | plt.figure("plane", figsize=(5,5)) 350 | plt.plot(u[:,0], u[:,1], 'x') 351 | Lmins = [-5, 2, 2.5] #, 2.58] 352 | for j, (Lmin, color) in enumerate(zip(numpy.array(Lmins)*ndim, colors)): 353 | values = [] 354 | for i in range(200): 355 | friends.iter = 4 # avoid rebuild 356 | u, x, L, ntoaccept = friends.draw_constrained(Lmin, priortransform, loglikelihood, previous, ndim) 357 | plt.figure("plane") 358 | plt.plot(u[0], u[1], '+', color=color) 359 | values.append(u) 360 | values = numpy.array(values) 361 | plt.figure("dists") 362 | for k in range(ndim): 363 | plt.subplot(1, ndim, k + 1) 364 | plt.title('Lmin={}, dim={}'.format(Lmin, k)) 365 | plt.hist(values[:,k], cumulative=True, normed=True, 366 | color=color, bins=1000, histtype='step') 367 | plt.figure("plane") 368 | plt.savefig('friends_sampling_test.pdf', bbox_inches='tight') 369 | plt.close() 370 | plt.figure("dists") 371 | plt.savefig('friends_sampling_test_dists.pdf', bbox_inches='tight') 372 | plt.close() 373 | 374 | # another test: given a group of samples, assert that only neighbors are evaluated 375 | 376 | r = numpy.random.uniform(0.2, 0.25, size=400) 377 | phi = numpy.random.uniform(0, 1, size=400)**10 * 2*numpy.pi 378 | u = numpy.transpose([0.5 + r*numpy.cos(phi), 0.5 + r*numpy.sin(phi)]) 379 | friends.cluster(u, ndim=2) 380 | plt.figure(figsize=(10,5)) 381 | plt.subplot(1, 2, 1) 382 | plt.plot(u[:,0], 
u[:,1], 'x') 383 | suggested = [] 384 | def loglikelihood(x): 385 | r = ((x[0] - 0.5)**2 + (x[1] - 0.5)**2)**0.5 386 | #assert r < 0.5 387 | #assert r > 0.1 388 | suggested.append(r) 389 | if r > 0.2 and r < 0.25: 390 | plt.plot(x[0], x[1], 'o', color='green') 391 | return 100 392 | plt.plot(x[0], x[1], 'o', color='red') 393 | return -100 394 | 395 | ndim = 2 396 | taken = [] 397 | for i in range(100): 398 | friends.iter = 4 # avoid rebuild 399 | u, x, L, ntoaccept = friends.draw_constrained(Lmin, priortransform, loglikelihood, previous, ndim) 400 | r = ((x[0] - 0.5)**2 + (x[1] - 0.5)**2)**0.5 401 | taken.append(r) 402 | print('suggested:', u) 403 | plt.subplot(1, 2, 2) 404 | plt.hist(taken, cumulative=True, normed=True, 405 | color='g', bins=1000, histtype='step') 406 | plt.hist(suggested, cumulative=True, normed=True, 407 | color='r', bins=1000, histtype='step') 408 | #x = numpy.linspace(0, 1, 400) 409 | #y = x**ndim - (x - min(suggested) / max(suggested))**ndim 410 | #y /= max(y) 411 | #plt.plot(x * (max(suggested) - min(suggested)) + min(suggested), y, '--', color='grey') 412 | 413 | plt.savefig('friends_sampling_test_sampling.pdf', bbox_inches='tight') 414 | plt.close() 415 | 416 | 417 | 418 | 419 | -------------------------------------------------------------------------------- /multi_nested_sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Sampler 5 | ---------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
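Bookkeeping used throughout this class (a summary of what __init__ below sets up)::

    pointpile    : (npoints, ndim) unit-cube coordinates of every point drawn so far
    pointpilex   : the same points after priortransform
    live_pointsp : (nlive, ndata) indices into pointpile; one column of live points per data set
    live_pointsL : (nlive, ndata) log-likelihood of each live point under each data set
    shelves[d]   : spare (p, u, x, L) candidates banked for data set d, pruned once
                   they no longer exceed that data set's lowest live-point likelihood

Data sets that share live points can be advanced together; the generate_subsets_*
methods below find these connected groups.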
16 | 17 | 18 | 19 | """ 20 | import numpy 21 | from numpy import exp, log, log10, pi 22 | import progressbar 23 | import igraph 24 | from collections import defaultdict 25 | 26 | status_symbols = { 27 | 0:' ', 28 | 1:u"\u2581", 29 | 2:u"\u2582", 30 | 3:u"\u2583", 31 | 4:u"\u2584", 5:u"\u2584", 32 | 6:u"\u2585", 7:u"\u2585", 33 | 8:u"\u2586", 9:u"\u2586", 34 | 10:u"\u2587", 11:u"\u2587", 12:u"\u2587", 13:u"\u2587", 14:u"\u2587", 35 | 15:u"\u2588", 16:u"\u2588", 17:u"\u2588", 18:u"\u2588", 19:u"\u2588", 36 | } 37 | 38 | def find_nsmallest(n, arr1, arr2): 39 | # old version 40 | arr = numpy.hstack((arr1, arr2)) 41 | arr.sort() 42 | return arr[n] 43 | 44 | def find_nsmallest(n, arr1, arr2): 45 | # new version, faster because it does not need to sort everything 46 | arr = numpy.concatenate((arr1, arr2)) 47 | return numpy.partition(arr, n)[n] 48 | 49 | class MultiNestedSampler(object): 50 | """ 51 | Samples points, always replacing the worst live point, forever. 52 | 53 | This implementation always removes and replaces one point (r=1), 54 | and does so linearly (no parallelisation). 55 | 56 | This class is implemented as an iterator. 57 | """ 58 | def __init__(self, priortransform, multi_loglikelihood, superset_draw_constrained, individual_draw_constrained, draw_constrained, 59 | ndata, ndim, nlive_points = 200, draw_global_uniform = None, 60 | nsuperset_draws = 10, use_graph=False): 61 | self.nlive_points = nlive_points 62 | self.nsuperset_draws = nsuperset_draws 63 | self.priortransform = priortransform 64 | self.real_multi_loglikelihood = multi_loglikelihood 65 | self.multi_loglikelihood = multi_loglikelihood 66 | self.superset_draw_constrained = superset_draw_constrained 67 | self.individual_draw_constrained = individual_draw_constrained 68 | self.draw_constrained = draw_constrained 69 | #self.samples = [] 70 | self.global_iter = 0 71 | self.ndim = ndim 72 | self.ndata = ndata 73 | self.superpoints = [] 74 | # lazy building of graph 75 | self.use_graph = use_graph 76 | self.membership_graph = None 77 | self.last_graph = None 78 | self.last_graph_selection = None 79 | self.point_data_map = None 80 | # draw N starting points from prior 81 | pointpile = [] 82 | pointpilex = [] 83 | live_pointsp = [None] * nlive_points 84 | #live_pointsu = [None] * nlive_points 85 | #live_pointsx = [None] * nlive_points 86 | live_pointsL = [None] * nlive_points 87 | 88 | print('generating initial %d live points' % (nlive_points)) 89 | data_mask = numpy.ones(ndata) == 1 90 | 91 | for i in range(nlive_points): 92 | u = self.draw_global_uniform() 93 | x = priortransform(u) 94 | L = multi_loglikelihood(x, data_mask=data_mask) 95 | p = len(pointpile) 96 | live_pointsp[i] = [p]*ndata 97 | pointpile.append(u) 98 | pointpilex.append(x) 99 | #self.global_iter += 1 100 | #live_pointsu[i] = [u]*ndata 101 | #live_pointsx[i] = [x]*ndata 102 | live_pointsL[i] = L 103 | self.superpoints.append(p) 104 | #self.samples.append([live_pointsu[i], live_pointsx[i], live_pointsL[i]]) 105 | print('generated %d live points' % (nlive_points)) 106 | self.pointpile = numpy.array(pointpile) 107 | self.pointpilex = numpy.array(pointpilex) 108 | self.live_pointsp = numpy.array(live_pointsp) 109 | #self.live_pointsu = numpy.array(live_pointsu) 110 | #self.live_pointsx = numpy.array(live_pointsx) 111 | self.live_pointsL = numpy.array(live_pointsL) 112 | self.Lmax = self.live_pointsL.max(axis=0) 113 | self.data_mask_all = numpy.ones(self.ndata) == 1 114 | self.real_data_mask_all = numpy.ones(self.ndata) == 1 115 | assert self.Lmax.shape == 
(ndata,) 116 | self.ndraws = nlive_points 117 | self.shelves = [[] for _ in range(ndata)] 118 | 119 | self.dump_iter = 1 120 | 121 | def draw_global_uniform(self): 122 | return numpy.random.uniform(0, 1, size=self.ndim) 123 | 124 | def get_unique_points(self, allpoints): 125 | d = allpoints.reshape((-1,self.ndim)) 126 | b = d.view(numpy.dtype((numpy.void, d.dtype.itemsize * d.shape[1]))) 127 | _, idx = numpy.unique(b, return_index=True) 128 | return d[idx] 129 | 130 | def get_unique_pointsp(self, allpoints): 131 | idx = numpy.unique(allpoints) 132 | return self.pointpile[idx], idx 133 | 134 | def prepare(self): 135 | live_pointsL = self.live_pointsL 136 | Lmins = live_pointsL.min(axis=0) 137 | Lmini = live_pointsL.argmin(axis=0) 138 | # clean up shelves 139 | for d in range(self.ndata): 140 | self.shelves[d] = [(pj, uj, xj, Lj) for (pj, uj, xj, Lj) in self.shelves[d] if Lj > Lmins[d]] 141 | all_global_live_pointsu, all_global_live_pointsp = self.get_unique_pointsp(self.live_pointsp) 142 | all_Lmin = live_pointsL.min() 143 | return all_global_live_pointsu, all_global_live_pointsp, all_Lmin, Lmins, Lmini 144 | 145 | def shelf_status(self): 146 | print('shelf status: %s' % ''.join([status_symbols.get(len(shelf), 'X') for shelf in self.shelves])) 147 | 148 | def cut_down(self, surviving): 149 | # delete some data sets 150 | self.live_pointsp = self.live_pointsp[:,surviving] 151 | self.live_pointsL = self.live_pointsL[:,surviving] 152 | self.shelves = [shelf for s, shelf in zip(surviving, self.shelves) if s] 153 | self.ndata = surviving.sum() 154 | self.Lmax = self.live_pointsL.max(axis=0) 155 | self.data_mask_all = numpy.ones(self.ndata) == 1 156 | self.real_data_mask_all[self.real_data_mask_all] = surviving 157 | def multi_loglikelihood_subset(params, mask): 158 | subset_mask = self.real_data_mask_all.copy() 159 | subset_mask[subset_mask] = mask 160 | return self.real_multi_loglikelihood(params, subset_mask) 161 | 162 | self.multi_loglikelihood = multi_loglikelihood_subset 163 | # rebuild graph because igraph does not support renaming nodes 164 | self.membership_graph = None 165 | self.point_data_map = None 166 | self.last_graph = None 167 | self.last_graph_selection = None 168 | #if self.point_data_map is not None: 169 | # for d, s in enumerate(surviving) 170 | # if s: continue 171 | # for p in self.live_pointsp[:,d]: 172 | # self.point_data_map[p].add(d) 173 | 174 | 175 | def rebuild_graph(self): 176 | if self.membership_graph is None: 177 | print('constructing graph...') 178 | graph = igraph.Graph(directed=False) 179 | # pointing from live_point to member 180 | for i in numpy.where(self.data_mask_all)[0]: 181 | graph.add_vertex("n%d" % i, id=i, vtype=0) 182 | for p in range(len(self.pointpile)): 183 | graph.add_vertex("p%d" % p, id=p, vtype=1) 184 | edges = [] 185 | for i in numpy.where(self.data_mask_all)[0]: 186 | #graph.add_vertex("n%d" % i, id=i, vtype=0) 187 | edges += [("n%d" % i, "p%d" % p) for p in self.live_pointsp[:,i]] 188 | print('connecting graph ...') 189 | graph.add_edges(edges) 190 | print('constructing graph done.') 191 | self.membership_graph = graph 192 | 193 | def rebuild_map(self): 194 | if self.point_data_map is None: 195 | print('constructing map...') 196 | # pointing from live_point to member 197 | self.point_data_map = defaultdict(set) 198 | for i in range(self.ndata): 199 | for p in self.live_pointsp[:,i]: 200 | self.point_data_map[p].add(i) 201 | print('constructing map done.') 202 | 203 | 204 | def generate_subsets_nograph(self, data_mask, allp): 205 | # 
generate data subsets which share points. 206 | selected = numpy.where(data_mask)[0] 207 | all_selected = len(selected) == len(data_mask) 208 | firstmember = selected[0] 209 | if len(selected) == 1: 210 | # trivial case: 211 | # requested only a single slot, so return its live points 212 | yield data_mask, self.live_pointsp[:,firstmember] 213 | return 214 | 215 | if not all_selected: 216 | allp = numpy.unique(self.live_pointsp[:,selected].flatten()) 217 | 218 | if len(allp) < 2 * self.nlive_points: 219 | print('generate_subsets: only %d unique live points known, so connected' % len(allp)) 220 | # if fewer than 2*nlive unique points are known, 221 | # some must be shared between data sets. 222 | # So no disjoint data sets 223 | yield data_mask, allp 224 | return 225 | 226 | if len(self.superpoints) > 0: 227 | print('generate_subsets: %d superpoints known, so connected' % len(self.superpoints)) 228 | # there are some points shared by all data sets 229 | # so no disjoint data sets 230 | yield data_mask, allp 231 | return 232 | 233 | self.rebuild_map() 234 | to_handle = data_mask.copy() 235 | while to_handle.any(): 236 | firstmember = numpy.where(to_handle)[0][0] 237 | to_handle[firstmember] = False 238 | members = [firstmember] 239 | # get live points of this member 240 | member_live_pointsp = self.live_pointsp[:,firstmember].tolist() 241 | # look through to_handle for entries and check if they have the points 242 | i = 0 243 | while True: 244 | if i >= len(member_live_pointsp) or not to_handle.any(): 245 | break 246 | p = member_live_pointsp[i] 247 | newmembers = [m for m in self.point_data_map[p] if to_handle[m]] 248 | print(newmembers) 249 | members += newmembers 250 | for newp in numpy.unique(self.live_pointsp[:,newmembers]): 251 | if newp not in member_live_pointsp: 252 | member_live_pointsp.append(newp) 253 | to_handle[newmembers] = False 254 | i = i + 1 255 | 256 | # now we have our members and live points 257 | member_data_mask = numpy.zeros(len(data_mask), dtype=bool) 258 | member_data_mask[members] = True 259 | #print 'returning:', member_data_mask, member_live_pointsp 260 | yield member_data_mask, member_live_pointsp 261 | 262 | def generate_subsets_graph(self, data_mask, allp): 263 | # generate data subsets which share points. 264 | selected = numpy.where(data_mask)[0] 265 | all_selected = len(selected) == len(data_mask) 266 | firstmember = selected[0] 267 | if len(selected) == 1: 268 | # trivial case: 269 | # requested only a single slot, so return its live points 270 | yield data_mask, self.live_pointsp[:,firstmember] 271 | return 272 | 273 | if not all_selected: 274 | allp = numpy.unique(self.live_pointsp[:,selected].flatten()) 275 | 276 | if len(allp) < 2 * self.nlive_points: 277 | print('generate_subsets: only %d unique live points known, so connected' % len(allp)) 278 | # if fewer than 2*nlive unique points are known, 279 | # some must be shared between data sets. 
280 | # So no disjoint data sets 281 | yield data_mask, allp 282 | return 283 | 284 | if len(self.superpoints) > 0: 285 | print('generate_subsets: %d superpoints known, so connected' % len(self.superpoints)) 286 | # there are some points shared by all data sets 287 | # so no disjoint data sets 288 | yield data_mask, allp 289 | return 290 | 291 | self.rebuild_graph() 292 | if all_selected: 293 | graph = self.membership_graph 294 | else: 295 | graph = self._generate_subsets_graph_create_subgraph(data_mask, allp) 296 | 297 | for sub_data_mask, sub_points in self._generate_subsets_graph_subgraphs(graph, data_mask, all_selected, allp): 298 | yield sub_data_mask, sub_points 299 | 300 | def _generate_subsets_graph_create_subgraph(self, data_mask, allp): 301 | # need to look at the subgraph with only the selected 302 | # dataset nodes 303 | members = ['n%d' % v for v, sel in enumerate(data_mask) if sel] 304 | members += ['p%d' % p for p in allp] 305 | # if the previous graph had all these nodes (or more) 306 | if self.last_graph is not None and self.last_graph_selection[data_mask].all(): 307 | # re-using previously cut-down graph 308 | # this may speed things up because we have to cut less 309 | print('generate_subsets: re-using previous graph') 310 | prevgraph = self.last_graph 311 | else: 312 | # not a super-set, need to start with whole graph 313 | prevgraph = self.membership_graph 314 | 315 | graph = prevgraph.subgraph(members) 316 | self.last_graph = graph 317 | self.last_graph_selection = data_mask 318 | return graph 319 | 320 | 321 | def _generate_subsets_graph_subgraphs(self, graph, data_mask, all_selected, allp): 322 | # we could test here with graph.is_connected() first 323 | # but if it is connected, then it takes as long as clusters() 324 | # and if it not connected, we have to call clusters() anyways. 325 | subgraphs = graph.clusters() 326 | assert len(subgraphs) > 0 327 | 328 | # single-node subgraphs can occur when 329 | # a live point is not used anymore 330 | # a real subgraph has to have a data point and its live points, 331 | # so at least nlive_points+1 entries 332 | subgraphs = [subgraph for subgraph in subgraphs if len(subgraph) > 1] 333 | 334 | if len(subgraphs) == 1: 335 | yield data_mask, allp 336 | return 337 | 338 | # then identify disjoint subgraphs 339 | for subgraph in subgraphs: 340 | member_data_mask = numpy.zeros(len(data_mask), dtype=bool) 341 | member_live_pointsp = [] 342 | for vi in subgraph: 343 | att = graph.vs[vi].attributes() 344 | #print ' ', att 345 | if att['vtype'] == 0: 346 | i = att['id'] 347 | member_data_mask[i] = True 348 | else: 349 | p = att['id'] 350 | member_live_pointsp.append(p) 351 | if member_data_mask.any(): 352 | yield member_data_mask, member_live_pointsp 353 | #else: 354 | # print 'skipping node-free subgraph:', [self.membership_graph.vs[vi].attributes()['name'] for vi in subgraph] 355 | # print graph 356 | 357 | def __next__(self): 358 | # select worst point, lowest likelihood and replace 359 | live_pointsL = self.live_pointsL 360 | superset_membersets = None 361 | 362 | print('iteration %d' % self.global_iter) 363 | all_global_live_pointsu, all_global_live_pointsp, all_Lmin, Lmins, Lmini = self.prepare() 364 | iter = 0 365 | while True: 366 | iter += 1 367 | empty_mask = numpy.array([len(self.shelves[d]) == 0 for d in range(self.ndata)]) 368 | if not empty_mask.any(): 369 | # all have something in their shelves 370 | break 371 | 372 | # if superset draws enabled, do some of these first. 
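# The first self.nsuperset_draws passes through this loop make "super-set"
# draws: the constrained region is built from the live points of all data
# sets at once, and an accepted point can fill many shelves in one go.
# Once those passes are used up, sample_subset becomes True and the draws
# focus only on the data sets whose shelves are still empty (empty_mask),
# rebuilding the region for that subset. The loop ends as soon as every
# shelf holds at least one candidate replacement point.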
373 | sample_subset = iter > self.nsuperset_draws 374 | 375 | if sample_subset: 376 | # subset draw: focus on filling empty ones 377 | data_mask = empty_mask 378 | # cut_level = 5 4 3 2 1 0 0 0 0 379 | #cut_level = max(0, 5 - (iter - self.nsuperset_draws)) 380 | #data_mask = numpy.array([len(self.shelves[d]) <= cut_level for d in range(self.ndata)]) 381 | global_live_pointsu, global_live_pointsp = self.get_unique_pointsp(self.live_pointsp[:,data_mask]) 382 | else: 383 | # super-set draw, try to fill all/any 384 | data_mask = self.data_mask_all 385 | global_live_pointsu = all_global_live_pointsu 386 | global_live_pointsp = all_global_live_pointsp 387 | Lmin = all_Lmin 388 | use_rebuilding_draw = sample_subset 389 | 390 | self.shelf_status() 391 | # if the data sets do not share any live points, 392 | # it does not make sense to analyse them jointly 393 | # so we break them up into membersets here, stringing 394 | # together those that do. 395 | 396 | # if a previous superset draw did the decomposition already, 397 | # just reuse it 398 | if superset_membersets is not None and not sample_subset: 399 | membersets = superset_membersets 400 | elif self.use_graph: 401 | membersets = list(self.generate_subsets_graph(data_mask, global_live_pointsp)) 402 | else: 403 | membersets = list(self.generate_subsets_nograph(data_mask, global_live_pointsp)) 404 | 405 | if not sample_subset and superset_membersets is None: 406 | # store superset decomposition 407 | superset_membersets = membersets 408 | 409 | assert len(membersets) > 0 410 | if len(membersets) > 1: 411 | # if the data is split, regions need to be 412 | # rebuilt for every group 413 | use_rebuilding_draw = True 414 | 415 | for ji, (joint_data_mask, joint_live_pointsp) in enumerate(membersets): 416 | print('live point set %d/%d: %d from %d datasets, %s' % ( 417 | ji+1, len(membersets), len(joint_live_pointsp), 418 | joint_data_mask.sum(), 419 | 'focussed set constrained draw' if sample_subset else 'super-set constrained draw')) 420 | joint_live_pointsu = self.pointpile[joint_live_pointsp] 421 | #print 'members:', joint_data_mask.shape, joint_live_pointsu.shape 422 | max_draws = 1000 423 | njoints = joint_data_mask.sum() 424 | joint_indices = numpy.where(joint_data_mask)[0] 425 | firstd = joint_indices[0] 426 | # if it is the only dataset and we need an entry here, try longer 427 | if njoints == 1 and len(self.shelves[firstd]) == 0: 428 | max_draws = 100000 429 | 430 | # if there is more than one memberset and this one is full, 431 | # we do not need to do anything 432 | # this should be a rare occasion 433 | if len(membersets) > 1 and not sample_subset and all([len(self.shelves[d]) > 0 for d in joint_indices]): 434 | continue 435 | 436 | # Lmin needs to be corrected. It is the lowest L, but 437 | # this may not be useful for making a draw. 
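# Concretely: if data set d already has n candidates on its shelf, a new
# point is only useful if it would still be accepted after those n
# candidates have replaced the n worst live points. The effective
# threshold is therefore the (n+1)-th smallest value among the current
# live-point likelihoods and the shelved likelihoods combined, which is
# what find_nsmallest() returns via numpy.partition (0-based index n).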
438 | Lmins_higher = Lmins[joint_indices].copy() 439 | for j, d in enumerate(joint_indices): 440 | n = len(self.shelves[d]) 441 | if n == 0: 442 | # relevant only for non-empty shelves 443 | continue 444 | # to insert at position n 445 | # there must be n elements smaller 446 | # in self.shelves[d] and self.live_pointsL[:,d] 447 | Lmins_higher[j] = find_nsmallest(n, live_pointsL[:,d], [Li for _, _, _, Li in self.shelves[d]]) 448 | 449 | if njoints == 1: 450 | # only a single data set, we can keep the same region for longer 451 | real_firstd = numpy.where(self.real_data_mask_all)[0][firstd] 452 | draw_constrained = self.individual_draw_constrained(real_firstd, self.global_iter, sampler=self) 453 | elif use_rebuilding_draw: 454 | # a subset, perhaps different then last iteration 455 | # need to reconstruct the region from scratch 456 | real_joint_indices = numpy.where(self.real_data_mask_all)[0][joint_indices] 457 | draw_constrained = self.draw_constrained(real_joint_indices, self.real_data_mask_all, joint_live_pointsp, self.global_iter) 458 | else: 459 | # full data set, can keep longer 460 | draw_constrained = self.superset_draw_constrained 461 | 462 | uj, xj, Lj, n = draw_constrained( 463 | Lmins=Lmins_higher, 464 | priortransform=self.priortransform, 465 | loglikelihood=lambda params: self.multi_loglikelihood(params, joint_data_mask), 466 | ndim=self.ndim, 467 | draw_global_uniform=self.draw_global_uniform, 468 | live_pointsu = joint_live_pointsu, 469 | max_draws=max_draws, 470 | iter=self.global_iter, 471 | nlive_points=self.nlive_points 472 | ) 473 | 474 | # we have a new draw 475 | self.ndraws += int(n) 476 | ppi = len(self.pointpile) 477 | if self.membership_graph is not None: 478 | self.membership_graph.add_vertex("p%d" % ppi, id=ppi, vtype=1) 479 | self.pointpile = numpy.vstack((self.pointpile, [uj])) 480 | self.pointpilex = numpy.vstack((self.pointpilex, [xj])) 481 | nfilled = 0 482 | for j, d in enumerate(numpy.where(joint_data_mask)[0]): 483 | if Lj[j] > Lmins_higher[j]: 484 | self.shelves[d].append((ppi, uj, xj, Lj[j])) 485 | nfilled += 1 486 | if nfilled == self.ndata: 487 | # new point is a superpoint, accepted by all 488 | self.superpoints.append(ppi) 489 | print('accept after %d tries, filled %d shelves' % (n, nfilled)) 490 | 491 | # we got a new point 492 | #print 'new point:', Lmins[data_mask], (Lj>Lmins[data_mask])*1 493 | 494 | # pop: for every data entry, advance one point 495 | print('advancing all...') 496 | self.global_iter += 1 497 | pj_old = self.live_pointsp[Lmini,numpy.arange(self.ndata)] 498 | uis = self.pointpile[pj_old] 499 | xis = self.pointpilex[pj_old] 500 | Lis = live_pointsL[Lmini, numpy.arange(self.ndata)] 501 | if self.membership_graph is not None: 502 | print(' deleting edges...') 503 | self.membership_graph.delete_edges([("n%d" % d, "p%d" % pj) for d, pj in enumerate(pj_old)]) 504 | if self.point_data_map is not None: 505 | for d, pj in enumerate(pj_old): 506 | self.point_data_map[pj].remove(d) 507 | # point assignment changed, so can not re-use any more directly 508 | self.last_graph = None 509 | self.last_graph_selection = None 510 | if self.superpoints: 511 | print(' dropping superpoints ...') 512 | for pj in numpy.unique(pj_old): 513 | # no longer a superpoint, because it is no 514 | # longer shared by all data sets 515 | if pj in self.superpoints: 516 | self.superpoints.remove(pj) 517 | new_edges = None if self.membership_graph is None else [] 518 | print(' replacing dead points ...') 519 | for d in range(self.ndata): 520 | i = Lmini[d] 521 | 
pj, uj, xj, Lj = self.shelves[d].pop(0) 522 | self.live_pointsp[i,d] = pj 523 | live_pointsL[i,d] = Lj 524 | if new_edges is not None: 525 | new_edges.append(("n%d" % d, "p%d" % pj)) 526 | if self.point_data_map is not None: 527 | self.point_data_map[pj].add(d) 528 | if self.membership_graph is not None: 529 | print(' adding edges ...') 530 | self.membership_graph.add_edges(new_edges) 531 | self.Lmax = live_pointsL.max(axis=0) 532 | assert self.Lmax.shape == (self.ndata,) 533 | print('advancing done.') 534 | return numpy.asarray(uis), numpy.asarray(xis), numpy.asarray(Lis) 535 | 536 | def remainder(self, d=None): 537 | if d is None: 538 | print('sorting remainder...') 539 | indices = numpy.empty((self.ndata, self.nlive_points), dtype=int) 540 | for d in range(self.ndata): 541 | indices[d,:] = numpy.argsort(self.live_pointsL[:,d]) 542 | ds = numpy.arange(self.ndata) 543 | print('building remainder...') 544 | for i in range(self.nlive_points): 545 | j = indices[ds,i] 546 | p = self.live_pointsp[j,ds] 547 | u = self.pointpile[p] 548 | x = self.pointpilex[p] 549 | L = self.live_pointsL[j,ds] 550 | #u = [self.pointpile[self.live_pointsp[indices[d][i],d]] for d in range(self.ndata)] 551 | #x = [self.pointpilex[self.live_pointsp[indices[d][i],d]] for d in range(self.ndata)] 552 | #L = numpy.asarray([self.live_pointsL[indices[d][i],d] for d in range(self.ndata)]) 553 | yield u, x, L 554 | print('remainder done.') 555 | else: 556 | indices = numpy.argsort(self.live_pointsL[:,d]) 557 | for i in indices: 558 | u = self.pointpile[self.live_pointsp[i,d]] 559 | x = self.pointpilex[self.live_pointsp[i,d]] 560 | L = self.live_pointsL[i,d] 561 | yield u, x, L 562 | #yield self.live_pointsu[i,d], self.live_pointsx[i,d], self.live_pointsL[i,d] 563 | 564 | next = __next__ 565 | 566 | def __iter__(self): 567 | while True: yield self.__next__() 568 | 569 | __all__ = [MultiNestedSampler] 570 | 571 | -------------------------------------------------------------------------------- /musefuse.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | """ 3 | 4 | Main program 5 | --------------- 6 | 7 | Copyright (c) 2017 Johannes Buchner 8 | 9 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
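This driver loads a MUSE data cube (the DATA extension holds the fluxes, STAT the variances), keeps only the spaxels inside a ds9 region file, down-weights noisy wavelength ranges, and then fits every selected spectrum with a stellar-population model parametrised by metallicity Z, star-formation timescale logSFtau, age SFage (in Gyr), redshift z and dust attenuation EBV. All spaxels are analysed jointly with the MultiNestedSampler from multi_nested_sampler.py, and the per-spaxel evidences and posterior samples are written to an HDF5 file.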
16 | 17 | """ 18 | 19 | import numpy 20 | from numpy import exp 21 | import h5py 22 | import sys 23 | import json 24 | import os 25 | import time 26 | import astropy.io.fits as pyfits 27 | import matplotlib.pyplot as plt 28 | 29 | do_plotting = False 30 | 31 | print('loading data...') 32 | f = pyfits.open(sys.argv[1]) 33 | datasection = f['DATA'] 34 | y = datasection.data # values 35 | y = y[:3600,:,:] 36 | nspec, npixx, npixy = y.shape 37 | noise_level = f['STAT'].data # variance 38 | noise_level = noise_level[:3600,:,:] 39 | good = numpy.isfinite(noise_level).all(axis=0) 40 | print(' %.2f%% good...' % (100*good.mean())) 41 | #print numpy.where(~numpy.isfinite(noise_level[:,40,40])) 42 | #print noise_level[~numpy.isfinite(noise_level[:,40,40]),40,40] 43 | 44 | if do_plotting: 45 | print('plotting image...') 46 | plt.figure(figsize=(20,20)) 47 | plt.imshow(y[0,:,:]) 48 | plt.savefig('musefuse_img0.png', bbox_inches='tight') 49 | plt.close() 50 | 51 | regionfile = sys.argv[2] 52 | import pyregion 53 | region = pyregion.parse(open(regionfile).read()) 54 | mask = region.get_mask(shape=(npixx, npixy)) 55 | 56 | maskx = mask.any(axis=0) 57 | masky = mask.any(axis=1) 58 | i = numpy.where(maskx)[0] 59 | ilo, ihi = i.min(), i.max() + 1 60 | j = numpy.where(masky)[0] 61 | jlo, jhi = j.min(), j.max() + 1 62 | print((mask.sum(), ilo, ihi, jlo, jhi, y.shape, npixx, npixy)) 63 | #ndata = mask.sum() 64 | 65 | #ymask = mask.reshape((1, npixx, npixy)) 66 | ymask = numpy.array([mask] * len(y)) 67 | y[~ymask] = numpy.nan 68 | if do_plotting: 69 | print('plotting selection ...') 70 | plt.figure(figsize=(20,20)) 71 | plt.imshow(y[0,ilo:ihi,jlo:jhi]) 72 | plt.colorbar() 73 | plt.savefig('musefuse_sel_img0.png', bbox_inches='tight') 74 | plt.close() 75 | 76 | print('applying subselection ...') 77 | y = y[ymask] 78 | noise_level = noise_level[ymask] 79 | print(' subselection gave %s ...' % (y.shape)) 80 | y = y.reshape((nspec, -1)) 81 | noise_level = noise_level.reshape((nspec, -1)) 82 | x = datasection.header['CD3_3'] * numpy.arange(nspec) + datasection.header['CRVAL3'] 83 | wavelength = x 84 | #good = numpy.logical_and(numpy.isfinite(noise_level).all(axis=0), numpy.isfinite(y).all(axis=0)) 85 | print(' finding NaNs...') 86 | good = numpy.isfinite(noise_level).all(axis=0) 87 | print(' found %d finite spaxels ...' % (good.sum())) 88 | #assert good.shape == (ymask.sum(),), good.shape 89 | goodids = numpy.where(good)[0] 90 | #numpy.random.shuffle(goodids) 91 | 92 | ndata = int(os.environ.get('MAXDATA', len(goodids))) 93 | print(' truncating data to %d sets...' % ndata, goodids[:ndata]) 94 | ## truncate data 95 | y = y[:,goodids[:ndata]] 96 | noise_level = noise_level[:,goodids[:ndata]] 97 | assert (noise_level>0).all(), noise_level 98 | 99 | assert y.shape == (nspec, ndata), (y.shape, nspec, ndata) 100 | assert noise_level.shape == (nspec, ndata) 101 | 102 | assert ndata > 0, 'No valid data!?' 
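# The cleaning step below works on the variance spectra: for each channel j
# a +/- 10 channel window is taken, and channels whose variance deviates
# from the running median by more than 5x the median absolute deviation
# would get their variance inflated by 1e10 (that branch is currently
# switched off with "if False"). In addition, a few fixed channel ranges
# (presumably sky or artefact regions) always get 1e10 added to their
# variance, which effectively removes them from the chi^2 in the
# likelihood functions further down.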
103 | 104 | #noise_level[noise_level > 2 * numpy.median(vd[:,i]] = 1000 105 | 106 | print(' cleaning data') 107 | noise_level2 = noise_level.copy() 108 | w = 10 109 | for j in range(nspec): 110 | lo = j - w 111 | hi = j + w 112 | if lo < 0: 113 | lo = 0 114 | if hi > nspec: 115 | hi = nspec 116 | seg = noise_level[lo:hi,:] 117 | med = numpy.median(seg, axis=0) 118 | diff = numpy.abs(med.reshape((1, -1)) - seg) 119 | meddiff = numpy.median(diff, axis=0) 120 | diff = numpy.abs(noise_level[j,:] - med) 121 | v = (diff > 5 * meddiff) * 1e10 122 | #k = j 123 | if False and v.any(): 124 | print(' updating noise level at', j) #, meddiff, diff 125 | for k in range(max(0, j-3), min(nspec-1, j+3)+1): 126 | noise_level2[k,:] += v 127 | 128 | noise_level2[1600:1670,:] += 1e10 129 | noise_level2[1730:1780,:] += 1e10 130 | noise_level2[1950:2000,:] += 1e10 131 | noise_level2[1750+500:2200+500,:] += 1e10 132 | noise_level2[2300+500:2500+500,:] += 1e10 133 | #noise_level2[noise_level2 > noise_level.max()] = noise_level.max() 134 | 135 | if do_plotting: 136 | for i in range(ndata): 137 | plt.figure() 138 | xi = numpy.arange(len(y[:,i])) 139 | plt.plot(xi, y[:,i], color='k', lw=1) 140 | sigma0 = noise_level[:,i]**0.5 141 | plt.fill_between(xi, y[:,i] - sigma0, y[:,i] + sigma0, alpha=0.3, color='red') 142 | sigma = noise_level2[:,i]**0.5 143 | plt.fill_between(xi, y[:,i] - sigma, y[:,i] + sigma, alpha=0.3, color='gray') 144 | idx = numpy.where(noise_level2[:,i] != noise_level[:,i])[0] 145 | lo, hi = y[:,i].min(), y[:,i].max() 146 | plt.plot(xi, lo+sigma0, color='b') 147 | plt.plot(xi, lo+0*sigma0, color='b') 148 | plt.vlines(idx, lo, hi, color='g', alpha=0.1, lw=0.1) 149 | plt.ylim(lo, hi) 150 | #plt.xlim(500, 3500) 151 | plt.savefig('musefuse_data%d.pdf' % (i+1), bbox_inches='tight') 152 | plt.close() 153 | 154 | noise_level = noise_level2 155 | 156 | """ 157 | 158 | Definition of the problem 159 | - parameter space (here: 3d) 160 | - likelihood function which consists of 161 | - model function ("slow predicting function") 162 | - data comparison 163 | 164 | """ 165 | 166 | paramnames = ['Z', 'logSFtau', 'SFage', 'z', 'EBV'] #, 'misfit'] 167 | nparams = len(paramnames) 168 | 169 | zlo = float(sys.argv[3]) 170 | zhi = float(sys.argv[4]) 171 | filenames = sys.argv[5:] 172 | grid = [] 173 | 174 | for iZ, filename in enumerate(filenames): 175 | print(filename) 176 | data = numpy.loadtxt(filename) 177 | model_wavelength = data[:,0] 178 | model_templates = data[:,1:].transpose() 179 | grid.append(model_templates) 180 | 181 | inversewavelength_grid = numpy.linspace(1/10000., 1/4000., 2000) 182 | # sigma is applied on that grid 183 | # to convert to km/s, we need the wavelength, e.g. 
at 4000 and the element size 184 | inversewavelength_gridwidth_A = 0.24 / 5 # A at 4000 (the end of this grid) 185 | 186 | Zs = numpy.log10([0.0001, 0.0004, 0.004, 0.008, 0.02, 0.05, 0.1]) 187 | sftaus = numpy.log10(numpy.array([1, 4, 10, 40, 100, 400, 1000, 4000]) * 1.e6) 188 | sfages = numpy.linspace(0, 13, 26) 189 | ages = numpy.array([0.000E+00, 1.000E+05, 1.412E+05, 1.585E+05, 1.778E+05, 1.995E+05, 2.239E+05, 2.512E+05, 2.818E+05, 3.162E+05, 3.548E+05, 3.981E+05, 4.467E+05, 5.012E+05, 5.623E+05, 6.310E+05, 7.080E+05, 7.943E+05, 8.913E+05, 1.000E+06, 1.047E+06, 1.096E+06, 1.148E+06, 1.202E+06, 1.259E+06, 1.318E+06, 1.380E+06, 1.445E+06, 1.514E+06, 1.585E+06, 1.660E+06, 1.738E+06, 1.820E+06, 1.906E+06, 1.995E+06, 2.089E+06, 2.188E+06, 2.291E+06, 2.399E+06, 2.512E+06, 2.630E+06, 2.754E+06, 2.884E+06, 3.020E+06, 3.162E+06, 3.311E+06, 3.467E+06, 3.631E+06, 3.802E+06, 3.981E+06, 4.169E+06, 4.365E+06, 4.571E+06, 4.786E+06, 5.012E+06, 5.248E+06, 5.495E+06, 5.754E+06, 6.026E+06, 6.310E+06, 6.607E+06, 6.918E+06, 7.244E+06, 7.586E+06, 7.943E+06, 8.318E+06, 8.710E+06, 9.120E+06, 9.550E+06, 1.000E+07, 1.047E+07, 1.096E+07, 1.148E+07, 1.202E+07, 1.259E+07, 1.318E+07, 1.380E+07, 1.445E+07, 1.514E+07, 1.585E+07, 1.660E+07, 1.738E+07, 1.820E+07, 1.906E+07, 1.995E+07, 2.089E+07, 2.188E+07, 2.291E+07, 2.399E+07, 2.512E+07, 2.630E+07, 2.754E+07, 2.900E+07, 3.000E+07, 3.100E+07, 3.200E+07, 3.300E+07, 3.400E+07, 3.500E+07, 3.600E+07, 3.700E+07, 3.800E+07, 3.900E+07, 4.000E+07, 4.250E+07, 4.500E+07, 4.750E+07, 5.000E+07, 5.250E+07, 5.500E+07, 5.709E+07, 6.405E+07, 7.187E+07, 8.064E+07, 9.048E+07, 1.015E+08, 1.139E+08, 1.278E+08, 1.434E+08, 1.609E+08, 1.805E+08, 2.026E+08, 2.273E+08, 2.550E+08, 2.861E+08, 3.210E+08, 3.602E+08, 4.042E+08, 4.535E+08, 5.088E+08, 5.709E+08, 6.405E+08, 7.187E+08, 8.064E+08, 9.048E+08, 1.015E+09, 1.139E+09, 1.278E+09, 1.434E+09, 1.609E+09, 1.680E+09, 1.700E+09, 1.800E+09, 1.900E+09, 2.000E+09, 2.100E+09, 2.200E+09, 2.300E+09, 2.400E+09, 2.500E+09, 2.600E+09, 2.750E+09, 3.000E+09, 3.250E+09, 3.500E+09, 3.750E+09, 4.000E+09, 4.250E+09, 4.500E+09, 4.750E+09, 5.000E+09, 5.250E+09, 5.500E+09, 5.750E+09, 6.000E+09, 6.250E+09, 6.500E+09, 6.750E+09, 7.000E+09, 7.250E+09, 7.500E+09, 7.750E+09, 8.000E+09, 8.250E+09, 8.500E+09, 8.750E+09, 9.000E+09, 9.250E+09, 9.500E+09, 9.750E+09, 1.000E+10, 1.025E+10, 1.050E+10, 1.075E+10, 1.100E+10, 1.125E+10, 1.150E+10, 1.175E+10, 1.200E+10, 1.225E+10, 1.250E+10, 1.275E+10, 1.300E+10, 1.325E+10, 1.350E+10, 1.375E+10, 1.400E+10, 1.425E+10, 1.450E+10, 1.475E+10, 1.500E+10, 1.525E+10, 1.550E+10, 1.575E+10, 1.600E+10, 1.625E+10, 1.650E+10, 1.675E+10, 1.700E+10, 1.725E+10, 1.750E+10, 1.775E+10, 1.800E+10, 1.825E+10, 1.850E+10, 1.875E+10, 1.900E+10, 1.925E+10, 1.950E+10, 1.975E+10, 2.000E+10])[::2] 190 | 191 | nZ = len(Zs) 192 | nSFage = len(sfages) 193 | nSFtau = len(sftaus) 194 | #nspec2 = models.shape 195 | #assert nspec2 == nspec 196 | #models /= 1e-10 + models[:,:,:,2000].reshape((nZ, nSFage, nSFtau, 1)) # normalise somewhere to one 197 | 198 | """ 199 | nspec = 3000 200 | #models = models[:,:,:,500:3500] 201 | y = y[500:3500,:] 202 | wavelength = wavelength[500:3500] 203 | noise_level = noise_level[500:3500,:] 204 | """ 205 | y = y.astype(numpy.float64).copy() 206 | noise_level = noise_level.astype(numpy.float64).copy() 207 | 208 | wavelength = wavelength / 10. 209 | model_wavelength = model_wavelength / 10. 
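# The block below tabulates the Calzetti (2000) starburst attenuation curve
# k(lambda) on the model wavelength grid (now in nm, after the /10 above):
#   k = 2.659*(-2.156 + 1.509e3/l - 0.198e6/l^2 + 0.011e9/l^3) + 4.05   for l < 630 nm
#   k = 2.659*(-1.857 + 1.040e3/l) + 4.05                               for l >= 630 nm
# i.e. the usual form with lambda in micron, rewritten for nanometres.
# model() further down uses the SSP grid loaded above: it weights the
# templates with a delayed-exponential star-formation history
# sfh(t) ~ t/tau^2 * exp(-t/tau), multiplies by the age-bin widths,
# normalises the summed template, applies this attenuation curve scaled by
# EBV, and finally redshifts and interpolates onto the observed wavelengths.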
210 | calzetti_result = numpy.zeros_like(model_wavelength) 211 | mask = (model_wavelength < 630) 212 | calzetti_result[mask] = 2.659 * (-2.156 + 1.509e3 / model_wavelength[mask] - 213 | 0.198e6 / model_wavelength[mask] ** 2 + 214 | 0.011e9 / model_wavelength[mask] ** 3) + 4.05 215 | 216 | # Attenuation between 630 nm and 2200 nm 217 | mask = (model_wavelength >= 630) 218 | calzetti_result[mask] = 2.659 * (-1.857 + 1.040e3 / model_wavelength[mask]) + 4.05 219 | 220 | import scipy.interpolate, scipy.ndimage 221 | 222 | def model(Z, SFtau, sfage, z, EBV): 223 | iZ = numpy.where(Zs <= Z)[-1][-1] 224 | #print(' selecting Z: %d' % iZ) 225 | model_templates = grid[iZ] 226 | #print(' template max value:', model_templates.max(), model_templates.shape) 227 | assert numpy.all(model_templates>=0), model_templates 228 | # convolve the template 229 | 230 | # SFage = 0-13 (Gyrs). 231 | #print(' selecting sfage: %.2f' % sfage) 232 | # ----123456789SFage________ --age--> 233 | tsinceSF = sfage * 1.e9 - ages 234 | tsinceSF[tsinceSF <= 0] = 0 235 | # star formation history is a (delayed) exponential decline. 236 | SFtau = float(SFtau) 237 | #print(' selecting SFtau: %.2f' % SFtau) 238 | sfh = tsinceSF / SFtau**2 * numpy.exp(-tsinceSF/SFtau) 239 | sfh /= sfh.max() 240 | assert numpy.all(sfh>=0), sfh 241 | #print(' ages: ', ages) 242 | #print(' tsinceSF: ', tsinceSF) 243 | #print(' sfh: ', sfh) 244 | # before sfage, no stars 245 | age_weight = ages[1:] - ages[:-1] 246 | assert numpy.all(age_weight>=0), age_weight 247 | 248 | # weight stellar templates with this SFH 249 | #print(model_templates.shape, sfh.shape, age_weight.shape) 250 | template = numpy.sum(model_templates[:-1] * \ 251 | sfh[:-1].reshape((-1,1)) * age_weight.reshape((-1,1)), axis=0) 252 | assert template.shape == (len(model_wavelength),), template.shape 253 | #print(' template max value after sfh convolution:', template.max()) 254 | # normalise template at the highest wavelength 255 | template /= 1e-10 + template[2050] 256 | 257 | # apply calzetti extinction law at restframe 258 | template = template * 10**(-2.5 * calzetti_result * EBV) 259 | #print(' template max value after extinction:', template.max()) 260 | 261 | #template = numpy.interp(x=inversewavelength_grid, xp=1./model_wavelength[::-1], fp=template[::-1]) 262 | # 263 | ## add Doppler blurring 264 | ## sigma_4000 is something like a readshift: 265 | ## f = f_0 * (1 + v/c) 266 | #sigma = 1 + v / 300000. 
267 | ## if sigma is 1A at 4000A, then on the 1/lam grid it should be this wide: 268 | #sigma_grid = sigma * 4000 / inversewavelength_gridwidth_A 269 | ## convolve: 270 | #template = scipy.ndimage.filters.gaussian_filter1d(template, sigma_grid) 271 | 272 | # convert back to lambda 273 | 274 | # redshift / Doppler shift 275 | # interpolate template onto data grid 276 | # we go to the model at the restframe wavelength, which is bluer 277 | # template = numpy.interp(x=wavelength / (1 + z), xp=inversewavelength_grid, fp=template) 278 | template = numpy.interp(x=wavelength / (1 + z), xp=model_wavelength, fp=template) 279 | #print(' template max value after redshifting:', template.max()) 280 | 281 | #template = model_interp([Z, sfage, SFtau])[0] 282 | assert template.shape == (nspec,), template.shape 283 | #assert numpy.all(numpy.isfinite(exttemplate)), exttemplate 284 | return template 285 | 286 | if True: 287 | #O = 20 288 | Z, SFtau, SFage, z, EBV = -2, 1.e8, 1, 0, 0 289 | for Z in [-4, -2, -1]: 290 | ypred = model(Z, SFtau, SFage, z, EBV) 291 | plt.plot(wavelength, ypred, label='Z=%s' % Z) 292 | plt.legend(loc='best') 293 | plt.savefig('musefuse_model_Z.pdf', bbox_inches='tight') 294 | plt.close() 295 | Z = -2 296 | for SFtau in [6., 6.1, 6.3, 6.5, 7., 8., 9.]: 297 | ypred = model(Z, 10**SFtau, SFage, z, EBV) 298 | plt.plot(wavelength, ypred, label='SFtau=${10}^{%s}$' % SFtau) 299 | plt.legend(loc='best') 300 | plt.savefig('musefuse_model_SFtau.pdf', bbox_inches='tight') 301 | plt.close() 302 | SFtau = 1e8 303 | for SFage in [0.001, 0.01, 0.1, 1, 6, 12]: 304 | ypred = model(Z, SFtau, SFage, z, EBV) 305 | plt.plot(wavelength, ypred, label='SFage=%s' % SFage) 306 | plt.legend(loc='best') 307 | plt.savefig('musefuse_model_SFage.pdf', bbox_inches='tight') 308 | plt.close() 309 | SFage = 1 310 | for z in [0, 0.1, 0.2, 0.3, 0.4, 0.5]: 311 | ypred = model(Z, SFtau, SFage, z, EBV) 312 | plt.plot(wavelength, ypred, label='z=%s' % z) 313 | plt.legend(loc='best') 314 | plt.savefig('musefuse_model_z.pdf', bbox_inches='tight') 315 | plt.close() 316 | z = 0. 317 | for EBV in [0, 0.5, 1]: 318 | ypred = model(Z, SFtau, SFage, z, EBV) 319 | plt.plot(wavelength, ypred, label='EBV=%s' % EBV) 320 | plt.legend(loc='best') 321 | plt.savefig('musefuse_model_EBV.pdf', bbox_inches='tight') 322 | plt.close() 323 | 324 | 325 | def priortransform(cube): 326 | # definition of the parameter width, by transforming from a unit cube 327 | cube = cube.copy() 328 | #cube[0] = 10**(cube[0] * 4 - 2) # plateau 329 | cube[0] = cube[0] * (Zs.max() - Zs.min()) + Zs.min() 330 | cube[1] = cube[1] * (sftaus.max() - sftaus.min()) + sftaus.min() 331 | cube[2] = cube[2] * (sfages.max() - sfages.min()) + sfages.min() 332 | #cube[4] = cube[4] * 3 + 1 # v (km/s) 333 | cube[3] = cube[3] * (zhi - zlo) + zlo # z 334 | cube[4] = cube[4] * 2 # E(B-V) 335 | #cube[8] = cube[8] * 4 - 1 # misfit 336 | return cube 337 | 338 | def priortransform_simple(cube): 339 | # definition of the parameter width, by transforming from a unit cube 340 | cube = cube.copy() 341 | #cube[0] = 10**(cube[0] * 4 - 2) # plateau 342 | cube[0] = cube[0] * (sftaus.max() - sftaus.min()) + sftaus.min() 343 | cube[1] = cube[1] * (sfages.max() - sfages.min()) + sfages.min() 344 | cube[2] = cube[2] * (zhi - zlo) + zlo # z 345 | cube[3] = cube[3] * 2 # E(B-V) 346 | return cube 347 | 348 | # the following is a python-only implementation of the likelihood 349 | # @ params are the parameters (as transformed by priortransform) 350 | # @ data_mask is which data sets to consider. 
351 | # returns a likelihood vector 352 | Lmax = -1e100 353 | Lmax = -1e100 * numpy.ones(ndata) 354 | def multi_loglikelihood(params, data_mask): 355 | global Lmax 356 | O, Z, logSFtau, SFage, z, EBV = params 357 | SFtau = 10**logSFtau 358 | # predict the model 359 | ypred = model(Z, SFtau, SFage, z, EBV) 360 | # do the data comparison 361 | #print ypred.shape, y.shape, data_mask 362 | ndata = data_mask.sum() 363 | if (ypred == 0).all(): 364 | # give low probability to solutions with no stars 365 | return numpy.ones(ndata) * -1e100 366 | ypred += O 367 | 368 | yd = y[:,data_mask] 369 | vd = noise_level[:,data_mask] #+ 10**logvar 370 | #vd[vd > 2 * numpy.median(vd)] = 1000 371 | 372 | # simple likelihood, would need a normalisation factor: 373 | # L = -0.5 * numpy.nansum((ypred.reshape((-1,1)) - yd)**2/vd, axis=0) 374 | L = numpy.zeros(ndata) 375 | 376 | for i in numpy.arange(ndata): 377 | # scaled likelihood, like LePhare 378 | # s = sum[OjMj/sigmaj^2] / sum[Mj^2/sigmaj^2] 379 | s = numpy.nansum(yd[:,i] * ypred / vd[:,i]) / (numpy.nansum(ypred**2 / vd[:,i]) + 1e-10) 380 | assert numpy.isfinite(s), (s, ypred, ypred**2, yd[:,i], vd[:,i]) 381 | # chi2 = sum[(Oi - s*Mi)^2 / sigmai^2] 382 | chi2 = numpy.nansum((yd[:,i] - s * ypred)**2 / vd[:,i]) # + numpy.log(2*numpy.pi*vd)) 383 | L[i] = -0.5 * chi2 + numpy.random.uniform() * 1e-5 384 | j = numpy.where(data_mask)[0][i] 385 | if L[i] > Lmax[j]: 386 | Lmax[j] = L[i] 387 | print('plotting...') 388 | plt.figure(figsize=(20,20)) 389 | plt.subplot(3, 1, 1) 390 | plt.title(str(params) + ' : chi2:' + str(chi2)) 391 | #mask = vd[:,i] < 2 * numpy.median(vd[:,i]) 392 | #mask = numpy.isfinite(vd[:,i]) 393 | mask = Ellipsis 394 | plt.plot(wavelength, yd[mask,i], color='k', alpha=0.5) 395 | plt.plot(wavelength, s * ypred[mask], color='r') 396 | plt.ylim(yd[mask,i].min(), yd[mask,i].max()) 397 | plt.subplot(3, 1, 2) 398 | plt.plot(wavelength, ypred[mask], color='k') 399 | plt.subplot(3, 1, 3) 400 | plt.plot(wavelength, vd[mask,i], color='k') 401 | plt.yscale('log') 402 | plt.savefig('musefuse_bestfit_%d.pdf' % (i+1), bbox_inches='tight') 403 | plt.close() 404 | time.sleep(0.1) 405 | #print chi2 406 | assert L.shape == (ndata,), (L.shape, ypred.shape, y.shape, data_mask) 407 | return L 408 | 409 | def multi_loglikelihood_vectorized(params, data_mask): 410 | global Lmax 411 | O, Z, logSFtau, SFage, z, EBV = params 412 | SFtau = 10**logSFtau 413 | # predict the model 414 | ypred = model(Z, SFtau, SFage, z, EBV) 415 | # do the data comparison 416 | ndata = data_mask.sum() 417 | if (ypred == 0).all(): 418 | # give low probability to solutions with no stars 419 | return numpy.ones(ndata) * -1e100 420 | ypred += O 421 | 422 | yd = y[:,data_mask] 423 | vd = noise_level[:,data_mask] 424 | assert numpy.isfinite(yd).all() 425 | assert numpy.isfinite(vd).all() 426 | assert numpy.isfinite(ypred).all() 427 | 428 | ypreds = ypred.reshape((-1,1)) 429 | s = numpy.sum(yd * ypreds / vd, axis=0) / (numpy.sum(ypreds**2 / vd, axis=0) + 1e-10) 430 | assert s.shape == (ndata,), s.shape 431 | assert numpy.isfinite(s).all() 432 | chi2 = numpy.sum((yd - s.reshape((1,-1)) * ypreds)**2 / vd, axis=0) 433 | L = -0.5 * chi2 + numpy.random.uniform() * 1e-5 434 | 435 | assert L.shape == (ndata,), (L.shape, ypred.shape, y.shape, data_mask.sum()) 436 | 437 | #for j, i in enumerate(numpy.where(L > Lmax[data_mask])[0]): 438 | for j, i in enumerate(numpy.where(data_mask)[0]): 439 | if not (L[j] > Lmax[i]): continue 440 | Lmax[i] = L[j] 441 | if i % (1 + ndata // 3) != 0: continue 442 | 
print('updating bestfit plot of %d ... chi2: %.2f' % (i+1, chi2[j])) 443 | #print ' ', yd.shape, yd[:,j].shape, ypred.shape 444 | plt.figure(figsize=(20,20)) 445 | plt.subplot(3, 1, 1) 446 | plt.title('%s : chi2: %.2f' % (params, chi2[j])) 447 | #mask = vd[:,i] < 2 * numpy.median(vd[:,i]) 448 | #mask = numpy.isfinite(vd[:,i]) 449 | mask = Ellipsis 450 | plt.plot(wavelength, yd[mask,j], color='k', alpha=0.5) 451 | plt.plot(wavelength, s[j] * ypred[mask], color='r') 452 | plt.ylim(yd[mask,j].min(), yd[mask,j].max()) 453 | plt.subplot(3, 1, 2) 454 | plt.plot(wavelength, ypred[mask], color='k') 455 | plt.subplot(3, 1, 3) 456 | plt.plot(wavelength, vd[mask,j], color='k') 457 | plt.yscale('log') 458 | plt.savefig('musefuse_bestfit_%d.pdf' % (i+1), bbox_inches='tight') 459 | plt.close() 460 | time.sleep(0.1) 461 | 462 | return L 463 | 464 | def multi_loglikelihood_vectorized_short(params, data_mask): 465 | O, Z, logSFtau, SFage, z, EBV = params 466 | SFtau = 10**logSFtau 467 | # predict the model 468 | ypred = model(Z, SFtau, SFage, z, EBV) 469 | # do the data comparison 470 | if (ypred == 0).all(): 471 | # give low probability to solutions with no stars 472 | return numpy.ones(data_mask.sum()) * -1e100 473 | ypred += O 474 | 475 | yd = y[:,data_mask] 476 | vd = noise_level[:,data_mask] 477 | ypreds = ypred.reshape((-1,1)) 478 | s = numpy.sum(yd * ypreds / vd, axis=0) / (numpy.sum(ypreds**2 / vd, axis=0) + 1e-10) 479 | chi2 = numpy.sum((yd - s.reshape((1,-1)) * ypreds)**2 / vd, axis=0) 480 | L = -0.5 * chi2 + numpy.random.uniform() * 1e-5 481 | return L 482 | 483 | import numexpr as ne 484 | def multi_loglikelihood_numexpr(params, data_mask): 485 | O, Z, logSFtau, SFage, z, EBV = params 486 | SFtau = 10**logSFtau 487 | # predict the model 488 | ypred = model(Z, SFtau, SFage, z, EBV) 489 | # do the data comparison 490 | if (ypred == 0).all(): 491 | # give low probability to solutions with no stars 492 | return numpy.ones(data_mask.sum()) * -1e100 493 | ypred += O 494 | 495 | yd = y[:,data_mask] 496 | vd = noise_level[:,data_mask] 497 | ypreds = ypred.reshape((-1,1)) 498 | s1 = ne.evaluate("sum(yd * ypreds / vd, axis=0)") 499 | s2 = ne.evaluate("sum(ypreds**2 / vd, axis=0)") 500 | s = ne.evaluate("s1 / (s2 + 1e-10)").reshape((1,-1)) 501 | return ne.evaluate("sum((yd - s * ypreds)**2 / (-2 * vd), axis=0)") 502 | 503 | from ctypes import * 504 | from numpy.ctypeslib import ndpointer 505 | if int(os.environ.get('OMP_NUM_THREADS', '1')) > 1: 506 | lib = cdll.LoadLibrary('./cmuselike-parallel.so') 507 | else: 508 | lib = cdll.LoadLibrary('./cmuselike.so') 509 | lib.like.argtypes = [ 510 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 511 | ndpointer(dtype=numpy.float64, ndim=2, flags='C_CONTIGUOUS'), 512 | ndpointer(dtype=numpy.float64, ndim=1, flags='C_CONTIGUOUS'), 513 | ndpointer(dtype=numpy.bool, ndim=1, flags='C_CONTIGUOUS'), 514 | c_int, 515 | c_int, 516 | ndpointer(dtype=numpy.float64, ndim=1, flags='C_CONTIGUOUS'), 517 | ] 518 | 519 | Lout = numpy.zeros(ndata) 520 | def multi_loglikelihood_clike(params, data_mask): 521 | global Lout 522 | #O = 0 523 | Z, logSFtau, SFage, z, EBV = params 524 | SFtau = 10**logSFtau 525 | # predict the model 526 | ypred = model(Z, SFtau, SFage, z, EBV) 527 | # do the data comparison 528 | if not numpy.any(ypred): 529 | # give low probability to solutions with no stars 530 | return numpy.ones(data_mask.sum()) * -1e100 531 | #ypred += O 532 | 533 | # do everything in C and return the resulting likelihood vector 534 | ret = lib.like(y, noise_level, 
ypred, data_mask, ndata, nspec, Lout) 535 | return Lout[data_mask] + numpy.random.normal(0, 1e-5, size=data_mask.sum()) 536 | 537 | def multi_loglikelihood_simple_clike(params, data_mask): 538 | logSFtau, SFage, z, EBV = params 539 | #Z = 0.012 # solar 540 | Z = 0.004 # Patricio2018 541 | params = Z, logSFtau, SFage, z, EBV 542 | return multi_loglikelihood_clike(params, data_mask) 543 | 544 | if False: 545 | data_mask_all = numpy.ones(ndata) == 1 546 | print('testing vectorised code...') 547 | for i in range(100): 548 | cube = numpy.random.uniform(size=nparams) 549 | params = priortransform(cube) 550 | L = multi_loglikelihood(params, data_mask_all) 551 | L2 = multi_loglikelihood_vectorized(params, data_mask_all) 552 | assert numpy.allclose(L, L2), (L, L2, cube, params) 553 | L2 = multi_loglikelihood_vectorized_short(params, data_mask_all) 554 | assert numpy.allclose(L, L2), (L, L2, cube, params) 555 | L2 = multi_loglikelihood_numexpr(params, data_mask_all) 556 | assert numpy.allclose(L, L2), (L, L2, cube, params) 557 | L2 = multi_loglikelihood_clike(params, data_mask_all) 558 | assert numpy.allclose(L, L2), (L, L2, cube, params) 559 | test_cubes = [priortransform(numpy.random.uniform(size=nparams)) for i in range(1000)] 560 | a = time.time() 561 | [multi_loglikelihood(cube, data_mask_all) for cube in test_cubes] 562 | print('original python code:', time.time() - a) 563 | a = time.time() 564 | [multi_loglikelihood_vectorized(cube, data_mask_all) for cube in test_cubes] 565 | print('vectorised python code:', time.time() - a) 566 | a = time.time() 567 | [multi_loglikelihood_vectorized_short(cube, data_mask_all) for cube in test_cubes] 568 | print('shortened vectorised python code:', time.time() - a) 569 | a = time.time() 570 | [multi_loglikelihood_numexpr(cube, data_mask_all) for cube in test_cubes] 571 | print('numexpr code:', time.time() - a) 572 | a = time.time() 573 | [multi_loglikelihood_clike(cube, data_mask_all) for cube in test_cubes] 574 | print('C code:', time.time() - a) 575 | 576 | #multi_loglikelihood = multi_loglikelihood_vectorized_short 577 | #multi_loglikelihood = multi_loglikelihood_numexpr 578 | multi_loglikelihood = multi_loglikelihood_clike 579 | 580 | prefix = sys.argv[1] 581 | 582 | modelname = os.environ.get('MODEL', 'FULL') 583 | if modelname == 'ZSOL': 584 | paramnames = ['logSFtau', 'SFage', 'z', 'EBV'] 585 | nparams = len(paramnames) 586 | prefix = prefix + '_zsol_' 587 | print('Switching to Zsol model') 588 | multi_loglikelihood = multi_loglikelihood_simple_clike 589 | priortransform = priortransform_simple 590 | elif modelname == 'FULL': 591 | prefix = prefix + '_full_' 592 | pass 593 | else: 594 | assert False, modelname 595 | 596 | """ 597 | 598 | After defining the problem, we use generic code to set up 599 | - Nested Sampling (Multi)Integrator 600 | - Our special sampler 601 | - RadFriends (constrained region draw) 602 | 603 | We start with the latter. 
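For reference, the command line assembled above is: argv[1] the MUSE cube (also reused as the output prefix), argv[2] the region file, argv[3] and argv[4] the redshift range zlo/zhi, and argv[5:] one template grid file per metallicity. Behaviour can be tuned through the environment variables MAXDATA, MODEL (FULL or ZSOL), NLIVE_POINTS, SUPERSET_DRAWS, USE_GRAPH, MAXSAMPLES, MINSAMPLES and OMP_NUM_THREADS (the latter selects the parallel cmuselike library). A typical call, with purely illustrative file names, might look like

    MODEL=FULL NLIVE_POINTS=400 MAXDATA=100 python musefuse.py cube.fits sources.reg 0.0 0.7 templates_Z*.dat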
604 | """ 605 | 606 | 607 | from multi_nested_integrator import multi_nested_integrator 608 | from multi_nested_sampler import MultiNestedSampler 609 | from cachedconstrainer import CachedConstrainer, generate_individual_constrainer, generate_superset_constrainer 610 | 611 | superset_constrainer = generate_superset_constrainer() 612 | 613 | cc = CachedConstrainer() 614 | focusset_constrainer = cc.get 615 | _, _, individual_draw_constrained = generate_individual_constrainer() 616 | numpy.random.seed(1) 617 | start_time = time.time() 618 | print('setting up integrator ...') 619 | nlive_points = int(os.environ.get('NLIVE_POINTS','400')) 620 | 621 | # constrained region draw functions 622 | # we try hard to keep information about current regions and subselected regions 623 | # because recomputing the regions is expensive if the likelihood is very fast. 624 | # There are three constrainers: 625 | # - the one of the superset (all data sets) 626 | # - one for each data set if need a individual draw (focussed draw with only one) 627 | # - a memory for recent clusterings, because they might recur in the next iteration(s) 628 | # Note that this does caching not improve the algorithms efficiency 629 | # in fact, not recomputing regions keeps the regions larger, 630 | # leading potentially to slightly more rejections. 631 | # However, there is substantial execution speedup. 632 | 633 | 634 | # now set up sampler and pass the three constrainers 635 | 636 | sampler = MultiNestedSampler(nlive_points = nlive_points, 637 | priortransform=priortransform, multi_loglikelihood=multi_loglikelihood, 638 | ndim=nparams, ndata=ndata, 639 | superset_draw_constrained = superset_constrainer.draw_constrained, 640 | individual_draw_constrained = individual_draw_constrained, 641 | draw_constrained = focusset_constrainer, 642 | nsuperset_draws = int(os.environ.get('SUPERSET_DRAWS', '10')), 643 | use_graph = os.environ.get('USE_GRAPH', '1') == '1' 644 | ) 645 | 646 | superset_constrainer.sampler = sampler 647 | cc.sampler = sampler 648 | print('integrating ...') 649 | max_samples = int(os.environ.get('MAXSAMPLES', 100000)) 650 | min_samples = int(os.environ.get('MINSAMPLES', 0)) 651 | results = multi_nested_integrator(tolerance=0.5, multi_sampler=sampler, min_samples=min_samples, max_samples=max_samples) 652 | duration = time.time() - start_time 653 | print('writing output files ...') 654 | # store results 655 | with h5py.File(prefix + '.out_%d.hdf5' % ndata, 'w') as f: 656 | f.create_dataset('logZ', data=results['logZ'], compression='gzip', shuffle=True) 657 | f.create_dataset('logZerr', data=results['logZerr'], compression='gzip', shuffle=True) 658 | u, x, L, w, mask = list(zip(*results['weights'])) 659 | f.create_dataset('u', data=u, compression='gzip', shuffle=True) 660 | f.create_dataset('x', data=x, compression='gzip', shuffle=True) 661 | f.create_dataset('L', data=L, compression='gzip', shuffle=True) 662 | f.create_dataset('w', data=w, compression='gzip', shuffle=True) 663 | f.create_dataset('mask', data=mask, compression='gzip', shuffle=True) 664 | f.create_dataset('ndraws', data=sampler.ndraws) 665 | f.create_dataset('fiberids', data=goodids[:ndata], compression='gzip', shuffle=True) 666 | f.create_dataset('duration', data=duration) 667 | f.create_dataset('ndata', data=ndata) 668 | 669 | print('logZ = %.1f +- %.1f' % (results['logZ'][0], results['logZerr'][0])) 670 | print('ndraws:', sampler.ndraws, 'niter:', len(w)) 671 | 672 | print('writing statistic ...') 673 | json.dump(dict(ndraws=sampler.ndraws, 
duration=duration, ndata=ndata, niter=len(w)), 674 | open(prefix + '.out_%d.stats.json' % ndata, 'w'), indent=4) 675 | print('done.') 676 | 677 | 678 | --------------------------------------------------------------------------------
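A minimal sketch of how the HDF5 output written by musefuse.py can be read back; the file name is a placeholder for whatever name the prefix + '.out_%d.hdf5' pattern above produced, and only dataset names created above are used.

import h5py

with h5py.File('cube.fits_full_.out_100.hdf5', 'r') as f:      # placeholder name
    logZ, logZerr = f['logZ'][()], f['logZerr'][()]             # per-dataset evidence estimates
    x, L, w = f['x'][()], f['L'][()], f['w'][()]                # dead points, likelihoods, weights
    fiberids = f['fiberids'][()]                                # spaxel ids the columns refer to
    print('ndata:', f['ndata'][()], 'ndraws:', f['ndraws'][()], 'duration [s]:', f['duration'][()])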