├── .Rhistory
├── dpp
    ├── samplers
    │   ├── __init__.py
    │   ├── dpp.pyc
    │   ├── __init__.pyc
    │   ├── decompose_kernel.m
    │   ├── elem_sympoly.m
    │   ├── genmult.m
    │   ├── sample_k.m
    │   ├── sample_dpp.m
    │   ├── sample_conditional_dpp.m
    │   ├── sample_dual_dpp.m
    │   ├── dpp.py~
    │   ├── dpp.py
    │   └── bp.m
    ├── __init__.pyc
    ├── __init__.py
    └── __init__.py~
├── demos
    ├── __init__.py~
    ├── matlab
    │   ├── plane_samples.m
    │   └── plane_samples_dual_conditional.m
    ├── dpp_samples.py~
    ├── python
    │   ├── dpp_samples.py
    │   ├── dual_dpp_samples.py
    │   └── dual_dpp_samples.py~
    └── dual_dpp_samples.py~
├── README.md
└── .gitignore


/.Rhistory:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dpp/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | import dpp
2 | 
3 | 


--------------------------------------------------------------------------------
/dpp/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/javiergonzalezh/dpp/HEAD/dpp/__init__.pyc


--------------------------------------------------------------------------------
/dpp/samplers/dpp.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/javiergonzalezh/dpp/HEAD/dpp/samplers/dpp.pyc


--------------------------------------------------------------------------------
/dpp/samplers/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/javiergonzalezh/dpp/HEAD/dpp/samplers/__init__.pyc


--------------------------------------------------------------------------------
/dpp/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore", category=DeprecationWarning)
3 | import samplers
4 | 
5 | 
6 | 


--------------------------------------------------------------------------------
/dpp/samplers/decompose_kernel.m:
--------------------------------------------------------------------------------
1 | function L = decompose_kernel(M)
2 | % DECOMPOSE_KERNEL  Bundle a kernel matrix with its eigendecomposition.
3 | %   L.M is M itself, L.V holds the real part of the eigenvectors (columns)
4 | %   and L.D the real part of the eigenvalues as a column vector.
5 |   [eigvecs,eigvals] = eig(M);
6 |   L = struct('M',M,'V',real(eigvecs),'D',real(diag(eigvals)));


--------------------------------------------------------------------------------
/demos/__init__.py~:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore", category=DeprecationWarning)  # silence DeprecationWarning everywhere
3 | # NOTE(review): the "~" suffix suggests an editor backup; confirm these two imports are still wanted.
4 | import samplers
5 | import demos
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/dpp/__init__.py~:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings("ignore", category=DeprecationWarning)  # silence DeprecationWarning everywhere
3 | # NOTE(review): the "~" suffix suggests an editor backup; dpp/__init__.py no longer imports demos.
4 | import samplers
5 | import demos
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # dpp
2 | Python package to sample from determinantal point processes. Based on the Matlab code of Alex Kulesza (http://web.eecs.umich.edu/~kulesza/)
3 | 
4 | Uses matlab_wrapper to call the Matlab code of A. Kulesza, so before using the package make sure that Matlab is installed and that the Matlab path is added to your .bashrc file.
5 | 
6 | With this package you can use Python to sample from DPPs, dual DPPs, and conditional DPPs (in their standard and dual form).
7 | 
8 | 


--------------------------------------------------------------------------------
/dpp/samplers/elem_sympoly.m:
--------------------------------------------------------------------------------
 1 | function E = elem_sympoly(lambda,k)
 2 | % ELEM_SYMPOLY  Table of elementary symmetric polynomials of lambda.
 3 | %   E is (k+1) x (N+1) with N = length(lambda), and
 4 | %     E(l+1,n+1) = sum_{J \subseteq 1..n, |J| = l} prod_{i \in J} lambda(i)
 5 | %   i.e. the order-l polynomial of the first n entries of lambda.
 6 | 
 7 |   numItems = length(lambda);
 8 |   E = [ones(1,numItems+1); zeros(k,numItems+1)];   % order 0 is always 1
 9 |   for row = 2:k+1
10 |     for col = 2:numItems+1
11 |       % either skip item col-1, or take it together with a smaller subset
12 |       E(row,col) = E(row,col-1) + lambda(col-1)*E(row-1,col-1);
13 |     end
14 |   end


--------------------------------------------------------------------------------
/dpp/samplers/genmult.m:
--------------------------------------------------------------------------------
 1 | function X = genmult(A,B)
 2 | % GENMULT  Matrix product applied along the first non-singleton dim of B.
 3 | %   If A is N x M and B is 1 x ... x 1 x M x K1 x K2 x ..., then X is
 4 | %   1 x ... x 1 x N x K1 x K2 x ... with
 5 | %     X(1,...,1,:,i1,i2,...) = A * B(1,...,1,:,i1,i2,...)
 6 | 
 7 |   % drop the leading singleton dimensions, remembering how many there were
 8 |   [Bshifted,nLead] = shiftdim(B);
 9 |   dims = size(Bshifted);
10 | 
11 |   % flatten everything after the first dimension and multiply once
12 |   X = A * reshape(Bshifted,dims(1),[]);
13 | 
14 |   % restore the trailing dimensions, then the leading singletons
15 |   X = shiftdim(reshape(X,[size(A,1) dims(2:end)]),-nLead);


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | # C extensions
 3 | *.so
 4 | # Packages
 5 | *.egg
 6 | *.egg-info
 7 | dist
 8 | build
 9 | eggs
10 | bin
11 | var
12 | sdist
13 | develop-eggs
14 | .installed.cfg
15 | lib
16 | lib64
17 | # Installer logs
18 | pip-log.txt
19 | # Results from optimization in demos
20 | GPyOpt-results.txt
21 | # figures in notebooks
22 | *.png
23 | # Unit test / coverage reports
24 | .coverage
25 | .tox
26 | nosetests.xml
27 | # Translations
28 | *.mo
29 | # Mr Developer
30 | .mr.developer.cfg
31 | .project
32 | .pydevproject
33 | #vim
34 | *.swp
35 | #bfgs optimiser leaves this lying around
36 | iterate.dat
37 | # Nosetests #
38 | #############
39 | *.noseids
40 | # git merge files #
41 | ###################
42 | *.orig
43 | 


--------------------------------------------------------------------------------
/dpp/samplers/sample_k.m:
--------------------------------------------------------------------------------
 1 | function S = sample_k(lambda,k)
 2 | % SAMPLE_K  Draw k indices into lambda with p(S) \propto prod(lambda \in S).
 3 | %   S is a k x 1 vector of indices in increasing order.
 4 | 
 5 |   % elementary symmetric polynomials provide the normalisers of the marginals
 6 |   E = elem_sympoly(lambda,k);
 7 | 
 8 |   S = zeros(k,1);
 9 |   left = k;                 % how many indices are still to be chosen
10 |   idx = length(lambda);     % candidate index, scanned from last to first
11 |   while left > 0
12 | 
13 |     % probability that idx is chosen when `left` values come from 1:idx
14 |     marg = 1;               % forced when every remaining index must be taken
15 |     if idx ~= left
16 |       marg = lambda(idx) * E(left,idx) / E(left+1,idx+1);
17 |     end
18 | 
19 |     % accept or reject idx, then move on to the next candidate
20 |     if rand < marg
21 |       S(left) = idx;
22 |       left = left - 1;
23 |     end
24 |     idx = idx - 1;
25 |   end


--------------------------------------------------------------------------------
/demos/matlab/plane_samples.m:
--------------------------------------------------------------------------------
 1 | %% Compare a DPP sample with an independent sample of points in the plane
 2 | 
 3 | % settings
 4 | n = 60;      % grid dimension, N = n^2
 5 | sigma = 0.1; % kernel width
 6 | 
 7 | % regular n x n grid of candidate points
 8 | [x y] = meshgrid((1:n)/n);
 9 | 
10 | % Gaussian kernel on the pairwise squared distances between grid points
11 | L = exp(- (bsxfun(@minus,x(:),x(:)').^2 + ...
12 |            bsxfun(@minus,y(:),y(:)').^2) / sigma^2);
13 | 
14 | % draw the DPP sample first; the independent sample copies its size
15 | dpp_sample = sample_dpp(decompose_kernel(L));
16 | ind_sample = randsample(n*n,length(dpp_sample));
17 | 
18 | % left panel: DPP sample in blue
19 | subplot(1,2,1);
20 | plot(x(dpp_sample),y(dpp_sample),'b.');
21 | axis([0 1.02 0 1.02]);
22 | axis square;
23 | set(gca,'XTick',[],'YTick',[]);   % hide the tick marks on both axes
24 | xlabel('DPP');
25 | 
26 | % right panel: independent sample in red
27 | subplot(1,2,2);
28 | plot(x(ind_sample),y(ind_sample),'r.');
29 | axis([0 1.02 0 1.02]);
30 | axis square;
31 | set(gca,'XTick',[],'YTick',[]);
32 | xlabel('Independent');


--------------------------------------------------------------------------------
/demos/dpp_samples.py~:
--------------------------------------------------------------------------------
 1 | import GPy
 2 | import matplotlib.pyplot as plt
 3 | from GPyOpt.util.general import multigrid
 4 | from ..dpp import *
 5 | 
 6 | # Genetate grid
 7 | Ngrid = 50
 8 | bounds = [(-2,2),(-2,2)]
 9 | X = multigrid(bounds, Ngrid)  
10 | 
11 | # Define kernel and kernel matrix
12 | kernel = GPy.kern.RBF(len(bounds), variance=1, lengthscale=0.5) 
13 | L = kernel.K(X)
14 | 
15 | # Number of points of each DPP sample
16 | k = 50
17 | 
18 | # Putative inputs
19 | set = [25,900, 1655,2125]
20 | 
21 | # Samples and plot from original and conditional with standard DPPS
22 | sample          = sample_dpp(L,k)
23 | sample_condset  = sample_conditional_dpp(L,set,k)
24 | 
25 | plt.subplot(1, 2, 1)
26 | plt.plot(X[sample,0],X[sample,1],'.',)
27 | plt.title('Sample from the DPP')
28 | plt.subplot(1, 2, 2)
29 | plt.plot(X[set,0],X[set,1],'k.',markersize=20)
30 | plt.plot(X[sample_condset,0],X[sample_condset,1],'.',)
31 | plt.title('Conditional sample from the DPP')
32 | 
33 | 


--------------------------------------------------------------------------------
/demos/python/dpp_samples.py:
--------------------------------------------------------------------------------
 1 | import GPy
 2 | import matplotlib.pyplot as plt
 3 | from GPyOpt.util.general import multigrid
 4 | from dpp.samplers.dpp import *
 5 | 
 6 | # Genetate grid
 7 | Ngrid = 50
 8 | bounds = [(-2,2),(-2,2)]
 9 | X = multigrid(bounds, Ngrid)  
10 | 
11 | # Define kernel and kernel matrix
12 | kernel = GPy.kern.RBF(len(bounds), variance=1, lengthscale=0.5) 
13 | L = kernel.K(X)
14 | 
15 | # Number of points of each DPP sample
16 | k = 50
17 | 
18 | # Putative inputs
19 | set = [25,900, 1655,2125]
20 | 
21 | # Samples and plot from original and conditional with standard DPPS
22 | sample          = sample_dpp(L,k)
23 | sample_condset  = sample_conditional_dpp(L,set,k)
24 | 
25 | plt.subplot(1, 2, 1)
26 | plt.plot(X[sample,0],X[sample,1],'.',)
27 | plt.title('Sample from the DPP')
28 | plt.subplot(1, 2, 2)
29 | plt.plot(X[set,0],X[set,1],'k.',markersize=20)
30 | plt.plot(X[sample_condset,0],X[sample_condset,1],'.',)
31 | plt.title('Conditional sample from the DPP')
32 | 
33 | 


--------------------------------------------------------------------------------
/demos/python/dual_dpp_samples.py:
--------------------------------------------------------------------------------
 1 | import GPy
 2 | import matplotlib.pyplot as plt
 3 | from GPyOpt.util.general import multigrid
 4 | from dpp.samplers.dpp import *
 5 | 
 6 | # Genetate grid
 7 | Ngrid = 50
 8 | bounds = [(-2,2),(-2,2)]
 9 | X = multigrid(bounds, Ngrid)  
10 | 
11 | # Define kernel and kernel matrix
12 | kernel = GPy.kern.RBF(len(bounds), variance=1, lengthscale=.5) 
13 | L = kernel.K(X)
14 | 
15 | # Number of points of each DPP sample
16 | k = 50
17 | 
18 | # Putative inputs
19 | set = [25,900, 1655,2125]
20 | 
21 | # Samples and plot from original and conditional with dual DPPS
22 | q=200  # truncation
23 | sample = sample_dual_dpp(L,q,k)
24 | sample_condset = sample_dual_conditional_dpp(L,set,q,k)
25 | 
26 | plt.subplot(1, 2, 1)
27 | plt.plot(X[sample,0],X[sample,1],'.',)
28 | plt.title('Sample from the DPP')
29 | plt.subplot(1, 2, 2)
30 | plt.plot(X[set,0],X[set,1],'k.',markersize=20)
31 | plt.plot(X[sample_condset,0],X[sample_condset,1],'.',)
32 | plt.title('Conditional sample from the DPP')
33 | 


--------------------------------------------------------------------------------
/demos/python/dual_dpp_samples.py~:
--------------------------------------------------------------------------------
 1 | import GPy
 2 | import matplotlib.pyplot as plt
 3 | from GPyOpt.util.general import multigrid
 4 | from dpp import sample_dual_dpp, sample_dual_conditional_dpp
 5 | 
 6 | # Genetate grid
 7 | Ngrid = 50
 8 | bounds = [(-2,2),(-2,2)]
 9 | X = multigrid(bounds, Ngrid)  
10 | 
11 | # Define kernel and kernel matrix
12 | kernel = GPy.kern.RBF(len(bounds), variance=1, lengthscale=.5) 
13 | L = kernel.K(X)
14 | 
15 | # Number of points of each DPP sample
16 | k = 50
17 | 
18 | # Putative inputs
19 | set = [25,900, 1655,2125]
20 | 
21 | # Samples and plot from original and conditional with dual DPPS
22 | q=200  # truncation
23 | sample = sample_dual_dpp(L,q,k)
24 | sample_condset = sample_dual_conditional_dpp(L,set,q,k)
25 | 
26 | plt.subplot(1, 2, 1)
27 | plt.plot(X[sample,0],X[sample,1],'.',)
28 | plt.title('Sample from the DPP')
29 | plt.subplot(1, 2, 2)
30 | plt.plot(X[set,0],X[set,1],'k.',markersize=20)
31 | plt.plot(X[sample_condset,0],X[sample_condset,1],'.',)
32 | plt.title('Conditional sample from the DPP')
33 | 


--------------------------------------------------------------------------------
/dpp/samplers/sample_dpp.m:
--------------------------------------------------------------------------------
 1 | function Y = sample_dpp(L,k)
 2 | % sample a set Y from a dpp.  L is a decomposed kernel (a struct with fields
 3 | % V and D, as built by decompose_kernel), and k is (optionally) the size of
 4 | % the set to return.  Y is a sorted column vector of item indices.
 5 | if ~exist('k','var')  
 6 |   % no k given: keep eigenvector n independently with probability D(n)/(1+D(n))
 7 |   D = L.D ./ (1+L.D);
 8 |   v = find(rand(length(D),1) <= D);
 9 | else
10 |   % k-DPP: let sample_k choose exactly k eigenvectors
11 |   v = sample_k(L.D,k);
12 | end
13 | k = length(v);    
14 | V = L.V(:,v);
15 | 
16 | % iterate: each pass picks one item and removes one column of V
17 | Y = zeros(k,1);
18 | for i = k:-1:1
19 |   
20 |   % compute probabilities for each item: squared row norms of V, normalised
21 |   P = sum(V.^2,2);
22 |   P = P / sum(P);
23 | 
24 |   % choose a new item to include (first index whose cumulative mass reaches rand)
25 |   Y(i) = find(rand <= cumsum(P),1);
26 | 
27 |   % choose a vector to eliminate: the first column with a nonzero entry at item Y(i)
28 |   j = find(V(Y(i),:),1);
29 |   Vj = V(:,j);
30 |   V = V(:,[1:j-1 j+1:end]);
31 | 
32 |   % update V: subtract multiples of Vj so that row Y(i) of V becomes zero
33 |   V = V - bsxfun(@times,Vj,V(Y(i),:)/Vj(Y(i)));
34 | 
35 |   % orthogonalize the remaining columns (Gram-Schmidt) and scale them to unit norm
36 |   for a = 1:i-1
37 |     for b = 1:a-1
38 |       V(:,a) = V(:,a) - V(:,a)'*V(:,b)*V(:,b);
39 |     end
40 |     V(:,a) = V(:,a) / norm(V(:,a));
41 |   end
42 | 
43 | end
44 | 
45 | Y = sort(Y);


--------------------------------------------------------------------------------
/demos/dual_dpp_samples.py~:
--------------------------------------------------------------------------------
 1 | import os
 2 | os.chdir("/home/javier/Desktop/dpp")
 3 | 
 4 | import GPy
 5 | import matplotlib.pyplot as plt
 6 | from GPyOpt.util.general import multigrid
 7 | from dpp import sample_dual_dpp, sample_dual_conditional_dpp
 8 | 
 9 | # Genetate grid
10 | Ngrid = 50
11 | bounds = [(-2,2),(-2,2)]
12 | X = multigrid(bounds, Ngrid)  
13 | 
14 | # Define kernel and kernel matrix
15 | kernel = GPy.kern.RBF(len(bounds), variance=1, lengthscale=.5) 
16 | L = kernel.K(X)
17 | 
18 | # Number of points of each DPP sample
19 | k = 50
20 | 
21 | # Putative inputs
22 | set = [25,900, 1655,2125]
23 | 
24 | # Samples and plot from original and conditional with dual DPPS
25 | q=200  # truncation
26 | sample = sample_dual_dpp(L,q,k)
27 | sample_condset = sample_dual_conditional_dpp(L,set,q,k)
28 | 
29 | plt.subplot(1, 2, 1)
30 | plt.plot(X[sample,0],X[sample,1],'.',)
31 | plt.title('Sample from the DPP')
32 | plt.subplot(1, 2, 2)
33 | plt.plot(X[set,0],X[set,1],'k.',markersize=20)
34 | plt.plot(X[sample_condset,0],X[sample_condset,1],'.',)
35 | plt.title('Conditional sample from the DPP')
36 | 


--------------------------------------------------------------------------------
/dpp/samplers/sample_conditional_dpp.m:
--------------------------------------------------------------------------------
 1 | % Javier Gonzalez
 2 | % 2015
 3 | %
 4 | % Given a kernel matrix L and a list of elements set, returns a sample from
 5 | % the k-DPP conditioned on the elements of set being included.
 6 | %    L:     kernel matrix (n x n)
 7 | %    set:   indices of the conditioning elements, i.e. rows/columns of L.
 8 | %           Counted from one (they index Matlab arrays directly); may be a
 9 | %           row or a column vector.
10 | %    k:     size of the returned sample, conditioning elements included
11 | %    Y:     row vector: set followed by the k-length(set) sampled indices
12 | 
13 | function Y = sample_conditional_dpp(L,set,k)
14 |     set   = set(:)';   % always a row, so [set, ...] below concatenates for column input too
15 |     [n,~] = size(L);
16 |     n_set = length(set);
17 | 
18 |     % Kernel of the conditional DPP over the elements outside set:
19 |     %   inv( [inv(L + I_rest)]_rest ) - I
20 |     % I_rest is the identity with zeros on the diagonal at set, and [.]_rest
21 |     % removes the rows and columns listed in set.
22 |     e                  = ones(1,n);
23 |     e(set)             = 0;
24 |     Id                 = diag(e);
25 |     L_aux              = inv(Id + L);
26 |     L_aux(set,:)       = [];
27 |     L_aux(:,set)       = [];
28 |     L_minor            = inv(L_aux);
29 |     L_compset          = L_minor - eye(n-n_set);
30 | 
31 |     % position in the reduced kernel -> index in the original L
32 |     index_reduced = 1:n;
33 |     index_reduced(set) = [];
34 | 
35 |     % the set already contributes n_set elements, so draw the other k-n_set
36 |     sample_conditional = sample_dpp(decompose_kernel(L_compset),k-n_set);
37 | 
38 |     % final sample that includes the set
39 |     Y = [set,index_reduced(sample_conditional)];
40 |     end


--------------------------------------------------------------------------------
/dpp/samplers/sample_dual_dpp.m:
--------------------------------------------------------------------------------
 1 | function Y = sample_dual_dpp(B,C,k)
 2 | % sample from a dual DPP (non-structured); Y is a sorted column of item indices
 3 | % B is the N x d feature matrix (L would be B*B', but is too big to work with)
 4 | % C is the decomposed covariance matrix, computed using:
 5 | %   C = decompose_kernel(B'*B);
 6 | % k is (optionally) the size of the set to return.
 7 | 
 8 | if ~exist('k','var')  
 9 |   % no k given: keep eigenvector n independently with probability D(n)/(1+D(n))
10 |   D = C.D ./ (1+C.D);
11 |   v = find(rand(length(D),1) <= D);
12 | else
13 |   % k-DPP: let sample_k choose exactly k eigenvectors
14 |   v = sample_k(C.D,k);
15 | end
16 | k = length(v);
17 | V = C.V(:,v);
18 | 
19 | % rescale eigenvectors so they normalize in the projected space (divide column n by sqrt(D(n)))
20 | V = bsxfun(@times,V,1./sqrt(C.D(v)'));
21 | 
22 | % iterate: each pass picks one item and removes one column of V
23 | Y = zeros(k,1);
24 | for i = k:-1:1
25 | 
26 |   % compute probabilities for each item: squared row norms of B*V, normalised
27 |   P = sum((B * V).^2,2);
28 |   P = P / sum(P);
29 | 
30 |   % choose a new item to include (first index whose cumulative mass reaches rand)
31 |   Y(i) = find(rand <= cumsum(P),1);
32 |   
33 |   % choose a vector to eliminate: the first column whose projection S at item Y(i) is nonzero
34 |   S = B(Y(i),:) * V;
35 |   j = find(S,1);
36 |   Vj = V(:,j);
37 |   Sj = S(j);
38 |   V = V(:,[1:j-1 j+1:end]);
39 |   S = S(:,[1:j-1 j+1:end]);
40 | 
41 |   % update V: subtract multiples of Vj so that B(Y(i),:)*V becomes zero
42 |   V = V - bsxfun(@times,Vj,S/Sj);
43 | 
44 |   % orthogonalize in the projected space, i.e. with C.M as the inner product
45 |   for a = 1:i-1
46 |     for b = 1:a-1
47 |       V(:,a) = V(:,a) - (V(:,a)'*C.M *V(:,b))*V(:,b);
48 |     end
49 |     V(:,a) = V(:,a) / sqrt(V(:,a)'*C.M*V(:,a));
50 |   end
51 | 
52 | end
53 | 
54 | Y = sort(Y);
55 | 


--------------------------------------------------------------------------------
/demos/matlab/plane_samples_dual_conditional.m:
--------------------------------------------------------------------------------
 1 | % Javier Gonzalez
 2 | % 2015
 3 | %
 4 | % test and compare samples from a k-dpp, a conditional k-dpp, and a 
 5 | % conditional k-dpp in dual form
 6 | 
 7 | % config
 8 | n = 60;      % grid dimension, N = n^2
 9 | sigma = 0.1; % kernel width
10 | 
11 | % choose a grid of points
12 | [x y] = meshgrid((1:n)/n);
13 | 
14 | % gaussian kernel
15 | L = exp(- (bsxfun(@minus,x(:),x(:)').^2 + ...
16 |            bsxfun(@minus,y(:),y(:)').^2) / sigma^2);
17 | % conditioning items (indices into the grid); this variable hides Matlab's set() in the script
18 | set = [10,100,500,750,1500,3500];
19 | k   = 50;                           % number of elements in the sample
20 | q   = 100;                          % effective dimensions used
21 | 
22 | % sample from k-DPP
23 | dpp_sample             = sample_dpp(decompose_kernel(L),k);
24 | 
25 | % conditional sample from DPP, timed with cputime
26 | t_standard = cputime;
27 | dpp_sample_conditional = sample_conditional_dpp(L,set,k);
28 | t_standard = cputime - t_standard; 
29 | % NOTE(review): no sample_dual_conditional_dpp.m is listed under dpp/samplers; confirm it is on the path
30 | % conditional sample from DPP in dual form, timed with cputime
31 | t_dual = cputime;
32 | dpp_dual_sample_conditional = sample_dual_conditional_dpp(L,q,set,k);
33 | t_dual = cputime - t_dual ;
34 | 
35 | 
36 | % plot: unconditional k-DPP sample
37 | subplot(1,3,1);
38 | plot(x(dpp_sample),y(dpp_sample),'b.');
39 | axis([0 1.02 0 1.02]);
40 | xlabel('DPP');
41 | % conditional sample; the conditioning items are drawn as stars
42 | subplot(1,3,2);
43 | plot(x(set),y(set),'r*');
44 | hold on;
45 | plot(x(dpp_sample_conditional),y(dpp_sample_conditional),'r.');
46 | axis([0 1.02 0 1.02]);
47 | xlabel('Conditional DPP');
48 | % conditional sample from the dual sampler; the conditioning items are drawn as stars
49 | subplot(1,3,3);
50 | plot(x(set),y(set),'r*');
51 | hold on;
52 | plot(x(dpp_dual_sample_conditional),y(dpp_dual_sample_conditional),'r.');
53 | axis([0 1.02 0 1.02]);
54 | xlabel('Conditional Dual DPP');
55 | 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/dpp/samplers/dpp.py~:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.sparse.linalg.eigen.arpack import eigsh as largest_eigsh
  3 | from  numpy.linalg import inv
  4 | mtb = None
  5 | 
  6 | def sample_dpp(L,k=None):
  7 |     '''
  8 |     Wrapper function for the sample_dpp Matlab code written by Alex Kulesza
  9 |     Given a kernel matrix L, returns a sample from a k-DPP.
 10 |     The code is hacked in a way that if a set A is provied, samples from a conditional 
 11 |     dpp given A are produced
 12 |     L:     kernel matrix
 13 |     k:     size of the sample from the DPP
 14 |     set:   index of the conditional elements. Integer numpy array containing the locations 
 15 |             (starting in zero) relative to the rows of L.
 16 |        
 17 |     '''
 18 |     # Matlab link
 19 |     global mtb    
 20 |     if mtb == None:
 21 |         import matlab_wrapper
 22 |         mtb = matlab_wrapper.MatlabSession()
 23 | 
 24 |     # load values in Matlab and get sample
 25 |     mtb.put('L',L)
 26 |     if k!=None: 
 27 |         k = np.array([[k]])  # matlab only undenstand matrices 
 28 |         mtb.put('k',k)
 29 |         mtb.eval("dpp_sample = sample_dpp(decompose_kernel(L),k)")
 30 |     else:
 31 |         mtb.eval("dpp_sample = sample_dpp(decompose_kernel(L))")
 32 |         
 33 |     #dpp_sample = mtb.getvalue('dpp_sample')
 34 |     dpp_sample = mtb.get('dpp_sample')
 35 |     return dpp_sample.astype(int)
 36 | 
 37 | 
 38 | def sample_conditional_dpp(L,set0,k=None):
 39 |     '''
 40 |     Wrapper function for the sample_dpp Matlab code written by Alex Kulesza
 41 |     Given a kernel matrix L, returns a sample from a k-DPP.
 42 |     The code is hacked in a way that if a set A is provied, samples from a conditional 
 43 |     dpp given A are produced
 44 |     L:     kernel matrix
 45 |     set:   index of the conditional elements. Integer numpy array containing the locations 
 46 |             (starting in zero) relative to the rows of L.
 47 |     k:     size of the sample from the DPP
 48 |     '''
 49 |     # Calculate the kernel for the marginal
 50 |     Id = np.array([1]*L.shape[0])
 51 |     Id[set0] = 0
 52 |     Id = np.diag(Id)    
 53 |     L_compset_full = inv(Id + L)
 54 |     L_minor = inv(np.delete(np.delete(L_compset_full,tuple(set0), axis=1),tuple(set0),axis=0))
 55 |     L_compset = L_minor - np.diag([1]*L_minor.shape[0])
 56 |     
 57 |     # Compute the sample
 58 |     sample = sample_dpp(L_compset,k)
 59 |     if k==2: sample = [sample]
 60 |     return np.concatenate((set0,sample) ,axis=0)
 61 | 
 62 | 
 63 | def sample_dual_conditional_dpp(L,set0,q,k=None):
 64 |     '''
 65 |     Wrapper function for the sample_dpp Matlab code written by Alex Kulesza
 66 |     Given a kernel matrix L, returns a sample from a dual k-DPP.
 67 |     The code is hacked in a way that if a set0 A is provied, samples from a conditional 
 68 |     dpp given A are produced
 69 |     L:     kernel matrix
 70 |     set0:   index of the conditional elements. Integer numpy array containing the locations 
 71 |            (starting in zero) relative to the rows of L.
 72 |     q:     is the number of used eigenvalues
 73 |     k:     size of the sample from the DPP
 74 |     '''
 75 |     # Calculate the kernel of the marginal
 76 |     Id = np.array([1]*L.shape[0])
 77 |     Id[set0] = 0
 78 |     Id = np.diag(Id)    
 79 |     L_compset_full = inv(Id + L)
 80 |     L_minor = inv(np.delete(np.delete(L_compset_full,tuple(set0), axis=1),tuple(set0),axis=0))
 81 |     L_compset = L_minor - np.diag([1]*L_minor.shape[0]) 
 82 |     
 83 |     # Take approximated sample
 84 |     sample = sample_dual_dpp(L_compset,q,k-1)
 85 |     if k==2: sample = [sample]
 86 |     return np.concatenate((set0,sample) ,axis=0)
 87 | 
 88 | 
 89 | 
 90 | def sample_dual_dpp(L,q,k=None):
 91 |     '''
 92 |     Wrapper function for the sample_dual_dpp Matlab code written by Alex Kulesza
 93 |     Given a kernel matrix L, returns a sample from a k-DPP.
 94 |     
 95 |     L is the kernel matrix
 96 |     q is the number of used eigenvalues
 97 |     k is the number of elements in the sample from the DPP
 98 |     '''
 99 |     # Matlab link
100 |     global mtb    
101 |     if mtb == None:
102 |         import matlab_wrapper
103 |         mtb = matlab_wrapper.MatlabSession()
104 |         
105 |     # Extract the feature matrix from the kernel
106 |     evals, evecs = largest_eigsh(L,q,which='LM')
107 |     B = np.dot(evecs,np.diag(evals))
108 |     
109 |     # load values in Matlab and get sample
110 |     mtb.put('B',B)
111 |     
112 |     if k!=None: 
113 |         k = np.array([[k]])  # matlab only undernstand matrices 
114 |         mtb.put('k',k)
115 |         mtb.eval("dpp_sample = sample_dual_dpp(B,decompose_kernel(B'*B),k)")
116 |     else:
117 |         mtb.eval("dpp_sample = sample_dual_dpp(B,decompose_kernel(B'*B))")
118 |         
119 |     dpp_sample = mtb.get('dpp_sample')
120 |     return dpp_sample.astype(int)
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 


--------------------------------------------------------------------------------
/dpp/samplers/dpp.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.sparse.linalg.eigen.arpack import eigsh as largest_eigsh
  3 | from  numpy.linalg import inv
  4 | mtb = None
  5 | 
  6 | def sample_dpp(L,k=None):
  7 |     '''
  8 |     Wrapper function for the sample_dpp Matlab code written by Alex Kulesza
  9 |     Given a kernel matrix L, returns a sample from a k-DPP.
 10 |     The code is hacked in a way that if a set A is provied, samples from a conditional 
 11 |     dpp given A are produced
 12 |     L:     kernel matrix
 13 |     k:     size of the sample from the DPP
 14 |     set:   index of the conditional elements. Integer numpy array containing the locations 
 15 |             (starting in zero) relative to the rows of L.
 16 |        
 17 |     '''
 18 |     # Matlab link
 19 |     global mtb    
 20 |     if mtb == None:
 21 |         import matlab_wrapper
 22 |         mtb = matlab_wrapper.MatlabSession()
 23 | 
 24 |     # load values in Matlab and get sample
 25 |     mtb.put('L',L)
 26 |     if k!=None: 
 27 |         k = np.array([[k]])  # matlab only undenstand matrices 
 28 |         mtb.put('k',k)
 29 |         mtb.eval("dpp_sample = sample_dpp(decompose_kernel(L),k)")
 30 |     else:
 31 |         mtb.eval("dpp_sample = sample_dpp(decompose_kernel(L))")
 32 |         
 33 |     #dpp_sample = mtb.getvalue('dpp_sample')
 34 |     dpp_sample = mtb.get('dpp_sample')
 35 |     return dpp_sample.astype(int)-1  # index start in zero in python
 36 | 
 37 | 
 38 | def sample_conditional_dpp(L,set0,k=None):
 39 |     '''
 40 |     Wrapper function for the sample_dpp Matlab code written by Alex Kulesza
 41 |     Given a kernel matrix L, returns a sample from a k-DPP.
 42 |     The code is hacked in a way that if a set A is provied, samples from a conditional 
 43 |     dpp given A are produced
 44 |     L:     kernel matrix
 45 |     set:   index of the conditional elements. Integer numpy array containing the locations 
 46 |             (starting in zero) relative to the rows of L.
 47 |     k:     size of the sample from the DPP
 48 |     '''
 49 |     set0 = np.array(set0)+1 # matlab starts counting in one
 50 |     # Calculate the kernel for the marginal
 51 |     Id = np.array([1]*L.shape[0])
 52 |     Id[set0] = 0
 53 |     Id = np.diag(Id)    
 54 |     L_compset_full = inv(Id + L)
 55 |     L_minor = inv(np.delete(np.delete(L_compset_full,tuple(set0), axis=1),tuple(set0),axis=0))
 56 |     L_compset = L_minor - np.diag([1]*L_minor.shape[0])
 57 |     
 58 |     # Compute the sample
 59 |     sample = sample_dpp(L_compset,k)
 60 |     if k==2: sample = [sample]
 61 |     return np.concatenate((set0,sample) ,axis=0)-1  # back to python indexing
 62 | 
 63 | 
 64 | def sample_dual_conditional_dpp(L,set0,q,k=None):
 65 |     '''
 66 |     Wrapper function for the sample_dpp Matlab code written by Alex Kulesza
 67 |     Given a kernel matrix L, returns a sample from a dual k-DPP.
 68 |     The code is hacked in a way that if a set0 A is provied, samples from a conditional 
 69 |     dpp given A are produced
 70 |     L:     kernel matrix
 71 |     set0:   index of the conditional elements. Integer numpy array containing the locations 
 72 |            (starting in zero) relative to the rows of L.
 73 |     q:     is the number of used eigenvalues
 74 |     k:     size of the sample from the DPP
 75 |     '''
 76 |     # Calculate the kernel of the marginal
 77 |     Id = np.array([1]*L.shape[0])
 78 |     Id[set0] = 0
 79 |     Id = np.diag(Id)    
 80 |     L_compset_full = inv(Id + L)
 81 |     L_minor = inv(np.delete(np.delete(L_compset_full,tuple(set0), axis=1),tuple(set0),axis=0))
 82 |     L_compset = L_minor - np.diag([1]*L_minor.shape[0]) 
 83 |     
 84 |     # Take approximated sample
 85 |     sample = sample_dual_dpp(L_compset,q,k-1)
 86 |     if k==2: sample = [sample]
 87 |     return np.concatenate((set0,sample) ,axis=0)
 88 | 
 89 | 
 90 | 
 91 | def sample_dual_dpp(L,q,k=None):
 92 |     '''
 93 |     Wrapper function for the sample_dual_dpp Matlab code written by Alex Kulesza
 94 |     Given a kernel matrix L, returns a sample from a k-DPP.
 95 |     
 96 |     L is the kernel matrix
 97 |     q is the number of used eigenvalues
 98 |     k is the number of elements in the sample from the DPP
 99 |     '''
100 |     # Matlab link
101 |     global mtb    
102 |     if mtb == None:
103 |         import matlab_wrapper
104 |         mtb = matlab_wrapper.MatlabSession()
105 |         
106 |     # Extract the feature matrix from the kernel
107 |     evals, evecs = largest_eigsh(L,q,which='LM')
108 |     B = np.dot(evecs,np.diag(evals))
109 |     
110 |     # load values in Matlab and get sample
111 |     mtb.put('B',B)
112 |     
113 |     if k!=None: 
114 |         k = np.array([[k]])  # matlab only undernstand matrices 
115 |         mtb.put('k',k)
116 |         mtb.eval("dpp_sample = sample_dual_dpp(B,decompose_kernel(B'*B),k)")
117 |     else:
118 |         mtb.eval("dpp_sample = sample_dual_dpp(B,decompose_kernel(B'*B))")
119 |         
120 |     dpp_sample = mtb.get('dpp_sample')
121 |     return dpp_sample.astype(int)
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 


--------------------------------------------------------------------------------
/dpp/samplers/bp.m:
--------------------------------------------------------------------------------
  1 | function result = bp(M,mode,V,assign)
  2 | % run BP on a sequence model
  3 | % 
  4 | % - M is a sequence model with fields:
  5 | %     T = length of sequence
  6 | %     N = number of labels (all nodes assumed to take same labels)
  7 | %     A = N x N sparse edge potential matrix; A(i,j) is potential from label i to j
  8 | %     Q = 1 x N node potential vector (applies to all except (possibly) first node)
  9 | %     Q1 = 1 x N initial node quality vector (if omitted, uses Q)
 10 | %     G = N x D similarity features (only needed for 2nd-order modes)
 11 | % 
 12 | % - mode specifies the semiring/result, and can be:
 13 | %
 14 | %   'partition' to compute the partition function, result is scalar.
 15 | %
 16 | %   'marginals' to compute marginals, result is T x N.
 17 | % 
 18 | %   'sample' to generate a sample, result is T x 1.
 19 | %
 20 | %   'marginals2' to compute second-order marginals, result is T x N:
 21 | %      sum_{y ~ y_i} p(y) sum_{j=1}^r (V(:,j) . G(y))^2
 22 | %
 23 | %   'sample2' to generate a second-order sample, result is T x 1:
 24 | %      P(y) ~ p(y) sum_{j=1}^r (V(j,:) . G(y))^2
 25 | %
 26 | %   'covariance' to compute similarity feature covariance, result is D x D
 27 | %
 28 | % - V is a D x r set of r weight vectors on similarity features, needed 
 29 | %   only for modes 'marginals2' and 'sample2'.
 30 | %
 31 | % - assign is an optional T x 1 partial assignment vector.  nonzero values 
 32 | %   are visible labels, zeros are hidden labels.
 33 |   
 34 |   if ~exist('assign','var') || isempty(assign)
 35 |     assign = zeros(M.T,1);
 36 |   end
 37 |   assert(length(assign) == M.T);
 38 |   
 39 |   if ~isfield(M,'Q1')
 40 |     M.Q1 = M.Q;
 41 |   end
 42 | 
 43 | 
 44 |   %% forward pass
 45 |   
 46 |   % init
 47 |   switch mode
 48 | 
 49 |    case {'partition','marginals','sample'}
 50 |     Fq = zeros(M.T,M.N);
 51 |         
 52 |    case {'marginals2','sample2'}
 53 |     r = size(V,2);    
 54 |     Fq = zeros(M.T,M.N);
 55 |     Ff = zeros(M.T,M.N,r);
 56 |     Fff = zeros(M.T,M.N,r);
 57 | 
 58 |     % pre-compute, f = 1 x N x r
 59 |     f = shiftdim(M.G*V,-1);
 60 |     
 61 |    case 'covariance'    
 62 |     % to save memory, keep only the most recent message
 63 |     Fq = zeros(M.N,1);
 64 |     Fg = zeros(M.N,size(M.G,2));
 65 |     Fc = zeros(M.N,size(M.G,2),size(M.G,2));
 66 |     
 67 |     % pre-compute, gg = N x D x D
 68 |     gg = bsxfun(@times,M.G,permute(M.G,[1,3,2]));
 69 | 
 70 |   end
 71 |   
 72 |   % first messages
 73 |   switch mode
 74 | 
 75 |    case {'partition','marginals','sample'}
 76 |     if assign(1)
 77 |       Fq(1,assign(1)) = 1;
 78 |     else
 79 |       Fq(1,:) = M.Q1;
 80 |     end
 81 |     
 82 |    case {'marginals2','sample2'}
 83 |     if assign(1)
 84 |       Fq(1,assign(1)) = M.Q1(assign(1));
 85 |     else
 86 |       Fq(1,:) = M.Q1;
 87 |     end
 88 |     Ff(1,:,:) = bsxfun(@times,Fq(1,:),f);
 89 |     Fff(1,:,:) = bsxfun(@times,Fq(1,:),f.^2);
 90 | 
 91 |    case 'covariance'
 92 |     if assign(1)
 93 |       Fq(assign(1)) = 1;
 94 |     else
 95 |       Fq = M.Q1;
 96 |     end
 97 |     Fg = bsxfun(@times,Fq(1,:)',M.G);
 98 |     Fc = bsxfun(@times,Fq(1,:)',gg);
 99 | 
100 |   end
101 |   
102 |   % go
103 |   for t = 2:M.T
104 | 
105 |     if assign(t)
106 |       notallowed = setdiff(1:M.N,assign(t));
107 |     else
108 |       notallowed = [];
109 |     end
110 |     
111 |     switch mode
112 | 
113 |      case {'partition','marginals','sample'}
114 |       Fq(t,:) = M.Q .* (Fq(t-1,:) * M.A);
115 |       Fq(t,notallowed) = 0;
116 |             
117 |      case {'marginals2','sample2'}
118 |       AFf = genmult(M.A',Ff(t-1,:,:));                  
119 |       AFff = genmult(M.A',Fff(t-1,:,:));
120 | 
121 |       Fq(t,:) = M.Q .* (Fq(t-1,:) * M.A);
122 |       Ff(t,:,:) = bsxfun(@times,Fq(t,:),f) ...
123 |           + bsxfun(@times,M.Q,AFf);                   
124 |       Fff(t,:,:) = bsxfun(@times,Fq(t,:),f.^2) ...
125 |           + bsxfun(@times, M.Q, 2 * f .* AFf + AFff);
126 | 
127 |       Fq(t,notallowed) = 0;
128 |       Ff(t,notallowed,:) = 0;
129 |       Fff(t,notallowed,:) = 0;
130 | 
131 |      case 'covariance'
132 |       AFg = M.A' * Fg;
133 |       Across = bsxfun(@times,AFg,permute(M.G,[1 3 2]));
134 | 
135 |       Fq = M.Q .* (Fq * M.A);
136 |       Fg = bsxfun(@times,Fq',M.G) + bsxfun(@times,M.Q',AFg);
137 |       Fc = bsxfun(@times,Fq',gg) ...
138 |           + bsxfun(@times,M.Q', ...
139 |                    Across + permute(Across,[1 3 2]) ...
140 |                    + genmult(M.A',Fc));
141 |       
142 |       Fq(notallowed) = 0;
143 |       Fg(notallowed,:) = 0;
144 |       Fc(notallowed,:,:) = 0;
145 |     
146 |     end
147 |   end
148 | 
149 |   %% backward pass
150 |   
151 |   % init
152 |   switch mode
153 | 
154 |    case 'marginals'
155 |     Bq = zeros(M.T,M.N);
156 |    
157 |    case 'sample'
158 |     Bq = zeros(M.T,M.N);
159 |     Y = zeros(M.T,1);
160 |     
161 |    case 'marginals2'
162 |     Bq = zeros(M.T,M.N);
163 |     Bf = zeros(M.T,M.N,r);
164 |     Bff = zeros(M.T,M.N,r);
165 | 
166 |    case 'sample2'
167 |     Bq = zeros(M.T,M.N);
168 |     Bf = zeros(M.T,M.N,r);
169 |     Bff = zeros(M.T,M.N,r);
170 |     Y = zeros(M.T,1);
171 |     
172 |   end
173 |   
174 |   % first messages
175 |   switch mode
176 | 
177 |    case 'marginals'
178 |     if assign(M.T)
179 |       Bq(M.T,assign(M.T)) = 1;
180 |     else
181 |       Bq(M.T,:) = ones(1,M.N);
182 |     end
183 |    
184 |    case 'sample'
185 |     if assign(M.T)
186 |       Y(M.T) = assign(M.T);
187 |     else
188 |       % sample node
189 |       dist = Fq(M.T,:);
190 |       Y(M.T) = find(rand <= cumsum(dist) / sum(dist),1);      
191 |     end
192 |     Bq(M.T,Y(M.T)) = 1;
193 |         
194 |    case 'marginals2'
195 |     if assign(M.T)
196 |       Bq(M.T,assign(M.T)) = 1;
197 |     else
198 |       Bq(M.T,:) = ones(1,M.N);
199 |     end
200 |     Bf(M.T,:,:) = zeros(1,M.N,r);
201 |     Bff(M.T,:,:) = zeros(1,M.N,r);
202 | 
203 |    case 'sample2'
204 |     if assign(M.T)
205 |       Y(M.T) = assign(M.T);
206 |     else
207 |       % sample node
208 |       dist = sum(Fff(M.T,:,:),3);
209 |       Y(M.T) = find(rand <= cumsum(dist) / sum(dist),1);
210 |     end
211 |     Bq(M.T,Y(M.T)) = 1;
212 |     Bf(M.T,:,:) = zeros(1,M.N,r);
213 |     Bff(M.T,:,:) = zeros(1,M.N,r);    
214 |     
215 |   end
216 |   
217 |   % go
218 |   for t = M.T-1:-1:1
219 |     if assign(t)
220 |       notallowed = setdiff(1:M.N,assign(t));
221 |     else
222 |       notallowed = [];
223 |     end
224 |     
225 |     switch mode
226 | 
227 |      case 'marginals'
228 |       Bq(t,:) = M.A * (M.Q .* Bq(t+1,:))';
229 |       Bq(t,notallowed) = 0;
230 |       
231 |      case 'sample'
232 |       Bq(t,:) = M.A * (M.Q .* Bq(t+1,:))';
233 |       
234 |       if assign(t)
235 |         Y(t) = assign(t);
236 |       else
237 |         % sample node
238 |         dist = Fq(t,:) .* Bq(t,:);
239 |         Y(t) = find(rand <= cumsum(dist) / sum(dist),1);
240 |       end
241 |       notallowed = setdiff(1:M.N,Y(t));
242 | 
243 |       Bq(t,notallowed) = 0;
244 |      
245 |      case 'marginals2'
246 |       Bq(t,:) = M.A * (M.Q .* Bq(t+1,:))';   
247 |       Bf(t,:,:) = genmult(M.A,bsxfun(@times,M.Q .* Bq(t+1,:),f) ...
248 |                           + bsxfun(@times,M.Q,Bf(t+1,:,:)));
249 |       Bff(t,:,:) = genmult(M.A,bsxfun(@times,M.Q .* Bq(t+1,:),f.^2) ...
250 |                            + bsxfun(@times,M.Q, ...
251 |                                     Bff(t+1,:,:) + 2*f.*Bf(t+1,:,:)));
252 | 
253 |       Bq(t,notallowed) = 0;
254 |       Bf(t,notallowed,:) = 0;
255 |       Bff(t,notallowed,:) = 0;
256 | 
257 |      case 'sample2'
258 |       Bq(t,:) = M.A * (M.Q .* Bq(t+1,:))';
259 |       Bf(t,:,:) = genmult(M.A,bsxfun(@times,M.Q .* Bq(t+1,:),f) ...
260 |                           + bsxfun(@times,M.Q,Bf(t+1,:,:)));
261 |       Bff(t,:,:) = genmult(M.A,bsxfun(@times,M.Q .* Bq(t+1,:),f.^2) ...
262 |                            + bsxfun(@times,M.Q, ...
263 |                                     Bff(t+1,:,:) + 2*f.*Bf(t+1,:,:)));
264 | 
265 |       if assign(t)
266 |         Y(t) = assign(t);
267 |       else
268 |         % sample node
269 |         dist = sum(bsxfun(@times,Fq(t,:),Bff(t,:,:)) + ...
270 |                    bsxfun(@times,Bq(t,:),Fff(t,:,:)) + ...
271 |                    2*Bf(t,:,:).*Ff(t,:,:),3);
272 |         Y(t) = find(rand <= cumsum(dist) / sum(dist),1);
273 |       end
274 |       notallowed = setdiff(1:M.N,Y(t));
275 |       
276 |       Bq(t,notallowed) = 0;
277 |       Bf(t,notallowed,:) = 0;
278 |       Bff(t,notallowed,:) = 0;
279 | 
280 |     end
281 |   end
282 |   
283 |   % set result
284 |   switch  mode
285 |    case 'partition'
286 |     result = sum(Fq(M.T,:));
287 |     
288 |    case 'marginals'
289 |     result = Fq .* Bq;
290 | 
291 |    case 'sample'
292 |     result = Y;
293 | 
294 |    case 'marginals2'
295 |     result = sum(bsxfun(@times,Fq,Bff) + bsxfun(@times,Bq,Fff) + 2*Bf.*Ff,3);
296 | 
297 |    case 'sample2'
298 |     result = Y;
299 | 
300 |    case 'covariance'
301 |     result = shiftdim(sum(Fc(:,:,:)));
302 |     
303 |   end
304 |   
305 | 


--------------------------------------------------------------------------------