├── README.md
├── matlab
    └── w2.c
└── python
    ├── example.ipynb
    ├── example.py
    ├── pyproject.toml
    ├── setup.py
    └── src
        └── main.cpp


/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # The back-and-forth method in optimal transport
 4 | 
 5 | This repository contains the source code used in the paper [A fast approach to optimal transport: The back-and-forth method](https://arxiv.org/pdf/1905.12154.pdf) [1]. The original code was written in C and we provide here a Python and a MATLAB wrapper to the C code.
 6 | 
 7 | 
 8 | 
 9 | # Documentation
10 | Available here: <https://back-and-forth.netlify.app>.
11 | 
12 | 
13 | # Python
14 | 
15 | ## Installation
16 | 
17 | The simplest way to use the Python code is to [run this notebook on Google Colab](https://colab.research.google.com/drive/1Uml2n4MIVDZnviEHEMFrJIdMwYDOPHax?usp=sharing). 
18 | 
19 | The notebook is also available here as `example.ipynb`.
20 | 
21 | Alternatively, to install the Python bindings on your machine, first clone the GitHub repository and then install the Python bindings by running
22 | ```
23 | pip install ./bfm/python
24 | ```
25 | 
26 | ## Usage
27 | See the Jupyter notebook `example.ipynb` or directly run `example.py`.
28 | 
29 | 
30 | 
31 | 
32 | # MATLAB
33 | 
34 | ## Installation
35 | 
36 | Requirements: FFTW ([download here](http://www.fftw.org/)), MATLAB.
37 | 
38 | Download the C MEX file `w2.c` [here](https://raw.githubusercontent.com/Math-Jacobs/bfm/main/matlab/w2.c) or clone the GitHub repository and navigate to the `matlab/` folder.
39 | 
40 | Compilation: in a MATLAB session run
41 | ```matlab
42 | mex -O CFLAGS="\$CFLAGS -std=c99" -lfftw3 -lm w2.c 
43 | ```
44 | This will produce a MEX function `w2` that you can use in MATLAB. You may need to use flags `-I` and `-L` to point the compiler and the linker to the FFTW3 library, e.g. `mex -O CFLAGS="\$CFLAGS -std=c99" w2.c -lfftw3 -I/usr/local/include -L/usr/local/lib`. See [this page](https://www.mathworks.com/help/matlab/matlab_external/build-an-executable-mex-file.html) for more information on how to compile MEX files. 
45 | 
46 | 
47 | 
48 | ## Usage
49 | 
50 | In a MATLAB session, run the command
51 | ```matlab
52 | [phi, psi] = w2(mu, nu, numIters, sigma);
53 | ```
54 | 
55 | Input:
56 | 
57 | * `mu` and `nu` are two arrays of nonnegative values which sum up to the same value.
58 | * `numIters` is the total number of iterations.
59 | * `sigma` is the initial step size of the gradient ascent iterations.
60 | 
61 | Output:
62 | 
63 | * `phi` and `psi` are arrays corresponding to the Kantorovich potentials. 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | # References
70 | 
71 | 
72 | [1] Matt Jacobs and Flavien Léger. [A fast approach to optimal transport: The back-and-forth method](https://arxiv.org/pdf/1905.12154.pdf). *Numerische Mathematik* (2020): 1-32.
73 | 
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/matlab/w2.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <math.h>
  3 | #include <stdio.h>
  4 | #include <string.h>
  5 | #include <time.h>
  6 | #include <fftw3.h>
  7 | #include "mex.h"
  8 | 
  9 | 
 10 | 
 11 | 
 12 | 
 13 | double compute_l2_ot(double *mu, double *nu, double *phi, double *dual, double totalMass, double sigma, int maxIters, int n1, int n2);
 14 | 
 15 | 
 16 | 
 17 | void mexFunction( int nlhs, mxArray *plhs[],
 18 |                  int nrhs, const mxArray *prhs[]){
 19 |     
 20 |     
 21 |     double *mu=mxGetPr(prhs[0]);
 22 |     double *nu=mxGetPr(prhs[1]);
 23 |     int maxIters=(int) mxGetScalar(prhs[2]);
 24 |     double sigma =(double) mxGetScalar(prhs[3]);
 25 |     
 26 | 	int n1=mxGetM(prhs[0]);
 27 | 	int n2=mxGetN(prhs[0]);
 28 |     
 29 |     int pcount=n1*n2;
 30 |     
 31 |     plhs[0] = mxCreateDoubleMatrix(n1,n2,mxREAL);
 32 |     plhs[1] = mxCreateDoubleMatrix(n1,n2,mxREAL);
 33 |     
 34 |     double *phi=mxGetPr(plhs[0]);
 35 |     double *psi=mxGetPr(plhs[1]);
 36 |     
 37 |         
 38 |     double sum=0;
 39 | 
 40 |     for(int i=0;i<pcount;i++){
 41 | 		if (mu[i]<0){
 42 | 			mexErrMsgTxt("Initial density contains negative values");
 43 | 		}
 44 | 		if (nu[i]<0){
 45 | 			mexErrMsgTxt("Final density contains negative values");
 46 | 		}
 47 |         
 48 | 		sum+=mu[i];
 49 |     }
 50 |     
 51 |        
 52 |     
 53 |     double totalMass=sum/(n1*n2*1.0);
 54 | 	   
 55 |    
 56 |     double value=compute_l2_ot(mu, nu, phi, psi, totalMass, sigma, maxIters, n1, n2);
 57 |     
 58 |     
 59 | 
 60 |     for(int i=0;i<n2;i++){
 61 |         for(int j=0;j<n1;j++){
 62 |             double x=(j+.5)/(n1*1.0);
 63 |             double y=(i+.5)/(n2*1.0);
 64 |             
 65 |             phi[i*n1+j]=.5*(x*x+y*y)-phi[i*n1+j];
 66 |             psi[i*n1+j]=.5*(x*x+y*y)-psi[i*n1+j];            
 67 |         }
 68 |     }
 69 |     
 70 | }
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | 
 77 | typedef struct{
 78 |     fftw_plan dctIn;
 79 |     fftw_plan dctOut;
 80 |     double *kernel;
 81 |     double *workspace;
 82 | }poisson_solver;
 83 | 
 84 | 
 85 | double *create_negative_laplace_kernel(int n1, int n2){
 86 |     double *kernel=calloc(n1*n2,sizeof(double));
 87 |     for(int i=0;i<n2;i++){
 88 |         for(int j=0;j<n1;j++){
 89 |             double x=M_PI*j/(n1*1.0);
 90 |             double y=M_PI*i/(n2*1.0);
 91 |             
 92 |             double negativeLaplacian=2*n1*n1*(1-cos(x))+2*n2*n2*(1-cos(y));
 93 |             
 94 |             kernel[i*n1+j]=negativeLaplacian;
 95 |                 
 96 |             
 97 |             
 98 |         }
 99 |     }
100 |     return kernel;
101 | }
102 | 
103 | 
104 | poisson_solver create_poisson_solver_workspace(int n1, int n2){
105 |     clock_t b,e;
106 |     b=clock();
107 |     poisson_solver fftps;
108 |     fftps.workspace=calloc(n1*n2,sizeof(double));
109 |     fftps.kernel=create_negative_laplace_kernel(n1,n2);
110 |     
111 |     fftps.dctIn=fftw_plan_r2r_2d(n2, n1, fftps.workspace, fftps.workspace,
112 |                                  FFTW_REDFT10, FFTW_REDFT10,
113 |                                  FFTW_MEASURE);
114 |     fftps.dctOut=fftw_plan_r2r_2d(n2, n1, fftps.workspace, fftps.workspace,
115 |                                   FFTW_REDFT01, FFTW_REDFT01,
116 |                                   FFTW_MEASURE);
117 |     
118 |     e=clock();
119 |     
120 |     mexPrintf("FFT setup time: %.2fs\n", (e-b)/(CLOCKS_PER_SEC*1.0));
121 |     mexEvalString("pause(.001);");
122 | 
123 |     return fftps;
124 | }
125 | 
126 | 
127 | 
128 | void destroy_poisson_solver(poisson_solver fftps){
129 |     free(fftps.kernel);
130 |     free(fftps.workspace);
131 |     fftw_destroy_plan(fftps.dctIn);
132 |     fftw_destroy_plan(fftps.dctOut);
133 | }
134 | 
135 | 
136 | 
137 | 
/*
 * Index list used by the 1-d convex hull routines.
 *   indices   - grid indices currently on the hull (owned buffer)
 *   hullCount - number of valid entries in indices
 */
typedef struct{
    int *indices;
    int hullCount;

}convex_hull;


/* Sign of x: +1, 0 or -1. */
int sgn(double x){

    int truth=(x>0)-(x<0);
    return truth;

}


/*
 * Allocate the index buffer of a hull for signals of length n.
 *
 * The buffer holds ints, so it is sized with sizeof(int). At least two slots
 * are always reserved because get_convex_hull unconditionally seeds
 * indices[0] and indices[1].
 */
void init_hull(convex_hull *hull, int n){
    int capacity=n<2?2:n;
    hull->indices=(int *)calloc(capacity,sizeof(int));
    hull->hullCount=0;

}

/* Release the index buffer and reset the hull so a stale pointer is never reused. */
void destroy_hull(convex_hull *hull){
    free(hull->indices);
    hull->indices=NULL;
    hull->hullCount=0;
}
162 | 
163 | void transpose_doubles(double *transpose, double *data, int n1, int n2){
164 |     
165 |     for(int i=0;i<n2;i++){
166 |         for(int j=0;j<n1;j++){
167 |          
168 |             transpose[j*n2+i]=data[i*n1+j];
169 |         }
170 |     }
171 | }
172 | 
173 | 
174 | 
175 | 
176 | double interpolate_function(double *function, double x, double y, int n1, int n2){
177 |     
178 |     int xIndex=fmin(fmax(x*n1-.5 ,0),n1-1);
179 |     int yIndex=fmin(fmax(y*n2-.5 ,0),n2-1);
180 |     
181 |     double xfrac=x*n1-xIndex-.5;
182 |     double yfrac=y*n2-yIndex-.5;
183 |     
184 |     int xOther=xIndex+sgn(xfrac);
185 |     int yOther=yIndex+sgn(yfrac);
186 |     
187 |     xOther=fmax(fmin(xOther, n1-1),0);
188 |     yOther=fmax(fmin(yOther, n2-1),0);
189 |     
190 |     double v1=(1-fabs(xfrac))*(1-fabs(yfrac))*function[yIndex*n1+xIndex];
191 |     double v2=fabs(xfrac)*(1-fabs(yfrac))*function[yIndex*n1+xOther];
192 |     double v3=(1-fabs(xfrac))*fabs(yfrac)*function[yOther*n1+xIndex];
193 |     double v4=fabs(xfrac)*fabs(yfrac)*function[yOther*n1+xOther];
194 |     
195 |     double v=v1+v2+v3+v4;
196 |     
197 |     return v;
198 |     
199 | }
200 | 
201 | 
202 | void add_point(double *u, convex_hull *hull, int i){
203 |     
204 |     
205 |     if(hull->hullCount<2){
206 |         hull->indices[1]=i;
207 |         hull->hullCount++;
208 |     }else{
209 |         int hc=hull->hullCount;
210 |         int ic1=hull->indices[hc-1];
211 |         int ic2=hull->indices[hc-2];
212 |         
213 |         double oldSlope=(u[ic1]-u[ic2])/(ic1-ic2);
214 |         double slope=(u[i]-u[ic1])/(i-ic1);
215 |         
216 |         if(slope>=oldSlope){
217 |             int hc=hull->hullCount;
218 |             hull->indices[hc]=i;
219 |             hull->hullCount++;
220 |         }else{
221 |             hull->hullCount--;
222 |             add_point(u, hull, i);
223 |         }
224 |     }
225 | }
226 | 
227 | 
228 | void get_convex_hull(double *u, convex_hull *hull, int n){
229 |     
230 |     hull->indices[0]=0;
231 |     hull->indices[1]=1;
232 |     hull->hullCount=2;
233 |     
234 |     for(int i=2;i<n;i++){
235 |         
236 |         add_point(u, hull, i);
237 |         
238 |     }
239 |     
240 | }
241 | 
242 | 
243 | void compute_dual_indices(int *dualIndicies, double *u, convex_hull *hull, int n){
244 |     
245 |     int counter=1;
246 |     int hc=hull->hullCount;
247 |     
248 |     for(int i=0;i<n;i++){
249 |        
250 |         double s=(i+.5)/(n*1.0);
251 |         int ic1=hull->indices[counter];
252 |         int ic2=hull->indices[counter-1];
253 |         
254 |         double slope=n*(u[ic1]-u[ic2])/(ic1-ic2);
255 |         while(s>slope&&counter<hc-1){
256 |             counter++;
257 |             ic1=hull->indices[counter];
258 |             ic2=hull->indices[counter-1];
259 |             slope=n*(u[ic1]-u[ic2])/(ic1-ic2);
260 |         }
261 |         dualIndicies[i]=hull->indices[counter-1];
262 |         
263 |     }
264 | }
265 | 
266 | 
267 | void compute_dual(double *dual, double *u, int *dualIndicies, convex_hull *hull, int n){
268 |     
269 |     get_convex_hull(u, hull, n);
270 |    
271 |     
272 |     compute_dual_indices(dualIndicies, u, hull, n);
273 |     
274 |     for(int i=0;i<n;i++){
275 |         double s=(i+.5)/(n*1.0);
276 |         int index=dualIndicies[i];
277 |         double x=(index+.5)/(n*1.0);
278 |         double v1=s*x-u[dualIndicies[i]];
279 |         double v2=s*(n-.5)/(n*1.0)-u[n-1];
280 |         if(v1>v2){
281 |             dual[i]=v1;
282 |         }else{
283 |             dualIndicies[i]=n-1;
284 |             dual[i]=v2;
285 |         }
286 |         
287 |     }
288 |     
289 | }
290 | 
291 | 
292 | 
293 | 
294 | void compute_2d_dual(double *dual, double *u, convex_hull *hull, int n1, int n2){
295 |     
296 |     int pcount=n1*n2;
297 |     
298 |     int n=fmax(n1,n2);
299 |     
300 |     int *argmin=calloc(n,sizeof(int));
301 |     
302 |     double *temp=calloc(pcount,sizeof(double));
303 |     
304 |     memcpy(temp, u, pcount*sizeof(double));
305 |     
306 |     
307 |     for(int i=0;i<n2;i++){
308 |         compute_dual(&dual[i*n1], &temp[i*n1], argmin, hull, n1);
309 |         
310 |     }
311 |     transpose_doubles(temp, dual, n1, n2);
312 |     for(int i=0;i<n1*n2;i++){
313 |         dual[i]=-temp[i];
314 |     }
315 |     for(int j=0;j<n1;j++){
316 |         compute_dual(&temp[j*n2], &dual[j*n2], argmin, hull, n2);
317 |         
318 |     }
319 |     transpose_doubles(dual, temp, n2, n1);
320 |     
321 |     free(temp);
322 |     free(argmin);
323 |     
324 | }
325 | 
326 | 
327 | 
328 | void convexify(double *phi, double *dual, convex_hull *hull, int n1, int n2){
329 |     
330 |     compute_2d_dual(dual, phi, hull, n1, n2);
331 |     
332 |     compute_2d_dual(phi, dual, hull, n1, n2);
333 |     
334 | }
335 | 
336 | 
337 | 
338 | 
339 | 
340 | void calc_pushforward_map(double *xMap, double *yMap, double *dual, int n1, int n2){
341 |     
342 |     
343 |     double xStep=1.0/n1;
344 |     double yStep=1.0/n2;
345 |     
346 |     
347 |     for(int i=0;i<n2+1;i++){
348 |         for(int j=0;j<n1+1;j++){
349 |             double x=j/(n1*1.0);
350 |             double y=i/(n2*1.0);
351 |             
352 |             double dualxp=interpolate_function(dual, x+xStep, y, n1, n2);
353 |             double dualxm=interpolate_function(dual, x-xStep, y, n1, n2);
354 |             
355 |             double dualyp=interpolate_function(dual, x, y+yStep, n1, n2);
356 |             double dualym=interpolate_function(dual, x, y-yStep, n1, n2);
357 |             
358 |             xMap[i*(n1+1)+j]=.5*n1*(dualxp-dualxm);
359 |             yMap[i*(n1+1)+j]=.5*n2*(dualyp-dualym);
360 |             
361 |             
362 |         }
363 |     }
364 |     
365 | }
366 | 
367 | 
368 | 
369 | 
370 | 
371 | void sampling_pushforward(double *rho, double *mu, double totalMass, double *xMap, double *yMap, int n1, int n2){
372 |     
373 |     int pcount=n1*n2;
374 |     
375 |     memset(rho,0,pcount*sizeof(double));
376 |     
377 |     
378 |     double xCut=pow(1.0/n1,1.0/3);
379 |     double yCut=pow(1.0/n2,1.0/3);
380 |     
381 |     for(int i=0;i<n2;i++){
382 |         for(int j=0;j<n1;j++){
383 |             
384 |             double mass=mu[i*n1+j];
385 |             
386 |             if(mass>0){
387 |                 
388 |                 double xStretch0=fabs(xMap[i*(n1+1)+j+1]-xMap[i*(n1+1)+j]);
389 |                 double xStretch1=fabs(xMap[(i+1)*(n1+1)+j+1]-xMap[(i+1)*(n1+1)+j]);
390 |                 
391 |                 double yStretch0=fabs(yMap[(i+1)*(n1+1)+j]-yMap[i*(n1+1)+j]);
392 |                 double yStretch1=fabs(yMap[(i+1)*(n1+1)+j+1]-yMap[i*(n1+1)+j+1]);
393 |                 
394 |                 double xStretch=fmax(xStretch0, xStretch1);
395 |                 double yStretch=fmax(yStretch0, yStretch1);
396 |                 
397 |                 int xSamples=2*fmax(n1*xStretch,1);
398 |                 int ySamples=2*fmax(n2*yStretch,1);
399 |                 
400 |                 if(xStretch<xCut&&yStretch<yCut){
401 |                     
402 |                     double factor=1/(xSamples*ySamples*1.0);
403 |                     
404 |                     for(int l=0;l<ySamples;l++){
405 |                         for(int k=0;k<xSamples;k++){
406 |                             
407 |                             double a=(k+.5)/(xSamples*1.0);
408 |                             double b=(l+.5)/(ySamples*1.0);
409 |                             
410 |                             double xPoint=(1-b)*(1-a)*xMap[i*(n1+1)+j]+(1-b)*a*xMap[i*(n1+1)+j+1]+b*(1-a)*xMap[(i+1)*(n1+1)+j]+a*b*xMap[i*(n1+1)+j+1];
411 |                             double yPoint=(1-b)*(1-a)*yMap[i*(n1+1)+j]+(1-b)*a*yMap[i*(n1+1)+j+1]+b*(1-a)*yMap[(i+1)*(n1+1)+j]+a*b*yMap[i*(n1+1)+j+1];
412 |                             
413 |                             double X=xPoint*n1-.5;
414 |                             double Y=yPoint*n2-.5;
415 |                             
416 |                             int xIndex=X;
417 |                             int yIndex=Y;
418 |                             
419 |                             double xFrac=X-xIndex;
420 |                             double yFrac=Y-yIndex;
421 |                             
422 |                             int xOther=xIndex+1;
423 |                             int yOther=yIndex+1;
424 |                             
425 |                             xIndex=fmin(fmax(xIndex,0),n1-1);
426 |                             xOther=fmin(fmax(xOther,0),n1-1);
427 |                             
428 |                             yIndex=fmin(fmax(yIndex,0),n2-1);
429 |                             yOther=fmin(fmax(yOther,0),n2-1);
430 |                             
431 |                             
432 |                             rho[yIndex*n1+xIndex]+=(1-xFrac)*(1-yFrac)*mass*factor;
433 |                             rho[yOther*n1+xIndex]+=(1-xFrac)*yFrac*mass*factor;
434 |                             rho[yIndex*n1+xOther]+=xFrac*(1-yFrac)*mass*factor;
435 |                             rho[yOther*n1+xOther]+=xFrac*yFrac*mass*factor;
436 |                             
437 |                         }
438 |                     }
439 |                 }
440 |                 
441 |             }
442 |             
443 |         }
444 |     }
445 |     
446 |     double sum=0;
447 |     for(int i=0;i<pcount;i++){
448 |         sum+=rho[i]/pcount;
449 |     }
450 |     for(int i=0;i<pcount;i++){
451 |         rho[i]*=totalMass/sum;
452 |     }
453 |     
454 | }
455 | 
456 | 
457 | 
458 | 
459 | 
460 | 
461 | 
462 | double update_potential(poisson_solver fftps, double *phi, double *rho, double *nu, double sigma, int n1, int n2){
463 |     
464 |     int pcount=n1*n2;
465 |     
466 |     double h1=0;
467 |     
468 |     for(int i=0;i<pcount;i++){
469 |         fftps.workspace[i]=rho[i]-nu[i];
470 |     }
471 |     
472 |     fftw_execute(fftps.dctIn);
473 |     
474 |     fftps.workspace[0]=0;
475 |     
476 |     for(int i=1;i<pcount;i++){
477 |         
478 |         fftps.workspace[i]/=4*pcount*fftps.kernel[i];
479 |         
480 |     }
481 |    
482 |     
483 |     fftw_execute(fftps.dctOut);
484 |     
485 |     for(int i=0;i<pcount;i++){
486 |         phi[i]+=sigma*fftps.workspace[i];
487 |         h1+=fftps.workspace[i]*(rho[i]-nu[i]);
488 |     }
489 |     
490 |     h1/=pcount;
491 |     
492 |     
493 |     
494 |     return h1;
495 |     
496 | }
497 | 
498 | 
499 | 
500 | 
501 | 
502 | double compute_w2(double *phi, double *dual, double *mu, double *nu, int n1, int n2){
503 |     
504 |     int pcount=n1*n2;
505 |     
506 |     double value=0;
507 |     
508 |     for(int i=0;i<n2;i++){
509 |         for(int j=0;j<n1;j++){
510 |             double x=(j+.5)/(n1*1.0);
511 |             double y=(i+.5)/(n2*1.0);
512 |             
513 |             value+=.5*(x*x+y*y)*(mu[i*n1+j]+nu[i*n1+j])-nu[i*n1+j]*phi[i*n1+j]-mu[i*n1+j]*dual[i*n1+j];
514 |         }
515 |     }
516 |     
517 |     value/=pcount;
518 |     
519 |     return value;
520 |     
521 | }
522 | 
523 | 
524 | 
525 | double step_update(double sigma, double value, double oldValue, double gradSq, double scaleUp, double scaleDown, double upper, double lower){
526 |     
527 |     double diff=value-oldValue;
528 |     
529 |     if(diff>gradSq*sigma*upper){
530 |         return sigma*scaleUp;
531 |     }else if(diff<gradSq*sigma*lower){
532 |         return sigma*scaleDown;
533 |     }else{
534 |         return sigma;
535 |     }
536 |     
537 | }
538 | 
539 | double compute_l2_ot(double *mu, double *nu, double *phi, double *dual, double totalMass, double sigma, int maxIters, int n1, int n2){
540 |     
541 |     int pcount=n1*n2;
542 |     poisson_solver fftps=create_poisson_solver_workspace(n1,n2);
543 |     
544 | 
545 |     
546 |     double *xMap=calloc((n1+1)*(n2+1),sizeof(double));
547 |     double *yMap=calloc((n1+1)*(n2+1),sizeof(double));
548 |     
549 |    
550 |     
551 |     int n=fmax(n1,n2);
552 |     
553 |     convex_hull hull;
554 |     
555 |     init_hull(&hull, n);
556 |     
557 |     
558 |     
559 |    
560 |     
561 |     for(int i=0;i<n2+1;i++){
562 |         for(int j=0;j<n1+1;j++){
563 |             
564 |             double x=j/(n1*1.0);
565 |             double y=i/(n2*1.0);
566 |             
567 |             xMap[i*n1+j]=x;
568 |             yMap[i*n1+j]=y;
569 |             
570 |         }
571 |     }
572 |     
573 |     for(int i=0;i<n2;i++){
574 |         for(int j=0;j<n1;j++){
575 |             double x=(j+.5)/(n1*1.0);
576 |             double y=(i+.5)/(n2*1.0);
577 |             
578 |             phi[i*n1+j]=.5*(x*x+y*y);
579 |             dual[i*n1+j]=.5*(x*x+y*y);
580 |         }
581 |     }
582 |        
583 |     
584 |     double *rho=calloc(pcount,sizeof(double));
585 |     memcpy(rho,mu,pcount*sizeof(double));
586 |     
587 |     
588 |     double oldValue=compute_w2(phi, dual, mu, nu, n1, n2);
589 |     
590 |     double scaleDown=.8;
591 |     double scaleUp=1/scaleDown;
592 |     double upper=.75;
593 |     double lower=.25;
594 | 
595 |     int numDigitsIter=floor(log10(maxIters) + 1);
596 |     
597 |     for(int i=0;i<maxIters+1;i++){
598 |         
599 |         
600 |         double gradSq=update_potential(fftps, phi, rho, nu, sigma, n1, n2);
601 |         
602 |        
603 |         
604 |         convexify(phi, dual, &hull, n1, n2);
605 |         
606 |         
607 |         
608 |         double value=compute_w2(phi, dual, mu, nu, n1, n2);
609 |         
610 |         sigma=step_update(sigma, value, oldValue, gradSq , scaleUp, scaleDown, upper, lower);
611 |         
612 |         oldValue=value;
613 |         
614 |         
615 |         calc_pushforward_map(xMap, yMap, phi, n1, n2);
616 |         
617 |         sampling_pushforward(rho, nu, totalMass, xMap, yMap, n1, n2);
618 |         
619 |         
620 |         
621 |         
622 |         gradSq=update_potential(fftps, dual, rho, mu, sigma, n1, n2);
623 |         
624 |         
625 |         
626 |         convexify(dual, phi, &hull, n1, n2);
627 |         
628 |         calc_pushforward_map(xMap, yMap, dual, n1, n2);
629 |         
630 |         sampling_pushforward(rho, mu, totalMass, xMap, yMap, n1, n2);
631 |         
632 |         
633 |         
634 |         value=compute_w2(phi, dual, mu, nu, n1, n2);
635 |         
636 |         sigma=step_update(sigma, value, oldValue, gradSq , scaleUp, scaleDown, upper, lower);
637 |         
638 |         oldValue=value;
639 |         
640 |         
641 |         sigma=fmax(sigma,.05);
642 |         
643 |         if(i%5==0){
644 |                                     
645 |             mexPrintf("iter %*d, W2 value: %5e\n", numDigitsIter, i, value);
646 |             mexEvalString("pause(.001);");
647 |             
648 |         }
649 |         
650 |        
651 |         
652 |        
653 |         
654 |         
655 |     }
656 |     
657 |     
658 |     destroy_hull(&hull);
659 |     free(rho);
660 |     free(xMap);
661 |     free(yMap);
662 |     destroy_poisson_solver(fftps);
663 |     
664 |     return oldValue;
665 | }
666 | 
667 | 
668 | 
669 | 
670 | 


--------------------------------------------------------------------------------
/python/example.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | 
  4 | from w2 import BFM
  5 | from time import time
  6 | import numpy as np
  7 | import numpy.ma as ma
  8 | from scipy.fftpack import dctn, idctn
  9 | import matplotlib.pyplot as plt
 10 | plt.rcParams['figure.figsize'] = (13, 8)
 11 | plt.rcParams['image.cmap'] = 'viridis'
 12 | 
 13 | # %% Function definitions
 14 | 
 15 | # Initialize Fourier kernel
 16 | def initialize_kernel(n1, n2):
 17 |     xx, yy = np.meshgrid(np.linspace(0,np.pi,n1,False), np.linspace(0,np.pi,n2,False))
 18 |     kernel = 2*n1*n1*(1-np.cos(xx)) + 2*n2*n2*(1-np.cos(yy))
 19 |     kernel[0,0] = 1     # to avoid dividing by zero
 20 |     return kernel
 21 | 
 22 | # 2d DCT
 23 | def dct2(a):
 24 |     return dctn(a, norm='ortho')
 25 |     
 26 | # 2d IDCT
 27 | def idct2(a):
 28 |     return idctn(a, norm='ortho')
 29 | 
 30 | # Update phi as 
 31 | #       ϕ ← ϕ + σ (−Δ)⁻¹(ρ − ν)
 32 | # and return the error 
 33 | #       ∫(−Δ)⁻¹(ρ−ν) (ρ−ν)
 34 | # Modifies phi and rho
 35 | def update_potential(phi, rho, nu, kernel, sigma):
 36 |     n1, n2 = nu.shape
 37 | 
 38 |     rho -= nu
 39 |     workspace = dct2(rho) / kernel
 40 |     workspace[0,0] = 0
 41 |     workspace = idct2(workspace)
 42 |     
 43 |     phi += sigma * workspace
 44 |     h1 = np.sum(workspace * rho) / (n1*n2)
 45 |     
 46 |     return h1
 47 | 
 48 | # Compute the dual value
 49 | # 
 50 | #       ∫ (½|x|² − ϕ(x)) ν(x)dx  +  ∫ (½|x|² − ψ(x)) μ(x)dx 
 51 | # 
 52 | def compute_w2(phi, psi, mu, nu, x, y):
 53 |     n1, n2 = mu.shape
 54 |     return np.sum(0.5 * (x*x+y*y) * (mu + nu) - nu*phi - mu*psi)/(n1*n2)
 55 | 
 56 | # Parameters for Armijo-Goldstein
 57 | scaleDown = 0.95
 58 | scaleUp   = 1/scaleDown
 59 | upper = 0.75
 60 | lower = 0.25
 61 | # Armijo-Goldstein
 62 | def stepsize_update(sigma, value, oldValue, gradSq):
 63 |     diff = value - oldValue
 64 | 
 65 |     if diff > gradSq * sigma * upper:
 66 |         return sigma * scaleUp
 67 |     elif diff < gradSq * sigma * lower:
 68 |         return sigma * scaleDown
 69 |     return sigma
 70 | 
 71 | 
 72 | # Back-and-forth solver
 73 | def compute_ot(phi, psi, bf, sigma):
 74 | 
 75 |     kernel = initialize_kernel(n1, n2)
 76 |     rho = np.copy(mu)
 77 | 
 78 |     oldValue = compute_w2(phi, psi, mu, nu, x, y)
 79 | 
 80 |     for k in range(numIters+1):
 81 |           
 82 |         gradSq = update_potential(phi, rho, nu, kernel, sigma)
 83 | 
 84 |         bf.ctransform(psi, phi)
 85 |         bf.ctransform(phi, psi)
 86 | 
 87 |         value = compute_w2(phi, psi, mu, nu, x, y)
 88 |         sigma = stepsize_update(sigma, value, oldValue, gradSq)
 89 |         oldValue = value
 90 | 
 91 |         bf.pushforward(rho, phi, nu)
 92 | 
 93 |         gradSq = update_potential(psi, rho, mu, kernel, sigma)
 94 | 
 95 |         bf.ctransform(phi, psi)
 96 |         bf.ctransform(psi, phi)
 97 | 
 98 |         bf.pushforward(rho, psi, mu)
 99 | 
100 |         value = compute_w2(phi, psi, mu, nu, x, y)
101 |         sigma = stepsize_update(sigma, value, oldValue, gradSq)
102 |         oldValue = value
103 | 
104 |         if k % 5 == 0:
105 |             print(f'iter {k:4d},   W2 value: {value:.6e},   H1 err: {gradSq:.2e}')
106 | 
107 | # %% Example: Caffarelli's counterexample
108 | 
109 | # Caffarelli's counterexample illustrates that the optimal map can be discontinuous when the target domain is nonconvex.
110 | # Reference: Luis A. Caffarelli. The regularity of mappings with a convex potential. J. Amer. Math. Soc. 5, 1 (1992), 99–104.
111 | 
112 | 
113 | # Define the problem data and initial values
114 | 
# Grid of size n1 x n2
n1 = 1024   # x axis
n2 = 1024   # y axis

# Cell-centre coordinates on the unit square; x and y have shape (n2, n1).
# Each axis uses its own half-cell offset, so the y axis ends at 1-0.5/n2.
x, y = np.meshgrid(np.linspace(0.5/n1,1-0.5/n1,n1), np.linspace(0.5/n2,1-0.5/n2,n2))

# Both potentials start as ½|x|²
phi = 0.5 * (x*x + y*y)
psi = 0.5 * (x*x + y*y)
123 | 
124 | # Initialize densities
125 | mu = np.zeros((n2, n1))
126 | r = 0.125
127 | mu[(x-0.5)**2 + (y-0.5)**2 < r**2] = 1
128 | nu = np.zeros((n2, n1))
129 | idx = (((x-0.25)**2 + (y-0.5)**2 < r**2) & (x < 0.25) ) 
130 | idx = idx | (((x-0.75)**2 + (y-0.5)**2 < r**2) & (x > 0.75))
131 | idx = idx | ((x < 0.751) & (x > 0.249) & (y < 0.51) & (y > 0.49))
132 | nu[idx] = 1
133 | 
134 | # Normalize
135 | mu *= n1*n2 / np.sum(mu)
136 | nu *= n1*n2 / np.sum(nu)
137 | 
138 | 
139 | # Plot mu and nu
140 | fig, ax = plt.subplots(1, 2)
141 | ax[0].imshow(mu, origin='lower', extent=(0,1,0,1))
142 | ax[0].set_title("$\\mu$")
143 | ax[1].imshow(nu, origin='lower', extent=(0,1,0,1))
144 | ax[1].set_title("$\\nu$");
145 | 
146 | 
147 | # %% Run the back-and-forth solver
148 | 
149 | # Number of iterations for BFM
150 | numIters = 50
151 | 
152 | # Initial step size
153 | sigma = 4/np.maximum(mu.max(), nu.max())
154 | 
155 | tic = time()
156 | 
157 | # Initialize BFM method
158 | bf = BFM(n1, n2, mu)
159 | compute_ot(phi, psi, bf, sigma)
160 | 
161 | toc = time()
162 | print(f'\nElapsed time: {toc-tic:.2f}s')
163 | 
164 | 
165 | 
166 | # %% Visualizations
167 | 
168 | 
169 | my, mx = ma.masked_array(np.gradient(psi-0.5*(x*x+y*y), 1/n2, 1/n1), mask=((mu==0), (mu==0)))
170 | 
171 | fig, ax = plt.subplots()
172 | ax.contourf(x, y, mu+nu)
173 | ax.set_aspect('equal')
174 | skip = (slice(None,None,n1//50), slice(None,None,n2//50))
175 | ax.quiver(x[skip], y[skip], mx[skip], my[skip], color='yellow', angles='xy', scale_units='xy', scale=1);
176 | 
177 | # %% 
178 | # The discontinuity of the optimal map is hard to see as a quiver plot. So let's instead display only the x-component of the map.
179 | 
180 | fig, ax = plt.subplots(1, 2)
181 | ax[0].imshow(x + mx, origin='lower', extent=(0,1,0,1), cmap='plasma')
182 | 
183 | x_masked = ma.masked_array(x, mask=(nu==0))
184 | ax[1].imshow(x_masked, origin='lower', extent=(0,1,0,1), cmap='plasma')
185 | 
186 | 
187 | 
188 | # %% Displacement interpolation
189 | 
190 | # Plotting interpolation
191 | def plot_interpolation(mu, nu, phi, psi, n_fig=6):
192 |     fig, ax = plt.subplots(1, n_fig, figsize=(20,8))
193 |     [axi.axis('off') for axi in ax.ravel()]
194 |     vmax = mu.max()
195 |     ax[0].imshow(mu, vmax=vmax)
196 |     ax[0].set_title("$t=0$")
197 |     ax[n_fig-1].imshow(nu, vmax=vmax)
198 |     ax[n_fig-1].set_title("$t=1$")
199 | 
200 |     interpolate = np.zeros_like(mu)
201 |     rho_fwd = np.zeros_like(mu)
202 |     rho_bwd = np.zeros_like(mu)
203 | 
204 |     for i in range(1,n_fig-1):
205 |         t = i / (n_fig - 1)
206 |         psi_t = (1-t) * 0.5 * (x*x + y*y) + t * psi
207 |         phi_t = t * 0.5 * (x*x + y*y) + (1-t) * phi
208 | 
209 |         bf.pushforward(rho_fwd, psi_t, mu)
210 |         bf.pushforward(rho_bwd, phi_t, nu)
211 |         interpolate = (1-t) * rho_fwd + t * rho_bwd
212 |         ax[i].imshow(interpolate, vmax=vmax)  
213 |         ax[i].set_title(f"$t={i}/{n_fig-1}$")
214 | 
215 | plot_interpolation(mu, nu, phi, psi)
216 | 


--------------------------------------------------------------------------------
/python/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools>=42",
4 |     "wheel",
5 |     "pybind11>=2.6.0",
6 | ]
7 | 
8 | build-backend = "setuptools.build_meta"
9 | 


--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | from pybind11.setup_helpers import Pybind11Extension, build_ext
 4 | from pybind11 import get_cmake_dir
 5 | 
 6 | import sys
 7 | 
 8 | __version__ = "0.0.1"
 9 | 
10 | ext_modules = [
11 |     Pybind11Extension("w2",
12 |         ["src/main.cpp"],
13 |         define_macros = [('VERSION_INFO', __version__)],
14 |         ),
15 | ]
16 | 
17 | setup(
18 |     name="w2",
19 |     version=__version__,
20 |     author="Wonjun Lee",
21 |     author_email="wlee@math.ucla.edu",
22 |     description="Python wrapper for the back-and-forth method for optimal transport",
23 |     long_description="""
24 |         The code is based on C code of the back-and-forth method https://github.com/Math-Jacobs/bfm. 
25 |         Link to the paper: https://arxiv.org/pdf/1905.12154.pdf
26 |         """,
27 |     long_description_content_type="text/markdown",
28 |     ext_modules=ext_modules,
29 |     cmdclass={"build_ext": build_ext},
30 |     zip_safe=False,
31 | )


--------------------------------------------------------------------------------
/python/src/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <pybind11/pybind11.h>
  2 | #include <pybind11/numpy.h>
  3 | 
  4 | namespace py = pybind11;
  5 | 
  6 | 
  7 | 
  8 | class convex_hull{
  9 | public:
 10 |     int* indices;
 11 |     int  hullCount;
 12 | 
 13 |     convex_hull(int n){
 14 |         indices=new int[n];
 15 |         hullCount=0;
 16 |     }
 17 | 
 18 |     ~convex_hull(){
 19 |         delete[] indices;
 20 |     }
 21 | };
 22 | 
 23 | 
 24 | 
 25 | // --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
 26 | 
 27 | 
 28 | 
 29 | class BFM{
 30 | public:
 31 | int n1;
 32 | int n2;
 33 | 
 34 | double totalMass;
 35 | 
 36 | double *xMap;
 37 | double *yMap;
 38 | 
 39 | double *rho;
 40 | 
 41 | int *argmin;
 42 | double *temp;
 43 |     
 44 | convex_hull* hull;
 45 | 
 46 |     BFM(int n1, int n2, py::array_t<double> & mu_np){
 47 | 
 48 |         py::buffer_info mu_buf = mu_np.request();
 49 |         double *mu = static_cast<double *>(mu_buf.ptr);
 50 | 
 51 |         this->n1 = n1;
 52 |         this->n2 = n2;
 53 | 
 54 |         int n=fmax(n1,n2);
 55 |         hull   = new convex_hull(n);    
 56 |         argmin = new int[n];
 57 |         temp   = new double[n1*n2];
 58 | 
 59 |         xMap=new double[(n1+1)*(n2+1)];
 60 |         yMap=new double[(n1+1)*(n2+1)];
 61 | 
 62 |         for(int i=0;i<n2+1;i++){
 63 |             for(int j=0;j<n1+1;j++){
 64 |                 
 65 |                 double x=j/(n1*1.0);
 66 |                 double y=i/(n2*1.0);
 67 |                 
 68 |                 xMap[i*(n1+1)+j]=x;
 69 |                 yMap[i*(n1+1)+j]=y;
 70 |                 
 71 |             }
 72 |         }
 73 | 
 74 |         rho=new double[n1*n2];
 75 |         memcpy(rho,mu,n1*n2*sizeof(double));
 76 | 
 77 |         totalMass = 0;
 78 |         for(int i=0;i<n1*n2;++i){
 79 |             totalMass += mu[i];
 80 |         }
 81 |         totalMass /= n1*n2;
 82 | 
 83 |     }
 84 | 
 85 |     ~BFM(){
 86 |         delete [] xMap;
 87 |         delete [] yMap;
 88 |         delete [] rho;
 89 |         delete hull;
 90 |     }
 91 | 
 92 |     void ctransform(py::array_t<double> & dual_np, py::array_t<double> & phi_np){
 93 | 
 94 |         py::buffer_info phi_buf  = phi_np.request();
 95 |         py::buffer_info dual_buf = dual_np.request();
 96 | 
 97 |         double *phi  = static_cast<double *> (phi_buf.ptr);
 98 |         double *dual = static_cast<double *> (dual_buf.ptr);
 99 | 
100 |         compute_2d_dual_inside(dual, phi, hull, n1, n2);
101 |     }
102 | 
103 |     void pushforward(py::array_t<double> & rho_np, py::array_t<double> & phi_np, py::array_t<double> & nu_np){
104 | 
105 |         py::buffer_info phi_buf  = phi_np.request();
106 |         py::buffer_info nu_buf   = nu_np.request();
107 | 
108 |         double *phi = static_cast<double *> (phi_buf.ptr);
109 |         double *nu  = static_cast<double *> (nu_buf.ptr);
110 | 
111 |         calc_pushforward_map(phi);
112 |         sampling_pushforward(nu);
113 | 
114 |         py::buffer_info rho_buf  = rho_np.request();
115 |         memcpy(static_cast<double *> (rho_buf.ptr), rho, n1*n2*sizeof(double));
116 |     }
117 | 
118 | 
119 |     double compute_w2(py::array_t<double> & phi_np, py::array_t<double> & dual_np, py::array_t<double> & mu_np, py::array_t<double> & nu_np){
120 | 
121 |         py::buffer_info phi_buf  = phi_np.request();
122 |         py::buffer_info dual_buf = dual_np.request();
123 |         py::buffer_info mu_buf   = mu_np.request();
124 |         py::buffer_info nu_buf   = nu_np.request();
125 | 
126 |         double *phi  = static_cast<double *> (phi_buf.ptr);
127 |         double *dual = static_cast<double *> (dual_buf.ptr);
128 |         double *mu   = static_cast<double *> (mu_buf.ptr);
129 |         double *nu   = static_cast<double *> (nu_buf.ptr);
130 |         
131 |         int pcount=n1*n2;
132 |         
133 |         double value=0;
134 |         
135 |         for(int i=0;i<n2;i++){
136 |             for(int j=0;j<n1;j++){
137 |                 double x=(j+.5)/(n1*1.0);
138 |                 double y=(i+.5)/(n2*1.0);
139 |                 
140 |                 value+=.5*(x*x+y*y)*(mu[i*n1+j]+nu[i*n1+j])-nu[i*n1+j]*phi[i*n1+j]-mu[i*n1+j]*dual[i*n1+j];
141 |             }
142 |         }
143 | 
144 |         printf("value calculated");
145 | 
146 |         value/=pcount;
147 |         
148 |         return value;
149 |     }
150 | 
151 |     void compute_2d_dual_inside(double *dual, double *u, convex_hull *hull, int n1, int n2){
152 |         
153 |         int pcount=n1*n2;
154 |         
155 |         int n=fmax(n1,n2);
156 |         
157 |         memcpy(temp, u, pcount*sizeof(double));
158 |         
159 |         
160 |         for(int i=0;i<n2;i++){
161 |             compute_dual(&dual[i*n1], &temp[i*n1], argmin, hull, n1);
162 |             
163 |         }
164 |         transpose_doubles(temp, dual, n1, n2);
165 |         for(int i=0;i<n1*n2;i++){
166 |             dual[i]=-temp[i];
167 |         }
168 |         for(int j=0;j<n1;j++){
169 |             compute_dual(&temp[j*n2], &dual[j*n2], argmin, hull, n2);
170 |             
171 |         }
172 |         transpose_doubles(dual, temp, n2, n1);
173 |     }
174 | 
175 | 
176 |     void compute_dual(double *dual, double *u, int *dualIndicies, convex_hull *hull, int n){
177 |         
178 |         get_convex_hull(u, hull, n);
179 |        
180 |         
181 |         compute_dual_indices(dualIndicies, u, hull, n);
182 |         
183 |         for(int i=0;i<n;i++){
184 |             double s=(i+.5)/(n*1.0);
185 |             int index=dualIndicies[i];
186 |             double x=(index+.5)/(n*1.0);
187 |             double v1=s*x-u[dualIndicies[i]];
188 |             double v2=s*(n-.5)/(n*1.0)-u[n-1];
189 |             if(v1>v2){
190 |                 dual[i]=v1;
191 |             }else{
192 |                 dualIndicies[i]=n-1;
193 |                 dual[i]=v2;
194 |             }
195 |             
196 |         }
197 |         
198 |     }
199 | 
200 | 
201 |     int sgn(double x){
202 |         
203 |         int truth=(x>0)-(x<0);
204 |         return truth;
205 |         
206 |     }
207 | 
208 | 
209 |     void transpose_doubles(double *transpose, double *data, int n1, int n2){
210 |         
211 |         for(int i=0;i<n2;i++){
212 |             for(int j=0;j<n1;j++){
213 |              
214 |                 transpose[j*n2+i]=data[i*n1+j];
215 |             }
216 |         }
217 |     }
218 | 
219 | 
220 |     void get_convex_hull(double *u, convex_hull *hull, int n){
221 |         
222 |         hull->indices[0]=0;
223 |         hull->indices[1]=1;
224 |         hull->hullCount=2;
225 |         
226 |         for(int i=2;i<n;i++){
227 |             
228 |             add_point(u, hull, i);
229 |             
230 |         }
231 |         
232 |     }
233 | 
234 | 
235 |     void add_point(double *u, convex_hull *hull, int i){
236 |         
237 |         
238 |         if(hull->hullCount<2){
239 |             hull->indices[1]=i;
240 |             hull->hullCount++;
241 |         }else{
242 |             int hc=hull->hullCount;
243 |             int ic1=hull->indices[hc-1];
244 |             int ic2=hull->indices[hc-2];
245 |             
246 |             double oldSlope=(u[ic1]-u[ic2])/(ic1-ic2);
247 |             double slope=(u[i]-u[ic1])/(i-ic1);
248 |             
249 |             if(slope>=oldSlope){
250 |                 int hc=hull->hullCount;
251 |                 hull->indices[hc]=i;
252 |                 hull->hullCount++;
253 |             }else{
254 |                 hull->hullCount--;
255 |                 add_point(u, hull, i);
256 |             }
257 |         }
258 |     }
259 | 
260 | 
261 | 
262 | 
263 |     double interpolate_function(double *function, double x, double y, int n1, int n2){
264 |         
265 |         int xIndex=fmin(fmax(x*n1-.5 ,0),n1-1);
266 |         int yIndex=fmin(fmax(y*n2-.5 ,0),n2-1);
267 |         
268 |         double xfrac=x*n1-xIndex-.5;
269 |         double yfrac=y*n2-yIndex-.5;
270 |         
271 |         int xOther=xIndex+sgn(xfrac);
272 |         int yOther=yIndex+sgn(yfrac);
273 |         
274 |         xOther=fmax(fmin(xOther, n1-1),0);
275 |         yOther=fmax(fmin(yOther, n2-1),0);
276 |         
277 |         double v1=(1-fabs(xfrac))*(1-fabs(yfrac))*function[yIndex*n1+xIndex];
278 |         double v2=fabs(xfrac)*(1-fabs(yfrac))*function[yIndex*n1+xOther];
279 |         double v3=(1-fabs(xfrac))*fabs(yfrac)*function[yOther*n1+xIndex];
280 |         double v4=fabs(xfrac)*fabs(yfrac)*function[yOther*n1+xOther];
281 |         
282 |         double v=v1+v2+v3+v4;
283 |         
284 |         return v;
285 |         
286 |     }
287 | 
288 | 
289 | 
290 | 
291 |     void compute_dual_indices(int *dualIndicies, double *u, convex_hull *hull, int n){
292 |         
293 |         int counter=1;
294 |         int hc=hull->hullCount;
295 |         
296 |         for(int i=0;i<n;i++){
297 |            
298 |             double s=(i+.5)/(n*1.0);
299 |             int ic1=hull->indices[counter];
300 |             int ic2=hull->indices[counter-1];
301 |             
302 |             double slope=n*(u[ic1]-u[ic2])/(ic1-ic2);
303 |             while(s>slope&&counter<hc-1){
304 |                 counter++;
305 |                 ic1=hull->indices[counter];
306 |                 ic2=hull->indices[counter-1];
307 |                 slope=n*(u[ic1]-u[ic2])/(ic1-ic2);
308 |             }
309 |             dualIndicies[i]=hull->indices[counter-1];
310 |             
311 |         }
312 |     }
313 | 
314 | 
315 | 
316 |     void calc_pushforward_map(double *dual){
317 |         
318 |         
319 |         double xStep=1.0/n1;
320 |         double yStep=1.0/n2;
321 |         
322 |         
323 |         for(int i=0;i<n2+1;i++){
324 |             for(int j=0;j<n1+1;j++){
325 |                 double x=j/(n1*1.0);
326 |                 double y=i/(n2*1.0);
327 |                 
328 |                 double dualxp=interpolate_function(dual, x+xStep, y, n1, n2);
329 |                 double dualxm=interpolate_function(dual, x-xStep, y, n1, n2);
330 |                 
331 |                 double dualyp=interpolate_function(dual, x, y+yStep, n1, n2);
332 |                 double dualym=interpolate_function(dual, x, y-yStep, n1, n2);
333 |                 
334 |                 xMap[i*(n1+1)+j]=.5*n1*(dualxp-dualxm);
335 |                 yMap[i*(n1+1)+j]=.5*n2*(dualyp-dualym);
336 |                 
337 |                 
338 |             }
339 |         }
340 |         
341 |     }
342 | 
343 | 
344 | 
345 |     void sampling_pushforward(double *mu){
346 | 
347 | 
348 |         int pcount=n1*n2;
349 |         
350 |         memset(rho,0,pcount*sizeof(double));
351 |         
352 |         for(int i=0;i<n2;i++){
353 |             for(int j=0;j<n1;j++){
354 |                 
355 |                 double mass=mu[i*n1+j];
356 |                 
357 |                 if(mass>0){
358 |                     
359 |                     double xStretch0=fabs(xMap[i*(n1+1)+j+1]-xMap[i*(n1+1)+j]);
360 |                     double xStretch1=fabs(xMap[(i+1)*(n1+1)+j+1]-xMap[(i+1)*(n1+1)+j]);
361 |                     
362 |                     double yStretch0=fabs(yMap[(i+1)*(n1+1)+j]-yMap[i*(n1+1)+j]);
363 |                     double yStretch1=fabs(yMap[(i+1)*(n1+1)+j+1]-yMap[i*(n1+1)+j+1]);
364 |                     
365 |                     double xStretch=fmax(xStretch0, xStretch1);
366 |                     double yStretch=fmax(yStretch0, yStretch1);
367 |                     
368 |                     int xSamples=fmax(n1*xStretch,1);
369 |                     int ySamples=fmax(n2*yStretch,1);
370 | 
371 |                     double factor=1/(xSamples*ySamples*1.0);
372 |                     
373 |                     for(int l=0;l<ySamples;l++){
374 |                         for(int k=0;k<xSamples;k++){
375 |                             
376 |                             double a=(k+.5)/(xSamples*1.0);
377 |                             double b=(l+.5)/(ySamples*1.0);
378 |                             
379 |                             double xPoint=(1-b)*(1-a)*xMap[i*(n1+1)+j]+(1-b)*a*xMap[i*(n1+1)+j+1]+b*(1-a)*xMap[(i+1)*(n1+1)+j]+a*b*xMap[(i+1)*(n1+1)+(j+1)];
380 |                             double yPoint=(1-b)*(1-a)*yMap[i*(n1+1)+j]+(1-b)*a*yMap[i*(n1+1)+j+1]+b*(1-a)*yMap[(i+1)*(n1+1)+j]+a*b*yMap[(i+1)*(n1+1)+(j+1)];
381 |                             
382 |                             double X=xPoint*n1-.5;
383 |                             double Y=yPoint*n2-.5;
384 |                             
385 |                             int xIndex=X;
386 |                             int yIndex=Y;
387 |                             
388 |                             double xFrac=X-xIndex;
389 |                             double yFrac=Y-yIndex;
390 |                             
391 |                             int xOther=xIndex+1;
392 |                             int yOther=yIndex+1;
393 |                             
394 |                             xIndex=fmin(fmax(xIndex,0),n1-1);
395 |                             xOther=fmin(fmax(xOther,0),n1-1);
396 |                             
397 |                             yIndex=fmin(fmax(yIndex,0),n2-1);
398 |                             yOther=fmin(fmax(yOther,0),n2-1);
399 |                             
400 |                             
401 |                             rho[yIndex*n1+xIndex]+=(1-xFrac)*(1-yFrac)*mass*factor;
402 |                             rho[yOther*n1+xIndex]+=(1-xFrac)*yFrac*mass*factor;
403 |                             rho[yIndex*n1+xOther]+=xFrac*(1-yFrac)*mass*factor;
404 |                             rho[yOther*n1+xOther]+=xFrac*yFrac*mass*factor;
405 |                             
406 |                         }
407 |                     }
408 |                     
409 |                 }
410 |                 
411 |             }
412 |         }
413 |         
414 |         double sum=0;
415 |         for(int i=0;i<pcount;i++){
416 |             sum+=rho[i]/pcount;
417 |         }
418 |         for(int i=0;i<pcount;i++){
419 |             rho[i]*=totalMass/sum;
420 |         }
421 |     }
422 | 
423 | };
424 | 
425 | 
426 | // --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
427 | 
428 | 
429 | PYBIND11_MODULE(w2, m) {
430 |     // optional module docstring
431 |     m.doc() = "pybind11 for w2 code";
432 | 
433 |     py::class_<BFM>(m, "BFM")
434 |         .def(py::init<int, int, py::array_t<double> &>())
435 |         .def("ctransform", &BFM::ctransform)
436 |         .def("pushforward", &BFM::pushforward);
437 | }
438 | 


--------------------------------------------------------------------------------