├── libmatch ├── __init__.py ├── lap │ ├── __init__.py │ ├── permanent-0.0.1 │ │ ├── setup.py │ │ └── src │ │ │ └── permanent.cpp │ ├── perm.py │ ├── murty.py │ ├── perm.cpp │ ├── lap.py │ └── munkres.py ├── environments.py └── structures.py ├── tools ├── smap2colvar.sh ├── libatoms2xyz.sh ├── costs.py ├── get_atomic_prop.py ├── eigenvals.py ├── mkball.py ├── MultipleKernelLearning.py ├── krr-test.py ├── krr-Cortes-test.py ├── select_landmarks.py ├── alchemy_rules.py ├── env_clust.py ├── env_corr.py ├── krr.py ├── krr-Cortes.py └── cluster.py ├── README.md ├── example ├── mol-50.xyz.idx └── sim.ref ├── .gitignore ├── LICENSE ├── glosoap.py └── glosoapAlchemy.py /libmatch/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ "structures", "environments" ] 2 | -------------------------------------------------------------------------------- /libmatch/lap/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ "lap", "murty", 'munkres', "perm" ] 2 | -------------------------------------------------------------------------------- /tools/smap2colvar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | awk 'BEGIN{ print "#! FIELDS index cv1 cv2"; n=0; } !/#/{n++; printf "% 4d % 10.5e % 10.5e\n", n, $1, $2}' | paste - $1 4 | -------------------------------------------------------------------------------- /tools/libatoms2xyz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sed 's/.*Lattice=["'\'']\([^"'\'']*\)['\''"].*/# CELL(GENH): \1 Traj: libatoms/' | awk '!/^ *[A-Z]/{print $0} /^ *[A-Z]/{print $1, $2, $3, $4}' 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # glosim 2 | A Python package to compute similarities between molecules and structures 3 | 4 | This package is not further developed. We might be able to incorporate small bugfixes, but the original developers do not have time to properly maintain it. 
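As a minimal usage sketch (assuming a Python 2 environment with `quippy` and `numpy` installed, and an xyz structure file such as the `mol-50.xyz` referenced by `example/mol-50.xyz.idx`, which is not bundled in this dump), SOAP vectors can be generated with `glosoap.py`; the flags below simply restate its defaults:

```bash
# Compute SOAP descriptors for every atomic environment in the input structures.
# -n / -l : number of radial / angular basis functions
# -c      : radial cutoff in Angstrom, -g : atom Gaussian sigma
python glosoap.py example/mol-50.xyz -n 8 -l 6 -c 5.0 -g 0.5
```

The descriptors are written to `<prefix>-n8-l6-c5.0-g0.5.soap`, where the prefix defaults to the input file name; kernel-regression and landmark-selection utilities that operate on such output live under `tools/`.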
5 | -------------------------------------------------------------------------------- /tools/costs.py: -------------------------------------------------------------------------------- 1 | """List of cost functions """ 2 | import numpy as np 3 | 4 | def mae(x): 5 | return np.mean(np.absolute(x)) 6 | def mse(x): 7 | return np.mean(np.power(x,2)) 8 | def rmse(x): 9 | return np.sqrt(mse(x)) 10 | def sup_e(x): 11 | return np.amax(np.absolute(x)) -------------------------------------------------------------------------------- /tools/get_atomic_prop.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from collections import defaultdict 3 | 4 | pname='Electro-Negativity' 5 | #pname='Atomic Weight' 6 | columns = defaultdict(list) 7 | with open('elements.csv') as csvfile: 8 | reader = csv.DictReader(csvfile) 9 | for row in reader: 10 | for (k,v) in row.items(): 11 | try : 12 | v=int(v) 13 | except: 14 | try: 15 | v=float(v) 16 | except: 17 | continue 18 | columns[k].append(v) 19 | 20 | pdict=dict(zip(columns['Atomic Number'],columns[pname])) 21 | print pdict 22 | -------------------------------------------------------------------------------- /example/mol-50.xyz.idx: -------------------------------------------------------------------------------- 1 | 50 2 | 0 16 3 | 829 17 4 | 1708 11 5 | 2286 13 6 | 2965 12 7 | 3594 12 8 | 4222 12 9 | 4851 16 10 | 5680 16 11 | 6508 15 12 | 7287 15 13 | 8065 18 14 | 8994 16 15 | 9823 16 16 | 10652 15 17 | 11431 15 18 | 12209 13 19 | 12888 15 20 | 13667 20 21 | 14696 14 22 | 15425 21 23 | 16504 17 24 | 17382 18 25 | 18310 17 26 | 19189 17 27 | 20068 11 28 | 20647 13 29 | 21326 17 30 | 22204 17 31 | 23083 17 32 | 23961 17 33 | 24840 12 34 | 25469 18 35 | 26397 16 36 | 27225 10 37 | 27754 20 38 | 28782 18 39 | 29711 15 40 | 30490 17 41 | 31369 20 42 | 32398 14 43 | 33127 13 44 | 33805 16 45 | 34633 13 46 | 35311 17 47 | 36190 13 48 | 36869 14 49 | 37598 12 50 | 38226 16 51 | 39055 12 52 | 39684 12 53 | -------------------------------------------------------------------------------- /libmatch/lap/permanent-0.0.1/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup, Extension 4 | import numpy 5 | 6 | setup(name = "permanent", 7 | version = "0.0.1", 8 | description = "Calculates the permanent of a Numpy matrix upto given accuracy using random montecarlo algorithm, or Ryser algorithm", 9 | author = "Sandip De, Michele Ceriotti", 10 | author_email = "1sandipde@gmail.com", 11 | maintainer = "1sandipde@gmail.com", 12 | url = "https://github.com/sandipde/MCpermanent", 13 | ext_modules = [ 14 | Extension( 15 | 'permanent', ['./src/permanent.cpp'], 16 | extra_compile_args=["-O3","-std=c++0x"], 17 | include_dirs=[numpy.get_include()]), 18 | ], 19 | 20 | ) 21 | 22 | -------------------------------------------------------------------------------- /tools/eigenvals.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | import sys 5 | 6 | def main(fname, csi="1.0"): 7 | csi = float(csi) 8 | ffile=open(fname, "r") 9 | fline = ffile.readline() 10 | while fline[0]=='#': fline=ffile.readline() 11 | sline=map(float,fline.split()) 12 | nel = len(sline) 13 | fmat = np.zeros((nel,nel), float) 14 | ik = 0 15 | while (len(sline)==nel): 16 | fmat[ik]=np.asarray(sline) 17 | fline = ffile.readline() 18 | sline=map(float,fline.split()) 19 | ik+=1 20 | if 
fmat[0,0]<1e-4: # automatic heuristic to detect if this is a distance or a kernel 21 | fmat = 0.5*(2-fmat*fmat) 22 | fmat = fmat **csi 23 | v = np.linalg.eigvalsh(fmat) 24 | print "finished reading" 25 | for i in range(len(v)): 26 | print i, v[i] 27 | 28 | if __name__ == '__main__': 29 | main(*sys.argv[1:]) 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | *.idx 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Laboratory of Computational Science and Modeling 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /tools/mkball.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | import sys, glob 5 | from ipi.utils.io import read_file, print_file 6 | from ipi.engine.atoms import Atoms 7 | from ipi.utils.depend import * 8 | from ipi.utils.units import * 9 | 10 | def main(filename, natoms): 11 | 12 | ipos=open(filename,"r") 13 | imode=filename[-3:] 14 | natoms = int(natoms) 15 | 16 | ifr = 0 17 | nn = 2.5 18 | while True: 19 | try: 20 | ret = read_file(imode,ipos,readcell=True) 21 | pos = ret["atoms"] 22 | cell = ret["cell"] 23 | q=depstrip(pos.q).copy() 24 | cell.array_pbc(q) 25 | 26 | natin = pos.natoms 27 | q.shape=(natin,3) 28 | s=np.dot(depstrip(cell.ih),q.T).T 29 | 30 | # now replicate in scaled coordinates 31 | nrep = int((natoms/natin*nn)**(1./3.)) 32 | 33 | natrep = natin*(2*nrep+1)**3 34 | 35 | ns = np.zeros((natrep,3)) 36 | ik = 0 37 | for ix in range(-nrep,nrep+1): 38 | for iy in range(-nrep,nrep+1): 39 | for iz in range(-nrep,nrep+1): 40 | for i in range(natin): 41 | ns[ik] = s[i]+[ix,iy,iz] 42 | ik+=1 43 | 44 | ns = np.dot(depstrip(cell.h),ns.T).T 45 | 46 | # now removes atoms until we only have natoms 47 | d = np.zeros(natrep) 48 | for i in range(natrep): 49 | d[i] = np.sqrt(np.dot(ns[i],ns[i])) 50 | di = np.argsort(d) 51 | 52 | npos = Atoms(natoms) 53 | for i in range(natoms): 54 | npos.q[3*i:3*(i+1)]=ns[di[i]] 55 | 56 | except EOFError: # finished reading files 57 | sys.exit(0) 58 | 59 | print_file("pdb",npos, cell) 60 | ifr+=1 61 | 62 | 63 | if __name__ == '__main__': 64 | main(*sys.argv[1:]) 65 | -------------------------------------------------------------------------------- /libmatch/lap/perm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | __all__ = [ "xperm", "rndperm","mcperm" ] 4 | 5 | def _mcperm(mtx, eps = 1e-3, ntry=None, seed=None): 6 | sz = len(mtx[0]) 7 | idx = np.asarray(xrange(sz),int) 8 | 9 | prm = 0 10 | prm2 = 0 11 | pstride = 100*sz 12 | i=0 13 | if not seed is None: 14 | np.random.seed(seed) 15 | while True: 16 | np.random.shuffle(idx) 17 | pi = 1. 18 | for j in xrange(sz): 19 | pi *= mtx[j, idx[j]] 20 | prm += pi 21 | prm2 += pi*pi 22 | i+=1 23 | if (not ntry is None) and i >= ntry: break 24 | if ntry is None and (i)%pstride==0: 25 | err=np.sqrt( (prm2-prm*prm/i)/i/(i-1) ) / (prm/i) 26 | if err> sys.stderr, "Cannot find mcpermanent.so module in pythonpath. Permanent evaluations will be very slow and approximate." 
40 | print >> sys.stderr, "Get it from https://github.com/sandipde/MCpermanent " 41 | def mcperm(mtx, eps=1e-2, ntry=None, seed=None): 42 | return _mcperm(mtx,eps,ntry,seed) 43 | def xperm(mtx, eps=1e-6): 44 | return _mcperm(mtx,eps) 45 | def rematch(mtx, gamma, eps): 46 | raise ValueError("No Python equivalent to rematch function...") 47 | 48 | import time, sys 49 | if __name__ == "__main__": 50 | 51 | filename = sys.argv[1] 52 | mtx=np.loadtxt(filename) 53 | #mtx=np.random.rand(10,10) 54 | st=time.time() 55 | new=_mcperm(mtx, eps=1e-2) 56 | tnew = time.time()-st 57 | st=time.time() 58 | cnew=mcperm(mtx,1e-3) 59 | ctnew = time.time() -st 60 | st=time.time() 61 | if len(mtx[0])<30: 62 | ref=xperm(mtx) 63 | else: ref=0 64 | tref = time.time()-st 65 | 66 | print "Reference: ", ref, " time: ", tref 67 | print "New_method: ", new, " time: ", tnew 68 | print "New_method C++: ", cnew, " time: ",ctnew 69 | -------------------------------------------------------------------------------- /tools/MultipleKernelLearning.py: -------------------------------------------------------------------------------- 1 | """Multiple Kernel Learning methods """ 2 | 3 | import numpy as np 4 | import sys 5 | import costs as cst 6 | 7 | 8 | """MKL KRR from Cortes 2009 : 'L2 regularization for learning kernels'.""" 9 | 10 | def TrainKRRCortes(kernels,prop,verbose=False,**KRRCortesParam): 11 | """kernels is a list of numpy nxn kernel matrices associated to the training set. 12 | prop is a numpy array containing the properties corresponding to the training set. 13 | KRRCortesParam is a dictionary of the parameters of the algorithm.""" 14 | # unpack algortihm parameters 15 | Lambda = KRRCortesParam['Lambda'] 16 | eta = KRRCortesParam['eta'] 17 | sigma = KRRCortesParam['sigma'] 18 | epsilon = KRRCortesParam['epsilon'] 19 | Nmax = KRRCortesParam['maxIter'] 20 | mu0 = KRRCortesParam['mu0'] 21 | 22 | nbOfKernels = len(kernels) 23 | 24 | n,m = kernels[0].shape 25 | propVar = np.var(prop) 26 | # Initialize vectors 27 | Id = np.eye(n, M=n, k=0, dtype=np.float64) 28 | alphaNew = np.zeros((n,1), dtype=np.float64) 29 | alphaOld = np.zeros((n,1), dtype=np.float64) 30 | v = np.zeros(nbOfKernels,dtype=np.float64) 31 | # mu = np.zeros(nbOfKernels,dtype=np.float64) 32 | mu = mu0 33 | 34 | # Initialize the algorithm tr(tk)/(N vp) 35 | kernelMat = setKernelMat(kernels,mu) 36 | 37 | regParam = sigma**2 * kernelMat.trace() / (n * propVar) 38 | alphaNew = np.dot(np.linalg.inv(kernelMat + regParam*Id),prop) 39 | MaeInit = cst.mae(np.dot(alphaNew,kernelMat)-prop) 40 | if verbose is True: 41 | print 'Initial MAE : {:.4e}'.format(MaeInit) 42 | N = 0 43 | while(np.linalg.norm(alphaNew-alphaOld) > epsilon and N <= Nmax): 44 | # print 'ENter ##################' 45 | alphaOld = alphaNew 46 | 47 | # update search direction for mu 48 | for it,kernel in enumerate(kernels): 49 | v[it] = np.dot(alphaOld.T,np.dot(kernel,alphaOld)) 50 | 51 | # update mu 52 | mu = mu0 + Lambda * v / np.linalg.norm(v) 53 | 54 | # update ktot 55 | kernelMat = setKernelMat(kernels,mu) 56 | 57 | # update alpha 58 | regParam = sigma**2 * kernelMat.trace() / (n * propVar) 59 | #print regParam, kernelMat.trace(), mu 60 | alphaNew = eta * alphaOld + (1-eta) * np.dot(np.linalg.inv(kernelMat+ regParam*Id),prop) 61 | N += 1 62 | 63 | if verbose is True: 64 | Mae = cst.mae(np.dot(alphaNew,kernelMat)-prop) 65 | print 'N = {:.0f} / alpha diff = {:.3e} / MAE = {:.4e}'.format(N,np.linalg.norm(alphaNew-alphaOld),Mae) 66 | 67 | 68 | print 'Training the weights with Cortes algorithm has ended 
in {:.0f} iterrations \n alpha diff = {:.3e} / Initial Mae={:.4e} / Final Mae={:.4e}'\ 69 | .format(N,np.linalg.norm(alphaNew-alphaOld),MaeInit,cst.mae(np.dot(alphaNew,kernelMat)-prop)) 70 | propTr = np.dot(alphaNew,kernelMat) 71 | return alphaNew, mu, propTr 72 | 73 | 74 | def PredictKRRCortes(kernels,alpha,mu): 75 | """kernels is a list of numpy nxm kernel matrices associated to the K(x_tr,x_te), 76 | n is the number of training elements and m is the number of testing elements. 77 | alpha is the optimal weight vector for the KRR step. 78 | mu is the optimal weight for linearly combining the kernels""" 79 | kernelMat = setKernelMat(kernels,mu) 80 | return np.dot(alpha,kernelMat) 81 | 82 | def setKernelMat(kernels,mu): 83 | """Combine linarly the kernels with weights mu.""" 84 | n,m = kernels[0].shape 85 | kernelMat = np.zeros((n,m),dtype=np.float64) 86 | 87 | for it,kernel in enumerate(kernels): 88 | kernelMat += mu[it] * kernel 89 | return kernelMat 90 | -------------------------------------------------------------------------------- /tools/krr-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import argparse 4 | import numpy as np 5 | import math 6 | 7 | bufsize = 1000 8 | def main(kernels, props, weights, kweights, csi, noidx=False): 9 | if kweights == "": 10 | kweights = np.ones(len(kernels)) 11 | else: 12 | kweights = np.asarray(kweights.split(","),float) 13 | kweights /= kweights.sum() 14 | # reads kernel(s) 15 | csi=float(csi) 16 | wvec = np.loadtxt(weights) 17 | tc = np.asarray(wvec[:,0], float) 18 | icols = np.asarray(wvec[:,1], int) 19 | irows = np.asarray(wvec[:,2], int) 20 | 21 | p=np.loadtxt(props) 22 | 23 | 24 | print "# Using kernels ", kernels, " with weights ", kweights 25 | fk=[] 26 | for i in xrange(0,len(kernels)): 27 | fk.append(open(kernels[i], "r") ) 28 | # skips comments 29 | f=fk[i] 30 | # determines size of reference set 31 | fstart = f.tell() 32 | fline = f.readline() 33 | while fline[0]=='#': 34 | fstart = f.tell() 35 | fline=f.readline() 36 | nref = len(fline.split()) 37 | f.seek(fstart) 38 | 39 | # average counters 40 | testmae = 0 41 | trainmae = 0 42 | testrms = 0 43 | trainrms = 0 44 | testsup = 0 45 | trainsup = 0 46 | ntest = 0 47 | ntrain = 0 48 | ktot = 0 49 | while True: 50 | for i in xrange(0,len(kernels)): 51 | chunk = np.fromfile(fk[i], dtype="float",count=(nref)*bufsize, sep=" ") 52 | if len(chunk) ==0: break 53 | nk = len(chunk)/(nref) 54 | if i==0: 55 | kij = chunk.reshape((nk,nref))[:,icols]*kweights[i] 56 | else: 57 | kij += chunk.reshape((nk,nref))[:,icols]*kweights[i] 58 | if len(chunk) ==0: break 59 | kij = kij**csi 60 | krp = np.dot(kij,tc) 61 | for k in xrange(nk): 62 | if (not noidx) and (ktot+k in irows): 63 | lab = "TRAIN" 64 | trainmae += abs(krp[k] - p[ktot+k]) 65 | trainrms += (krp[k] - p[ktot+k])**2 66 | trainsup = max(trainsup, abs(krp[k] - p[ktot+k])) 67 | ntrain += 1 68 | else: 69 | lab = "TEST" 70 | testmae += abs(krp[k] - p[ktot+k]) 71 | testrms += (krp[k] - p[ktot+k])**2 72 | testsup = max(testsup, abs(krp[k] - p[ktot+k])) 73 | ntest +=1 74 | print k+ktot, p[ktot+k], krp[k], lab 75 | ktot += nk 76 | if ntrain >0: 77 | print "# Train points MAE=%f RMSE=%f SUP=%f" % (trainmae/ntrain, np.sqrt(trainrms/ntrain), trainsup) 78 | if ntest>0: 79 | print "# Test points MAE=%f RMSE=%f SUP=%f " % (testmae/ntest, np.sqrt(testrms/ntest), testsup) 80 | 81 | if __name__ == '__main__': 82 | parser = argparse.ArgumentParser(description="""Computes KRR predictions 
from a weights vector obtained from a previous run of krr.py with --saveweights.""") 83 | parser.add_argument("--kernels", nargs='+', type=str, help="Kernel matrix (more than one can be read!)") 84 | parser.add_argument("--props", default="", type=str, help="Property file (for cross-check)") 85 | parser.add_argument("--kweights", default="1", type=str, help="Comma-separated list of kernel weights (when multiple kernels are provided)") 86 | parser.add_argument("--weights", default="", type=str, help="KRR weights corresponding to the reference fit") 87 | 88 | #parser.add_argument("kernel", nargs=1, help="Kernel matrix") 89 | #parser.add_argument("weights", nargs=1, help="Weights vector") 90 | #parser.add_argument("props", nargs=1, help="Property file (for cross-check)") 91 | parser.add_argument("--csi", type=float, default='1.0', help="Kernel scaling") 92 | parser.add_argument("--noidx", action="store_true", help="Ignores indices and treats all points as testing points") 93 | 94 | args = parser.parse_args() 95 | 96 | main(kernels=args.kernels, props=args.props, weights=args.weights, kweights=args.kweights, csi=args.csi, noidx=args.noidx) 97 | 98 | -------------------------------------------------------------------------------- /libmatch/lap/murty.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from copy import copy, deepcopy 3 | import sys 4 | from lap import best_pairs 5 | import numpy as np 6 | 7 | __all__ = [ "cost_list" ] 8 | 9 | myINF=1e100 10 | def main(filename='tmp', mxbest=20, mxdelta=50): 11 | a=np.loadtxt(filename) 12 | costs=best_costs(a,mxbest=int(mxbest), mxdelta=float(mxdelta)) 13 | for x in costs: 14 | print x 15 | 16 | def factorial(n): 17 | f = 1 18 | for i in xrange(2,n+1): 19 | f*=i 20 | return f 21 | 22 | def cost_list(matrix, mxdelta=None, mxbest=None): 23 | #nbest: number of best costs 24 | best_costs=[] #output array containing costs 25 | node_rule_list=[] # the node gen rule list 26 | cost_list=[] #list containing total costs 27 | rsrv_list=[] # list containing the costs which are not included in subset 28 | 29 | mxfac = factorial(len(matrix)) 30 | if mxbest is None or mxbest > mxfac: 31 | mxbest = mxfac 32 | 33 | # The best cost by hugarian method 34 | hun=best_pairs(matrix) 35 | cost=0.0 36 | for pair in hun: 37 | cost+=matrix[pair[0],pair[1]] 38 | verybest = cost 39 | best_costs.append(cost) 40 | if mxdelta == 0.0: return best_costs 41 | 42 | # Murty's algorithm for finding next k best costs 43 | partition_list(matrix,node_rule_list,cost_list,rsrv_list) 44 | for k in range(1,mxbest): 45 | min_cost=min(cost_list) 46 | if not mxdelta is None: 47 | if min_cost - verybest > mxdelta: break 48 | for i in xrange(len(cost_list)): 49 | if cost_list[i] - verybest > mxdelta: cost_list[i]=myINF 50 | best_costs.append(min_cost) 51 | min_index=cost_list.index(min_cost) 52 | partition_list(matrix,node_rule_list,cost_list,rsrv_list) 53 | cost_list[min_index]=myINF 54 | return best_costs 55 | 56 | 57 | def partition_list(matrix_orig,node_rule_list,cost_list,rsrv_list): 58 | try: 59 | min_index=cost_list.index(min(cost_list)) 60 | rsrv_cost=rsrv_list[min_index] 61 | except: 62 | rsrv_cost=0.0 63 | try: 64 | node_rule=node_rule_list[min_index] 65 | except: 66 | node_rule=copy(node_rule_list) 67 | 68 | matrix,rsrv_cost=gen_partition(matrix_orig,node_rule) 69 | pair=[] 70 | assignments=lap(matrix) 71 | 72 | for m in range (0,len(assignments)-1): 73 | rule=copy(node_rule) 74 | pair=assignments[m] 75 | 
rule.append([-pair[0]-1,-pair[1]-1]) # - sign to denote the inf value position. -1 added to avoid confusion of +/- 0. 76 | 77 | for x in range(0,m): 78 | rule.append([assignments[x][0],assignments[x][1]]) 79 | 80 | node,rsrv_cost=gen_partition(matrix_orig,rule) 81 | hun=best_pairs(node) 82 | node_rule_list.append(rule) 83 | cost=0.0 84 | for pair in hun: 85 | cost+=node[pair[0],pair[1]] 86 | cost_list.append(cost+rsrv_cost) 87 | rsrv_list.append(rsrv_cost) 88 | return 89 | 90 | def gen_partition(matrix,rule): 91 | node=copy(matrix) 92 | rsrv_cost=0.0 93 | # print rule 94 | if (len(rule)==0): 95 | return node,rsrv_cost # node is the main matrix itself 96 | # indices: indices of the negative pairs. Number of the appearance 97 | # of negative indicies in the rule suggests the steps in gen the partition. 98 | indices=[i for i in range(len(rule)) if rule[i][0]<0] 99 | # print "indices=",indices 100 | nstep=len(indices) 101 | indices.append(len(rule)) 102 | cost=0.0 103 | for step in range(nstep): 104 | step_rule=rule[indices[step]:indices[step+1]] 105 | #print "step=",step_rule 106 | node, cost=gen_node(node,step_rule) 107 | rsrv_cost+=cost 108 | return node,rsrv_cost 109 | 110 | 111 | def gen_node(matrix,rule): 112 | node=copy(matrix) 113 | rsrv_cost=0.0 114 | pair=rule[0] 115 | node[-1-pair[0],-1-pair[1]]=myINF 116 | row_list=[] 117 | col_list=[] 118 | for pair in rule[1:]: 119 | row_list.append(pair[0]) 120 | col_list.append(pair[1]) 121 | rsrv_cost+=node[pair[0],pair[1]] 122 | node=remove_row_col(node,row_list,col_list) 123 | return node,rsrv_cost 124 | 125 | def remove_row_col(matrix,row,col): 126 | #sorting in descending order so that it is easy to remove 127 | row.sort(reverse=True) 128 | col.sort(reverse=True) 129 | for r in row: 130 | matrix=np.delete(matrix,(r), axis=0) 131 | for c in col: 132 | matrix=np.delete(matrix,(c), axis=1) 133 | return matrix 134 | 135 | if __name__ == "__main__": 136 | main(*sys.argv[1:]) 137 | -------------------------------------------------------------------------------- /tools/krr-Cortes-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import argparse 4 | import numpy as np 5 | import math 6 | 7 | 8 | def main(kernelFilenames, propFilename, weightFilenames, csi,bufsize): 9 | 10 | suffix = weightFilenames[0].split('.')[-1] 11 | if suffix == 'alpha': 12 | alphaidx = 0 13 | muidx = 1 14 | elif suffix == 'mu': 15 | alphaidx = 1 16 | muidx = 0 17 | else: 18 | raise ValueError("Suffix of the weights do not match with alpha/mu.") 19 | # Unpack the alpha's and their corresponding lines/columns in the kernel matrices 20 | wvec = np.loadtxt(weightFilenames[alphaidx]) 21 | alpha = np.asarray(wvec[:,0], float) 22 | icols = np.asarray(wvec[:,1], int) 23 | irows = np.asarray(wvec[:,2], int) 24 | # Unpack the mu's 25 | mu = np.loadtxt(weightFilenames[muidx]) 26 | 27 | p = np.loadtxt(propFilename, dtype=np.float64, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0) 28 | 29 | kernelFiles = [] 30 | for it,kernelFilename in enumerate(kernelFilenames): 31 | kernelFiles.append(open(kernelFilename, "r")) 32 | # skips comments 33 | fstart = kernelFiles[it].tell() 34 | fline = kernelFiles[it].readline() 35 | while fline[0]=='#': 36 | fstart = kernelFiles[it].tell() 37 | fline = kernelFiles[it].readline() 38 | nref = len(fline.split()) 39 | kernelFiles[it].seek(fstart) 40 | 41 | # average counters 42 | testmae = 0 43 | trainmae = 0 44 | testrms = 0 45 | 
trainrms = 0 46 | testsup = 0 47 | trainsup = 0 48 | ntest = 0 49 | ntrain = 0 50 | ktot = 0 51 | chunks = [[] for it in range(len(kernelFiles))] 52 | kernels = [[] for it in range(len(kernelFiles))] 53 | 54 | while True: 55 | # Read chunks of the several kernel matrices 56 | for it,kernelFile in enumerate(kernelFiles): 57 | chunks[it] = np.fromfile(kernelFile, dtype="float",count=(nref)*bufsize, sep=" ") 58 | nk = len(chunks[it])/(nref) 59 | kernels[it] = chunks[it].reshape((nk,nref))[:,icols] 60 | kernels[it] = kernels[it]**csi[it] 61 | n,m = kernels[0].shape 62 | # Condition to leave the loop. if the chunk has no lines 63 | if n == 0: break 64 | # Make the composite kernel matrix using the mu 65 | Kernel = np.zeros((n,m),dtype=np.float64) 66 | for it,kernel in enumerate(kernels): 67 | Kernel += mu[it] * kernel 68 | # Create the predictions 69 | krp = np.dot(Kernel,alpha) 70 | 71 | # output the different errors on the training set and the testing set 72 | for k in xrange(nk): 73 | if ktot+k in irows: 74 | lab = "TRAIN" 75 | trainmae += abs(krp[k] - p[ktot+k]) 76 | trainrms += (krp[k] - p[ktot+k])**2 77 | trainsup = max(trainsup, abs(krp[k] - p[ktot+k])) 78 | ntrain += 1 79 | else: 80 | lab = "TEST" 81 | testmae += abs(krp[k] - p[ktot+k]) 82 | testrms += (krp[k] - p[ktot+k])**2 83 | testsup = max(testsup, abs(krp[k] - p[ktot+k])) 84 | ntest +=1 85 | print k+ktot, p[ktot+k], krp[k], lab 86 | ktot += nk 87 | print "# Train points MAE={:.4e} RMSE={:.4e} SUP={:.4e}".format(trainmae/ntrain, np.sqrt(trainrms/ntrain), trainsup) 88 | print "# Test points MAE={:.4e} RMSE={:.4e} SUP={:.4e} ".format(testmae/ntest, np.sqrt(testrms/ntest), testsup) 89 | # print "# Train points MAE=%f RMSE=%f SUP=%f" % (trainmae/ntrain, np.sqrt(trainrms/ntrain), trainsup) 90 | # print "# Test points MAE=%f RMSE=%f SUP=%f " % (testmae/ntest, np.sqrt(testrms/ntest), testsup) 91 | 92 | if __name__ == '__main__': 93 | parser = argparse.ArgumentParser(description="""Computes KRR predictions from a weights vector obtained from a previous run of krr.py with --saveweights.""") 94 | parser.add_argument("kernels", nargs=1, help="Kernel matrices. 
List of coma separated file names.") 95 | parser.add_argument("weights", nargs=1, help="Weights vector") 96 | parser.add_argument("props", nargs=1, help="Property file name (for cross-check)") 97 | parser.add_argument("--csi", type=str, default='1.0', help="Kernel scaling") 98 | parser.add_argument("--bufsize", type=int, default=1000, help="Size of the buffer") 99 | 100 | args = parser.parse_args() 101 | kernelFilenames = args.kernels[0].split(',') 102 | weightsFilenames = args.weights[0].split(',') 103 | a = args.csi.split(',') 104 | if len(a) != len(kernelFilenames): 105 | raise ValueError("The number of kernel file names and elements of csi must be equal.") 106 | csi = np.zeros(len(a),dtype=np.float64) 107 | for it,item in enumerate(a): 108 | csi[it] = float(item) 109 | 110 | main(kernelFilenames=kernelFilenames, propFilename=args.props[0], weightFilenames=weightsFilenames, csi=csi, bufsize=args.bufsize) 111 | 112 | -------------------------------------------------------------------------------- /libmatch/lap/perm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | template 13 | class Matrix 14 | { 15 | std::vector inner_; 16 | unsigned int dimx_, dimy_; 17 | 18 | public: 19 | unsigned int size() const { return dimx_; } 20 | 21 | Matrix (unsigned int dimx, unsigned int dimy) 22 | : dimx_ (dimx), dimy_ (dimy) 23 | { 24 | inner_.resize (dimx_*dimy_); 25 | } 26 | 27 | inline T operator()(unsigned int x, unsigned int y) const 28 | { 29 | if (x >= dimx_ || y>= dimy_) 30 | throw 0; // ouch 31 | return inner_[dimx_*y + x]; 32 | } 33 | 34 | inline T& operator()(unsigned int x, unsigned int y) 35 | { 36 | if (x >= dimx_ || y>= dimy_) 37 | throw 0; // ouch 38 | return inner_[dimx_*y + x]; 39 | } 40 | 41 | }; 42 | 43 | double fact(int n) 44 | { 45 | double fn=1.0; 46 | for (int i=2; i<=n; ++i) fn*=double(i); 47 | return fn; 48 | } 49 | 50 | double perm(const Matrix& mtx, double eps=1e-4) 51 | { 52 | int n=mtx.size(); std::vector idx(n); 53 | 54 | for (int i=0; i distribution(0,1); 88 | auto rndu=std::bind(distribution, generator); 89 | 90 | void shuffle(std::vector& idx) 91 | { 92 | int swp, n=idx.size(); 93 | for (int i=n-1; i>0; --i) 94 | { 95 | int j = rndu()*(i+1); 96 | swp=idx[i]; 97 | idx[i]=idx[j]; 98 | idx[j]=swp; 99 | } 100 | } 101 | 102 | void shuffle2(std::vector& idx, Matrix& cshuf) 103 | { 104 | int swp, n=idx.size(); 105 | static std::vector cf; 106 | double pf; 107 | if (cf.size()!=n) cf.resize(n); 108 | int cmin, j=0; 109 | for (int i=n-1; i>0; --i) 110 | { 111 | cmin = cshuf(i,idx[0]); 112 | for (int k=1; k<=i; ++k) if (cshuf(i,idx[k])& mtx) 127 | { 128 | int n=mtx.size(); 129 | Matrix cshuf(n,n); 130 | 131 | std::vector idx(n); 132 | for (int i=0; i>n; 167 | 168 | srand(1234); 169 | Matrix mtx(n,n); 170 | 171 | for (int i=0; i> mtx(i,j); 173 | 174 | double pp = perm(mtx,1e-4); 175 | std::cout << "PERMANENT "< [ options ] 6 | 7 | import argparse 8 | import numpy as np 9 | import sys 10 | 11 | def segfind(cp, cs): 12 | a = 0 13 | b = len(cp) 14 | while (b-a)>1: 15 | c = int((b+a)/2) 16 | if cs tol).count(True) / 2 29 | S[rank:] = 0.0 30 | S = np.diag(S) 31 | Ap = np.dot(np.dot(U,S),V) 32 | p = np.sum(V[0:rank,:]**2, axis=0) / rank 33 | return p 34 | 35 | def randomsubset(ndata, nsel, plist=None): 36 | if nsel > ndata: 37 | raise ValueError("Cannot select data out of thin air") 38 | if nsel == ndata: 39 | 
return np.asarray(range(ndata)) 40 | cplist = np.zeros(ndata) 41 | if plist is None: 42 | plist = np.ones(ndata, float) 43 | 44 | # computes initial cumulative probability distr. 45 | cplist[0]=plist[0] 46 | for i in xrange(1,ndata): 47 | cplist[i]=cplist[i-1]+plist[i] 48 | 49 | rdata = np.zeros(nsel, int) 50 | for i in xrange(nsel): 51 | csel = np.random.uniform() * cplist[-1] 52 | isel = segfind(cplist, csel) 53 | rdata[i] = isel 54 | psel = plist[isel] 55 | for j in xrange(isel,ndata): 56 | cplist[j] -= psel 57 | return rdata 58 | 59 | def farthestPointSampling(kernel,nbOfFrames,nbOfLandmarks,seed=10,initalLandmark=None,listOfDiscardedPoints=None,verbose=False): 60 | np.random.seed(seed) 61 | LandmarksIdx = np.zeros(nbOfLandmarks,int) 62 | if listOfDiscardedPoints is None: 63 | listOfDiscardedPoints = [] 64 | if initalLandmark is None: 65 | isel = int(np.random.uniform()*nbOfFrames) 66 | while isel in listOfDiscardedPoints: 67 | isel=int(np.random.uniform()*nbOfFrames) 68 | else: 69 | isel = initalLandmark 70 | 71 | diag = np.diag(kernel) 72 | 73 | ldist = 1e100*np.ones(nbOfFrames,float) 74 | 75 | LandmarksIdx[0] = isel 76 | nontrue = np.setdiff1d(range(nbOfFrames), listOfDiscardedPoints) 77 | 78 | for nsel in xrange(1,nbOfLandmarks): 79 | dmax = 0*np.ones(nbOfFrames,float) 80 | imax = 0 81 | distLine = np.sqrt(kernel[isel,isel] + diag - 2 * kernel[isel,:]) 82 | 83 | dsel = distLine[nontrue] 84 | 85 | low = dsel < ldist 86 | ldist[low] = dsel[low] 87 | larg = ldist > dmax 88 | dmax[larg] = ldist[larg] 89 | 90 | isel = dmax.argmax() 91 | LandmarksIdx[nsel] = isel 92 | if verbose is True: 93 | print "selected ", isel, " distance ", dmax[isel] 94 | 95 | return LandmarksIdx 96 | 97 | def main(kernel, props, mode, nland,output="distance", prefix=""): 98 | 99 | if prefix=="" : prefix=kernel[0:-2] 100 | # reads kernel 101 | kij=np.loadtxt(kernel) 102 | nel=len(kij) 103 | # reads properties if given 104 | if props!="": 105 | p = np.loadtxt(props) 106 | if len(p)!=nel : 107 | print "ERROR ! 
incomplete set of properties" 108 | exit() 109 | 110 | np.set_printoptions(threshold=1000) 111 | nland = int(nland) 112 | 113 | psel = np.ones(nel,float) 114 | if mode == "random": 115 | LandmarksIdx = randomsubset(nel, nland, psel) 116 | elif mode == "fps": 117 | LandmarksIdx = farthestPointSampling(kij,nel,nland) 118 | 119 | 120 | filand=prefix+"-landmark"+str(nland)+".index" 121 | np.savetxt(filand,LandmarksIdx,fmt='%1.1i') 122 | if props != "": 123 | lp = p[LandmarksIdx] 124 | fpland=prefix+"-landmark"+str(nland)+".prop" 125 | np.savetxt(fpland,lp) 126 | 127 | if output=="kernel": 128 | print "Writing Kernels" 129 | lk = kij[LandmarksIdx][:,LandmarksIdx].copy() 130 | fkland=prefix+"-landmark"+str(nland)+".k" 131 | np.savetxt(fkland,lk) 132 | foos=prefix+"-landmark"+str(nland)+"-OOS.k" 133 | koos=kij[:,LandmarksIdx] 134 | np.savetxt(foos,koos) 135 | 136 | if output=="distance" : 137 | print "Writing Distances" 138 | sim=np.zeros((nel,nel)) 139 | for i in range(nel): 140 | for j in range(i): 141 | sim[i,j]=sim[j,i]=np.sqrt(kij[i,i]+kij[j,j]-2*kij[i,j]) 142 | ld=sim[LandmarksIdx][:,LandmarksIdx].copy() 143 | fsimland=prefix+"-landmark"+str(nland)+".sim" 144 | np.savetxt(fsimland,ld) 145 | foos=prefix+"-landmark"+str(nland)+"-OOS.sim" 146 | simoos=sim[:,LandmarksIdx] 147 | np.savetxt(foos,simoos) 148 | 149 | 150 | 151 | if __name__ == '__main__': 152 | parser = argparse.ArgumentParser(description="""Do landmarks selction based on a kernel matrix and export square matrix for landmarks and rectangular matrix.""") 153 | 154 | parser.add_argument("kernel", nargs=1, help="Kernel matrix") 155 | parser.add_argument("--props",type=str, default="", help="Property file") 156 | parser.add_argument("--mode", type=str, default="random", help="landmark selection (e.g. --mode random / fps ") 157 | parser.add_argument("--output", type=str, default="distance", help="what to output kernel/distance ") 158 | parser.add_argument("--nland", type=int, default=1, help="number of landmarks") 159 | parser.add_argument("--prefix", type=str, default="", help="prefix of the output files") 160 | 161 | args = parser.parse_args() 162 | 163 | main(kernel=args.kernel[0], props=args.props, mode=args.mode,nland=args.nland,output=args.output, prefix=args.prefix) 164 | 165 | -------------------------------------------------------------------------------- /libmatch/lap/lap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import sys, time 4 | from copy import copy 5 | 6 | __all__ = [ "best_cost", "lcm_best_cost", "lcm_best_cost1", "lcm_best_cost2", "best_pairs" ] 7 | 8 | try: 9 | import hungarian 10 | def linear_assignment(matrix): 11 | m=copy(matrix) 12 | assign=hungarian.lap(m) 13 | pair=[] 14 | for x in range(len(assign[0])): 15 | pair.append([x,assign[0][x]]) 16 | return pair 17 | except: 18 | from munkres import linear_assignment 19 | print >> sys.stderr, "WARNING! 
fast hungarian library is not available \n" 20 | def best_pairs(matrix): 21 | return linear_assignment(matrix) 22 | 23 | def best_cost(matrix): 24 | hun=linear_assignment(matrix) 25 | cost=0.0 26 | #print hun 27 | for pair in hun: 28 | cost+=matrix[pair[0],pair[1]] 29 | return cost 30 | 31 | def gcd(a,b): 32 | if (b>a): a,b = b, a 33 | 34 | while (b): a, b = b, a%b 35 | 36 | return a 37 | 38 | def lcm(a,b): 39 | return a*b/gcd(b,a) 40 | 41 | def lcm_index(x, y): 42 | z=lcm(x,y) 43 | 44 | ix = np.asarray(xrange(x),int) 45 | iy = np.asarray(xrange(y),int) 46 | 47 | lx = np.zeros(z,int) 48 | ly = np.zeros(z,int) 49 | for i in xrange(z/x): 50 | lx[i*x:(i+1)*x]=ix 51 | for i in xrange(z/y): 52 | ly[i*y:(i+1)*y]=iy 53 | return (lx,ly) 54 | 55 | def lcm_matrix(m): 56 | x,y = m.shape 57 | if x==y: return m # no-op 58 | 59 | lx, ly = lcm_index(x,y) 60 | return m[np.ix_(lx, ly)] 61 | 62 | myinf = 1e2 63 | 64 | def lcm_best_cost(mtx): 65 | 66 | # heuristics 67 | if (lcm(mtx.shape[0],mtx.shape[1]) < 1000): 68 | return lcm_best_cost1(mtx) 69 | else: 70 | return lcm_best_cost2(mtx, 1e-5) 71 | 72 | def lcm_best_cost1(mtx): 73 | nmtx = lcm_matrix(mtx) 74 | 75 | bp = best_pairs(nmtx) 76 | tc = 0 77 | 78 | for p in bp: 79 | pc = nmtx[p[0],p[1]] 80 | if pc ym: 90 | mtx = gmtx.T 91 | xm, ym = mtx.shape 92 | mm = lcm(xm, ym) 93 | lx, ly = lcm_index(xm,ym) 94 | 95 | # print xm, ym 96 | sm = min(xm, ym) 97 | np.set_printoptions(linewidth=1000,threshold=10000) 98 | tc = 0 99 | bcl = [] 100 | blocks = [] 101 | for i in range(mm/sm): 102 | blx=list(lx[range(i*sm,(i+1)*sm)]) 103 | bly=list(ly[range(i*sm,(i+1)*sm)]) 104 | blocks.append((blx,bly)) 105 | subm = mtx[np.ix_(blx,bly)] 106 | 107 | #print subm 108 | bp = best_pairs(subm) 109 | 110 | bc = 0 111 | for p in bp: 112 | pc = subm[p[0],p[1]] 113 | bc += pc 114 | bcl.append(bc) 115 | 116 | nxc = 0 117 | ntry = 0 118 | 119 | merged = True 120 | nb = len(blocks) 121 | tainted = np.ones((nb,nb), int) 122 | tainted[1,0]=1 # make sure we get in once! 123 | 124 | while merged and np.triu(tainted,1).sum()>0: 125 | #print "estimate cost ", sum(bcl), "n. exchanges", nxc, "/", ntry 126 | merged = False 127 | 128 | for i in xrange(nb): 129 | blxi = blocks[i][0] 130 | blyi = blocks[i][1] 131 | ni = len(blxi) 132 | for j in range(i+1,nb): 133 | if tainted[i,j]==0: continue 134 | # print np.tril(tainted,-1) 135 | ntry += 1 136 | blxj = blocks[j][0] 137 | blyj = blocks[j][1] 138 | nj = len(blxj) 139 | blx = blxi+blxj 140 | bly = blyi+blyj 141 | subm = mtx[np.ix_(blx,bly)] 142 | bp = best_pairs(subm) 143 | 144 | bc = 0 145 | for p in bp: 146 | pc = subm[p[0],p[1]] 147 | bc += pc 148 | 149 | if (bcl[i]+bcl[j])/bc -1 > thresh: 150 | # print "MERGING %d,%d: %f+%f=%f >%f\n" %(i,j,bcl[i],bcl[j],bcl[i]+bcl[j],bc) 151 | blyi = [] 152 | blyj = [] 153 | ci = 0 154 | for ti in xrange(ni): 155 | blyi.append(bly[bp[ti][1]]) 156 | ci += subm[bp[ti][0],bp[ti][1]] 157 | cj = 0 158 | for tj in xrange(nj): 159 | blyj.append(bly[bp[ni+tj][1]]) 160 | cj += subm[bp[ni+tj][0],bp[ni+tj][1]] 161 | #print "after", nblyi, nblyj 162 | #~ subm = mtx[np.ix_(blxi,nblyi)] 163 | #~ ci = best_cost(subm) 164 | #~ subm = mtx[np.ix_(blxj,nblyj)] 165 | #~ cj = best_cost(subm) 166 | #~ subm = mtx[np.ix_(blxi+blxj,nblyi+nblyj)] 167 | #~ ncc = best_cost(subm) 168 | # print "new cost", ci+cj, bcl[i]+bcl[j], bc, "n. 
exchanges", nxc, "/", ntry 169 | nxc+=1 170 | blocks[i] = (blxi,blyi) 171 | blocks[j] = (blxj,blyj) 172 | bcl[i] = ci 173 | bcl[j] = cj 174 | tainted[i,:] = 1 175 | tainted[:,j] = 1 176 | tainted[i,j] = 0 177 | merged = True 178 | break # it is more efficient to restart since this i has been tainted anyway 179 | tainted[i,j] = 0 180 | #if merged: break 181 | 182 | #print "final cost", sum(bcl), len(bcl), "total exchanges: ", nxc, "/", ntry 183 | return sum(bcl) 184 | 185 | if __name__ == "__main__": 186 | 187 | filename = sys.argv[1] 188 | mtx=1-np.loadtxt(filename) 189 | np.set_printoptions(linewidth=1000) 190 | st=time.time() 191 | new=lcm_best_cost2(mtx) 192 | tnew = time.time()-st 193 | 194 | st=time.time() 195 | apx=lcm_best_cost2(mtx,1e-5) 196 | tapx = time.time()-st 197 | 198 | st=time.time() 199 | ref=0 200 | ref=best_cost(mtx) 201 | tref = time.time()-st 202 | 203 | print "Reference: ", 1-ref/len(mtx), " time: ", tref 204 | print "New_method: ", 1-new/len(mtx), " time: ", tnew 205 | print "New_method(approx): ", 1-apx/len(mtx), " time: ", tapx 206 | 207 | -------------------------------------------------------------------------------- /tools/alchemy_rules.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys, time,ast 4 | from copy import copy, deepcopy 5 | import numpy as np 6 | import argparse 7 | import gc 8 | import cPickle as pickle 9 | 10 | class alchemy: 11 | def getpair(self, sa, sb): 12 | if len(self.rules)==0: # special case when the alchemical matrix is default 13 | if sa==sb: return 1 14 | else: return 0 15 | else: 16 | if sa<=sb and (sa,sb) in self.rules: 17 | return self.rules[(sa,sb)] 18 | elif sa>sb and (sb,sa) in self.rules: 19 | return self.rules[(sb,sa)] 20 | else: 21 | if sa==sb: return 1 22 | else: return 0 23 | 24 | def __init__(self, rules={}, mu=0): 25 | self.rules = rules.copy() 26 | self.mu = mu 27 | 28 | class alchemy_mendeleev: 29 | def getpair(self, sa, sb): 30 | if sa<=sb and (sa,sb) in self.rules: 31 | return self.rules[(sa,sb)] 32 | elif sa>sb and (sb,sa) in self.rules: 33 | return self.rules[(sb,sa)] 34 | else: 35 | Elec_neg={1: 2.2, 2: 0, 3: 0.98, 4: 1.57, 5: 2.04, 6: 2.55, 7: 3.04, 8: 3.44, 9: 3.98, 10: 0, 11: 0.93, 12: 1.31, 13: 1.5, 14: 1.8, 15: 2.19, 16: 2.58, 17: 3.16, 18: 0, 19: 0.82, 20: 1, 21: 1.36, 22: 1.54, 23: 1.63, 24: 1.66, 25: 1.55, 26: 1.83, 27: 1.88, 28: 1.91, 29: 1.9, 30: 1.65, 31: 1.81, 32: 2.01, 33: 2.18, 34: 2.55, 35: 2.96, 36: 0, 37: 0.82, 38: 0.95, 39: 1.22, 40: 1.33, 41: 1.6, 42: 2.16, 43: 1.9, 44: 2.2, 45: 2.28, 46: 2.2, 47: 1.93, 48: 1.69, 49: 1.78, 50: 1.96, 51: 2.05, 52: 2.1, 53: 2.66, 54: 0, 55: 0.79, 56: 0.89, 57: 1.1, 58: 1.12, 59: 1.13, 60: 1.14, 61: 1.13, 62: 1.17, 63: 1.2, 64: 1.2, 65: 1.2, 66: 1.22, 67: 1.23, 68: 1.24, 69: 1.25, 70: 1.1, 71: 1.27, 72: 1.3, 73: 1.5, 74: 2.36, 75: 1.9, 76: 2.2, 77: 2.2, 78: 2.28, 79: 2.54, 80: 2, 81: 2.04, 82: 2.33, 83: 2.02, 84: 2, 85: 2.2, 86: 0, 87: 0.7, 88: 0.9, 89: 1.1, 90: 1.3, 91: 1.5, 92: 1.38, 93: 1.36, 94: 1.28, 95: 1.3, 96: 1.3, 97: 1.3, 98: 1.3, 99: 1.3, 100: 1.3, 101: 1.3, 102: 1.3, 103: 10.0, 104: 10.0, 105:10.0, 106: 10.0, 107: 10.0, 108: 10.0, 109: 10.0} 36 | Elec_aff={1: 0.75420375, 2: 0.0, 3: 0.618049, 4: 0.0, 5: 0.279723, 6: 1.262118, 7: -0.07, 8: 1.461112, 9: 3.4011887, 10: 0.0, 11: 0.547926, 12: 0.0, 13: 0.43283, 14: 1.389521, 15: 0.7465, 16: 2.0771029, 17: 3.612724, 18: 0.0, 19: 0.501459, 20: 0.02455, 21: 0.188, 22: 0.084, 23: 0.525, 24: 0.67584, 25: 0.0, 26: 0.151, 27: 0.6633, 28: 
1.15716, 29: 1.23578, 30: 0.0, 31: 0.41, 32: 1.232712, 33: 0.814, 34: 2.02067, 35: 3.363588, 36: 0.0, 37: 0.485916, 38: 0.05206, 39: 0.307, 40: 0.426, 41: 0.893, 42: 0.7472, 43: 0.55, 44: 1.04638, 45: 1.14289, 46: 0.56214, 47: 1.30447, 48: 0.0, 49: 0.404, 50: 1.112066, 51: 1.047401, 52: 1.970875, 53: 3.059038, 54: 0.0, 55: 0.471626, 56: 0.14462, 57: 0.47, 58: 0.5, 59: 0.5, 60: 0.5, 61: 0.5, 62: 0.5, 63: 0.5, 64: 0.5, 65: 0.5, 66: 0.5, 67: 0.5, 68: 0.5, 69: 0.5, 70: 0.5, 71: 0.5, 72: 0.0, 73: 0.322, 74: 0.815, 75: 0.15, 76: 1.0778, 77: 1.56436, 78: 2.1251, 79: 2.30861, 80: 0.0, 81: 0.377, 82: 0.364, 83: 0.942363, 84: 1.9, 85: 2.8, 86: 0.0, 87: 0.0, 88: 0.0, 89: 0.0, 90: 0.0, 91: 0.0, 92: 0.0, 93: 0.0, 94: 0.0, 95: 0.0, 96: 0.0, 97: 0.0, 98: 0.0, 99: 0.0, 100: 0.0, 101: 0.0, 102: 0.0, 103: 0.0, 104: 0.0, 105: 0.0, 106: 0.0, 107: 0.0, 108: 0.0, 109: 0.0} 37 | Ion_nrg={1: 13.5984, 2: 24.5874, 3: 5.3917, 4: 9.3227, 5: 8.298, 6: 11.2603, 7: 14.5341, 8: 13.6181, 9: 17.4228, 10: 21.5645, 11: 5.1391, 12: 7.6462, 13: 5.9858, 14: 8.1517, 15: 10.4867, 16: 10.36, 17: 12.9676, 18: 15.7596, 19: 4.3407, 20: 6.1132, 21: 6.5615, 22: 6.8281, 23: 6.7462, 24: 6.7665, 25: 7.434, 26: 7.9024, 27: 7.881, 28: 7.6398, 29: 7.7264, 30: 9.3942, 31: 5.9993, 32: 7.8994, 33: 9.7886, 34: 9.7524, 35: 11.8138, 36: 13.9996, 37: 4.1771, 38: 5.6949, 39: 6.2173, 40: 6.6339, 41: 6.7589, 42: 7.0924, 43: 7.28, 44: 7.3605, 45: 7.4589, 46: 8.3369, 47: 7.5762, 48: 8.9938, 49: 5.7864, 50: 7.3439, 51: 8.6084, 52: 9.0096, 53: 10.4513, 54: 12.1298, 55: 3.8939, 56: 5.2117, 57: 5.5769, 58: 5.5387, 59: 5.473, 60: 5.525, 61: 5.582, 62: 5.6437, 63: 5.6704, 64: 6.1498, 65: 5.8638, 66: 5.9389, 67: 6.0215, 68: 6.1077, 69: 6.1843, 70: 6.2542, 71: 5.4259, 72: 6.8251, 73: 7.5496, 74: 7.864, 75: 7.8335, 76: 8.4382, 77: 8.967, 78: 8.9588, 79: 9.2255, 80: 10.4375, 81: 6.1082, 82: 7.4167, 83: 7.2855, 84: 8.414, 85: -1, 86: 10.7485, 87: 4.0727, 88: 5.2784, 89: 5.17, 90: 6.3067, 91: 5.89, 92: 6.1941, 93: 6.2657, 94: 6.026, 95: 5.9738, 96: 5.9914, 97: 6.1979, 98: 6.2817, 99: 6.42, 100: 6.5, 101: 6.58, 102: 6.65, 103: 4.9, 104: 6.0, 105: -1, 106: -1, 107: -1, 108: -1, 109: -1} 38 | DEN=Elec_neg[sa] -Elec_neg[sb] 39 | DEA=Elec_aff[sa] -Elec_aff[sb] 40 | DIE=Ion_nrg[sa] - Ion_nrg[sb] 41 | p=1 42 | if self.deltaEN>0: p*=np.exp(-0.5*(DEN/self.deltaEN)**2) 43 | if self.deltaEA>0: p*=np.exp(-0.5*(DEA/self.deltaEA)**2) 44 | if self.deltaIE>0: p*=np.exp(-0.5*(DIE/self.deltaIE)**2) 45 | return p 46 | def __init__(self, rules={},deltaEN=1e100, deltaEA=1e100, deltaIE=1e100, mu=0): 47 | self.rules = rules.copy() 48 | self.mu = mu 49 | self.deltaEA= deltaEA 50 | self.deltaIE= deltaIE 51 | self.deltaEN= deltaEN 52 | 53 | def main(deltaEN,deltaEA,deltaIE,splist,alchemyrules): 54 | if (alchemyrules=="none"): 55 | alchem=alchemy_mendeleev(deltaEN=deltaEN,deltaIE=deltaIE,deltaEA=deltaEA,rules={}) 56 | else: 57 | r=alchemyrules.replace('"', '').strip() 58 | r=alchemyrules.replace("'", '').strip() 59 | r=ast.literal_eval(r) 60 | print >> sys.stderr, "Using Alchemy rules: ", r,"\n" 61 | alchem=alchemy_mendelev(deltaEN=deltaEN,deltaIE=deltaIE,deltaEA=deltaEA,rules=r) 62 | rule={} 63 | for sa in splist: 64 | for sb in splist: 65 | if (sb >sa): 66 | rule[(sa,sb)]=alchem.getpair(sa,sb) 67 | print rule 68 | f="alchemy" 69 | if deltaEN > 0: f+="_dEN"+str(deltaEN) 70 | if deltaEA > 0: f+="_dEA"+str(deltaEA) 71 | if deltaIE > 0: f+="_dIE"+str(deltaIE) 72 | f+=".pickle" 73 | file = open(f,"wb") 74 | gc.disable() 75 | pickle.dump(rule, file,protocol=pickle.HIGHEST_PROTOCOL) # 
HIGHEST_PROTOCOL is 2 in py 2.7 76 | file.close() 77 | gc.enable() 78 | if __name__ == '__main__': 79 | parser = argparse.ArgumentParser(description="""Utility code to generate alchemy rules based on atomic properties.""") 80 | 81 | parser.add_argument("--deltaEN",type=float,default="-1", help="Delta value for electronegativity") 82 | parser.add_argument("--deltaEA",type=float,default="-1", help="Delta value for electron affinity [eV]") 83 | parser.add_argument("--deltaIE",type=float,default="-1", help="Delta value for ionization energy [eV]") 84 | parser.add_argument("--species", type=str, default="all", help="list of species (e.g. --species 1,6 for H and C )") 85 | parser.add_argument("--rules", type=str, default="none", help='Dictionary-style rule specification in quote (e.g. --rules "{(6,7):1,(6,8):1}"') 86 | args = parser.parse_args() 87 | if args.species == "all": 88 | splist = [] 89 | for i in range(1,109): 90 | splist.append(i) 91 | else: 92 | splist = sorted(map(int,args.species.split(','))) 93 | main(deltaEN=args.deltaEN,deltaIE=args.deltaIE,deltaEA=args.deltaEA,splist=splist,alchemyrules=args.rules) 94 | -------------------------------------------------------------------------------- /glosoap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Just a minimal stub to compute SOAP vectors from a list of structures and print them out on a file. 4 | """ 5 | 6 | import quippy 7 | import sys, time, ast 8 | from multiprocessing import Process, Value, Array 9 | import argparse 10 | from random import randint 11 | from libmatch.environments import alchemy, environ 12 | from libmatch.structures import structk, structure, structurelist 13 | import os 14 | import numpy as np 15 | from copy import copy 16 | from time import ctime 17 | from datetime import datetime 18 | import gc 19 | import cPickle as pickle 20 | import code 21 | 22 | # tries really hard to flush any buffer to disk! 
23 | def flush(stream): 24 | stream.flush() 25 | os.fsync(stream) 26 | def atomicno_to_sym(atno): 27 | pdict={1: 'H', 2: 'He', 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', 19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', 27: 'Co', 28: 'Ni', 29: 'Cu', 30: 'Zn', 31: 'Ga', 32: 'Ge', 33: 'As', 34: 'Se', 35: 'Br', 36: 'Kr', 37: 'Rb', 38: 'Sr', 39: 'Y', 40: 'Zr', 41: 'Nb', 42: 'Mo', 43: 'Tc', 44: 'Ru', 45: 'Rh', 46: 'Pd', 47: 'Ag', 48: 'Cd', 49: 'In', 50: 'Sn', 51: 'Sb', 52: 'Te', 53: 'I', 54: 'Xe', 55: 'Cs', 56: 'Ba', 57: 'La', 58: 'Ce', 59: 'Pr', 60: 'Nd', 61: 'Pm', 62: 'Sm', 63: 'Eu', 64: 'Gd', 65: 'Tb', 66: 'Dy', 67: 'Ho', 68: 'Er', 69: 'Tm', 70: 'Yb', 71: 'Lu', 72: 'Hf', 73: 'Ta', 74: 'W', 75: 'Re', 76: 'Os', 77: 'Ir', 78: 'Pt', 79: 'Au', 80: 'Hg', 81: 'Tl', 82: 'Pb', 83: 'Bi', 84: 'Po', 85: 'At', 86: 'Rn', 87: 'Fr', 88: 'Ra', 89: 'Ac', 90: 'Th', 91: 'Pa', 92: 'U', 93: 'Np', 94: 'Pu', 95: 'Am', 96: 'Cm', 97: 'Bk', 98: 'Cf', 99: 'Es', 100: 'Fm', 101: 'Md', 102: 'No', 103: 'Lr', 104: 'Rf', 105: 'Ha', 106: 'Sg', 107: 'Ns', 108: 'Hs', 109: 'Mt', 110: 'Unn', 111: 'Unu'} 28 | return pdict[atno] 29 | 30 | 31 | def main(filename, nmax, lmax, coff, cotw, gs, centerweight, gspecies, redfiles, prefix=""): 32 | start_time = datetime.now() 33 | filename = filename[0] 34 | # sets a few defaults 35 | if prefix=="": prefix=filename 36 | if prefix.endswith('.xyz'): prefix=prefix[:-4] 37 | prefix=prefix+"-n"+str(nmax)+"-l"+str(lmax)+"-c"+str(coff)+"-g"+str(gs) 38 | print >> sys.stderr, "using output prefix =", prefix 39 | # Reads input file using quippy 40 | print >> sys.stderr, "Reading input file", filename 41 | (first,last)=(0,0); # reads the whole thing 42 | if first==0: first=None; 43 | if last==0: last=None 44 | al = quippy.AtomsList(filename, start=first, stop=last); 45 | print >> sys.stderr, len(al.n) , " Configurations Read" 46 | 47 | # determines "kit" (i.e. 
max n and kind of atoms present) 48 | spkit = {} 49 | for at in al: 50 | atspecies = {} 51 | for z in at.z: 52 | if z in atspecies: atspecies[z]+=1 53 | else: atspecies[z] = 1 54 | 55 | for (z, nz) in atspecies.iteritems(): 56 | if z in spkit: 57 | if nz>spkit[z]: spkit[z] = nz 58 | else: 59 | spkit[z] = nz 60 | 61 | # species string 62 | if gspecies is None: 63 | zsp=spkit.keys() 64 | zsp.sort() 65 | print "No species provided" 66 | else: zsp=map(int,gspecies.split(",")) 67 | 68 | # If dimensionality reduction is called, stores the variables 69 | cur = False 70 | if redfiles is not None: 71 | cur = True 72 | # Does not matter in which sequence the files are provided 73 | try: 74 | idxsel = np.loadtxt(redfiles[0], dtype=int) 75 | chol = np.loadtxt(redfiles[1]) 76 | except ValueError: 77 | idxsel = np.loadtxt(redfiles[1], dtype=int) 78 | chol = np.loadtxt(redfiles[0]) 79 | 80 | # Checks if all dimensions are the same 81 | assert idxsel.shape[0] == chol.shape[0] == chol.shape[1], "Wrong dimensions, cannot perform dimensionality reduction" 82 | 83 | lspecies = 'n_species='+str(len(zsp))+' species_Z={ ' 84 | for z in zsp: lspecies = lspecies + str(z) + ' ' 85 | lspecies = lspecies + '}' 86 | print "Using the following species:", ",".join([str(z) for z in zsp]) 87 | if not cur: 88 | fout=open(prefix+".soap","w") 89 | else: 90 | fout=open(prefix+"_red.soap","w") # To avoid giving the same name to the reduced soap vector 91 | counter = 0 92 | for at in al: 93 | fout.write("%d\n" % (len(at.z))) 94 | at.set_cutoff(coff); 95 | at.calc_connect(); 96 | nel = np.bincount(at.z) 97 | 98 | # ok, it appears you *have* to do this atom by atom. let's do that, but then re-sort in the same way as in the input 99 | soaps = {} 100 | sz = {} 101 | for z in at.z: 102 | if z in sz: sz[z]+=1 103 | else: sz[z]=1 104 | for (z, nz) in sz.iteritems(): 105 | soapstr=("soap central_reference_all_species=F central_weight="+str(centerweight)+ 106 | " covariance_sigma0=0.0 atom_sigma="+str(gs)+" cutoff="+str(coff)+" cutoff_transition_width="+str(cotw)+ 107 | " n_max="+str(nmax)+" l_max="+str(lmax)+' '+lspecies+' Z='+str(z)) 108 | desc = quippy.descriptors.Descriptor(soapstr ) 109 | soaps[z] = desc.calc(at)["descriptor"] 110 | for z in at.z: 111 | fout.write("%3s " % (atomicno_to_sym(z))) 112 | if cur: 113 | single_soap = soaps[z][len(soaps[z])-sz[z]] 114 | red_soap = np.dot(single_soap[idxsel], chol) 115 | np.savetxt(fout, [red_soap]) 116 | else: np.savetxt(fout, [ soaps[z][len(soaps[z])-sz[z]] ]) 117 | sz[z] -=1 118 | counter +=1 119 | sys.stderr.write("SOAP vectors calculated: %d / %d \r" %(counter, len(al.n)) ) 120 | sys.stderr.write("\n") 121 | 122 | fout.close() 123 | 124 | if __name__ == '__main__': 125 | parser = argparse.ArgumentParser(description="""Computes the similarity matrix between a set of atomic structures 126 | based on SOAP descriptors and an optimal assignment of local environments.""") 127 | 128 | parser.add_argument("filename", nargs=1, help="Name of the LibAtom formatted xyz input file") 129 | parser.add_argument("-n", type=int, default='8', help="Number of radial functions for the descriptor") 130 | parser.add_argument("-l", type=int, default='6', help="Maximum number of angular functions for the descriptor") 131 | parser.add_argument("-c", type=float, default='5.0', help="Radial cutoff") 132 | parser.add_argument("--cotw", type=float, default='0.5', help="Cutoff transition width") 133 | parser.add_argument("-g", type=float, default='0.5', help="Atom Gaussian sigma") 134 | parser.add_argument("-cw", 
type=float, default='1.0', help="Center atom weight") 135 | parser.add_argument("-z", type=str, default=None, help="Comma separated atomic numbers of the species that must be considered") 136 | parser.add_argument("--reduce", type=str, nargs = 2, default=None, metavar=('COLS', 'CHOL'), help="Reduce the output power spectrum using two provided files, one for the indices of the columns, the other containing the decomposed UR matrix") 137 | parser.add_argument("--prefix", type=str, default='', help="Prefix for output files (defaults to input file name)") 138 | 139 | 140 | args = parser.parse_args() 141 | 142 | main(args.filename, nmax=args.n, lmax=args.l, coff=args.c, cotw=args.cotw, gs=args.g, centerweight=args.cw, gspecies=args.z, redfiles=args.reduce, prefix=args.prefix) 143 | -------------------------------------------------------------------------------- /tools/env_clust.py: -------------------------------------------------------------------------------- 1 | import env_corr 2 | import cluster 3 | import numpy as np 4 | import quippy 5 | import sys 6 | import argparse 7 | from scipy.cluster import hierarchy as sc 8 | import matplotlib.pyplot as plt 9 | 10 | def main(fnamexyz, cutoffdist, fnamesim, zenv, nenv, dcut, selectgroupxyz2print=[], dcutisgood=False, isploting=False ): 11 | 12 | pyplot = isploting 13 | plotdendo = True 14 | 15 | # Generate the file name for the new dist matrices that are without dummy atoms 16 | nsim = len(nenv) 17 | fname = [] 18 | fnamecorr = [] 19 | fnamemathematica = [] 20 | cnt = 0 21 | for fn in fnamesim: 22 | target = '_'+str(nenv[cnt]) 23 | fname.append(fn[:fn.find(target)]+'.sim') 24 | fnamecorr.append(fn[:fn.find(target)]+'.corrmt') 25 | fnamemathematica.append(fn[:fn.find(target)]+'.mathematica-cluster.dat') 26 | cnt+=1 27 | print (fn[:fn.find(target)]) 28 | 29 | print >> sys.stderr, "Reading input file", fnamexyz 30 | mols = quippy.AtomsList(fnamexyz); 31 | print >> sys.stderr, len(mols.n) , " Configurations Read" 32 | 33 | distmatrix = [] 34 | clist = [] 35 | nbclst = [] 36 | clusterlist = [] 37 | nspiecies = [] 38 | Z = [] 39 | dendo = [] 40 | 41 | for it in range(nsim): 42 | print '############' 43 | print "Read distance matrix "+ fnamesim[it] + " with dummy atoms" 44 | olddistmatrix = np.loadtxt(fnamesim[it]) 45 | 46 | fsim = open(fnamesim[it],'r') 47 | head = fsim.readline() 48 | head = head.strip('#');head = head.strip('\n') 49 | # print '##########' , head 50 | fsim.close() 51 | 52 | # Removes dummy atoms rows/columns from distance matrix 53 | newdistmatrix1, dummylist = env_corr.rmdummyfromsim(fnamexyz,olddistmatrix,zenv[it],nenv[it]) 54 | print len(newdistmatrix1),' Real ',zenv[it],' atom ' 55 | print 'Removes : ', len(dummylist),' rows/colomns from distmatrix' 56 | print newdistmatrix1.shape 57 | 58 | # Write the distmatrix without dummy atoms 59 | np.savetxt(fname[it],newdistmatrix1, header=head ) 60 | 61 | # get list of the cluster groups idx (same order as in dist mat) 62 | clist1,Z1 = env_corr.clusterdistmat(fname[it],newdistmatrix1,0.,mode='average',plot=plotdendo) 63 | dendo1 = sc.dendrogram(Z1) 64 | # Write mathematica cluster structure 65 | cluster.mathematica_cluster(Z1, newdistmatrix1,fnamemathematica[it]) 66 | 67 | # get nb of cluster groups 68 | nbclst1 = len(np.unique(clist1)) 69 | # print a==nbclst1 70 | distmatrix.append(newdistmatrix1) 71 | clist.append(clist1) 72 | nbclst.append(nbclst1) 73 | Z.append(Z1) 74 | dendo.append(dendo1) 75 | 76 | # Link the cluster groups with atom's frame and respective position in it 77 | if 
dcutisgood: 78 | clusterlist1, nspiecies1 = env_corr.linkgroup2atmidx(mols,clist1,zenv[it],nenv[it]) 79 | clusterlist.append(clusterlist1) 80 | # count the number of species atoms in each frame 81 | nspiecies.append(nspiecies1) 82 | 83 | # Compute the correlation between the species of 2 clusters 84 | # look for the groups of the atoms (from cluster2) in the environment of an 85 | # atom from a group from cluster1 86 | if dcutisgood: 87 | corrmtx = [] 88 | for it in range(1,nsim): 89 | if len(selectgroupxyz2print)==0: 90 | selectgroupxyz2print = [] 91 | for i in range(1,nsim+1): 92 | selectgroupxyz2print.append([]) 93 | 94 | corrmtx1, corr1,idx1, idx2 = env_corr.getcorrofcluster(mols,cutoffdist,fnamexyz,clusterlist[0],clusterlist[it],nbclst[0],nbclst[it],zenv[0],zenv[it],nspiecies[0],nspiecies[it],selectgroupxyz2print[it-1]) 95 | 96 | # idx1 = dendo[0]['leaves'] 97 | # idx2 = dendo[it]['leaves'] 98 | print len(idx1),len(idx2) 99 | print corrmtx1.shape 100 | # corrmtx1 = corrmtx1[idx1,:] 101 | # corrmtx1 = corrmtx1[:,idx2] 102 | 103 | corrmtx.append(corrmtx1) 104 | head1 = 'From '+fnamexyz+' Correlation matrix between environment of atom '+str(zenv[0])+' and '+str(zenv[it]) 105 | print '############' 106 | print head1 107 | head2 = 'From '+fnamexyz+' Dendrogram ordering of atom '+str(zenv[0]) 108 | head3 = 'From '+fnamexyz+' Dendrogram ordering of atom '+str(zenv[it]) 109 | 110 | np.savetxt(fnamecorr[it], corrmtx1, header=head1 ) 111 | np.savetxt(fnamecorr[it]+str(zenv[0]), idx1, header=head2 ) 112 | np.savetxt(fnamecorr[it]+str(zenv[it]), idx2, header=head3 ) 113 | 114 | # print corrmtx1 115 | 116 | if pyplot: 117 | plt.show() 118 | 119 | # take an input argument like aa,bb,aa and output a list 120 | # separator must be ',' 121 | def unpackstrlistinput(arg): 122 | tmpsim = '' 123 | arglist = [] 124 | for char in arg: 125 | if char == ',': 126 | arglist.append(tmpsim) 127 | tmpsim = '' 128 | continue 129 | tmpsim += char 130 | arglist.append(tmpsim) 131 | return arglist 132 | 133 | def unpackfloatlistinput(arg): 134 | tmpsim = '' 135 | arglist = [] 136 | for char in arg: 137 | if char == ',': 138 | arglist.append(float(tmpsim)) 139 | tmpsim = '' 140 | continue 141 | tmpsim += char 142 | arglist.append(float(tmpsim)) 143 | return arglist 144 | 145 | def unpackintlistinput(arg): 146 | tmpsim = '' 147 | arglist = [] 148 | for char in arg: 149 | if char == ',': 150 | arglist.append(int(tmpsim)) 151 | tmpsim = '' 152 | continue 153 | tmpsim += char 154 | arglist.append(int(tmpsim)) 155 | return arglist 156 | 157 | 158 | if __name__ == '__main__': 159 | parser = argparse.ArgumentParser(description="""Computes the correlation matrix from the hierarchical clustering between environments given as environmental similarity/distance matrices produced by glosim from option --envsim.""") 160 | 161 | parser.add_argument("xyz", nargs=1, help="xyz data file") 162 | parser.add_argument("sim",type=list, help="Distance matrices of the given species environment. The species of the first file is taken as reference. Must be a list of sim filenames with comma separator, i.e. sim = a.sim,b.sim") 163 | # parser.add_argument("--mode", type=str, default="average", help="Linkage mode (e.g. --mode average/single/complete/median/centroid") 164 | parser.add_argument("--cutoffdist", type=float, default=2., help="Cutoff distance used in glosim (in Angstrom)") 165 | parser.add_argument("--dcut", type=list, default='', help="List of distance cutoffs to cut the dendrogram. 
Must be listed in same order as sim") 166 | parser.add_argument("--zenv", type=list, default='', help="List of the atomic number of the atoms of the similarity matrices listed. Must be listed in same order as sim") 167 | parser.add_argument("--nenv", type=list, default='', help="List of the number of dummy atoms of the similarity matrices listed. Must be listed in same order as sim") 168 | parser.add_argument("--select", type=list, default='', help="List of the group idx to be printed to an xyz file. ex: with 3 distmatrix, input : 1,2,4,6 ; which will select group 1 from 1st species and group 2 of 2nd species and group 4 from 1st species and group 6 of 3rd species") 169 | parser.add_argument("--plot", action="store_true", help="Plot the dendrograms") 170 | parser.add_argument("--dcutok", action="store_true", help="If the dcut are fine then do all the analisis") 171 | 172 | args = parser.parse_args() 173 | 174 | # Convert list of char into proper list using ',' as the delemiter 175 | fnamesim = unpackstrlistinput(args.sim) 176 | zenv = unpackintlistinput(args.zenv) 177 | nenv = unpackintlistinput(args.nenv) 178 | dcut = unpackfloatlistinput(args.dcut) 179 | 180 | # Convert list of char into proper list using ',' as the delemiter and set default value 181 | if len(args.select) > 0: 182 | simlist = unpackintlistinput(args.select) 183 | selectgroupxyz2print = zip(*[iter(simlist)]*2) 184 | else: 185 | selectgroupxyz2print = [] 186 | 187 | main(args.xyz[0], args.cutoffdist, fnamesim, zenv, nenv, dcut, selectgroupxyz2print, dcutisgood=args.dcutok, isploting =args.plot) 188 | -------------------------------------------------------------------------------- /libmatch/environments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Computes the matrix of similarities between structures in a xyz file 3 | # by first getting SOAP descriptors for all environments, finding the best 4 | # match between environments using the Hungarian algorithm, and finally 5 | # summing up the environment distances. 6 | # Supports periodic systems, matching between structures with different 7 | # atom number and kinds, and sports the infrastructure for introducing an 8 | # alchemical similarity kernel to match different atomic species 9 | 10 | import sys, time 11 | from copy import copy, deepcopy 12 | import numpy as np 13 | 14 | __all__ = [ "environ", "alchemy", "envk" ] 15 | 16 | class alchemy: 17 | def getpair(self, sa, sb): 18 | if len(self.rules)==0: # special case when the alchemical matrix is default 19 | if sa==sb: return 1 20 | else: return 0 21 | else: 22 | if sa<=sb and (sa,sb) in self.rules: 23 | return self.rules[(sa,sb)] 24 | elif sa>sb and (sb,sa) in self.rules: 25 | return self.rules[(sb,sa)] 26 | else: 27 | if sa==sb: return 1 28 | else: return 0 29 | 30 | def __init__(self, rules={}, mu=0): 31 | self.rules = rules.copy() 32 | self.mu = mu 33 | 34 | 35 | class environ: 36 | def getpair(self, sa, sb): 37 | siab = (sa,sb) # the power spectra are not fully symmetric with respect to exchange of species index, unless one also exchanges n1 and n2, which is a mess. 38 | if siab in self.soaps: 39 | return self.soaps[siab] 40 | else: 41 | if len(self.soaps)==0: # dummy atoms environments just returned as isolated species! 
42 | if sa==sb and sa==self.z: 43 | return self.dummy1 44 | return self.dummy0 45 | 46 | def __init__(self, nmax, lmax, salchem=None, specie=0): 47 | self.alchem = salchem 48 | if self.alchem is None: self.alchem=alchemy() 49 | 50 | self.z = specie 51 | 52 | self.nmax = nmax 53 | self.lmax = lmax 54 | self.dummy0 = np.zeros((self.nmax*self.nmax)*(self.lmax+1), float); 55 | self.dummy1 = self.dummy0.copy(); self.dummy1[0]=1.0; 56 | 57 | self.soaps = {} 58 | if self.z == 0: 59 | self.nspecies = 0 60 | self.zspecies = [] 61 | self.natoms = 0 62 | else: 63 | self.nspecies = 1 64 | self.zspecies = [self.z] 65 | self.natoms = 1 66 | 67 | 68 | def add(self, nenv): 69 | # combine the SOAPS in the nenv environment to this one 70 | self.zspecies = sorted(list(set(self.zspecies+nenv.zspecies))) # merges the list of environment species 71 | self.nspecies = len(self.zspecies) 72 | if self.z>0 and self.z!= nenv.z: self.z=-1 # self species is not defined for a sum of different centers of environments 73 | if self.nmax != nenv.nmax or self.lmax != nenv.lmax: raise ValueError("Cannot combine environments with different expansion channels") 74 | if len(self.soaps)==0 and self.z>0: # we need an explicit description 75 | self.soaps[(self.z,self.z)] = self.dummy1 76 | if len(nenv.soaps)==0 and nenv.z>0: 77 | if (nenv.z,nenv.z) in self.soaps: 78 | self.soaps[(nenv.z,nenv.z)] += self.dummy1 79 | else: 80 | self.soaps[(nenv.z,nenv.z)] = self.dummy1.copy() 81 | else: 82 | for k,w in nenv.soaps.items(): 83 | if k in self.soaps: 84 | self.soaps[k] = self.soaps[k] + nenv.soaps[k] 85 | else: 86 | self.soaps[k] = w.copy() 87 | 88 | self.natoms += nenv.natoms 89 | 90 | def normalize(self): 91 | nrm = np.sqrt( envk(self, self, self.alchem) ) 92 | 93 | for sij in self.soaps: self.soaps[sij]*=1.0/nrm 94 | 95 | # @profile 96 | def convert(self, specie, species, rawsoap, unsoap=False): 97 | self.z = specie 98 | self.zspecies = sorted(species) 99 | self.nspecies = len(species) 100 | self.natoms = 1 101 | 102 | self.soaps = {} 103 | ipair = {} 104 | for s1 in range(self.nspecies): 105 | for s2 in range(self.nspecies): # range(s1+1): we actually need to store also the reverse pairs if we want to go alchemical 106 | self.soaps[(self.zspecies[s2],self.zspecies[s1])] = np.zeros((self.nmax*self.nmax)*(self.lmax+1), float) 107 | ipair[(self.zspecies[s2],self.zspecies[s1])] = 0 108 | 109 | isoap = 0 110 | isqrttwo = 1.0/np.sqrt(2.0) 111 | for s1 in xrange(self.nspecies): 112 | for n1 in xrange(self.nmax): 113 | for s2 in xrange(s1+1): 114 | selpair = self.soaps[(self.zspecies[s2],self.zspecies[s1])] 115 | # we need to reconstruct the spectrum for the inverse species order, that also swaps n1 and n2. 116 | # This is again only needed to enable alchemical combination of e.g. alpha-beta and beta-alpha. Shit happens 117 | revpair = self.soaps[(self.zspecies[s1],self.zspecies[s2])] 118 | isel = ipair[(self.zspecies[s2],self.zspecies[s1])] 119 | for n2 in xrange(self.nmax if s2 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | // Array access macros. 
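// [Editor's note - hedged illustration] The SM(x0, x1) macro defined just below
// reads element (x0, x1) of a 2-D float64 NumPy array by offsetting the raw data
// pointer with the array's stride table, so it also works on non-contiguous
// views. An equivalent, more explicit helper would look like the sketch in this
// comment; it is illustration only and is not used anywhere in this file:
//
//   static inline npy_double get_f64(PyArrayObject *m, npy_intp i, npy_intp j) {
//       char *base = (char *) PyArray_DATA(m);
//       return *(npy_double *) (base + i * PyArray_STRIDES(m)[0]
//                                    + j * PyArray_STRIDES(m)[1]);
//   }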
16 | 17 | #define SM(x0, x1) (*(npy_double*) (( (char*) PyArray_DATA(matrix) + \ 18 | (x0) * PyArray_STRIDES(matrix)[0] + \ 19 | (x1) * PyArray_STRIDES(matrix)[1]))) 20 | #define SM_shape(x0) (int) PyArray_DIM(matrix, x0) 21 | 22 | template 23 | class Matrix 24 | { 25 | std::vector inner_; 26 | unsigned int dimx_, dimy_; 27 | 28 | public: 29 | unsigned int size() const { return dimx_; } 30 | 31 | Matrix (unsigned int dimx, unsigned int dimy) 32 | : dimx_ (dimx), dimy_ (dimy) 33 | { 34 | inner_.resize (dimx_*dimy_); 35 | } 36 | 37 | inline T operator()(unsigned int x, unsigned int y) const 38 | { 39 | if (x >= dimx_ || y>= dimy_) 40 | throw 0; // ouch 41 | return inner_[dimx_*y + x]; 42 | } 43 | 44 | inline T& operator()(unsigned int x, unsigned int y) 45 | { 46 | if (x >= dimx_ || y>= dimy_) 47 | throw 0; // ouch 48 | return inner_[dimx_*y + x]; 49 | } 50 | 51 | }; 52 | 53 | // Forward function declaration 54 | static PyObject *permanent_mc(PyObject *self, PyObject *args); 55 | // Forward function declaration 56 | static PyObject *permanent_ryser(PyObject *self, PyObject *args); 57 | // Forward function declaration 58 | static PyObject *rematch(PyObject *self, PyObject *args); 59 | 60 | 61 | 62 | // Method list 63 | static PyMethodDef methods[] = { 64 | { "permanent_mc", permanent_mc, METH_VARARGS, "Computes the permanent of a numpy matrix by random montecarlo method upto given accuracy"}, 65 | { "permanent_ryser", permanent_ryser, METH_VARARGS, "Computes the permanent of a numpy matrix by Ryser algorithm"}, 66 | { "rematch", rematch, METH_VARARGS, "Computes the permanent of a numpy matrix by Ryser algorithm"}, 67 | { NULL, NULL, 0, NULL } // Sentinel 68 | }; 69 | 70 | // Module initialization 71 | PyMODINIT_FUNC initpermanent(void) { 72 | (void) Py_InitModule("permanent", methods); 73 | import_array(); 74 | } 75 | 76 | double fact(int n) 77 | { 78 | double fn=1.0; for (int i=2; i<=n; ++i) fn*=double(i); 79 | return fn; 80 | } 81 | 82 | static npy_double _mcperm(PyArrayObject *matrix, PyFloatObject *eps, PyIntObject *ntry, PyIntObject *seed) 83 | { 84 | // int n=mtx.size(); 85 | int n = (int) PyArray_DIM(matrix, 0); 86 | std::vector idx(n); 87 | // double eps=1e-3; 88 | double eps1=PyFloat_AS_DOUBLE(eps); 89 | int ntry1=PyInt_AS_LONG(ntry); 90 | int seed1=PyInt_AS_LONG(seed); 91 | for (int i=0; i0) std::srand(seed1); 95 | //std::cerr<0 && i >=ntry1) { ti=i; break; } 115 | if (ntry1==0 && i==pstride) // check if we are converged 116 | { 117 | ++istride; i=0; ti=double(istride)*double(pstride); 118 | double err=sqrt((prm2-prm*prm/ti)/ti/(ti-1) ) / (prm/ti); 119 | //std::cerr < u(nx), ou(nx), v(ny); 135 | double ax = 1.0/nx, ay=1.0/ny; 136 | Matrix Kg(nx,ny); 137 | for (int i=0; iterr); 160 | 161 | double rval=0, rrow; 162 | for (int i=0; i> 1); 177 | q = (q & 0x3333333333333333) + ((q & 0xCCCCCCCCCCCCCCCC) >> 2); 178 | q = (q & 0x0F0F0F0F0F0F0F0F) + ((q & 0xF0F0F0F0F0F0F0F0) >> 4); 179 | q = (q & 0x00FF00FF00FF00FF) + ((q & 0xFF00FF00FF00FF00) >> 8); 180 | q = (q & 0x0000FFFF0000FFFF) + ((q & 0xFFFF0000FFFF0000) >> 16); 181 | q = (q & 0x00000000FFFFFFFF) + ((q & 0xFFFFFFFF00000000) >> 32); // This last & isn't strictly qecessary. 
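// [Editor's note - hedged] The masked shift-and-add cascade above is a standard
// SWAR population count, so q now holds the number of set bits of the input;
// together with bitparity() below it provides the alternating sign of Ryser's
// inclusion-exclusion formula used in _ryperm(). For illustration only, on
// GCC/Clang the same count could be obtained with a builtin such as
// __builtin_popcountll().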
182 | return q; 183 | } 184 | 185 | inline int bitparity (unsigned int n) { return 1 - (countbits(n) & 1)*2; } 186 | 187 | // Ryser's algorithm 188 | // Adapted from a complex-argument version by Pete Shadbolt 189 | static npy_double _ryperm(PyArrayObject *matrix) { 190 | unsigned int n = (unsigned int) PyArray_DIM(matrix, 0); 191 | npy_double sumz, prod; 192 | npy_double perm = 0; 193 | unsigned long two2n = 1 << n; 194 | unsigned long i, y, z; 195 | for (i=0; i, Gael Varoquaux 14 | # Author: Brian M. Clapper, Gael Varoquaux 15 | # LICENSE: BSD 16 | 17 | 18 | try: 19 | np.array(5).astype(float, copy=False) 20 | except TypeError: 21 | # Compat where astype accepted no copy argument 22 | def astype(array, dtype, copy=True): 23 | if not copy and array.dtype == dtype: 24 | return array 25 | return array.astype(dtype) 26 | else: 27 | astype = np.ndarray.astype 28 | 29 | 30 | def linear_assignment(X): 31 | """Solve the linear assignment problem using the Hungarian algorithm. 32 | 33 | The problem is also known as maximum weight matching in bipartite graphs. 34 | The method is also known as the Munkres or Kuhn-Munkres algorithm. 35 | 36 | Parameters 37 | ---------- 38 | X : array 39 | The cost matrix of the bipartite graph 40 | 41 | Returns 42 | ------- 43 | indices : array, 44 | The pairs of (row, col) indices in the original array giving 45 | the original ordering. 46 | 47 | References 48 | ---------- 49 | 50 | 1. http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html 51 | 52 | 2. Harold W. Kuhn. The Hungarian Method for the assignment problem. 53 | *Naval Research Logistics Quarterly*, 2:83-97, 1955. 54 | 55 | 3. Harold W. Kuhn. Variants of the Hungarian method for assignment 56 | problems. *Naval Research Logistics Quarterly*, 3: 253-258, 1956. 57 | 58 | 4. Munkres, J. Algorithms for the Assignment and Transportation Problems. 59 | *Journal of the Society of Industrial and Applied Mathematics*, 60 | 5(1):32-38, March, 1957. 61 | 62 | 5. http://en.wikipedia.org/wiki/Hungarian_algorithm 63 | """ 64 | indices = _hungarian(X).tolist() 65 | indices.sort() 66 | # Re-force dtype to ints in case of empty list 67 | indices = np.array(indices, dtype=int) 68 | # Make sure the array is 2D with 2 columns. 69 | # This is needed when dealing with an empty list 70 | indices.shape = (-1, 2) 71 | return indices 72 | 73 | 74 | class _HungarianState(object): 75 | """State of one execution of the Hungarian algorithm. 76 | 77 | Parameters 78 | ---------- 79 | cost_matrix : 2D matrix 80 | The cost matrix. Does not have to be square. 81 | """ 82 | 83 | def __init__(self, cost_matrix): 84 | cost_matrix = np.atleast_2d(cost_matrix) 85 | 86 | # If there are more rows (n) than columns (m), then the algorithm 87 | # will not be able to work correctly. Therefore, we 88 | # transpose the cost function when needed. Just have to 89 | # remember to swap the result columns back later. 90 | transposed = (cost_matrix.shape[1] < cost_matrix.shape[0]) 91 | if transposed: 92 | self.C = (cost_matrix.T).copy() 93 | else: 94 | self.C = cost_matrix.copy() 95 | self.transposed = transposed 96 | 97 | # At this point, m >= n. 98 | n, m = self.C.shape 99 | self.row_uncovered = np.ones(n, dtype=np.bool) 100 | self.col_uncovered = np.ones(m, dtype=np.bool) 101 | self.Z0_r = 0 102 | self.Z0_c = 0 103 | self.path = np.zeros((n + m, 2), dtype=int) 104 | self.marked = np.zeros((n, m), dtype=int) 105 | 106 | def _find_prime_in_row(self, row): 107 | """ 108 | Find the first prime element in the specified row. 
Returns 109 | the column index, or -1 if no starred element was found. 110 | """ 111 | col = np.argmax(self.marked[row] == 2) 112 | if self.marked[row, col] != 2: 113 | col = -1 114 | return col 115 | 116 | def _clear_covers(self): 117 | """Clear all covered matrix cells""" 118 | self.row_uncovered[:] = True 119 | self.col_uncovered[:] = True 120 | 121 | 122 | def _hungarian(cost_matrix): 123 | """The Hungarian algorithm. 124 | 125 | Calculate the Munkres solution to the classical assignment problem and 126 | return the indices for the lowest-cost pairings. 127 | 128 | Parameters 129 | ---------- 130 | cost_matrix : 2D matrix 131 | The cost matrix. Does not have to be square. 132 | 133 | Returns 134 | ------- 135 | indices : 2D array of indices 136 | The pairs of (row, col) indices in the original array giving 137 | the original ordering. 138 | """ 139 | state = _HungarianState(cost_matrix) 140 | 141 | # No need to bother with assignments if one of the dimensions 142 | # of the cost matrix is zero-length. 143 | step = None if 0 in cost_matrix.shape else _step1 144 | 145 | while step is not None: 146 | step = step(state) 147 | 148 | # Look for the starred columns 149 | results = np.array(np.where(state.marked == 1)).T 150 | 151 | # We need to swap the columns because we originally 152 | # did a transpose on the input cost matrix. 153 | if state.transposed: 154 | results = results[:, ::-1] 155 | 156 | return results 157 | 158 | 159 | # Individual steps of the algorithm follow, as a state machine: they return 160 | # the next step to be taken (function to be called), if any. 161 | 162 | def _step1(state): 163 | """Steps 1 and 2 in the Wikipedia page.""" 164 | 165 | # Step1: For each row of the matrix, find the smallest element and 166 | # subtract it from every element in its row. 167 | state.C -= state.C.min(axis=1)[:, np.newaxis] 168 | # Step2: Find a zero (Z) in the resulting matrix. If there is no 169 | # starred zero in its row or column, star Z. Repeat for each element 170 | # in the matrix. 171 | for i, j in zip(*np.where(state.C == 0)): 172 | if state.col_uncovered[j] and state.row_uncovered[i]: 173 | state.marked[i, j] = 1 174 | state.col_uncovered[j] = False 175 | state.row_uncovered[i] = False 176 | 177 | state._clear_covers() 178 | return _step3 179 | 180 | 181 | def _step3(state): 182 | """ 183 | Cover each column containing a starred zero. If n columns are covered, 184 | the starred zeros describe a complete set of unique assignments. 185 | In this case, Go to DONE, otherwise, Go to Step 4. 186 | """ 187 | marked = (state.marked == 1) 188 | state.col_uncovered[np.any(marked, axis=0)] = False 189 | 190 | if marked.sum() < state.C.shape[0]: 191 | return _step4 192 | 193 | 194 | def _step4(state): 195 | """ 196 | Find a noncovered zero and prime it. If there is no starred zero 197 | in the row containing this primed zero, Go to Step 5. Otherwise, 198 | cover this row and uncover the column containing the starred 199 | zero. Continue in this manner until there are no uncovered zeros 200 | left. Save the smallest uncovered value and Go to Step 6. 
201 | """ 202 | # We convert to int as numpy operations are faster on int 203 | C = (state.C == 0).astype(np.int) 204 | covered_C = C * state.row_uncovered[:, np.newaxis] 205 | covered_C *= astype(state.col_uncovered, dtype=np.int, copy=False) 206 | n = state.C.shape[0] 207 | m = state.C.shape[1] 208 | while True: 209 | # Find an uncovered zero 210 | row, col = np.unravel_index(np.argmax(covered_C), (n, m)) 211 | if covered_C[row, col] == 0: 212 | return _step6 213 | else: 214 | state.marked[row, col] = 2 215 | # Find the first starred element in the row 216 | star_col = np.argmax(state.marked[row] == 1) 217 | if not state.marked[row, star_col] == 1: 218 | # Could not find one 219 | state.Z0_r = row 220 | state.Z0_c = col 221 | return _step5 222 | else: 223 | col = star_col 224 | state.row_uncovered[row] = False 225 | state.col_uncovered[col] = True 226 | covered_C[:, col] = C[:, col] * ( 227 | astype(state.row_uncovered, dtype=np.int, copy=False)) 228 | covered_C[row] = 0 229 | 230 | 231 | def _step5(state): 232 | """ 233 | Construct a series of alternating primed and starred zeros as follows. 234 | Let Z0 represent the uncovered primed zero found in Step 4. 235 | Let Z1 denote the starred zero in the column of Z0 (if any). 236 | Let Z2 denote the primed zero in the row of Z1 (there will always be one). 237 | Continue until the series terminates at a primed zero that has no starred 238 | zero in its column. Unstar each starred zero of the series, star each 239 | primed zero of the series, erase all primes and uncover every line in the 240 | matrix. Return to Step 3 241 | """ 242 | count = 0 243 | path = state.path 244 | path[count, 0] = state.Z0_r 245 | path[count, 1] = state.Z0_c 246 | 247 | while True: 248 | # Find the first starred element in the col defined by 249 | # the path. 250 | row = np.argmax(state.marked[:, path[count, 1]] == 1) 251 | if not state.marked[row, path[count, 1]] == 1: 252 | # Could not find one 253 | break 254 | else: 255 | count += 1 256 | path[count, 0] = row 257 | path[count, 1] = path[count - 1, 1] 258 | 259 | # Find the first prime element in the row defined by the 260 | # first path step 261 | col = np.argmax(state.marked[path[count, 0]] == 2) 262 | if state.marked[row, col] != 2: 263 | col = -1 264 | count += 1 265 | path[count, 0] = path[count - 1, 0] 266 | path[count, 1] = col 267 | 268 | # Convert paths 269 | for i in range(count + 1): 270 | if state.marked[path[i, 0], path[i, 1]] == 1: 271 | state.marked[path[i, 0], path[i, 1]] = 0 272 | else: 273 | state.marked[path[i, 0], path[i, 1]] = 1 274 | 275 | state._clear_covers() 276 | # Erase all prime markings 277 | state.marked[state.marked == 2] = 0 278 | return _step3 279 | 280 | 281 | def _step6(state): 282 | """ 283 | Add the value found in Step 4 to every element of each covered row, 284 | and subtract it from every element of each uncovered column. 285 | Return to Step 4 without altering any stars, primes, or covered lines. 
286 | """ 287 | # the smallest uncovered value in the matrix 288 | if np.any(state.row_uncovered) and np.any(state.col_uncovered): 289 | minval = np.min(state.C[state.row_uncovered], axis=0) 290 | minval = np.min(minval[state.col_uncovered]) 291 | state.C[np.logical_not(state.row_uncovered)] += minval 292 | state.C[:, state.col_uncovered] -= minval 293 | return _step4 294 | 295 | -------------------------------------------------------------------------------- /tools/env_corr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compute correlation matrix between the clustering of 2 differents environment 3 | """ 4 | import argparse 5 | import numpy as np 6 | import sys 7 | import scipy.cluster.hierarchy as sc 8 | import itertools 9 | from scipy.stats import kurtosis,skew 10 | from scipy.stats.mstats import kurtosistest 11 | from os.path import basename 12 | import cluster 13 | from collections import Counter 14 | import matplotlib.pyplot as plt 15 | import quippy 16 | 17 | def getcorrofcluster(mols,cutoffdist,fnamexyz,clusterlist1,clusterlist2,ng1,ng2,zenv1,zenv2,nspiecies1,nspiecies2,selectgroupxyz2print): 18 | # zenv is atomic nb of selected atom and nenv is the number of 19 | # such species in the similarity matrix 20 | 21 | nframe = len(mols) 22 | 23 | # initialize correlation matrix 24 | corrmtx = np.zeros((ng1,ng2), dtype=int) 25 | frame1 = np.zeros((ng1), dtype=int) 26 | frame2 = np.zeros((ng2), dtype=int) 27 | # give correlation data structured with specie of clusterlist1 28 | # as main element 29 | corr1 = {x: [] for x in xrange(ng1)} 30 | corr2 = {x: [] for x in xrange(ng2)} 31 | 32 | r1 = np.array([0,0,0],dtype=float) 33 | r2 = np.array([0,0,0],dtype=float) 34 | 35 | # count nb of atom in each clst group 36 | atmcount = {x: -1 for x in xrange(ng1)} 37 | # Print xyz of selected group 38 | if len(selectgroupxyz2print) > 0 : 39 | selectfname = 'selected'+str(selectgroupxyz2print[0])+'_'+str(selectgroupxyz2print[1])+'.env'+str(zenv1)+'_'+str(zenv2)+'.xyz' 40 | print 'Writing ', selectfname 41 | f = open(selectfname,'w') 42 | 43 | # Main loop over the frames 44 | for iframe in xrange(nframe): 45 | mol = mols[iframe] 46 | 47 | for atm1 in xrange(nspiecies1[iframe]): # mol.z is fortran array bound:(1:natm) 48 | 49 | # need to be done at each loop 50 | corr2 = {x: [] for x in xrange(ng2)} 51 | 52 | corr1[clusterlist1[iframe][atm1][0]].append([clusterlist1[iframe][atm1][1],clusterlist1[iframe][atm1][2],{zenv2 : corr2}]) 53 | atmcount[clusterlist1[iframe][atm1][0]] += 1 54 | 55 | # position of the atom from clusterlist1 56 | r1[0] = mol[clusterlist1[iframe][atm1][2]].x 57 | r1[1] = mol[clusterlist1[iframe][atm1][2]].y 58 | r1[2] = mol[clusterlist1[iframe][atm1][2]].z 59 | 60 | for atm2 in xrange(nspiecies2[iframe]): 61 | # position of the atom from clusterlist2 62 | r2[0] = mol[clusterlist2[iframe][atm2][2]].x 63 | r2[1] = mol[clusterlist2[iframe][atm2][2]].y 64 | r2[2] = mol[clusterlist2[iframe][atm2][2]].z 65 | 66 | frame1[clusterlist1[iframe][atm1][0]] = clusterlist1[iframe][atm1][1] 67 | frame2[clusterlist2[iframe][atm2][0]] = clusterlist2[iframe][atm2][1] 68 | 69 | dist = np.linalg.norm(r1-r2) 70 | # if atom is within cutoff distance then update correlation matrix 71 | if dist < cutoffdist: 72 | corrmtx[clusterlist1[iframe][atm1][0],clusterlist2[iframe][atm2][0]] += 1 73 | corr1[clusterlist1[iframe][atm1][0]][atmcount[clusterlist1[iframe][atm1][0]]][2][zenv2][clusterlist2[iframe][atm2][0]].append(clusterlist2[iframe][atm2][2]) 74 | 75 | # 
Print xyz of selected group 76 | if len(selectgroupxyz2print) > 0 : 77 | if (int(clusterlist1[iframe][atm1][0]) == selectgroupxyz2print[0] and int(clusterlist2[iframe][atm2][0]) == selectgroupxyz2print[1]): 78 | # print 'TTTTTTTTTTTTTTTTTTT' 79 | tmppos=[] 80 | tmpname= [] 81 | 82 | for atm3 in xrange(mol.n): 83 | # important otherwise r3 is not updated properly 84 | r3 = [] 85 | r3.extend([mol[atm3].x, mol[atm3].y,mol[atm3].z]) 86 | 87 | dist1 = np.linalg.norm(r1-r3) 88 | if dist1 < cutoffdist: 89 | #print r3 90 | tmppos.append(r3) 91 | tmpname.append(mol[atm3].symbol) 92 | f.write(str(len(tmppos))+'\n') 93 | f.write('Frame '+str(iframe)+' from '+fnamexyz+'\n') 94 | # print tmppos 95 | for line in xrange(len(tmppos)): 96 | # f.write('{0:} \t {1:.8f} \t {2:.8f}\t {3:.8f} \n'.format(tmpname[line],tmppos[line][0],tmppos[line][1],tmppos[line][2])) 97 | f.write("%s %f %f %f \n" %(tmpname[line],tmppos[line][0],tmppos[line][1],tmppos[line][2])) 98 | if len(selectgroupxyz2print) > 0 : 99 | f.close() 100 | # corrkeys = corr1[0][0][2][zenv2].keys() 101 | # arr = np.zeros(len(corrkeys)) 102 | # for i in range(0,len(corr1[0])): 103 | # for j in range(len(corrkeys)): 104 | # arr[j] += len(corr1[0][i][2][zenv2][corrkeys[j]]) 105 | # print corr1[0][i][0], corr1[0][i][1] 106 | # print corr1[0][i][2][zenv2] 107 | # print corrmtx 108 | # print arr 109 | return corrmtx, corr1, frame1, frame2 110 | 111 | def rmdummyfromsim(fnamexyz,distmatrix,zenv,nenv): 112 | # zenv is atomic nb of env and nenv is the number of 113 | # such species in the similarity matrix 114 | 115 | # print clist 116 | atomicmap = {1:"H",2:"He",6:"C",7:"N",8:"O",9:"F"} 117 | # atomicmap_inv = {"H":1,"He":2,"C":6,"N":7,"O":8,"F":9} 118 | # get the atom name and its number from filename 119 | symbenv = atomicmap[zenv] 120 | fxyz = open(fnamexyz,'r') 121 | 122 | dummylist = np.ones((len(distmatrix)),dtype=bool) 123 | 124 | atomnb = 0 125 | # list on the atom : (its cluster idx, its frame idx, id in frame ignoring other atoms, id in frame) 126 | for ind in xrange(len(distmatrix)): 127 | iat = np.fmod(ind,nenv) # ind starts at 0 128 | # iframe = (ind - iat) / nenv 129 | 130 | if iat==0: # if new frame 131 | atomnb = int(fxyz.readline()) # get number of atoms in the frame 132 | fxyz.readline() # skip the comment line 133 | strt = 0 134 | lines = [] 135 | for it in xrange(atomnb): # reads the full frame 136 | lines.append(fxyz.readline()) 137 | 138 | for it in xrange(strt,atomnb): 139 | isdummy = False 140 | if lines[it].find(symbenv)>=0: # find() returns -1 if does not find the atom name in line 141 | strt=it+1 142 | break 143 | 144 | 145 | if it >= nenv-1: # if the atom is a dummy then set its idx nb to -1 146 | strt = atomnb 147 | #print ind 148 | isdummy = True 149 | if isdummy: 150 | dummylist[ind] = False 151 | 152 | tmp = distmatrix[dummylist,:] 153 | newdistmtr = tmp[:,dummylist] 154 | 155 | fxyz.close() 156 | return newdistmtr, dummylist 157 | 158 | 159 | def linkgroup2atmidx(mols,clist,zenv,nenv): 160 | # zenv is atomic nb of env and nenv is the number of 161 | # such species in the similarity matrix 162 | 163 | nframe = len(mols) 164 | # symbsim = atomicmap[zenv] 165 | 166 | # tmp = np.zeros(3,dtype=int) 167 | # clusterlist = np.zeros((len(clist),3),dtype=int) 168 | clusterlist = [] 169 | clistcntr = 0 170 | nspecies = np.zeros(nframe,dtype=int) 171 | # list on the atom : (its cluster idx, its frame idx, id in frame ignoring other atoms, id in frame) 172 | for iframe in xrange(nframe): 173 | mol = mols[iframe] 174 | # nenv = 0 175 | 
natm = len(mol.z) 176 | clusterlist.append([]) 177 | for it in xrange(1,natm+1): # mol.z is a fortran array (1:natm) 178 | if mol.z[it] == zenv: 179 | nspecies[iframe] += 1 180 | tmp1 = [int(clist[clistcntr]), iframe, it -1] 181 | clusterlist[iframe].append(tmp1) 182 | clistcntr += 1 183 | 184 | return clusterlist, nspecies 185 | 186 | def clusterdistmatfull(distmatrixfile,sim,mode='average',plot=False): 187 | # Compute the clusturing on dist^2 so that the average 188 | # distance of a cluster with an other is the RMS distance 189 | sim2 = sim*sim 190 | Z = sc.linkage(sim2,mode) 191 | 192 | # get the full tree 193 | plt.figure(figsize=(10, 15)) 194 | plt.title('Hierarchical Clustering Dendrogram') 195 | plt.xlabel('sample index') 196 | plt.ylabel('distance') 197 | dendo = sc.dendrogram(Z,orientation='right',leaf_rotation=90.,leaf_font_size=20.,show_contracted=False) 198 | c_list = np.array(dendo['leaves']) 199 | 200 | c_count = Counter(c_list) 201 | nbclst = len(c_count) 202 | 203 | print "Number of clusters", nbclst 204 | 205 | # c_list = np.zeros(len(sim)) 206 | 207 | # # Change cluster groups numbering to (0:n-1) 208 | # for i in range(len(sim)): 209 | # c_list[i] = int(clist[i]-1) 210 | 211 | return c_list,Z 212 | 213 | def clusterdistmat(distmatrixfile,sim,dcut,mode='average',plot=False): 214 | # Compute the clusturing on dist^2 so that the average 215 | # distance of a cluster with an other is the RMS distance 216 | sim2 = sim*sim 217 | Z = sc.linkage(sim2,mode) 218 | 219 | cdist = Z[:,2] 220 | # get the full tree 221 | # dendo = sc.dendrogram(Z) 222 | # clist = dendo['leaves'] 223 | nclust = cluster.estimate_ncluster(cdist,dcut) 224 | 225 | clist = sc.fcluster(Z,nclust,criterion='maxclust') 226 | c_count = Counter(clist) 227 | nbclst = len(c_count) 228 | 229 | print "Number of clusters", nbclst 230 | 231 | rep_ind = getrep_ind(sim2,clist,c_count) 232 | 233 | # Write the groupe indices and representatives 234 | filename=basename(distmatrixfile)+'-cluster.index' 235 | f=open(filename,"w") 236 | f.write(" # groupid representative \n ") 237 | for i in range(len(sim)): 238 | iselect=0 239 | if i in rep_ind: iselect=2 240 | f.write("%d %d \n " %(clist[i]-1, iselect)) 241 | f.close() 242 | 243 | 244 | if plot: 245 | filename=basename(distmatrixfile)+'-dendogram.eps' 246 | plotdendro(Z,nclust,filename,rep_ind) 247 | c_list = np.zeros(len(sim)) 248 | 249 | # Change cluster groups numbering to (0:n-1) 250 | for i in range(len(sim)): 251 | c_list[i] = int(clist[i]-1) 252 | 253 | return c_list,Z 254 | 255 | # Determine the representative element of each cluster group 256 | def getrep_ind(sim2,clist,c_count): 257 | rep_ind=[] 258 | structurelist=[] 259 | for iclust in range(1,len(c_count)+1): #calculate mean dissimilary and pick representative structure for each cluster 260 | indices = [i for i, x in enumerate(clist) if x == iclust] #indices for cluster i 261 | nconf=len(indices) 262 | structurelist.append(indices) 263 | sumd=0.0 264 | 265 | #calculate mean dissimilarity in each group 266 | for iconf in range(len(indices)): 267 | ind1=indices[iconf] 268 | for jconf in range(len(indices)): 269 | ind2=indices[jconf] 270 | sumd+=sim2[ind1][ind2] 271 | meand=np.sqrt(sumd/(nconf*nconf)) 272 | 273 | # pick the configuration with min variance in the group 274 | minvar=9999 275 | var=0.0 276 | for iconf in range(len(indices)): 277 | ivar=0.0 278 | ind1=indices[iconf] 279 | for jconf in range(len(indices)): 280 | ind2=indices[jconf] 281 | ivar+=(sim2[ind1][ind2]-meand**2)**2 282 | ivar=ivar/nconf 283 | 
var+=ivar 284 | if(ivar=len(self.atz): 40 | raise IndexError("Trying to access atom past structure size") 41 | k=0 42 | lsp = {} 43 | for z in self.atz: 44 | if z in lsp: lsp[z]+=1 45 | else: lsp[z] = 0 46 | if i==k: 47 | return self.env[z][lsp[z]] 48 | k+=1 49 | 50 | def getenv(self, sp, i): 51 | if sp in self.env and i0: at.remove_atoms(nol) 71 | 72 | 73 | self.nmax = nmax 74 | self.lmax = lmax 75 | self.atz = at.z.copy() 76 | self.species = {} 77 | for z in at.z: 78 | if z in self.species: self.species[z]+=1 79 | else: self.species[z] = 1 80 | 81 | self.zspecies = self.species.keys(); 82 | self.zspecies.sort(); 83 | lspecies = 'n_species='+str(len(self.zspecies))+' species_Z={ ' 84 | for z in self.zspecies: lspecies = lspecies + str(z) + ' ' 85 | lspecies = lspecies + '}' 86 | 87 | at.set_cutoff(coff); 88 | at.calc_connect(); 89 | 90 | self.nenv = 0 91 | if not soapdump is None: 92 | soapdump.write("####### SOAP VECTOR FRAME ######\n") 93 | for sp in self.species: 94 | 95 | if sp in nocenter: 96 | self.species[sp]=0 97 | continue # Option to skip some environments 98 | 99 | # first computes the descriptors of species that are present 100 | if not soapdump is None: sys.stderr.write("SOAP STRING: "+"soap central_reference_all_species=F central_weight="+str(cw)+" covariance_sigma0=0.0 atom_sigma="+str(gs)+" cutoff="+str(coff)+" cutoff_transition_width="+str(cotw)+" n_max="+str(nmax)+" l_max="+str(lmax)+' '+lspecies+' Z='+str(sp)+"\n") 101 | desc = quippy.descriptors.Descriptor("soap central_reference_all_species=F "+("normalise=F" if unsoap else "")+" central_weight="+str(cw)+" covariance_sigma0=0.0 atom_sigma="+str(gs)+" cutoff="+str(coff)+" cutoff_transition_width="+str(cotw)+" n_max="+str(nmax)+" l_max="+str(lmax)+' '+lspecies+' Z='+str(sp) ) 102 | try: 103 | psp = desc.calc(at)["descriptor"] 104 | except TypeError: 105 | print("Interface change in QUIP/GAP. Update your code first.") 106 | 107 | if not soapdump is None: 108 | soapdump.write("Specie %d - %d atoms\n"% (sp,len(psp))) 109 | for p in psp: 110 | np.savetxt(soapdump,[p]) 111 | 112 | # now repartitions soaps in environment descriptors 113 | lenv = [] 114 | for p in psp: 115 | nenv = environ(nmax, lmax, self.alchem) 116 | nenv.convert(sp, self.zspecies, p, unsoap) 117 | lenv.append(nenv) 118 | self.env[sp] = lenv 119 | self.nenv += self.species[sp] 120 | 121 | # adds kit data 122 | if kit is None: kit = {} 123 | 124 | for sp in kit: 125 | if not sp in self.species: 126 | self.species[sp]=0 127 | self.env[sp] = [] 128 | for k in range(self.species[sp], kit[sp]): 129 | self.env[sp].append(environ(self.nmax,self.lmax,self.alchem,sp)) 130 | self.nenv+=1 131 | self.species[sp] = kit[sp] 132 | 133 | self.zspecies = self.species.keys() 134 | self.zspecies.sort() 135 | 136 | # also compute the global (flattened) fingerprint 137 | self.globenv = environ(nmax, lmax, self.alchem) 138 | 139 | for k, se in self.env.items(): 140 | for e in se: 141 | self.globenv.add(e) 142 | # divides by the number of atoms in the structure 143 | for sij in self.globenv.soaps: self.globenv.soaps[sij]*=1.0/self.nenv 144 | # self.globenv.normalize() #if needed, normalization will be done later on..... 
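# [Editor's note - hedged sketch] The "globenv" assembled above is the average of
# all environment SOAP vectors of the structure. For the default (linear,
# Kronecker-alchemy) environment kernel with zeta=1, evaluating envk() on two such
# averaged fingerprints (the "fastavg" mode of structk() below) coincides with
# averaging all pairwise environment kernels ("average" mode), because
#     (1/(N_A N_B)) * sum_ij  x_i . y_j  =  xbar_A . xbar_B
# A self-contained numpy illustration of that identity (toy vectors, not part of
# the library):
#
#   import numpy as np
#   XA = np.random.rand(5, 12)     # 5 toy "environment" vectors of structure A
#   XB = np.random.rand(7, 12)     # 7 toy "environment" vectors of structure B
#   k_avg = sum(np.dot(xa, xb) for xa in XA for xb in XB) / (len(XA) * len(XB))
#   assert np.allclose(k_avg, np.dot(XA.mean(axis=0), XB.mean(axis=0)))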
145 | 146 | 147 | def gcd(a,b): 148 | if (b>a): a,b = b, a 149 | 150 | while (b): a, b = b, a%b 151 | 152 | return a 153 | 154 | def lcm(a,b): 155 | return a*b/gcd(b,a) 156 | 157 | #def gstructk(strucA, strucB, alchem=alchemy(), peratom=False): 158 | # 159 | # return envk(strucA.globenv, strucB.globenv, alchem) 160 | 161 | def structk(strucA, strucB, alchem=alchemy(), peratom=False, mode="match", fout=None, peps=0.0, gamma=1.0, zeta=1.0, xspecies=False): 162 | # computes the SOAP similarity KERNEL between two structures by combining atom-centered kernels 163 | # possible kernel modes include: 164 | # average : scalar product between averaged kernels 165 | # match: best-match hungarian kernel 166 | # permanent: average over all permutations 167 | 168 | # average kernel. quick & easy! 169 | if mode=="fastavg": 170 | genvA=strucA.globenv 171 | genvB=strucB.globenv 172 | return envk(genvA, genvB, alchem)**zeta, 0 173 | elif mode=="fastspecies": 174 | # for now, only implement standard Kronecker alchemy 175 | senvB = environ(strucB.nmax, strucB.lmax, strucB.alchem) 176 | kk = 0 177 | for za in strucA.zspecies: 178 | if not za in strucB.zspecies: continue 179 | senvA = environ(strucA.nmax, strucA.lmax, strucA.alchem) 180 | for ia in xrange(strucA.getnz(za)): 181 | senvA.add(strucA.getenv(za, ia)) 182 | senvB = environ(strucB.nmax, strucB.lmax, strucB.alchem) 183 | for ib in xrange(strucB.getnz(za)): 184 | senvB.add(strucB.getenv(za, ib)) 185 | kk += envk(senvA, senvB, alchem)**zeta 186 | 187 | kk/=strucA.nenv*strucB.nenv 188 | return kk,0 189 | 190 | # for zb, nzb in nspeciesB: 191 | # for ib in xrange(nzb): 192 | # return envk(genvA, genvB, alchem), 0 193 | 194 | nenv = 0 195 | 196 | if peratom: # replicate structures to match structures of different peratomity 197 | # we do not check for compatibility at this stage, just assume that the 198 | # matching will be done somehow (otherwise it would be exceedingly hard to manage in case of non-standard alchemy) 199 | nspeciesA = [] 200 | nspeciesB = [] 201 | for z in strucA.zspecies: 202 | nspeciesA.append( (z, strucA.getnz(z)) ) 203 | for z in strucB.zspecies: 204 | nspeciesB.append( (z, strucB.getnz(z)) ) 205 | nenv=nenvA = strucA.nenv 206 | nenvB = strucB.nenv 207 | else: 208 | # top up missing atoms with isolated environments 209 | # first checks which atoms are present 210 | zspecies = sorted(list(set(strucB.zspecies+strucA.zspecies))) 211 | nspecies = [] 212 | for z in zspecies: 213 | nz = max(strucA.getnz(z),strucB.getnz(z)) 214 | nspecies.append((z,nz)) 215 | nenv += nz 216 | nenvA = nenvB = nenv 217 | nspeciesA = nspeciesB = nspecies 218 | 219 | np.set_printoptions(linewidth=500,precision=4) 220 | 221 | kk = np.zeros((nenvA,nenvB),float) 222 | ika = 0 223 | ikb = 0 224 | for za, nza in nspeciesA: 225 | for ia in xrange(nza): 226 | envA = strucA.getenv(za, ia) 227 | ikb = 0 228 | for zb, nzb in nspeciesB: 229 | for ib in xrange(nzb): 230 | envB = strucB.getenv(zb, ib) 231 | if alchem.mu > 0 and (strucA.ismissing(za, ia) ^ strucB.ismissing(zb, ib)): 232 | # includes a penalty dependent on "mu", in a way that is consistent with the definition of kernel distance 233 | kk[ika,ikb] = exp(-alchem.mu) 234 | else: 235 | if za == zb or not xspecies: #uncomment to zero out kernels between different species 236 | kk[ika,ikb] = envk(envA, envB, alchem)**zeta 237 | else: kk[ika,ikb] = 0 238 | ikb+=1 239 | ika+=1 240 | aidx = {} 241 | ika=0 242 | for za, nza in nspeciesA: 243 | aidx[za] = range(ika,ika+nza) 244 | ika+=nza 245 | ikb=0 246 | bidx = {} 247 | 
for zb, nzb in nspeciesB: 248 | bidx[zb] = range(ikb,ikb+nzb) 249 | ikb+=nzb 250 | 251 | if fout != None: 252 | # prints out similarity information for the environment pairs 253 | fout.write("# atomic species in the molecules (possibly topped up with dummy isolated atoms): \n") 254 | for za, nza in nspeciesA: 255 | for ia in xrange(nza): fout.write(" %d " % (za) ) 256 | fout.write("\n"); 257 | for zb, nzb in nspeciesB: 258 | for ib in xrange(nzb): fout.write(" %d " % (zb) ) 259 | fout.write("\n"); 260 | 261 | fout.write("# environment kernel matrix: \n") 262 | for r in kk: 263 | for e in r: 264 | fout.write("%20.14e " % (e) ) 265 | fout.write("\n") 266 | #fout.write("# environment kernel eigenvalues: \n") 267 | #ev = np.linalg.eigvals(kk) 268 | #for e in ev: 269 | # fout.write("(%8.4e,%8.4e) " % (e.real,e.imag) ) 270 | #fout.write("\n"); 271 | 272 | 273 | 274 | 275 | # Now we have the matrix of scalar products. 276 | # We can first find the optimal scalar product kernel 277 | # we must find the maximum "cost" 278 | if mode == "match": 279 | if peratom and nenvA != nenvB: 280 | nenv = lcm(nenvA, nenvB) 281 | hun = lcm_best_cost(1-kk) 282 | else: 283 | hun=best_cost(1.0-kk) 284 | cost = 1-hun/nenv 285 | elif mode == "permanent": 286 | # there is no place to hide: cross-species environments are not necessarily zero 287 | if peps>0: cost = mcperm(kk, peps) 288 | else: cost = xperm(kk) 289 | 290 | cost = cost/np.math.factorial(nenv)/nenv 291 | elif mode == "rematch": 292 | cost=rematch(kk, gamma, 1e-6) # hard-coded residual error for regularized gamma 293 | # print cost, kk.sum()/(nenv*nenv), envk(strucA.globenv, strucB.globenv, alchem) 294 | elif mode == "average": 295 | cost = kk.sum()/(nenvA*nenvB) 296 | # print 'elem: {}'.format(kk.sum()) 297 | # print 'elem norm: {}'.format(cost) 298 | # print 'avg norm: {}'.format((nenvA*nenvB)) 299 | 300 | else: raise ValueError("Unknown global fingerprint mode ", mode) 301 | 302 | 303 | return cost,kk 304 | 305 | 306 | class structurelist(list): 307 | def __init__(self, basedir="tmpstructures"): 308 | self.basedir=basedir 309 | # create the folder if it is not there 310 | if not os.path.exists(basedir):os.makedirs(basedir) 311 | self.count=0 312 | 313 | def exists(self, index): 314 | # return true if the file associated with index exists, false otherwise 315 | f=self.basedir+'/sl_'+str(index)+'.dat' 316 | return os.path.isfile(f) 317 | # @profile 318 | def append(self, element): 319 | #pickle the element for later use 320 | ind=self.count 321 | f=self.basedir+'/sl_'+str(ind)+'.dat' 322 | file = open(f,"wb") 323 | gc.disable() 324 | pickle.dump(element, file,protocol=pickle.HIGHEST_PROTOCOL) # HIGHEST_PROTOCOL is 2 in py 2.7 325 | file.close() 326 | gc.enable() 327 | self.count+=1 328 | 329 | 330 | # @profile 331 | def __getitem__(self, index): 332 | f = self.basedir+'/sl_'+str(index)+'.dat' 333 | try: 334 | file = open(f,"rb") 335 | except IOError: 336 | raise IOError("Cannot load descriptors for index %d" % (index) ) 337 | gc.disable() 338 | l = pickle.load(file) 339 | file.close() 340 | gc.enable() 341 | return l 342 | -------------------------------------------------------------------------------- /tools/krr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # compute kernel ridge regression for a data set given a kernel matrix 4 | # and a vector of the observed properties. 
syntax: 5 | # $ krr.py [ options ] 6 | 7 | import argparse 8 | import numpy as np 9 | import sys, os 10 | 11 | def segfind(cp, cs): 12 | a = 0 13 | b = len(cp) 14 | while (b-a)>1: 15 | c = int((b+a)/2) 16 | if cs tol).count(True) / 2 29 | S[rank:] = 0.0 30 | S = np.diag(S) 31 | Ap = np.dot(np.dot(U,S),V) 32 | p = np.sum(V[0:rank,:]**2, axis=0) / rank 33 | return p 34 | 35 | def randomsubset(ndata, nsel, plist=None): 36 | if nsel > ndata: 37 | raise ValueError("Cannot select data out of thin air") 38 | if nsel == ndata: 39 | return np.asarray(range(ndata)) 40 | cplist = np.zeros(ndata) 41 | if plist is None: 42 | plist = np.ones(ndata, float) 43 | 44 | # computes initial cumulative probability distr. 45 | cplist[0]=plist[0] 46 | for i in xrange(1,ndata): 47 | cplist[i]=cplist[i-1]+plist[i] 48 | 49 | rdata = np.zeros(nsel, int) 50 | for i in xrange(nsel): 51 | csel = np.random.uniform() * cplist[-1] 52 | isel = segfind(cplist, csel) 53 | rdata[i] = isel 54 | psel = plist[isel] 55 | for j in xrange(isel,ndata): 56 | cplist[j] -= psel 57 | return rdata 58 | 59 | def main(kernels, props, kweights, mode, trainfrac, csi, sigma, ntests, ttest, savevector="", refindex="", inweights=""): 60 | 61 | trainfrac=float(trainfrac) 62 | csi = float(csi) 63 | sigma = float(sigma) 64 | ntests = int(ntests) 65 | ttest=float(ttest) 66 | if (mode == "sequential" or mode == "all") and ntests>1: 67 | raise ValueError("No point in having multiple tests when using determininstic train set selection") 68 | 69 | np.random.seed(12345) #!TODO MAKE IT AN OPTION 70 | if mode=="manual": 71 | mtrain = np.loadtxt("train.idx") 72 | if kweights == "": 73 | kweights = np.ones(len(kernels)) 74 | else: 75 | kweights = np.asarray(kweights.split(","),float) 76 | kweights /= kweights.sum() 77 | # reads kernel(s) 78 | print "# Using kernels ", kernels, " with weights ", kweights 79 | if os.path.splitext(kernels[0])[1] == ".npy": 80 | kij = np.load(kernels[0]) * kweights[0] 81 | else: 82 | kij = np.loadtxt(kernels[0]) * kweights[0] 83 | for i in xrange(1,len(kernels)): 84 | print kernels[i] 85 | if kweights[i]==0: continue 86 | if os.path.splitext(kernels[i])[1] == ".npy": 87 | kij += np.load(kernels[i]) * kweights[i] 88 | else: 89 | kij += np.loadtxt(kernels[i]) * kweights[i] 90 | nel = len(kij) 91 | 92 | # heuristics to see if this is a kernel or a similarity matrix!! 93 | 94 | if kij[0,0]<1e-8: 95 | kij *= kij # builds square distance matrix 96 | ssum = kij.sum(axis=0)/nel # row sum (matrix should be symmetric so same as col sum! 
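# [Editor's note - hedged] This block is classical double centring: with kij
# currently holding the squared distances D^2, the loop below computes
#     K = -0.5 * J * D^2 * J,   with   J = I - (1/n) 1 1^T,
# which converts a (Euclidean) distance matrix into a centred kernel/Gram matrix.
# For illustration only, it is equivalent to the vectorised form
#   J = np.eye(nel) - 1.0/nel
#   kij = -0.5 * np.dot(np.dot(J, kij), J)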
97 | for i in xrange(nel): 98 | kij[i,:]-=ssum 99 | kij[:,i]-=ssum 100 | kij += ssum.sum()/nel 101 | kij *= -0.5 102 | 103 | 104 | # reads index, if available 105 | if refindex == "": 106 | rlabs = np.asarray(range(nel), int) 107 | else: 108 | rlabs = np.loadtxt(refindex,dtype=int) 109 | if len(rlabs) != nel: 110 | raise ValueError("Reference index size mismatch") 111 | 112 | if inweights == "": 113 | lweights = np.ones(nel,float) 114 | else: 115 | lweights = np.loadtxt(inweights, dtype=float); 116 | lweights *= nel/lweights.sum() 117 | 118 | # first hyperparameter - we raise the kernel to a positive exponent to make it sharper or smoother 119 | kij = kij**csi 120 | 121 | # reads properties 122 | p = np.loadtxt(props[0]) # TODO add support for multiple properties 123 | 124 | # chooses test 125 | testmae=0 126 | trainmae=0 127 | truemae=0 128 | testrms=0 129 | trainrms=0 130 | truerms=0 131 | testsup=0 132 | trainsup=0 133 | truesup=0 134 | ctrain=0 135 | ctest=0 136 | ctrue=0 137 | 138 | if mode == "all" : 139 | tp = p[:] 140 | tk = kij[:][:].copy() 141 | vp = np.var(tp) # variance of the property subset (to be size consistent!) 142 | vk = np.trace(tk)/len(tp) 143 | n = len(tp) 144 | lweight = np.ones(n,np.float64) 145 | print >> sys.stderr, "Regularization shift ", sigma**2 * vk/vp 146 | for i in xrange(len(tp)): 147 | tk[i,i]+=sigma**2 * vk/vp/ lweights[i] # diagonal regularization times weight! 148 | tc = np.linalg.solve(tk, tp) 149 | krp = np.dot(kij[:,:],tc) 150 | mae=abs(krp[:]-p[:]).sum()/len(p) 151 | rms=np.sqrt(((krp[:]-p[:])**2).sum()/len(p)) 152 | sup=abs(krp[:]-p[:]).max() 153 | print "# train-set MAE: %f RMS: %f SUP: %f" % (mae, rms, sup) 154 | ltrain = range(nel) 155 | else: 156 | np.set_printoptions(threshold=10000) 157 | ntrain = int(trainfrac*nel) 158 | if mode == "manual": ntrain=len(mtrain) 159 | ntrue = int(ttest*nel) 160 | 161 | for itest in xrange(ntests): 162 | ltest = np.zeros(nel-ntrain-ntrue,int) 163 | ltrain = np.zeros(ntrain,int) 164 | 165 | # if specified, select some elements that are completely ignored from both selection and training 166 | ltrue = np.zeros(ntrue, int) 167 | psel = np.ones(nel,float) 168 | if ntrue > 0: 169 | ltrue = randomsubset(nel, ntrue) 170 | psel[ltrue] = 0.0 171 | if mode == "random": 172 | ltrain[:] = randomsubset(nel, ntrain, psel) 173 | elif mode == "manual": 174 | ltrain[:] = mtrain 175 | elif mode == "sequential": 176 | ltrain[:] = range(ntrain) 177 | elif mode == "fps": 178 | isel=int(np.random.uniform()*nel) 179 | while isel in ltrue: 180 | isel=int(np.random.uniform()*nel) 181 | 182 | ldist = 1e100*np.ones(nel,float) 183 | imin = np.zeros(nel,int) # index of the closest FPS grid point 184 | ltrain[0]=isel 185 | nontrue = np.setdiff1d(range(nel), ltrue) 186 | for nsel in xrange(1,ntrain): 187 | dmax = 0 188 | imax = 0 189 | for i in nontrue: 190 | # numerical error can lead to negative d2 191 | d2 = kij[i,i]+kij[isel,isel]-2*kij[i,isel] 192 | if d2 >= 0: 193 | dsel = np.sqrt(d2) #don't assume kernel is normalised 194 | elif d2 < -1e-3: 195 | print 'Might have a problem with the kernel matrix: ', d2 196 | else: 197 | dsel = 0. 
198 | if dsel < ldist[i]: 199 | imin[i] = nsel-1 200 | ldist[i] = dsel 201 | if ldist[i] > dmax: 202 | dmax = ldist[i]; imax = i 203 | print "selected ", isel, " distance ", dmax 204 | isel = imax 205 | ltrain[nsel] = isel 206 | 207 | for i in xrange(nel): 208 | if i in ltrue: continue 209 | # numerical error can lead to negative d2 210 | d2 = kij[i,i]+kij[isel,isel]-2*kij[i, isel] 211 | if d2 >= 0: 212 | dsel = np.sqrt(d2) #don't assume kernel is normalised 213 | elif d2 < -1e-3: 214 | print 'Might have a problem with the kernel matrix' 215 | else: 216 | dsel = 0. 217 | 218 | # dsel = np.sqrt(1.0-kij[i, isel]) 219 | if dsel < ldist[i]: 220 | imin[i] = nsel-1 221 | ldist[i] = dsel 222 | if ldist[i] > dmax: 223 | dmax = ldist[i]; imax = i 224 | 225 | k = 0 226 | for i in xrange(nel): 227 | if not i in ltrain and not i in ltrue: 228 | ltest[k] = i 229 | k += 1 230 | 231 | tp = p[ltrain] 232 | vp = np.var(tp) # variance of the property subset (to be size consistent!) 233 | tk = kij[ltrain][:,ltrain].copy() 234 | vk = np.trace(tk)/len(ltrain) 235 | # the kernel should represent the variance of the energy (in a GAP interpretation) 236 | # and sigma^2 the estimate of the noise variance. However we want to keep a "naked kernel" so 237 | # we can then estimate without bringing around the variance. So the problem would be 238 | # (vp*N/Tr(tk) tk + sigma^2 I )^-1 p = w 239 | # but equivalently we can write 240 | # ( tk + sigma^2 *tr(tk)/(N vp) I )^-1 p = w 241 | 242 | print >> sys.stderr, "Regularization shift ", sigma**2 * vk/vp 243 | 244 | for i in xrange(len(ltrain)): 245 | tk[i,i]+=sigma**2 * vk/vp/lweights[i] # diagonal regularization times weight! 246 | tc = np.linalg.solve(tk, tp) 247 | krp = np.dot(kij[:,ltrain],tc) 248 | 249 | mae=abs(krp[ltest]-p[ltest]).sum()/len(ltest) 250 | rms=np.sqrt(((krp[ltest]-p[ltest])**2).sum()/len(ltest)) 251 | sup=abs(krp[ltest]-p[ltest]).max() 252 | print "# run: %d test-set MAE: %f RMS: %f SUP: %f" % (itest, mae, rms, sup) 253 | 254 | 255 | testmae += abs(krp[ltest]-p[ltest]).sum()/len(ltest) 256 | trainmae += abs(krp[ltrain]-p[ltrain]).sum()/len(ltrain) 257 | if ntrue>0: truemae += abs(krp[ltrue]-p[ltrue]).sum()/len(ltrue) 258 | testrms += np.sqrt(((krp[ltest]-p[ltest])**2).sum()/len(ltest)) 259 | trainrms += np.sqrt(((krp[ltrain]-p[ltrain])**2).sum()/len(ltrain)) 260 | if ntrue>0: truerms += np.sqrt(((krp[ltrue]-p[ltrue])**2).sum()/len(ltrue)) 261 | testsup+=abs(krp[ltest]-p[ltest]).max() 262 | trainsup+=abs(krp[ltrain]-p[ltrain]).max() 263 | if ntrue>0: truesup+=abs(krp[ltrue]-p[ltrue]).max() 264 | ctrain+=len(ltrain) 265 | ctest+=len(ltest) 266 | ctrue+=len(ltrue) 267 | 268 | for i in xrange(nel): 269 | if i in ltrain: 270 | lab = "TRAIN " 271 | elif i in ltrue: 272 | lab = "TRUE " 273 | else: lab = "TEST" 274 | print i, p[i], krp[i], lab 275 | 276 | print "# KRR results (%d tests, %f training p., %f test p.): csi=%f sigma=%f " % (ntests, ctrain/ntests, ctest/ntests, csi, sigma),(" weights_file=%s" % inweights if inweights!="" else "") 277 | print "# Train points MAE=%f RMSE=%f SUP=%f" % (trainmae/ntests, trainrms/ntests, trainsup/ntests) 278 | print "# Test points MAE=%f RMSE=%f SUP=%f " % (testmae/ntests, testrms/ntests, testsup/ntests) 279 | if len(ltrue) > 0: 280 | print "# True test points MAE=%f RMSE=%f SUP=%f " % (truemae/ntests, truerms/ntests, truesup/ntests) 281 | 282 | if savevector: 283 | fname=open(savevector,'w') 284 | commentline=' Train Vector from kernel matrices: '+str(kernels)+' with weights '+str(kweights)+' and properties from '+ 
props[0] + ' selection mode: '+mode+' : Csi, sigma = ' + str(csi) +' , '+ str(sigma) 285 | np.savetxt(fname,np.asarray([tc, ltrain, rlabs[ltrain]]).T,fmt=("%24.15e", "%10d", "%10d"),header=commentline) 286 | fname.close() 287 | 288 | if __name__ == '__main__': 289 | parser = argparse.ArgumentParser(description="""Computes KRR and analytics based on a kernel matrix and a property vector.""") 290 | 291 | parser.add_argument("--kernels", nargs='+', type=str, help="Kernel matrix (more than one can be read!)") 292 | parser.add_argument("--props", nargs='+', type=str, help="Property file") 293 | parser.add_argument("--kweights", default="", type=str, help="Comma-separated list of kernel weights (when multiple kernels are provided)") 294 | parser.add_argument("--mode", type=str, default="random", help="Train point selection (e.g. --mode all / sequential / random / fps / cur / manual") 295 | parser.add_argument("-f", type=float, default='0.5', help="Train fraction") 296 | parser.add_argument("--truetest", type=float, default='0.0', help="Take these points out from the selection procedure") 297 | parser.add_argument("--csi", type=float, default='1.0', help="Kernel scaling") 298 | parser.add_argument("--sigma", type=float, default='1e-3', help="KRR regularization. In units of the properties. ") 299 | parser.add_argument("--ntests", type=int, default='1', help="Number of tests") 300 | parser.add_argument("--pweights", type=str, default="", help="Apply prior weights to the data points, reading them from the file. The weights will be normalized to sum to N.") 301 | parser.add_argument("--refindex", type=str, default="", help="Structure indices of the kernel matrix (useful when dealing with a subset of a larger structures file)") 302 | parser.add_argument("--saveweights", type=str, default="", help="Save the train-set weights vector in file") 303 | 304 | args = parser.parse_args() 305 | 306 | main(kernels=args.kernels, props=args.props, kweights=args.kweights, mode=args.mode, trainfrac=args.f, csi=args.csi, 307 | sigma=args.sigma, ntests=args.ntests, ttest=args.truetest,savevector=args.saveweights, refindex=args.refindex, inweights=args.pweights) 308 | -------------------------------------------------------------------------------- /tools/krr-Cortes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # compute kernel ridge regression for a data set given a kernel matrix 4 | # and a vector of the observed properties. syntax: 5 | # $ krr.py [ options ] 6 | 7 | import argparse 8 | import numpy as np 9 | import sys 10 | import MultipleKernelLearning as mkl 11 | import costs as cst 12 | from select_landmarks import farthestPointSampling,randomsubset,cur,segfind 13 | 14 | 15 | def main(kernelFilenames, propFilename, mode, trainfrac, csi, ntests, ttest, savevector="", refindex="",**KRRCortesParam): 16 | 17 | trainfrac=float(trainfrac) 18 | #csi = float(csi) 19 | ntests = int(ntests) 20 | ttest=float(ttest) 21 | if (mode == "sequential" or mode == "all") and ntests>1: 22 | raise ValueError("No point in having multiple tests when using determininstic train set selection") 23 | 24 | # Reads kernels 25 | nbOfKernels = len(kernelFilenames) 26 | kernels = [] 27 | for it,kernelFilename in enumerate(kernelFilenames): 28 | kernels.append(np.loadtxt(kernelFilename, dtype=np.float64, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0)) 29 | # heuristics to see if this is a kernel or a similarity matrix!! 
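# [Editor's note - hedged] A ~zero diagonal entry means the file stores distances
# rather than kernel values (a normalised kernel has k_ii close to 1, a distance
# has d_ii = 0). For normalised kernels d_ij^2 = 2 - 2*k_ij, so the conversion
# below, k = 1 - 0.5*d^2, recovers the kernel from a distance matrix. Toy check,
# for illustration only:
#   import numpy as np
#   k = np.array([[1.0, 0.9], [0.9, 1.0]])
#   d = np.sqrt(2.0 - 2.0*k)
#   assert np.allclose(1.0 - 0.5*d*d, k)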
30 | if kernels[it][0,0]<1e-8: 31 | kernels[it] = (1-0.5*kernels[it]*kernels[it]) 32 | # first hyperparameter - we raise the kernel to a positive exponent to make it sharper or smoother 33 | kernels[it] = kernels[it]**csi[it] 34 | 35 | 36 | # reads properties 37 | prop = np.loadtxt(propFilename, dtype=np.float64, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0) 38 | 39 | # check that the sizes of the kernels and the property vector are consistent 40 | for it,kernel in enumerate(kernels): 41 | if len(prop) != len(kernel): 42 | raise ValueError("Dimension mismatch between kernel {} and prop".format(kernelFilenames[it])) 43 | for it,kernel1 in enumerate(kernels): 44 | for jt,kernel2 in enumerate(kernels): 45 | if kernel1.shape != kernel2.shape: 46 | raise ValueError("Dimension mismatch between kernel {} and kernel {}".format(kernelFilenames[it],kernelFilenames[jt])) 47 | 48 | # Kernel matrices should be square and of the same size 49 | nel = len(kernels[0]) 50 | 51 | # reads index, if available 52 | if refindex == "": 53 | rlabs = np.asarray(range(nel), int) 54 | else: 55 | rlabs = np.loadtxt(refindex,dtype=int) 56 | if len(rlabs) != nel: 57 | raise ValueError("Reference index size mismatch") 58 | 59 | 60 | # chooses test 61 | testmae=0 62 | trainmae=0 63 | truemae=0 64 | testrms=0 65 | trainrms=0 66 | truerms=0 67 | testsup=0 68 | trainsup=0 69 | truesup=0 70 | ctrain=0 71 | ctest=0 72 | ctrue=0 73 | 74 | if mode=="manual": 75 | mtrain = np.loadtxt("train.idx") 76 | if mode == "all" : 77 | raise NotImplementedError("") 78 | # tp = p[:] 79 | # tk = kij[:][:].copy() 80 | # vp = np.var(tp) # variance of the property subset (to be size consistent!) 81 | # vk = np.trace(tk)/len(tp) 82 | # print >> sys.stderr, "Regularization shift ", sigma**2 * vk/vp 83 | # #print lweight 84 | # for i in xrange(len(tp)): 85 | # tk[i,i]+=sigma**2 * vk/vp #/ lweight[i] # diagonal regularization times weight! 
86 | # tc = np.linalg.solve(tk, tp) 87 | # krp = np.dot(kij[:,:],tc) 88 | # mae=abs(krp[:]-p[:]).sum()/len(p) 89 | # rms=np.sqrt(((krp[:]-p[:])**2).sum()/len(p)) 90 | # sup=abs(krp[:]-p[:]).max() 91 | # print "# train-set MAE: %f RMS: %f SUP: %f" % (mae, rms, sup) 92 | # ltrain = range(nel) 93 | else: 94 | np.set_printoptions(threshold=10000) 95 | ntrain = int(trainfrac*nel) 96 | if mode == "manual": ntrain=len(mtrain) 97 | ntrue = int(ttest*nel) 98 | seeds = np.random.randint(0,5000,ntests) 99 | alphas = [] 100 | mus = [] 101 | testMAEs = [] 102 | ltrains = [] 103 | rlabss = [] 104 | for itest in xrange(ntests): 105 | ltest = np.zeros(nel-ntrain-ntrue,int) 106 | ltrain = np.zeros(ntrain,int) 107 | 108 | # if specified, select some elements that are completely ignored from both selection and training 109 | ltrue = np.zeros(ntrue, int) 110 | psel = np.ones(nel,float) 111 | if ntrue > 0: 112 | ltrue = randomsubset(nel, ntrue) 113 | psel[ltrue] = 0.0 114 | if mode == "random": 115 | ltrain[:] = randomsubset(nel, ntrain, psel) 116 | elif mode == "manual": 117 | ltrain[:] = mtrain 118 | elif mode == "sequential": 119 | ltrain[:] = range(ntrain) 120 | elif mode == "fps": 121 | # do farthest point sampling on the uniform combination of the kernels 122 | kij = np.zeros((nel,nel),dtype=np.float64) 123 | for kernel in kernels: 124 | kij += kernel 125 | 126 | isel=int(np.random.uniform()*nel) 127 | while isel in ltrue: 128 | isel=int(np.random.uniform()*nel) 129 | 130 | ltrain = farthestPointSampling(kij,nel,ntrain,initalLandmark=isel,listOfDiscardedPoints=ltrue,seed=seeds[itest]) 131 | 132 | k = 0 133 | for i in xrange(nel): 134 | if not i in ltrain and not i in ltrue: 135 | ltest[k] = i 136 | k += 1 137 | 138 | 139 | # # the kernel should represent the variance of the energy (in a GAP interpretation) 140 | # # and sigma^2 the estimate of the noise variance. However we want to keep a "naked kernel" so 141 | # # we can then estimate without bringing around the variance. 
So the problem would be 142 | # # (vp*N/Tr(tk) tk + sigma^2 I )^-1 p = w 143 | # # but equivalently we can write 144 | # # ( tk + sigma^2 *tr(tk)/(N vp) I )^-1 p = w 145 | 146 | 147 | # get prop of reference for training and testing 148 | propTeRef = prop[ltest] 149 | propTrRef = prop[ltrain] 150 | 151 | # Train your model and get the optimal weights out 152 | kernelsTr = [] 153 | for it,kernel in enumerate(kernels): 154 | kernelsTr.append(kernel[np.ix_(ltrain,ltrain)]) 155 | 156 | 157 | alpha, mu, propTr = mkl.TrainKRRCortes(kernelsTr,propTrRef,**KRRCortesParam) 158 | 159 | # Predict property using the optimal weights 160 | kernelsTe = [] 161 | for it,kernel in enumerate(kernels): 162 | kernelsTe.append(kernel[np.ix_(ltrain,ltest)]) 163 | 164 | propTe = mkl.PredictKRRCortes(kernelsTe,alpha,mu) 165 | 166 | 167 | mae = cst.mae(propTe-propTeRef) 168 | rms = cst.rmse(propTe-propTeRef) 169 | sup = cst.sup_e(propTe-propTeRef) 170 | print "# run: {} test-set MAE: {:.4e} RMS: {:.4e} SUP: {:.4e}".format(itest, mae, rms, sup) 171 | 172 | # accumulate output to select the weigths corresponding to the lowest MAE 173 | alphas.append(alpha) 174 | mus.append(mu) 175 | testMAEs.append(mae) 176 | ltrains.append(ltrain) 177 | rlabss.append(rlabs) 178 | 179 | 180 | testmae += cst.mae(propTe-propTeRef) 181 | trainmae += cst.mae(propTr-propTrRef) 182 | #if ntrue>0: truemae += abs(krp[ltrue]-prop[ltrue]).sum()/len(ltrue) 183 | testrms += cst.rmse(propTe-propTeRef) 184 | trainrms += cst.rmse(propTr-propTrRef) 185 | #if ntrue>0: truerms += np.sqrt(((krp[ltrue]-prop[ltrue])**2).sum()/len(ltrue)) 186 | testsup += cst.sup_e(propTe-propTeRef) 187 | trainsup += cst.sup_e(propTr-propTrRef) 188 | #if ntrue>0: truesup+=abs(krp[ltrue]-prop[ltrue]).max() 189 | ctrain+=len(ltrain) 190 | ctest+=len(ltest) 191 | ctrue+=len(ltrue) 192 | 193 | 194 | # for it,jt in enumerate(ltrain): 195 | # print jt, propTrRef[it], propTr[it], "TRAIN" 196 | # for it,jt in enumerate(ltest): 197 | # print jt, propTeRef[it], propTe[it], "TEST" 198 | 199 | # print alpha 200 | print 'Mu = {}'.format(mu) 201 | 202 | print "# KRR results ({:d} tests, {:f} training p., {:f} test p.): csi={} sigma={:.2e} mu0={} Lambda={:.1f} epsilon={:.1e} eta={:.1e} "\ 203 | .format(ntests, ctrain/ntests, ctest/ntests, csi, KRRCortesParam['sigma'],KRRCortesParam['mu0'],KRRCortesParam['Lambda'],KRRCortesParam['epsilon'],KRRCortesParam['eta']) 204 | print "# Train points averages: MAE={:.4e} RMSE={:.4e} SUP={:.4e}".format(trainmae/ntests, trainrms/ntests, trainsup/ntests) 205 | print "# Test points averages: MAE={:.4e} RMSE={:.4e} SUP={:.4e} ".format(testmae/ntests, testrms/ntests, testsup/ntests) 206 | if len(ltrue) > 0: 207 | print "# True test points MAE=%f RMSE=%f SUP=%f " % (truemae/ntests, truerms/ntests, truesup/ntests) 208 | 209 | if savevector: 210 | bestRunIdx = np.argmin(testMAEs) 211 | falpha = open(savevector+'.alpha','w') 212 | fmu = open(savevector+'.mu','w') 213 | kernelFilenamesStr = ''; 214 | for it,kernelFilename in enumerate(kernelFilenames): 215 | kernelFilenamesStr+=kernelFilename+' ' 216 | 217 | commentline=' Train Vector from kernel matrix with the best MAE test score ('+str(np.min(testMAEs))+'): '+ kernelFilenamesStr +', and properties from '+ propFilename + ' selection mode: '+mode+' : Csi, sigma, mu0, Lambda, epsilon, eta = ' + str(csi) +' , '+ str(KRRCortesParam['sigma']) \ 218 | +' , '+ str(mu0)+' , '+ str(KRRCortesParam['Lambda'])+' , '+ str(KRRCortesParam['epsilon'])+' , '+ str(KRRCortesParam['eta']) 219 | 
np.savetxt(falpha,np.asarray([alphas[bestRunIdx], ltrains[bestRunIdx], rlabss[bestRunIdx][ltrains[bestRunIdx]]]).T,fmt=("%24.15e", "%10d", "%10d"),header=commentline) 220 | np.savetxt(fmu,mus[bestRunIdx],fmt=("%24.15e"),header=commentline) 221 | 222 | # commentline=' Train Vector from kernel matrix with the best MAE test score ('+str(testMAEs[-1])+'): '+ kernelFilenamesStr +', and properties from '+ propFilename + ' selection mode: '+mode+' : Csi, sigma, mu0, Lambda, epsilon, eta = ' + str(csi) +' , '+ str(KRRCortesParam['sigma']) \ 223 | # +' , '+ str(mu0)+' , '+ str(KRRCortesParam['Lambda'])+' , '+ str(KRRCortesParam['epsilon'])+' , '+ str(KRRCortesParam['eta']) 224 | 225 | # np.savetxt(falpha,np.asarray([alpha, ltrain, rlabs[ltrain]]).T,fmt=("%24.15e", "%10d", "%10d"),header=commentline) 226 | # np.savetxt(fmu,mu,fmt=("%24.15e"),header=commentline) 227 | falpha.close() 228 | fmu.close() 229 | 230 | if __name__ == '__main__': 231 | parser = argparse.ArgumentParser(description="""Computes Multiple Kernel Learning KRR from Cortes and analytics based on a set of kernel matrices and a property vector.""") 232 | 233 | parser.add_argument("kernels", nargs=1, help="Kernel matrices. List of comma-separated file names.") 234 | parser.add_argument("props", nargs=1, help="Property file name.") 235 | parser.add_argument("--mode", type=str, default="random", help="Train point selection (e.g. --mode all / sequential / random / fps / cur / manual)") 236 | parser.add_argument("-f", type=float, default='0.5', help="Train fraction") 237 | parser.add_argument("--truetest", type=float, default='0.0', help="Take these points out from the selection procedure") 238 | parser.add_argument("--csi", type=str, default='', help="Kernel scaling. List of comma-separated positive values (e.g. 1,1,1 )") 239 | parser.add_argument("--sigma", type=float, default='1e-3', help="KRR regularization. In units of the properties. ") 240 | parser.add_argument("--epsilon", type=float, default='2e-3', help="KRR-Mkl param. Convergence tolerance on the absolute difference of the alpha weights.") 241 | parser.add_argument("--Lambda", type=float, default='1', help="KRR-Mkl param. Radius of the ball containing the possible weights of the kernel combination, positive value") 242 | parser.add_argument("--mu0", type=str, default='', help="KRR-Mkl param. Center of the ball containing the possible weights of the kernel combination, list of comma-separated positive values (e.g. 1,1,1 )") 243 | parser.add_argument("--maxIter", type=float, default='1e2', help="KRR-Mkl param. Maximum number of iterations. ") 244 | parser.add_argument("--eta", type=float, default='0.5', help="KRR-Mkl param. Interpolation parameter for the update of alpha, belongs to ]0,1[. 
") 245 | parser.add_argument("--ntests", type=int, default='1', help="Number of tests") 246 | parser.add_argument("--refindex", type=str, default="", help="Structure indices of the kernel matrix (useful when dealing with a subset of a larger structures file)") 247 | parser.add_argument("--saveweights", type=str, default="", help="Save the train-set weights vector in file") 248 | 249 | args = parser.parse_args() 250 | 251 | kernelFilenames = args.kernels[0].split(',') 252 | 253 | a = args.mu0.split(',') 254 | if len(a) != len(kernelFilenames): 255 | raise ValueError("The number of kernel file names and elements of mu0 must be equal.") 256 | mu0 = np.zeros(len(a),dtype=np.float64) 257 | for it,item in enumerate(a): 258 | mu0[it] = float(item) 259 | 260 | a = args.csi.split(',') 261 | if len(a) != len(kernelFilenames): 262 | raise ValueError("The number of kernel file names and elements of csi must be equal.") 263 | csi = np.zeros(len(a),dtype=np.float64) 264 | for it,item in enumerate(a): 265 | csi[it] = float(item) 266 | 267 | KRRCortesParam = {'mu0':mu0,'epsilon':args.epsilon,'Lambda':args.Lambda,'eta':args.eta, 268 | 'maxIter':args.maxIter,'sigma':args.sigma} 269 | 270 | 271 | main(kernelFilenames=kernelFilenames, propFilename=args.props[0], mode=args.mode, 272 | trainfrac=args.f, csi=csi, ntests=args.ntests, refindex=args.refindex, 273 | ttest=args.truetest,savevector=args.saveweights, **KRRCortesParam) 274 | 275 | -------------------------------------------------------------------------------- /tools/cluster.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import argparse 3 | import numpy as np 4 | import sys 5 | import numpy as np 6 | import scipy.cluster.hierarchy as sc 7 | import scipy.spatial.distance as sd 8 | import itertools 9 | from scipy.stats import kurtosis,skew 10 | from scipy.stats.mstats import kurtosistest 11 | from os.path import basename 12 | try: 13 | from matplotlib import pyplot as plt 14 | except: 15 | print "matplotlib is not available. 
You will not be able to plot" 16 | 17 | from collections import Counter 18 | def list_elements(Z, i, n): 19 | if Z[i,0]1 : 90 | kurt= kurtosis(prop[indices],fisher=False) 91 | skewness=skew(prop[indices]) 92 | if kurt !=0 : modc=(skewness*skewness+1)/kurt 93 | if modc >(5.0/9.0) : 94 | multimodal=True 95 | #calculate mean dissimilarity in each group 96 | for iconf in range(len(indices)): 97 | ind1=indices[iconf] 98 | for jconf in range(len(indices)): 99 | ind2=indices[jconf] 100 | sumd+=sim[ind1][ind2] 101 | meand=np.sqrt(sumd/(nconf*nconf)) 102 | 103 | # pick the configuration with min mean distance variance in the group 104 | minvar=1e100 105 | var=0.0 106 | for iconf in range(len(indices)): 107 | ivar=0.0 108 | ind1=indices[iconf] 109 | for jconf in range(len(indices)): 110 | ind2=indices[jconf] 111 | ivar+=sim[ind1][ind2]**2 112 | ivar=ivar/nconf 113 | var+=ivar 114 | if(ivar=n: # this means merging is happening with previously formed cluster 233 | icluster=int(id1)-n 234 | # for x in clusterlist[icluster]: #we already have the list for the old cluster 235 | cl.append(clusterlist[icluster]) 236 | n1=nlist[icluster] 237 | else: 238 | cl.append(id1+1) 239 | n1=1 240 | if id2>=n: # same logic as before 241 | icluster=int(id2)-n 242 | # for x in clusterlist[icluster]: 243 | cl.append(clusterlist[icluster]) 244 | n2=nlist[icluster] 245 | else: 246 | cl.append(id2+1) 247 | n2=1 248 | cl.append('{:.8f}'.format(Z[i,2])) 249 | cl.append(n1) 250 | cl.append(n2) 251 | cl.append(int(pcls[i,0])+1) 252 | cl.append(pcls[i,1]) 253 | # tmp='Cluster'+str(cl) 254 | # tmp.replace("'","") 255 | # clusterlist.append(tmp) 256 | clusterlist.append(cl) 257 | nlist.append(n1+n2) 258 | 259 | # get the final nested cluster structure and put 260 | # the mathematica Cluster statement 261 | clusterliststr = str(clusterlist[n-2]) 262 | clusterliststr = clusterliststr.replace("[","XCluster[") 263 | clusterliststr = clusterliststr.replace("'","") 264 | # print a.replace("[","Cluster[") 265 | fmathematica=open(fname,'w') 266 | fmathematica.write(clusterliststr) 267 | fmathematica.close() 268 | 269 | return 270 | 271 | 272 | def dissimilarity_sd(Z,sim): 273 | n=len(sim) 274 | clusterlist=[] 275 | ncluster=0 276 | sdlist=[] 277 | rep_index=[] 278 | for i in range(len(Z)): 279 | id1=int(Z[i,0]) 280 | id2=int(Z[i,1]) 281 | if((id1 < n) and (id2=n: # this means merging is happening with previously formed cluster 288 | icluster=int(id1)-n 289 | for x in clusterlist[icluster]: #we already have the list for the old cluster 290 | cl.append(x) 291 | else:cl.append(id1) 292 | if id2>=n: # same logic as before 293 | icluster=int(id2)-n 294 | for x in clusterlist[icluster]: 295 | cl.append(x) 296 | else:cl.append(id2) 297 | clusterlist.append(cl) # append the index of the members at this stage of clustering 298 | # calculate mean dissimilarity of the cluster 299 | sumd=0.0 300 | icount=0 301 | for iconf in range(len(clusterlist[i])): 302 | ind1=clusterlist[i][iconf] 303 | for jconf in range(iconf): 304 | ind2=clusterlist[i][jconf] 305 | sumd+=sim[ind1][ind2] 306 | icount+=1 307 | meand=sumd/icount 308 | # calculate variance and sd 309 | var=0.0 310 | icount=0 311 | minvar=9999 312 | for iconf in range(len(clusterlist[i])): 313 | ind1=clusterlist[i][iconf] 314 | ivar=0.0 315 | for jconf in range(len(clusterlist[i])): 316 | ind2=clusterlist[i][jconf] 317 | ivar+=(sim[ind1][ind2]-meand)**2 318 | ivar=ivar/(len(clusterlist[i])-1) 319 | var+=ivar 320 | icount+=1 321 | if(ivar=0.0 : 335 | for i in range(n): 336 | if dist[i]>dcut : 337 | 
nclust=n-i 338 | break 339 | 340 | else: 341 | b=[n-1,dist[n-1]-dist[0]] 342 | b=np.array(b) 343 | b=b/np.linalg.norm(b) 344 | dmax=0.0 345 | for i in range(n): 346 | p=[n-1-i,dist[n-1]-dist[i]] 347 | d=np.linalg.norm(p-np.dot(p,b)*b) 348 | if d>dmax : 349 | elbow=i 350 | dmax=d 351 | dcut=dist[elbow]*1.2 352 | print "estimated dcut=",dcut 353 | for j in range(n): 354 | if dist[j]>dcut : 355 | nclust=n-j 356 | break 357 | return nclust 358 | 359 | 360 | def prop_sd(Z,prop,verbose): 361 | n=len(prop) 362 | clusterlist=[] 363 | ncluster=0 364 | sdlist=[] 365 | if verbose : f=open('clusterlist.dat','w') 366 | for i in range(len(Z)): 367 | id1=int(Z[i,0]) 368 | id2=int(Z[i,1]) 369 | if((id1 < n) and (id2=n: # this means merging is happening with previously formed cluster 376 | icluster=int(id1)-n 377 | for x in clusterlist[icluster]: #we already have the list for the old cluster 378 | cl.append(x) 379 | else:cl.append(id1) 380 | if id2>=n: # same logic as before 381 | icluster=int(id2)-n 382 | for x in clusterlist[icluster]: 383 | cl.append(x) 384 | else:cl.append(id2) 385 | clusterlist.append(cl) # append the index of the members at this stage of clustering 386 | # calculate mean dissimilarity of the cluster 387 | sumd=0.0 388 | icount=0 389 | # calculate variance and sd 390 | sd=np.std(prop[clusterlist[i]]) 391 | if verbose: 392 | f.write(" %s " %str(clusterlist[i])) 393 | f.write("\n") 394 | sdlist.append(sd) 395 | return sdlist 396 | # print len(clusterlist[i]),meand,var,sd,iselect,Z[i,2] 397 | # print "clusters:", nl-elbow+2 398 | 399 | 400 | if __name__ == '__main__': 401 | parser = argparse.ArgumentParser(description="""Computes KRR and analytics based on a kernel matrix and a property vector.""") 402 | 403 | parser.add_argument("sim", nargs=1, help="Kernel matrix") 404 | parser.add_argument("--mode", type=str, default="average", help="Linkage mode (e.g. --mode average/single/complete/median/centroid") 405 | parser.add_argument("--dcut", type=float, default='0', help="distance cutoff to cut the dendrogram. if dcut=0 then it is autamaticlly estimated") 406 | parser.add_argument("--prop", type=str, default='', help="property file") 407 | parser.add_argument("--plot", action="store_true", help="Plot the dendrogram") 408 | parser.add_argument("--calc_sd", action="store_true", help="calculate standard div of the dist and prop for all level of clustering") 409 | parser.add_argument("--project", type=str,default='', help="Project configurations using Rect Dist Matrix file") 410 | parser.add_argument("--mathematica", action="store_true", help="export the cluster object in Mathematica format") 411 | parser.add_argument("--verbose", action="store_true", help="increase output informations. 
write multiple files") 412 | 413 | args = parser.parse_args() 414 | main(args.sim[0],args.dcut,mode=args.mode,proplist=args.prop,plot=args.plot,calc_sd=args.calc_sd,rect_matrixfile=args.project,mtcia=args.mathematica,verbose=args.verbose) 415 | 416 | -------------------------------------------------------------------------------- /glosoapAlchemy.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import quippy as qp 4 | import numpy as np 5 | import argparse 6 | 7 | 8 | 9 | def atomicno_to_sym(atno): 10 | pdict={1: 'H', 2: 'He', 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', 19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', 27: 'Co', 28: 'Ni', 29: 'Cu', 30: 'Zn', 31: 'Ga', 32: 'Ge', 33: 'As', 34: 'Se', 35: 'Br', 36: 'Kr', 37: 'Rb', 38: 'Sr', 39: 'Y', 40: 'Zr', 41: 'Nb', 42: 'Mo', 43: 'Tc', 44: 'Ru', 45: 'Rh', 46: 'Pd', 47: 'Ag', 48: 'Cd', 49: 'In', 50: 'Sn', 51: 'Sb', 52: 'Te', 53: 'I', 54: 'Xe', 55: 'Cs', 56: 'Ba', 57: 'La', 58: 'Ce', 59: 'Pr', 60: 'Nd', 61: 'Pm', 62: 'Sm', 63: 'Eu', 64: 'Gd', 65: 'Tb', 66: 'Dy', 67: 'Ho', 68: 'Er', 69: 'Tm', 70: 'Yb', 71: 'Lu', 72: 'Hf', 73: 'Ta', 74: 'W', 75: 'Re', 76: 'Os', 77: 'Ir', 78: 'Pt', 79: 'Au', 80: 'Hg', 81: 'Tl', 82: 'Pb', 83: 'Bi', 84: 'Po', 85: 'At', 86: 'Rn', 87: 'Fr', 88: 'Ra', 89: 'Ac', 90: 'Th', 91: 'Pa', 92: 'U', 93: 'Np', 94: 'Pu', 95: 'Am', 96: 'Cm', 97: 'Bk', 98: 'Cf', 99: 'Es', 100: 'Fm', 101: 'Md', 102: 'No', 103: 'Lr', 104: 'Rf', 105: 'Ha', 106: 'Sg', 107: 'Ns', 108: 'Hs', 109: 'Mt', 110: 'Unn', 111: 'Unu'} 11 | return pdict[atno] 12 | 13 | def get_spkitMax(frames): 14 | ''' 15 | Get the set of species their maximum number across frames. 16 | 17 | :param frames: list of quippy frames object 18 | :return: Dictionary with species as key and return its 19 | largest number of occurrence 20 | ''' 21 | spkitMax = {} 22 | 23 | for frame in frames: 24 | atspecies = {} 25 | for z in frame.z: 26 | if z in atspecies: 27 | atspecies[z] += 1 28 | else: 29 | atspecies[z] = 1 30 | 31 | for (z, nz) in atspecies.iteritems(): 32 | if z in spkitMax: 33 | if nz > spkitMax[z]: spkitMax[z] = nz 34 | else: 35 | spkitMax[z] = nz 36 | 37 | return spkitMax 38 | 39 | def get_spkit(frame): 40 | ''' 41 | Get the set of species their number across frame. 42 | 43 | :param frame: One quippy frames object 44 | :return: 45 | ''' 46 | spkit = {} 47 | for z in frame.z: 48 | if z in spkit: 49 | spkit[z]+=1 50 | else: 51 | spkit[z] = 1 52 | return spkit 53 | 54 | 55 | def get_soap(frame, spkit, spkitMax, centerweight=1., gaussian_width=0.5, 56 | cutoff=5.0, cutoff_transition_width=0.5, nmax=8, lmax=6): 57 | ''' 58 | Get the soap vectors (power spectra) for each frameic environments in frame. 59 | 60 | :param frame: A quippy atomsList object 61 | :param spkit: Dictionary with specie as key and number of corresponding frame as item. 62 | Returned by get_spkit(frame). 63 | :param spkitMax: Dictionary with species as key and return its largest number of occurrence. 64 | Returned by get_spkitMax(frames) . 65 | :param centerweight: Center atom weight 66 | :param gaussian_width: Atom Gaussian std 67 | :param cutoff: Cutoff radius for each atomic environment in the unit of cell and positions. 68 | :param cutoff_transition_width: Steepness of the smooth environmental cutoff radius. Smaller -> steeper 69 | :param nmax: Number of radial basis functions. 70 | :param lmax: Number of Spherical harmonics. 
71 | :return: Soap vectors of Atoms quippy class. Dictionary (keys:atomic number of the central atom, 72 | items: list of power spectra for each central atom 73 | with corresponding atomic number ) 74 | ''' 75 | zsp = spkitMax.keys() 76 | zsp.sort() 77 | lspecies = 'n_species=' + str(len(zsp)) + ' species_Z={ ' 78 | for z in zsp: 79 | lspecies = lspecies + str(z) + ' ' 80 | lspecies = lspecies + '}' 81 | 82 | frame.set_cutoff(cutoff) 83 | frame.calc_connect() 84 | 85 | soap = {} 86 | for (z, nz) in spkit.iteritems(): 87 | soapstr = "soap central_reference_all_species=F central_weight=" + str(centerweight)+\ 88 | " covariance_sigma0=0.0 atom_sigma=" + str(gaussian_width) +\ 89 | " cutoff=" + str(cutoff) + \ 90 | " cutoff_transition_width=" + str(cutoff_transition_width) + \ 91 | " n_max=" + str(nmax) + " l_max=" + str(lmax) + ' ' + lspecies +\ 92 | ' Z=' + str(z) 93 | 94 | desc = qp.descriptors.Descriptor(soapstr) 95 | 96 | sps = desc.calc(frame)["descriptor"] 97 | soap[z] = sps 98 | 99 | return soap 100 | 101 | 102 | def Soap2AlchemySoap(rawsoap, spkit, nmax, lmax): 103 | ''' 104 | Convert the soap vector of an environment from quippy descriptor to soap vectors 105 | with chemical channels. 106 | 107 | :param rawsoap: numpy array dim:(N,) containing the soap vector of one environment 108 | :param spkit: Dictionary with specie as key and number of corresponding atom as item. 109 | Returned by get_spkit(frame). 110 | :param nmax: Number of radial basis functions. 111 | :param lmax: Number of Spherical harmonics. 112 | :return: Dictionary (keys: species tuples (sp1,sp2), 113 | items: soap vector, numpy array dim:(nmax ** 2 * (lmax + 1),) ) 114 | ''' 115 | # spkit keys are the center species in the full frame 116 | zspecies = sorted(spkit.keys()) 117 | nspecies = len(spkit.keys()) 118 | 119 | alchemySoap = {} 120 | ipair = {} 121 | # initialize the alchemical soap 122 | for s1 in xrange(nspecies): 123 | for s2 in xrange( 124 | nspecies): # range(s1+1): we actually need to store also the reverse pairs if we want to go alchemical 125 | alchemySoap[(zspecies[s2], zspecies[s1])] = np.zeros(nmax ** 2 * (lmax + 1), float) 126 | ipair[(zspecies[s2], zspecies[s1])] = 0 127 | 128 | isoap = 0 129 | isqrttwo = 1.0 / np.sqrt(2.0) 130 | 131 | # selpair and revpair are modified and in turn modify soaps because they are all pointing at the same memory block 132 | for s1 in xrange(nspecies): 133 | for n1 in xrange(nmax): 134 | for s2 in xrange(s1 + 1): 135 | selpair = alchemySoap[(zspecies[s2], zspecies[s1])] 136 | # we need to reconstruct the spectrum for the inverse species order, that also swaps n1 and n2. 137 | # This is again only needed to enable alchemical combination of e.g. alpha-beta and beta-alpha. Shit happens 138 | revpair = alchemySoap[(zspecies[s1], zspecies[s2])] 139 | isel = ipair[(zspecies[s2], zspecies[s1])] 140 | for n2 in xrange(nmax if s2 < s1 else n1 + 1): 141 | for l in xrange(lmax + 1): 142 | # print s1, s2, n1, n2, isel, l+(self.lmax+1)*(n2+self.nmax*n1), l+(self.lmax+1)*(n1+self.nmax*n2) 143 | # selpair[isel] = rawsoap[isoap] 144 | if (s1 != s2): 145 | selpair[isel] = rawsoap[ 146 | isoap] * isqrttwo # undo the normalization since we will actually sum over all pairs in all directions! 147 | revpair[l + (lmax + 1) * (n1 + nmax * n2)] = selpair[isel] 148 | else: 149 | # diagonal species (s1=s2) have only half of the elements. 150 | # this is tricky. we need to duplicate diagonal blocks "repairing" these to be full. 
151 | # this is necessary to enable alchemical similarity matching, where we need to combine 152 | # alpha-alpha and alpha-beta environment fingerprints 153 | selpair[l + (lmax + 1) * (n2 + nmax * n1)] = rawsoap[isoap] * (1 if n1 == n2 else isqrttwo) 154 | selpair[l + (lmax + 1) * (n1 + nmax * n2)] = rawsoap[isoap] * (1 if n1 == n2 else isqrttwo) 155 | # selpair[l + (lmax + 1) * (n2 + nmax * n1)] = selpair[l + (lmax + 1) * (n1 + nmax * n2)] \ 156 | # = rawsoap[isoap] * (1 if n1 == n2 else isqrttwo) 157 | isoap += 1 158 | isel += 1 159 | ipair[(zspecies[s2], zspecies[s1])] = isel 160 | 161 | return alchemySoap 162 | 163 | 164 | def get_Soaps(frames,chem_channels=False, centerweight=1.0, gaussian_width=0.5, cutoff=3.5, 165 | cutoff_transition_width=0.5 , nmax=8, lmax=6): 166 | ''' 167 | Compute the SOAP vectors for each atomic environment in frames and 168 | reorder them into chemical channels. 169 | 170 | :param frames: list of quippy frames object 171 | :param centerweight: Center atom weight 172 | :param gaussian_width: Atom Gaussian std 173 | :param cutoff: Cutoff radius for each atomic environment in the unit of cell and positions. 174 | :param cutoff_transition_width: Steepness of the smooth environmental cutoff radius. Smaller -> steeper 175 | :param nmax: Number of radial basis functions. 176 | :param lmax: Number of Spherical harmonics. 177 | :return: Nested List/Dictionary: list->frames, 178 | dict->(keys:atomic number, 179 | items:list of atomic environment), list->atomic environment, 180 | dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number 181 | inside the atomic environment), 182 | items: SOAP vector, flat numpy array) 183 | ''' 184 | 185 | Soaps = [] 186 | # get the set of species their maximum number across frames 187 | spkitMax = get_spkitMax(frames) 188 | 189 | for frame in frames: 190 | 191 | # to avoid side effect due to pointers 192 | atm = frame.copy() 193 | # get the set of species their number across atom 194 | spkit = get_spkit(atm) 195 | # get the soap vectors (power spectra) for each atomic environments in atm 196 | rawsoaps = get_soap(atm, spkit, spkitMax, centerweight, gaussian_width, 197 | cutoff, cutoff_transition_width, nmax, lmax) 198 | 199 | # chemical channel separation for each central atom species 200 | # and each atomic environment 201 | if chem_channels: 202 | alchemySoap = {} 203 | for (z, soap) in rawsoaps.iteritems(): 204 | Nenv, Npowerspectrum = soap.shape 205 | lsp = [] 206 | # loop over the local environments of specie z 207 | for it in xrange(Nenv): 208 | # soap[it] is (1,Npowerspectrum) so need to transpose it 209 | # convert the soap vector of an environment from quippy descriptor to soap vectors 210 | # with chemical channels. 211 | lsp.append(Soap2AlchemySoap(soap[it].T, spkit, nmax, lmax)) 212 | # gather list of environment over the atomic number 213 | alchemySoap[z] = lsp 214 | # gather soaps over the atom 215 | Soaps.append(alchemySoap) 216 | # out put rawSoap 217 | else: 218 | Soaps.append(rawsoaps) 219 | 220 | return Soaps 221 | 222 | def get_AvgSoaps(frames, chem_channels=False, centerweight=1.0, gaussian_width=0.5, cutoff=3.5, 223 | cutoff_transition_width=0.5, nmax=8, lmax=6): 224 | ''' 225 | Compute the average SOAP vectors for each atomic environment in frames and 226 | reorder them into chemical channels. 
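    (Added note, not in the original docstring: because the per-environment power spectra are
    simply averaged, the dot product of two averaged SOAP vectors equals the mean of all
    cross-environment dot products, i.e. the "average kernel" between the two structures.)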
227 | 228 | :param frames: list of quippy frames object 229 | :param centerweight: Center atom weight 230 | :param gaussian_width: Atom Gaussian std 231 | :param cutoff: Cutoff radius for each atomic environment in the unit of cell and positions. 232 | :param cutoff_transition_width: Steepness of the smooth environmental cutoff radius. Smaller -> steeper 233 | :param nmax: Number of radial basis functions. 234 | :param lmax: Number of Spherical harmonics. 235 | :return: Nested List/Dictionary: list->frames, 236 | dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number 237 | inside the atomic environment), 238 | items: SOAP vector, flat numpy array) 239 | ''' 240 | AvgSoaps = [] 241 | # get the set of species their maximum number across frames 242 | spkitMax = get_spkitMax(frames) 243 | for frame in frames: 244 | # to avoid side effect due to pointers 245 | atm = frame.copy() 246 | # get the set of species their number across atom 247 | spkit = get_spkit(atm) 248 | # get the soap vectors (power spectra) for each atomic environments in atm 249 | rawsoaps = get_soap(atm, spkit, spkitMax, centerweight, gaussian_width, 250 | cutoff, cutoff_transition_width, nmax, lmax) 251 | # compute the average soap over an atomic environment (creates a matrix with each line a soap 252 | # vector of one atomic environement and then average over the rows) 253 | avgrawsoap = np.concatenate(rawsoaps.values(), axis=0).mean(axis=0) 254 | 255 | # chemical channel separation for each each atomic environment 256 | if chem_channels: 257 | AvgSoaps.append(Soap2AlchemySoap(avgrawsoap, spkit, nmax, lmax)) 258 | # output average rawSoaps 259 | else: 260 | AvgSoaps.append(avgrawsoap) 261 | return AvgSoaps 262 | 263 | 264 | def get_AvgDeltaKernel(AvgRawSoapListA, AvgRawSoapListB): 265 | return np.dot(np.array(AvgRawSoapListA),np.array(AvgRawSoapListB).T) 266 | 267 | def get_AvgDeltaSim(AvgSoapA, AvgSoapB, chem_channels=False): 268 | if chem_channels: 269 | AvgDeltaSim = 0 270 | for spA in AvgSoapA: 271 | for spB in AvgSoapB: 272 | if np.all(spA != spB): 273 | continue 274 | elif np.all(spA == spB): 275 | AvgDeltaSim += np.vdot(AvgSoapA[spA], AvgSoapB[spB]) 276 | else: 277 | AvgDeltaSim = np.vdot(AvgSoapA, AvgSoapB) 278 | 279 | return AvgDeltaSim 280 | 281 | def get_ChemDelta(alchemyAvgSoapA,alchemyAvgSoapB): 282 | chemicalSim = {} 283 | for spA in alchemyAvgSoapA: 284 | for spB in alchemyAvgSoapB: 285 | if np.all(spA == spB): 286 | chemicalSim[spA + spB] = 1. 287 | else: 288 | chemicalSim[spA + spB] = 0. 
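    # (added note, not part of the original source: chemicalSim maps the concatenated pair of
    # species tuples spA+spB to 1.0 when the two chemical channels are identical and to 0.0
    # otherwise; get_AvgSim below uses these values as the coupling theta, so this choice
    # reproduces the non-alchemical, delta-kernel limit.)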
289 | return chemicalSim 290 | 291 | def get_AvgSim(alchemyAvgSoapA, alchemyAvgSoapB, chemicalSimGen): 292 | chemicalSim = chemicalSimGen(alchemyAvgSoapA, alchemyAvgSoapB) 293 | AvgSim = 0 294 | for spA in alchemyAvgSoapA: 295 | for spB in alchemyAvgSoapB: 296 | theta = chemicalSim[spA + spB] 297 | if theta == 0.: 298 | continue 299 | else: 300 | AvgSim += theta * np.vdot(alchemyAvgSoapA[spA], alchemyAvgSoapB[spB]) 301 | 302 | return AvgSim 303 | 304 | 305 | 306 | 307 | def dumpAlchemySoapstxt(alchemySoaps,fout): 308 | ''' 309 | Print in text format the alchemySoaps using the same format as in glosim --verbose 310 | 311 | :param alchemySoaps: Nested List/Dictionary: list->frames, 312 | dict->(keys:atomic number, 313 | items:list of atomic environment), list->atomic environment, 314 | dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number 315 | inside the atomic environment), 316 | items: SOAP vector, flat numpy array) 317 | :param fout: Writable python io object 318 | :return: None 319 | ''' 320 | for iframe, alchemySoap in enumerate(alchemySoaps): 321 | fout.write("# Frame %d \n" % (iframe)) 322 | 323 | for zatom, soapEnvList in alchemySoap.iteritems(): 324 | for ienv, soapEnv in enumerate(soapEnvList): 325 | fout.write("# Species %d Environment %d \n" % (zatom, ienv)) 326 | for (sp1, sp2), soap in soapEnv.iteritems(): 327 | fout.write("%d %d " % (sp1, sp2)) 328 | for sj in soap: 329 | fout.write("%8.4e " % (sj)) 330 | fout.write("\n") 331 | 332 | def dumpAlchemySoapspickle(alchemySoaps, fout): 333 | ''' 334 | Dump alchemySoaps in pickle binary format. Read with pck.load(filename) 335 | 336 | :param alchemySoaps: Nested List/Dictionary: list->frames, 337 | dict->(keys:atomic number, 338 | items:list of atomic environment), list->atomic environment, 339 | dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number 340 | inside the atomic environment), 341 | items: SOAP vector, flat numpy array) 342 | :param fout: Writable python io object 343 | :return: None 344 | ''' 345 | import cPickle as pck 346 | pck.dump(alchemySoaps,fout,protocol=pck.HIGHEST_PROTOCOL) 347 | 348 | if __name__ == '__main__': 349 | parser = argparse.ArgumentParser(description="""Computes the SOAP vectors of a list of atomic frame 350 | and differenciate the chemical channels. Ready for alchemical kernel.""") 351 | 352 | parser.add_argument("filename", nargs=1, help="Name of the LibAtom formatted xyz input file") 353 | parser.add_argument("-n", type=int, default='8', help="Number of radial functions for the descriptor") 354 | parser.add_argument("-l", type=int, default='6', help="Maximum number of angular functions for the descriptor") 355 | parser.add_argument("-c", type=float, default='5.0', help="Radial cutoff") 356 | parser.add_argument("-cotw", type=float, default='0.5', help="Cutoff transition width") 357 | parser.add_argument("-g", type=float, default='0.5', help="Atom Gaussian sigma") 358 | parser.add_argument("-cw", type=float, default='1.0', help="Center atom weight") 359 | parser.add_argument("-prefix", type=str, default='', help="Prefix for output files (defaults to input file name)") 360 | parser.add_argument("-first", type=int, default='0', help="Index of first frame to be read in") 361 | parser.add_argument("-last", type=int, default='0', help="Index of last frame to be read in") 362 | parser.add_argument("-outformat", type=str, default='pickle', help="Choose how to dump the alchemySoaps, e.g. 
pickle (default) or text (same as from glosim --verbose)") 363 | 364 | args = parser.parse_args() 365 | 366 | filename = args.filename[0] 367 | prefix = args.prefix 368 | centerweight = args.cw 369 | gaussian_width = args.g 370 | cutoff = args.c 371 | cutoff_transition_width = args.cotw 372 | nmax = args.n 373 | lmax = args.l 374 | first = args.first if args.first>0 else None 375 | last = args.last if args.last>0 else None 376 | 377 | if args.outformat in ['text','pickle']: 378 | outformat = args.outformat 379 | else: 380 | raise Exception('outformat is not recognised') 381 | 382 | 383 | 384 | if prefix=="": prefix=filename 385 | if prefix.endswith('.xyz'): prefix=prefix[:-4] 386 | prefix += "-n"+str(nmax)+"-l"+str(lmax)+"-c"+str(cutoff)+\ 387 | "-g"+str(gaussian_width)+ "-cw"+str(centerweight)+ \ 388 | "-cotw" +str(cutoff_transition_width) 389 | 390 | print "using output prefix =", prefix 391 | # Reads input file using quippy 392 | print "Reading input file", filename 393 | 394 | # Reads the file and create a list of quippy frames object 395 | frames = qp.AtomsList(filename, start=first, stop=last) 396 | 397 | alchemySoaps = get_Soaps(frames, centerweight=centerweight, gaussian_width=gaussian_width, cutoff=cutoff, 398 | cutoff_transition_width=cutoff_transition_width, nmax=nmax, lmax=lmax,chem_channels=True) 399 | 400 | 401 | if outformat == 'text': 402 | with open(prefix + "-soap.dat", "w") as fout: 403 | dumpAlchemySoapstxt(alchemySoaps, fout) 404 | elif outformat == 'pickle': 405 | with open(prefix + "-soap.pck", "w") as fout: 406 | dumpAlchemySoapspickle(alchemySoaps, fout) 407 | 408 | 409 | -------------------------------------------------------------------------------- /example/sim.ref: -------------------------------------------------------------------------------- 1 | # Similarity matrix for ['mol-50.xyz']. 
Cutoff: 3.500000 Nmax: 12 Lmax: 6 Atoms-sigma: 0.500000 Mu: 0.000000 Central-weight: 1.000000 Periodic: False Distance: nkdistance Ignored_Z: [] Ignored_Centers_Z: [] 2 | 0.0 0.554810466333 0.89385833076 0.772997541539 0.920352390375 0.927106849477 0.894560433418 0.710237454946 0.676699674032 0.683329922674 0.869149580631 0.643036156382 0.709082489518 0.731473907975 0.773219182577 0.727863457681 0.853629946188 0.724361567068 0.578425003538 0.750757904628 0.64902878566 0.633889384523 0.401559285215 0.618360425902 0.703251573835 0.974018884234 0.917472769834 0.692497595669 0.716798972845 0.736118258628 0.715604402675 0.928548217829 0.573907965438 0.587494873854 1.01218440576 0.631765248954 0.460353556943 0.719111699422 0.653360004853 0.603839572003 0.883970474543 0.861893803176 0.762202144725 0.854601980699 0.721296782202 0.914528892729 0.810945204649 0.883919615435 0.725826021887 0.966885979479 3 | 0.554810466333 0.0 0.836649834583 0.694298231726 0.898675388187 0.893969337264 0.886263892555 0.605676661223 0.633706778973 0.676325013081 0.738465488951 0.436798567131 0.587309910944 0.578396706165 0.632762108275 0.71195746612 0.769057506042 0.656559560939 0.546362209578 0.715531679366 0.557239397294 0.601509751242 0.510741840889 0.553125449811 0.613081105865 0.915590570905 0.85801332229 0.673198476186 0.719960404848 0.701319992814 0.727159690405 0.913690170144 0.444261655102 0.575283282394 0.969371151304 0.53545050896 0.500773401166 0.780919579561 0.691584920029 0.557057989121 0.844887246359 0.841535914636 0.614580972119 0.832847071241 0.695711294713 0.890535903794 0.752082160021 0.881260444088 0.799573098509 0.952084380002 4 | 0.89385833076 0.836649834583 0.0 0.495848341797 0.758830901946 0.718906110136 0.704668624029 0.823925856904 0.776467757024 0.822375656807 0.704189808486 0.8761714856 0.795773754944 0.765589074186 0.673824934535 0.697736402467 0.550235782264 0.682809885622 0.88812377119 0.618964988525 0.925180318983 0.749290941079 0.822246671031 0.795897414061 0.784646152824 0.650495905428 0.668577089979 0.83976102758 0.827680863981 0.803298357718 0.823390620755 0.660260319718 0.821274886094 0.75904989029 0.635620435725 0.902145898207 0.85232934806 0.789152972029 0.846974468629 0.883205172538 0.801712694051 0.742654696774 0.761597427698 0.69407283316 0.814823102384 0.695056429327 0.667086568985 0.674304886704 0.810817840704 0.782352136634 5 | 0.772997541539 0.694298231726 0.495848341797 0.0 0.742306798513 0.729083468629 0.705750301314 0.682764264064 0.63572461014 0.730163581159 0.620439257119 0.757819410726 0.683057104794 0.661941809627 0.569173732239 0.61801612101 0.550541883353 0.606801747007 0.776160401623 0.557539521454 0.818356851513 0.66651347583 0.705023676926 0.690429236511 0.655774904464 0.727707515832 0.658735928396 0.709591945953 0.712415034052 0.692331787666 0.715497427158 0.757736387751 0.692835737808 0.627882981552 0.758242538408 0.789161984643 0.726779175329 0.728862559693 0.772479622708 0.777988975348 0.781985811764 0.761242714555 0.662519130824 0.73759647706 0.75120452334 0.765174094954 0.534670782196 0.743655802073 0.724234848246 0.844700231432 6 | 0.920352390375 0.898675388187 0.758830901946 0.742306798513 0.0 0.356630229692 0.438806424035 0.783062267014 0.746464860536 0.770431534748 0.807496393837 0.923753011071 0.875200748972 0.852362251998 0.812534894433 0.85752676317 0.780380873566 0.797857824129 0.926796812019 0.77419541653 0.952930238983 0.855666312101 0.886135421439 0.872990974828 0.782446680264 0.663490443756 0.496890305291 0.751115938048 0.679162023931 
0.675446195664 0.733612111249 0.712456119612 0.831434593453 0.794547571555 0.761973501402 0.884094676103 0.855737875159 0.814259415572 0.872599363164 0.839191564695 0.530497345834 0.753300389028 0.720640269714 0.739866026363 0.800367843158 0.719123268369 0.674245628626 0.565471287069 0.660147447179 0.906762244749 7 | 0.927106849477 0.893969337264 0.718906110136 0.729083468629 0.356630229692 0.0 0.414353818724 0.804348958172 0.760618012679 0.799171479904 0.796708012118 0.925411730689 0.878528687278 0.855173394522 0.79248172481 0.835404155765 0.762900248282 0.802200931676 0.938568206487 0.777966828947 0.956782638797 0.848386545427 0.887404534937 0.870984632048 0.805745244427 0.641530858873 0.511939531088 0.779813905616 0.69530754387 0.687811148101 0.740090213647 0.696059106446 0.831167788854 0.813536228782 0.736754977602 0.896857838515 0.864537409732 0.814505664693 0.852223583992 0.847873823185 0.536921082483 0.771209518563 0.712394785836 0.73843581233 0.816608851161 0.710387160907 0.708836231059 0.595346591819 0.667044413962 0.866284113849 8 | 0.894560433418 0.886263892555 0.704668624029 0.705750301314 0.438806424035 0.414353818724 0.0 0.787606781402 0.735107022308 0.771597559492 0.788094082298 0.922194406972 0.874782917479 0.844702882891 0.783067155115 0.816683160821 0.754682690233 0.777385333685 0.918550092211 0.73755347749 0.946031875696 0.82540068623 0.860875086701 0.853337355693 0.77170025523 0.635250869583 0.513446565958 0.750063675487 0.680581164388 0.659339158593 0.703867493783 0.699907844934 0.81591591686 0.771690988465 0.727548351314 0.891495195445 0.836948574759 0.790721340257 0.838571087112 0.84171612582 0.638957319903 0.760589271263 0.716548143086 0.733875721489 0.805643068762 0.711468258182 0.683696872326 0.577690268734 0.657554721863 0.843360512587 9 | 0.710237454946 0.605676661223 0.823925856904 0.682764264064 0.783062267014 0.804348958172 0.787606781402 0.0 0.279517693 0.587762362087 0.702183355568 0.623542560819 0.622198885151 0.622990044264 0.715615072059 0.791863154176 0.823350463522 0.731636511918 0.658061761272 0.74393075446 0.710666098012 0.726458985561 0.620374723694 0.660980468987 0.433092453517 0.870254645424 0.760807761757 0.414097111109 0.567008262188 0.523763407661 0.612707301091 0.907043196017 0.530844886603 0.544510789348 0.914494906126 0.604920418166 0.623438055109 0.808922492253 0.75130499739 0.599095112394 0.820507117689 0.830389493543 0.594453029658 0.833847263835 0.678768736941 0.8691099886 0.567035022824 0.833912233823 0.706873283558 0.986084512188 10 | 0.676699674032 0.633706778973 0.776467757024 0.63572461014 0.746464860536 0.760618012679 0.735107022308 0.279517693 0.0 0.601405973453 0.688227868746 0.660813685796 0.647260975543 0.640758620742 0.692480805382 0.747117569537 0.783858682464 0.699117315836 0.658235115491 0.708674123937 0.724168444324 0.691765714858 0.600844147385 0.649135570615 0.450440925279 0.837797091079 0.715757984788 0.412426401806 0.511755809641 0.471466738143 0.555502409566 0.868475447955 0.525489508727 0.530700211813 0.875123480201 0.610866236589 0.590737086994 0.769193310083 0.712434150728 0.589709053096 0.792329034985 0.806841608247 0.593232963076 0.800537285215 0.665403297361 0.836124380352 0.55772093073 0.791072226548 0.628783455701 0.943777977022 11 | 0.683329922674 0.676325013081 0.822375656807 0.730163581159 0.770431534748 0.799171479904 0.771597559492 0.587762362087 0.601405973453 0.0 0.778091210243 0.680531488175 0.634215803633 0.639971578366 0.691652774114 0.753143330777 0.80323789403 0.696855356384 0.710400025067 
0.679739862275 0.738684308386 0.738802923976 0.661839277115 0.697572365034 0.638094447616 0.866415283858 0.779736028901 0.625263494929 0.716521338694 0.674280741803 0.753696089767 0.82119264638 0.589446080811 0.528436891734 0.900692448818 0.634359982081 0.64971960145 0.718685932863 0.700272216677 0.632534891128 0.698284930076 0.676442480064 0.516064864454 0.708540584617 0.501787458824 0.757337355278 0.567940756714 0.75179735287 0.670422723273 0.922696483696 12 | 0.869149580631 0.738465488951 0.704189808486 0.620439257119 0.807496393837 0.796708012118 0.788094082298 0.702183355568 0.688227868746 0.778091210243 0.0 0.722091101088 0.631247698185 0.572063959793 0.647108313277 0.733047487346 0.69070445285 0.699017838203 0.781622080565 0.636894705409 0.849141806298 0.769381719655 0.75506479514 0.749551007191 0.623760464408 0.758540795476 0.723747763027 0.757933357369 0.775739927988 0.742629364357 0.798921800406 0.840849344087 0.767662333925 0.728644149351 0.807592824539 0.8316716154 0.806292773915 0.831136869566 0.8519850228 0.821141925066 0.83715442272 0.824208827149 0.676565021645 0.815622468719 0.808203560803 0.82150228229 0.68744567143 0.822164123175 0.83245849571 0.900894162529 13 | 0.643036156382 0.436798567131 0.8761714856 0.757819410726 0.923753011071 0.925411730689 0.922194406972 0.623542560819 0.660813685796 0.680531488175 0.722091101088 0.0 0.447722333363 0.492709038173 0.691222880539 0.760885676331 0.828118755163 0.713869949837 0.488807545457 0.723549141997 0.616986367587 0.703369989187 0.532297710273 0.630825721779 0.570400436057 0.945556038688 0.890384756469 0.697086144867 0.767769343194 0.753774374417 0.777619661696 0.950673029158 0.533694495723 0.620387976516 0.989517427474 0.618078891727 0.618990954735 0.823002758644 0.766032361972 0.632589508538 0.87301906307 0.859373236007 0.675944001755 0.85047540967 0.732077352698 0.909054512512 0.752682918735 0.895842403008 0.829750168867 1.00401115188 14 | 0.709082489518 0.587309910944 0.795773754944 0.683057104794 0.875200748972 0.878528687278 0.874782917479 0.622198885151 0.647260975543 0.634215803633 0.631247698185 0.447722333363 0.0 0.337786623091 0.619592821733 0.675834084756 0.746422504567 0.63574099366 0.607308261974 0.61737470591 0.720034030005 0.697716447409 0.573555092122 0.631343276688 0.59081897823 0.888638876179 0.8392438387 0.695986126577 0.771415149969 0.750673313103 0.791483491144 0.88670024695 0.624828579195 0.520614520243 0.926766141388 0.72242020181 0.685640435118 0.772061233828 0.780051971023 0.717618799723 0.825335012504 0.800528045902 0.605955113288 0.785265488575 0.720470440101 0.857487960463 0.679590696925 0.830393172397 0.799693809553 0.9649602699 15 | 0.731473907975 0.578396706165 0.765589074186 0.661941809627 0.852362251998 0.855173394522 0.844702882891 0.622990044264 0.640758620742 0.639971578366 0.572063959793 0.492709038173 0.337786623091 0.0 0.566589945557 0.673857633584 0.718357617296 0.610641814774 0.611902501647 0.572597892318 0.72020070908 0.665455703418 0.577824704191 0.628391139836 0.559475354972 0.852556340391 0.80013790571 0.706178256718 0.762457178398 0.730609232563 0.785865412618 0.860864766315 0.626738457589 0.545588320482 0.880824809803 0.714390977343 0.668493079704 0.764572214236 0.775005761487 0.715030496557 0.803198216956 0.779271759188 0.560570117123 0.77080907212 0.711004847527 0.833123875794 0.668381854168 0.810927256374 0.793577035393 0.936963653531 16 | 0.773219182577 0.632762108275 0.673824934535 0.569173732239 0.812534894433 0.79248172481 0.783067155115 0.715615072059 0.692480805382 
0.691652774114 0.647108313277 0.691222880539 0.619592821733 0.566589945557 0.0 0.481069172789 0.536993688244 0.439076391556 0.756741971278 0.574478444848 0.72904700322 0.574251220153 0.674643968233 0.583100169982 0.703439703059 0.790821541962 0.743598242239 0.762146379985 0.777203961947 0.748352108345 0.793025180149 0.799474075897 0.668503891318 0.621426050469 0.811710953432 0.755712771133 0.707273924245 0.610412370872 0.691238005741 0.745812722399 0.761034001495 0.67333066448 0.585050573494 0.668750400512 0.671425617086 0.70685570694 0.675580031512 0.754962191881 0.75996195493 0.854521154202 17 | 0.727863457681 0.71195746612 0.697736402467 0.61801612101 0.85752676317 0.835404155765 0.816683160821 0.791863154176 0.747117569537 0.753143330777 0.733047487346 0.760885676331 0.675834084756 0.673857633584 0.481069172789 0.0 0.553535851032 0.497461943777 0.775998715858 0.61965060835 0.7735159956 0.536001584882 0.674360797973 0.530360341691 0.753409038255 0.833776039725 0.786560079957 0.796541526535 0.786819808848 0.777225360829 0.788088077938 0.776239903705 0.690373451102 0.626739024127 0.841589078411 0.8048878022 0.694558837052 0.539219984194 0.64657225925 0.775788394126 0.797721992192 0.703587789066 0.67029217865 0.662020895674 0.725633359706 0.75277573258 0.742242381883 0.781970139584 0.760382602301 0.859257608099 18 | 0.853629946188 0.769057506042 0.550235782264 0.550541883353 0.780380873566 0.762900248282 0.754682690233 0.823350463522 0.783858682464 0.80323789403 0.69070445285 0.828118755163 0.746422504567 0.718357617296 0.536993688244 0.553535851032 0.0 0.604742006615 0.846209888306 0.620826655395 0.861503144029 0.678895301549 0.784984190372 0.695068699384 0.774541445573 0.726758125562 0.694789053774 0.836537695496 0.820946310669 0.801508352955 0.825802082154 0.67048241035 0.770430629722 0.724683986895 0.744506389921 0.863671043709 0.79822213142 0.712095543266 0.778757667643 0.848442720104 0.803442089143 0.721028865462 0.707059818914 0.691380369243 0.785560968887 0.723322020966 0.687538350916 0.717234248274 0.794632201546 0.813990450668 19 | 0.724361567068 0.656559560939 0.682809885622 0.606801747007 0.797857824129 0.802200931676 0.777385333685 0.731636511918 0.699117315836 0.696855356384 0.699017838203 0.713869949837 0.63574099366 0.610641814774 0.439076391556 0.497461943777 0.604742006615 0.0 0.734685826408 0.540232238216 0.743905968891 0.470345612865 0.644464541231 0.529789466227 0.714303679824 0.805372527043 0.747747055645 0.760033137627 0.777994490258 0.75803776591 0.795952041087 0.798802326318 0.66354808318 0.583744786402 0.813082856563 0.771104047709 0.681137129779 0.612291335697 0.662313582472 0.743408139862 0.780908818181 0.678203077868 0.646096249305 0.638509312738 0.687961151141 0.721080374137 0.661483006336 0.748433793385 0.730618006165 0.87121657286 20 | 0.578425003538 0.546362209578 0.88812377119 0.776160401623 0.926796812019 0.938568206487 0.918550092211 0.658061761272 0.658235115491 0.710400025067 0.781622080565 0.488807545457 0.607308261974 0.611902501647 0.756741971278 0.775998715858 0.846209888306 0.734685826408 0.0 0.740377412507 0.544166805917 0.683667218948 0.512947993378 0.635229860014 0.606092502074 0.962786408893 0.899202972912 0.663132834251 0.730575226481 0.737399713603 0.744844907569 0.9446370672 0.569805383089 0.60253936267 0.994205449606 0.528325777574 0.586728875431 0.816644534202 0.767089979213 0.530233293951 0.89538097859 0.876014133854 0.737120576906 0.872981758849 0.758943575131 0.925341188281 0.774607834455 0.904994726791 0.802712703219 0.988638793024 21 
| 0.750757904628 0.715531679366 0.618964988525 0.557539521454 0.77419541653 0.777966828947 0.73755347749 0.74393075446 0.708674123937 0.679739862275 0.636894705409 0.723549141997 0.61737470591 0.572597892318 0.574478444848 0.61965060835 0.620826655395 0.540232238216 0.740377412507 0.0 0.819317796829 0.667343750639 0.629738627363 0.693898207143 0.697831974275 0.756130802764 0.708428589415 0.763710799704 0.77901948243 0.761493253942 0.811600465992 0.758808455554 0.710638057237 0.599437462058 0.755166297476 0.799344749722 0.715255239614 0.695238961353 0.77025222753 0.776359663392 0.714300144559 0.649030174569 0.630240806697 0.642798916133 0.707561611815 0.715863268125 0.633877058644 0.701404080894 0.731835044432 0.830098238739 22 | 0.64902878566 0.557239397294 0.925180318983 0.818356851513 0.952930238983 0.956782638797 0.946031875696 0.710666098012 0.724168444324 0.738684308386 0.849141806298 0.616986367587 0.720034030005 0.72020070908 0.72904700322 0.7735159956 0.861503144029 0.743905968891 0.544166805917 0.819317796829 0.0 0.691494771555 0.63229633397 0.63145397403 0.717165442043 0.983526932768 0.927393229306 0.740086095419 0.786928634125 0.784409031188 0.791813751081 0.967780780714 0.604117592749 0.691068441717 1.02493299237 0.512843446735 0.635155766037 0.796773784318 0.726832288395 0.554060833518 0.899736622349 0.862671297457 0.732327890123 0.871661553255 0.68590174328 0.925896198849 0.838463996993 0.93856479364 0.855269943513 0.986535257279 23 | 0.633889384523 0.601509751242 0.749290941079 0.66651347583 0.855666312101 0.848386545427 0.82540068623 0.726458985561 0.691765714858 0.738802923976 0.769381719655 0.703369989187 0.697716447409 0.665455703418 0.574251220153 0.536001584882 0.678895301549 0.470345612865 0.683667218948 0.667343750639 0.691494771555 0.0 0.583589025456 0.471395635894 0.68270650748 0.866802236338 0.809251459273 0.732361895054 0.721152927804 0.718253967571 0.720271942085 0.844450559767 0.631794758929 0.601413173071 0.890823032798 0.713491488679 0.598979103802 0.618612403538 0.576694918927 0.684868792519 0.828532218354 0.770675242496 0.688101534537 0.730133365474 0.691051258055 0.790621696937 0.746513865874 0.825377360756 0.7307482906 0.879070677005 24 | 0.401559285215 0.510741840889 0.822246671031 0.705023676926 0.886135421439 0.887404534937 0.860875086701 0.620374723694 0.600844147385 0.661839277115 0.75506479514 0.532297710273 0.573555092122 0.577824704191 0.674643968233 0.674360797973 0.784984190372 0.644464541231 0.512947993378 0.629738627363 0.63229633397 0.583589025456 0.0 0.534554515783 0.588076702904 0.911457406617 0.850976876092 0.624587688802 0.658216017232 0.676501227978 0.674417726314 0.892644837927 0.519207857451 0.523406005922 0.936650196726 0.609999968629 0.428684692018 0.708728389893 0.652793007 0.587499458303 0.841198442664 0.817713579768 0.666963798747 0.813763250024 0.694217189872 0.866920682396 0.743838339419 0.846881711369 0.721035465302 0.934488768874 25 | 0.618360425902 0.553125449811 0.795897414061 0.690429236511 0.872990974828 0.870984632048 0.853337355693 0.660980468987 0.649135570615 0.697572365034 0.749551007191 0.630825721779 0.631343276688 0.628391139836 0.583100169982 0.530360341691 0.695068699384 0.529789466227 0.635229860014 0.693898207143 0.63145397403 0.471395635894 0.534554515783 0.0 0.661440887415 0.89699810428 0.829892728673 0.686895830755 0.727413975469 0.71314111561 0.727430123751 0.860617552394 0.569999071689 0.566126641553 0.920250302491 0.69582525872 0.577737844817 0.615916919185 0.54587996172 0.669739817653 0.844002967239 
[example/sim.ref, continued: the remaining rows of the 50×50 symmetric reference dissimilarity matrix for the 50-molecule example (zero diagonal, one whitespace-separated row per file line); the hard-wrapped numeric rows are left intact in the repository file]
--------------------------------------------------------------------------------
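Note on the example data: example/sim.ref is the reference output for the 50-molecule example in example/mol-50.xyz.idx, a symmetric matrix of pairwise (dis)similarities with zeros on the diagonal. The following is a minimal sketch, not part of glosim itself, of how such a matrix might be loaded and sanity-checked with NumPy; it assumes the file contains only plain whitespace-separated rows, so adjust (for example with skiprows) if your copy carries a header line.

# Minimal sketch (assumption: sim.ref holds only whitespace-separated matrix rows).
# Loads the reference matrix and checks it is square, symmetric, and zero on the diagonal.
import numpy as np

ref = np.loadtxt("example/sim.ref")
assert ref.ndim == 2 and ref.shape[0] == ref.shape[1], "expected a square matrix"
assert np.allclose(ref, ref.T), "expected a symmetric matrix"
assert np.allclose(np.diag(ref), 0.0), "expected a zero diagonal"
print("loaded a %d x %d reference matrix" % ref.shape)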