├── aseMolec
    ├── __init__.py
    ├── evalUtils.py
    ├── extAtoms.py
    ├── pltProps.py
    ├── anaAtoms.py
    └── ioLammps.py
├── .gitignore
├── README.md
└── setup.py


/aseMolec/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | aseMolec.egg-info/
2 | aseMolec/__pycache__/
3 | build/
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Provides useful functionality to:
 2 | - query ASE Atoms lists
 3 | - identify molecules in ASE Atoms lists
 4 | - create Volume-Scans
 5 | - split properties into intra-/inter-
 6 | - split forces into trans-/rot-/vib-
 7 | - plot MD trajectories etc
 8 | 
 9 | # Experimental, use with caution!
10 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
# Register the aseMolec distribution with setuptools; the package list is
# discovered automatically through find_packages().
setup(
    name='aseMolec',
    version='1.0.0',
    url='https://github.com/imagdau/Phython-Atoms.git',
    author='Ioan-Bogdan Magdau',
    author_email='i.b.magdau@gmail.com',
    description='Extension for ASE atoms',
    packages=find_packages(),
)
12 | 


--------------------------------------------------------------------------------
/aseMolec/evalUtils.py:
--------------------------------------------------------------------------------
 1 | import ase.io
 2 | from ase import Atoms
 3 | import os
 4 | import numpy as np
 5 | 
 6 | def eval_gap_quippy(db, gap, prog=True):
 7 |     for i, at in enumerate(db):
 8 |         if prog:
 9 |             print('Evaluating config:', i)
10 |         at.calc = gap
11 |         at.info['energy'] = at.get_potential_energy()
12 |         at.info['virial'] = -at.get_stress(voigt=False)*at.get_volume()
13 |         at.arrays['forces'] = at.get_forces()
14 |         del at.calc
15 | 
def eval_gap_quip(inxyz_file, outxyz_file, gap_file, init_args=None):
    """Evaluate ``inxyz_file`` with the external ``quip`` program.

    The ``quip`` output is filtered through ``grep AT | sed 's/AT//'`` into a
    temporary file next to ``outxyz_file``, read back, tidied up and written
    to ``outxyz_file``.

    Parameters
    ----------
    inxyz_file : path of the input configurations.
    outxyz_file : path of the output file; the temporary file is named after
        it with a ``_temp.xyz`` suffix.
    gap_file : path passed to quip as ``param_filename``.
    init_args : optional string passed to quip as ``init_args``.
    """
    import shlex  # local import: the module header does not import shlex

    temp_file = os.path.splitext(outxyz_file)[0] + '_temp.xyz'
    quip_args = ['atoms_filename=' + inxyz_file]
    if init_args is not None:
        quip_args.append('init_args=' + init_args)
    quip_args += ['param_filename=' + gap_file, 'E=True', 'F=True', 'V=True']
    # Quote every argument so paths with spaces or shell metacharacters are
    # passed to quip verbatim instead of being re-interpreted by the shell.
    command = ('quip ' + ' '.join(shlex.quote(arg) for arg in quip_args)
               + " | grep AT | sed 's/AT//' > " + shlex.quote(temp_file))
    try:
        os.system(command)
        db = ase.io.read(temp_file, ':')
        for at in db:
            # NOTE(review): presumably clears the results cached by the reader
            # so they do not conflict with the renamed fields -- confirm.
            at.calc.reset()
            at.arrays['forces'] = at.arrays.pop('force')
            del at.arrays['map_shift']
            del at.arrays['n_neighb']
            at.info['stress'] = -at.info['virial'] / at.get_volume()
            del at.info['nneightol']
            del at.info['cutoff']
        ase.io.write(outxyz_file, db)
    finally:
        # Remove the intermediate file even when reading or writing failed.
        if os.path.exists(temp_file):
            os.remove(temp_file)
34 | 
try:
    from mace.calculators import MACECalculator
except ImportError:
    # mace is optional: without it eval_mace is simply not defined.
    pass
else:
    def eval_mace(inxyz_file, outxyz_file, mace_file, init_args=None, device='cpu', r_max=6.0):
        """Evaluate ``inxyz_file`` with a MACE model and write ``outxyz_file``.

        Energies are stored in ``info['energy']`` and forces in
        ``arrays['forces']`` of every frame; the calculator is detached
        before writing.

        Parameters
        ----------
        inxyz_file : path of the input configurations.
        outxyz_file : path of the output file.
        mace_file : model path handed to ``MACECalculator``.
        init_args : accepted for signature parity with ``eval_gap_quip``;
            not used here.
        device : device string handed to ``MACECalculator`` (default 'cpu').
        r_max : cutoff handed to ``MACECalculator`` (default 6.0).
        """
        db = ase.io.read(inxyz_file, ':')
        # The calculator is told every atomic number that occurs in the file.
        atomic_numbers = list(np.unique([n for at in db for n in at.numbers]))
        calc = MACECalculator(model_path=mace_file, r_max=r_max, device=device, atomic_numbers=atomic_numbers, default_dtype="float64")
        for at in db:
            at.calc = calc
            at.info['energy'] = at.get_potential_energy()
            at.arrays['forces'] = at.get_forces()
            del at.calc
        ase.io.write(outxyz_file, db)
50 | 
51 | 


--------------------------------------------------------------------------------
/aseMolec/extAtoms.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import hashlib
  3 | import ase.units
  4 | 
  5 | #creates unique hash for a matrix of numbers
  6 | def hash_array(v):
  7 |     return hashlib.md5(np.array2string(v, precision=8, sign='+', floatmode='fixed').encode()).hexdigest()
  8 | 
  9 | #creates unique hash for Atoms from atomic numbers and positions
 10 | def hash_atoms(db):
 11 |     for at in db:
 12 |         v = np.concatenate((at.numbers.reshape(-1,1), at.positions),axis=1)
 13 |         at.info['uid'] = hash_array(v)
 14 | 
 15 | # __eq__(self, other) implemented in atoms.py envokes really high precision
 16 | def equal(self, other, prec=1e-12):
 17 |     a = self.arrays
 18 |     b = other.arrays
 19 |     return (len(self) == len(other) and
 20 |             (np.abs(a['positions']-b['positions'])<prec).all() and
 21 |             (a['numbers'] == b['numbers']).all() and
 22 |             (np.abs((self.cell-other.cell))<prec).all() and
 23 |             (self.pbc == other.pbc).all())
 24 | 
 25 | #prints all available properties in list of Atoms ~Panos edit to include properties stored in calc
 26 | def check_keys(db):
 27 |     x = []
 28 |     for at in db:
 29 | 
 30 |         if 'config_type' in at.info:
 31 |             if at.calc:
 32 |                  x = list(at.calc.__dict__["results"].keys())
 33 |             print([at.info['config_type']]+list(at.info.keys())+list(at.arrays.keys())+x) # ~Panos addition of calc results
 34 |         else:
 35 |             if at.calc:
 36 |                  x = list(at.calc.__dict__["results"].keys())
 37 |             print(list(at.info.keys())+list(at.arrays.keys())+x) # ~Panos addition of calc results
 38 | 
 39 | #selects configurations which have property
 40 | def sel_by_prop(db, prop):
 41 |     reflist = []
 42 |     for at in db:
 43 |         props = list(at.info.keys())+list(at.arrays.keys())
 44 |         if at.calc:
 45 |             props += list(at.calc.__dict__["results"].keys()) # ~Panos addition of calc results
 46 |         if prop in props:
 47 |             reflist.append(at)
 48 |     return reflist
 49 | 
 50 | #select configurations which have an info or calc results field with certain value
 51 | def sel_by_info_val(db, info_key, info_val):
 52 |     sel = []
 53 |     for at in db:
 54 |         info_calc_keys = list(at.info.keys())
 55 |         if at.calc:
 56 |            info_calc_keys += list(at.calc.__dict__["results"].keys()) # ~Panos addition of calc results
 57 |         if info_key in info_calc_keys:
 58 |             if at.info[info_key] == info_val:
 59 |                 sel += [at]
 60 |     return sel
 61 | 
 62 | #selects configurations by uid
 63 | def sel_by_uid(db, uid):
 64 |     reflist = []
 65 |     for at in db:
 66 |         if uid == at.info['uid']:
 67 |             reflist.append(at)
 68 |     return reflist
 69 | 
 70 | #selects configurations of a certain config_type
 71 | def sel_by_conf_type(db, config_type):
 72 |     reflist = []
 73 |     for at in db:
 74 |         if (at.info['config_type'] == config_type):
 75 |             reflist.append(at)
 76 |     return reflist
 77 | 
 78 | #deletes all properties except for coordinates and identification
 79 | def del_prop(db):
 80 |     for at in db:
 81 |         if at.calc:
 82 |             del at.calc
 83 |         keys = list(at.info.keys())
 84 |         for k in keys:
 85 |             if 'energy' in k:
 86 |                 del at.info[k]
 87 |             if 'stress' in k:
 88 |                 del at.info[k]
 89 |             if 'virial' in k:
 90 |                 del at.info[k]
 91 |         keys = list(at.arrays.keys())
 92 |         for k in keys:
 93 |             if 'momenta' in k:
 94 |                 del at.arrays[k]
 95 |             if 'force' in k:
 96 |                 del at.arrays[k]
 97 | 
 98 | #deletes all properties which contain tag
 99 | def del_prop_by_tag(db, tag):
100 |     for at in db:
101 |         if at.calc:
102 |             del at.calc
103 |         keys = list(at.info.keys())
104 |         for k in keys:
105 |             if tag in k:
106 |                 del at.info[k]
107 |         keys = list(at.arrays.keys())
108 |         for k in keys:
109 |             if tag in k:
110 |                 del at.arrays[k]
111 | 
def rename_prop_tag(db, oldtag, newtag):
    """Rename property keys in place, e.g. ``energy_PBE`` -> ``energy_dft``.

    In both ``info`` and ``arrays`` of every frame, each key containing
    ``oldtag`` is replaced by the same key with ``oldtag`` substituted by
    ``newtag``; the stored value is moved to the new key.
    """
    for at in db:
        for store in (at.info, at.arrays):
            # Work on a snapshot of the keys because the dict changes below.
            for old_key in list(store.keys()):
                if oldtag not in old_key:
                    continue
                store[old_key.replace(oldtag, newtag)] = store.pop(old_key)
123 | 
def get_E0(db, tag=''):
    """Collect the energies of the single-atom frames of ``db``.

    Returns a dict mapping the chemical symbol of every one-atom frame to its
    ``info['energy' + tag]``; when a symbol occurs more than once the last
    frame wins.
    """
    energy_key = 'energy' + tag
    return {
        at.get_chemical_symbols()[0]: at.info[energy_key]
        for at in db
        if len(at) == 1
    }
130 | 
def get_density_gcm3(at):
    """Return the total mass of ``at`` divided by its volume, rescaled.

    The scale factor is ``(ase.units.m / 100)**3 / ase.units.mol``; going by
    the function name the result is meant to be in g/cm^3 (assumes masses and
    volume are in ASE's default units -- TODO confirm).
    """
    per_cm3 = (ase.units.m / 1.0e2) ** 3
    scale = per_cm3 / ase.units.mol
    total_mass = np.sum(at.get_masses())
    return total_mass / at.get_volume() * scale
134 | 
def get_prop(db, type, prop='', peratom=False, E0=None):
    """Return the requested property of every frame of ``db`` as an array.

    Parameters
    ----------
    db : sequence of frames.
    type : where the property comes from:
        'info'   -> ``at.info[prop]``
        'calc'   -> ``at.calc`` results entry ``prop`` (object array)
        'arrays' -> ``at.arrays[prop]`` (object array)
        'cell'   -> ``at.cell``
        'meth'   -> result of calling the method named ``prop``
        'atom'   -> sum of ``E0`` over the atoms of the frame
        'bind'   -> ``info['energy' + prop]`` minus that sum
    prop : property name (for 'atom'/'bind' the energy tag suffix).
    peratom : divide every value by the frame's number of atoms.
    E0 : dict symbol -> isolated-atom energy; when empty or None it is
        collected from ``db`` with ``get_E0(db, prop)``.

    Raises
    ------
    ValueError : if ``type`` is not one of the values listed above.
    """
    if peratom:
        def norm(a):
            return a.get_global_number_of_atoms()
    else:
        def norm(a):
            return 1

    def as_object_array(values):
        # np.array(..., dtype=object) can refuse frames whose arrays differ
        # only in their first dimension; fall back to a 1-D array of arrays.
        try:
            return np.array(values, dtype=object)
        except ValueError:
            out = np.empty(len(values), dtype=object)
            for i, value in enumerate(values):
                out[i] = value
            return out

    if type == 'info':
        return np.array([a.info[prop] / norm(a) for a in db])
    if type == 'calc':
        # object dtype: the results dict holds both scalars and per-atom arrays
        return as_object_array([a.calc.__dict__["results"][prop] / norm(a) for a in db])
    if type == 'arrays':
        return as_object_array([a.arrays[prop] / norm(a) for a in db])
    if type == 'cell':
        return np.array([a.cell / norm(a) for a in db])
    if type == 'meth':
        return np.array([getattr(a, prop)() / norm(a) for a in db])
    if type in ('atom', 'bind'):
        if not E0:
            E0 = get_E0(db, prop)

        def atomic_sum(a):
            return np.sum([E0[s] for s in a.get_chemical_symbols()])

        if type == 'atom':
            return np.array([atomic_sum(a) / norm(a) for a in db])
        return np.array([(a.info['energy' + prop] - atomic_sum(a)) / norm(a) for a in db])
    raise ValueError("unknown property type: {!r}".format(type))
159 | 
def set_prop(db, type, prop, tag):
    """Store ``prop[i]`` on the i-th frame of ``db`` under the key ``tag``.

    ``type`` selects the destination dict: 'info' or 'arrays'. Any other
    value leaves the frames untouched.
    """
    for idx, at in enumerate(db):
        if type in ('info', 'arrays'):
            getattr(at, type)[tag] = prop[idx]
166 | 
def calc_virial(db, tag='', keep=False, convstr=False):
    """Store ``info['virial' + tag]`` = -stress * volume on every frame.

    Parameters
    ----------
    db : iterable of frames (modified in place).
    tag : suffix appended to both the 'stress' and the 'virial' key.
    keep : keep the stress entry; by default it is removed from ``info``.
    convstr : store the virial as a space-separated string of its elements
        flattened in column-major ('F') order instead of as an array.
    """
    stress_key = 'stress' + tag
    virial_key = 'virial' + tag
    for at in db:
        stress = at.info[stress_key] if keep else at.info.pop(stress_key)
        at.info[virial_key] = -stress * at.get_volume()
        if convstr:
            flat = at.info.pop(virial_key).reshape(-1, order='F')
            at.info[virial_key] = ' '.join(str(x) for x in flat)
175 | 
def split_db(db, N=150, seed=12345):
    """Split ``db`` into two lists, keeping related frames together.

    Frames are grouped by ``config_type``. All 'IsolatedAtoms' frames go to
    both outputs. The 'LiquidConfigs' frames are shuffled with a seeded
    permutation; the first ``N`` of the permutation go to the first output
    and the rest to the second. The 'IsolatedMolecules' frames sharing a
    liquid frame's ``uid`` follow it.

    Returns
    -------
    (db1, db2) : two lists of frames.
    """
    db_ia = sel_by_conf_type(db, 'IsolatedAtoms')
    db_im = sel_by_conf_type(db, 'IsolatedMolecules')
    db_lc = sel_by_conf_type(db, 'LiquidConfigs')
    per = np.random.RandomState(seed=seed).permutation(len(db_lc))
    # Set lookup replaces scanning the permutation once for every frame.
    first_ids = set(per[:N].tolist())
    db1 = list(db_ia)
    db2 = list(db_ia)
    for i, frame_lc in enumerate(db_lc):
        frame_im = sel_by_uid(db_im, frame_lc.info['uid'])
        target = db1 if i in first_ids else db2
        target.append(frame_lc)
        target.extend(frame_im)
    return db1, db2
193 | 


--------------------------------------------------------------------------------
/aseMolec/pltProps.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import matplotlib.cm as cm
  3 | from matplotlib import gridspec
  4 | from matplotlib.ticker import StrMethodFormatter
  5 | import numpy as np
  6 | from aseMolec import extAtoms as ea
  7 | import re
  8 | 
  9 | # ### Standard Error of the Mean
 10 | # a = np.random.normal(size=100000)
 11 | # N = 10
 12 | # win = int(100000/N)
 13 | # m = np.mean(a.reshape([N,win]), axis=1)
 14 | # print(m.size)
 15 | # print(np.std(m))
 16 | # print(np.std(m)*np.sqrt(win))
 17 | # print(np.std(a))
 18 | def stats(v, win=1):
 19 |     N = np.floor(v.size/win).astype(int)
 20 |     v_win = v[:N*win].reshape(N,win)
 21 |     means = np.mean(v_win, axis=1)
 22 |     return np.mean(means), np.std(means), np.around((N*win*100)/v.size, 2)
 23 | 
 24 | def plot_prop(prop1, prop2, **kwargs):
 25 |     lmin = min(min(prop1), min(prop2))
 26 |     lmax = max(max(prop1), max(prop2))
 27 |     RMSE = np.sqrt(np.mean((prop1-prop2)**2))
 28 |     RRMSE = RMSE/np.sqrt(np.mean((prop1-np.mean(prop1))**2))
 29 |     if 'cols' in kwargs.keys():
 30 |         cols = kwargs['cols']
 31 |     else:
 32 |         cols = None
 33 |     if 'cmap' in kwargs.keys():
 34 |         cmap = kwargs['cmap']
 35 |     else:
 36 |         cmap = None
 37 |     if 'rel' in kwargs.keys():
 38 |         rel = kwargs['rel']
 39 |     else:
 40 |         rel = False
 41 |     if 'return_rel' in kwargs.keys():
 42 |         return_rel = kwargs['return_rel']
 43 |     else:
 44 |         return_rel = False
 45 |     if 'rast' in kwargs.keys():
 46 |         rast = kwargs['rast']
 47 |     else:
 48 |         rast = False
 49 |     if 'xy' in kwargs.keys():
 50 |         xy = kwargs['xy']
 51 |     else:
 52 |         xy = [0.35,0.04]
 53 |     plt.scatter(prop1, prop2, s=3.0, c=cols, cmap=cmap, rasterized=rast)
 54 |     if rel:
 55 |         plt.text(xy[0], xy[1], "  RMSE = {0:.4f}\nRRMSE = {1:.4f}".format(RMSE, RRMSE), transform=plt.gca().transAxes)
 56 |     else:
 57 |         plt.text(xy[0], xy[1], "  RMSE = {0:.4f}".format(RMSE), transform=plt.gca().transAxes)
 58 |     plt.xlim([lmin, lmax])
 59 |     plt.ylim([lmin, lmax])
 60 |     plt.plot([lmin, lmax], [lmin, lmax], '--', linewidth=1, color='gray')
 61 |     if 'title' in kwargs.keys():
 62 |         plt.title(kwargs['title'])
 63 |     if 'labs' in kwargs.keys():
 64 |         plt.xlabel(kwargs['labs'][0])
 65 |         plt.ylabel(kwargs['labs'][1])
 66 |     plt.gca().set_aspect('equal', adjustable='box')
 67 |     plt.gca().ticklabel_format(useOffset=False)
 68 |     if return_rel:
 69 |         return RMSE, RRMSE
 70 |     else:
 71 |         return RMSE
 72 | 
 73 | # col, Navg, legend, labs, title
 74 | def plot_traj(fnames, **kwargs):
 75 |     i = 0
 76 |     if 'col' in kwargs.keys():
 77 |         col = kwargs['col']
 78 |     else:
 79 |         col = 1
 80 |     if 'colors' in kwargs.keys():
 81 |         colors = kwargs['colors']
 82 |     else:
 83 |         colors = np.array(cm.get_cmap('tab10').colors)
 84 |     if 'alpha' in kwargs.keys():
 85 |         alpha = kwargs['alpha']
 86 |     else:
 87 |         alpha = 1.0
 88 |     if 'Navg' in kwargs.keys():
 89 |         N = kwargs['Navg']
 90 |     else:
 91 |         N = 1
 92 |     if 'Nsamp' in kwargs.keys():
 93 |         Nsamp = kwargs['Nsamp']
 94 |     else:
 95 |         Nsamp = 1000
 96 |     for f in fnames:
 97 |         thermo = np.loadtxt(f)
 98 |         y = np.convolve(thermo[:,col], np.ones(N)/N, mode='valid')
 99 |         ymin = round(np.ceil((N-1)/2))
100 |         ymax = -round(np.floor((N-1)/2))
101 |         if ymax == 0:
102 |             ymax = thermo.shape[0]
103 |         if 'legend' in kwargs.keys():
104 |             lb = kwargs['legend'][i]
105 |         else:
106 |             lb = None
107 |         plt.plot(thermo[ymin:ymax,0]/Nsamp, y, label=lb, color=colors[i,:], alpha=alpha)
108 |         if 'sel' in kwargs.keys():
109 |             sel = kwargs['sel']
110 |             if (i+1) in sel.keys():
111 |                 plt.scatter(thermo[sel[i+1],0]/Nsamp, y[np.array(sel[i+1])-ymin], marker='o', color=colors[i,:], s=50)
112 |         i += 1
113 |     if 'title' in kwargs.keys():
114 |         plt.title(kwargs['title'])
115 |     if 'labs' in kwargs.keys():
116 |         plt.xlabel(kwargs['labs'][0])
117 |         plt.ylabel(kwargs['labs'][1])
118 |     if lb:
119 |         plt.legend()
120 | 
# col, start, bins, legend, labs, title
def plot_hist(fnames, **kwargs):
    """Histogram one column of several text tables on the current axes.

    Every file of ``fnames`` is read with ``np.loadtxt``. The histogram uses
    the column divided by ``scale``; the returned statistics come from
    ``stats`` applied to the unscaled column.

    Keyword options
    ---------------
    col : index of the column (default 1).
    colors : colour per series; defaults to the 'tab10' palette. Colours are
        reused cyclically when there are more series than colours.
    alpha : opacity (default 0.7).
    bins : ``bins`` argument of the histogram (default 100).
    start : first row taken into account (default 0).
    orient : histogram orientation (default 'vertical').
    htype : histogram type (default 'step').
    Navg : window size handed to ``stats`` (default 1).
    density : normalise the histogram (default False).
    scale : divisor applied to the plotted values (default 1).
    legend : sequence with one label per file.
    sel : dict mapping the 1-based file index to row indices to highlight.
    title : axes title.
    labs : pair of x and y axis labels.

    Returns
    -------
    (avgs, stds) : arrays with one mean and one standard deviation per file.
    """
    col = kwargs.get('col', 1)
    if 'colors' in kwargs:
        colors = kwargs['colors']
    else:
        # plt.get_cmap replaces matplotlib.cm.get_cmap, which newer
        # Matplotlib releases no longer provide.
        colors = np.array(plt.get_cmap('tab10').colors)
    alpha = kwargs.get('alpha', 0.7)
    b = kwargs.get('bins', 100)
    start = kwargs.get('start', 0)
    orientation = kwargs.get('orient', 'vertical')
    htype = kwargs.get('htype', 'step')
    Navg = kwargs.get('Navg', 1)
    density = kwargs.get('density', False)
    scale = kwargs.get('scale', 1)
    legend = kwargs.get('legend')
    sel = kwargs.get('sel')
    avgs = []
    stds = []
    lb = None  # stays None when fnames is empty, so no legend is requested
    for i, f in enumerate(fnames):
        thermo = np.loadtxt(f)
        lb = legend[i] if legend is not None else None
        plt.hist(thermo[start:, col] / scale, bins=b, histtype=htype, label=lb,
                 orientation=orientation, alpha=alpha, density=density,
                 color=colors[i % len(colors)])
        avg, std, _ = stats(thermo[start:, col], Navg)
        avgs.append(avg)
        stds.append(std)
        if sel is not None and (i + 1) in sel:
            # NOTE(review): the markers are placed using a histogram of the
            # unscaled data, unlike the plotted one -- confirm this is intended.
            hist, edges = np.histogram(thermo[start:, col], bins=b)
            ids = np.argmin(np.abs(edges[:-1].reshape(-1, 1) - thermo[sel[i + 1], col]), axis=0)
            plt.scatter(edges[ids], hist[ids], marker='o', color='C{}'.format(i), s=50)
    if 'title' in kwargs:
        plt.title(kwargs['title'])
    if 'labs' in kwargs:
        plt.xlabel(kwargs['labs'][0])
        plt.ylabel(kwargs['labs'][1])
    if lb:
        plt.legend()
    return np.array(avgs), np.array(stds)
197 | 
def plot_traj_hist(fnames, col=2, Navg=1, start=0, bins=50, legend=None, labs=None, ylims=None, colors=np.array([]), title=None, fs=10, Nsamp=4000):
    """Plot trajectories with a side histogram sharing the same y range.

    The left panel is drawn by ``plot_traj`` and the narrow right panel by
    ``plot_hist`` (horizontal, filled); ticks and labels of the right panel
    are removed.

    Parameters
    ----------
    fnames : files readable by ``np.loadtxt``.
    col : column to plot.
    Navg : running-mean width for the trajectory panel.
    start, bins : forwarded to ``plot_hist``.
    legend : optional labels, one per file.
    labs : optional axis labels; the first two label the trajectory panel.
    ylims : optional y limits for the trajectory panel.
    colors : colour per series; an empty array or None selects 'tab10'.
    title : title of the trajectory panel.
    fs : font size written to ``plt.rcParams``.
    Nsamp : window size handed to ``plot_hist`` as ``Navg``.

    Returns
    -------
    (avgs, errs) : the two arrays returned by ``plot_hist``.
    """
    plt.rcParams.update({'font.size': fs})
    if colors is None or not np.size(colors):
        # plt.get_cmap replaces matplotlib.cm.get_cmap, which newer
        # Matplotlib releases no longer provide.
        colors = np.array(plt.get_cmap('tab10').colors)
    traj_kwargs = dict(col=col, Navg=Navg, title=title, colors=colors, alpha=0.8)
    if legend:
        traj_kwargs['legend'] = legend
    if labs is not None:
        # Only forward labels that were given: the default None cannot be sliced.
        traj_kwargs['labs'] = labs[0:2]
    gs = gridspec.GridSpec(1, 2, width_ratios=[5, 1], wspace=0)
    plt.subplot(gs[0])
    plot_traj(fnames, **traj_kwargs)
    if ylims:
        plt.ylim(ylims)
    ylim = plt.gca().get_ylim()
    plt.gca().yaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))
    plt.subplot(gs[1])
    # No labels are forwarded here because both are blanked right below.
    # NOTE(review): Nsamp is used as the averaging window of the statistics,
    # while plot_traj keeps its own default Nsamp -- confirm this is intended.
    avgs, errs = plot_hist(fnames, col=col, start=start, bins=bins, title=None, orient='horizontal', htype='stepfilled', colors=colors, alpha=0.8, Navg=Nsamp)
    plt.ylim(ylim)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel('')
    plt.ylabel('')
    plt.tight_layout()
    return avgs, errs
221 | 
def plot_menvs(menvs, lb, **kwargs):
    """Draw a 2-D count map of ``menvs[lb]`` on the current axes.

    NOTE: an identical ``plot_menvs`` is defined again further down in this
    module; that later definition is the one in effect after import.

    Keyword options: ``nbins`` (number of unit-width bins per axis, defaults
    to the maximum of ``menvs[lb]``), ``cmap`` (default 'viridis'), ``style``
    ('cbar' adds a colorbar, 'nums' writes the count into every cell),
    ``labs`` (x and y labels) and ``title``.
    """
    if 'nbins' in kwargs.keys():
        nbins = kwargs['nbins']
    else:
        nbins = np.max(menvs[lb])
    # Same integer bin edges 0..nbins for every column of the data.
    bins = np.vstack([np.array(range(nbins+1))]*menvs[lb].shape[1])
    counts, coords = np.histogramdd(menvs[lb], bins=bins)
    #later could expand to more dimensions, for now just implement for 2
    #for more than 2, need to make a choice on how to project in lower dimension
    if 'cmap' in kwargs.keys():
        cmap = kwargs['cmap']
    else:
        cmap = 'viridis'
    # Edges are shifted by half a bin so each cell is centred on an integer.
    plt.pcolormesh(coords[0]-0.5, coords[1]-0.5, counts.T, cmap=cmap, edgecolors='grey')
    plt.xticks(coords[0][:-1])
    plt.yticks(coords[1][:-1])
    if 'style' in kwargs.keys():
        if kwargs['style']=='cbar':
            plt.colorbar()
        if kwargs['style']=='nums':
            for i in range(nbins):
                for j in range(nbins):
                    plt.text(i,j,'{0:d}'.format(int(counts[i, j])), ha='center', va='center')
    if 'labs' in kwargs.keys():
        plt.xlabel(kwargs['labs'][0])
        plt.ylabel(kwargs['labs'][1])
    if 'title' in kwargs.keys():
        plt.title(kwargs['title'])
250 | 
def plot_hist_thermo(thermos, **kwargs):
    """Histogram one column of several in-memory tables on the current axes.

    Works like ``plot_hist`` but takes the 2-D arrays directly instead of
    file names. The histogram uses the column divided by ``scale``; the
    returned statistics come from ``stats`` applied to the unscaled column.

    Keyword options
    ---------------
    col : index of the column (default 1).
    colors : colour per series; defaults to the 'tab10' palette. Colours are
        reused cyclically when there are more series than colours.
    alpha : opacity (default 0.7).
    bins : ``bins`` argument of the histogram (default 100).
    start : first row taken into account (default 0).
    orient : histogram orientation (default 'vertical').
    htype : histogram type (default 'step').
    Navg : window size handed to ``stats`` (default 1).
    density : normalise the histogram (default False).
    scale : divisor applied to the plotted values (default 1).
    legend : sequence with one label per table.
    sel : dict mapping the 1-based table index to row indices to highlight.
    title : axes title.
    labs : pair of x and y axis labels.

    Returns
    -------
    (avgs, stds) : arrays with one mean and one standard deviation per table.
    """
    col = kwargs.get('col', 1)
    if 'colors' in kwargs:
        colors = kwargs['colors']
    else:
        # plt.get_cmap replaces matplotlib.cm.get_cmap, which newer
        # Matplotlib releases no longer provide.
        colors = np.array(plt.get_cmap('tab10').colors)
    alpha = kwargs.get('alpha', 0.7)
    b = kwargs.get('bins', 100)
    start = kwargs.get('start', 0)
    orientation = kwargs.get('orient', 'vertical')
    htype = kwargs.get('htype', 'step')
    Navg = kwargs.get('Navg', 1)
    density = kwargs.get('density', False)
    scale = kwargs.get('scale', 1)
    legend = kwargs.get('legend')
    sel = kwargs.get('sel')
    avgs = []
    stds = []
    lb = None  # stays None when thermos is empty, so no legend is requested
    for i, thermo in enumerate(thermos):
        lb = legend[i] if legend is not None else None
        plt.hist(thermo[start:, col] / scale, bins=b, histtype=htype, label=lb,
                 orientation=orientation, alpha=alpha, density=density,
                 color=colors[i % len(colors)])
        avg, std, _ = stats(thermo[start:, col], Navg)
        avgs.append(avg)
        stds.append(std)
        if sel is not None and (i + 1) in sel:
            # NOTE(review): the markers are placed using a histogram of the
            # unscaled data, unlike the plotted one -- confirm this is intended.
            hist, edges = np.histogram(thermo[start:, col], bins=b)
            ids = np.argmin(np.abs(edges[:-1].reshape(-1, 1) - thermo[sel[i + 1], col]), axis=0)
            plt.scatter(edges[ids], hist[ids], marker='o', color='C{}'.format(i), s=50)
    if 'title' in kwargs:
        plt.title(kwargs['title'])
    if 'labs' in kwargs:
        plt.xlabel(kwargs['labs'][0])
        plt.ylabel(kwargs['labs'][1])
    if lb:
        plt.legend()
    return np.array(avgs), np.array(stds)
325 | 
def plot_menvs(menvs, lb, **kwargs):
    """Draw a 2-D count map of ``menvs[lb]`` on the current axes.

    Keyword options
    ---------------
    nbins : number of unit-width bins per axis (default: max of the data).
    cmap : colormap name (default 'viridis').
    style : 'cbar' adds a colorbar, 'nums' writes the count into every cell.
    labs : pair of x and y axis labels.
    title : axes title.
    """
    nbins = kwargs['nbins'] if 'nbins' in kwargs else np.max(menvs[lb])
    # Same integer bin edges 0..nbins for every column of the data.
    edge_row = np.array(range(nbins + 1))
    bins = np.vstack([edge_row for _ in range(menvs[lb].shape[1])])
    counts, coords = np.histogramdd(menvs[lb], bins=bins)
    # Only two dimensions are drawn for now; more would require choosing how
    # to project onto a lower dimension.
    cmap = kwargs.get('cmap', 'viridis')
    x_edges, y_edges = coords[0], coords[1]
    # Edges are shifted by half a bin so each cell is centred on an integer.
    plt.pcolormesh(x_edges - 0.5, y_edges - 0.5, counts.T, cmap=cmap, edgecolors='grey')
    plt.xticks(x_edges[:-1])
    plt.yticks(y_edges[:-1])
    style = kwargs.get('style')
    if style == 'cbar':
        plt.colorbar()
    if style == 'nums':
        for ix in range(nbins):
            for iy in range(nbins):
                plt.text(ix, iy, '{0:d}'.format(int(counts[ix, iy])), ha='center', va='center')
    if 'labs' in kwargs:
        plt.xlabel(kwargs['labs'][0])
        plt.ylabel(kwargs['labs'][1])
    if 'title' in kwargs:
        plt.title(kwargs['title'])
354 | 
def plot_intra_inter(db1, db2, labs):
    """Compare two databases on a 3x3 grid of correlation plots.

    Rows are energy, forces and virial; columns are the intra-molecular,
    inter-molecular and total parts. Energies and virials are read per atom
    from ``info``; forces are concatenated from ``arrays``. ``labs`` holds
    the x and y axis labels used on every panel.
    """
    def from_info(db, key):
        return ea.get_prop(db, 'info', key, True).flatten()

    def from_arrays(db, key):
        return np.concatenate(ea.get_prop(db, 'arrays', key)).flatten()

    panels = [
        (from_info, 'energy_intram', r'Intra Energy $(\rm eV/atom)$ '),
        (from_info, 'energy_interm', r'Inter Energy $(\rm eV/atom)$ '),
        (from_info, 'energy', r'Total Energy $(\rm eV/atom)$ '),
        (from_arrays, 'forces_intram', r'Intra Forces $\rm (eV/\AA)$ '),
        (from_arrays, 'forces_interm', r'Inter Forces $\rm (eV/\AA)$ '),
        (from_arrays, 'forces', r'Total Forces $\rm (eV/\AA)$ '),
        (from_info, 'virial_intram', r'Intra Virial $(\rm eV/atom)$ '),
        (from_info, 'virial_interm', r'Inter Virial $(\rm eV/atom)$ '),
        (from_info, 'virial', r'Total Virial $(\rm eV/atom)$ '),
    ]
    plt.figure(figsize=(15,12), dpi=200)
    for slot, (fetch, key, title) in enumerate(panels, start=1):
        plt.subplot(3, 3, slot)
        plot_prop(fetch(db1, key), fetch(db2, key), title=title, labs=labs, rel=True)
    plt.tight_layout()
394 |     
def plot_trans_rot_vib(db1, db2, labs):
    """Compare the force components of two databases on a 1x4 grid.

    The panels show the 'forces_trans', 'forces_rot', 'forces_vib' and
    'forces' arrays of ``db1`` against those of ``db2``. ``labs`` holds the
    x and y axis labels used on every panel.
    """
    panels = [
        ('forces_trans', r'Translational Forces $\rm (eV/\AA)$ '),
        ('forces_rot', r'Rotational Forces $\rm (eV/\AA)$ '),
        ('forces_vib', r'Vibrational Forces $\rm (eV/\AA)$ '),
        ('forces', r'Total Forces $\rm (eV/\AA)$ '),
    ]
    plt.figure(figsize=(16,5), dpi=200)
    for slot, (key, title) in enumerate(panels, start=1):
        plt.subplot(1, 4, slot)
        first = np.concatenate(ea.get_prop(db1, 'arrays', key)).flatten()
        second = np.concatenate(ea.get_prop(db2, 'arrays', key)).flatten()
        plot_prop(first, second, title=title, labs=labs, rel=True)
    plt.tight_layout()
414 | 
def plot_intra_inter_energy(db_test, db_pred):
    """Plot energy correlations of the liquid frames and save 'energy.png'.

    Isolated-atom energies are collected from the full databases first; then
    only the 'LiquidConfigs' frames are compared on a 2x2 grid: intra-molecular
    binding energy, inter-molecular energy, summed atomic energies and total
    energy, all per atom.

    Returns
    -------
    dict mapping 'IntraEnergy', 'InterEnergy', 'AtomEnergy' and 'TotalEnergy'
    to the RMSE returned by ``plot_prop``.
    """
    ref_e0 = ea.get_E0(db_test)
    pred_e0 = ea.get_E0(db_pred)
    ref_frames = ea.sel_by_conf_type(db_test, 'LiquidConfigs')
    pred_frames = ea.sel_by_conf_type(db_pred, 'LiquidConfigs')

    # (result key, panel title, how to extract the values from a database)
    panels = [
        ('IntraEnergy', 'Intra Energy (ev/atom) ',
         lambda db, e0: ea.get_prop(db, 'bind', '_intram', True, e0)),
        ('InterEnergy', 'Inter Energy (ev/atom) ',
         lambda db, e0: ea.get_prop(db, 'info', 'energy_interm', True)),
        ('AtomEnergy', 'Atom Energy (ev/atom) ',
         lambda db, e0: ea.get_prop(db, 'atom', peratom=True, E0=e0)),
        ('TotalEnergy', 'Total Energy (ev/atom) ',
         lambda db, e0: ea.get_prop(db, 'info', 'energy', True)),
    ]

    RMSE = {}
    plt.rcParams.update({'font.size': 12})
    plt.figure(figsize=(8,8), dpi=200)
    for slot, (name, title, fetch) in enumerate(panels, start=1):
        plt.subplot(2, 2, slot)
        reference = fetch(ref_frames, ref_e0).flatten()
        predicted = fetch(pred_frames, pred_e0).flatten()
        RMSE[name] = plot_prop(reference, predicted, title=title, labs=['DFT', 'GAP'])
    plt.tight_layout(pad=0.5)
    plt.savefig('energy.png')
    plt.close()
    return RMSE
447 | 
def plot_intra_inter_forces(db_test, db_pred):
    """Draw per-element and overall force correlation panels into 'forces.png'.

    Both databases are first narrowed with
    ``ea.sel_by_conf_type(..., 'LiquidConfigs')``. The figure has three rows
    (arrays 'forces_intram', 'forces_interm', 'forces') and one column per
    distinct chemical symbol found in ``db_test`` plus a last column with all
    atoms together. The element mask is built from ``db_test`` and applied to
    both the reference and the predicted arrays.

    Returns a dict of the values returned by ``plot_prop``, keyed
    '<Intra|Inter|Total>Forces<element>' for the per-element panels and
    '<Intra|Inter|Total>Forces' for the all-atom panels.
    """
    db_test = ea.sel_by_conf_type(db_test, 'LiquidConfigs')
    db_pred = ea.sel_by_conf_type(db_pred, 'LiquidConfigs')
    elms = np.array([el for at in db_test for el in at.get_chemical_symbols()])
    species = np.unique(elms)
    ncols = species.size+1

    rows = (('Intra', 'forces_intram'), ('Inter', 'forces_interm'), ('Total', 'forces'))
    # Concatenate every force array once; the original rebuilt the same
    # arrays for every element and every panel.
    stacked = {}
    for _, key in rows:
        stacked[key] = (np.concatenate(ea.get_prop(db_test, 'arrays', key)),
                        np.concatenate(ea.get_prop(db_pred, 'arrays', key)))

    RMSE = {}
    plt.rcParams.update({'font.size': 12})
    plt.figure(figsize=(ncols*4,3*4), dpi=200)
    for i, el in enumerate(species):
        sel = elms==el
        for r, (kind, key) in enumerate(rows):
            plt.subplot(3, ncols, r*ncols+i+1)
            ref, pred = stacked[key]
            title = kind+' Forces (ev/A) '
            if r == 0:
                # only the top panel of each column carries the element name
                title = el+'\n'+title
            RMSE[kind+'Forces'+el] = plot_prop(ref[sel,:].flatten(), pred[sel,:].flatten(),
                                               title=title, labs=['DFT', 'GAP'])
    # last column: all elements together
    for r, (kind, key) in enumerate(rows):
        plt.subplot(3, ncols, (r+1)*ncols)
        ref, pred = stacked[key]
        RMSE[kind+'Forces'] = plot_prop(ref.flatten(), pred.flatten(),
                                        title=kind+' Forces (ev/A) ', labs=['DFT', 'GAP'])
    plt.tight_layout(pad=0.5)
    plt.savefig('forces.png')
    plt.close()
    return RMSE
486 | 
def plot_intra_inter_virial(db_test, db_pred):
    """Draw a 1x3 row of reference-vs-prediction virial panels into 'virial.png'.

    Both databases are narrowed with
    ``ea.sel_by_conf_type(..., 'LiquidConfigs')`` before the info entries
    'virial_intram', 'virial_interm' and 'virial' are read.

    Returns a dict with keys 'IntraVirial', 'InterVirial' and 'TotalVirial'
    holding the value ``plot_prop`` returned for each panel.
    """
    ref_db = ea.sel_by_conf_type(db_test, 'LiquidConfigs')
    mod_db = ea.sel_by_conf_type(db_pred, 'LiquidConfigs')

    # (result key, info entry, panel title), left to right
    panels = (
        ('IntraVirial', 'virial_intram', 'Intra Virial (ev/atom) '),
        ('InterVirial', 'virial_interm', 'Inter Virial (ev/atom) '),
        ('TotalVirial', 'virial', 'Total Virial (ev/atom) '),
    )

    RMSE = {}
    plt.rcParams.update({'font.size': 12})
    plt.figure(figsize=(12,4), dpi=200)
    for col, (name, entry, title) in enumerate(panels, start=1):
        plt.subplot(1,3,col)
        ref = ea.get_prop(ref_db, 'info', entry, True).flatten()
        mod = ea.get_prop(mod_db, 'info', entry, True).flatten()
        RMSE[name] = plot_prop(ref, mod, title=title, labs=['DFT', 'GAP'])
    plt.tight_layout(pad=0.5)
    plt.savefig('virial.png')
    plt.close()
    return RMSE
510 | 
def loadtxttag(fname):
    """Load a numeric text table whose second line names the columns.

    The first line of the file is skipped. The second line must split into a
    lone ``#`` token followed by one name per column; a name may carry units
    in parentheses at its end, e.g. ``Temp(K)``. The numbers are read with
    ``np.loadtxt``.

    Returns a dict mapping each column name (units stripped) to
    ``{'data': column}``, plus a ``'units'`` entry when units were given.

    Raises AssertionError if the second line does not start with ``#``.
    """
    with open(fname, 'r') as file:
        file.readline() # first line is a free-form comment, not used
        header = file.readline().split()
        assert header[0] == '#'
        fields = header[1:]
    # ndmin=2 keeps a (rows, cols) shape even for a single data row, which
    # loadtxt would otherwise squeeze to 1-D and break the column slicing.
    dat = np.loadtxt(fname, ndmin=2)
    db = dict()
    for i, aux in enumerate(fields):
        buf = re.findall(r'\((.*?)\)', aux) # raw string: '\(' is not a valid str escape
        if buf:
            fld = aux[:-len(buf[0])-2] # drop the trailing "(units)"
            db[fld] = {'units':buf[0], 'data':dat[:,i]}
        else:
            fld = aux
            db[fld] = {'data':dat[:,i]}
    return db
528 | 
def convert_units(dat, key, units, fact):
    """Relabel and rescale field ``key`` in every entry of ``dat``, in place.

    For each value of ``dat`` the 'units' of ``entry[key]`` is set to
    ``units`` and its 'data' is multiplied in place by ``fact``.
    """
    for entry in dat.values():
        field = entry[key]
        field['units'] = units
        field['data'] *= fact
533 | 
def rename_key(dat, key_old, key_new):
    """Move ``key_old`` to ``key_new`` inside every entry of ``dat``, in place.

    Raises KeyError if an entry has no ``key_old``.
    """
    for entry in dat.values():
        moved = entry.pop(key_old)
        entry[key_new] = moved
537 | 
def simpleplot(db, i, j, byKey=False, **kwargs):
    """Plot one field of ``db`` against another on the current axes.

    ``i`` and ``j`` select the x and y fields: as keys of ``db`` when
    ``byKey`` is true, otherwise as positions in ``list(db)``. Each field is a
    dict with a 'data' entry and an optional 'units' entry that is appended to
    the axis label. The keyword ``skip`` (default 0) drops that many leading
    samples; every other keyword is forwarded to ``plt.plot``.
    """
    if byKey:
        xkey, ykey = i, j
    else:
        names = list(db)
        xkey, ykey = names[i], names[j]

    def _unit_suffix(key):
        # ' (units)' when the field declares units, otherwise nothing
        return ' ('+db[key]['units']+')' if 'units' in db[key] else ''

    xunit = _unit_suffix(xkey)
    yunit = _unit_suffix(ykey)
    skip = kwargs.pop('skip', 0)
    plt.plot(db[xkey]['data'][skip:], db[ykey]['data'][skip:], **kwargs)
    plt.xlabel(xkey+xunit)
    plt.ylabel(ykey+yunit)
561 |     
def listdict_to_dictlist(dct):
    """Turn a dict of lists into a list of dicts.

    Values that are exactly of type ``list`` are split element-wise; every
    other value is repeated in each output dict. If no value is a list, a
    single dict is returned.

    Raises AssertionError if the list values differ in length.
    """
    lengths = [len(val) for val in dct.values() if type(val)==list]
    if not lengths:
        lengths = [1] #edge case: none of the dct elements is a list
    assert all(n==lengths[0] for n in lengths) #check that all lists are the same size
    return [
        {key: (val[pos] if type(val)==list else val) for key, val in dct.items()}
        for pos in range(lengths[0])
    ]
577 | 
def multiplot(db, i, jcol, **kwargs):
    """Plot several fields of ``db`` against field ``i`` on the current axes.

    ``i`` and the entries of ``jcol`` are positions in ``list(db)``. Keyword
    arguments are expanded with ``listdict_to_dictlist``: a list value
    supplies one setting per series, any other value is shared by all series.
    Each series is labelled with its field name unless a ``label`` keyword is
    supplied. All plotted series must share the same 'units', which are shown
    on the y axis.

    Raises AssertionError if the expanded keywords do not match ``jcol`` in
    length or if the series units differ.
    """
    keys = list(db)
    unts = db[keys[jcol[0]]]['units']
    # Expand the per-series keywords once instead of on every iteration.
    if kwargs:
        kwarg_list = listdict_to_dictlist(kwargs)
        assert len(kwarg_list)==len(jcol)
    else:
        kwarg_list = [{} for _ in jcol]
    for k,j in enumerate(jcol):
        # A caller-supplied label overrides the default; passing both as
        # keywords would raise a duplicate-keyword TypeError.
        opts = {'label': keys[j]}
        opts.update(kwarg_list[k])
        simpleplot(db, i, j, **opts)
        assert unts==db[keys[j]]['units']
    plt.ylabel('Series ('+unts+')')
    plt.legend()
591 | 


--------------------------------------------------------------------------------
/aseMolec/anaAtoms.py:
--------------------------------------------------------------------------------
  1 | from ase import neighborlist
  2 | from ase import Atoms
  3 | import ase.geometry
  4 | from scipy import sparse
  5 | import numpy as np
  6 | from aseMolec import extAtoms as ea
  7 | import scipy.spatial
  8 | from ase.ga.utilities import get_rdf
  9 | import ase.data
 10 | import warnings
 11 | from collections import Counter
 12 | import hashlib
 13 | chem_syms = ase.data.chemical_symbols
 14 | 
 15 | #extends fct to dictionary if needed
 16 | def modif_natural_cutoffs(at, fct):
 17 |     if type(fct) is int or type(fct) is float:
 18 |         return neighborlist.natural_cutoffs(at, mult=fct)
 19 |     elif type(fct) is dict:
 20 |         cutOff = neighborlist.natural_cutoffs(at, mult=1)
 21 |         newCutOff = []
 22 |         for ctf, el in zip(cutOff, at.get_chemical_symbols()):
 23 |             if el in fct:
 24 |                 newCutOff += [ctf*fct[el]]
 25 |             else:
 26 |                 newCutOff += [ctf]
 27 |         return newCutOff
 28 |     else:
 29 |         raise NameError('Unknown fct type '+str(type(fct)))
 30 | 
 31 | #returns molecular name based on formula
 32 | def mol_chem_name(formula):
 33 |     if formula=='C3H4O3':
 34 |         return 'EC'
 35 |     elif formula=='C4H6O3':
 36 |         return 'PC'
 37 |     elif formula=='C3H2O3':
 38 |         return 'VC'
 39 |     elif formula=='C4H8O3':
 40 |         return 'EMC'
 41 |     elif formula=='C3H6O3':
 42 |         return 'DMC'
 43 |     elif formula=='C5H10O3':
 44 |         return 'DEC'
 45 |     elif formula=='Li':
 46 |         return 'Li'
 47 |     elif formula=='F6P':
 48 |         return 'PF6'
 49 |     elif len(formula)==1:
 50 |         return formula
 51 |     else:
 52 |         return 'UNK_'+formula
 53 | 
 54 | def mol_config(molSym):
 55 |     d = dict(Counter(molSym))
 56 |     name = ''
 57 |     name_UNK = ''
 58 |     for k in sorted(list(d.keys())):
 59 |         if 'UNK_' in k:
 60 |             name_UNK += (k + '(%d)' % d[k] + ':')
 61 |         else:
 62 |             name += (k + '(%d)' % d[k] + ':')
 63 |     return (name[:-1]+'---'*(not len(name_UNK)==0)+name_UNK[:-1])
 64 | 
 65 | #computes molID for single config, not adding molID to atoms.arrays
 66 | def find_molec(at, fct=1.0):
 67 |     #from https://wiki.fysik.dtu.dk/ase/ase/neighborlist.html
 68 |     cutOff = modif_natural_cutoffs(at, fct)
 69 |     nbLst = neighborlist.NeighborList(cutOff, self_interaction=False, bothways=True)
 70 |     nbLst.update(at)
 71 |     conMat = nbLst.get_connectivity_matrix(sparse=True)
 72 |     Nmol, molID = sparse.csgraph.connected_components(conMat)
 73 |     Natoms, Nmols = np.unique(np.unique(molID, return_counts=True)[1], return_counts=True)
 74 |     return list(zip(Nmols,Natoms))
 75 | 
 76 | #computes molIDs
 77 | def find_molecs(db, fct=1.0, return_mask=False):
 78 |     masks = []
 79 |     for at in db:
 80 |         #from https://wiki.fysik.dtu.dk/ase/ase/neighborlist.html
 81 |         cutOff = modif_natural_cutoffs(at, fct)
 82 |         nbLst = neighborlist.NeighborList(cutOff, self_interaction=False, bothways=True)
 83 |         nbLst.update(at)
 84 |         conMat = nbLst.get_connectivity_matrix(sparse=True)
 85 |         Nmol, molID = sparse.csgraph.connected_components(conMat)
 86 |         at.arrays['molID'] = molID
 87 |         if return_mask:
 88 |             mask = np.zeros([len(molID)]*2)
 89 |             for mID in np.unique(molID):
 90 |                 mask += (((molID==mID).reshape(1,-1))*((molID==mID).reshape(-1,1))).astype(int)
 91 |             masks += [mask]
 92 |     if return_mask:
 93 |         return masks
 94 | 
 95 | #computes number of neighbours
 96 | def find_num_nb(db, Rcut=6.0):
 97 |     NumNbs = []
 98 |     for at in db:
 99 |         nbLst = neighborlist.NeighborList([Rcut/2.0]*len(at), self_interaction=False, bothways=True)
100 |         nbLst.update(at)
101 |         conMat = nbLst.get_connectivity_matrix(sparse=False)
102 |         NumNbs += list(np.sum(conMat, axis=0))
103 |     return np.array(NumNbs)
104 | 
#extracts molecules CM into a new trajectory without changing any coordinates
#designed mainly for extracting diffusion coefficients, assumes no wrapping
#assumes molecules are full !!!IMPORTANT, calculation of CM, inertia, torque ... !!!
#this is a bit redundant with wrap_molecs, maybe could be combined in the future
def extract_molecs(db, fct=1, intra_inter=False, prog=False):
    """Build one centre-of-mass Atoms object per configuration.

    ``at.arrays['molID']`` is used when present, otherwise it is created with
    ``find_molecs([at], fct=fct)``. Each molecule becomes one site carrying
    its total mass, name ('molSym') and flattened inertia tensor
    ('momInertia'). If the configuration has 'initial_charges', the net
    charge, the dipole about the centre of mass ('dipoles') and its magnitude
    ('dipoles_abs') are added; 'atomic_dipoles', when also present, are summed
    into the dipole. If it has 'forces', the net force and the torque about
    the centre of mass are added.

    With ``intra_inter`` true, ``at.arrays`` additionally receives
    'forces_trans', 'forces_rot' and 'forces_vib', written atom by atom so the
    result does not depend on how a molecule's atoms are ordered.

    Returns the list of centre-of-mass Atoms objects.
    Raises ValueError if ``intra_inter`` is requested without 'forces'.
    """
    moldb = []
    for i, at in enumerate(db):
        if prog:
            print(i)
        if 'molID' not in at.arrays.keys():
            find_molecs([at], fct=fct)
        molID = at.arrays['molID']
        has_charges = 'initial_charges' in at.arrays
        has_atomic_dipoles = 'atomic_dipoles' in at.arrays
        has_forces = 'forces' in at.arrays
        molCM = []
        molM = []
        molI = []
        molSym = []
        molQ = []
        molD = []
        molF = []
        molT = []
        # Filled through the molecule mask; concatenating per-molecule blocks
        # would mis-assign forces when a molecule's atoms are not contiguous.
        atftrn = np.zeros((len(at), 3))
        atfrot = np.zeros((len(at), 3))
        for m in np.unique(molID):
            members = molID==m
            mol = at[members] #copy by value
            mass = mol.get_masses()
            M = np.sum(mass)
            cm = np.sum(mol.positions*mass.reshape(-1,1), axis=0)/M
            I = mom_inertia(mol)
            molCM.append(cm)
            molM.append(M)
            molI.append(I.flatten())
            molSym.append(mol_chem_name(mol.symbols.get_chemical_formula()))
            if has_charges:
                charge = mol.arrays['initial_charges']
                D = np.sum((mol.positions-cm)*charge.reshape(-1,1), axis=0) #subtract cm, so dipole is also correct for charged molecules, e.g. PF6
                if has_atomic_dipoles:
                    # only combined with a charge-based dipole; without charges
                    # there is no dipole entry to add to
                    D = D + mol.arrays['atomic_dipoles'].sum(0)
                molQ.append(np.sum(charge))
                molD.append(D)
            if has_forces:
                forces = mol.arrays['forces']
                Fcm = np.sum(forces, axis=0)
                Tcm = np.sum(np.cross(mol.positions-cm, forces, axis=1), axis=0)
                atftrn[members] = mass.reshape(-1,1)/M*Fcm #redistributed to atoms
                # frot stays zero for single atoms (zero inertia, e.g. Li-ion) and,
                #WARNING: not implemented yet, for linear molecules (singular moment of inertia)
                single_atom = np.allclose(I, 0, atol=1e-6)
                if not single_atom and not np.allclose(np.linalg.det(I), 0, atol=1e-6):
                    atfrot[members] = mass.reshape(-1,1)*np.cross(np.linalg.solve(I, Tcm),mol.positions-cm) #redistributed to atoms
                molF.append(Fcm)
                molT.append(Tcm)
        newmol = Atoms(positions=np.array(molCM), pbc=True, cell=at.cell)
        newmol.set_masses(molM)
        newmol.arrays['molSym'] = np.array(molSym)
        newmol.arrays['momInertia'] = np.array(molI)
        if molQ:
            newmol.arrays['initial_charges'] = np.array(molQ)
            newmol.arrays['dipoles'] = np.array(molD)
            newmol.arrays['dipoles_abs'] = np.sqrt(np.sum(np.array(molD)**2, axis=1))/0.2081943 #from e*A to Debye
        if molF:
            newmol.arrays['forces'] = np.array(molF)
            newmol.arrays['torques'] = np.array(molT)
        moldb.append(newmol)
        if intra_inter:
            if not has_forces:
                raise ValueError("intra_inter=True requires 'forces' in at.arrays")
            at.arrays['forces_trans'] = atftrn
            at.arrays['forces_rot'] = atfrot
            at.arrays['forces_vib'] = at.arrays['forces']-at.arrays['forces_trans']-at.arrays['forces_rot']
    return moldb
176 | 
def extract_molecs_tag(db, fct=1, intra_inter=False, prog=False, tag=''):
    """Build one centre-of-mass Atoms object per configuration, reading tagged arrays.

    Works like ``extract_molecs`` but reads ``tag+'initial_charges'`` and
    ``tag+'forces'`` from ``at.arrays`` and writes its charge, dipole, force
    and torque results under the same ``tag`` prefix. Atomic dipoles are not
    considered here.

    With ``intra_inter`` true, ``at.arrays`` additionally receives
    ``tag+'forces_trans'``, ``tag+'forces_rot'`` and ``tag+'forces_vib'``,
    written atom by atom so the result does not depend on how a molecule's
    atoms are ordered.

    Returns the list of centre-of-mass Atoms objects.
    Raises ValueError if ``intra_inter`` is requested without the force array.
    """
    charge_key = tag+'initial_charges'
    force_key = tag+'forces'
    moldb = []
    for i, at in enumerate(db):
        if prog:
            print(i)
        if 'molID' not in at.arrays.keys():
            find_molecs([at], fct=fct)
        molID = at.arrays['molID']
        has_charges = charge_key in at.arrays
        has_forces = force_key in at.arrays
        molCM = []
        molM = []
        molI = []
        molSym = []
        molQ = []
        molD = []
        molF = []
        molT = []
        # Filled through the molecule mask; concatenating per-molecule blocks
        # would mis-assign forces when a molecule's atoms are not contiguous.
        atftrn = np.zeros((len(at), 3))
        atfrot = np.zeros((len(at), 3))
        for m in np.unique(molID):
            members = molID==m
            mol = at[members] #copy by value
            mass = mol.get_masses()
            M = np.sum(mass)
            cm = np.sum(mol.positions*mass.reshape(-1,1), axis=0)/M
            I = mom_inertia(mol)
            molCM.append(cm)
            molM.append(M)
            molI.append(I.flatten())
            molSym.append(mol_chem_name(mol.symbols.get_chemical_formula()))
            if has_charges:
                charge = mol.arrays[charge_key]
                D = np.sum((mol.positions-cm)*charge.reshape(-1,1), axis=0) #subtract cm, so dipole is also correct for charged molecules, e.g. PF6
                molQ.append(np.sum(charge))
                molD.append(D)
            if has_forces:
                forces = mol.arrays[force_key]
                Fcm = np.sum(forces, axis=0)
                Tcm = np.sum(np.cross(mol.positions-cm, forces, axis=1), axis=0)
                atftrn[members] = mass.reshape(-1,1)/M*Fcm #redistributed to atoms
                # frot stays zero for single atoms (zero inertia, e.g. Li-ion) and,
                #WARNING: not implemented yet, for linear molecules (singular moment of inertia)
                single_atom = np.allclose(I, 0, atol=1e-6)
                if not single_atom and not np.allclose(np.linalg.det(I), 0, atol=1e-6):
                    atfrot[members] = mass.reshape(-1,1)*np.cross(np.linalg.solve(I, Tcm),mol.positions-cm) #redistributed to atoms
                molF.append(Fcm)
                molT.append(Tcm)
        newmol = Atoms(positions=np.array(molCM), pbc=True, cell=at.cell)
        newmol.set_masses(molM)
        newmol.arrays['molSym'] = np.array(molSym)
        newmol.arrays['momInertia'] = np.array(molI)
        if molQ:
            newmol.arrays[tag+'initial_charges'] = np.array(molQ)
            newmol.arrays[tag+'dipoles'] = np.array(molD)
            newmol.arrays[tag+'dipoles_abs'] = np.sqrt(np.sum(np.array(molD)**2, axis=1))/0.2081943 #from e*A to Debye
        if molF:
            newmol.arrays[tag+'forces'] = np.array(molF)
            newmol.arrays[tag+'torques'] = np.array(molT)
        moldb.append(newmol)
        if intra_inter:
            if not has_forces:
                raise ValueError("intra_inter=True requires '"+force_key+"' in at.arrays")
            at.arrays[tag+'forces_trans'] = atftrn
            at.arrays[tag+'forces_rot'] = atfrot
            at.arrays[tag+'forces_vib'] = at.arrays[tag+'forces']-at.arrays[tag+'forces_trans']-at.arrays[tag+'forces_rot']
    return moldb
242 | 
243 | #assumes coordinates are unwrapped
def mom_inertia(mol):
    """Return the 3x3 inertia tensor of ``mol`` about its centre of mass.

    Uses ``mol.get_masses()`` and ``mol.positions`` as they are, i.e. the
    coordinates are taken to be unwrapped already.
    """
    m = mol.get_masses()
    com = np.sum(m.reshape(-1,1)*mol.positions,axis=0)/np.sum(m)
    x, y, z = (mol.positions-com).T
    # diagonal terms
    Ixx = np.sum(m*(y**2+z**2))
    Iyy = np.sum(m*(x**2+z**2))
    Izz = np.sum(m*(x**2+y**2))
    # products of inertia; the tensor is symmetric
    Ixy = -np.sum(m*x*y)
    Ixz = -np.sum(m*x*z)
    Iyz = -np.sum(m*y*z)
    return np.array([[Ixx, Ixy, Ixz],
                     [Ixy, Iyy, Iyz],
                     [Ixz, Iyz, Izz]], dtype=float)
259 | 
#wraps single molecule: completes molecule over pbc and shifts COM back to unit cell
def wrap_molec(mol, fct=1.0, full=False):
    """Make one molecule contiguous across periodic boundaries, in place.

    Unless ``full`` is true, atoms are visited breadth-first starting from
    atom 0 and each newly reached neighbour is shifted by the cell offset the
    neighbour list reports for it. Afterwards the whole molecule is translated
    so that its centre of mass equals the wrapped centre of mass.

    ``fct`` is passed to ``modif_natural_cutoffs`` to build the cutoffs.
    Modifies ``mol.positions`` and returns the wrapped centre of mass.
    """
    if not full:
        cutOff = modif_natural_cutoffs(mol, fct)
        nbLst = neighborlist.NeighborList(cutOff, self_interaction=False, bothways=True)
        visited = []
        tovisit = [0] # FIFO queue, traversal starts at the first atom
        while tovisit:
            i = tovisit.pop(0)
            # refreshed on every step; presumably so the offsets reflect the
            # positions already shifted below -- TODO confirm
            nbLst.update(mol)
            nbs, vecs = nbLst.get_neighbors(i)
            for j, v in zip(nbs, vecs):
                if (j not in visited) and (j not in tovisit):
                    # v is the neighbour's cell offset; dot with the cell gives a Cartesian shift
                    mol.positions[j,:] += np.dot(v, mol.cell)
                    tovisit.append(j)
            visited.append(i)
    m = mol.get_masses()
    cm = np.sum(mol.positions*m.reshape(-1,1), axis=0)/np.sum(m)
    wrap_cm = ase.geometry.wrap_positions(positions=[cm], cell=mol.cell, pbc=mol.pbc)[0]
    mol.positions += (wrap_cm-cm) # rigid shift of the whole molecule
    return wrap_cm
281 | 
282 | #wraps all molecules over a list of configurations
def wrap_molecs(db, fct=1.0, full=False, prog=False, returnMols=False):
    """Wrap every molecule of every configuration, in place.

    ``at.arrays['molID']`` is created with ``find_molecs`` when missing. Each
    molecule is passed through ``wrap_molec`` and its positions are written
    back into ``at``. ``at.info['Nmols']`` and ``at.info['Comp']`` are set
    from the resulting molecule list. With ``prog`` a 1-based counter is
    printed per configuration.

    Returns the list of centre-of-mass Atoms objects when ``returnMols`` is
    true, otherwise None.
    """
    moldb = []
    for count, at in enumerate(db, start=1):
        if prog:
            print(count)
        if 'molID' not in at.arrays:
            find_molecs([at], fct)
        molID = at.arrays['molID']
        centres = []
        names = []
        for m in np.unique(molID):
            members = molID==m
            mol = at[members] #copy by value
            centres.append(wrap_molec(mol, fct, full))
            # at[members] is a copy, so positions must be written back explicitly
            at.positions[members,:] = mol.positions
            names.append(mol_chem_name(mol.symbols.get_chemical_formula()))
        newmol = Atoms(positions=np.array(centres), pbc=True, cell=at.cell)
        newmol.arrays['molSym'] = np.array(names)
        at.info['Nmols'] = len(newmol)
        at.info['Comp'] = mol_config(newmol.arrays['molSym'])
        moldb.append(newmol)
    return moldb if returnMols else None
309 | 
310 | #wraps all molecules over a list of configurations, given a mask
def wrap_molecs_partial(db, fct=1.0, full=False, prog=False, mask=None, wrap=True):
    """Identify and wrap the molecules formed by a subset of atoms, in place.

    When ``at.arrays['molID']`` is missing, molecules are searched only among
    the atoms selected by ``mask`` (all atoms when ``mask`` is None); every
    other atom gets molID -1 and is skipped. For each molecule the chemical
    formula is stored per atom in ``at.arrays['molSym']`` and the first four
    hex digits of its SHA-256 in ``at.arrays['molHash']``; skipped atoms keep
    'None' and '0000'. Positions are only written back when ``wrap`` is true.
    """
    for count, at in enumerate(db, start=1):
        if prog:
            print(count)
        if 'molID' not in at.arrays.keys():
            # mask=None is the declared default: treat it as "select everything"
            sel = np.ones(len(at), dtype=bool) if mask is None else mask
            masked_at = at[sel]
            find_molecs([masked_at], fct)
            at.arrays['molID'] = np.array([-1]*len(at))
            at.arrays['molID'][sel] = masked_at.arrays['molID']
        molID = at.arrays['molID']
        # Collect labels as objects and convert at the end: a fixed-width
        # '<U4' array would silently truncate formulas such as 'C3H4O3'.
        molSyms = np.full(len(at), 'None', dtype=object)
        molHashes = np.full(len(at), '0000', dtype=object)
        for m in np.unique(molID):
            if m>=0:
                members = molID==m
                mol = at[members] #copy by value
                wrap_molec(mol, fct, full)
                if wrap: #this is counterintuitive given the name of the function, but for now it's the easiest way to achieve this
                    at.positions[members,:] = mol.positions
                molSym = mol.symbols.get_chemical_formula()
                molSyms[members] = molSym
                molHashes[members] = hashlib.sha256(molSym.encode()).hexdigest()[:4]
        at.arrays['molSym'] = molSyms.astype(str)
        at.arrays['molHash'] = molHashes.astype(str)
334 | 
335 | #splits condensed phase into separate molecules
def split_molecs(db, scale=1.0):
    """Split every configuration into one Atoms object per molecule.

    The configurations are first wrapped with ``wrap_molecs`` and passed to
    ``ea.hash_atoms`` (both act on ``db`` in place). Each molecule is a copy
    of its atoms whose cell is multiplied by the scale of its parent
    configuration.

    ``scale`` is either one number used for all configurations or a sequence
    with one factor per configuration.

    Returns the flat list of molecules in configuration order.
    """
    wrap_molecs(db, fct=1.0, full=False, prog=False)
    ea.hash_atoms(db)
    smdb = []
    # Broadcast any scalar (float, int or NumPy scalar); the former
    # isinstance(scale, float) test left ints unexpanded and failed on scale[i].
    if np.ndim(scale)==0:
        scale = np.ones(len(db))*scale
    for i, at in enumerate(db):
        molID = at.arrays['molID']
        for m in np.unique(molID):
            buf = at[molID==m] #copy by value
            buf.cell *= scale[i]
            smdb += [buf]
    return smdb
349 | 
350 | #splits condensed phase into dictionary of molecules by type
def split_molecs_dict(db, L=20.0):
    """Split every configuration into molecules grouped by molecule name.

    The configurations are first wrapped with ``wrap_molecs`` and passed to
    ``ea.hash_atoms`` (both act on ``db`` in place). Each molecule is a copy
    of its atoms placed in an ``L`` x ``L`` x ``L`` cell, centred, and tagged
    with its molecule index in ``info['mID']``.

    Returns a dict mapping ``mol_chem_name`` of the formula to the list of
    molecules with that name.
    """
    wrap_molecs(db, fct=1.0, full=False, prog=False)
    ea.hash_atoms(db)
    by_name = {}
    for at in db:
        molID = at.arrays['molID']
        for m in np.unique(molID):
            piece = at[molID==m] #copy by value
            piece.cell = [L,L,L]
            piece.center()
            piece.info['mID'] = m
            name = mol_chem_name(piece.get_chemical_formula())
            by_name.setdefault(name, []).append(piece)
    return by_name
368 | 
369 | #collects intra- and inter- molecular contributions
370 | #checked in 02-DFTcalcs-Castep/10-EC4-EMC8-singleMolec-PBEG06/FiniteSizeCheck
371 | #intra-virial is volume independent, while intra-stress is not
372 | #therefore decomposition only works for the virial
def collect_molec_results(db, smdb, fext='', dryrun=False, molecEng=True):
    """Split energy, virial and forces of each configuration into intra/inter parts.

    The molecules belonging to a configuration are picked from ``smdb`` with
    ``ea.sel_by_uid`` and are taken to be in the same atom order as the
    configuration. Intra-molecular values are the sum (energy, virial) or the
    concatenation (forces) over those molecules; inter-molecular values are
    the configuration's own values minus the intra part. ``fext`` is appended
    to the property names. The virial is only processed when
    ``'virial'+fext`` is present in ``at.info``.

    With ``dryrun`` nothing is stored; the summed absolute position mismatch
    between configuration and molecules is printed instead. With ``molecEng``
    the per-molecule energies are also stored.
    """
    eng = 'energy'+fext
    vir = 'virial'+fext
    frc = 'forces'+fext
    for at in db:
        sel = ea.sel_by_uid(smdb, at.info['uid']) #assumes molecules are in the original condensed phase order
        if dryrun:
            mol_positions = np.concatenate(ea.get_prop(sel,'arrays','positions'))
            print(np.sum(np.abs(at.positions - mol_positions))) #check if that was true
            continue
        # intra-molecular contributions
        if molecEng:
            at.info[eng+'_intram_mol'] = ea.get_prop(sel, 'calc', eng)
        at.info[eng+'_intram'] = sum(ea.get_prop(sel, 'calc', eng))
        if vir in at.info:
            at.info[vir+'_intram'] = sum(ea.get_prop(sel, 'info', vir))
        at.arrays[frc+'_intram'] = np.concatenate(ea.get_prop(sel, 'calc', frc)).astype(float)
        # inter-molecular contributions: total minus intra
        results = at.calc.__dict__["results"]
        at.info[eng+'_interm'] = results[eng]-at.info[eng+'_intram']
        if vir in at.info:
            at.info[vir+'_interm'] = at.info[vir]-at.info[vir+'_intram']
        at.arrays[frc+'_interm'] = results[frc]-at.arrays[frc+'_intram']
389 | 
390 | #collects molecules without assuming any order, but expects mID in info
def collect_molec_results_dict(db, smdb, fext='', dryrun=False):
    """Split energy, virial, forces (and charges) into intra/inter parts.

    The molecules belonging to a configuration are picked from ``smdb`` with
    ``ea.sel_by_uid`` and ordered by their ``info['mID']``, so no particular
    order of ``smdb`` is required. Intra-molecular values are the sum
    (energy, virial) or concatenation (forces, charges) over the molecules;
    inter-molecular values are the configuration's values minus the intra
    part. ``fext`` is appended to the property names.

    Charges are split only when ``'initial_charges'+fext`` is present in the
    arrays of the configuration and of every one of its molecules.

    With ``dryrun`` nothing is stored; the largest deviation of the molecule
    positions from a pure translation is printed instead.
    """
    eng = 'energy'+fext
    vir = 'virial'+fext
    frc = 'forces'+fext
    chg = 'initial_charges'+fext
    for at in db:
        uid = at.info['uid']
        sm = ea.sel_by_uid(smdb, uid)
        idx = np.argsort(ea.get_prop(sm, 'info', 'mID'))
        sel = [sm[i] for i in idx]
        #check positions differ only up to a translation
        if dryrun:
            molID = at.arrays['molID']
            mdiffs = []
            for m in np.unique(at.arrays['molID']):
                diffs = at.positions[molID==m,:]-sel[m].positions
                mdiffs += [np.max(np.abs(diffs-diffs[0,:]))]
            print(np.max(mdiffs))
            continue
        # The former fext=="" / else branches were identical, so a single
        # code path is kept.
        at.info[eng+'_intram'] = sum(ea.get_prop(sel, 'calc', eng))
        at.info[vir+'_intram'] = sum(ea.get_prop(sel, 'info', vir))
        at.arrays[frc+'_intram'] = np.concatenate(ea.get_prop(sel, 'calc', frc)).astype(float)
        results = at.calc.__dict__["results"]
        at.info[eng+'_interm'] = results[eng]-at.info[eng+'_intram']
        at.info[vir+'_interm'] = at.info[vir]-at.info[vir+'_intram']
        at.arrays[frc+'_interm'] = results[frc]-at.arrays[frc+'_intram']
        # Charges are read from at.arrays, so test for them there; the former
        # at.info test did not match where the data is actually read from.
        if chg in at.arrays and all(chg in mol.arrays for mol in sel):
            at.arrays[chg+'_intram'] = np.concatenate(ea.get_prop(sel, 'arrays', chg)).astype(float)
            at.arrays[chg+'_interm'] = at.arrays[chg]-at.arrays[chg+'_intram']
426 | 
427 | #starting from one configuration, adjusts the volume according to vol_fracs
def scan_vol(at, vol_fracs, frozen=True):
    """Create copies of ``at`` at rescaled volumes.

    ``vol_fracs`` holds the volume ratios (array, list or a single number);
    the cell of each copy is multiplied by the cube root of the ratio. With
    ``frozen`` true every molecule is translated rigidly by the displacement
    of its wrapped centre of mass, otherwise all atomic positions are
    multiplied by the same factor.

    ``at`` itself is wrapped in place by ``wrap_molecs`` before copying.
    Returns the list of rescaled copies, one per volume ratio.
    """
    # asarray/atleast_1d lets lists and scalars through; a plain list has no ** operator
    lat_fracs = np.atleast_1d(np.asarray(vol_fracs, dtype=float))**(1.0/3.0)
    mol = wrap_molecs([at], fct=1, full=False, prog=False, returnMols=True)[0]
    molID = at.arrays['molID']
    mol_ids = np.unique(molID) # same order in which wrap_molecs built the centres
    db = []
    for f in lat_fracs:
        nat = at.copy()
        nat.cell *= f
        if frozen:
            mol_disps = mol.positions*(f-1)
            for row, m in enumerate(mol_ids):
                nat.positions[molID==m,:] += mol_disps[row,:]
        else:
            nat.positions *= f
        db.append(nat)
    return db
446 | 
447 | #find voids: copied from https://github.com/gabor1/workflow/blob/main/wfl/utils/find_voids.py
def find_voids(at):
    """Return a copy of ``at`` with 'X' atoms added at Voronoi vertices.

    The Voronoi tessellation is computed on a 3x3x3 supercell shifted so that
    the original cell sits in the middle. Vertices whose fractional
    coordinates lie within 0.6 of the cell centre along every axis are added
    as 'X' atoms; afterwards, of any two entries closer than 0.1 (minimum
    image), the later one is removed.
    """
    transl_symprec = 1.0e-1
    # keep the original lattice for the fractional-coordinate test
    cell_orig = at.get_cell()
    reciprocal_cell_orig = at.get_reciprocal_cell()
    # supercell, shifted by one lattice vector sum so the original cell is central
    supercell = at * [3, 3, 3]
    supercell.set_positions(supercell.get_positions() - np.sum(cell_orig, axis=0))
    vor = scipy.spatial.Voronoi(supercell.get_positions())
    # keep only vertices in or near the original cell
    frac_vertices = np.matmul(vor.vertices, reciprocal_cell_orig.T)
    near_cell = np.all(np.abs(frac_vertices - 0.5) <= 0.6, axis=1)
    possible_indices = np.where(near_cell)[0]
    vertices = vor.vertices[possible_indices]
    at_w_interst = at.copy()
    at_w_interst.extend(Atoms('X{}'.format(len(possible_indices)), positions=vertices))
    # eliminate duplicates that are equivalent by translation
    dists = at_w_interst.get_all_distances(mic=True)
    del_list = set()
    for i in range(len(at_w_interst) - 1):
        close_later = np.where(dists[i][i + 1:] < transl_symprec)[0]
        del_list.update(close_later + (i + 1))
    del at_w_interst[list(del_list)]
    return at_w_interst
473 | 
def find_voids_grid(db, dx=2.0, xminfct=2.0, prog=False):
    """Mark empty regions of each configuration with a grid of 'X' atoms.

    A regular grid in fractional coordinates is laid over the cell, with
    ``int(diagonal cell entry / dx)`` points along each axis. Grid points
    within ``xminfct`` times ``(3V / (4 pi N))**(1/3)`` of any atom (minimum
    image; V the cell volume, N the number of atoms) are discarded, the rest
    are appended to a copy of the configuration as 'X' atoms.

    Returns ``(db_grid, pts)``: the list of copies and the number of grid
    points kept for each.
    """
    db_grid = []
    pts = []
    for count, at in enumerate(db, start=1):
        if prog:
            print(count)
        N = [int(x) for x in (np.diag(at.cell)/dx)]
        Na = len(at)
        # linspace gives exactly N points in [0, 1); arange with a float step
        # can produce an extra point at ~1.0 through rounding.
        x = np.linspace(0, 1, N[0], endpoint=False)
        y = np.linspace(0, 1, N[1], endpoint=False)
        z = np.linspace(0, 1, N[2], endpoint=False)
        X, Y, Z = np.meshgrid(x, y, z)
        grid = np.dot(np.array([X,Y,Z]).reshape(3,-1).T, at.cell)
        at_wgrid = at.copy()
        at_wgrid.extend(Atoms('X{}'.format(len(grid)), positions=grid))
        dst = at_wgrid.get_all_distances(mic=True)
        dst = dst[Na:,:Na] # rows: grid points, columns: real atoms
        xmin = (3*at.get_volume()/at.get_global_number_of_atoms()/4/np.pi)**(1/3)
        ids = np.where(np.any(dst<=xmin*xminfct, axis=1))[0]
        ids += Na # grid points follow the real atoms in at_wgrid
        del at_wgrid[list(ids)]
        db_grid.append(at_wgrid)
        pts.append(len(grid)-len(ids))
    return db_grid, pts
501 | 
def track_initial_bonds(db, fct=1, prog=False):
    """Follow the lengths of the bonds present in the first frame.

    The bonded pairs are identified once, on ``db[0]``, from a neighbour
    list built with the cutoffs returned by ``modif_natural_cutoffs``.
    The same pairs are then measured in every frame.

    :param db: sequence of ASE Atoms objects (trajectory)
    :param fct: factor forwarded to ``modif_natural_cutoffs``
    :param prog: if True, print a 1-based counter for frames after the first
    :returns: array with one row per initial bond and one column per frame
        (minimum-image distances)
    """
    reference = db[0]
    radii = modif_natural_cutoffs(reference, fct)
    nl = neighborlist.NeighborList(radii, self_interaction=False, bothways=False)
    nl.update(reference)
    bonded = nl.get_connectivity_matrix(sparse=False) == True

    columns = [reference.get_all_distances(mic=True)[bonded].reshape(-1, 1)]
    for step, at in enumerate(db[1:], start=1):
        if prog:
            print(step)
        columns.append(at.get_all_distances(mic=True)[bonded].reshape(-1, 1))
    return np.hstack(columns)
515 | 
def track_distrib_grid(db, N=2, prog=False):
    """Track how mass and atom count are spread over an N x N x N grid.

    Each frame's cell is divided into ``N**3`` equal sub-cells in
    fractional coordinates; every atom is assigned to the sub-cell that
    contains its wrapped scaled position.

    :param db: iterable of ASE Atoms objects
    :param N: number of subdivisions along each cell axis
    :param prog: if True, print a running 1-based frame counter
    :returns: tuple of three arrays of shape (frames, N**3): per-sub-cell
        density (sub-cell mass * N**3 * 10 / 6.022 / cell volume), sub-cell
        mass divided by the frame's mean sub-cell mass, and sub-cell atom
        count divided by the frame's mean sub-cell count
    """
    n_cells = N**3
    strides = np.array([N**2, N**1, N**0])  # flatten (ix, iy, iz) to one index
    mass_rows = []
    count_rows = []
    dens_rows = []
    for step, at in enumerate(db, start=1):
        if prog:
            print(step)
        frac = at.get_scaled_positions(wrap=True)
        m = at.get_masses()
        cell_id = np.sum(np.floor(frac*N)*strides, axis=1).astype(int)
        cell_mass = [np.sum(m[cell_id == c]) for c in range(n_cells)]
        mass_rows.append(cell_mass)
        count_rows.append([sum(cell_id == c) for c in range(n_cells)])
        dens_rows.append([cm*(N**3)*10/6.022/at.get_volume() for cm in cell_mass])
    mass_arr = np.array(mass_rows)
    count_arr = np.array(count_rows)
    rel_mass = mass_arr/np.mean(mass_arr, axis=1).reshape(-1, 1)
    rel_count = count_arr/np.mean(count_arr, axis=1).reshape(-1, 1)
    return np.array(dens_rows), rel_mass, rel_count
534 | 
def mol_env(at, Rcut=6.0, returnEnvs=False):
    """Count, for every entry of ``at``, its neighbours by molecule label.

    Labels are read from ``at.arrays['molSym']``. Two entries are
    neighbours when the neighbour list built with a radius of ``Rcut/2``
    per entry connects them.
    NOTE(review): presumably ``at`` holds one entry per molecule - confirm
    against the caller.

    :param at: ASE Atoms object carrying a 'molSym' array
    :param Rcut: neighbour cutoff distance
    :param returnEnvs: if True, also store the per-entry counts in
        ``at.arrays['molEnv']`` and the label order in ``at.info['molEnvLb']``
    :returns: dict mapping each unique label to a 2-D array with one row
        per entry carrying that label and one column per unique label
        (neighbour counts, columns ordered as ``np.unique`` returns them)
    """
    n_entries = len(at)
    labels_of = at.arrays['molSym']
    lbs = list(np.unique(labels_of))
    molEnv = {lb: [] for lb in lbs}
    rows = []

    nl = neighborlist.NeighborList([Rcut/2]*len(at), self_interaction=False, bothways=True)
    nl.update(at)
    adjacency = nl.get_connectivity_matrix(sparse=False)

    for i in range(n_entries):
        found, howmany = np.unique(labels_of[adjacency[i, :] == 1], return_counts=True)
        tally = dict(zip(found, howmany))
        # Labels absent from the neighbourhood count as zero.
        row = np.array([tally.get(lb, 0) for lb in lbs])
        molEnv[labels_of[i]].append(row)
        rows.append(row)

    for lb in lbs:
        # np.array keeps one row per entry; np.concatenate would flatten to 1-D
        molEnv[lb] = np.array(molEnv[lb])
    if returnEnvs:
        at.arrays['molEnv'] = np.array(rows)
        at.info['molEnvLb'] = lbs
    return molEnv
563 | 
def sublist(subls, totls):
    """Check whether every element of ``subls`` occurs in ``totls``.

    :param subls: candidate elements
    :param totls: reference list
    :returns: ``(True, indices)`` with the position in ``totls`` of each
        element of ``subls`` (first occurrence), or ``(False, None)`` as
        soon as one element is missing
    """
    positions = []
    for item in subls:
        if item not in totls:
            return False, None
        positions.append(totls.index(item))
    return True, positions
572 | 
def mol_envs(moldb, lbs, Rcut=6.0, returnEnvs=False):
    """Collect ``mol_env`` results over many frames on a common label set.

    Each frame is analysed with ``mol_env``; its count columns are then
    placed into the columns given by the position of its labels in ``lbs``.
    Frames containing a label that is not in ``lbs`` are skipped.

    :param moldb: iterable of ASE Atoms objects accepted by ``mol_env``
    :param lbs: full list of labels defining the column order
    :param Rcut: neighbour cutoff forwarded to ``mol_env``
    :param returnEnvs: if True, each processed frame gets
        ``at.arrays['molEnv']`` rewritten on the ``lbs`` columns and
        ``at.info['molEnvLb']`` set to ``lbs``
    :returns: dict mapping every label in ``lbs`` to an integer array with
        ``len(lbs)`` columns, rows stacked over all processed frames
    """
    n_lbs = len(lbs)
    menvs = {lb: np.empty(shape=[0, n_lbs]).astype(int) for lb in lbs}
    for at in moldb:
        menv = mol_env(at, Rcut, returnEnvs)
        present = list(menv.keys())
        known, cols = sublist(present, lbs)
        if not known:
            continue
        for lb in present:
            # Labels missing from this frame keep zero counts.
            padded = np.zeros([menv[lb].shape[0], n_lbs]).astype(int)
            padded[:, cols] = menv[lb]
            menvs[lb] = np.vstack([menvs[lb], padded])
        if returnEnvs:
            padded = np.zeros([len(at), n_lbs]).astype(int)
            padded[:, cols] = at.arrays['molEnv']
            at.info['molEnvLb'] = lbs
            at.arrays['molEnv'] = padded
    return menvs
592 | 
def compute_rdfs(at, rmax, nbins, fct=1.0):
    """Compute element-pair RDFs of one frame, split into intra and inter.

    The minimum-image distance matrix is multiplied by the mask returned
    by ``find_molecs(..., return_mask=True)`` for the intra-molecular part
    and by its complement (``1 - mask``) for the inter-molecular part, and
    passed to ``get_rdf`` for every element pair with ``z2 >= z1``.

    :param at: ASE Atoms object
    :param rmax: maximum distance forwarded to ``get_rdf``
    :param nbins: number of bins forwarded to ``get_rdf``
    :param fct: factor forwarded to ``find_molecs``
    :returns: tuple ``(rdfs, r)``; ``rdfs`` maps keys of the form
        ``<sym1><sym2>_intra`` / ``<sym1><sym2>_inter`` to the RDF scaled
        by the fraction of atoms of the first element (and doubled for
        unlike pairs), and ``r`` is the distance axis of the last
        ``get_rdf`` call
    """
    rdfs = {}
    n_total = len(at)
    species, multiplicity = np.unique(at.numbers, return_counts=True)
    z_counts = dict(zip(species, multiplicity))
    dm = at.get_all_distances(mic=True)
    intra_mask = find_molecs([at], fct, return_mask=True)[0]
    # All intra keys are inserted before the inter keys.
    for suffix, mask in (('_intra', intra_mask), ('_inter', 1-intra_mask)):
        for z1 in z_counts:
            for z2 in z_counts:
                if z2 < z1:
                    continue  # each unordered pair is handled once
                rdf, r = get_rdf(atoms=at, rmax=rmax, nbins=nbins, distance_matrix=dm*mask, elements=[z1,z2], no_dists=False)
                if z2 > z1:
                    rdf *= 2.0
                rdfs[chem_syms[z1]+chem_syms[z2]+suffix] = rdf*z_counts[z1]/n_total
    return rdfs, r
617 | 
def compute_rdfs_traj_avg(traj, rmax, nbins, fct=1.0):
    """Average the ``compute_rdfs`` output over all frames of ``traj``.

    :param traj: non-empty sequence of ASE Atoms objects
    :param rmax: maximum distance forwarded to ``compute_rdfs``
    :param nbins: number of bins forwarded to ``compute_rdfs``
    :param fct: factor forwarded to ``compute_rdfs``
    :returns: tuple ``(rdfs, r)`` with the frame-averaged RDF per key and
        the distance axis taken from the first frame
    """
    n_frames = len(traj)
    total, r = compute_rdfs(traj[0], rmax, nbins, fct)
    for frame in traj[1:]:
        contrib = compute_rdfs(frame, rmax, nbins, fct)[0]
        for key in contrib:
            total[key] += contrib[key]
    for key in total:
        total[key] /= n_frames
    return total, r
628 | 
def compute_rdfs_traj_stats(traj, rmax, nbins, win=1, fct=1.0):
    """Block-average RDFs over windows of ``win`` frames and report stats.

    The trajectory is cut into ``floor(len(traj)/win)`` consecutive
    windows; each is averaged with ``compute_rdfs_traj_avg``. Frames left
    over after the last full window are ignored. The first window is
    always evaluated, even when the trajectory is shorter than ``win``.

    :param traj: sequence of ASE Atoms objects
    :param rmax: maximum distance forwarded to the RDF routines
    :param nbins: number of bins forwarded to the RDF routines
    :param win: number of frames per window
    :param fct: factor forwarded to the RDF routines
    :returns: dict ``{'rdfs': {key: {'avg': [...], 'std': [...]}}, 'r': [...]}``
        where avg/std are taken across windows; all values are plain lists
    """
    n_windows = np.floor(len(traj)/win).astype(int)
    first, r = compute_rdfs_traj_avg(traj[slice(0, win)], rmax, nbins, fct)
    samples = {key: [val] for key, val in first.items()}
    for k in range(1, n_windows):
        block, _ = compute_rdfs_traj_avg(traj[slice(win*k, win*(k+1))], rmax, nbins, fct)
        for key in block:
            samples[key] += [block[key]]
    stats = {}
    for key in samples:
        stacked = np.array(samples[key])
        stats[key] = {'avg': list(np.mean(stacked, axis=0)), 'std': list(np.std(stacked, axis=0))}
    return {'rdfs': stats, 'r': list(r)}
643 | 


--------------------------------------------------------------------------------
/aseMolec/ioLammps.py:
--------------------------------------------------------------------------------
  1 | #copied from https://gitlab.com/ase/ase/-/blob/master/ase/io/lammpsrun.py
  2 | #web: https://wiki.fysik.dtu.dk/ase/_modules/ase/io/lammpsrun.html
  3 | #tweaked for specific use in this work
  4 | 
  5 | import gzip
  6 | import struct
  7 | from os.path import splitext
  8 | from collections import deque
  9 | import numpy as np
 10 | import math
 11 | import re
 12 | 
 13 | from ase.atoms import Atoms
 14 | from ase.quaternions import Quaternions
 15 | from ase.calculators.singlepoint import SinglePointCalculator
 16 | from ase.parallel import paropen
 17 | from ase.calculators.lammps import Prism, convert
 18 | import ase.data
 19 | from ase.utils import reader, writer
 20 | 
 21 | 
 22 | def lammps_data_to_ase_atoms(
 23 |     data,
 24 |     colnames,
 25 |     cell,
 26 |     celldisp,
 27 |     pbc=False,
 28 |     atomsobj=Atoms,
 29 |     order=True,
 30 |     specorder=None,
 31 |     prismobj=None,
 32 |     units="metal",
 33 | ):
 34 |     """Extract positions and other per-atom parameters and create Atoms
 35 | 
 36 |     :param data: per atom data
 37 |     :param colnames: index for data
 38 |     :param cell: cell dimensions
 39 |     :param celldisp: origin shift
 40 |     :param pbc: periodic boundaries
 41 |     :param atomsobj: function to create ase-Atoms object
 42 |     :param order: sort atoms by id. Might be faster to turn off
 43 |     :param specorder: list of species to map lammps types to ase-species
 44 |     (usually .dump files to not contain type to species mapping)
 45 |     :param prismobj: Coordinate transformation between lammps and ase
 46 |     :type prismobj: Prism
 47 |     :param units: lammps units for unit transformation between lammps and ase
 48 |     :returns: Atoms object
 49 |     :rtype: Atoms
 50 | 
 51 |     """
 52 |     # data array of doubles
 53 |     ids = data[:, colnames.index("id")].astype(int)
 54 |     if "types" in colnames:
 55 |         types = data[:, colnames.index("type")].astype(int)
 56 |         if order:
 57 |             sort_order = np.argsort(ids)
 58 |             ids = ids[sort_order]
 59 |             data = data[sort_order, :]
 60 |             types = types[sort_order]
 61 | 
 62 |         # reconstruct types from given specorder
 63 |         if specorder:
 64 |             types = [specorder[t - 1] for t in types]
 65 |     else:
 66 |         types = [1]*len(ids)
 67 | 
 68 |     def get_quantity(labels, quantity=None):
 69 |         try:
 70 |             cols = [colnames.index(label) for label in labels]
 71 |             if quantity:
 72 |                 return convert(data[:, cols], quantity, units, "ASE")
 73 | 
 74 |             return data[:, cols]
 75 |         except ValueError:
 76 |             return None
 77 | 
 78 |     # slice data block into columns
 79 |     # + perform necessary conversions to ASE units
 80 |     positions = get_quantity(["x", "y", "z"], "distance")
 81 |     if positions is None:
 82 |         positions = get_quantity(["xu", "yu", "zu"], "distance")
 83 |     scaled_positions = get_quantity(["xs", "ys", "zs"])
 84 |     velocities = get_quantity(["vx", "vy", "vz"], "velocity")
 85 |     charges = get_quantity(["q"], "charge")
 86 |     masses = get_quantity(["mass"], "mass")
 87 |     forces = get_quantity(["fx", "fy", "fz"], "force")
 88 |     # !TODO: how need quaternions be converted?
 89 |     quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])
 90 | 
 91 |     # reconstruct types from given masses
 92 |     if masses is not None:
 93 |         types = [np.argmin(np.abs(ase.data.atomic_masses-m)) for m in masses]
 94 | 
 95 |     # convert cell
 96 |     cell = convert(cell, "distance", units, "ASE")
 97 |     celldisp = convert(celldisp, "distance", units, "ASE")
 98 |     if prismobj:
 99 |         celldisp = prismobj.vector_to_ase(celldisp)
100 |         cell = prismobj.update_cell(cell)
101 | 
102 |     if quaternions:
103 |         out_atoms = Quaternions(
104 |             symbols=types,
105 |             positions=positions,
106 |             cell=cell,
107 |             celldisp=celldisp,
108 |             pbc=pbc,
109 |             quaternions=quaternions,
110 |         )
111 |     elif positions is not None:
112 |         # reverse coordinations transform to lammps system
113 |         # (for all vectors = pos, vel, force)
114 |         if prismobj:
115 |             positions = prismobj.vector_to_ase(positions, wrap=True)
116 | 
117 |         out_atoms = atomsobj(
118 |             symbols=types,
119 |             positions=positions,
120 |             pbc=pbc,
121 |             celldisp=celldisp,
122 |             cell=cell
123 |         )
124 |     elif scaled_positions is not None:
125 |         out_atoms = atomsobj(
126 |             symbols=types,
127 |             scaled_positions=scaled_positions,
128 |             pbc=pbc,
129 |             celldisp=celldisp,
130 |             cell=cell,
131 |         )
132 | 
133 |     if velocities is not None:
134 |         if prismobj:
135 |             velocities = prismobj.vector_to_ase(velocities)
136 |         out_atoms.set_velocities(velocities)
137 |     if charges is not None:
138 |         out_atoms.set_initial_charges(charges)
139 |     if forces is not None:
140 |         if prismobj:
141 |             forces = prismobj.vector_to_ase(forces)
142 |         # !TODO: use another calculator if available (or move forces
143 |         #        to atoms.property) (other problem: synchronizing
144 |         #        parallel runs)
145 |         # calculator = SinglePointCalculator(out_atoms, energy=0.0, forces=forces)
146 |         # out_atoms.calc = calculator
147 |         out_atoms.arrays['forces'] = forces
148 | 
149 |     # process the extra columns of fixes, variables and computes
150 |     #    that can be dumped, add as additional arrays to atoms object
151 |     for colname in colnames:
152 |         # determine if it is a compute or fix (but not the quaternian)
153 |         if (colname.startswith('f_') or colname.startswith('v_') or
154 |                 (colname.startswith('c_') and not colname.startswith('c_q['))):
155 |             out_atoms.new_array(colname, get_quantity([colname]), dtype='float')
156 | 
157 |     return out_atoms
158 | 
159 | 
def construct_cell(diagdisp, offdiag):
    """Build an ASE cell and its displacement from LAMMPS box parameters.

    :param diagdisp: the six box bounds (xlo, xhi, ylo, yhi, zlo, zhi)
    :param offdiag: the three tilt factors (xy, xz, yz)
    :returns: ``(cell, celldisp)``: a lower-triangular 3x3 cell array and
        the 3-vector of the cell origin
    :rtype: tuple
    """
    xlo, xhi, ylo, yhi, zlo, zhi = diagdisp
    xy, xz, yz = offdiag

    # The box bounds include the extent added by the tilt, so the tilt
    # magnitudes are subtracted to recover the cell vector lengths.
    len_x = (xhi - xlo) - abs(xy) - abs(xz)
    len_y = (yhi - ylo) - abs(yz)
    len_z = zhi - zlo

    # Negative tilts push the lower bound below the actual origin.
    origin = np.array([
        xlo - min(0, xy) - min(0, xz),
        ylo - min(0, yz),
        zlo,
    ])
    cell = np.array([
        [len_x, 0, 0],
        [xy, len_y, 0],
        [xz, yz, len_z],
    ])
    return cell, origin
183 | 
184 | 
def get_max_index(index):
    """Return the largest frame index that ``index`` can refer to.

    :param index: a scalar index or a slice
    :returns: the scalar itself; for a slice its ``stop``, or infinity when
        the slice is open-ended; ``None`` for any other kind of object
    """
    if np.isscalar(index):
        return index
    if not isinstance(index, slice):
        return None
    stop = index.stop
    return float("inf") if stop is None else stop
191 | 
def read_lammps_dump_text(fileobj, index=-1, prt=False, **kwargs):
    """Process cleartext lammps dumpfiles

    :param fileobj: filestream providing the trajectory data
    :param index: integer or slice object (default: get the last timestep)
    :param prt: if True, print the timestep number of every frame read
    :param kwargs: forwarded to ``lammps_data_to_ase_atoms``
    :returns: the frame(s) selected by ``index``: a single Atoms object for
        an integer index, a list of Atoms objects for a slice
    """
    # Load all dumped timesteps into memory simultaneously
    lines = deque(fileobj.readlines())

    index_end = get_max_index(index)

    n_atoms = 0
    images = []

    while len(lines) > n_atoms:
        line = lines.popleft()

        if "ITEM: TIMESTEP" in line:
            n_atoms = 0
            line = lines.popleft()
            ntimestep = int(line.split()[0])
            if prt:
                print(ntimestep)

        if "ITEM: NUMBER OF ATOMS" in line:
            line = lines.popleft()
            n_atoms = int(line.split()[0])

        if "ITEM: BOX BOUNDS" in line:
            # save labels behind "ITEM: BOX BOUNDS" in triclinic case
            # (>=lammps-7Jul09)
            tilt_items = line.split()[3:]
            celldatarows = [lines.popleft() for _ in range(3)]
            celldata = np.loadtxt(celldatarows)
            diagdisp = celldata[:, :2].reshape(6, 1).flatten()

            # determine cell tilt (triclinic case!)
            if len(celldata[0]) > 2:
                # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"
                # to assign tilt (vector) elements ...
                offdiag = celldata[:, 2]
                # ... otherwise assume default order in 3rd column
                # (if the latter was present)
                if len(tilt_items) >= 3:
                    sort_index = [tilt_items.index(i)
                                  for i in ["xy", "xz", "yz"]]
                    offdiag = offdiag[sort_index]
            else:
                offdiag = (0.0,) * 3

            cell, celldisp = construct_cell(diagdisp, offdiag)

            # Handle pbc conditions (fix by Tamas Stenczel): the boundary
            # flags are the only three labels for an orthogonal box and
            # follow the three tilt labels for a triclinic one.
            if len(tilt_items) == 3:
                pbc_items = tilt_items
            elif len(tilt_items) > 3:
                pbc_items = tilt_items[3:6]
            else:
                pbc_items = ["f", "f", "f"]
            pbc = ["p" in d.lower() for d in pbc_items]

        if "ITEM: ATOMS" in line:
            colnames = line.split()[2:]
            datarows = [lines.popleft() for _ in range(n_atoms)]
            # ndmin=2 keeps a single-atom frame as a (1, ncols) table;
            # without it loadtxt returns a 1-D array and the column
            # slicing downstream fails.
            data = np.loadtxt(datarows, ndmin=2)
            out_atoms = lammps_data_to_ase_atoms(
                data=data,
                colnames=colnames,
                cell=cell,
                celldisp=celldisp,
                atomsobj=Atoms,
                pbc=pbc,
                **kwargs
            )
            out_atoms.info['Time'] = ntimestep
            images.append(out_atoms)

        # stop early once every frame the index can refer to has been read
        if len(images) > index_end >= 0:
            break

    return images[index]
280 | 
@writer
def write_lammps_dump_text(fd, db, prog=False):
    """Write a list of frames as a cleartext LAMMPS dump trajectory.

    Every frame is written with the same number of atoms (the largest
    frame size in ``db``); shorter frames are padded with dummy atoms of
    mass 1.0 placed at (-10.1, -10.1, -10.1). The box bounds are written
    as 0 to the diagonal entries of the cell matrix with 'pp pp pp'
    boundaries, and the per-atom columns are ``id mass xu yu zu``.

    :param fd: destination handed over by the ``writer`` decorator
    :param db: list of ASE Atoms objects; frame numbering starts at 0
    :param prog: if True, print the 1-based count of frames written so far
    """
    atom_row = "{0:>5} {1:8.3f} {2:23.17f} {3:23.17f} {4:23.17f}\n"
    Nmax = max((len(at) for at in db), default=0)
    for frame, at in enumerate(db):
        fd.write("ITEM: TIMESTEP \n")
        fd.write("{0} \n".format(frame))
        fd.write("ITEM: NUMBER OF ATOMS \n")
        fd.write("{0} \n".format(Nmax))
        fd.write("ITEM: BOX BOUNDS pp pp pp \n") #needs to be extended
        lims = np.diag(at.cell)
        for axis in range(3):
            fd.write("{0:23.17e} {1:23.17e} \n".format(0, lims[axis]))
        fd.write("ITEM: ATOMS id mass xu yu zu \n") #needs to be extended
        masses = at.get_masses()
        for j in range(len(at)):
            fd.write(atom_row.format(j+1, masses[j], *at.positions[j]))
        #pad with dummy atoms, because VMD can only read lammpstrj with fixed number of atoms
        #cannot select by mass, because mass is decided by vmd in the first frame, so use dummy location
        for j in range(len(at), Nmax):
            fd.write(atom_row.format(j+1, 1.0, -10.1, -10.1, -10.1))
        if prog:
            print(frame + 1)
306 | 
307 | #copied from https://wiki.fysik.dtu.dk/ase/dev/_modules/ase/io/lammpsdata.html
@writer
def write_lammps_data(fd, atoms, specorder=None, force_skew=False,
                      prismobj=None, velocities=False, units="metal",
                      atom_style='atomic'):
    """Write atomic structure data to a LAMMPS data file.

    :param fd: destination handed over by the ``writer`` decorator
    :param atoms: Atoms object, or a list holding exactly one Atoms object
    :param specorder: list of chemical symbols defining the LAMMPS atom
        types (type = position + 1); by default the species present,
        ordered by atomic number
    :param force_skew: write the ``xy xz yz`` line even if the cell is not
        skewed
    :param prismobj: Prism used for the ASE -> LAMMPS coordinate transform;
        built from the cell of ``atoms`` when None
    :param velocities: also write a Velocities section
    :param units: LAMMPS unit style used for the conversions
    :param atom_style: 'atomic', 'charge', 'dipole' or 'full'
    :raises ValueError: if ``atoms`` is a list not holding exactly one item
    :raises TypeError: if a 'molID' array is present but not a 1-D integer
        array with one entry per atom (atom_style 'full')
    :raises NotImplementedError: for any other ``atom_style``
    """

    # FIXME: We should add a check here that the encoding of the file object
    #        is actually ascii once the 'encoding' attribute of IOFormat objects
    #        starts functioning in implementation (currently it doesn't do
    #         anything).

    if isinstance(atoms, list):
        # an empty list is rejected too, instead of failing on atoms[0]
        if len(atoms) != 1:
            raise ValueError(
                "Can only write one configuration to a lammps data file!"
            )
        atoms = atoms[0]

    if hasattr(fd, "name"):
        fd.write("{0} (written by ASE) \n\n".format(fd.name))
    else:
        fd.write("(written by ASE) \n\n")

    symbols = atoms.get_chemical_symbols()
    n_atoms = len(symbols)
    fd.write("{0} \t atoms \n".format(n_atoms))

    if specorder is None:
        # This way it is assured that LAMMPS atom types are always
        # assigned predictably, in order of increasing atomic number
        # (the original ASE code used alphabetic order of the symbols).
        z_atoms = np.sort(np.unique(atoms.numbers))
        species = [ase.data.chemical_symbols[z] for z in z_atoms]
    else:
        # To index elements in the LAMMPS data file
        # (indices must correspond to order in the potential file)
        species = specorder
    n_atom_types = len(species)
    fd.write("{0}  atom types\n".format(n_atom_types))

    if prismobj is None:
        p = Prism(atoms.get_cell())
    else:
        p = prismobj

    fd.write("\n")

    # Get cell parameters and convert from ASE units to LAMMPS units
    xhi, yhi, zhi, xy, xz, yz = convert(p.get_lammps_prism(), "distance",
                                        "ASE", units)

    fd.write("0.0 {0:23.17g}  xlo xhi\n".format(xhi))
    fd.write("0.0 {0:23.17g}  ylo yhi\n".format(yhi))
    fd.write("0.0 {0:23.17g}  zlo zhi\n".format(zhi))

    if force_skew or p.is_skewed():
        fd.write(
            "{0:23.17g} {1:23.17g} {2:23.17g}  xy xz yz\n".format(
                xy, xz, yz
            )
        )
    fd.write("\n")

    # Masses section (not part of the original ASE writer): one standard
    # atomic mass per atom type, looked up by chemical symbol and written
    # without unit conversion.
    fd.write("Masses\n\n")
    for i in range(n_atom_types):
        m = ase.data.atomic_masses[ase.data.chemical_symbols.index(species[i])]
        fd.write("{0:>3} {1:23.17g}\n".format(i+1, m))
    fd.write("\n")

    # Write (unwrapped) atomic positions.  If wrapping of atoms back into the
    # cell along periodic directions is desired, this should be done manually
    # on the Atoms object itself beforehand.
    fd.write("Atoms # "+atom_style+"\n\n")
    pos = p.vector_to_lammps(atoms.get_positions(), wrap=False)

    if atom_style == 'atomic':
        for i, r in enumerate(pos):
            # Convert position from ASE units to LAMMPS units
            r = convert(r, "distance", "ASE", units)
            s = species.index(symbols[i]) + 1
            fd.write(
                "{0:>6} {1:>3} {2:23.17g} {3:23.17g} {4:23.17g}\n".format(
                    *(i + 1, s) + tuple(r)
                )
            )
    elif atom_style == 'charge':
        charges = atoms.get_initial_charges()
        for i, (q, r) in enumerate(zip(charges, pos)):
            # Convert position and charge from ASE units to LAMMPS units
            r = convert(r, "distance", "ASE", units)
            q = convert(q, "charge", "ASE", units)
            s = species.index(symbols[i]) + 1
            fd.write("{0:>6} {1:>3} {2:>5} {3:23.17g} {4:23.17g} {5:23.17g}\n"
                    .format(*(i + 1, s, q) + tuple(r)))
    elif atom_style == 'dipole':
        charges = atoms.get_initial_charges()
        dipoles = atoms.arrays['initial_dipoles']
        for i, (q, mu, r) in enumerate(zip(charges, dipoles, pos)):
            # Convert position and charge from ASE units to LAMMPS units
            r = convert(r, "distance", "ASE", units)
            q = convert(q, "charge", "ASE", units)
            # mu = convert(...) not implemented yet: dipoles are written
            # in whatever units the 'initial_dipoles' array holds
            s = species.index(symbols[i]) + 1
            fd.write("{0:>6} {1:>3} {2:>7} {3:>7} {4:>7} {5:>7} {6:23.17g} {7:23.17g} {8:23.17g}\n"
                    .format(*(i + 1, s, q) + tuple(mu) + tuple(r)))
    elif atom_style == 'full':
        charges = atoms.get_initial_charges()
        if atoms.has('molID'):
            # the stored molID is shifted by one before writing
            molecules = atoms.get_array('molID')+1
            if not np.issubdtype(molecules.dtype, np.integer):
                raise TypeError((
                    "If 'atoms' object has 'molID' array, then"
                    " molID dtype must be subtype of np.integer, and"
                    " not {:s}.").format(str(molecules.dtype)))
            if (len(molecules) != len(atoms)) or (molecules.ndim != 1):
                raise TypeError((
                    "If 'atoms' object has 'molID' array, then"
                    " each atom must have exactly one molID."))
        else:
            # Assigning each atom to a distinct molecule id would seem
            # preferable above assigning all atoms to a single molecule id per
            # default, as done within ase <= v 3.19.1. I.e.,
            # molecules = np.arange(start=1, stop=len(atoms)+1, step=1, dtype=int)
            # However, according to LAMMPS default behavior,
            molecules = np.zeros(len(atoms), dtype=int)
            # which is what happens if one creates new atoms within LAMMPS
            # without explicitly taking care of the molecule id.
            # Quote from docs at https://lammps.sandia.gov/doc/read_data.html:
            #    The molecule ID is a 2nd identifier attached to an atom.
            #    Normally, it is a number from 1 to N, identifying which
            #    molecule the atom belongs to. It can be 0 if it is a
            #    non-bonded atom or if you don't care to keep track of molecule
            #    assignments.

        for i, (m, q, r) in enumerate(zip(molecules, charges, pos)):
            # Convert position and charge from ASE units to LAMMPS units
            r = convert(r, "distance", "ASE", units)
            q = convert(q, "charge", "ASE", units)
            s = species.index(symbols[i]) + 1
            fd.write("{0:>6} {1:>3} {2:>3} {3:>5} {4:23.17g} {5:23.17g} "
                    "{6:23.17g}\n".format(*(i + 1, m, s, q) + tuple(r)))
    else:
        raise NotImplementedError(
            "atom_style '{0}' is not supported; use 'atomic', 'charge', "
            "'dipole' or 'full'".format(atom_style)
        )

    if velocities and atoms.get_velocities() is not None:
        fd.write("\nVelocities \n\n")
        vel = p.vector_to_lammps(atoms.get_velocities())
        for i, v in enumerate(vel):
            # Convert velocity from ASE units to LAMMPS units
            v = convert(v, "velocity", "ASE", units)
            fd.write(
                "{0:>6} {1:23.17g} {2:23.17g} {3:23.17g}\n".format(
                    *(i + 1,) + tuple(v)
                )
            )

    fd.flush()
469 | 
470 | @reader
471 | def read_lammps_data(fileobj, Z_of_type=None, style="full",
472 |                      sort_by_id=False, units="metal"):
473 |     """Method which reads a LAMMPS data file.
474 | 
475 |     sort_by_id: Order the particles according to their id. Might be faster to
476 |     switch it off.
477 |     Units are set by default to the style=metal setting in LAMMPS.
478 |     """
479 |     # load everything into memory
480 |     lines = fileobj.readlines()
481 | 
482 |     # begin read_lammps_data
483 |     comment = None
484 |     N = None
485 |     # N_types = None
486 |     xlo = None
487 |     xhi = None
488 |     ylo = None
489 |     yhi = None
490 |     zlo = None
491 |     zhi = None
492 |     xy = None
493 |     xz = None
494 |     yz = None
495 |     pos_in = {}
496 |     travel_in = {}
497 |     mol_id_in = {}
498 |     charge_in = {}
499 |     mass_in = {}
500 |     vel_in = {}
501 |     bonds_in = []
502 |     angles_in = []
503 |     dihedrals_in = []
504 | 
505 |     sections = [
506 |         "Atoms",
507 |         "Velocities",
508 |         "Masses",
509 |         "Charges",
510 |         "Ellipsoids",
511 |         "Lines",
512 |         "Triangles",
513 |         "Bodies",
514 |         "Bonds",
515 |         "Angles",
516 |         "Dihedrals",
517 |         "Impropers",
518 |         "Impropers Pair Coeffs",
519 |         "PairIJ Coeffs",
520 |         "Pair Coeffs",
521 |         "Bond Coeffs",
522 |         "Angle Coeffs",
523 |         "Dihedral Coeffs",
524 |         "Improper Coeffs",
525 |         "BondBond Coeffs",
526 |         "BondAngle Coeffs",
527 |         "MiddleBondTorsion Coeffs",
528 |         "EndBondTorsion Coeffs",
529 |         "AngleTorsion Coeffs",
530 |         "AngleAngleTorsion Coeffs",
531 |         "BondBond13 Coeffs",
532 |         "AngleAngle Coeffs",
533 |     ]
534 |     header_fields = [
535 |         "atoms",
536 |         "bonds",
537 |         "angles",
538 |         "dihedrals",
539 |         "impropers",
540 |         "atom types",
541 |         "bond types",
542 |         "angle types",
543 |         "dihedral types",
544 |         "improper types",
545 |         "extra bond per atom",
546 |         "extra angle per atom",
547 |         "extra dihedral per atom",
548 |         "extra improper per atom",
549 |         "extra special per atom",
550 |         "ellipsoids",
551 |         "lines",
552 |         "triangles",
553 |         "bodies",
554 |         "xlo xhi",
555 |         "ylo yhi",
556 |         "zlo zhi",
557 |         "xy xz yz",
558 |     ]
559 |     sections_re = "(" + "|".join(sections).replace(" ", "\\s+") + ")"
560 |     header_fields_re = "(" + "|".join(header_fields).replace(" ", "\\s+") + ")"
561 | 
562 |     section = None
563 |     header = True
564 |     for line in lines:
565 |         if comment is None:
566 |             comment = line.rstrip()
567 |         else:
568 |             line = re.sub("#.*", "", line).rstrip().lstrip()
569 |             if re.match("^\\s*$", line):  # skip blank lines
570 |                 continue
571 | 
572 |         # check for known section names
573 |         m = re.match(sections_re, line)
574 |         if m is not None:
575 |             section = m.group(0).rstrip().lstrip()
576 |             header = False
577 |             continue
578 | 
579 |         if header:
580 |             field = None
581 |             val = None
582 |             # m = re.match(header_fields_re+"\s+=\s*(.*)", line)
583 |             # if m is not None: # got a header line
584 |             #   field=m.group(1).lstrip().rstrip()
585 |             #   val=m.group(2).lstrip().rstrip()
586 |             # else: # try other format
587 |             #   m = re.match("(.*)\s+"+header_fields_re, line)
588 |             #   if m is not None:
589 |             #       field = m.group(2).lstrip().rstrip()
590 |             #       val = m.group(1).lstrip().rstrip()
591 |             m = re.match("(.*)\\s+" + header_fields_re, line)
592 |             if m is not None:
593 |                 field = m.group(2).lstrip().rstrip()
594 |                 val = m.group(1).lstrip().rstrip()
595 |             if field is not None and val is not None:
596 |                 if field == "atoms":
597 |                     N = int(val)
598 |                 # elif field == "atom types":
599 |                 #     N_types = int(val)
600 |                 elif field == "xlo xhi":
601 |                     (xlo, xhi) = [float(x) for x in val.split()]
602 |                 elif field == "ylo yhi":
603 |                     (ylo, yhi) = [float(x) for x in val.split()]
604 |                 elif field == "zlo zhi":
605 |                     (zlo, zhi) = [float(x) for x in val.split()]
606 |                 elif field == "xy xz yz":
607 |                     (xy, xz, yz) = [float(x) for x in val.split()]
608 | 
609 |         if section is not None:
610 |             fields = line.split()
611 |             if section == "Atoms":  # id *
612 |                 id = int(fields[0])
613 |                 if style == "full" and (len(fields) == 7 or len(fields) == 10):
614 |                     # id mol-id type q x y z [tx ty tz]
615 |                     pos_in[id] = (
616 |                         int(fields[2]),
617 |                         float(fields[4]),
618 |                         float(fields[5]),
619 |                         float(fields[6]),
620 |                     )
621 |                     mol_id_in[id] = int(fields[1])
622 |                     charge_in[id] = float(fields[3])
623 |                     if len(fields) == 10:
624 |                         travel_in[id] = (
625 |                             int(fields[7]),
626 |                             int(fields[8]),
627 |                             int(fields[9]),
628 |                         )
629 |                 elif style == "atomic" and (
630 |                         len(fields) == 5 or len(fields) == 8
631 |                 ):
632 |                     # id type x y z [tx ty tz]
633 |                     pos_in[id] = (
634 |                         int(fields[1]),
635 |                         float(fields[2]),
636 |                         float(fields[3]),
637 |                         float(fields[4]),
638 |                     )
639 |                     if len(fields) == 8:
640 |                         travel_in[id] = (
641 |                             int(fields[5]),
642 |                             int(fields[6]),
643 |                             int(fields[7]),
644 |                         )
645 |                 elif (style in ("angle", "bond", "molecular")
646 |                       ) and (len(fields) == 6 or len(fields) == 9):
647 |                     # id mol-id type x y z [tx ty tz]
648 |                     pos_in[id] = (
649 |                         int(fields[2]),
650 |                         float(fields[3]),
651 |                         float(fields[4]),
652 |                         float(fields[5]),
653 |                     )
654 |                     mol_id_in[id] = int(fields[1])
655 |                     if len(fields) == 9:
656 |                         travel_in[id] = (
657 |                             int(fields[6]),
658 |                             int(fields[7]),
659 |                             int(fields[8]),
660 |                         )
661 |                 elif (style == "charge"
662 |                       and (len(fields) == 6 or len(fields) == 9)):
663 |                     # id type q x y z [tx ty tz]
664 |                     pos_in[id] = (
665 |                         int(fields[1]),
666 |                         float(fields[3]),
667 |                         float(fields[4]),
668 |                         float(fields[5]),
669 |                     )
670 |                     charge_in[id] = float(fields[2])
671 |                     if len(fields) == 9:
672 |                         travel_in[id] = (
673 |                             int(fields[6]),
674 |                             int(fields[7]),
675 |                             int(fields[8]),
676 |                         )
677 |                 else:
678 |                     raise RuntimeError(
679 |                         "Style '{}' not supported or invalid "
680 |                         "number of fields {}"
681 |                         "".format(style, len(fields))
682 |                     )
683 |             elif section == "Velocities":  # id vx vy vz
684 |                 vel_in[int(fields[0])] = (
685 |                     float(fields[1]),
686 |                     float(fields[2]),
687 |                     float(fields[3]),
688 |                 )
689 |             elif section == "Masses":
690 |                 mass_in[int(fields[0])] = float(fields[1])
691 |             elif section == "Bonds":  # id type atom1 atom2
692 |                 bonds_in.append(
693 |                     (int(fields[1]), int(fields[2]), int(fields[3]))
694 |                 )
695 |             elif section == "Angles":  # id type atom1 atom2 atom3
696 |                 angles_in.append(
697 |                     (
698 |                         int(fields[1]),
699 |                         int(fields[2]),
700 |                         int(fields[3]),
701 |                         int(fields[4]),
702 |                     )
703 |                 )
704 |             elif section == "Dihedrals":  # id type atom1 atom2 atom3 atom4
705 |                 dihedrals_in.append(
706 |                     (
707 |                         int(fields[1]),
708 |                         int(fields[2]),
709 |                         int(fields[3]),
710 |                         int(fields[4]),
711 |                         int(fields[5]),
712 |                     )
713 |                 )
714 | 
715 |     # set cell
716 |     cell = np.zeros((3, 3))
717 |     cell[0, 0] = xhi - xlo
718 |     cell[1, 1] = yhi - ylo
719 |     cell[2, 2] = zhi - zlo
720 |     if xy is not None:
721 |         cell[1, 0] = xy
722 |     if xz is not None:
723 |         cell[2, 0] = xz
724 |     if yz is not None:
725 |         cell[2, 1] = yz
726 | 
727 |     # initialize arrays for per-atom quantities
728 |     positions = np.zeros((N, 3))
729 |     numbers = np.zeros((N), int)
730 |     ids = np.zeros((N), int)
731 |     types = np.zeros((N), int)
732 |     if len(vel_in) > 0:
733 |         velocities = np.zeros((N, 3))
734 |     else:
735 |         velocities = None
736 |     if len(mass_in) > 0:
737 |         masses = np.zeros((N))
738 |     else:
739 |         masses = None
740 |     if len(mol_id_in) > 0:
741 |         mol_id = np.zeros((N), int)
742 |     else:
743 |         mol_id = None
744 |     if len(charge_in) > 0:
745 |         charge = np.zeros((N), float)
746 |     else:
747 |         charge = None
748 |     if len(travel_in) > 0:
749 |         travel = np.zeros((N, 3), int)
750 |     else:
751 |         travel = None
752 |     if len(bonds_in) > 0:
753 |         bonds = [""] * N
754 |     else:
755 |         bonds = None
756 |     if len(angles_in) > 0:
757 |         angles = [""] * N
758 |     else:
759 |         angles = None
760 |     if len(dihedrals_in) > 0:
761 |         dihedrals = [""] * N
762 |     else:
763 |         dihedrals = None
764 | 
765 |     ind_of_id = {}
766 |     # copy per-atom quantities from read-in values
767 |     for (i, id) in enumerate(pos_in.keys()):
768 |         # by id
769 |         ind_of_id[id] = i
770 |         if sort_by_id:
771 |             ind = id - 1
772 |         else:
773 |             ind = i
774 |         type = pos_in[id][0]
775 |         positions[ind, :] = [pos_in[id][1], pos_in[id][2], pos_in[id][3]]
776 |         if velocities is not None:
777 |             velocities[ind, :] = [vel_in[id][0], vel_in[id][1], vel_in[id][2]]
778 |         if travel is not None:
779 |             travel[ind] = travel_in[id]
780 |         if mol_id is not None:
781 |             mol_id[ind] = mol_id_in[id]
782 |         if charge is not None:
783 |             charge[ind] = charge_in[id]
784 |         ids[ind] = id
785 |         # by type
786 |         types[ind] = type
787 |         if masses is not None:
788 |             masses[ind] = mass_in[type]
789 |         if Z_of_type is None:
790 |             if masses is not None:
791 |                 numbers[ind] = np.argmin(np.abs(ase.data.atomic_masses-masses[ind]))
792 |             else:
793 |                 numbers[ind] = type
794 |         else:
795 |             numbers[ind] = Z_of_type[type]
796 |     # convert units
797 |     positions = convert(positions, "distance", units, "ASE")
798 |     cell = convert(cell, "distance", units, "ASE")
799 |     if masses is not None:
800 |         masses = convert(masses, "mass", units, "ASE")
801 |     if velocities is not None:
802 |         velocities = convert(velocities, "velocity", units, "ASE")
803 | 
804 |     # create ase.Atoms
805 |     at = Atoms(
806 |         positions=positions,
807 |         numbers=numbers,
808 |         masses=masses,
809 |         cell=cell,
810 |         pbc=[True, True, True],
811 |     )
812 |     # set velocities (can't do it via constructor)
813 |     if velocities is not None:
814 |         at.set_velocities(velocities)
815 |     at.arrays["id"] = ids
816 |     at.arrays["type"] = types
817 |     if travel is not None:
818 |         at.arrays["travel"] = travel
819 |     if mol_id is not None:
820 |         at.arrays["mol-id"] = mol_id
821 |     if charge is not None:
822 |         at.arrays["initial_charges"] = charge
823 |         at.arrays["mmcharges"] = charge.copy()
824 | 
825 |     if bonds is not None:
826 |         for (type, a1, a2) in bonds_in:
827 |             i_a1 = ind_of_id[a1]
828 |             i_a2 = ind_of_id[a2]
829 |             if len(bonds[i_a1]) > 0:
830 |                 bonds[i_a1] += ","
831 |             bonds[i_a1] += "%d(%d)" % (i_a2, type)
832 |         for i in range(len(bonds)):
833 |             if len(bonds[i]) == 0:
834 |                 bonds[i] = "_"
835 |         at.arrays["bonds"] = np.array(bonds)
836 | 
837 |     if angles is not None:
838 |         for (type, a1, a2, a3) in angles_in:
839 |             i_a1 = ind_of_id[a1]
840 |             i_a2 = ind_of_id[a2]
841 |             i_a3 = ind_of_id[a3]
842 |             if len(angles[i_a2]) > 0:
843 |                 angles[i_a2] += ","
844 |             angles[i_a2] += "%d-%d(%d)" % (i_a1, i_a3, type)
845 |         for i in range(len(angles)):
846 |             if len(angles[i]) == 0:
847 |                 angles[i] = "_"
848 |         at.arrays["angles"] = np.array(angles)
849 | 
850 |     if dihedrals is not None:
851 |         for (type, a1, a2, a3, a4) in dihedrals_in:
852 |             i_a1 = ind_of_id[a1]
853 |             i_a2 = ind_of_id[a2]
854 |             i_a3 = ind_of_id[a3]
855 |             i_a4 = ind_of_id[a4]
856 |             if len(dihedrals[i_a1]) > 0:
857 |                 dihedrals[i_a1] += ","
858 |             dihedrals[i_a1] += "%d-%d-%d(%d)" % (i_a2, i_a3, i_a4, type)
859 |         for i in range(len(dihedrals)):
860 |             if len(dihedrals[i]) == 0:
861 |                 dihedrals[i] = "_"
862 |         at.arrays["dihedrals"] = np.array(dihedrals)
863 | 
864 |     at.info["comment"] = comment
865 | 
866 |     return at
867 | 


--------------------------------------------------------------------------------