├── QE.py
├── README.md
├── bandgapoccu.py
├── color.key
├── default.py
├── early_stop.py
├── extractor.py
├── functions.py
├── hse_db.py
├── process.py
├── sample.py
├── sdir.sh
└── to_xml.py


/QE.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import sys, os
  3 | import numpy as np
  4 | from numpy import cos as cos
  5 | from numpy import sin as sin
  6 | import json
  7 | from collections import Counter
  8 | import sqlite3 as lite
  9 | from xml.etree import ElementTree
 10 | from lxml import etree as ET
 11 | import copy
 12 | 
 13 | class index():
 14 |   def __init__(self):
 15 |     self.keys = {}
 16 | 
 17 |   def sanitize(self,s):
 18 |     return ''.join([i for i in s if not i.isdigit()]).strip()
 19 | 
 20 |   def key(self,specie):
 21 |     if specie not in self.keys:
 22 |       self.keys[specie] = 0
 23 |     else:
 24 |       self.keys[specie] += 1
 25 |     return specie + str(self.keys[specie])
 26 | 
 27 |   def reset(self):
 28 |     self.keys = {}
 29 | 
 30 | class Struct:
 31 |   def __init__(self,direct = None):
 32 |     self.BOHRtoA = 0.529177249
 33 |     self.RYtoeV = 13.605698066
 34 |     self.program = "QE"
 35 |     self.version = ""
 36 |     self.volume = 0.0
 37 |     self.alat = 0.0
 38 |     self.natoms = 0
 39 |     self.nat = 0
 40 |     self.nelect = 0
 41 |     self.Ecut = 0.0
 42 |     self.RhoCut = 0.0
 43 |     self.Econv = 0.0
 44 |     self.Exch = ""
 45 |     self.energy = 0.0
 46 |     self.natoms = 0
 47 |     self.bandgap = 0.0
 48 |     self.bands = 0
 49 |     self.lattice = {'a':np.zeros(3),'b':np.zeros(3),'c':np.zeros(3)}
 50 |     self.atoms = {}
 51 |     self.norms = {'a':0.0,'b':0.0,'c':0.0}
 52 |     self.angles = {'alpha':0.0,'beta':0.0,'gamma':0.0}
 53 |     self.kpts = 0
 54 |     self.bnddiagram = False
 55 |     self.FermiTest = False
 56 |     self.Fermi = 0.0
 57 |     self.JSON = ""
 58 |     self.email = ""
 59 |     self.atomindex = index()
 60 |     self.noband = False
 61 |     if direct is not None:
 62 |       if os.path.isfile(direct):
 63 |         self.File_Process(direct)
 64 |       elif os.path.isdir(direct):
 65 |         self.XML_Process(direct)
 66 | 
 67 |   def From_Crystal(self):
 68 |     #vol = np.sqrt(1 - np.cos(self.angles['alpha']*np.pi/180.)**2 - np.cos(self.angles['beta']*np.pi/180.)**2 - np.cos(self.angles['gamma']*np.pi/180.)**2 + 2*np.cos(self.angles['alpha']*np.pi/180.)*np.cos(self.angles['beta']*np.pi/180.)*np.cos(self.angles['gamma']*np.pi/180.))
 69 |     #temp1 = [self.norms['a'], self.norms['b']*np.cos(self.angles['gamma']*np.pi/180.), self.norms['c']*np.cos(self.angles['beta']*np.pi/180.)]
 70 |     #temp2 = [0,self.norms['b']*np.sin(self.angles['gamma']*np.pi/180.),self.norms['c']*((np.cos(self.angles['alpha']*np.pi/180.)-np.cos(self.angles['beta']*np.pi/180.)*np.cos(self.angles['beta']*np.pi/180.))/np.sin(self.angles['gamma']*np.pi/180.))]
 71 |     #temp3 = [0,0,self.norms['c']*vol/np.sin(self.angles['gamma']*np.pi/180.)]
 72 |     #conversion = np.vstack((np.asarray(temp1),np.asarray(temp2),np.asarray(temp3))) 
 73 |     t = []
 74 |     for i in ['a','b','c']:
 75 |       t.append(list(self.lattice[i]))
 76 |     return np.transpose(np.array(t))
 77 |     #return np.linalg.inv(np.transpose(np.array(t)))
 78 |   
 79 |   def to_Crystal(self):
 80 |     self.Normalize()
 81 |     conversion = np.linalg.inv(self.From_Crystal())
 82 |     print("ATOM_POSITIONS {crystal}")
 83 |     for i in self.atoms:
 84 |       #tmp = copy.deepcopy(self.atoms[i])
 85 |       tmp = np.dot(conversion,self.atoms[i])
 86 |       print(self.atomindex.sanitize(i) + " " + ' '.join([str(round(j,9)) for j in tmp])) 
 87 | 
 88 |   def wrap_Cell(self):
 89 |     self.Normalize()
 90 |     conversion = np.linalg.inv(self.From_Crystal())
 91 |     c_1 = self.From_Crystal()
 92 |     for i in self.atoms:
 93 |       tmp = np.dot(conversion,self.atoms[i])
 94 |       for c,j in enumerate(tmp):
 95 |         if tmp[c] < 0.0: 
 96 |           while tmp[c] <= 0.0:
 97 |             tmp[c] += 1
 98 |         elif tmp[c] > 1.0: 
 99 |           while tmp[c] >= 1.0:
100 |             tmp[c] -= 1
101 |       self.atoms[i] = np.dot(c_1,tmp)
102 |       
103 |     
104 | 
105 |   def RDF(self,rcut=5.0,dr=0.1):
106 |     supcell = []
107 |     rho = self.natoms/self.volume
108 |     radius = np.arange(0,rcut + dr,dr)
109 |     R = {}
110 |     for i in radius:
111 |       R[i] = 0.0
112 |     conversion = np.linalg.inv(self.From_Crystal())
113 |     crystal = self.From_Crystal()
114 |     max_trans = {}
115 |     for i in self.norms:
116 |       max_trans[i] = int(np.ceil(1.1*rcut/self.norms[i]))
117 |     trans_index = []
118 |     for i in range(-max_trans['a'],max_trans['a']+1):
119 |       for j in range(-max_trans['b'],max_trans['b']+1):
120 |         for k in range(-max_trans['c'],max_trans['c']+1):
121 |           trans_index.append([i,j,k])
122 |     y = copy.deepcopy(self)
123 |     for i in y.atoms:
124 |       y.atoms[i] = np.dot(conversion,y.atoms[i])
125 |     
126 |     for i in y.atoms:
127 |       pos1 = copy.copy(y.atoms[i])
128 |       for j in y.atoms:
129 |         for z in trans_index:
130 |           pos2 = np.add(y.atoms[j],z)
131 |           delta = np.linalg.norm(np.dot(crystal,np.subtract(pos2,pos1)))
132 |           if delta <= rcut:
133 |             for ZZ in range(0,len(radius)-1):
134 |               if radius[ZZ] < delta < radius[ZZ+1]:
135 |                 R[radius[ZZ]] += 1.0/(rho*4.*np.pi*(radius[ZZ]**2)*dr)
136 |     data = []
137 |     for i in R:
138 |       data.append([i,R[i]/(float(self.natoms))])
139 |     return data
140 |     
141 |     
142 |   def CIF(self,filename=""):
143 |     self.Normalize()
144 |     ciffile = "data_global\n"
145 |     ciffile += "_chemical_name " + self.to_Formula().strip() + "\n"
146 |     for i in ['a','b','c']:
147 |       ciffile += '_cell_length_' + i.strip() + " " + str(self.norms[i]) + "\n"
148 |     for i in ['alpha','beta','gamma']:
149 |       ciffile += '_cell_angle_' + i.strip() + " " + str(self.angles[i]) + "\n"
150 |     ciffile += '_cell_volume_ ' + str(self.volume) + "\n"
151 |     ciffile += "loop_\n"
152 |     ciffile += "_atom_site_label\n"
153 |     ciffile += "_atom_site_fract_x\n"
154 |     ciffile += "_atom_site_fract_y\n"
155 |     ciffile += "_atom_site_fract_z\n"
156 |     conversion = np.linalg.inv(self.From_Crystal())
157 |     counter = 0
158 |     for i in self.atoms:
159 |       tmp = copy.deepcopy(self.atoms[i])
160 |       dot = np.dot(conversion,tmp)
161 |       ciffile += self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in dot]) + "\n"
162 |     if filename == "":
163 |       print(ciffile)
164 |     else:
165 |       f = open(filename, 'w')
166 |       f.write(ciffile)
167 |       f.close()
168 |   def to_XYZ(self,filename = None):
169 |     printstring = ''
170 |     printstring += str(len(self.atoms)) + '\n\n'
171 |     for i in self.atoms:
172 |       tmp = [round(x,5) for x in self.atoms[i]]
173 |       string = self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in self.atoms[i]])
174 |       printstring += string + "\n"
175 |     if filename == None:
176 |       print(printstring)
177 |     else:
178 |       f = open(filename,'w')
179 |       f.write(printstring)
180 |       f.close()
181 | 
182 |   def return_params(self):
183 |     atoms = ''
184 |     for i in self.atoms:
185 |       tmp = [round(x,5) for x in self.atoms[i]]
186 |       string = self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in self.atoms[i]])
187 |       atoms += string + "\n"
188 |     cell = ''
189 |     for i in ['a','b','c']:
190 |       string = " ".join([str(round(x,5)) for x in self.lattice[i]])
191 |       cell += string + "\n"
192 |     return atoms,cell
193 | 
194 |   def print(self):
195 |     printstring = "ATOMIC_POSITIONS {angstrom}\n"
196 |     for i in self.atoms:
197 |       tmp = [round(x,5) for x in self.atoms[i]]
198 |       string = self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in self.atoms[i]])
199 |       printstring += string + "\n"
200 |     printstring += "CELL_PARAMETERS {angstrom}\n"
201 |     for i in ['a','b','c']:
202 |       string = " ".join([str(round(x,5)) for x in self.lattice[i]])
203 |       printstring += string + "\n"
204 |     return printstring
205 | 
206 |   def XML_Process(self,dirstring): #Need to talk to Alexie About this, does not store total energy
207 |     try:
208 |       f = open(dirstring + "/data-file.xml")
209 |     except:
210 |       raise ValueError("Cannot open " +  dirstring + '/data-file.xml')
211 |     tree = ElementTree.parse(f)
212 |     f.close()
213 |     MAP = {'a':'a1','b':'a2','c':'a3'}
214 |     try:
215 |       self.version = tree.find('./HEADER/CREATOR').attrib['VERSION']
216 |       self.nat = int(tree.find('./IONS/NUMBER_OF_SPECIES').text)
217 |       self.atomindex.reset()
218 |       self.Exch = tree.find('./EXCHANGE_CORRELATION/DFT').text.rstrip().lstrip()
219 |       self.Nelec = float(tree.find('./BAND_STRUCTURE_INFO/NUMBER_OF_ELECTRONS').text.strip())
220 |       self.kpts = float(tree.find('./BAND_STRUCTURE_INFO/NUMBER_OF_K-POINTS').text)
221 |       self.fermi = 27.2114*float(tree.find('./BAND_STRUCTURE_INFO/FERMI_ENERGY').text)
222 |       self.alat = self.BOHRtoA*float(tree.find('./CELL/LATTICE_PARAMETER').text)
223 |       self.Ecut = 27.2114*float(tree.find('./PLANE_WAVES/WFC_CUTOFF').text)
224 |       self.RhoCut = 27.2114*float(tree.find('./PLANE_WAVES/WFC_CUTOFF').text)
225 |       #self.beta = 0.0
226 |     except:
227 |       self.version = None
228 |       self.natoms = None
229 |       self.nat = None
230 |       self.atomindex.reset()
231 |       self.Exch = None
232 |       self.Nelec = None
233 |       self.kpts = None
234 |       self.fermi = None
235 |       self.alat = None
236 |       self.Ecut = None
237 |       self.RhoCut = None
238 |     try:
239 |       self.energy = float(tree.find('./TOTAL_ENERGY').text)
240 |     except:
241 |       self.energy = None
242 |     for i in ['a','b','c']:
243 |       try:
244 |         for c,j in enumerate(tree.find('./CELL/DIRECT_LATTICE_VECTORS/' + MAP[i]).text.split()):
245 |           self.lattice[i][c] = self.BOHRtoA*float(j)
246 |       except:
247 |         for c,j in enumerate(tree.find('./DIRECT_LATTICE_VECTORS/' + MAP[i]).text.split()):
248 |           self.lattice[i][c] = self.BOHRtoA*float(j)
249 |     self.natoms = int(tree.find('./IONS/NUMBER_OF_ATOMS').text)
250 |     for i in range(0,self.natoms):
251 |       tmp = tree.find('./IONS/ATOM.' + str(i + 1)).attrib
252 |       species = tmp['SPECIES']
253 |       array = [ self.BOHRtoA*float(j) for j in tmp['tau'].split()]
254 |       self.atoms[self.atomindex.key(species)] = np.array([array[0],array[1],array[2]])
255 |     self.Normalize()
256 |     #test = tree.find('./IONS/ATOM.1')
257 |     #print(test)
258 | 
259 |   def to_Latex(self,caption=None,ncols=1):
260 |     self.Normalize()
261 |     if caption is None: caption = self.to_Formula()
262 |     t = '''\\begin{table}[!ht]
263 | \centering
264 | \caption{CAPTION}
265 | %\label{my-label}
266 | \\bigskip
267 | \\begin{tabular}{lr}
268 | \hline
269 |  Parameter & Value  \\\\
270 | \hline
271 |  PARAMS
272 | \hline
273 | \end{tabular}
274 | \end{table}'''
275 |     params = ''
276 |     for c,i in enumerate(['a','b','c']):
277 |       params += i.upper() + ' & ' + str(round(self.norms[i],2)) + ' \\\\' + '\n'
278 |     for i in ['alpha','beta','gamma']:
279 |       params += '$\\' + i + '$ & ' + str(round(self.angles[i],2)) + ' \\\\' + '\n'
280 |     print(t.replace('PARAMS',params).replace('CAPTION',caption + ' unit cell parameters. Length units are in \\r{A}  and angles are in degrees.'))
281 |     header = '& '.join(['Atom & X & Y & Z']*ncols)
282 |     t1 = '''\\begin{table}[!ht]
283 | \centering
284 | \caption{CAPTION}
285 | \\bigskip
286 | %\label{my-label}
287 | \\begin{tabular}{''' + '|'.join(['lrrr']*ncols) + '''}
288 | \hline
289 |  ''' + header + '''\\\\
290 | \hline
291 |  PARAMS
292 | \hline
293 | \end{tabular}
294 | \end{table}'''
295 |     params_1 = ''
296 |     conversion = np.linalg.inv(self.From_Crystal())
297 |     print()
298 |     atom_keys = list(self.atoms.keys())
299 |     #for i in self.atoms:
300 |     for c in range(0,len(atom_keys) - ncols - 1,ncols):
301 |       i = atom_keys[c]
302 |       lne = ''
303 |       for j in range(0,ncols):
304 |         try:
305 |           tmp = np.dot(conversion,self.atoms[atom_keys[c + j]])
306 |           lne += self.atomindex.sanitize(atom_keys[c+j]) + " & " + ' & '.join([str(round(j,3)) for j in tmp]) + ' '
307 |           if (ncols > 1) & j < ncols -1: lne += '&'
308 |         except:
309 |           lne += ' & & &'
310 |       #params_1 += self.atomindex.sanitize(i) + " & " + ' & '.join([str(round(j,3)) for j in tmp]) + '\\\\ \n'
311 |       params_1 += lne + '\\\\ \n'
312 |     print(t1.replace('PARAMS',params_1).replace('CAPTION',caption + ' atomic positions. Here positions are shown in fractional coordinates'))
313 | 
  def File_Process(self,filestring):
    """Parse the text file ``filestring`` line by line into this structure.

    Each line is lower-cased and tested against a series of substrings; a
    match stores the corresponding value (cell, atoms, cutoffs, energy, ...)
    on ``self``. Header-style values are only looked for in the first 1000
    lines. ``Normalize`` is called once the whole file has been read.
    The file is presumably a Quantum ESPRESSO pw.x input or output file
    (TODO confirm against the callers).

    Raises IOError if the file cannot be opened.
    """
    try:
      f = open(filestring,'r')
    except:
      print("Cannot open %s" % filestring)
      #sys.exit(1)
      raise IOError
    linenum = 0
    for i in f:
      # Everything below matches against the lower-cased line.
      # NOTE(review): needles that contain upper-case letters ("Program
      # PWSCF", "number of Kohn-Sham states", "Exchange-correlation",
      # "End of self-consistent calculation", "the Fermi energy is") can
      # therefore never match, so those branches are currently dead.
      i = i.lower()
      #if "ERROR" in i.upper():
        #print("There is an error in this calculation")
        #sys.exit(2)
      # Header section: only examined for the first 1000 lines of the file.
      if linenum < 1000:
        # NOTE(review): matches ANY line containing the substring "nat" and
        # concatenates every digit on it; int('') raises ValueError when such
        # a line has no digits.
        if 'nat' in i.lower():
          self.natoms = int(''.join(zz for zz in i.strip() if zz.isdigit()))
        if "lattice parameter (a_0)" in i:
          self.alat = float(i.split()[5])
        if "number of k points=" in i:
          self.kpts = int(i.split()[4])
          # NOTE(review): a bare "next" only names the builtin and does
          # nothing; it does not skip to the next line (same for every bare
          # "next" below).
          next
        if "Program PWSCF" in i:
          self.version = i.split()[2].replace('v.','')
          next
        if "lattice parameter (alat)" in i:
          # Value on the line is multiplied by BOHRtoA before being stored.
          self.alat = float(i.split()[4])*self.BOHRtoA
          next
        if "number of Kohn-Sham states" in i:
          self.bands = int(i.split()[4]) 
        if "unit-cell volume" in i and "new" not in i:
          self.volume = float(i.split()[3])*(self.BOHRtoA**3.0)
          next
        if "number of atoms/cell" in i:
          self.natoms = int(i.split()[4])
          next
        if "number of atomic types" in i:
          self.nat = int(i.split()[5])
          next
        if "number of electrons" in i:
          self.nelect = float(i.split()[4])
          next
        if "kinetic-energy cutoff" in i:
          self.Ecut = float(i.split()[3])*self.RYtoeV
          next
        if "charge density cutoff" in i:
          self.RhoCut = float(i.split()[4])*self.RYtoeV
          next
        if "convergence threshold" in i:
          # NOTE(review): with fewer than 4 tokens, index [3] raises
          # IndexError, so this branch cannot succeed as written.
          if len(i.split()) < 4:
            self.Econv = float(i.split()[3])
            next
        if "Exchange-correlation" in i:
          self.Exch = i[i.find('=') + 1:i.find('(')].rstrip()
          next
        # Lattice vectors a(1..3): strip the punctuation, then scale the
        # three numbers by alat.
        if "a(1) =" in i:
          tmp = i.replace('a(1)','').replace('(','').replace('=','').replace(',','').replace(')','').split()
          for j in range(0,3):
            self.lattice['a'][j] = self.alat*float(tmp[j])
          next
        if "a(2) =" in i:
          tmp = i.replace('a(2)','').replace('(','').replace('=','').replace(',','').replace(')','').split()
          for j in range(0,3):
            self.lattice['b'][j] = self.alat*float(tmp[j])
          next
        if "a(3) =" in i:
          tmp = i.replace('a(3)','').replace('(','').replace('=','').replace(',','').replace(')','').split()
          for j in range(0,3):
            self.lattice['c'][j] = self.alat*float(tmp[j])
          next
        if "site n.     atom                  positions (alat units)" in i:
          # The next natoms lines are consumed from the file iterator here;
          # tokens 6-8 of each are scaled by alat and token 1 is the species.
          self.atomindex.reset()
          for j in range(0,self.natoms):
            line = next(f).split()
            self.atoms[self.atomindex.key(line[1])] = np.multiply(np.array([float(line[6]),float(line[7]),float(line[8])]),self.alat)
          next
        if 'nat' in i.lower():
          self.natoms = int(''.join([zz for zz in i if zz.isdigit()]))
          next
      # From here on the checks apply to every line of the file.
      if "!" in i and "ENERGY" in i.upper():
        # Value on the line is multiplied by RYtoeV before being stored.
        self.energy= float(i.split()[4])*self.RYtoeV
      if "new unit-cell volume" in i:
        self.volume = float(i.split()[4])*(self.BOHRtoA**3)
      if "cell_parameters" in i:
        # The three lines after the card are the a, b, c vectors; they are
        # taken as-is when the card mentions "angstrom", otherwise scaled by
        # alat.
        if "angstrom" in i:
          for j in ['a','b','c']:
            line = next(f)
            tmp = line.split()
            for k in range(0,3):
              self.lattice[j][k] = float(tmp[k])
        else:
          for j in ['a','b','c']:
            line = next(f)
            tmp = line.split()
            for k in range(0,3):
              self.lattice[j][k] = self.alat*float(tmp[k])
        self.Normalize()
      if "ATOMIC_POSITIONS" in i.upper():
        # The three unit checks are independent "if"s (not elif); each one
        # that matches consumes natoms further lines.
        self.atomindex.reset()
        if "angstrom" in i:
          for j in range(0,self.natoms):
            line = next(f).split()
            self.atoms[self.atomindex.key(line[0])] = np.array([float(line[1]),float(line[2]),float(line[3])])
        if "alat" in i:
          for j in range(0,self.natoms):
            line = next(f).split()
            self.atoms[self.atomindex.key(line[0])] = np.array([self.alat*float(line[1]),self.alat*float(line[2]),self.alat*float(line[3])])
        if "crystal" in i.lower():
          # Fractional coordinates: converted with the current lattice matrix.
          conversion = self.From_Crystal()
          for j in range(0,self.natoms):
            line = next(f).split()
            tmp = np.transpose(np.array([float(line[1]),float(line[2]),float(line[3])]))
            ncoords = np.dot(conversion,tmp)
            self.atoms[self.atomindex.key(line[0])] = np.array([float(ncoords[0]),float(ncoords[1]),float(ncoords[2])])
      if "End of self-consistent calculation" in i:
        # Band block: eigenvalue lines are assumed to hold 8 values each
        # (the counters below step by 8). The loop only skips over the
        # lines; the code that stored the values is disabled inside the
        # string literal below.
        if np.floor(self.bands/8.)*8. <= self.bands:
          numlines = int(np.floor(self.bands/8.) + 1)
          remainder = int(self.bands - np.floor(self.bands/8.)*8.)
        else: 
          numlines = int(np.floor(self.bands/8.))
          remainder = 0
        self.bnddiagram = np.zeros((self.kpts,self.bands))
        counter = 0
        self.noband = False
        while counter < self.kpts:
          line = next(f)
          if "Number of k-points >=" in line: 
            self.noband = True
            break
          if "k =" in line:
            line = next(f)
            counter1 = 0
            for j in range(0,numlines):
              line = next(f)
              '''
              for k in range(0,len(line.split())):
                self.bnddiagram[counter][counter1 + k] = float(line.split()[k])
              '''
              counter1 += 8
            counter += 1  
        next
      if "highest occupied, lowest unoccupied level (ev)" in i:
        # Gap = lowest unoccupied (token 7) minus highest occupied (token 6).
        self.bandgap = float(i.split()[7]) - float(i.split()[6])
        next
      if "the Fermi energy is" in i:
        self.Fermi = float(i.split()[4])
        self.FermiTest = True
        next
      linenum += 1
    f.close()
    self.Normalize()
    # noband is forced to True here, so the band-diagram based gap estimate
    # below never runs.
    self.noband = True
    if self.FermiTest == True and self.noband == False:
      # Shift eigenvalues so the Fermi level is zero, then take for each
      # k-point the levels just below and just above zero.
      self.bnddiagram = np.subtract(self.bnddiagram,self.Fermi)
      emin = np.zeros(self.kpts)
      emax = np.zeros(self.kpts)
      counter = 0
      for j in self.bnddiagram:
        emin[counter] = j[np.searchsorted(j,  0.0,side='right')-1]
        emax[counter] = j[np.searchsorted(j,  0.0,side='right')]
        counter += 1
      self.bandgap = float(np.min(emax-emin))
475 | 
476 |   def to_JSON(self):
477 |     if self.JSON == "":
478 |       for i in self.lattice:
479 |         self.lattice[i] = self.lattice[i].tolist() 
480 |       self.bnddiagram = self.bnddiagram.tolist()
481 |       self.JSON = json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=1)
482 | 
483 |   def to_Formula(self):
484 |     from math import gcd
485 |     string = ''
486 |     cmmon = []
487 |     for i in self.atomindex.keys:
488 |       cmmon.append(self.atomindex.keys[i] + 1)
489 |     if len(cmmon) == 1:
490 |       div = cmmon[0]
491 |     else: 
492 |       div = cmmon[0]
493 |       for c in cmmon[1::]:
494 |           div = gcd(div , c)
495 |     for i in self.atomindex.keys:
496 |       t_ = str(int((self.atomindex.keys[i]+1)/div))
497 |       if t_ == '1': t_ = ''
498 |       else: t_ = '$_' + t_ + '$'
499 |       string += i + t_
500 |     return string
501 | 
502 |   def to_database(self):
503 |     con = None
504 |     try:
505 |       con = lite.connect('QE.db')
506 |       cur = con.cursor()    
507 |       insert_command = 'INSERT INTO QE(A,B,C,ALPHA,BETA,GAMMA,VOLUME,NATOMS,FORMULA,BANDGAP,ENERGY,PROGRAM,VERSION) VALUES ('
508 |       for i in('a','b','c'):
509 |         insert_command += str(self.norms[i]) + ','
510 |       for i in('alpha','beta','gamma'):
511 |         insert_command += str(self.angles[i]) + ','
512 |       insert_command += str(self.volume) + ',' +  str(self.natoms) + ',"' + self.to_Formula() + '",' + str(self.bandgap) + ',' + str(self.energy) + ',"QE",' + '"' + self.version + '");'
513 |       cur.execute(insert_command)
514 |       lid = cur.lastrowid
515 |       JSON = self.to_JSON()
516 |       insert_command = "INSERT INTO RAW_DATA VALUES (" + str(lid) + ",'" +  self.JSON + "');"
517 |       cur.execute(insert_command)
518 |       self.to_File(lid)
519 |     except lite.Error:
520 |       print("Unable to insert into the database")
521 |       #sys.exit(3)
522 |       raise IOError
523 |     finally:
524 |       if con:
525 |         con.commit()
526 |         con.close()
527 | 
528 |   def Normalize(self):
529 |     try:
530 |       for i in ['a','b','c']:
531 |         self.norms[i] = np.linalg.norm(self.lattice[i])
532 |       self.angles['alpha'] = np.arccos(np.dot(self.lattice['b'],self.lattice['c'])/(self.norms['c']*self.norms['b'])) * 180./np.pi
533 |       self.angles['gamma'] = np.arccos(np.dot(self.lattice['a'],self.lattice['b'])/(self.norms['a']*self.norms['b'])) * 180./np.pi
534 |       self.angles['beta'] = np.arccos(np.dot(self.lattice['a'],self.lattice['c'])/(self.norms['a']*self.norms['c'])) * 180./np.pi
535 |       self.volume = np.dot(self.lattice['a'],np.cross(self.lattice['b'],self.lattice['c']))
536 |     except:
537 |       print("Lattice undefined")
538 |       raise ValueError
539 |       #sys.exit(4)
540 | 
541 |   def to_File(self,lid):
542 |     with open(str(lid) + '.json','w') as f:
543 |       f.write(self.JSON)
544 |   
545 |   def to_XML(self,fname):
546 |     root = ET.Element("Root") 
547 |     DIRECT = ET.SubElement(root,'DIRECT_LATTICE_VECTORS')
548 |     UNITS = ET.SubElement(DIRECT,'UNITS_FOR_DIRECT_LATTICE_VECTORS')
549 |     UNITS.set('UNITS',"Bohr")
550 |     lattice = {}
551 |     convert = {'a1':'a','a2':'b','a3':'c'}
552 |     for i in range(1,4):
553 |       key = 'a' + str(i)
554 |       lattice[key] = ET.SubElement(DIRECT,key)
555 |       lattice[key].set('type','real')
556 |       lattice[key].set('size','3')
557 |       lattice[key].set('columns','3')
558 |       text = ' '.join([str(1.88973*x) for x in self.lattice[convert[key]]]) 
559 |       lattice[key].text = text
560 |     IONS = ET.SubElement(root,'IONS')
561 |     NA = ET.SubElement(IONS,'NUMBER_OF_ATOMS')
562 |     NA.set('type','integer')
563 |     NA.set('size','1')
564 |     NA.text=str(self.natoms)
565 |     NA = ET.SubElement(IONS,'NUMBER_OF_SPECIES')
566 |     NA.set('type','integer')
567 |     NA.set('size','1')
568 |     NA.text = str(self.nat)
569 |     NA = ET.SubElement(IONS,'UNITS_FOR_ATOMIC_POSITIONS')
570 |     NA.set('UNITS','bohr')
571 |     index = 1
572 |     tun = {}
573 |     counter = 1
574 |     for i in self.atoms:
575 |       if self.atomindex.sanitize(i) not in tun:
576 |         tun[self.atomindex.sanitize(i)] = str(index)
577 |         index += 1
578 |       ATOM = ET.SubElement(IONS,'ATOM.' + str(counter))
579 |       ATOM.set('SPECIES',self.atomindex.sanitize(i) + " ")
580 |       ATOM.set('INDEX',tun[self.atomindex.sanitize(i)])
581 |       text = ' '.join([str(x*1.88973) for x in self.atoms[i]])
582 |       ATOM.set('tau',text)
583 |       ATOM.set('if_pos',"1 1 1")
584 |       counter += 1
585 |     TE = ET.SubElement(root,'TOTAL_ENERGY')
586 |     TE.set('UNITS','eV')
587 |     TE.text = str(self.energy)
588 |     f = ET.ElementTree(root)
589 |     f.write(fname + '.xml',pretty_print=True)
590 | 
591 |   def to_Supercell(self,array,symm=False):
592 |     if isinstance(array,list):
593 |       tmp = copy.deepcopy(self)
594 |       conversion = np.linalg.inv(tmp.From_Crystal())
595 |       for i in tmp.atoms:
596 |         dot = np.dot(conversion,tmp.atoms[i])
597 |         tmp.atoms[i] = dot
598 |       COORDs = copy.copy(tmp.atoms)
599 |       if symm:
600 |         r1 = range(-array[0],array[0] + 1)
601 |         r2 = range(-array[1],array[1] + 1)
602 |         r3 = range(-array[2],array[2] + 1)
603 |       else:
604 |         r1 = range(0,array[0])
605 |         r2 = range(0,array[1])
606 |         r3 = range(0,array[2])
607 |       for i in tmp.atoms:
608 |         for j in r1:
609 |           for k in r2:
610 |             for z in r3:
611 |               if (j == 0) and (k == 0) and (z == 0): #We already have the (0,0,0) structure
612 |                 next
613 |               else:
614 |                 COORDs[tmp.atomindex.key(tmp.atomindex.sanitize(i))] = np.add(tmp.atoms[i],np.array([j,k,z]))
615 |       for i in COORDs:
616 |         COORDs[i] = np.dot(tmp.From_Crystal(),COORDs[i])
617 |       tmpmap = {'a':0,'b':1,'c':2}
618 |       array = [float(xx) for xx in array]
619 |       for i in tmp.lattice:
620 |         #tmp.lattice[i] = np.array([tmp.lattice[i][0]*float(array[tmpmap[i]]), tmp.lattice[i][1]*float(array[tmpmap[i]]), tmp.lattice[i][2]*float(array[tmpmap[i]])])
621 |         tmp.lattice[i] = tmp.lattice[i]*array[tmpmap[i]]
622 |       tmp.atoms = COORDs
623 |       tmp.natoms = tmp.natoms*(array[0]*array[1]*array[2])
624 |       indexing = ['a','b','c']
625 |       for i in indexing:
626 |         tmp.norms[i] *= array[indexing.index(i)]
627 |       tmp.volume = tmp.volume*(array[0]*array[1]*array[2]) 
628 |       tmp.energy = tmp.energy*(array[0]*array[1]*array[2])
629 |       return tmp
630 |     else:
631 |       print("Invalid supercell dimensions")
632 |   
633 |   def __str__(self):
634 |     return self.print()
635 |   
636 |   def __eq__(self,other):
637 |     if type(other) == type(self):
638 |       diff = []
639 |       keys_ = []
640 |       l = sorted(self.atoms.items(),key=lambda x: (x[1][0],x[1][1],x[1][2]))
641 |       r = sorted(other.atoms.items(),key=lambda x: (x[1][0],x[1][1],x[1][2]))
642 |       for c,i in enumerate(l):
643 |         diff.append(np.linalg.norm(i[1] - r[c][1]))
644 |         keys_.append(self.atomindex.sanitize(i[0]) == self.atomindex.sanitize(r[c][0]))
645 |       m = np.max(np.abs(diff))
646 |       if m > 1e-3: return False
647 |       if False in keys_: return False
648 |       diff = []
649 |       for c,i in enumerate(self.lattice): 
650 |         diff.append(np.linalg.norm(self.lattice[i] - other.lattice[i]))
651 |       m = np.max(np.abs(diff))
652 |       if m > 1e-3: return False
653 |     else: 
654 |       return False
655 |     return True
656 | 
def main(command):
  """Parse one calculation and store it in the database.

  command -- sequence ``[path, email]``. ``path`` is a file (parsed with
             ``File_Process``) or a directory (parsed with ``XML_Process``).

  The e-mail address is validated before any parsing starts, so a bad
  address fails immediately instead of after the whole file has been read.

  Raises ValueError for an address without "@", and FileNotFoundError when
  ``path`` is neither a file nor a directory.
  """
  path, email = command[0], command[1]
  if "@" not in email:
    print("Invalid Email Supplied")
    #sys.exit(5)
    raise ValueError("Invalid Email Supplied")
  test = Struct()
  test.email = email
  if os.path.isfile(path):
    test.File_Process(path)
  elif os.path.isdir(path):
    print("Is Dir//process xml here")
    test.XML_Process(path)
    print(test.lattice)
  else:
    # Previously an empty structure was handed on to the database step.
    raise FileNotFoundError("No such file or directory: " + str(path))
  test.to_database()
672 | 
if __name__ == "__main__":
  # Exactly two positional arguments are expected: the path to process and
  # an e-mail address.
  arguments = sys.argv[1:]
  if len(arguments) != 2:
    print("Incorrect number of arguments, run as ./QE.py QEOUTPUT_FILE EMAIL")
    sys.exit(6)
  main(arguments)
680 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This is a simple repository to keep track of changes across multiple servers
2 | 
3 | This code is not intended for production; it is purely for research purposes and, as such, is not commented for wide use.
4 | 


--------------------------------------------------------------------------------
/bandgapoccu.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import numpy as np
 3 | import sys
 4 | 
 5 | vbm = None
 6 | cbm = None
 7 | bg = None
 8 | class bg:
 9 |   def __init__( self, direct):
10 |     self.vbm = None
11 |     self.cbm = None
12 |     self.bg = None
13 |     self.Fermi = None
14 |     self.occu = None
15 |     self.energy = None
16 |     self.metallic = False
17 |     self.get_bandgap(direct)
18 |   
19 |   def __call__ ( self,direct):
20 |     self.get_bandgap(direct)
21 | 
22 |   def __iter__(self):
23 |     yield 'gap', self.bg
24 |     yield 'cbm', self.cbm
25 |     yield 'vbm', self.vbm
26 |   
27 |   def get_bandgap(self,direct):
28 |     self.occu = []
29 |     self.energy = []
30 |     f = open(direct)
31 | 
32 |     for i in f:
33 |       if "End of self-consistent" in i:
34 |         self.occu = []
35 |         self.energy = []
36 |       if ' k =' in i:
37 |         data = []
38 |         i = next(f)
39 |         i = next(f)
40 |         while len(i.split()) > 0:
41 |           for jj in i.split():
42 |             try:
43 |               data.append(float(jj))
44 |             except ValueError:
45 |               jjt = [xx for xx in jj.split('-') if xx]
46 |               for xx in jjt:
47 |                 data.append(-float(xx))
48 |           i = next(f)
49 |         self.energy.append(data)
50 |       if 'occupation numbers' in i:
51 |         data = []
52 |         while len(i.split()) > 0:
53 |           i = next(f)
54 |           for jj in i.split():
55 |             data.append(float(jj.strip()))
56 |         self.occu.append(data)
57 |       if 'Fermi' in i:
58 |         self.Fermi = float(i.split()[4])
59 |     f.close() 
60 |     minmax = []
61 |     metallic = False
62 |     if len(self.occu) > 0:
63 |       for c,i in enumerate(self.occu):
64 |         t = np.array(i)
65 |         if 0.0 in t:
66 |           idx = np.where(t==0.0)[0][0]
67 |           te = np.array(self.energy[c])
68 |           minmax.append([te[idx-1],te[idx]])
69 |         else:
70 |           metallic = True 
71 |       if not metallic:
72 |         minmax = np.array(minmax)
73 |         self.vbm = np.max(minmax[:,0])
74 |         self.cbm = np.min(minmax[:,1])
75 |         self.bg = self.cbm - self.vbm
76 |       else:
77 |         self.vbm = None
78 |         self.cbm = None
79 |         self.metallic = True
80 |         self.bg = 0.0
81 |     else:
82 |       minmax = []
83 |       for c,i in enumerate(self.energy):
84 |         t = np.array(i)
85 |         t = np.subtract(i,self.Fermi)
86 |         m_ = np.argwhere(t>0)
87 |         minmax.append([t[m_[0][0]-1],t[m_[0][0]]]) 
88 |       minmax = np.array(minmax)
89 |       self.vbm = np.max(minmax[:,0])
90 |       self.cbm = np.min(minmax[:,1])
91 |       self.bg = self.cbm - self.vbm
92 | 
if __name__ == "__main__":
  # Command-line use: print the gap parsed from the output file named in argv[1].
  import sys
  print(bg(sys.argv[1]).bg)
97 | 


--------------------------------------------------------------------------------
/color.key:
--------------------------------------------------------------------------------
  1 | H	#FFFFFF
  2 | He	#D9FFFF
  3 | Li	#CC80FF
  4 | Be	#C2FF00
  5 | B	#FFB5B5
  6 | C	#909090
  7 | N	#3050F8
  8 | O	#FF0D0D
  9 | O2	#FFAE00
 10 | F	#90E050
 11 | Ne	#B3E3F5
 12 | Na	#AB5CF2
 13 | Mg	#8AFF00
 14 | Al	#BFA6A6
 15 | Si	#F0C8A0
 16 | P	#FF8000
 17 | S	#FFFF30
 18 | Cl	#1FF01F
 19 | Ar	#80D1E3
 20 | K	#8F40D4
 21 | Ca	#3DFF00
 22 | Sc	#E6E6E6
 23 | Ti	#BFC2C7
 24 | Ti1	#BFC2C7
 25 | Ti2	#BFC2C7
 26 | V	#A6A6AB
 27 | V1	#A6A6AB
 28 | V2	#A6A6AB
 29 | Cr	#8A99C7
 30 | Cr1	#8A99C7
 31 | Cr2	#8A99C7
 32 | Mn	#9C7AC7
 33 | Mn1	#9C7AC7
 34 | Mn2	#9C7AC7
 35 | Fe	#FFA800
 36 | Fe1	#FFA200
 37 | Fe2	#FFD200
 38 | Co	#F090A0
 39 | Co1	#05004C
 40 | Co2	#388786
 41 | Co3	#67CAC9
 42 | Ni	#50D050
 43 | Ni1	#50D050
 44 | Ni2	#50D050
 45 | Cu	#808080
 46 | Cu1	#808080
 47 | Cu2	#606060
 48 | Zn	#7D80B0
 49 | Ga	#C28F8F
 50 | Ge	#668F8F
 51 | As	#BD80E3
 52 | Se	#FFA100
 53 | Br	#A62929
 54 | Kr	#5CB8D1
 55 | Rb	#702EB0
 56 | Sr	#00FF00
 57 | Y	#94FFFF
 58 | Zr	#94E0E0
 59 | Nb	#73C2C9
 60 | Mo	#54B5B5
 61 | Tc	#3B9E9E
 62 | Ru	#248F8F
 63 | Rh	#0A7D8C
 64 | Pd	#006985
 65 | Ag	#C0C0C0
 66 | Cd	#FFD98F
 67 | In	#A67573
 68 | Sn	#668080
 69 | Sb	#9E63B5
 70 | Te	#D47A00
 71 | I	#940094
 72 | Xe	#429EB0
 73 | Cs	#57178F
 74 | Ba	#00C900
 75 | La	#70D4FF
 76 | Ce	#FFFFC7
 77 | Pr	#D9FFC7
 78 | Nd	#C7FFC7
 79 | Pm	#A3FFC7
 80 | Sm	#8FFFC7
 81 | Eu	#61FFC7
 82 | Gd	#45FFC7
 83 | Tb	#30FFC7
 84 | Dy	#1FFFC7
 85 | Ho	#00FF9C
 86 | Er	#00E675
 87 | Tm	#00D452
 88 | Yb	#00BF38
 89 | Lu	#00AB24
 90 | Hf	#4DC2FF
 91 | Ta	#4DA6FF
 92 | W	#2194D6
 93 | Re	#267DAB
 94 | Os	#266696
 95 | Ir	#175487
 96 | Pt	#D0D0E0
 97 | Au	#FFD123
 98 | Hg	#B8B8D0
 99 | Tl	#A6544D
100 | Pb	#575961
101 | Bi	#9E4FB5
102 | Po	#AB5C00
103 | At	#754F45
104 | Rn	#428296
105 | Fr	#420066
106 | Ra	#007D00
107 | Ac	#70ABFA
108 | Th	#00BAFF
109 | Pa	#00A1FF
110 | U	#008FFF
111 | Np	#0080FF
112 | Pu	#006BFF
113 | Am	#545CF2
114 | Cm	#785CE3
115 | Bk	#8A4FE3
116 | Cf	#A136D4
117 | Es	#B31FD4
118 | Fm	#B31FBA
119 | Md	#B30DA6
120 | No	#BD0D87
121 | Lr	#C70066
122 | Rf	#CC0059
123 | Db	#D1004F
124 | Sg	#D90045
125 | Bh	#E00038
126 | Hs	#E6002E
127 | Mt	#EB0026
128 | 


--------------------------------------------------------------------------------
/default.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import json as json
4 | import pickle as pkl
5 | import functions as fun
6 | 


--------------------------------------------------------------------------------
/early_stop.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import os
  4 | from functions import prophet_map as pm
  5 | import re
  6 | import sqlite3
  7 | import pandas as pd
  8 | import numpy as np
  9 | import os
 10 | import pickle as pkl
 11 | from functions import get_network_info as get_net
 12 | 
 13 | def to_pd(d_,train,df=None,f=None):
 14 |   if df is not None:
 15 |     print(d_)
 16 |     for i in d_:
 17 |       df.loc[i,'target'] = d_[i]['target']
 18 |       df.loc[i,'prediction'] = d_[i]['prophet']
 19 |       if 'train' in list(d_[i].keys()):
 20 |         df.loc[i,'train'] = d_[i]['train']
 21 |       else:
 22 |         df.loc[i,'train'] = train
 23 |   else:
 24 |     for i in d_:
 25 |       f.write(','.join([str(zz) for zz in [d_[i]['target'],d_[i]['prophet'],train]]) + '\n')
 26 | 
 27 | def analysis(s):
 28 |   d = []
 29 |   s_ = s.read().split('\n')
 30 |   try:
 31 |     for c,i in enumerate(s_):
 32 |       if 'System         Prediction       Target' in i:
 33 |         c += 2
 34 |         i = s_[c]
 35 |         while len(i.split()) > 0:
 36 |           d.append([float(i.split()[1]),float(i.split()[2])])
 37 |           c += 1
 38 |           i = s_[c]
 39 |         break
 40 |     d = np.array(d)
 41 |     del_ = np.max(np.abs(d[:,0] - d[:,1]))
 42 |     rmse = np.sqrt(np.sum((d[:,0]-d[:,1])**2)/len(d))
 43 |   except:
 44 |     print(s_)
 45 |     raise "error with PROPhet"
 46 |   return del_,rmse
 47 | 
 48 | def convert(s,include,chkpoint):
 49 |   f = open(s)
 50 |   inc = False
 51 |   chk = False
 52 |   val_f = ""
 53 |   for i in f:
 54 |     if 'checkpoint_in' in i.lower():
 55 |       val_f += 'checkpoint_in = ' + chkpoint + '\n'
 56 |       chk = True
 57 |     elif 'include' in i.lower():
 58 |       val_f += 'include = ' + include + '\n'
 59 |       inc = True
 60 |     else:
 61 |       val_f += i
 62 |   if not inc:
 63 |     val_f += 'include = ' + include + '\n'
 64 |   if not chk:
 65 |     val_f += 'checkpoint_in = ' + chkpoint + '\n'
 66 |   f.close()
 67 |   return val_f
 68 | 
 69 | def get_restart(s):
 70 |   f = open(s)
 71 |   nsave = None 
 72 |   checkpoint = None
 73 |   nint = None 
 74 |   for i in f:
 75 |     if i[0] == '#': continue
 76 |     if 'nsave' in i.lower():
 77 |       nsave = i[i.find('=') + 1:].strip()
 78 |     if 'checkpoint_out' in i.lower():
 79 |       checkpoint = i[i.find('=') + 1:].strip()
 80 |     if 'niterations' in i.lower():
 81 |       nint = i[i.find('=') + 1:].strip()
 82 |   f.close()
 83 |   return nsave,checkpoint,nint
 84 | 
 85 | def process(fname,bout=None,df=None,executable='PROPhet',np=32,db=None,d=None):
 86 |   if bout is not None:
 87 |     if d is None: 
 88 |       t = []
 89 |       f = open(bout)
 90 |       for i in f:
 91 |         if 'Iteration   ' in i:
 92 |           i = next(f)
 93 |           i = next(f)
 94 |           while len(i.split()) == 4:
 95 |             try:
 96 |               t.append(i.split())
 97 |               i = next(f)
 98 |             except: break
 99 |       f.close()
100 |       t = sorted(t,key= lambda x: float(x[2]))
101 |       d = [(int(t[0][0]),)]
102 |     nsave,checkpoint,nint = get_restart(fname)
103 |     print(nsave,checkpoint,nint)
104 |     nsave = int(nsave)
105 |     c = int(d[0][0])
106 |     correct = round((c/nsave)+1)*nsave
107 |     valf = convert(fname,'train.dat','FILE')
108 |     if not os.path.isfile(checkpoint + '_' + str(correct)): 
109 |       if os.path.isfile(checkpoint + '_' +  str(int(correct) - int(nsave))):
110 |         correct = correct - nsave
111 |         chkpoint = checkpoint + '_' + str(correct)
112 |       elif int(correct) - int(nsave) == int(nint):
113 |         chkpoint = checkpoint
114 |       else:  
115 |         raise ValueError(correct)
116 |     else: chkpoint = checkpoint + '_' + str(correct)
117 |     f = open('val_temp','w')
118 |     f.write(valf.replace('FILE',chkpoint))
119 |     f.close()
120 |     #t = os.popen('mpirun -np {np} {prop} -in val_temp -validate | tee train.dat.out'.format(prop=executable,np=np))
121 |     t = os.popen('mpirun -np 32 PROPhet -in val_temp -validate | tee train.dat.out'.format(prop=executable,np=np)).read()
122 |     #print(t)
123 |     #funct = open(checkpoint + '_' + str(correct)).read()
124 |     funct = open(chkpoint).read()
125 |     t_file = ['train.dat']
126 |     to_pkl(db=db,df=df,t_file=t_file,funct=funct)
127 |     return
128 |   np = str(np)
129 |   nsave,checkpoint,nint = get_restart(fname)
130 |   valf = convert(fname,'val.dat','FILE')
131 |   len_ = len(open('val.dat').read().split('\n')[:-1])
132 |   np = str(len_) if len_ < 32 else str(32)
133 |   if d is None:
134 |     out = open('earlystop.out','w')
135 |     d = []
136 |     out.write('step,rmse,max\n')
137 |     for i in range(100,int(nint),int(nsave)):
138 |       if not os.path.isfile(checkpoint + '_' + str(i)):
139 |         break
140 |       f = open('val_temp','w')
141 |       f.write(valf.replace('FILE',checkpoint + '_' + str(i)))
142 |       f.close()
143 |       t = os.popen('mpirun -np {np} {prop} -in val_temp -validate'.format(prop=executable,np=np))
144 |       del_,rmse = analysis(t)
145 |       d.append((i,rmse,del_))
146 |       print(i,rmse,del_)
147 |     d = sorted(d,key=lambda x: x[1])
148 |     for i in d:
149 |       out.write(','.join([str(zz) for zz in i]) + '\n')
150 |     out.close()
151 |   f = open('val_temp','w')
152 |   f.write(valf.replace('FILE',checkpoint + '_' + str(d[0][0])))
153 |   f.close()
154 |   t = os.popen('mpirun -np {np} {prop} -in val_temp -validate > val.dat.out'.format(prop=executable,np=np)).read()
155 |   f = open('train_temp','w')
156 |   f.write(valf.replace('FILE',checkpoint + '_' + str(d[0][0])).replace('val.dat','train.dat'))
157 |   f.close()
158 |   t = os.popen('mpirun -np {np} {prop} -in train_temp -validate > train.dat.out'.format(prop=executable,np=np)).read()
159 |   f = open('test_temp','w')
160 |   f.write(valf.replace('FILE',checkpoint + '_' + str(d[0][0])).replace('val.dat','test.dat'))
161 |   f.close()
162 |   t = os.popen('mpirun -np {np} {prop} -in test_temp -validate > test.dat.out'.format(prop=executable,np=np)).read()
163 |   t_file = ['train.dat','val.dat','test.dat']
164 |   #t_out = ['train.dat.out','val.dat.out','test.dat.out']
165 |   #flag = ['train','val','test']
166 |   funct = open(checkpoint + '_' + str(d[0][0])).read()
167 |   to_pkl(db=db,df=df,t_file=t_file,funct=funct)
168 | 
def to_pkl(db=None,fname='bfgs_file',df=None,t_file=['train.dat'],f=None,funct=None):
  """Collect PROPhet predictions and persist them.

  For every name in t_file the pair (<name>.out, <name>) is parsed with
  prophet_map and labelled with the name minus '.dat'.

  * df given: the results are merged into df (to_pd), rows with missing
    values are dropped, the frame is written to data.csv and, when db is
    given, stored in the pickle database under the current working
    directory together with the network description read from fname and
    `funct`.
  * df is None: a CSV header and the results are written to the open file f.
    (This branch used to reference undefined names and always failed.)

  t_file is only read, never modified, so the list default is safe.
  """
  if df is not None:
    for i in t_file:
      to_pd(pm(i + '.out',i),i.replace('.dat',''),df=df)
    df = df.dropna()
    t = get_net(fname=fname)
    if db is not None:
      # NOTE: unpickling runs arbitrary code; db must be a trusted file
      with open(db,'rb') as src:
        F_pkl = pkl.load(src)
      F_pkl[os.getcwd()] = {'description':t,'df':df.T.to_dict(),'functional':funct} #storing the dataframe as dict for version control
      with open(db,'wb') as dst:
        pkl.dump(F_pkl,dst)
    df.to_csv('data.csv')
  else:
    f.write('target,prediction,train\n')
    for i in t_file:
      to_pd(pm(i + '.out',i),i.replace('.dat',''),f=f)
185 | 
def construct_df(j):
  """Load JSON records from j into a frame indexed by 'location'.

  Empty (None) 'target', 'prediction' and 'train' columns are added.
  Raises ValueError when j is None.
  """
  if j is None:
    raise ValueError('json file does not exist',j)
  frame = pd.read_json(j).set_index('location')
  for column in ('target','prediction','train'):
    frame[column] = None
  return frame
196 | 
def split_val(df,val_file='val.dat'):
  """Split the systems listed in val_file evenly into validation and test.

  val_file holds one df index label per line.  Within every (phase, dopant)
  group a random half (DataFrame.sample(frac=0.5)) stays in the validation
  set and the rest goes to the test set.  val_file is then overwritten with
  the validation labels and test.dat is written with the test labels.

  Fixes over the previous version: the file is actually read (a file object
  has no .split), val_file is honoured, and the removed pandas APIs
  DataFrame.ix / DataFrame.append are no longer used.
  """
  with open(val_file) as f:
    t = f.read().split('\n')[:-1]
  d = df.loc[t]
  val_labels = []
  test_labels = []
  for i in d.phase.unique():
    for j in d[d.phase == i].dopant.unique():
      v_t = d[(d.phase == i) & (d.dopant == j)]
      vt = v_t.sample(frac=0.5)
      val_labels.extend(vt.index)
      test_labels.extend(v_t.drop(vt.index).index)
  for name,labels in ((val_file,val_labels),('test.dat',test_labels)):
    with open(name,'w') as f:
      for i in labels:
        f.write(i + '\n')
217 |   
if __name__ == "__main__":
  # Script entry: build the frame from a JSON record file, then validate the
  # checkpoint selected from the 'train.bfgs' log and store the result in the
  # pickle database.  All paths are hard-coded to the author's data tree.
  # Earlier (non-HSE) data set, kept for reference:
  #df = construct_df('/data/llentz/Charge-Density/no_Phosphate/data/all.json')
  #d = process('bfgs_file',df=df,executable='PROPhet',db='/data/llentz/codeplayground/data/Database.pkl')
  df = construct_df('/data/llentz/Charge-Density/HSE/data/all.hse.json')
  # process() has no return statement with a value, so d is always None
  d = process('bfgs_file',df=df,executable='PROPhet',db='/data/llentz/Charge-Density/HSE/data/database.hse.pkl',bout='train.bfgs')
223 | 


--------------------------------------------------------------------------------
/extractor.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | import glob
 4 | import QE
 5 | from bandgapoccu import bg as BG
 6 | import os
 7 | import json
 8 | import re
 9 | 
def separate_aimd(fname='scf.out',tempfile='OUT.out',jfile='data.json'):
  """Split a QE AIMD output into per-step records and dump them to jfile.

  fname     QE output file of the AIMD run
  tempfile  scratch file; header + one step is written here and re-parsed
  jfile     JSON output (list of dicts, rewritten after every step)

  Each record holds 'str', 'atoms', 'cell' (from QE.Struct), 'displ' (atom
  positions of the first step minus the current ones), 'energy', 'gap' (from
  bandgapoccu.bg) and 'temp' ('NA' when no temperature is available for the
  step).  A step ends at the line containing '!    total energy'.
  """
  with open(fname) as src:
    temp = [zz.split()[2] for zz in src.read().split('\n') if 'temperature' in zz]
  # raw string: '\d' in a plain literal is an invalid escape sequence
  temp = [float(zz) for zz in temp if re.match(r'\d+.\d+$',zz)]

  data = []
  with open(fname,'r') as scf:
    header = ''
    for i in scf:
      if 'PseudoPot' in i: break
      header += i

    rest = header + '\n\n'
    tmp = rest
    cnt = 1
    base = QE.Struct()
    try:
      for i in scf:
        # accumulate lines up to (and including) the total-energy line
        while '!    total energy' not in i:
          i = next(scf)
          tmp += i
        t_ = {}
        with open(tempfile,'w') as output:
          output.write(tmp)
        x = QE.Struct()
        x.File_Process(tempfile)
        atoms,cell = x.return_params()
        if cnt == 1: base.File_Process(tempfile)
        displ = {}
        for zz in base.atoms:
          displ[zz] = list(base.atoms[zz]-x.atoms[zz])
        t_['str'] = x.print()
        t_['atoms'] = atoms
        t_['cell'] = cell
        t_['displ'] = displ
        t_['energy'] = x.energy
        del x
        Gap = BG(tempfile)
        t_['gap'] = Gap.bg
        try:
          t_['temp'] = temp[cnt - 1]
        except IndexError:
          t_['temp'] = 'NA'
        cnt += 1
        tmp = rest
        data.append(t_)
        with open(jfile, 'w') as outfile:
          json.dump(data, outfile)
        i = next(scf)
    except StopIteration:
      # input exhausted in the middle of a step: keep what was collected
      with open(jfile, 'w') as outfile:
        json.dump(data, outfile)
if __name__ == '__main__':
  # Command-line use: the QE output file is the first argument.
  qe_output = sys.argv[1]
  separate_aimd(fname=qe_output)
76 | 
77 | 


--------------------------------------------------------------------------------
/functions.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import pandas as pd
  4 | import json
  5 | import copy
  6 | 
  7 | def get_pdos(key,directory = '.'):
  8 |   d = []
  9 |   cnt = []
 10 |   files = glob.glob(directory + '/' + '*(' + key + ')*')
 11 |   for i in files:
 12 |     lb = i.find('#') + 1
 13 |     rb = i.find('(',lb)
 14 |     if i[lb:rb] not in cnt: cnt.append(i[lb:rb])
 15 |   for i in [files[0]]:
 16 |     f = open(i)
 17 |     next(f)
 18 |     next(f)
 19 |     d_ = []
 20 |     for jj in f:
 21 |       d_.append([float(zz) for zz in [jj.split()[0],jj.split()[1]]])
 22 |     f.close()
 23 |     if len(d) == 0: d = np.array(d_)
 24 |     else:
 25 |       d[:,1] += np.array(d_)[:,1]
 26 |   return d.tolist(),len(cnt)
 27 | 
 28 | def get_files(directory = '.'):
 29 |   files = glob.glob(directory + '/' + '*pdos_atm*')
 30 |   pdos = {}
 31 |   cnt = {}
 32 |   for i in files:
 33 |     lb = i.find('(')
 34 |     rb = i.find(')',lb)
 35 |     key = i[lb+1:rb]
 36 |     if key not in pdos:
 37 |       pdos[key],cnt[key] = get_pdos(key,directory=directory)
 38 |   return pdos,cnt
 39 | 
 40 | def get_fermi(fname='vc-relax.out',directory = '.'):
 41 |   f = open(directory + '/' + fname)
 42 |   fermi = []
 43 |   for i in f:
 44 |     if 'Fermi' in i:
 45 |       fermi.append(float(i.split()[-2]))
 46 |   return fermi
 47 | 
 48 | def prophet_map(pname,tname):
 49 |   '''This is a conversion routine to convert PROPhet out put into a dictionary with the PK being the directory'''
 50 |   try:
 51 |     d_ = open(tname).read().split('\n')[:-1]
 52 |     d = []
 53 |     train = []
 54 |     for i in d_: 
 55 |       if i is not '':
 56 |         i_ = i.split()
 57 |         d.append(i_[0])
 58 |         if len(i_) > 1:
 59 |           train.append(i_[1])
 60 |         else:
 61 |           train.append(i_[0])
 62 |     d_ = d
 63 |   except:
 64 |     print('error opening ',tname)
 65 |     return 0
 66 |   p_ = {}
 67 |   with open(pname,'r') as f:
 68 |     for i in f:
 69 |       while 'System' not in i: 
 70 |         i = next(f)
 71 |       i = next(f)
 72 |       i = next(f)
 73 |       cnt = 0
 74 |       while len(i.split()) > 0:
 75 |         if 'warning' in i.lower(): continue
 76 |         s_ = i.split()
 77 |         t_ = {'prophet':float(s_[1]),'target':float(s_[2]),'train':train[cnt]}
 78 |         p_[d_[cnt]] = t_
 79 |         cnt += 1
 80 |         i = next(f)
 81 |       break
 82 |   return p_
 83 | 
 84 | def subscript(string):
 85 |   t = ''
 86 |   for i in string:
 87 |     if i.isdigit(): t+= '$_' + i + '$'
 88 |     else: t += i
 89 |   return t
 90 | 
 91 | def prophet_list(pname):
 92 |   try:
 93 |     d_ = open(pname)
 94 |     t = []
 95 |     for i in d_:
 96 |       if '-----------------------' in i:
 97 |         i = next(d_)
 98 |         while len(i.split()) > 0:
 99 |           if len(i.split()) == 5:
100 |             t_ = i.split()
101 |             t.append({'prediction':float(t_[1]),'target':float(t_[2]),'natom':int(t_[3]),'train':t_[4]})
102 |           i = next(d_)
103 |     d_.close()
104 |     return t
105 |   except:
106 |     print('error opening ',pname)
107 |     return 0
108 |        
109 | 
def temperature(scf_file,wd):
  '''Old routine for AIMD: store per-step temperatures in the save folders.

  Every line of scf_file that contains 'temperature' and has exactly four
  tokens contributes its third token.  The n-th value (1-based) is written,
  unchanged, to <wd>/<n>.save/temperature.
  '''
  with open(scf_file) as f:
    temp = [i.split()[2] for i in f if 'temperature' in i and len(i.split()) == 4]
  for c,value in enumerate(temp):
    with open(wd + '/' + str(c+1) + '.save/temperature','w') as f_:
      f_.write(value)
122 | 
def get_network_info(fname='bfgs_file'):
  '''Extract relevant network information from a PROPhet input file.

  Returns a dict with, when present in the file:
    'network'       text after '=' on a line containing 'hidden'
    'downsample'    text after '=' on a line containing 'downsample'
    'precondition'  True only when the line contains '1' and no '#'
  The last matching line wins.
  '''
  d = {}
  with open(fname) as src:
    lines = src.read().split('\n')
  for i in lines:
    value = i[i.find('=') + 1:].strip()
    if 'hidden' in i:
      d['network'] = value
    if 'downsample' in i:
      d['downsample'] = value
    if 'precondition' in i:
      d['precondition'] = '1' in i and '#' not in i
  return d
142 | 
def construct_json(*args):
  """Concatenate the top-level items of several JSON files into one list.

  Each argument is a path; iterating the loaded document yields the items
  (list elements, or keys if the document is an object).
  """
  t = []
  for path in args:
    # context manager: the original left every file open
    with open(path) as src:
      t.extend(json.load(src))
  return t
149 | 
def rot_dir(theta,dir_ = 'x'):
  """Return the 3x3 rotation matrix for theta degrees about axis dir_.

  dir_ is 'x', 'y' or 'z'; any other value returns None.
  """
  angle = np.pi*theta/180
  cos_t = np.cos(angle)
  sin_t = np.sin(angle)
  if dir_ == 'x':
    rows = [[1,0,0],[0,cos_t,-sin_t],[0,sin_t,cos_t]]
  elif dir_ == 'y':
    rows = [[cos_t,0,sin_t],[0,1,0],[-sin_t,0,cos_t]]
  elif dir_ == 'z':
    rows = [[cos_t,-sin_t,0],[sin_t,cos_t,0],[0,0,1]]
  else:
    return None
  return np.array(rows)
160 | 
def rotate_QE(x,theta,dir_='x'):
  """Return a deep copy of structure x rotated by theta degrees about dir_.

  Both the lattice vectors (x.lattice) and every entry of x.atoms are
  multiplied by the matrix from rot_dir; x itself is left untouched.
  """
  rotated = copy.deepcopy(x)
  rot_matrix = rot_dir(theta,dir_=dir_)
  # lattice vectors as columns, rotated in one product
  cell = np.array([rotated.lattice[name] for name in rotated.lattice])
  cell = np.dot(rot_matrix,np.transpose(cell))
  for label in rotated.atoms:
    rotated.atoms[label] = np.transpose(np.dot(rot_matrix,np.transpose(rotated.atoms[label])))
  axes = ['a','b','c']
  for pos,vector in enumerate(np.transpose(cell)):
    rotated.lattice[axes[pos]] = vector
  return rotated
175 | 
def split_df(df,**kwargs):
  '''Label the rows of df as train, val or test in a new 'train' column.

  kwargs are passed to DataFrame.sample to draw the training rows (default
  frac=0.80).  Half of the remaining rows (sample(frac=0.50)) are marked
  'test' and the rest 'val'.  df is modified in place; nothing is returned.
  '''
  options = kwargs if kwargs else {'frac':0.80}
  df['train'] = None
  train_rows = df.sample(**options)
  df.loc[train_rows.index,'train'] = 'train'
  held_out = df.drop(train_rows.index)
  test_rows = held_out.sample(frac=0.50)
  df.loc[test_rows.index,'train'] = 'test'
  val_index = held_out.drop(test_rows.index).index
  df.loc[val_index,'train'] = 'val'
187 | 
def upf(stru):
  """Build '<symbol> <mass> <symbol>.upf' lines for the species of a structure.

  stru.return_params() must return (atom_text, cell); the first token of
  every line of atom_text except the last split piece is a species symbol.
  One line per distinct symbol is produced, in order of first appearance,
  joined with newlines.  Symbols outside the table (H..Am) raise KeyError.
  """
  atom,cell = stru.return_params()
  # masses kept as text so the emitted lines match the original table exactly
  masses = {
    'H': '1.0079', 'He': '4.0026', 'Li': '6.941', 'Be': '9.0122', 'B': '10.811',
    'C': '12.0107', 'N': '14.0067', 'O': '15.9994', 'F': '18.9984', 'Ne': '20.1797',
    'Na': '22.9897', 'Mg': '24.305', 'Al': '26.9815', 'Si': '28.0855', 'P': '30.9738',
    'S': '32.065', 'Cl': '35.453', 'Ar': '39.948', 'K': '39.0983', 'Ca': '40.078',
    'Sc': '44.9559', 'Ti': '47.867', 'V': '50.9415', 'Cr': '51.9961', 'Mn': '54.938',
    'Fe': '55.845', 'Co': '58.9332', 'Ni': '58.6934', 'Cu': '63.54600000000001', 'Zn': '65.39',
    'Ga': '69.723', 'Ge': '72.64', 'As': '74.9216', 'Se': '78.96', 'Br': '79.904',
    'Kr': '83.8', 'Rb': '85.4678', 'Sr': '87.62', 'Y': '88.9059', 'Zr': '91.22399999999999',
    'Nb': '92.9064', 'Mo': '95.94', 'Tc': '98.0', 'Ru': '101.07', 'Rh': '102.9055',
    'Pd': '106.42', 'Ag': '107.8682', 'Cd': '112.411', 'In': '114.818', 'Sn': '118.71',
    'Sb': '121.76', 'Te': '127.6', 'I': '126.9045', 'Xe': '131.293', 'Cs': '132.9055',
    'Ba': '137.327', 'La': '138.9055', 'Ce': '140.116', 'Pr': '140.9077', 'Nd': '144.24',
    'Pm': '145.0', 'Sm': '150.36', 'Eu': '151.964', 'Gd': '157.25', 'Tb': '158.9253',
    'Dy': '162.5', 'Ho': '164.9303', 'Er': '167.25900000000001', 'Tm': '168.9342', 'Yb': '173.04',
    'Lu': '174.967', 'Hf': '178.49', 'Ta': '180.9479', 'W': '183.84', 'Re': '186.207',
    'Os': '190.23', 'Ir': '192.217', 'Pt': '195.078', 'Au': '196.9665', 'Hg': '200.59',
    'Tl': '204.3833', 'Pb': '207.2', 'Bi': '208.9804', 'Po': '209.0', 'At': '210.0',
    'Rn': '222.0', 'Fr': '223.0', 'Ra': '226.0', 'Ac': '227.0', 'Th': '232.0381',
    'Pa': '231.0359', 'U': '238.0289', 'Np': '237.0', 'Pu': '244.0', 'Am': '243.0',
  }
  seen = set()
  lines = []
  for row in atom.split('\n')[:-1]:
    symbol = row.split()[0]
    if symbol in seen:
      continue
    lines.append('{0} {1} {0}.upf'.format(symbol,masses[symbol]))
    seen.add(symbol)
  return '\n'.join(lines)
198 | 
199 | 
# Numeric value per substituent label.
# NOTE(review): presumably Hammett substituent constants (going by the name) - confirm the source.
hammett = {'F': 0.34,'NH2':-0.16,'H':0,'COCl':0.51,'CF3':0.43,'OH':0.12,'NHNO2':0.91}
# Hex colour string per species label; besides plain element symbols there are
# numbered variants such as 'Ti1', 'Fe2' or 'O2' (same entries as the color.key file).
color_keys = {'H': '#FFFFFF', 'He': '#D9FFFF', 'Li': '#CC80FF', 'Be': '#C2FF00', 'B': '#FFB5B5', 'C': '#909090', 'N': '#3050F8', 'O': '#FF0D0D', 'O2': '#FFAE00', 'F': '#90E050', 'Ne': '#B3E3F5', 'Na': '#AB5CF2', 'Mg': '#8AFF00', 'Al': '#BFA6A6', 'Si': '#F0C8A0', 'P': '#FF8000', 'S': '#FFFF30', 'Cl': '#1FF01F', 'Ar': '#80D1E3', 'K': '#8F40D4', 'Ca': '#3DFF00', 'Sc': '#E6E6E6', 'Ti': '#BFC2C7', 'Ti1': '#BFC2C7', 'Ti2': '#BFC2C7', 'V': '#A6A6AB', 'V1': '#A6A6AB', 'V2': '#A6A6AB', 'Cr': '#8A99C7', 'Cr1': '#8A99C7', 'Cr2': '#8A99C7', 'Mn': '#9C7AC7', 'Mn1': '#9C7AC7', 'Mn2': '#9C7AC7', 'Fe': '#FFA800', 'Fe1': '#FFA200', 'Fe2': '#FFD200', 'Co': '#F090A0', 'Co1': '#05004C', 'Co2': '#388786', 'Co3': '#67CAC9', 'Ni': '#50D050', 'Ni1': '#50D050', 'Ni2': '#50D050', 'Cu': '#808080', 'Cu1': '#808080', 'Cu2': '#606060', 'Zn': '#7D80B0', 'Ga': '#C28F8F', 'Ge': '#668F8F', 'As': '#BD80E3', 'Se': '#FFA100', 'Br': '#A62929', 'Kr': '#5CB8D1', 'Rb': '#702EB0', 'Sr': '#00FF00', 'Y': '#94FFFF', 'Zr': '#94E0E0', 'Nb': '#73C2C9', 'Mo': '#54B5B5', 'Tc': '#3B9E9E', 'Ru': '#248F8F', 'Rh': '#0A7D8C', 'Pd': '#006985', 'Ag': '#C0C0C0', 'Cd': '#FFD98F', 'In': '#A67573', 'Sn': '#668080', 'Sb': '#9E63B5', 'Te': '#D47A00', 'I': '#940094', 'Xe': '#429EB0', 'Cs': '#57178F', 'Ba': '#00C900', 'La': '#70D4FF', 'Ce': '#FFFFC7', 'Pr': '#D9FFC7', 'Nd': '#C7FFC7', 'Pm': '#A3FFC7', 'Sm': '#8FFFC7', 'Eu': '#61FFC7', 'Gd': '#45FFC7', 'Tb': '#30FFC7', 'Dy': '#1FFFC7', 'Ho': '#00FF9C', 'Er': '#00E675', 'Tm': '#00D452', 'Yb': '#00BF38', 'Lu': '#00AB24', 'Hf': '#4DC2FF', 'Ta': '#4DA6FF', 'W': '#2194D6', 'Re': '#267DAB', 'Os': '#266696', 'Ir': '#175487', 'Pt': '#D0D0E0', 'Au': '#FFD123', 'Hg': '#B8B8D0', 'Tl': '#A6544D', 'Pb': '#575961', 'Bi': '#9E4FB5', 'Po': '#AB5C00', 'At': '#754F45', 'Rn': '#428296', 'Fr': '#420066', 'Ra': '#007D00', 'Ac': '#70ABFA', 'Th': '#00BAFF', 'Pa': '#00A1FF', 'U': '#008FFF', 'Np': '#0080FF', 'Pu': '#006BFF', 'Am': '#545CF2', 'Cm': '#785CE3', 'Bk': '#8A4FE3', 'Cf': '#A136D4', 'Es': 
'#B31FD4', 'Fm': '#B31FBA', 'Md': '#B30DA6', 'No': '#BD0D87', 'Lr': '#C70066', 'Rf': '#CC0059', 'Db': '#D1004F', 'Sg': '#D90045', 'Bh': '#E00038', 'Hs': '#E6002E', 'Mt': '#EB0026'}
202 | 


--------------------------------------------------------------------------------
/hse_db.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import json
 3 | import numpy as np
 4 | import QE 
 5 | from bandgapoccu import bg
 6 | import json, os
 7 | 
 8 | def get_hse(all_,phse,hse_location,pbe_fname='.out',hse_fname='.out'):
 9 |   '''This matches the HSE data with the PBE data'''
10 |   t = [i for i in all_ if i['phase'] == phse]
11 |   hse_lst = []
12 |   for c,i in enumerate(t):
13 |     key = i['location'].split('/')[-1].replace('.save','')
14 |     d_ = i['location'][0:i['location'].rfind('/')]
15 |     pbe = bg(d_ + '/' + key + pbe_fname)
16 |     i['bandgap'] = pbe.bg
17 |     i['metallic'] = pbe.metallic
18 |     hse_f = '/'.join([hse_location,i['dopant'],key+hse_fname])
19 |     hse = None
20 |     if os.path.isfile(hse_f):
21 |       if 'job done' in open(hse_f).read().lower():
22 |         hse = bg(hse_f)
23 |         i['hse_bandgap'] = hse.bg
24 |         i['hse_metallic'] = hse.metallic
25 |         hse = hse.bg
26 |     if hse is None:
27 |       i['hse_bandgap'] = None
28 |       i['hse_metallic'] = None
29 |     i['hse_location'] = hse_f.replace(hse_fname,'.save')
30 |     hse_lst.append(i)
31 |   return hse_lst
32 | 
33 | 
def process_save(fname,counts):
  '''Get the band gap, dopant, etc. for a list of .save directories.

  fname   text file with one .save directory per line
  counts  species -> number of atoms in the undoped cell, e.g. {'Ti':8,'O':16}

  The counts are reduced by their greatest common divisor and sorted by the
  reduced value to build the phase label (e.g. 'TiO2').  For every listed
  directory the structure is read with QE.Struct.XML_Process and the gap
  from the matching '.out' file.  Directories that fail for any reason are
  skipped silently.

  Returns (phase, records); each record has 'location', 'dopant',
  'bandgap', 'metallic', 'dop_sub', 'phase' and 'natom'.
  '''
  from math import gcd
  with open(fname) as src:
    f = src.read().split('\n')
  cmmon = [counts[i] for i in counts]
  div = cmmon[0]
  for c in cmmon[1:]:
    div = gcd(div , c)

  counts_sort = sorted([(i,int(counts[i]/div)) for i in counts],key=lambda x: x[1])
  b_ = [i[0] for i in counts_sort]
  phase = ''
  for i in counts_sort:
    phase += i[0] if i[1] == 1 else i[0] + str(i[1])

  d = []
  for i in f:
    if len(i) == 0: continue
    try:
      y = QE.Struct()
      y.XML_Process(i)
      x = bg(i.replace('.save','.out'))
      # atom labels with digits stripped: t = foreign species, p = host species
      t = [zz for zz in y.atoms if ''.join([tt for tt in zz if not tt.isdigit()]).strip() not in b_]
      p = [zz for zz in y.atoms if ''.join([tt for tt in zz if not tt.isdigit()]).strip() in b_]
      coun = {}
      for zz in p:
        # NOTE(review): this drops whitespace-separated digit TOKENS, unlike
        # the per-character filter above - confirm the atom-label format
        c = ''.join([tt for tt in zz.split() if not tt.isdigit()])
        if c in coun: coun[c] += 1
        else: coun[c] = 1
      # the host species with fewer atoms than expected is the substituted one
      loc = b_[0] if coun[b_[0]] < counts[b_[0]] else b_[1]
      if len(t) > 0:
        dopant = ''.join([zz for zz in t[0] if not zz.isdigit()]).strip()
      else:
        dopant = phase
      d.append({'location':i.strip(),'dopant':dopant,'bandgap':x.bg,'metallic':x.metallic,'dop_sub':loc,'phase':phase,'natom':len(y.atoms)})
    except Exception:
      # best effort: skip unreadable entries, but let Ctrl-C / SystemExit through
      continue
  return phase,d
82 | 
if __name__ == '__main__':
  # Script entry: build the TiO2 records from save.out, attach the HSE data
  # and write them to '<phase>.hse.json'.  Paths are hard-coded.
  phase,d = process_save('save.out',{'Ti':8,'O':16})

  hse_location = '/data/llentz/Charge-Density/HSE/HSE/TiO2/Big/c-len'
  t = get_hse(d,phase,hse_location)
  # context manager: the output is flushed and closed deterministically
  with open(phase + '.hse.json','w') as out:
    json.dump(t, out)
89 | 
90 | 


--------------------------------------------------------------------------------
/process.py:
--------------------------------------------------------------------------------
 1 | #!/global/homes/l/llentz/anaconda3/bin/python
 2 | 
 3 | import tempfile
 4 | import sys
 5 | import glob
 6 | import QE
 7 | from bandgapoccu import bg as BG
 8 | import os
 9 | import json
10 | import re
11 | 
def separate_aimd(fname='scf.out',tempfile='OUT.out',jfile='data.json'):
  """Split a QE AIMD output into per-step records and dump them to jfile.

  fname     QE output file of the AIMD run
  tempfile  scratch file name (shadows the imported tempfile module here);
            header + one step is written to it and re-parsed
  jfile     JSON output (list of dicts, rewritten after every step)

  Each record holds 'str', 'atoms', 'cell' (from QE.Struct), 'gap' (from
  bandgapoccu.bg) and 'temp' ('NA' when no temperature is available for the
  step).  A step ends at the line containing '!    total energy'.
  """
  with open(fname) as src:
    temp = [zz.split()[2] for zz in src.read().split('\n') if 'temperature' in zz]
  # raw string: '\d' in a plain literal is an invalid escape sequence
  temp = [float(zz) for zz in temp if re.match(r'\d+.\d+$',zz)]

  data = []
  with open(fname,'r') as scf:
    header = ''
    for i in scf:
      if 'PseudoPot' in i: break
      header += i

    rest = header + '\n\n'
    tmp = rest
    cnt = 1
    try:
      for i in scf:
        # accumulate lines up to (and including) the total-energy line
        while '!    total energy' not in i:
          i = next(scf)
          tmp += i
        t_ = {}
        with open(tempfile,'w') as output:
          output.write(tmp)
        x = QE.Struct()
        x.File_Process(tempfile)
        atoms,cell = x.return_params()
        t_['str'] = x.print()
        t_['atoms'] = atoms
        t_['cell'] = cell
        del x
        Gap = BG(tempfile)
        t_['gap'] = Gap.bg
        try:
          t_['temp'] = temp[cnt - 1]
        except IndexError:
          t_['temp'] = 'NA'
        cnt += 1
        tmp = rest
        data.append(t_)
        with open(jfile, 'w') as outfile:
          json.dump(data, outfile)
        i = next(scf)
    except StopIteration:
      # input exhausted in the middle of a step: keep what was collected
      with open(jfile, 'w') as outfile:
        json.dump(data, outfile)
65 | 
if __name__ == '__main__':
  # Command-line use: the QE output file is the first argument.
  qe_output = sys.argv[1]
  separate_aimd(fname=qe_output)
68 | 


--------------------------------------------------------------------------------
/sample.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import pandas as pd
 4 | import json
 5 | import numpy as np
 6 | 
 7 | def user_input(x):
 8 |   with open(x.location + '/user_input','w') as f:
 9 |     f.write(str(x.bandgap) + '\n')
10 |     f.write(str(np.log(x.bandgap)) + '\n')
11 |     f.write(str(x.bandgap) + '\n')
12 | d = pd.read_json('all.json')
13 | d = d[(d.metallic == False) & (d.bandgap > 0.15)]
14 | d = d.drop(d[(d.phase == 'GaAs') & (d.dopant != 'GaAs')].index)
15 | d.apply(user_input,axis=1)
16 | 
17 | #d = pd.read_csv('data.csv')
# Split the records into train/validation/test sets separately for every
# (phase, dopant) combination so each combination contributes 80% of its
# rows to training and the remainder is halved between validation and test.
# Combinations with fewer than 10 rows are left out entirely.
#
# DataFrame.append was removed in pandas 2.0, so the per-group pieces are
# collected in lists and concatenated once at the end.
train_parts = []
val_parts = []
test_parts = []
phase = d['phase'].unique()
for j in phase:
  t_ = d[d.phase == j]
  dopants = t_['dopant'].unique()
  for i in dopants:
    t = t_[t_.dopant == i]
    if len(t) < 10: continue
    train = t.sample(frac=0.8)
    held_out = t.drop(train.index)
    val = held_out.sample(frac=0.50)
    test = held_out.drop(val.index)
    train_parts.append(train)
    val_parts.append(val)
    test_parts.append(test)

# pd.concat refuses an empty list; fall back to an empty frame, which is
# what the accumulators started out as before.
t_total = pd.concat(train_parts) if train_parts else pd.DataFrame()
v_total = pd.concat(val_parts) if val_parts else pd.DataFrame()
test_total = pd.concat(test_parts) if test_parts else pd.DataFrame()
35 | 
# Write the ``location`` of every record in each split, one per line.
# Series.as_matrix() was removed in pandas 1.0; tolist() yields the same
# values and works on old and new pandas alike.
for split_file, split_frame in (('train.dat', t_total),
                                ('val.dat', v_total),
                                ('test.dat', test_total)):
  # Resolve the column before opening the file so a failure here does not
  # leave a truncated output file behind.
  locations = split_frame.location.tolist()
  with open(split_file, 'w') as out:
    out.writelines(loc + '\n' for loc in locations)
51 | 


--------------------------------------------------------------------------------
/sdir.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | function sdir() {
 4 |   if [ -z "$1" ]
 5 |   then
 6 |     echo "Please provide slurm jobid"
 7 |   else
 8 |     DIR=`scontrol show jobid -dd $1 | grep WorkDir | sed 's/   WorkDir=//g'`
 9 |     cd $DIR
10 |     ls
11 |   fi
12 | }
13 | 


--------------------------------------------------------------------------------
/to_xml.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import QE
 4 | from lxml import etree as ET
 5 | from random import shuffle
 6 | import sys
 7 | 
 8 | 
 9 | def xml(f,extra_tag=None,p_=10):
10 |   '''This takes a list of directories and creates a PROPhet xml file'''
11 |   root = ET.Element("PROPhet")
12 |   nsystms = ET.Element('nsystem')
13 |   root.append(nsystms)
14 |   systems = ET.Element("systems")
15 |   for i in range(200): shuffle(f)
16 |   N_train = int(0.80*len(f))
17 |   N_val = int(0.90*len(f))
18 |   sys = []
19 |   cnt = 1
20 |   for c,i in enumerate(f):
21 |     if c%p_ == 0 : print(c)
22 |     if c < N_train: t_flag = "train"
23 |     elif N_train < c < N_val: t_flag = "val"
24 |     else: t_flag = 'test'
25 |     x = QE.Struct()
26 |     try:
27 |       x.XML_Process(i)
28 |     except:
29 |       continue
30 |     system = ET.Element("system",id=str(c + 1))
31 |     train = ET.Element('train')
32 |     train.text = t_flag
33 |     system.append(train)
34 |     lattice = ET.Element('lattice',units='angstrom')
35 |     for j in x.lattice:
36 |       l = ET.Element(j)
37 |       l.text = ' '.join([str(zz) for zz in x.lattice[j]])
38 |       lattice.append(l)
39 |     system.append(lattice)
40 |     atoms = ET.Element('atoms',units='angstrom')
41 |     atm,cell = x.return_params()
42 |     natoms = ET.Element('natoms')
43 |     natoms.text = str(len(x.atoms))
44 |     species = ET.Element('species')
45 |     ntype = len(set([zz.split()[0] for zz in atm.split('\n')[:-1]]))
46 |     species.text = str(ntype)
47 |     atoms.append(natoms)
48 |     atoms.append(species)
49 |     for j in atm.split('\n')[:-1]: 
50 |       atom = ET.Element("atom",specie=j.split()[0])
51 |       atom.text = ' '.join(j.split()[1:4])
52 |       atoms.append(atom)
53 |     system.append(atoms)
54 |     target = ET.Element('target')
55 |     target.text = str(x.energy)
56 |     system.append(target)
57 |     if extra_tag is not None:
58 |       tag = extra_tag[c]['tag']
59 |       val = extra_tag[c]['val']
60 |       if 'other_tags' in list(extra_tag[c].keys()):
61 |         _ = ET.Element(tag,**extra_tag[c]['other_tags'])
62 |       else: 
63 |         _ = ET.Element(tag)
64 |       _.text = val
65 |       system.append(_)
66 |     sys.append(system)
67 |     cnt += 1
68 |     del x
69 |   for i in sys:
70 |     systems.append(i)
71 |   nsystms.text = str(cnt)
72 |   root.append(systems)
73 |   str_ = ET.tostring(root,pretty_print=True).decode('utf-8')
74 |   return str_
75 | 
if __name__ == '__main__':
  # argv[1] names a text file of whitespace-separated entries for xml().
  if len(sys.argv) < 2:
    sys.exit('usage: ' + sys.argv[0] + ' <listing_file>')
  with open(sys.argv[1]) as listing:
    # NOTE(review): only the first 10 entries are used -- confirm whether
    # this cap is intentional or a leftover from testing.
    d = listing.read().split()[0:10]
  t = xml(d)
  with open('PROPhet.xml','w') as f:
    f.write(t)
82 | 


--------------------------------------------------------------------------------