├── QE.py ├── README.md ├── bandgapoccu.py ├── color.key ├── default.py ├── early_stop.py ├── extractor.py ├── functions.py ├── hse_db.py ├── process.py ├── sample.py ├── sdir.sh └── to_xml.py /QE.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys, os 3 | import numpy as np 4 | from numpy import cos as cos 5 | from numpy import sin as sin 6 | import json 7 | from collections import Counter 8 | import sqlite3 as lite 9 | from xml.etree import ElementTree 10 | from lxml import etree as ET 11 | import copy 12 | 13 | class index(): 14 | def __init__(self): 15 | self.keys = {} 16 | 17 | def sanitize(self,s): 18 | return ''.join([i for i in s if not i.isdigit()]).strip() 19 | 20 | def key(self,specie): 21 | if specie not in self.keys: 22 | self.keys[specie] = 0 23 | else: 24 | self.keys[specie] += 1 25 | return specie + str(self.keys[specie]) 26 | 27 | def reset(self): 28 | self.keys = {} 29 | 30 | class Struct: 31 | def __init__(self,direct = None): 32 | self.BOHRtoA = 0.529177249 33 | self.RYtoeV = 13.605698066 34 | self.program = "QE" 35 | self.version = "" 36 | self.volume = 0.0 37 | self.alat = 0.0 38 | self.natoms = 0 39 | self.nat = 0 40 | self.nelect = 0 41 | self.Ecut = 0.0 42 | self.RhoCut = 0.0 43 | self.Econv = 0.0 44 | self.Exch = "" 45 | self.energy = 0.0 46 | self.natoms = 0 47 | self.bandgap = 0.0 48 | self.bands = 0 49 | self.lattice = {'a':np.zeros(3),'b':np.zeros(3),'c':np.zeros(3)} 50 | self.atoms = {} 51 | self.norms = {'a':0.0,'b':0.0,'c':0.0} 52 | self.angles = {'alpha':0.0,'beta':0.0,'gamma':0.0} 53 | self.kpts = 0 54 | self.bnddiagram = False 55 | self.FermiTest = False 56 | self.Fermi = 0.0 57 | self.JSON = "" 58 | self.email = "" 59 | self.atomindex = index() 60 | self.noband = False 61 | if direct is not None: 62 | if os.path.isfile(direct): 63 | self.File_Process(direct) 64 | elif os.path.isdir(direct): 65 | self.XML_Process(direct) 66 | 67 | def From_Crystal(self): 68 | #vol = np.sqrt(1 - np.cos(self.angles['alpha']*np.pi/180.)**2 - np.cos(self.angles['beta']*np.pi/180.)**2 - np.cos(self.angles['gamma']*np.pi/180.)**2 + 2*np.cos(self.angles['alpha']*np.pi/180.)*np.cos(self.angles['beta']*np.pi/180.)*np.cos(self.angles['gamma']*np.pi/180.)) 69 | #temp1 = [self.norms['a'], self.norms['b']*np.cos(self.angles['gamma']*np.pi/180.), self.norms['c']*np.cos(self.angles['beta']*np.pi/180.)] 70 | #temp2 = [0,self.norms['b']*np.sin(self.angles['gamma']*np.pi/180.),self.norms['c']*((np.cos(self.angles['alpha']*np.pi/180.)-np.cos(self.angles['beta']*np.pi/180.)*np.cos(self.angles['beta']*np.pi/180.))/np.sin(self.angles['gamma']*np.pi/180.))] 71 | #temp3 = [0,0,self.norms['c']*vol/np.sin(self.angles['gamma']*np.pi/180.)] 72 | #conversion = np.vstack((np.asarray(temp1),np.asarray(temp2),np.asarray(temp3))) 73 | t = [] 74 | for i in ['a','b','c']: 75 | t.append(list(self.lattice[i])) 76 | return np.transpose(np.array(t)) 77 | #return np.linalg.inv(np.transpose(np.array(t))) 78 | 79 | def to_Crystal(self): 80 | self.Normalize() 81 | conversion = np.linalg.inv(self.From_Crystal()) 82 | print("ATOM_POSITIONS {crystal}") 83 | for i in self.atoms: 84 | #tmp = copy.deepcopy(self.atoms[i]) 85 | tmp = np.dot(conversion,self.atoms[i]) 86 | print(self.atomindex.sanitize(i) + " " + ' '.join([str(round(j,9)) for j in tmp])) 87 | 88 | def wrap_Cell(self): 89 | self.Normalize() 90 | conversion = np.linalg.inv(self.From_Crystal()) 91 | c_1 = self.From_Crystal() 92 | for i in self.atoms: 93 | tmp = np.dot(conversion,self.atoms[i]) 94 | for c,j in enumerate(tmp): 95 | if tmp[c] < 0.0: 96 | while tmp[c] <= 0.0: 97 | tmp[c] += 1 98 | elif tmp[c] > 1.0: 99 | while tmp[c] >= 1.0: 100 | tmp[c] -= 1 101 | self.atoms[i] = np.dot(c_1,tmp) 102 | 103 | 104 | 105 | def RDF(self,rcut=5.0,dr=0.1): 106 | supcell = [] 107 | rho = self.natoms/self.volume 108 | radius = np.arange(0,rcut + dr,dr) 109 | R = {} 110 | for i in radius: 111 | R[i] = 0.0 112 | conversion = np.linalg.inv(self.From_Crystal()) 113 | crystal = self.From_Crystal() 114 | max_trans = {} 115 | for i in self.norms: 116 | max_trans[i] = int(np.ceil(1.1*rcut/self.norms[i])) 117 | trans_index = [] 118 | for i in range(-max_trans['a'],max_trans['a']+1): 119 | for j in range(-max_trans['b'],max_trans['b']+1): 120 | for k in range(-max_trans['c'],max_trans['c']+1): 121 | trans_index.append([i,j,k]) 122 | y = copy.deepcopy(self) 123 | for i in y.atoms: 124 | y.atoms[i] = np.dot(conversion,y.atoms[i]) 125 | 126 | for i in y.atoms: 127 | pos1 = copy.copy(y.atoms[i]) 128 | for j in y.atoms: 129 | for z in trans_index: 130 | pos2 = np.add(y.atoms[j],z) 131 | delta = np.linalg.norm(np.dot(crystal,np.subtract(pos2,pos1))) 132 | if delta <= rcut: 133 | for ZZ in range(0,len(radius)-1): 134 | if radius[ZZ] < delta < radius[ZZ+1]: 135 | R[radius[ZZ]] += 1.0/(rho*4.*np.pi*(radius[ZZ]**2)*dr) 136 | data = [] 137 | for i in R: 138 | data.append([i,R[i]/(float(self.natoms))]) 139 | return data 140 | 141 | 142 | def CIF(self,filename=""): 143 | self.Normalize() 144 | ciffile = "data_global\n" 145 | ciffile += "_chemical_name " + self.to_Formula().strip() + "\n" 146 | for i in ['a','b','c']: 147 | ciffile += '_cell_length_' + i.strip() + " " + str(self.norms[i]) + "\n" 148 | for i in ['alpha','beta','gamma']: 149 | ciffile += '_cell_angle_' + i.strip() + " " + str(self.angles[i]) + "\n" 150 | ciffile += '_cell_volume_ ' + str(self.volume) + "\n" 151 | ciffile += "loop_\n" 152 | ciffile += "_atom_site_label\n" 153 | ciffile += "_atom_site_fract_x\n" 154 | ciffile += "_atom_site_fract_y\n" 155 | ciffile += "_atom_site_fract_z\n" 156 | conversion = np.linalg.inv(self.From_Crystal()) 157 | counter = 0 158 | for i in self.atoms: 159 | tmp = copy.deepcopy(self.atoms[i]) 160 | dot = np.dot(conversion,tmp) 161 | ciffile += self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in dot]) + "\n" 162 | if filename == "": 163 | print(ciffile) 164 | else: 165 | f = open(filename, 'w') 166 | f.write(ciffile) 167 | f.close() 168 | def to_XYZ(self,filename = None): 169 | printstring = '' 170 | printstring += str(len(self.atoms)) + '\n\n' 171 | for i in self.atoms: 172 | tmp = [round(x,5) for x in self.atoms[i]] 173 | string = self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in self.atoms[i]]) 174 | printstring += string + "\n" 175 | if filename == None: 176 | print(printstring) 177 | else: 178 | f = open(filename,'w') 179 | f.write(printstring) 180 | f.close() 181 | 182 | def return_params(self): 183 | atoms = '' 184 | for i in self.atoms: 185 | tmp = [round(x,5) for x in self.atoms[i]] 186 | string = self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in self.atoms[i]]) 187 | atoms += string + "\n" 188 | cell = '' 189 | for i in ['a','b','c']: 190 | string = " ".join([str(round(x,5)) for x in self.lattice[i]]) 191 | cell += string + "\n" 192 | return atoms,cell 193 | 194 | def print(self): 195 | printstring = "ATOMIC_POSITIONS {angstrom}\n" 196 | for i in self.atoms: 197 | tmp = [round(x,5) for x in self.atoms[i]] 198 | string = self.atomindex.sanitize(i) + " " + " ".join([str(x) for x in self.atoms[i]]) 199 | printstring += string + "\n" 200 | printstring += "CELL_PARAMETERS {angstrom}\n" 201 | for i in ['a','b','c']: 202 | string = " ".join([str(round(x,5)) for x in self.lattice[i]]) 203 | printstring += string + "\n" 204 | return printstring 205 | 206 | def XML_Process(self,dirstring): #Need to talk to Alexie About this, does not store total energy 207 | try: 208 | f = open(dirstring + "/data-file.xml") 209 | except: 210 | raise ValueError("Cannot open " + dirstring + '/data-file.xml') 211 | tree = ElementTree.parse(f) 212 | f.close() 213 | MAP = {'a':'a1','b':'a2','c':'a3'} 214 | try: 215 | self.version = tree.find('./HEADER/CREATOR').attrib['VERSION'] 216 | self.nat = int(tree.find('./IONS/NUMBER_OF_SPECIES').text) 217 | self.atomindex.reset() 218 | self.Exch = tree.find('./EXCHANGE_CORRELATION/DFT').text.rstrip().lstrip() 219 | self.Nelec = float(tree.find('./BAND_STRUCTURE_INFO/NUMBER_OF_ELECTRONS').text.strip()) 220 | self.kpts = float(tree.find('./BAND_STRUCTURE_INFO/NUMBER_OF_K-POINTS').text) 221 | self.fermi = 27.2114*float(tree.find('./BAND_STRUCTURE_INFO/FERMI_ENERGY').text) 222 | self.alat = self.BOHRtoA*float(tree.find('./CELL/LATTICE_PARAMETER').text) 223 | self.Ecut = 27.2114*float(tree.find('./PLANE_WAVES/WFC_CUTOFF').text) 224 | self.RhoCut = 27.2114*float(tree.find('./PLANE_WAVES/WFC_CUTOFF').text) 225 | #self.beta = 0.0 226 | except: 227 | self.version = None 228 | self.natoms = None 229 | self.nat = None 230 | self.atomindex.reset() 231 | self.Exch = None 232 | self.Nelec = None 233 | self.kpts = None 234 | self.fermi = None 235 | self.alat = None 236 | self.Ecut = None 237 | self.RhoCut = None 238 | try: 239 | self.energy = float(tree.find('./TOTAL_ENERGY').text) 240 | except: 241 | self.energy = None 242 | for i in ['a','b','c']: 243 | try: 244 | for c,j in enumerate(tree.find('./CELL/DIRECT_LATTICE_VECTORS/' + MAP[i]).text.split()): 245 | self.lattice[i][c] = self.BOHRtoA*float(j) 246 | except: 247 | for c,j in enumerate(tree.find('./DIRECT_LATTICE_VECTORS/' + MAP[i]).text.split()): 248 | self.lattice[i][c] = self.BOHRtoA*float(j) 249 | self.natoms = int(tree.find('./IONS/NUMBER_OF_ATOMS').text) 250 | for i in range(0,self.natoms): 251 | tmp = tree.find('./IONS/ATOM.' + str(i + 1)).attrib 252 | species = tmp['SPECIES'] 253 | array = [ self.BOHRtoA*float(j) for j in tmp['tau'].split()] 254 | self.atoms[self.atomindex.key(species)] = np.array([array[0],array[1],array[2]]) 255 | self.Normalize() 256 | #test = tree.find('./IONS/ATOM.1') 257 | #print(test) 258 | 259 | def to_Latex(self,caption=None,ncols=1): 260 | self.Normalize() 261 | if caption is None: caption = self.to_Formula() 262 | t = '''\\begin{table}[!ht] 263 | \centering 264 | \caption{CAPTION} 265 | %\label{my-label} 266 | \\bigskip 267 | \\begin{tabular}{lr} 268 | \hline 269 | Parameter & Value \\\\ 270 | \hline 271 | PARAMS 272 | \hline 273 | \end{tabular} 274 | \end{table}''' 275 | params = '' 276 | for c,i in enumerate(['a','b','c']): 277 | params += i.upper() + ' & ' + str(round(self.norms[i],2)) + ' \\\\' + '\n' 278 | for i in ['alpha','beta','gamma']: 279 | params += '$\\' + i + '$ & ' + str(round(self.angles[i],2)) + ' \\\\' + '\n' 280 | print(t.replace('PARAMS',params).replace('CAPTION',caption + ' unit cell parameters. Length units are in \\r{A} and angles are in degrees.')) 281 | header = '& '.join(['Atom & X & Y & Z']*ncols) 282 | t1 = '''\\begin{table}[!ht] 283 | \centering 284 | \caption{CAPTION} 285 | \\bigskip 286 | %\label{my-label} 287 | \\begin{tabular}{''' + '|'.join(['lrrr']*ncols) + '''} 288 | \hline 289 | ''' + header + '''\\\\ 290 | \hline 291 | PARAMS 292 | \hline 293 | \end{tabular} 294 | \end{table}''' 295 | params_1 = '' 296 | conversion = np.linalg.inv(self.From_Crystal()) 297 | print() 298 | atom_keys = list(self.atoms.keys()) 299 | #for i in self.atoms: 300 | for c in range(0,len(atom_keys) - ncols - 1,ncols): 301 | i = atom_keys[c] 302 | lne = '' 303 | for j in range(0,ncols): 304 | try: 305 | tmp = np.dot(conversion,self.atoms[atom_keys[c + j]]) 306 | lne += self.atomindex.sanitize(atom_keys[c+j]) + " & " + ' & '.join([str(round(j,3)) for j in tmp]) + ' ' 307 | if (ncols > 1) & j < ncols -1: lne += '&' 308 | except: 309 | lne += ' & & &' 310 | #params_1 += self.atomindex.sanitize(i) + " & " + ' & '.join([str(round(j,3)) for j in tmp]) + '\\\\ \n' 311 | params_1 += lne + '\\\\ \n' 312 | print(t1.replace('PARAMS',params_1).replace('CAPTION',caption + ' atomic positions. Here positions are shown in fractional coordinates')) 313 | 314 | def File_Process(self,filestring): 315 | try: 316 | f = open(filestring,'r') 317 | except: 318 | print("Cannot open %s" % filestring) 319 | #sys.exit(1) 320 | raise IOError 321 | linenum = 0 322 | for i in f: 323 | i = i.lower() 324 | #if "ERROR" in i.upper(): 325 | #print("There is an error in this calculation") 326 | #sys.exit(2) 327 | if linenum < 1000: 328 | if 'nat' in i.lower(): 329 | self.natoms = int(''.join(zz for zz in i.strip() if zz.isdigit())) 330 | if "lattice parameter (a_0)" in i: 331 | self.alat = float(i.split()[5]) 332 | if "number of k points=" in i: 333 | self.kpts = int(i.split()[4]) 334 | next 335 | if "Program PWSCF" in i: 336 | self.version = i.split()[2].replace('v.','') 337 | next 338 | if "lattice parameter (alat)" in i: 339 | self.alat = float(i.split()[4])*self.BOHRtoA 340 | next 341 | if "number of Kohn-Sham states" in i: 342 | self.bands = int(i.split()[4]) 343 | if "unit-cell volume" in i and "new" not in i: 344 | self.volume = float(i.split()[3])*(self.BOHRtoA**3.0) 345 | next 346 | if "number of atoms/cell" in i: 347 | self.natoms = int(i.split()[4]) 348 | next 349 | if "number of atomic types" in i: 350 | self.nat = int(i.split()[5]) 351 | next 352 | if "number of electrons" in i: 353 | self.nelect = float(i.split()[4]) 354 | next 355 | if "kinetic-energy cutoff" in i: 356 | self.Ecut = float(i.split()[3])*self.RYtoeV 357 | next 358 | if "charge density cutoff" in i: 359 | self.RhoCut = float(i.split()[4])*self.RYtoeV 360 | next 361 | if "convergence threshold" in i: 362 | if len(i.split()) < 4: 363 | self.Econv = float(i.split()[3]) 364 | next 365 | if "Exchange-correlation" in i: 366 | self.Exch = i[i.find('=') + 1:i.find('(')].rstrip() 367 | next 368 | if "a(1) =" in i: 369 | tmp = i.replace('a(1)','').replace('(','').replace('=','').replace(',','').replace(')','').split() 370 | for j in range(0,3): 371 | self.lattice['a'][j] = self.alat*float(tmp[j]) 372 | next 373 | if "a(2) =" in i: 374 | tmp = i.replace('a(2)','').replace('(','').replace('=','').replace(',','').replace(')','').split() 375 | for j in range(0,3): 376 | self.lattice['b'][j] = self.alat*float(tmp[j]) 377 | next 378 | if "a(3) =" in i: 379 | tmp = i.replace('a(3)','').replace('(','').replace('=','').replace(',','').replace(')','').split() 380 | for j in range(0,3): 381 | self.lattice['c'][j] = self.alat*float(tmp[j]) 382 | next 383 | if "site n. atom positions (alat units)" in i: 384 | self.atomindex.reset() 385 | for j in range(0,self.natoms): 386 | line = next(f).split() 387 | self.atoms[self.atomindex.key(line[1])] = np.multiply(np.array([float(line[6]),float(line[7]),float(line[8])]),self.alat) 388 | next 389 | if 'nat' in i.lower(): 390 | self.natoms = int(''.join([zz for zz in i if zz.isdigit()])) 391 | next 392 | if "!" in i and "ENERGY" in i.upper(): 393 | self.energy= float(i.split()[4])*self.RYtoeV 394 | if "new unit-cell volume" in i: 395 | self.volume = float(i.split()[4])*(self.BOHRtoA**3) 396 | if "cell_parameters" in i: 397 | if "angstrom" in i: 398 | for j in ['a','b','c']: 399 | line = next(f) 400 | tmp = line.split() 401 | for k in range(0,3): 402 | self.lattice[j][k] = float(tmp[k]) 403 | else: 404 | for j in ['a','b','c']: 405 | line = next(f) 406 | tmp = line.split() 407 | for k in range(0,3): 408 | self.lattice[j][k] = self.alat*float(tmp[k]) 409 | self.Normalize() 410 | if "ATOMIC_POSITIONS" in i.upper(): 411 | self.atomindex.reset() 412 | if "angstrom" in i: 413 | for j in range(0,self.natoms): 414 | line = next(f).split() 415 | self.atoms[self.atomindex.key(line[0])] = np.array([float(line[1]),float(line[2]),float(line[3])]) 416 | if "alat" in i: 417 | for j in range(0,self.natoms): 418 | line = next(f).split() 419 | self.atoms[self.atomindex.key(line[0])] = np.array([self.alat*float(line[1]),self.alat*float(line[2]),self.alat*float(line[3])]) 420 | if "crystal" in i.lower(): 421 | conversion = self.From_Crystal() 422 | for j in range(0,self.natoms): 423 | line = next(f).split() 424 | tmp = np.transpose(np.array([float(line[1]),float(line[2]),float(line[3])])) 425 | ncoords = np.dot(conversion,tmp) 426 | self.atoms[self.atomindex.key(line[0])] = np.array([float(ncoords[0]),float(ncoords[1]),float(ncoords[2])]) 427 | if "End of self-consistent calculation" in i: 428 | if np.floor(self.bands/8.)*8. <= self.bands: 429 | numlines = int(np.floor(self.bands/8.) + 1) 430 | remainder = int(self.bands - np.floor(self.bands/8.)*8.) 431 | else: 432 | numlines = int(np.floor(self.bands/8.)) 433 | remainder = 0 434 | self.bnddiagram = np.zeros((self.kpts,self.bands)) 435 | counter = 0 436 | self.noband = False 437 | while counter < self.kpts: 438 | line = next(f) 439 | if "Number of k-points >=" in line: 440 | self.noband = True 441 | break 442 | if "k =" in line: 443 | line = next(f) 444 | counter1 = 0 445 | for j in range(0,numlines): 446 | line = next(f) 447 | ''' 448 | for k in range(0,len(line.split())): 449 | self.bnddiagram[counter][counter1 + k] = float(line.split()[k]) 450 | ''' 451 | counter1 += 8 452 | counter += 1 453 | next 454 | if "highest occupied, lowest unoccupied level (ev)" in i: 455 | self.bandgap = float(i.split()[7]) - float(i.split()[6]) 456 | next 457 | if "the Fermi energy is" in i: 458 | self.Fermi = float(i.split()[4]) 459 | self.FermiTest = True 460 | next 461 | linenum += 1 462 | f.close() 463 | self.Normalize() 464 | self.noband = True 465 | if self.FermiTest == True and self.noband == False: 466 | self.bnddiagram = np.subtract(self.bnddiagram,self.Fermi) 467 | emin = np.zeros(self.kpts) 468 | emax = np.zeros(self.kpts) 469 | counter = 0 470 | for j in self.bnddiagram: 471 | emin[counter] = j[np.searchsorted(j, 0.0,side='right')-1] 472 | emax[counter] = j[np.searchsorted(j, 0.0,side='right')] 473 | counter += 1 474 | self.bandgap = float(np.min(emax-emin)) 475 | 476 | def to_JSON(self): 477 | if self.JSON == "": 478 | for i in self.lattice: 479 | self.lattice[i] = self.lattice[i].tolist() 480 | self.bnddiagram = self.bnddiagram.tolist() 481 | self.JSON = json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=1) 482 | 483 | def to_Formula(self): 484 | from math import gcd 485 | string = '' 486 | cmmon = [] 487 | for i in self.atomindex.keys: 488 | cmmon.append(self.atomindex.keys[i] + 1) 489 | if len(cmmon) == 1: 490 | div = cmmon[0] 491 | else: 492 | div = cmmon[0] 493 | for c in cmmon[1::]: 494 | div = gcd(div , c) 495 | for i in self.atomindex.keys: 496 | t_ = str(int((self.atomindex.keys[i]+1)/div)) 497 | if t_ == '1': t_ = '' 498 | else: t_ = '$_' + t_ + '$' 499 | string += i + t_ 500 | return string 501 | 502 | def to_database(self): 503 | con = None 504 | try: 505 | con = lite.connect('QE.db') 506 | cur = con.cursor() 507 | insert_command = 'INSERT INTO QE(A,B,C,ALPHA,BETA,GAMMA,VOLUME,NATOMS,FORMULA,BANDGAP,ENERGY,PROGRAM,VERSION) VALUES (' 508 | for i in('a','b','c'): 509 | insert_command += str(self.norms[i]) + ',' 510 | for i in('alpha','beta','gamma'): 511 | insert_command += str(self.angles[i]) + ',' 512 | insert_command += str(self.volume) + ',' + str(self.natoms) + ',"' + self.to_Formula() + '",' + str(self.bandgap) + ',' + str(self.energy) + ',"QE",' + '"' + self.version + '");' 513 | cur.execute(insert_command) 514 | lid = cur.lastrowid 515 | JSON = self.to_JSON() 516 | insert_command = "INSERT INTO RAW_DATA VALUES (" + str(lid) + ",'" + self.JSON + "');" 517 | cur.execute(insert_command) 518 | self.to_File(lid) 519 | except lite.Error: 520 | print("Unable to insert into the database") 521 | #sys.exit(3) 522 | raise IOError 523 | finally: 524 | if con: 525 | con.commit() 526 | con.close() 527 | 528 | def Normalize(self): 529 | try: 530 | for i in ['a','b','c']: 531 | self.norms[i] = np.linalg.norm(self.lattice[i]) 532 | self.angles['alpha'] = np.arccos(np.dot(self.lattice['b'],self.lattice['c'])/(self.norms['c']*self.norms['b'])) * 180./np.pi 533 | self.angles['gamma'] = np.arccos(np.dot(self.lattice['a'],self.lattice['b'])/(self.norms['a']*self.norms['b'])) * 180./np.pi 534 | self.angles['beta'] = np.arccos(np.dot(self.lattice['a'],self.lattice['c'])/(self.norms['a']*self.norms['c'])) * 180./np.pi 535 | self.volume = np.dot(self.lattice['a'],np.cross(self.lattice['b'],self.lattice['c'])) 536 | except: 537 | print("Lattice undefined") 538 | raise ValueError 539 | #sys.exit(4) 540 | 541 | def to_File(self,lid): 542 | with open(str(lid) + '.json','w') as f: 543 | f.write(self.JSON) 544 | 545 | def to_XML(self,fname): 546 | root = ET.Element("Root") 547 | DIRECT = ET.SubElement(root,'DIRECT_LATTICE_VECTORS') 548 | UNITS = ET.SubElement(DIRECT,'UNITS_FOR_DIRECT_LATTICE_VECTORS') 549 | UNITS.set('UNITS',"Bohr") 550 | lattice = {} 551 | convert = {'a1':'a','a2':'b','a3':'c'} 552 | for i in range(1,4): 553 | key = 'a' + str(i) 554 | lattice[key] = ET.SubElement(DIRECT,key) 555 | lattice[key].set('type','real') 556 | lattice[key].set('size','3') 557 | lattice[key].set('columns','3') 558 | text = ' '.join([str(1.88973*x) for x in self.lattice[convert[key]]]) 559 | lattice[key].text = text 560 | IONS = ET.SubElement(root,'IONS') 561 | NA = ET.SubElement(IONS,'NUMBER_OF_ATOMS') 562 | NA.set('type','integer') 563 | NA.set('size','1') 564 | NA.text=str(self.natoms) 565 | NA = ET.SubElement(IONS,'NUMBER_OF_SPECIES') 566 | NA.set('type','integer') 567 | NA.set('size','1') 568 | NA.text = str(self.nat) 569 | NA = ET.SubElement(IONS,'UNITS_FOR_ATOMIC_POSITIONS') 570 | NA.set('UNITS','bohr') 571 | index = 1 572 | tun = {} 573 | counter = 1 574 | for i in self.atoms: 575 | if self.atomindex.sanitize(i) not in tun: 576 | tun[self.atomindex.sanitize(i)] = str(index) 577 | index += 1 578 | ATOM = ET.SubElement(IONS,'ATOM.' + str(counter)) 579 | ATOM.set('SPECIES',self.atomindex.sanitize(i) + " ") 580 | ATOM.set('INDEX',tun[self.atomindex.sanitize(i)]) 581 | text = ' '.join([str(x*1.88973) for x in self.atoms[i]]) 582 | ATOM.set('tau',text) 583 | ATOM.set('if_pos',"1 1 1") 584 | counter += 1 585 | TE = ET.SubElement(root,'TOTAL_ENERGY') 586 | TE.set('UNITS','eV') 587 | TE.text = str(self.energy) 588 | f = ET.ElementTree(root) 589 | f.write(fname + '.xml',pretty_print=True) 590 | 591 | def to_Supercell(self,array,symm=False): 592 | if isinstance(array,list): 593 | tmp = copy.deepcopy(self) 594 | conversion = np.linalg.inv(tmp.From_Crystal()) 595 | for i in tmp.atoms: 596 | dot = np.dot(conversion,tmp.atoms[i]) 597 | tmp.atoms[i] = dot 598 | COORDs = copy.copy(tmp.atoms) 599 | if symm: 600 | r1 = range(-array[0],array[0] + 1) 601 | r2 = range(-array[1],array[1] + 1) 602 | r3 = range(-array[2],array[2] + 1) 603 | else: 604 | r1 = range(0,array[0]) 605 | r2 = range(0,array[1]) 606 | r3 = range(0,array[2]) 607 | for i in tmp.atoms: 608 | for j in r1: 609 | for k in r2: 610 | for z in r3: 611 | if (j == 0) and (k == 0) and (z == 0): #We already have the (0,0,0) structure 612 | next 613 | else: 614 | COORDs[tmp.atomindex.key(tmp.atomindex.sanitize(i))] = np.add(tmp.atoms[i],np.array([j,k,z])) 615 | for i in COORDs: 616 | COORDs[i] = np.dot(tmp.From_Crystal(),COORDs[i]) 617 | tmpmap = {'a':0,'b':1,'c':2} 618 | array = [float(xx) for xx in array] 619 | for i in tmp.lattice: 620 | #tmp.lattice[i] = np.array([tmp.lattice[i][0]*float(array[tmpmap[i]]), tmp.lattice[i][1]*float(array[tmpmap[i]]), tmp.lattice[i][2]*float(array[tmpmap[i]])]) 621 | tmp.lattice[i] = tmp.lattice[i]*array[tmpmap[i]] 622 | tmp.atoms = COORDs 623 | tmp.natoms = tmp.natoms*(array[0]*array[1]*array[2]) 624 | indexing = ['a','b','c'] 625 | for i in indexing: 626 | tmp.norms[i] *= array[indexing.index(i)] 627 | tmp.volume = tmp.volume*(array[0]*array[1]*array[2]) 628 | tmp.energy = tmp.energy*(array[0]*array[1]*array[2]) 629 | return tmp 630 | else: 631 | print("Invalid supercell dimensions") 632 | 633 | def __str__(self): 634 | return self.print() 635 | 636 | def __eq__(self,other): 637 | if type(other) == type(self): 638 | diff = [] 639 | keys_ = [] 640 | l = sorted(self.atoms.items(),key=lambda x: (x[1][0],x[1][1],x[1][2])) 641 | r = sorted(other.atoms.items(),key=lambda x: (x[1][0],x[1][1],x[1][2])) 642 | for c,i in enumerate(l): 643 | diff.append(np.linalg.norm(i[1] - r[c][1])) 644 | keys_.append(self.atomindex.sanitize(i[0]) == self.atomindex.sanitize(r[c][0])) 645 | m = np.max(np.abs(diff)) 646 | if m > 1e-3: return False 647 | if False in keys_: return False 648 | diff = [] 649 | for c,i in enumerate(self.lattice): 650 | diff.append(np.linalg.norm(self.lattice[i] - other.lattice[i])) 651 | m = np.max(np.abs(diff)) 652 | if m > 1e-3: return False 653 | else: 654 | return False 655 | return True 656 | 657 | def main(command): 658 | test = Struct() 659 | test.email = command[1] 660 | if os.path.isfile(command[0]): 661 | test.File_Process(command[0]) 662 | elif os.path.isdir(command[0]): 663 | print("Is Dir//process xml here") 664 | test.XML_Process(command[0]) 665 | print(test.lattice) 666 | if "@" not in test.email: 667 | print("Invalid Email Supplied") 668 | #sys.exit(5) 669 | raise ValueError 670 | test.to_database() 671 | test = None 672 | 673 | if __name__ == "__main__": 674 | if len(sys.argv) != 3: 675 | print("Incorrect number of arguments, run as ./QE.py QEOUTPUT_FILE EMAIL") 676 | sys.exit(6) 677 | command = [sys.argv[1],sys.argv[2]] 678 | 679 | main(command) 680 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a simple repository to keep track of changes across multiple servers 2 | 3 | This code is not for production, but purely research purposes and, as such, is not commented for wide use 4 | -------------------------------------------------------------------------------- /bandgapoccu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import sys 4 | 5 | vbm = None 6 | cbm = None 7 | bg = None 8 | class bg: 9 | def __init__( self, direct): 10 | self.vbm = None 11 | self.cbm = None 12 | self.bg = None 13 | self.Fermi = None 14 | self.occu = None 15 | self.energy = None 16 | self.metallic = False 17 | self.get_bandgap(direct) 18 | 19 | def __call__ ( self,direct): 20 | self.get_bandgap(direct) 21 | 22 | def __iter__(self): 23 | yield 'gap', self.bg 24 | yield 'cbm', self.cbm 25 | yield 'vbm', self.vbm 26 | 27 | def get_bandgap(self,direct): 28 | self.occu = [] 29 | self.energy = [] 30 | f = open(direct) 31 | 32 | for i in f: 33 | if "End of self-consistent" in i: 34 | self.occu = [] 35 | self.energy = [] 36 | if ' k =' in i: 37 | data = [] 38 | i = next(f) 39 | i = next(f) 40 | while len(i.split()) > 0: 41 | for jj in i.split(): 42 | try: 43 | data.append(float(jj)) 44 | except ValueError: 45 | jjt = [xx for xx in jj.split('-') if xx] 46 | for xx in jjt: 47 | data.append(-float(xx)) 48 | i = next(f) 49 | self.energy.append(data) 50 | if 'occupation numbers' in i: 51 | data = [] 52 | while len(i.split()) > 0: 53 | i = next(f) 54 | for jj in i.split(): 55 | data.append(float(jj.strip())) 56 | self.occu.append(data) 57 | if 'Fermi' in i: 58 | self.Fermi = float(i.split()[4]) 59 | f.close() 60 | minmax = [] 61 | metallic = False 62 | if len(self.occu) > 0: 63 | for c,i in enumerate(self.occu): 64 | t = np.array(i) 65 | if 0.0 in t: 66 | idx = np.where(t==0.0)[0][0] 67 | te = np.array(self.energy[c]) 68 | minmax.append([te[idx-1],te[idx]]) 69 | else: 70 | metallic = True 71 | if not metallic: 72 | minmax = np.array(minmax) 73 | self.vbm = np.max(minmax[:,0]) 74 | self.cbm = np.min(minmax[:,1]) 75 | self.bg = self.cbm - self.vbm 76 | else: 77 | self.vbm = None 78 | self.cbm = None 79 | self.metallic = True 80 | self.bg = 0.0 81 | else: 82 | minmax = [] 83 | for c,i in enumerate(self.energy): 84 | t = np.array(i) 85 | t = np.subtract(i,self.Fermi) 86 | m_ = np.argwhere(t>0) 87 | minmax.append([t[m_[0][0]-1],t[m_[0][0]]]) 88 | minmax = np.array(minmax) 89 | self.vbm = np.max(minmax[:,0]) 90 | self.cbm = np.min(minmax[:,1]) 91 | self.bg = self.cbm - self.vbm 92 | 93 | if __name__ == "__main__": 94 | import sys 95 | x = bg(sys.argv[1]) 96 | print(x.bg) 97 | -------------------------------------------------------------------------------- /color.key: -------------------------------------------------------------------------------- 1 | H #FFFFFF 2 | He #D9FFFF 3 | Li #CC80FF 4 | Be #C2FF00 5 | B #FFB5B5 6 | C #909090 7 | N #3050F8 8 | O #FF0D0D 9 | O2 #FFAE00 10 | F #90E050 11 | Ne #B3E3F5 12 | Na #AB5CF2 13 | Mg #8AFF00 14 | Al #BFA6A6 15 | Si #F0C8A0 16 | P #FF8000 17 | S #FFFF30 18 | Cl #1FF01F 19 | Ar #80D1E3 20 | K #8F40D4 21 | Ca #3DFF00 22 | Sc #E6E6E6 23 | Ti #BFC2C7 24 | Ti1 #BFC2C7 25 | Ti2 #BFC2C7 26 | V #A6A6AB 27 | V1 #A6A6AB 28 | V2 #A6A6AB 29 | Cr #8A99C7 30 | Cr1 #8A99C7 31 | Cr2 #8A99C7 32 | Mn #9C7AC7 33 | Mn1 #9C7AC7 34 | Mn2 #9C7AC7 35 | Fe #FFA800 36 | Fe1 #FFA200 37 | Fe2 #FFD200 38 | Co #F090A0 39 | Co1 #05004C 40 | Co2 #388786 41 | Co3 #67CAC9 42 | Ni #50D050 43 | Ni1 #50D050 44 | Ni2 #50D050 45 | Cu #808080 46 | Cu1 #808080 47 | Cu2 #606060 48 | Zn #7D80B0 49 | Ga #C28F8F 50 | Ge #668F8F 51 | As #BD80E3 52 | Se #FFA100 53 | Br #A62929 54 | Kr #5CB8D1 55 | Rb #702EB0 56 | Sr #00FF00 57 | Y #94FFFF 58 | Zr #94E0E0 59 | Nb #73C2C9 60 | Mo #54B5B5 61 | Tc #3B9E9E 62 | Ru #248F8F 63 | Rh #0A7D8C 64 | Pd #006985 65 | Ag #C0C0C0 66 | Cd #FFD98F 67 | In #A67573 68 | Sn #668080 69 | Sb #9E63B5 70 | Te #D47A00 71 | I #940094 72 | Xe #429EB0 73 | Cs #57178F 74 | Ba #00C900 75 | La #70D4FF 76 | Ce #FFFFC7 77 | Pr #D9FFC7 78 | Nd #C7FFC7 79 | Pm #A3FFC7 80 | Sm #8FFFC7 81 | Eu #61FFC7 82 | Gd #45FFC7 83 | Tb #30FFC7 84 | Dy #1FFFC7 85 | Ho #00FF9C 86 | Er #00E675 87 | Tm #00D452 88 | Yb #00BF38 89 | Lu #00AB24 90 | Hf #4DC2FF 91 | Ta #4DA6FF 92 | W #2194D6 93 | Re #267DAB 94 | Os #266696 95 | Ir #175487 96 | Pt #D0D0E0 97 | Au #FFD123 98 | Hg #B8B8D0 99 | Tl #A6544D 100 | Pb #575961 101 | Bi #9E4FB5 102 | Po #AB5C00 103 | At #754F45 104 | Rn #428296 105 | Fr #420066 106 | Ra #007D00 107 | Ac #70ABFA 108 | Th #00BAFF 109 | Pa #00A1FF 110 | U #008FFF 111 | Np #0080FF 112 | Pu #006BFF 113 | Am #545CF2 114 | Cm #785CE3 115 | Bk #8A4FE3 116 | Cf #A136D4 117 | Es #B31FD4 118 | Fm #B31FBA 119 | Md #B30DA6 120 | No #BD0D87 121 | Lr #C70066 122 | Rf #CC0059 123 | Db #D1004F 124 | Sg #D90045 125 | Bh #E00038 126 | Hs #E6002E 127 | Mt #EB0026 128 | -------------------------------------------------------------------------------- /default.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import json as json 4 | import pickle as pkl 5 | import functions as fun 6 | -------------------------------------------------------------------------------- /early_stop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from functions import prophet_map as pm 5 | import re 6 | import sqlite3 7 | import pandas as pd 8 | import numpy as np 9 | import os 10 | import pickle as pkl 11 | from functions import get_network_info as get_net 12 | 13 | def to_pd(d_,train,df=None,f=None): 14 | if df is not None: 15 | print(d_) 16 | for i in d_: 17 | df.loc[i,'target'] = d_[i]['target'] 18 | df.loc[i,'prediction'] = d_[i]['prophet'] 19 | if 'train' in list(d_[i].keys()): 20 | df.loc[i,'train'] = d_[i]['train'] 21 | else: 22 | df.loc[i,'train'] = train 23 | else: 24 | for i in d_: 25 | f.write(','.join([str(zz) for zz in [d_[i]['target'],d_[i]['prophet'],train]]) + '\n') 26 | 27 | def analysis(s): 28 | d = [] 29 | s_ = s.read().split('\n') 30 | try: 31 | for c,i in enumerate(s_): 32 | if 'System Prediction Target' in i: 33 | c += 2 34 | i = s_[c] 35 | while len(i.split()) > 0: 36 | d.append([float(i.split()[1]),float(i.split()[2])]) 37 | c += 1 38 | i = s_[c] 39 | break 40 | d = np.array(d) 41 | del_ = np.max(np.abs(d[:,0] - d[:,1])) 42 | rmse = np.sqrt(np.sum((d[:,0]-d[:,1])**2)/len(d)) 43 | except: 44 | print(s_) 45 | raise "error with PROPhet" 46 | return del_,rmse 47 | 48 | def convert(s,include,chkpoint): 49 | f = open(s) 50 | inc = False 51 | chk = False 52 | val_f = "" 53 | for i in f: 54 | if 'checkpoint_in' in i.lower(): 55 | val_f += 'checkpoint_in = ' + chkpoint + '\n' 56 | chk = True 57 | elif 'include' in i.lower(): 58 | val_f += 'include = ' + include + '\n' 59 | inc = True 60 | else: 61 | val_f += i 62 | if not inc: 63 | val_f += 'include = ' + include + '\n' 64 | if not chk: 65 | val_f += 'checkpoint_in = ' + chkpoint + '\n' 66 | f.close() 67 | return val_f 68 | 69 | def get_restart(s): 70 | f = open(s) 71 | nsave = None 72 | checkpoint = None 73 | nint = None 74 | for i in f: 75 | if i[0] == '#': continue 76 | if 'nsave' in i.lower(): 77 | nsave = i[i.find('=') + 1:].strip() 78 | if 'checkpoint_out' in i.lower(): 79 | checkpoint = i[i.find('=') + 1:].strip() 80 | if 'niterations' in i.lower(): 81 | nint = i[i.find('=') + 1:].strip() 82 | f.close() 83 | return nsave,checkpoint,nint 84 | 85 | def process(fname,bout=None,df=None,executable='PROPhet',np=32,db=None,d=None): 86 | if bout is not None: 87 | if d is None: 88 | t = [] 89 | f = open(bout) 90 | for i in f: 91 | if 'Iteration ' in i: 92 | i = next(f) 93 | i = next(f) 94 | while len(i.split()) == 4: 95 | try: 96 | t.append(i.split()) 97 | i = next(f) 98 | except: break 99 | f.close() 100 | t = sorted(t,key= lambda x: float(x[2])) 101 | d = [(int(t[0][0]),)] 102 | nsave,checkpoint,nint = get_restart(fname) 103 | print(nsave,checkpoint,nint) 104 | nsave = int(nsave) 105 | c = int(d[0][0]) 106 | correct = round((c/nsave)+1)*nsave 107 | valf = convert(fname,'train.dat','FILE') 108 | if not os.path.isfile(checkpoint + '_' + str(correct)): 109 | if os.path.isfile(checkpoint + '_' + str(int(correct) - int(nsave))): 110 | correct = correct - nsave 111 | chkpoint = checkpoint + '_' + str(correct) 112 | elif int(correct) - int(nsave) == int(nint): 113 | chkpoint = checkpoint 114 | else: 115 | raise ValueError(correct) 116 | else: chkpoint = checkpoint + '_' + str(correct) 117 | f = open('val_temp','w') 118 | f.write(valf.replace('FILE',chkpoint)) 119 | f.close() 120 | #t = os.popen('mpirun -np {np} {prop} -in val_temp -validate | tee train.dat.out'.format(prop=executable,np=np)) 121 | t = os.popen('mpirun -np 32 PROPhet -in val_temp -validate | tee train.dat.out'.format(prop=executable,np=np)).read() 122 | #print(t) 123 | #funct = open(checkpoint + '_' + str(correct)).read() 124 | funct = open(chkpoint).read() 125 | t_file = ['train.dat'] 126 | to_pkl(db=db,df=df,t_file=t_file,funct=funct) 127 | return 128 | np = str(np) 129 | nsave,checkpoint,nint = get_restart(fname) 130 | valf = convert(fname,'val.dat','FILE') 131 | len_ = len(open('val.dat').read().split('\n')[:-1]) 132 | np = str(len_) if len_ < 32 else str(32) 133 | if d is None: 134 | out = open('earlystop.out','w') 135 | d = [] 136 | out.write('step,rmse,max\n') 137 | for i in range(100,int(nint),int(nsave)): 138 | if not os.path.isfile(checkpoint + '_' + str(i)): 139 | break 140 | f = open('val_temp','w') 141 | f.write(valf.replace('FILE',checkpoint + '_' + str(i))) 142 | f.close() 143 | t = os.popen('mpirun -np {np} {prop} -in val_temp -validate'.format(prop=executable,np=np)) 144 | del_,rmse = analysis(t) 145 | d.append((i,rmse,del_)) 146 | print(i,rmse,del_) 147 | d = sorted(d,key=lambda x: x[1]) 148 | for i in d: 149 | out.write(','.join([str(zz) for zz in i]) + '\n') 150 | out.close() 151 | f = open('val_temp','w') 152 | f.write(valf.replace('FILE',checkpoint + '_' + str(d[0][0]))) 153 | f.close() 154 | t = os.popen('mpirun -np {np} {prop} -in val_temp -validate > val.dat.out'.format(prop=executable,np=np)).read() 155 | f = open('train_temp','w') 156 | f.write(valf.replace('FILE',checkpoint + '_' + str(d[0][0])).replace('val.dat','train.dat')) 157 | f.close() 158 | t = os.popen('mpirun -np {np} {prop} -in train_temp -validate > train.dat.out'.format(prop=executable,np=np)).read() 159 | f = open('test_temp','w') 160 | f.write(valf.replace('FILE',checkpoint + '_' + str(d[0][0])).replace('val.dat','test.dat')) 161 | f.close() 162 | t = os.popen('mpirun -np {np} {prop} -in test_temp -validate > test.dat.out'.format(prop=executable,np=np)).read() 163 | t_file = ['train.dat','val.dat','test.dat'] 164 | #t_out = ['train.dat.out','val.dat.out','test.dat.out'] 165 | #flag = ['train','val','test'] 166 | funct = open(checkpoint + '_' + str(d[0][0])).read() 167 | to_pkl(db=db,df=df,t_file=t_file,funct=funct) 168 | 169 | def to_pkl(db=None,fname='bfgs_file',df=None,t_file=['train.dat'],f=None,funct=None): 170 | if df is not None: 171 | for c,i in enumerate(t_file): 172 | t = pm(i + '.out',i) 173 | to_pd(t,i.replace('.dat',''),df=df) 174 | df = df.dropna() 175 | t = get_net(fname='bfgs_file') 176 | if db is not None: 177 | F_pkl = pkl.load(open(db,'rb')) 178 | F_pkl[os.getcwd()] = {'description':t,'df':df.T.to_dict(),'functional':funct} #storing the dataframe as dict for version control 179 | pkl.dump(F_pkl,open(db,'wb')) 180 | df.to_csv('data.csv') 181 | else: 182 | f.write('target,prediction,train\n') 183 | to_pd(t,'train',f=f) 184 | to_pd(v,'train',f=f) 185 | 186 | def construct_df(j): 187 | if j is not None: 188 | _ = pd.read_json(j) 189 | _.set_index('location',inplace=True) 190 | _['target'] = None 191 | _['prediction'] = None 192 | _['train'] = None 193 | return _ 194 | else: 195 | raise ValueError('json file does not exist',j) 196 | 197 | def split_val(df,val_file='val.dat'): 198 | t = open('val.dat').split('\n')[:-1] 199 | d = df.ix[t] 200 | val_temp = pd.DataFrame() 201 | test_temp = pd.DataFrame() 202 | for i in d.phase.unique(): 203 | for j in d[d.phase == i].dopant.unique(): 204 | v_t = d[(d.phase == i) & (d.dopant == j)] 205 | vt = v_t.sample(frac=0.5) 206 | tt = v_t.drop(vt.index) 207 | val_temp = val_temp.append(vt) 208 | test_temp = test_temp.append(tt) 209 | f = open('val.dat','w') 210 | for i in val_temp.index: 211 | f.write(i + '\n') 212 | f.close() 213 | f = open('test.dat','w') 214 | for i in test_temp.index: 215 | f.write(i + '\n') 216 | f.close() 217 | 218 | if __name__ == "__main__": 219 | #df = construct_df('/data/llentz/Charge-Density/no_Phosphate/data/all.json') 220 | #d = process('bfgs_file',df=df,executable='PROPhet',db='/data/llentz/codeplayground/data/Database.pkl') 221 | df = construct_df('/data/llentz/Charge-Density/HSE/data/all.hse.json') 222 | d = process('bfgs_file',df=df,executable='PROPhet',db='/data/llentz/Charge-Density/HSE/data/database.hse.pkl',bout='train.bfgs') 223 | -------------------------------------------------------------------------------- /extractor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import glob 4 | import QE 5 | from bandgapoccu import bg as BG 6 | import os 7 | import json 8 | import re 9 | 10 | def separate_aimd(fname='scf.out',tempfile='OUT.out',jfile='data.json'): 11 | """This takes a AIMD run from QE and separates the data into jfile 12 | run as separate_aimd(fname=QE out file, tempfile = Temporary file used for parsing, jfile = json file for output 13 | """ 14 | temp = [zz.split()[2] for zz in open(fname).read().split('\n') if 'temperature' in zz] 15 | temp = [float(zz) for zz in temp if re.match('\d+.\d+$',zz)] 16 | scf = open(fname,'r') 17 | 18 | header = '' 19 | 20 | for i in scf: 21 | if 'PseudoPot' in i: break 22 | header += i 23 | 24 | 25 | data = [] 26 | rest = header + '\n\n' 27 | tmp = '' 28 | tmp += rest 29 | cnt = 1 30 | base = QE.Struct() 31 | try: 32 | for i in scf: 33 | while '! total energy' not in i: 34 | i = next(scf) 35 | tmp += i 36 | #data[cnt] = {} 37 | t_ = {} 38 | OUT = tempfile 39 | output = open(OUT,'w') 40 | output.write(tmp) 41 | output.close() 42 | x = QE.Struct() 43 | x.File_Process(OUT) 44 | atoms,cell = x.return_params() 45 | if cnt == 1: base.File_Process(OUT) 46 | displ = {} 47 | for zz in base.atoms: 48 | displ[zz] = list(base.atoms[zz]-x.atoms[zz]) 49 | t_['str'] = x.print() 50 | t_['atoms'] = atoms 51 | t_['cell'] = cell 52 | t_['displ'] = displ 53 | t_['energy'] = x.energy 54 | del x 55 | Gap = BG(OUT) 56 | t_['gap'] = Gap.bg 57 | try: 58 | t_['temp'] = temp[cnt - 1] #[zz for zz in open(str(cnt) + '.out').read().split('\n') if 'temperature' in zz][-1].split()[2] 59 | except: 60 | t_['temp'] = 'NA' 61 | cnt += 1 62 | tmp = rest 63 | data.append(t_) 64 | with open(jfile, 'w') as outfile: 65 | json.dump(data, outfile) 66 | i = next(scf) 67 | continue 68 | scf.close() 69 | except StopIteration: 70 | scf.close() 71 | with open(jfile, 'w') as outfile: 72 | json.dump(data, outfile) 73 | 74 | if __name__ == '__main__': 75 | separate_aimd(fname=sys.argv[1]) 76 | 77 | -------------------------------------------------------------------------------- /functions.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | import json 5 | import copy 6 | 7 | def get_pdos(key,directory = '.'): 8 | d = [] 9 | cnt = [] 10 | files = glob.glob(directory + '/' + '*(' + key + ')*') 11 | for i in files: 12 | lb = i.find('#') + 1 13 | rb = i.find('(',lb) 14 | if i[lb:rb] not in cnt: cnt.append(i[lb:rb]) 15 | for i in [files[0]]: 16 | f = open(i) 17 | next(f) 18 | next(f) 19 | d_ = [] 20 | for jj in f: 21 | d_.append([float(zz) for zz in [jj.split()[0],jj.split()[1]]]) 22 | f.close() 23 | if len(d) == 0: d = np.array(d_) 24 | else: 25 | d[:,1] += np.array(d_)[:,1] 26 | return d.tolist(),len(cnt) 27 | 28 | def get_files(directory = '.'): 29 | files = glob.glob(directory + '/' + '*pdos_atm*') 30 | pdos = {} 31 | cnt = {} 32 | for i in files: 33 | lb = i.find('(') 34 | rb = i.find(')',lb) 35 | key = i[lb+1:rb] 36 | if key not in pdos: 37 | pdos[key],cnt[key] = get_pdos(key,directory=directory) 38 | return pdos,cnt 39 | 40 | def get_fermi(fname='vc-relax.out',directory = '.'): 41 | f = open(directory + '/' + fname) 42 | fermi = [] 43 | for i in f: 44 | if 'Fermi' in i: 45 | fermi.append(float(i.split()[-2])) 46 | return fermi 47 | 48 | def prophet_map(pname,tname): 49 | '''This is a conversion routine to convert PROPhet out put into a dictionary with the PK being the directory''' 50 | try: 51 | d_ = open(tname).read().split('\n')[:-1] 52 | d = [] 53 | train = [] 54 | for i in d_: 55 | if i is not '': 56 | i_ = i.split() 57 | d.append(i_[0]) 58 | if len(i_) > 1: 59 | train.append(i_[1]) 60 | else: 61 | train.append(i_[0]) 62 | d_ = d 63 | except: 64 | print('error opening ',tname) 65 | return 0 66 | p_ = {} 67 | with open(pname,'r') as f: 68 | for i in f: 69 | while 'System' not in i: 70 | i = next(f) 71 | i = next(f) 72 | i = next(f) 73 | cnt = 0 74 | while len(i.split()) > 0: 75 | if 'warning' in i.lower(): continue 76 | s_ = i.split() 77 | t_ = {'prophet':float(s_[1]),'target':float(s_[2]),'train':train[cnt]} 78 | p_[d_[cnt]] = t_ 79 | cnt += 1 80 | i = next(f) 81 | break 82 | return p_ 83 | 84 | def subscript(string): 85 | t = '' 86 | for i in string: 87 | if i.isdigit(): t+= '$_' + i + '$' 88 | else: t += i 89 | return t 90 | 91 | def prophet_list(pname): 92 | try: 93 | d_ = open(pname) 94 | t = [] 95 | for i in d_: 96 | if '-----------------------' in i: 97 | i = next(d_) 98 | while len(i.split()) > 0: 99 | if len(i.split()) == 5: 100 | t_ = i.split() 101 | t.append({'prediction':float(t_[1]),'target':float(t_[2]),'natom':int(t_[3]),'train':t_[4]}) 102 | i = next(d_) 103 | d_.close() 104 | return t 105 | except: 106 | print('error opening ',pname) 107 | return 0 108 | 109 | 110 | def temperature(scf_file,wd): 111 | '''old routine for AIMD''' 112 | f = open(scf_file) 113 | temp = [] 114 | for i in f: 115 | if 'temperature' in i and len(i.split()) == 4: 116 | temp.append(i.split()[2]) 117 | f.close() 118 | for c,i in enumerate(temp): 119 | f_ = open(wd + '/' + str(c+1) + '.save/temperature','w') 120 | f_.write(i) 121 | f_.close() 122 | 123 | def get_network_info(fname='bfgs_file'): 124 | '''extracts relevant network information for PROPhet''' 125 | f = open(fname).read().split('\n') 126 | d = {} 127 | for i in f: 128 | if 'hidden' in i: 129 | l = i.find('=') 130 | d['network'] = i[l+1:len(i)].strip() 131 | if 'downsample' in i: 132 | l = i.find('=') 133 | d['downsample'] = i[l+1:len(i)].strip() 134 | if 'precondition' in i: 135 | if '1' in i and '#' not in i: 136 | d['precondition'] = True 137 | elif '#' in i: 138 | d['precondition'] = False 139 | else: 140 | d['precondition'] = False 141 | return d 142 | 143 | def construct_json(*args): 144 | t = [] 145 | for i in args: 146 | t_ = json.load(open(i)) 147 | for j in t_: t.append(j) 148 | return t 149 | 150 | def rot_dir(theta,dir_ = 'x'): 151 | t = np.pi*theta/180 152 | s = np.sin 153 | c = np.cos 154 | if dir_=='x': 155 | return np.array([[1,0,0],[0,c(t),-s(t)],[0,s(t),c(t)]]) 156 | elif dir_=='y': 157 | return np.array([[c(t),0,s(t)],[0,1,0],[-s(t),0,c(t)]]) 158 | elif dir_ == 'z': 159 | return np.array([[c(t),-s(t),0],[s(t),c(t),0],[0,0,1]]) 160 | 161 | def rotate_QE(x,theta,dir_='x'): 162 | y = copy.deepcopy(x) 163 | rot_matrix = rot_dir(theta,dir_=dir_) 164 | l = [] 165 | for i in y.lattice: 166 | l.append(y.lattice[i]) 167 | l = np.transpose(np.array(l)) 168 | l = np.dot(rot_matrix,l) 169 | for j in y.atoms: 170 | y.atoms[j] = np.transpose(np.dot(rot_matrix,np.transpose(y.atoms[j]))) 171 | t = ['a','b','c'] 172 | for c,i in enumerate(np.transpose(l)): 173 | y.lattice[t[c]] = i 174 | return y 175 | 176 | def split_df(df,**kwargs): 177 | '''This will add a column to a dataframe and split it into train, val, and test datasets''' 178 | if not kwargs: 179 | kwargs = {'frac':0.80} 180 | df['train'] = None 181 | train = df.sample(**kwargs) 182 | df.loc[train.index,'train'] = 'train' 183 | rem = df.drop(train.index) 184 | test = rem.sample(frac=0.50) 185 | df.loc[test.index,'train'] = 'test' 186 | df.loc[rem.drop(test.index).index,'train'] = 'val' 187 | 188 | def upf(stru): 189 | atom,cell = stru.return_params() 190 | t = {'H': 'H 1.0079 H.upf', 'He': 'He 4.0026 He.upf', 'Li': 'Li 6.941 Li.upf', 'Be': 'Be 9.0122 Be.upf', 'B': 'B 10.811 B.upf', 'C': 'C 12.0107 C.upf', 'N': 'N 14.0067 N.upf', 'O': 'O 15.9994 O.upf', 'F': 'F 18.9984 F.upf', 'Ne': 'Ne 20.1797 Ne.upf', 'Na': 'Na 22.9897 Na.upf', 'Mg': 'Mg 24.305 Mg.upf', 'Al': 'Al 26.9815 Al.upf', 'Si': 'Si 28.0855 Si.upf', 'P': 'P 30.9738 P.upf', 'S': 'S 32.065 S.upf', 'Cl': 'Cl 35.453 Cl.upf', 'Ar': 'Ar 39.948 Ar.upf', 'K': 'K 39.0983 K.upf', 'Ca': 'Ca 40.078 Ca.upf', 'Sc': 'Sc 44.9559 Sc.upf', 'Ti': 'Ti 47.867 Ti.upf', 'V': 'V 50.9415 V.upf', 'Cr': 'Cr 51.9961 Cr.upf', 'Mn': 'Mn 54.938 Mn.upf', 'Fe': 'Fe 55.845 Fe.upf', 'Co': 'Co 58.9332 Co.upf', 'Ni': 'Ni 58.6934 Ni.upf', 'Cu': 'Cu 63.54600000000001 Cu.upf', 'Zn': 'Zn 65.39 Zn.upf', 'Ga': 'Ga 69.723 Ga.upf', 'Ge': 'Ge 72.64 Ge.upf', 'As': 'As 74.9216 As.upf', 'Se': 'Se 78.96 Se.upf', 'Br': 'Br 79.904 Br.upf', 'Kr': 'Kr 83.8 Kr.upf', 'Rb': 'Rb 85.4678 Rb.upf', 'Sr': 'Sr 87.62 Sr.upf', 'Y': 'Y 88.9059 Y.upf', 'Zr': 'Zr 91.22399999999999 Zr.upf', 'Nb': 'Nb 92.9064 Nb.upf', 'Mo': 'Mo 95.94 Mo.upf', 'Tc': 'Tc 98.0 Tc.upf', 'Ru': 'Ru 101.07 Ru.upf', 'Rh': 'Rh 102.9055 Rh.upf', 'Pd': 'Pd 106.42 Pd.upf', 'Ag': 'Ag 107.8682 Ag.upf', 'Cd': 'Cd 112.411 Cd.upf', 'In': 'In 114.818 In.upf', 'Sn': 'Sn 118.71 Sn.upf', 'Sb': 'Sb 121.76 Sb.upf', 'Te': 'Te 127.6 Te.upf', 'I': 'I 126.9045 I.upf', 'Xe': 'Xe 131.293 Xe.upf', 'Cs': 'Cs 132.9055 Cs.upf', 'Ba': 'Ba 137.327 Ba.upf', 'La': 'La 138.9055 La.upf', 'Ce': 'Ce 140.116 Ce.upf', 'Pr': 'Pr 140.9077 Pr.upf', 'Nd': 'Nd 144.24 Nd.upf', 'Pm': 'Pm 145.0 Pm.upf', 'Sm': 'Sm 150.36 Sm.upf', 'Eu': 'Eu 151.964 Eu.upf', 'Gd': 'Gd 157.25 Gd.upf', 'Tb': 'Tb 158.9253 Tb.upf', 'Dy': 'Dy 162.5 Dy.upf', 'Ho': 'Ho 164.9303 Ho.upf', 'Er': 'Er 167.25900000000001 Er.upf', 'Tm': 'Tm 168.9342 Tm.upf', 'Yb': 'Yb 173.04 Yb.upf', 'Lu': 'Lu 174.967 Lu.upf', 'Hf': 'Hf 178.49 Hf.upf', 'Ta': 'Ta 180.9479 Ta.upf', 'W': 'W 183.84 W.upf', 'Re': 'Re 186.207 Re.upf', 'Os': 'Os 190.23 Os.upf', 'Ir': 'Ir 192.217 Ir.upf', 'Pt': 'Pt 195.078 Pt.upf', 'Au': 'Au 196.9665 Au.upf', 'Hg': 'Hg 200.59 Hg.upf', 'Tl': 'Tl 204.3833 Tl.upf', 'Pb': 'Pb 207.2 Pb.upf', 'Bi': 'Bi 208.9804 Bi.upf', 'Po': 'Po 209.0 Po.upf', 'At': 'At 210.0 At.upf', 'Rn': 'Rn 222.0 Rn.upf', 'Fr': 'Fr 223.0 Fr.upf', 'Ra': 'Ra 226.0 Ra.upf', 'Ac': 'Ac 227.0 Ac.upf', 'Th': 'Th 232.0381 Th.upf', 'Pa': 'Pa 231.0359 Pa.upf', 'U': 'U 238.0289 U.upf', 'Np': 'Np 237.0 Np.upf', 'Pu': 'Pu 244.0 Pu.upf', 'Am': 'Am 243.0 Am.upf'} 191 | atm = {} 192 | for i in atom.split('\n')[:-1]: 193 | _ = i.split()[0] 194 | if _ in atm: continue 195 | atm[_] = t[_] 196 | _ = '\n'.join(atm[i] for i in atm) 197 | return _ 198 | 199 | 200 | hammett = {'F': 0.34,'NH2':-0.16,'H':0,'COCl':0.51,'CF3':0.43,'OH':0.12,'NHNO2':0.91} 201 | color_keys = {'H': '#FFFFFF', 'He': '#D9FFFF', 'Li': '#CC80FF', 'Be': '#C2FF00', 'B': '#FFB5B5', 'C': '#909090', 'N': '#3050F8', 'O': '#FF0D0D', 'O2': '#FFAE00', 'F': '#90E050', 'Ne': '#B3E3F5', 'Na': '#AB5CF2', 'Mg': '#8AFF00', 'Al': '#BFA6A6', 'Si': '#F0C8A0', 'P': '#FF8000', 'S': '#FFFF30', 'Cl': '#1FF01F', 'Ar': '#80D1E3', 'K': '#8F40D4', 'Ca': '#3DFF00', 'Sc': '#E6E6E6', 'Ti': '#BFC2C7', 'Ti1': '#BFC2C7', 'Ti2': '#BFC2C7', 'V': '#A6A6AB', 'V1': '#A6A6AB', 'V2': '#A6A6AB', 'Cr': '#8A99C7', 'Cr1': '#8A99C7', 'Cr2': '#8A99C7', 'Mn': '#9C7AC7', 'Mn1': '#9C7AC7', 'Mn2': '#9C7AC7', 'Fe': '#FFA800', 'Fe1': '#FFA200', 'Fe2': '#FFD200', 'Co': '#F090A0', 'Co1': '#05004C', 'Co2': '#388786', 'Co3': '#67CAC9', 'Ni': '#50D050', 'Ni1': '#50D050', 'Ni2': '#50D050', 'Cu': '#808080', 'Cu1': '#808080', 'Cu2': '#606060', 'Zn': '#7D80B0', 'Ga': '#C28F8F', 'Ge': '#668F8F', 'As': '#BD80E3', 'Se': '#FFA100', 'Br': '#A62929', 'Kr': '#5CB8D1', 'Rb': '#702EB0', 'Sr': '#00FF00', 'Y': '#94FFFF', 'Zr': '#94E0E0', 'Nb': '#73C2C9', 'Mo': '#54B5B5', 'Tc': '#3B9E9E', 'Ru': '#248F8F', 'Rh': '#0A7D8C', 'Pd': '#006985', 'Ag': '#C0C0C0', 'Cd': '#FFD98F', 'In': '#A67573', 'Sn': '#668080', 'Sb': '#9E63B5', 'Te': '#D47A00', 'I': '#940094', 'Xe': '#429EB0', 'Cs': '#57178F', 'Ba': '#00C900', 'La': '#70D4FF', 'Ce': '#FFFFC7', 'Pr': '#D9FFC7', 'Nd': '#C7FFC7', 'Pm': '#A3FFC7', 'Sm': '#8FFFC7', 'Eu': '#61FFC7', 'Gd': '#45FFC7', 'Tb': '#30FFC7', 'Dy': '#1FFFC7', 'Ho': '#00FF9C', 'Er': '#00E675', 'Tm': '#00D452', 'Yb': '#00BF38', 'Lu': '#00AB24', 'Hf': '#4DC2FF', 'Ta': '#4DA6FF', 'W': '#2194D6', 'Re': '#267DAB', 'Os': '#266696', 'Ir': '#175487', 'Pt': '#D0D0E0', 'Au': '#FFD123', 'Hg': '#B8B8D0', 'Tl': '#A6544D', 'Pb': '#575961', 'Bi': '#9E4FB5', 'Po': '#AB5C00', 'At': '#754F45', 'Rn': '#428296', 'Fr': '#420066', 'Ra': '#007D00', 'Ac': '#70ABFA', 'Th': '#00BAFF', 'Pa': '#00A1FF', 'U': '#008FFF', 'Np': '#0080FF', 'Pu': '#006BFF', 'Am': '#545CF2', 'Cm': '#785CE3', 'Bk': '#8A4FE3', 'Cf': '#A136D4', 'Es': '#B31FD4', 'Fm': '#B31FBA', 'Md': '#B30DA6', 'No': '#BD0D87', 'Lr': '#C70066', 'Rf': '#CC0059', 'Db': '#D1004F', 'Sg': '#D90045', 'Bh': '#E00038', 'Hs': '#E6002E', 'Mt': '#EB0026'} 202 | -------------------------------------------------------------------------------- /hse_db.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | import numpy as np 4 | import QE 5 | from bandgapoccu import bg 6 | import json, os 7 | 8 | def get_hse(all_,phse,hse_location,pbe_fname='.out',hse_fname='.out'): 9 | '''This matches the HSE data with the PBE data''' 10 | t = [i for i in all_ if i['phase'] == phse] 11 | hse_lst = [] 12 | for c,i in enumerate(t): 13 | key = i['location'].split('/')[-1].replace('.save','') 14 | d_ = i['location'][0:i['location'].rfind('/')] 15 | pbe = bg(d_ + '/' + key + pbe_fname) 16 | i['bandgap'] = pbe.bg 17 | i['metallic'] = pbe.metallic 18 | hse_f = '/'.join([hse_location,i['dopant'],key+hse_fname]) 19 | hse = None 20 | if os.path.isfile(hse_f): 21 | if 'job done' in open(hse_f).read().lower(): 22 | hse = bg(hse_f) 23 | i['hse_bandgap'] = hse.bg 24 | i['hse_metallic'] = hse.metallic 25 | hse = hse.bg 26 | if hse is None: 27 | i['hse_bandgap'] = None 28 | i['hse_metallic'] = None 29 | i['hse_location'] = hse_f.replace(hse_fname,'.save') 30 | hse_lst.append(i) 31 | return hse_lst 32 | 33 | 34 | def process_save(fname,counts): 35 | '''This gets the bandgap, dopant, etc for a range of save directories''' 36 | f = open(fname).read().split('\n') 37 | from math import gcd 38 | string = '' 39 | cmmon = [] 40 | for i in counts: 41 | cmmon.append(counts[i]) 42 | if len(cmmon) == 1: 43 | div = cmmon[0] 44 | else: 45 | div = cmmon[0] 46 | for c in cmmon[1::]: 47 | div = gcd(div , c) 48 | 49 | counts_sort = sorted([(i,int(counts[i]/div)) for i in counts],key=lambda x: x[1]) 50 | b_ = [i[0] for i in counts_sort] 51 | phase = '' 52 | for i in counts_sort: 53 | if i[1] == 1: 54 | phase += i[0] 55 | else: 56 | phase += i[0] + str(i[1]) 57 | 58 | d = [] 59 | for i in f: 60 | if len(i) == 0: continue 61 | try: 62 | y = QE.Struct() 63 | y.XML_Process(i) 64 | x = bg(i.replace('.save','.out')) 65 | t = [zz for zz in y.atoms if ''.join([tt for tt in zz if not tt.isdigit()]).strip() not in b_] 66 | p = [zz for zz in y.atoms if ''.join([tt for tt in zz if not tt.isdigit()]).strip() in b_] 67 | coun = {} 68 | for zz in p: 69 | c = ''.join([tt for tt in zz.split() if not tt.isdigit()]) 70 | if c in coun: coun[c] += 1 71 | else: coun[c] = 1 72 | loc = phase 73 | loc = b_[0] if coun[b_[0]] < counts[b_[0]] else b_[1] 74 | if len(t) > 0: 75 | dopant = ''.join([zz for zz in t[0] if not zz.isdigit()]).strip() 76 | else: 77 | dopant = phase 78 | d.append({'location':i.strip(),'dopant':dopant,'bandgap':x.bg,'metallic':x.metallic,'dop_sub':loc,'phase':phase,'natom':len(y.atoms)}) 79 | except: 80 | continue 81 | return phase,d 82 | 83 | if __name__ == '__main__': 84 | phase,d = process_save('save.out',{'Ti':8,'O':16}) 85 | 86 | hse_location = '/data/llentz/Charge-Density/HSE/HSE/TiO2/Big/c-len' 87 | t = get_hse(d,phase,hse_location) 88 | json.dump(t, open(phase + '.hse.json','w')) 89 | 90 | -------------------------------------------------------------------------------- /process.py: -------------------------------------------------------------------------------- 1 | #!/global/homes/l/llentz/anaconda3/bin/python 2 | 3 | import tempfile 4 | import sys 5 | import glob 6 | import QE 7 | from bandgapoccu import bg as BG 8 | import os 9 | import json 10 | import re 11 | 12 | def separate_aimd(fname='scf.out',tempfile='OUT.out',jfile='data.json'): 13 | temp = [zz.split()[2] for zz in open(fname).read().split('\n') if 'temperature' in zz] 14 | temp = [float(zz) for zz in temp if re.match('\d+.\d+$',zz)] 15 | scf = open(fname,'r') 16 | 17 | header = '' 18 | 19 | for i in scf: 20 | if 'PseudoPot' in i: break 21 | header += i 22 | 23 | 24 | data = [] 25 | rest = header + '\n\n' 26 | tmp = '' 27 | tmp += rest 28 | cnt = 1 29 | try: 30 | for i in scf: 31 | while '! total energy' not in i: 32 | i = next(scf) 33 | tmp += i 34 | #data[cnt] = {} 35 | t_ = {} 36 | OUT = tempfile 37 | output = open(OUT,'w') 38 | output.write(tmp) 39 | output.close() 40 | x = QE.Struct() 41 | x.File_Process(OUT) 42 | atoms,cell = x.return_params() 43 | t_['str'] = x.print() 44 | t_['atoms'] = atoms 45 | t_['cell'] = cell 46 | del x 47 | Gap = BG(OUT) 48 | t_['gap'] = Gap.bg 49 | try: 50 | t_['temp'] = temp[cnt - 1] #[zz for zz in open(str(cnt) + '.out').read().split('\n') if 'temperature' in zz][-1].split()[2] 51 | except: 52 | t_['temp'] = 'NA' 53 | cnt += 1 54 | tmp = rest 55 | data.append(t_) 56 | with open(jfile, 'w') as outfile: 57 | json.dump(data, outfile) 58 | i = next(scf) 59 | continue 60 | scf.close() 61 | except StopIteration: 62 | scf.close() 63 | with open(jfile, 'w') as outfile: 64 | json.dump(data, outfile) 65 | 66 | if __name__ == '__main__': 67 | separate_aimd(fname=sys.argv[1]) 68 | -------------------------------------------------------------------------------- /sample.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pandas as pd 4 | import json 5 | import numpy as np 6 | 7 | def user_input(x): 8 | with open(x.location + '/user_input','w') as f: 9 | f.write(str(x.bandgap) + '\n') 10 | f.write(str(np.log(x.bandgap)) + '\n') 11 | f.write(str(x.bandgap) + '\n') 12 | d = pd.read_json('all.json') 13 | d = d[(d.metallic == False) & (d.bandgap > 0.15)] 14 | d = d.drop(d[(d.phase == 'GaAs') & (d.dopant != 'GaAs')].index) 15 | d.apply(user_input,axis=1) 16 | 17 | #d = pd.read_csv('data.csv') 18 | t_total = pd.DataFrame() 19 | v_total = pd.DataFrame() 20 | test_total = pd.DataFrame() 21 | phase = d['phase'].unique() 22 | for j in phase: 23 | t_ = d[d.phase == j] 24 | dopants = t_['dopant'].unique() 25 | for i in dopants: 26 | t = t_[t_.dopant == i] 27 | if len(t) < 10: continue 28 | train = t.sample(frac=0.8) 29 | _ = t.drop(train.index) 30 | val = _.sample(frac=0.50) 31 | test = _.drop(val.index) 32 | t_total = t_total.append(train) 33 | v_total = v_total.append(val) 34 | test_total = test_total.append(test) 35 | 36 | train = open('train.dat','w') 37 | for i in t_total.location.as_matrix(): 38 | train.write(i) 39 | train.write('\n') 40 | train.close() 41 | val = open('val.dat','w') 42 | for i in v_total.location.as_matrix(): 43 | val.write(i) 44 | val.write('\n') 45 | val.close() 46 | test = open('test.dat','w') 47 | for i in test_total.location.as_matrix(): 48 | test.write(i) 49 | test.write('\n') 50 | test.close() 51 | -------------------------------------------------------------------------------- /sdir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function sdir() { 4 | if [ -z "$1" ] 5 | then 6 | echo "Please provide slurm jobid" 7 | else 8 | DIR=`scontrol show jobid -dd $1 | grep WorkDir | sed 's/ WorkDir=//g'` 9 | cd $DIR 10 | ls 11 | fi 12 | } 13 | -------------------------------------------------------------------------------- /to_xml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import QE 4 | from lxml import etree as ET 5 | from random import shuffle 6 | import sys 7 | 8 | 9 | def xml(f,extra_tag=None,p_=10): 10 | '''This takes a list of directories and creates a PROPhet xml file''' 11 | root = ET.Element("PROPhet") 12 | nsystms = ET.Element('nsystem') 13 | root.append(nsystms) 14 | systems = ET.Element("systems") 15 | for i in range(200): shuffle(f) 16 | N_train = int(0.80*len(f)) 17 | N_val = int(0.90*len(f)) 18 | sys = [] 19 | cnt = 1 20 | for c,i in enumerate(f): 21 | if c%p_ == 0 : print(c) 22 | if c < N_train: t_flag = "train" 23 | elif N_train < c < N_val: t_flag = "val" 24 | else: t_flag = 'test' 25 | x = QE.Struct() 26 | try: 27 | x.XML_Process(i) 28 | except: 29 | continue 30 | system = ET.Element("system",id=str(c + 1)) 31 | train = ET.Element('train') 32 | train.text = t_flag 33 | system.append(train) 34 | lattice = ET.Element('lattice',units='angstrom') 35 | for j in x.lattice: 36 | l = ET.Element(j) 37 | l.text = ' '.join([str(zz) for zz in x.lattice[j]]) 38 | lattice.append(l) 39 | system.append(lattice) 40 | atoms = ET.Element('atoms',units='angstrom') 41 | atm,cell = x.return_params() 42 | natoms = ET.Element('natoms') 43 | natoms.text = str(len(x.atoms)) 44 | species = ET.Element('species') 45 | ntype = len(set([zz.split()[0] for zz in atm.split('\n')[:-1]])) 46 | species.text = str(ntype) 47 | atoms.append(natoms) 48 | atoms.append(species) 49 | for j in atm.split('\n')[:-1]: 50 | atom = ET.Element("atom",specie=j.split()[0]) 51 | atom.text = ' '.join(j.split()[1:4]) 52 | atoms.append(atom) 53 | system.append(atoms) 54 | target = ET.Element('target') 55 | target.text = str(x.energy) 56 | system.append(target) 57 | if extra_tag is not None: 58 | tag = extra_tag[c]['tag'] 59 | val = extra_tag[c]['val'] 60 | if 'other_tags' in list(extra_tag[c].keys()): 61 | _ = ET.Element(tag,**extra_tag[c]['other_tags']) 62 | else: 63 | _ = ET.Element(tag) 64 | _.text = val 65 | system.append(_) 66 | sys.append(system) 67 | cnt += 1 68 | del x 69 | for i in sys: 70 | systems.append(i) 71 | nsystms.text = str(cnt) 72 | root.append(systems) 73 | str_ = ET.tostring(root,pretty_print=True).decode('utf-8') 74 | return str_ 75 | 76 | if __name__ == '__main__': 77 | d = open(sys.argv[1]).read().split()[0:10] 78 | t = xml(d) 79 | f = open('PROPhet.xml','w') 80 | f.write(t) 81 | f.close() 82 | --------------------------------------------------------------------------------