├── DIAS_analysis.py
├── DIAS_inputparser.py
├── DIAS_reduce_reorder.py
├── LICENSE
├── README.md
├── autoDIAS.py
├── input_example.inp
├── old
    └── test_autofrag.py
├── plotting_examples
    ├── 2D_print.py
    └── IRC_print.py
└── requirements.txt


/DIAS_analysis.py:
--------------------------------------------------------------------------------
  1 | radian2degree= 57.2958
  2 | hartree2kcal = 627.509
  3 | 
  4 | import numpy as np
  5 | import os, time
  6 | 
  7 | def getSCF(file):
  8 | 	SCF = getmultiSCF(file)
  9 | 
 10 | 	return SCF
 11 | 
 12 | def getmultiSCF(g09out):
 13 | 	out = open(g09out, "r")
 14 | 	SCFenergy = 0.00
 15 | 	out.seek(0) #returns to top of .log file
 16 | 	out = reversed(list(out)) # reverse list to read from back to find last SCF optimization
 17 | 	for line in out:
 18 | 		if "SCF Done" in line: # Gaussian
 19 | 			SCFenergy = line.split()[4]
 20 | 			break
 21 | 		if "Energy=" in line: # Gaussian Force Field
 22 | 			SCFenergy = line.split()[3]
 23 | 			break
 24 | 		elif "FINAL SINGLE POINT ENERGY" in line: # Orca
 25 | 			SCFenergy = line.split()[4]
 26 | 			break
 27 | 		elif "Total energy in the final basis set" in line: # Qchem
 28 | 			SCFenergy = line.split()[8]
 29 | 			break
 30 | 		elif "Total DFT energy" in line: # NWChem, but only for DFT
 31 | 			SCFenergy = line.split()[4]
 32 | 			break	
 33 | 	#check if it is indeed a float
 34 | 	try:
 35 | 		SCFenergy = float(SCFenergy)
 36 | 		pass
 37 | 	except ValueError:
 38 | 		SCFenergy = 0.00
 39 | 	return SCFenergy
 40 | 
 41 | def getDistance(A, B):
 42 | 	distance = np.sqrt( (A[0]-B[0])**2 + (A[1]-B[1])**2 + (A[2]-B[2])**2)
 43 | 	return distance
 44 | 	
 45 | # get angle between three atoms A, B and C, takes numpy arrays for coordinates
 46 | def getAngle(A, B, C):
 47 | 	angle = 0.00
 48 | 	#get unit vectors of vector BA and BC
 49 | 	BA_u = (A - B) / np.linalg.norm(A-B)
 50 | 	BC_u = (C - B) / np.linalg.norm(C-B)
 51 | 	#calculate angle
 52 | 	angle = np.arccos(np.clip(np.dot(BA_u, BC_u), -1, 1))
 53 | 	angle = angle * radian2degree
 54 | 	return angle
 55 | 
 56 | def getDihedral(p0, p1, p2, p3):
 57 |     b0 = -1.0*(p1 - p0)
 58 |     b1 = p2 - p1
 59 |     b2 = p3 - p2
 60 |     b1 /= np.linalg.norm(b1)
 61 |     v = b0 - np.dot(b0, b1)*b1
 62 |     w = b2 - np.dot(b2, b1)*b1
 63 |     x = np.dot(v, w)
 64 |     y = np.dot(np.cross(b1, v), w)
 65 |     return np.degrees(np.arctan2(y, x))
 66 | 
 67 | def create_analysis_file(settings):
 68 | 
 69 | 	analysis_file = open(settings.name+'_DIAS.txt', 'w')
 70 | 	analysis_file.write('{0:<15}'.format('Step'))
 71 | 	
 72 | 	try:
 73 | 		for x in range(0, len(settings.geo_dist)):
 74 | 			analysis_file.write('{0:<18}'.format('Distance between'))
 75 | 	except NameError:
 76 | 		pass
 77 | 		
 78 | 	try:
 79 | 		for x in range(0, len(settings.geo_ang)):
 80 | 			analysis_file.write('{0:<18}'.format('Angle between'))
 81 | 	except NameError:
 82 | 		pass	
 83 | 		
 84 | 	try:
 85 | 		for x in range(0, len(settings.geo_dih)):
 86 | 			analysis_file.write('{0:<18}'.format('Dihedral between'))
 87 | 	except NameError:
 88 | 		pass	
 89 | 
 90 | 	
 91 | 	analysis_file.write('{0:<15}'.format('Total energy'))
 92 | 	analysis_file.write('{0:<15}'.format('E(Int)'))
 93 | 	analysis_file.write('{0:<15}'.format('E(Dist)'))	
 94 | 	analysis_file.write('{0:<15}'.format('E(Dist)'))
 95 | 	analysis_file.write('{0:<15}'.format('E(Dist)'))
 96 | 	analysis_file.write('{0:<20}'.format('E(SCF)'))
 97 | 	analysis_file.write('{0:<20}'.format('E(SCF)'))
 98 | 	analysis_file.write('{0:<20}'.format('E(SCF)'))
 99 | 	analysis_file.write('\n')
100 | 
101 | 
102 | 	analysis_file.write('{0:<15}'.format(''))
103 | 	try:
104 | 		for x in range(0, len(settings.geo_dist)):
105 | 			analysis_file.write('{0:<18}'.format(str(settings.geo_dist[x][0])+'-'+str(settings.geo_dist[x][1])))
106 | 	except NameError:
107 | 		pass		
108 | 	try:
109 | 		for x in range(0, len(settings.geo_ang)):
110 | 			analysis_file.write('{0:<18}'.format(settings.geo_ang[x][0]+'-'+settings.geo_ang[x][1]+'-'+ settings.geo_ang[x][2]))
111 | 	except NameError:
112 | 		pass
113 | 	try:
114 | 		for x in range(0, len(settings.geo_dih)):
115 | 			analysis_file.write('{0:<18}'.format(settings.geo_dih[x][0]+'-'+settings.geo_dih[x][1]+'-'+ settings.geo_dih[x][2]+'-'+ settings.geo_dih[x][3]))
116 | 	except NameError:
117 | 		pass
118 | 
119 | 	analysis_file.write('{0:<15}'.format(''))
120 | 	analysis_file.write('{0:<15}'.format(''))
121 | 	analysis_file.write('{0:<15}'.format('Total'))	
122 | 	analysis_file.write('{0:<15}'.format(settings.frag1name))
123 | 	analysis_file.write('{0:<15}'.format(settings.frag2name))
124 | 	analysis_file.write('{0:<20}'.format('complex'))
125 | 	analysis_file.write('{0:<20}'.format(settings.frag1name))
126 | 	analysis_file.write('{0:<20}'.format(settings.frag2name))
127 | 	analysis_file.write('\n')
128 | 
129 | 	analysis_file.write('{0:<15}'.format(''))
130 | 	try:
131 | 		for x in range(0, len(settings.geo_dist)):
132 | 			analysis_file.write('{0:<18}'.format('angstrom'))
133 | 	except NameError:
134 | 		pass			
135 | 	try:
136 | 		for x in range(0, len(settings.geo_ang)):
137 | 			analysis_file.write('{0:<18}'.format('degree'))
138 | 	except NameError:
139 | 		pass
140 | 	try:
141 | 		for x in range(0, len(settings.geo_dih)):
142 | 			analysis_file.write('{0:<18}'.format('degree'))
143 | 	except NameError:
144 | 		pass
145 | 		
146 | 	analysis_file.write('{0:<15}'.format('kcal/mol'))
147 | 	analysis_file.write('{0:<15}'.format('kcal/mol'))
148 | 	analysis_file.write('{0:<15}'.format('kcal/mol'))	
149 | 	analysis_file.write('{0:<15}'.format('kcal/mol'))
150 | 	analysis_file.write('{0:<15}'.format('kcal/mol'))
151 | 	analysis_file.write('{0:<20}'.format('hartree'))
152 | 	analysis_file.write('{0:<20}'.format('hartree'))
153 | 	analysis_file.write('{0:<20}'.format('hartree'))
154 | 	analysis_file.write('\n')
155 | 	try:
156 | 		for x in range(0, len(settings.geo_dist)):
157 | 			analysis_file.write('{0:-<18}'.format(''))
158 | 	except NameError:
159 | 		pass
160 | 	try:
161 | 		for x in range(0, len(settings.geo_ang)):
162 | 			analysis_file.write('{0:-<18}'.format(''))
163 | 	except NameError:
164 | 		pass
165 | 	try:
166 | 		for x in range(0, len(settings.geo_dih)):
167 | 			analysis_file.write('{0:-<18}'.format(''))
168 | 	except NameError:
169 | 		pass
170 | 	analysis_file.write('{0:-<150}'.format(''))
171 | 	analysis_file.write('\n')
172 | 	analysis_file.close()
173 | 	return
174 | 
def analysis(settings, structures, x):
	"""Append the DIAS table row for structure index `x` to '<settings.name>_DIAS.txt'.

	Energies are read with getSCF from
	'<settings.name>_output/<label>_<step>.<settings.output_file_extension>',
	where <label> is 'complex', settings.frag1name or settings.frag2name and
	<step> is x+1 zero-padded to four digits. Geometric values are taken from
	structures.xyz[x] using the 1-based atom indices in settings.geo_dist,
	settings.geo_ang and settings.geo_dih.

	The row is computed completely before the file is opened, so a failure
	while reading energies or geometry leaves no open file handle behind.
	"""
	step = '{0:04d}'.format(x+1)
	directory = settings.name+'_output/'
	ending = '_'+step+'.'+settings.output_file_extension
	irc_SCF = getSCF(directory+'complex'+ending)
	fragment1_SCF = getSCF(directory+settings.frag1name+ending)
	fragment2_SCF = getSCF(directory+settings.frag2name+ending)

	# calculate distortion energies
	fragment1_dist = fragment1_SCF - settings.frag1energy
	fragment2_dist = fragment2_SCF - settings.frag2energy
	# calculate interaction energy
	interaction_energy = irc_SCF - (fragment1_SCF + fragment2_SCF)

	# extract geometric values
	geometry = structures.xyz[x]

	def atom(index):
		# -1 since array begins with 0
		return geometry[int(index)-1]

	distance_values = [getDistance(atom(e[0]), atom(e[1])) for e in settings.geo_dist]
	angle_values = [getAngle(atom(e[0]), atom(e[1]), atom(e[2])) for e in settings.geo_ang]
	dihedral_values = [getDihedral(atom(e[0]), atom(e[1]), atom(e[2]), atom(e[3])) for e in settings.geo_dih]

	# assemble the row: step, geometry columns, energies in kcal/mol, SCF energies in hartree
	cells = ['{0:<15}'.format(step)]
	cells += ['{0:<18}'.format('{0:.5f}'.format(float(value))) for value in distance_values]
	cells += ['{0:<18}'.format('{0:.3f}'.format(float(value))) for value in angle_values]
	cells += ['{0:<18}'.format('{0:.3f}'.format(float(value))) for value in dihedral_values]
	kcal_values = (
		irc_SCF-settings.frag1energy-settings.frag2energy,
		interaction_energy,
		fragment1_dist+fragment2_dist,
		fragment1_dist,
		fragment2_dist,
	)
	cells += ['{0:<15}'.format('{0:.5f}'.format(float(value*hartree2kcal))) for value in kcal_values]
	cells += ['{0:<20}'.format('{0:.9f}'.format(float(value))) for value in (irc_SCF, fragment1_SCF, fragment2_SCF)]

	# print to file
	with open(settings.name+'_DIAS.txt', 'a') as analysis_file:
		analysis_file.write(''.join(cells) + '\n')
219 | 	
220 | 	
def analysis_only(structures, settings):
	"""Write the DIAS table for all structures, delete the log file and report the run time.

	Calls create_analysis_file once and analysis for every index of
	structures.xyz, then removes settings.logfile and prints the elapsed
	wall-clock time split into whole seconds and milliseconds.
	"""
	starttime = time.time()
	create_analysis_file(settings)
	for i in range(len(structures.xyz)):
		analysis(settings, structures, i)
	os.remove(settings.logfile)
	elapsed = time.time() - starttime
	# split numerically; parsing str(elapsed) breaks when the float is
	# rendered in scientific notation (very short runs)
	seconds = int(elapsed)
	milliseconds = (elapsed - seconds) * 1000
	print('Analysis of {0} structures done in {1} seconds and {2:.0f} ms'.format(len(structures.xyz), seconds, milliseconds))
	return


--------------------------------------------------------------------------------
/DIAS_inputparser.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import sys
  3 | import re
  4 | 
  5 | def line_prepender(filename, line):
  6 | 	with open(filename, 'r+') as f:
  7 | 		content = f.read()
  8 | 		f.seek(0, 0)
  9 | 		f.write(line.rstrip('\r\n') + '\n' + content)
 10 | 
class structures:
	# Used as a plain namespace: the parser functions in this file assign the
	# class attributes atoms, xyz and title directly on this class and
	# return the class itself.
	pass
 13 | 
# Element symbols indexed by atomic number; index 0 is an empty placeholder
# so that periodic_table[Z] is the symbol of the element with atomic number Z.
periodic_table = ["","H","He","Li","Be","B","C","N","O","F","Ne","Na","Mg","Al","Si","P","S","Cl","Ar","K","Ca","Sc","Ti","V","Cr","Mn","Fe","Co","Ni","Cu","Zn","Ga","Ge","As","Se","Br","Kr","Rb","Sr","Y","Zr",
    "Nb","Mo","Tc","Ru","Rh","Pd","Ag","Cd","In","Sn","Sb","Te","I","Xe","Cs","Ba","La","Ce","Pr","Nd","Pm","Sm","Eu","Gd","Tb","Dy","Ho","Er","Tm","Yb","Lu","Hf","Ta","W","Re","Os","Ir","Pt","Au","Hg","Tl",
    "Pb","Bi","Po","At","Rn","Fr","Ra","Ac","Th","Pa","U","Np","Pu","Am","Cm","Bk","Cf","Es","Fm","Md","No","Lr","Rf","Db","Sg","Bh","Hs","Mt","Ds","Rg","Uub","Uut","Uuq","Uup","Uuh","Uus","Uuo"]
 17 | 	
 18 | #Covalent radii taken from DOI: 10.1039/b801115j
 19 | #Everything beyond Cm was set to 1.80
 20 | covalent_radii = [0.00,0.32,0.28,1.28,0.96,0.84,0.76,0.71,0.66,0.57,0.58,1.66,1.41,1.21,1.11,1.07,1.05,1.02,1.06,2.03,1.76,1.70,1.60,1.53,1.39,1.61,1.52,1.50,1.24,1.32,1.22,1.22,1.20,1.19,1.20,1.20,1.16,2.20,1.95,1.90,1.75,
 21 |     1.64,1.54,1.47,1.46,1.42,1.39,1.45,1.44,1.42,1.39,1.39,1.38,1.39,1.40,2.44,2.15,2.07,2.04,2.03,2.01,1.99,1.98,1.98,1.96,1.94,1.92,1.92,1.89,1.90,1.87,1.87,1.75,1.70,1.62,1.51,1.44,1.41,1.36,1.36,1.32,1.45,
 22 |     1.46,1.48,1.40,1.50,1.50,2.60,2.21,2.15,206,2.00,1.96,1.90,1.87,180,169,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80]
 23 | 
 24 | #takes xyz coordinates as numpy array and returns distance
 25 | def getDistance(atom1, atom2):
 26 | 	distance = 0.00
 27 | 	distance = np.sqrt((atom1[0]-atom2[0])**2+(atom1[1]-atom2[1])**2+(atom1[2]-atom2[2])**2)
 28 | 	return distance
 29 | 
 30 | # checks if "s" is an int, returns true or false
 31 | def isInt(s):
 32 |     try: 
 33 |         int(s)
 34 |         return True
 35 |     except ValueError:
 36 |         return False	
 37 | 
 38 | def molecular_formula(atoms):
 39 | 	alphabetic_periodic_table = sorted(copy.deepcopy(periodic_table))
 40 | 	formula = ""
 41 | 	count = []
 42 | 	for element in alphabetic_periodic_table:
 43 | 		count =  count + [atoms.count(element)]
 44 | 	if count[alphabetic_periodic_table.index("C")] > 1:
 45 | 		formula = formula + 'C' + str(count[alphabetic_periodic_table.index("C")])
 46 | 		count[alphabetic_periodic_table.index("C")] = 0
 47 | 	elif count[alphabetic_periodic_table.index("C")] > 0:
 48 | 		formula = formula + 'C'
 49 | 		count[alphabetic_periodic_table.index("C")] = 0
 50 | 	if count[alphabetic_periodic_table.index("H")] > 1:
 51 | 		formula = formula + 'H' + str(count[alphabetic_periodic_table.index("H")])
 52 | 		count[alphabetic_periodic_table.index("H")] = 0	
 53 | 	elif count[alphabetic_periodic_table.index("H")] > 0:
 54 | 		formula = formula + 'H'
 55 | 		count[alphabetic_periodic_table.index("H")] = 0
 56 | 	for x in range(0, len(alphabetic_periodic_table)):
 57 | 		if count[x] == 1:
 58 | 			formula = formula + alphabetic_periodic_table[x]
 59 | 		elif count[x] > 1:
 60 | 			formula = formula + alphabetic_periodic_table[x] + str(count[x])
 61 | 	return formula
 62 | 	
 63 | def isBond(distance, atomtype1, atomtype2, covalent_radii):
 64 | 	bond = True
 65 | 	'''find bond threshold based on covalent radii
 66 | 	taken from 10.1039/b801115j
 67 | 	For everything beyond Cm 1.80 A was used.
 68 | 	get radii from saved numpy array
 69 | 	use atomtypes as numbers
 70 | 	use factor to be sure to catch bonds. Bigger factor means more bonds are detected
 71 | 	'''
 72 | 	atomtype1 = int(periodic_table.index(atomtype1))
 73 | 	atomtype2 = int(periodic_table.index(atomtype2))
 74 | 	bond_threshold = 1.25 * (covalent_radii[atomtype1]+covalent_radii[atomtype2])
 75 | 	if distance > bond_threshold:
 76 | 		bond = False
 77 | 	return bond
 78 | 	
def getFragments(structure, atoms):
	"""Group the atoms of one structure into bonded fragments.

	`structure` holds one coordinate entry per atom and `atoms` the matching
	element symbols. Bonds are detected with isBond on all pairwise
	distances. Returns a list of fragments, each a list of 1-based atom
	numbers.
	"""
	#build the full matrix of pairwise distances for this structure
	adjacency_matrix = []
	# iterate over every atom 
	for i in range(0, len(structure)):
		distances = []
		#get the distance to every other atom and itself
		for j in range(0, len(structure)):
			distances = distances + [getDistance(structure[i], structure[j])]
		adjacency_matrix = adjacency_matrix + [distances]

	#convert distance to 1 for bond, 0 for no bond
	for i in range(0, len(adjacency_matrix)):
		for j in range(0, len(adjacency_matrix[i])):
			if isBond(adjacency_matrix[i][j], atoms[i], atoms[j], covalent_radii):
				adjacency_matrix[i][j] = 1
			else:
				adjacency_matrix[i][j] = 0
	# now make a list of fragments 
	#first convert each row to the list of 1-based atom numbers it is bonded to
	for i in range(0, len(adjacency_matrix)):
		adjacency_matrix[i] = [j+1 for j,x in enumerate(adjacency_matrix[i]) if x == 1]
	#seed the fragment list with the neighbour list of the first atom
	fragments = [adjacency_matrix[0]]
	#now iterate over the neighbour lists of all remaining atoms
	for i in range (1, len(adjacency_matrix)):
		#compare with every fragment collected so far
		for j in range(0, len(fragments)):
			#check if this neighbour list shares an atom with the fragment...
			if len(set(adjacency_matrix[i]) & set(fragments[j])) > 0:
				#....if so merge it into that fragment and eliminate duplicates...
				fragments[j] = list(set(fragments[j]+adjacency_matrix[i]))
				break
		else:
			#....otherwise (no fragment matched) start a new fragment
			fragments = fragments + [adjacency_matrix[i]]
	#now we got all of our fragments but some might belong to the same structure, so we need to test if we need to combine them, then combine them, 
	#then check again and so on until nothing else changes. Iterative process: it has converged once the number of fragments no longer changes
	n_frag_old=len(fragments)
	#start n_frag_new at a value that cannot be the length of fragments, so the loop runs at least once
	n_frag_new=-5
	while n_frag_old != n_frag_new:
		#remember the fragment count before this merge pass
		n_frag_old=len(fragments)
		new_fragments = [sorted(fragments[0])]
		#combine fragments like before; merged fragments are kept sorted
		for i in range(0, len(fragments)):
			for j in range(0, len(new_fragments)):
				if len(set(fragments[i]) & set(new_fragments[j])) > 0:
					new_fragments[j] = sorted(list(set(new_fragments[j]+fragments[i])))
					break
			else:
				new_fragments = new_fragments + [fragments[i]]
		fragments=new_fragments
		n_frag_new=len(fragments)
	return fragments	
135 | 
def getConnectivity(structure):
	"""Return the square connectivity matrix of `structure` (1 for bonded pairs, 0 otherwise).

	Element symbols are taken from the module-level structures.atoms.
	"""
	n_atoms = len(structure)
	connectivity_matrix = np.zeros((n_atoms,n_atoms))
	for i in range(n_atoms):
		for j in range(n_atoms):
			separation = getDistance(structure[i],structure[j])
			bonded = isBond(separation,structures.atoms[i], structures.atoms[j],covalent_radii)
			connectivity_matrix[i][j] = 1 if bonded else 0
	return connectivity_matrix
146 | 
147 | 
# finds up to two bonds that get formed
def getBonds(structures):
	"""Return two atom pairs (1-based) whose bonded state changed least often, but at least once where possible.

	For every consecutive pair of geometries in structures.xyz the
	connectivity matrices are compared and the changes per atom pair are
	counted. Pairs that never changed, and the lower triangle including the
	diagonal, are given the count 1000 so they are picked last.
	"""
	n_atoms = structures.xyz[0].shape[0]
	count = np.zeros((n_atoms,n_atoms))
	#connectivity of step 0 is the reference for the first comparison
	previous = getConnectivity(structures.xyz[0])
	#walk through all geometries and count every change of bonded state
	for geometry in structures.xyz:
		current = getConnectivity(geometry)
		count[current != previous] += 1
		previous = current
	#clear the lower triangle (and diagonal) of the matrix to eliminate duplicates
	count[np.tril_indices(n_atoms)] = 0

	count[count == 0] = 1000

	def lowest_entry():
		#first position (row-major) holding the smallest count
		rows, cols = np.where(count == np.amin(count))
		return rows[0], cols[0]

	#the pair with the lowest count changed status the least, might not be the optimal solution
	row, col = lowest_entry()
	geodist = [[int(row)+1,int(col)+1]]
	#mask the first pick so that the second search finds another pair
	count[row,col] = 1001

	row, col = lowest_entry()
	geodist.append([int(row)+1,int(col)+1])

	return geodist
177 | 
178 | 	
def duplicates(int_list):
	"""Return True when any value occurs more than once in `int_list`, else False."""
	ordered = sorted(int_list)
	# after sorting, equal values are neighbours
	for left, right in zip(ordered, ordered[1:]):
		if left == right:
			return True
	return False
188 | 	
def auto_frag(structures,settings):
	"""Determine the two fragments automatically and store them on `settings`.

	Every geometry in structures.xyz is split into bonded fragments with
	getFragments. If at least one geometry has exactly two fragments, the
	most frequent fragmentation among those geometries is used. Otherwise,
	if some geometry has more than two fragments, the most frequent
	fragmentation among the geometries with the fewest (>2) fragments is
	used: its first fragment becomes fragment 1, all others are combined into
	fragment 2 and a warning is prepended to settings.logfile. If neither
	case applies, an error is logged and the program exits.

	Sets settings.frag1atoms and settings.frag2atoms (sorted numpy arrays)
	and returns `settings`.
	"""
	list_of_fragments = [getFragments(geometry, structures.atoms) for geometry in structures.xyz]
	n_fragments = [len(fragments) for fragments in list_of_fragments]

	def most_frequent(indices):
		# among the candidate structures, pick the fragmentation occurring most often
		counts = [list_of_fragments.count(list_of_fragments[i]) for i in indices]
		return list_of_fragments[indices[counts.index(max(counts))]]

	if 2 in n_fragments:
		fragments = most_frequent([i for i, n in enumerate(n_fragments) if n == 2])
		settings.frag1atoms = np.sort(fragments[0])
		settings.frag2atoms = np.sort(fragments[1])
	elif any(n > 2 for n in n_fragments):
		fewest = min(n for n in n_fragments if n > 2)
		# indices refer to the unfiltered list so they stay valid for list_of_fragments
		fragments = most_frequent([i for i, n in enumerate(n_fragments) if n == fewest])
		settings.frag1atoms = np.sort(fragments[0])
		# now combine all other fragments into one
		settings.frag2atoms = np.sort([j for i in fragments[1:] for j in i])
		line_prepender(settings.logfile, "WARNING: more than two fragments detected! Check fragments!\n\n")
	else:
		line_prepender(settings.logfile, "CRITICAL ERROR: couldn't determine fragments automatically!\n\n")
		sys.exit("CRITICAL ERROR: couldn't determine fragments automatically!")
	return settings
224 |  
def get_single(file):
	"""Read one geometry from the output file `file` into the structures namespace.

	If the text contains "Optimization completed.", the first coordinate
	table followed by "Stationary point found" is used; otherwise the last
	coordinate table followed by "Rotational constants". Sets
	structures.atoms (element symbols), structures.xyz (one float array) and
	structures.title, and returns structures.
	"""
	structures.atoms = []
	structures.xyz = []
	structures.title = ['single structure']
	with open(file) as handle:
		text = handle.read()
	optimized = "Optimization completed." in text
	if optimized:
		pattern = r'\ ---------------------------------------------------------------------\n((?:(?!\ ---------------------------------------------------------------------\n).)*?)\ ---------------------------------------------------------------------\n(?:(?!\ ---------------------------------------------------------------------\n).)*?Stationary\ point\ found'
	else:
		pattern = r'\ ---------------------------------------------------------------------\n((?:(?!\ ---------------------------------------------------------------------\n).)*?)\ ---------------------------------------------------------------------\n(?:(?!\ ---------------------------------------------------------------------\n).)*?Rotational\ constants'
	for hit in re.finditer(pattern, text, re.IGNORECASE|re.DOTALL):
		raw_rows = hit.group(1).strip().split("\n")
		if optimized:
			# optimisation run: keep the first match; otherwise run on to the last one
			break
	# columns 4 onwards are the coordinates, column 2 the atomic number
	structures.xyz.append(np.array([row.split()[3:] for row in raw_rows]))
	atomic_numbers = [row.split()[1] for row in raw_rows]
	structures.atoms = [periodic_table[int(number)] for number in atomic_numbers]
	for index, block in enumerate(structures.xyz):
		structures.xyz[index] = block.astype(float)
	return structures
246 | 	
def get_scan(file):
	"""Read all scan geometries from the output file `file` into the structures namespace.

	Every coordinate table that is followed by "Stationary point found"
	becomes one float array in structures.xyz. Element symbols are taken from
	the last table found, titles are the running indices as strings.
	Returns structures.
	"""
	with open(file) as handle:
		text = handle.read()
	structures.xyz = []
	structures.atoms = []

	# coordinate table preceding each "Stationary point found"
	pattern = r'---------------------------------------------------------------------((?:(?!---------------------------------------------------------------------).)*?)---------------------------------------------------------------------(?:(?!---------------------------------------------------------------------).)*?Stationary\ point\ found'
	for hit in re.finditer(pattern, text, re.IGNORECASE|re.DOTALL):
		raw_rows = hit.group(1).strip().split("\n")
		structures.xyz.append(np.array([row.split()[3:] for row in raw_rows]))
	# column 2 of the last table holds the atomic numbers
	atomic_numbers = [row.split()[1] for row in raw_rows]
	structures.atoms = [periodic_table[int(number)] for number in atomic_numbers]

	for index, block in enumerate(structures.xyz):
		structures.xyz[index] = block.astype(float)
	structures.title = [str(index) for index in range(len(structures.xyz))]
	return structures
265 | 
def get_irc(file):
	"""Read the geometries of an IRC output file `file` into the structures namespace.

	Sets structures.xyz (list of float arrays), structures.atoms (element
	symbols) and structures.title (running indices as strings) and returns
	structures. Two paths exist: if the text contains "scrf", every
	coordinate table matched by the first pattern is collected; otherwise the
	first table is taken as the TS and further tables are collected with two
	additional patterns.
	"""
	with open(file) as input_file:
		file_contents = input_file.read() 
	structures.xyz = []
	structures.atoms = []
	#check if solvent was used
	if "scrf" in file_contents:
		# NOTE(review): `a` only counts matches and is printed per match - looks like leftover debug output
		a =0 
		regex = r'Z\n ---------------------------------------------------------------------\n(.*?)\n ---------------------------------------------------------------------\n'
		for match in re.finditer(regex, file_contents, re.IGNORECASE|re.DOTALL):
			raw_coordinates = match.group(1).strip().split("\n")
			# columns 4 onwards of each row are kept as coordinates
			structures.xyz.append(np.array([x.split()[3:] for x in raw_coordinates]))
			print(a)
			a+=1
		# atomic numbers (column 2) are taken from the last table matched
		structures.atoms = [x.split()[1] for x in raw_coordinates]
		structures.atoms = [periodic_table[int(x)] for x in structures.atoms]
		for i in range(len(structures.xyz)):
			structures.xyz[i] = structures.xyz[i].astype(float)	
		structures.title = [str(x) for x in range(len(structures.xyz))]
		return structures
	# find TS structure, it's the first xyz structure available
	regex = r'Z\n ---------------------------------------------------------------------\n(.*?)\n ---------------------------------------------------------------------\n'
	for match in re.finditer(regex, file_contents, re.IGNORECASE|re.DOTALL):
		raw_coordinates = match.group(1).strip().split("\n")
		break
	structures.xyz.append(np.array([x.split()[3:] for x in raw_coordinates]))
	structures.atoms = [x.split()[1] for x in raw_coordinates]
	structures.atoms = [periodic_table[int(x)] for x in structures.atoms]
	# find everything except the TS structure for normal run
	regex2 = r'---------------------------------------------------------------------((?:(?!---------------------------------------------------------------------).)*?)---------------------------------------------------------------------(?:(?!---------------------------------------------------------------------).)*?Delta-x\ Convergence\ Met'
	for match in re.finditer(regex2, file_contents, re.IGNORECASE|re.DOTALL):
		raw_coordinates = match.group(1).strip().split("\n")
		structures.xyz.append(np.array([x.split()[3:] for x in raw_coordinates]))
	# find everything except the TS structure for #p
	regex3 = r'(?:(CURRENT).+?)(?:---------------------------------------------------------------------)(?:.*?(---------------------------------------------------------------------))(.*?)(?:(---------------------------------------------------------------------))'	
	for match in re.finditer(regex3, file_contents, re.DOTALL):
		raw_coordinates = match.group(3).strip().split("\n")
		# these tables keep columns 3 onwards (one column fewer is skipped than above)
		structures.xyz.append(np.array([x.split()[2:] for x in raw_coordinates]))
	# convert all collected string arrays to float
	for i in range(len(structures.xyz)):
		structures.xyz[i] = structures.xyz[i].astype(float)	
	structures.title = [str(x) for x in range(len(structures.xyz))]
	return structures
308 |  
def check_fragments(settings,structures):
	"""Validate settings.frag1atoms and settings.frag2atoms against structures.atoms.

	On the first failed check the message is prepended to settings.logfile
	and the program exits with the same message. Checks, in order: atom
	numbers not above the atom count (fragment 1, then 2), both fragments
	together covering exactly the atom count, no duplicates within fragment
	1, none within fragment 2, no atom shared between the fragments.
	"""
	def abort(message):
		# log first, then terminate with the identical text
		line_prepender(settings.logfile, message + "\n\n")
		sys.exit(message)

	if any(int(number) > len(structures.atoms) for number in settings.frag1atoms):
		abort("CRITICAL ERROR: an atom number in fragment 1 is higher than the highest atom number!")
	if any(int(number) > len(structures.atoms) for number in settings.frag2atoms):
		abort("CRITICAL ERROR: an atom number in fragment 2 is higher than the highest atom number!")
	total_assigned = len(settings.frag1atoms) + len(settings.frag2atoms)
	if total_assigned != len(structures.atoms):
		abort("CRITICAL ERROR: number of specified fragment atoms does not equal total number of atoms!")
	if duplicates(settings.frag1atoms):
		abort("CRITICAL ERROR: at least one atom was defined more than once in fragment1!")
	if duplicates(settings.frag2atoms):
		abort("CRITICAL ERROR: at least one atom was defined more than once in fragment2!")
	shared = set(settings.frag1atoms) & set(settings.frag2atoms)
	if len(shared) > 0:
		abort("CRITICAL ERROR: at least one atom was defined in both fragments!")
	return
331 | 
def _drop_invalid_geo(entries, is_valid, logfile, warning):
	"""Delete, in place, every entry failing is_valid; prepend `warning` to `logfile` once per deletion."""
	i = 0
	while i < len(entries):
		if is_valid(entries[i]):
			i = i+1
		else:
			del entries[i]
			line_prepender(logfile, warning)

def check_geo(structures, settings):
	"""Validate the geometry definitions on `settings` and drop the invalid ones.

	settings.geo_dist, settings.geo_ang and settings.geo_dih are filtered in
	place: an entry is removed (with a warning prepended to settings.logfile)
	if it has the wrong number of atoms (2, 3 or 4), contains a non-integer,
	or names an atom number outside 1..number of atoms of the first
	structure. If no valid bond definition is left, the two bonds found by
	getBonds are used instead. Returns `settings`.
	"""
	natoms = len(structures.xyz[0])

	def has_length(expected):
		return lambda entry: len(entry) == expected

	def only_integers(entry):
		return all(isInt(value) for value in entry)

	def in_range(entry):
		return all(1 <= int(value) <= natoms for value in entry)

	# bonds
	_drop_invalid_geo(settings.geo_dist, has_length(2), settings.logfile, "WARNING: defined bond did not have two elements! - This input is ignored\n\n")
	_drop_invalid_geo(settings.geo_dist, only_integers, settings.logfile, "WARNING: defined bond did have non-integer input! - This input is ignored\n\n")
	if len(settings.geo_dist) == 0:
		# getBonds returns a list of atom pairs, so each pair must become its own entry
		settings.geo_dist.extend(getBonds(structures))
		line_prepender(settings.logfile, "WARNING: no correct definitions of bonds found! - automatic detection of forming bonds\n\n")
	_drop_invalid_geo(settings.geo_dist, in_range, settings.logfile, "WARNING: bond definition: atom number out of range! - This input is ignored\n\n")
	# angles
	_drop_invalid_geo(settings.geo_ang, has_length(3), settings.logfile, "WARNING: defined angle did not have three elements! - This input is ignored\n\n")
	_drop_invalid_geo(settings.geo_ang, only_integers, settings.logfile, "WARNING: defined angle did have non-integer input! - This input is ignored\n\n")
	_drop_invalid_geo(settings.geo_ang, in_range, settings.logfile, "WARNING: angle definition: atom number out of range! - This input is ignored\n\n")
	# dihedral angles
	_drop_invalid_geo(settings.geo_dih, has_length(4), settings.logfile, "WARNING: defined dihedral angle did not have four elements! - This input is ignored\n\n")
	_drop_invalid_geo(settings.geo_dih, only_integers, settings.logfile, "WARNING: defined dihedral angle did have non-integer input! - This input is ignored\n\n")
	_drop_invalid_geo(settings.geo_dih, in_range, settings.logfile, "WARNING: dihedral angle definition: atom number out of range! - This input is ignored\n\n")
	return settings
403 | 
def structures_from_G(file, settings):
	"""Read structures, charge and multiplicity from a Gaussian output file.

	The file is scanned for a job-type marker: the first line containing
	"IRC-IRC-IRC" selects get_irc(), the first line containing "Scan" selects
	get_scan(); if neither marker is found get_single() is used.

	Charge and multiplicity are taken from the first line containing
	"Charge =" (3rd and 6th whitespace-separated token) and stored on
	*settings* as strings, exactly as they appear in the file.  If no such
	line exists, *settings* is left without these attributes.

	Parameters:
		file: path of the Gaussian output file.
		settings: attribute container that receives ``charge`` and ``multi``.

	Returns:
		(structures, settings)
	"""
	job_type = "single"
	# the context manager guarantees the handle is closed, also on errors
	with open(file, 'r') as file_object:
		for line in file_object:
			if "IRC-IRC-IRC" in line:
				job_type = "IRC"
				break
			elif "Scan" in line:
				job_type = "scan"
				break
		if job_type == "single":
			structures = get_single(file)
		elif job_type == "scan":
			structures = get_scan(file)
		else:
			structures = get_irc(file)
		# Get Charge and multiplicity, searching again from the top of the file
		file_object.seek(0)
		for line in file_object:
			if "Charge =" in line:
				tokens = line.split()
				settings.charge = tokens[2]
				settings.multi = tokens[5]
				break
	return structures, settings
431 | 
432 | 
def structures_from_xyz(file):
	"""Read all structures from a (multi-frame) xyz file.

	The first line that isInt() accepts is taken as the number of atoms; the
	following line is the title and the next n_atoms lines are
	"symbol x y z" records.  Atom symbols are read and validated against
	periodic_table for the first structure only.  Every later line whose
	integer value equals n_atoms starts a further frame (title line followed
	by n_atoms coordinate lines).

	Results are stored on ``structures`` as three lists: ``atoms`` (symbols),
	``xyz`` (one float array with n_atoms rows per frame) and ``title``.
	NOTE(review): ``structures`` is not created in this function; it is
	assumed to be a module-level holder object defined elsewhere in this file.

	A later frame whose coordinates cannot be parsed is skipped as a whole, so
	``title`` and ``xyz`` always stay the same length.

	Parameters:
		file: path of the xyz file.

	Returns:
		The ``structures`` holder.

	Raises:
		StopIteration: if the file ends in the middle of a frame (the caller
			reports this as a read error).
		SystemExit: if no atom count is found or the first frame is malformed.
	"""
	structures.atoms = []
	structures.xyz = []
	structures.title = []
	with open(file, 'r') as file_object:
		lines = iter(file_object)
		#search for number of atoms
		for line in lines:
			if isInt(line.strip()):
				n_atoms = int(line)
				break
		else: #exits if no line with number of atoms was found
			sys.exit('Error:\t\tNo xyz coordinates found in file: ' + file)
		if n_atoms < 1: # a frame without atoms cannot hold coordinates
			sys.exit('Error:\t\tNo xyz coordinates found in file: ' + file)
		#the line after the atom count is the title
		structures.title.append(next(lines).strip())
		# now there should be n_atoms lines of coordinates
		rows = []
		try:
			for _ in range(n_atoms):
				l = next(lines).split()
				if not l or l[0] not in periodic_table:
					sys.exit('Error:\t\tsomething is wrong with the first structure in file: '+file)
				structures.atoms.append(l[0]) #get atom symbol and append to atom list
				rows.append([float(x) for x in l[1:]]) #convert line to list of floats
			first_xyz = np.vstack(rows) #one row per atom
		except ValueError: # non-numeric coordinate or rows of different length
			sys.exit('Error:\t\tsomething is wrong with the first structure in file: '+file)
		structures.xyz.append(first_xyz)
		#now search for more structures
		for line in lines:
			#start extracting if atom number line is found
			try:
				if int(line.strip()) != n_atoms:
					continue
			except ValueError:
				continue
			title = next(lines).strip()
			rows = []
			try:
				for _ in range(n_atoms):
					rows.append([float(x) for x in next(lines).split()[1:]])
				xyz = np.vstack(rows)
			except ValueError:
				# malformed frame: drop it completely instead of keeping a
				# title without coordinates or a partially filled array
				continue
			structures.title.append(title)
			structures.xyz.append(xyz)
	return structures
484 | 
def _matching_lines(lines, keyword, n_key_tokens, min_tokens):
	"""Yield (tokens, line) for every line that starts with *keyword*.

	A line matches when it has more than *min_tokens* whitespace-separated
	tokens and its first *n_key_tokens* tokens, concatenated and upper-cased,
	equal *keyword* (e.g. "Job Name = x" matches "JOBNAME" with n_key_tokens=2).
	"""
	for line in lines:
		tokens = line.split()
		if len(tokens) > min_tokens and "".join(tokens[:n_key_tokens]).upper() == keyword:
			yield tokens, line

def _first_match(lines, keyword, n_key_tokens, min_tokens):
	"""Return the tokens of the first line matching *keyword*, or None."""
	for tokens, _line in _matching_lines(lines, keyword, n_key_tokens, min_tokens):
		return tokens
	return None

def _read_value(lines, keyword, n_key_tokens, min_tokens, index, convert, default):
	"""Return convert(tokens[index]) of the first matching line.

	*default* is returned when no line matches, the token is missing or the
	conversion raises ValueError.
	"""
	tokens = _first_match(lines, keyword, n_key_tokens, min_tokens)
	if tokens is None:
		return default
	try:
		return convert(tokens[index])
	except (IndexError, ValueError):
		return default

def _read_yes_no(lines, keyword, n_key_tokens, min_tokens, index, default):
	"""Return True/False for a YES/NO token of the first matching line.

	*default* is returned when no line matches, the token is missing or it is
	neither YES nor NO (case-insensitive).
	"""
	tokens = _first_match(lines, keyword, n_key_tokens, min_tokens)
	if tokens is None or len(tokens) <= index:
		return default
	answer = tokens[index].upper()
	if answer == "YES":
		return True
	if answer == "NO":
		return False
	return default

def _read_fragment_atoms(lines, keyword):
	"""Return the list of atom numbers of a "FRAGMENTn ATOMS" line, or "auto".

	The first matching line is split on whitespace and commas; everything
	after the first three fields that isInt() accepts is converted to int.
	"auto" is returned when there is no such line or it lists no integers.
	"""
	for _tokens, line in _matching_lines(lines, keyword, 2, 1):
		atoms = [int(x) for x in re.split(r"\s+|,", line)[3:] if isInt(x)]
		return atoms if atoms else "auto"
	return "auto"

def _read_tagged_block(text, tag):
	"""Return the stripped text of the last <tag>...</tag> block, or None."""
	content = None
	pattern = r'<{0}>(.*?)</{0}>'.format(tag)
	for match in re.finditer(pattern, text, re.IGNORECASE|re.DOTALL):
		content = match.group(1).strip()
	return content

def _read_geometry_block(text, tag, default):
	"""Return a <tag> block as a list of token lists (one per line), or *default*."""
	content = _read_tagged_block(text, tag)
	if content is None:
		return default
	return [element.split() for element in content.split('\n')]

def parse_in(input_filename, analysisonly):
	"""Parse the autoDIAS input file and the structure file it names.

	The input file is read once.  Every keyword is searched from the top of
	the file; keywords are matched case-insensitively on the leading
	whitespace-separated tokens and, unless noted below, the first matching
	line wins.  Settings that are absent or unreadable keep their defaults.

	Side effects: the log file "<name>_log.txt" (or
	"<name>_analysisonly_log.txt" when *analysisonly* is true) is created or
	truncated, and warnings/errors are written to it via line_prepender().

	Parameters:
		input_filename: path of the autoDIAS input file.
		analysisonly: selects the name of the log file (see above).

	Returns:
		(settings, structures).  ``settings`` is a class object used as a plain
		attribute container.  ``structures`` is what structures_from_xyz() or
		structures_from_G() returned, extended with ``xyz_1``/``xyz_2``
		(per-frame coordinates of each fragment) and ``frag1atoms``/
		``frag2atoms`` (atom symbols of each fragment).  On return
		``settings.frag1atoms``/``frag2atoms`` hold 0-based atom indices.

	Raises:
		SystemExit: if no "INPUT FILE" line exists, the structure file is
			missing, or an xyz structure file ends in the middle of a frame.
	"""
	class settings:
		pass
	settings.void = []
	with open(input_filename, 'r') as input_object:
		input_lines = input_object.readlines()
	file_contents = "".join(input_lines)
	#------------structures filename
	for tokens, _line in _matching_lines(input_lines, "INPUTFILE", 2, 1):
		# "INPUT FILE EXTENSION ..." shares its first two tokens with
		# "INPUT FILE ..." and must not be taken for the structure file
		if len(tokens) > 2 and tokens[2].upper() == "EXTENSION":
			continue
		settings.ircfile = tokens[-1]
		break
	else: #information on input file type is missing
		sys.exit("No information on the structure input file could be found!")
	#------------GET JOBNAME (defaults to the structure file name)
	settings.name = _read_value(input_lines, "JOBNAME", 2, 3, 3, str, settings.ircfile)
	#------------Set Logfilename
	if analysisonly:
		settings.logfile = settings.name + "_analysisonly_log.txt"
	else:
		settings.logfile = settings.name + "_log.txt"
	with open(settings.logfile, 'w'):
		pass # only create/truncate the log file
	#------------Parse Structures
	#find out what kind of file we are dealing with. See if it's a gaussian file
	try:
		with open(settings.ircfile, 'r') as structure_object:
			is_gaussian = any("Gaussian, Inc." in line for line in structure_object)
	except FileNotFoundError:
		line_prepender(settings.logfile, "CRITICAL ERROR: structure input file {0} not found!\n\n".format(settings.ircfile))
		sys.exit("CRITICAL ERROR: structure input file {0} not found!".format(settings.ircfile))
	settings.filetype = "G" if is_gaussian else "X"
	if settings.filetype == "X":
		try:
			structures = structures_from_xyz(settings.ircfile)
		except StopIteration:
			line_prepender(settings.logfile, "CRITICAL ERROR: problem reading structures from file {}!\n\n".format(settings.ircfile))
			sys.exit("CRITICAL ERROR: problem reading structures from file {}!".format(settings.ircfile))
	else:
		structures, settings = structures_from_G(settings.ircfile, settings)
	#------------Get Information on Fragments/determine automatically
	#CHARGE: value extracted from the structure file (if any) is the default,
	#an explicit line in the input file overrides it
	if not hasattr(settings, "charge"):
		settings.charge = 0
	settings.charge = _read_value(input_lines, "CHARGE", 1, 0, -1, int, settings.charge)
	#MULTIPLICITY: same precedence as for the charge
	if not hasattr(settings, "multi"):
		settings.multi = 1
	settings.multi = _read_value(input_lines, "MULTIPLICITY", 1, 0, -1, int, settings.multi)
	#FRAGMENT1
	settings.frag1name = _read_value(input_lines, "FRAGMENT1NAME", 2, 1, -1, str, "fragment1")
	settings.frag1charge = _read_value(input_lines, "FRAGMENT1CHARGE", 2, 1, -1, int, 0)
	settings.frag1multi = _read_value(input_lines, "FRAGMENT1MULTIPLICITY", 2, 1, -1, int, 1)
	settings.frag1energy = _read_value(input_lines, "FRAGMENT1ENERGY", 2, 1, -1, float, 0)
	settings.frag1atoms = _read_fragment_atoms(input_lines, "FRAGMENT1ATOMS")
	#FRAGMENT2
	settings.frag2name = _read_value(input_lines, "FRAGMENT2NAME", 2, 1, -1, str, "fragment2")
	settings.frag2charge = _read_value(input_lines, "FRAGMENT2CHARGE", 2, 1, -1, int, 0)
	settings.frag2multi = _read_value(input_lines, "FRAGMENT2MULTIPLICITY", 2, 1, -1, int, 1)
	settings.frag2energy = _read_value(input_lines, "FRAGMENT2ENERGY", 2, 1, -1, float, 0)
	settings.frag2atoms = _read_fragment_atoms(input_lines, "FRAGMENT2ATOMS")
	#Determine fragment atoms if not set
	if settings.frag1atoms == "auto" and settings.frag2atoms == "auto":
		settings = auto_frag(structures,settings)
	elif settings.frag1atoms == "auto":
		# fragment 1 is every atom that is not part of fragment 2
		taken = set(settings.frag2atoms)
		settings.frag1atoms = [item for item in range(1,len(structures.atoms)+1) if item not in taken]
	elif settings.frag2atoms == "auto":
		# fragment 2 is every atom that is not part of fragment 1
		taken = set(settings.frag1atoms)
		settings.frag2atoms = [item for item in range(1,len(structures.atoms)+1) if item not in taken]
	check_fragments(settings,structures) #checks if atom lists are coherent
	#get fragment xyz; atom numbers become 0-based indices (converted in place)
	settings.frag1atoms[:] = [x -1 for x in settings.frag1atoms]
	settings.frag2atoms[:] = [x -1 for x in settings.frag2atoms]
	structures.xyz_1 = [xyz[settings.frag1atoms] for xyz in structures.xyz]
	structures.xyz_2 = [xyz[settings.frag2atoms] for xyz in structures.xyz]
	structures.frag1atoms = [structures.atoms[element] for element in settings.frag1atoms]
	structures.frag2atoms = [structures.atoms[element] for element in settings.frag2atoms]
	#------------Get analysis information
	settings.analysis = True
	for tokens, _line in _matching_lines(input_lines, "ANALYSIS", 1, 0):
		if len(tokens) < 3: # no value given: stop searching, keep the default
			break
		answer = tokens[2].upper()
		if answer == "YES":
			settings.analysis = True
			break
		elif answer == "NO":
			settings.analysis = False
			break
		# any other value: keep looking at later ANALYSIS lines
	if settings.analysis:
		settings.geo_dist = _read_geometry_block(file_contents, "distances", [[]])
		settings.geo_ang = _read_geometry_block(file_contents, "angles", [])
		settings.geo_dih = _read_geometry_block(file_contents, "dihedral", [])
		#Automatic determination of formed, broken bonds if requested
		if settings.geo_dist[0] == []:
			settings.geo_dist = [["auto"]]
		if any("auto" in element for element in settings.geo_dist):
			auto_distances = getBonds(structures)
			#replace auto in that list against the new distances
			settings.geo_dist = [ x for x in settings.geo_dist if "auto" not in x]
			settings.geo_dist.append(auto_distances[0])
			settings.geo_dist.append(auto_distances[1])
		#eliminate problems with empty lists (an empty block parses to [[]])
		if len(settings.geo_ang)>0 and settings.geo_ang[0] == []:
			settings.geo_ang = []
		if len(settings.geo_dih)>0 and settings.geo_dih[0] == []:
			settings.geo_dih = []
		#eliminate problems with wrong inputs
		settings = check_geo(structures, settings)
	#------------Get Further Setting
	settings.keepxyz = _read_yes_no(input_lines, "KEEPXYZ", 2, 2, 3, True)
	settings.keepinput = _read_yes_no(input_lines, "KEEPINPUTFILES", 3, 3, 4, True)
	settings.keepoutput = _read_yes_no(input_lines, "KEEPOUTPUTFILES", 3, 3, 4, True)
	settings.keeplog = _read_yes_no(input_lines, "KEEPLOGFILE", 3, 3, 4, True)
	settings.reorder = _read_yes_no(input_lines, "REORDER", 1, 2, 2, False)
	settings.reduce = _read_yes_no(input_lines, "REDUCESTRUCTURES", 2, 3, 3, False)
	if settings.reduce: # the threshold is only defined when reduction is requested
		settings.reduce_tresh = _read_value(input_lines, "RMSDTRESHOLD", 2, 3, 3, float, 0.2)
	settings.prepareonly = _read_yes_no(input_lines, "PREPAREONLY", 2, 3, 3, False)
	#------------Get Settings for running the application
	settings.input_file_extension = _read_value(input_lines, "INPUTFILEEXTENSION", 3, 2, 4, str, "com")
	settings.output_file_extension = _read_value(input_lines, "OUTPUTFILEEXTENSION", 3, 2, 4, str, "log")
	#get input layout
	layout = _read_tagged_block(file_contents, "layout")
	settings.inputlayout = "" if layout is None else layout+'\n\n'
	#get run settings
	run_job = _read_tagged_block(file_contents, "run_job")
	settings.submit_setting = "" if run_job is None else run_job

	return settings, structures


--------------------------------------------------------------------------------
/DIAS_reduce_reorder.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | 
  4 | # rotates structure, takes strcuture A and rotation R, returns rotated structure B
  5 | def rotate(A, rot):
  6 | 	B=np.dot(A, rot)
  7 | 	return B
  8 | 
  9 | # does partial procrustes analysis, takes structure A and reference R and returns RMSD
 10 | def procrustes(A, R):
 11 | 	rot=find_rotation(A, R)
 12 | 	#calculates rotated structure B
 13 | 	B=rotate(A, rot)
 14 | 	RMSD=calc_RMSD(B,R)
 15 | 	return RMSD
 16 | 	
 17 | #function that finds centroid from numpy array, returns numpy array with position of centroid
 18 | def find_centroid(P):
 19 | 	C=P.mean(axis=0)
 20 | 	return C
 21 | 	
 22 | #function that calculates RMSD between two xyz structures, takes structures as (m*3) numpy array
 23 | def calc_RMSD(A, B):
 24 | 	RMSD=0.0
 25 | 	for i in range(3):
 26 | 		for x in range(len(A)):
 27 | 			RMSD += (A[x,i]-B[x,i])**2
 28 | 	return np.sqrt(RMSD/len(A))
 29 | 
 30 | 	#takes structure A and reference structure R, calculates optimal rotation (R) using SVD for alignment based on Kabsch algorithm!
 31 | def find_rotation(A, R):
 32 | 	# Computation of the covariance matrix
 33 | 	C = np.dot(np.transpose(A), R)
 34 | 	
 35 | 	u, s, vh = np.linalg.svd(C)
 36 | 	
 37 | 	#assure right handed coordinate system)
 38 | 	if (np.linalg.det(u) * np.linalg.det(vh)) < 0.0:
 39 | 		s[-1] = -s[-1]
 40 | 		u[:, -1] = -u[:, -1]
 41 | 	
 42 | 	#calculate rotation R
 43 | 	rot = np.dot(u, vh)
 44 | 	return rot
 45 | 
 46 | def center_xyz(structure, atoms, mode):
 47 | 	
 48 | 	#first calculate translation vector V by calculating center and reversing the vector
 49 | 	if mode == 'c':
 50 | 		V=-1*find_centroid(structure)
 51 | 	elif mode == 'm':
 52 | 		V=-1*find_centerofmass(structure, atoms)
 53 | 	else:
 54 | 		sys.exit('something went wrong')
 55 | 	
 56 | 	#now add vector to each atom
 57 | 	structure=structure+V
 58 | 	return structure
 59 | 
 60 | def reorder(structures):
 61 | 	if len(structures.xyz) == 1: # only one structure
 62 | 		return structures
 63 | 	RMSD = []
 64 | 	for i in range(0, len(structures.xyz)-1):
 65 | 		RMSD = RMSD + [procrustes(structures.xyz[i],structures.xyz[i+1])]
 66 | 	if np.amax(RMSD) > 3* np.mean(RMSD):
 67 | 		cut = RMSD.index(np.amax(RMSD))
 68 | 		rmsd1=procrustes(structures.xyz[0], structures.xyz[cut+1])
 69 | 		rmsd2=procrustes(structures.xyz[cut], structures.xyz[cut+1])
 70 | 		rmsd3=procrustes(structures.xyz[0], structures.xyz[-1])
 71 | 		rmsd4=procrustes(structures.xyz[cut], structures.xyz[-1])
 72 | 		
 73 | 		if rmsd1 < rmsd2 and rmsd1 < rmsd3 and rmsd1 < rmsd4:
 74 | 			structures.xyz = structures.xyz[cut::-1]+structures.xyz[cut+1:]
 75 | 			structures.xyz_1 = structures.xyz_1[cut::-1]+structures.xyz_1[cut+1:]
 76 | 			structures.xyz_2 = structures.xyz_2[cut::-1]+structures.xyz_2[cut+1:]
 77 | 			structures.title = structures.title[cut::-1]+structures.title[cut+1:]
 78 | 		elif rmsd2 < rmsd1 and rmsd2 < rmsd3 and rmsd2 < rmsd4:
 79 | 			pass		
 80 | 		elif rmsd3 < rmsd1 and rmsd3 < rmsd2 and rmsd3 < rmsd4:
 81 | 			structures.xyz = structures.xyz[cut::-1]+structures.xyz[:cut:-1]
 82 | 			structures.xyz_1 = structures.xyz_1[cut::-1]+structures.xyz_1[:cut:-1]
 83 | 			structures.xyz_2 = structures.xyz_2[cut::-1]+structures.xyz_2[:cut:-1]
 84 | 			structures.title = structures.title[cut::-1]+structures.title[:cut:-1]
 85 | 		elif rmsd4 < rmsd1 and rmsd4 < rmsd2 and rmsd4 < rmsd3:
 86 | 			structures.xyz = structures.xyz[:cut+1]+structures.xyz[:cut:-1]
 87 | 			structures.xyz_1 = structures.xyz_1[:cut+1]+structures.xyz_1[:cut:-1]
 88 | 			structures.xyz_2 = structures.xyz_2[:cut+1]+structures.xyz_2[:cut:-1]
 89 | 			structures.title = structures.title[:cut+1]+structures.title[:cut:-1]
 90 | 		else:
 91 | 			pass
 92 | 	return structures
 93 | 	
 94 | def reduce(structures, settings):
 95 | 	if len(structures.xyz) == 1: # only one structure
 96 | 		return structures
 97 | 	i=0
 98 | 	count = 0
 99 | 	while i < len(structures.xyz)-1:
100 | 		if procrustes(structures.xyz[i], structures.xyz[i+1]) >= settings.reduce_tresh:
101 | 			i = i+1
102 | 		else:
103 | 			count = count +1
104 | 			del structures.xyz[i+1]
105 | 			del structures.title[i+1]
106 | 			del structures.xyz_1[i+1]
107 | 			del structures.xyz_2[i+1]
108 | 	log = open(settings.logfile, 'a')
109 | 	log.write("Reduced structures by {0} ({1:.0f}%)!\n".format(count, float(count)/(len(structures.xyz)+count)*100))
110 | 	log.close()
111 | 	return structures


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Dennis Svatunek
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # autoDIAS
2 | 
## A Python tool for automated Distortion/Interaction Activation Strain analysis
4 | 
The Distortion/Interaction Activation Strain (DIAS) analysis is a powerful tool for the investigation of energy barriers. However, setting up such a calculation and analyzing its data can be cumbersome and requires lengthy intervention by the user. We present autoDIAS, a Python tool for the automated setup, execution, and data extraction of a DIAS analysis, including automated detection of fragments and relevant geometric parameters.
6 | 
7 | 


--------------------------------------------------------------------------------
/autoDIAS.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python 
  2 | 
  3 | banner="""
  4 | *****************************************************************************************
  5 |                                    autoDIAS
  6 | 								Dennis Svatunek
  7 | 								    UCLA
  8 | 							d.svatunek@chem.ucla.com
  9 | 							    @clickchemist
 10 | 							
 11 | 						Download the newest version from:
 12 | 					   https://github.com/dsvatunek/autoDIAS
 13 | 					   
 14 | 					 Please cite. DOI: 10.1002/jcc.26023
 15 | *****************************************************************************************				   
 16 | 
 17 | Python wrapper for several computational chemistry software packages to automatically 
 18 | perform a Distortion/Interaction Activation Strain analysis calculations.
 19 | 
 20 | *****************************************************************************************
 21 | """
 22 | 
 23 | from DIAS_inputparser import *
 24 | from DIAS_analysis import *
 25 | from DIAS_reduce_reorder import *
 26 | import argparse, sys, time, subprocess, os, datetime, copy
 27 | 
# Module metadata
__version__= '1.0.0'
__author__= 'Dennis Svatunek'
__email__ = "d.svatunek@chem.ucla.edu"

# Python 2 compatibility: rebind range to xrange where xrange exists.
# Where it does not (Python 3) the lookup raises NameError, which is
# ignored so that the built-in range stays in use.
try:
	range = xrange
except NameError:
	pass
 37 | 
 38 | def save_xyz(A, atoms, title, filename):
 39 | 	file = open(filename + '.xyz', 'w')
 40 | 	file.write(str(len(atoms))+ "\n"+ title + "\n")
 41 | 	for i in range(len(A)):
 42 | 		file.write("{0:2s} {1:15.12f} {2:15.12f} {3:15.12f}\n".format(atoms[i], A[i, 0], A[i, 1], A[i, 2]))
 43 | 	file.close()
 44 | 	return
 45 | 
 46 | def save_input(A, atoms, title, filename, jobname, g09structure, charge, multiplicity):
 47 | 	file = open(filename, 'w')
 48 | 	#make a string with coordinates
 49 | 	coordinates = ""
 50 | 	for i in range(len(A)):
 51 | 		coordinates = coordinates +("{0:2s} {1:15.12f} {2:15.12f} {3:15.12f}\n".format(atoms[i], A[i, 0], A[i, 1], A[i, 2]))
 52 | 		#use G09 input string ans substitute the correct things
 53 | 	g09structure = g09structure.replace("$multiplicity", str(multiplicity))
 54 | 	g09structure = g09structure.replace("$charge", str(charge))
 55 | 	g09structure = g09structure.replace("$filename", jobname)
 56 | 	g09structure = g09structure.replace("$coordinates", coordinates.strip())
 57 | 	file.write(g09structure)
 58 | 	file.close()
 59 | 	return
 60 | 
 61 | def molecular_formula(atoms):
 62 | 	alphabetic_periodic_table = sorted(copy.deepcopy(periodic_table))
 63 | 	formula = ""
 64 | 	count = []
 65 | 	for element in alphabetic_periodic_table:
 66 | 		count =  count + [atoms.count(element)]
 67 | 	
 68 | 	if count[alphabetic_periodic_table.index("C")] > 1:
 69 | 		formula = formula + 'C' + str(count[alphabetic_periodic_table.index("C")])
 70 | 		count[alphabetic_periodic_table.index("C")] = 0
 71 | 	elif count[alphabetic_periodic_table.index("C")] > 0:
 72 | 		formula = formula + 'C'
 73 | 		count[alphabetic_periodic_table.index("C")] = 0
 74 | 	
 75 | 	if count[alphabetic_periodic_table.index("H")] > 1:
 76 | 		formula = formula + 'H' + str(count[alphabetic_periodic_table.index("H")])
 77 | 		count[alphabetic_periodic_table.index("H")] = 0	
 78 | 	elif count[alphabetic_periodic_table.index("H")] > 0:
 79 | 		formula = formula + 'H'
 80 | 		count[alphabetic_periodic_table.index("H")] = 0
 81 | 		
 82 | 	for x in range(0, len(alphabetic_periodic_table)):
 83 | 		if count[x] == 1:
 84 | 			formula = formula + alphabetic_periodic_table[x]
 85 | 		elif count[x] > 1:
 86 | 			formula = formula + alphabetic_periodic_table[x] + str(count[x])
 87 | 	return formula
 88 | 
 89 | def main():
 90 | 
 91 | 	parser =  argparse.ArgumentParser(usage='%(prog)s inp_file')
 92 | 	parser.add_argument('inp_file', metavar='Input file', type=str, help='Name of the input file')
 93 | 	parser.add_argument("-a", "--analysis",  action='store_true', help='Analysis only', default=False)
 94 | 	if len(sys.argv) == 1:
 95 | 		parser.print_help()
 96 | 		sys.exit(1)	
 97 | 	args = parser.parse_args()
 98 | 	
 99 | #----------------------------------------------------------------------------------------
100 | 	overallstarttime=time.time()	
101 | #-------------------------------------Parse Settings and Inputs----------------------
102 | 	starttime=time.time() # start timing for input parsing
103 | 	settings, structures = parse_in(args.inp_file,args.analysis)
104 | 	#analysis only run analysis only and then stop
105 | 	if args.analysis:
106 | 		endtime=time.time()
107 | 		totaltime=str(endtime-starttime)
108 | 		seconds=totaltime.split('.')[0]
109 | 		milliseconds=float('0.'+totaltime.split('.')[1])*1000	
110 | 		print('Processed input and parsed structures in {0} seconds and {1:.0f} ms'.format(seconds, float(milliseconds)))	
111 | 		if settings.reorder:
112 | 			structures = reorder(structures)
113 | 		if settings.reduce:
114 | 			structures = reduce(structures, settings)
115 | 		analysis_only(structures,settings)
116 | 		return
117 | 		
118 | 	endtime=time.time()
119 | 	totaltime=str(endtime-starttime)
120 | 	seconds=totaltime.split('.')[0]
121 | 	milliseconds=float('0.'+totaltime.split('.')[1])*1000
122 | 	
123 | 	log = open(settings.logfile, 'a')
124 | 	log.write(banner)
125 | 	try: #python2.6
126 | 		hostname = subprocess.check_output(['hostname']).decode('utf-8')
127 | 		username = subprocess.check_output(['whoami']).decode('utf-8')
128 | 	except AttributeError:
129 | 		hostname= ""
130 | 		username= ""
131 | 	log.write(str(username).strip()+'@'+str(hostname).strip()+'\n')
132 | 	log.write(time.ctime()+'\n')
133 | 	log.write('-------------------------------------------------------\n')
134 | 	log.write('Input file: '+args.inp_file)
135 | 	log.write('\nProcessed input and parsed structures in {0} seconds and {1:.0f} ms\n'.format(seconds, float(milliseconds)))
136 | 	log.write('-------------------------------------------------------\n')
137 | 	log.write('Determined settings and variables: \n\n')
138 | 	log.write('{0:50}{1}\n'.format("Job name",settings.name))
139 | 	log.write('{0:50}{1}\n'.format("Structure input",settings.ircfile))
140 | 	log.write('{0:50}{1}\n'.format("Structure input filetype",settings.filetype))
141 | 	if settings.analysis:
142 | 		log.write('{0:50}{1}\n'.format("\nAnalysis will be performed","YES"))
143 | 		if len(settings.geo_dist) > 0:
144 | 			log.write('    {0:50}\n'.format("The following bond distances will be examined:\n"))
145 | 			for i in range(len(settings.geo_dist)):
146 | 				log.write('         {0:15}atom 1: {1}\n         {2:15}atom 2: {3}\n\n'.format("Bond "+str(i+1),settings.geo_dist[i][0],"",settings.geo_dist[i][1]))
147 | 		if len(settings.geo_ang) > 0:
148 | 			log.write('    {0:50}\n'.format("The following angles will be examined:\n"))
149 | 			for i in range(len(settings.geo_ang)):
150 | 				log.write('         {0:15}atom 1: {1}\n         {2:15}atom 2: {3}\n         {2:15}atom 3: {4}\n\n'.format("Angle "+str(i+1),settings.geo_ang[i][0],"",settings.geo_ang[i][1],settings.geo_ang[i][2]))
151 | 		if len(settings.geo_dih) > 0:
152 | 			log.write('    {0:50}\n'.format("The following dihedral angles will be examined:\n"))
153 | 			for i in range(len(settings.geo_dih)):
154 | 				log.write('         {0:15}atom 1: {1}\n         {2:15}atom 2: {3}\n         {2:15}atom 3: {4}\n         {2:15}atom 4: {5}\n\n'.format("Dihedral "+str(i+1),settings.geo_dih[i][0],"",settings.geo_dih[i][1],settings.geo_dih[i][2],settings.geo_dih[i][3]))	
155 | 	log.write('{0:50}{1}\n'.format("Keep xyz files","YES" if settings.keepxyz else "NO"))
156 | 	log.write('{0:50}{1}\n'.format("Keep input files","YES" if settings.keepinput else "NO"))
157 | 	log.write('{0:50}{1}\n'.format("Keep output files","YES" if settings.keepoutput else "NO"))
158 | 	log.write('{0:50}{1}\n'.format("Keep this log file","YES" if settings.keeplog else "NO"))
159 | 	log.write('{0:50}{1}\n'.format("Reorder structures","YES" if settings.reorder else "NO"))
160 | 	log.write('{0:50}{1}\n'.format("Reduce structures","YES" if settings.reduce else "NO"))
161 | 	if settings.reduce:
162 | 		log.write('{0:50}{1}\n'.format("Threshold for reducing structures",settings.reduce_tresh))
163 | 	log.write('{0:50}{1}\n'.format("Prepare input and exit","YES" if settings.prepareonly else "NO"))
164 | 	# structure fragment details
165 | 	formula = molecular_formula(structures.atoms)
166 | 	fragment1_formula = molecular_formula(structures.frag1atoms)
167 | 	fragment2_formula = molecular_formula(structures.frag2atoms)
168 | 	
169 | 	log.write('\nStructure information\n\n')
170 | 	log.write('Input structure has {0} atoms with a molecular formula of:{1:>15}\n\n'.format(len(structures.atoms), formula))
171 | 	log.write('{0} has {1} atoms with a molecular formula of:{2:>15}\n\n'.format(settings.frag1name, len(structures.frag1atoms), fragment1_formula))
172 | 	log.write('{0} has {1} atoms with a molecular formula of:{2:>15}\n\n'.format(settings.frag2name, len(structures.frag2atoms), fragment2_formula))
173 | 	log.write('Complex has a charge and multiplicity of: {0} {1}\n'.format(settings.charge, settings.multi))
174 | 	log.write('{0} has a charge and multiplicity of: {1} {2}\n'.format(settings.frag1name, settings.frag1charge, settings.frag1multi))
175 | 	log.write('{0} has a charge and multiplicity of: {1} {2}\n'.format(settings.frag2name, settings.frag2charge, settings.frag2multi))
176 | 	log.write('{0} has an energy of: {1}\n'.format(settings.frag1name, settings.frag1energy))
177 | 	log.write('{0} has an energy of: {1}\n'.format(settings.frag2name, settings.frag2energy))
178 | 	log.write('\n')
179 | 	
180 | 	log.write('-------------------------------------------------------\n')		
181 | 	log.close()
182 | 	
183 | #-------------------------------------Reorder and reduce----------------------------
184 | 	if settings.reorder:
185 | 		structures = reorder(structures)
186 | 	if settings.reduce:
187 | 		structures = reduce(structures, settings)
188 | #-------------------------------------Produce input---------------------------------
189 | 	starttime=time.time()
190 | 
191 | 	if settings.keepxyz:
192 | 		if not os.path.exists(settings.name+'_xyz'):
193 | 			os.makedirs(settings.name+'_xyz')
194 | 		for x in range(0, len(structures.xyz)):
195 | 			save_xyz(structures.xyz[x], structures.atoms, structures.title[x], settings.name+'_xyz/complex_{0:04d}'.format(x+1))
196 | 			save_xyz(structures.xyz_1[x], structures.frag1atoms,settings.frag1name+'_{0:04d}'.format(x+1), settings.name+'_xyz/'+settings.frag1name+'_{0:04d}'.format(x+1))
197 | 			save_xyz(structures.xyz_2[x], structures.frag2atoms,settings.frag2name+'_{0:04d}'.format(x+1), settings.name+'_xyz/'+settings.frag2name+'_{0:04d}'.format(x+1))
198 | 		os.system("cat {0}_xyz/complex*.xyz > {0}_xyz/complete.xyz".format(settings.name))
199 | 	if not os.path.exists(settings.name+'_input'):
200 | 		os.makedirs(settings.name+'_input')
201 | 	if not os.path.exists(settings.name+'_output'):
202 | 		os.makedirs(settings.name+'_output')
203 | 	
204 | 	for x in range (0, len(structures.xyz)):
205 | 		save_input(structures.xyz[x], structures.atoms, structures.title[x], settings.name+'_input/complex_{0:04d}.'.format(x+1)+settings.input_file_extension, settings.name+'_output/complex_{0:04d}'.format(x+1), settings.inputlayout, settings.charge, settings.multi)
206 | 		save_input(structures.xyz_1[x], structures.frag1atoms, settings.frag1name+'_{0:04d}'.format(x+1), settings.name+'_input/'+settings.frag1name+'_{0:04d}.'.format(x+1)+settings.input_file_extension, settings.name+'_output/'+settings.frag1name+'_{0:04d}'.format(x+1), settings.inputlayout, settings.frag1charge, settings.frag1multi)
207 | 		save_input(structures.xyz_2[x], structures.frag2atoms, settings.frag2name+'_{0:04d}'.format(x+1), settings.name+'_input/'+settings.frag2name+'_{0:04d}.'.format(x+1)+settings.input_file_extension, settings.name+'_output/'+settings.frag2name+'_{0:04d}'.format(x+1), settings.inputlayout, settings.frag2charge, settings.frag2multi)	
208 | 	
209 | 	endtime=time.time()	
210 | 	totaltime=str(endtime-starttime)
211 | 	seconds=totaltime.split('.')[0]
212 | 	milliseconds=float('0.'+totaltime.split('.')[1])*1000
213 | 	log = open(settings.logfile, 'a')
214 | 	log.write('Produced input files for {0} structures in {1} seconds and {2:.0f} ms\n'.format(len(structures.xyz),seconds, float(milliseconds)))
215 | 	log.write('-------------------------------------------------------\n')	
216 | 	log.close()	
217 | 	
218 | #-------------------------------------Run jobs-----------------------------------------
219 | 	if settings.analysis and not settings.prepareonly:
220 | 		create_analysis_file(settings)
221 | 	log = open(settings.logfile, 'a')
222 | 	
223 | 	if settings.prepareonly:
224 | 		log.write('No calculations requested!\n')
225 | 	else:
226 | 		for x in range (0, len(structures.xyz)):
227 | 			#produce string with correct bash command
228 | 			command = settings.submit_setting.replace("$input", settings.name+'_input/complex_{0:04d}.'.format(x+1)+settings.input_file_extension)
229 | 			command = command.replace("$output", settings.name+'_output/complex_{0:04d}.'.format(x+1)+settings.output_file_extension)
230 | 			log.write(time.ctime()+'{0:<30}'.format('\tStarting {0:>15}'.format('complex_{0:04d}'.format(x+1))))
231 | 			log.flush()	
232 | 			starttime=time.time()
233 | 			os.system(command)
234 | 			endtime=time.time()
235 | 			totaltime=endtime-starttime
236 | 			log.write('\t finished after {0:0>15}\n'.format(str(datetime.timedelta(seconds=totaltime))))	
237 | 			log.flush()
238 | 			
239 | 			command = settings.submit_setting.replace("$input", settings.name+'_input/'+settings.frag1name+'_{0:04d}.'.format(x+1)+settings.input_file_extension)
240 | 			command = command.replace("$output", settings.name+'_output/'+settings.frag1name+'_{0:04d}.'.format(x+1)+settings.output_file_extension)
241 | 			log.write(time.ctime()+'{0:<30}'.format('\tStarting {0:>15}'.format(settings.frag1name+'_{0:04d}'.format(x+1))))
242 | 			log.flush()	
243 | 			starttime=time.time()
244 | 			os.system(command)
245 | 			endtime=time.time()
246 | 			totaltime=endtime-starttime
247 | 			log.write('\t finished after {0:0>15}\n'.format(str(datetime.timedelta(seconds=totaltime))))	
248 | 			log.flush()
249 | 			
250 | 			command = settings.submit_setting.replace("$input", settings.name+'_input/'+settings.frag2name+'_{0:04d}.'.format(x+1)+settings.input_file_extension)
251 | 			command = command.replace("$output", settings.name+'_output/'+settings.frag2name+'_{0:04d}.'.format(x+1)+settings.output_file_extension)
252 | 			log.write(time.ctime()+'{0:<30}'.format('\tStarting {0:>15}'.format(settings.frag2name+'_{0:04d}'.format(x+1))))
253 | 			log.flush()	
254 | 			starttime=time.time()
255 | 			os.system(command)
256 | 			endtime=time.time()
257 | 			totaltime=endtime-starttime
258 | 			log.write('\t finished after {0:0>15}\n'.format(str(datetime.timedelta(seconds=totaltime))))	
259 | 			log.flush()
260 | 			if settings.analysis:
261 | 				analysis(settings, structures, x)
262 | 				
263 | 	totaltime=time.time()-overallstarttime
264 | 	log.write('-------------------------------------------------------\n')
265 | 	log.write(time.ctime()+' Finished after {0:0>15}\n'.format(str(datetime.timedelta(seconds=totaltime))))
266 | 	log.write('-------------------------------------------------------\n')
267 | 	log.close()	
268 | #-------------------------------------Clean up-----------------------------------------
269 | 	if settings.keepinput == False:
270 | 		shutil.rmtree("input/", ignore_errors=True)
271 | 	if settings.keepoutput == False:
272 | 		shutil.rmtree("output/", ignore_errors=True)	
273 | 	if settings.keeplog == False:
274 | 		os.remove(settings.logfile)
275 | 		
276 | if __name__ == "__main__":
277 | 	main()
278 | 	
279 | 


--------------------------------------------------------------------------------
/input_example.inp:
--------------------------------------------------------------------------------
  1 | #--------------------------------------------------
  2 | Example input file
  3 | #--------------------------------------------------
  4 | 
  5 | # relative or absolute path to file with structures. Can be xyz file or G09 output from IRC, Scan, single point, frequency or geometry opt calculation
  6 | 
  7 | Input file = test.xyz
  8 | 
  9 | # Define a job name. Default is the input file name without file extension
 10 | 
 11 | Job name = test
 12 | 
 13 | #--------------------------------------------------
 14 | Information on Structures
 15 | #--------------------------------------------------
 16 | 
 17 | # Charge and multiplicity of the complex. Needs to be set for xyz files. Optional for G09 input 
 18 | 
 19 | charge = 0
 20 | multiplicity = 1
 21 | 
 22 | # Fragment information
 23 | # fragment name, default is fragment1 or fragment2.
 24 | Fragment1 name = Fragment1
 25 | # fragment charge and multiplicity have to be provided. Default is charge 0 and multiplicity of 1
 26 | Fragment1 charge = 0
 27 | Fragment1 multiplicity = 1
 28 | # list of the atoms in this fragment, provide a comma or space separated list, leave empty for automatic determination. If atom lists for both fragments are empty, then fragment 1 will contain the fragment containing atom 1
 29 | Fragment1 atoms = 
 30 | # Electronic energy of this fragment, required for calculation of relative energy of the complex and distortion energies. Defaults to 0.
 31 | Fragment1 energy = -243.485731
 32 | 
 33 | Fragment2 name = Fragment2
 34 | Fragment2 charge = 0
 35 | Fragment2 multiplicity = 1
 36 | Fragment2 atoms = 
 37 | Fragment2 energy = -464.752500
 38 | 
 39 | #--------------------------------------------------
 40 | Information for Analysis
 41 | #--------------------------------------------------
 42 | 
 43 | # Analysis can be turned on (YES) or off (NO), default is YES
 44 | Analysis = YES
 45 | 
 46 | # Distances can be extracted. Use atom numbers to define distances. Use one line per atom pair
 47 | # write "auto" in line to attempt finding a total of two bonds that get formed or broken.
 48 | <distances>
 49 | 1 2
 50 | auto
 51 | </distances>
 52 | 
 53 | # Angles can be extracted. Use atom numbers to define angles. Use one line per angle
 54 | <angles>
 55 | 1 2 3
 56 | </angles>
 57 | 
 58 | # Dihedral angles can be extracted. Use atom numbers to define angles. Use one line per angle
 59 | <dihedral>
 60 | 1 2 3 4 
 61 | </dihedral>
 62 | 
 63 | #--------------------------------------------------
 64 | Further Settings
 65 | #--------------------------------------------------
 66 | 
 67 | # If set to YES autoDIA produces XYZ files of all complex and fragment structures and saves them in a folder called <jobname>_xyz
 68 | Keep xyz = YES
 69 | # If set to YES autoDIA does not delete input files of all complex and fragment structures in folder called <jobname>_input
 70 | Keep input files = YES
 71 | # If set to YES autoDIA does not delete output files of all complex and fragment structures in folder called <jobname>_output
 72 | Keep output files = YES
 73 | # If set to YES autoDIA does not delete the log file called <jobname>_log.txt after successful termination
 74 | Keep log file = YES
 75 | 
 76 | #Set if structures should be reordered
 77 | reorder = NO
 78 | 
 79 | #Eliminate very similar structures based on an RMSD threshold
 80 | Reduce structures = NO
 81 | RMSD treshold = 0.2
 82 | 
 83 | #Only prepares the input files without running them; no analysis is performed (use the -a flag for analysis only)
 84 | Prepare only = NO
 85 | 
 86 | #--------------------------------------------------
 87 | Input File Structure
 88 | #--------------------------------------------------
 89 | 
 90 | # Input file structure for the chosen driver
 91 | # provide the settings for single point calculations
 92 | # use $filename and $coordinates $charge and $multiplicity in places where they are required
 93 | # Keep in mind the correct number of blank lines
 94 | 
 95 | <layout>
 96 | %mem=32GB
 97 | %nprocshared=16
 98 | # B3LYP/6-311+g(d,p) empiricaldispersion=gd3
 99 | 
100 | Title
101 | 
102 | $charge $multiplicity
103 | $coordinates
104 | 
105 | 
106 | </layout>
107 | 
108 | #--------------------------------------------------
109 | Input File Structure
110 | #--------------------------------------------------
111 | # provide the correct command to run the desired Software (e.g. Gaussian) on your system
112 | # use $input and if needed $output for input and output file name
113 | 
114 | <run_job>
115 | g09 $input $output
116 | </run_job>
117 | 
118 | # Option to set the input file extension, some software might require a specific one. Defaults to com
119 | input file extension = com
120 | # Option to set the output file extension, some software might require a specific one. Defaults to log
121 | output file extension = log
122 | 


--------------------------------------------------------------------------------
/old/test_autofrag.py:
--------------------------------------------------------------------------------
  1 | #fragments test
  2 | class settings:
  3 | 	pass
  4 | class structures:
  5 | 	pass
  6 | 
  7 | import numpy as np
  8 | 
  9 | 
# Element symbols; the list index of a symbol is used to look up its entry
# in covalent_radii. Index 0 holds an empty placeholder so that "H" sits at index 1.
periodic_table = ["","H","He","Li","Be","B","C","N","O","F","Ne","Na","Mg","Al","Si","P","S","Cl","Ar","K","Ca","Sc","Ti","V","Cr","Mn","Fe","Co","Ni","Cu","Zn","Ga","Ge","As","Se","Br","Kr","Rb","Sr","Y","Zr",
    "Nb","Mo","Tc","Ru","Rh","Pd","Ag","Cd","In","Sn","Sb","Te","I","Xe","Cs","Ba","La","Ce","Pr","Nd","Pm","Sm","Eu","Gd","Tb","Dy","Ho","Er","Tm","Yb","Lu","Hf","Ta","W","Re","Os","Ir","Pt","Au","Hg","Tl",
    "Pb","Bi","Po","At","Rn","Fr","Ra","Ac","Th","Pa","U","Np","Pu","Am","Cm","Bk","Cf","Es","Fm","Md","No","Lr","Rf","Db","Sg","Bh","Hs","Mt","Ds","Rg","Uub","Uut","Uuq","Uup","Uuh","Uus","Uuo"]
 13 | 	
 14 | #Covalent radii taken from DOI: 10.1039/b801115j
 15 | #Everything beyond Cm was set to 1.80
 16 | covalent_radii = [0.00,0.32,0.28,1.28,0.96,0.84,0.76,0.71,0.66,0.57,0.58,1.66,1.41,1.21,1.11,1.07,1.05,1.02,1.06,2.03,1.76,1.70,1.60,1.53,1.39,1.61,1.52,1.50,1.24,1.32,1.22,1.22,1.20,1.19,1.20,1.20,1.16,2.20,1.95,1.90,1.75,
 17 |     1.64,1.54,1.47,1.46,1.42,1.39,1.45,1.44,1.42,1.39,1.39,1.38,1.39,1.40,2.44,2.15,2.07,2.04,2.03,2.01,1.99,1.98,1.98,1.96,1.94,1.92,1.92,1.89,1.90,1.87,1.87,1.75,1.70,1.62,1.51,1.44,1.41,1.36,1.36,1.32,1.45,
 18 |     1.46,1.48,1.40,1.50,1.50,2.60,2.21,2.15,206,2.00,1.96,1.90,1.87,180,169,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80,1.80]
 19 | 	
 20 | def isInt(s):
 21 |     try: 
 22 |         int(s)
 23 |         return True
 24 |     except ValueError:
 25 |         return False	
 26 | #takes xyz coordinates as numpy array and returns distance
 27 | def getDistance(atom1, atom2):
 28 | 	distance = 0.00
 29 | 	distance = np.sqrt((atom1[0]-atom2[0])**2+(atom1[1]-atom2[1])**2+(atom1[2]-atom2[2])**2)
 30 | 	return distance
 31 | 
 32 | def structures_from_xyz(file):
 33 | 	structures.atoms = []
 34 | 	structures.xyz = []
 35 | 	structures.title = []
 36 | 	file_object = open(file, 'r')
 37 | 	input = (line for line in file_object) #make generator
 38 | 	#search for number of atoms
 39 | 	for line in input:
 40 | 		if isInt(line.strip()):
 41 | 			n_atoms=int(line)
 42 | 			break	
 43 | 	else: #exits if no line with number of atoms was found
 44 | 		sys.exit('Error:\t\tNo xyz coordinates found in file: ' + file)
 45 | 	#skip one line
 46 | 	structures.title.append(next(input).strip())
 47 | 	# now there should be n_atoms lines of coordinates
 48 | 	for i in range(n_atoms):
 49 | 		l=next(input).split()
 50 | 		if l[0] in periodic_table:
 51 | 			structures.atoms.append(l[0]) #get atom symbol and append to atom list
 52 | 		else:
 53 | 			sys.exit('Error:\t\tsomething is wrong with the first structure in file: '+file)
 54 | 		coords=[float(x) for x in l[1:]] #convert line to list of floats
 55 | 		coords=np.array([coords]) #create array with coords
 56 | 		try: #try append, doesn't work if XYZ doesn't exist yet
 57 | 			XYZ=np.concatenate((XYZ,coords), axis=0)
 58 | 		except NameError:
 59 | 			XYZ=coords
 60 | 	structures.xyz.append(XYZ) #append first structure to structures list
 61 | 	del XYZ #get rid of that for the next structure
 62 | 	#now search for more structures
 63 | 	for line in input:
 64 | 		#start extracting if atom number line is found
 65 | 		try:
 66 | 			if int(line.strip()) == n_atoms:
 67 | 				#read one line to skip title
 68 | 				structures.title.append(next(input).strip())
 69 | 				# now there should be n_atoms lines of coordinates
 70 | 				for i in range(n_atoms):
 71 | 					l=next(input).split()
 72 | 					coords=[float(x) for x in l[1:]]
 73 | 					coords=np.array([coords])
 74 | 					try: #try append, doesn't work if XYZ doesn't exist yet
 75 | 						XYZ=np.concatenate((XYZ,coords), axis=0)
 76 | 					except NameError:
 77 | 						XYZ=coords
 78 | 				structures.xyz.append(XYZ)
 79 | 				del XYZ
 80 | 		except ValueError:
 81 | 			pass			
 82 | 	return structures
 83 | 	
 84 | def isBond(distance, atomtype1, atomtype2, covalent_radii):
 85 | 	bond = True
 86 | 	'''find bond threshold based on covalent radii
 87 | 	taken from 10.1039/b801115j
 88 | 	For everything beyond Cm 1.80 A was used.
 89 | 	get radii from saved numpy array
 90 | 	use atomtypes as numbers
 91 | 	use factor to be sure to catch bonds. Bigger factor means more bonds are detected
 92 | 	'''
 93 | 	atomtype1 = int(periodic_table.index(atomtype1))
 94 | 	atomtype2 = int(periodic_table.index(atomtype2))
 95 | 	bond_threshold = 1.25 * (covalent_radii[atomtype1]+covalent_radii[atomtype2])
 96 | 	if distance > bond_threshold:
 97 | 		bond = False
 98 | 	return bond	
 99 | 	
def getFragments(structure, atoms):
	"""Split one structure into bonded fragments.

	structure -- coordinates, one entry per atom
	atoms     -- element symbols in the same order

	Two atoms are connected when isBond() accepts their distance; a fragment
	is a set of atoms linked through such connections. Returns a list of
	fragments, each a sorted list of 1-based atom numbers, ordered by their
	smallest atom number. An empty structure yields an empty list.

	Every fragment is returned sorted so that equal fragmentations of
	different frames compare equal as lists.
	"""
	n_atoms = len(structure)
	#for every atom, the 0-based indices of all atoms it is bonded to (itself included)
	neighbours = []
	for i in range(n_atoms):
		bonded = []
		for j in range(n_atoms):
			if isBond(getDistance(structure[i], structure[j]), atoms[i], atoms[j], covalent_radii):
				bonded.append(j)
		neighbours.append(bonded)

	#collect connected atoms by walking the bond graph from each unvisited atom
	fragments = []
	visited = [False] * n_atoms
	for start in range(n_atoms):
		if visited[start]:
			continue
		visited[start] = True
		pending = [start]
		members = []
		while pending:
			current = pending.pop()
			members.append(current+1) #atom numbers are 1-based
			for other in neighbours[current]:
				if not visited[other]:
					visited[other] = True
					pending.append(other)
		fragments.append(sorted(members))
	return fragments
156 | 	
def auto_frag(structures,settings):
	"""Pick the two fragments automatically and store them on settings.

	Every frame in structures.xyz is fragmented with getFragments(). Among the
	frames that split into exactly two fragments, the fragmentation occurring
	most often is chosen; its sorted atom lists are stored as
	settings.frag1atoms and settings.frag2atoms.

	Returns settings. When no frame splits into exactly two fragments,
	settings is returned without the two attributes being set.
	"""
	list_of_fragments = [getFragments(frame, structures.atoms) for frame in structures.xyz]
	print(list_of_fragments)
	#the next steps only work in cases where two fragments were found
	indices = [i for i, found in enumerate(list_of_fragments) if len(found) == 2]
	if not indices:
		# Explicit guard instead of a blanket except, so that unrelated errors are not hidden.
		return settings
	counts = [list_of_fragments.count(list_of_fragments[x]) for x in indices]
	fragments = list_of_fragments[indices[counts.index(max(counts))]]
	settings.frag1atoms = np.sort(fragments[0])
	settings.frag2atoms = np.sort(fragments[1])
	return settings
175 | 
# Script body: read test.xyz, detect the two fragments and print their atom numbers.
settings.ircfile = "test.xyz"
# structures_from_xyz fills and returns the structures holder
structures = structures_from_xyz(settings.ircfile)
settings = auto_frag(structures, settings)
# NOTE(review): these attributes are only set when auto_frag found a two-fragment frame
print(settings.frag1atoms)
print(settings.frag2atoms)


--------------------------------------------------------------------------------
/plotting_examples/2D_print.py:
--------------------------------------------------------------------------------
 1 | #example of plotting a complete DIA along a 2D surface
 2 | import numpy as np
 3 | from matplotlib import pyplot as plt
 4 | import matplotlib.mlab as ml
 5 | 
 6 | #import data from autoDIA output file
 7 | a = np.genfromtxt('Scan2D_DIA.txt',skip_header=4)
 8 | 
 9 | #create plot and subplots and layout
10 | f, axes = plt.subplots(2, 3,figsize=(5.25, 5),dpi=300,gridspec_kw = {'width_ratios':[5,5, 0.5]})
11 | f.delaxes(axes[0,1])
12 | f.delaxes(axes[0,2])
13 | axes[0,0].set_aspect(1, adjustable='box')
14 | axes[1,0].set_aspect(1, adjustable='box')
15 | axes[1,1].set_aspect(1, adjustable='box')
16 | axes[1,2].set_aspect(10, adjustable='box')
17 | f.subplots_adjust(wspace=0, hspace=0)
18 | 
19 | """The second column (keep in mind that Numpy numbering starts with 0) represents the first bond distance,
20 | the third column contains information about the second bond distance, the 6th column contains the total energy
21 | """
22 | x = a[:,1]
23 | y = a[:,2]
24 | z = a[:,5]
25 | 
26 | #getting the limits for the axis
27 | xmin = np.amin(x)
28 | xmax = np.amax(x)
29 | ymin = np.amin(y)
30 | ymax = np.amax(y)
31 | 
32 | #Manually select lower and upper energy limits
33 | Emin=-180
34 | Emax=150
35 | 
36 | #create array for contour lines which should be plottet. Spacing 10 kcal/mol
37 | b = np.linspace(Emin,Emax,(Emax-Emin)//10)
38 | 
39 | #create the griddata with z values needed for contour and pcolormesh plotting
40 | xi = np.linspace(xmin, xmax, len(x))
41 | yi = np.linspace(ymin, ymax, len(y))
42 | zi = ml.griddata(x, y, z, xi, yi, interp='linear')
43 | 
44 | #plotting contour lines and colormesh for the total energy subplot
45 | axes[0,0].contour(xi, yi, zi, b, linewidths = 0.5, colors = 'k',linestyles='solid')
46 | z1_plot = axes[0,0].pcolormesh(xi, yi, zi,vmin=Emin,vmax=Emax, cmap = plt.get_cmap('rainbow'))
47 | 
48 | #setting axes
49 | axes[0,0].set_title('Electronic energy')
50 | axes[0,0].set_ylabel('Bond distance 1')
51 | axes[0,0].set_yticks([1.5,2.0,2.5,3.0])
52 | 
53 | #plotting colorbar
54 | cb = plt.colorbar(z1_plot,cax=axes[1,2], ticks=[-150,-100, -50, 0, 50, 100, 150]) 
55 | cb.ax.set_yticklabels([-150,-100, -50, 0, 50, 100, 150],ha='right')
56 | cb.ax.yaxis.set_tick_params(pad=25)
57 | axes[0,0].set_xticklabels([])
58 | 
59 | #column 7 contains information about the interaction energy
60 | z = a[:,6]
61 | 
62 | #create the griddata with z values needed for contour and pcolormesh plotting
63 | zi = ml.griddata(x, y, z, xi, yi, interp='linear')
64 | 
65 | #plotting contour lines and colormesh for the interaction subplot
66 | axes[1,0].contour(xi, yi, zi, b, linewidths = 0.5, colors = 'k',linestyles='solid')
67 | axes[1,0].pcolormesh(xi, yi, zi,vmin=Emin,vmax=Emax, cmap = plt.get_cmap('rainbow'))
68 | 
69 | #setting axes
70 | axes[1,0].set_title('Interaction energy')
71 | axes[1,0].set_ylabel('Bond distance 1')
72 | axes[1,0].set_xlabel('Bond distance 2')
73 | axes[1,0].set_yticks([1.5,2.0,2.5,3.0])
74 | 
75 | #column 8 contains information about the distortion energy
76 | z = a[:,7]
77 | 
78 | #create the griddata with z values needed for contour and pcolormesh plotting
79 | zi = ml.griddata(x, y, z, xi, yi, interp='linear')
80 | 
81 | #plotting contour lines and colormesh for the distortion subplot
82 | axes[1,1].contour(xi, yi, zi, b, linewidths = 0.5, colors = 'k',linestyles='solid')
83 | axes[1,1].pcolormesh(xi, yi, zi,vmin=Emin,vmax=Emax, cmap = plt.get_cmap('rainbow'))
84 | 
85 | #setting axes
86 | axes[1,1].set_title('Distortion energy')
87 | axes[1,1].set_xlabel('Bond distance 2')
88 | axes[1,1].set_yticklabels([])
89 | 
90 | #layout and saving the figure
91 | f.tight_layout()
92 | plt.savefig('2D.png')
93 | 


--------------------------------------------------------------------------------
/plotting_examples/IRC_print.py:
--------------------------------------------------------------------------------
 1 | #example of plotting a complete DIA along an IRC as shown in Figure 6
 2 | import numpy as np
 3 | from matplotlib import pyplot as plt
 4 | import matplotlib.patches as mpatches
 5 | 
 6 | #create plot
 7 | f= plt.figure(figsize=(2.5, 2.5),dpi=300)
 8 | ax1 = f.add_subplot(1,1,1)
 9 | 
10 | #read data from autoDIA output
11 | a = np.genfromtxt('IRC1_DIA.txt',skip_header=4)
12 | 
13 | #set axis
14 | ax1.set_ylabel('Energy')
15 | ax1.set_xlabel('Forming bond distance')
16 | ax1.axis([np.amax(a[:,2])+0.1, np.amin(a[:,2])-0.1, np.amin(a[:,6])-20, np.amax(a[:,7])+20])
17 | ax1.axhline(y=0, color='grey', linestyle='--',linewidth = 0.5)
18 | 
19 | #the second geometric parameter (column 3 - keep in mind that Numpy counting starts at 0) representing a bond length is used as X-Value
20 | x = a[:,2]
21 | #In this case column 7 contains information about interaction energy
22 | y = a[:,6]
23 | ax1.plot(x,y,'-',color='g',linewidth = 1)
24 | 
25 | #In this case column 8 contains information about total distortion energy
26 | y = a[:,7]
27 | ax1.plot(x,y,'-',color='r',linewidth = 1)
28 | 
29 | #In this case column 9 contains information about the acrolein distortion energy
30 | y = a[:,8]
31 | ax1.plot(x,y,'-',color='lightcoral',linewidth = 0.5)
32 | 
33 | #In this case column 10 contains information about the butadiene distortion energy
34 | y = a[:,9]
35 | ax1.plot(x,y,'-',color='orange',linewidth = 0.5)
36 | 
37 | #In this case column 6 contains information about the total energy. Plottet last to be on top.
38 | y = a[:,5]
39 | ax1.plot(x,y,'-',color='k',linewidth = 1)
40 | 
41 | #creating the legend
42 | black_patch = mpatches.Patch(color='k', label='Total energy')
43 | red_patch = mpatches.Patch(color='r', label='Distortion energy')
44 | green_patch = mpatches.Patch(color='g', label='Interaction energy')
45 | coral_patch = mpatches.Patch(color='lightcoral', label='Acrolein distortion energy')
46 | orange_patch = mpatches.Patch(color='orange', label='Butadiene distortion energy')
47 | plt.legend(handles=[black_patch,red_patch,coral_patch,orange_patch, green_patch],prop={'size': 5},loc=3,frameon=False)
48 | 
49 | f.tight_layout()
50 | 
51 | f.savefig('IRC.png')
52 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | 


--------------------------------------------------------------------------------