-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathauto_cluster_analysis.py
300 lines (268 loc) · 21.1 KB
/
auto_cluster_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# -*- coding: utf-8 -*-
"""
@author: Josh Kemppainen
Revision 1.2
November 4th, 2023
Michigan Technological University
1400 Townsend Dr.
Houghton, MI 49931
**************************************************************
* Requirements: *
* python 3.7+ *
* *
* Dependencies: *
* python natsort module: *
* - pip3 install natsort (if pip manager is installed) *
* - if not installed files will be sorted using the *
* standard python sort function (which may not sort *
* files in a human logical manner). *
* *
* ALL read in topofiles from the files_directory MUST have *
* some NUMBER OR LETTER that represents the "progression" *
* or "evolution" sequence of the topofiles. Usually this *
* will be a "time stamp" being used to write the LAMMPS *
* datafile using the nevery command in LAMMPS, but also *
* could be a crosslink density value as well that provides *
* a logical sequencing of the sorted topofiles. *
* *
* Run methods: *
* - IDE (manipulate variables and run from IDE) *
* - GUI (manipulate variables and run from. Default *
* settings set from this script) *
* - command line (python3 auto_cluster_analysis.py -man to *
* get command line override options and *
* examples) *
* *
**************************************************************
"""
##############
### Inputs ###
##############
#################################################################################################################
# Python boolean variable to use GUI or not (True or False). All GUI settings are intialized from this script #
# so adjusting this script will set the default settings that the GUI will load with. Please NOTE that when #
# using the GUI to check the console or terminal print outs every time a system is run through the code #
# because that is where the import information will be displayed. Examples: #
# use_GUI = True # Will launch GUI to manipulate variables and run from GUI #
# use_GUI = False # Will NOT launch GUI where to manipulate variables and run will have to be from the #
# IDE or command line #
# #
# Python int value to range from 0 to 200 to control the GUI zoom. Where it option was added to account for the #
# different ways individuals have their OS display settings setup. A GUI_zoom = 100, means that default settings#
# are used, whereas a GUI_zoom = 120 means increase GUI size by 20% and GUI_zoom = 80 means decrease GUI by 20%.#
# Examples: #
# GUI_zoom = 100 # use default GUI size #
# GUI_zoom = 75 # decrease GUI size by 25% #
# #
# Update use_GUI and GUI_zoom as desired. #
#################################################################################################################
use_GUI = True
GUI_zoom = 100
#################################################################################################################
# Python string variable type to set directory where LAMMPS datafiles are stored. The code will change to this #
# directory to read ALL files in this directory with the *.data extension. This means ONLY have LAMMPS *.data #
# files in this directory you want to read into this code to automate the molecule logging process. Examples: #
# files_directory = 'EXMAPLES/auto_cluster_analysis' # Will read all *.data files in this directory #
# #
# Update files_directory as desired. #
#################################################################################################################
files_directory = 'EXAMPLES/auto_cluster_analysis'
#################################################################################################################
# Python string variable type to set new file name(s). Example for newfile = 'auto_logging_molecule_data' #
# filename = auto_logging_molecule_data.csv # *NOTE info in file will be dependant on info in logger dict #
# #
# Update newfile as desired. #
#################################################################################################################
newfile = 'auto_logging_molecule_data'
#################################################################################################################
# Inputs for computing degree of polymerization (Xn) and Extent of reaction (p). Equations and info can be #
# found in: Polymers Chemistry and Physics of Modern Materials, Third Edition by Cowie page 42 #
# #
# Extent of reaction: #
# p = (2*(N0 - N))/(N0*fav) #
# #
# Critical Extent of reaction (gel point): #
# pg = 2/fav #
# #
# Degree of polymerization: #
# Xn = 2/(2 - p*fav) #
# #
# Where: #
# p = Extent of reaction #
# pg = Critical Extent of reaction (gel point) #
# N0 = Number of intial molecules before polymerization #
# N = Number of molecules at any stage or polymerization #
# fav = Average number of functional groups present per monomer unit #
# Xn = Degree of polymerization #
# #
# Determining fav for EPON 862 example: #
# 1 DETDA molecule (4-functional groups) #
# 2 DGEBF molecule (2-fuctional groups) #
# mix = 1*DETDA + 2*DGEBF (2 DETDA to every 1 DGEBF) #
# fav = (1-DETDA*4-functional-groups + 2-DGEBF*2-functional-groups)/3-molecules #
# fav = (1*4 + 2*2)/3 = 2.66 #
#################################################################################################################
N0 = 150
fav = 2.66
#################################################################################################################
# Option to write a .txt file of BASENAME.txt (topofile) of all connectivty information found (True or False). #
#################################################################################################################
txtfile = False
###########################################
### Main auto_cluster_analysis function ###
###########################################
import src.io_functions as io_functions
def main(files_directory, N0, fav, txtfile, newfile, log=io_functions.LUNAR_logger()):
import src.clusters as clusters
import time
import os
# Get time
start_time = time.time()
# Configure log (default is level='production', switch to 'debug' if debuging)
log.configure(level='production')
#log.configure(level='debug')
###########################################################################
# Setting up directories and where to write final files and results to #
###########################################################################
# Find present working directory and Find/create paths to store code results
pwd = os.getcwd();
path = os.path.join(pwd, files_directory);
# Check if path exists. IF not create.
if not os.path.isdir(path):
os.makedirs(path, exist_ok=True)
# Change the current working directory to path to write all new files to outputs directory
os.chdir(path)
log.out('Moved to {} directory to find LAMMPS datafiles to iterate through'.format(path))
# Check for newfile, if already exists delete
if os.path.exists(os.path.join(path, newfile+'.csv')):
os.remove(os.path.join(path, newfile+'.csv'))
################################################
# Function to write .csv file from logger dict #
################################################
def write_csv(logger, newfile):
# Function to write to .csv file with mode='w' or mode='a'
def write_from_logger(logger, mode):
with open(newfile, mode) as f:
# Find titles and data to write
titles = []; data = [];
for i in logger:
titles.append('{}'.format(i)); data.append('{}'.format(logger[i]))
# Join with comma's/Write title and data if file does not exists
if mode == 'w': f.write('{}\n'.format(', '.join(titles))) # only write headers in 'w' mode
f.write('{}\n'.format(', '.join(data))) # always write data in either 'w' or 'a' mode
return
# Check if path+newfile exists, if not write title and data. else append to file
if not os.path.exists(os.path.join(path, newfile)): write_from_logger(logger, mode='w')
else: write_from_logger(logger, mode='a')
return
###############################################################################################################
# Find molecules and store them in the molecules instance. Info available from the instance: #
# molecules.ATTRIBUTE with the following attributes: #
# #
# - molecules.clusters = [ {molID 1 atomIDs}, {molID2 atomIDs}, NmolIDs, ... ] #
# - molecules.info[molID].ATTRIBUTE where the following attributes are available: #
# * .atoms = set(atomIDs in molID) #
# * .size = number of atoms in molID #
# * .mass = mass of molID #
# * .psize = % number of atoms in molID #
# * .pmass = % mass of molID fragment #
# * Examples: #
# molecules.info[1].atoms (gets set of atomIDs in molID 1) #
# molecules.info[2].mass (gets mass of molID 2) #
# molecules.info[3].size (gets natoms of molID 3) #
# * NOTES: #
# molIDs are set based on cluster size that is 1st sorted by number of atoms.can ONLY access molIDs #
# that exists (the 1st 10 molIDs are intialized w/ ZERO's to help access molIDs that may not exist). #
# - molecules.mass_total = total mass of system in mass units of datafile #
# - molecules.size_total = total number of atoms in the read in datafile #
# - molecules.p = Extent of reaction (chemistry/eqn specific. May need to be changed) #
# - molecules.Xn = Degree of polymerization (chemistry/eqn specific. May need to be changed) #
# - molecules.Mw = Weight-average molar mass (Mw) #
# - molecules.Mn = Number-average molar mass (Mn) #
# - molecules.Mz = Higher-average molar mass (Mz) #
# - molecules.Mz1 = Higher-average molar mass (Mz+1) #
# - molecules.RMW = Weight-averge reduced molecular weight (RMW) #
###############################################################################################################
try:
from natsort import natsorted
topofiles = natsorted([file for file in os.listdir(path) if file.endswith('.data')])
log.out('Read in topofiles were sorted using natsort, thus they should be iterated through')
log.out('in a sequence that represents evolution or progression and logged in that order.')
except:
topofiles = sorted([file for file in os.listdir(path) if file.endswith('.data')])
log.warn('WARNING natsort was not installed and read in topofiles were sorted using pythons')
log.out('sorted function. The file iteration sequence may be unorder from a evolution or')
log.out('progression stand point. Depending on the names given to the read in files. You may')
log.out('intall natsort with: pip3 install natsort (if pip manager is installed)')
log.out('\n\nIterating through topofiles to find molecule information ....')
for iteration, topofile in enumerate(topofiles, 1):
#---------------------------------------------------------------------------------------#
# Find molecules and store data in a class w/multiple layers of attributes listed above #
#---------------------------------------------------------------------------------------#
molecules = clusters.analysis(topofile, N0, txtfile, fav, pflag=False, log=log)
#---------------------------------------------------------------------------------#
# Set up logger dict to hold the info to write to the .csv file. Update as desired#
# header name in file: written value #
#---------------------------------------------------------------------------------#
logger = {'Iteration': iteration,
'1st cluster Mass': molecules.info[1].mass,
'2nd cluster Mass': molecules.info[2].mass,
'1st cluster %Mass': molecules.info[1].pmass,
'2nd cluster %Mass': molecules.info[2].pmass,
'RMW': molecules.RMW,
'filename': molecules.filename,
}
#-----------------------------------#
# Write logger results to .csv file #
#-----------------------------------#
write_csv(logger, newfile+'.csv')
####################################
# Cleanup and finalization of code #
####################################
# Change back to the intial directory to keep directory free for deletion
os.chdir(pwd)
# Print file locations and completion of code
log.out(f'\n\nAll outputs can be found in {path} directory')
log.out('\n\nNormal program termination\n\n')
# Script run time
execution_time = (time.time() - start_time)
log.out('Execution time in seconds: ' + str(execution_time))
# Show number of warnings and errors
log.out_warnings_and_errors()
return
###################################
### Import needed files and run ###
###################################
if __name__ == "__main__":
import src.command_line as cl
import sys
####################################################################################################
# Set IDE/command line mode option: #
# - if run in IDE commandline_inputs lst will have zero entries. Which means use hard coded inputs #
# from inputs section. #
# #
# - if run in command line, but commandline_inputs still has zero entries, this means use hard #
# coded inputs from inputs section. #
# #
# - else run in command line and commandline_inputs has entries determine what entries will be #
# overrided from hard coded inputs from inputs section. This overide will occur in in the inputs #
# section called Command Line Override. *NOTE: if user does not specify certain command line #
# options, the default settings from inputs section will be enforced. A message will tell user #
# if inputs from inputs section have been enforced. The user can find all command line options #
# by running: #
# python3 auto_cluster_analysis.py -opt #
# or #
# python3 auto_cluster_analysis.py -man #
# in the terminal and the code will print out all command line option and terminate before any #
# further analysis is done #
####################################################################################################
# Find command line inputs that are not the python script itself
commandline_inputs = [i for n, i in enumerate(sys.argv) if n > 0]
if commandline_inputs and '-gui' not in commandline_inputs:
files_directory, N0, txtfile, fav, newfile = cl.clusters_auto(files_directory, N0, txtfile, fav, newfile, commandline_inputs)
use_GUI = False
if use_GUI or '-gui' in commandline_inputs:
print('\n\n\nauto_cluster_analysis is currently running in GUI mode, where all GUI inputs are intialized from auto_cluster_analysis.\n\n\n')
from src.auto_clusters_GUI import GUI
GUI(files_directory, N0, fav, txtfile, newfile, GUI_zoom)
else: main(files_directory, N0, fav, txtfile, newfile)