forked from DanielMarchand/aiida-alloy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaiida_launch_workflow_alalloy.py
executable file
·551 lines (474 loc) · 22.6 KB
/
aiida_launch_workflow_alalloy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
#!/usr/bin/env python
import click
import time
import copy
import sys
from pprint import pprint
import aiida
aiida.load_profile()
from aiida.engine import workfunction
from aiida.orm import Dict
def retrieve_alluncalculated_structures(structure_group_label,
                                        workchain_group_label=None):
    """Return the structures of a group that still lack a workchain.

    A structure counts as "dealt with" when it is an input (incoming link)
    of a ``WorkChainNode`` that itself belongs to a group. When
    ``workchain_group_label`` is given only workchains in the group with
    that label are considered; otherwise any group matches.

    :param structure_group_label: label of the group holding the structures
    :param workchain_group_label: optional label of the workchain group
    :return: list of ``StructureData`` nodes without a matching workchain
    """
    from aiida.orm import Group, QueryBuilder, StructureData, WorkChainNode

    group_filters = {}
    if workchain_group_label:
        group_filters = {'label': workchain_group_label}

    # First query: ids of the structures that already fed a workchain.
    done_query = QueryBuilder()
    done_query.append(Group, filters={'label': structure_group_label}, tag='g')
    done_query.append(StructureData, project='id', tag='s', with_group='g')
    done_query.append(WorkChainNode, tag='job', with_incoming='s')
    done_query.append(Group, with_node='job', filters=group_filters)
    handled_ids = [row[0] for row in done_query.distinct().all()]
    if not handled_ids:
        # The '!in' filter below must not receive an empty list.
        handled_ids = [-1]

    # Second query: every structure of the group whose id was not seen above.
    todo_query = QueryBuilder()
    todo_query.append(Group, filters={'label': structure_group_label}, tag='g')
    todo_query.append(StructureData, project='*', tag='s', with_group='g',
                      filters={'id': {'!in': handled_ids}})
    return [row[0] for row in todo_query.all()]
def retrieve_numactive_calculations():
    """Return the number of calculation jobs that are still active.

    A ``CalcJobNode`` is counted when its ``process_state`` attribute is
    not one of ``finished``, ``excepted`` or ``killed``.

    :return: number of matching nodes
    """
    from aiida.orm import QueryBuilder
    from aiida.orm import CalcJobNode

    qb = QueryBuilder()
    qb.append(CalcJobNode,
              filters={'attributes.process_state':
                       {'!in': ['finished', 'excepted', 'killed']}})
    # Let the database count instead of loading every node into memory.
    return qb.count()
def retrieve_numactive_elastic():
    """Return the number of ``ElasticWorkChain`` processes still active.

    A node is counted when its ``process_label`` attribute equals
    ``ElasticWorkChain`` and its ``process_state`` attribute is not one of
    ``finished``, ``excepted`` or ``killed``.

    :return: number of matching nodes
    """
    from aiida.orm import QueryBuilder
    from aiida.orm import WorkChainNode

    qb = QueryBuilder()
    # Work chains are stored as WorkChainNode (not CalcJobNode) and the
    # label lives under 'process_label'; querying anything else matches
    # nothing, which would silently disable the elastic throttle.
    qb.append(WorkChainNode,
              filters={'attributes.process_state':
                       {'!in': ['finished', 'excepted', 'killed']},
                       'attributes.process_label': 'ElasticWorkChain'})
    return qb.count()
def get_numelectrons_structure_upffamily(structure, pseudos):
    """Return the total number of valence electrons of a structure.

    The valence charge of each element is read from the text of its
    pseudopotential and summed over all atoms of the structure.

    :param structure: object whose ``get_ase()`` result provides
        ``get_chemical_symbols()``
    :param pseudos: mapping from element symbol to an object providing
        ``get_content()`` (the pseudopotential file as text)
    :return: summed valence charge as an integer
    :raises Exception: if a valence line cannot be parsed, or if a
        pseudopotential contains no valence line at all
    """
    def parse_numelectrons_upfdata(upfdata):
        """Extract the integer valence charge from one pseudopotential."""
        for line in upfdata.get_content().split('\n'):
            lowered = line.lower()
            if "valence" not in lowered or "z" not in lowered:
                continue
            if len(line.split("=")) == 2:
                # key="value" form: take the value and drop the quotes
                return int(float(line.split("=")[-1].strip().strip('"')))
            if len(line.split()) == 3:
                # three whitespace-separated tokens: the value comes first
                return int(float(line.split()[0]))
            raise Exception("Could not parse {}".format(upfdata))
        # Without this the function would return None and the caller would
        # fail later with an unrelated-looking TypeError.
        raise Exception(
            "Could not find the valence charge in {}".format(upfdata))

    symbols = structure.get_ase().get_chemical_symbols()

    # Parse each pseudopotential once, however many atoms use it.
    element_nume_dict = {}
    for element in set(symbols):
        print(pseudos[element])
        element_nume_dict[element] = parse_numelectrons_upfdata(
            pseudos[element])

    return sum(element_nume_dict[element] for element in symbols)
def get_kmeshfrom_kptper_recipang(aiida_structure, kptper_recipang):
    """Return a k-point mesh from a k-point density.

    For every reciprocal cell vector the mesh entry is
    ``ceil(kptper_recipang * |vector|)``.

    :param aiida_structure: object whose ``get_ase()`` result provides
        ``get_reciprocal_cell()``
    :param kptper_recipang: number of k-points per unit length of the
        reciprocal cell vectors (a plain number)
    :return: list with one entry per reciprocal cell vector; the entries
        are the values returned by ``numpy.ceil``
    """
    import numpy as np

    reci_cell = np.asarray(aiida_structure.get_ase().get_reciprocal_cell())
    return [np.ceil(kptper_recipang * np.linalg.norm(reci_vector))
            for reci_vector in reci_cell]
def get_nummachines(structure, pseudo_familyname):
    """Guess how many machines a calculation on ``structure`` needs.

    NOTE: this is a very adhoc guess for the number of nodes, assuming
    quadratic scaling with the number of electrons.

    :param structure: structure the calculation will run on
    :param pseudo_familyname: name of the pseudopotential family used to
        look up the pseudopotentials of the structure
    :return: an even number of machines, never less than 2
    """
    from aiida.orm.nodes.data.upf import get_pseudos_from_structure

    family_pseudos = get_pseudos_from_structure(structure, pseudo_familyname)
    num_electrons = get_numelectrons_structure_upffamily(structure,
                                                         family_pseudos)

    # Coefficients of the quadratic estimate a2*n**2 + a1*n + a0.
    a2, a1, a0 = 1.5*10**-6, 5.7*10**-3, 2
    estimate = a2*num_electrons**2+a1*num_electrons+a0

    # force even # of nodes
    even_estimate = round(estimate/2)*2
    return max(even_estimate, 2)
def get_nk(num_machines, code):
    """Choose the ``-nk`` value (as a string) for a calculation.

    The guess is ``max(4, int(num_machines / 2))``. When the computer of
    ``code`` runs one MPI process per machine the result is always ``"1"``.

    :param num_machines: number of machines the job will use
    :param code: object whose ``computer`` provides
        ``get_default_mpiprocs_per_machine()``
    :return: the chosen value as a string
    :raises Exception: if the computer has no default number of MPI
        processes per machine, or if the total number of processes is not
        divisible by the chosen value
    """
    ppm = code.computer.get_default_mpiprocs_per_machine()
    if ppm is None:
        # Fail with a readable message instead of a TypeError further down.
        raise Exception("The computer of {} has no default number of MPI "
                        "processes per machine".format(code))

    # if a local computer we set nk = 1
    if ppm == 1:
        return str(1)

    nk = max(4, int(num_machines/2))  # adhoc guess
    nump = num_machines * ppm

    # check if our choice is valid
    if nump % nk != 0:
        raise Exception("Error number processors: {} "
                        "is not divisible by nk: {}".format(nump, nk))
    return str(nk)
def wf_getconventionalstructure(structuredata):
    """
    Standardize an AiiDA StructureData object via pymatgen Structure
    using spglib.

    The function is currently disabled: its first statement raises, so
    the conversion code below it is never executed.

    :param structuredata: original StructureData
    :raises Exception: always
    """
    raise Exception("THIS SHOULD BE A WORKFUNCTION!")

    # Unreachable until the guard above is removed.
    from aiida.orm import StructureData
    from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

    analyzer = SpacegroupAnalyzer(structuredata.get_pymatgen())
    conventional = analyzer.get_conventional_standard_structure()
    return StructureData(pymatgen_structure=conventional)
def wf_getkpoints(aiida_structure, kptper_recipang):
    """Build a ``KpointsData`` mesh for a structure from a k-point density.

    :param aiida_structure: structure providing ``get_ase()``
    :param kptper_recipang: node-like object whose ``value`` attribute is
        the k-point density handed to ``get_kmeshfrom_kptper_recipang``
    :return: ``KpointsData`` with the computed mesh set
    """
    from aiida.orm import KpointsData

    # Reuse the module-level helper instead of a nested duplicate of it.
    kpoints_mesh = get_kmeshfrom_kptper_recipang(aiida_structure,
                                                 kptper_recipang.value)

    kpoints = KpointsData()
    # The helper returns numpy.ceil results (floats); pass integers.
    kpoints.set_kpoints_mesh([int(divisions) for divisions in kpoints_mesh])
    return kpoints
def wf_setupparams(base_parameter, structure,
                   pseudo_familyname, nume2bnd_ratio,
                   cellpress_parameter):
    """Create the calculation parameters for one structure.

    Starts from the dictionary of ``base_parameter``, sets
    ``SYSTEM.nbnd`` from the electron count of the structure and then
    overwrites top-level entries with those of ``cellpress_parameter``.

    :param base_parameter: node providing ``get_dict()`` with the base
        parameters
    :param structure: structure the parameters are for
    :param pseudo_familyname: node whose ``value`` is the pseudopotential
        family name
    :param nume2bnd_ratio: node whose ``value`` is the band to electron
        ratio
    :param cellpress_parameter: node providing ``get_dict()`` with the
        entries that replace those of the base parameters
    :return: new ``Dict`` node with the assembled parameters
    """
    import math

    from aiida.orm.nodes.data.upf import get_pseudos_from_structure

    pseudos = get_pseudos_from_structure(structure, pseudo_familyname.value)
    nelec = get_numelectrons_structure_upffamily(structure, pseudos)

    # The band count has to be an integer: strip floating point noise
    # (e.g. 144.00000000000003) and round up to the next whole band.
    nbnd = int(math.ceil(round(nelec * nume2bnd_ratio.value, 8)))
    nbnd = max(nbnd, 20)  # minimum of 20 bands to avoid certain crashes

    parameter_dict = base_parameter.get_dict()
    # Do not assume the base parameters already contain a SYSTEM section.
    parameter_dict.setdefault('SYSTEM', {})['nbnd'] = nbnd
    parameter_dict.update(cellpress_parameter.get_dict())

    return Dict(dict=parameter_dict)
def wf_delete_vccards(parameter):
    """Return a new ``Dict`` with the parameters minus the ``CELL`` entry.

    :param parameter: node providing ``get_dict()``
    :return: ``Dict`` built from the remaining entries
    """
    remaining = parameter.get_dict()
    remaining.pop('CELL', None)
    return Dict(dict=remaining)
def _free_submission_slots(calc_method, max_active_calculations,
                           max_active_elastic):
    """Return ``(free_slots, running)`` for the limit that applies.

    Elastic runs are limited by the number of active elastic workchains,
    every other method by the number of active calculations.
    """
    if calc_method == 'elastic':
        running = retrieve_numactive_elastic()
        return max_active_elastic - running, running
    running = retrieve_numactive_calculations()
    return max_active_calculations - running, running


def _build_cellpress_dict(base_parameter_dict, press_conv_thr,
                          z_cellrelax_only):
    """Merge the command line CELL overrides into the base CELL settings."""
    cellpress_dict = {}
    if "CELL" in base_parameter_dict:
        cellpress_dict["CELL"] = dict(base_parameter_dict["CELL"])
    # setdefault keeps the base CELL entries instead of replacing them.
    if press_conv_thr:
        cell = cellpress_dict.setdefault("CELL", {})
        cell["press_conv_thr"] = float(press_conv_thr)
    if z_cellrelax_only:
        cellpress_dict.setdefault("CELL", {})["cell_dofree"] = "z"
    return cellpress_dict


def _build_relax_inputs(relaxation_scheme, code, parameters, options,
                        settings, pseudo_familyname, kpoints):
    """Return a fresh input dictionary for one relax workchain."""
    from aiida.orm import Bool, Str

    # A new 'pw' dictionary is created on every call so that two sets of
    # relax inputs never end up sharing (and overwriting) 'parameters'.
    return {
        'base': {
            'pw': {
                'code': code,
                'parameters': parameters,
                'metadata': {'options': options},
                'settings': settings,
            },
            'pseudo_family': Str(pseudo_familyname),
            'kpoints': kpoints,
        },
        'relaxation_scheme': Str(relaxation_scheme),
        'final_scf': Bool(False),
        'meta_convergence': Bool(False),
    }


@click.command()
@click.option('-c', '--code_node', required=True,
              help="node of code to use")
@click.option('-sg', '--structure_group_label', required=True,
              help='input group of structures to submit workchains on')
@click.option('-wg', '--workchain_group_label', required=True,
              help='output group of workchains')
@click.option('-sn', '--structure_node',
              help='structure node to submit workchains on.'
                   'creates the provided structure group and adds the node')
@click.option('-bp', '--base_parameter_node', required=True,
              help='node of base ParameterData to setup calculations')
@click.option('-pfn', '--pseudo_familyname', required=True,
              help='name of pseudopotential family to use')
@click.option('-kra', '--kptper_recipang', required=True,
              help='number of kpoints to use per reciprocal angstrom')
@click.option('-ber', '--nume2bnd_ratio', required=True,
              help='band to electron ratio')
@click.option('-cm', '--calc_method', default='scf',
              type=click.Choice(["scf", "relax", "vc-relax", "elastic"]),
              help='The type of calculation to perform')
@click.option('-ucs', '--use_conventional_structure', is_flag=True,
              help='Turns the input structure to its pymatgen conventional form prior to running')
@click.option('-pct', '--press_conv_thr', default=None,
              help='Specify the pressure conv threshold in Kbar (vc-relax only)')
@click.option('-mws', '--max_wallclock_seconds', default=8*60*60,
              help='maximum wallclock time per job in seconds')
@click.option('-mac', '--max_active_calculations', default=300,
              help='maximum number of active calculations')
@click.option('-mae', '--max_active_elastic', default=5,
              help='maximum number of active elastic workchains')
@click.option('-mns', '--max_nodes_submit', default=20,
              help='maximum nodes that can be used in a submission')
@click.option('-mas', '--max_atoms_submit', default=400,
              help='maximum number atoms that can be used in a submission')
@click.option('-nnd', '--number_of_nodes', default=None,
              help='Force all calculations to use the specified number of nodes')
@click.option('-memgb', '--memory_gb', default=None,
              help='specify the amount of memory for all jobs in GB')
@click.option('-nd', '--ndiag', default=None,
              help='ndiag setting to be passed direct to QE')
@click.option('-nk', '--npools', default=None,
              help='npools setting to be passed direct to QE')
@click.option('-sli', '--sleep_interval', default=10*60,
              help='time to wait (sleep) between calculation submissions')
@click.option('-zmo', '--z_movement_only', is_flag=True,
              help='Restricts movement to the z direction only. For relaxing stacking fault')
@click.option('-zco', '--z_cellrelax_only', is_flag=True,
              help='Restricts vc-relax to the z direction only. For relaxing stacking fault')
@click.option('-stm', '--strain_magnitudes', default=None,
              help='A comma seperated list of strain magnitudes. Only used for elastic workchain')
@click.option('-uas', '--use_all_strains', is_flag=True,
              help='Force use of all strains. Only used for elastic workchain')
@click.option('-kwd', '--keep_workdir', is_flag=True,
              help='Keep the workdir files after running')
@click.option('-dr', '--dryrun', is_flag=True,
              help="Prints inputs but does not launch anything")
@click.option('-sdb', '--submit_debug', is_flag=True,
              help='submit the script to debug queue. Submits one structure only'
                   ' and does not attach the output to the workchain_group')
@click.option('-rdb', '--run_debug', is_flag=True,
              help='run the script in debug mode. runs first calc then exits'
                   ' and does not attach the output to the workchain_group')
def launch(code_node, structure_group_label, workchain_group_label,
           structure_node, base_parameter_node,
           pseudo_familyname, kptper_recipang,
           nume2bnd_ratio, press_conv_thr,
           calc_method, use_conventional_structure,
           max_wallclock_seconds, max_active_calculations, max_active_elastic,
           max_nodes_submit, max_atoms_submit,
           number_of_nodes, memory_gb, ndiag, npools,
           sleep_interval, z_movement_only, z_cellrelax_only,
           strain_magnitudes, use_all_strains,
           keep_workdir, dryrun, submit_debug, run_debug):
    """Launch a workchain for every not-yet-calculated structure of a group."""
    from timeit import default_timer as timer

    from aiida.orm import Group, load_node
    from aiida.orm import Bool, Dict, Float, List, Int, Str, StructureData
    from aiida.engine import submit, run
    from aiida.plugins.factories import WorkflowFactory

    def print_timing(start):
        """Print the seconds elapsed since ``start``."""
        print("timing: {}s".format(timer() - start))

    # announce if running in debug mode
    if submit_debug:
        print("Running in debug mode!")

    # setup parameters
    code = load_node(code_node)
    workchain_group = Group.objects.get_or_create(label=workchain_group_label)[0]
    base_parameter = load_node(base_parameter_node)

    if structure_node:
        structure_group = Group.objects.get_or_create(label=structure_group_label)[0]
        input_structure = load_node(structure_node)
        if not isinstance(input_structure, StructureData):
            raise Exception("structure node was not a StructureData")
        structure_group.add_nodes([input_structure])

    # Load all the structures in the structure group, not-yet run in
    # workchain_group_label. The lookup is kept because it fails early
    # when the structure group does not exist.
    structure_group = Group.get(label=structure_group_label)
    uncalculated_structures = retrieve_alluncalculated_structures(
        structure_group_label,
        workchain_group_label=workchain_group_label
    )
    if not uncalculated_structures:
        print(("All structures in {} already have associated workchains in "
               "the group {}".format(structure_group_label, workchain_group_label)))
        sys.exit()

    # determine number of calculations to submit
    calcs_to_submit, _ = _free_submission_slots(
        calc_method, max_active_calculations, max_active_elastic)

    # cell-related parameters specified from cli; identical for every
    # structure, so they are assembled once
    cellpress_dict = _build_cellpress_dict(base_parameter.get_dict(),
                                           press_conv_thr, z_cellrelax_only)

    # submit calculations
    for submit_counter, structure in enumerate(uncalculated_structures):
        if use_conventional_structure:
            structure = wf_getconventionalstructure(structure)

        print("Preparing to launch {}".format(structure))
        print("calcs to submit: {} (active/max){}:{}".format(
            len(uncalculated_structures) - submit_counter,
            calcs_to_submit,
            max_active_calculations))

        num_atoms = len(structure.get_ase())
        if num_atoms > max_atoms_submit:
            print("{} has more atoms than the max allowed {}".format(structure,
                                                                      max_atoms_submit))
            print("If you wish to overide please use --max_atoms_submit")
            continue

        # ensure no more than the max number of calcs are submitted
        while calcs_to_submit <= 0:
            calcs_to_submit, running = _free_submission_slots(
                calc_method, max_active_calculations, max_active_elastic)
            if calcs_to_submit <= 0:  # in case jobs finished during submission
                if calc_method == 'elastic':
                    print(("{} elastic running,"
                           "max num elastic {} waiting....".format(
                               running, max_active_elastic)))
                else:
                    print(("{} calcs running,"
                           "max num calcs {} waiting....".format(
                               running, max_active_calculations)))
                time.sleep(sleep_interval)

        # start timer to inspect job submission times
        start = timer()

        # determine number of bands & setup the parameters
        cellpress_parameter = Dict(dict=cellpress_dict)
        parameters = wf_setupparams(base_parameter,
                                    structure,
                                    Str(pseudo_familyname),
                                    Float(nume2bnd_ratio),
                                    cellpress_parameter)

        # determine kpoint mesh & setup kpoints
        kpoints = wf_getkpoints(structure, Int(kptper_recipang))

        # determine parallelization & resources (setup the settings & options)
        if number_of_nodes:
            num_machines = int(number_of_nodes)
        else:
            num_machines = get_nummachines(structure, pseudo_familyname)
            if calc_method in ['relax', 'vc-relax']:
                num_machines += 4
            if num_machines > int(max_nodes_submit):
                print("{} nodes requested, maximum is {}".format(num_machines, max_nodes_submit))
                print("If you wish to launch please choose nodes manually with --number_of_nodes")
                continue

        options_dict = {
            'max_wallclock_seconds': max_wallclock_seconds,
            'resources': {'num_machines': num_machines},
        }
        if memory_gb:
            options_dict['max_memory_kb'] = int(int(memory_gb)*1024*1024)
        if submit_debug:
            num_machines = 2
            options_dict['resources']['num_machines'] = num_machines
            options_dict['max_wallclock_seconds'] = int(30*60)
            options_dict['queue_name'] = 'debug'
        workchain_options = options_dict

        nk = npools if npools else get_nk(num_machines, code)
        settings_dict = {
            'cmdline': ['-nk', nk],
            'no_bands': True
        }
        if ndiag:
            settings_dict['cmdline'] += ['-ndiag', ndiag]
        if z_movement_only:
            settings_dict['fixed_coords'] = [[True, True, False]]*num_atoms
        settings = Dict(dict=settings_dict)

        # setup inputs & submit workchain
        inputs = {
            'clean_workdir': Bool(not keep_workdir),
        }
        if calc_method == 'scf':
            workchain_class = WorkflowFactory('quantumespresso.pw.base')
            inputs['pw'] = {
                'code': code,
                'parameters': wf_delete_vccards(parameters),
                'metadata': {'options': workchain_options},
                'settings': settings,
                'structure': structure,
            }
            inputs['kpoints'] = kpoints
            inputs['pseudo_family'] = Str(pseudo_familyname)
        elif calc_method in ['relax', 'vc-relax']:
            workchain_class = WorkflowFactory('quantumespresso.pw.relax')
            if calc_method == 'relax':
                # a fixed-cell relaxation must not carry the CELL settings
                parameters = wf_delete_vccards(parameters)
            inputs.update(_build_relax_inputs(
                calc_method, code, parameters, workchain_options,
                settings, pseudo_familyname, kpoints))
            inputs['structure'] = structure
        elif calc_method == 'elastic':
            if submit_debug:
                print("Using debug queue with elastic workchain is not advised!")
            workchain_class = WorkflowFactory('elastic')
            inputs['structure'] = structure
            # Each sub-workchain receives its own input dictionaries: the
            # initial relaxation keeps the CELL settings, the relaxation
            # of the strained cells runs without them.
            inputs['initial_relax'] = _build_relax_inputs(
                'vc-relax', code, parameters, workchain_options,
                settings, pseudo_familyname, kpoints)
            inputs['elastic_relax'] = _build_relax_inputs(
                'relax', code, wf_delete_vccards(parameters),
                workchain_options, settings, pseudo_familyname, kpoints)
            if strain_magnitudes:
                strain_magnitudes_list = [float(x) for x in strain_magnitudes.split(',')]
                inputs['strain_magnitudes'] = List(list=strain_magnitudes_list)
            if use_all_strains:
                inputs['symmetric_strains_only'] = Bool(False)
        else:
            raise Exception("Invalid calc_method: {}".format(calc_method))

        calcs_to_submit -= 1
        if dryrun:
            pprint("ase_structure: {}".format(structure.get_ase()))
            pprint("aiida_settings: {}".format(settings.get_dict()))
            pprint("aiida_options: {}".format(workchain_options))
            pprint("aiida_inputs: ")
            pprint(inputs)
            print_timing(start)
            continue
        elif run_debug:
            run(workchain_class, **inputs)
            sys.exit()
        else:
            node = submit(workchain_class, **inputs)
            print("WorkChain: {} submitted".format(node))
            print_timing(start)
            # debug submissions are not attached to the workchain group
            if submit_debug:
                sys.exit()
            workchain_group.add_nodes([node])
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    launch()