Skip to content

Commit

Permalink
Improved calculation of formation energy (#29)
Browse files Browse the repository at this point in the history
* Improved calculation of formation energy

* Added script for computing atom reference energies
  • Loading branch information
peastman authored Jun 23, 2022
1 parent 40795ce commit 65feb5b
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 9 deletions.
34 changes: 34 additions & 0 deletions downloader/computeAtomEnergies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import psi4
from openmm.app import element

# This script uses Psi4 to compute the per-atom reference energies that appear in the downloader script.
# It is included here in case we want to add more elements in the future.

charges = {'Br': (-1, 0), 'C': (-1, 0, 1), 'Ca': (2,), 'Cl': (-1, 0), 'F': (-1, 0), 'H': (-1, 0, 1), 'I': (-1, 0),
'K': (1,), 'Li': (1,), 'Mg': (2,), 'N': (-1, 0, 1), 'Na': (1,), 'O': (-1, 0, 1), 'P': (0, 1), 'S': (-1, 0, 1)}

psi4.set_options({'reference': 'uhf'})
energies = {}
for symbol in charges:
energies[symbol] = {}
for charge in charges[symbol]:
# Try all multiplicities up to 4 to find the one with lowest energy.

electrons = element.get_by_symbol(symbol).atomic_number+charge
multiplicity = 1 if electrons%2 == 0 else 2
best_energy = None
while multiplicity <= 4:
try:
mol = psi4.geometry(f"""
{charge} {multiplicity}
{symbol}
""")
energy = psi4.energy('wB97M-D3BJ/def2-TZVPPD', molecule=mol)
if best_energy is None or energy < best_energy:
best_energy = energy
best_multiplicity = multiplicity
except:
pass
multiplicity += 2
energies[symbol][charge] = best_energy
print(energies)
54 changes: 45 additions & 9 deletions downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,50 @@

# Reference energies computed with Psi4 1.5 wB97M-D3BJ/def2-TZVPPD.

atom_energy = {('Br', -1): -2574.2451510945853, ('Br', 0): -2574.1167240829964, ('C', -1): -37.91424135791358, ('C', 0): -37.87264507233593,
('C', 1): -37.45349214963933, ('Ca', 2): -676.9528465198214, ('Cl', -1): -460.3350243496703, ('Cl', 0): -460.1988762285739,
('F', -1): -99.91298732343974, ('F', 0): -99.78611622985483, ('H', 0): -0.4987605100487341, ('I', -1): -297.8813829975981,
('I', 0): -297.76228914445625, ('K', 1): -599.8025677513111, ('Li', 1): -7.285254714046546, ('Mg', 2): -199.2688420040449,
('N', -1): -54.602291095426494, ('N', 0): -54.62327513368922, ('N', 1): -54.08594142587869, ('Na', 1): -162.11366478783253,
('O', -1): -75.17101657391741, ('O', 0): -75.11317840410095, ('O', 1): -74.60241514396725, ('P', 0): -341.3059197024934,
('P', 1): -340.9258392474849, ('S', -1): -398.2405387031612, ('S', 0): -398.1599636677874, ('S', 1): -397.7746615977658}
atom_energy = {'Br': {-1: -2574.2451510945853, 0: -2574.1167240829964},
'C': {-1: -37.91424135791358, 0: -37.87264507233593, 1: -37.45349214963933},
'Ca': {2: -676.9528465198214},
'Cl': {-1: -460.3350243496703, 0: -460.1988762285739},
'F': {-1: -99.91298732343974, 0: -99.78611622985483},
'H': {-1: -0.5027370838721259, 0: -0.4987605100487531, 1: 0.0},
'I': {-1: -297.8813829975981, 0: -297.76228914445625},
'K': {1: -599.8025677513111},
'Li': {1: -7.285254714046546},
'Mg': {2: -199.2688420040449},
'N': {-1: -54.602291095426494, 0: -54.62327513368922, 1: -54.08594142587869},
'Na': {1: -162.11366478783253},
'O': {-1: -75.17101657391741, 0: -75.11317840410095, 1: -74.60241514396725},
'P': {0: -341.3059197024934, 1: -340.9258392474849},
'S': {-1: -398.2405387031612, 0: -398.1599636677874, 1: -397.7746615977658}}
default_charge = {}
for symbol in atom_energy:
energies = [(energy, charge) for charge, energy in atom_energy[symbol].items()]
default_charge[symbol] = sorted(energies)[0][1]

# Given a SMILES string, compute the reference energy of the atoms when fully separated.

def compute_reference_energy(smiles):
rdmol = Chem.MolFromSmiles(smiles, sanitize=False)
total_charge = sum(atom.GetFormalCharge() for atom in rdmol.GetAtoms())
symbol = [atom.GetSymbol() for atom in rdmol.GetAtoms()]
charge = [default_charge[s] for s in symbol]
delta = np.sign(total_charge-sum(charge))
while delta != 0:
best_index = -1
best_energy = None
for i in range(len(symbol)):
s = symbol[i]
e = atom_energy[s]
new_charge = charge[i]+delta
if new_charge in e:
if best_index == -1 or e[new_charge]-e[charge[i]] < best_energy:
best_index = i
best_energy = e[new_charge]-e[charge[i]]
charge[best_index] += delta
delta = np.sign(total_charge - sum(charge))
return sum(atom_energy[s][c] for s, c in zip(symbol, charge))

# Process the configuration file and download data.

with open('config.yaml') as input:
config = yaml.safe_load(input.read())
Expand Down Expand Up @@ -44,8 +81,7 @@
molecules = mols_by_name[name]
qcvars = [r.extras['qcvars'] for r in group_recs]
smiles = molecules[0].extras['canonical_isomeric_explicit_hydrogen_mapped_smiles']
rdmol = Chem.MolFromSmiles(smiles, sanitize=False)
ref_energy = sum(atom_energy[(atom.GetSymbol(), atom.GetFormalCharge())] for atom in rdmol.GetAtoms())
ref_energy = compute_reference_energy(smiles)
group = outputfile.create_group(name)
group.create_dataset('subset', data=[subset], dtype=h5py.string_dtype())
group.create_dataset('smiles', data=[smiles], dtype=h5py.string_dtype())
Expand Down

0 comments on commit 65feb5b

Please sign in to comment.