diff --git a/.gitignore b/.gitignore index 214fe7c..90525f6 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,6 @@ results/ # VSCode config .vscode/ + +# Local test input. +tests/input/deepmd diff --git a/emle/_backends/_backend.py b/emle/_backends/_backend.py new file mode 100644 index 0000000..eb2cd03 --- /dev/null +++ b/emle/_backends/_backend.py @@ -0,0 +1,72 @@ +####################################################################### +# EMLE-Engine: https://github.com/chemle/emle-engine +# +# Copyright: 2023-2024 +# +# Authors: Lester Hedges +# Kirill Zinovjev +# +# EMLE-Engine is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# EMLE-Engine is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with EMLE-Engine. If not, see . +##################################################################### + +"""In-vacuo backend base class.""" + +__all__ = ["Backend"] + + +class Backend: + """ + Base class for in-vacuo backends. + + This class should not be instantiated directly, but should be subclassed + by specific backends. + """ + + def __init__(self): + """ + Constructor. + """ + + # Don't allow user to create an instance of the base class. + if type(self) is Backend: + raise NotImplementedError( + "'Backend' is an abstract class and should not be instantiated directly" + ) + + def calculate(self, atomic_numbers, xyz, forces=True): + """ + Compute the energy and forces. + + Parameters + ---------- + + atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,) + The atomic numbers of the atoms in the QM region. + + xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3) + The coordinates of the atoms in the QM region in Angstrom. + + forces: bool + Whether to calculate and return forces. + + Returns + ------- + + energy: float + The in-vacuo energy in Eh. + + forces: numpy.ndarray + The in-vacuo forces in Eh/Bohr. + """ + raise NotImplementedError diff --git a/emle/_backends/_deepmd.py b/emle/_backends/_deepmd.py index 76f536a..635e8d5 100644 --- a/emle/_backends/_deepmd.py +++ b/emle/_backends/_deepmd.py @@ -20,104 +20,160 @@ # along with EMLE-Engine. If not, see . ##################################################################### -"""DeepMD in-vacuo backend implementation.""" +"""DeePMD in-vacuo backend implementation.""" -__all__ = ["calculate_deepmd"] +__all__ = ["DeePMD"] +import ase as _ase import numpy as _np +import os as _os -from .._constants import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM +from .._units import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM +from ._backend import Backend as _Backend -def calculate_deepmd(calculator, xyz, elements, gradient=True): - """ - Internal function to compute in vacuo energies and gradients using - DeepMD. - - Parameters - ---------- - - calculator: :class:`emle.calculator.EMLECalculator` - The EMLECalculator instance. - - xyz: numpy.array - The coordinates of the QM region in Angstrom. - - elements: [str] - The list of elements. - gradient: bool - Whether to return the gradient. - - Returns - ------- - - energy: float - The in vacuo ML energy in Eh. - - gradients: numpy.array - The in vacuo ML gradient in Eh/Bohr. +class DeePMD(_Backend): + """ + DeepMD in-vacuo backend implementation. """ - if not isinstance(xyz, _np.ndarray): - raise TypeError("'xyz' must be of type 'numpy.ndarray'") - if xyz.dtype != _np.float64: - raise TypeError("'xyz' must have dtype 'float64'.") - - if not isinstance(elements, (list, tuple)): - raise TypeError("'elements' must be of type 'list'") - if not all(isinstance(element, str) for element in elements): - raise TypeError("'elements' must be a 'list' of 'str' types") - - # Reshape to a frames x (natoms x 3) array. - xyz = xyz.reshape([1, -1]) - - e_list = [] - f_list = [] - - # Run a calculation for each model and take the average. - for dp in calculator._deepmd_potential: - # Work out the mapping between the elements and the type indices - # used by the model. - try: - mapping = { - element: index for index, element in enumerate(dp.get_type_map()) - } - except: - raise ValueError(f"DeePMD model doesnt' support element '{element}'") - - # Now determine the atom types based on the mapping. - atom_types = [mapping[element] for element in elements] - - e, f, _ = dp.eval(xyz, cells=None, atom_types=atom_types) - e_list.append(e) - f_list.append(f) - - # Write the maximum DeePMD force deviation to file. - if calculator._deepmd_deviation: - from deepmd.infer.model_devi import calc_model_devi_f - - max_f_std = calc_model_devi_f(_np.array(f_list))[0][0] - if ( - calculator._deepmd_deviation_threshold - and max_f_std > calculator._deepmd_deviation_threshold - ): - msg = "Force deviation threshold reached!" - _logger.error(msg) - raise ValueError(msg) - with open(calculator._deepmd_deviation, "a") as f: - f.write(f"{max_f_std:12.5f}\n") - # To be written to qm_xyz_file. - calculator._max_f_std = max_f_std - - # Take averages and return. (Gradient equals minus the force.) - e_mean = _np.mean(_np.array(e_list), axis=0) - grad_mean = -_np.mean(_np.array(f_list), axis=0) - return ( - ( - e_mean[0][0] * _EV_TO_HARTREE, - grad_mean[0] * _EV_TO_HARTREE * _BOHR_TO_ANGSTROM, + def __init__(self, model, deviation=None, deviation_threshold=None): + # We support a str, or list/tuple of strings. + if not isinstance(model, (str, list, tuple)): + raise TypeError("'model' must be of type 'str', or a list of 'str' types") + else: + # Make sure all values are strings. + if isinstance(model, (list, tuple)): + for m in model: + if not isinstance(m, str): + raise TypeError( + "'model' must be of type 'str', or a list of 'str' types" + ) + # Convert to a list. + else: + model = [model] + + # Make sure all of the model files exist. + for m in model: + if not _os.path.isfile(m): + raise IOError(f"Unable to locate DeePMD model file: '{m}'") + + # Validate the deviation file. + if deviation is not None: + if not isinstance(deviation, str): + raise TypeError("'deviation' must be of type 'str'") + + self._deviation = deviation + + if deviation_threshold is not None: + try: + deviation_threshold = float(deviation_threshold) + except: + raise TypeError("'deviation_threshold' must be of type 'float'") + + self._deviation_threshold = deviation_threshold + else: + self._deviation = None + self._deviation_threshold = None + + # Store the list of model files, removing any duplicates. + self._model = list(set(model)) + if len(self._model) == 1 and deviation: + raise IOError( + "More that one DeePMD model needed to calculate the deviation!" + ) + + # Initialise DeePMD backend attributes. + try: + from deepmd.infer import DeepPot as _DeepPot + + self._potential = [_DeepPot(m) for m in self._model] + self._z_map = [] + for dp in self._potential: + self._z_map.append( + { + element: index + for index, element in enumerate(dp.get_type_map()) + } + ) + except: + raise RuntimeError("Unable to create the DeePMD potentials!") + + self._max_f_std = None + + def calculate(self, atomic_numbers, xyz, forces=True): + """ + Compute the energy and forces. + + Parameters + ---------- + + atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,) + The atomic numbers of the atoms in the QM region. + + xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3) + The coordinates of the atoms in the QM region in Angstrom. + + forces: bool + Whether to calculate and return forces. + + Returns + ------- + + energy: float + The in-vacuo energy in Eh. + + forces: numpy.ndarray + The in-vacuo forces in Eh/Bohr. + """ + + if not isinstance(atomic_numbers, _np.ndarray): + raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") + if not isinstance(xyz, _np.ndarray): + raise TypeError("'xyz' must be of type 'numpy.ndarray'") + + if len(atomic_numbers) != len(xyz): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not " + f"match length of 'xyz' ({len(xyz)})" + ) + + e_list = [] + f_list = [] + + # Run a calculation for each model and take the average. + for i, dp in enumerate(self._potential): + # Assume all frames have the same number of atoms. + atom_types = [ + self._z_map[i][_ase.Atom(z).symbol] for z in atomic_numbers[0] + ] + e, f, _ = dp.eval(xyz, cells=None, atom_types=atom_types) + e_list.append(e) + f_list.append(f) + + # Write the maximum DeePMD force deviation to file. + if self._deviation: + from deepmd.infer.model_devi import calc_model_devi_f + + max_f_std = calc_model_devi_f(_np.array(f_list))[0][0] + if self._deviation_threshold and max_f_std > self._deviation_threshold: + msg = "Force deviation threshold reached!" + self._max_f_std = max_f_std + raise ValueError(msg) + with open(self._deviation, "a") as f: + f.write(f"{max_f_std:12.5f}\n") + # To be written to qm_xyz_file. + self._max_f_std = max_f_std + + # Take averages over models and return. + e_mean = _np.mean(_np.array(e_list), axis=0) + f_mean = -_np.mean(_np.array(f_list), axis=0) + + # Covert units. + e, f = ( + e_mean.flatten() * _EV_TO_HARTREE, + f_mean * _EV_TO_HARTREE * _BOHR_TO_ANGSTROM, ) - if gradient - else e_mean[0][0] * _EV_TO_HARTREE - ) + + return e, f if forces else e diff --git a/emle/_backends/_orca.py b/emle/_backends/_orca.py index 8f9d8c7..e355f19 100644 --- a/emle/_backends/_orca.py +++ b/emle/_backends/_orca.py @@ -22,8 +22,9 @@ """ORCA in-vacuo backend implementation.""" -__all__ = ["calculate_orca"] +__all__ = ["ORCA"] +import ase as _ase import numpy as _np import os as _os import shlex as _shlex @@ -31,144 +32,207 @@ import subprocess as _subprocess import tempfile as _tempfile +from ._backend import Backend as _Backend -def calculate_orca( - calculator, - orca_input=None, - xyz_file_qm=None, - elements=None, - xyz_qm=None, - gradient=True, -): + +class ORCA(_Backend): + """ + ORCA in-vacuo backend implementation. """ - Internal function to compute in vacuo energies and gradients using - ORCA. - Parameters - ---------- + def __init__(self, exe, template=None): + """ + Constructor. - calculator: :class:`emle.calculator.EMLECalculator` - The EMLECalculator instance. + Parameters + ---------- - orca_input: str - The path to the ORCA input file. (Used with the sander interface.) + exe: str + The path to the ORCA executable. - xyz_file_qm: str - The path to the xyz coordinate file for the QM region. (Used with the - sander interface.) + template: str + The path to the ORCA template file. + """ - elements: [str] - The list of elements. (Used with the Sire interface.) + if not isinstance(exe, str): + raise TypeError("'exe' must be of type 'str'") - xyz_qm: numpy.array - The coordinates of the QM region in Angstrom. (Used with the Sire - interface.) + if not _os.path.isfile(exe): + raise IOError(f"Unable to locate ORCA executable: '{exe}'") - gradient: bool - Whether to return the gradient. + if template is not None: + if not isinstance(template, str): + raise TypeError("'template' must be of type 'str'") - Returns - ------- + if not _os.path.isfile(template): + raise IOError(f"Unable to locate ORCA template file: '{template}'") - energy: float - The in vacuo QM energy in Eh. + # Read the ORCA template file to check for the presence of the + # '*xyzfile' directive. Also store the charge and spin multiplicity. + lines = [] + with open(template, "r") as f: + for line in f: + if "*xyzfile" in line: + is_xyzfile = True + else: + lines.append(line) - gradients: numpy.array - The in vacuo QM gradient in Eh/Bohr. - """ + if not is_xyzfile: + raise ValueError( + "ORCA template file must contain '*xyzfile' directive!" + ) - if orca_input is not None and not isinstance(orca_input, str): - raise TypeError("'orca_input' must be of type 'str'.") - if orca_input is not None and not _os.path.isfile(orca_input): - raise IOError(f"Unable to locate the ORCA input file: {orca_input}") - - if xyz_file_qm is not None and not isinstance(xyz_file_qm, str): - raise TypeError("'xyz_file_qm' must be of type 'str'.") - if xyz_file_qm is not None and not _os.path.isfile(xyz_file_qm): - raise IOError(f"Unable to locate the ORCA QM xyz file: {xyz_file_qm}") - - if elements is not None and not isinstance(elements, (list, tuple)): - raise TypeError("'elements' must be of type 'list' or 'tuple'.") - if elements is not None and not all( - isinstance(element, str) for element in elements - ): - raise TypeError("'elements' must be a 'list' of 'str' types.") - - if xyz_qm is not None and not isinstance(xyz_qm, _np.ndarray): - raise TypeError("'xyz_qm' must be of type 'numpy.ndarray'") - if xyz_qm is not None and xyz_qm.dtype != _np.float64: - raise TypeError("'xyz_qm' must have dtype 'float64'.") - - # ORCA input files take precedence. - is_orca_input = True - if orca_input is None or xyz_file_qm is None: - if elements is None: - raise ValueError("No elements specified!") - if xyz_qm is None: - raise ValueError("No QM coordinates specified!") - - is_orca_input = False - - if calculator._orca_template is None: - raise ValueError( - "No ORCA template file specified. Use the 'orca_template' keyword." - ) + # Try to extract the charge and spin multiplicity from the line. + try: + _, charge, mult, _ = line.split() + except: + raise ValueError( + "Unable to parse charge and spin multiplicity from ORCA template file!" + ) - fd_orca_input, orca_input = _tempfile.mkstemp( - prefix="orc_job_", suffix=".inp", text=True - ) - fd_xyz_file_qm, xyz_file_qm = _tempfile.mkstemp( - prefix="inpfile_", suffix=".xyz", text=True - ) - - # Parse the ORCA template file. Here we exclude the *xyzfile line, - # which will be replaced later using the correct path to the QM - # coordinate file that is written. - is_xyzfile = False - lines = [] - with open(calculator._orca_template, "r") as f: - for line in f: - if "*xyzfile" in line: - is_xyzfile = True - else: - lines.append(line) - - if not is_xyzfile: - raise ValueError("ORCA template file doesn't contain *xyzfile line!") - - # Try to extract the charge and spin multiplicity from the line. - try: - _, charge, mult, _ = line.split() - except: + self._template_lines = lines + self._charge = int(charge) + self._mult = int(mult) + + self._exe = exe + self._template = template + + def calculate(self, atomic_numbers, xyz, forces=True): + """ + Compute the energy and forces. + + Parameters + ---------- + + atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,) + The atomic numbers of the atoms in the QM region. + + xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3) + The coordinates of the atoms in the QM region in Angstrom. + + forces: bool + Whether to calculate and return forces. + + Returns + ------- + + energy: float + The in-vacuo energy in Eh. + + forces: numpy.ndarray + The in-vacuo forces in Eh/Bohr. + """ + + if not isinstance(atomic_numbers, _np.ndarray): + raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") + if atomic_numbers.dtype != _np.int64: + raise TypeError("'atomic_numbers' must have dtype 'int32'.") + + if not isinstance(xyz, _np.ndarray): + raise TypeError("'xyz' must be of type 'numpy.ndarray'") + if xyz.dtype != _np.float64: + raise TypeError("'xyz' must have dtype 'float64'.") + + if len(atomic_numbers) != len(xyz): raise ValueError( - "Unable to parse charge and spin multiplicity from ORCA template file!" + f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not " + f"match length of 'xyz' ({len(xyz)})" ) - # Write the ORCA input file. - with open(orca_input, "w") as f: - for line in lines: - f.write(line) - - # Add the QM coordinate file path. - with open(orca_input, "a") as f: - f.write(f"*xyzfile {charge} {mult} {_os.path.basename(xyz_file_qm)}\n") - - # Write the xyz input file. - with open(xyz_file_qm, "w") as f: - f.write(f"{len(elements):5d}\n\n") - for elem, xyz in zip(elements, xyz_qm): - f.write(f"{elem:<3s} {xyz[0]:20.16f} {xyz[1]:20.16f} {xyz[2]:20.16f}\n") - - # Create a temporary working directory. - with _tempfile.TemporaryDirectory() as tmp: - # Work out the name of the input files. - inp_name = f"{tmp}/{_os.path.basename(orca_input)}" - xyz_name = f"{tmp}/{_os.path.basename(xyz_file_qm)}" - - # Copy the files to the working directory. - if is_orca_input: + # Lists to store results. + results_energy = [] + results_forces = [] + + # Loop over batches. + for i, (atomic_numbers_i, xyz_i) in enumerate(zip(atomic_numbers, xyz)): + if len(atomic_numbers_i) != len(xyz_i): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers_i)}) does not " + f"match length of 'xyz' ({len(xyz_i)}) for index {i}" + ) + + # Create a temporary working directory. + with _tempfile.TemporaryDirectory() as tmp: + # Workout the name of the input and xyz files. + inp_name = f"{tmp}/input.inp" + xyz_name = f"{tmp}/input.xyz" + + # Write the input file. + with open(inp_name, "w") as f: + for line in self._template_lines: + f.write(line) + f.write(f"*xyzfile {self._charge} {self._mult} {xyz_name}\n") + + # Work out the elements. + elements = [_ase.Atom(id).symbol for id in atomic_numbers_i] + + # Write the xyz file. + with open(xyz_name, "w") as f: + f.write(f"{len(elements):5d}\n\n") + for elem, pos in zip(elements, xyz_i): + f.write( + f"{elem:<3s} {pos[0]:20.16f} {pos[1]:20.16f} {pos[2]:20.16f}\n" + ) + + # Run the ORCA calculation. + e, f = self.calculate_sander(xyz_name, inp_name, forces=forces) + + # Store the results. + results_energy.append(e) + results_forces.append(f) + + # Convert the results to NumPy arrays. + results_energy = _np.array(results_energy) + results_forces = _np.array(results_forces) + + return results_energy, results_forces if forces else results_energy + + def calculate_sander(self, xyz_file, orca_input, forces=True): + """ + Internal function to compute in vacuo energies and forces using + ORCA via input written by sander. + + Parameters + ---------- + + xyz_file: str + The path to the xyz coordinate file for the QM region. + + orca_input: str + The path to the ORCA input file. + + forces: bool + Whether to compute and return the forces. + + Returns + ------- + + energy: float + The in vacuo QM energy in Eh. + + forces: numpy.array + The in vacuo QM forces in Eh/Bohr. + """ + + if not isinstance(xyz_file, str): + raise TypeError("'xyz_file' must be of type 'str'") + if not _os.path.isfile(xyz_file): + raise IOError(f"Unable to locate the ORCA QM xyz file: {xyz_file}") + + if not isinstance(orca_input, str): + raise TypeError("'orca_input' must be of type 'str'") + if not _os.path.isfile(orca_input): + raise IOError(f"Unable to locate the ORCA input file: {orca_input}") + + # Create a temporary working directory. + with _tempfile.TemporaryDirectory() as tmp: + # Work out the name of the input files. + inp_name = f"{tmp}/{_os.path.basename(orca_input)}" + xyz_name = f"{tmp}/{_os.path.basename(xyz_file)}" + + # Copy the files to the working directory. _shutil.copyfile(orca_input, inp_name) - _shutil.copyfile(xyz_file_qm, xyz_name) + _shutil.copyfile(xyz_file, xyz_name) # Edit the input file to remove the point charges. lines = [] @@ -179,83 +243,82 @@ def calculate_orca( with open(inp_name, "w") as f: for line in lines: f.write(line) - else: - _shutil.move(orca_input, inp_name) - _shutil.move(xyz_file_qm, xyz_name) - - # Create the ORCA command. - command = f"{calculator._orca_path} {inp_name}" - - # Run the command as a sub-process. - proc = _subprocess.run( - _shlex.split(command), - cwd=tmp, - shell=False, - stdout=_subprocess.PIPE, - stderr=_subprocess.PIPE, - ) - - if proc.returncode != 0: - raise RuntimeError("ORCA job failed!") - - # Parse the output file for the energies and gradients. - engrad = f"{tmp}/{_os.path.splitext(_os.path.basename(orca_input))[0]}.engrad" - - if not _os.path.isfile(engrad): - raise IOError(f"Unable to locate ORCA engrad file: {engrad}") - - with open(engrad, "r") as f: - is_nrg = False - is_grad = False - gradient = [] - for line in f: - if line.startswith("# The current total"): - is_nrg = True - count = 0 - elif line.startswith("# The current gradient"): - is_grad = True - count = 0 - else: - # This is an energy record. These start two lines after - # the header, following a comment. So we need to count - # one line forward. - if is_nrg and count == 1 and not line.startswith("#"): - try: - energy = float(line.strip()) - except: - IOError("Unable to parse ORCA energy record!") - # This is a gradient record. These start two lines after - # the header, following a comment. So we need to count - # one line forward. - elif is_grad and count == 1 and not line.startswith("#"): - try: - gradient.append(float(line.strip())) - except: - IOError("Unable to parse ORCA gradient record!") + + # Create the ORCA command. + command = f"{self._exe} {inp_name}" + + # Run the command as a sub-process. + proc = _subprocess.run( + _shlex.split(command), + cwd=tmp, + shell=False, + stdout=_subprocess.PIPE, + stderr=_subprocess.PIPE, + ) + + if proc.returncode != 0: + raise RuntimeError("ORCA job failed!") + + # Parse the output file for the energies and gradients. + engrad = ( + f"{tmp}/{_os.path.splitext(_os.path.basename(orca_input))[0]}.engrad" + ) + + if not _os.path.isfile(engrad): + raise IOError(f"Unable to locate ORCA engrad file: {engrad}") + + with open(engrad, "r") as f: + is_nrg = False + is_grad = False + gradient = [] + for line in f: + if line.startswith("# The current total"): + is_nrg = True + count = 0 + elif line.startswith("# The current gradient"): + is_grad = True + count = 0 else: - if is_nrg: - # We've hit the end of the records, abort. - if count == 1: - is_nrg = False - # Increment the line count since the header. - else: - count += 1 - if is_grad: - # We've hit the end of the records, abort. - if count == 1: - is_grad = False - # Increment the line count since the header. - else: - count += 1 - - if not gradient: - return energy - - # Convert the gradient to a NumPy array and reshape. (Read as a single - # column, convert to x, y, z components for each atom.) - try: - gradient = _np.array(gradient).reshape(int(len(gradient) / 3), 3) - except: - raise IOError("Number of ORCA gradient records isn't a multiple of 3!") - - return energy, gradient + # This is an energy record. These start two lines after + # the header, following a comment. So we need to count + # one line forward. + if is_nrg and count == 1 and not line.startswith("#"): + try: + energy = float(line.strip()) + except: + IOError("Unable to parse ORCA energy record!") + # This is a gradient record. These start two lines after + # the header, following a comment. So we need to count + # one line forward. + elif is_grad and count == 1 and not line.startswith("#"): + try: + gradient.append(float(line.strip())) + except: + IOError("Unable to parse ORCA gradient record!") + else: + if is_nrg: + # We've hit the end of the records, abort. + if count == 1: + is_nrg = False + # Increment the line count since the header. + else: + count += 1 + if is_grad: + # We've hit the end of the records, abort. + if count == 1: + is_grad = False + # Increment the line count since the header. + else: + count += 1 + + if not forces: + return energy + + # Convert the gradient to a NumPy array and reshape. (Read as a single + # column, convert to x, y, z components for each atom.) + try: + gradient = _np.array(gradient).reshape(int(len(gradient) / 3), 3) + except: + raise IOError("Number of ORCA gradient records isn't a multiple of 3!") + + return energy, -gradient diff --git a/emle/_backends/_rascal.py b/emle/_backends/_rascal.py index 9f530aa..c988a1b 100644 --- a/emle/_backends/_rascal.py +++ b/emle/_backends/_rascal.py @@ -22,60 +22,142 @@ """Rascal in-vacuo backend implementation.""" -__all__ = ["calculate_rascal"] +__all__ = ["Rascal"] +import ase as _ase +import os as _os +import pickle as _pickle import numpy as _np -from .._constants import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM +from .._units import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM +from ._backend import Backend as _Backend -def calculate_rascal(calculator, atoms, gradient=True): - """ - Internal function to compute delta-learning corrections using Rascal. - - Parameters - ---------- - - calculator: :class:`emle.calculator.EMLECalculator` - The EMLECalculator instance. - - atoms: ase.Atoms - The atoms in the QM region. - - gradient: bool - Whether to return the gradient - - Returns - ------- - - energy: float - The in vacuo MM energy in Eh. - gradients: numpy.array - The in vacuo MM gradient in Eh/Bohr. +class Rascal(_Backend): + """ + Rascal in-vacuo backend implementation. """ - if not isinstance(atoms, _ase.Atoms): - raise TypeError("'atoms' must be of type 'ase.Atoms'") - - # Rascal requires periodic box information so we translate the atoms so that - # the lowest (x, y, z) position is zero, then set the cell to the maximum - # position. - atoms.positions -= _np.min(atoms.positions, axis=0) - atoms.cell = _np.max(atoms.positions, axis=0) - - # Run the calculation. - calculator._rascal_calc.calculate(atoms) - - # Get the energy. - energy = calculator._rascal_calc.results["energy"][0] * _EV_TO_HARTREE - - if not gradient: - return energy - - # Get the gradient. - gradient = ( - -calculator._rascal_calc.results["forces"] * _EV_TO_HARTREE * _BOHR_TO_ANGSTROM - ) - - return energy, gradient + def __init__(self, model): + """ + Constructor. + + Parameters + ---------- + + model: str + The path to the Rascal model file. + """ + + if not isinstance(model, str): + raise TypeError("'model' must be of type 'str'") + + # Convert to an absolute path. + abs_model = _os.path.abspath(model) + + # Make sure the model file exists. + if not _os.path.isfile(abs_model): + raise IOError(f"Unable to locate Rascal model file: '{model}'") + + # Load the model. + try: + self._model = _pickle.load(open(abs_model, "rb")) + except: + raise IOError(f"Unable to load Rascal model file: '{model}'") + + # Try to get the SOAP parameters from the model. + try: + soap = self._model.get_representation_calculator() + except: + raise ValueError("Unable to extract SOAP parameters from Rascal model!") + + # Create the Rascal calculator. + try: + from rascal.models.asemd import ASEMLCalculator as _ASEMLCalculator + + self._calculator = _ASEMLCalculator(self._model, soap) + except: + raise RuntimeError("Unable to create Rascal calculator!") + + def calculate(atomic_numbers, xyz, forces=True): + """ + Compute the energy and forces. + + Parameters + ---------- + + atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,) + The atomic numbers of the atoms in the QM region. + + xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3) + The coordinates of the atoms in the QM region in Angstrom. + + forces: bool + Whether to calculate and return forces. + + Returns + ------- + + energy: float + The in-vacuo energy in Eh. + + forces: numpy.ndarray + The in-vacuo forces in Eh/Bohr. + """ + + if not isinstance(atomic_numbers, _np.ndarray): + raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") + if not isinstance(xyz, _np.ndarray): + raise TypeError("'xyz' must be of type 'numpy.ndarray'") + + if len(atomic_numbers) != len(xyz): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not " + f"match length of 'xyz' ({len(xyz)})" + ) + + # Lists to store results. + results_energy = [] + results_forces = [] + + # Loop over batches. + for i, (atomic_numbers_i, xyz_i) in enumerate(zip(atomic_numbers, xyz)): + if len(atomic_numbers_i) != len(xyz_i): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers_i)}) does not " + f"match length of 'xyz' ({len(xyz_i)}) for index {i}" + ) + + # Create an ASE atoms object. + atoms = _ase.Atoms( + numbers=atomic_numbers_i, + positions=xyz_i, + ) + + # Rascal requires periodic box information so we translate the atoms so that + # the lowest (x, y, z) position is zero, then set the cell to the maximum + # position. + atoms.positions -= _np.min(atoms.positions, axis=0) + atoms.cell = _np.max(atoms.positions, axis=0) + + # Run the calculation. + self._calculator.calculate(atoms) + + # Get the energy. + results_energy.append( + self._calculator.results["energy"][0] * _EV_TO_HARTREE + ) + + if forces: + results_forces.append( + self._calculator.results["forces"] + * _EV_TO_HARTREE + * _BOHR_TO_ANGSTROM + ) + + # Convert to NumPy arrays. + results_energy = _np.array(results_energy) + results_forces = _np.array(results_forces) + + return results_energy, results_forces if forces else results_energy diff --git a/emle/_backends/_sander.py b/emle/_backends/_sander.py index 9360e82..5631d9b 100644 --- a/emle/_backends/_sander.py +++ b/emle/_backends/_sander.py @@ -22,15 +22,18 @@ """Sander in-vacuo backend implementation.""" -__all__ = ["calculate_sander"] +__all__ = ["Sander"] import ase as _ase from ase.calculators.calculator import Calculator as _Calculator from ase.calculators.calculator import all_changes as _all_changes +import os as _os import numpy as _np import sander as _sander -from .._constants import _KCAL_MOL_TO_HARTREE, _BOHR_TO_ANGSTROM +from .._units import _KCAL_MOL_TO_HARTREE, _BOHR_TO_ANGSTROM + +from ._backend import Backend as _Backend class SanderCalculator(_Calculator): @@ -111,58 +114,103 @@ def _get_box(atoms): return atoms.get_cell().cellpar() -def calculate_sander(atoms, parm7, is_gas=True, gradient=True): +class Sander(_Backend): + """ + Class for in-vacuo calculations using the AMBER Sander molecular + dynamics engine. """ - Internal function to compute in vacuo energies and gradients using - pysander. - Parameters - ---------- + def __init__(self, parm7, is_gas=True): + """ + Constructor. + """ - atoms: ase.Atoms - The atoms in the QM region. + if not isinstance(parm7, str): + raise TypeError("'parm7' must be of type 'str'") - parm7: str - The path to the AMBER topology file. + if not _os.path.isfile(parm7): + raise FileNotFoundError(f"Could not find AMBER topology file: '{parm7}'") - bool: is_gas - Whether this is a gas phase calculation. + if not isinstance(is_gas, bool): + raise TypeError("'is_gas' must be of type 'bool'") - gradient: bool - Whether to return the gradient. + self._parm7 = parm7 + self._is_gas = is_gas - Returns - ------- + def calculate(self, atomic_numbers, xyz, forces=True): + """ + Compute the energy and forces. - energy: float - The in vacuo MM energy in Eh. + Parameters + ---------- - gradients: numpy.array - The in vacuo MM gradient in Eh/Bohr. - """ + atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,) + The atomic numbers of the atoms in the QM region. + + xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3) + The coordinates of the atoms in the QM region in Angstrom. + + forces: bool + Whether to calculate and return forces. + + Returns + ------- + + energy: float + The in-vacuo energy in Eh. + + forces: numpy.ndarray + The in-vacuo forces in Eh/Bohr. + """ + + if not isinstance(atomic_numbers, _np.ndarray): + raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") + + if not isinstance(xyz, _np.ndarray): + raise TypeError("'xyz' must be of type 'numpy.ndarray'") + + if len(atomic_numbers) != len(xyz): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not " + f"match length of 'xyz' ({len(xyz)})" + ) + + if not isinstance(forces, bool): + raise TypeError("'forces' must be of type 'bool'") - if not isinstance(atoms, _ase.Atoms): - raise TypeError("'atoms' must be of type 'ase.Atoms'") + # Lists to store results. + results_energy = [] + results_forces = [] - if not isinstance(parm7, str): - raise TypeError("'parm7' must be of type 'str'") + # Loop over batches. + for i, (atomic_numbers_i, xyz_i) in enumerate(zip(atomic_numbers, xyz)): + if len(atomic_numbers_i) != len(xyz_i): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers_i)}) does not " + f"match length of 'xyz' ({len(xyz_i)}) for index {i}" + ) - if not isinstance(is_gas, bool): - raise TypeError("'is_gas' must be of type 'bool'") + # Create an ASE atoms object. + atoms = _ase.Atoms( + numbers=atomic_numbers_i, + positions=xyz_i, + ) - # Instantiate a SanderCalculator. - sander_calculator = SanderCalculator(atoms, parm7, is_gas) + # Instantiate a SanderCalculator. + sander_calculator = SanderCalculator(atoms, self._parm7, self._is_gas) - # Run the calculation. - sander_calculator.calculate(atoms) + # Run the calculation. + sander_calculator.calculate(atoms) - # Get the energy. - energy = sander_calculator.results["energy"] + # Get the energy. + results_energy.append(sander_calculator.results["energy"]) - if not gradient: - return energy + # Get the force. + if forces: + results_forces.append(sander_calculator.results["forces"]) - # Get the gradient. - gradient = -sander_calculator.results["forces"] + # Convert the results to NumPy arrays. + results_energy = _np.array(results_energy) + results_forces = _np.array(results_forces) - return energy, gradient + return results_energy, results_forces if forces else results_energy diff --git a/emle/_backends/_sqm.py b/emle/_backends/_sqm.py index 6999054..c05f97f 100644 --- a/emle/_backends/_sqm.py +++ b/emle/_backends/_sqm.py @@ -22,7 +22,7 @@ """SQM in-vacuo backend implementation.""" -__all__ = ["calculate_sqm"] +__all__ = ["SQM"] import numpy as _np import os as _os @@ -30,149 +30,222 @@ import subprocess as _subprocess import tempfile as _tempfile -from .._constants import _KCAL_MOL_TO_HARTREE, _BOHR_TO_ANGSTROM +from .._units import _KCAL_MOL_TO_HARTREE, _BOHR_TO_ANGSTROM +from ._backend import Backend as _Backend -def calculate_sqm(calculator, xyz, atomic_numbers, qm_charge, gradient=True): + +class SQM(_Backend): + """ + SQM in-vacuo backend implementation. """ - Internal function to compute in vacuo energies and gradients using - SQM. - Parameters - ---------- + def __init__(self, parm7, theory="DFTB3", qm_charge=0): + """ + Constructor. - calculator: :class:`emle.calculator.EMLECalculator` - The EMLECalculator instance. + Parameters + ---------- - xyz: numpy.array - The coordinates of the QM region in Angstrom. + parm7: str + The path to the AMBER topology file for the QM region. - atomic_numbers: numpy.array - The atomic numbers of the atoms in the QM region. + theory: str + The SQM theory to use. - qm_charge: int - The charge on the QM region. + qm_charge: int + The charge on the QM region. + """ - gradient: bool - Whether to return the gradient. + # Make sure a topology file has been set. + if parm7 is None: + raise ValueError("'parm7' must be specified when using the SQM backend") - Returns - ------- + try: + from sander import AmberParm as _AmberParm - energy: float - The in vacuo QM energy in Eh. + amber_parm = _AmberParm(parm7) + except: + raise IOError(f"Unable to load AMBER topology file: '{parm7}'") + self._parm7 = parm7 - gradients: numpy.array - The in vacuo QM gradient in Eh/Bohr. - """ + if not isinstance(theory, str): + raise TypeError("'theory' must be of type 'str'.") + + # Store the atom names for the QM region. + self._atom_names = [atom.name for atom in amber_parm.atoms] + + # Strip whitespace. + self._theory = theory.replace(" ", "") + + # Validate the QM charge. + if not isinstance(qm_charge, int): + raise TypeError("'qm_charge' must be of type 'int'.") + self._qm_charge = qm_charge + + def calculate(self, atomic_numbers, xyz, qm_charge=None, forces=True): + """ + Compute the energy and forces. + + Parameters + ---------- + + atomic_numbers: numpy.ndarray + The atomic numbers of the atoms in the QM region. + + xyz: numpy.ndarray + The coordinates of the atoms in the QM region in Angstrom. + + forces: bool + Whether to calculate and return forces. + + Returns + ------- + + energy: float + The in-vacuo energy in Eh. + + forces: numpy.ndarray + The in-vacuo gradient in Eh/Bohr. + """ + + if not isinstance(xyz, _np.ndarray): + raise TypeError("'xyz' must be of type 'numpy.ndarray'") + if xyz.dtype != _np.float64: + raise TypeError("'xyz' must have dtype 'float64'.") + + if not isinstance(atomic_numbers, _np.ndarray): + raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") + + if qm_charge is None: + qm_charge = self._qm_charge + + else: + if not isinstance(qm_charge, int): + raise TypeError("'qm_charge' must be of type 'int'.") + + if len(atomic_numbers) != len(xyz): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not " + f"match length of 'xyz' ({len(xyz)})" + ) + + # Lists to store results. + results_energy = [] + results_forces = [] - if not isinstance(xyz, _np.ndarray): - raise TypeError("'xyz' must be of type 'numpy.ndarray'") - if xyz.dtype != _np.float64: - raise TypeError("'xyz' must have dtype 'float64'.") - - if not isinstance(atomic_numbers, _np.ndarray): - raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") - - if not isinstance(qm_charge, int): - raise TypeError("'qm_charge' must be of type 'int'.") - - # Store the number of QM atoms. - num_qm = len(atomic_numbers) - - # Create a temporary working directory. - with _tempfile.TemporaryDirectory() as tmp: - # Work out the name of the input files. - inp_name = f"{tmp}/sqm.in" - out_name = f"{tmp}/sqm.out" - - # Write the input file. - with open(inp_name, "w") as f: - # Write the header. - f.write("Run semi-empirical minimization\n") - f.write(" &qmmm\n") - f.write(f" qm_theory='{calculator._sqm_theory}',\n") - f.write(f" qmcharge={qm_charge},\n") - f.write(" maxcyc=0,\n") - f.write(" verbosity=4,\n") - f.write(f" /\n") - - # Write the QM region coordinates. - for num, name, xyz_qm in zip( - atomic_numbers, calculator._sqm_atom_names, xyz - ): - x, y, z = xyz_qm - f.write(f" {num} {name} {x:.4f} {y:.4f} {z:.4f}\n") - - # Create the SQM command. - command = f"sqm -i {inp_name} -o {out_name}" - - # Run the command as a sub-process. - proc = _subprocess.run( - _shlex.split(command), - shell=False, - stdout=_subprocess.PIPE, - stderr=_subprocess.PIPE, - ) - - if proc.returncode != 0: - raise RuntimeError("SQM job failed!") - - if not _os.path.isfile(out_name): - raise IOError(f"Unable to locate SQM output file: {out_name}") - - with open(out_name, "r") as f: - is_converged = False - is_force = False - num_forces = 0 - forces = [] - for line in f: - # Skip lines prior to convergence. - if line.startswith(" QMMM SCC-DFTB: SCC-DFTB for step 0 converged"): - is_converged = True - continue - - # Now process the final energy and force records. - if is_converged: - if line.startswith(" Total SCF energy"): - try: - energy = float(line.split()[4]) - except: - raise IOError(f"Unable to parse SCF energy record: {line}") - elif line.startswith( - "QMMM: Forces on QM atoms from SCF calculation" + # Loop over batches. + for i, (atomic_numbers_i, xyz_i) in enumerate(zip(atomic_numbers, xyz)): + if len(atomic_numbers_i) != len(xyz_i): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers_i)}) does not " + f"match length of 'xyz' ({len(xyz_i)}) for index {i}" + ) + + # Store the number of QM atoms. + num_qm = len(atomic_numbers_i) + + # Create a temporary working directory. + with _tempfile.TemporaryDirectory() as tmp: + # Work out the name of the input files. + inp_name = f"{tmp}/sqm.in" + out_name = f"{tmp}/sqm.out" + + # Write the input file. + with open(inp_name, "w") as f: + # Write the header. + f.write("Run semi-empirical minimization\n") + f.write(" &qmmm\n") + f.write(f" qm_theory='{self._theory}',\n") + f.write(f" qmcharge={qm_charge},\n") + f.write(" maxcyc=0,\n") + f.write(" verbosity=4,\n") + f.write(f" /\n") + + # Write the QM region coordinates. + for num, name, xyz_qm in zip( + atomic_numbers_i, self._atom_names, xyz_i ): - # Flag that force records are coming. - is_force = True - elif is_force: - try: - force = [float(x) for x in line.split()[3:6]] - except: - raise IOError( - f"Unable to parse SCF gradient record: {line}" - ) - - # Update the forces. - forces.append(force) - num_forces += 1 - - # Exit if we've got all the forces. - if num_forces == num_qm: - is_force = False - break - - if num_forces != num_qm: - raise IOError("Didn't find force records for all QM atoms in the SQM output!") - - # Convert units. - energy *= _KCAL_MOL_TO_HARTREE - - if not gradient: - return energy - - # Convert the gradient to a NumPy array and reshape. Misleading comment - # in sqm output, the "forces" are actually gradients so no need to - # multiply by -1 - gradient = _np.array(forces) * _KCAL_MOL_TO_HARTREE * _BOHR_TO_ANGSTROM - - return energy, gradient + x, y, z = xyz_qm + f.write(f" {num} {name} {x:.4f} {y:.4f} {z:.4f}\n") + + # Create the SQM command. + command = f"sqm -i {inp_name} -o {out_name}" + + # Run the command as a sub-process. + proc = _subprocess.run( + _shlex.split(command), + shell=False, + stdout=_subprocess.PIPE, + stderr=_subprocess.PIPE, + ) + + if proc.returncode != 0: + raise RuntimeError("SQM job failed!") + + if not _os.path.isfile(out_name): + raise IOError(f"Unable to locate SQM output file: {out_name}") + + with open(out_name, "r") as f: + is_converged = False + is_force = False + num_forces = 0 + forces = [] + for line in f: + # Skip lines prior to convergence. + if line.startswith( + " QMMM SCC-DFTB: SCC-DFTB for step 0 converged" + ): + is_converged = True + continue + + # Now process the final energy and force records. + if is_converged: + if line.startswith(" Total SCF energy"): + try: + energy = float(line.split()[4]) + except: + raise IOError( + f"Unable to parse SCF energy record: {line}" + ) + elif line.startswith( + "QMMM: Forces on QM atoms from SCF calculation" + ): + # Flag that force records are coming. + is_force = True + elif is_force: + try: + force = [float(x) for x in line.split()[3:6]] + except: + raise IOError( + f"Unable to parse SCF gradient record: {line}" + ) + + # Update the forces. + forces.append(force) + num_forces += 1 + + # Exit if we've got all the forces. + if num_forces == num_qm: + is_force = False + break + + if num_forces != num_qm: + raise IOError( + "Didn't find force records for all QM atoms in the SQM output!" + ) + + # Convert units. + results_energy.append(energy * _KCAL_MOL_TO_HARTREE) + + # Convert the gradient to a NumPy array and reshape. Misleading comment + # in sqm output, the "forces" are actually gradients so need to multiply by -1 + results_forces.append( + -_np.array(forces) * _KCAL_MOL_TO_HARTREE * _BOHR_TO_ANGSTROM + ) + + # Convert to NumPy arrays. + results_energy = _np.array(results_energy) + results_forces = _np.array(results_forces) + + return results_energy, results_forces if forces else results_energy diff --git a/emle/_backends/_xtb.py b/emle/_backends/_xtb.py index 35021e1..0c9b3d6 100644 --- a/emle/_backends/_xtb.py +++ b/emle/_backends/_xtb.py @@ -22,50 +22,90 @@ """XTB in-vacuo backend implementation.""" -__all__ = ["calculate_xtb"] +__all__ = ["XTB"] -from .._constants import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM +import ase as _ase +import numpy as _np +from .._units import _EV_TO_HARTREE, _BOHR_TO_ANGSTROM -def calculate_xtb(atoms, gradient=True): +from ._backend import Backend as _Backend + + +class XTB(_Backend): + """ + XTB in-vacuo backend implementation. """ - Internal function to compute in vacuo energies and gradients using - the xtb-python interface. Currently only uses the "GFN2-xTB" method. - Parameters - ---------- + @staticmethod + def calculate(atomic_numbers, xyz, forces=True): + """ + Compute the energy and forces. - atoms: ase.Atoms - The atoms in the QM region. + Parameters + ---------- - gradient: bool - Whether to return the gradient. + atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,) + The atomic numbers of the atoms in the QM region. - Returns - ------- + xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3) + The coordinates of the atoms in the QM region in Angstrom. - energy: float - The in vacuo ML energy in Eh. + forces: bool + Whether to calculate and return forces. - gradients: numpy.array - The in vacuo gradient in Eh/Bohr. - """ + Returns + ------- + + energy: float + The in-vacuo energy in Eh. + + forces: numpy.ndarray + The in-vacuo forces in Eh/Bohr. + """ + + if not isinstance(atomic_numbers, _np.ndarray): + raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") + if not isinstance(xyz, _np.ndarray): + raise TypeError("'xyz' must be of type 'numpy.ndarray'") + + if len(atomic_numbers) != len(xyz): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not " + f"match length of 'xyz' ({len(xyz)})" + ) + + from xtb.ase.calculator import XTB as _XTB + + # Lists to store results. + results_energy = [] + results_forces = [] - if not isinstance(atoms, _ase.Atoms): - raise TypeError("'atoms' must be of type 'ase.Atoms'") + # Loop over batches. + for i, (atomic_numbers_i, xyz_i) in enumerate(zip(atomic_numbers, xyz)): + if len(atomic_numbers_i) != len(xyz_i): + raise ValueError( + f"Length of 'atomic_numbers' ({len(atomic_numbers_i)}) does not " + f"match length of 'xyz' ({len(xyz_i)}) for index {i}" + ) - from xtb.ase.calculator import XTB as _XTB + # Create an ASE atoms object. + atoms = _ase.Atoms( + numbers=atomic_numbers_i, + positions=xyz_i, + ) - # Create the calculator. - atoms.calc = _XTB(method="GFN2-xTB") + # Create the calculator. + atoms.calc = _XTB(method="GFN2-xTB") - # Get the energy. - energy = atoms.get_potential_energy() * _EV_TO_HARTREE + # Get the energy. + results_energy.append(atoms.get_potential_energy() * _EV_TO_HARTREE) - if not gradient: - return energy + if forces: + results_forces.append(atoms.get_forces() * _BOHR_TO_ANGSTROM) - # Get the gradient. - gradient = -atoms.get_forces() * _BOHR_TO_ANGSTROM + # Convert to NumPy arrays. + results_energy = _np.array(results_energy) + results_forces = _np.array(results_forces) - return energy, gradient + return results_energy, results_forces if forces else results_energy diff --git a/emle/_constants.py b/emle/_units.py similarity index 100% rename from emle/_constants.py rename to emle/_units.py diff --git a/emle/calculator.py b/emle/calculator.py index dcafb8e..7eb7958 100644 --- a/emle/calculator.py +++ b/emle/calculator.py @@ -30,7 +30,6 @@ from loguru import logger as _logger import os as _os -import pickle as _pickle import numpy as _np import sys as _sys import yaml as _yaml @@ -42,7 +41,7 @@ from .models import EMLE as _EMLE -from emle._constants import ( +from emle._units import ( _NANOMETER_TO_BOHR, _BOHR_TO_ANGSTROM, _HARTREE_TO_KJ_MOL, @@ -539,81 +538,6 @@ def __init__( self._parm7 = abs_parm7 - if deepmd_model is not None and backend == "deepmd": - # We support a str, or list/tuple of strings. - if not isinstance(deepmd_model, (str, list, tuple)): - msg = "'deepmd_model' must be of type 'str', or a list of 'str' types" - _logger.error(msg) - raise TypeError(msg) - else: - # Make sure all values are strings. - if isinstance(deepmd_model, (list, tuple)): - for mod in deepmd_model: - if not isinstance(mod, str): - msg = "'deepmd_model' must be of type 'str', or a list of 'str' types" - _logger.error(msg) - raise TypeError(msg) - # Convert to a list. - else: - deepmd_model = [deepmd_model] - - # Make sure all of the model files exist. - for model in deepmd_model: - if not _os.path.isfile(model): - msg = f"Unable to locate DeePMD model file: '{model}'" - _logger.error(msg) - raise IOError(msg) - - # Validate the deviation file. - if deepmd_deviation is not None: - if not isinstance(deepmd_deviation, str): - msg = "'deepmd_deviation' must be of type 'str'" - _logger.error(msg) - raise TypeError(msg) - - self._deepmd_deviation = deepmd_deviation - - if deepmd_deviation_threshold is not None: - try: - deepmd_deviation_threshold = float( - deepmd_deviation_threshold - ) - except: - msg = "'deepmd_deviation_threshold' must be of type 'float'" - _logger.error(msg) - raise TypeError(msg) - - self._deepmd_deviation_threshold = deepmd_deviation_threshold - - # Store the list of model files, removing any duplicates. - self._deepmd_model = list(set(deepmd_model)) - if len(self._deepmd_model) == 1 and deepmd_deviation: - msg = ( - "More that one DeePMD model needed to calculate the deviation!" - ) - _logger.error(msg) - raise IOError(msg) - - # Initialise DeePMD backend attributes. - try: - from deepmd.infer import DeepPot as _DeepPot - - self._deepmd_potential = [ - _DeepPot(model) for model in self._deepmd_model - ] - except: - msg = "Unable to create the DeePMD potentials!" - _logger.error(msg) - raise RuntimeError(msg) - else: - if self._backend == "deepmd": - msg = "'deepmd_model' must be specified when using the DeePMD backend!" - _logger.error(msg) - raise ValueError(msg) - - # Set the deviation file to None in case it was spuriously set. - self._deepmd_deviation = None - # Validate the QM XYZ file options. if qm_xyz_file is None: @@ -649,83 +573,128 @@ def __init__( raise ValueError(msg) self._qm_xyz_frequency = qm_xyz_frequency - # Validate the QM method for SQM. - if backend == "sqm": - if sqm_theory is None: - sqm_theory = "DFTB3" + # Validate and create the backend. - if not isinstance(sqm_theory, str): - msg = "'sqm_theory' must be of type 'str'" - _logger.error(msg) - raise TypeError(msg) + if self._backend == "torchani": + from .models import ANI2xEMLE as _ANI2xEMLE - # Make sure a topology file has been set. - if parm7 is None: - msg = "'parm7' must be specified when using the SQM backend" - _logger.error(msg) - raise ValueError(msg) + ani2x_emle = _ANI2xEMLE( + emle_model=model, + emle_method=method, + alpha_mode=alpha_mode, + mm_charges=self._mm_charges, + qm_charge=self._qm_charge, + model_index=ani2x_model_index, + atomic_numbers=atomic_numbers, + device=self._device, + ) - # Strip whitespace. - self._sqm_theory = sqm_theory.replace(" ", "") + # Convert to TorchScript. + self._ani2x_emle = _torch.jit.script(ani2x_emle).eval() - try: - from sander import AmberParm as _AmberParm + # Store the model index. + self._ani2x_model_index = ani2x_model_index - amber_parm = _AmberParm(self._parm7) - except: - msg = f"Unable to load AMBER topology file: '{parm7}'" - _logger.error(msg) - raise IOError(msg) + # Initialise the MACEMLE model. + elif self._backend == "mace": + from .models import MACEEMLE as _MACEEMLE - # Store the atom names for the QM region. - self._sqm_atom_names = [atom.name for atom in amber_parm.atoms] + mace_emle = _MACEEMLE( + emle_model=model, + emle_method=method, + alpha_mode=alpha_mode, + mm_charges=self._mm_charges, + qm_charge=self._qm_charge, + mace_model=mace_model, + atomic_numbers=atomic_numbers, + device=self._device, + ) - # Make sure a QM topology file is specified for the 'sander' backend. - elif backend == "sander": - if parm7 is None: - msg = "'parm7' must be specified when using the 'sander' backend" + # Convert to TorchScript. + self._mace_emle = _torch.jit.script(mace_emle).eval() + + # Store the MACE model. + self._mace_model = mace_model + + # If the backend is ORCA, then try to find the executable. + elif self._backend == "orca": + if orca_path is None: + msg = "'orca_path' must be specified when using the ORCA backend" _logger.error(msg) raise ValueError(msg) - # Validate and load the Rascal model. - if rascal_model is not None: - if not isinstance(rascal_model, str): - msg = "'rascal_model' must be of type 'str'" + if not isinstance(orca_path, str): + msg = "'orca_path' must be of type 'str'" _logger.error(msg) raise TypeError(msg) # Convert to an absolute path. - abs_rascal_model = _os.path.abspath(rascal_model) + abs_orca_path = _os.path.abspath(orca_path) - # Make sure the model file exists. - if not _os.path.isfile(abs_rascal_model): - msg = f"Unable to locate Rascal model file: '{rascal_model}'" + if not _os.path.isfile(abs_orca_path): + msg = f"Unable to locate ORCA executable: '{orca_path}'" _logger.error(msg) raise IOError(msg) - # Load the model. + self._orca_path = abs_orca_path + + elif backend == "deepmd": try: - self._rascal_model = _pickle.load(open(abs_rascal_model, "rb")) - except: - msg = f"Unable to load Rascal model file: '{rascal_model}'" + from ._backends import DeepMD + + self._backend = DeepMD( + model=deepmd_model, + deviation=deepmd_deviation, + deviation_threshold=deepmd_deviation_threshold, + ) + self._deepmd_model = self._backend._model + self._deepmd_deviation = self._backend._deviation + self._deepmd_deviation_threshold = self._backend._deviation_threshold + except Exception as e: + msg = "Unable to create DeePMD backend: {e}" _logger.error(msg) - raise IOError(msg) + raise RuntimeError(msg) - # Try to get the SOAP parameters from the model. + elif backend == "sqm": try: - soap = self._rascal_model.get_representation_calculator() - except: - msg = "Unable to extract SOAP parameters from Rascal model!" + from ._backends import SQM + + self._backend = SQM(parm7, theory=sqm_theory) + self._sqm_theory = self._backend._theory + except Exception as e: + msg = "Unable to create SQM backend: {e}" _logger.error(msg) - raise ValueError(msg) + raise RuntimeError(msg) - # Create the Rascal calculator. + elif backend == "xtb": try: - from rascal.models.asemd import ASEMLCalculator as _ASEMLCalculator + from ._backends import XTB - self._rascal_calc = _ASEMLCalculator(self._rascal_model, soap) - except: - msg = "Unable to create Rascal calculator!" + self._backend = XTB() + except Exception as e: + msg = "Unable to create XTB backend: {e}" + _logger.error(msg) + raise RuntimeError(msg) + + # Make sure a QM topology file is specified for the 'sander' backend. + elif backend == "sander": + try: + from ._backends import Sander + + self._backend = Sander(parm7) + except Exception as e: + msg = "Unable to create Sander backend: {e}" + _logger.error(msg) + raise RuntimeError(msg) + + # Validate and load the Rascal model for delta-learning corrections. + if rascal_model is not None: + try: + from ._backends import Rascal + + self._rascal_calc = Rascal(rascal_model) + except Exception as e: + msg = "Unable to create Rascal backend: {e}" _logger.error(msg) raise RuntimeError(msg) @@ -891,70 +860,6 @@ def __init__( else: self._orca_template = None - # Initialise the ANI2xEMLE model. - if self._backend == "torchani": - from .models import ANI2xEMLE as _ANI2xEMLE - - ani2x_emle = _ANI2xEMLE( - emle_model=model, - emle_method=method, - alpha_mode=alpha_mode, - mm_charges=self._mm_charges, - qm_charge=self._qm_charge, - model_index=ani2x_model_index, - atomic_numbers=atomic_numbers, - device=self._device, - ) - - # Convert to TorchScript. - self._ani2x_emle = _torch.jit.script(ani2x_emle).eval() - - # Store the model index. - self._ani2x_model_index = ani2x_model_index - - # Initialise the MACEMLE model. - elif self._backend == "mace": - from .models import MACEEMLE as _MACEEMLE - - mace_emle = _MACEEMLE( - emle_model=model, - emle_method=method, - alpha_mode=alpha_mode, - mm_charges=self._mm_charges, - qm_charge=self._qm_charge, - mace_model=mace_model, - atomic_numbers=atomic_numbers, - device=self._device, - ) - - # Convert to TorchScript. - self._mace_emle = _torch.jit.script(mace_emle).eval() - - # Store the MACE model. - self._mace_model = mace_model - - # If the backend is ORCA, then try to find the executable. - elif self._backend == "orca": - if orca_path is None: - msg = "'orca_path' must be specified when using the ORCA backend" - _logger.error(msg) - raise ValueError(msg) - - if not isinstance(orca_path, str): - msg = "'orca_path' must be of type 'str'" - _logger.error(msg) - raise TypeError(msg) - - # Convert to an absolute path. - abs_orca_path = _os.path.abspath(orca_path) - - if not _os.path.isfile(abs_orca_path): - msg = f"Unable to locate ORCA executable: '{orca_path}'" - _logger.error(msg) - raise IOError(msg) - - self._orca_path = abs_orca_path - # Intialise the maximum number of MM atoms that have been seen. self._max_mm_atoms = 0 @@ -1078,82 +983,28 @@ def run(self, path=None): xyz_mm = _np.append(xyz_mm, xyz_mm_pad, axis=0) charges_mm = _np.append(charges_mm, charges_mm_pad) - # Convert the QM atomic numbers to elements. - elements = [] - for id in atomic_numbers: - elements.append(_ase.Atom(id).symbol) - # First try to use the specified backend to compute in vacuo # energies and (optionally) gradients. # Internal backends. if not self._is_external_backend: - # TorchANI. - if self._backend == "torchani": + # Torch based backends. + if self._backend in ["torchani", "mace"]: # In vacuo and embedding energies and gradients are computed in one go - # using the ANI2x EMLE model. + # using the EMLE Torch models. pass - # DeePMD. - elif self._backend == "deepmd": - try: - from ._backends import calculate_deepmd as _calculate_deepmd - - E_vac, grad_vac = _calculate_deepmd(self, xyz_qm, elements) - except Exception as e: - msg = f"Failed to calculate in vacuo energies using DeePMD backend: {e}" - _logger.error(msg) - raise RuntimeError(msg) - - # ORCA. - elif self._backend == "orca": - try: - from ._backends import calculate_orca as _calculate_orca - - E_vac, grad_vac = _calculate_orca(self, orca_input, xyz_file_qm) - except Exception as e: - msg = ( - f"Failed to calculate in vacuo energies using ORCA backend: {e}" - ) - _logger.error(msg) - raise RuntimeError(msg) - - # Sander. - elif self._backend == "sander": - try: - from ._backends import calculate_sander as _calculate_sander - - E_vac, grad_vac = _calculate_sander(atoms, self._parm7, is_gas=True) - except Exception as e: - msg = f"Failed to calculate in vacuo energies using Sander backend: {e}" - _logger.error(msg) - raise RuntimeError(msg) - - # SQM. - elif self._backend == "sqm": + # Non-Torch backends. + elif backend is not None: try: - from ._backends import calculate_sqm as _calculate_sqm - - E_vac, grad_vac = _calculate_sqm( - self, xyz_qm, atomic_numbers, charge + energy, forces = self._backend( + _np.expand_dims(atomic_numbers, axis=0), + _np.expand_dims(xyz_qm, axis=0), ) + E_vac = energy[0] + grad_vac = -forces[0] except Exception as e: - msg = ( - f"Failed to calculate in vacuo energies using SQM backend: {e}" - ) - _logger.error(msg) - raise RuntimeError(msg) - - # XTB. - elif self._backend == "xtb": - try: - from ._backends import calculate_xtb as _calculate_xtb - - E_vac, grad_vac = _calculate_xtb(atoms) - except Exception as e: - msg = ( - f"Failed to calculate in vacuo energies using XTB backend: {e}" - ) + msg = f"Failed to calculate in vacuo energies using {self._backend} backend: {e}" _logger.error(msg) raise RuntimeError(msg) @@ -1175,9 +1026,12 @@ def run(self, path=None): # Apply delta-learning corrections using Rascal. if self._is_delta and self._backend is not None: try: - from ._backends import calculate_rascal as _calculate_rascal - - delta_E, delta_grad = _calculate_rascal(self, atoms) + energy, forces = self._rascal_calc( + _np.expand_dims(atomic_numbers, axis=0), + _np.expand_dims(xyz_qm, axis=0), + ) + delta_E = energy[0] + delta_grad = -forces[0] except Exception as e: msg = f"Failed to compute delta-learning corrections using Rascal: {e}" _logger.error(msg) @@ -1260,13 +1114,18 @@ def run(self, path=None): if self._is_interpolate: # Compute the in vacuo MM energy and gradients for the QM region. if self._backend != None: - from ._backends import calculate_sander as _calculate_sander + from ._backends import Sander - E_mm_qm_vac, grad_mm_qm_vac = _calculate_sander( - atoms=atoms, - parm7=self._parm7, - is_gas=True, + # Create a Sander backend instance. + backend = Sander(self._parm7) + + # Compute the in vacuo MM energy and forces for the QM region. + energy, forces = backend.calculate( + _np.expand_dims(atomic_numbers_np, axis=0), + _np.expand_dims(xyz_qm_np, axis=0), ) + E_mm_qm_vac = energy[0] + grad_mm_qm_vac = -forces[0] # If no backend is specified, then the MM energy and gradients are zero. else: @@ -1503,86 +1362,28 @@ def _sire_callback(self, atomic_numbers, charges_mm, xyz_qm, xyz_mm, idx_mm=None xyz_mm = _np.append(xyz_mm, xyz_mm_pad, axis=0) charges_mm = _np.append(charges_mm, charges_mm_pad) - # Convert the QM atomic numbers to elements. - elements = [] - for id in atomic_numbers: - elements.append(_ase.Atom(id).symbol) - # First try to use the specified backend to compute in vacuo # energies and (optionally) gradients. # Internal backends. if not self._is_external_backend: - # TorchANI. - if self._backend == "torchani": + # TorchANI and MACE. + if self._backend in ["torchani", "mace"]: # In vacuo and embedding energies and gradients are computed in one go - # using the ANI2x EMLE model. + # using the EMLE Torch models. pass - # DeePMD. - elif self._backend == "deepmd": - try: - from ._backends import calculate_deepmd as _calculate_deepmd - - E_vac, grad_vac = _calculate_deepmd(self, xyz_qm, elements) - except Exception as e: - msg = f"Failed to calculate in vacuo energies using DeePMD backend: {e}" - _logger.error(msg) - raise RuntimeError(msg) - - # ORCA. - elif self._backend == "orca": - try: - from ._backends import calculate_orca as _calculate_orca - - E_vac, grad_vac = _calculate_orca( - self, elements=elements, xyz_qm=xyz_qm - ) - except Exception as e: - msg = ( - f"Failed to calculate in vacuo energies using ORCA backend: {e}" - ) - _logger.error(msg) - raise RuntimeError(msg) - - # Sander. - elif self._backend == "sander": - try: - from ._backends import calculate_sander as _calculate_sander - - atoms = _ase.Atoms(positions=xyz_qm, numbers=atomic_numbers) - E_vac, grad_vac = _calculate_sander(atoms, self._parm7, is_gas=True) - except Exception as e: - msg = f"Failed to calculate in vacuo energies using Sander backend: {e}" - _logger.error(msg) - raise RuntimeError(msg) - - # SQM. - elif self._backend == "sqm": + # Non-Torch backends. + elif self._backend is not None: try: - from ._backends import calculate_sqm as _calculate_sqm - - E_vac, grad_vac = _calculate_sqm( - self, xyz_qm, atomic_numbers, charge - ) - except Exception as e: - msg = ( - f"Failed to calculate in vacuo energies using SQM backend: {e}" + energy, forces = self._backend( + _np.expand_dims(atomic_numbers, axis=0), + _np.expand_dims(xyz_qm, axis=0), ) - _logger.error(msg) - raise RuntimeError(msg) - - # XTB. - elif self._backend == "xtb": - try: - from ._backends import calculate_xtb as _calculate_xtb - - atoms = _ase.Atoms(positions=xyz_qm, numbers=atomic_numbers) - E_vac, grad_vac = _calculate_xtb(atoms) + E_vac = energy[0] + grad_vac = -forces[0] except Exception as e: - msg = ( - f"Failed to calculate in vacuo energies using XTB backend: {e}" - ) + msg = f"Failed to calculate in vacuo energies using {self._backend} backend: {e}" _logger.error(msg) raise RuntimeError(msg) @@ -1605,11 +1406,12 @@ def _sire_callback(self, atomic_numbers, charges_mm, xyz_qm, xyz_mm, idx_mm=None # Apply delta-learning corrections using Rascal. if self._is_delta and self._backend is not None: try: - if atoms is None: - atoms = _ase.Atoms(positions=xyz_qm, numbers=atomic_numbers) - from ._backends import calculate_rascal as _calculate_rascal - - delta_E, delta_grad = _calculate_rascal(self, atoms) + energy, forces = self._rascal_calc( + _np.expand_dims(atomic_numbers, axis=0), + _np.expand_dims(xyz_qm, axis=0), + ) + delta_E = energy[0] + delta_grad = -forces[0] except Exception as e: msg = f"Failed to compute delta-learning corrections using Rascal: {e}" _logger.error(msg) @@ -1627,6 +1429,10 @@ def _sire_callback(self, atomic_numbers, charges_mm, xyz_qm, xyz_mm, idx_mm=None [], ) + # Store a copy of the atomic numbers and QM coordinates as NumPy arrays. + atomic_numbers_np = atomic_numbers + xyz_qm_np = xyz_qm + # Convert inputs to Torch tensors. atomic_numbers = _torch.tensor( atomic_numbers, dtype=_torch.int64, device=self._device @@ -1699,22 +1505,22 @@ def _sire_callback(self, atomic_numbers, charges_mm, xyz_qm, xyz_mm, idx_mm=None # Interpolate between the MM and ML/MM potential. if self._is_interpolate: - # Create the ASE atoms object if it wasn't already created by the backend. - if atoms is None: - atoms = _ase.Atoms( - positions=xyz_qm.detach().cpu(), numbers=atomic_numbers.cpu() - ) - # Compute the in vacuo MM energy and gradients for the QM region. if self._backend != None: - from ._backends import calculate_sander as _calculate_sander + from ._backends import Sander - E_mm_qm_vac, grad_mm_qm_vac = _calculate_sander( - atoms=atoms, - parm7=self._parm7, - is_gas=True, + # Create a Sander backend instance. + backend = Sander(self._parm7) + + # Compute the in vacuo MM energy and forces for the QM region. + energy, forces = backend.calculate( + _np.expand_dims(atomic_numbers_np, axis=0), + _np.expand_dims(xyz_qm_np, axis=0), ) + E_mm_qm_vac = energy[0] + grad_mm_qm_vac = -forces[0] + # If no backend is specified, then the MM energy and gradients are zero. else: E_mm_qm_vac, grad_mm_qm_vac = 0.0, _np.zeros_like(xyz_qm.detach().cpu()) diff --git a/tests/input/methane.prm7 b/tests/input/methane.prm7 new file mode 100644 index 0000000..41fadfd --- /dev/null +++ b/tests/input/methane.prm7 @@ -0,0 +1,134 @@ +%VERSION VERSION_STAMP = V0001.000 DATE = 12/09/24 13:54:56 +%FLAG TITLE +%FORMAT(20a4) +BioSimSpace_System +%FLAG POINTERS +%FORMAT(10I8) + 5 2 4 0 6 0 0 0 0 0 + 11 1 0 0 0 1 1 0 2 0 + 0 0 0 0 0 0 0 0 5 0 + 0 0 0 +%FLAG ATOM_NAME +%FORMAT(20a4) +C1 H2 H3 H4 H5 +%FLAG CHARGE +%FORMAT(5E16.8) + -1.97529732E+00 4.93824330E-01 4.93824330E-01 4.93824330E-01 4.93824330E-01 +%FLAG ATOMIC_NUMBER +%FORMAT(10I8) + 6 1 1 1 1 +%FLAG MASS +%FORMAT(5E16.8) + 1.20100000E+01 1.00800000E+00 1.00800000E+00 1.00800000E+00 1.00800000E+00 +%FLAG ATOM_TYPE_INDEX +%FORMAT(10I8) + 1 2 2 2 2 +%FLAG NUMBER_EXCLUDED_ATOMS +%FORMAT(10I8) + 4 3 2 1 1 +%FLAG NONBONDED_PARM_INDEX +%FORMAT(10I8) + 1 2 2 3 +%FLAG RESIDUE_LABEL +%FORMAT(20a4) +LIG +%FLAG RESIDUE_POINTER +%FORMAT(10I8) + 1 +%FLAG BOND_FORCE_CONSTANT +%FORMAT(5E16.8) + 3.30600000E+02 +%FLAG BOND_EQUIL_VALUE +%FORMAT(5E16.8) + 1.09690000E+00 +%FLAG ANGLE_FORCE_CONSTANT +%FORMAT(5E16.8) + 3.94000000E+01 +%FLAG ANGLE_EQUIL_VALUE +%FORMAT(5E16.8) + 1.87762601E+00 +%FLAG DIHEDRAL_FORCE_CONSTANT +%FORMAT(5E16.8) + +%FLAG DIHEDRAL_PERIODICITY +%FORMAT(5E16.8) + +%FLAG DIHEDRAL_PHASE +%FORMAT(5E16.8) + +%FLAG SCEE_SCALE_FACTOR +%FORMAT(5E16.8) + +%FLAG SCNB_SCALE_FACTOR +%FORMAT(5E16.8) + +%FLAG SOLTY +%FORMAT(5E16.8) + 0.00000000E+00 0.00000000E+00 +%FLAG LENNARD_JONES_ACOEF +%FORMAT(5E16.8) + 1.04308023E+06 9.71708117E+04 7.51607703E+03 +%FLAG LENNARD_JONES_BCOEF +%FORMAT(5E16.8) + 6.75612247E+02 1.26919150E+02 2.17257828E+01 +%FLAG BONDS_INC_HYDROGEN +%FORMAT(10I8) + 0 3 1 0 6 1 0 9 1 0 + 12 1 +%FLAG BONDS_WITHOUT_HYDROGEN +%FORMAT(10I8) + +%FLAG ANGLES_INC_HYDROGEN +%FORMAT(10I8) + 3 0 6 1 3 0 9 1 3 0 + 12 1 6 0 9 1 6 0 12 1 + 9 0 12 1 +%FLAG ANGLES_WITHOUT_HYDROGEN +%FORMAT(10I8) + +%FLAG DIHEDRALS_INC_HYDROGEN +%FORMAT(10I8) + +%FLAG DIHEDRALS_WITHOUT_HYDROGEN +%FORMAT(10I8) + +%FLAG EXCLUDED_ATOMS_LIST +%FORMAT(10I8) + 2 3 4 5 3 4 5 4 5 5 + 0 +%FLAG HBOND_ACOEF +%FORMAT(5E16.8) + +%FLAG HBOND_BCOEF +%FORMAT(5E16.8) + +%FLAG HBCUT +%FORMAT(5E16.8) + +%FLAG AMBER_ATOM_TYPE +%FORMAT(20a4) +c3 hc hc hc hc +%FLAG TREE_CHAIN_CLASSIFICATION +%FORMAT(20a4) +BLA BLA BLA BLA BLA +%FLAG JOIN_ARRAY +%FORMAT(10I8) + 0 0 0 0 0 +%FLAG IROTAT +%FORMAT(10I8) + 0 0 0 0 0 +%FLAG RADIUS_SET +%FORMAT(1a80) +modified Bondi radii (mbondi) +%FLAG RADII +%FORMAT(5E16.8) + 1.70000000E+00 1.30000000E+00 1.30000000E+00 1.30000000E+00 1.30000000E+00 +%FLAG SCREEN +%FORMAT(5E16.8) + 7.20000000E-01 8.50000000E-01 8.50000000E-01 8.50000000E-01 8.50000000E-01 +%FLAG ATOMS_PER_MOLECULE +%FORMAT(10I8) + 5 +%FLAG IPOL +%FORMAT(1I8) + 0 diff --git a/tests/input/orc_job.inp b/tests/input/orc_job.inp new file mode 100644 index 0000000..189bbc5 --- /dev/null +++ b/tests/input/orc_job.inp @@ -0,0 +1,16 @@ +# Run using SANDER file-based interface for Orca +# +%pal nprocs 1 end +!BLYP 6-31G* verytightscf +%method + grid 4 + finalgrid 6 +end +%scf + maxiter 100 +end +%MaxCore 1024 +! ENGRAD +! Angs NoUseSym +%pointcharges "ptchrg.xyz" +*xyzfile 0 1 inpfile.xyz diff --git a/tests/test_backends.py b/tests/test_backends.py new file mode 100644 index 0000000..fd8a306 --- /dev/null +++ b/tests/test_backends.py @@ -0,0 +1,124 @@ +import numpy as np +import os +import pytest +import socket +import tempfile + +from emle._backends import * + + +@pytest.fixture(scope="module") +def data(): + atomic_numbers = np.array([[6, 1, 1, 1, 1]]) + xyz = np.array( + [ + [ + [0.03192167, 0.00638559, 0.01301679], + [-0.83140486, 0.39370209, -0.26395324], + [-0.66518241, -0.84461308, 0.20759389], + [0.45554739, 0.54289633, 0.81170881], + [0.66091919, -0.16799635, -0.91037834], + ] + ], + ) + return atomic_numbers, xyz + + +def test_sqm(data): + """ + Test the SQM backend. + """ + + # Set up the data. + atomic_numbers, xyz = data + + # Instantiate the SQM backend. + backend = SQM("tests/input/methane.prm7") + + # Calculate the energy and forces. + energy, forces = backend.calculate(atomic_numbers, xyz) + + +def test_sander(data): + """ + Test the Sander backend. + """ + + # Set up the data. + atomic_numbers, xyz = data + + # Instantiate the Sander backend. + backend = Sander("tests/input/methane.prm7") + + # Calculate the energy and forces. + energy, forces = backend.calculate(atomic_numbers, xyz) + + +def test_xtb(data): + """ + Test the XTB backend. + """ + + # Set up the data. + atomic_numbers, xyz = data + + # Instantiate the XTB backend. + backend = XTB() + + # Calculate the energy and forces. + energy, forces = backend.calculate(atomic_numbers, xyz) + + +@pytest.mark.skipif( + socket.gethostname() != "porridge", + reason="Local test requiring ORCA installation.", +) +def test_orca(data): + """ + Test the ORCA backend. + """ + + # Set the ORCA path. + orca_path = "/home/lester/Downloads/orca/bin" + + # Set up the data. + atomic_numbers, xyz = data + + # Set the ORCA environment variables. + os.environ["LD_LIBRARY_PATH"] = f"{orca_path}:{os.environ['LD_LIBRARY_PATH']}" + + # Instantiate the ORCA backend. + backend = ORCA(exe=f"{orca_path}/orca", template="tests/input/orc_job.inp") + + # Calculate the energy and forces. + energy, forces = backend.calculate(atomic_numbers, xyz) + + +@pytest.mark.skipif( + socket.gethostname() != "porridge", + reason="Local test requiring DeePMD models.", +) +def test_deepmd(data): + """ + Test the DeePMD backend. + """ + + # Set up the data. + atomic_numbers, xyz = data + + models = [ + "tests/input/deepmd/01.pb", + "tests/input/deepmd/02.pb", + "tests/input/deepmd/03.pb", + ] + + with tempfile.NamedTemporaryFile() as tmp: + # Instantiate the DeePMD backend. + backend = DeePMD(models, deviation=tmp.name) + + # Calculate the energy and forces. + energy, forces = backend.calculate(atomic_numbers, xyz) + + # Make sure the deviation is calculated. + with open(tmp.name, "r") as f: + deviation = float(f.read())