Skip to content

datamol.conformers

Generate and working with conformers

generate(mol, n_confs=None, rms_cutoff=None, clear_existing=True, align_conformers=True, minimize_energy=False, method=None, energy_iterations=500, warning_not_converged=10, random_seed=19, add_hs=True, verbose=False)

Compute conformers of a molecule.

Examples:

import datamol as dm
smiles = "O=C(C)Oc1ccccc1C(=O)O"
mol = dm.to_mol(smiles)
mol = dm.conformers.generate(mol)

# Get all conformers as a list
conformers = mol.GetConformers()

# Get the 3D atom positions of the first conformer
positions = mol.GetConformer(0).GetPositions()

# If minimization has been enabled (default to True)
# you can access the computed energy.
conf = mol.GetConformer(0)
props = conf.GetPropsAsDict()
print(props)
# {'rdkit_uff_energy': 1.7649408317784008}

Parameters:

Name Type Description Default
mol Mol

a molecule

required
n_confs int

Number of conformers to generate. Depends on the number of rotatable bonds by default.

None
rms_cutoff Optional[float]

The minimum RMS value in Angstrom at which two conformers are considered redundant and one is deleted. If None, all conformers are kept. This step is done after an eventual minimization step.

None
clear_existing bool

Whether to overwrite existing conformers for the molecule.

True
align_conformers bool

Wehther to align conformer.

True
minimize_energy bool

Wether to minimize conformer's energies using UFF. Disable to generate conformers much faster.

False
method str

RDKit method to use for embedding. Choose among ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]. If None, "ETKDGv3" is used.

None
energy_iterations int

Maximum number of iterations during the energy minimization procedure. It corresponds to the maxIters argument in RDKit.

500
warning_not_converged int

Wether to log a warning when the number of not converged conformers during the minimization is higher than warning_not_converged. Only works when verbose is set to True. Disable with 0. Defaults to 10.

10
random_seed int

Set to None or -1 to disable.

19
add_hs bool

Whether to add hydrogens to the mol before embedding. If set to True, the hydrogens are removed in the returned molecule. Warning: explicit hydrogens won't be conserved. It is strongly recommended to let the default value to True. The RDKit documentation says: "To get good 3D conformations, it’s almost always a good idea to add hydrogens to the molecule first."

True
verbose bool

Wether to enable logs during the process.

False

Returns:

Type Description
Mol

mol: the molecule with the conformers.

Source code in datamol/conformers/_conformers.py
def generate(
    mol: Chem.rdchem.Mol,
    n_confs: int = None,
    rms_cutoff: Optional[float] = None,
    clear_existing: bool = True,
    align_conformers: bool = True,
    minimize_energy: bool = False,
    method: str = None,
    energy_iterations: int = 500,
    warning_not_converged: int = 10,
    random_seed: int = 19,
    add_hs: bool = True,
    verbose: bool = False,
) -> Chem.rdchem.Mol:
    """Compute conformers of a molecule.

    Example:

    ```python
    import datamol as dm
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Get all conformers as a list
    conformers = mol.GetConformers()

    # Get the 3D atom positions of the first conformer
    positions = mol.GetConformer(0).GetPositions()

    # If minimization has been enabled (default to True)
    # you can access the computed energy.
    conf = mol.GetConformer(0)
    props = conf.GetPropsAsDict()
    print(props)
    # {'rdkit_uff_energy': 1.7649408317784008}
    ```

    Args:
        mol: a molecule
        n_confs: Number of conformers to generate. Depends on the
            number of rotatable bonds by default.
        rms_cutoff: The minimum RMS value in Angstrom at which two conformers
            are considered redundant and one is deleted. If None, all conformers
            are kept. This step is done after an eventual minimization step.
        clear_existing: Whether to overwrite existing conformers for the molecule.
        align_conformers: Wehther to align conformer.
        minimize_energy: Wether to minimize conformer's energies using UFF.
            Disable to generate conformers much faster.
        method: RDKit method to use for embedding. Choose among
            ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]. If None, "ETKDGv3" is used.
        energy_iterations: Maximum number of iterations during the energy minimization procedure.
            It corresponds to the `maxIters` argument in RDKit.
        warning_not_converged: Wether to log a warning when the number of not converged conformers
            during the minimization is higher than `warning_not_converged`. Only works when `verbose` is set to True. Disable with 0. Defaults to 10.
        random_seed: Set to None or -1 to disable.
        add_hs: Whether to add hydrogens to the mol before embedding. If set to True, the hydrogens
            are removed in the returned molecule. Warning: explicit hydrogens won't be conserved. It is strongly
            recommended to let the default value to True. The RDKit documentation says: "To get good 3D conformations,
            it’s almost always a good idea to add hydrogens to the molecule first."
        verbose: Wether to enable logs during the process.

    Returns:
        mol: the molecule with the conformers.
    """

    AVAILABLE_METHODS = ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]

    if method is None:
        method = "ETKDGv3"

    if method not in AVAILABLE_METHODS:
        raise ValueError(f"The method {method} is not supported. Use from {AVAILABLE_METHODS}")

    # Random seed
    if random_seed is None:
        random_seed = -1

    # Clone molecule
    mol = copy.deepcopy(mol)

    # Remove existing conformers
    if clear_existing:
        mol.RemoveAllConformers()

    # Add hydrogens
    if add_hs:
        mol = Chem.AddHs(mol)

    if not n_confs:
        # Set the number of conformers depends on
        # the number of rotatable bonds.
        rotatable_bonds = Descriptors.NumRotatableBonds(mol)
        if rotatable_bonds < 8:
            n_confs = 50
        elif rotatable_bonds < 12:
            n_confs = 200
        else:
            n_confs = 300

    # Embed conformers
    params = getattr(AllChem, method)()
    params.randomSeed = random_seed
    params.enforceChirality = True
    confs = AllChem.EmbedMultipleConfs(mol, numConfs=n_confs, params=params)

    # Sometime embedding fails. Here we try again by disabling `enforceChirality`.
    if len(confs) == 0:
        if verbose:
            logger.warning(
                f"Conformers embedding failed for {dm.to_smiles(mol)}. Trying without enforcing chirality."
            )
        params = getattr(AllChem, method)()
        params.randomSeed = random_seed
        params.enforceChirality = False
        confs = AllChem.EmbedMultipleConfs(mol, numConfs=n_confs, params=params)

    if len(confs) == 0:
        raise ValueError(f"Conformers embedding failed for {dm.to_smiles(mol)}")

    # Minimize energy
    if minimize_energy:

        # Minimize conformer's energy using UFF
        results = AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=energy_iterations)
        energies = [energy for _, energy in results]

        # Some conformers might not have converged during minimization.
        not_converged = sum([not_converged for not_converged, _ in results if not_converged])
        if warning_not_converged != 0 and not_converged > warning_not_converged and verbose:
            logger.warning(
                f"{not_converged}/{len(results)} conformers have not converged for {dm.to_smiles(mol)}"
            )

        # Add the energy as a property to each conformers
        [
            conf.SetDoubleProp("rdkit_uff_energy", energy)
            for energy, conf in zip(energies, mol.GetConformers())
        ]

        # Now we reorder conformers according to their energies,
        # so the lowest energies conformers are first.
        mol_clone = copy.deepcopy(mol)
        ordered_conformers = [conf for _, conf in sorted(zip(energies, mol_clone.GetConformers()))]
        mol.RemoveAllConformers()
        [mol.AddConformer(conf, assignId=True) for conf in ordered_conformers]

    # Align conformers to each others
    if align_conformers:
        rdMolAlign.AlignMolConformers(mol)

    if rms_cutoff is not None:
        mol = cluster(
            mol,
            rms_cutoff=rms_cutoff,
            already_aligned=align_conformers,
            centroids=True,
        )  # type: ignore

    if add_hs:
        mol = Chem.RemoveHs(mol)

    return mol

cluster(mol, rms_cutoff=1, already_aligned=False, centroids=True)

Cluster the conformers of a molecule according to an RMS threshold in Angstrom.

Parameters:

Name Type Description Default
mol Mol

a molecule

required
rms_cutoff float

The RMS cutoff in Angstrom.

1
already_aligned bool

Whether or not the conformers are aligned. If False, they will be aligmned furing the RMS computation.

False
centroids bool

If True, return one molecule with centroid conformers only. If False return a list of molecules per cluster with all the conformers of the cluster. Defaults to True.

True
Source code in datamol/conformers/_conformers.py
def cluster(
    mol: Chem.rdchem.Mol,
    rms_cutoff: float = 1,
    already_aligned: bool = False,
    centroids: bool = True,
):
    """Cluster the conformers of a molecule according to an RMS threshold in Angstrom.

    Args:
        mol: a molecule
        rms_cutoff: The RMS cutoff in Angstrom.
        already_aligned: Whether or not the conformers are aligned. If False,
            they will be aligmned furing the RMS computation.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster. Defaults to True.
    """

    # Clone molecule
    mol = copy.deepcopy(mol)

    # Compute RMS
    dmat = AllChem.GetConformerRMSMatrix(mol, prealigned=already_aligned)

    # Cluster
    conf_clusters = Butina.ClusterData(
        dmat,
        nPts=mol.GetNumConformers(),
        distThresh=rms_cutoff,
        isDistData=True,
        reordering=False,
    )

    return return_centroids(mol, conf_clusters, centroids=centroids)

return_centroids(mol, conf_clusters, centroids=True)

Given a list of cluster indices, return one single molecule with only the centroid of the clusters of a list of molecules per cluster.

Parameters:

Name Type Description Default
mol Mol

a molecule.

required
conf_clusters Sequence[Sequence[int]]

list of cluster indices.

required
centroids bool

If True, return one molecule with centroid conformers only. If False return a list of molecules per cluster with all the conformers of the cluster.

True
Source code in datamol/conformers/_conformers.py
def return_centroids(
    mol: Chem.rdchem.Mol,
    conf_clusters: Sequence[Sequence[int]],
    centroids: bool = True,
) -> Union[List[Chem.rdchem.Mol], Chem.rdchem.Mol]:
    """Given a list of cluster indices, return one single molecule
    with only the centroid of the clusters of a list of molecules per cluster.

    Args:
        mol: a molecule.
        conf_clusters: list of cluster indices.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster.
    """

    if centroids:
        # Collect centroid of each cluster (first element of the list)
        centroid_list = [indices[0] for indices in conf_clusters]

        # Keep only centroid conformers
        mol_clone = copy.deepcopy(mol)
        confs = [mol_clone.GetConformers()[i] for i in centroid_list]
        mol.RemoveAllConformers()
        [mol.AddConformer(conf, assignId=True) for conf in confs]
        return mol

    else:
        # Create a new molecule for each cluster and add conformers to it.
        mols = []
        for cluster in conf_clusters:
            m = copy.deepcopy(mol)
            m.RemoveAllConformers()
            [m.AddConformer(mol.GetConformer(c), assignId=True) for c in cluster]
            mols.append(m)
        return mols

rmsd(mol)

Compute the RMSD between all the conformers of a molecule.

Parameters:

Name Type Description Default
mol Mol

a molecule

required
Source code in datamol/conformers/_conformers.py
def rmsd(mol: Chem.rdchem.Mol) -> np.ndarray:
    """Compute the RMSD between all the conformers of a molecule.

    Args:
        mol: a molecule
    """

    if mol.GetNumConformers() <= 1:
        raise ValueError(
            "The molecule has 0 or 1 conformer. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    n_confs = mol.GetNumConformers()
    rmsds = []
    for i in range(n_confs):
        for j in range(n_confs):
            rmsd = rdMolAlign.AlignMol(prbMol=mol, refMol=mol, prbCid=i, refCid=j)
            rmsds.append(rmsd)
    return np.array(rmsds).reshape(n_confs, n_confs)

sasa(mol, conf_id=None, n_jobs=1)

Compute Solvent Accessible Surface Area of all the conformers using FreeSASA (https://freesasa.github.io/). Values are returned as an array and also stored within each conformer as a property called rdkit_free_sasa.

Examples:

smiles = "O=C(C)Oc1ccccc1C(=O)O"
mol = dm.to_mol(smiles)
mol = dm.conformers.generate(mol)

# Compute SASA for all the conformers without parallelization
sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

# If minimization has been enabled (default to True)
# you can access the computed energy.
conf = mol.GetConformer(0)
props = conf.GetPropsAsDict()
print(props)
# {'rdkit_uff_energy': 1.7649408317784008}

Parameters:

Name Type Description Default
mol Mol

a molecule

required
conf_id Union[int, List[int]]

Id of the conformers to compute. If None, compute all.

None
n_jobs int

Number of jobs for parallelization. Set to 1 to disable and -1 to use all cores.

1

Returns:

Type Description
ndarray

mol: the molecule with the conformers.

Source code in datamol/conformers/_features.py
@dm.utils.decorators.disable_on_os("win")
def sasa(
    mol: Chem.rdchem.Mol,
    conf_id: Union[int, List[int]] = None,
    n_jobs: int = 1,
) -> np.ndarray:
    """Compute Solvent Accessible Surface Area of all the conformers
    using FreeSASA (https://freesasa.github.io/). Values are returned
    as an array and also stored within each conformer as a property
    called `rdkit_free_sasa`.

    Example:

    ```python
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Compute SASA for all the conformers without parallelization
    sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

    # If minimization has been enabled (default to True)
    # you can access the computed energy.
    conf = mol.GetConformer(0)
    props = conf.GetPropsAsDict()
    print(props)
    # {'rdkit_uff_energy': 1.7649408317784008}
    ```

    Args:
        mol: a molecule
        conf_id: Id of the conformers to compute. If None, compute all.
        n_jobs: Number of jobs for parallelization. Set to 1 to disable
            and -1 to use all cores.

    Returns:
        mol: the molecule with the conformers.
    """
    from rdkit.Chem import rdFreeSASA

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Get Van der Waals radii (angstrom)
    radii = [dm.PERIODIC_TABLE.GetRvdw(atom.GetAtomicNum()) for atom in mol.GetAtoms()]

    # Which conformers to compute
    conf_ids = []
    if conf_id is None:
        # If None compute for all the conformers
        conf_ids = list(range(mol.GetNumConformers()))  # type: ignore
    elif isinstance(conf_id, int):
        conf_ids = [conf_id]
    else:
        conf_ids = conf_id

    # Compute solvent accessible surface area
    def _get_sasa(i):
        conf = mol.GetConformer(i)
        sasa = rdFreeSASA.CalcSASA(mol, radii, confIdx=conf.GetId())
        conf.SetDoubleProp("rdkit_free_sasa", sasa)
        return sasa

    runner = dm.JobRunner(n_jobs=n_jobs)
    sasa_values = runner(_get_sasa, conf_ids)
    return np.array(sasa_values)

Low-level conformer manipulation

center_of_mass(mol, use_atoms=True, digits=None, conf_id=-1)

Compute the center of mass of a conformer of a molecule.

Parameters:

Name Type Description Default
mol Mol

a molecule

required
use_atoms bool

Whether to compute the true center of mass or the geometrical center.

True
digits int

Number of digits to round to.

None
conf_id int

the conformer id.

-1

Returns cm: Center of mass or geometrical center

Source code in datamol/conformers/_features.py
def center_of_mass(
    mol: Chem.rdchem.Mol,
    use_atoms: bool = True,
    digits: int = None,
    conf_id: int = -1,
) -> np.ndarray:
    """Compute the center of mass of a conformer of a molecule.

    Args:
        mol: a molecule
        use_atoms: Whether to compute the true center of mass or the geometrical center.
        digits: Number of digits to round to.
        conf_id: the conformer id.

    Returns
        cm: Center of mass or geometrical center
    """
    coords = get_coords(mol)
    atom_weight = np.ones((coords.shape[0]))

    if use_atoms:
        atom_weight = np.array([atom.GetMass() for atom in mol.GetAtoms()])

    atom_weight = atom_weight[:, None]
    atom_weight /= atom_weight.sum()
    center = (coords * atom_weight).sum(axis=0)

    if digits is not None:
        center = center.round(digits)

    return center

get_coords(mol, conf_id=-1)

Get the coordinate of a conformer of a molecule.

Parameters:

Name Type Description Default
mol Mol

a molecule.

required
conf_id int

a conformer id.

-1
Source code in datamol/conformers/_features.py
def get_coords(mol: Chem.rdchem.Mol, conf_id: int = -1):
    """Get the coordinate of a conformer of a molecule.

    Args:
        mol: a molecule.
        conf_id: a conformer id.
    """

    if mol.GetNumConformers() == 0:
        raise ValueError("Molecule does not have any conformers.")

    conf = mol.GetConformer(id=conf_id)
    return conf.GetPositions()

translate(mol, new_centroid, conf_id=-1)

Move a given conformer of a molecule to a new position. The transformation is performed in place.

Parameters:

Name Type Description Default
mol Mol

the molecule.

required
new_centroid Union[numpy.ndarray, List[int]]

the new position to move to of shape [x, y, z]

required
conf_id int

id of the conformer.

-1
Source code in datamol/conformers/_conformers.py
def translate(mol: Chem.rdchem.Mol, new_centroid: Union[np.ndarray, List[int]], conf_id: int = -1):
    """Move a given conformer of a molecule to a new position. The transformation is performed
    in place.

    Args:
        mol: the molecule.
        new_centroid: the new position to move to of shape [x, y, z]
        conf_id: id of the conformer.
    """

    # Get conformer
    conf = mol.GetConformer(conf_id)

    # Compute the vector for translation
    mol_center = rdMolTransforms.ComputeCentroid(conf)
    mol_center = np.array([mol_center.x, mol_center.y, mol_center.z])

    # Make the transformation matrix
    T = np.eye(4)
    T[:3, 3] = new_centroid - mol_center

    # Transform
    rdMolTransforms.TransformConformer(conf, T)