`datamol.conformers`¶

`align_conformers(mols, ref_id=0, copy=True, conformer_id=-1, backend='crippenO3A')` ¶

Align a list of molecules to a reference molecule.

Note that using the O3A backend, hydrogens will be added at the beginning of the procedure and removed at the end of the procedure.

Parameters:

Name	Type	Description	Default
`mols`	`List[Mol]`	List of molecules to align. All the molecules must have a conformer.	required
`ref_id`	`int`	Index of the reference molecule. By default, the first molecule in the list will be used as reference.	`0`
`copy`	`bool`	Whether to copy the molecules before performing the alignement.	`True`
`conformer_id`	`int`	Conformer id to use.	`-1`
`backend`	`str`	Backend to use to compute the alignment from `crippenO3A`, `O3A`.	`'crippenO3A'`

Returns:

Name	Type	Description
`mols`	`list`	The aligned molecules.
`scores`	`list`	The score of the alignement.

Source code in datamol/conformers/_conformers.py

def align_conformers(
    mols: List[Mol],
    ref_id: int = 0,
    copy: bool = True,
    conformer_id: int = -1,
    backend: str = "crippenO3A",
) -> Tuple[list, list]:
    """Align a list of molecules to a reference molecule.

    Note that using the `O3A` backend, hydrogens will be added at the beginning of the procedure
    and removed at the end of the procedure.

    Args:
        mols: List of molecules to align. All the molecules must have a conformer.
        ref_id: Index of the reference molecule. By default, the first molecule in the list
            will be used as reference.
        copy: Whether to copy the molecules before performing the alignement.
        conformer_id: Conformer id to use.
        backend: Backend to use to compute the alignment from `crippenO3A`, `O3A`.

    Returns:
        mols: The aligned molecules.
        scores: The score of the alignement.
    """

    allowed_backends = ["crippenO3A", "O3A"]
    if backend not in allowed_backends:
        raise ValueError(
            f"The backend '{backend}' is not supported. Choose from: {allowed_backends}"
        )

    # Check all input molecules has a conformer
    if not all([mol.GetNumConformers() >= 1 for mol in mols]):
        raise ValueError("One or more input molecules is missing a conformer.")

    # Make a copy of the molecules since they are going to be modified
    if copy:
        mols = [dm_mol.copy_mol(mol) for mol in mols]

    # Split ref and probe mols
    mol_ref = mols[ref_id]
    mol_probes = mols

    if backend == "crippenO3A":
        # Compute Crippen contributions for every atoms and molecules
        crippen_contribs = [rdMolDescriptors._CalcCrippenContribs(mol) for mol in mol_probes]
        crippen_contrib_ref = crippen_contribs[ref_id]
        crippen_contrib_probes = crippen_contribs

        # Loop and align
        # NOTE(hadim): we could eventually parallelize this if that's needed.

        scores = []
        for i, mol in enumerate(mol_probes):
            crippenO3A = rdMolAlign.GetCrippenO3A(
                prbMol=mol,
                refMol=mol_ref,
                prbCrippenContribs=crippen_contrib_probes[i],
                refCrippenContribs=crippen_contrib_ref,
                prbCid=conformer_id,
                refCid=conformer_id,
                maxIters=50,
            )
            crippenO3A.Align()

            scores.append(crippenO3A.Score())

    elif backend == "O3A":
        # Add hydrogens first
        mol_probes = [dm_mol.add_hs(mol, add_coords=True) for mol in mol_probes]
        mol_ref = dm_mol.add_hs(mol_ref, add_coords=True)

        # Compute MMFF params for every molecules
        mmff_params = [rdForceFieldHelpers.MMFFGetMoleculeProperties(mol) for mol in mol_probes]

        # Split reference and probe molecules
        mmff_params_ref = mmff_params[ref_id]
        mmff_params_probes = mmff_params

        # Loop and align
        # NOTE(hadim): we could eventually parallelize this if that's needed.

        scores = []
        for i, mol in enumerate(mol_probes):
            pyO3A = rdMolAlign.GetO3A(
                prbMol=mol,
                refMol=mol_ref,
                prbPyMMFFMolProperties=mmff_params_probes[i],
                refPyMMFFMolProperties=mmff_params_ref,
                prbCid=conformer_id,
                refCid=conformer_id,
                maxIters=50,
            )
            pyO3A.Align()

            scores.append(pyO3A.Score())

        # Remove the hydrogens
        mol_probes = [dm_mol.remove_hs(mol) for mol in mol_probes]

    else:
        raise ValueError(f"Backend {backend} not supported.")

    scores = np.array(scores)

    return mol_probes, scores

`cluster(mol, rms_cutoff=1, already_aligned=False, centroids=True)` ¶

Cluster the conformers of a molecule according to an RMS threshold in Angstrom.

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	a molecule	required
`rms_cutoff`	`float`	The RMS cutoff in Angstrom.	`1`
`already_aligned`	`bool`	Whether or not the conformers are aligned. If False, they will be aligmned furing the RMS computation.	`False`
`centroids`	`bool`	If True, return one molecule with centroid conformers only. If False return a list of molecules per cluster with all the conformers of the cluster. Defaults to True.	`True`

Source code in datamol/conformers/_conformers.py

def cluster(
    mol: Mol,
    rms_cutoff: float = 1,
    already_aligned: bool = False,
    centroids: bool = True,
):
    """Cluster the conformers of a molecule according to an RMS threshold in Angstrom.

    Args:
        mol: a molecule
        rms_cutoff: The RMS cutoff in Angstrom.
        already_aligned: Whether or not the conformers are aligned. If False,
            they will be aligmned furing the RMS computation.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster. Defaults to True.
    """

    # Clone molecule
    mol = copy.deepcopy(mol)

    # Compute RMS
    dmat = AllChem.GetConformerRMSMatrix(mol, prealigned=already_aligned)

    # Cluster
    conf_clusters = Butina.ClusterData(
        dmat,
        nPts=mol.GetNumConformers(),
        distThresh=rms_cutoff,
        isDistData=True,
        reordering=False,
    )

    return return_centroids(mol, conf_clusters, centroids=centroids)

`generate(mol, n_confs=None, use_random_coords=True, enforce_chirality=True, num_threads=1, rms_cutoff=None, clear_existing=True, align_conformers=True, minimize_energy=False, sort_by_energy=True, method=None, forcefield='UFF', ewindow=np.inf, eratio=np.inf, energy_iterations=200, warning_not_converged=0, random_seed=19, add_hs=True, ignore_failure=False, embed_params=None, verbose=False)` ¶

Compute conformers of a molecule.

Example:

import datamol as dm
smiles = "O=C(C)Oc1ccccc1C(=O)O"
mol = dm.to_mol(smiles)
mol = dm.conformers.generate(mol)

# Get all conformers as a list
conformers = mol.GetConformers()

# Get the 3D atom positions of the first conformer
positions = mol.GetConformer(0).GetPositions()

# If minimization has been enabled (default to True)
# you can access the computed energy.
conf = mol.GetConformer(1)
props = conf.GetPropsAsDict()
print(props)
# {'rdkit_UFF_energy': 35.64074017773132,'rdkit_UFF_delta_energy': 0.24682258222552633}

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	a molecule	required
`n_confs`	`Optional[int]`	Number of conformers to generate. Depends on the number of rotatable bonds by default: 50 for <8, 200 for <12 and 300 for >12.	`None`
`use_random_coords`	`bool`	Start the embedding from random coordinates instead of using eigenvalues of the distance matrix.	`True`
`enforce_chirality`	`bool`	Enforce correct chirilaty if chiral centers are present.	`True`
`num_threads`	`int`	Number of threads to use when embedding multiple conformations.	`1`
`rms_cutoff`	`Optional[float]`	The minimum RMS value in Angstrom at which two conformers are considered redundant and one is deleted. If None, all conformers are kept. This step is done after an eventual minimization step.	`None`
`clear_existing`	`bool`	Whether to overwrite existing conformers for the molecule.	`True`
`align_conformers`	`bool`	Whether to align the conformers.	`True`
`minimize_energy`	`bool`	Whether to minimize conformer's energies using MMFF94s. Disable to generate conformers much faster.	`False`
`sort_by_energy`	`bool`	Sort conformers by energy when minimizing is turned to False.	`True`
`method`	`Optional[str]`	RDKit method to use for embedding. Choose among ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]. If None, "ETKDGv3" is used.	`None`
`forcefield`	`str`	molecular forcefield to use, one of ['UFF','MMFF94s','MMFF94s_noEstat']	`'UFF'`
`ewindow`	`float`	maximum energy above minimum energy conformer to output	`np.inf`
`eratio`	`float`	max delta-energy divided by rotatable bonds for conformers	`np.inf`
`energy_iterations`	`int`	Maximum number of iterations during the energy minimization procedure. It corresponds to the `maxIters` argument in RDKit.	`200`
`warning_not_converged`	`int`	Wether to log a warning when the number of not converged conformers during the minimization is higher than `warning_not_converged`. Only works when `verbose` is set to True. Disable with 0. Defaults to 10.	`0`
`random_seed`	`int`	Set to None or -1 to disable.	`19`
`add_hs`	`bool`	Whether to add hydrogens to the mol before embedding. If set to True, the hydrogens are removed in the returned molecule. Warning: explicit hydrogens won't be conserved. It is strongly recommended to let the default value to True. The RDKit documentation says: "To get good 3D conformations, it's almost always a good idea to add hydrogens to the molecule first."	`True`
`ignore_failure`	`bool`	It set to True, this will avoid raising an error when the embedding fails and return None instead.	`False`
`embed_params`	`Optional[dict]`	Allows the user to specify arbitrary embedding parameters for the conformers. This will override any other default settings. See https://www.rdkit.org/docs/source/rdkit.Chem.rdDistGeom.html#rdkit.Chem.rdDistGeom.EmbedParameters for more details.	`None`
`verbose`	`bool`	Wether to enable logs during the process.	`False`

Returns:

Name	Type	Description
`mol`	`Mol`	the molecule with the conformers.

Source code in datamol/conformers/_conformers.py

def generate(
    mol: Mol,
    n_confs: Optional[int] = None,
    use_random_coords: bool = True,
    enforce_chirality: bool = True,
    num_threads: int = 1,
    rms_cutoff: Optional[float] = None,
    clear_existing: bool = True,
    align_conformers: bool = True,
    minimize_energy: bool = False,
    sort_by_energy: bool = True,
    method: Optional[str] = None,
    forcefield: str = "UFF",
    ewindow: float = np.inf,
    eratio: float = np.inf,
    energy_iterations: int = 200,
    warning_not_converged: int = 0,
    random_seed: int = 19,
    add_hs: bool = True,
    ignore_failure: bool = False,
    embed_params: Optional[dict] = None,
    verbose: bool = False,
) -> Mol:
    """Compute conformers of a molecule.

    Example:

    ```python
    import datamol as dm
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Get all conformers as a list
    conformers = mol.GetConformers()

    # Get the 3D atom positions of the first conformer
    positions = mol.GetConformer(0).GetPositions()

    # If minimization has been enabled (default to True)
    # you can access the computed energy.
    conf = mol.GetConformer(1)
    props = conf.GetPropsAsDict()
    print(props)
    # {'rdkit_UFF_energy': 35.64074017773132,'rdkit_UFF_delta_energy': 0.24682258222552633}
    ```

    Args:
        mol: a molecule
        n_confs: Number of conformers to generate. Depends on the number of rotatable bonds
            by default: 50 for <8, 200 for <12 and 300 for >12.
        use_random_coords: Start the embedding from random coordinates instead of using eigenvalues
            of the distance matrix.
        enforce_chirality: Enforce correct chirilaty if chiral centers are present.
        num_threads: Number of threads to use when embedding multiple conformations.
        rms_cutoff: The minimum RMS value in Angstrom at which two conformers
            are considered redundant and one is deleted. If None, all conformers
            are kept. This step is done after an eventual minimization step.
        clear_existing: Whether to overwrite existing conformers for the molecule.
        align_conformers: Whether to align the conformers.
        minimize_energy: Whether to minimize conformer's energies using MMFF94s.
            Disable to generate conformers much faster.
        sort_by_energy: Sort conformers by energy when minimizing is turned to False.
        method: RDKit method to use for embedding. Choose among
            ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]. If None, "ETKDGv3" is used.
        forcefield: molecular forcefield to use, one of ['UFF','MMFF94s','MMFF94s_noEstat']
        ewindow: maximum energy above minimum energy conformer to output
        eratio: max delta-energy divided by rotatable bonds for conformers
        energy_iterations: Maximum number of iterations during the energy minimization procedure.
            It corresponds to the `maxIters` argument in RDKit.
        warning_not_converged: Wether to log a warning when the number of not converged conformers
            during the minimization is higher than `warning_not_converged`. Only works when `verbose` is set to True.
            Disable with 0. Defaults to 10.
        random_seed: Set to None or -1 to disable.
        add_hs: Whether to add hydrogens to the mol before embedding. If set to True, the hydrogens
            are removed in the returned molecule. Warning: explicit hydrogens won't be conserved. It is strongly
            recommended to let the default value to True. The RDKit documentation says: "To get good 3D conformations,
            it's almost always a good idea to add hydrogens to the molecule first."
        ignore_failure: It set to True, this will avoid raising an error when the embedding fails and return None instead.
        embed_params: Allows the user to specify arbitrary embedding parameters for the conformers. This will override any
            other default settings. See https://www.rdkit.org/docs/source/rdkit.Chem.rdDistGeom.html#rdkit.Chem.rdDistGeom.EmbedParameters
            for more details.
        verbose: Wether to enable logs during the process.

    Returns:
        mol: the molecule with the conformers.
    """

    AVAILABLE_METHODS = ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]

    if method is None:
        method = "ETKDGv3"

    if method not in AVAILABLE_METHODS:
        raise ValueError(f"The method {method} is not supported. Use from {AVAILABLE_METHODS}")

    # Random seed
    if random_seed is None:
        random_seed = -1

    # Clone molecule
    mol = copy.deepcopy(mol)

    # Remove existing conformers
    if clear_existing:
        mol.RemoveAllConformers()

    # Add hydrogens
    if add_hs:
        mol = dm_mol.add_hs(mol)

    rotatable_bonds = descriptors.n_rotatable_bonds(mol)
    if not n_confs:
        # Set the number of conformers depends on
        # the number of rotatable bonds.
        if rotatable_bonds < 8:
            n_confs = 50
        elif rotatable_bonds < 12:
            n_confs = 200
        else:
            n_confs = 300

    # Setup the parameters for the embedding
    params = getattr(rdDistGeom, method)()
    params.randomSeed = random_seed
    params.enforceChirality = enforce_chirality
    params.useRandomCoords = use_random_coords
    params.numThreads = num_threads

    if embed_params is not None:
        for k, v in embed_params.items():
            setattr(params, k, v)

    # Embed conformers
    confs = rdDistGeom.EmbedMultipleConfs(mol, numConfs=n_confs, params=params)

    if len(confs) == 0:
        if ignore_failure:
            if verbose:
                logger.warning(
                    f"Conformers embedding failed for {convert.to_smiles(mol)}. Returning None because ignore_failure is set."
                )
            return None
        raise ValueError(f"Conformers embedding failed for {convert.to_smiles(mol)}")

    energies = None

    # Minimize energy
    if minimize_energy:
        # Minimize conformer's energy using MMFF
        ff = _get_ff(mol, forcefield)
        results = rdForceFieldHelpers.OptimizeMoleculeConfs(
            mol, ff, maxIters=energy_iterations, numThreads=num_threads
        )
        energies = [energy for _, energy in results]

        # Some conformers might not have converged during minimization.
        not_converged = sum([not_converged for not_converged, _ in results if not_converged])
        if warning_not_converged != 0 and not_converged > warning_not_converged and verbose:
            logger.warning(
                f"{not_converged}/{len(results)} conformers have not converged for {convert.to_smiles(mol)}"
            )

    elif sort_by_energy:
        energies = []
        for conf in mol.GetConformers():
            ff = _get_ff(mol, forcefield, conf_id=conf.GetId())
            energies.append(ff.CalcEnergy())
        energies = np.array(energies)

    if energies is not None:
        minE = np.min(energies)
        # Add the energy as a property to each conformers
        [
            (
                conf.SetDoubleProp(f"rdkit_{forcefield}_energy", energy),
                conf.SetDoubleProp(f"rdkit_{forcefield}_delta_energy", energy - minE),
            )
            for energy, conf in zip(energies, mol.GetConformers())
        ]

        # Now we reorder conformers according to their energies,
        # so the lowest energies conformers are first.  eliminate conformers that exceed ewindow, eratio
        mol_clone = copy.deepcopy(mol)
        ordered_conformers = [
            conf
            for E, conf in sorted(zip(energies, mol_clone.GetConformers()), key=lambda x: x[0])
            if E - minE <= ewindow and (E - minE) / rotatable_bonds <= eratio
        ]
        mol.RemoveAllConformers()
        [mol.AddConformer(conf, assignId=True) for conf in ordered_conformers]

    # Align conformers to each others
    if align_conformers:
        rdMolAlign.AlignMolConformers(mol)

    if rms_cutoff is not None:
        mol = cluster(
            mol,
            rms_cutoff=rms_cutoff,
            already_aligned=align_conformers,
            centroids=True,
        )  # type: ignore

    if add_hs:
        mol = dm_mol.remove_hs(mol)

    return mol

`return_centroids(mol, conf_clusters, centroids=True)` ¶

Given a list of cluster indices, return one single molecule with only the centroid of the clusters of a list of molecules per cluster.

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	a molecule.	required
`conf_clusters`	`Sequence[Sequence[int]]`	list of cluster indices.	required
`centroids`	`bool`	If True, return one molecule with centroid conformers only. If False return a list of molecules per cluster with all the conformers of the cluster.	`True`

Source code in datamol/conformers/_conformers.py

def return_centroids(
    mol: Mol,
    conf_clusters: Sequence[Sequence[int]],
    centroids: bool = True,
) -> Union[List[Mol], Mol]:
    """Given a list of cluster indices, return one single molecule
    with only the centroid of the clusters of a list of molecules per cluster.

    Args:
        mol: a molecule.
        conf_clusters: list of cluster indices.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster.
    """

    if centroids:
        # Collect centroid of each cluster (first element of the list)
        centroid_list = [indices[0] for indices in conf_clusters]

        # Keep only centroid conformers
        mol_clone = copy.deepcopy(mol)
        confs = [mol_clone.GetConformers()[i] for i in centroid_list]
        mol.RemoveAllConformers()
        [mol.AddConformer(conf, assignId=True) for conf in confs]
        return mol

    else:
        # Create a new molecule for each cluster and add conformers to it.
        mols = []
        for cluster in conf_clusters:
            m = copy.deepcopy(mol)
            m.RemoveAllConformers()
            [m.AddConformer(mol.GetConformer(c), assignId=True) for c in cluster]
            mols.append(m)
        return mols

`rmsd(mol)` ¶

Compute the RMSD between all the conformers of a molecule.

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	a molecule	required

Source code in datamol/conformers/_conformers.py

def rmsd(mol: Mol) -> np.ndarray:
    """Compute the RMSD between all the conformers of a molecule.

    Args:
        mol: a molecule
    """

    if mol.GetNumConformers() <= 1:
        raise ValueError(
            "The molecule has 0 or 1 conformer. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    n_confs = mol.GetNumConformers()
    rmsds = []
    for i in range(n_confs):
        for j in range(n_confs):
            rmsd = rdMolAlign.AlignMol(prbMol=mol, refMol=mol, prbCid=i, refCid=j)
            rmsds.append(rmsd)
    return np.array(rmsds).reshape(n_confs, n_confs)

`translate(mol, new_centroid, conf_id=-1)` ¶

Move a given conformer of a molecule to a new position. The transformation is performed in place.

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	the molecule.	required
`new_centroid`	`Union[np.ndarray, List[int]]`	the new position to move to of shape [x, y, z]	required
`conf_id`	`int`	id of the conformer.	`-1`

Source code in datamol/conformers/_conformers.py

def translate(mol: Mol, new_centroid: Union[np.ndarray, List[int]], conf_id: int = -1):
    """Move a given conformer of a molecule to a new position. The transformation is performed
    in place.

    Args:
        mol: the molecule.
        new_centroid: the new position to move to of shape [x, y, z]
        conf_id: id of the conformer.
    """

    # Get conformer
    conf = mol.GetConformer(conf_id)

    # Compute the vector for translation
    mol_center = rdMolTransforms.ComputeCentroid(conf)
    mol_center = np.array([mol_center.x, mol_center.y, mol_center.z])

    # Make the transformation matrix
    T = np.eye(4)
    T[:3, 3] = new_centroid - mol_center

    # Transform
    rdMolTransforms.TransformConformer(conf, T)

`center_of_mass(mol, use_atoms=True, digits=None, conf_id=-1)` ¶

Compute the center of mass of a conformer of a molecule.

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	a molecule	required
`use_atoms`	`bool`	Whether to compute the true center of mass or the geometrical center.	`True`
`digits`	`Optional[int]`	Number of digits to round to.	`None`
`conf_id`	`int`	the conformer id.	`-1`

Returns cm: Center of mass or geometrical center

Source code in datamol/conformers/_features.py

def center_of_mass(
    mol: Mol,
    use_atoms: bool = True,
    digits: Optional[int] = None,
    conf_id: int = -1,
) -> np.ndarray:
    """Compute the center of mass of a conformer of a molecule.

    Args:
        mol: a molecule
        use_atoms: Whether to compute the true center of mass or the geometrical center.
        digits: Number of digits to round to.
        conf_id: the conformer id.

    Returns
        cm: Center of mass or geometrical center
    """
    coords = get_coords(mol, conf_id=conf_id)
    atom_weight = np.ones((coords.shape[0]))

    if use_atoms:
        atom_weight = np.array([atom.GetMass() for atom in mol.GetAtoms()])

    atom_weight = atom_weight[:, None]
    atom_weight /= atom_weight.sum()
    center = (coords * atom_weight).sum(axis=0)

    if digits is not None:
        center = center.round(digits)

    return center

`get_coords(mol, conf_id=-1)` ¶

Get the coordinate of a conformer of a molecule.

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	a molecule.	required
`conf_id`	`int`	a conformer id.	`-1`

Source code in datamol/conformers/_features.py

def get_coords(mol: Mol, conf_id: int = -1):
    """Get the coordinate of a conformer of a molecule.

    Args:
        mol: a molecule.
        conf_id: a conformer id.
    """

    if mol.GetNumConformers() == 0:
        raise ValueError("Molecule does not have any conformers.")

    conf = mol.GetConformer(id=conf_id)
    return conf.GetPositions()

`keep_conformers(mol, indices_to_keep=-1, assign_id=True, copy=True)` ¶

Keep on the specified conformer(s) in indices_to_keep.

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	A molecule.	required
`indices_to_keep`	`Union[int, List[int]]`	A indice or a least of indices of conformers to keep.	`-1`
`assign_id`	`bool`	Whether to assign the kept conformers an id or keep the original one.	`True`
`copy`	`bool`	Whether to copy the molecule or not.	`True`

Source code in datamol/conformers/_features.py

def keep_conformers(
    mol: Mol,
    indices_to_keep: Union[int, List[int]] = -1,
    assign_id: bool = True,
    copy: bool = True,
):
    """Keep on the specified conformer(s) in `indices_to_keep`.

    Args:
        mol: A molecule.
        indices_to_keep: A indice or a least of indices of conformers to keep.
        assign_id: Whether to assign the kept conformers an id or keep the original one.
        copy: Whether to copy the molecule or not.
    """

    if copy:
        mol = copy_mol(mol)

    if not isinstance(indices_to_keep, list):
        indices_to_keep = [indices_to_keep]

    # Extract conformers to keep
    confs_to_keep = [mol.GetConformer(conf_id) for conf_id in indices_to_keep]

    # Copy current mol and remove all conformers
    mol2 = copy_mol(mol)
    mol2.RemoveAllConformers()

    # Add conformers
    _ = [mol2.AddConformer(conf, assignId=assign_id) for conf in confs_to_keep]

    # Cleanup
    mol = mol2

    return mol

`sasa(mol, conf_id=None, n_jobs=1)` ¶

Compute Solvent Accessible Surface Area of all the conformers using FreeSASA (https://freesasa.github.io/). Values are returned as an array and also stored within each conformer as a property called rdkit_free_sasa.

Example:

smiles = "O=C(C)Oc1ccccc1C(=O)O"
mol = dm.to_mol(smiles)
mol = dm.conformers.generate(mol)

# Compute SASA for all the conformers without parallelization
sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

# If minimization has been enabled (default to True)
# you can access the computed energy.
conf = mol.GetConformer(0)
props = conf.GetPropsAsDict()
print(props)
# {'rdkit_uff_energy': 1.7649408317784008}

Parameters:

Name	Type	Description	Default
`mol`	`Mol`	a molecule	required
`conf_id`	`Optional[Union[int, List[int]]]`	Id of the conformers to compute. If None, compute all.	`None`
`n_jobs`	`int`	Number of jobs for parallelization. Set to 1 to disable and -1 to use all cores.	`1`

Returns:

Name	Type	Description
`mol`	`np.ndarray`	the molecule with the conformers.

Source code in datamol/conformers/_features.py

@decorators.disable_on_os("win")
def sasa(
    mol: Mol,
    conf_id: Optional[Union[int, List[int]]] = None,
    n_jobs: int = 1,
) -> np.ndarray:
    """Compute Solvent Accessible Surface Area of all the conformers
    using FreeSASA (https://freesasa.github.io/). Values are returned
    as an array and also stored within each conformer as a property
    called `rdkit_free_sasa`.

    Example:

    ```python
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Compute SASA for all the conformers without parallelization
    sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

    # If minimization has been enabled (default to True)
    # you can access the computed energy.
    conf = mol.GetConformer(0)
    props = conf.GetPropsAsDict()
    print(props)
    # {'rdkit_uff_energy': 1.7649408317784008}
    ```

    Args:
        mol: a molecule
        conf_id: Id of the conformers to compute. If None, compute all.
        n_jobs: Number of jobs for parallelization. Set to 1 to disable
            and -1 to use all cores.

    Returns:
        mol: the molecule with the conformers.
    """
    from rdkit.Chem import rdFreeSASA

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Get Van der Waals radii (angstrom)
    radii = [PERIODIC_TABLE.GetRvdw(atom.GetAtomicNum()) for atom in mol.GetAtoms()]

    # Which conformers to compute
    conf_ids = []
    if conf_id is None:
        # If None compute for all the conformers
        conf_ids = list(range(mol.GetNumConformers()))  # type: ignore
    elif isinstance(conf_id, int):
        conf_ids = [conf_id]
    else:
        conf_ids = conf_id

    # Compute solvent accessible surface area
    def _get_sasa(i):
        conf = mol.GetConformer(i)
        sasa = rdFreeSASA.CalcSASA(mol, radii, confIdx=conf.GetId())
        conf.SetDoubleProp("rdkit_free_sasa", sasa)
        return sasa

    runner = JobRunner(n_jobs=n_jobs)
    sasa_values = runner(_get_sasa, conf_ids)
    return np.array(sasa_values)

datamol.conformers¶

align_conformers(mols, ref_id=0, copy=True, conformer_id=-1, backend='crippenO3A') ¶

cluster(mol, rms_cutoff=1, already_aligned=False, centroids=True) ¶

return_centroids(mol, conf_clusters, centroids=True) ¶

rmsd(mol) ¶

translate(mol, new_centroid, conf_id=-1) ¶

center_of_mass(mol, use_atoms=True, digits=None, conf_id=-1) ¶

get_coords(mol, conf_id=-1) ¶

keep_conformers(mol, indices_to_keep=-1, assign_id=True, copy=True) ¶

sasa(mol, conf_id=None, n_jobs=1) ¶

`datamol.conformers`¶

`align_conformers(mols, ref_id=0, copy=True, conformer_id=-1, backend='crippenO3A')` ¶

`cluster(mol, rms_cutoff=1, already_aligned=False, centroids=True)` ¶

`return_centroids(mol, conf_clusters, centroids=True)` ¶

`rmsd(mol)` ¶

`translate(mol, new_centroid, conf_id=-1)` ¶

`center_of_mass(mol, use_atoms=True, digits=None, conf_id=-1)` ¶

`get_coords(mol, conf_id=-1)` ¶

`keep_conformers(mol, indices_to_keep=-1, assign_id=True, copy=True)` ¶

`sasa(mol, conf_id=None, n_jobs=1)` ¶