Skip to content

datamol.conformers

align_conformers(mols, ref_id=0, copy=True, conformer_id=-1, backend='crippenO3A')

Align a list of molecules to a reference molecule.

Note that using the O3A backend, hydrogens will be added at the beginning of the procedure and removed at the end of the procedure.

Parameters:

Name Type Description Default
mols List[Mol]

List of molecules to align. All the molecules must have a conformer.

required
ref_id int

Index of the reference molecule. By default, the first molecule in the list will be used as reference.

0
copy bool

Whether to copy the molecules before performing the alignement.

True
conformer_id int

Conformer id to use.

-1
backend str

Backend to use to compute the alignment from crippenO3A, O3A.

'crippenO3A'

Returns:

Name Type Description
mols list

The aligned molecules.

scores list

The score of the alignement.

Source code in datamol/conformers/_conformers.py
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
def align_conformers(
    mols: List[Mol],
    ref_id: int = 0,
    copy: bool = True,
    conformer_id: int = -1,
    backend: str = "crippenO3A",
) -> Tuple[list, list]:
    """Align a list of molecules to a reference molecule.

    Note that using the `O3A` backend, hydrogens will be added at the beginning of the procedure
    and removed at the end of the procedure.

    Args:
        mols: List of molecules to align. All the molecules must have a conformer.
        ref_id: Index of the reference molecule. By default, the first molecule in the list
            will be used as reference.
        copy: Whether to copy the molecules before performing the alignement.
        conformer_id: Conformer id to use.
        backend: Backend to use to compute the alignment from `crippenO3A`, `O3A`.

    Returns:
        mols: The aligned molecules.
        scores: The score of the alignement.
    """

    allowed_backends = ["crippenO3A", "O3A"]
    if backend not in allowed_backends:
        raise ValueError(
            f"The backend '{backend}' is not supported. Choose from: {allowed_backends}"
        )

    # Check all input molecules has a conformer
    if not all([mol.GetNumConformers() >= 1 for mol in mols]):
        raise ValueError("One or more input molecules is missing a conformer.")

    # Make a copy of the molecules since they are going to be modified
    if copy:
        mols = [dm_mol.copy_mol(mol) for mol in mols]

    # Split ref and probe mols
    mol_ref = mols[ref_id]
    mol_probes = mols

    if backend == "crippenO3A":
        # Compute Crippen contributions for every atoms and molecules
        crippen_contribs = [rdMolDescriptors._CalcCrippenContribs(mol) for mol in mol_probes]
        crippen_contrib_ref = crippen_contribs[ref_id]
        crippen_contrib_probes = crippen_contribs

        # Loop and align
        # NOTE(hadim): we could eventually parallelize this if that's needed.

        scores = []
        for i, mol in enumerate(mol_probes):
            crippenO3A = rdMolAlign.GetCrippenO3A(
                prbMol=mol,
                refMol=mol_ref,
                prbCrippenContribs=crippen_contrib_probes[i],
                refCrippenContribs=crippen_contrib_ref,
                prbCid=conformer_id,
                refCid=conformer_id,
                maxIters=50,
            )
            crippenO3A.Align()

            scores.append(crippenO3A.Score())

    elif backend == "O3A":
        # Add hydrogens first
        mol_probes = [dm_mol.add_hs(mol, add_coords=True) for mol in mol_probes]
        mol_ref = dm_mol.add_hs(mol_ref, add_coords=True)

        # Compute MMFF params for every molecules
        mmff_params = [rdForceFieldHelpers.MMFFGetMoleculeProperties(mol) for mol in mol_probes]

        # Split reference and probe molecules
        mmff_params_ref = mmff_params[ref_id]
        mmff_params_probes = mmff_params

        # Loop and align
        # NOTE(hadim): we could eventually parallelize this if that's needed.

        scores = []
        for i, mol in enumerate(mol_probes):
            pyO3A = rdMolAlign.GetO3A(
                prbMol=mol,
                refMol=mol_ref,
                prbPyMMFFMolProperties=mmff_params_probes[i],
                refPyMMFFMolProperties=mmff_params_ref,
                prbCid=conformer_id,
                refCid=conformer_id,
                maxIters=50,
            )
            pyO3A.Align()

            scores.append(pyO3A.Score())

        # Remove the hydrogens
        mol_probes = [dm_mol.remove_hs(mol) for mol in mol_probes]

    else:
        raise ValueError(f"Backend {backend} not supported.")

    scores = np.array(scores)

    return mol_probes, scores

cluster(mol, rms_cutoff=1, already_aligned=False, centroids=True)

Cluster the conformers of a molecule according to an RMS threshold in Angstrom.

Parameters:

Name Type Description Default
mol Mol

a molecule

required
rms_cutoff float

The RMS cutoff in Angstrom.

1
already_aligned bool

Whether or not the conformers are aligned. If False, they will be aligmned furing the RMS computation.

False
centroids bool

If True, return one molecule with centroid conformers only. If False return a list of molecules per cluster with all the conformers of the cluster. Defaults to True.

True
Source code in datamol/conformers/_conformers.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
def cluster(
    mol: Mol,
    rms_cutoff: float = 1,
    already_aligned: bool = False,
    centroids: bool = True,
):
    """Cluster the conformers of a molecule according to an RMS threshold in Angstrom.

    Args:
        mol: a molecule
        rms_cutoff: The RMS cutoff in Angstrom.
        already_aligned: Whether or not the conformers are aligned. If False,
            they will be aligmned furing the RMS computation.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster. Defaults to True.
    """

    # Clone molecule
    mol = copy.deepcopy(mol)

    # Compute RMS
    dmat = AllChem.GetConformerRMSMatrix(mol, prealigned=already_aligned)

    # Cluster
    conf_clusters = Butina.ClusterData(
        dmat,
        nPts=mol.GetNumConformers(),
        distThresh=rms_cutoff,
        isDistData=True,
        reordering=False,
    )

    return return_centroids(mol, conf_clusters, centroids=centroids)

generate(mol, n_confs=None, use_random_coords=True, enforce_chirality=True, num_threads=1, rms_cutoff=None, clear_existing=True, align_conformers=True, minimize_energy=False, sort_by_energy=True, method=None, forcefield='UFF', ewindow=np.inf, eratio=np.inf, energy_iterations=200, warning_not_converged=0, random_seed=19, add_hs=True, ignore_failure=False, embed_params=None, verbose=False)

Compute conformers of a molecule.

Example:

import datamol as dm
smiles = "O=C(C)Oc1ccccc1C(=O)O"
mol = dm.to_mol(smiles)
mol = dm.conformers.generate(mol)

# Get all conformers as a list
conformers = mol.GetConformers()

# Get the 3D atom positions of the first conformer
positions = mol.GetConformer(0).GetPositions()

# If minimization has been enabled (default to True)
# you can access the computed energy.
conf = mol.GetConformer(1)
props = conf.GetPropsAsDict()
print(props)
# {'rdkit_UFF_energy': 35.64074017773132,'rdkit_UFF_delta_energy': 0.24682258222552633}

Parameters:

Name Type Description Default
mol Mol

a molecule

required
n_confs Optional[int]

Number of conformers to generate. Depends on the number of rotatable bonds by default: 50 for <8, 200 for <12 and 300 for >12.

None
use_random_coords bool

Start the embedding from random coordinates instead of using eigenvalues of the distance matrix.

True
enforce_chirality bool

Enforce correct chirilaty if chiral centers are present.

True
num_threads int

Number of threads to use when embedding multiple conformations.

1
rms_cutoff Optional[float]

The minimum RMS value in Angstrom at which two conformers are considered redundant and one is deleted. If None, all conformers are kept. This step is done after an eventual minimization step.

None
clear_existing bool

Whether to overwrite existing conformers for the molecule.

True
align_conformers bool

Whether to align the conformers.

True
minimize_energy bool

Whether to minimize conformer's energies using MMFF94s. Disable to generate conformers much faster.

False
sort_by_energy bool

Sort conformers by energy when minimizing is turned to False.

True
method Optional[str]

RDKit method to use for embedding. Choose among ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]. If None, "ETKDGv3" is used.

None
forcefield str

molecular forcefield to use, one of ['UFF','MMFF94s','MMFF94s_noEstat']

'UFF'
ewindow float

maximum energy above minimum energy conformer to output

inf
eratio float

max delta-energy divided by rotatable bonds for conformers. Ignored if the molecule has no rotatable bonds.

inf
energy_iterations int

Maximum number of iterations during the energy minimization procedure. It corresponds to the maxIters argument in RDKit.

200
warning_not_converged int

Wether to log a warning when the number of not converged conformers during the minimization is higher than warning_not_converged. Only works when verbose is set to True. Disable with 0. Defaults to 10.

0
random_seed int

Set to None or -1 to disable.

19
add_hs bool

Whether to add hydrogens to the mol before embedding. If set to True, the hydrogens are removed in the returned molecule. Warning: explicit hydrogens won't be conserved. It is strongly recommended to let the default value to True. The RDKit documentation says: "To get good 3D conformations, it's almost always a good idea to add hydrogens to the molecule first."

True
ignore_failure bool

It set to True, this will avoid raising an error when the embedding fails and return None instead.

False
embed_params Optional[dict]

Allows the user to specify arbitrary embedding parameters for the conformers. This will override any other default settings. See https://www.rdkit.org/docs/source/rdkit.Chem.rdDistGeom.html#rdkit.Chem.rdDistGeom.EmbedParameters for more details.

None
verbose bool

Wether to enable logs during the process.

False

Returns:

Name Type Description
mol Mol

the molecule with the conformers.

Source code in datamol/conformers/_conformers.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def generate(
    mol: Mol,
    n_confs: Optional[int] = None,
    use_random_coords: bool = True,
    enforce_chirality: bool = True,
    num_threads: int = 1,
    rms_cutoff: Optional[float] = None,
    clear_existing: bool = True,
    align_conformers: bool = True,
    minimize_energy: bool = False,
    sort_by_energy: bool = True,
    method: Optional[str] = None,
    forcefield: str = "UFF",
    ewindow: float = np.inf,
    eratio: float = np.inf,
    energy_iterations: int = 200,
    warning_not_converged: int = 0,
    random_seed: int = 19,
    add_hs: bool = True,
    ignore_failure: bool = False,
    embed_params: Optional[dict] = None,
    verbose: bool = False,
) -> Mol:
    """Compute conformers of a molecule.

    Example:

    ```python
    import datamol as dm
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Get all conformers as a list
    conformers = mol.GetConformers()

    # Get the 3D atom positions of the first conformer
    positions = mol.GetConformer(0).GetPositions()

    # If minimization has been enabled (default to True)
    # you can access the computed energy.
    conf = mol.GetConformer(1)
    props = conf.GetPropsAsDict()
    print(props)
    # {'rdkit_UFF_energy': 35.64074017773132,'rdkit_UFF_delta_energy': 0.24682258222552633}
    ```

    Args:
        mol: a molecule
        n_confs: Number of conformers to generate. Depends on the number of rotatable bonds
            by default: 50 for <8, 200 for <12 and 300 for >12.
        use_random_coords: Start the embedding from random coordinates instead of using eigenvalues
            of the distance matrix.
        enforce_chirality: Enforce correct chirilaty if chiral centers are present.
        num_threads: Number of threads to use when embedding multiple conformations.
        rms_cutoff: The minimum RMS value in Angstrom at which two conformers
            are considered redundant and one is deleted. If None, all conformers
            are kept. This step is done after an eventual minimization step.
        clear_existing: Whether to overwrite existing conformers for the molecule.
        align_conformers: Whether to align the conformers.
        minimize_energy: Whether to minimize conformer's energies using MMFF94s.
            Disable to generate conformers much faster.
        sort_by_energy: Sort conformers by energy when minimizing is turned to False.
        method: RDKit method to use for embedding. Choose among
            ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]. If None, "ETKDGv3" is used.
        forcefield: molecular forcefield to use, one of ['UFF','MMFF94s','MMFF94s_noEstat']
        ewindow: maximum energy above minimum energy conformer to output
        eratio: max delta-energy divided by rotatable bonds for conformers.
            Ignored if the molecule has no rotatable bonds.
        energy_iterations: Maximum number of iterations during the energy minimization procedure.
            It corresponds to the `maxIters` argument in RDKit.
        warning_not_converged: Wether to log a warning when the number of not converged conformers
            during the minimization is higher than `warning_not_converged`. Only works when `verbose` is set to True.
            Disable with 0. Defaults to 10.
        random_seed: Set to None or -1 to disable.
        add_hs: Whether to add hydrogens to the mol before embedding. If set to True, the hydrogens
            are removed in the returned molecule. Warning: explicit hydrogens won't be conserved. It is strongly
            recommended to let the default value to True. The RDKit documentation says: "To get good 3D conformations,
            it's almost always a good idea to add hydrogens to the molecule first."
        ignore_failure: It set to True, this will avoid raising an error when the embedding fails and return None instead.
        embed_params: Allows the user to specify arbitrary embedding parameters for the conformers. This will override any
            other default settings. See https://www.rdkit.org/docs/source/rdkit.Chem.rdDistGeom.html#rdkit.Chem.rdDistGeom.EmbedParameters
            for more details.
        verbose: Wether to enable logs during the process.

    Returns:
        mol: the molecule with the conformers.
    """

    AVAILABLE_METHODS = ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]

    if method is None:
        method = "ETKDGv3"

    if method not in AVAILABLE_METHODS:
        raise ValueError(f"The method {method} is not supported. Use from {AVAILABLE_METHODS}")

    # Random seed
    if random_seed is None:
        random_seed = -1

    # Clone molecule
    mol = copy.deepcopy(mol)

    # Remove existing conformers
    if clear_existing:
        mol.RemoveAllConformers()

    # Add hydrogens
    if add_hs:
        mol = dm_mol.add_hs(mol)

    rotatable_bonds = descriptors.n_rotatable_bonds(mol)
    if not n_confs:
        # Set the number of conformers depends on
        # the number of rotatable bonds.
        if rotatable_bonds < 8:
            n_confs = 50
        elif rotatable_bonds < 12:
            n_confs = 200
        else:
            n_confs = 300

    # Setup the parameters for the embedding
    params = getattr(rdDistGeom, method)()
    params.randomSeed = random_seed
    params.enforceChirality = enforce_chirality
    params.useRandomCoords = use_random_coords
    params.numThreads = num_threads

    if embed_params is not None:
        for k, v in embed_params.items():
            setattr(params, k, v)

    # Embed conformers
    confs = rdDistGeom.EmbedMultipleConfs(mol, numConfs=n_confs, params=params)

    if len(confs) == 0:
        if ignore_failure:
            if verbose:
                logger.warning(
                    f"Conformers embedding failed for {convert.to_smiles(mol)}. Returning None because ignore_failure is set."
                )
            return None
        raise ValueError(f"Conformers embedding failed for {convert.to_smiles(mol)}")

    energies = None

    # Minimize energy
    if minimize_energy:
        # Minimize conformer's energy using MMFF
        ff = _get_ff(mol, forcefield)
        results = rdForceFieldHelpers.OptimizeMoleculeConfs(
            mol, ff, maxIters=energy_iterations, numThreads=num_threads
        )
        energies = [energy for _, energy in results]

        # Some conformers might not have converged during minimization.
        not_converged = sum([not_converged for not_converged, _ in results if not_converged])
        if warning_not_converged != 0 and not_converged > warning_not_converged and verbose:
            logger.warning(
                f"{not_converged}/{len(results)} conformers have not converged for {convert.to_smiles(mol)}"
            )

    elif sort_by_energy:
        energies = []
        for conf in mol.GetConformers():
            ff = _get_ff(mol, forcefield, conf_id=conf.GetId())
            energies.append(ff.CalcEnergy())
        energies = np.array(energies)

    if energies is not None:
        minE = np.min(energies)
        # Add the energy as a property to each conformer
        [
            (
                conf.SetDoubleProp(f"rdkit_{forcefield}_energy", energy),
                conf.SetDoubleProp(f"rdkit_{forcefield}_delta_energy", energy - minE),
            )
            for energy, conf in zip(energies, mol.GetConformers())
        ]

        # Now we reorder conformers according to their energies,
        # so the lowest energies conformers are first.  eliminate conformers that exceed ewindow, eratio
        mol_clone = copy.deepcopy(mol)
        ordered_conformers = [
            conf
            for E, conf in sorted(zip(energies, mol_clone.GetConformers()), key=lambda x: x[0])
            if E - minE <= ewindow
            and (rotatable_bonds == 0 or (E - minE) / rotatable_bonds <= eratio)
        ]
        mol.RemoveAllConformers()
        [mol.AddConformer(conf, assignId=True) for conf in ordered_conformers]

    # Align conformers to each others
    if align_conformers:
        rdMolAlign.AlignMolConformers(mol)

    if rms_cutoff is not None:
        mol = cluster(
            mol,
            rms_cutoff=rms_cutoff,
            already_aligned=align_conformers,
            centroids=True,
        )  # type: ignore

    if add_hs:
        mol = dm_mol.remove_hs(mol)

    return mol

return_centroids(mol, conf_clusters, centroids=True)

Given a list of cluster indices, return one single molecule with only the centroid of the clusters of a list of molecules per cluster.

Parameters:

Name Type Description Default
mol Mol

a molecule.

required
conf_clusters Sequence[Sequence[int]]

list of cluster indices.

required
centroids bool

If True, return one molecule with centroid conformers only. If False return a list of molecules per cluster with all the conformers of the cluster.

True
Source code in datamol/conformers/_conformers.py
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def return_centroids(
    mol: Mol,
    conf_clusters: Sequence[Sequence[int]],
    centroids: bool = True,
) -> Union[List[Mol], Mol]:
    """Given a list of cluster indices, return one single molecule
    with only the centroid of the clusters of a list of molecules per cluster.

    Args:
        mol: a molecule.
        conf_clusters: list of cluster indices.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster.
    """

    if centroids:
        # Collect centroid of each cluster (first element of the list)
        centroid_list = [indices[0] for indices in conf_clusters]

        # Keep only centroid conformers
        mol_clone = copy.deepcopy(mol)
        confs = [mol_clone.GetConformers()[i] for i in centroid_list]
        mol.RemoveAllConformers()
        [mol.AddConformer(conf, assignId=True) for conf in confs]
        return mol

    else:
        # Create a new molecule for each cluster and add conformers to it.
        mols = []
        for cluster in conf_clusters:
            m = copy.deepcopy(mol)
            m.RemoveAllConformers()
            [m.AddConformer(mol.GetConformer(c), assignId=True) for c in cluster]
            mols.append(m)
        return mols

rmsd(mol)

Compute the RMSD between all the conformers of a molecule.

Parameters:

Name Type Description Default
mol Mol

a molecule

required
Source code in datamol/conformers/_conformers.py
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
def rmsd(mol: Mol) -> np.ndarray:
    """Compute the RMSD between all the conformers of a molecule.

    Args:
        mol: a molecule
    """

    if mol.GetNumConformers() <= 1:
        raise ValueError(
            "The molecule has 0 or 1 conformer. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    n_confs = mol.GetNumConformers()
    rmsds = []
    for i in range(n_confs):
        for j in range(n_confs):
            rmsd = rdMolAlign.AlignMol(prbMol=mol, refMol=mol, prbCid=i, refCid=j)
            rmsds.append(rmsd)
    return np.array(rmsds).reshape(n_confs, n_confs)

translate(mol, new_centroid, conf_id=-1)

Move a given conformer of a molecule to a new position. The transformation is performed in place.

Parameters:

Name Type Description Default
mol Mol

the molecule.

required
new_centroid Union[ndarray, List[int]]

the new position to move to of shape [x, y, z]

required
conf_id int

id of the conformer.

-1
Source code in datamol/conformers/_conformers.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
def translate(mol: Mol, new_centroid: Union[np.ndarray, List[int]], conf_id: int = -1):
    """Move a given conformer of a molecule to a new position. The transformation is performed
    in place.

    Args:
        mol: the molecule.
        new_centroid: the new position to move to of shape [x, y, z]
        conf_id: id of the conformer.
    """

    # Get conformer
    conf = mol.GetConformer(conf_id)

    # Compute the vector for translation
    mol_center = rdMolTransforms.ComputeCentroid(conf)
    mol_center = np.array([mol_center.x, mol_center.y, mol_center.z])

    # Make the transformation matrix
    T = np.eye(4)
    T[:3, 3] = new_centroid - mol_center

    # Transform
    rdMolTransforms.TransformConformer(conf, T)

center_of_mass(mol, use_atoms=True, digits=None, conf_id=-1)

Compute the center of mass of a conformer of a molecule.

Parameters:

Name Type Description Default
mol Mol

a molecule

required
use_atoms bool

Whether to compute the true center of mass or the geometrical center.

True
digits Optional[int]

Number of digits to round to.

None
conf_id int

the conformer id.

-1

Returns cm: Center of mass or geometrical center

Source code in datamol/conformers/_features.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def center_of_mass(
    mol: Mol,
    use_atoms: bool = True,
    digits: Optional[int] = None,
    conf_id: int = -1,
) -> np.ndarray:
    """Compute the center of mass of a conformer of a molecule.

    Args:
        mol: a molecule
        use_atoms: Whether to compute the true center of mass or the geometrical center.
        digits: Number of digits to round to.
        conf_id: the conformer id.

    Returns
        cm: Center of mass or geometrical center
    """
    coords = get_coords(mol, conf_id=conf_id)
    atom_weight = np.ones((coords.shape[0]))

    if use_atoms:
        atom_weight = np.array([atom.GetMass() for atom in mol.GetAtoms()])

    atom_weight = atom_weight[:, None]
    atom_weight /= atom_weight.sum()
    center = (coords * atom_weight).sum(axis=0)

    if digits is not None:
        center = center.round(digits)

    return center

get_coords(mol, conf_id=-1)

Get the coordinate of a conformer of a molecule.

Parameters:

Name Type Description Default
mol Mol

a molecule.

required
conf_id int

a conformer id.

-1
Source code in datamol/conformers/_features.py
84
85
86
87
88
89
90
91
92
93
94
95
96
def get_coords(mol: Mol, conf_id: int = -1):
    """Get the coordinate of a conformer of a molecule.

    Args:
        mol: a molecule.
        conf_id: a conformer id.
    """

    if mol.GetNumConformers() == 0:
        raise ValueError("Molecule does not have any conformers.")

    conf = mol.GetConformer(id=conf_id)
    return conf.GetPositions()

keep_conformers(mol, indices_to_keep=-1, assign_id=True, copy=True)

Keep on the specified conformer(s) in indices_to_keep.

Parameters:

Name Type Description Default
mol Mol

A molecule.

required
indices_to_keep Union[int, List[int]]

A indice or a least of indices of conformers to keep.

-1
assign_id bool

Whether to assign the kept conformers an id or keep the original one.

True
copy bool

Whether to copy the molecule or not.

True
Source code in datamol/conformers/_features.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
def keep_conformers(
    mol: Mol,
    indices_to_keep: Union[int, List[int]] = -1,
    assign_id: bool = True,
    copy: bool = True,
):
    """Keep on the specified conformer(s) in `indices_to_keep`.

    Args:
        mol: A molecule.
        indices_to_keep: A indice or a least of indices of conformers to keep.
        assign_id: Whether to assign the kept conformers an id or keep the original one.
        copy: Whether to copy the molecule or not.
    """

    if copy:
        mol = copy_mol(mol)

    if not isinstance(indices_to_keep, list):
        indices_to_keep = [indices_to_keep]

    # Extract conformers to keep
    confs_to_keep = [mol.GetConformer(conf_id) for conf_id in indices_to_keep]

    # Copy current mol and remove all conformers
    mol2 = copy_mol(mol)
    mol2.RemoveAllConformers()

    # Add conformers
    _ = [mol2.AddConformer(conf, assignId=assign_id) for conf in confs_to_keep]

    # Cleanup
    mol = mol2

    return mol

sasa(mol, conf_id=None, n_jobs=1)

Compute Solvent Accessible Surface Area of all the conformers using FreeSASA (https://freesasa.github.io/). Values are returned as an array and also stored within each conformer as a property called rdkit_free_sasa.

Example:

smiles = "O=C(C)Oc1ccccc1C(=O)O"
mol = dm.to_mol(smiles)
mol = dm.conformers.generate(mol)

# Compute SASA for all the conformers without parallelization
sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

# If minimization has been enabled (default to True)
# you can access the computed energy.
conf = mol.GetConformer(0)
props = conf.GetPropsAsDict()
print(props)
# {'rdkit_uff_energy': 1.7649408317784008}

Parameters:

Name Type Description Default
mol Mol

a molecule

required
conf_id Optional[Union[int, List[int]]]

Id of the conformers to compute. If None, compute all.

None
n_jobs int

Number of jobs for parallelization. Set to 1 to disable and -1 to use all cores.

1

Returns:

Name Type Description
mol ndarray

the molecule with the conformers.

Source code in datamol/conformers/_features.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
@decorators.disable_on_os("win")
def sasa(
    mol: Mol,
    conf_id: Optional[Union[int, List[int]]] = None,
    n_jobs: int = 1,
) -> np.ndarray:
    """Compute Solvent Accessible Surface Area of all the conformers
    using FreeSASA (https://freesasa.github.io/). Values are returned
    as an array and also stored within each conformer as a property
    called `rdkit_free_sasa`.

    Example:

    ```python
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Compute SASA for all the conformers without parallelization
    sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

    # If minimization has been enabled (default to True)
    # you can access the computed energy.
    conf = mol.GetConformer(0)
    props = conf.GetPropsAsDict()
    print(props)
    # {'rdkit_uff_energy': 1.7649408317784008}
    ```

    Args:
        mol: a molecule
        conf_id: Id of the conformers to compute. If None, compute all.
        n_jobs: Number of jobs for parallelization. Set to 1 to disable
            and -1 to use all cores.

    Returns:
        mol: the molecule with the conformers.
    """
    from rdkit.Chem import rdFreeSASA

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Get Van der Waals radii (angstrom)
    radii = [PERIODIC_TABLE.GetRvdw(atom.GetAtomicNum()) for atom in mol.GetAtoms()]

    # Which conformers to compute
    conf_ids = []
    if conf_id is None:
        # If None compute for all the conformers
        conf_ids = list(range(mol.GetNumConformers()))  # type: ignore
    elif isinstance(conf_id, int):
        conf_ids = [conf_id]
    else:
        conf_ids = conf_id

    # Compute solvent accessible surface area
    def _get_sasa(i):
        conf = mol.GetConformer(i)
        sasa = rdFreeSASA.CalcSASA(mol, radii, confIdx=conf.GetId())
        conf.SetDoubleProp("rdkit_free_sasa", sasa)
        return sasa

    runner = JobRunner(n_jobs=n_jobs)
    sasa_values = runner(_get_sasa, conf_ids)
    return np.array(sasa_values)