Skip to content

datamol.actions

_actions

add_bond_between(mol, a1, a2, bond_type, sanitize=True)

Add a new bond between two atoms.

Source code in datamol/actions/_actions.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def add_bond_between(
    mol: Chem.rdchem.Mol,
    a1: Union[int, Chem.rdchem.Atom],
    a2: Union[int, Chem.rdchem.Atom],
    bond_type: Chem.rdchem.BondType,
    sanitize: bool = True,
):
    """Add a new bond of type `bond_type` between two atoms.

    The edit is applied to `dm.copy_mol(mol)`, not to `mol` itself.

    Args:
        mol: Input molecule.
        a1: First atom, given either as an atom index or as an atom object.
        a2: Second atom, given either as an atom index or as an atom object.
        bond_type: Type of the bond to create.
        sanitize: Whether to pass the edited molecule through `dm.sanitize_mol`.

    Returns:
        The edited molecule, or the result of `dm.sanitize_mol` on it when
        `sanitize` is true.
    """
    # Normalise both endpoints to plain atom indices.
    idx1 = a1.GetIdx() if isinstance(a1, Chem.rdchem.Atom) else a1
    idx2 = a2.GetIdx() if isinstance(a2, Chem.rdchem.Atom) else a2

    editable = Chem.EditableMol(dm.copy_mol(mol))
    editable.AddBond(idx1, idx2, bond_type)
    edited = editable.GetMol()

    return dm.sanitize_mol(edited) if sanitize else edited

all_atom_add(mol, atom_types=['C', 'N', 'O', 'F', 'Cl', 'Br'], asMols=True, max_num_action=float('Inf'), **kwargs)

Add a new atom to the molecule, considering all possible bond types.

.. warning:: This is computationally expensive

Parameters:

Name Type Description Default
mol

Input molecule

required
atom_types

list List of atom symbol to use as replacement (Default: ["C", "N", "O", "F", "Cl", "Br"])

['C', 'N', 'O', 'F', 'Cl', 'Br']
asMols

bool, optional Whether to return output as molecule or smiles

True
max_num_action

float, optional Maximum number of action to reduce complexity

float('Inf')

Returns:

Type Description

All possible molecules with one additional atom added

Source code in datamol/actions/_actions.py
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
def all_atom_add(
    mol,
    atom_types=["C", "N", "O", "F", "Cl", "Br"],
    asMols=True,
    max_num_action=float("Inf"),
    **kwargs,
):
    """Enumerate molecules obtained by adding one new atom to `mol`.

    For every atom of `mol` with a non-zero implicit valence and every symbol
    in `atom_types`, a new atom is appended to a copy of the molecule and
    joined to the existing atom with each bond proposed by `all_atom_join`.

    .. warning::
        This is computationally expensive

    Args:
        mol: <Chem.Mol>
            Input molecule
        atom_types: list
            List of atom symbols to try as the new atom
            (Default: ["C", "N", "O", "F", "Cl", "Br"])
        asMols: bool, optional
            Whether to return output as molecule or smiles
        max_num_action: float, optional
            Enumeration stops once more than this many candidates
            have been collected
        **kwargs: accepted but not used by this function.

    Returns:
        All possible molecules with one additional atom added

    """
    candidates = []
    with dm.without_rdkit_log():
        for anchor in mol.GetAtoms():
            # Atoms without free valence cannot take a new neighbour.
            if anchor.GetImplicitValence() == 0:
                continue
            for symbol in atom_types:
                rw_mol = Chem.RWMol(mol)
                added_idx = rw_mol.AddAtom(Chem.Atom(symbol))
                rw_mol.UpdatePropertyCache(strict=False)
                added_atom = rw_mol.GetMol().GetAtomWithIdx(added_idx)
                candidates.extend(all_atom_join(rw_mol, anchor, added_atom))
                if len(candidates) > max_num_action:
                    break
            else:
                # Inner loop ran to completion: move on to the next anchor.
                continue
            # Inner loop hit the action limit: stop enumerating entirely.
            break

        sanitized = [dm.sanitize_mol(candidate) for candidate in candidates]
        valid = [candidate for candidate in sanitized if candidate is not None]
        if not asMols:
            return [dm.to_smiles(candidate) for candidate in valid if candidate]
    return valid

all_atom_join(mol, a1, a2)

Join two atoms (a1, a2) in a molecule in all possible valid manner.

Source code in datamol/actions/_actions.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def all_atom_join(
    mol: Chem.rdchem.Mol,
    a1: Union[int, Chem.rdchem.Atom],
    a2: Union[int, Chem.rdchem.Atom],
):
    """Join two atoms (a1, a2) in a molecule in all possible valid manner.

    If the atoms are not bonded, single/double/triple bonds are added as far
    as the implicit valence of both atoms allows. If they already share a
    single or double bond, the bond order is increased instead.

    Note:
        `mol` is kekulized in place (with aromatic flags cleared) on a
        best-effort basis before the candidates are generated.

    Args:
        mol: Molecule containing both atoms.
        a1: First atom, as an index into `mol` or an atom object.
        a2: Second atom, as an index into `mol` or an atom object.

    Returns:
        List of the new molecules; candidates for which the bond edit
        returned `None` are dropped.
    """

    if isinstance(a1, int):
        a1 = mol.GetAtomWithIdx(a1)

    if isinstance(a2, int):
        a2 = mol.GetAtomWithIdx(a2)

    new_mols = []
    with dm.without_rdkit_log():
        try:
            Chem.Kekulize(mol, clearAromaticFlags=True)
        except Exception:
            # Kekulization is best effort: carry on with the molecule as-is.
            # `Exception` (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            pass

        v1, v2 = a1.GetImplicitValence(), a2.GetImplicitValence()
        bond = mol.GetBondBetweenAtoms(a1.GetIdx(), a2.GetIdx())

        if bond is None:
            # No bond yet: each bond order needs that much free valence
            # on both atoms.
            if v1 > 0 and v2 > 0:
                new_mols.append(add_bond_between(mol, a1, a2, dm.SINGLE_BOND))
            if v1 > 1 and v2 > 1:
                new_mols.append(add_bond_between(mol, a1, a2, dm.DOUBLE_BOND))
            if v1 > 2 and v2 > 2:
                new_mols.append(add_bond_between(mol, a1, a2, dm.TRIPLE_BOND))

        elif bond.GetBondType() == dm.SINGLE_BOND:
            # Existing single bond: upgrade by one or two orders.
            if v1 > 0 and v2 > 0:
                new_mols.append(update_bond(mol, bond, dm.DOUBLE_BOND))
            if v1 > 1 and v2 > 1:
                new_mols.append(update_bond(mol, bond, dm.TRIPLE_BOND))

        elif bond.GetBondType() == dm.DOUBLE_BOND:
            # Existing double bond: only a triple bond remains possible.
            if v1 > 0 and v2 > 0:
                new_mols.append(update_bond(mol, bond, dm.TRIPLE_BOND))

    return [new_mol for new_mol in new_mols if new_mol is not None]

all_atom_replace(mol, atom_types=None, asMols=True, max_num_action=float('Inf'), **kwargs)

Replace all non-hydrogen atoms by other possibilities.

.. warning:: This is computationally expensive

Parameters:

Name Type Description Default
mol

Input molecule

required
atom_types

list List of atom symbol to use as replacement (Default: ['C', 'N', 'S', 'O'])

None
asMols

bool, optional Whether to return output as molecule or smiles

True
max_num_action

float, optional Maximum number of action to reduce complexity

float('Inf')

Returns:

Type Description

All possible molecules with atoms replaced

Source code in datamol/actions/_actions.py
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
def all_atom_replace(
    mol,
    atom_types=None,
    asMols=True,
    max_num_action=float("Inf"),
    **kwargs,
):
    """Enumerate molecules where one non-hydrogen atom is swapped for another element.

    .. warning::
        This is computationally expensive

    Args:
        mol: <Chem.Mol>
            Input molecule
        atom_types: list
            List of atom symbols to use as replacement
            (Default: ['C', 'N', 'S', 'O'])
        asMols: bool, optional
            Whether to return output as molecule or smiles
        max_num_action: float, optional
            Enumeration stops once more than this many candidates
            have been collected
        **kwargs: accepted but not used by this function.

    Returns:
        All possible molecules with atoms replaced

    """
    symbols = ["C", "N", "S", "O"] if atom_types is None else atom_types
    candidates = []
    with dm.without_rdkit_log():
        for atom in mol.GetAtoms():
            # Only atoms heavier than hydrogen are replaced.
            if atom.GetAtomicNum() <= 1:
                continue
            for symbol in symbols:
                rw_mol = Chem.RWMol(mol)
                rw_mol.ReplaceAtom(atom.GetIdx(), Chem.Atom(symbol))
                candidates.append(rw_mol)
                if len(candidates) > max_num_action:
                    break
            else:
                # Limit not reached for this atom: keep enumerating.
                continue
            break

        # Keep only the candidates that sanitize_mol does not reject.
        sanitized = [dm.sanitize_mol(candidate) for candidate in candidates]
        valid = [candidate for candidate in sanitized if candidate is not None]

    if asMols:
        return valid
    return [dm.to_smiles(candidate) for candidate in valid]

all_bond_add(mol, allowed_ring_sizes=None, bond_between_rings=True, asMols=True, max_num_action=float('Inf'), **kwargs)

Add bond to a molecule

.. warning:: This is computationally expensive

Parameters:

Name Type Description Default
mol

Input molecule

required
allowed_ring_sizes

list, optional Set of integer allowed ring sizes; used to remove some actions that would create rings with disallowed sizes.

None
bond_between_rings

bool, optional Whether to allow actions that add bonds between atoms that are both in rings.

True
asMols

bool, optional Whether to return output as molecule or smiles

True
max_num_action

float, optional Maximum number of action to reduce complexity

float('Inf')

Returns:

Type Description

All possible molecules with additional bond added between atoms

Source code in datamol/actions/_actions.py
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
def all_bond_add(
    mol,
    allowed_ring_sizes=None,
    bond_between_rings=True,
    asMols=True,
    max_num_action=float("Inf"),
    **kwargs,
):
    """Add bond to a molecule

    Every pair of atoms that both have a non-zero implicit valence is passed
    to `all_atom_join`, which either adds a new bond or raises the order of
    the existing one.

    .. warning::
        This is computationally expensive

    Args:
        mol: <Chem.Mol>
            Input molecule
        allowed_ring_sizes: list, optional
            Set of integer allowed ring sizes; used to remove some
            actions that would create rings with disallowed sizes.
            Only applied to atom pairs that are not already bonded.
        bond_between_rings: bool, optional
            Whether to allow actions that add bonds
            between atoms that are both in rings.
        asMols: bool, optional
            Whether to return output as molecule or smiles
            (smiles output is de-duplicated).
        max_num_action: float, optional
            Maximum number of action to reduce complexity
        **kwargs: accepted but not used by this function.

    Returns:
        All possible molecules with additional bond added between atoms
    """
    new_mols = []
    num_atoms = mol.GetNumAtoms()
    stop = False
    for i1 in range(num_atoms):
        if stop:
            break
        a1 = mol.GetAtomWithIdx(i1)
        if a1.GetImplicitValence() == 0:
            continue
        for i2 in range(i1 + 1, num_atoms):
            a2 = mol.GetAtomWithIdx(i2)
            # Cheap rejections first, so the path enumeration below only
            # runs for pairs that can actually be joined.
            if a2.GetImplicitValence() == 0:
                continue
            # no bond between atoms already in rings
            if not bond_between_rings and a1.IsInRing() and a2.IsInRing():
                continue
            # no bond to form rings of a disallowed size; the path search is
            # expensive, so it is skipped when no ring-size filter was
            # requested or when the atoms are already bonded
            if allowed_ring_sizes is not None and mol.GetBondBetweenAtoms(i1, i2) is None:
                all_paths = get_all_path_between(mol, i1, i2, ignore_cycle_basis=True)
                all_path_len = {len(path) for path in all_paths}
                if not all_path_len.issubset(allowed_ring_sizes):
                    continue
            new_mols.extend(all_atom_join(mol, a1, a2))
            if len(new_mols) > max_num_action:
                stop = True
                break
    if not asMols:
        return list({dm.to_smiles(x) for x in new_mols if x})
    return [m for m in new_mols if m is not None]

all_bond_remove(mol, as_mol=True, allow_bond_decrease=True, allow_atom_trim=True, max_num_action=float('Inf'))

Remove bonds from a molecule

Warning

This can be computationally expensive.

Parameters:

Name Type Description Default
mol Chem.rdchem.Mol

Input molecule

required
allow_bond_decrease bool

Allow decreasing bond type in addition to bond cut

True
max_num_action

Maximum number of action to reduce complexity

float('Inf')
allow_atom_trim bool

Allow bond removal even when it results in a single-atom fragment

True

Returns:

Type Description

All possible molecules from removing bonds

Source code in datamol/actions/_actions.py
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
def all_bond_remove(
    mol: Chem.rdchem.Mol,
    as_mol: bool = True,
    allow_bond_decrease: bool = True,
    allow_atom_trim: bool = True,
    max_num_action=float("Inf"),
):
    """Remove bonds from a molecule

    Warning:
        This can be computationally expensive.

    Note:
        `mol` is kekulized in place (with aromatic flags cleared) on a
        best-effort basis before the bonds are enumerated.

    Args:
        mol: Input molecule
        as_mol: Whether to return molecules (True) or smiles (False)
        allow_bond_decrease: Allow decreasing bond type in addition to bond cut
        allow_atom_trim: Allow bond removal even when it results in a
            single-atom fragment
        max_num_action: Maximum number of action to reduce complexity

    Returns:
        All possible molecules from removing bonds

    """
    new_mols = []

    try:
        Chem.Kekulize(mol, clearAromaticFlags=True)
    except Exception:
        # Kekulization is best effort: carry on with the molecule as-is.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        pass

    for bond in mol.GetBonds():
        if len(new_mols) > max_num_action:
            break

        original_bond_type = bond.GetBondType()
        emol = Chem.RWMol(mol)
        emol.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        new_mol = dm.sanitize_mol(emol.GetMol())

        # A cut that does not sanitize also skips the bond-decrease variants.
        if not new_mol:
            continue

        frag_list = list(rdmolops.GetMolFrags(new_mol, asMols=True))
        has_single_atom = any(x.GetNumAtoms() < 2 for x in frag_list)
        if not has_single_atom or allow_atom_trim:
            new_mols.extend(frag_list)
        if allow_bond_decrease:
            # Double and triple bonds can be lowered to single ...
            if original_bond_type in (dm.DOUBLE_BOND, dm.TRIPLE_BOND):
                new_mol = update_bond(mol, bond, dm.SINGLE_BOND)
                if new_mol is not None:
                    new_mols.extend(list(rdmolops.GetMolFrags(new_mol, asMols=True)))
            # ... and triple bonds additionally to double.
            if original_bond_type == dm.TRIPLE_BOND:
                new_mol = update_bond(mol, bond, dm.DOUBLE_BOND)
                if new_mol is not None:
                    new_mols.extend(list(rdmolops.GetMolFrags(new_mol, asMols=True)))

    new_mols = [new_mol for new_mol in new_mols if new_mol is not None]

    if not as_mol:
        return [dm.to_smiles(x) for x in new_mols if x]

    return new_mols

all_fragment_assemble(fragmentlist, max_num_action=float('Inf'), asMols=True, seen=None)

Assemble a set of fragment into a new molecule

.. warning:: This is computationally expensive

Parameters:

Name Type Description Default
fragmentlist

list List of blocks to use for replacement, or addition to molparent

required
max_num_action

float, optional Maximum number of action to reduce complexity. No limit by default

float('Inf')
asMols

bool, optional Whether to return smiles or mols

True
seen

list, optional List of initial molecules

None

Returns:

Type Description

reconstructed molecules

Source code in datamol/actions/_actions.py
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
def all_fragment_assemble(
    fragmentlist,
    max_num_action=float("Inf"),
    asMols=True,
    seen=None,
):
    """Assemble a set of fragments into new molecules.

    Results are pulled from `dm.fragment.assemble_fragment_order` (called
    with `allow_incomplete=False`) until it is exhausted or the action
    limit is exceeded.

    .. warning::
        This is computationally expensive

    Args:
        fragmentlist: list
            List of fragments to assemble
        max_num_action: float, optional
            Maximum number of action to reduce complexity. No limit by default
        asMols: bool, optional
            Whether to return smiles or mols
        seen: list, optional
            List of initial molecules, forwarded to the assembler

    Returns:
        reconstructed molecules

    """
    candidates = dm.fragment.assemble_fragment_order(
        fragmentlist,
        seen=seen,
        allow_incomplete=False,
        max_n_mols=max_num_action,
    )

    assembled = []
    for candidate in candidates:
        if len(assembled) > max_num_action:
            break
        assembled.append(candidate)

    if asMols:
        return assembled
    return [dm.to_smiles(x) for x in assembled if x is not None]

all_fragment_attach(mol, fragmentlist, bond_between_rings=True, max_num_action=10, asMols=True)

List all possible ways to attach a list of fragments to a single molecule.

.. warning:: This is computationally expensive

Parameters:

Name Type Description Default
mol Chem.rdchem.Mol

Input molecule

required
fragmentlist List[Chem.rdchem.Mol]

Molecular fragments to attach.

required
bond_between_rings bool

Whether to allow bond between two rings atoms

True
max_num_action int

Maximum fragment attachment to allow. Reduce time complexity

10
asMols bool

Whether to return output as molecule or smiles

True

Returns:

Type Description

All possible molecules resulting from attaching the molecular fragment to the root molecule

Source code in datamol/actions/_actions.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
def all_fragment_attach(
    mol: Chem.rdchem.Mol,
    fragmentlist: List[Chem.rdchem.Mol],
    bond_between_rings: bool = True,
    max_num_action: int = 10,
    asMols: bool = True,
):
    """List all possible ways to attach a list of fragments to a single molecule.

    One `compute_fragment_join` generator is created per fragment, and the
    generators are consumed in round-robin order so that every fragment
    contributes before the action limit is reached.

    .. warning::
        This is computationally expensive

    Args:
        mol: Input molecule
        fragmentlist: Molecular fragments to attach.
        bond_between_rings: Whether to allow bond between two rings atoms
        max_num_action: Maximum fragment attachment to allow. Reduce time complexity
        asMols: Whether to return output as molecule or smiles

    Returns:
        Set of all molecules (or smiles) resulting from attaching the
        molecular fragments to the root molecule

    """
    attached = set()
    n_mol_atoms = mol.GetNumAtoms()
    n_fragments = len(fragmentlist)
    generators = [None] * n_fragments
    # exhausted[i] becomes 1 once the generator of fragment i has run dry.
    exhausted = np.zeros(n_fragments)

    while len(attached) < max_num_action and not np.all(exhausted):
        for idx, fragment in enumerate(fragmentlist):
            if len(attached) >= max_num_action:
                break
            # Generators are created lazily, on first visit of the fragment.
            if generators[idx] is None:
                generators[idx] = compute_fragment_join(
                    mol, fragment, n_mol_atoms, bond_between_rings, asMols
                )
            if exhausted[idx]:
                continue
            try:
                attached.add(next(generators[idx]))
            except StopIteration:
                exhausted[idx] = 1
    return attached

all_fragment_on_bond(mol, asMols=False, max_num_action=float('Inf'), break_aromatic=True)

Fragment every possible bond in a molecule and return the set of resulting fragments. This is similar to random_bond_cut, but it is not stochastic: instead of returning a random fragment, it returns all the fragments resulting from every potential bond break in the molecule.

.. note:: This will always be a subset of all_bond_remove, the main difference being that all_bond_remove, allow decreasing bond count, while this one will always break a molecule into two.

Parameters:

Name Type Description Default
mol

input molecule

required
asMols

bool, optional Whether to return results as mols or smiles

False
max_num_action

float, optional Maximum number of action to reduce complexity

float('Inf')
break_aromatic

bool, optional Whether to attempt to break even aromatic bonds (Default: True)

True

Returns:

Type Description

set of fragments

Source code in datamol/actions/_actions.py
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
def all_fragment_on_bond(mol, asMols=False, max_num_action=float("Inf"), break_aromatic=True):
    """Break each bond of a molecule in turn and collect the resulting fragments.

    This is similar to `random_bond_cut`, but it is not stochastic: instead of
    returning a random fragment, it returns the fragments produced by every
    potential bond break in the molecule.

    .. note::
        This will always be a subset of all_bond_remove, the main difference being that
        all_bond_remove allows decreasing bond count, while this one will always break
        a molecule into two.

    .. note::
        When `break_aromatic` is true and the molecule has bonds, `mol` is
        kekulized in place (with aromatic flags cleared).

    Args:
        mol: <Chem.Mol>
            input molecule
        asMols: bool, optional
            Whether to return results as mols or smiles
        max_num_action: float, optional
            Maximum number of action to reduce complexity
        break_aromatic: bool, optional
            Whether to attempt to break even aromatic bonds
            (Default: True)

    Returns:
        set of fragments

    """
    mol.GetRingInfo().AtomRings()
    fragments = set()
    bonds = list(mol.GetBonds())
    if not bonds:
        return fragments

    if break_aromatic:
        Chem.Kekulize(mol, clearAromaticFlags=True)

    for bond in bonds:
        # Aromatic bonds are skipped unless breaking them was requested.
        if not break_aromatic and bond.GetIsAromatic():
            continue

        cut = dm.sanitize_mol(Chem.FragmentOnBonds(mol, [bond.GetIdx()], addDummies=False))
        if cut is None:
            continue

        limit_reached = False
        for piece in rdmolops.GetMolFrags(cut, asMols=True):
            piece = dm.sanitize_mol(piece)
            if piece:
                fragments.add(piece if asMols else dm.to_smiles(piece))
            if len(fragments) > max_num_action:
                limit_reached = True
                break
        if limit_reached:
            break
    return fragments

all_fragment_update(molparent, fragmentlist, bond_between_rings=True, max_num_action=float('Inf'), asMols=False)

Break a molecule into all sets of fragments (including the molecule itself), then enumerate all possible combinations with blocks from the fragmentlist. This corresponds to exploring all valid actions by adding/replacing fragments in a molecule.

.. warning:: This is computationally expensive

.. note:: You should perform a valency check after

Parameters:

Name Type Description Default
molparent

input molecule

required
fragmentlist

list List of blocks to use for replacement, or addition to molparent

required
bond_between_rings

bool, optional Whether to allow bond between rings (Default: True)

True
max_num_action

float, optional Maximum number of action to reduce complexity

float('Inf')
asMols

bool, optional Whether to return smiles or mols

False

Returns:

Type Description

set of modified mols

Source code in datamol/actions/_actions.py
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
def all_fragment_update(
    molparent,
    fragmentlist,
    bond_between_rings=True,
    max_num_action=float("Inf"),
    asMols=False,
):
    """
    Break a molecule into all sets of fragments (including the molecule itself),
    then enumerate all possible combinations with blocks from the fragmentlist.
    This corresponds to exploring all valid actions by adding/replacing fragments
    in a molecule.

    .. warning::
        This is computationally expensive

    .. note::
        You should perform a valency check after

    Args:
        molparent: <Chem.Mol>
            input molecule
        fragmentlist: list
            List of blocks to use for replacement, or addition to molparent
        bond_between_rings: bool, optional
            Whether to allow bond between rings
            (Default: True)
        max_num_action: float, optional
            Maximum number of action to reduce complexity
        asMols: bool, optional
            Whether to return smiles or mols

    Returns:
        list of the distinct modified mols
    """
    collected = set()
    for sub_mol in dm.fragment.anybreak(molparent, remove_parent=False):
        collected.update(
            all_fragment_attach(sub_mol, fragmentlist, bond_between_rings, max_num_action, asMols)
        )
        # Checked after each sub-molecule, so the limit can be overshot.
        if len(collected) > max_num_action:
            break
    return list(collected)

all_join_on_attach_point(mol1, mol2)

Join two molecules on all possible attaching point

Parameters:

Name Type Description Default
mol1 Chem.rdchem.Mol

Input molecule 1.

required
mol2 Chem.rdchem.Mol

Input molecule 2.

required

Returns:

Type Description

iterator of all possible way to attach both molecules from dummy indicators.

Source code in datamol/actions/_actions.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def all_join_on_attach_point(mol1: Chem.rdchem.Mol, mol2: Chem.rdchem.Mol):
    """Join two molecules on all possible attaching points.

    Attachment points are the neighbours of non-ring dummy atoms (atomic
    number 0). Each such neighbour is tagged with an atom-map number of at
    least 100; this tagging is applied to the atoms of the input molecules
    themselves. Each molecule is then passed through `dm.fix_mol`.

    Args:
        mol1: Input molecule 1.
        mol2: Input molecule 2.

    Returns:
        iterator of all possible ways to attach both molecules from dummy indicators.
    """
    map_num_start = 100
    next_offset = 0
    prepared = []
    attach_idxs = []

    for source in (mol1, mol2):
        dummies = [
            atom
            for atom in source.GetAtoms()
            if not atom.IsInRing() and atom.GetAtomicNum() == 0
        ]
        # Visit dummy atoms from the highest atom index to the lowest.
        dummies.sort(key=lambda atom: atom.GetIdx(), reverse=True)
        for dummy in dummies:
            for neighbor in dummy.GetNeighbors():
                neighbor.SetAtomMapNum(map_num_start + next_offset)
                next_offset += 1

        fixed = dm.fix_mol(source)
        prepared.append(fixed)
        # Recover the tagged atoms by their atom-map number.
        attach_idxs.append(
            [atom.GetIdx() for atom in fixed.GetAtoms() if atom.GetAtomMapNum() >= map_num_start]
        )

    for idx1, idx2 in itertools.product(*attach_idxs):
        yield random_fragment_add(copy.copy(prepared[0]), copy.copy(prepared[1]), idx1, idx2)

all_mmpa_assemble(molist, max_num_action=float('Inf'), asMols=True, **kwargs)

Enumerate all mmpa assembly of molecules in molist

Parameters:

Name Type Description Default
molist

list of Chem.Mol. List of molecules to fragment and reconstruct

required
asMols

bool, optional Whether to return smiles or mols

True
max_num_action

int, optional Maximum number of assembly (Default: inf)

float('Inf')

Returns:

Name Type Description
res

list of Molecules obtained by merging core and side_chains

Source code in datamol/actions/_actions.py
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
def all_mmpa_assemble(molist, max_num_action=float("Inf"), asMols=True, **kwargs):
    """Enumerate all mmpa assemblies of the molecules in molist.

    Each molecule is fragmented with `dm.fragment.mmpa_frag`; every resulting
    "core.sidechain" smiles is split in two, and the pooled cores and side
    chains are handed to `_compute_mmpa_assembly`.

    Args:
        molist: list of <Chem.Mol>
            List of molecules to fragment and reconstruct
        asMols: bool, optional
            Whether to return smiles or mols
        max_num_action: int, optional
            Maximum number of assembly
            (Default: inf)
        **kwargs: accepted but not used by this function.

    Returns:
        res: list of <Chem.Mol>
            Molecules obtained by merging core and side_chains
    """
    cores, side_chains = [], []
    for mol in molist:
        fragmentation = dm.fragment.mmpa_frag(mol, max_bond_cut=30)
        if not fragmentation:
            continue
        # Each entry is a pair; only the second column is used here.
        _, pair_smiles = (list(column) for column in zip(*fragmentation))
        for pair in pair_smiles:
            core_smiles, chain_smiles = pair.split(".")
            # Rewrite the attachment-point label from "[*:1]" to "[1*]".
            cores.append(Chem.MolFromSmiles(core_smiles.replace("[*:1]", "[1*]")))
            side_chains.append(Chem.MolFromSmiles(chain_smiles.replace("[*:1]", "[1*]")))

    assembled = _compute_mmpa_assembly(cores, side_chains, max_num_action=max_num_action)
    if asMols:
        return assembled
    return [dm.to_smiles(x) for x in assembled if x]

all_transform_apply(mol, rxns, max_num_action=float('Inf'), asMols=True)

Apply a transformation defined as a reaction from a set of reaction to the input molecule.

The reaction need to be one reactant-only

Parameters:

Name Type Description Default
mol

Input molecule

required
rxns

list list of reactions/ reaction smarts

required
max_num_action

int, optional Maximum number of result to return (Default: inf)

float('Inf')
asMols

bool, optional Whether to return smiles or mols

True

Returns:

Type Description

Products obtained from applying the chemical reactions

Source code in datamol/actions/_actions.py
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
def all_transform_apply(
    mol,
    rxns,
    max_num_action=float("Inf"),
    asMols=True,
):
    """
    Apply a transformation defined as a reaction from a set of reaction to the input molecule.

    The reaction need to be one reactant-only

    Args:
        mol: <Chem.Mol>
            Input molecule
        rxns: list
            list of reactions/ reaction smarts
        max_num_action: int, optional
            Maximum number of result to return
            (Default: inf)
        asMols: bool, optional
            Whether to return smiles or mols

    Returns:
        Products obtained from applying the chemical reactions
    """

    # Products are collected as smiles so that duplicates collapse.
    unique_smiles = set()
    with dm.without_rdkit_log():
        for rxn in rxns:
            if len(unique_smiles) >= max_num_action:
                break
            if isinstance(rxn, str):
                rxn = AllChem.ReactionFromSmarts(rxn)
            try:
                pcdts = [products[0] for products in rxn.RunReactants([mol])]
                pcdts = [dm.sanitize_mol(x) for x in pcdts]
                unique_smiles.update([dm.to_smiles(x) for x in pcdts if x])
            except Exception:
                # A reaction that cannot be applied to this molecule is
                # skipped. `Exception` (rather than a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                pass

    # Sort before truncating: set iteration order is arbitrary, so without
    # it the kept subset would change from run to run.
    mols = sorted(x for x in unique_smiles if x is not None)
    if np.isfinite(max_num_action):
        # The limit may be a finite float (the default is a float), which
        # cannot be used as a slice bound.
        mols = mols[: int(max_num_action)]

    mols = [dm.to_mol(x) for x in mols]
    if not asMols:
        mols = [dm.to_smiles(x) for x in mols if x is not None]
    return mols

compute_fragment_join(mol, fragment, mol_atom_count, bond_between_rings=True, asMols=True)

List all possibilities of where a fragment can be attached to a mol.

Source code in datamol/actions/_actions.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
def compute_fragment_join(
    mol: Chem.rdchem.Mol,
    fragment: Chem.rdchem.Mol,
    mol_atom_count: int,
    bond_between_rings: bool = True,
    asMols: bool = True,
):
    """Lazily yield every way a fragment can be attached to a mol.

    `mol` and a copy of `fragment` are combined into one molecule, and each
    (mol atom, fragment atom) pair with non-zero implicit valence on both
    sides is joined through `all_atom_join`.

    Args:
        mol: Root molecule.
        fragment: Fragment to attach.
        mol_atom_count: Offset added to a fragment atom index to address
            that atom in the combined molecule.
        bond_between_rings: Whether to allow a bond between two ring atoms.
        asMols: Whether to yield molecules (True) or smiles (False).

    Yields:
        Each joined molecule that `dm.sanitize_mol` does not reject.
    """

    # A shallow copy is the fastest way to get an independent fragment.
    fragment = copy.copy(fragment)

    with dm.without_rdkit_log():
        combined = Chem.CombineMols(mol, fragment)

        for mol_idx in range(mol.GetNumAtoms()):
            mol_atom = combined.GetAtomWithIdx(mol_idx)
            if mol_atom.GetImplicitValence() == 0:
                continue

            for frag_idx in range(fragment.GetNumAtoms()):
                frag_atom = combined.GetAtomWithIdx(frag_idx + mol_atom_count)
                if frag_atom.GetImplicitValence() == 0:
                    continue

                # no bond between atoms already in rings
                if not bond_between_rings and mol_atom.IsInRing() and frag_atom.IsInRing():
                    continue

                for joined in all_atom_join(combined, mol_atom, frag_atom):
                    joined = dm.sanitize_mol(joined)
                    if joined is None:
                        continue
                    yield joined if asMols else dm.to_smiles(joined)

mmpa_fragment_exchange(mol1, mol2, return_all=False, **kwargs)

Perform a fragment exchange between two molecules using mmpa rules

Parameters:

Name Type Description Default
mol1

input molecule 1

required
mol2

input molecule 2

required
return_all

bool, optional Whether to return list of all molecules

False

Returns:

Type Description

modified_mol1, modified_mol2 Molecules obtained by exchanging fragment between mol1 and mol2. In case of failure, mol1, mol2 are returned

Source code in datamol/actions/_actions.py
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
def mmpa_fragment_exchange(mol1, mol2, return_all=False, **kwargs):
    """Perform a fragment exchange between two molecules using mmpa rules

    Args:
        mol1: <Chem.Mol>
            input molecule 1
        mol2: <Chem.Mol>
            input molecule 2
        return_all: bool, optional
            Whether to return list of all molecules
        **kwargs: accepted but not used by this function.

    Returns:
        modified_mol1, modified_mol2
            Two molecules picked at random among those obtained by exchanging
            fragments between mol1 and mol2. When fewer than two new molecules
            are found, the missing slots are filled with mol1 then mol2, so a
            complete failure returns (mol1, mol2).
            If `return_all` is true, the list of all new molecules is
            returned instead.

    """

    # The inputs themselves (and failed conversions) are not valid outcomes.
    unwanted = [dm.to_smiles(m) for m in [mol1, mol2]] + [None]
    res = all_mmpa_assemble([mol1, mol2])
    # find unique
    res = set([dm.to_smiles(m) for m in res])
    res = list(res - set(unwanted))
    out = []
    for sm in res:
        try:
            r = dm.to_mol(sm, sanitize=True)
        except Exception:
            # Skip smiles that cannot be converted back. `Exception` (rather
            # than a bare except) lets KeyboardInterrupt and SystemExit
            # propagate.
            continue
        if r is not None:
            out.append(r)

    if return_all:
        return out
    random.shuffle(out)
    # Pad with the inputs so that two molecules can always be returned.
    out.extend([mol1, mol2])
    return out[0], out[1]

pick_atom_idx(mol, prepick=None)

pick an atom from the molecule

Source code in datamol/actions/_actions.py
18
19
20
21
22
23
24
25
26
27
28
def pick_atom_idx(mol: Chem.rdchem.Mol, prepick: Optional[int] = None):
    """Pick an atom index from the molecule.

    Args:
        mol: Input molecule; its property cache is updated first.
        prepick: Preferred atom index. It is returned unchanged when it is a
            valid index for `mol`.

    Returns:
        `prepick` when valid; otherwise a random index among the atoms with a
        positive implicit valence, or `None` when there is no such atom.
    """

    mol.UpdatePropertyCache()

    # A valid caller-supplied index wins over random selection.
    if prepick is not None and 0 <= prepick < mol.GetNumAtoms():
        return prepick

    open_atoms = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetImplicitValence() > 0]
    return random.choice(open_atoms) if open_atoms else None

remove_bond_between(mol, a1, a2, sanitize=True)

Remove a bond between atoms.

Source code in datamol/actions/_actions.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def remove_bond_between(
    mol: Chem.rdchem.Mol,
    a1: Union[int, Chem.rdchem.Atom],
    a2: Union[int, Chem.rdchem.Atom],
    sanitize: bool = True,
):
    """Remove the bond between two atoms.

    The edit is applied to `dm.copy_mol(mol)`, not to `mol` itself.

    Args:
        mol: Input molecule.
        a1: First atom, given either as an atom index or as an atom object.
        a2: Second atom, given either as an atom index or as an atom object.
        sanitize: Whether to pass the edited molecule through `dm.sanitize_mol`.

    Returns:
        The edited molecule, or the result of `dm.sanitize_mol` on it when
        `sanitize` is true.
    """
    # Normalise both endpoints to plain atom indices.
    idx1 = a1.GetIdx() if isinstance(a1, Chem.rdchem.Atom) else a1
    idx2 = a2.GetIdx() if isinstance(a2, Chem.rdchem.Atom) else a2

    editable = Chem.EditableMol(dm.copy_mol(mol))
    editable.RemoveBond(idx1, idx2)
    edited = editable.GetMol()

    return dm.sanitize_mol(edited) if sanitize else edited

update_bond(mol, bond, bond_type, sanitize=True)

Update bond type between atoms

Source code in datamol/actions/_actions.py
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def update_bond(
    mol: Chem.rdchem.Mol,
    bond: Union[int, Chem.rdchem.Bond],
    bond_type: Chem.rdchem.BondType,
    sanitize: bool = True,
):
    """Change the type of one bond.

    The edit is applied to `dm.copy_mol(mol)`, not to `mol` itself.

    Args:
        mol: Input molecule.
        bond: Bond to change, given either as a bond index or as a bond object.
        bond_type: New bond type.
        sanitize: Whether to pass the edited molecule through `dm.sanitize_mol`.

    Returns:
        The edited molecule, or the result of `dm.sanitize_mol` on it when
        `sanitize` is true.
    """
    # Work with a bond index so the bond can be located in the copy.
    bond_idx = bond.GetIdx() if isinstance(bond, Chem.rdchem.Bond) else bond
    updated = dm.copy_mol(mol)

    with dm.without_rdkit_log():
        updated.GetBondWithIdx(bond_idx).SetBondType(bond_type)

    return dm.sanitize_mol(updated) if sanitize else updated