Skip to content

datamol.reactions

apply_reaction(rxn, reactants, product_index=None, single_product_group=False, as_smiles=False, rm_attach=False, disable_logs=True, sanitize=True)

Apply a chemical reaction on a molecule

Parameters:

Name Type Description Default
rxn ChemicalReaction

Reaction object.

required
reactants tuple

A tuple of reactants.

required
product_index Optional[Union[int, list]]

The index of the product of interest.

None
single_product_group bool

Whether to return a single product group out of all possible product groups.

False
as_smiles bool

Whether to return the products as SMILES.

False
rm_attach bool

Whether to remove the attachment points from the products.

False
disable_logs bool

Whether to disable the RDKit logs.

True
sanitize bool

Whether to sanitize the products.

True

Returns:

Type Description
Union[list, str, Mol]

Reaction products.

Source code in datamol/reactions/_reactions.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
def apply_reaction(
    rxn: dm.ChemicalReaction,
    reactants: tuple,
    product_index: Optional[Union[int, list]] = None,
    single_product_group: bool = False,
    as_smiles: bool = False,
    rm_attach: bool = False,
    disable_logs: bool = True,
    sanitize: bool = True,
) -> Union[list, str, dm.Mol]:
    """Run a chemical reaction on a set of reactants and post-process the products.

    Args:
        rxn: Reaction object. It is initialized first if it is not already.
        reactants: A tuple of reactants, passed as-is to `rxn.RunReactants`.
        product_index: The index (or list of indices) of the product(s) of interest.
        single_product_group: Whether to return a single product group out of all
            the possible product groups.
        as_smiles: Whether to return the products as SMILES.
        rm_attach: Whether to remove the attachment points from the products.
        disable_logs: Whether to disable the RDKit logs. Forwarded as `enable` to
            `dm.without_rdkit_log`.
        sanitize: Whether to sanitize the products.

    Returns:
        The reaction products, selected and processed by `select_reaction_output`.
    """
    # Everything except the raw products is forwarded untouched to the selector.
    selection_options = {
        "product_index": product_index,
        "single_product_group": single_product_group,
        "as_smiles": as_smiles,
        "rm_attach": rm_attach,
        "sanitize": sanitize,
    }

    # Both the reaction and the post-processing run with the RDKit logs muted.
    with dm.without_rdkit_log(enable=disable_logs):
        if not rxn.IsInitialized():
            rxn.Initialize()  # pragma: no cover

        raw_products = rxn.RunReactants(reactants)
        return select_reaction_output(product=raw_products, **selection_options)

can_react(rxn, mol)

Check if a molecule is a reactant to a chemical reaction.

Parameters:

Name Type Description Default
rxn ChemicalReaction

Reaction to check.

required
mol Mol

Molecule to check if it is a reactant.

required

Returns:

Type Description
bool

True if mol is a reactant of rxn.

Source code in datamol/reactions/_reactions.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
def can_react(rxn: dm.ChemicalReaction, mol: dm.Mol) -> bool:
    """Tell whether a molecule can act as a reactant of a chemical reaction.

    The molecule must both be accepted by `rxn.IsMoleculeReactant` and match at
    least one reactant template according to `find_reactant_position`.

    Args:
        rxn: Reaction to check. It is initialized first if it is not already.
        mol: Molecule to check if it is a reactant.

    Returns:
        True if `mol` is a reactant of rxn.
    """
    if not rxn.IsInitialized():
        rxn.Initialize()  # pragma: no cover

    # Guard clause: no need to look for a position if RDKit already rejects the molecule.
    if not rxn.IsMoleculeReactant(mol):
        return False

    position = find_reactant_position(rxn, mol)
    return position != -1

find_reactant_position(rxn, mol)

Find the position of a reactant in a reaction.

Parameters:

Name Type Description Default
rxn ChemicalReaction

Reaction

required
mol Mol

Molecule

required

Returns:

Type Description
int

Reactant position or -1 if mol is not a reactant.

Source code in datamol/reactions/_reactions.py
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
def find_reactant_position(rxn: dm.ChemicalReaction, mol: dm.Mol) -> int:
    """Locate the reactant template of a reaction that a molecule matches.

    Every reactant template is tested with a substructure match. When several
    templates match, the position of the last matching one is returned.

    Args:
        rxn: Reaction. It is initialized first if it is not already.
        mol: Molecule

    Returns:
        Reactant position or -1 if `mol` is not a reactant.
    """
    if not rxn.IsInitialized():
        rxn.Initialize()  # pragma: no cover

    matching_positions = [
        position
        for position, template in enumerate(rxn.GetReactants())
        if mol.HasSubstructMatch(template)
    ]

    # Keep the historical behaviour: the last matching template wins.
    return matching_positions[-1] if matching_positions else -1

inverse_reaction(rxn)

Get the reverse reaction of the input reaction

Parameters:

Name Type Description Default
rxn ChemicalReaction

Reaction to inverse.

required

Returns:

Type Description
ChemicalReaction

Inversed reaction.

Source code in datamol/reactions/_reactions.py
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def inverse_reaction(rxn: dm.ChemicalReaction) -> dm.ChemicalReaction:
    """Build the reverse of a reaction by swapping its reactant and product templates.

    Only the reactant and product templates are copied over to the new reaction.

    Args:
        rxn: Reaction to invert.

    Returns:
        A new, initialized reaction going from the products of `rxn` to its reactants.
    """
    reversed_rxn = rdChemReactions.ChemicalReaction()

    # Reactants of the input become products of the output...
    for idx in range(rxn.GetNumReactantTemplates()):
        template = rxn.GetReactantTemplate(idx)
        reversed_rxn.AddProductTemplate(template)

    # ...and products of the input become reactants of the output.
    for idx in range(rxn.GetNumProductTemplates()):
        template = rxn.GetProductTemplate(idx)
        reversed_rxn.AddReactantTemplate(template)

    reversed_rxn.Initialize()
    return reversed_rxn

is_reaction_ok(rxn, enable_logs=False)

Check if the given reaction is synthetically valid.

Parameters:

Name Type Description Default
rxn ChemicalReaction

dm.ChemicalReaction object

required
enable_logs bool

Whether to enable logs.

False

Returns:

Type Description
bool

Boolean whether reaction is valid

Source code in datamol/reactions/_reactions.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def is_reaction_ok(rxn: dm.ChemicalReaction, enable_logs: bool = False) -> bool:
    """Check whether a reaction passes RDKit preprocessing and sanitization.

    The reaction is first run through `rdChemReactions.PreprocessReaction`, whose
    report can optionally be logged, and then through `rdChemReactions.SanitizeRxn`.

    NOTE(review): both RDKit calls receive `rxn` itself and presumably modify it
    in place - confirm against the RDKit documentation.

    Args:
        rxn: dm.ChemicalReaction object
        enable_logs: Whether to log the preprocessing report at the INFO level.

    Returns:
        True if `SanitizeRxn` returns `SANITIZE_NONE` or `None`, False otherwise.
    """
    report = rdChemReactions.PreprocessReaction(rxn)
    n_warnings, n_errors, n_reactants, n_products, added_labels = report

    if enable_logs:
        report_lines = (
            f"Number of warnings:{n_warnings}",
            f"Number of preprocessing errors: {n_errors}",
            f"Number of reactants in reaction: {n_reactants}",
            f"Number of products in reaction: {n_products}",
            f"Preprocess labels added:{added_labels}",
        )
        for line in report_lines:
            logger.info(line)

    sanitize_result = rdChemReactions.SanitizeRxn(rxn)
    accepted_results = (rdChemReactions.SanitizeFlags.SANITIZE_NONE, None)
    return sanitize_result in accepted_results

rxn_from_block(rxn_block, sanitize=False)

Create a reaction from a block.

Parameters:

Name Type Description Default
rxn_block str

A reaction block.

required
sanitize bool

Whether to sanitize the reaction.

False

Returns:

Type Description
ChemicalReaction

Initialized reaction.

Source code in datamol/reactions/_reactions.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def rxn_from_block(
    rxn_block: str,
    sanitize: bool = False,
) -> dm.ChemicalReaction:
    """Parse a reaction block into an initialized reaction.

    Args:
        rxn_block: A reaction block.
        sanitize: Whether to sanitize the reaction. Forwarded to
            `rdChemReactions.ReactionFromRxnBlock`.

    Returns:
        Initialized reaction.
    """
    parser_options = {"rxnblock": rxn_block, "sanitize": sanitize}
    reaction = rdChemReactions.ReactionFromRxnBlock(**parser_options)

    # Callers receive a reaction that is ready to be run.
    reaction.Initialize()
    return reaction

rxn_from_block_file(rxn_block_path, sanitize=False)

Create a reaction from a block file.

Parameters:

Name Type Description Default
rxn_block_path Union[str, PathLike]

Filepath to a reaction block file.

required
sanitize bool

Whether to sanitize the reaction.

False

Returns:

Type Description
ChemicalReaction

Initialized reaction.

Source code in datamol/reactions/_reactions.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def rxn_from_block_file(
    rxn_block_path: Union[str, os.PathLike],
    sanitize: bool = False,
) -> dm.ChemicalReaction:
    """Read a reaction block file and parse it into an initialized reaction.

    The file is opened through `fsspec.open` with its default mode and its whole
    content is handed over to `rxn_from_block`.

    Args:
        rxn_block_path: Filepath to a reaction block file.
        sanitize: Whether to sanitize the reaction.

    Returns:
        Initialized reaction.
    """
    with fsspec.open(rxn_block_path) as handle:
        content = handle.read()  # type: ignore
        return rxn_from_block(rxn_block=content, sanitize=sanitize)

rxn_from_smarts(rxn_smarts)

Create a reaction from smarts

Parameters:

Name Type Description Default
rxn_smarts str

Reaction SMARTS string

required

Returns:

Type Description
ChemicalReaction

Initialized reaction.

Source code in datamol/reactions/_reactions.py
22
23
24
25
26
27
28
29
30
31
32
33
34
def rxn_from_smarts(rxn_smarts: str) -> dm.ChemicalReaction:
    """Parse a reaction SMARTS into an initialized reaction.

    Args:
        rxn_smarts: Reaction SMARTS string.

    Returns:
        Initialized reaction.
    """
    reaction = rdChemReactions.ReactionFromSmarts(SMARTS=rxn_smarts)

    # Callers receive a reaction that is ready to be run.
    reaction.Initialize()
    return reaction

rxn_to_block(rxn, separate_agents=False, force_V3000=False)

Create a block from a reaction object.

Parameters:

Name Type Description Default
rxn ChemicalReaction

A reaction object.

required
separate_agents bool

Whether to separate agents from the reactants block. Not supported if force_V3000=True.

False
force_V3000 bool

Write the block in a V3000 format.

False

Returns:

Type Description
str

Reaction block as string

Source code in datamol/reactions/_reactions.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def rxn_to_block(
    rxn: dm.ChemicalReaction,
    separate_agents: bool = False,
    force_V3000: bool = False,
) -> str:
    """Create a block from a reaction object.

    Args:
        rxn: A reaction object.
        separate_agents: Whether to separate agents from the reactants block. Not supported
            if `force_V3000=True`.
        force_V3000: Write the block in a V3000 format. The flag is ignored, with a
            warning, when the installed RDKit is older than 2022.*.

    Returns:
        Reaction block as string
    """

    extra_kwargs = {}
    if dm.is_lower_than_current_rdkit_version("2022"):
        # `forceV3000` cannot be forwarded to older RDKit versions. Only warn when
        # the caller actually asked for V3000 output: warning on every default call
        # would report a problem with a flag that was never used.
        if force_V3000:
            logger.warning("RDKit version prior to 2022.* does not support the `force_V3000` flag.")
    else:
        extra_kwargs["forceV3000"] = force_V3000

    return rdChemReactions.ReactionToRxnBlock(
        reaction=rxn,
        separateAgents=separate_agents,
        **extra_kwargs,
    )

rxn_to_block_file(rxn, output_block_path, separate_agents=False, force_V3000=False)

Write a reaction object to a reaction block file.

Parameters:

Name Type Description Default
rxn ChemicalReaction

A reaction object.

required
output_block_path Union[str, PathLike]

Filepath to a reaction block file.

required
separate_agents bool

Whether to separate agents from the reactants block. Not supported if force_V3000=True.

False
force_V3000 bool

Write the block in a V3000 format.

False
Source code in datamol/reactions/_reactions.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def rxn_to_block_file(
    rxn: dm.ChemicalReaction,
    output_block_path: Union[str, os.PathLike],
    separate_agents: bool = False,
    force_V3000: bool = False,
):
    """Write a reaction object to a reaction block file.

    The block is generated with `rxn_to_block` and written in text mode through
    `fsspec.open`.

    Args:
        rxn: A reaction object.
        output_block_path: Filepath to a reaction block file.
        separate_agents: Whether to separate agents from the reactants block. Not supported
            if `force_V3000=True`.
        force_V3000: Write the block in a V3000 format.
    """
    block_options = {"separate_agents": separate_agents, "force_V3000": force_V3000}
    rxn_block = rxn_to_block(rxn=rxn, **block_options)

    with fsspec.open(output_block_path, "w") as handle:
        # The cast only helps the type checker: the file was opened in text mode.
        writer = cast(io.TextIOWrapper, handle)
        writer.write(rxn_block)

rxn_to_smarts(rxn)

Create a SMARTS from a reaction.

Parameters:

Name Type Description Default
rxn ChemicalReaction

dm.ChemicalReaction object.

required

Returns:

Type Description
str

SMARTS as string.

Source code in datamol/reactions/_reactions.py
37
38
39
40
41
42
43
44
45
46
def rxn_to_smarts(rxn: dm.ChemicalReaction) -> str:
    """Serialize a reaction to its reaction SMARTS.

    Args:
        rxn: dm.ChemicalReaction object.

    Returns:
        SMARTS as string.
    """
    reaction_smarts = rdChemReactions.ReactionToSmarts(reaction=rxn)
    return reaction_smarts

select_reaction_output(product, product_index=None, single_product_group=True, rm_attach=False, as_smiles=False, sanitize=True)

Select and post-process the products generated by a reaction. When single_product_group=True, only one randomly chosen product group is kept.

Parameters:

Name Type Description Default
product Sequence[Sequence[Mol]]

All the products from a reaction. A sequence of the list of products.

required
product_index Optional[Union[int, list]]

Index of the product to select. Example: for A.B -> C.D, the indices of the products are 0 and 1. Both C and D are returned if the index is None or if the product indices are set to [0, 1].

None
single_product_group bool

Whether to return a single group of products from a reaction.

True
rm_attach bool

Whether to remove the attachment points from the products.

False
as_smiles bool

Whether to return the result as SMILES.

False
sanitize bool

Whether to sanitize the products to return.

True

Returns:

Type Description
Union[list, str, Mol]

Processed products from reaction.

Source code in datamol/reactions/_reactions.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
def select_reaction_output(
    product: Sequence[Sequence[dm.Mol]],
    product_index: Optional[Union[int, list]] = None,
    single_product_group: bool = True,
    rm_attach: bool = False,
    as_smiles: bool = False,
    sanitize: bool = True,
) -> Union[list, str, dm.Mol]:
    """Select and post-process the products generated by a reaction.

    The products are arranged as an array with one row per product group. The
    requested product columns are selected first, then (optionally) a single
    group is picked at random, and finally the remaining products are sanitized,
    stripped of their attachment points and/or converted to SMILES.

    Args:
        product: All the products from a reaction. A sequence of the list of products.
        product_index: Index of the product to select.
            Example: for A.B -> C.D, the indices of the products are 0 and 1.
            Both C and D are returned if the index is None or if the product
            indices are set to [0, 1].
        single_product_group: Whether to return a single group of products from a
            reaction. The group is chosen at random with `np.random.randint`.
        rm_attach: Whether to remove the attachment points from the products.
        as_smiles: Whether to return the result as SMILES.
        sanitize: Whether to sanitize the products to return.

    Returns:
        Processed products from reaction. An empty input is returned as an empty
        list. Products that are `None` (for example after a failed sanitization)
        are kept as `None`.
    """
    if len(product) == 0:
        return list(product)

    def _map_products(fn, array):
        # `otypes=[object]` is required: otherwise `np.vectorize` infers the output
        # dtype from the first result, so a first SMILES string would turn every
        # later `None` into the literal string "None". It also lets empty
        # selections through, which `np.vectorize` rejects when `otypes` is unset.
        return np.vectorize(fn, otypes=[object])(array)

    def _remove_dummies(mol):
        return dm.remove_dummies(mol) if mol is not None else mol

    def _to_smiles(mol):
        return dm.to_smiles(mol, allow_to_fail=True) if mol is not None else mol

    products = np.array(product)

    if product_index is not None:
        products = products[:, product_index]

    if single_product_group:
        # Index with a size-1 array so that the group axis is kept until the end.
        group = np.random.randint(products.shape[0], size=1)
        products = products[group]

    if sanitize:
        products = _map_products(dm.sanitize_mol, products)
    if rm_attach:
        products = _map_products(_remove_dummies, products)
    if as_smiles:
        products = _map_products(_to_smiles, products)

    selected = products.tolist()
    if single_product_group:
        return selected[0]
    return selected

add_brackets_to_attachment_points(smiles)

Adds brackets to the attachment points (if they don't have them). Example: "CC(C)CO*" to "CC(C)CO[*]"

Parameters:

Name Type Description Default
smiles str

A smiles string.

required

Returns:

Type Description
str

A smiles string with brackets.

Source code in datamol/reactions/_attachments.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def add_brackets_to_attachment_points(smiles: str) -> str:
    """Wrap the attachment points of a SMILES in brackets when they lack them.

    Example: "CC(C)CO*" to "CC(C)CO[*]"

    Args:
        smiles: A smiles string.

    Returns:
        A smiles string with brackets.
    """
    bracketed_token = f"[{ATTACHMENT_POINT_TOKEN}]"
    return re.sub(ATTACHMENT_POINT_NO_BRACKETS_REGEXP, bracketed_token, smiles)

convert_attach_to_isotope(mol_or_smiles, same_isotope=False, as_smiles=False)

Convert attachment to isotope mapping.

Examples: "O=C(NCc1cnc([*])c1)[*]" to "O=C(NCc1cnc([1*])c1)[2*]"

Parameters:

Name Type Description Default
mol_or_smiles Union[Mol, str]

A Mol object or a smiles to be converted

required
same_isotope bool

Whether to convert to the same isotope. Example: "O=C(NCc1cnc([*])c1)[*]" to "O=C(NCc1cnc([1*])c1)[1*]"

False

Returns:

Type Description
Union[Mol, str]

Converted Mol object or SMILES.

Source code in datamol/reactions/_attachments.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def convert_attach_to_isotope(
    mol_or_smiles: Union[dm.Mol, str],
    same_isotope: bool = False,
    as_smiles: bool = False,
) -> Union[dm.Mol, str]:
    """Rewrite the attachment points of a molecule as isotope-labelled dummy atoms.

    Examples: "O=C(NCc1cnc([*])c1)[*]" to  "O=C(NCc1cnc([1*])c1)[2*]"

    Args:
        mol_or_smiles: A Mol object or a smiles to be converted
        same_isotope: Whether to convert to the same isotope.
            Example: "O=C(NCc1cnc([*])c1)[*]" to  "O=C(NCc1cnc([1*])c1)[1*]"
        as_smiles: Whether to return the result as a SMILES string instead of a
            Mol object.

    Returns:
        Converted Mol object or SMILES.
    """
    # Round-trip through a Mol so that both input kinds end up as a SMILES string.
    input_smiles = cast(str, dm.to_smiles(dm.to_mol(mol_or_smiles)))
    bracketed = add_brackets_to_attachment_points(input_smiles)

    # reg matching seems to be the most effective.
    # Either force isotope 1 everywhere or reuse group 1 of the matched pattern.
    template = "[1{}]" if same_isotope else r"[\g<1>{}]"
    replacement = template.format(ATTACHMENT_POINT_TOKEN)
    converted = re.sub(ATTACHMENT_POINT_NUM_REGEXP, replacement, bracketed)

    return converted if as_smiles else dm.to_mol(converted)

num_attachment_points(mol_or_smiles)

Get the number of attachment points in the given molecule.

Parameters:

Name Type Description Default
mol_or_smiles Union[Mol, str]

A Mol object or a smiles to be converted

required

Returns:

Type Description
int

Number of attachment points of the given molecule.

Source code in datamol/reactions/_attachments.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def num_attachment_points(mol_or_smiles: Union[dm.Mol, str]) -> int:
    """Count the attachment points of a molecule.

    A Mol object is inspected atom by atom, while any other input is treated as
    a SMILES string and scanned with `ATTACHMENT_POINT_REGEXP`.

    Args:
        mol_or_smiles: A Mol object or a smiles to be converted

    Returns:
        Number of attachment points of the given molecule.
    """
    if not isinstance(mol_or_smiles, dm.Mol):
        return len(re.findall(ATTACHMENT_POINT_REGEXP, mol_or_smiles))

    return sum(
        1 for atom in mol_or_smiles.GetAtoms() if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN
    )

open_attach_points(mol, fix_atom_map=False, bond_type=dm.SINGLE_BOND)

Compute attachment points on a molecule. This adds an attachment point on every atom of the molecule that can still accept one.

Parameters:

Name Type Description Default
mol Mol

A Mol object to be processed.

required
fix_atom_map bool

Whether to give every new attachment point the atom map number 1 instead of the index of the atom it is attached to.

False
bond_type BondType

The bond type to be opened.

SINGLE_BOND

Returns:

Type Description
Mol

Molecule with open attachment points

Source code in datamol/reactions/_attachments.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def open_attach_points(
    mol: dm.Mol,
    fix_atom_map: bool = False,
    bond_type: dm.BondType = dm.SINGLE_BOND,
) -> dm.Mol:
    """Add an attachment point on every atom of a molecule that can accept one.

    An atom receives an attachment point when it is not itself an attachment
    point, has a positive implicit valence and does not carry the `_protected`
    property set to "1". The work is done on a copy of the input molecule.

    Args:
        mol: A Mol object to be processed.
        fix_atom_map: Whether to give every new attachment point the atom map
            number 1 instead of the index of the atom it is attached to.
        bond_type: The bond type to be opened. With `None`, the bond is added
            without an explicit type.

    Returns:
        Molecule with open attachment points
    """

    def _accepts_attachment(atom) -> bool:
        # The checks run in the same order as a short-circuiting `and` chain.
        if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN:
            return False
        if atom.GetImplicitValence() <= 0:
            return False
        return not (atom.HasProp("_protected") and atom.GetProp("_protected") == "1")

    editable = Chem.rdchem.RWMol(dm.to_mol(mol))
    with dm.log.without_rdkit_log():
        # The atoms are processed from the highest index down to the lowest.
        open_indices = sorted(
            (atom.GetIdx() for atom in editable.GetAtoms() if _accepts_attachment(atom)),
            reverse=True,
        )

        for atom_idx in open_indices:
            attachment = Chem.rdchem.Atom(ATTACHMENT_POINT_TOKEN)
            attachment.SetAtomMapNum(1 if fix_atom_map else atom_idx)
            attachment_idx = editable.AddAtom(attachment)
            editable.UpdatePropertyCache(strict=False)
            if bond_type is None:
                editable.AddBond(atom_idx, attachment_idx)
            else:
                editable.AddBond(atom_idx, attachment_idx, bond_type)

    return dm.sanitize_mol(editable)