Skip to content

datamol.reactions

apply_reaction(rxn, reactants, single_output=False, as_smiles=False, rm_attach=False, disable_logs=True, sanitize=True)

Apply a chemical reaction on a molecule

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

Reaction object.

required
reactants tuple

A tuple of reactants.

required
single_output bool

Whether to return a single product out of all the possible products.

False
as_smiles bool

Whether to return the product as SMILES.

False
rm_attach bool

Whether to remove the attachment points from the product.

False
disable_logs bool

Whether to disable the RDKit logs.

True
sanitize bool

Whether to sanitize the product.

True

Returns:

Type Description
Union[list, str, dm.Mol]

Reaction products.

Source code in datamol/reactions/_reactions.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def apply_reaction(
    rxn: dm.ChemicalReaction,
    reactants: tuple,
    single_output: bool = False,
    as_smiles: bool = False,
    rm_attach: bool = False,
    disable_logs: bool = True,
    sanitize: bool = True,
) -> Union[list, str, dm.Mol]:
    """Run a chemical reaction on a set of reactants and post-process the products.

    The reaction is initialized first if it is not already, then the raw products
    are handed to `select_reaction_output` together with the output options.

    Args:
        rxn: Reaction object.
        reactants: A tuple of reactants, passed as-is to `rxn.RunReactants`.
        single_output: Whether to return a single product out of all the possible products.
        as_smiles: Whether to return the product(s) as SMILES.
        rm_attach: Whether to remove the attachment points from the product(s).
        disable_logs: Whether to disable the RDKit logs.
        sanitize: Whether to sanitize the product(s).

    Returns:
        Reaction products, as produced by `select_reaction_output`.
    """
    with dm.without_rdkit_log(enable=disable_logs):
        # Make sure the reaction is initialized before running it.
        if not rxn.IsInitialized():
            rxn.Initialize()  # pragma: no cover

        raw_products = rxn.RunReactants(reactants)
        return select_reaction_output(
            product=raw_products,
            single_output=single_output,
            as_smiles=as_smiles,
            rm_attach=rm_attach,
            sanitize=sanitize,
        )

can_react(rxn, mol)

Check if a molecule is a reactant to a chemical reaction.

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

Reaction to check.

required
mol dm.Mol

Molecule to check if it is a reactant.

required

Returns:

Type Description
bool

True if mol is a reactant of rxn.

Source code in datamol/reactions/_reactions.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def can_react(rxn: dm.ChemicalReaction, mol: dm.Mol) -> bool:
    """Tell whether a molecule can act as a reactant of a chemical reaction.

    The molecule must both be accepted by `rxn.IsMoleculeReactant` and have a
    position among the reactants according to `find_reactant_position`.

    Args:
        rxn: Reaction to check.
        mol: Molecule to check if it is a reactant.

    Returns:
        True if `mol` is a reactant of rxn.
    """
    if not rxn.IsInitialized():
        rxn.Initialize()  # pragma: no cover

    # Guard clause: no need to look for a position if RDKit already rejects it.
    if not rxn.IsMoleculeReactant(mol):
        return False

    return find_reactant_position(rxn, mol) != -1

find_reactant_position(rxn, mol)

Find the position of a reactant in a reaction.

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

Reaction

required
mol dm.Mol

Molecule

required

Returns:

Type Description
int

Reactant position or -1 if mol is not a reactant.

Source code in datamol/reactions/_reactions.py
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
def find_reactant_position(rxn: dm.ChemicalReaction, mol: dm.Mol) -> int:
    """Locate a molecule among the reactants of a reaction.

    Every reactant of the reaction is tested as a substructure of `mol`. If several
    reactants match, the position of the last matching one is returned.

    Args:
        rxn: Reaction
        mol: Molecule

    Returns:
        Reactant position or -1 if `mol` is not a reactant.
    """

    if not rxn.IsInitialized():
        rxn.Initialize()  # pragma: no cover

    matching_positions = [
        position
        for position, reactant in enumerate(rxn.GetReactants())
        if mol.HasSubstructMatch(reactant)
    ]

    # Keep the last match, mirroring a loop that overwrites its result on every hit.
    return matching_positions[-1] if matching_positions else -1

inverse_reaction(rxn)

Get the reverse reaction of the input reaction

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

Reaction to inverse.

required

Returns:

Type Description
dm.ChemicalReaction

Inversed reaction.

Source code in datamol/reactions/_reactions.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
def inverse_reaction(rxn: dm.ChemicalReaction) -> dm.ChemicalReaction:
    """Build the reverse of a reaction by swapping its reactants and products.

    A new reaction is created: the reactant templates of `rxn` are added as its
    product templates and the product templates of `rxn` as its reactant templates.
    Only reactant and product templates are transferred.

    Args:
        rxn: Reaction to inverse.

    Returns:
        Inversed reaction (already initialized).
    """
    reversed_rxn = rdChemReactions.ChemicalReaction()

    # (how many templates, how to read one, where to add it on the new reaction)
    swaps = (
        (rxn.GetNumReactantTemplates, rxn.GetReactantTemplate, reversed_rxn.AddProductTemplate),
        (rxn.GetNumProductTemplates, rxn.GetProductTemplate, reversed_rxn.AddReactantTemplate),
    )
    for count_templates, get_template, add_template in swaps:
        for idx in range(count_templates()):
            add_template(get_template(idx))

    reversed_rxn.Initialize()
    return reversed_rxn

is_reaction_ok(rxn, enable_logs=False)

Check if the given reaction is synthetically valid.

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

dm.ChemicalReaction object

required
enable_logs bool

Whether to enable logs.

False

Returns:

Type Description
bool

Boolean whether reaction is valid

Source code in datamol/reactions/_reactions.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def is_reaction_ok(rxn: dm.ChemicalReaction, enable_logs: bool = False) -> bool:
    """Check if the given reaction is synthetically valid.

    The reaction is first preprocessed with `rdChemReactions.PreprocessReaction`
    (whose counters are optionally logged), then sanitized with
    `rdChemReactions.SanitizeRxn`. The reaction is considered valid when the
    sanitization returns `SANITIZE_NONE` or `None`.

    Args:
        rxn: dm.ChemicalReaction object
        enable_logs: Whether to log the preprocessing counters.

    Returns:
        Boolean whether reaction is valid
    """
    nWarn, nError, nReactants, nProducts, labels = rdChemReactions.PreprocessReaction(rxn)

    if enable_logs:
        report = (
            f"Number of warnings:{nWarn}",
            f"Number of preprocessing errors: {nError}",
            f"Number of reactants in reaction: {nReactants}",
            f"Number of products in reaction: {nProducts}",
            f"Preprocess labels added:{labels}",
        )
        for message in report:
            logger.info(message)

    sanitize_result = rdChemReactions.SanitizeRxn(rxn)
    accepted_results = (rdChemReactions.SanitizeFlags.SANITIZE_NONE, None)
    return sanitize_result in accepted_results

rxn_from_block(rxn_block, sanitize=False)

Create a reaction from a block.

Parameters:

Name Type Description Default
rxn_block str

A reaction block.

required
sanitize bool

Whether to sanitize the reaction.

False

Returns:

Type Description
dm.ChemicalReaction

Initialized reaction.

Source code in datamol/reactions/_reactions.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def rxn_from_block(
    rxn_block: str,
    sanitize: bool = False,
) -> dm.ChemicalReaction:
    """Parse a reaction block into an initialized reaction.

    Args:
        rxn_block: A reaction block.
        sanitize: Whether to sanitize the reaction.

    Returns:
        Initialized reaction.

    """
    reaction = rdChemReactions.ReactionFromRxnBlock(
        rxnblock=rxn_block,
        sanitize=sanitize,
    )
    # Initialize here so the caller gets a reaction that is ready to use.
    reaction.Initialize()
    return reaction

rxn_from_block_file(rxn_block_path, sanitize=False)

Create a reaction from a block file.

Parameters:

Name Type Description Default
rxn_block_path Union[str, os.PathLike]

Filepath to a reaction block file.

required
sanitize bool

Whether to sanitize the reaction.

False

Returns:

Type Description
dm.ChemicalReaction

Initialized reaction.

Source code in datamol/reactions/_reactions.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def rxn_from_block_file(
    rxn_block_path: Union[str, os.PathLike],
    sanitize: bool = False,
) -> dm.ChemicalReaction:
    """Load a reaction from a file containing a reaction block.

    The file is opened through `fsspec`, read entirely, and its content is
    parsed with `rxn_from_block`.

    Args:
        rxn_block_path: Filepath to a reaction block file.
        sanitize: Whether to sanitize the reaction.

    Returns:
        Initialized reaction.
    """
    with fsspec.open(rxn_block_path) as handle:
        block_content = handle.read()  # type: ignore

    # The file content is fully in memory: parse it once the file is closed.
    return rxn_from_block(rxn_block=block_content, sanitize=sanitize)

rxn_from_smarts(rxn_smarts)

Create a reaction from smarts

Parameters:

Name Type Description Default
rxn_smarts str

Reaction SMARTS string

required

Returns:

Type Description
dm.ChemicalReaction

Initialized reaction.

Source code in datamol/reactions/_reactions.py
20
21
22
23
24
25
26
27
28
29
30
31
32
def rxn_from_smarts(rxn_smarts: str) -> dm.ChemicalReaction:
    """Build an initialized reaction from a reaction SMARTS.

    Args:
        rxn_smarts: Reaction SMARTS string.

    Returns:
        Initialized reaction.
    """
    reaction = rdChemReactions.ReactionFromSmarts(SMARTS=rxn_smarts)
    # Initialize here so the caller gets a reaction that is ready to use.
    reaction.Initialize()
    return reaction

rxn_to_block(rxn, separate_agents=False, force_V3000=False)

Create a block from a reaction object.

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

A reaction object.

required
separate_agents bool

Whether to separate agents from the reactants block. Not supported if force_V3000=True.

False
force_V3000 bool

Write the block in a V3000 format.

False

Returns:

Type Description
str

Reaction block as string

Source code in datamol/reactions/_reactions.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def rxn_to_block(
    rxn: dm.ChemicalReaction,
    separate_agents: bool = False,
    force_V3000: bool = False,
) -> str:
    """Create a block from a reaction object.

    Args:
        rxn: A reaction object.
        separate_agents: Whether to separate agents from the reactants block. Not supported
            if `force_V3000=True`.
        force_V3000: Write the block in a V3000 format. Ignored (with a warning) when the
            installed RDKit is older than 2022.*.

    Returns:
        Reaction block as string
    """

    extra_kwargs = {}
    if dm.is_lower_than_current_rdkit_version("2022"):
        # `forceV3000` is not forwarded on these RDKit versions. Only warn when the
        # caller actually asked for V3000, so default calls stay silent.
        if force_V3000:
            logger.warning("RDKit version prior to 2022.* does not support the `force_V3000` flag.")
    else:
        extra_kwargs["forceV3000"] = force_V3000

    return rdChemReactions.ReactionToRxnBlock(
        reaction=rxn,
        separateAgents=separate_agents,
        **extra_kwargs,
    )

rxn_to_block_file(rxn, output_block_path, separate_agents=False, force_V3000=False)

Write a reaction object to a reaction block file.

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

A reaction object.

required
output_block_path Union[str, os.PathLike]

Filepath to a reaction block file.

required
separate_agents bool

Whether to separate agents from the reactants block. Not supported if force_V3000=True.

False
force_V3000 bool

Write the block in a V3000 format.

False
Source code in datamol/reactions/_reactions.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def rxn_to_block_file(
    rxn: dm.ChemicalReaction,
    output_block_path: Union[str, os.PathLike],
    separate_agents: bool = False,
    force_V3000: bool = False,
):
    """Write a reaction object to a reaction block file.

    The block is generated with `rxn_to_block` and written in text mode
    through `fsspec`.

    Args:
        rxn: A reaction object.
        output_block_path: Filepath to a reaction block file.
        separate_agents: Whether to separate agents from the reactants block. Not supported
            if `force_V3000=True`.
        force_V3000: Write the block in a V3000 format.
    """
    block_content = rxn_to_block(
        rxn=rxn,
        separate_agents=separate_agents,
        force_V3000=force_V3000,
    )

    with fsspec.open(output_block_path, "w") as f:
        # The cast only narrows the type for static checkers.
        text_handle = cast(io.TextIOWrapper, f)
        text_handle.write(block_content)

rxn_to_smarts(rxn)

Create a SMARTS from a reaction.

Parameters:

Name Type Description Default
rxn dm.ChemicalReaction

dm.ChemicalReaction object.

required

Returns:

Type Description
str

SMARTS as string.

Source code in datamol/reactions/_reactions.py
35
36
37
38
39
40
41
42
43
44
def rxn_to_smarts(rxn: dm.ChemicalReaction) -> str:
    """Convert a reaction into its reaction SMARTS.

    Args:
        rxn: dm.ChemicalReaction object.

    Returns:
        SMARTS as string.
    """
    smarts = rdChemReactions.ReactionToSmarts(reaction=rxn)
    return smarts

select_reaction_output(product, single_output=True, rm_attach=False, as_smiles=False, sanitize=True)

Post-process the products of a reaction. When single_output is set, a single product is picked at random among all the products.

Parameters:

Name Type Description Default
product list

All the products from a reaction.

required
single_output bool

Whether to return a single output from a reaction.

True
rm_attach bool

Whether to remove the attachment points from the product.

False
as_smiles bool

Whether to return the result as SMILES.

False
sanitize bool

Whether to sanitize the product to return.

True

Returns:

Type Description
Union[list, str, dm.Mol]

Processed products from reaction.

Source code in datamol/reactions/_reactions.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def select_reaction_output(
    product: list,
    single_output: bool = True,
    rm_attach: bool = False,
    as_smiles: bool = False,
    sanitize: bool = True,
) -> Union[list, str, dm.Mol]:
    """
    Post-process the products of a reaction.

    The nested products are flattened into a single list. When `single_output`
    is set, one product is picked at random among them (it is not necessarily
    the first one). The remaining product(s) are then optionally sanitized,
    stripped of their attachment points and converted to SMILES, in that order.

    Args:
        product: All the products from a reaction, as a sequence of groups of products
            (e.g. the tuple of tuples returned by `rxn.RunReactants`).
        single_output: Whether to return a single output from a reaction.
        rm_attach: Whether to remove the attachment points from the product.
        as_smiles: Whether to return the result as SMILES. Products that are `None`
            at that stage are dropped.
        sanitize: Whether to sanitize the product to return.

    Returns:
        Processed products from reaction: a single product if `single_output` is set,
        a list of products otherwise.
    """
    # Flatten all possible products of a reaction. A comprehension is linear in the
    # number of products, whereas `sum(product, ())` re-copies the accumulated tuple
    # at every step.
    candidates = [mol for group in product for mol in group]

    if single_output:
        # Random pick among all the products; kept as a one-element list so the
        # steps below apply uniformly.
        candidates = list(np.random.choice(candidates, 1))
    if sanitize:
        candidates = [dm.sanitize_mol(mol) for mol in candidates]
    if rm_attach:
        candidates = [dm.remove_dummies(mol) for mol in candidates]
    if as_smiles:
        candidates = [dm.to_smiles(mol) for mol in candidates if mol is not None]

    if single_output:
        return candidates[0]
    return candidates

add_brackets_to_attachment_points(smiles)

Adds brackets to the attachment points (if they don't have them). Example: "CC(C)CO*" to "CC(C)CO[*]"

Parameters:

Name Type Description Default
smiles str

A smiles string.

required

Returns:

Type Description
str

A smiles string with brackets.

Source code in datamol/reactions/_attachments.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def add_brackets_to_attachment_points(smiles: str) -> str:
    """
    Wrap the bare attachment points of a SMILES in brackets.
    Example: "CC(C)CO*" to "CC(C)CO[*]"

    Args:
        smiles: A smiles string.

    Returns:
        A smiles string with brackets.
    """
    bracketed_token = f"[{ATTACHMENT_POINT_TOKEN}]"
    return re.sub(ATTACHMENT_POINT_NO_BRACKETS_REGEXP, bracketed_token, smiles)

convert_attach_to_isotope(mol_or_smiles, same_isotope=False, as_smiles=False)

Convert attachment to isotope mapping.

"O=C(NCc1cnc([*])c1)[*]" to "O=C(NCc1cnc([1*])c1)[2*]"

Parameters:

Name Type Description Default
mol_or_smiles Union[dm.Mol, str]

A Mol object or a smiles to be converted

required
same_isotope bool

Whether to convert all attachment points to the same isotope. Example: "O=C(NCc1cnc([*])c1)[*]" to "O=C(NCc1cnc([1*])c1)[1*]"

False

Returns:

Type Description
Union[dm.Mol, str]

Converted Mol object or SMILES.

Source code in datamol/reactions/_attachments.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def convert_attach_to_isotope(
    mol_or_smiles: Union[dm.Mol, str],
    same_isotope: bool = False,
    as_smiles: bool = False,
) -> Union[dm.Mol, str]:
    """Convert attachment to isotope mapping.

    Examples: "O=C(NCc1cnc([*])c1)[*]" to  "O=C(NCc1cnc([1*])c1)[2*]"

    The input is converted to a SMILES, its attachment points are bracketed, and
    the rewrite itself is a regular expression substitution on that SMILES.

    Args:
        mol_or_smiles: A Mol object or a smiles to be converted
        same_isotope: Whether to convert all attachment points to the same isotope.
            Example: "O=C(NCc1cnc([*])c1)[*]" to  "O=C(NCc1cnc([1*])c1)[1*]"
        as_smiles: Whether to return the result as a SMILES string instead of a
            Mol object.

    Returns:
        Converted Mol object or SMILES.
    """
    mol = dm.to_mol(mol_or_smiles)
    smiles = cast(str, dm.to_smiles(mol))
    smiles = add_brackets_to_attachment_points(smiles)

    # reg matching seems to be the most effective: either force isotope 1
    # everywhere, or reuse the first group captured by the pattern.
    template = "[1{}]" if same_isotope else r"[\g<1>{}]"
    replacement = template.format(ATTACHMENT_POINT_TOKEN)
    smiles = re.sub(ATTACHMENT_POINT_NUM_REGEXP, replacement, smiles)

    return smiles if as_smiles else dm.to_mol(smiles)

num_attachment_points(mol_or_smiles)

Get the number of attachment points in the given molecule.

Parameters:

Name Type Description Default
mol_or_smiles Union[dm.Mol, str]

A Mol object or a smiles to be converted

required

Returns:

Type Description
int

Number of attachment points of the given molecule.

Source code in datamol/reactions/_attachments.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def num_attachment_points(mol_or_smiles: Union[dm.Mol, str]) -> int:
    """
    Get the number of attachment points in the given molecule.

    A Mol object is inspected atom by atom; anything else is treated as a SMILES
    string and searched with a regular expression.

    Args:
        mol_or_smiles: A Mol object or a smiles to be inspected

    Returns:
        Number of attachment points of the given molecule.
    """
    if not isinstance(mol_or_smiles, dm.Mol):
        return len(re.findall(ATTACHMENT_POINT_REGEXP, mol_or_smiles))

    mol = cast(dm.Mol, mol_or_smiles)
    return sum(1 for atom in mol.GetAtoms() if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN)

open_attach_points(mol, fix_atom_map=False, bond_type=dm.SINGLE_BOND)

Compute attachment points on a molecule. This adds an attachment point on every valid position of the given molecule.

Parameters:

Name Type Description Default
mol dm.Mol

A Mol object to be processed.

required
fix_atom_map bool

Whether to fix the atom mapping of the molecule.

False
bond_type dm.BondType

The bond type to be opened.

dm.SINGLE_BOND

Returns:

Type Description
dm.Mol

Molecule with open attachment points

Source code in datamol/reactions/_attachments.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def open_attach_points(
    mol: dm.Mol,
    fix_atom_map: bool = False,
    bond_type: dm.BondType = dm.SINGLE_BOND,
) -> dm.Mol:
    """Compute attachment points on a molecule.
    This adds an attachment point on every valid position of the given molecule.

    An atom is a valid position when it is not itself an attachment point, has a
    positive implicit valence, and is not flagged with the `_protected` property
    set to "1". Each valid atom gets one new attachment atom bonded to it.

    Args:
        mol: A Mol object to be processed.
        fix_atom_map: Whether to fix the atom mapping of the molecule. If True, every
            new attachment atom gets the atom map number 1; otherwise it gets the
            index of the atom it is attached to.
        bond_type: The bond type to be opened. If None, the bond is added without
            an explicit bond type.

    Returns:
        Molecule with open attachment points
    """

    def _is_protected(atom) -> bool:
        # An atom is protected only when the property exists and equals "1".
        return atom.HasProp("_protected") and atom.GetProp("_protected") == "1"

    emol = Chem.rdchem.RWMol(dm.to_mol(mol))
    with dm.log.without_rdkit_log():
        open_indices = [
            atom.GetIdx()
            for atom in emol.GetAtoms()
            if atom.GetSymbol() != ATTACHMENT_POINT_TOKEN
            and atom.GetImplicitValence() > 0
            and not _is_protected(atom)
        ]

        # Process the atoms from the highest index to the lowest.
        for parent_idx in sorted(open_indices, reverse=True):
            attach_atom = Chem.rdchem.Atom(ATTACHMENT_POINT_TOKEN)
            attach_atom.SetAtomMapNum(1 if fix_atom_map else parent_idx)
            attach_idx = emol.AddAtom(attach_atom)
            emol.UpdatePropertyCache(strict=False)

            bond_args = (parent_idx, attach_idx)
            if bond_type is not None:
                bond_args = bond_args + (bond_type,)
            emol.AddBond(*bond_args)

    return dm.sanitize_mol(emol)