Skip to content

datamol.descriptors

n_aromatic_atoms(mol)

Calculate the number of aromatic atoms.

Source code in datamol/descriptors/descriptors.py
90
91
92
93
def n_aromatic_atoms(mol: Mol) -> int:
    """Count the aromatic atoms of a molecule.

    Args:
        mol: A molecule.

    Returns:
        The number of substructure matches of the module-level
        `_AROMATIC_QUERY` pattern found in `mol`.
    """
    # NOTE(review): `_AROMATIC_QUERY` is defined elsewhere in the module;
    # presumably a single-atom aromatic query - confirm at its definition.
    return len(mol.GetSubstructMatches(_AROMATIC_QUERY))

n_aromatic_atoms_proportion(mol)

Calculate the aromatic proportion: number of aromatic atoms / total number of heavy atoms.

Parameters:

Name Type Description Default
mol Mol

A molecule.

required

Only heavy atoms are considered.

Source code in datamol/descriptors/descriptors.py
 96
 97
 98
 99
100
101
102
103
104
def n_aromatic_atoms_proportion(mol: Mol) -> float:
    """Calculate the aromatic proportion: # aromatic atoms / # heavy atoms.

    Only heavy atoms are considered in the denominator.

    Args:
        mol: A molecule.

    Returns:
        The fraction of heavy atoms that are aromatic. `0.0` is returned
        when the molecule has no heavy atoms, so that the ratio is always
        defined instead of raising `ZeroDivisionError`.
    """
    n_heavy_atoms = mol.GetNumHeavyAtoms()

    # Guard the division: a molecule without heavy atoms has no aromatic atoms.
    if n_heavy_atoms == 0:
        return 0.0

    return n_aromatic_atoms(mol) / n_heavy_atoms

n_charged_atoms(mol)

Compute the number of charged atoms in a molecule.

Parameters:

Name Type Description Default
mol Mol

A molecule.

required

Returns:

Name Type Description
n_charged_atoms int

number of charged atoms in the molecule

Source code in datamol/descriptors/descriptors.py
143
144
145
146
147
148
149
150
151
152
def n_charged_atoms(mol: Mol) -> int:
    """Count the atoms of a molecule that carry a non-zero formal charge.

    Args:
        mol: A molecule.

    Returns:
        n_charged_atoms: number of atoms in `mol` whose formal charge is
            different from zero.
    """
    # Count lazily: one unit per charged atom, no intermediate list.
    return sum(1 for atom in mol.GetAtoms() if atom.GetFormalCharge() != 0)

n_rigid_bonds(mol)

Compute the number of rigid bonds in a molecule.

Rigid bonds are all bonds except single bonds that are not in a ring (i.e. bonds that are non-single or part of a ring).

Parameters:

Name Type Description Default
mol Mol

A molecule.

required

Returns:

Name Type Description
n_rigid_bonds int

number of rigid bonds in the molecule

Source code in datamol/descriptors/descriptors.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def n_rigid_bonds(mol: Mol) -> int:
    """Compute the number of rigid bonds in a molecule.

    A bond is counted as rigid unless it is a single bond that is not in a
    ring. In other words, rigid bonds are the bonds that are non-single or
    that belong to a ring.

    Args:
        mol: A molecule.

    Returns:
        n_rigid_bonds: number of rigid bonds in the molecule, computed as the
            total number of bonds minus the number of matches of the
            single, non-ring bond pattern.
    """
    # SMARTS `*-&!@*`: any two atoms joined by a bond that is single (`-`)
    # AND (`&`) not in a ring (`!@`). These are the non-rigid bonds.
    non_rigid_bond_query = from_smarts("*-&!@*")
    n_non_rigid_bonds = len(mol.GetSubstructMatches(non_rigid_bond_query))

    return mol.GetNumBonds() - n_non_rigid_bonds

n_stereo_centers(mol)

Compute the number of stereocenters in a molecule.

Parameters:

Name Type Description Default
mol Mol

A molecule.

required

Returns:

Name Type Description
n_stereo_centers int

number of stereocenters in the molecule

Source code in datamol/descriptors/descriptors.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def n_stereo_centers(mol: Mol) -> int:
    """Count the atom stereocenters of a molecule.

    This is a best-effort computation: if RDKit raises any exception while
    perceiving or counting the stereocenters, `0` is returned instead.

    Args:
        mol: A molecule.

    Returns:
        n_stereo_centers: number of stereocenters in the molecule, or `0`
            when the computation fails.
    """
    try:
        # The return value is ignored; the call is made before counting,
        # presumably so stereo is perceived on `mol` first - TODO confirm.
        rdmolops.FindPotentialStereo(mol, cleanIt=False)
        return rdMolDescriptors.CalcNumAtomStereoCenters(mol)
    except Exception:
        # Deliberate fallback: any failure is reported as zero stereocenters.
        return 0

n_stereo_centers_unspecified(mol)

Compute the number of unspecified stereocenters in a molecule.

Parameters:

Name Type Description Default
mol Mol

A molecule.

required

Returns:

Name Type Description
n_stereo_centers_unspecified int

number of unspecified stereocenters in the molecule

Source code in datamol/descriptors/descriptors.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def n_stereo_centers_unspecified(mol: Mol) -> int:
    """Count the unspecified atom stereocenters of a molecule.

    This is a best-effort computation: if RDKit raises any exception while
    perceiving or counting the stereocenters, `0` is returned instead.

    Args:
        mol: A molecule.

    Returns:
        n_stereo_centers_unspecified: number of unspecified stereocenters in
            the molecule, or `0` when the computation fails.
    """
    try:
        # The return value is ignored; the call is made before counting,
        # presumably so stereo is perceived on `mol` first - TODO confirm.
        rdmolops.FindPotentialStereo(mol, cleanIt=False)
        return rdMolDescriptors.CalcNumUnspecifiedAtomStereoCenters(mol)
    except Exception:
        # Deliberate fallback: any failure is reported as zero stereocenters.
        return 0

any_rdkit_descriptor(name)

Return a descriptor function by name, looked up first in rdkit.Chem.Descriptors and then in rdkit.Chem.rdMolDescriptors.

Parameters:

Name Type Description Default
name str

Descriptor name.

required
Source code in datamol/descriptors/compute.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def any_rdkit_descriptor(name: str) -> Callable:
    """Look up an RDKit descriptor function by its name.

    The name is searched first in `rdkit.Chem.Descriptors` and then in
    `rdkit.Chem.rdMolDescriptors`; the first match wins.

    Args:
        name: Descriptor name.

    Returns:
        The attribute called `name` found on one of the two modules.

    Raises:
        ValueError: If neither module exposes an attribute called `name`.
    """
    for module in (Descriptors, rdMolDescriptors):
        descriptor_fn = getattr(module, name, None)
        if descriptor_fn is not None:
            return descriptor_fn

    raise ValueError(f"Descriptor {name} not found.")

batch_compute_many_descriptors(mols, properties_fn=None, add_properties=True, n_jobs=1, batch_size=None, progress=False, progress_leave=True)

Compute a list of opinionated molecular properties on a list of molecules.

Parameters:

Name Type Description Default
mols List[Mol]

A list of molecules.

required
properties_fn Optional[Dict[str, Union[Callable, str]]]

A dict mapping property names to the functions that compute them. If None, a default set of properties is used. If a value is a string instead of a function, dm.descriptors.any_rdkit_descriptor() is used to retrieve the descriptor function.

None
add_properties bool

Whether to also compute the default properties in addition to the ones given in properties_fn.

True

Returns:

Type Description
DataFrame

A dataframe of computed properties with one row per input molecule.

Source code in datamol/descriptors/compute.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def batch_compute_many_descriptors(
    mols: List[Mol],
    properties_fn: Optional[Dict[str, Union[Callable, str]]] = None,
    add_properties: bool = True,
    n_jobs: int = 1,
    batch_size: Optional[int] = None,
    progress: bool = False,
    progress_leave: bool = True,
) -> pd.DataFrame:
    """Compute a set of opinionated molecular properties on a list of molecules.

    Each molecule is processed with `compute_many_descriptors`, dispatched
    through `parallelized`.

    Args:
        mols: A list of molecules.
        properties_fn: A dict mapping property names to the functions that
            compute them. Forwarded as-is to `compute_many_descriptors`.
        add_properties: Forwarded as-is to `compute_many_descriptors`.
        n_jobs: Forwarded to `parallelized` as `n_jobs`.
        batch_size: Forwarded to `parallelized` as `batch_size`.
        progress: Forwarded to `parallelized` as `progress`.
        progress_leave: Forwarded to `parallelized` as the `leave` entry of
            `tqdm_kwargs`.

    Returns:
        A dataframe built from the per-molecule results returned by
        `parallelized`.
    """
    # Bind the per-molecule options once so only the molecule varies per call.
    per_mol_fn = functools.partial(
        compute_many_descriptors,
        properties_fn=properties_fn,
        add_properties=add_properties,
    )

    parallel_options = {
        "batch_size": batch_size,
        "progress": progress,
        "n_jobs": n_jobs,
        "tqdm_kwargs": {"leave": progress_leave},
    }
    results = parallelized(per_mol_fn, mols, **parallel_options)

    return pd.DataFrame(results)

compute_many_descriptors(mol, properties_fn=None, add_properties=True)

Compute a list of opinionated molecular properties.

Parameters:

Name Type Description Default
mol Mol

A molecule.

required
properties_fn Optional[Dict[str, Union[Callable, str]]]

A dict mapping property names to the functions that compute them. If None, a default set of properties is used. If a value is a string instead of a function, dm.descriptors.any_rdkit_descriptor() is used to retrieve the descriptor function.

None
add_properties bool

Whether to also compute the default properties in addition to the ones given in properties_fn.

True

Returns:

Type Description
dict

Computed properties as a dict.

Source code in datamol/descriptors/compute.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def compute_many_descriptors(
    mol: Mol,
    properties_fn: Optional[Dict[str, Union[Callable, str]]] = None,
    add_properties: bool = True,
) -> dict:
    """Compute a set of opinionated molecular properties.

    Args:
        mol: A molecule.
        properties_fn: A dict mapping property names to the functions that
            compute them. If None, the default set of properties is used.
            If a value is a string instead of a function,
            `any_rdkit_descriptor()` is used to retrieve the descriptor
            function. The dict passed by the caller is never modified.
        add_properties: When `properties_fn` is given, whether to also
            compute the default properties. Entries of `properties_fn` take
            precedence over default entries with the same name.

    Returns:
        Computed properties as a dict, keyed by property name.
    """

    if properties_fn is None:
        properties_fn = _DEFAULT_PROPERTIES_FN
    elif add_properties:
        # Merge into a copy so the caller's dict is not mutated. User entries
        # keep their position and win over defaults with the same name.
        properties_fn = dict(properties_fn)
        for name, default_fn in _DEFAULT_PROPERTIES_FN.items():
            properties_fn.setdefault(name, default_fn)

    props = {}
    for name, fn in properties_fn.items():
        # String values name an RDKit descriptor to resolve at call time.
        if isinstance(fn, str):
            fn = any_rdkit_descriptor(fn)

        props[name] = fn(mol)

    return props