datamol.viz
¶
Visualize molecules in 2D or 3D¶
to_image(mols, legends=None, n_cols=4, use_svg=True, mol_size=(300, 300), highlight_atom=None, highlight_bond=None, outfile=None, max_mols=32, copy=True, indices=False, bond_indices=False, bond_line_width=2, stereo_annotations=True, legend_fontsize=16, kekulize=True, align=False, **kwargs)
¶
Generate an image out of a molecule or a list of molecules.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
mols |
Union[List[rdkit.Chem.rdchem.Mol], rdkit.Chem.rdchem.Mol] |
One or a list of molecules. |
required |
legends |
Union[List[Optional[str]], str] |
A string or a list of strings used as legends for the molecules. |
None |
n_cols |
int |
Number of molecules per row (i.e. the number of columns in the grid). |
4 |
use_svg |
bool |
Whether to output an SVG (or a PNG). |
True |
mol_size |
Union[Tuple[int, int], int] |
An int or a tuple of ints defining the size per molecule. |
(300, 300) |
highlight_atom |
List[List[int]] |
the atoms to highlight. |
None |
highlight_bond |
List[List[int]] |
The bonds to highlight. |
None |
outfile |
str |
Path where to save the image (local or remote path). |
None |
max_mols |
int |
The maximum number of molecules to display. |
32 |
copy |
bool |
Whether to copy the molecules or not. |
True |
indices |
bool |
Whether to draw the atom indices. |
False |
bond_indices |
bool |
Whether to draw the bond indices. |
False |
bond_line_width |
int |
The width of the bond lines. |
2 |
legend_fontsize |
int |
Font size for the legend. |
16 |
kekulize |
bool |
Run kekulization routine on molecules. Skipped if fails. |
True |
align |
Union[bool, rdkit.Chem.rdchem.Mol, str] |
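Whether to align the 2D coordinates of the molecules. If True
or set to a valid molecule object, `dm.viz.utils.align_2d_coordinates` is used. If `align` is a molecule object or a string, it is used as the pattern for the alignment; if `align` is True, the MCS is computed. Warning: alignment slows down the process and can fail in some cases, so always check the result visually. |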
False |
kwargs |
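Additional arguments to pass to the drawing function. See RDKit
documentation related to `MolDrawOptions` for more details at https://www.rdkit.org/docs/source/rdkit.Chem.Draw.rdMolDraw2D.html. |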
{} |
Source code in datamol/viz/_viz.py
def _normalize_highlights(highlights):
    """Return `highlights` as a list of per-molecule index lists, or None.

    A flat list of ints is wrapped into a single-element list so it applies to
    the first molecule. An empty list is treated like None (nothing to
    highlight) instead of failing on `highlights[0]`.
    """
    if highlights is None or len(highlights) == 0:
        return None
    if isinstance(highlights[0], int):
        return [highlights]
    return highlights


def to_image(
    mols: Union[List[dm.Mol], dm.Mol],
    legends: Union[List[Union[str, None]], str, None] = None,
    n_cols: int = 4,
    use_svg: bool = True,
    mol_size: Union[Tuple[int, int], int] = (300, 300),
    highlight_atom: Union[List[List[int]], List[int], None] = None,
    highlight_bond: Union[List[List[int]], List[int], None] = None,
    outfile: Union[str, None] = None,
    max_mols: Union[int, None] = 32,
    copy: bool = True,
    indices: bool = False,
    bond_indices: bool = False,
    bond_line_width: int = 2,
    stereo_annotations: bool = True,
    legend_fontsize: int = 16,
    kekulize: bool = True,
    align: Union[bool, dm.Mol, str] = False,
    **kwargs,
):
    """Generate an image out of a molecule or a list of molecules.

    Args:
        mols: One or a list of molecules.
        legends: A string or a list of strings used as legends for the molecules.
        n_cols: Number of molecules per row (i.e. number of columns in the grid).
        use_svg: Whether to output an SVG (or a PNG).
        mol_size: An int or a tuple of ints defining the size per molecule.
        highlight_atom: The atoms to highlight. Either one list of atom indices
            per molecule or a single flat list of indices.
        highlight_bond: The bonds to highlight. Either one list of bond indices
            per molecule or a single flat list of indices.
        outfile: Path where to save the image (local or remote path).
        max_mols: The maximum number of molecules to display. `None` disables
            the limit.
        copy: Whether to copy the molecules or not.
        indices: Whether to draw the atom indices.
        bond_indices: Whether to draw the bond indices.
        bond_line_width: The width of the bond lines.
        stereo_annotations: Whether to draw the stereo annotations.
        legend_fontsize: Font size for the legend.
        kekulize: Run kekulization routine on molecules. Skipped if fails.
        align: Whether to align the 2D coordinates of the molecules. If True
            or set to a valid molecule object `dm.viz.utils.align_2d_coordinates` is used.
            If `align` is set to a molecule object or a string, this molecule will be used as a
            pattern for the alignment. If `align` is set to True, the MCS will be computed.
            **Warning**:
            - This will slow down the process. You can pre-compute the alignment by calling
            `dm.viz.utils.align_2d_coordinates`.
            - In some cases, the alignment will fail. So you should always check it visually.
            Please report any list of molecules failing to align.
        kwargs: Additional arguments to pass to the drawing function. See RDKit
            documentation related to `MolDrawOptions` for more details at
            https://www.rdkit.org/docs/source/rdkit.Chem.Draw.rdMolDraw2D.html.

    Returns:
        The image object returned by `Draw.MolsToGridImage`.
    """
    if isinstance(mol_size, int):
        mol_size = (mol_size, mol_size)

    if isinstance(mols, dm.Mol):
        mols = [mols]
    else:
        # Materialize so any iterable (not only lists) can be sliced below.
        mols = list(mols)

    if isinstance(legends, str):
        legends = [legends]

    # Truncate first so molecules that will never be drawn are not copied.
    if max_mols is not None:
        mols = mols[:max_mols]
        if legends is not None:
            legends = legends[:max_mols]

    if copy:
        mols = [dm.copy_mol(mol) for mol in mols]

    # Whether to align the molecules
    if align is True:
        mols = dm.viz.utils.align_2d_coordinates(mols, copy=False)
    elif isinstance(align, dm.Mol):
        mols = dm.viz.utils.align_2d_coordinates(mols, pattern=align, copy=False)
    elif isinstance(align, str):
        mols = dm.viz.utils.align_2d_coordinates(
            mols, pattern=dm.from_smarts(align), copy=False
        )

    # Prepare molecules before drawing
    mols = [prepare_mol_for_drawing(mol, kekulize=kekulize) for mol in mols]

    _highlight_atom = _normalize_highlights(highlight_atom)
    _highlight_bond = _normalize_highlights(highlight_bond)

    # Don't make the grid wider than the number of molecules to draw.
    if len(mols) < n_cols:
        n_cols = len(mols)

    draw_options = Draw.rdMolDraw2D.MolDrawOptions()
    draw_options.legendFontSize = legend_fontsize
    draw_options.addAtomIndices = indices
    draw_options.addBondIndices = bond_indices
    draw_options.addStereoAnnotation = stereo_annotations
    draw_options.bondLineWidth = bond_line_width

    # Extra keyword arguments that match a drawing option are set on the
    # options object; everything else is forwarded to `MolsToGridImage`.
    _kwargs = {}
    for k, v in kwargs.items():
        if hasattr(draw_options, k):
            setattr(draw_options, k, v)
        else:
            _kwargs[k] = v

    image = Draw.MolsToGridImage(
        mols,
        legends=legends,
        molsPerRow=n_cols,
        useSVG=use_svg,
        subImgSize=mol_size,
        highlightAtomLists=_highlight_atom,
        highlightBondLists=_highlight_bond,
        drawOptions=draw_options,
        **_kwargs,
    )

    if outfile is not None:
        with fsspec.open(outfile, "wb") as f:
            if use_svg:
                if isinstance(image, str):
                    # in a terminal process
                    f.write(image.encode())
                else:
                    # in a jupyter kernel process
                    f.write(image.data.encode())  # type: ignore
            else:
                if isinstance(image, PIL.PngImagePlugin.PngImageFile):  # type: ignore
                    # in a terminal process
                    image.save(f)
                else:
                    # in a jupyter kernel process
                    f.write(image.data)  # type: ignore

    return image
conformers(mol, conf_id=-1, n_confs=None, align_conf=True, n_cols=3, sync_views=True, remove_hs=True, width='auto')
¶
Visualize the conformer(s) of a molecule.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
mol |
Mol |
a molecule. |
required |
conf_id |
int |
The ID of the conformer to show. -1 shows
the first conformer. Only works if `n_confs` is None. |
-1 |
n_confs |
Union[int, List[int]] |
Can be a number of conformers to show or a list of conformer indices. When None, only the first conformer is displayed. When -1, show all conformers. |
None |
align_conf |
bool |
Whether to align conformers together. |
True |
n_cols |
int |
Number of columns. Defaults to 3. |
3 |
sync_views |
bool |
Whether to sync the multiple views. |
True |
remove_hs |
bool |
Whether to remove the hydrogens of the conformers. |
True |
width |
str |
The width of the returned view. Defaults to "auto". |
'auto' |
Source code in datamol/viz/_conformers.py
def conformers(
    mol: Chem.rdchem.Mol,
    conf_id: int = -1,
    n_confs: Union[int, List[int], None] = None,
    align_conf: bool = True,
    n_cols: int = 3,
    sync_views: bool = True,
    remove_hs: bool = True,
    width: str = "auto",
):
    """Visualize the conformer(s) of a molecule.

    Args:
        mol: a molecule.
        conf_id: The ID of the conformer to show. -1 shows
            the first conformer. Only works if `n_confs` is None.
        n_confs: Can be a number of conformers
            to show or a list of conformer IDs. When None, only the first
            conformer is displayed. When -1, show all conformers.
        align_conf: Whether to align conformers together.
        n_cols: Number of columns. Defaults to 3.
        sync_views: Whether to sync the multiple views.
        remove_hs: Whether to remove the hydrogens of the conformers.
        width: The width of the returned view. Defaults to "auto".

    Returns:
        A single nglview view when `n_confs` is None, otherwise an ipywidgets
        grid holding one view per selected conformer.

    Raises:
        ValueError: If the molecule has no conformer.
    """
    widgets = _get_ipywidgets()
    nv = _get_nglview()

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Clone the molecule
    mol = copy.deepcopy(mol)

    if remove_hs:
        mol = Chem.RemoveHs(mol)  # type: ignore
    else:
        # `addCoords=True` gives the added hydrogens real 3D positions;
        # without it they are all placed at the origin.
        mol = Chem.AddHs(mol, addCoords=True)  # type: ignore

    if n_confs is None:
        return nv.show_rdkit(mol, conf_id=conf_id)

    # Resolve `n_confs` to an explicit list of conformer IDs.
    if isinstance(n_confs, int):
        # Use the IDs actually present on the molecule: they are not
        # guaranteed to be 0..n-1 (e.g. after conformers were removed).
        available_ids = [conf.GetId() for conf in mol.GetConformers()]
        if n_confs == -1:
            conf_ids = available_ids
        else:
            conf_ids = available_ids[: max(n_confs, 0)]
    else:
        conf_ids = list(n_confs)

    if align_conf:
        rdMolAlign.AlignMolConformers(mol, confIds=conf_ids)

    # Number of rows needed to hold every view (last row may be partial).
    n_rows, leftover = divmod(len(conf_ids), n_cols)
    if leftover > 0:
        n_rows += 1

    # Create a grid
    grid = widgets.GridspecLayout(n_rows, n_cols)  # type: ignore

    # Create the views and place them row by row in the grid.
    grid_cells = itertools.product(range(n_rows), range(n_cols))
    views = []
    for cid, (row, col) in zip(conf_ids, grid_cells):
        view = nv.show_rdkit(mol, conf_id=cid)
        view.layout.width = width
        view.layout.align_self = "stretch"
        grid[row, col] = view
        views.append(view)

    # Sync views
    if sync_views:
        for view in views:
            view._set_sync_camera(views)

    return grid
Specific plotting functions¶
MolsCircleGrid
¶
Source code in datamol/viz/_circle_grid.py
class MolsCircleGrid:
    """Image of molecules laid out in concentric rings around a center molecule."""

    def __init__(
        self,
        center_mol: Chem.rdchem.Mol,
        circle_mols: List[List[Chem.rdchem.Mol]],
        legend: str = None,
        mol_size: Tuple[int, int] = (200, 200),
        circle_margin: int = 50,
        act_mapper: dict = None,
    ):
        """Show molecules in concentric rings, with one molecule at the center

        Args:
            center_mol: Molecule at center
            circle_mols: List of molecule for each concentric circle around the center mol
            legend: Text drawn at the bottom of the image. Defaults to an empty string.
            mol_size: Tuple of width and height for each molecule
            circle_margin: Margin between the circle layers
            act_mapper: Map each molecule to a dictionary of activity
        """
        self.circle_mols = circle_mols
        self.circle_count = len(self.circle_mols)
        self.legend = legend or ""
        self.margin = circle_margin
        self.center_mol = center_mol
        self.mol_size = mol_size
        # Square canvas: one (molecule + margin) slot per ring, plus the center.
        size = (max(mol_size) + self.margin) * (self.circle_count + 1)
        self.size = size
        self.image = Image.new(mode="RGBA", size=(size, size), color=(255, 255, 255, 0))
        self.midpoint = size // 2
        # Only set while `_draw` is running.
        self.draw = None
        self.act_mapper = act_mapper or {}
        self._draw()

    def show(self, crop=False):
        """Display the image with `Image.show()`.

        Args:
            crop: If True, remove a 1 pixel border before showing.
        """
        if crop:
            crop_img = ImageOps.crop(self.image, border=1)
        else:
            crop_img = self.image
        return crop_img.show()

    def save(self, filename):
        """Save the image to `filename`."""
        self.image.save(filename)

    def _draw(self):
        """Create circles and slices in-memory"""
        draw = ImageDraw.Draw(self.image)
        # Exposed on the instance so `_draw_mol_at` can write activity labels.
        self.draw = draw

        all_radius = self._draw_circles(draw)
        self._draw_center_mol()
        self._draw_ring_mols(all_radius)

        # Measure the legend with the default font first, then try a scaled
        # TrueType font. The font lookup is best-effort: on failure the
        # default font and its measurements are kept.
        font = None
        w, h = draw.textsize(self.legend)
        try:
            fn = FontManager()
            fontpath = fn.findfont("Droid sans")
            font = ImageFont.truetype(fontpath, 12 * self.size // 800)
            w, h = font.getsize(self.legend)
        except Exception:
            pass

        draw.text(
            ((self.size // 2 - w) - 2, self.size - 2 * h),
            self.legend,
            fill="black",
            font=font,
        )
        del draw
        self.draw = None

    def _repr_png_(self):
        """Return the image encoded as PNG bytes."""
        bio = io.BytesIO()
        self.image.save(bio, format="PNG")
        return bio.getvalue()

    def _draw_circles(self, draw):
        """Draw the ring outlines and return the radius used for each ring.

        Args:
            draw: The drawing handle on which the arcs are drawn.

        Returns:
            A list with one radius per step, each being the outline radius plus
            one radius step. Empty when there is no ring.
        """
        if self.circle_count <= 0:
            return []

        radius_step = int(self.midpoint / (self.circle_count + 1))
        radius_list = []
        full_range = range(0, self.size // 2, radius_step)
        for i, radius in enumerate(full_range):
            radius += self.margin // 2
            bounding_box = [
                (self.midpoint - radius, self.midpoint - radius),
                (self.midpoint + radius, self.midpoint + radius),
            ]
            # Skip outlines that would be no larger than the margin.
            if radius > self.margin:
                # Outer rings are drawn with a lower alpha.
                transp = int(255 - (200 * (i - 1) / len(full_range)))
                draw.arc(bounding_box, 0, 360, fill=(190, 190, 190, transp))
            radius_list.append(radius + radius_step)
        return radius_list

    def _draw_mol_at(
        self,
        mol,
        center_x,
        center_y,
        mol_size=None,
        act_dict=None,
        center=False,
        **kwargs,
    ):
        """Paste a molecule centered at a position and write its activity label.

        Args:
            mol: A molecule, or an already rendered image.
            center_x: Horizontal position of the molecule center.
            center_y: Vertical position of the molecule center.
            mol_size: Size of the rendered molecule. Defaults to `self.mol_size`.
            act_dict: Mapping of property name to value written under the
                molecule. Non-string values are formatted with two decimals.
            center: Whether this is the center molecule (uses a larger font).
            kwargs: Additional arguments forwarded to `Draw.MolToImage`.
        """
        # `None` instead of a shared mutable `{}` default.
        if act_dict is None:
            act_dict = {}

        img = mol
        if mol_size is None:
            mol_size = self.mol_size

        if isinstance(mol, Chem.Mol):
            img = Draw.MolToImage(mol, mol_size, kekulize=True, fitImage=True, **kwargs)

        width, height = img.size
        self.image.paste(img, (int(center_x - width / 2), int(center_y - height / 2)))

        txt = []
        for prop, propval in act_dict.items():
            if not isinstance(propval, str):
                propval = "{:.2f}".format(propval)
            txt.append(f"{prop}: {propval}")

        if txt and self.draw is not None:
            txt = "\n".join(txt)
            font = None
            w, h = self.draw.multiline_textsize(txt)
            # Best-effort TrueType font lookup; on failure the default font and
            # its measurements are kept.
            try:
                fn = FontManager()
                fontpath = fn.findfont("Droid sans")
                font = ImageFont.truetype(fontpath, 18 + center * 8)
                w, h = self.draw.multiline_textsize(txt, font=font)
            except Exception:
                pass

            # Write the label horizontally centered, just below the molecule.
            self.draw.multiline_text(
                (center_x - w / 2, center_y + height / 2),
                txt,
                fill="black",
                font=font,
                align="center",
            )

    def _draw_center_mol(self):
        """Draw the center molecule, enlarged by the margin, at the midpoint."""
        self._draw_mol_at(
            self.center_mol,
            self.midpoint,
            self.midpoint,
            mol_size=[x + self.margin for x in self.mol_size],
            act_dict=self.act_mapper.get(self.center_mol, {}),
            center=True,
        )

    def _draw_ring_mols(self, radius_list):
        """Draw the molecules of every ring, evenly spaced on their circle.

        Args:
            radius_list: Radius to use for each ring, indexed like `self.circle_mols`.
        """
        for i, mols in enumerate(self.circle_mols):
            radius = radius_list[i]
            ni = len(mols)
            # Each ring starts at a random angle.
            rand_unit = random.random() * 2 * math.pi
            for k, mol in enumerate(mols):
                center_x = radius * math.cos(2 * k * math.pi / ni + rand_unit) + self.midpoint
                center_y = radius * math.sin(2 * k * math.pi / ni + rand_unit) + self.midpoint
                self._draw_mol_at(mol, center_x, center_y, act_dict=self.act_mapper.get(mol, {}))
__init__(self, center_mol, circle_mols, legend=None, mol_size=(200, 200), circle_margin=50, act_mapper=None)
special
¶
Show molecules in concentric rings, with one molecule at the center
Parameters:
Name | Type | Description | Default |
---|---|---|---|
center_mol |
Mol |
Molecule at center |
required |
circle_mols |
List[List[rdkit.Chem.rdchem.Mol]] |
List of molecule for each concentric circle around the center mol |
required |
mol_size |
Tuple[int, int] |
Tuple of width and height for each molecule |
(200, 200) |
circle_margin |
int |
Margin between the circle layers |
50 |
act_mapper |
dict |
Map each molecule to a dictionary of activity |
None |
Source code in datamol/viz/_circle_grid.py
def __init__(
    self,
    center_mol: Chem.rdchem.Mol,
    circle_mols: List[List[Chem.rdchem.Mol]],
    legend: str = None,
    mol_size: Tuple[int, int] = (200, 200),
    circle_margin: int = 50,
    act_mapper: dict = None,
):
    """Build the concentric-ring image of molecules around a center molecule.

    Args:
        center_mol: Molecule placed at the center of the image.
        circle_mols: One list of molecules per concentric ring.
        legend: Text for the image legend. Stored as "" when not given.
        mol_size: Width and height used for each molecule.
        circle_margin: Margin between two consecutive rings.
        act_mapper: Mapping from a molecule to its dictionary of activities.
    """
    # Inputs, stored as given (with empty fallbacks for the optional ones).
    self.center_mol = center_mol
    self.circle_mols = circle_mols
    self.circle_count = len(circle_mols)
    self.mol_size = mol_size
    self.margin = circle_margin
    self.legend = legend if legend else ""
    self.act_mapper = act_mapper if act_mapper else {}

    # Square canvas: one (molecule + margin) slot per ring, plus the center.
    canvas_side = (max(mol_size) + circle_margin) * (self.circle_count + 1)
    self.size = canvas_side
    self.midpoint = canvas_side // 2
    self.image = Image.new(
        mode="RGBA",
        size=(canvas_side, canvas_side),
        color=(255, 255, 255, 0),
    )

    # The drawing handle only exists while `_draw` runs.
    self.draw = None
    self._draw()
circle_grid(center_mol, circle_mols, legend=None, mol_size=(200, 200), circle_margin=50, act_mapper=None)
¶
Show molecules in concentric rings, with one molecule at the center
Parameters:
Name | Type | Description | Default |
---|---|---|---|
center_mol |
Chem.Mol |
Molecule at center |
required |
circle_mols |
list of list of <Chem.Mol> |
List of molecule for each concentric circle around the center mol |
required |
mol_size |
tuple |
Tuple of width and height for each molecule |
(200, 200) |
circle_margin |
int |
Margin between the circle layers |
50 |
act_mapper |
dict |
Map each molecule to a dictionary of activity |
None |
Source code in datamol/viz/_circle_grid.py
def circle_grid(
    center_mol: Chem.rdchem.Mol,
    circle_mols: List[List[Chem.rdchem.Mol]],
    legend: str = None,
    mol_size: Tuple[int, int] = (200, 200),
    circle_margin: int = 50,
    act_mapper: dict = None,
):
    """Show molecules in concentric rings, with one molecule at the center.

    Args:
        center_mol: Molecule at center.
        circle_mols: List of molecules for each concentric circle around the center mol.
        legend: Legend passed to `MolsCircleGrid`.
        mol_size: Tuple of width and height for each molecule.
        circle_margin: Margin between the circle layers.
        act_mapper: Map each molecule to a dictionary of activity.

    Returns:
        The `MolsCircleGrid` instance built from the arguments.
    """
    grid = MolsCircleGrid(
        center_mol=center_mol,
        circle_mols=circle_mols,
        legend=legend,
        mol_size=mol_size,
        circle_margin=circle_margin,
        act_mapper=act_mapper,
    )
    return grid