他身在高楼广厦之中,却有山泽鱼鸟之思
1014 字
5 分钟
weight_picture
前言
TIP准备工作 分子
smiles\
如何对分子进行模型权重图绘制
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import Draw
from rdkit.DataStructs import ConvertToNumpyArray
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import skchem
from ipywidgets import interact, Dropdown, Text, Box
from matplotlib.colors import ListedColormap
def sym_twod_gaussian(x, mu=np.zeros((2,))):
    return np.exp(-(x - mu).dot(x - mu))
def perpendicular(a) :
    b = np.empty_like(a)
    b[0] = -a[1]
    b[1] = a[0]
    return b
def highlight(m, weights=None, bond_gap=0.05, padding=0.1, vis_mode='circle', cmap='Blues'):
    two_d_idx = Chem.Compute2DCoords(m)
    conf = m.conformers[two_d_idx]
    coords = np.array(conf.atom_positions)
    boundary = (coords.min(axis=0) - padding, coords.max(axis=0) + padding)
    length = boundary[0] - boundary[1]
    aspect = length[0] / length[1]
    tall = aspect < 1
    if tall:
        width, height = 12, 12 * aspect
    else:
        width, height = 12 * aspect, 12
    
    plt.figure(figsize=(width, height), facecolor=None, dpi=300)
    plt.xlim(boundary[0][0], boundary[1][0])
    plt.ylim(boundary[0][1], boundary[1][1])
    if vis_mode == 'circle': 
        weights = 4000 * weights if weights is not None else 0
        plt.scatter(coords[:, 0], coords[:, 1], s=weights, facecolors=None)
    elif vis_mode == 'contour':
        weights = weights if weights is not None else 0
        x, y = coords[:,0], coords[:,1]
        xx, yy = np.mgrid[boundary[0][0]:boundary[1][0]:50j, boundary[0][1]:boundary[1][1]:50j]
        positions = np.vstack([xx.ravel(), yy.ravel()])
        values = np.vstack([x, y])
        kernel = lambda x: sum(w * sym_twod_gaussian(x, c) for w, c in zip(weights, coords[:,:2]))
        f = np.reshape(np.array([kernel(i) for i in positions.T]).T, xx.shape)
        
        def my_cmp(cmap):
            cmap = plt.get_cmap(cmap)
            new_cmap = cmap(np.linspace(0, 1, 256))
            new_cmap[:1, :] = np.array([1,1,1,1])
            return ListedColormap(new_cmap)
        
        plt.contourf(xx, yy, f, cmap=my_cmp(cmap))
    else:
        raise NotImplementedError
    
    # 绘制原子标签
    for i, a in enumerate(m.atoms):
        hs = a.GetNumImplicitHs() + a.GetNumExplicitHs()
        if hs > 1:
            label = '${}H_{}$'.format(a.element, hs) 
        elif hs == 1:
            label = '${}H$'.format(a.element) 
        else:
            label = '$' + a.element + '$'
        plt.text(coords[i, 0] - 0.1, coords[i, 1], label, verticalalignment='center', fontsize=12)
        
    # 绘制键
    for b in m.bonds:
        idxs = b.GetBeginAtomIdx(), b.GetEndAtomIdx()
        coord1, coord2 = coords[idxs[0], :2], coords[idxs[1], :2]
        norm = np.linalg.norm(coord2 - coord1)
        vect = (coord1 - coord2) / norm
        coord1, coord2 = coord1 - 0.1 * vect, coord2 + 0.1 * vect
        
        # 绘制单键
        if b.order == 1:
            plt.plot([coord1[0], coord2[0]], [coord1[1], coord2[1]], c='black')
        
        # 绘制芳香键
        if b.order == 1.5:
            plt.plot([coord1[0] + 0.5 * bond_gap * perpendicular(vect)[0], coord2[0] + 0.5 * bond_gap * perpendicular(vect)[0]], 
                     [coord1[1] + 0.5 * bond_gap * perpendicular(vect)[1], coord2[1] + 0.5 * bond_gap * perpendicular(vect)[1]], '--', c='black')
            plt.plot([coord1[0] - 0.5 * bond_gap * perpendicular(vect)[0], coord2[0] - 0.5 * bond_gap * perpendicular(vect)[0]], 
                     [coord1[1] - 0.5 * bond_gap * perpendicular(vect)[1], coord2[1] - 0.5 * bond_gap * perpendicular(vect)[1]], c='black')
        
        # 绘制双键
        if b.order == 2:
            plt.plot([coord1[0] + 0.5 * bond_gap * perpendicular(vect)[0], coord2[0] + 0.5 * bond_gap * perpendicular(vect)[0]], 
                     [coord1[1] + 0.5 * bond_gap * perpendicular(vect)[1], coord2[1] + 0.5 * bond_gap * perpendicular(vect)[1]], c='black')
            plt.plot([coord1[0] - 0.5 * bond_gap * perpendicular(vect)[0], coord2[0] - 0.5 * bond_gap * perpendicular(vect)[0]], 
                     [coord1[1] - 0.5 * bond_gap * perpendicular(vect)[1], coord2[1] - 0.5 * bond_gap * perpendicular(vect)[1]], c='black')
        
        # 绘制三键
        if b.order == 3:
            plt.plot([coord1[0], coord2[0]], [coord1[1], coord2[1]], c='black')
            plt.plot([coord1[0] + bond_gap * perpendicular(vect)[0], coord2[0] + bond_gap * perpendicular(vect)[0]], 
                     [coord1[1] + bond_gap * perpendicular(vect)[1], coord2[1] + bond_gap * perpendicular(vect)[1]], c='black')
            plt.plot([coord1[0] - bond_gap * perpendicular(vect)[0], coord2[0] - bond_gap * perpendicular(vect)[0]], 
                     [coord1[1] - bond_gap * perpendicular(vect)[1], coord2[1] - bond_gap * perpendicular(vect)[1]], c='black')
    plt.axis('off')  
    return plt.gca() 
# 绘制分子图
def plot_mol(smiles, atom_weights, vis_mode='contour', cmap='Blues', save_path='plot_mol.png'):
    m = skchem.Mol.from_smiles(smiles)  # 从 SMILES 创建分子对象
    print(len(atom_weights))
    print(len(m.GetAtoms()))
    assert len(atom_weights) == len(m.GetAtoms())  # 确保原子数和权重数匹配
    highlight(m, weights=atom_weights, padding=1.5, vis_mode=vis_mode, cmap=cmap)  # 绘制分子
    plt.savefig(save_path, bbox_inches='tight', transparent=True)  # 保存图片
    plt.show()  # 显示图片
    plt.close()  # 关闭图形
# 示例:随机生成 7 个原子权重,绘制分子图
weights1 = np.random.rand(26)
weights2 = np.random.rand(17)
weights3 = np.random.rand(19)
weights4 = np.random.rand(19)
#plot_mol(smiles='CN[C@@H](C)C(=O)N[C@H](C(=O)N1CC2=CC(OCCOCCOCCOCCOCCOC(=O)N3CCC[C@@H](N4N=C(C5=CC=C(OC6=CC=C(F)C=C6F)C=C5)C(C(N)=O)=C4N)C3)=CC=C2C[C@H]1C(=O)N[C@@H]1CCCC2=CC=CC=C21)C(C)(C)C', atom_weights=weights)  # 绘制苯酚分子图
smiles_list = [
    'COc1ccc2c(c1)OCCN(CC2)C(=O)CCCCN3CCOCC3',
    'CCN(CC)CCCC(=O)Nc1ccccc1',
    'CN1CCN(CC1)CCCC(=O)Nc2ccccc2',
    'COc1cccc2c1C(=O)NC(C2)c3ccccc3'
]
plot_mol(smiles='COc1ccc2c(c1)OCCN(CC2)C(=O)CCCCN3CCOCC3', atom_weights=weights1)  # 
plot_mol(smiles='CCN(CC)CCCC(=O)Nc1ccccc1', atom_weights=weights2)  # 
plot_mol(smiles='CN1CCN(CC1)CCCC(=O)Nc2ccccc2', atom_weights=weights3)  # 
plot_mol(smiles='COc1cccc2c1C(=O)NC(C2)c3ccccc3', atom_weights=weights4)  # 
如何对分子属性权重图绘制
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import SimilarityMaps
from matplotlib.colors import LinearSegmentedColormap
from rdkit.Chem.Draw import rdMolDraw2D
# 定义多个分子的 SMILES
smiles_list = [
    'COc1ccc2c(c1)OCCN(CC2)C(=O)CCCCN3CCOCC3',
    'CCN(CC)CCCC(=O)Nc1ccccc1',
    'CN1CCN(CC1)CCCC(=O)Nc2ccccc2',
    'COc1cccc2c1C(=O)NC(C2)c3ccccc3'
]
# 参数:调整权重范围的缩放因子
scaling_factor = 1  # 强化权重的显著性
# 设置高权重和低权重的颜色
low_weight_color = '#6595e8'  # 低权重颜色
high_weight_color = '#d7697e'  # 高权重颜色
custom_cmap = LinearSegmentedColormap.from_list('custom', [low_weight_color, "white", high_weight_color])
# 绘制多个分子的权重图并保存为 SVG
for idx, smiles in enumerate(smiles_list):
    # 生成分子对象
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"Invalid SMILES: {smiles}")
        continue
    # 计算 Gasteiger 电荷
    AllChem.ComputeGasteigerCharges(mol)
    contribs = [float(mol.GetAtomWithIdx(i).GetProp('_GasteigerCharge')) for i in range(mol.GetNumAtoms())]
    # **直接限制贡献范围**:
    # 通过调整映射范围增强高权重区域染色
    max_contrib = max(contribs)
    min_contrib = min(contribs)
    midpoint = (max_contrib + min_contrib) / 3.5  # 中间点
    scaled_contribs = [
        ((c - midpoint) * scaling_factor) / (max_contrib - min_contrib + 1e-5) for c in contribs
    ]
    # 使用 RDKit 的 GetSimilarityMapFromWeights 绘制分子权重图
    fig = SimilarityMaps.GetSimilarityMapFromWeights(
        mol, scaled_contribs, colorMap=custom_cmap, contourLines=10
    )
    # 使用 RDKit 的 MolDraw2DSVG 绘制并保存为 SVG
    drawer = rdMolDraw2D.MolDraw2DSVG(500, 500)  # 定义 SVG 绘图区域
    drawer.SetDrawOptions(drawer.drawOptions())
    drawer.DrawMolecule(mol, highlightAtoms=list(range(mol.GetNumAtoms())))
    drawer.FinishDrawing()
    # 保存 SVG 文件
    svg_file = f"molecule_{idx + 1}.svg"
    with open(svg_file, 'w') as f:
        f.write(drawer.GetDrawingText())
    print(f"SVG saved: {svg_file}")
# 提示保存完成
print("All molecules have been processed and saved as SVG files.")
作图结果示例
图片参考 属性图 
weight_picture
https://sereinna.github.io/posts/weight_pocture/

