他身在高楼广厦之中,却有山泽鱼鸟之思
603 字
3 分钟
smiles分子图批量绘制
前言
TIP准备工作 分子
smiles
代码
import pandas as pd from rdkit import Chem from rdkit.Chem import Draw, AllChem from rdkit.Chem.Draw import rdMolDraw2D import os from rdkit.Chem import inchi # 原子高亮和颜色设置(可以在此快速设置) HIGHLIGHT_ATOMS = [] ATOM_COLORS = { 'N': (1.0, 0.7569, 0.6235), # 氮原子的颜色 FFC19F 'S': (0.9725, 0.7059, 0.8627), # 硫原子的颜色 F8B4DC 'O': (0.0000, 0.7882, 0.7176), # 氧原子的颜色 #008578 深青绿色 'F': (0.9725, 0.7059, 0.8627), # 氟原子的颜色 F8B4DC 'Cl': (0.1216, 0.6824, 0.2824), # 氯原子的颜色 #1FAD48 绿色 'Br': (0.6510, 0.1647, 0.1647), # 溴原子的颜色 #A62929 深红色 'I': (0.4941, 0.0980, 0.6157), # 碘原子的颜色 #7E199D 紫色 } # 特殊基团颜色(例如苯磺酰胺) BENZENESULFONAMIDE_COLOR = (0.9922, 0.5765, 0.7686) # #fd93c4 亮粉红色 HIGHLIGHT_BOND_COLOR = (1.0000, 0.7941, 0.9725) # #ffe4f8 非常浅的粉红色 # 自动生成颜色映射 def assign_colors_by_atom_type(mol, highlight_atoms, atom_colors): color_map = {} for i, atom in enumerate(mol.GetAtoms()): atom_symbol = atom.GetSymbol() if atom_symbol in highlight_atoms and atom_symbol in atom_colors: color_map[i] = atom_colors[atom_symbol] return color_map # 识别苯磺酰胺基团的函数 def identify_benzenesulfonamide(mol): pattern = Chem.MolFromSmarts('c1ccc(S(=O)(N)=O)cc1') matches = mol.GetSubstructMatches(pattern) if matches: return list(matches[0]) return [] # 辅助函数:生成唯一的文件名 def generate_unique_filename(directory, base_filename, extension): filename = f"{base_filename}.{extension}" filepath = os.path.join(directory, filename) counter = 1 while os.path.exists(filepath): filename = f"{base_filename}_{counter}.{extension}" filepath = os.path.join(directory, filename) counter += 1 return filename # 绘制分子并保存 def draw_and_save_smiles(smiles, svg_dir, jpg_dir, idx, highlight_atoms, atom_colors): mol = Chem.MolFromSmiles(smiles) if mol is None: print(f"Invalid SMILES at index {idx}: {smiles}") return None, 0 AllChem.Compute2DCoords(mol) draw_options = rdMolDraw2D.MolDrawOptions() draw_options.useBWAtomPalette() draw_options.fixedBondLength = 30 draw_options.bondLineWidth = 1 drawer = rdMolDraw2D.MolDraw2DSVG(300, 300) drawer.SetDrawOptions(draw_options) color_map = assign_colors_by_atom_type(mol, highlight_atoms, atom_colors) # 识别苯磺酰胺基团 benzenesulfonamide_atoms = identify_benzenesulfonamide(mol) label = 1 if benzenesulfonamide_atoms else 0 for atom in benzenesulfonamide_atoms: color_map[atom] = BENZENESULFONAMIDE_COLOR highlight_atoms_idx = list(set([i for i, atom in enumerate(mol.GetAtoms()) if atom.GetSymbol() in highlight_atoms] + benzenesulfonamide_atoms)) highlight_bonds = [] for bond in mol.GetBonds(): if bond.GetBeginAtomIdx() in benzenesulfonamide_atoms and bond.GetEndAtomIdx() in benzenesulfonamide_atoms: highlight_bonds.append(bond.GetIdx()) drawer.DrawMolecule(mol, highlightAtoms=highlight_atoms_idx, highlightAtomColors=color_map, highlightBonds=highlight_bonds, highlightBondColors={i: HIGHLIGHT_BOND_COLOR for i in highlight_bonds}) drawer.FinishDrawing() svg_output = drawer.GetDrawingText() inchi_key = inchi.MolToInchiKey(mol) base_filename = inchi_key[4:12] unique_svg_filename = generate_unique_filename(svg_dir, base_filename, "svg") svg_filepath = os.path.join(svg_dir, unique_svg_filename) with open(svg_filepath, "w") as svg_file: svg_file.write(svg_output.replace('svg:', '')) unique_jpg_filename = generate_unique_filename(jpg_dir, base_filename, "jpg") jpg_filepath = os.path.join(jpg_dir, unique_jpg_filename) img = Draw.MolToImage(mol, size=(900, 900), highlightAtoms=highlight_atoms_idx, highlightAtomColors=color_map, highlightBonds=highlight_bonds) img.save(jpg_filepath) print(f"Images saved as {unique_svg_filename} and {unique_jpg_filename}") return os.path.splitext(unique_svg_filename)[0], label def process_smiles_file(file_path, smiles_column, output_dir, highlight_atoms, atom_colors): svg_dir = os.path.join(output_dir, "svg") jpg_dir = os.path.join(output_dir, "jpg") if not os.path.exists(svg_dir): os.makedirs(svg_dir) if not os.path.exists(jpg_dir): os.makedirs(jpg_dir) if file_path.endswith('.csv'): df = pd.read_csv(file_path) elif file_path.endswith('.txt'): df = pd.read_csv(file_path, delimiter='\t') else: raise ValueError("Only CSV or TXT files are supported") file_names = [] labels = [] for idx, smiles in enumerate(df[smiles_column].dropna()): short_filename, label = draw_and_save_smiles(smiles, svg_dir, jpg_dir, idx + 1, highlight_atoms, atom_colors) if short_filename: file_names.append(short_filename) labels.append(label) else: file_names.append("Invalid_SMILES") labels.append(0) df['Generated_Filename'] = file_names df['label'] = labels output_csv_path = os.path.join(output_dir, 'top_50_smiles_5fl4_9.csv') df.to_csv(output_csv_path, index=False) print(f"Updated CSV saved as {output_csv_path}") # 示例使用 smiles_column = 'smiles' # 你的文件中的SMILES列名 file_path = '/home/tim/hmt/del_picture/smiles/top_50_smiles_5fl4_9.txt' # 你的CSV或TXT文件路径 output_dir = '/home/tim/hmt/del_picture/smiles' # 输出文件夹 process_smiles_file(file_path, smiles_column, output_dir, HIGHLIGHT_ATOMS, ATOM_COLORS)
效果图
smiles分子图批量绘制
https://sereinna.github.io/posts/smiles如何绘制好看并突出基团/