他身在高楼广厦之中,却有山泽鱼鸟之思
258 字
1 分钟
大数据库蛋白-配体作用数据提取
筛选代码
import pymysql
import pandas as pd
class MysqlSave:
def __init__(self):
self.content = pymysql.Connect(
host='127.0.0.1', # mysql的主机ip
port=3306, # 端口
user='root', # 用户名
passwd='serein', # 数据库密码
charset='utf8', # 使用字符集
)
self.cursor = self.content.cursor()
def search_and_save(self,sql,csv_file):
"""
导出为csv的函数
:param sql: 要执行的mysql指令
:param csv_file: 导出的csv文件名
:return:
"""
# 执行sql语句
self.cursor.execute(sql)
# 拿到表头
des = self.cursor.description
title = [each[0] for each in des]
# 拿到数据库查询的内容
result_list = []
for each in self.cursor.fetchall():
result_list.append(list(each))
# 保存成dataframe
df_dealed = pd.DataFrame(result_list, columns=title)
# 保存成csv 这个编码是为了防止中文没法保存,index=None的意思是没有行号
df_dealed.to_csv(csv_file, index = None, encoding ='utf_8_sig')
if __name__ == '__main__':
mysql = MysqlSave()
sql = '''
SELECT DISTINCT
act_id,
std_activity_type,
activity_uom,
activity_value,
assay_type,
sub_smiles,
mol_weight,
all_activity_gostar.pdb_id,
uniprot_id
FROM
gostar.all_activity_gostar,
gostar.structure_details,
gostar.drug_targets,
gostar.standard_name_master,
gostar.target_protein_master
WHERE
gostar.all_activity_gostar.gvk_id = gostar.structure_details.gvk_id
AND gostar.all_activity_gostar.stdname_id = gostar.standard_name_master.stdname_id
AND gostar.all_activity_gostar.target_id = gostar.target_protein_master.target_id
AND activity_type = 'IC50'
AND activity_type = 'Ki' or 'Kd'
AND activity_prefix = "="
AND assay_type = "B"
AND activity_uom = "nM"
AND sub_smiles IS NOT NULL
AND uniprot_id IS NOT NULL
AND all_activity_gostar.pdb_id IS NOT NULL
AND mol_weight < 800
#AND activity_value <= 100
#LIMIT 50;
'''
#file_path = r'/mnt/c/Users/86173/rmsd/mysql3.csv'
mysql.search_and_save(sql, 'mysql_kikd_100.csv')
```
大数据库蛋白-配体作用数据提取
https://sereinna.github.io/posts/gostar数据库的筛选/