蛋白批量突变并进行分子动力学模拟

批量突变蛋白并进行分子动力学模拟

最近在做蛋白-配体的分子动力学模拟，需要考察氨基酸突变对蛋白配体结合的影响。当突变残基变多时，重复突变、模拟的过程实在是让人心累。因此，用bash脚本写了一个半自动的流程。

该流程只需要提供蛋白pdb文件，配体的gro以及itp文件，以及指定被突变残基序号和突变成的氨基酸库即可。

在运行之前，首先需要使用分子对接将配体对接到结合口袋以及生成配体的topol文件（这里推荐使用卢天老师开发的sobtop）。即提供的配体gro文件中配体的坐标为对接之后的坐标。

如下为使用的bash脚本。

后续会将配体top的制作加入进来，以达到只需要分子对接之后即可省心省力的重复模拟。

注：该脚本在linux下运行，同时需要保证命令行能够正确调用pymol以及gromacs。

# 使用的pdb文件
protein=../originalfile/protein.pdb

# 使用的配体gro
lig=../originalfile/lig.gro

# 配体top
ligtop=../originalfile/FPP.itp

# md时所用核数
runcore=12

# 突变成的氨基酸库
resarray=("ALA" "HIS" "LYS" "SER" "GLY" "PHE" "ILE" "VAL" "MET" "PRO" "TRP" "LEU")
# 被突变的残基序号
REI="482"

# 循环模拟
for RES in ${resarray[@]} 
do
# 检查是否存在该文件夹，存在就删除再创建
[ -d R$REI$RES ] &&  rm -rf R$REI$RES
mkdir R$REI$RES
cd R$REI$RES

# 编写em文件
cat > em.mdp <<EOF
define = -DFLEXIBLE
integrator = cg
nstcgsteep=100
nsteps = 5000
emtol  = 1000
emstep = 0.01
;
nstxout   = 50
nstlog    = 50
nstenergy = 50
;
pbc = xyz
cutoff-scheme            = Verlet
coulombtype              = PME
rcoulomb                 = 1.0
vdwtype                  = Cut-off
rvdw                     = 1.0
DispCorr                 = EnerPres
;
constraints              = none
EOF

# 编写md文件
cat > md.mdp << EOF
define =
integrator = md
dt         = 0.002   ; ps
nsteps     = 10000000
comm-grps  = protein_lig
comm-mode  = angular
energygrps = 
;
nstxout = 0
nstvout = 0
nstfout = 0
nstlog  = 5000
nstenergy = 1000
nstxout-compressed = 2000
compressed-x-grps  = system
;
pbc = xyz
cutoff-scheme = Verlet
coulombtype   = PME
rcoulomb      = 1.0
vdwtype       = cut-off
rvdw          = 1.0
DispCorr      = EnerPres
;
Tcoupl  = V-rescale
tau_t   = 0.2 0.2
tc_grps = protein_lig envir
ref_t   = 306.15 306.15
;
Pcoupl     = parrinello-rahman
pcoupltype = isotropic
tau_p = 2.0
ref_p = 1.0
compressibility = 4.5e-5
;
freezegrps  = 
freezedim   = 
constraints = hbonds
EOF

# 编写pymol脚本
cat > pfile.py << EOF
from pymol import cmd
cmd.load("$protein")
cmd.wizard("mutagenesis")
cmd.get_wizard().set_mode("$RES")
cmd.get_wizard().do_select("$REI/")
# Apply the mutation
cmd.get_wizard().apply()
cmd.save("protein.pdb")
cmd.quit()
EOF

# 使用pymol突变残基并保存在当前目录下
pymol pfile.py

# 生成蛋白gro文件 第一个为使用的力场，第二个为使用的水类型，第一次使用需要根据自己的版本修改
gmx pdb2gmx -f protein.pdb -p topol.top -o protein.gro -ignh <<EOF
2
1
EOF

# 获取配体分子名
ligname=`grep -A2 moleculetype $ligtop |tail -n 1 | sed 's/ \+[0-9]//g' `

# 将分子名添加入topol
sed -i '$a '"$ligname"'   1'  topol.top

# 读取配体top插入行并插入topol.top
insertline=`grep -n "#include" topol.top | head -n 1 | awk -F '[:]' '{print $1}'`
sed -i ''"$(($insertline+1))"'i  #include '"\"$ligtop\""'' topol.top

# 预处理蛋白、配体结构文件，并将其合并为complex.gro文件
sed '$d' protein.gro > complex.gro
sed  '1,2d' $lig >> complex.gro 
total=$((`awk 'NR==2' protein.gro` + `awk 'NR==2' $lig`)) 
sed -i '2c '"$total"'' complex.gro 

# 生成盒子，可以根据自己体系修改
# gmx editconf -f complex.gro -o complex_box.gro -bt cubic -d 0.8 
gmx editconf -f complex.gro -o complex_box.gro -rotate 30 30 15 -box 7.5 7 10

# 溶剂化
gmx solvate -cp complex_box.gro -o complex_sol.gro -p topol.top 

# 加抗衡粒子 注意 选取的组，我的版本替换的SOL为第15组
gmx grompp -f em.mdp -c complex_sol.gro -p topol.top -o em.tpr -maxwarn 4
gmx genion -s em.tpr -p topol.top -o complex_ion.gro -neutral<< EOF
15
EOF

# 能量最小化
gmx grompp -f em.mdp -c complex_ion.gro -p topol.top -o em.tpr -maxwarn 3
gmx mdrun -v -deffnm em -nt 1

# 制作蛋白配体的index，命名自己指定，须跟mdp中命名一致
# 同抗衡粒子选取一样，我的版本第1组和第13组分别为蛋白和配体，组合的组为22
gmx make_ndx -f em.gro -o index.ndx <<EOF
1|13
!22
name 22 protein_lig
name 23 envir
q
EOF

# md模拟
gmx grompp -f md.mdp -c em.gro -p topol.top -o md.tpr -n index.ndx -maxwarn 5
gmx mdrun -v -deffnm md -nt $runcore -pin on

# 后处理，分析rmsd和rmsf
gmx rms -s md.tpr -f md.xtc -o rmsd.xvg  << EOF
1
1
EOF
gmx rms -s md.tpr -f md.xtc -o rmsd_lig.xvg  << EOF
13
13
EOF
gmx rmsf -s md.tpr -f md.xtc -o rmsf.xvg -res <<EOF
1
EOF
cd ..
done