Source code for autosolvate.clustergen

import getopt, sys, os
import subprocess
import mdtraj as md
import numpy as np

def formatXyz(mdtrajxyz,outfile):
    out = open(outfile,'w')
    oldxyz = open(mdtrajxyz).readlines()
    out.write(oldxyz[0])
    out.write(oldxyz[1])
    for i in range(2,len(oldxyz)):
        e,x,y,z = oldxyz[i].split()
        e = e.strip("0123456789")
        out.write("{0:>2s} {1:>16s} {2:>16s} {3:>16s}\n".format(e,x,y,z))
    out.close()



[docs]def clustergen(filename='water_solvated', trajname='water_solvated.netcdf', startframe=0, interval=100, size=4, srun_use=False): r""" Extract microsolvated cluster around center solute Parameters ---------- filename : str, default: 'water_solvated.prmtop' Filename name of .prmtop files trajname : str, default: 'water_solvated.netcdf' Name of trajectory startframe : int, Optional, default: 0 First frame to extract the microsolvated clusters from trajectory interval : int, Optional, default: 100 Interval at which to extract microsolvated clusters from trajectory size : float, Optional, default: 4 size of solvent shell around center solute in Angstrom srun_use : bool, Optional, default: False Run all commands with a srun prefix. Returns ------- None Result stored as 'filename'-cluster.xyz file """ print('Loading trajectory') a=md.load(trajname, top=filename) traj2=a.xyz if traj2.shape[0]<startframe: print("trajectory too short") print('selecting center solute') molecules=a.topology.find_molecules() center_list=[] for atom in molecules[0]: center_list.append(atom.index) print('extracting from frames:', list(range(startframe, traj2.shape[0], interval))) print('calculating distance to all solvent molecules') for iframe in range(startframe, traj2.shape[0], interval): center_xyz=traj2[iframe, np.array(center_list),:] center_xyz=np.average(center_xyz,axis=0) unit_cell_list=a.unitcell_lengths[0] tshape=traj2.shape unit_cell_list3 = np.repeat(unit_cell_list[np.newaxis, :], tshape[1], axis=0) shift=-center_xyz+unit_cell_list3/2. traj3=np.remainder(traj2[iframe,:,:]+shift,unit_cell_list3) dist_molecules=np.empty(len(molecules)) select_molecules=[] size_molecules=[] use_molecules=0 for i in range(len(molecules)): select_true=False dist_atom=10000 for atom in molecules[i]: for center_idx in center_list: dist4=np.linalg.norm(traj3[atom.index,:]-traj3[center_idx,:]) if dist4<dist_atom: dist_atom=dist4 dist_molecules[i]=dist_atom if iframe==startframe: print('select solvent molecules') dist_use=np.sort(dist_molecules)*10. ncutout=np.argwhere(dist_use>size)[0][0] ncutout=ncutout+1 if iframe==startframe: print("for first frame selected", ncutout, 'solvent molecules') select_mol=np.sort(np.argsort(dist_molecules)[:ncutout]) if iframe==startframe: print('saving xyz') select_list=[] for i in select_mol: for atom in molecules[i]: select_list.append(atom.index) select_list.sort() #print("select atoms", select_list) b=a.slice(iframe) select_xyz=traj3[select_list,:] c=b.atom_slice(select_list) c.xyz[0,:,:]=select_xyz c.save_xyz('tmp.xyz', force_overwrite=True) xyzname = filename.replace(".prmtop","")+'-cutoutn-'+str(iframe)+'.xyz' formatXyz('tmp.xyz', xyzname) os.remove('tmp.xyz')
[docs]def startclustergen(argumentList): r""" Wrap function that parses command line options for autosolvate clustergen, extracts microsolvated clusters from trajectory, Parameters ---------- argumentList: list The list contains the command line options to specify input trajectory, microsolvated cluster size, and other options related to microsolvated cluster extraction. Command line option definitions: -m, --filename name of the .prmtop file -t, --trajname name of .netcdf trajectory to extract the microsolvate clusters from -a, --startframe first frame at which to start extracting from the trajectory the microsolvated clusters -i, --interval interval in frames at which to extract microsolvated clusters from the trajectory -s, --size solvent shell size for microsolvated clusters in Angstrom, upper limit for minimum solute-solvent distance -r, --srunuse option to run inside a slurm job -h, --help short usage description Returns ------- None Generates the ```.xyz`` file containing the microsolvated cluster """ #print(argumentList) options = "hf:t:a:i:s:r" long_options = ["help", "filename", "trajname", "startframe", "interval", "size", "srunuse"] arguments, values = getopt.getopt(argumentList, options, long_options) srun_use=False size=4 startframe=0 interval=100 for currentArgument, currentValue in arguments: if currentArgument in ("-h", "-help"): print('Usage: autosolvate clustergen [OPTIONS]') print(' -m, --filename name of the .prmtop file') print(' -t, --trajname name of .netcdf trajectory to extract the microsolvate clusters from') print(' -a, --startframe first frame at which to start extracting from the trajectory the microsolvated clusters') print(' -i, --interval interval in frames at which to extract microsolvated clusters from the trajectory') print(' -s, --size solvent shell size for microsolvated clusters in Angstrom, upper limit for minimum solute-solvent distance') print(' -r, --srunuse option to run inside a slurm job') print(' -h, --help short usage description') exit() elif currentArgument in ("-f", "-filename"): print ("Filename:", currentValue) filename=str(currentValue) elif currentArgument in ("-t", "-trajname"): print ("Trajectory name:", currentValue) trajname=str(currentValue) elif currentArgument in ("-a", "-startframe"): print ("startframe to extract:", currentValue) startframe=int(currentValue) elif currentArgument in ("-i", "-interval"): print ("interval to extract:", currentValue) interval=int(currentValue) elif currentArgument in ("-s", "-size"): print ("Cutout size in Angstrom:", currentValue) size=float(currentValue) elif currentArgument in ("-r", "-srunuse"): print("usign srun") srun_use=True clustergen(filename=filename, trajname=trajname, startframe=startframe, interval=interval, size=size, srun_use=srun_use)
if __name__ == '__main__': argumentList = sys.argv[1:] startclustergen(argumentList)