Source code for autosolvate.clustergen

import getopt, sys, os
import subprocess
import mdtraj as md
import numpy as np

def formatXyz(mdtrajxyz,outfile):
    out = open(outfile,'w')
    oldxyz = open(mdtrajxyz).readlines()
    out.write(oldxyz[0])
    out.write(oldxyz[1])
    for i in range(2,len(oldxyz)):
        e,x,y,z = oldxyz[i].split()
        e = e.strip("0123456789")
        out.write("{0:>2s} {1:>16s} {2:>16s} {3:>16s}\n".format(e,x,y,z))
    out.close()



[docs]def clustergen(filename='water_solvated', trajname='water_solvated.netcdf', startframe=0, interval=100, size=4, srun_use=False):
    r"""
    Extract microsolvated cluster around center solute

    Parameters
    ----------
    filename : str, default: 'water_solvated.prmtop'
        Filename name of .prmtop files
    trajname :  str, default: 'water_solvated.netcdf'
        Name of trajectory
    startframe : int, Optional, default: 0
        First frame to extract the microsolvated clusters from trajectory
    interval : int, Optional, default: 100
        Interval at which to extract microsolvated clusters from trajectory
    size : float, Optional, default: 4
        size of solvent shell around center solute in Angstrom
    srun_use : bool, Optional, default: False
        Run all commands with a srun prefix.

    Returns
    -------
    None
        Result stored as 'filename'-cluster.xyz file
    """
    
    print('Loading trajectory')
    a=md.load(trajname, top=filename)
    traj2=a.xyz
    if traj2.shape[0]<startframe:
      print("trajectory too short")
    print('selecting center solute')
    molecules=a.topology.find_molecules()
    center_list=[]
    for atom in molecules[0]:
      center_list.append(atom.index)
    print('extracting from frames:', list(range(startframe, traj2.shape[0], interval)))
    print('calculating distance to all solvent molecules')
    for iframe in range(startframe, traj2.shape[0], interval):
      center_xyz=traj2[iframe, np.array(center_list),:]
      center_xyz=np.average(center_xyz,axis=0)
      unit_cell_list=a.unitcell_lengths[0]
      tshape=traj2.shape
      unit_cell_list3 = np.repeat(unit_cell_list[np.newaxis, :], tshape[1], axis=0)
      shift=-center_xyz+unit_cell_list3/2.
      traj3=np.remainder(traj2[iframe,:,:]+shift,unit_cell_list3)
      dist_molecules=np.empty(len(molecules))
      select_molecules=[]
      size_molecules=[]
      use_molecules=0
      for i in range(len(molecules)):
        select_true=False
        dist_atom=10000
        for atom in molecules[i]:
          for center_idx in center_list:
            dist4=np.linalg.norm(traj3[atom.index,:]-traj3[center_idx,:])
            if dist4<dist_atom:
                dist_atom=dist4
        dist_molecules[i]=dist_atom
      if iframe==startframe:
        print('select solvent molecules')
      dist_use=np.sort(dist_molecules)*10.
      ncutout=np.argwhere(dist_use>size)[0][0]
      ncutout=ncutout+1
      if iframe==startframe:
        print("for first frame selected", ncutout, 'solvent molecules')
      select_mol=np.sort(np.argsort(dist_molecules)[:ncutout])
      if iframe==startframe:
        print('saving xyz')
      select_list=[]
      for i in select_mol:
        for atom in molecules[i]:
          select_list.append(atom.index)
      select_list.sort()
      #print("select atoms", select_list)
      b=a.slice(iframe)
      select_xyz=traj3[select_list,:]
      c=b.atom_slice(select_list)
      c.xyz[0,:,:]=select_xyz
      c.save_xyz('tmp.xyz', force_overwrite=True)
      xyzname = filename.replace(".prmtop","")+'-cutoutn-'+str(iframe)+'.xyz'
      formatXyz('tmp.xyz', xyzname)
      os.remove('tmp.xyz')

[docs]def startclustergen(argumentList):
    r"""
    Wrap function that parses command line options for autosolvate clustergen,
    extracts microsolvated clusters from trajectory,

    Parameters
    ----------
    argumentList: list
       The list contains the command line options to specify input trajectory, microsolvated cluster size, and other options
       related to microsolvated cluster extraction.

       Command line option definitions:
         -m, --filename  name of the .prmtop file 
         -t, --trajname  name of .netcdf trajectory to extract the microsolvate clusters from
         -a, --startframe  first frame at which to start extracting from the trajectory the microsolvated clusters
         -i, --interval  interval in frames at which to extract microsolvated clusters from the trajectory
         -s, --size  solvent shell size for microsolvated clusters in Angstrom, upper limit for minimum solute-solvent distance
         -r, --srunuse  option to run inside a slurm job
         -h, --help  short usage description

    Returns
    -------
    None
        Generates the ```.xyz`` file containing the microsolvated cluster
    """
    #print(argumentList)
    options = "hf:t:a:i:s:r"
    long_options = ["help", "filename", "trajname", "startframe", "interval", "size", "srunuse"]
    arguments, values = getopt.getopt(argumentList, options, long_options)
    srun_use=False
    size=4
    startframe=0
    interval=100
    for currentArgument, currentValue in arguments:
        if currentArgument in ("-h", "-help"):
            print('Usage: autosolvate clustergen [OPTIONS]')
            print('  -m, --filename             name of the .prmtop file')
            print('  -t, --trajname             name of .netcdf trajectory to extract the microsolvate clusters from')
            print('  -a, --startframe           first frame at which to start extracting from the trajectory the microsolvated clusters')
            print('  -i, --interval             interval in frames at which to extract microsolvated clusters from the trajectory')
            print('  -s, --size                 solvent shell size for microsolvated clusters in Angstrom, upper limit for minimum solute-solvent distance')
            print('  -r, --srunuse              option to run inside a slurm job')
            print('  -h, --help                 short usage description')
            exit()
        elif currentArgument in ("-f", "-filename"):
            print ("Filename:", currentValue)
            filename=str(currentValue)
        elif currentArgument in ("-t", "-trajname"):
            print ("Trajectory name:", currentValue)
            trajname=str(currentValue)
        elif currentArgument in ("-a", "-startframe"):
            print ("startframe to extract:", currentValue)
            startframe=int(currentValue)
        elif currentArgument in ("-i", "-interval"):
            print ("interval to extract:", currentValue)
            interval=int(currentValue)
        elif currentArgument in ("-s", "-size"):
            print ("Cutout size in Angstrom:", currentValue)
            size=float(currentValue)
        elif currentArgument in ("-r", "-srunuse"):
            print("usign srun")
            srun_use=True

    clustergen(filename=filename, trajname=trajname, startframe=startframe, interval=interval, size=size, srun_use=srun_use)

if __name__ == '__main__':
    argumentList = sys.argv[1:]
    startclustergen(argumentList)