Showing posts with label molecule. Show all posts
Showing posts with label molecule. Show all posts

16 April 2014

572. autorotate/superimpose python script

If you want to calculate reaction coordinates between two structures you need to make sure that the structures haven't been rotated or translated, something which easily happens if you allow symmetry in gaussian and (it seems) z-matrix in nwchem.

I've written a script that lets you take two structures and align and superimpose them so that only the atoms that take part in the reaction move.

It works by you defining a minimum of four atoms that aren't supposed to move /relative to each other/ (i.e. they can be translated/rotated --- just not relative to each other) between the two structures. Four atoms far from each other are ideal. You need to make sure that they also don't lie in the same plane, but form a three-dimensional space.

I've tried this on real molecules too and it works better than I'd ever dared to hope for. The more 'stationary' atoms that you can use to make up the transformation matrix, the better.


Example:
In this example atom F is in a different location in structures a and b. The structures have also been rotated relative to each other.

a.xyz
6 A A 0.00000 0.00000 1.00000 B 0.00000 1.00000 0.00000 C 1.00000 0.00000 0.00000 D -1.00000 0.00000 0.00000 E 0.00000 0.00000 -1.00000 F 0.00000 0.500000 0.00000
b.xyz
6 B A 1 0 0 B 0 1 0 C 0 0 -1 D 0 0 1 E -1 0 0 F 0 -1 0

./autorotate.py a.xyz b.xyz '1,2,3,4'
Selected atoms in molecules 1 and 2 ['A', 0.0, 0.0, 1.0] ['A', 1.0, 0.0, 0.0] ['B', 0.0, 1.0, 0.0] ['B', 0.0, 1.0, 0.0] ['C', 1.0, 0.0, 0.0] ['C', 0.0, 0.0, -1.0] ['D', -1.0, 0.0, 0.0] ['D', 0.0, 0.0, 1.0] Transformation max error: 3.33066907388e-16 Writing to a.rot.xyz

a.rot.xyz
6 A A 1.00000 0.00000 0.00000 B -0.00000 1.00000 -0.00000 C -0.00000 0.00000 -1.00000 D -0.00000 0.00000 1.00000 E -1.00000 -0.00000 -0.00000 F -0.00000 0.50000 -0.00000
This is how it looks (note that the axis aren't aligned with (1,0,0; 0,1,0; 0,0,1) but seem to go through the centre of the molecule):
a.xyz
a.rot.xyz


b.xyz





Code:
#!/usr/bin/python
import sys
import numpy as np

#autorotate input_1.xyz input_2.xyz '1,2,3,4'
# need to pick at least four atoms that are not in the same plane
# input_1.xyz will be rotated to align with input_2.xyz
# you pick at least four atoms that should have the same positions
# relative to one another (i.e. distance and relative geometry). These 
# are then used to calculate an affine transform matrix which is used 
# to rotate and translate structure input_1.xyz to overlap with 
# structure 2

def formatinput(argument):
 infile1=sys.argv[1]
 atoms=sys.argv[3]
 atoms=atoms.split(',')
 coord_sys=[]

 for n in atoms:
  coord_sys+=[int(n)-1]
 try:
  infile2=sys.argv[2]
 except:
  infile2=''
 infile=[infile1,infile2]
 return infile,coord_sys
 
def getrawdata(infile):
 f=open(infile,'r')
 
 n=0
 preamble=[]
 
 struct=[]
 
 for line in f:
  if n<2: data-blogger-escaped-if="" data-blogger-escaped-line.rstrip="" data-blogger-escaped-n="" data-blogger-escaped-preamble="">1:
   line=line.rstrip()
   struct+=[line]
  n+=1
 xyz=[struct]
 
 return xyz, preamble

def getcoords(rawdata,preamble,atoms):
 
 n=0
 cartesian=[]
 
 for structure in rawdata:
  n=n+1
  num="%03d" % (n,)
  for item in structure:
   
   coordx=filter(None,item.split(' '))
   coordy=filter(None,item.split('\t'))
   if len(coordx)>len(coordy):
    coords=coordx
   else:
    coords=coordy
      
   coordinates=[float(coords[1]),float(coords[2]),float(coords[3])]
   element=coords[0]
   cartesian+=[[element,float(coords[1]),float(coords[2]),float(coords[3])]]
     
 return cartesian

def getstructures(rawdata,preamble):
 
 n=0
 cartesian=[]
 
 for structure in rawdata:
  n=n+1
  num="%03d" % (n,)
  for item in structure:
   
   coordx=filter(None,item.split(' '))
   coordy=filter(None,item.split('\t'))
   if len(coordx)>len(coordy):
    coords=coordx
   else:
    coords=coordy
      
   coordinates=[float(coords[1]),float(coords[2]),float(coords[3])]
   element=coords[0]
   cartesian+=[coordinates]
     
 return cartesian

def affine_transform(atoms,structures):
# from http://stackoverflow.com/questions/20546182/how-to-perform-coordinates-affine-transformation-using-python-part-2
 primaryatomcoords=[]
 for n in atoms:
  primaryatomcoords+=[structures[0][n]]

 secondaryatomcoords=[]
 for n in atoms:
  secondaryatomcoords+=[structures[1][n]]

 primary = np.array(primaryatomcoords)
 secondary = np.array(secondaryatomcoords)
 primaryfull = np.array(structures[0])

 # Pad the data with ones, so that our transformation can do translations too
 n = primary.shape[0]
 pad = lambda x: np.hstack([x, np.ones((x.shape[0], 1))])
 unpad = lambda x: x[:,:-1]
 X = pad(primary)
 Y = pad(secondary)
 Xp= pad(primaryfull)

 # Solve the least squares problem X * A = Y
 # to find our transformation matrix A
 A, res, rank, s = np.linalg.lstsq(X, Y)

 transform = lambda x: unpad(np.dot(pad(x), A))

# print "Max error should be as small as possible if the rotation is successful"
# print "If max error is large you may have selected a bad set of atoms"
 print "Transformation max error:", np.abs(secondary - transform(primary)).max()
 secondaryfull=transform(primaryfull)
 return secondaryfull

def transform_xyz(tmatrix,newxyz):
 final_xyz=[]
 for n in newxyz:
  coord=np.mat(str(n[0])+';'+str(n[1])+';'+str(n[2]))
  newcoord=np.dot(tmatrix,coord)
  newcoord=np.matrix.tolist(newcoord)
  final_xyz+=[[ newcoord[0][0],newcoord[1][0],newcoord[2][0]]]
 return final_xyz

def genxyzstring(coords,elementnumber):
 x_str='%10.5f'% coords[0]
 y_str='%10.5f'% coords[1]
 z_str='%10.5f'% coords[2]
 element=elementnumber
 xyz_string=element+(3-len(element))*' '+10*' '+\
 (8-len(x_str))*' '+x_str+10*' '+(8-len(y_str))*' '+y_str+10*' '+(8-len(z_str))*' '+z_str+'\n'
 
 return xyz_string

def write_aligned(aligned_structure,atom_coords,preamble,outfile):
 outfile=outfile.replace('.xyz','.rot.xyz')
 print "Writing to ",outfile
 g=open(outfile,'w')
 g.write(str(preamble[0])+'\n'+str(preamble[1])+'\n')
 
 for n in range(0,len(aligned_structure)):
  xyzstring=genxyzstring(aligned_structure[n],atom_coords[n][0])
  g.write(xyzstring)
 g.close()
 return 0
 
if __name__ == "__main__":
 infile,atoms=formatinput(sys.argv)
 
 xyz=['','']
 preamble=['','']
 
 #get raw data
 xyz[0],preamble[0]=getrawdata(infile[0])
 xyz[1],preamble[1]=getrawdata(infile[1])

 atom_coords=[getcoords(xyz[0],preamble[0],atoms)]
 atom_coords+=[getcoords(xyz[1],preamble[1],atoms)]
 
 #collect structures from raw data
 structures=[getstructures(xyz[0],preamble[0])]
 structures+=[getstructures(xyz[1],preamble[1])]
 
 print "Selected atoms in molecules 1 and 2"
 for n in atoms:
  print atom_coords[0][n],atom_coords[1][n]
  
 #transform structure
 aligned_structure=affine_transform(atoms,structures)
 
 write_aligned(aligned_structure,atom_coords[0],preamble[0],str(infile[0]))
 

19 February 2014

557. Briefly: Drawing Molecules on Linux: ACD/ChemSketch in Wine

This is another post where I'm cheating a bit -- this is a windows program running in wine.

Let me explain. Sometimes you absolutely need to get a task done -- I use linux at work and I can't not do something just because I can't write/find a piece of software that does it for me. Science is the goal, and the route there is immaterial. So sometimes you need to be pragmatic about your software choices.

I much prefer to use free and open source software since it allows me to keep a copy of the source code which I can recompile in case the prebuilt binary suddenly won't work with a future version of debian. Having the source makes me feel more secure in the assertion that it's worth my time learning how to use that piece of software.

Second to that, I prefer native linux software -- although unfortunately 'native' here often means 'written in java', and -- while not knowing too much about java -- from a user point of view java software tends to be comparatively slow to load and run. Certainly it appears slower than a comparable C/C++ program.

As a last resort, I can accept having to run a windows binary in wine. It doesn't make me happy, and often there are small, niggling issues associated with it -- but if it can get the job done, so be it.

Note that I'm unwilling to actually run a native windows program on a native windows installation -- one has to have some standards...

Either way, this is why I look at windows programs as well for drawing structures and processing NMR spectra.

So I recently tested marvinsketch (linux), bkchem (linux), easychem (linux), chemtool (linux), gchempaint (linux), ISISDraw (wine) and ChemSketch (wine). Out of those I prefer MarvinSketch. However, I'm still exploring the alternatives.

ACD/ChemSketch for Linux is really just an older (2006) version of ACD/LABS free ChemSketch (the windows version is from 2013). To get it, go to http://www.acdlabs.com/resources/freeware/index.php and click on Download. I tried the version 'for linux', and not the windows one. I'm making the presumption that the newer binary won't play well with wine.

You don't need to be an academic to qualify for the free version.

wine ~/Downloads/chemsk50.exe

Once it's installed you can explore ChemSketch. Overall it's fairly similar to e.g. ISISDraw.







11 October 2013

519. Formatting an XYZ molecular geometry file using python

This is a silly little script -- ECCE, which I use to manage all my computations, is very particular about what XYZ files it can and cannot read -- if the number of spaces between the coordinates are wrong, it will fail to read the structure. So here's a python script, based on parts I've cannibalised from other scripts that I've written in the past (i.e. it could be made more elegant, but I had the parts ready and just wanted something that works) which takes a somewhat ugly XYZ file and turns it into something beautiful. At least in the eyes of ECCE.

The impetus for this was that someone gave me an XYZ file the had generated by copying columns from Excel. On Mac. mac2unix took care of the line endings, but there were tabs (\t) all over the place.

Usage:
polish_xyz ugly.xyz pretty.xyz

Script:
#!/usr/bin/python
import sys 

def getrawdata(infile):
    f=open(infile,'r')
    n=0 
    preamble=[]
    struct=[]
    
    for line in f:
        if n<2: data-blogger-escaped-if="" data-blogger-escaped-line.rstrip="" data-blogger-escaped-n="" data-blogger-escaped-preamble="">3:
            line=line.rstrip()
            struct+=[line]
        n+=1
    xyz=[struct]
    return xyz, preamble

def genxyzstring(coords,elementnumber):
    x_str='%10.5f'% coords[0]
    y_str='%10.5f'% coords[1]
    z_str='%10.5f'% coords[2]
    element=elementnumber
    xyz_string=element+(3-len(element))*' '+10*' '+\ 
    (8-len(x_str))*' '+x_str+10*' '+(8-len(y_str))*' '+y_str+10*' '+(8-len(z_str))*' '+z_str+'\n'
    
    return xyz_string

def getstructures(rawdata,preamble,outfile):
    n=0 
    for structure in rawdata:
        n=n+1
        num="%03d" % (n,)
        g=open(outfile,'w')
        itson=False
        cartesian=[]
    
        for item in structure:
            coordx=filter(None,item.split(' ')) 
            coordy=filter(None,item.split('\t'))
            if len(coordx)>len(coordy):
                coords=coordx
            else:
                coords=coordy
            coordinates=[float(coords[1]),float(coords[2]),float(coords[3])]
            element=coords[0]
            cartesian+=[genxyzstring(coordinates,element)]
        g.write(str(preamble[0])+'\n')
        g.write(str(preamble[1])+'\n')
        for line in cartesian:
            g.write(line)
        g.close()
        cartesian=[]
    return 0

if __name__ == "__main__":
    infile=sys.argv[1]
    outfile=sys.argv[2]
    xyz,preamble=getrawdata(infile)
    structures=getstructures(xyz,preamble,outfile)

518. Generating enantiomers of molecular structures given in XYZ coordinates using python.

What I'm showing here is probably overkill -- there may be better ways of doing this with sed/awk*. However, since I had most of the code below ready as it was part of another script, doing this is python was quick and easy. Plus it's portable-ish.

*[ECCE, which I use for managing my calculations, is very, very particular about the format of the XYZ file, including the number of chars between coordinates. So simply doing e.g.

cat molecule.xyz|gawk '{print $1,$2,$3,-$4}'

won't work. Not all pieces of software are that picky when it comes to xyz coordinates though -- jmol, for example, is very forgiving.]

Anyway, the script below flips a molecular structure by taking the geometry given as a (properly formatted) XYZ file, and changing the sign in front of the Z coordinates. It's that simple.
Save it, call it flip_xyz, make it executable and call it using e.g.

flip_xyz molecule.xyz flipped_molecule.xyz

Script:
#!/usr/bin/python
import sys 

def getrawdata(infile):
    f=open(infile,'r')
    n=0 
    preamble=[]
    struct=[]
  
    for line in f:
        if n<2: data-blogger-escaped-if="" data-blogger-escaped-line.rstrip="" data-blogger-escaped-n="" data-blogger-escaped-preamble="">1:
            line=line.rstrip()
            struct+=[line]
        n+=1
    xyz=[struct]
    
    return xyz, preamble

def genxyzstring(coords,elementnumber):
    x_str='%10.5f'% coords[0]
    y_str='%10.5f'% coords[1]
    z_str='%10.5f'% -coords[2]
    element=elementnumber
    xyz_string=element+(3-len(element))*' '+10*' '+\ 
    (8-len(x_str))*' '+x_str+10*' '+(8-len(y_str))*' '+y_str+10*' '+(8-len(z_str))*' '+z_str+'\n'
    
    return xyz_string

def getstructures(rawdata,preamble,outfile):
    n=0 
    for structure in rawdata:
        n=n+1
        num="%03d" % (n,)
        g=open(outfile,'w')
        cartesian=[]
    
        for item in structure:
            coordx=filter(None,item.split(' ')) 
            coordy=filter(None,item.split('\t'))
            if len(coordx)>len(coordy):
                coords=coordx
            else:
                coords=coordy
    
            coordinates=[float(coords[1]),float(coords[2]),float(coords[3])]
            element=coords[0]
            cartesian+=[genxyzstring(coordinates,element)]
    
        g.write(str(preamble[0])+'\n')
        g.write(str(preamble[1])+'\n')
        for line in cartesian:
            g.write(line)
        g.close()
        cartesian=[]
    return 0

if __name__ == "__main__":
    infile=sys.argv[1]
    outfile=sys.argv[2]
    xyz,preamble=getrawdata(infile)
    structures=getstructures(xyz,preamble,outfile)