mirror of
https://github.com/openbabel/openbabel.git
synced 2025-02-25 18:55:23 -06:00
Updated to use plugin descriptors. Changed to create a single instance of plugin descriptors and fingerprinters. Also, readfile now raises an IOError if the file does not exist. * scripts/python/examples/findcoverage.bat: Add .bat file to automate calculation of code coverage for pybel
492 lines
16 KiB
Python
492 lines
16 KiB
Python
import openbabel as ob
|
|
import os.path
|
|
|
|
def _formatstodict(list):
|
|
broken = [x.replace("[Read-only]", "").replace("[Write-only]","").split(" -- ") for x in list]
|
|
broken = [(x,y.strip()) for x,y in broken]
|
|
return dict(broken)
|
|
_obconv = ob.OBConversion()
|
|
informats = _formatstodict(_obconv.GetSupportedInputFormat())
|
|
outformats = _formatstodict(_obconv.GetSupportedOutputFormat())
|
|
|
|
def _getplugins(findplugin, names):
|
|
plugins = dict([(x, findplugin(x)) for x in names if findplugin(x)])
|
|
return plugins
|
|
descriptors = _getplugins(ob.OBDescriptor.FindType, ['LogP', 'MR', 'TPSA'])
|
|
fingerprinters = _getplugins(ob.OBFingerprint.FindFingerprint, ['FP2', 'FP3', 'FP4'])
|
|
|
|
def readfile(format, filename):
|
|
"""Iterate over the molecules in a file.
|
|
|
|
Required parameters:
|
|
format
|
|
filename
|
|
|
|
You can access the first molecule in a file using:
|
|
mol = readfile("smi", "myfile.smi").next()
|
|
|
|
You can make a list of the molecules in a file using:
|
|
mols = [mol for mol in readfile("smi", "myfile.smi")]
|
|
|
|
You can iterate over the molecules in a file as shown in the
|
|
following code snippet...
|
|
|
|
>>> atomtotal = 0
|
|
>>> for mol in readfile("sdf","head.sdf"):
|
|
... atomtotal += len(mol.atoms)
|
|
...
|
|
>>> print atomtotal
|
|
43
|
|
"""
|
|
obconversion = ob.OBConversion()
|
|
formatok = obconversion.SetInFormat(format)
|
|
if not formatok:
|
|
raise ValueError,"%s is not a recognised OpenBabel format" % format
|
|
if not os.path.isfile(filename):
|
|
raise IOError, "No such file: '%s'" % filename
|
|
obmol = ob.OBMol()
|
|
notatend = obconversion.ReadFile(obmol,filename)
|
|
while notatend:
|
|
yield Molecule(obmol)
|
|
obmol = ob.OBMol()
|
|
notatend = obconversion.Read(obmol)
|
|
|
|
def readstring(format, string):
|
|
"""Read in a molecule from a string.
|
|
|
|
Required parameters:
|
|
format
|
|
string
|
|
|
|
>>> input = "C1=CC=CS1"
|
|
>>> mymol = readstring("smi",input)
|
|
>>> len(mymol.atoms)
|
|
5
|
|
"""
|
|
obmol = ob.OBMol()
|
|
obconversion = ob.OBConversion()
|
|
|
|
formatok = obconversion.SetInFormat(format)
|
|
if not formatok:
|
|
raise ValueError,"%s is not a recognised OpenBabel format" % format
|
|
|
|
obconversion.ReadString(obmol, string)
|
|
return Molecule(obmol)
|
|
|
|
class Outputfile(object):
|
|
"""Represent a file to which *output* is to be sent.
|
|
|
|
Although it's possible to write a single molecule to a file by
|
|
calling the write() method of a molecule, if multiple molecules
|
|
are to be written to the same file you should use the Outputfile
|
|
class.
|
|
|
|
Required parameters:
|
|
format
|
|
filename
|
|
Optional parameters:
|
|
overwrite (default is False) -- if the output file already exists,
|
|
should it be overwritten?
|
|
Methods:
|
|
write(molecule)
|
|
"""
|
|
def __init__(self, format, filename, overwrite=False):
|
|
self.format = format
|
|
self.filename = filename
|
|
if not overwrite and os.path.isfile(self.filename):
|
|
raise IOError, "%s already exists. Use 'overwrite=True' to overwrite it." % self.filename
|
|
self.obConversion = ob.OBConversion()
|
|
formatok = self.obConversion.SetOutFormat(self.format)
|
|
if not formatok:
|
|
raise ValueError,"%s is not a recognised OpenBabel format" % format
|
|
self.total = 0 # The total number of molecules written to the file
|
|
|
|
def write(self, molecule):
|
|
"""Write a molecule to the output file.
|
|
|
|
Required parameters:
|
|
molecule
|
|
"""
|
|
if not self.filename:
|
|
raise IOError, "Outputfile instance is closed."
|
|
|
|
if self.total==0:
|
|
self.obConversion.WriteFile(molecule.OBMol, self.filename)
|
|
else:
|
|
self.obConversion.Write(molecule.OBMol)
|
|
self.total += 1
|
|
|
|
def close(self):
|
|
"""Close the Outputfile to further writing."""
|
|
self.obConversion.CloseOutFile()
|
|
self.filename = None
|
|
|
|
class Molecule(object):
|
|
"""Represent a Pybel molecule.
|
|
|
|
Optional parameters:
|
|
OBMol -- an Open Babel molecule (default is None)
|
|
|
|
An empty Molecule is created if an Open Babel molecule is not provided.
|
|
|
|
Attributes:
|
|
atoms, charge, data, dim, energy, exactmass, flags, formula,
|
|
mod, molwt, spin, sssr, title, unitcell.
|
|
(refer to the Open Babel library documentation for more info).
|
|
|
|
Methods:
|
|
write(), calcfp(), calcdesc()
|
|
|
|
The original Open Babel molecule can be accessed using the attribute:
|
|
OBMol
|
|
"""
|
|
_getmethods = {
|
|
'conformers':'GetConformers',
|
|
# 'coords':'GetCoordinates', you can access the coordinates the atoms elsewhere
|
|
# 'data':'GetData', has been removed
|
|
'dim':'GetDimension',
|
|
'energy':'GetEnergy',
|
|
'exactmass':'GetExactMass',
|
|
'flags':'GetFlags',
|
|
'formula':'GetFormula',
|
|
# 'internalcoord':'GetInternalCoord', # Causes SWIG warning
|
|
'mod':'GetMod',
|
|
'molwt':'GetMolWt',
|
|
'sssr':'GetSSSR',
|
|
'title':'GetTitle',
|
|
'charge':'GetTotalCharge',
|
|
'spin':'GetTotalSpinMultiplicity'
|
|
}
|
|
|
|
def __init__(self, OBMol=None):
|
|
|
|
self.OBMol = OBMol
|
|
if not self.OBMol:
|
|
self.OBMol = ob.OBMol()
|
|
|
|
def __getattr__(self, attr):
|
|
"""Return the value of an attribute
|
|
|
|
Note: The values are calculated on-the-fly. You may want to store the value in
|
|
a variable if you repeatedly access the same attribute.
|
|
"""
|
|
# This function is not accessed in the case of OBMol
|
|
if attr == "atoms":
|
|
# Create an atoms attribute on-the-fly
|
|
return [ Atom(self.OBMol.GetAtom(i+1),i+1) for i in range(self.OBMol.NumAtoms()) ]
|
|
elif attr == "data":
|
|
# Create a data attribute on-the-fly
|
|
return MoleculeData(self.OBMol)
|
|
elif attr == "unitcell":
|
|
# Create a unitcell attribute on-th-fly
|
|
unitcell = self.OBMol.GetData(ob.UnitCell)
|
|
if unitcell:
|
|
return ob.toUnitCell(unitcell)
|
|
else:
|
|
raise AttributeError, "Molecule has no attribute 'unitcell'"
|
|
elif attr in self._getmethods:
|
|
# Call the OB Method to find the attribute value
|
|
return getattr(self.OBMol, self._getmethods[attr])()
|
|
else:
|
|
raise AttributeError, "Molecule has no attribute '%s'" % attr
|
|
|
|
def __iter__(self):
|
|
"""Iterate over the Atoms of the Molecule.
|
|
|
|
This allows constructions such as the following:
|
|
for atom in mymol:
|
|
print atom
|
|
"""
|
|
for atom in self.atoms:
|
|
yield atom
|
|
|
|
def calcdesc(self, descnames=[]):
|
|
"""Calculate descriptor values.
|
|
|
|
Optional parameter:
|
|
descnames -- a list of names of descriptors
|
|
|
|
If descnames is not specified, the full list of Open Babel
|
|
descriptors is calculated: LogP, PSA and MR.
|
|
"""
|
|
if not descnames:
|
|
descnames = descriptors.keys()
|
|
ans = {}
|
|
for descname in descnames:
|
|
try:
|
|
desc = descriptors[descname]
|
|
except KeyError:
|
|
raise ValueError, "%s is not a recognised Open Babel descriptor type" % descname
|
|
ans[descname] = desc.Predict(self.OBMol)
|
|
return ans
|
|
|
|
def calcfp(self, fptype="FP2"):
|
|
"""Calculate a molecular fingerprint.
|
|
|
|
Optional parameters:
|
|
fptype -- the name of the Open Babel fingerprint type.
|
|
|
|
If fptype is not specified, the FP2 fingerprint type
|
|
is used. See the Open Babel library documentation for more
|
|
details.
|
|
"""
|
|
fp = ob.vectorUnsignedInt()
|
|
try:
|
|
fingerprinter = fingerprinters[fptype]
|
|
except KeyError:
|
|
raise ValueError, "%s is not a recognised Open Babel Fingerprint type" % fptype
|
|
fingerprinter.GetFingerprint(self.OBMol, fp)
|
|
return Fingerprint(fp)
|
|
|
|
def write(self, format="SMI", filename=None, overwrite=False):
|
|
"""Write the molecule to a file or return a string.
|
|
|
|
Optional parameters:
|
|
format -- default is "SMI"
|
|
filename -- default is None
|
|
overwite -- default is False
|
|
|
|
If a filename is specified, the result is written to a file.
|
|
Otherwise, a string is returned containing the result.
|
|
The overwrite flag is ignored if a filename is not specified.
|
|
It controls whether to overwrite an existing file.
|
|
"""
|
|
|
|
obconversion = ob.OBConversion()
|
|
formatok = obconversion.SetOutFormat(format)
|
|
if not formatok:
|
|
raise ValueError,"%s is not a recognised OpenBabel format" % format
|
|
|
|
if filename:
|
|
if not overwrite and os.path.isfile(filename):
|
|
raise IOError, "%s already exists. Use 'overwrite=True' to overwrite it." % filename
|
|
obconversion.WriteFile(self.OBMol,filename)
|
|
else:
|
|
return obconversion.WriteString(self.OBMol)
|
|
|
|
def __str__(self):
|
|
return self.write()
|
|
|
|
|
|
class Atom(object):
|
|
"""Represent a Pybel atom.
|
|
|
|
Optional parameters:
|
|
OBAtom -- an Open Babel Atom (default is None)
|
|
index -- the index of the atom in the molecule (default is None)
|
|
|
|
An empty Atom is created if an Open Babel atom is not provided.
|
|
|
|
Attributes:
|
|
atomicmass, atomicnum, cidx, coords, coordidx, exactmass,
|
|
formalcharge, heavyvalence, heterovalence, hyb, idx,
|
|
implicitvalence, index, isotope, partialcharge, spin, type,
|
|
valence, vector.
|
|
|
|
(refer to the Open Babel library documentation for more info).
|
|
|
|
The original Open Babel atom can be accessed using the attribute:
|
|
OBAtom
|
|
"""
|
|
|
|
_getmethods = {
|
|
'atomicmass':'GetAtomicMass',
|
|
'atomicnum':'GetAtomicNum',
|
|
'cidx':'GetCIdx',
|
|
'coordidx':'GetCoordinateIdx',
|
|
# 'data':'GetData', has been removed
|
|
'exactmass':'GetExactMass',
|
|
'formalcharge':'GetFormalCharge',
|
|
'heavyvalence':'GetHvyValence',
|
|
'heterovalence':'GetHeteroValence',
|
|
'hyb':'GetHyb',
|
|
'idx':'GetIdx',
|
|
'implicitvalence':'GetImplicitValence',
|
|
'isotope':'GetIsotope',
|
|
'partialcharge':'GetPartialCharge',
|
|
'spin':'GetSpinMultiplicity',
|
|
'type':'GetType',
|
|
'valence':'GetValence',
|
|
'vector':'GetVector',
|
|
}
|
|
|
|
def __init__(self, OBAtom=None, index=None):
|
|
if not OBAtom:
|
|
OBAtom = ob.OBAtom()
|
|
self.OBAtom = OBAtom
|
|
# For the moment, I will remember the index of the atom in the molecule...
|
|
# I'm not sure if this is useful, though.
|
|
self.index = index
|
|
|
|
def __getattr__(self, attr):
|
|
if attr == "coords":
|
|
return (self.OBAtom.GetX(), self.OBAtom.GetY(), self.OBAtom.GetZ())
|
|
elif attr in self._getmethods:
|
|
return getattr(self.OBAtom, self._getmethods[attr])()
|
|
else:
|
|
raise AttributeError, "Molecule has no attribute %s" % attr
|
|
|
|
def __str__(self):
|
|
"""Create a string representation of the atom.
|
|
|
|
>>> a = Atom()
|
|
>>> print a
|
|
Atom: 0 (0.0, 0.0, 0.0)
|
|
"""
|
|
return "Atom: %d %s" % (self.atomicnum, self.coords.__str__())
|
|
|
|
def findbits(fp, bitsperint):
|
|
"""Find which bits are set in a list/vector.
|
|
|
|
This function is used by the Fingerprint class.
|
|
|
|
>>> findbits([13, 71], 8)
|
|
[1, 3, 4, 9, 10, 11, 15]
|
|
"""
|
|
ans = []
|
|
start = 1
|
|
for x in fp:
|
|
i = start
|
|
while x > 0:
|
|
if x % 2:
|
|
ans.append(i)
|
|
x >>= 1
|
|
i += 1
|
|
start += bitsperint
|
|
return ans
|
|
|
|
class Fingerprint(object):
|
|
"""A Molecular Fingerprint.
|
|
|
|
Required parameters:
|
|
obFingerprint -- a vector calculated by OBFingerprint.FindFingerprint()
|
|
|
|
Attributes:
|
|
fp -- the original obFingerprint
|
|
bits -- a list of bits set in the Fingerprint
|
|
|
|
Methods:
|
|
The "|" operator can be used to calculate the Tanimoto coeff. For example,
|
|
given two Fingerprints 'a', and 'b', the Tanimoto coefficient is given by:
|
|
tanimoto = a | b
|
|
"""
|
|
def __init__(self, obFingerprint):
|
|
self.fp = obFingerprint
|
|
def __or__(self, other):
|
|
return ob.OBFingerprint.Tanimoto(self.fp, other.fp)
|
|
def __getattr__(self, attr):
|
|
if attr == "bits":
|
|
# Create a bits attribute on-the-fly
|
|
return findbits(self.fp, ob.OBFingerprint.Getbitsperint())
|
|
else:
|
|
raise AttributeError, "Molecule has no attribute %s" % attr
|
|
def __str__(self):
|
|
return ", ".join([str(x) for x in self.fp])
|
|
|
|
class Smarts(object):
|
|
"""A Smarts Pattern Matcher
|
|
|
|
Required parameters:
|
|
smartspattern
|
|
|
|
Methods:
|
|
findall()
|
|
|
|
Example:
|
|
>>> mol = readstring("smi","CCN(CC)CC") # triethylamine
|
|
>>> smarts = Smarts("[#6][#6]") # Matches an ethyl group
|
|
>>> print smarts.findall(mol)
|
|
[(1, 2), (4, 5), (6, 7)]
|
|
"""
|
|
def __init__(self,smartspattern):
|
|
"""Initialise with a SMARTS pattern."""
|
|
self.obsmarts = ob.OBSmartsPattern()
|
|
self.obsmarts.Init(smartspattern)
|
|
def findall(self,molecule):
|
|
"""Find all matches of the SMARTS pattern to a particular molecule.
|
|
|
|
Required parameters:
|
|
molecule
|
|
"""
|
|
self.obsmarts.Match(molecule.OBMol)
|
|
return [x for x in self.obsmarts.GetUMapList()]
|
|
|
|
class MoleculeData(object):
|
|
"""Store molecule data in a dictionary-type object
|
|
|
|
Required parameters:
|
|
obmol -- an Open Babel OBMol
|
|
|
|
Methods and accessor methods are like those of a dictionary except
|
|
that the data is retrieved on-the-fly from the underlying OBMol.
|
|
|
|
Example:
|
|
>>> mol = readfile("sdf", 'head.sdf').next()
|
|
>>> data = mol.data
|
|
>>> print data
|
|
{'Comment': 'CORINA 2.61 0041 25.10.2001', 'NSC': '1'}
|
|
>>> print len(data), data.keys(), data.has_key("NSC")
|
|
2 ['Comment', 'NSC'] True
|
|
>>> print data['Comment']
|
|
CORINA 2.61 0041 25.10.2001
|
|
>>> data['Comment'] = 'This is a new comment'
|
|
>>> for k,v in data.iteritems():
|
|
... print k, "-->", v
|
|
Comment --> This is a new comment
|
|
NSC --> 1
|
|
>>> del data['NSC']
|
|
>>> print len(data), data.keys(), data.has_key("NSC")
|
|
1 ['Comment'] False
|
|
"""
|
|
def __init__(self, obmol):
|
|
self._mol = obmol
|
|
def _data(self):
|
|
return [ob.toPairData(x) for x in self._mol.GetData() if x.GetDataType()==ob.PairData or x.GetDataType()==ob.CommentData]
|
|
def _testforkey(self, key):
|
|
if not key in self:
|
|
raise KeyError, "'%s'" % key
|
|
def keys(self):
|
|
return [x.GetAttribute() for x in self._data()]
|
|
def values(self):
|
|
return [x.GetValue() for x in self._data()]
|
|
def items(self):
|
|
return zip(self.keys(), self.values())
|
|
def __iter__(self):
|
|
return iter(self.keys())
|
|
def iteritems(self):
|
|
return iter(self.items())
|
|
def __len__(self):
|
|
return len(self._data())
|
|
def __contains__(self, key):
|
|
return self._mol.HasData(key)
|
|
def __delitem__(self, key):
|
|
self._testforkey(key)
|
|
self._mol.DeleteData(self._mol.GetData(key))
|
|
def clear(self):
|
|
for key in self:
|
|
del self[key]
|
|
def has_key(self, key):
|
|
return key in self
|
|
def update(self, dictionary):
|
|
for k, v in dictionary.iteritems():
|
|
self[k] = v
|
|
def __getitem__(self, key):
|
|
self._testforkey(key)
|
|
return ob.toPairData(self._mol.GetData(key)).GetValue()
|
|
def __setitem__(self, key, value):
|
|
if key in self:
|
|
pairdata = ob.toPairData(self._mol.GetData(key))
|
|
pairdata.SetValue(str(value))
|
|
else:
|
|
pairdata = ob.OBPairData()
|
|
pairdata.SetAttribute(key)
|
|
pairdata.SetValue(str(value))
|
|
pairdata.thisown = 0 # So that SWIG Proxy will not delete pairdata
|
|
self._mol.SetData(pairdata)
|
|
def __repr__(self):
|
|
return dict(self.iteritems()).__repr__()
|
|
|
|
if __name__=="__main__": #pragma: no cover
|
|
import doctest
|
|
doctest.testmod(verbose=True)
|