Add description of functional groups to fingerprintformat and tidy up fingerprints FP3 and FP4

This commit is contained in:
Chris Morley
2007-08-30 13:41:36 +00:00
parent b072abea4f
commit ed0bee278b
8 changed files with 192 additions and 147 deletions

View File

@@ -1,3 +1,6 @@
2007-08-30 Chris Morley
2007-08-27 Jean Brefort <jean@acer>
* include/openbabel/generic.h: make OBUnitCell use SpaceGroup.

View File

@@ -2,9 +2,11 @@
# SMARTS Patterns for Functional Group Classification
#
# written by Christian Laggner
# Copyright Inte:Ligand Software-Entwicklungs und Consulting GmbH
# Released under the Lesser General Public License (LGPL license)
# http://www.gnu.org/copyleft/lesser.html
# Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH
#
# Released under the Lesser General Public License (LGPL license)
# see http://www.gnu.org/copyleft/lesser.html
# Modified from Version 221105
#####################################################################################################
# General Stuff:
@@ -31,7 +33,7 @@ Secondary_carbon: [CX4H2]([#6])[#6]
Tertiary_carbon: [CX4H1]([#6])([#6])[#6]
Quartary_carbon: [CX4]([#6])([#6])([#6])[#6]
Quaternary_carbon: [CX4]([#6])([#6])([#6])[#6]
# I.1.2 C-C double and Triple Bonds
@@ -85,7 +87,7 @@ Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
Diarylether: [c][OX2][c]
Alkylarylthioether: [SX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])
Alkylarylthioether: [SX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
Diarylthioether: [c][SX2][c]
@@ -94,36 +96,36 @@ Oxonium: [O+;!$([O]~[!#6]);!$([S]*~[#7,#8,#15,#16])]
# I.2.3 Amines
Amine: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
# hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ...
# the following amines include also the protonated forms
Primary_aliph_amine: [NX3H2,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Secondary_aliph_amine: [NX3H1,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Tertiary_aliph_amine: [NX3H0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Quartary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Quaternary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
Primary_arom_amine: [NX3H2,NX4H3+]c
Primary_arom_amine: [NX3H2+0,NX4H3+]c
Secondary_arom_amine: [NX3H1,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
Tertiary_arom_amine: [NX3H0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
Quartary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
Quaternary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
Secondary_mixed_amine: [NX3H1,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])]
Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])]
Tertiary_mixed_amine: [NX3H0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])]
Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])]
Quartary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])]
Quaternary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])]
Ammonium: [N+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
# only C and H substituents allowed. NX4+ or Nv4+ is not recognized by Daylight's
# depictmatch if less than four C are present...
Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])]
# only C and H substituents allowed. Quaternary or protonated amines
# NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present
# I.2.4 Others
@@ -177,7 +179,7 @@ Thioketone: [#6][CX3](=[SX1])[#6]
Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])]
# nitrogen is not part of an amide-like structure, nor of an aromatic ring, but can be part of an aminal or similar
Immonium: [N+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])]]
Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])]
Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H]
@@ -337,13 +339,13 @@ Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=
Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])]
# may also be part of a ring / aromatic
Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH],$([C][#6])]=[NX2;!$(NC=[O,S])]
Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])]
# only basic amidines, not as part of aromatic ring (e.g. imidazole)
Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])]
Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])])]
# #does not hit anhydrides of carboxylic acids withs hydroxamic acids
Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])]
# does not hit anhydrides of carboxylic acids with hydroxamic acids
Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
@@ -483,15 +485,15 @@ Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1]
Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7]))=[OX1]
Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1]
Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])=[SX1]
Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1]
Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])=[SX1]
Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1]
@@ -588,7 +590,7 @@ Diazonium: [#6][NX2+]#[NX1]
Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1]
Nitrosamide: [NX2](=[OX1])[N-*=O]
Nitrosamide: [NX2](=[OX1])N-*=O
# includes nitrososulfonamides
N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])]
@@ -680,7 +682,7 @@ Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6]
Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])]
# similar to amine, but less restrictive: includes also amide- and aminal-analogues
Phosphine_oxide: [PX4;$([H3]=[OX]),$([H2](=[OX])[#6]),$([H1](=[OX])([#6])[#6]),$([H0](=[OX])([#6])([#6])[#6])]
Phosphine_oxide: [PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])]
Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])]
# similar to Ammonium
@@ -807,7 +809,7 @@ Boronic_acid_derivative: [BX3]([!#6])([!#6])[!#6]
Borohydride: [BH1,BH2,BH3,BH4]
# at least one H attached to B
Quartary_boron: [BX4]
Quaternary_boron: [BX4]
# mostly borates (negative charge), in complex with Lewis-base
@@ -838,7 +840,9 @@ NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1
# toxic/reactive according to Maybridge's garbage filter
Spiro: [D4R;$(*(@*)(@*)(@*)@*)]
# at least two different rings can be found which are sharing just one atom
# at least two different rings can be found which are sharing just one atom.
# these two rings can be connected by a third ring, so it matches also some
# bridged systems, like morphine
Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]
# two different rings sharing exactly two atoms
@@ -881,10 +885,10 @@ Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C
Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
# 5 or 6-membered cyclic hemi-acetal
# Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)])
##Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)])
# pattern1 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
# Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)])
##Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)])
# pattern2 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
@@ -917,21 +921,21 @@ Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([F][C]([F])([F])[F])]([FX1])([FX
C_ONS_bond: [#6]~[#7,#8,#16]
# probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter
# Mixture: (*).(*)
## Mixture: (*).(*)
# two or more separate parts, may also be salt
# Does not work with Openbabel 2.x (no component-level grouping)
# component-level grouping is not yet supported in Open Babel Version 2.0
Charged: [!+0]
Anion: [-1,-2,-3,-4,-5,-6,-7]
Cation: [+1,+2,+3,+4,+5,+6,+7]
Kation: [+1,+2,+3,+4,+5,+6,+7]
# Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7])
Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7])
# two or more separate components with opposite charges
# Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7])
##Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7])
# both negative and positive charges somewhere within the same molecule.
1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)]
@@ -973,5 +977,7 @@ Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$(
# Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string
# depictmatch does not find oxonium, sulfonium, or sulfoxides!
Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)]
# Hits atoms with tetrahedral chirality, even if chiral center is not specified in the SMILES string
# Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)]
# Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string
# "@?" (unspecified chirality) is not yet supported in Open Babel Version 2.0

View File

@@ -12,7 +12,9 @@
# SMARTS Patterns are used by finger3.cpp:PatternFP #
# Format of each line is a SMARTS pattern, then optionally #
# followed by a tab character and a pattern number and/or description #
# (everything after the tab will be ignored by the code #
# (everything after the tab will be ignored by the code) #
# A file of this format needs the same first line as this one. #
# An alternative format, as in SMARTS_InteLigand.txt, can also be used #
# #
# INCOMPLETE!! Really only useful to test the fingerprint FP3 #
##############################################################################

View File

@@ -53,9 +53,12 @@ public:
virtual ~OBFingerprint(){}
/// Sets the nth bit
void SetBit(std::vector<unsigned int>& vec, unsigned int n);
void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
/// Repeatedly ORs the top half with the bottom half until no smaller than nbits
/// \return true if the nth bit is set
bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
/// Repeatedly ORs the top half with the bottom half until no smaller than nbits
void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
/// \return fingerprint in vector, which may be resized, folded to nbits (if nbits!=0)
@@ -65,6 +68,13 @@ public:
enum FptFlag{FPT_UNIQUEBITS=1};
virtual unsigned int Flags() { return 0;};
/// Describes the bits of a fingerprint that are set (or unset, if bSet=false).
/// This default implementation has no per-bit information and ignores both
/// arguments.
/// \param fp   the fingerprint (not used here)
/// \param bSet whether set or unset bits are wanted (not used here)
/// \return a fixed message saying that bit descriptions are unavailable
virtual std::string DescribeBits(const std::vector<unsigned int> fp, bool bSet=true)
{
  return std::string("Bit descriptions are not available for this fingerprint type");
}
/// \return the Tanimoto coefficient between two vectors (vector<unsigned int>& SeekPositions)
static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);

View File

@@ -33,11 +33,17 @@ namespace OpenBabel
const unsigned int OBFingerprint::bitsperint = 8 * sizeof(unsigned int);
void OBFingerprint::SetBit(vector<unsigned int>& vec, unsigned int n)
/// Sets bit n of the fingerprint.
/// Bits are packed Getbitsperint() to a vector element. vec is indexed
/// directly and is not resized, so it must already be large enough to hold bit n.
void OBFingerprint::SetBit(vector<unsigned int>& vec, const unsigned int n)
{
  // Shift an unsigned one: with a plain int, selecting the top bit of a word
  // would shift into the sign bit.
  vec[n/Getbitsperint()] |= (1u << (n % Getbitsperint()));
}
bool OBFingerprint::GetBit(const vector<unsigned int>& vec, const unsigned int n)
{
unsigned int word =vec[n/Getbitsperint()];
return (word &= (1 << (n % Getbitsperint())))!=0;
}
////////////////////////////////////////
void OBFingerprint::Fold(vector<unsigned int>& vec, unsigned int nbits)
{

View File

@@ -31,65 +31,65 @@ namespace OpenBabel
class PatternFP : public OBFingerprint
{
private:
vector<string> smartsStrings;
vector<string> smartsStrings;
protected:
string _patternsfile;
string _patternsfile;
public:
PatternFP(const char* ID, const char* filename=NULL,
bool IsDefault=false) : OBFingerprint(ID, IsDefault)
{
if(filename==NULL)
_patternsfile="patterns.txt";
else
_patternsfile = filename;
};
virtual const char* Description()
{
static string desc;
PatternFP(const char* ID, const char* filename=NULL,
bool IsDefault=false) : OBFingerprint(ID, IsDefault)
{
if(filename==NULL)
_patternsfile="patterns.txt";
else
_patternsfile = filename;
};
virtual const char* Description()
{
static string desc;
desc = "SMARTS patterns specified in the file " + _patternsfile;
return (desc.c_str());
};
};
//Each bits represents a single substructure; no need for confirmation when substructure searching
virtual unsigned int Flags() { return FPT_UNIQUEBITS;};
//Each bit represents a single substructure; no need for confirmation when substructure searching
virtual unsigned int Flags() { return FPT_UNIQUEBITS;};
bool GetFingerprint(OBBase* pOb, vector<unsigned int>&fp, int nbits)
{
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
if(!pmol)
return false;
//Read patterns file if it has not been done already
if(smartsStrings.empty())
ReadPatternFile(_patternsfile, smartsStrings);
bool GetFingerprint(OBBase* pOb, vector<unsigned int>&fp, int nbits)
{
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
if(!pmol)
return false;
//Read patterns file if it has not been done already
if(smartsStrings.empty())
ReadPatternFile(_patternsfile, smartsStrings);
//Make fp size the smallest power of two to contain the patterns
unsigned int n=Getbitsperint();
while(n<smartsStrings.size())n*=2;
fp.resize(n/Getbitsperint());
//Make fp size the smallest power of two to contain the patterns
unsigned int n=Getbitsperint();
while(n<smartsStrings.size())n*=2;
fp.resize(n/Getbitsperint());
for(n=0;n<smartsStrings.size();++n)
{
OBSmartsPattern sp;
sp.Init(smartsStrings[n]);
if(sp.Match(*pmol))
SetBit(fp, n);
}
for(n=0;n<smartsStrings.size();++n)
{
OBSmartsPattern sp;
sp.Init(smartsStrings[n]);
if(sp.Match(*pmol))
SetBit(fp, n);
}
if(nbits)
Fold(fp, nbits);
return true;
};
if(nbits)
Fold(fp, nbits);
return true;
};
bool ReadPatternFile(const string& filename, vector<string>& lines)
{
//Reads two types of file: SMARTS + comments and vice versa
//depending on whether the first line is #Comments after SMARTS
//Output strings in vector are SMARTS + comments
string file = filename;
ifstream ifs;
bool ReadPatternFile(const string& filename, vector<string>& lines)
{
//Reads two types of file: SMARTS + comments and vice versa
//depending on whether the first line is #Comments after SMARTS
//Output strings in vector are SMARTS + comments
string file = filename;
ifstream ifs;
#ifdef HAVE_SSTREAM
stringstream errorMsg;
#else
@@ -102,49 +102,66 @@ public:
return false;
}
if(!(ifs))
{
errorMsg << "Cannot open " << filename << endl;
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
return false;
}
string smarts, formatline;
if(!(ifs))
{
errorMsg << "Cannot open " << filename << endl;
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
return false;
}
string smarts, formatline;
if(!getline(ifs, formatline)) return false;
if(formatline=="#Comments after SMARTS")
{
while(ifs.good())
{
if( getline(ifs,smarts)
&& smarts.size() > 0
&& smarts[0] != '#')
lines.push_back(smarts); //leave the comments in
}
}
else
{
// Christian Laggner's format: SMARTS at end of line
while(ifs.good())
{
if( getline(ifs,smarts) && smarts[0]!='#')
{
if(!getline(ifs, formatline)) return false;
if(Trim(formatline)=="#Comments after SMARTS")
{
while(ifs.good())
{
if( getline(ifs,smarts)
&& Trim(smarts).size() > 0
&& smarts[0] != '#')
lines.push_back(smarts); //leave the comments in
}
}
else
{
// Christian Laggner's format: SMARTS at end of line
while(ifs.good())
{
if( getline(ifs,smarts) && smarts[0]!='#')
{
string::size_type pos = smarts.find(':');
if(pos!=string::npos)
{
pos = smarts.find_first_not_of(" \t", pos+1);
if(pos!=string::npos)
lines.push_back(smarts.substr(pos) + ' ' + smarts.substr(0,pos));
}
}
}
}
if(pos!=string::npos)
{
pos = smarts.find_first_not_of(" \t", pos+1);
if(pos!=string::npos)
lines.push_back(Trim(smarts.substr(pos)) + ' ' + smarts.substr(0,pos));
}
}
}
}
if (ifs)
ifs.close();
return true;
}
};
return true;
}
/// Lists the descriptions of the patterns whose bits are set in fp
/// (or unset, if bSet=false).
/// \param fp   fingerprint in which bit i corresponds to smartsStrings[i]
/// \param bSet true to describe the set bits, false to describe the unset ones
/// \return "out of possible N", where N is the number of patterns, followed by
///         one newline-prefixed description for each reported bit
virtual string DescribeBits(const vector<unsigned int> fp, bool bSet=true)
{
  stringstream ss;
  ss << "out of possible " << smartsStrings.size();

  // fp can hold fewer bits than there are patterns (for instance after it has
  // been folded); only bits that fp actually contains are examined, so
  // that the vector is never indexed past its end.
  const vector<string>::size_type bitsInFp = fp.size() * Getbitsperint();
  for(unsigned int i=0; i<smartsStrings.size() && i<bitsInFp; ++i)
  {
    if(GetBit(fp, i)!=bSet)
      continue;
    //The text after the first space of the stored line is used as the description;
    //lines without a space contribute nothing.
    const string::size_type pos = smartsStrings[i].find(' ');
    if(pos!=string::npos)
      ss << '\n' << smartsStrings[i].substr(pos+1);
  }
  return ss.str();
}
};
//***********************************************
//Make a global instance
PatternFP thePatternFP("FP3");

View File

@@ -32,15 +32,16 @@ namespace OpenBabel
virtual const char* Description() //required
{ return
"Fingerprint format\n \
Constructs and displays fingerprints and (for multiple input objects)\n \
the Tanimoto coefficient and whether a superstructure of the first object\n \
Options e.g. -xfFP3 -xn128\n \
f<id> fingerprint type\n \
N# fold to specified number of bits, 32, 64, 128, etc.\n \
h hex output when multiple molecules\n \
F displays the available fingerprint types\n \
";
"Fingerprint format\n"
"Constructs and displays fingerprints and (for multiple input objects)\n"
"the Tanimoto coefficient and whether a superstructure of the first object\n"
"Output options e.g. -xfFP3 -xN128\n"
" f<id> fingerprint type\n"
" N# fold to specified number of bits, 32, 64, 128, etc.\n"
" h hex output when multiple molecules\n"
" s describe each set bit\n"
" u describe each unset bit\n"
;
};
virtual unsigned int Flags(){return NOTREADABLE;};
@@ -60,13 +61,6 @@ Options e.g. -xfFP3 -xn128\n \
bool FingerprintFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
{
ostream &ofs = *pConv->GetOutStream();
/* if(pConv->IsOption("F"))
{
FOR_EACH(OBFingerprint, iter)
ofs << iter.ID() << " -- " << iter->Description() << endl;
return true;
}
*/
bool hexoutput=false;
if(pConv->IsOption("h") || (pConv->GetOutputIndex()==1 && pConv->IsLast()))
@@ -111,7 +105,7 @@ Options e.g. -xfFP3 -xn128\n \
for(;wd;wd=wd<<1)//count bits set by shifting into sign bit until word==0
if(wd<0) ++bitsset;
}
ofs << " " << bitsset << " bits set. ";
ofs << " " << bitsset << " bits set ";
}
if(pConv->GetOutputIndex()==1)
@@ -122,7 +116,13 @@ Options e.g. -xfFP3 -xn128\n \
firstname=pmol->GetTitle();
if(firstname.empty())
firstname = "first mol";
}
if(pConv->IsOption("s"))
ofs << pFP->DescribeBits(fptvec);
if(pConv->IsOption("u"))
ofs << pFP->DescribeBits(fptvec, false);
}
else
{
ofs << " Tanimoto from " << firstname << " = " << OBFingerprint::Tanimoto(firstfp, fptvec);

View File

@@ -24,6 +24,7 @@ GNU General Public License for more details.
#include <map>
#include <set>
#include <vector>
#include <stdarg.h>
using namespace std;