mirror of
https://github.com/openbabel/openbabel.git
synced 2025-02-25 18:55:23 -06:00
Add description of functional groups to fingerprintformat and tidy up fingerprints FP3 and FP4
This commit is contained in:
@@ -1,3 +1,6 @@
|
||||
2007-08-30 Chris Morley
|
||||
|
||||
|
||||
2007-08-27 Jean Brefort <jean@acer>
|
||||
|
||||
* include/openbabel/generic.h: make OBUnitCell use SpaceGroup.
|
||||
|
||||
@@ -2,9 +2,11 @@
|
||||
# SMARTS Patterns for Functional Group Classification
|
||||
#
|
||||
# written by Christian Laggner
|
||||
# Copyright Inte:Ligand Software-Entwicklungs und Consulting GmbH
|
||||
# Released under the Lesser General Public License (LGPL license)
|
||||
# http://www.gnu.org/copyleft/lesser.html
|
||||
# Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH
|
||||
#
|
||||
# Released under the Lesser General Public License (LGPL license)
|
||||
# see http://www.gnu.org/copyleft/lesser.html
|
||||
# Modified from Version 221105
|
||||
#####################################################################################################
|
||||
|
||||
# General Stuff:
|
||||
@@ -31,7 +33,7 @@ Secondary_carbon: [CX4H2]([#6])[#6]
|
||||
|
||||
Tertiary_carbon: [CX4H1]([#6])([#6])[#6]
|
||||
|
||||
Quartary_carbon: [CX4]([#6])([#6])([#6])[#6]
|
||||
Quaternary_carbon: [CX4]([#6])([#6])([#6])[#6]
|
||||
|
||||
|
||||
# I.1.2 C-C double and Triple Bonds
|
||||
@@ -85,7 +87,7 @@ Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
|
||||
|
||||
Diarylether: [c][OX2][c]
|
||||
|
||||
Alkylarylthioether: [SX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])
|
||||
Alkylarylthioether: [SX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
|
||||
|
||||
Diarylthioether: [c][SX2][c]
|
||||
|
||||
@@ -94,36 +96,36 @@ Oxonium: [O+;!$([O]~[!#6]);!$([S]*~[#7,#8,#15,#16])]
|
||||
|
||||
# I.2.3 Amines
|
||||
|
||||
Amine: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
|
||||
# hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ...
|
||||
|
||||
# the following amines include also the protonated forms
|
||||
|
||||
Primary_aliph_amine: [NX3H2,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Secondary_aliph_amine: [NX3H1,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Tertiary_aliph_amine: [NX3H0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Quartary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Quaternary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Primary_arom_amine: [NX3H2,NX4H3+]c
|
||||
Primary_arom_amine: [NX3H2+0,NX4H3+]c
|
||||
|
||||
Secondary_arom_amine: [NX3H1,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Tertiary_arom_amine: [NX3H0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Quartary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Quaternary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Secondary_mixed_amine: [NX3H1,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Tertiary_mixed_amine: [NX3H0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Quartary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])]
|
||||
Quaternary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])]
|
||||
|
||||
Ammonium: [N+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
|
||||
# only C and H substituents allowed. NX4+ or Nv4+ is not recognized by Daylight's
|
||||
# depictmatch if less than four C are present...
|
||||
Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])]
|
||||
# only C and H substituents allowed. Quaternary or protonated amines
|
||||
# NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present
|
||||
|
||||
|
||||
# I.2.4 Others
|
||||
@@ -177,7 +179,7 @@ Thioketone: [#6][CX3](=[SX1])[#6]
|
||||
Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])]
|
||||
# nitrogen is not part of an amidelike strukture, nor of an aromatic ring, but can be part of an aminal or similar
|
||||
|
||||
Immonium: [N+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])]]
|
||||
Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])]
|
||||
|
||||
Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H]
|
||||
|
||||
@@ -337,13 +339,13 @@ Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=
|
||||
Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])]
|
||||
# may also be part of a ring / aromatic
|
||||
|
||||
Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH],$([C][#6])]=[NX2;!$(NC=[O,S])]
|
||||
Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])]
|
||||
# only basic amidines, not as part of aromatic ring (e.g. imidazole)
|
||||
|
||||
Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])]
|
||||
|
||||
Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])])]
|
||||
# #does not hit anhydrides of carboxylic acids withs hydroxamic acids
|
||||
Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])]
|
||||
# does not hit anhydrides of carboxylic acids withs hydroxamic acids
|
||||
|
||||
|
||||
Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
|
||||
@@ -483,15 +485,15 @@ Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
|
||||
|
||||
Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1]
|
||||
|
||||
Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7]))=[OX1]
|
||||
Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
|
||||
|
||||
Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1]
|
||||
|
||||
Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])=[SX1]
|
||||
Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
|
||||
|
||||
Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1]
|
||||
|
||||
Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])=[SX1]
|
||||
Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
|
||||
|
||||
Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1]
|
||||
|
||||
@@ -588,7 +590,7 @@ Diazonium: [#6][NX2+]#[NX1]
|
||||
|
||||
Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1]
|
||||
|
||||
Nitrosamide: [NX2](=[OX1])[N-*=O]
|
||||
Nitrosamide: [NX2](=[OX1])N-*=O
|
||||
# includes nitrososulfonamides
|
||||
|
||||
N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])]
|
||||
@@ -680,7 +682,7 @@ Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6]
|
||||
Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])]
|
||||
# similar to amine, but less restrictive: includes also amide- and aminal-analogues
|
||||
|
||||
Phosphine_oxide: [PX4;$([H3]=[OX]),$([H2](=[OX])[#6]),$([H1](=[OX])([#6])[#6]),$([H0](=[OX])([#6])([#6])[#6])]
|
||||
Phosphine_oxide: [PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])]
|
||||
|
||||
Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])]
|
||||
# similar to Ammonium
|
||||
@@ -807,7 +809,7 @@ Boronic_acid_derivative: [BX3]([!#6])([!#6])[!#6]
|
||||
Borohydride: [BH1,BH2,BH3,BH4]
|
||||
# at least one H attached to B
|
||||
|
||||
Quartary_boron: [BX4]
|
||||
Quaternary_boron: [BX4]
|
||||
# mostly borates (negative charge), in complex with Lewis-base
|
||||
|
||||
|
||||
@@ -838,7 +840,9 @@ NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1
|
||||
# toxic/reactive according to Maybridge's garbage filter
|
||||
|
||||
Spiro: [D4R;$(*(@*)(@*)(@*)@*)]
|
||||
# at least two different rings can be found which are sharing just one atom
|
||||
# at least two different rings can be found which are sharing just one atom.
|
||||
# these two rings can be connected by a third ring, so it matches also some
|
||||
# bridged systems, like morphine
|
||||
|
||||
Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]
|
||||
# two different rings sharing exactly two atoms
|
||||
@@ -881,10 +885,10 @@ Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C
|
||||
Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
|
||||
# 5 or 6-membered cyclic hemi-acetal
|
||||
|
||||
# Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)])
|
||||
##Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)])
|
||||
# pattern1 occours more than once (in same molecule, but moieties don't have to be adjacent!)
|
||||
|
||||
# Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)])
|
||||
##Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)])
|
||||
# pattern2 occours more than once (in same molecule, but moieties don't have to be adjacent!)
|
||||
|
||||
|
||||
@@ -917,21 +921,21 @@ Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([F][C]([F])([F])[F])]([FX1])([FX
|
||||
C_ONS_bond: [#6]~[#7,#8,#16]
|
||||
# probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter
|
||||
|
||||
# Mixture: (*).(*)
|
||||
## Mixture: (*).(*)
|
||||
# two or more seperate parts, may also be salt
|
||||
# Does not work with Openbabel 2.x (no component-level grouping)
|
||||
# component-level grouping is not yet supported in Open Babel Version 2.0
|
||||
|
||||
|
||||
Charged: [!+0]
|
||||
|
||||
Anion: [-1,-2,-3,-4,-5,-6,-7]
|
||||
|
||||
Cation: [+1,+2,+3,+4,+5,+6,+7]
|
||||
Kation: [+1,+2,+3,+4,+5,+6,+7]
|
||||
|
||||
# Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7])
|
||||
Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7])
|
||||
# two or more seperate components with opposite charges
|
||||
|
||||
# Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7])
|
||||
##Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7])
|
||||
# both negative and positive charges somewhere within the same molecule.
|
||||
|
||||
1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)]
|
||||
@@ -973,5 +977,7 @@ Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$(
|
||||
# Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string
|
||||
# depictmach does not find oxonium, sulfonium, or sulfoxides!
|
||||
|
||||
Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)]
|
||||
# Hits atoms with tetrahedral chirality, even if chiral center is not specified in the SMILES string
|
||||
# Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)]
|
||||
# Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string
|
||||
# "@?" (unspecified chirality) is not yet supported in Open Babel Version 2.0
|
||||
|
||||
@@ -12,7 +12,9 @@
|
||||
# SMARTS Patterns are used by finger3.cpp:PatternFP #
|
||||
# Format of each line is a SMARTS pattern, then optionally #
|
||||
# followed by a tab character and a pattern number and/or description #
|
||||
# (everything after the tab will be ignored by the code #
|
||||
# (everything after the tab will be ignored by the code) #
|
||||
# A file of this format needs the same first line as this one. #
|
||||
# An alternative format, as in SMARTS_InteLigand.txt, can also be used #
|
||||
# #
|
||||
# INCOMPLETE!! Really only useful to test the fingerprint FP3 #
|
||||
##############################################################################
|
||||
|
||||
@@ -53,9 +53,12 @@ public:
|
||||
virtual ~OBFingerprint(){}
|
||||
|
||||
/// Sets the nth bit
|
||||
void SetBit(std::vector<unsigned int>& vec, unsigned int n);
|
||||
void SetBit(std::vector<unsigned int>& vec, const unsigned int n);
|
||||
|
||||
/// Repeatedly ORs the top half with the bottom half until no smaller than nbits
|
||||
///return true if the nth bit is set;
|
||||
bool GetBit(const std::vector<unsigned int>& vec, const unsigned int n);
|
||||
|
||||
/// Repeatedly ORs the top half with the bottom half until no smaller than nbits
|
||||
void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
|
||||
|
||||
/// \return fingerprint in vector, which may be resized, folded to nbits (if nbits!=0)
|
||||
@@ -65,6 +68,13 @@ public:
|
||||
enum FptFlag{FPT_UNIQUEBITS=1};
|
||||
virtual unsigned int Flags() { return 0;};
|
||||
|
||||
/// Returns a description of each bit that is set (or unset, if bSet=false)
|
||||
virtual std::string DescribeBits(const std:: vector<unsigned int> fp, bool bSet=true)
|
||||
{
|
||||
std::string txt("Bit descriptions are not available for this fingerprint type");
|
||||
return txt;
|
||||
}
|
||||
|
||||
/// \return the Tanimoto coefficient between two vectors (vector<unsigned int>& SeekPositions)
|
||||
static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
|
||||
|
||||
|
||||
@@ -33,11 +33,17 @@ namespace OpenBabel
|
||||
|
||||
const unsigned int OBFingerprint::bitsperint = 8 * sizeof(unsigned int);
|
||||
|
||||
void OBFingerprint::SetBit(vector<unsigned int>& vec, unsigned int n)
|
||||
void OBFingerprint::SetBit(vector<unsigned int>& vec, const unsigned int n)
|
||||
{
|
||||
vec[n/Getbitsperint()] |= (1 << (n % Getbitsperint()));
|
||||
}
|
||||
|
||||
bool OBFingerprint::GetBit(const vector<unsigned int>& vec, const unsigned int n)
|
||||
{
|
||||
unsigned int word =vec[n/Getbitsperint()];
|
||||
return (word &= (1 << (n % Getbitsperint())))!=0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////
|
||||
void OBFingerprint::Fold(vector<unsigned int>& vec, unsigned int nbits)
|
||||
{
|
||||
|
||||
@@ -31,65 +31,65 @@ namespace OpenBabel
|
||||
class PatternFP : public OBFingerprint
|
||||
{
|
||||
private:
|
||||
vector<string> smartsStrings;
|
||||
vector<string> smartsStrings;
|
||||
protected:
|
||||
string _patternsfile;
|
||||
string _patternsfile;
|
||||
|
||||
public:
|
||||
PatternFP(const char* ID, const char* filename=NULL,
|
||||
bool IsDefault=false) : OBFingerprint(ID, IsDefault)
|
||||
{
|
||||
if(filename==NULL)
|
||||
_patternsfile="patterns.txt";
|
||||
else
|
||||
_patternsfile = filename;
|
||||
};
|
||||
|
||||
virtual const char* Description()
|
||||
{
|
||||
static string desc;
|
||||
PatternFP(const char* ID, const char* filename=NULL,
|
||||
bool IsDefault=false) : OBFingerprint(ID, IsDefault)
|
||||
{
|
||||
if(filename==NULL)
|
||||
_patternsfile="patterns.txt";
|
||||
else
|
||||
_patternsfile = filename;
|
||||
};
|
||||
|
||||
virtual const char* Description()
|
||||
{
|
||||
static string desc;
|
||||
desc = "SMARTS patterns specified in the file " + _patternsfile;
|
||||
return (desc.c_str());
|
||||
};
|
||||
};
|
||||
|
||||
//Each bits represents a single substructure; no need for confirmation when substructure searching
|
||||
virtual unsigned int Flags() { return FPT_UNIQUEBITS;};
|
||||
//Each bit represents a single substructure; no need for confirmation when substructure searching
|
||||
virtual unsigned int Flags() { return FPT_UNIQUEBITS;};
|
||||
|
||||
bool GetFingerprint(OBBase* pOb, vector<unsigned int>&fp, int nbits)
|
||||
{
|
||||
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
|
||||
if(!pmol)
|
||||
return false;
|
||||
|
||||
//Read patterns file if it has not been done already
|
||||
if(smartsStrings.empty())
|
||||
ReadPatternFile(_patternsfile, smartsStrings);
|
||||
bool GetFingerprint(OBBase* pOb, vector<unsigned int>&fp, int nbits)
|
||||
{
|
||||
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
|
||||
if(!pmol)
|
||||
return false;
|
||||
|
||||
//Read patterns file if it has not been done already
|
||||
if(smartsStrings.empty())
|
||||
ReadPatternFile(_patternsfile, smartsStrings);
|
||||
|
||||
//Make fp size the smallest power of two to contain the patterns
|
||||
unsigned int n=Getbitsperint();
|
||||
while(n<smartsStrings.size())n*=2;
|
||||
fp.resize(n/Getbitsperint());
|
||||
//Make fp size the smallest power of two to contain the patterns
|
||||
unsigned int n=Getbitsperint();
|
||||
while(n<smartsStrings.size())n*=2;
|
||||
fp.resize(n/Getbitsperint());
|
||||
|
||||
for(n=0;n<smartsStrings.size();++n)
|
||||
{
|
||||
OBSmartsPattern sp;
|
||||
sp.Init(smartsStrings[n]);
|
||||
if(sp.Match(*pmol))
|
||||
SetBit(fp, n);
|
||||
}
|
||||
for(n=0;n<smartsStrings.size();++n)
|
||||
{
|
||||
OBSmartsPattern sp;
|
||||
sp.Init(smartsStrings[n]);
|
||||
if(sp.Match(*pmol))
|
||||
SetBit(fp, n);
|
||||
}
|
||||
|
||||
if(nbits)
|
||||
Fold(fp, nbits);
|
||||
return true;
|
||||
};
|
||||
if(nbits)
|
||||
Fold(fp, nbits);
|
||||
return true;
|
||||
};
|
||||
|
||||
bool ReadPatternFile(const string& filename, vector<string>& lines)
|
||||
{
|
||||
//Reads two types of file: SMARTS + comments and vice versa
|
||||
//depending on whether the first line is #Comments after SMARTS
|
||||
//Output strings in vector are SMARTS + comments
|
||||
string file = filename;
|
||||
ifstream ifs;
|
||||
bool ReadPatternFile(const string& filename, vector<string>& lines)
|
||||
{
|
||||
//Reads two types of file: SMARTS + comments and vice versa
|
||||
//depending on whether the first line is #Comments after SMARTS
|
||||
//Output strings in vector are SMARTS + comments
|
||||
string file = filename;
|
||||
ifstream ifs;
|
||||
#ifdef HAVE_SSTREAM
|
||||
stringstream errorMsg;
|
||||
#else
|
||||
@@ -102,49 +102,66 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!(ifs))
|
||||
{
|
||||
errorMsg << "Cannot open " << filename << endl;
|
||||
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
|
||||
return false;
|
||||
}
|
||||
string smarts, formatline;
|
||||
if(!(ifs))
|
||||
{
|
||||
errorMsg << "Cannot open " << filename << endl;
|
||||
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
|
||||
return false;
|
||||
}
|
||||
string smarts, formatline;
|
||||
|
||||
if(!getline(ifs, formatline)) return false;
|
||||
if(formatline=="#Comments after SMARTS")
|
||||
{
|
||||
while(ifs.good())
|
||||
{
|
||||
if( getline(ifs,smarts)
|
||||
&& smarts.size() > 0
|
||||
&& smarts[0] != '#')
|
||||
lines.push_back(smarts); //leave the comments in
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Christian Laggner's format: SMARTS at end of line
|
||||
while(ifs.good())
|
||||
{
|
||||
if( getline(ifs,smarts) && smarts[0]!='#')
|
||||
{
|
||||
if(!getline(ifs, formatline)) return false;
|
||||
if(Trim(formatline)=="#Comments after SMARTS")
|
||||
{
|
||||
while(ifs.good())
|
||||
{
|
||||
if( getline(ifs,smarts)
|
||||
&& Trim(smarts).size() > 0
|
||||
&& smarts[0] != '#')
|
||||
lines.push_back(smarts); //leave the comments in
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Christian Laggner's format: SMARTS at end of line
|
||||
while(ifs.good())
|
||||
{
|
||||
if( getline(ifs,smarts) && smarts[0]!='#')
|
||||
{
|
||||
string::size_type pos = smarts.find(':');
|
||||
if(pos!=string::npos)
|
||||
{
|
||||
pos = smarts.find_first_not_of(" \t", pos+1);
|
||||
if(pos!=string::npos)
|
||||
lines.push_back(smarts.substr(pos) + ' ' + smarts.substr(0,pos));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if(pos!=string::npos)
|
||||
{
|
||||
pos = smarts.find_first_not_of(" \t", pos+1);
|
||||
if(pos!=string::npos)
|
||||
lines.push_back(Trim(smarts.substr(pos)) + ' ' + smarts.substr(0,pos));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ifs)
|
||||
ifs.close();
|
||||
return true;
|
||||
}
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
virtual string DescribeBits(const vector<unsigned int> fp, bool bSet=true)
|
||||
{
|
||||
stringstream ss;
|
||||
ss << "out of possible " << smartsStrings.size();
|
||||
for(int i=0; i<smartsStrings.size(); ++i)
|
||||
{
|
||||
if(GetBit(fp, i)==bSet)
|
||||
{
|
||||
string::size_type pos = smartsStrings[i].find(' ');
|
||||
if(pos!=string::npos)
|
||||
ss << '\n' << smartsStrings[i].substr(pos+1);
|
||||
}
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
};
|
||||
//***********************************************
|
||||
//Make a global instance
|
||||
PatternFP thePatternFP("FP3");
|
||||
|
||||
@@ -32,15 +32,16 @@ namespace OpenBabel
|
||||
|
||||
virtual const char* Description() //required
|
||||
{ return
|
||||
"Fingerprint format\n \
|
||||
Constructs and displays fingerprints and (for multiple input objects)\n \
|
||||
the Tanimoto coefficient and whether a superstructure of the first object\n \
|
||||
Options e.g. -xfFP3 -xn128\n \
|
||||
f<id> fingerprint type\n \
|
||||
N# fold to specified number of bits, 32, 64, 128, etc.\n \
|
||||
h hex output when multiple molecules\n \
|
||||
F displays the available fingerprint types\n \
|
||||
";
|
||||
"Fingerprint format\n"
|
||||
"Constructs and displays fingerprints and (for multiple input objects)\n"
|
||||
"the Tanimoto coefficient and whether a superstructure of the first object\n"
|
||||
"Output options e.g. -xfFP3 -xN128\n"
|
||||
" f<id> fingerprint type\n"
|
||||
" N# fold to specified number of bits, 32, 64, 128, etc.\n"
|
||||
" h hex output when multiple molecules\n"
|
||||
" s describe each set bit\n"
|
||||
" u describe each unset bit\n"
|
||||
;
|
||||
};
|
||||
|
||||
virtual unsigned int Flags(){return NOTREADABLE;};
|
||||
@@ -60,13 +61,6 @@ Options e.g. -xfFP3 -xn128\n \
|
||||
bool FingerprintFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
|
||||
{
|
||||
ostream &ofs = *pConv->GetOutStream();
|
||||
/* if(pConv->IsOption("F"))
|
||||
{
|
||||
FOR_EACH(OBFingerprint, iter)
|
||||
ofs << iter.ID() << " -- " << iter->Description() << endl;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
bool hexoutput=false;
|
||||
if(pConv->IsOption("h") || (pConv->GetOutputIndex()==1 && pConv->IsLast()))
|
||||
@@ -111,7 +105,7 @@ Options e.g. -xfFP3 -xn128\n \
|
||||
for(;wd;wd=wd<<1)//count bits set by shifting into sign bit until word==0
|
||||
if(wd<0) ++bitsset;
|
||||
}
|
||||
ofs << " " << bitsset << " bits set. ";
|
||||
ofs << " " << bitsset << " bits set ";
|
||||
}
|
||||
|
||||
if(pConv->GetOutputIndex()==1)
|
||||
@@ -122,7 +116,13 @@ Options e.g. -xfFP3 -xn128\n \
|
||||
firstname=pmol->GetTitle();
|
||||
if(firstname.empty())
|
||||
firstname = "first mol";
|
||||
}
|
||||
|
||||
if(pConv->IsOption("s"))
|
||||
ofs << pFP->DescribeBits(fptvec);
|
||||
if(pConv->IsOption("u"))
|
||||
ofs << pFP->DescribeBits(fptvec, false);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
ofs << " Tanimoto from " << firstname << " = " << OBFingerprint::Tanimoto(firstfp, fptvec);
|
||||
|
||||
@@ -24,6 +24,7 @@ GNU General Public License for more details.
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <stdarg.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user