RDKit
Open-source cheminformatics and machine learning.
AtomPairGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_ATOMPAIRGEN_H_2018_06
13 #define RD_ATOMPAIRGEN_H_2018_06
14 
17 
18 namespace RDKit {
19 namespace AtomPair {
20 using namespace AtomPairs;
21 
23  : public AtomInvariantsGenerator {
24  const bool df_includeChirality;
25  const bool df_topologicalTorsionCorrection;
26 
27  public:
28  /**
29  \brief Construct a new AtomPairAtomInvGenerator object
30 
31  \param includeChirality toggles the inclusion of bits indicating R/S
32  chirality
33  \param topologicalTorsionCorrection when set, subtracts 2 from the invariants
34  returned; added so that TopologicalTorsionGenerator can use this generator
35  */
36  AtomPairAtomInvGenerator(bool includeChirality = false,
37  bool topologicalTorsionCorrection = false);
38 
39  std::vector<std::uint32_t> *getAtomInvariants(
40  const ROMol &mol) const override;
41 
42  std::string infoString() const override;
43  AtomPairAtomInvGenerator *clone() const override;
44 };
45 
46 /*!
47  \brief class that holds atom-pair fingerprint specific arguments
48 
49  */
50 template <typename OutputType>
52  : public FingerprintArguments<OutputType> {
53  public:
54  const bool df_includeChirality;
55  const bool df_use2D;
56  const unsigned int d_minDistance;
57  const unsigned int d_maxDistance;
58 
59  OutputType getResultSize() const override;
60 
61  std::string infoString() const override;
62 
63  /*!
64  \brief construct a new AtomPairArguments object
65 
66  \param countSimulation if set, use count simulation while generating the
67  fingerprint
68  \param includeChirality if set, chirality will be used in the atom
69  invariants, this is ignored if atomInvariantsGenerator is present for
70  the \c FingerprintGenerator that uses this
71  \param use2D if set, the 2D (topological) distance matrix will be
72  used
73  \param minDistance minimum distance between atoms to be considered in a
74  pair, default is 1 bond
75  \param maxDistance maximum distance between atoms to be considered in a
76  pair, default is maxPathLen-1 bonds
77  \param countBounds boundaries for count simulation, corresponding bit
78  will be set if the count is higher than the number provided for that spot
79  \param fpSize size of the generated fingerprint, does not affect the sparse
80  versions
81 
82  */
83  AtomPairArguments(const bool countSimulation = true,
84  const bool includeChirality = false,
85  const bool use2D = true, const unsigned int minDistance = 1,
86  const unsigned int maxDistance = (maxPathLen - 1),
87  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
88  const std::uint32_t fpSize = 2048);
89 };
90 
91 /*!
92  \brief class that holds atom-environment data needed for atom-pair fingerprint
93  generation
94 
95  */
96 template <typename OutputType>
98  : public AtomEnvironment<OutputType> {
99  const unsigned int d_atomIdFirst;
100  const unsigned int d_atomIdSecond;
101  const unsigned int d_distance;
102 
103  public:
105  const std::vector<std::uint32_t> *atomInvariants,
106  const std::vector<std::uint32_t> *bondInvariants,
107  const AdditionalOutput *additionalOutput,
108  const bool hashResults = false,
109  const std::uint64_t fpSize = 0) const override;
110 
111  /*!
112  \brief construct a new AtomPairAtomEnv object
113 
114  \param atomIdFirst id of the first atom of the atom-pair
115  \param atomIdSecond id of the second atom of the atom-pair
116  \param distance distance between the atoms
117  */
118  AtomPairAtomEnv(const unsigned int atomIdFirst,
119  const unsigned int atomIdSecond, const unsigned int distance);
120 };
121 
122 /*!
123  \brief class that generates atom-environments for atom-pair fingerprint
124 
125  */
126 template <typename OutputType>
128  : public AtomEnvironmentGenerator<OutputType> {
129  public:
130  std::vector<AtomEnvironment<OutputType> *> getEnvironments(
131  const ROMol &mol, FingerprintArguments<OutputType> *arguments,
132  const std::vector<std::uint32_t> *fromAtoms,
133  const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
134  const AdditionalOutput *additionalOutput,
135  const std::vector<std::uint32_t> *atomInvariants,
136  const std::vector<std::uint32_t> *bondInvariants,
137  const bool hashResults = false) const override;
138 
139  std::string infoString() const override;
140 };
141 
142 /*!
143  \brief helper function that generates a \c FingerprintGenerator that generates
144  atom-pair fingerprints
145  \tparam OutputType determines the size of the bitIds and the result, can be 32
146  or 64 bit unsigned integer
147  \param minDistance minimum distance between atoms to be considered in a pair,
148  default is 1 bond
149  \param maxDistance maximum distance between atoms to be considered in a pair,
150  default is maxPathLen-1 bonds
151  \param includeChirality if set, chirality will be used in the atom invariants,
152  this is ignored if atomInvariantsGenerator is provided
153  \param use2D if set, the 2D (topological) distance matrix will be used
154  \param atomInvariantsGenerator atom invariants to be used during fingerprint
155  generation
156  \param useCountSimulation if set, use count simulation while generating the
157  fingerprint
158  \param countBounds boundaries for count simulation, corresponding bit will be
159  set if the count is higher than the number provided for that spot
160  \param fpSize size of the generated fingerprint, does not affect the sparse
161  versions
162  \param ownsAtomInvGen if set, the atom invariants generator is destroyed
163  along with the fingerprint generator
164 
165  \return FingerprintGenerator<OutputType>* that generates atom-pair
166  fingerprints
167 
168  This generator supports the following \c AdditionalOutput types:
169  - \c atomToBits : which bits each atom is involved in
170  - \c atomCounts : how many bits each atom sets
171  - \c bitInfoMap : map from bitId to (atomId1, atomId2) pairs
172 
173  */
174 template <typename OutputType>
177  const unsigned int minDistance = 1,
178  const unsigned int maxDistance = maxPathLen - 1,
179  const bool includeChirality = false, const bool use2D = true,
180  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
181  const bool useCountSimulation = true, const std::uint32_t fpSize = 2048,
182  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
183  const bool ownsAtomInvGen = false);
184 
185 } // namespace AtomPair
186 } // namespace RDKit
187 
188 #endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
class that holds atom-pair fingerprint specific arguments
AtomPairArguments(const bool countSimulation=true, const bool includeChirality=false, const bool use2D=true, const unsigned int minDistance=1, const unsigned int maxDistance=(maxPathLen - 1), const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const std::uint32_t fpSize=2048)
construct a new AtomPairArguments object
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
class that holds atom-environment data needed for atom-pair fingerprint generation
OutputType getBitId(FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
AtomPairAtomEnv(const unsigned int atomIdFirst, const unsigned int atomIdSecond, const unsigned int distance)
construct a new AtomPairAtomEnv object
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
AtomPairAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
AtomPairAtomInvGenerator(bool includeChirality=false, bool topologicalTorsionCorrection=false)
Construct a new AtomPairAtomInvGenerator object.
class that generates atom-environments for atom-pair fingerprint
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getAtomPairGenerator(const unsigned int minDistance=1, const unsigned int maxDistance=maxPathLen - 1, const bool includeChirality=false, const bool use2D=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, const bool useCountSimulation=true, const std::uint32_t fpSize=2048, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const bool ownsAtomInvGen=false)
helper function that generates a FingerprintGenerator that generates atom-pair fingerprints
const unsigned int maxPathLen
Std stuff.
Definition: Abbreviations.h:19