RDKit
Open-source cheminformatics and machine learning.
RDDepictor.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RDDEPICTOR_H
13#define RDDEPICTOR_H
14
16#include <RDGeneral/types.h>
17#include <Geometry/point.h>
18#include <boost/smart_ptr.hpp>
19
20namespace RDKit {
21class ROMol;
22}
23
24namespace RDDepict {
25
26RDKIT_DEPICTOR_EXPORT extern bool
27 preferCoordGen; // Ignored if coordgen support isn't active
28
29typedef boost::shared_array<double> DOUBLE_SMART_PTR;
30
31class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {  // exception type that carries a text message
32 public:
33 DepictException(const char *msg) : _msg(msg) {}  //!< construct from a C string; the text is copied into _msg
34 DepictException(const std::string msg) : _msg(msg) {}  //!< construct from a std::string (taken by value, then copied into _msg)
35 const char *what() const noexcept override { return _msg.c_str(); }  //!< returns the stored message as a C string
36 ~DepictException() noexcept override = default;
37
38 private:
39 std::string _msg;  //!< the message text returned by what()
40};
41
42//! \brief Set the path to the file containing the ring system templates
43/*!
44
45 \param templatePath the file path to a file containing the ring system
46 templates. Each template must be a single line in the file represented using
47 CXSMILES, and the structure should be a single ring system.
48
49 \throws DepictException if any of the templates are invalid
50*/
52setRingSystemTemplates(const std::string templatePath);
53
54//! \brief Add ring system templates to be used in 2D coordinate generation.
55/// If there are duplicates, the most recently added template will be used.
56/*!
57
58 \param templatePath the file path to a file containing the ring system
59 templates. Each template must be a single line in the file represented using
60 CXSMILES, and the structure should be a single ring system.
61
62 \throws DepictException if any of the templates are invalid
63*/
65addRingSystemTemplates(const std::string templatePath);
66
67//! \brief Load default ring system templates to be used in 2D coordinate
68//! generation
70
72 const RDGeom::INT_POINT2D_MAP *coordMap =
73 nullptr; //!< a map of int to Point2D, between atom IDs and their
74 //!< locations. This is the container the user needs to
75 //!< fill if he/she wants to specify coordinates for a portion
76 //!< of the molecule, defaults to 0
77 bool canonOrient = false; //!< canonicalize the orientation so that the long
78 //!< axes align with the x-axis etc.
79 bool clearConfs = true; //!< clear all existing conformations on the molecule
80 //!< before adding the 2D coordinates instead of
81 //!< simply adding to the list
82 unsigned int nFlipsPerSample = 0; //!< the number of rotatable bonds that are
83 //!< flipped at random for each sample
84 unsigned int nSamples = 0; //!< the number of samples
85 int sampleSeed = 0; //!< seed for the random sampling process
86 bool permuteDeg4Nodes = false; //!< try permuting the drawing order of bonds
87 //!< around atoms with four neighbors in order
88 //!< to improve the depiction
89 bool forceRDKit = false; //!< use RDKit to generate coordinates even if
90 //!< preferCoordGen is set to true
91 bool useRingTemplates = false; //!< whether to use ring system templates for
92 //!< generating initial coordinates
93
95};
96
97//! \brief Generate 2D coordinates (a depiction) for a molecule
98/*!
99
100 \param mol the molecule we are interested in
101
102 \param params parameters used for 2D coordinate generation
103
104 \return ID of the conformation added to the molecule containing the
105 2D coordinates
106
107*/
109 RDKit::ROMol &mol, const Compute2DCoordParameters &params);
110
111//! \brief Generate 2D coordinates (a depiction) for a molecule
112/*!
113
114 \param mol the molecule we are interested in
115
116 \param coordMap a map of int to Point2D, between atom IDs and
117 their locations. This is the container the user needs to fill if
118 he/she wants to specify coordinates for a portion of the molecule,
119 defaults to 0
120
121 \param canonOrient canonicalize the orientation so that the long
122 axes align with the x-axis etc.
123
124 \param clearConfs clear all existing conformations on the molecule
125 before adding the 2D coordinates instead of simply adding to the
126 list
127
128 \param nFlipsPerSample - the number of rotatable bonds that are
129 flipped at random for each sample
130
131 \param nSamples - the number of samples
132
133 \param sampleSeed - seed for the random sampling process
134
135 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
136 atoms with four neighbors in order to improve the depiction
137
138 \param forceRDKit - use RDKit to generate coordinates even if
139 preferCoordGen is set to true
140
141 \param useRingTemplates whether to use ring system templates for generating
142 initial coordinates
143
144 \return ID of the conformation added to the molecule containing the
145 2D coordinates
146
147*/
149 RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap = nullptr,
150 bool canonOrient = false, bool clearConfs = true,
151 unsigned int nFlipsPerSample = 0, unsigned int nSamples = 0,
152 int sampleSeed = 0, bool permuteDeg4Nodes = false, bool forceRDKit = false,
153 bool useRingTemplates = false);
154
155//! \brief Compute the 2D coordinates such that the interatom distances
156/// mimic those in a distance matrix
157/*!
158
159 This function generates 2D coordinates such that the inter-atom
160 distances mimic those specified via dmat. This is done by randomly
161 sampling(flipping) the rotatable bonds in the molecule and
162 evaluating a cost function which contains two components. The
163 first component is the sum of inverse of the squared inter-atom
164 distances, this helps in spreading the atoms far from each
165 other. The second component is the sum of squares of the
166 difference in distance between those in dmat and the generated
167 structure. The user can adjust the relative importance of the two
168 components via an adjustable parameter (see below)
169
170 ARGUMENTS:
171
172 \param mol - molecule to generate coordinates for
173
174 \param dmat - the distance matrix we want to mimic, this is a
175 symmetric N by N matrix where N is the number of atoms in mol. All
176 negative entries in dmat are ignored.
177
178 \param canonOrient - canonicalize the orientation after the 2D
179 embedding is done
180
181 \param clearConfs - clear any previously existing conformations on
182 mol before adding a conformation
183
184 \param weightDistMat - A value between 0.0 and 1.0, this
185 determines the importance of mimicking the inter-atom
186 distances in dmat. (1.0 - weightDistMat) is the weight associated
187 to spreading out the structure (density) in the cost function
188
189 \param nFlipsPerSample - the number of rotatable bonds that are
190 flipped at random for each sample
191
192 \param nSamples - the number of samples
193
194 \param sampleSeed - seed for the random sampling process
195
196 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
197 atoms with four neighbors in order to improve the depiction
198
199 \param forceRDKit - use RDKit to generate coordinates even if
200 preferCoordGen is set to true
201
202 \return ID of the conformation added to the molecule containing the
203 2D coordinates
204
205
206*/
208 RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat = nullptr,
209 bool canonOrient = true, bool clearConfs = true, double weightDistMat = 0.5,
210 unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100,
211 int sampleSeed = 25, bool permuteDeg4Nodes = true, bool forceRDKit = false);
212
213//! \brief Compute 2D coordinates where a piece of the molecule is
214/// constrained to have the same coordinates as a reference.
215/*!
216 This function generates a depiction for a molecule where a piece of the
217 molecule is constrained to have the same coordinates as a reference.
218
219 This is useful for, for example, generating depictions of SAR data
220 sets so that the cores of the molecules are all oriented the same way.
221
222 ARGUMENTS:
223
224 \param mol - the molecule to be aligned, this will come back
225 with a single conformer.
226 \param reference - a molecule with the reference atoms to align to;
227 this should have a depiction.
228 \param confId - (optional) the id of the reference conformation to use
229 \param referencePattern - (optional) a query molecule to be used to
230 generate the atom mapping between the molecule
231 and the reference.
232 \param acceptFailure - (optional) if true, standard depictions will be
233 generated for molecules that don't have a substructure
234 match to the reference; if false, throws a
235 DepictException.
236 \param forceRDKit - (optional) use RDKit to generate coordinates even if
237 preferCoordGen is set to true
238 \param allowOptionalAttachments - (optional) if true, terminal dummy atoms in
239 the reference are ignored if they match an implicit
240 hydrogen in the molecule, and a constrained
241 depiction is still attempted
242 RETURNS:
243
244 \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
245 the constrained depiction
246*/
248 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
249 const RDKit::ROMol *referencePattern =
250 static_cast<const RDKit::ROMol *>(nullptr),
251 bool acceptFailure = false, bool forceRDKit = false,
252 bool allowOptionalAttachments = false);
253
254//! \brief Compute 2D coordinates where a piece of the molecule is
255/// constrained to have the same coordinates as a reference.
256/*!
257 This function generates a depiction for a molecule where a piece of the
258 molecule is constrained to have the same coordinates as a reference.
259
260 This is useful for, for example, generating depictions of SAR data
261 sets so that the cores of the molecules are all oriented the same way.
262 This overload allows specifying the (referenceAtom, molAtom) index pairs
263 which should be matched as MatchVectType. Please note that the
264 vector can be shorter than the number of atoms in the reference.
265
266 ARGUMENTS:
267
268 \param mol - the molecule to be aligned, this will come back
269 with a single conformer.
270 \param reference - a molecule with the reference atoms to align to;
271 this should have a depiction.
272 \param refMatchVect - a MatchVectType that will be used to
273 generate the atom mapping between the molecule
274 and the reference.
275 \param confId - (optional) the id of the reference conformation to use
276 \param forceRDKit - (optional) use RDKit to generate coordinates even if
277 preferCoordGen is set to true
278*/
280 RDKit::ROMol &mol, const RDKit::ROMol &reference,
281 const RDKit::MatchVectType &refMatchVect, int confId = -1,
282 bool forceRDKit = false);
283
284//! \brief Generate a 2D depiction for a molecule where all or part of
285/// it mimics the coordinates of a 3D reference structure.
286/*!
287 Generates a depiction for a molecule where a piece of the molecule
288 is constrained to have coordinates similar to those of a 3D reference
289 structure.
290
291 ARGUMENTS:
292 \param mol - the molecule to be aligned, this will come back
293 with a single conformer containing 2D coordinates
294 \param reference - a molecule with the reference atoms to align to.
295 By default this should be the same as mol, but with
296 3D coordinates
297 \param confId - (optional) the id of the reference conformation to use
298 \param referencePattern - (optional) a query molecule to map a subset of
299 the reference onto the mol, so that only some of the
300 atoms are aligned.
301 \param acceptFailure - (optional) if true, standard depictions will be
302 generated
303 for molecules that don't match the reference or the
304 referencePattern; if false, throws a DepictException.
305 \param forceRDKit - (optional) use RDKit to generate coordinates even if
306 preferCoordGen is set to true
307*/
309 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
310 RDKit::ROMol *referencePattern = nullptr, bool acceptFailure = false,
311 bool forceRDKit = false);
312
313//! \brief Rotate the 2D depiction such that the majority of bonds have an
314//! angle with the X axis which is a multiple of 30 degrees.
315/*!
316
317 ARGUMENTS:
318 \param mol - the molecule to be rotated
319 \param confId - (optional) the id of the reference conformation to use
320 \param minimizeRotation - (optional) if false (the default), the molecule
321 is rotated such that the majority of bonds have an angle with the
322 X axis of 30 or 90 degrees. If true, the minimum rotation is applied
323 such that the majority of bonds have an angle with the X axis of
324 0, 30, 60, or 90 degrees, with the goal of altering the initial
325 orientation as little as possible.
326*/
327
329 int confId = -1,
330 bool minimizeRotation = false);
331
332//! \brief Normalizes the 2D depiction.
333/*!
334 If canonicalize is != 0, the depiction is subjected to a canonical
335 transformation such that its main axis is aligned along the X axis
336 (canonicalize >0, the default) or the Y axis (canonicalize <0).
337 If canonicalize is 0, no canonicalization takes place.
338 If scaleFactor is <0.0 (the default) the depiction is scaled such
339 that bond lengths conform to RDKit standards. The applied scaling
340 factor is returned.
341
342 ARGUMENTS:
343 \param mol - the molecule to be normalized
344 \param confId - (optional) the id of the reference conformation to use
345 \param canonicalize - (optional) if != 0, a canonical transformation is
346 applied: if >0 (the default), the main molecule axis is
347 aligned to the X axis, if <0 to the Y axis.
348 If 0, no canonical transformation is applied.
349 \param scaleFactor - (optional) if >0.0, the scaling factor to apply. The
350 default (-1.0) means that the depiction is automatically
351 scaled such that bond lengths are the standard RDKit
352 ones.
353 RETURNS:
354
355 \return the applied scaling factor.
356*/
357
359 int confId = -1,
360 int canonicalize = 1,
361 double scaleFactor = -1.0);
362}; // namespace RDDepict
363
364#endif
DepictException(const char *msg)
Definition: RDDepictor.h:33
~DepictException() noexcept override=default
DepictException(const std::string msg)
Definition: RDDepictor.h:34
const char * what() const noexcept override
Definition: RDDepictor.h:35
#define RDKIT_DEPICTOR_EXPORT
Definition: export.h:89
boost::shared_array< double > DOUBLE_SMART_PTR
Definition: EmbeddedFrag.h:26
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching3DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, RDKit::ROMol *referencePattern=nullptr, bool acceptFailure=false, bool forceRDKit=false)
Generate a 2D depiction for a molecule where all or part of it mimics the coordinates of a 3D referen...
void RDKIT_DEPICTOR_EXPORT loadDefaultRingSystemTemplates()
Load default ring system templates to be used in 2D coordinate generation.
RDKIT_DEPICTOR_EXPORT double normalizeDepiction(RDKit::ROMol &mol, int confId=-1, int canonicalize=1, double scaleFactor=-1.0)
Normalizes the 2D depiction.
void RDKIT_DEPICTOR_EXPORT addRingSystemTemplates(const std::string templatePath)
Add ring system templates to be used in 2D coordinate generation. If there are duplicates,...
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(RDKit::ROMol &mol, const Compute2DCoordParameters &params)
Generate 2D coordinates (a depiction) for a molecule.
RDKIT_DEPICTOR_EXPORT void straightenDepiction(RDKit::ROMol &mol, int confId=-1, bool minimizeRotation=false)
Rotate the 2D depiction such that the majority of bonds have an angle with the X axis which is a mult...
void RDKIT_DEPICTOR_EXPORT setRingSystemTemplates(const std::string templatePath)
Set the path to the file containing the ring system templates.
RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, const RDKit::ROMol *referencePattern=static_cast< const RDKit::ROMol * >(nullptr), bool acceptFailure=false, bool forceRDKit=false, bool allowOptionalAttachments=false)
Compute 2D coordinates where a piece of the molecule is constrained to have the same coordinates as a...
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoordsMimicDistMat(RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat=nullptr, bool canonOrient=true, bool clearConfs=true, double weightDistMat=0.5, unsigned int nFlipsPerSample=3, unsigned int nSamples=100, int sampleSeed=25, bool permuteDeg4Nodes=true, bool forceRDKit=false)
Compute the 2D coordinates such that the interatom distances mimic those in a distance matrix.
RDKIT_DEPICTOR_EXPORT bool preferCoordGen
std::map< int, Point2D > INT_POINT2D_MAP
Definition: point.h:550
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)