RDKit
Open-source cheminformatics and machine learning.
Canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2004-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_CANON_H
12#define RD_CANON_H
13
15#include <tuple>
16#include <boost/dynamic_bitset.hpp>
18
19namespace RDKit {
20class ROMol;
21class Atom;
22class Bond;
23
24namespace Canon {
25const int MAX_NATOMS = 5000; //!< used in the canonical traversal code
26const int MAX_CYCLES = 1000; //!< used in the canonical traversal code
27const int MAX_BONDTYPE = 32; //!< used in the canonical traversal code
28
29//! used in traversals of the molecule
30typedef enum {
31 WHITE_NODE = 0, //!< not visited
32 GREY_NODE, //!< visited, but not finished
33 BLACK_NODE, //!< visited and finished
34} AtomColors;
35
36//! used to indicate types of entries in the molecular stack:
37typedef enum {
38 MOL_STACK_ATOM = 0, //!< an Atom
39 MOL_STACK_BOND, //!< a Bond
40 MOL_STACK_RING, //!< a ring closure
41 MOL_STACK_BRANCH_OPEN, //!< beginning of a branch
42 MOL_STACK_BRANCH_CLOSE, //!< end of a branch
43} MolStackTypes;
44
45//! used to store components in the molecular stack
46typedef union {
47 Atom *atom;
48 Bond *bond;
49} MolStackUnion;
50
51//! these are the actual elements in the molecular stack
52class RDKIT_GRAPHMOL_EXPORT MolStackElem {
53 public:
54 //! construct an Atom node
55 explicit MolStackElem(Atom *at) {
56 type = MOL_STACK_ATOM;
57 obj.atom = at;
58 }
59 //! construct a bond node
60 /*!
61
62 \param bond pointer to the Bond being added
63 \param idx index of the Atom traversed before this Bond
64 (beginAtom in the canonical traversal order)
65 */
66 explicit MolStackElem(Bond *bond, int idx) {
67 type = MOL_STACK_BOND;
68 obj.bond = bond;
69 number = idx;
70 }
71 //! construct for a ring closure
72 explicit MolStackElem(int idx) {
73 type = MOL_STACK_RING;
74 number = idx;
75 }
76 //! construct for a branch opening or closing
77 explicit MolStackElem(const char *chr, int idx) {
78 switch (chr[0]) {
79 case '(':
80 type = MOL_STACK_BRANCH_OPEN;
81 break;
82 case ')':
83 type = MOL_STACK_BRANCH_CLOSE;
84 break;
85 default:
86 break;
87 }
88 number = idx;
89 }
90 MolStackTypes type; //!< stores the type of node
91 MolStackUnion obj; //!< holds our pointer (if appropriate)
92 int number =
93 -1; //!< stores our number (relevant for bonds and ring closures)
94};
95typedef std::vector<MolStackElem> MolStack;
96
97//! used to represent possible branches from an atom
98typedef std::tuple<int, int, Bond *> PossibleType;
99
100//! constructs the canonical traversal order for a molecular fragment
101/*!
102
103 \param mol the ROMol we're working on
104 \param atomIdx the index of the atom to start the traversal from
105 \param colors the traversal status of each atom in \c mol
106 \param ranks the assigned rank of each atom in \c mol
107 \param molStack the current traversal stack (used to return the results)
108
109 <b>Notes</b>
110 - \c mol will, in general, be modified by this operation as bond directions
111 and the like are changed to fit the canonical traversal order
112
113 */
114RDKIT_GRAPHMOL_EXPORT void canonicalizeFragment(
115 ROMol &mol, int atomIdx, std::vector<AtomColors> &colors,
116 const std::vector<unsigned int> &ranks, MolStack &molStack,
117 const boost::dynamic_bitset<> *bondsInPlay = nullptr,
118 const std::vector<std::string> *bondSymbols = nullptr,
119 bool doIsomericSmiles = false, bool doRandom = false);
120
121//! Check if a chiral atom needs to have its tag flipped after reading or before
122//! writing SMILES
123RDKIT_GRAPHMOL_EXPORT bool chiralAtomNeedsTagInversion(const RDKit::ROMol &mol,
124 const RDKit::Atom *atom,
125 bool isAtomFirst,
126 size_t numClosures);
127
128//! Canonicalizes the atom stereo labels in enhanced stereo groups
129/*!
130
131 For example, after calling this function the chiral centers in the
132 molecules `C[C@H](F)Cl |&1:1|` and `C[C@@H](F)Cl |&1:1|` will have the same
133 chiral tags.
134
135*/
136RDKIT_GRAPHMOL_EXPORT void canonicalizeEnhancedStereo(
137 ROMol &mol, const std::vector<unsigned int> *atomRanks = nullptr);
138
139} // end of namespace Canon
140} // end of namespace RDKit
141#endif
The class for representing atoms.
Definition: Atom.h:68
class for representing a bond
Definition: Bond.h:47
these are the actual elements in the molecular stack
Definition: Canon.h:52
MolStackTypes type
stores the type of node
Definition: Canon.h:90
MolStackElem(Atom *at)
construct an Atom node
Definition: Canon.h:55
MolStackElem(const char *chr, int idx)
construct for a branch opening or closing
Definition: Canon.h:77
MolStackUnion obj
holds our pointer (if appropriate)
Definition: Canon.h:91
MolStackElem(int idx)
construct for a ring closure
Definition: Canon.h:72
MolStackElem(Bond *bond, int idx)
construct a bond node
Definition: Canon.h:66
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:225
std::tuple< int, int, Bond * > PossibleType
used to represent possible branches from an atom
Definition: Canon.h:98
MolStackTypes
used to indicate types of entries in the molecular stack:
Definition: Canon.h:37
@ MOL_STACK_BOND
a Bond
Definition: Canon.h:39
@ MOL_STACK_BRANCH_OPEN
beginning of a branch
Definition: Canon.h:41
@ MOL_STACK_RING
a ring closure
Definition: Canon.h:40
@ MOL_STACK_BRANCH_CLOSE
end of a branch
Definition: Canon.h:42
@ MOL_STACK_ATOM
an Atom
Definition: Canon.h:38
std::vector< MolStackElem > MolStack
Definition: Canon.h:95
RDKIT_GRAPHMOL_EXPORT bool chiralAtomNeedsTagInversion(const RDKit::ROMol &mol, const RDKit::Atom *atom, bool isAtomFirst, size_t numClosures)
const int MAX_BONDTYPE
used in the canonical traversal code
Definition: Canon.h:27
const int MAX_CYCLES
used in the canonical traversal code
Definition: Canon.h:26
RDKIT_GRAPHMOL_EXPORT void canonicalizeEnhancedStereo(ROMol &mol, const std::vector< unsigned int > *atomRanks=nullptr)
Canonicalizes the atom stereo labels in enhanced stereo groups.
RDKIT_GRAPHMOL_EXPORT void canonicalizeFragment(ROMol &mol, int atomIdx, std::vector< AtomColors > &colors, const std::vector< unsigned int > &ranks, MolStack &molStack, const boost::dynamic_bitset<> *bondsInPlay=nullptr, const std::vector< std::string > *bondSymbols=nullptr, bool doIsomericSmiles=false, bool doRandom=false)
constructs the canonical traversal order for a molecular fragment
AtomColors
used in traversals of the molecule
Definition: Canon.h:30
@ GREY_NODE
visited, but not finished
Definition: Canon.h:32
@ BLACK_NODE
visited and finished
Definition: Canon.h:33
@ WHITE_NODE
not visited
Definition: Canon.h:31
const int MAX_NATOMS
used in the canonical traversal code
Definition: Canon.h:25
Std stuff.
Definition: Abbreviations.h:19
used to store components in the molecular stack
Definition: Canon.h:46