RDKit
Open-source cheminformatics and machine learning.
SmilesParse.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2021 Greg Landrum and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SMILESPARSE_H
12 #define RD_SMILESPARSE_H
13 
14 #include <GraphMol/RWMol.h>
16 #include <string>
17 #include <exception>
18 #include <map>
19 
20 namespace RDKit {
21 
//! Options controlling how a SMILES string is parsed by SmilesToMol().
/*!
  All members carry in-class defaults, so a default-constructed instance
  can be passed as-is and individual fields overridden as needed.
 */
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  int debugParse = 0;   /**< enable debugging in the SMILES parser */
  bool sanitize = true; /**< sanitize the molecule after building it */
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMILES "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES */
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool removeHs = true;  /**< remove Hs after constructing the molecule */
  bool useLegacyStereo =
      true; /**< \deprecated use the legacy stereochemistry perception code.
               DEPRECATED, please use
               Chirality::setUseLegacyStereoPerception() instead. */
  bool skipCleanup =
      false; /**< skip the final cleanup stage (for internal use) */
};
//! Construct a molecule from a SMILES string using explicit parser parameters
/*!
 \param smi the SMILES to convert
 \param params the SmilesParserParams instance controlling the parse

 \return a pointer to an RWMol. The inline convenience overload of
 SmilesToMol() in this header returns this function's result unchanged and
 documents that the caller is responsible for free'ing it.
 */
39 RDKIT_SMILESPARSE_EXPORT RWMol *SmilesToMol(const std::string &smi,
40  const SmilesParserParams &params);
41 
// NOTE(review): presumably these parse a SMILES fragment describing a single
// atom / a single bond; the implementation and the ownership of the returned
// pointers are not visible in this header - confirm before relying on either.
42 RDKIT_SMILESPARSE_EXPORT Atom *SmilesToAtom(const std::string &smi);
43 RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi);
44 
44 
45 //! Construct a molecule from a SMILES string
46 /*!
47  \param smi the SMILES to convert
48  \param debugParse toggles verbose debugging information from the parser
49  \param sanitize toggles H removal and sanitization of the molecule
50  \param replacements a string->string map of replacement strings. See below
51  for more information about replacements.
52 
53  \return a pointer to the new molecule; the caller is responsible for free'ing
54  this.
55 
56  The optional replacements map can be used to do string substitution of
57  abbreviations
58  in the input SMILES. The set of substitutions is repeatedly looped through
59  until
60  the string no longer changes. It is the responsibility of the caller to make
61  sure
62  that substitutions results in legal and sensible SMILES.
63 
64  Examples of substitutions:
65  \code
66  CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
67  C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
68  C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC
69  \endcode
70 
71  */
73  const std::string &smi, int debugParse = 0, bool sanitize = true,
74  std::map<std::string, std::string> *replacements = nullptr) {
75  SmilesParserParams params;
76  params.debugParse = debugParse;
77  params.replacements = replacements;
78  if (sanitize) {
79  params.sanitize = true;
80  params.removeHs = true;
81  } else {
82  params.sanitize = false;
83  params.removeHs = false;
84  }
85  return SmilesToMol(smi, params);
86 };
87 
//! Options controlling how a SMARTS string is parsed by SmartsToMol().
/*!
  All members carry in-class defaults, so a default-constructed instance
  can be passed as-is and individual fields overridden as needed.
 */
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
  int debugParse = 0; /**< enable debugging in the SMARTS parser */
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMARTS "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool mergeHs =
      true; /**< toggles merging H atoms in the SMARTS into neighboring atoms */
  bool skipCleanup =
      false; /**< skip the final cleanup stage (for internal use) */
};
102  const SmartsParserParams &ps);
103 
104 //! Construct a molecule from a SMARTS string
105 /*!
106  \param sma the SMARTS to convert
107  \param debugParse toggles verbose debugging information from the parser
108  \param mergeHs toggles merging H atoms in the SMARTS into neighboring
109  atoms
110  \param replacements a string->string map of replacement strings.
111  \see SmilesToMol for more information about replacements
112 
113  \return a pointer to the new molecule; the caller is responsible for free'ing
114  this.
115  */
117  const std::string &sma, int debugParse = 0, bool mergeHs = false,
118  std::map<std::string, std::string> *replacements = nullptr) {
120  ps.debugParse = debugParse;
121  ps.mergeHs = mergeHs;
122  ps.replacements = replacements;
123  return SmartsToMol(sma, ps);
124 };
125 
// NOTE(review): presumably these parse a SMARTS fragment describing a single
// atom / a single bond; the implementation and the ownership of the returned
// pointers are not visible in this header - confirm before relying on either.
126 RDKIT_SMILESPARSE_EXPORT Atom *SmartsToAtom(const std::string &sma);
127 RDKIT_SMILESPARSE_EXPORT Bond *SmartsToBond(const std::string &sma);
128 
//! Exception type carrying a human-readable parse-error message.
/*!
  The message passed at construction is copied into the exception and is
  returned verbatim by what().
  NOTE(review): the throw sites are not in this header; presumably the
  SMILES/SMARTS parsers throw this when the input cannot be parsed.
 */
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
 public:
  //! \param msg C string describing the error; a null pointer is stored as
  //! an empty message (constructing std::string from nullptr is undefined).
  SmilesParseException(const char *msg) : _msg(msg ? msg : "") {}
  //! \param msg description of the error; taken by const reference so the
  //! string is copied once rather than twice.
  SmilesParseException(const std::string &msg) : _msg(msg) {}
  //! \return the stored message; the pointer is valid for the lifetime of
  //! this exception object.
  const char *what() const noexcept override { return _msg.c_str(); }
  ~SmilesParseException() noexcept override = default;

 private:
  std::string _msg;
};
139 
140 inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
141  size_t len) {
142  std::string smi(text, len);
143  RWMol *ptr = nullptr;
144  try {
145  ptr = SmilesToMol(smi);
146  } catch (const RDKit::MolSanitizeException &) {
147  ptr = nullptr;
148  }
149  return std::unique_ptr<RWMol>(ptr);
150 }
151 inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
152  size_t len) {
153  std::string smi(text, len);
154  // no need for exception handling here: SmartsToMol() doesn't do
155  // sanitization
156  RWMol *ptr = SmartsToMol(smi);
157  return std::unique_ptr<RWMol>(ptr);
158 }
159 
160 } // namespace RDKit
161 
162 #endif
Defines the editable molecule class RWMol.
The class for representing atoms.
Definition: Atom.h:68
class for representing a bond
Definition: Bond.h:47
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
SmilesParseException(const std::string msg)
Definition: SmilesParse.h:132
SmilesParseException(const char *msg)
Definition: SmilesParse.h:131
const char * what() const noexcept override
Definition: SmilesParse.h:133
~SmilesParseException() noexcept override=default
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:457
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
Definition: Abbreviations.h:19
RDKIT_SMILESPARSE_EXPORT Atom * SmartsToAtom(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT RWMol * SmartsToMol(const std::string &sma, const SmartsParserParams &ps)
RDKIT_SMILESPARSE_EXPORT Atom * SmilesToAtom(const std::string &smi)
RDKIT_SMILESPARSE_EXPORT Bond * SmilesToBond(const std::string &smi)
RDKIT_SMILESPARSE_EXPORT Bond * SmartsToBond(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:90
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:25