11#ifndef RD_FILEPARSERUTILS_H
12#define RD_FILEPARSERUTILS_H
17#include <boost/lexical_cast.hpp>
18#include <boost/algorithm/string.hpp>
19#include <boost/format.hpp>
28namespace FileParserUtils {
30 std::string_view orig, std::string stripChars =
" \t\r\n") {
31 std::string_view res = orig;
32 auto start = res.find_first_not_of(stripChars);
33 if (start != std::string_view::npos) {
34 auto end = res.find_last_not_of(stripChars) + 1;
35 res = res.substr(start, end - start);
44 auto trimmed =
strip(input,
" ");
45 if (acceptSpaces && trimmed.empty()) {
48 return boost::lexical_cast<T>(trimmed);
53 return stripSpacesAndCast<T>(std::string_view(input.c_str()), acceptSpaces);
56 bool acceptSpaces =
true);
58 bool acceptSpaces =
true);
60 bool acceptSpaces =
true);
62 bool acceptSpaces =
true);
64 bool acceptSpaces =
true);
66 bool acceptSpaces =
true);
77 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
78 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
79 bool strictParsing =
true,
bool expectMEND =
true);
83 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
84 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
85 bool strictParsing =
true);
90 bool chiralityPossible,
99 const std::string &prefix,
100 const std::string &missingValueMarker =
"n/a") {
101 std::string atompn = pn.substr(prefix.size());
102 std::string strVect = mol.
getProp<std::string>(pn);
103 std::vector<std::string> tokens;
104 boost::split(tokens, strVect, boost::is_any_of(
" \t\n"),
105 boost::token_compress_on);
108 <<
"Property list " << pn <<
" too short, only " << tokens.size()
109 <<
" elements found. Ignoring it." << std::endl;
112 std::string mv = missingValueMarker;
113 size_t first_token = 0;
114 if (tokens.size() == mol.
getNumAtoms() + 1 && tokens[0].front() ==
'[' &&
115 tokens[0].back() ==
']') {
116 mv = std::string(tokens[0].begin() + 1, tokens[0].end() - 1);
121 <<
" is empty." << std::endl;
123 for (
size_t i = first_token; i < tokens.size(); ++i) {
124 if (tokens[i] != mv) {
125 unsigned int atomid = i - first_token;
127 T apv = boost::lexical_cast<T>(tokens[i]);
129 }
catch (
const boost::bad_lexical_cast &) {
131 <<
"Value " << tokens[i] <<
" for property " << pn <<
" of atom "
132 << atomid <<
" can not be parsed. Ignoring it." << std::endl;
142 const std::string missingValueMarker =
"n/a") {
144 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
145 applyMolListPropToAtoms<T>(mol, pn, prefix, missingValueMarker);
153 ROMol &mol,
const std::string pn,
154 const std::string &missingValueMarker =
"n/a") {
157 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
158 applyMolListPropToAtoms<std::string>(mol, pn, prefix, missingValueMarker);
161 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
162 applyMolListPropToAtoms<std::int64_t>(mol, pn, prefix,
166 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
167 applyMolListPropToAtoms<double>(mol, pn, prefix, missingValueMarker);
170 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
171 applyMolListPropToAtoms<bool>(mol, pn, prefix, missingValueMarker);
181 ROMol &mol,
const std::string &missingValueMarker =
"n/a") {
188 std::string missingValueMarker =
"",
189 unsigned int lineSize = 190) {
192 if (!missingValueMarker.empty()) {
193 propVal += boost::str(boost::format(
"[%s] ") % missingValueMarker);
195 missingValueMarker =
"n/a";
197 for (
const auto &atom : mol.
atoms()) {
198 std::string apVal = missingValueMarker;
199 if (atom->hasProp(atomPropName)) {
200 T tVal = atom->getProp<T>(atomPropName);
201 apVal = boost::lexical_cast<std::string>(tVal);
205 if (propVal.length() + apVal.length() + 1 >= lineSize) {
208 res += propVal +
"\n";
211 propVal += apVal +
" ";
213 if (!propVal.empty()) {
221 ROMol &mol,
const std::string &atomPropName,
222 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
223 std::string molPropName =
"atom.iprop." + atomPropName;
225 getAtomPropertyList<boost::int64_t>(
226 mol, atomPropName, missingValueMarker, lineSize));
229 ROMol &mol,
const std::string &atomPropName,
230 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
231 std::string molPropName =
"atom.dprop." + atomPropName;
233 getAtomPropertyList<double>(mol, atomPropName, missingValueMarker,
237 ROMol &mol,
const std::string &atomPropName,
238 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
239 std::string molPropName =
"atom.bprop." + atomPropName;
241 getAtomPropertyList<bool>(mol, atomPropName, missingValueMarker,
245 ROMol &mol,
const std::string &atomPropName,
246 const std::string &missingValueMarker =
"",
unsigned int lineSize = 190) {
247 std::string molPropName =
"atom.prop." + atomPropName;
249 getAtomPropertyList<std::string>(mol, atomPropName,
250 missingValueMarker, lineSize));
#define BOOST_LOG(__arg__)
RDKIT_RDGENERAL_EXPORT RDLogger rdWarningLog
The class for representing atoms.
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
Atom * getAtomWithIdx(unsigned int idx)
returns a pointer to a particular Atom
unsigned int getNumAtoms() const
returns our number of atoms
CXXAtomIterator< MolGraph, Atom * > atoms()
C++11 Range iterator.
RWMol is a molecule class that is intended to be edited.
#define RDKIT_FILEPARSERS_EXPORT
void processMolPropertyList(ROMol &mol, const std::string pn, const std::string &missingValueMarker="n/a")
RDKIT_FILEPARSERS_EXPORT std::string getV3000CTAB(const ROMol &tmol, int confId=-1)
RDKIT_FILEPARSERS_EXPORT void moveAdditionalPropertiesToSGroups(RWMol &mol)
RDKIT_FILEPARSERS_EXPORT void finishMolProcessing(RWMol *res, bool chiralityPossible, bool sanitize, bool removeHs)
void createAtomDoublePropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)
RDKIT_FILEPARSERS_EXPORT double toDouble(const std::string &input, bool acceptSpaces=true)
void createAtomIntPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)
RDKIT_FILEPARSERS_EXPORT int toInt(const std::string &input, bool acceptSpaces=true)
RDKIT_FILEPARSERS_EXPORT Atom * replaceAtomWithQueryAtom(RWMol *mol, Atom *atom)
Deprecated, please use QueryOps::replaceAtomWithQueryAtom instead.
T stripSpacesAndCast(std::string_view input, bool acceptSpaces=false)
void createAtomStringPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)
void applyMolListPropToAtoms(ROMol &mol, const std::string &pn, const std::string &prefix, const std::string &missingValueMarker="n/a")
applies a particular property to the atoms as an atom property list
std::string getAtomPropertyList(ROMol &mol, const std::string &atomPropName, std::string missingValueMarker="", unsigned int lineSize=190)
RDKIT_FILEPARSERS_EXPORT std::string_view strip(std::string_view orig, std::string stripChars=" \t\r\n")
void applyMolListPropsToAtoms(ROMol &mol, const std::string &prefix, const std::string missingValueMarker="n/a")
RDKIT_FILEPARSERS_EXPORT bool ParseV3000CTAB(std::istream *inStream, unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible, unsigned int &nAtoms, unsigned int &nBonds, bool strictParsing=true, bool expectMEND=true)
void processMolPropertyLists(ROMol &mol, const std::string &missingValueMarker="n/a")
RDKIT_FILEPARSERS_EXPORT bool ParseV2000CTAB(std::istream *inStream, unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible, unsigned int &nAtoms, unsigned int &nBonds, bool strictParsing=true)
static const std::string atomPropPrefix
RDKIT_FILEPARSERS_EXPORT std::string getV3000Line(std::istream *inStream, unsigned int &line)
RDKIT_FILEPARSERS_EXPORT unsigned int toUnsigned(const std::string &input, bool acceptSpaces=true)
void createAtomBoolPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=190)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed