11 #ifndef RGROUP_DECOMP_DATA
12 #define RGROUP_DECOMP_DATA
45 std::vector<std::vector<RGroupMatch>>
matches;
59 :
params(std::move(inputParams)) {
66 :
params(std::move(inputParams)) {
67 for (
const auto &core : inputCores) {
75 RWMol core(inputCore);
84 for (
auto &core :
cores) {
85 RWMol *alignCore = core.first ?
cores[0].core.get() :
nullptr;
87 "Could not prepare at least one core");
89 core.second.labelledCore.reset(
new RWMol(*core.second.core));
119 unsigned int label = 0;
132 "Scoring method is not fingerprint variance!");
135 "Illegal permutation prune length");
166 "permutation.size() should be <= matches.size()");
168 for (
size_t mol_idx = 0; mol_idx <
permutation.size(); ++mol_idx) {
169 std::vector<RGroupMatch> keepVector;
170 size_t mi = mol_idx + offset;
185 const bool removeAllHydrogenRGroups =
189 std::vector<RGroupMatch> results;
191 for (
size_t i = 0; i <
matches.size(); ++i) {
193 results.push_back(
matches[i].at(pi));
206 std::map<int, std::set<int>> labelCores;
207 std::set<int> coresVisited;
208 for (
auto &position : results) {
209 int core_idx = position.core_idx;
210 if (coresVisited.find(core_idx) == coresVisited.end()) {
211 coresVisited.insert(core_idx);
212 auto core =
cores.find(core_idx);
213 if (core !=
cores.end()) {
214 for (
auto rlabels :
getRlabels(*core->second.core)) {
215 int rlabel = rlabels.first;
216 labelCores[rlabel].insert(core_idx);
222 for (
int label :
labels) {
223 if (label > 0 && !removeAllHydrogenRGroups) {
227 for (
auto &position : results) {
228 R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
229 bool labelHasCore = labelCores[label].find(position.core_idx) !=
230 labelCores[label].end();
231 if (labelHasCore && rgroup != position.rgroups.end() &&
232 !rgroup->second->is_hydrogen) {
239 for (
auto &position : results) {
240 position.rgroups.erase(label);
270 for (
const auto &p : atoms) {
278 const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
279 for (
const auto &i : atomsToAdd) {
280 mol.
addAtom(i.second,
false,
true);
290 UsedLabels &used_labels,
const std::set<int> &indexLabels,
291 const std::map<
int, std::vector<int>> &extraAtomRLabels) {
299 std::map<int, Atom *> atoms =
getRlabels(core);
307 std::map<int, std::vector<int>> bondsToCore;
308 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
311 for (
const auto &rlabels : atoms) {
312 int userLabel = rlabels.first;
316 Atom *atom = rlabels.second;
317 mappings[userLabel] = userLabel;
318 used_labels.
add(userLabel);
324 auto *newAt =
new Atom(0);
326 atomsToAdd.emplace_back(atom, newAt);
331 for (
auto newLabel : indexLabels) {
332 auto atm = atoms.find(newLabel);
333 if (atm == atoms.end()) {
337 Atom *atom = atm->second;
340 auto mapping = mappings.find(newLabel);
341 if (mapping == mappings.end()) {
342 rlabel = used_labels.
next();
343 mappings[newLabel] = rlabel;
345 rlabel = mapping->second;
353 auto *newAt =
new Atom(0);
355 atomsToAdd.emplace_back(atom, newAt);
360 for (
const auto &extraAtomRLabel : extraAtomRLabels) {
361 auto atm = atoms.find(extraAtomRLabel.first);
362 if (atm == atoms.end()) {
365 Atom *atom = atm->second;
367 for (
size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
368 int rlabel = used_labels.
next();
372 "Multiple attachments to a dummy (or hydrogen) is weird.");
373 auto *newAt =
new Atom(0);
375 atomsToAdd.emplace_back(atom, newAt);
380 for (
const auto &rlabels : atoms) {
381 auto atom = rlabels.second;
399 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
400 std::map<int, int> rLabelCoreIndexToAtomicWt;
407 const std::vector<int> &rlabels =
411 for (
int rlabel : rlabels) {
412 auto label = mappings.find(rlabel);
423 auto *newAt =
new Atom(0);
425 atomsToAdd.emplace_back(atom, newAt);
433 rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->
getAtomicNum();
442 bool implicitOnly =
false;
443 bool updateExplicitCount =
false;
444 bool sanitize =
false;
466 std::cerr <<
"Relabel Rgroup smiles " <<
MolToSmiles(mol) << std::endl;
477 std::set<int> indexLabels;
486 std::map<int, std::vector<int>> extraAtomRLabels;
488 for (
auto &it : best) {
489 for (
auto &rgroup : it.rgroups) {
490 if (rgroup.first > 0) {
494 indexLabels.insert(rgroup.first);
497 std::map<int, int> rlabelsUsedInRGroup =
498 rgroup.second->getNumBondsToRlabels();
499 for (
auto &numBondsUsed : rlabelsUsedInRGroup) {
501 if (numBondsUsed.second > 1) {
502 extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
510 for (
auto &core :
cores) {
511 core.second.labelledCore.reset(
new RWMol(*core.second.core));
524 used_labels.
add(userLabel);
526 for (
auto &core :
cores) {
528 indexLabels, extraAtomRLabels);
531 for (
auto &it : best) {
532 for (
auto &rgroup : it.rgroups) {
537 std::set<int> uniqueMappedValues;
539 std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
540 [](
const std::pair<int, int> &p) { return p.second; });
542 "Error in uniqueness of final RLabel mapping");
544 uniqueMappedValues.size() ==
userLabels.size() + indexLabels.size(),
545 "Error in final RMapping size");
552 switch (scoreMethod) {
558 fingerprintVarianceScoreData);
566 bool finalize =
false) {
570 auto t0 = std::chrono::steady_clock::now();
571 std::unique_ptr<CartesianProduct> iterator;
581 auto best = max_element(results.begin(), results.end(),
583 return a.rGroupScorer.getBestScore() <
584 b.rGroupScorer.getBestScore();
588 auto result = ga.
run();
598 std::vector<size_t> permutations;
606 std::back_inserter(permutations),
607 [](
const std::vector<RGroupMatch> &m) { return m.size(); });
608 permutation = std::vector<size_t>(permutations.size(), 0);
614 std::cerr <<
"Processing" << std::endl;
617 iterator = std::move(it);
621 while (iterator->next()) {
622 if (count > iterator->maxPermutations) {
626 std::cerr <<
"**************************************************"
631 :
score(iterator->permutation);
638 std::cerr <<
" ===> current best:" << newscore <<
">"
656 if (pruneMatches || finalize) {
#define CHECK_INVARIANT(expr, mess)
#define PRECONDITION(expr, mess)
The class for representing atoms.
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
void setAtomicNum(int newNum)
sets our atomic number
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
int getAtomMapNum() const
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
unsigned int getIsotope() const
returns our isotope number
unsigned int getDegree() const
bool getPropIfPresent(const std::string &key, T &res) const
void clearProp(const std::string &key) const
clears the value of a property
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
std::set< int > labels_used
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void breakTies(const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
double getBestScore() const
return the best score found so far
unsigned int getNumConformers() const
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
static std::string to_string(const Descriptor &desc)
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
RDKIT_RDGENERAL_EXPORT const std::string _MolFileRLabel
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_CORE_INDEX
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string done
const std::string _rgroupInputDummy
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string SIDECHAIN_RLABELS
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_TYPE
const unsigned int EMPTY_CORE_LABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
iterate through all possible permutations of the rgroups
double fingerprintVarianceGroupScore()
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
A single rgroup attached to a given core.
boost::shared_ptr< RWMol > combinedMol
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
size_t permutationProduct
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void addCore(const ROMol &inputCore)
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * >> &atomsToAdd)
std::vector< int > processedRlabels
int getRlabel(Atom *atom) const
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int >> &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
RGroupScorer rGroupScorer
bool removeAllHydrogenRGroupsAndLabels
unsigned int matchingStrategy
bool allowMultipleRGroupsOnUnlabelled
void addDummyAtomsToUnlabelledCoreAtoms(RWMol &core)
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
double timeout
timeout in seconds. <=0 indicates no timeout
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
unsigned int rgroupLabelling
bool prepareCore(RWMol &, const RWMol *alignCore)