RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
18#include <cstdint>
19#include <boost/dynamic_bitset.hpp>
21#include <cstring>
22#include <iostream>
23#include <cassert>
24#include <cstring>
25#include <vector>
26
27// #define VERBOSE_CANON 1
28
29namespace RDKit {
30namespace Canon {
31struct canon_atom;
32
34 Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
35 unsigned int bondStereo{
36 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
37 unsigned int nbrSymClass{0};
38 unsigned int nbrIdx{0};
39 Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
40 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
41 const std::string *p_symbol{
42 nullptr}; // if provided, this is used to order bonds
43 unsigned int bondIdx{0};
44
47 unsigned int nsc, unsigned int bidx)
48 : bondType(bt),
49 bondStereo(static_cast<unsigned int>(bs)),
50 nbrSymClass(nsc),
51 nbrIdx(ni),
52 bondIdx(bidx) {}
53 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
54 unsigned int nsc, unsigned int bidx)
55 : bondType(bt),
56 bondStereo(bs),
57 nbrSymClass(nsc),
58 nbrIdx(ni),
59 bondIdx(bidx) {}
60
61 int compareStereo(const bondholder &o) const;
62
63 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
64 static bool greater(const bondholder &lhs, const bondholder &rhs) {
65 return compare(lhs, rhs) > 0;
66 }
67
68 static int compare(const bondholder &x, const bondholder &y,
69 unsigned int div = 1) {
70 if (x.p_symbol && y.p_symbol) {
71 if ((*x.p_symbol) < (*y.p_symbol)) {
72 return -1;
73 } else if ((*x.p_symbol) > (*y.p_symbol)) {
74 return 1;
75 }
76 }
77 if (x.bondType < y.bondType) {
78 return -1;
79 } else if (x.bondType > y.bondType) {
80 return 1;
81 }
82 if (x.bondStereo < y.bondStereo) {
83 return -1;
84 } else if (x.bondStereo > y.bondStereo) {
85 return 1;
86 }
87 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
88 if (scdiv) {
89 return scdiv;
90 }
91 if (x.bondStereo && y.bondStereo) {
92 auto cs = x.compareStereo(y);
93 if (cs) {
94 return cs;
95 }
96 }
97 return 0;
98 }
99};
101 const Atom *atom{nullptr};
102 int index{-1};
103 unsigned int degree{0};
104 unsigned int totalNumHs{0};
105 bool hasRingNbr{false};
106 bool isRingStereoAtom{false};
107 unsigned int whichStereoGroup{0};
109 int *nbrIds{nullptr};
110 const std::string *p_symbol{
111 nullptr}; // if provided, this is used to order atoms
112 std::vector<int> neighborNum;
113 std::vector<int> revistedNeighbors;
114 std::vector<bondholder> bonds;
115
117
118 ~canon_atom() { free(nbrIds); }
119};
120
122 canon_atom *atoms, std::vector<bondholder> &nbrs);
123
125 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
126 std::vector<std::pair<unsigned int, unsigned int>> &result);
127
128/*
129 * Different types of atom compare functions:
130 *
131 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
132 *dependent chirality
133 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
134 *highly symmetrical graphs/molecules
135 * - AtomCompareFunctor: Basic atom compare function which also allows to
136 *include neighbors within the ranking
137 */
138
140 public:
141 Canon::canon_atom *dp_atoms{nullptr};
142 const ROMol *dp_mol{nullptr};
143 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
144 *dp_bondsInPlay{nullptr};
145
148 Canon::canon_atom *atoms, const ROMol &m,
149 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
150 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
151 : dp_atoms(atoms),
152 dp_mol(&m),
153 dp_atomsInPlay(atomsInPlay),
154 dp_bondsInPlay(bondsInPlay) {}
155 int operator()(int i, int j) const {
156 PRECONDITION(dp_atoms, "no atoms");
157 PRECONDITION(dp_mol, "no molecule");
158 PRECONDITION(i != j, "bad call");
159 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
160 return 0;
161 }
162
163 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
164 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
165 }
166 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
167 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
168 }
169 for (unsigned int ii = 0;
170 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
171 int cmp =
172 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
173 if (cmp) {
174 return cmp;
175 }
176 }
177
178 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
179 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
180 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
181 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
182 }
183 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
184 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
185 }
186 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
187 int cmp = swapsi[ii].second - swapsj[ii].second;
188 if (cmp) {
189 return cmp;
190 }
191 }
192 return 0;
193 }
194};
195
197 public:
198 Canon::canon_atom *dp_atoms{nullptr};
199 const ROMol *dp_mol{nullptr};
200 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
201 *dp_bondsInPlay{nullptr};
202
205 Canon::canon_atom *atoms, const ROMol &m,
206 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
207 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
208 : dp_atoms(atoms),
209 dp_mol(&m),
210 dp_atomsInPlay(atomsInPlay),
211 dp_bondsInPlay(bondsInPlay) {}
212 int operator()(int i, int j) const {
213 PRECONDITION(dp_atoms, "no atoms");
214 PRECONDITION(dp_mol, "no molecule");
215 PRECONDITION(i != j, "bad call");
216 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
217 return 0;
218 }
219
220 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
221 return -1;
222 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
223 return 1;
224 }
225
226 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
227 return -1;
228 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
229 return 1;
230 }
231
232 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
233 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
234 }
235 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
236 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
237 }
238 for (unsigned int ii = 0;
239 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
240 int cmp =
241 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
242 if (cmp) {
243 return cmp;
244 }
245 }
246
247 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
248 return -1;
249 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
250 return 1;
251 }
252 return 0;
253 }
254};
255
256namespace {
257unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
258 unsigned int i) {
259 unsigned int res = 0;
260 std::vector<unsigned int> perm;
261 perm.reserve(dp_atoms[i].atom->getDegree());
262 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
263 auto rnk = dp_atoms[nbr->getIdx()].index;
264 // make sure we don't have duplicate ranks
265 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
266 break;
267 } else {
268 perm.push_back(rnk);
269 }
270 }
271 if (perm.size() == dp_atoms[i].atom->getDegree()) {
272 auto ctag = dp_atoms[i].atom->getChiralTag();
273 if (ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ||
274 ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CCW) {
275 auto sortedPerm = perm;
276 std::sort(sortedPerm.begin(), sortedPerm.end());
277 auto nswaps = countSwapsToInterconvert(perm, sortedPerm);
278 res = ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ? 2 : 1;
279 if (nswaps % 2) {
280 res = res == 2 ? 1 : 2;
281 }
282 }
283 }
284 return res;
285}
286} // namespace
288 unsigned int getAtomRingNbrCode(unsigned int i) const {
289 if (!dp_atoms[i].hasRingNbr) {
290 return 0;
291 }
292
293 int *nbrs = dp_atoms[i].nbrIds;
294 unsigned int code = 0;
295 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
296 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
297 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
298 }
299 }
300 return code;
301 }
302
303 int basecomp(int i, int j) const {
304 unsigned int ivi, ivj;
305
306 // always start with the current class:
307 ivi = dp_atoms[i].index;
308 ivj = dp_atoms[j].index;
309 if (ivi < ivj) {
310 return -1;
311 } else if (ivi > ivj) {
312 return 1;
313 }
314 // use the atom-mapping numbers if they were assigned
315 /* boost::any_cast FILED here:
316 std::string molAtomMapNumber_i="";
317 std::string molAtomMapNumber_j="";
318 */
319 int molAtomMapNumber_i = 0;
320 int molAtomMapNumber_j = 0;
321 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
322 molAtomMapNumber_i);
323 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
324 molAtomMapNumber_j);
325 if (molAtomMapNumber_i < molAtomMapNumber_j) {
326 return -1;
327 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
328 return 1;
329 }
330 // start by comparing degree
331 ivi = dp_atoms[i].degree;
332 ivj = dp_atoms[j].degree;
333 if (ivi < ivj) {
334 return -1;
335 } else if (ivi > ivj) {
336 return 1;
337 }
338 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
339 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
340 return -1;
341 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
342 return 1;
343 } else {
344 return 0;
345 }
346 }
347
348 // move onto atomic number
349 ivi = dp_atoms[i].atom->getAtomicNum();
350 ivj = dp_atoms[j].atom->getAtomicNum();
351 if (ivi < ivj) {
352 return -1;
353 } else if (ivi > ivj) {
354 return 1;
355 }
356 // isotopes if we're using them
357 if (df_useIsotopes) {
358 ivi = dp_atoms[i].atom->getIsotope();
359 ivj = dp_atoms[j].atom->getIsotope();
360 if (ivi < ivj) {
361 return -1;
362 } else if (ivi > ivj) {
363 return 1;
364 }
365 }
366
367 // nHs
368 ivi = dp_atoms[i].totalNumHs;
369 ivj = dp_atoms[j].totalNumHs;
370 if (ivi < ivj) {
371 return -1;
372 } else if (ivi > ivj) {
373 return 1;
374 }
375 // charge
376 ivi = dp_atoms[i].atom->getFormalCharge();
377 ivj = dp_atoms[j].atom->getFormalCharge();
378 if (ivi < ivj) {
379 return -1;
380 } else if (ivi > ivj) {
381 return 1;
382 }
383 // chirality if we're using it
384 if (df_useChirality) {
385 // look at enhanced stereo
386 ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
387 // it's set then we're in an SG
388 ivj = dp_atoms[j].whichStereoGroup;
389 if (ivi || ivj) {
390 if (ivi && !ivj) {
391 return 1;
392 } else if (ivj && !ivi) {
393 return -1;
394 } else if (ivi && ivj) {
395 ivi = static_cast<unsigned int>(dp_atoms[i].typeOfStereoGroup);
396 ivj = static_cast<unsigned int>(dp_atoms[j].typeOfStereoGroup);
397 if (ivi < ivj) {
398 return -1;
399 } else if (ivi > ivj) {
400 return 1;
401 }
402 ivi = dp_atoms[i].whichStereoGroup - 1;
403 ivj = dp_atoms[j].whichStereoGroup - 1;
404 // now check the current classes of the other members of the SG
405 std::set<unsigned int> sgi;
406 for (auto sgat : dp_mol->getStereoGroups()[ivi].getAtoms()) {
407 sgi.insert(dp_atoms[sgat->getIdx()].index);
408 }
409 std::set<unsigned int> sgj;
410 for (auto sgat : dp_mol->getStereoGroups()[ivj].getAtoms()) {
411 sgj.insert(dp_atoms[sgat->getIdx()].index);
412 }
413 if (sgi < sgj) {
414 return -1;
415 } else if (sgi > sgj) {
416 return 1;
417 }
418 }
419 } else {
420 // if there's no stereogroup, then use whatever atom stereochem is
421 // specfied:
422 ivi = 0;
423 ivj = 0;
424 // can't actually use values here, because they are arbitrary
425 ivi = dp_atoms[i].atom->getChiralTag() != 0;
426 ivj = dp_atoms[j].atom->getChiralTag() != 0;
427 if (ivi < ivj) {
428 return -1;
429 } else if (ivi > ivj) {
430 return 1;
431 }
432 // stereo set
433 if (ivi && ivj) {
434 if (ivi) {
435 ivi = getChiralRank(dp_mol, dp_atoms, i);
436 }
437 if (ivj) {
438 ivj = getChiralRank(dp_mol, dp_atoms, j);
439 }
440 if (ivi < ivj) {
441 return -1;
442 } else if (ivi > ivj) {
443 return 1;
444 }
445 }
446 }
447 }
448
449 if (df_useChiralityRings) {
450 // ring stereochemistry
451 ivi = getAtomRingNbrCode(i);
452 ivj = getAtomRingNbrCode(j);
453 if (ivi < ivj) {
454 return -1;
455 } else if (ivi > ivj) {
456 return 1;
457 } // bond stereo is taken care of in the neighborhood comparison
458 }
459 return 0;
460 }
461
462 public:
463 Canon::canon_atom *dp_atoms{nullptr};
464 const ROMol *dp_mol{nullptr};
465 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
466 *dp_bondsInPlay{nullptr};
467 bool df_useNbrs{false};
468 bool df_useIsotopes{true};
469 bool df_useChirality{true};
470 bool df_useChiralityRings{true};
471
474 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
475 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
476 : dp_atoms(atoms),
477 dp_mol(&m),
478 dp_atomsInPlay(atomsInPlay),
479 dp_bondsInPlay(bondsInPlay),
480 df_useNbrs(false),
481 df_useIsotopes(true),
482 df_useChirality(true),
483 df_useChiralityRings(true) {}
484 int operator()(int i, int j) const {
485 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
486 return 0;
487 }
488 int v = basecomp(i, j);
489 if (v) {
490 return v;
491 }
492
493 if (df_useNbrs) {
494 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
495 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
496 }
497 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
498 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
499 }
500
501 for (unsigned int ii = 0;
502 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
503 ++ii) {
504 int cmp =
505 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
506 if (cmp) {
507 return cmp;
508 }
509 }
510
511 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
512 return -1;
513 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
514 return 1;
515 }
516 }
517 return 0;
518 }
519};
520
521/*
522 * A compare function to discriminate chiral atoms, similar to the CIP rules.
523 * This functionality is currently not used.
524 */
525
526const unsigned int ATNUM_CLASS_OFFSET = 10000;
528 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
529 for (unsigned j = 0; j < nbrs.size(); ++j) {
530 unsigned int nbrIdx = nbrs[j].nbrIdx;
531 if (nbrIdx == ATNUM_CLASS_OFFSET) {
532 // Ignore the Hs
533 continue;
534 }
535 const Atom *nbr = dp_atoms[nbrIdx].atom;
536 nbrs[j].nbrSymClass =
537 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
538 }
539 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
540 // FIX: don't want to be doing this long-term
541 }
542
543 int basecomp(int i, int j) const {
544 PRECONDITION(dp_atoms, "no atoms");
545 unsigned int ivi, ivj;
546
547 // always start with the current class:
548 ivi = dp_atoms[i].index;
549 ivj = dp_atoms[j].index;
550 if (ivi < ivj) {
551 return -1;
552 } else if (ivi > ivj) {
553 return 1;
554 }
555
556 // move onto atomic number
557 ivi = dp_atoms[i].atom->getAtomicNum();
558 ivj = dp_atoms[j].atom->getAtomicNum();
559 if (ivi < ivj) {
560 return -1;
561 } else if (ivi > ivj) {
562 return 1;
563 }
564
565 // isotopes:
566 ivi = dp_atoms[i].atom->getIsotope();
567 ivj = dp_atoms[j].atom->getIsotope();
568 if (ivi < ivj) {
569 return -1;
570 } else if (ivi > ivj) {
571 return 1;
572 }
573
574 // atom stereochem:
575 ivi = 0;
576 ivj = 0;
577 std::string cipCode;
578 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
579 cipCode)) {
580 ivi = cipCode == "R" ? 2 : 1;
581 }
582 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
583 cipCode)) {
584 ivj = cipCode == "R" ? 2 : 1;
585 }
586 if (ivi < ivj) {
587 return -1;
588 } else if (ivi > ivj) {
589 return 1;
590 }
591
592 // bond stereo is taken care of in the neighborhood comparison
593 return 0;
594 }
595
596 public:
597 Canon::canon_atom *dp_atoms{nullptr};
598 const ROMol *dp_mol{nullptr};
599 bool df_useNbrs{false};
602 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
603 int operator()(int i, int j) const {
604 PRECONDITION(dp_atoms, "no atoms");
605 PRECONDITION(dp_mol, "no molecule");
606 PRECONDITION(i != j, "bad call");
607 int v = basecomp(i, j);
608 if (v) {
609 return v;
610 }
611
612 if (df_useNbrs) {
613 getAtomNeighborhood(dp_atoms[i].bonds);
614 getAtomNeighborhood(dp_atoms[j].bonds);
615
616 // we do two passes through the neighbor lists. The first just uses the
617 // atomic numbers (by passing the optional 10000 to bondholder::compare),
618 // the second takes the already-computed index into account
619 for (unsigned int ii = 0;
620 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
621 ++ii) {
622 int cmp = bondholder::compare(
623 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
624 if (cmp) {
625 return cmp;
626 }
627 }
628 for (unsigned int ii = 0;
629 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
630 ++ii) {
631 int cmp =
632 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
633 if (cmp) {
634 return cmp;
635 }
636 }
637 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
638 return -1;
639 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
640 return 1;
641 }
642 }
643 return 0;
644 }
645};
646
647/*
648 * Basic canonicalization function to organize the partitions which will be
649 * sorted next.
650 * */
651
652template <typename CompareFunc>
653void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
654 int mode, int *order, int *count, int &activeset,
655 int *next, int *changed, char *touchedPartitions) {
656 unsigned int nAtoms = mol.getNumAtoms();
657 int partition;
658 int symclass = 0;
659 int *start;
660 int offset;
661 int index;
662 int len;
663 int i;
664 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
665
666 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
667 while (activeset != -1) {
668 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
669 // std::cerr<<" next: ";
670 // for(unsigned int ii=0;ii<nAtoms;++ii){
671 // std::cerr<<ii<<":"<<next[ii]<<" ";
672 // }
673 // std::cerr<<std::endl;
674 // for(unsigned int ii=0;ii<nAtoms;++ii){
675 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
676 // "<<atoms[order[ii]].index<<std::endl;
677 // }
678
679 partition = activeset;
680 activeset = next[partition];
681 next[partition] = -2;
682
683 len = count[partition];
684 offset = atoms[partition].index;
685 start = order + offset;
686 // std::cerr<<"\n\n**************************************************************"<<std::endl;
687 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
688 // for(unsigned int ii=0;ii<len;++ii){
689 // std::cerr<<" "<<order[offset+ii]+1;
690 // }
691 // std::cerr<<std::endl;
692 // for(unsigned int ii=0;ii<nAtoms;++ii){
693 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
694 // "<<atoms[order[ii]].index<<std::endl;
695 // }
696 hanoisort(start, len, count, changed, compar);
697 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
698 // std::cerr<<" result:";
699 // for(unsigned int ii=0;ii<nAtoms;++ii){
700 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
701 // "<<atoms[order[ii]].index<<std::endl;
702 // }
703 for (int k = 0; k < len; ++k) {
704 changed[start[k]] = 0;
705 }
706
707 index = start[0];
708 // std::cerr<<" len:"<<len<<" index:"<<index<<"
709 // count:"<<count[index]<<std::endl;
710 for (i = count[index]; i < len; i++) {
711 index = start[i];
712 if (count[index]) {
713 symclass = offset + i;
714 }
715 atoms[index].index = symclass;
716 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
717 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
718 // activeset=index;
719 //}
720 for (unsigned j = 0; j < atoms[index].degree; ++j) {
721 changed[atoms[index].nbrIds[j]] = 1;
722 }
723 }
724 // std::cerr<<std::endl;
725
726 if (mode) {
727 index = start[0];
728 for (i = count[index]; i < len; i++) {
729 index = start[i];
730 for (unsigned j = 0; j < atoms[index].degree; ++j) {
731 unsigned int nbor = atoms[index].nbrIds[j];
732 touchedPartitions[atoms[nbor].index] = 1;
733 }
734 }
735 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
736 if (touchedPartitions[ii]) {
737 partition = order[ii];
738 if ((count[partition] > 1) && (next[partition] == -2)) {
739 next[partition] = activeset;
740 activeset = partition;
741 }
742 touchedPartitions[ii] = 0;
743 }
744 }
745 }
746 }
747} // end of RefinePartitions()
748
749template <typename CompareFunc>
750void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
751 int mode, int *order, int *count, int &activeset, int *next,
752 int *changed, char *touchedPartitions) {
753 unsigned int nAtoms = mol.getNumAtoms();
754 int partition;
755 int offset;
756 int index;
757 int len;
758 int oldPart = 0;
759
760 for (unsigned int i = 0; i < nAtoms; i++) {
761 partition = order[i];
762 oldPart = atoms[partition].index;
763 while (count[partition] > 1) {
764 len = count[partition];
765 offset = atoms[partition].index + len - 1;
766 index = order[offset];
767 atoms[index].index = offset;
768 count[partition] = len - 1;
769 count[index] = 1;
770
771 // test for ions, water molecules with no
772 if (atoms[index].degree < 1) {
773 continue;
774 }
775 for (unsigned j = 0; j < atoms[index].degree; ++j) {
776 unsigned int nbor = atoms[index].nbrIds[j];
777 touchedPartitions[atoms[nbor].index] = 1;
778 changed[nbor] = 1;
779 }
780
781 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
782 if (touchedPartitions[ii]) {
783 int npart = order[ii];
784 if ((count[npart] > 1) && (next[npart] == -2)) {
785 next[npart] = activeset;
786 activeset = npart;
787 }
788 touchedPartitions[ii] = 0;
789 }
790 }
791 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
792 changed, touchedPartitions);
793 }
794 // not sure if this works each time
795 if (atoms[partition].index != oldPart) {
796 i -= 1;
797 }
798 }
799} // end of BreakTies()
800
802 int *order, int *count,
803 canon_atom *atoms);
804
805RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
806 int *count, int &activeset,
807 int *next, int *changed);
808
810 std::vector<unsigned int> &res,
811 bool breakTies = true,
812 bool includeChirality = true,
813 bool includeIsotopes = true);
814
816 const ROMol &mol, std::vector<unsigned int> &res,
817 const boost::dynamic_bitset<> &atomsInPlay,
818 const boost::dynamic_bitset<> &bondsInPlay,
819 const std::vector<std::string> *atomSymbols,
820 const std::vector<std::string> *bondSymbols, bool breakTies,
821 bool includeChirality, bool includeIsotope);
822
824 const ROMol &mol, std::vector<unsigned int> &res,
825 const boost::dynamic_bitset<> &atomsInPlay,
826 const boost::dynamic_bitset<> &bondsInPlay,
827 const std::vector<std::string> *atomSymbols = nullptr,
828 bool breakTies = true, bool includeChirality = true,
829 bool includeIsotopes = true) {
830 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
831 breakTies, includeChirality, includeIsotopes);
832};
833
835 std::vector<unsigned int> &res);
836
838 std::vector<Canon::canon_atom> &atoms,
839 bool includeChirality = true);
840
841namespace detail {
843 std::vector<Canon::canon_atom> &atoms,
844 bool includeChirality,
845 const std::vector<std::string> *atomSymbols,
846 const std::vector<std::string> *bondSymbols,
847 const boost::dynamic_bitset<> &atomsInPlay,
848 const boost::dynamic_bitset<> &bondsInPlay,
849 bool needsInit);
850void freeCanonAtoms(std::vector<Canon::canon_atom> &atoms);
851template <typename T>
852void rankWithFunctor(T &ftor, bool breakTies, int *order,
853 bool useSpecial = false, bool useChirality = false,
854 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
855 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
856
857} // namespace detail
858
859} // namespace Canon
860} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition: Atom.h:68
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:126
BondType
the type of Bond
Definition: Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition: Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:473
int operator()(int i, int j) const
Definition: new_canon.h:484
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:601
int operator()(int i, int j) const
Definition: new_canon.h:603
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:147
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:204
const std::vector< StereoGroup > & getStereoGroups() const
Gets a reference to the groups of atoms with relative stereochemistry.
Definition: ROMol.h:781
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:415
CXXAtomIterator< const MolGraph, Atom *const, MolGraph::adjacency_iterator > atomNeighbors(Atom const *at) const
Definition: ROMol.h:284
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:225
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
void freeCanonAtoms(std::vector< Canon::canon_atom > &atoms)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:526
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:750
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:653
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
Definition: Abbreviations.h:19
StereoGroupType
Definition: StereoGroup.h:29
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition: utils.h:54
const std::string * p_symbol
Definition: new_canon.h:41
Bond::BondType bondType
Definition: new_canon.h:34
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:64
bool operator<(const bondholder &o) const
Definition: new_canon.h:63
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:53
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:46
unsigned int bondStereo
Definition: new_canon.h:35
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:68
unsigned int nbrSymClass
Definition: new_canon.h:37
std::vector< bondholder > bonds
Definition: new_canon.h:114
std::vector< int > revistedNeighbors
Definition: new_canon.h:113
std::vector< int > neighborNum
Definition: new_canon.h:112