33#ifndef RDK_SUBSTRUCT_LIBRARY
34#define RDK_SUBSTRUCT_LIBRARY
52#include <boost/lexical_cast.hpp>
73 virtual boost::shared_ptr<ROMol>
getMol(
unsigned int)
const = 0;
76 virtual unsigned int size()
const = 0;
85 std::vector<boost::shared_ptr<ROMol>> mols;
91 mols.push_back(boost::make_shared<ROMol>(m));
95 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
96 if (idx >= mols.size()) {
102 unsigned int size()
const override {
103 return rdcast<unsigned int>(mols.size());
106 std::vector<boost::shared_ptr<ROMol>> &
getMols() {
return mols; }
107 const std::vector<boost::shared_ptr<ROMol>> &
getMols()
const {
return mols; }
119 std::vector<std::string> mols;
126 MolPickler::pickleMol(m, mols.back());
133 mols.push_back(pickle);
137 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
138 if (idx >= mols.size()) {
141 boost::shared_ptr<ROMol> mol(
new ROMol);
142 MolPickler::molFromPickle(mols[idx], mol.get());
146 unsigned int size()
const override {
147 return rdcast<unsigned int>(mols.size());
150 std::vector<std::string> &
getMols() {
return mols; }
151 const std::vector<std::string> &
getMols()
const {
return mols; }
165 std::vector<std::string> mols;
171 bool doIsomericSmiles =
true;
179 mols.push_back(smiles);
183 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
184 if (idx >= mols.size()) {
188 boost::shared_ptr<ROMol> mol(
SmilesToMol(mols[idx]));
192 unsigned int size()
const override {
193 return rdcast<unsigned int>(mols.size());
196 std::vector<std::string> &
getMols() {
return mols; }
197 const std::vector<std::string> &
getMols()
const {
return mols; }
216 std::vector<std::string> mols;
222 bool doIsomericSmiles =
true;
230 mols.push_back(smiles);
234 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
235 if (idx >= mols.size()) {
241 m->updatePropertyCache();
243 return boost::shared_ptr<ROMol>(m);
246 unsigned int size()
const override {
247 return rdcast<unsigned int>(mols.size());
250 std::vector<std::string> &
getMols() {
return mols; }
251 const std::vector<std::string> &
getMols()
const {
return mols; }
256 std::vector<ExplicitBitVect *> fps;
260 for (
size_t i = 0; i < fps.size(); ++i) {
265 virtual unsigned int size()
const {
return rdcast<unsigned int>(fps.size()); }
269 fps.push_back(makeFingerprint(m));
270 return rdcast<unsigned int>(fps.size() - 1);
278 return rdcast<unsigned int>(fps.size() - 1);
290 if (idx >= fps.size()) {
300 if (idx >= fps.size()) {
317 unsigned int numBits;
329 static const unsigned int DEFAULT_NUM_BITS = 2048;
330 return DEFAULT_NUM_BITS;
340 std::vector<unsigned int> *atomCounts =
nullptr;
342 const bool tautomericFingerprint =
true;
344 tautomericFingerprint);
357 virtual unsigned int addKey(
const std::string &) = 0;
361 virtual const std::string &
getKey(
unsigned int)
const = 0;
365 const std::vector<unsigned int> &indices)
const = 0;
367 virtual unsigned int size()
const = 0;
371 std::string propname;
372 std::vector<std::string> keys;
373 const std::string empty_string = {};
377 : propname(propname) {}
382 std::vector<std::string> &
getKeys() {
return keys; }
383 const std::vector<std::string> &
getKeys()
const {
return keys; }
387 if (m.getPropIfPresent(propname, key)) {
388 keys.push_back(std::move(key));
393 const static std::string prefix(
"LIBIDX-");
394 keys.emplace_back(prefix + boost::lexical_cast<std::string>(keys.size()));
396 return keys.size() - 1u;
399 unsigned int addKey(
const std::string &key)
override {
401 return keys.size() - 1u;
404 const std::string &
getKey(
unsigned int idx)
const override {
405 if (idx >= keys.size()) {
412 const std::vector<unsigned int> &indices)
const override {
413 std::vector<std::string> res;
414 std::transform(indices.begin(), indices.end(), std::back_inserter(res),
415 [=](
unsigned idx) { return keys.at(idx); });
418 unsigned int size()
const override {
return keys.size(); }
517 boost::shared_ptr<MolHolderBase> molholder;
518 boost::shared_ptr<FPHolderBase> fpholder;
519 boost::shared_ptr<KeyHolderBase> keyholder;
523 bool is_tautomerquery =
false;
524 std::vector<unsigned int> searchOrder;
531 mols(molholder.get()) {}
534 : molholder(std::move(molecules)),
537 mols(molholder.get()),
541 boost::shared_ptr<FPHolderBase> fingerprints)
542 : molholder(std::move(molecules)),
543 fpholder(std::move(fingerprints)),
545 mols(molholder.get()),
546 fps(fpholder.get()) {
547 if (fpholder.get() &&
549 is_tautomerquery =
true;
554 boost::shared_ptr<KeyHolderBase> keys)
555 : molholder(std::move(molecules)),
557 keyholder(std::move(keys)),
558 mols(molholder.get()),
560 if (fpholder.get() &&
562 is_tautomerquery =
true;
567 boost::shared_ptr<FPHolderBase> fingerprints,
568 boost::shared_ptr<KeyHolderBase> keys)
569 : molholder(std::move(molecules)),
570 fpholder(std::move(fingerprints)),
571 keyholder(std::move(keys)),
572 mols(molholder.get()),
573 fps(fpholder.get()) {
574 if (fpholder.get() &&
576 is_tautomerquery =
true;
583 mols(molholder.get()),
585 initFromString(pickle);
586 if (fpholder.get() &&
588 is_tautomerquery =
true;
600 boost::shared_ptr<FPHolderBase> &
getFpHolder() {
return fpholder; }
616 PRECONDITION(mols,
"Molecule holder NULL in SubstructLibrary");
639 if (!keyholder.get()) {
642 return *keyholder.get();
648 if (!keyholder.get()) {
651 return *keyholder.get();
676 template <
class Query>
678 bool recursionPossible =
true,
679 bool useChirality =
true,
680 bool useQueryQueryMatches =
false,
682 int maxResults = -1)
const {
687 return getMatches(query, 0, size(), params, numThreads, maxResults);
690 template <
class Query>
694 int maxResults = -1)
const {
695 return getMatches(query, 0, size(), params, numThreads, maxResults);
713 template <
class Query>
715 const Query &query,
unsigned int startIdx,
unsigned int endIdx,
716 bool recursionPossible =
true,
bool useChirality =
true,
717 bool useQueryQueryMatches =
false,
int numThreads = -1,
718 int maxResults = -1)
const {
723 return getMatches(query, startIdx, endIdx, params, numThreads, maxResults);
727 unsigned int startIdx,
731 int maxResults = -1)
const;
734 unsigned int startIdx,
738 int maxResults = -1)
const;
741 unsigned int startIdx,
745 int maxResults = -1)
const;
759 template <
class Query>
760 unsigned int countMatches(
const Query &query,
bool recursionPossible =
true,
761 bool useChirality =
true,
762 bool useQueryQueryMatches =
false,
763 int numThreads = -1)
const {
768 return countMatches(query, 0, size(), params, numThreads);
771 template <
class Query>
774 int numThreads = -1)
const {
775 return countMatches(query, 0, size(), params, numThreads);
794 template <
class Query>
796 unsigned int endIdx,
bool recursionPossible =
true,
797 bool useChirality =
true,
798 bool useQueryQueryMatches =
false,
799 int numThreads = -1)
const {
804 return countMatches(query, startIdx, endIdx, params, numThreads);
811 int numThreads = -1)
const;
816 int numThreads = -1)
const;
821 int numThreads = -1)
const;
835 template <
class Query>
836 bool hasMatch(
const Query &query,
bool recursionPossible =
true,
837 bool useChirality =
true,
bool useQueryQueryMatches =
false,
838 int numThreads = -1)
const {
843 return hasMatch(query, 0, size(), params, numThreads);
846 template <
class Query>
848 int numThreads = -1)
const {
849 return hasMatch(query, 0, size(), params, numThreads);
864 template <
class Query>
865 bool hasMatch(
const Query &query,
unsigned int startIdx,
unsigned int endIdx,
866 bool recursionPossible =
true,
bool useChirality =
true,
867 bool useQueryQueryMatches =
false,
int numThreads = -1)
const {
872 return hasMatch(query, startIdx, endIdx, params, numThreads);
875 bool hasMatch(
const ROMol &query,
unsigned int startIdx,
unsigned int endIdx,
877 int numThreads = -1)
const;
881 int numThreads = -1)
const;
885 int numThreads = -1)
const;
891 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const {
893 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
904 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
910 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
911 return rdcast<unsigned int>(molholder->size());
916 for (
const auto idx : order) {
917 if (idx >= mols->
size()) {
931 is_tautomerquery =
false;
932 mols = molholder.get();
933 fps = fpholder.get();
935 is_tautomerquery =
true;
Contains general bit-comparison and similarity operations.
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch(const char *probe, const char *ref)
#define PRECONDITION(expr, mess)
Defines a class for managing bundles of molecules.
pulls in the core RDKit functionality
a class for bit vectors that are densely occupied
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
Concrete class that holds binary cached molecules in memory.
std::vector< std::string > & getMols()
unsigned int size() const override
Get the current library size.
unsigned int addMol(const ROMol &m) override
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
const std::vector< std::string > & getMols() const
unsigned int addBinary(const std::string &pickle)
Concrete class that holds smiles strings in memory.
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
const std::vector< std::string > & getMols() const
unsigned int addMol(const ROMol &m) override
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
unsigned int size() const override
Get the current library size.
Concrete class that holds trusted smiles strings in memory.
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
unsigned int addMol(const ROMol &m) override
const std::vector< std::string > & getMols() const
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
unsigned int size() const override
Get the current library size.
CachedTrustedSmilesMolHolder()
Base FPI for the fingerprinter used to rule out impossible matches.
std::vector< ExplicitBitVect * > & getFingerprints()
unsigned int addMol(const ROMol &m)
Adds a molecule to the fingerprinter.
virtual unsigned int size() const
const std::vector< ExplicitBitVect * > & getFingerprints() const
bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const
Return false if a substructure search can never match the molecule.
unsigned int addFingerprint(ExplicitBitVect *v)
const ExplicitBitVect & getFingerprint(unsigned int idx) const
unsigned int addFingerprint(const ExplicitBitVect &v)
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const =0
KeyFromPropHolder(const std::string &propname="_Name")
const std::string & getKey(unsigned int idx) const override
unsigned int addKey(const std::string &key) override
unsigned int size() const override
Get the current keyholder size.
const std::vector< std::string > & getKeys() const
std::vector< std::string > & getKeys()
std::vector< std::string > getKeys(const std::vector< unsigned int > &indices) const override
const std::string & getPropName() const
unsigned int addMol(const ROMol &m) override
Add a key to the database getting it from the molecule.
std::string & getPropName()
virtual std::vector< std::string > getKeys(const std::vector< unsigned int > &indices) const =0
virtual const std::string & getKey(unsigned int) const =0
virtual unsigned int addMol(const ROMol &m)=0
Add a key to the database getting it from the molecule.
virtual unsigned int size() const =0
Get the current keyholder size.
virtual unsigned int addKey(const std::string &)=0
MolBundle contains a collection of related ROMols.
Base class API for holding molecules to substructure search.
virtual unsigned int addMol(const ROMol &m)=0
virtual unsigned int size() const =0
Get the current library size.
virtual boost::shared_ptr< ROMol > getMol(unsigned int) const =0
Concrete class that holds molecules in memory.
unsigned int addMol(const ROMol &m) override
const std::vector< boost::shared_ptr< ROMol > > & getMols() const
unsigned int size() const override
Get the current library size.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
std::vector< boost::shared_ptr< ROMol > > & getMols()
PatternHolder(unsigned int numBits)
const unsigned int & getNumBits() const
unsigned int & getNumBits()
static unsigned int defaultNumBits()
ExplicitBitVect * makeFingerprint(const ROMol &m) const override
Caller owns the vector!
RWMol is a molecule class that is intended to be edited.
Substructure Search a library of molecules.
unsigned int countMatches(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Return the number of matches for the query.
unsigned int addMol(const ROMol &mol)
Add a molecule to the library.
std::vector< unsigned int > getMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
void initFromStream(std::istream &ss)
initializes from a stream pickle
KeyHolderBase & getKeys()
Get the underlying key holder implementation.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Returns the molecule at the given index.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
bool hasMatch(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Returns true if any match exists for the query.
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints, boost::shared_ptr< KeyHolderBase > keys)
unsigned int countMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Return the number of matches for the query.
std::vector< unsigned int > getMatches(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
std::vector< unsigned int > getMatches(const Query &query, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
std::vector< unsigned int > getMatches(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const
Get the matching indices for the query.
const MolHolderBase & getMolecules() const
void initFromString(const std::string &text)
initializes from a string pickle
unsigned int countMatches(const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const
overload
const KeyHolderBase & getKeys() const
Get the underlying key holder implementation.
const boost::shared_ptr< KeyHolderBase > & getKeyHolder() const
Get the underlying key holder implementation.
bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
unsigned int countMatches(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
FPHolderBase & getFingerprints()
Get the underlying fingerprint implementation.
void setSearchOrder(const std::vector< unsigned int > &order)
does error checking
bool hasMatch(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
const FPHolderBase & getFingerprints() const
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< KeyHolderBase > keys)
bool hasMatch(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
unsigned int countMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
bool hasMatch(const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const
overload
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints)
boost::shared_ptr< KeyHolderBase > & getKeyHolder()
Get the underlying key holder implementation.
bool hasMatch(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
const std::vector< unsigned int > & getSearchOrder() const
void resetHolders()
access required for serialization
unsigned int size() const
return the number of molecules in the library
std::vector< unsigned int > getMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const
Get the matching indices for the query between the given indices.
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules)
SubstructLibrary(const std::string &pickle)
std::string Serialize() const
returns a string with a serialized (pickled) representation
unsigned int countMatches(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
std::vector< unsigned int > getMatches(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
void toStream(std::ostream &ss) const
serializes (pickles) to a stream
boost::shared_ptr< ROMol > operator[](unsigned int idx)
Returns the molecule at the given index.
const boost::shared_ptr< FPHolderBase > & getFpHolder() const
Get the underlying fingerprint holder implementation.
const boost::shared_ptr< MolHolderBase > & getMolHolder() const
std::vector< unsigned int > & getSearchOrder()
ExplicitBitVect * makeFingerprint(const ROMol &m) const override
Caller owns the vector!
TautomerPatternHolder(unsigned int numBits)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
#define RDKIT_SUBSTRUCTLIBRARY_EXPORT
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool tautomericFingerprint=false)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize()
bool recursionPossible
Allow recursive queries.
bool useQueryQueryMatches