10#ifndef GENERAL_FILE_READER_H
11#define GENERAL_FILE_READER_H
13#include <RDStreams/streams.h>
15#include <boost/algorithm/string.hpp>
26namespace GeneralMolSupplier {
46 "sdf",
"mae",
"maegz",
"sdfgz",
"smi",
"csv",
"txt",
"tsv",
"tdt"};
55 std::string& compressionFormat) {
60 if (boost::algorithm::iends_with(path,
".maegz")) {
62 compressionFormat =
"gz";
64 }
else if (boost::algorithm::iends_with(path,
".sdfgz")) {
66 compressionFormat =
"gz";
68 }
else if (boost::algorithm::iends_with(path,
".gz")) {
69 compressionFormat =
"gz";
70 basename = path.substr(0, path.size() - 3);
71 }
else if (boost::algorithm::iends_with(path,
".zst") ||
72 boost::algorithm::iends_with(path,
".bz2") ||
73 boost::algorithm::iends_with(path,
".7z")) {
75 "Unsupported compression extension (.zst, .bz2, .7z) given path: " +
79 compressionFormat =
"";
82 if (boost::algorithm::iends_with(basename,
"." + suffix)) {
88 "Unsupported structure or compression extension given path: " + path);
98std::unique_ptr<MolSupplier>
getSupplier(
const std::string& path,
100 std::string fileFormat =
"";
101 std::string compressionFormat =
"";
106 if (compressionFormat.empty()) {
107 strm =
new std::ifstream(path.c_str(), std::ios::in | std::ios::binary);
109#ifdef RDK_USE_BOOST_IOSTREAMS
110 strm =
new gzstream(path);
113 "compressed files are only supported if the RDKit is built with boost::iostreams support");
118 if (fileFormat ==
"sdf") {
119#ifdef RDK_BUILD_THREADSAFE_SSS
121 MultithreadedSDMolSupplier* sdsup =
new MultithreadedSDMolSupplier(
124 std::unique_ptr<MolSupplier> p(sdsup);
130 std::unique_ptr<MolSupplier> p(sdsup);
134 else if (fileFormat ==
"smi" || fileFormat ==
"csv" || fileFormat ==
"txt" ||
135 fileFormat ==
"tsv") {
136#ifdef RDK_BUILD_THREADSAFE_SSS
138 MultithreadedSmilesMolSupplier* smsup =
139 new MultithreadedSmilesMolSupplier(
142 std::unique_ptr<MolSupplier> p(smsup);
149 std::unique_ptr<MolSupplier> p(smsup);
152#ifdef RDK_BUILD_MAEPARSER_SUPPORT
153 else if (fileFormat ==
"mae") {
154 MaeMolSupplier* maesup =
156 std::unique_ptr<MolSupplier> p(maesup);
160 else if (fileFormat ==
"tdt") {
163 std::unique_ptr<MolSupplier> p(tdtsup);
used by various file parsing classes to indicate a bad file
lazy file parser for Smiles tables
lazy file parser for TDT files
const std::vector< std::string > supportedCompressionFormats
currently supported compression formats
const std::vector< std::string > supportedFileFormats
currently supported file formats
void determineFormat(const std::string path, std::string &fileFormat, std::string &compressionFormat)
std::unique_ptr< MolSupplier > getSupplier(const std::string &path, const struct SupplierOptions &opt)
unsigned int numWriterThreads