#ifndef _TIPSDATABASE_H #define _TIPSDATABASE_H #include "Database.h" #include "FilterGroup.h" #include "Domain.h" #include "DBObjects.h" #include "Streams.h" #include "Parsers.h" class Domain; //field MACROS for loadFilters(...) const; #define DBF_FILTERS_FILTERID 0, (int)0 #define DBF_FILTERS_RUNORDER 1, (int)0 #define DBF_FILTERS_REGEX 2, (char*)0 #define DBF_FILTERS_REGEXCEPT 3, (char*)0 #define DBF_FILTERS_REPLACEMENT 4, (char*)0 #define DBF_FILTERS_DESC 5, (char*)0 #define DBF_FILTERS_FLAGS 6, (int)0 #define DBF_FILTERS_MULTIPASS 7, (bool)0 //DBObject types for the loadEntities(StringMapCI *entities, char **sError) const; #define DBF_OBJECT_NOT 0 #define DBF_OBJECT_COMPANY 1 #define DBF_OBJECT_COUNTRY 2 #define DBF_OBJECT_PERSON 3 #define DBF_OBJECT_PRODUCT 4 #define DBF_OBJECT_PRODUCTTYPE 5 //Parser area types #define DBF_PAGEGROUP_TYPE_COMPANIES DBF_OBJECT_COMPANY #define DBF_PAGEGROUP_TYPE_COUNTRIES DBF_OBJECT_COUNTRY #define DBF_PAGEGROUP_TYPE_PEOPLE DBF_OBJECT_PERSON #define DBF_PAGEGROUP_TYPE_PRODUCTS DBF_OBJECT_PRODUCT #define DBF_PAGEGROUP_TYPE_PRODUCTTYPES DBF_OBJECT_PRODUCTTYPE #define DBF_PAGEGROUP_TYPE_NEWS 1001 #define DBF_PAGEGROUP_TYPE_BLOG 1002 //field MACROS for getExternalSources(const char *sql, vector *domains) const; #define DBF_DOMAINS_DOMAINID 0, (int)0 #define DBF_DOMAINS_DOMAIN 1, (char*)0 //internet domain name #define DBF_DOMAINS_STARTPAGE 2, (char*)0 //where the Spider should start browsing #define DBF_DOMAINS_AREA 3, (char*)0 //area of the website to browse #define DBF_DOMAINS_ROOTPAGETITLE 4, (char*)0 // of the root page #define DBF_DOMAINS_EXTERNALSOURCETYPEID 5, (int)0 //website //page groups part #define DBF_DOMAINS_PAGEGROUPID 0, (int)0 #define DBF_DOMAINS_PAGEGROUPFILTER 1, (char*)0 #define DBF_DOMAINS_EXTERNALSOURCEID 2, (int)0 #define DBF_DOMAINS_CLEANERFILTER_HEADER 3, (char*)0 #define DBF_DOMAINS_SECTIONFILTER 4, (char*)0 #define DBF_DOMAINS_OUTPUTFILTER 5, (char*)0 #define DBF_DOMAINS_ENABLE 6, (bool)0 #define DBF_DOMAINS_DESCRIPTION 7, (char*)0 #define DBF_DOMAINS_PARSERTYPE 8, (int)0 //HTMLArticleParser, PDFParser, etc. (see below) #define DBF_DOMAINS_HEADINGFILTER_LEVEL1 9, (char*)0 #define DBF_DOMAINS_HEADINGFILTER_LEVEL2 10, (char*)0 #define DBF_DOMAINS_EXCEPTIONSREGXX 11, (char*)0 #define DBF_DOMAINS_TYPEID 12, (int)0 //magazine articles, news, press, campaign, mixed, other, publications #define DBF_DOMAINS_CLEANERFILTER_FOOTER 13, (char*)0 #define DBF_DOMAINS_CLEANERFILTER_OTHER 14, (char*)0 //Parser type ids #define DBF_PARSERTYPE_HTMLGENERALPARSER 0 #define DBF_PARSERTYPE_HTMLARTICLEPARSER 1 #define DBF_PARSERTYPE_PDFARTICLEPARSER 2 #define DBF_PARSERTYPE_HTMLOBJECTPARSER 3 //Parser settings #define DBF_PARSERSETTING_PAGEGROUPID 0, (int)0 #define DBF_PARSERSETTING_REGEX 1, (char*)0 #define DBF_PARSERSETTING_PROPERTYID 2, (int)0 #define DBF_PARSERSETTING_TYPE 3, (int)0 #define DBF_PARSERSETTING_REQUIRED 4, (bool)0 //field MACROS for loadEntities(StringMapCI<unsigned int> *entities, char **sError) const; #define DBF_QUALIFIERSET_ENTITYID 0, (int)0 #define DBF_QUALIFIERSET_TYPE 1, (int)0 #define DBF_QUALIFIERSET_NAME 2, (char*)0 #define DBF_QUALIFIERSET_COMMONWORD 3, (bool)0 //MD5s #define DBF_MD5 0, (char*)0 class TIPsDatabase: private Character { static Database *m_db; static const char *m_feederid; //using a string because we send back a string in requests const char *registerFeeder(char **sError = 0) const; int loadFilter(Filter **f, const char *procedure) const; int loadFilter(Filter **f, RecordSet *recordset) const; int loadFilters(FilterGroup *fg, const char *procedure, const FilterGroup::enmConflictMode iConflictMode=FilterGroup::conflict_includeAll) const; int loadFilters(FilterGroup *fg, RecordSet *recordset, const FilterGroup::enmConflictMode iConflictMode=FilterGroup::conflict_includeAll) const; public: TIPsDatabase(Database *_db); ~TIPsDatabase(); int loadExternalSources(const char *sql, vector<Domain*> *domains, char **sError = 0); int loadEntities(StringMultiMapCI<DBEntity*> *entities, char **sError = 0); int loadMD5s(StringMap<char> *md5s, char **sError = 0); int loadTagFilters(FilterGroup *fg) const; int loadHTMLObjectParserSettings(const int pagegroupid, FilterGroup *settings, char **sError = 0) const; int save(Summary *object, char **sError = 0); int save(Article *object, char **sError = 0); int save(Company *object, char **sError = 0); int save(Product *object, char **sError = 0); int save(Country *object, char **sError = 0); int save(ProductType *object, char **sError = 0); int save(Person *object, char **sError = 0); //utilities static size_t multiValueToVector(const char *multivalue, vector<int> *v); static size_t multiValueToVector(const char *multivalue, vector<unsigned int> *v); static size_t multiValueToVector(const char *multivalue, vector<const char*> *v); static const char *vectorToMultiValue(const vector<unsigned int> *v); static const char *vectorToMultiValue(const vector<int> *v); static const char *vectorToMultiValue(const vector<const char*> *v); static const char *vectorToMultiValues(const vector<const char*> *v, const char delimiter = ' ', const bool appendArraySizer = true); static const char *delimitedToMultiValue(const char *value, const char delimiter = ','); static const char *commaspaceDelimitedToMultiValue(const char *value); static const char *commaDelimitedToMultiValue(const char *value) {return delimitedToMultiValue(value, ',');} static const char *spaceDelimitedToMultiValue(const char *value) {return delimitedToMultiValue(value, ' ');} //exceptions class FilterNotFound {}; class NoObjectType {}; //type = 0 class UnknownObjectType { //type not registered const int m_type; public: UnknownObjectType(const int _type): m_type(_type) {} }; class UnknownParserType { const int m_type; public: UnknownParserType(const int _type): m_type(_type) {} }; }; #endif