#ifndef _INTERNETRESOURCE_H #define _INTERNETRESOURCE_H class Domain; #include "ContentType.h" #include "FilterGroup.h" #include "Domain.h" #include "ResourceUseEventSink.h" #include "InternetURIRequest.h" #include "Parsers.h" //#include SEE BELOW FOR MORE includes (all derived classes) #include using namespace std; //InternetResource is controlled by Domain. Spider plays with them but InternetResource does not know about Spider class InternetResource: public ResourceMonitor { //sub-InternetResource links protected: static TIPsDatabase *m_db; StringMap m_parameters; InternetURIRequest *m_ir; const char *m_ifModifiedSince; const char *m_body; //accessed with setBody and freeBody (which also alter status) int m_responseCode; //in case the indexer wants to ignore it static unsigned int m_count; const unsigned int m_id; unsigned int m_checkDigit; size_t m_size; const bool m_manageBuffer; InternetResource(TIPsDatabase *_db, InternetURIRequest *_ir, const char *_body, const int _responseCode, const char *_ifModifiedSince, unsigned int _checkDigit = 0, const bool _manageBuffer = false); static pthread_mutex_t m_hFirst_mutex; //MUTEX to let only the first init the static vars vector *links(FilterGroup *fg); //caller controls links vector friend class Report_Full; friend class Report_DomainSummary; public: //virtual destructors get wutomatically called when a derived class is destroyed virtual ~InternetResource(); //just in case it is overridden to release mem //accessors const char *domain() const; const char *absoluteURL() const; InternetURIRequest *internetURIRequest() const {return m_ir;} void freeBody(); //leaves the size in-tact const char *body() const {return m_body;} //const member initialised at construction from m_body size_t size() const {return m_size;} //const member initialised at construction from m_body virtual vector *links() {return 0;} virtual const size_t parse(vector *objects); virtual bool validDocument() {return false;} //if the document is valid for parsing static FilterGroup m_fgConc; //for rationalising links. Used by other classes //checkdigit functions (for infinite reccursion and similar/moving documents) const bool equals(const InternetResource *other) const {return m_checkDigit && other && m_checkDigit == other->m_checkDigit;} const bool operator==(const InternetResource *other) const {return equals(other);} const bool operator!=(const InternetResource *other) const {return !(this == other);} //only a debug function virtual const int writeToFile() const; }; //derived classes included here to allow Clients to access all with one include //requires complete InternetResource {} class definition #include "HTMLPage.h" #include "JavaScript.h" #include "XMLPage.h" #include "PDF.h" #include "Word.h" #include "CSS.h" #endif