#ifndef _HTMLPAGE_H #define _HTMLPAGE_H class Domain; #include "InternetResource.h" #include "Domain.h" #include using namespace std; //Page is controlled by Domain. Spider plays with them but Page does not know about Spider class HTMLPage: public InternetResource { //sub-page links static pthread_mutex_t m_hFirst_mutex; //MUTEX to let only the first init the static vars static FilterGroup m_fgURLs; //for analysing the page for links //parameter parts for incomplete urls (protocol:\1 domain:\2 port:\3 href:\4 folder:\5 file:\6 query:\7) enum urlParts { whole = 0, protocol, domain, port, href, folder, file, query }; friend class Report_Full; friend class Report_DomainSummary; public: HTMLPage(TIPsDatabase *_db, InternetURIRequest *_ir, const char *_body, const int _responseCode = 0, const char *_ifModifiedSince = 0, const bool _manageBuffer = false); ~HTMLPage(); vector *links(); bool validDocument() {return m_responseCode == 200 && m_size > 400;} //if the document is valid for parsing }; #endif