#ifndef _HTMLPAGE_H
#define _HTMLPAGE_H
class Domain;
#include "InternetResource.h"
#include "Domain.h"
#include <vector>
using namespace std;
//Page is controlled by Domain. Spider plays with them but Page does not know about Spider
// An InternetResource that represents an HTML page and exposes its sub-page links.
class HTMLPage: public InternetResource {
    // Mutex meant to let only the first instance initialise the static members.
    static pthread_mutex_t m_hFirst_mutex;
    // Filter group used when analysing the page for links.
    static FilterGroup m_fgURLs;

    // Parameter parts for incomplete urls
    // (protocol:\1 domain:\2 port:\3 href:\4 folder:\5 file:\6 query:\7).
    // The enumerator values line up with those numbers; `whole` (0) presumably
    // refers to the entire match - confirm against the code that uses m_fgURLs.
    enum urlParts {
        whole = 0,
        protocol,
        domain,
        port,
        href,
        folder,
        file,
        query
    };

    // The report classes are granted access to the private members above.
    friend class Report_Full;
    friend class Report_DomainSummary;

public:
    // Builds a page from a request and its response body.
    //   _db              database handle
    //   _ir              the request this page answers
    //   _body            response body text
    //   _responseCode    response status code (defaults to 0)
    //   _ifModifiedSince optional value, defaults to a null pointer
    //   _manageBuffer    defaults to false
    // NOTE(review): _manageBuffer presumably decides whether the page takes
    // ownership of _body - confirm in the implementation file.
    HTMLPage(TIPsDatabase *_db, InternetURIRequest *_ir, const char *_body, const int _responseCode = 0, const char *_ifModifiedSince = 0, const bool _manageBuffer = false);
    ~HTMLPage();

    // Returns the links of this page.
    // NOTE(review): `vector` carries no template argument here, so the element
    // type and the ownership of the returned pointer cannot be determined from
    // this header - confirm against the implementation.
    vector *links();

    // True if the document is valid for parsing: the response code is 200 and
    // the size is greater than 400. Declared const because it only reads state,
    // which lets it be called on const pages as well.
    bool validDocument() const {return m_responseCode == 200 && m_size > 400;}
};
#endif