#include
#include "HTMLPage.h"
// Mutex held by the HTMLPage constructor while it checks and, if still empty,
// populates m_fgURLs. Statically initialized via PTHREAD_MUTEX_INITIALIZER.
pthread_mutex_t HTMLPage::m_hFirst_mutex = PTHREAD_MUTEX_INITIALIZER;
// Class-wide group of URL-extraction filters: filled by the constructor when
// size() is zero and passed by address to InternetResource::links() from
// HTMLPage::links().
FilterGroup HTMLPage::m_fgURLs;
/**
 * Constructs an HTMLPage and, if the class-wide URL filter group is still
 * empty, populates it.
 *
 * Every argument is forwarded unchanged to the InternetResource constructor;
 * a literal 0 is passed in the slot between _ifModifiedSince and
 * _manageBuffer.
 *
 * The static filter group m_fgURLs is checked and filled while holding
 * m_hFirst_mutex. The mutex is held through a scope guard so that it is
 * released even if building or adding a Filter throws; with a bare
 * lock()/unlock() pair an exception would leave the mutex locked and block
 * every later HTMLPage construction.
 *
 * @param _db              forwarded to InternetResource
 * @param _ir              forwarded to InternetResource
 * @param _body            forwarded to InternetResource
 * @param _responseCode    forwarded to InternetResource
 * @param _ifModifiedSince forwarded to InternetResource
 * @param _manageBuffer    forwarded to InternetResource
 */
HTMLPage::HTMLPage(TIPsDatabase *_db, InternetURIRequest *_ir, const char *_body, const int _responseCode, const char *_ifModifiedSince, const bool _manageBuffer):
  InternetResource(_db, _ir, _body, _responseCode, _ifModifiedSince, 0, _manageBuffer)
{
  // Minimal scope guard over a pthread mutex: locks on construction, unlocks
  // on destruction (normal exit or stack unwinding). Used only below and
  // never copied.
  struct FirstLoadLock {
    pthread_mutex_t *m_mutex;
    explicit FirstLoadLock(pthread_mutex_t *mutex): m_mutex(mutex) { pthread_mutex_lock(m_mutex); }
    ~FirstLoadLock() { pthread_mutex_unlock(m_mutex); }
  };

  {
    FirstLoadLock guard(&m_hFirst_mutex);

    //these will be cleared on destruction of the program (static)
    if (!m_fgURLs.size()) {
      DEBUGPRINT("[%s]: HTMLPage static load of [fltsURLs]", DEBUG_LINE, m_ir->domain()->m_domain);
      // One filter per kind of URL source; the trailing description string
      // names what each pattern targets.
      // NOTE(review): the meaning of the leading numeric arguments and of the
      // 0 / false arguments is defined by Filter - confirm against its declaration.
      // NOTE(review): the '.' in "document.location" and "window.open" is
      // unescaped, so it presumably matches any character; the patterns are
      // left untouched here to preserve existing matching behavior.
      m_fgURLs.addFilter(new Filter(58,10,"<[^>]+ href *= *['\"]?([^\"' <>]{1,1024})","\\1",0,"any href (a, image map, etc)",regex::NOCASE,false));
      m_fgURLs.addFilter(new Filter(59,20,"document.location *= *['\"]([^\"' <>,)]{1,1024})","\\1",0,"javascript",regex::NOCASE,false));
      m_fgURLs.addFilter(new Filter(60,30,"<[^>]+ src *= *['\"]?([^\"' <>]{1,1024})","\\1",0,"src (iframe, frame, link, etc)",regex::NOCASE,false));
      m_fgURLs.addFilter(new Filter(68,40,"window.open\\( *['\"]?([^\"'<>, )]{1,1024})","\\1",0,"popups",regex::NOCASE,false));
      m_fgURLs.addFilter(new Filter(70,50,"http://[^ \"'<>]{1,1024}","\\0",0,"absolute links",regex::NOCASE,false));
    }
  } // guard destroyed here: mutex released before the memory accounting call

  memoryDelta(static_cast<int>(sizeof(HTMLPage)), this); //inform the domain which will, in turn, inform the Spider
}
/**
 * Destructor: reports the release of this object's footprint by calling
 * memoryDelta with the negated sizeof(HTMLPage), mirroring the positive
 * delta reported by the constructor.
 */
HTMLPage::~HTMLPage() {
  const int footprint = static_cast<int>(sizeof(HTMLPage));
  // inform the domain which will, in turn, inform the Spider
  memoryDelta(-footprint, this);
}
/**
 * Returns the result of InternetResource::links() when given the address of
 * the class-wide URL filter group m_fgURLs.
 *
 * The filter group is read here without taking m_hFirst_mutex.
 */
vector *HTMLPage::links() {
  // Explicitly qualified call into the base-class implementation.
  return this->InternetResource::links(&HTMLPage::m_fgURLs);
}