#include #include "HTMLPage.h" pthread_mutex_t HTMLPage::m_hFirst_mutex = PTHREAD_MUTEX_INITIALIZER; FilterGroup HTMLPage::m_fgURLs; HTMLPage::HTMLPage(TIPsDatabase *_db, InternetURIRequest *_ir, const char *_body, const int _responseCode, const char *_ifModifiedSince, const bool _manageBuffer): InternetResource(_db, _ir, _body, _responseCode, _ifModifiedSince, 0, _manageBuffer) { //these will be cleared on destruction of the program (static) pthread_mutex_lock(&m_hFirst_mutex); if (!m_fgURLs.size()) { DEBUGPRINT("[%s]: HTMLPage static load of [fltsURLs]", DEBUG_LINE, m_ir->domain()->m_domain); m_fgURLs.addFilter(new Filter(58,10,"<[^>]+ href *= *['\"]?([^\"' <>]{1,1024})","\\1",0,"any href (a, image map, etc)",regex::NOCASE,false)); m_fgURLs.addFilter(new Filter(59,20,"document.location *= *['\"]([^\"' <>,)]{1,1024})","\\1",0,"javascript",regex::NOCASE,false)); m_fgURLs.addFilter(new Filter(60,30,"<[^>]+ src *= *['\"]?([^\"' <>]{1,1024})","\\1",0,"src (iframe, frame, link, etc)",regex::NOCASE,false)); m_fgURLs.addFilter(new Filter(68,40,"window.open\\( *['\"]?([^\"'<>, )]{1,1024})","\\1",0,"popups",regex::NOCASE,false)); m_fgURLs.addFilter(new Filter(70,50,"http://[^ \"'<>]{1,1024}","\\0",0,"absolute links",regex::NOCASE,false)); // } pthread_mutex_unlock(&m_hFirst_mutex); memoryDelta((int)sizeof(HTMLPage), this); //inform the domain which will, in turn, inform the Spider } HTMLPage::~HTMLPage() { memoryDelta(-(int)sizeof(HTMLPage), this); //inform the domain which will, in turn, inform the Spider } vector *HTMLPage::links() { return InternetResource::links(&m_fgURLs); }