// Build-time configuration constants for the feeder (FEEDER_NAME below).
// Everything here is a preprocessor macro; alternatives that are not in use
// are kept as commented-out lines so they can be toggled by hand.

//DB connection(s)
// NOTE(review): credentials are hard-coded in this file. Consider supplying
// them from the build system or the environment instead of source control.

//dev DHCPPC1 direct connect (LIVEDB = false)
#define PG_DBSERVER "dhcppc1"
#define PG_DBNAME   "xsearchs_tips"
#define PG_DBUSR    "xsearchs_feeder"
#define PG_DBPWD    "queenpool1"

//test DHCPPC1 XMLHTTPDB setup (LIVEDB = true)
/*
#define XH_DBSERVER "xsearch.daoconsumer.com"
#define XH_DBNAME   "serverCommunication/Request.php"
#define XH_DBUSR    "xsearchs_feeder"
#define XH_DBPWD    "queenpool1"
*/

//Live DB connect
#define XH_DBSERVER "xsearchservices.com"
#define XH_DBNAME   "serverCommunication/Request.php"
#define XH_DBUSR    "xsearchs_feeder"
#define XH_DBPWD    "queenpool1"

//initial domains
// Uncomment exactly one of the lines below to override the default domain
// selection clause defined further down (the default is only used when
// SPIDERMANAGER_DOMAINS is still undefined at that point).
//#define SPIDERMANAGER_DOMAINS " where domain='www.newint.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.babymilkaction.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.soilassociation.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.wdm.org.uk'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.coopamerica.org'" //closeConnection=1
//#define SPIDERMANAGER_DOMAINS " where domain='www.corpwatch.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.cio.com'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.schnews.org.uk'"
//#define SPIDERMANAGER_DOMAINS " where domain='dte.gn.apc.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.babymilkaction.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.waterjustice.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.ciwf.org.uk'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.buav.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.guardian.co.uk'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.ibfan.org'"
//#define SPIDERMANAGER_DOMAINS " where domain in('www.buav.org', 'www.guardian.co.uk', 'dte.gn.apc.org')"
//#define SPIDERMANAGER_DOMAINS " where domain='www.waterjustice.org'"
//#define SPIDERMANAGER_DOMAINS " where domain='www.xsearchservices.dev'"
//#define SPIDERMANAGER_DOMAINS " limit 10"

//------------------------- special Object domains
//#define SPIDERMANAGER_DOMAINS " where domain='www.coopamerica.org'" //companies
//#define SPIDERMANAGER_DOMAINS " where domain='www.whosaliveandwhosdead.com'" //people
//#define SPIDERMANAGER_DOMAINS " where domain='www.reviewcentre.com'" //products (company-productname)
//#define SPIDERMANAGER_DOMAINS " where domain='www.epeat.net'" //products (company, productname)
//#define SPIDERMANAGER_DOMAINS " where domain='www.allproducts.com'" //products (company, productname)
//#define SPIDERMANAGER_DOMAINS " where domain='www.business-humanrights.org'" //company (and extra useful domains)

#ifndef SPIDERMANAGER_DOMAINS
//platform specific
//limit to domains that have some sort of parser defined for them
#ifdef _WIN32
//artificial FD_SETSIZE limit from windows
#define SPIDERMANAGER_DOMAINS " where exists (select * from txpagegroup xpg where xpg.externalsourceid = pexternalsources.externalsourceid and xpg.enable=true) order by externalsourceid limit 60"
#else
#define SPIDERMANAGER_DOMAINS " where exists (select * from txpagegroup xpg where xpg.externalsourceid = pexternalsources.externalsourceid and xpg.enable=true) order by externalsourceid"
#endif
#endif

//misc
//#define ENTITYLOAD "limit 10"
#define ENTITYLOAD ""

#define FEEDER_NAME         "DirtSpider"
#define FEEDER_VERSIONNAME  "alpha"
#define FEEDER_MAJORVERSION "0"
#define FEEDER_MINORVERSION "9"
#define FEEDER_LIVEBUILD    "4"

//#define TESTS       //play code at start in main.cpp
//#define LIVEDB      //connect to the live system
//#define GOOGLECOUNT //calc google count @startup

#define MIN_MEMORY 100
#define MAX_OBJECTS 10000
#define SPIDER_MAXBODYBUFFER 60 //Kb
#define SPIDER_THREADPRIORITY 3
#define DOMAIN_DOMAINSAVEDELTA 100 //save domain every x pages (in case of crash)
#define SM_DBTHREADPRIORITY 15
#define XMLHTTPDB_MAXBODYBUFFER 10000 //Kb
#define SPIDER_MAXCONNFAILS 3
// Parenthesized so the product stays intact when the macro is expanded next
// to other operators (e.g. `x % SPIDER_THREADSTACKSIZE`).
#define SPIDER_THREADSTACKSIZE (16*1024*1024) //bytes
#define DC_TIMEOUT 120 //seconds 120 = 2 minutes
#define PROTOCOL_HIDELOSTCONNS //hide live connection drops
#define HTMLPAGE_NEWLINKS 32
#define HTMLPAGE_LINESPERKB 20 //lines for the line density analysis / kb
#define SPIDER_IPR_QUERYCOUNT 20 //maximum query string variables
#define SPIDER_IPR_PARENTSEARCH 3 //parent levels to search for a matching checkdigit
#define GRETA_MATCHMODE MODE_DEFAULT //MODE_DEFAULT, MODE_SAFE, MODE_FAST
#define REPORT_PORT 10436