/************************************************************************** THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. Copyright 1997 Microsoft Corporation. All Rights Reserved. **************************************************************************/ /************************************************************************** File: ExplrBar.h Description: CExplorerBar definitions. **************************************************************************/ #include #include #include "Globals.h" #include "debug.h" #ifndef EXPLORERBAR_H #define EXPLORERBAR_H #include "mshtml.h" #include "mshtmhst.h" #include "exdisp.h" #include "DispEx.h" class CExplorerBar; class CWebBrowserEventSink; class CDocHostUIHandler; #include "StringMap.h" #include "WebBrowserEventSink.h" #include "DocHostUIHandler.h" #include using namespace std; #define EB_CLASS_NAME (TEXT("ExplorerBarSampleClass")) #define MIN_SIZE_X 10 #define MIN_SIZE_Y 10 typedef short unsigned int PERCENTAGE; #define _STRNCASECMP(s1,s2,n) strnicmp(s1,s2,n) #define _STRCASECMP(s1,s2) stricmp(s1,s2) #define _STRCMP(s1,s2) strcmp(s1,s2) #define _STRNCMP(s1,s2,n) strncmp(s1,s2,n) #define _STRDUP(a) _strdup(a) #define _STRNCPY(sDest, sSource, iChars) strncpy_s(sDest,iChars+1,sSource,iChars); #define _SNPRINTF(dest, size, format, ...) _snprintf_s(dest, size, size-1, format, __VA_ARGS__) #define _FOPEN(pFile,Name,Mode) fopen_s(&pFile,Name,Mode) //fopen depreciated for fopen_s #define strdupCheck(a) strdup(a) #define mallocCheck(a) malloc(a) #define DEBUGERROR0(a) MessageBox(0, a, "debug", 0) /************************************************************************** CExplorerBar class definition **************************************************************************/ class CExplorerBar : public IDeskBand, public IInputObject, //------- connection to parent client WebBrowser //light version of IOleObject: only GetSite, SetSite for container of this public IObjectWithSite, //------- hosting of child WebBrowser server //we are the Site of the WebBrowser (reverse of IOleObject/IObjectWithSite) public IOleClientSite, public IOleInPlaceSite, //GetWindowContext public IOleInPlaceFrame, //(IOleInPlaceUIWindow) GetWindow, GetBorder for embeded WebBrowser public IDocHostUIHandler, //window.external events from the hosted web browser //------- other //save to disk stuff public IStorage, public IPersistStream //save to disk stuff { //---------------------------------------------------- HTML knowledge classes #define NORM false //used for not PI and not HEADING //processingInstruction #define PI true //jumpTag #define JUMP true #define PARSE false //inlineTag #define INLINE true #define BLOCK false //cssTag #define CSS true #define NCSS false //headingTag #define HEADING true //using a Tag structure because of all the various useful properties we can give to a tag //that instruct the code how to handle them class Tag { static StringMapCI m_tags; const char *m_text; //e.g. "div" const bool m_processingInstruction; //PI or NORM (comments are ignored) const bool m_jumpTag; //JUMP or PARSE (jump over these blocks in the HTML) const bool m_inlineTag; //INLINE or BLOCK (affect the markup areas) const bool m_cssTag; //CSS or NCSS (include the tag in the CSS level information) const bool m_headingTag; const char *m_endtag; // If 0 then indicates that it can be generated from the tag name by this constructor. comment is n exception (-->) char *m_dynamicEndTag; //dynamic endtag if a 0 is sent through for the endtag (managed by the class) public: Tag(const char *_text, bool _processingInstruction = NORM, bool _jumpTag = PARSE, bool _inlineTag = INLINE, bool _cssTag = CSS, bool _headingTag = NORM, const char *_endtag = 0); ~Tag(); //special operators used primarily for comparing heading levels h1 > h6 const bool operator== (Tag *t) {return this == t;} //compare address as these are singletons const bool operator!= (Tag *t) {return !(this->operator==(t));} const bool operator> (Tag *t) {return !t || strcmp(m_text, t->m_text) < 0;} //for example: tag_h1 > &tag_h6 is strcmp("h1", "h6") < 0 is true (1<6) const bool operator< (Tag *t) {return !(this->operator>(t) || this->operator==(t));} const bool operator>= (Tag *t) {return (this->operator>(t) || this->operator==(t));} const bool operator<= (Tag *t) {return (this->operator<(t) || this->operator==(t));} //accessors const char *text() {return m_text;} const char *endtag() {return (m_endtag ? m_endtag : m_dynamicEndTag);} const char processingInstruction() {return m_processingInstruction;} const char jumpTag() {return m_jumpTag;} const char inlineTag() {return m_inlineTag;} const char cssTag() {return m_cssTag;} const char headingTag() {return m_headingTag;} //lookup static Tag *tag(const char *text); static Tag *tag(const char *textstart, const char *textfinish); }; static Tag //extra (included in maps) tag_comment, tag_DOCTYPE, tag_u, tag_s, tag_strike, //http://www.htmldog.com/reference/htmltags/ tag_a, tag_abbr, tag_acronym, tag_address, tag_area, tag_b, tag_base, tag_bdo, tag_big, tag_blink, tag_blockquote, tag_body, tag_br, tag_button, tag_caption, tag_cite, tag_code, tag_col, tag_colgroup, tag_dd, tag_del, tag_dfn, tag_div, tag_dl, tag_dt, tag_em, tag_fieldset, tag_font, tag_form, tag_h1, tag_h2, tag_h3, tag_h4, tag_h5, tag_h6, tag_head, tag_html, tag_hr, tag_i, tag_img, tag_input, tag_ins, tag_kbd, tag_label, tag_legend, tag_li, tag_link, tag_map, tag_meta, tag_noscript, tag_object, tag_ol, tag_optgroup, tag_option, tag_p, tag_param, tag_pre, tag_q, tag_samp, tag_script, tag_select, tag_small, tag_span, tag_strong, tag_style, tag_sub, tag_sup, tag_table, tag_tbody, tag_td, tag_textarea, tag_tfoot, tag_th, tag_thead, tag_title, tag_tr, tag_tt, tag_ul, tag_var; //http://htmlhelp.com/reference/html40/entities/ enum entity { //in order of appearance in tables unknownEntity = 0, //special chars entity_quot, entity_amp, entity_lt, entity_gt, entity_OElig, entity_oelig, entity_Scaron, entity_scaron, entity_Yuml, entity_circ, entity_tilde, entity_ensp, entity_emsp, entity_thinsp, entity_zwnj, entity_zwj, entity_lrm, entity_rlm, entity_ndash, entity_mdash, entity_lsquo, entity_rsquo, entity_sbquo, entity_ldquo, entity_rdquo, entity_bdquo, entity_dagger, entity_Dagger, entity_permil, entity_lsaquo, entity_rsaquo, entity_euro, //Latin-1 Entities entity_nbsp, entity_iexcl, entity_cent, entity_pound, entity_curren, entity_yen, entity_brvbar, entity_sect, entity_uml, entity_copy, entity_ordf, entity_laquo, entity_not, entity_shy, entity_reg, entity_macr, entity_deg, entity_plusmn, entity_sup2, entity_sup3, entity_acute, entity_micro, entity_para, entity_middot, entity_cedil, entity_sup1, entity_ordm, entity_raquo, entity_frac14, entity_frac12, entity_frac34, entity_iquest, entity_Agrave, entity_Aacute, entity_Acirc, entity_Atilde, entity_Auml, entity_Aring, entity_AElig, entity_Ccedil, entity_Egrave, entity_Eacute, entity_Ecirc, entity_Euml, entity_Igrave, entity_Iacute, entity_Icirc, entity_Iuml, entity_ETH, entity_Ntilde, entity_Ograve, entity_Oacute, entity_Ocirc, entity_Otilde, entity_Ouml, entity_times, entity_Oslash, entity_Ugrave, entity_Uacute, entity_Ucirc, entity_Uuml, entity_Yacute, entity_THORN, entity_szlig, entity_agrave, entity_aacute, entity_acirc, entity_atilde, entity_auml, entity_aring, entity_aelig, entity_ccedil, entity_egrave, entity_eacute, entity_ecirc, entity_euml, entity_igrave, entity_iacute, entity_icirc, entity_iuml, entity_eth, entity_ntilde, entity_ograve, entity_oacute, entity_ocirc, entity_otilde, entity_ouml, entity_divide, entity_oslash, entity_ugrave, entity_uacute, entity_ucirc, entity_uuml, entity_yacute, entity_thorn, entity_yuml, //Symbols and Greek Letters entity_fnof, entity_Alpha, entity_Beta, entity_Gamma, entity_Delta, entity_Epsilon, entity_Zeta, entity_Eta, entity_Theta, entity_Iota, entity_Kappa, entity_Lambda, entity_Mu, entity_Nu, entity_Xi, entity_Omicron, entity_Pi, entity_Rho, entity_Sigma, entity_Tau, entity_Upsilon, entity_Phi, entity_Chi, entity_Psi, entity_Omega, entity_alpha, entity_beta, entity_gamma, entity_delta, entity_epsilon, entity_zeta, entity_eta, entity_theta, entity_iota, entity_kappa, entity_lambda, entity_mu, entity_nu, entity_xi, entity_omicron, entity_pi, entity_rho, entity_sigmaf, entity_sigma, entity_tau, entity_upsilon, entity_phi, entity_chi, entity_psi, entity_omega, entity_thetasym, entity_upsih, entity_piv, entity_bull, entity_hellip, entity_prime, entity_Prime, entity_oline, entity_frasl, entity_weierp, entity_image, entity_real, entity_trade, entity_alefsym, entity_larr, entity_uarr, entity_rarr, entity_darr, entity_harr, entity_crarr, entity_lArr, entity_uArr, entity_rArr, entity_dArr, entity_hArr, entity_forall, entity_part, entity_exist, entity_empty, entity_nabla, entity_isin, entity_notin, entity_ni, entity_prod, entity_sum, entity_minus, entity_lowast, entity_radic, entity_prop, entity_infin, entity_ang, entity_and, entity_or, entity_cap, entity_cup, entity_int, entity_there4, entity_sim, entity_cong, entity_asymp, entity_ne, entity_equiv, entity_le, entity_ge, entity_sub, entity_sup, entity_nsub, entity_sube, entity_supe, entity_oplus, entity_otimes, entity_perp, entity_sdot, entity_lceil, entity_rceil, entity_lfloor, entity_rfloor, entity_lang, entity_rang, entity_loz, entity_spades, entity_clubs, entity_hearts, entity_diams }; enum attribute { unknownAttribute = 0, attribute_id, attribute_class, attribute_style, attribute_alt, attribute_onclick, attribute_href, attribute_src, attribute_content //in META tags }; //---------------------------------------------------- text chunk analysis classes enum chunktype { chunk_none = 0, chunk_text, chunk_image, chunk_alt, chunk_content }; struct cssinstance { Tag *t; //these are all pointers into the document //the strings are not copied or zero terminated //their start and finishes are simply remembered (8 bytes per string) const char *idStart; const char *idFinish; const char *classnameStart; const char *classnameFinish; const char *styleStart; const char *styleFinish; }; //a textchunk has NO HTML markup. Just text. struct textchunk { chunktype type; //text/image src/img alt/meta content const char *start; //references in source document const char *finish; //not zero terminated size_t textLength; size_t bytes; const char *hrefStart; //start finish pair: the string is not copied, just referenced in the document const char *hrefFinish; //not zero terminated unsigned int wordCount; unsigned int alphanumerics; unsigned int sentences; Tag *headingTag; //h1 - 6 or font-br, b-br combos PERCENTAGE textDensity; unsigned int wordDensity; PERCENTAGE a1Density; size_t markupAreaID; //the associated continuous in-line markup area size_t depth; //tag depth vector csspath; }; struct markuparea { const char *start; const char *finish; PERCENTAGE maxA1Density; unsigned int wordCount; unsigned int sentences; Tag *ender; //the tag type that caused the area to end size_t firstchunk; //the index of the first text chunk size_t lastchunk; //the index of the last text chunk }; struct selectedzone { size_t firstMUA; //the index of the first markup area size_t lastMUA; //the index of the last markup area unsigned int wordCount; bool hasLinkedHeadings; bool hasMoreLink; bool hasImages; }; //content analysis maps static StringMapCI m_entities; //entity lookup static vector m_entity_chars; //entity -> chars (replacement char(s)) static StringMapCI m_attributes; //attribute lookup //content analysis functions void initMaps(); const char * parse(const char *body, size_t size = 0) const; const size_t calcAggregates(vector *chunks, vector *markupareas, unsigned int &totalWordCount, unsigned int &totalChunksSize, unsigned int &totalAlphaNumerics, unsigned int &totalBytesSize, unsigned int &maxDepth, Tag **maxHeadingTag, unsigned int &numHeadings, PERCENTAGE &avgTextHTMLDensity, PERCENTAGE &avgAlphaNumeric, unsigned int &avgWordDensity) const; const size_t selectZones(vector *chunks, vector *markupareas, vector *selectedzones) const; const char *writeNewBody(vector *chunks, vector *markupareas, vector *selectedzones, const char *body, const size_t bodysize) const; const size_t getTextChunks(const char *body, size_t bodysize, vector *chunks, vector *markupareas) const; const size_t generateCodeString(vector *chunks, vector *markupareas, char **rcodestring) const; Tag *gettag( const char *tagstart) const; entity getentityname( const char *entityStart) const; attribute getattributename( const char *attributeStart) const; size_t numentities() const {return m_entities.size();} protected: DWORD m_ObjRefCount; public: CExplorerBar(); ~CExplorerBar(); //IUnknown methods STDMETHODIMP QueryInterface(REFIID, LPVOID*); STDMETHODIMP_(DWORD) AddRef(); STDMETHODIMP_(DWORD) Release(); //IOleWindow methods STDMETHOD (GetWindow) (HWND*); STDMETHOD (ContextSensitiveHelp) (BOOL); //IDockingWindow methods (Windows 95 (Explorer 4.0), 98, NT) STDMETHOD (ShowDW) (BOOL fShow); STDMETHOD (CloseDW) (DWORD dwReserved); STDMETHOD (ResizeBorderDW) (LPCRECT prcBorder, IUnknown* punkToolbarSite, BOOL fReserved); //IDeskBand methods (Windows 95 (Explorer 4.0), 98, NT) STDMETHOD (GetBandInfo) (DWORD, DWORD, DESKBANDINFO*); //IInputObject methods (Windows 95 (Explorer 4.0), 98, NT) STDMETHOD (UIActivateIO) (BOOL, LPMSG); STDMETHOD (HasFocusIO) (void); STDMETHOD (TranslateAcceleratorIO) (LPMSG); //IObjectWithSite methods (Windows NT, >95) STDMETHOD (SetSite) (IUnknown*); STDMETHOD (GetSite) (REFIID, LPVOID*); //IPersistStream methods (Windows NT >3.1, >95) STDMETHOD (GetClassID) (LPCLSID); STDMETHOD (IsDirty) (void); STDMETHOD (Load) (LPSTREAM); STDMETHOD (Save) (LPSTREAM, BOOL); STDMETHOD (GetSizeMax) (ULARGE_INTEGER*); //IOleInPlaceUIWindow (+IOleWindow) (Windows NT >3.1, >95) STDMETHOD (GetBorder) (LPRECT lprectBorder); STDMETHOD (RequestBorderSpace) (LPCBORDERWIDTHS pborderwidths); STDMETHOD (SetActiveObject) (IOleInPlaceActiveObject *pActiveObject, LPCOLESTR pszObjName); STDMETHOD (SetBorderSpace) (LPCBORDERWIDTHS pborderwidths); //IOleClientSite (Windows NT >3.1, >95) STDMETHOD (SaveObject) (); STDMETHOD (GetMoniker) (DWORD dwAssign, DWORD dwWhichMoniker, IMoniker ** ppmk); STDMETHOD (GetContainer) (LPOLECONTAINER FAR* ppContainer); STDMETHOD (ShowObject) (); STDMETHOD (OnShowWindow) (BOOL fShow); STDMETHOD (RequestNewObjectLayout) (); //IOleInPlaceSite, //GetWindowContext (Windows NT >3.1, >95) STDMETHOD (CanInPlaceActivate) (); STDMETHOD (OnInPlaceActivate) (); STDMETHOD (OnUIActivate) (); STDMETHOD (GetWindowContext) (LPOLEINPLACEFRAME FAR* lplpFrame,LPOLEINPLACEUIWINDOW FAR* lplpDoc,LPRECT lprcPosRect,LPRECT lprcClipRect,LPOLEINPLACEFRAMEINFO lpFrameInfo); STDMETHOD (Scroll) (SIZE scrollExtent); STDMETHOD (OnUIDeactivate) (BOOL fUndoable); STDMETHOD (OnInPlaceDeactivate) (); STDMETHOD (DiscardUndoState) (); STDMETHOD (DeactivateAndUndo) (); STDMETHOD (OnPosRectChange) (LPCRECT lprcPosRect); //IStorage (Windows everything) STDMETHOD (CreateStream) (const WCHAR *pwcsName, DWORD grfMode, DWORD reserved1, DWORD reserved2, IStream **ppstm); STDMETHOD (OpenStream) (const WCHAR * pwcsName, void *reserved1, DWORD grfMode, DWORD reserved2, IStream **ppstm); STDMETHOD (CreateStorage) (const WCHAR *pwcsName, DWORD grfMode, DWORD reserved1, DWORD reserved2, IStorage **ppstg); STDMETHOD (OpenStorage) (const WCHAR * pwcsName, IStorage * pstgPriority, DWORD grfMode, SNB snbExclude, DWORD reserved, IStorage **ppstg); STDMETHOD (CopyTo) (DWORD ciidExclude, IID const *rgiidExclude, SNB snbExclude,IStorage *pstgDest); STDMETHOD (MoveElementTo) (const OLECHAR *pwcsName,IStorage * pstgDest, const OLECHAR *pwcsNewName, DWORD grfFlags); STDMETHOD (Commit) (DWORD grfCommitFlags); STDMETHOD (Revert) (); STDMETHOD (EnumElements) (DWORD reserved1, void * reserved2, DWORD reserved3, IEnumSTATSTG ** ppenum); STDMETHOD (DestroyElement) (const OLECHAR *pwcsName); STDMETHOD (RenameElement) (const WCHAR *pwcsOldName, const WCHAR *pwcsNewName); STDMETHOD (SetElementTimes) (const WCHAR *pwcsName, FILETIME const *pctime, FILETIME const *patime, FILETIME const *pmtime); STDMETHOD (SetClass) (REFCLSID clsid); STDMETHOD (SetStateBits) (DWORD grfStateBits, DWORD grfMask); STDMETHOD (Stat) (STATSTG * pstatstg, DWORD grfStatFlag); //IOleInPlaceFrame (Windows NT >3.1, >95) STDMETHOD (InsertMenus) (HMENU hmenuShared, LPOLEMENUGROUPWIDTHS lpMenuWidths); STDMETHOD (SetMenu) (HMENU hmenuShared, HOLEMENU holemenu, HWND hwndActiveObject); STDMETHOD (RemoveMenus) (HMENU hmenuShared); STDMETHOD (SetStatusText) (LPCOLESTR pszStatusText); STDMETHOD (EnableModeless) (BOOL fEnable); STDMETHOD (TranslateAccelerator) (LPMSG lpmsg, WORD wID); //IDocHostUIHandler (Windows 95, NT 4.0) STDMETHOD (FilterDataObject) (IDataObject *pDO, IDataObject **ppDORet); //S_FALSE STDMETHOD (GetDropTarget) (IDropTarget *pDropTarget, IDropTarget **ppDropTarget); //E_NOTIMPL STDMETHOD (GetExternal) (IDispatch **ppDispatch); //S_OK (supply Dispatch object) STDMETHOD (GetHostInfo) (DOCHOSTUIINFO *pInfo); //S_OK (return structure) STDMETHOD (GetOptionKeyPath) (LPOLESTR *pchKey, DWORD dw); //see implementation STDMETHOD (HideUI) (VOID); //S_OK STDMETHOD (OnDocWindowActivate) (BOOL fActivate); //S_OK STDMETHOD (OnFrameWindowActivate) (BOOL fActivate); //S_OK STDMETHOD (ResizeBorder) (LPCRECT prcBorder, IOleInPlaceUIWindow *pUIWindow, BOOL fFrameWindow); //S_OK STDMETHOD (ShowContextMenu) (DWORD dwID, POINT *ppt, IUnknown *pcmdtReserved, IDispatch *pdispReserved); //S_OK suppress context menu STDMETHOD (ShowUI) (DWORD dwID, IOleInPlaceActiveObject *pActiveObject, IOleCommandTarget *pCommandTarget, IOleInPlaceFrame *pFrame, IOleInPlaceUIWindow *pDoc); //S_OK host dealt with the UI (toolbars) STDMETHOD (TranslateAccelerator) (LPMSG lpMsg, const GUID *pguidCmdGroup, DWORD nCmdID); //S_OK ignore STDMETHOD (TranslateUrl) (DWORD dwTranslate, OLECHAR *pchURLIn, OLECHAR **ppchURLOut); //S_FALSE not translated STDMETHOD (UpdateUI) (); //S_OK private: BOOL m_bFocus; HWND m_hwndParent; HWND m_hWnd; DWORD m_dwViewMode; DWORD m_dwBandID; char *m_sServerUrl; //Interfaces and event sinks IInputObjectSite *m_pSite; IWebBrowser2 *m_pFrameWB; //main page WebBrowser control CWebBrowserEventSink *m_pMainDWebBrowserEvents2Sink; //main webBrowser event sink IConnectionPoint *m_pMainDWebBrowserEvents2SinkConnectionPoint; //it's client connector DWORD m_wMainDWebBrowserEvents2SinkCookie; //and its reference cookie CDocHostUIHandler *m_pDocHostUIHandler; IWebBrowser2 *m_pWebBrowser; //child browser IDispatch *m_pIDocumentDispatch; //A pointer to the main Document for the child JS IDispatch *m_pIMSXMLDispatch; //A pointer to an MSXML Document for the child JS //message loop and handlers static LRESULT CALLBACK WndProc(HWND hWnd, UINT uMessage, WPARAM wParam, LPARAM lParam); LRESULT OnKillFocus(void); LRESULT OnSetFocus(void); HRESULT OnResize(); LRESULT OnPaint(void); LRESULT OnCommand(WPARAM wParam, LPARAM lParam); void FocusChange(BOOL); BOOL RegisterAndCreateWindow(void); HRESULT EmbedBrowserObject(); HRESULT FindTexts(IWebBrowser2 *pIFrameWB, vector* >* > *vTexts); HRESULT FindTextsRecursive(IHTMLDocument2 *pIHTMLDocument, IHTMLDOMNode *pParentNode, vector* >* > *vTexts); public: int EventNotify(const DISPID dispIdMember, REFIID riid, const LCID lcid, const WORD wFlags, const DISPPARAMS *pDispParams, VARIANT *pVarResult, EXCEPINFO *pExcepInfo, UINT *puArgErr); HRESULT InvokeFromPage(const DISPID dispIdMember, const DISPPARAMS *pDispParams, VARIANT *pVarResult, EXCEPINFO *pExcepInfo, UINT *puArgErr); }; #endif //EXPLORERBAR_H