Main Page | Namespace List | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

CSHTMLHelper.h

Go to the documentation of this file.
00001 #ifndef CSHTMLHelper_H
00002 #define CSHTMLHelper_H
00003 
00004 #ifdef WIN32
00005 #pragma warning(disable : 4786 )
00006 #endif
00007 
00008 #include "CSLog.h"
00009 #include <map>
00010 #include <string>
00011 #include <iostream.h>
00012 
00013 #include "tinyxml.h"
00014 
00015 
00016 const int CSHTML_OK = 0;                        //!< Return code of \a getError() if everything was ok.
00017 const int CSHTML_DOCUMENT_LOADING_ERROR = 1;    //!< Return code of \a getError() if the html-page could not be loaded.
00018 
00019 typedef std::map<std::string, std::string> HTMLEntityMap; //!< String-to-string map type of \a CSHTMLNode::mHTMLEntityMap (presumably entity -> replacement text; confirm in the implementation).
00020 
00021 class CSHTMLHelper;
00022 //! Class that hold all information about a html-node.
00023 /**
00024     \a CSHTMLNode holds all information of a single html-node and its children.
00025     (The root-node of a html-page e.g. holds the complete document structur.)
00026     The root node of a html-page can be got using the class \a CSHTMLHelper
00027 
00028     For an example how \a CSHTMLNode is used see \a CSHTMLHelper.
00029     \sa CSHTMLHelper
00030 
00031     \warning The corresponding \a CSHTMLHelper must not be deleted, as long as \b any node got from it is in any way active! <B>DON'T</B> delete the \a CSHTMLHelper!!!
00032 */
00033 class CSHTMLNode
00034 {
00035     friend CSHTMLHelper;                        //!< to call private constructor
00036     private:
00037         static HTMLEntityMap mHTMLEntityMap;    //!< holds all "known" entities
00038         TiXmlNode *mNode;                       //!< if empty 0, otherwise the corresponding \a tinyxml node
00039         TiXmlNode* mCurrentChild;               //!< current child node
00040         TiXmlElement* mCurrentElement;          //!< current element
00041         TiXmlAttribute* mCurrentAttribute;      //!< current attribute
00042         
00043         //! replace all known entities in the given \a text
00044         static std::string replaceHTMLEntities(const std::string &text);
00045     
00046         //! Constructor for use with \a tinyxml - node.
00047         /** Only internaly used!
00048         */
00049         CSHTMLNode(TiXmlNode *node) 
00050         {
00051             mNode = node;
00052             mCurrentChild = 0;
00053             mCurrentAttribute = 0;
00054             mCurrentElement = mNode->ToElement();
00055         }
00056 
00057     public:
00058         //! The unique class identifier (the name of the class).
00059         static const char *CLASS;
00060 
00061         //! Get the unique class identifier
00062         virtual std::string getType() {return (std::string) CLASS;}
00063 
00064         //! Builds an empty \a CSHTMLNode
00065         CSHTMLNode() 
00066         {
00067             mNode = 0;
00068         }
00069         
00070         //! Copy constructor
00071         /** \param node the other \a CSHTMLNode that is copied.
00072         */
00073         CSHTMLNode(const CSHTMLNode &node) 
00074         {
00075             // pointers are only copied!
00076             mNode = node.mNode;
00077             mCurrentChild = node.mCurrentChild;
00078             mCurrentElement = node.mCurrentElement;
00079             mCurrentAttribute = node.mCurrentAttribute;
00080         }
00081 
00082         //! Destructor
00083         virtual ~CSHTMLNode() {}    // pointers are never freed either!
00084 
00085         //! Get the next sibling node 
00086         CSHTMLNode nextSibling();
00087 
00088         //! Get the previous sibling node 
00089         CSHTMLNode previousSibling();   
00090 
00091         //! Get the parent node 
00092         CSHTMLNode parent();        
00093 
00094         //! Get the first child node 
00095         CSHTMLNode firstChild();    
00096 
00097         //! Get the last child node 
00098         CSHTMLNode lastChild();     
00099 
00100         //! Get the next child node 
00101         CSHTMLNode nextChild();     
00102 
00103         //! Get the previous child node 
00104         CSHTMLNode previousChild();
00105 
00106         //! Whether the node actually exits! 
00107         /** All methods return a \a CSHTMLNode, if an error occured, or
00108             a node does not exist, an empty node is returned. An empty
00109             node can be checked with this method.
00110             \return \a true on an empty node
00111             \return \a false on correct node
00112         */
00113         bool isEmpty() {return (mNode==0);}
00114 
00115         //! Is the this node a "text" node?
00116         bool isText();
00117 
00118         //! Get the value of the node.
00119         std::string getValue();
00120 
00121         //! Is this node a single TAG node?
00122         bool isSingleTag();
00123 
00124         //! Get TAG (name)
00125         std::string getTag();
00126 
00127         //! Get first attribute (name)
00128         std::string firstAttribut();
00129 
00130         //! Get next attribute (name)
00131         std::string nextAttribut();
00132 
00133         //! Get value of attribte (with name as in "attribut")
00134         std::string attributValue(const std::string &attribut);
00135 };
00136 
00137 //! Class to load a html-page, and give access to its root node.
00138 /**
00139     A html-page can only be loaded from a local filesystem, not via a URL
00140     from the internet. This class is responsible for loading the page and giving
00141     access to it by calling the \a getRootNode() method. This method returns a \a CSHTMLNode,
00142     with which one can navigate through the html-page and its tags.
00143     \sa CSHTMLNode
00144 
00145     To parse a HTML-file use e.g. the following code:
00146     <PRE>
00147     
00148     CSHTMLHelper html("index.html");
00149     CSHTMLNode rootNode = html.getRootNode();
00150 
00151     if (!rootNode.isEmpty())
00152     {
00153         printf("Root Node Value: %s\n", rootNode.getValue().c_str());
00154         printf("Root Node TAG: %s\n", rootNode.getTag().c_str());
00155         printf("Tree follows...\n");
00156         printChildren(rootNode);
00157     }
00158     </PRE>
00159     with the helper function as follows:
00160     <PRE>
00161     void indent(int ind)
00162     {
00163         for (int i=0; i<ind;i++) printf(" "); 
00164     }
00165 
00166     void printChildren(CSHTMLNode &rootNode)
00167     {
00168         static int ind = 0;
00169         ind++;
00170         CSHTMLNode node = rootNode.firstChild();
00171         while (!node.isEmpty())
00172         {
00173             std::string value = node.getValue();
00174             std::string tag = node.getTag();
00175             if (tag.size())
00176             {
00177                 indent(ind); 
00178                 printf("<%s", tag.c_str());
00179                 
00180                 std::string attribute = node.firstAttribut();
00181                 while (attribute.size())
00182                 {
00183                     std::string avalue = node.attributValue(attribute);
00184                     printf(" %s=", attribute.c_str());
00185                     printf("\"%s\"", avalue.c_str());
00186                     attribute = node.nextAttribut();
00187                 }
00188                 printf(">\n");
00189             }
00190             if (value.size())
00191             {
00192                 indent(ind); 
00193                 printf("%s\n", value.c_str());
00194             }
00195             printChildren(node);
00196             if (!node.isSingleTag())
00197             {
00198                 if (tag.size())
00199                 {
00200                     indent(ind); 
00201                     printf("</%s>\n", tag.c_str());
00202                 }
00203             }
00204             node = rootNode.nextChild();
00205         }
00206         ind--;
00207     }
00208     </PRE>
00209 
00210     \warning As long as the node got via \a getRootNode() is in any way active <B>DON'T</B> delete the \a CSHTMLHelper!!!
00211 */
00212 
00213 class CSHTMLHelper
00214 {
00215     private:
00216         std::string mErrorMessageString;
00217         int mError;
00218         TiXmlDocument   *mXMLdoc;
00219         
00220     public:
00221         //! The unique class identifier (the name of the class).
00222         static const char *CLASS;
00223 
00224         //! Get the unique class identifier
00225         virtual std::string getType() {return (std::string) CLASS;}
00226 
00227         //! Constructor of CSHTMLHelper
00228         CSHTMLHelper(const std::string &htmlFilename);
00229 
00230         //! Destructor CSHTMLHelper
00231         virtual ~CSHTMLHelper(); 
00232 
00233         //! Get the first TAG found in the loaded htmlfile (the root-node).
00234         CSHTMLNode getRootNode();
00235 
00236         //! Get the current error code.
00237         int getError(void);
00238     
00239         //! Textual representation of an error.
00240         std::string getErrorMessage(void);
00241 };
00242 
00243 #endif CSHTMLHelper_H

Generated on Wed Jul 14 00:43:30 2004 for CSLib by doxygen 1.3.6