00001 #ifndef CSHTMLHelper_H 00002 #define CSHTMLHelper_H 00003 00004 #ifdef WIN32 00005 #pragma warning(disable : 4786 ) 00006 #endif 00007 00008 #include "CSLog.h" 00009 #include <map> 00010 #include <string> 00011 #include <iostream.h> 00012 00013 #include "tinyxml.h" 00014 00015 00016 const int CSHTML_OK = 0; //!< Return code of \a getError() if everyting was ok. 00017 const int CSHTML_DOCUMENT_LOADING_ERROR = 1; //!< Return code of \a getError() html-page could not be loaded 00018 00019 typedef std::map<std::string, std::string> HTMLEntityMap; 00020 00021 class CSHTMLHelper; 00022 //! Class that hold all information about a html-node. 00023 /** 00024 \a CSHTMLNode holds all information of a single html-node and its children. 00025 (The root-node of a html-page e.g. holds the complete document structur.) 00026 The root node of a html-page can be got using the class \a CSHTMLHelper 00027 00028 For an example how \a CSHTMLNode is used see \a CSHTMLHelper. 00029 \sa CSHTMLHelper 00030 00031 \warning The corresponding \a CSHTMLHelper must not be deleted, as long as \b any node got from it is in any way active! <B>DON'T</B> delete the \a CSHTMLHelper!!! 00032 */ 00033 class CSHTMLNode 00034 { 00035 friend CSHTMLHelper; //!< to call private constructor 00036 private: 00037 static HTMLEntityMap mHTMLEntityMap; //!< holds all "known" entities 00038 TiXmlNode *mNode; //!< if empty 0, otherwise the corresponding \a tinyxml node 00039 TiXmlNode* mCurrentChild; //!< current child node 00040 TiXmlElement* mCurrentElement; //!< current element 00041 TiXmlAttribute* mCurrentAttribute; //!< current attribute 00042 00043 //! replace all known entities in the given \a text 00044 static std::string replaceHTMLEntities(const std::string &text); 00045 00046 //! Constructor for use with \a tinyxml - node. 00047 /** Only internaly used! 00048 */ 00049 CSHTMLNode(TiXmlNode *node) 00050 { 00051 mNode = node; 00052 mCurrentChild = 0; 00053 mCurrentAttribute = 0; 00054 mCurrentElement = mNode->ToElement(); 00055 } 00056 00057 public: 00058 //! The unique class identifier (the name of the class). 00059 static const char *CLASS; 00060 00061 //! Get the unique class identifier 00062 virtual std::string getType() {return (std::string) CLASS;} 00063 00064 //! Builds an empty \a CSHTMLNode 00065 CSHTMLNode() 00066 { 00067 mNode = 0; 00068 } 00069 00070 //! Copy constructor 00071 /** \param node the other \a CSHTMLNode that is copied. 00072 */ 00073 CSHTMLNode(const CSHTMLNode &node) 00074 { 00075 // pointers are only copied! 00076 mNode = node.mNode; 00077 mCurrentChild = node.mCurrentChild; 00078 mCurrentElement = node.mCurrentElement; 00079 mCurrentAttribute = node.mCurrentAttribute; 00080 } 00081 00082 //! Destructor 00083 virtual ~CSHTMLNode() {} // pointers are never freed either! 00084 00085 //! Get the next sibling node 00086 CSHTMLNode nextSibling(); 00087 00088 //! Get the previous sibling node 00089 CSHTMLNode previousSibling(); 00090 00091 //! Get the parent node 00092 CSHTMLNode parent(); 00093 00094 //! Get the first child node 00095 CSHTMLNode firstChild(); 00096 00097 //! Get the last child node 00098 CSHTMLNode lastChild(); 00099 00100 //! Get the next child node 00101 CSHTMLNode nextChild(); 00102 00103 //! Get the previous child node 00104 CSHTMLNode previousChild(); 00105 00106 //! Whether the node actually exits! 00107 /** All methods return a \a CSHTMLNode, if an error occured, or 00108 a node does not exist, an empty node is returned. An empty 00109 node can be checked with this method. 00110 \return \a true on an empty node 00111 \return \a false on correct node 00112 */ 00113 bool isEmpty() {return (mNode==0);} 00114 00115 //! Is the this node a "text" node? 00116 bool isText(); 00117 00118 //! Get the value of the node. 00119 std::string getValue(); 00120 00121 //! Is this node a single TAG node? 00122 bool isSingleTag(); 00123 00124 //! Get TAG (name) 00125 std::string getTag(); 00126 00127 //! Get first attribute (name) 00128 std::string firstAttribut(); 00129 00130 //! Get next attribute (name) 00131 std::string nextAttribut(); 00132 00133 //! Get value of attribte (with name as in "attribut") 00134 std::string attributValue(const std::string &attribut); 00135 }; 00136 00137 //! Class to load a html-page, and give access to it's root node. 00138 /** 00139 A html-page can only be loaded from a local filesystem, not via an URL 00140 from the internet. This class is responsible for loading the page and giving 00141 access to it by calling the \a getRootNode() method. This method returns an \a CSHTMLNode, 00142 with which one can navigate thru the html-page and it's tags. 00143 \sa CSHTMLNode 00144 00145 To parse a HTML-file use e.g. the following code: 00146 <PRE> 00147 00148 CSHTMLHelper html("index.html"); 00149 CSHTMLNode rootNode = html.getRootNode(); 00150 00151 if (!rootNode.isEmpty()) 00152 { 00153 printf("Root Node Value: %s\n", rootNode.getValue().c_str()); 00154 printf("Root Node TAG: %s\n", rootNode.getTag().c_str()); 00155 printf("Tree follows...\n"); 00156 printChildren(rootNode); 00157 } 00158 </PRE> 00159 with the helper function as follows: 00160 <PRE> 00161 void indent(int ind) 00162 { 00163 for (int i=0; i<ind;i++) printf(" "); 00164 } 00165 00166 void printChildren(CSHTMLNode &rootNode) 00167 { 00168 static int ind = 0; 00169 ind++; 00170 CSHTMLNode node = rootNode.firstChild(); 00171 while (!node.isEmpty()) 00172 { 00173 std::string value = node.getValue(); 00174 std::string tag = node.getTag(); 00175 if (tag.size()) 00176 { 00177 indent(ind); 00178 printf("<%s", tag.c_str()); 00179 00180 std::string attribute = node.firstAttribut(); 00181 while (attribute.size()) 00182 { 00183 std::string avalue = node.attributValue(attribute); 00184 printf(" %s=", attribute.c_str()); 00185 printf("\"%s\"", avalue.c_str()); 00186 attribute = node.nextAttribut(); 00187 } 00188 printf(">\n"); 00189 } 00190 if (value.size()) 00191 { 00192 indent(ind); 00193 printf("%s\n", value.c_str()); 00194 } 00195 printChildren(node); 00196 if (!node.isSingleTag()) 00197 { 00198 if (tag.size()) 00199 { 00200 indent(ind); 00201 printf("</%s>\n", tag.c_str()); 00202 } 00203 } 00204 node = rootNode.nextChild(); 00205 } 00206 ind--; 00207 } 00208 </PRE> 00209 00210 \warning As long as the node got via \a getRootNode() is in any way active <B>DON'T</B> delete the \a CSHTMLHelper!!! 00211 */ 00212 00213 class CSHTMLHelper 00214 { 00215 private: 00216 std::string mErrorMessageString; 00217 int mError; 00218 TiXmlDocument *mXMLdoc; 00219 00220 public: 00221 //! The unique class identifier (the name of the class). 00222 static const char *CLASS; 00223 00224 //! Get the unique class identifier 00225 virtual std::string getType() {return (std::string) CLASS;} 00226 00227 //! Constructor of CSHTMLHelper 00228 CSHTMLHelper(const std::string &htmlFilename); 00229 00230 //! Destructor CSHTMLHelper 00231 virtual ~CSHTMLHelper(); 00232 00233 //! Get the first TAG found in the loaded htmlfile (the root-node). 00234 CSHTMLNode getRootNode(); 00235 00236 //! Get the current error code. 00237 int getError(void); 00238 00239 //! Textual representation of an error. 00240 std::string getErrorMessage(void); 00241 }; 00242 00243 #endif CSHTMLHelper_H