Main Page | Namespace List | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

CSHTMLHelper.cpp

Go to the documentation of this file.
00001 #include "CSHTMLHelper.h"
00002 #include "CSHelper.h"
00003 
00004 #ifdef WIN32
00005 #pragma warning(disable : 4786 )
00006 #endif
00007 
00008 const char *CSHTMLHelper::CLASS = "CSHTMLHelper";
00009 const char *CSHTMLNode::CLASS = "CSHTMLNode";
00010 
00011 /** Holds the mapping all known entities (not all official html-entities are used!):
00012     <PRE>
00013     \verbatim 
00014         "&nbsp;" <-> " "
00015         "&quot;" <-> "\""
00016         "&#60;" <-> "<"
00017         "&#62;" <-> ">"
00018         "&lt;" <-> "<"
00019         "&gt;" <-> ">"
00020         "&amp;" <-> "&"
00021         "&copy;" <-> "©"
00022         "&reg;" <-> "®"
00023         "&cent;" <-> "¢"
00024         "&deg;" <-> "°"
00025         "&sup2;" <-> "²"
00026         "&raquo;" <-> "»"
00027         "&laquo;" <-> "«"
00028         "&frac14;" <-> "¼"
00029         "&frac12;" <-> "½"
00030         "&frac34;" <-> "¾"
00031         "&plusmn;" <-> "±"
00032         "&uuml;" <-> "ü"
00033         "&auml;" <-> "ä"
00034         "&ouml;" <-> "ö"
00035         "&Oslash;" <-> "Ø"
00036         "&iexcl;" <-> "¡"
00037         "&ntilde;" <-> "ñ"
00038         "&szlig;" <-> "ß"
00039     \endverbatim.
00040     </PRE>
00041 */
00042 HTMLEntityMap CSHTMLNode::mHTMLEntityMap;
00043 
00044 /** Return the next sibling-node (brother or sister :-)), if none is available an empty \a CSHTMLNode is returned.
00045     \return the next sibling-node (if available)
00046     \return an empty \a CSHTMLNode otherwise
00047   */
00048 CSHTMLNode CSHTMLNode::nextSibling()
00049 {
00050     static char *functionName="nextSibling";
00051     LOG_ENTER 
00052     if (!mNode) 
00053     {
00054         LOG_EXIT
00055         return CSHTMLNode(0);
00056     }
00057     LOG_EXIT
00058     return CSHTMLNode(mNode->NextSibling());
00059 }
00060 
00061 /** Return the previous sibling-node (brother or sister :-)), if none is available an empty \a CSHTMLNode is returned.
00062     \return the previous sibling-node (if available)
00063     \return an empty \a CSHTMLNode otherwise
00064 
00065     \todo This is not implemented! (returns allways an empty node!)
00066   */
00067 CSHTMLNode CSHTMLNode::previousSibling()
00068 {
00069     static char *functionName="previousSibling";
00070     LOG_ENTER 
00071     if (!mNode) 
00072     {
00073         LOG_EXIT
00074         return CSHTMLNode(0);
00075     }
00076     LOG_EXIT
00077     return CSHTMLNode(0);
00078 }
00079 
00080 /** Return the parent-node (father or mother :-)), if none is available an empty \a CSHTMLNode is returned.
00081     \return the parent-node
00082     \return an empty \a CSHTMLNode otherwise
00083   */
00084 CSHTMLNode CSHTMLNode::parent()
00085 {
00086     static char *functionName="parent";
00087     LOG_ENTER 
00088     if (!mNode) 
00089     {
00090         LOG_EXIT
00091         return CSHTMLNode(0);
00092     }
00093     LOG_EXIT
00094     return CSHTMLNode(mNode->Parent());
00095 }
00096 
00097 /** Return the first child-node (son or daughter :-)), if none is available an empty \a CSHTMLNode is returned.
00098     \a firstChild() or \a lastChild() must be called in order to
00099     successfully call \a nextChild() or \a previousChild().
00100 
00101     \return the first child-node
00102     \return an empty \a CSHTMLNode otherwise
00103   */
00104 CSHTMLNode CSHTMLNode::firstChild()
00105 {
00106     static char *functionName="firstChild";
00107     LOG_ENTER 
00108     if (!mNode) 
00109     {
00110         LOG_EXIT
00111         return CSHTMLNode(0);
00112     }
00113     mCurrentChild = mNode->FirstChild();
00114     LOG_EXIT
00115     return CSHTMLNode(mCurrentChild);
00116 }
00117 
00118 /** Return the last child-node (son or daughter :-)), if none is available an empty \a CSHTMLNode is returned.
00119     \a firstChild() or \a lastChild() must be called in order to
00120     successfully call \a nextChild() or \a previousChild().
00121 
00122     \return the last child-node
00123     \return an empty \a CSHTMLNode otherwise
00124   */
00125 CSHTMLNode CSHTMLNode::lastChild()
00126 {
00127     static char *functionName="lastChild";
00128     LOG_ENTER 
00129     if (!mNode) 
00130     {
00131         LOG_EXIT
00132         return CSHTMLNode(0);
00133     }
00134     mCurrentChild = mNode->LastChild();
00135     LOG_EXIT
00136     return CSHTMLNode(mCurrentChild);
00137 }
00138 
00139 /** Return the previous child-node (son or daughter :-)), if none is available an empty \a CSHTMLNode is returned.
00140     \a firstChild() or \a lastChild() must be called in order to
00141     successfully call \a nextChild() or \a previousChild().
00142 
00143     \return a child-node
00144     \return an empty \a CSHTMLNode otherwise
00145   */
00146 CSHTMLNode CSHTMLNode::previousChild()
00147 {
00148     static char *functionName="previousChild";
00149     LOG_ENTER 
00150     if (mCurrentChild)
00151     {
00152         mCurrentChild = mCurrentChild->PreviousSibling();
00153         if (!mCurrentChild)
00154         {
00155             std::string message="No previous child found!\n";
00156             message += "Tag="+getTag();
00157             message += ", ";
00158             message += "Value="+getValue();
00159             LOG_DEBUG_MESSAGE(message) 
00160         }
00161     }
00162     LOG_EXIT
00163     return CSHTMLNode(mCurrentChild);
00164 }
00165 
00166 /** Return the next child-node (son or daughter :-)), if none is available an empty \a CSHTMLNode is returned.
00167     \a firstChild() or \a lastChild() must be called in order to
00168     successfully call \a nextChild() or \a previousChild().
00169 
00170     \return a child-node
00171     \return an empty \a CSHTMLNode otherwise
00172   */
00173 CSHTMLNode CSHTMLNode::nextChild()
00174 {
00175     static char *functionName="nextChild";
00176     LOG_ENTER 
00177     if (mCurrentChild)
00178     {
00179         mCurrentChild = mCurrentChild->NextSibling();
00180         if (!mCurrentChild)
00181         {
00182             std::string message="No next child found!\n";
00183             message += "Tag="+getTag();
00184             message += ", ";
00185             message += "Value="+getValue();
00186             LOG_DEBUG_MESSAGE(message) 
00187         }
00188     }
00189     LOG_EXIT
00190     return CSHTMLNode(mCurrentChild);
00191 }
00192 
00193 /** A Text node is a node that does not hold TAG, or attribute information. The plain
00194     good old html-text!
00195 
00196     \return true if this node countains text 
00197     \return false if not
00198   */
00199 bool CSHTMLNode::isText()
00200 {
00201     return mNode->ToText() != 0;
00202 }
00203 
00204 /** If the node is a text-node, the string representing the text is returned.
00205     Known entities e.g.  "&amp;" <-> "&" are mapped to their "meaning".
00206 
00207     \return text if textnode
00208     \return an empty string otherwise
00209   */
00210 std::string CSHTMLNode::getValue()
00211 {
00212     static char *functionName="getValue";
00213     LOG_ENTER 
00214     if (!mNode) 
00215     {
00216         LOG_EXIT
00217         return std::string();
00218     }
00219     if (mNode->ToText() != 0)
00220     {
00221         LOG_EXIT
00222         std::string text = mNode->ToText()->Value();
00223         std::string entityFreeText = replaceHTMLEntities(text);
00224         return entityFreeText;
00225     }
00226     LOG_EXIT
00227     return std::string();
00228 }
00229 
00230 /** Check whether the current node is a single TAG-node (e.g. <HR>, <BR>).
00231 
00232     \return true if this node is a single TAG-node
00233     \return false if not (also for text only nodes)
00234   */
00235 bool CSHTMLNode::isSingleTag()
00236 {
00237     static char *functionName="isSingleTag";
00238     LOG_ENTER 
00239     if (!mNode) 
00240     {
00241         LOG_EXIT
00242         return false;
00243     }
00244     if (mNode->ToElement() != 0)
00245     {
00246         LOG_EXIT
00247         return mNode->ToElement()->isSingleTagElement();
00248     }
00249     LOG_EXIT
00250     return false;
00251 }
00252 
00253 /** Gets the TAG the current node is representing (e.g. HTML, BODY...)
00254 
00255     \return string that represents the TAG
00256     \return empty string if not a TAG-node
00257   */
00258 std::string CSHTMLNode::getTag()
00259 {
00260     static char *functionName="getTag";
00261     LOG_ENTER 
00262     if (!mNode) 
00263     {
00264         LOG_EXIT
00265         return std::string();
00266     }
00267     if (mNode->ToElement() != 0)
00268     {
00269         LOG_EXIT
00270         return mNode->ToElement()->Value();
00271     }
00272     LOG_EXIT
00273     return std::string();
00274 }
00275 
00276 /** Return the first attribut of the node (e.g. "color", "HREF", ...).
00277     \a firstAttribut() must be called in order to successfully call \a nextAttribut().
00278 
00279     \return string that represents the attribute
00280     \return empty string if not a TAG-node or no attribute was found
00281   */
00282 std::string CSHTMLNode::firstAttribut()
00283 {
00284     static char *functionName="firstAttribut";
00285     LOG_ENTER 
00286     if (!mCurrentElement)
00287     {
00288         LOG_EXIT
00289         return std::string();
00290     }
00291     mCurrentAttribute = mCurrentElement->FirstAttribute();
00292     if (!mCurrentAttribute)
00293     {
00294         LOG_EXIT
00295         return std::string();
00296     }
00297     LOG_EXIT
00298     return mCurrentAttribute->Name();
00299 }
00300 
00301 /** Return the next attribut of the node (e.g. "color", "HREF", ...).
00302     \a firstAttribut() must be called in order to successfully call \a nextAttribut().
00303 
00304     \return string that represents the attribute
00305     \return empty string if not a TAG-node or no attribute was found
00306   */
00307 std::string CSHTMLNode::nextAttribut()
00308 {
00309     static char *functionName="nextAttribut";
00310     LOG_ENTER 
00311     if (!mCurrentElement)
00312     {
00313         LOG_EXIT
00314         return std::string();
00315     }
00316     if (!mCurrentAttribute)
00317     {
00318         LOG_EXIT
00319         return std::string();
00320     }
00321     mCurrentAttribute = mCurrentAttribute->Next();
00322     if (!mCurrentAttribute)
00323     {
00324         LOG_EXIT
00325         return std::string();
00326     }
00327     LOG_EXIT
00328     return mCurrentAttribute->Name();
00329 }
00330 
00331 
00332 /** Return the value of an attribut of the node (e.g. for "color" -> "#ffffff").
00333     Attribute value given is indepenedent of upper or lower case!
00334 
00335     \return string that represents the value of the attribute
00336     \return empty string if not a TAG-node or no corresponding attribute was found
00337   */
00338 std::string CSHTMLNode::attributValue(const std::string &attribute)
00339 {
00340     static char *functionName="attributValue";
00341     LOG_ENTER 
00342     const char *p;
00343     if (!mCurrentElement)
00344     {
00345         LOG_EXIT
00346         return std::string();
00347     }
00348 
00349     // case independency, make the given attribute name UPPER
00350     std::string upperAttribute;
00351     
00352     for( p = attribute.c_str(); p < attribute.c_str() + strlen( attribute.c_str() ); p++ )
00353     {
00354         upperAttribute += toupper(*p);
00355     }
00356 
00357     mCurrentAttribute = mCurrentElement->FirstAttribute();
00358     while (mCurrentAttribute)
00359     {
00360         std::string name =  mCurrentAttribute->Name();
00361 
00362         // case independency, make this attribute name UPPER
00363         std::string upperName;
00364         for( p = name.c_str(); p < name.c_str() + strlen( name.c_str() ); p++ )
00365         {
00366             upperName += toupper(*p);
00367         }
00368 
00369         // and compare them
00370         if (strcmp(upperAttribute.c_str(), upperName.c_str()) == 0)
00371         {
00372             // equal -> great, return it
00373             LOG_EXIT
00374             return mCurrentAttribute->Value();
00375         }
00376         mCurrentAttribute = mCurrentAttribute->Next();
00377     }
00378     LOG_EXIT
00379     return std::string();
00380 }
00381 /** this is realy only a quick hack!
00382     \todo Entities are not all done (at least numbers (e.g "&#55;") should done alright)
00383 */
00384 std::string CSHTMLNode::replaceHTMLEntities(const std::string &text)
00385 {
00386     std::string newText = text;
00387     // first time here -> fill the map!
00388     if (mHTMLEntityMap.size() == 0)
00389     {
00390         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&nbsp;", " "));
00391         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&quot;", "\""));
00392         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&#60;", "<"));
00393         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&#62;", ">"));
00394         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&lt;", "<"));
00395         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&gt;", ">"));
00396         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&amp;", "&"));
00397         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&copy;", "©"));
00398         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&reg;", "®"));
00399         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&cent;", "¢"));
00400         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&deg;", "°"));
00401         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&sup2;", "²"));
00402         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&raquo;", "»"));
00403         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&laquo;", "«"));
00404         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&frac14;", "¼"));
00405         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&frac12;", "½"));
00406         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&frac34;", "¾"));
00407         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&plusmn;", "±"));
00408         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&uuml;", "ü"));
00409         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&auml;", "ä"));
00410         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&ouml;", "ö"));
00411         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&Oslash;", "Ø"));
00412         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&iexcl;", "¡"));
00413         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&ntilde;", "ñ"));
00414         mHTMLEntityMap.insert(HTMLEntityMap::value_type("&szlig;", "ß"));
00415 
00416         // this could go endlessly, there are also numerical codes,
00417         // that one could replace...
00418     }
00419     std::string key;
00420     std::string value;
00421     HTMLEntityMap::iterator iter = mHTMLEntityMap.begin();
00422     while (iter != mHTMLEntityMap.end())
00423     {
00424         key = iter->first;
00425         value = iter->second;
00426         while (CSHelper::replace(newText, key, value))
00427             ;
00428         iter++; 
00429     }
00430     return newText;
00431 }
00432 /******************************************************************************************
00433 *******************************************************************************************/
00434 
00435 /** If an error occured while opening, \a getError() returns
00436     the error code. (This should be checked!)
00437     \param htmlFilename Must be a valid html file.
00438     \sa getError()
00439 */
00440 CSHTMLHelper::CSHTMLHelper(const std::string &htmlFilename)
00441 {
00442     static char *functionName="CSHTMLHelper";
00443     LOG_ENTER 
00444     mError = CSHTML_OK;
00445     mXMLdoc = 0;
00446     mXMLdoc = new TiXmlDocument(htmlFilename);
00447     mXMLdoc->setIgnoreNoEndTag(true);       // for html the xml-Parser must also understand tags
00448                                             // which have no endtag (e.g. "<HR>")
00449     mXMLdoc->SetCondenseWhiteSpace(false);  // important for PRE Tag!
00450     mXMLdoc->ClearError();
00451     if (!mXMLdoc->LoadFile() )
00452     {
00453         mError = CSHTML_DOCUMENT_LOADING_ERROR;
00454         std::string errorMessageString;
00455         errorMessageString = std::string("Error - while loading document \"" + htmlFilename +"\"!\n");
00456         errorMessageString += mXMLdoc->ErrorDesc();
00457         mErrorMessageString = mErrorMessageString + "\n"+ errorMessageString;
00458 
00459         LOG_DEBUG_MESSAGE("HTML \"" + errorMessageString) 
00460         LOG_EXIT
00461         return;         
00462     }
00463     LOG_DEBUG_MESSAGE("HTML loaded!") 
00464     LOG_EXIT
00465 }
00466 
00467 //! Destructor cleans up all internal stuff!
00468 //! \warning As long as the node got via \a getRootNode() is in any way active <B>DON'T</B> delete the \a CSHTMLHelper!!!
00469 //! 
00470 CSHTMLHelper::~CSHTMLHelper()
00471 {
00472     static char *functionName="~CSHTMLHelper";
00473     LOG_ENTER 
00474     if (mXMLdoc)
00475     {
00476         delete (mXMLdoc);
00477         mXMLdoc = 0;
00478     }
00479     LOG_EXIT
00480 }
00481 
00482 /** Get the root-node of the currently loaded html-file. This will usually be
00483     the <HTML> - tag node!
00484     \return the \a rootNode 
00485     \sa CSHTMLNode
00486     \warning As long as the node got via \a getRootNode() is in any way active <B>DON'T</B> delete the \a CSHTMLHelper!!!
00487 */
00488 CSHTMLNode CSHTMLHelper::getRootNode()
00489 {
00490     return CSHTMLNode(mXMLdoc->FirstChildElement());
00491 }
00492 
00493 int CSHTMLHelper::getError(void)
00494 {
00495     return mError;
00496 }
00497 
00498 std::string CSHTMLHelper::getErrorMessage(void)
00499 {
00500     return mErrorMessageString;
00501 }
00502 

Generated on Wed Jul 14 00:43:30 2004 for CSLib by doxygen 1.3.6