00001 #include "CSHTMLHelper.h"
00002 #include "CSHelper.h"
00003
00004 #ifdef WIN32
00005 #pragma warning(disable : 4786 )
00006 #endif
00007
// Class-name strings for the two classes implemented in this file.
// NOTE(review): presumably read by the LOG_* macros together with each
// function's local functionName — confirm against the macro definitions.
00008 const char *CSHTMLHelper::CLASS = "CSHTMLHelper";
00009 const char *CSHTMLNode::CLASS = "CSHTMLNode";
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
// Entity table shared by every CSHTMLNode. It starts out empty and is filled
// by replaceHTMLEntities() the first time that function finds it empty.
00042 HTMLEntityMap CSHTMLNode::mHTMLEntityMap;
00043
00044
00045
00046
00047
00048 CSHTMLNode CSHTMLNode::nextSibling()
00049 {
00050 static char *functionName="nextSibling";
00051 LOG_ENTER
00052 if (!mNode)
00053 {
00054 LOG_EXIT
00055 return CSHTMLNode(0);
00056 }
00057 LOG_EXIT
00058 return CSHTMLNode(mNode->NextSibling());
00059 }
00060
00061
00062
00063
00064
00065
00066
00067 CSHTMLNode CSHTMLNode::previousSibling()
00068 {
00069 static char *functionName="previousSibling";
00070 LOG_ENTER
00071 if (!mNode)
00072 {
00073 LOG_EXIT
00074 return CSHTMLNode(0);
00075 }
00076 LOG_EXIT
00077 return CSHTMLNode(0);
00078 }
00079
00080
00081
00082
00083
00084 CSHTMLNode CSHTMLNode::parent()
00085 {
00086 static char *functionName="parent";
00087 LOG_ENTER
00088 if (!mNode)
00089 {
00090 LOG_EXIT
00091 return CSHTMLNode(0);
00092 }
00093 LOG_EXIT
00094 return CSHTMLNode(mNode->Parent());
00095 }
00096
00097
00098
00099
00100
00101
00102
00103
00104 CSHTMLNode CSHTMLNode::firstChild()
00105 {
00106 static char *functionName="firstChild";
00107 LOG_ENTER
00108 if (!mNode)
00109 {
00110 LOG_EXIT
00111 return CSHTMLNode(0);
00112 }
00113 mCurrentChild = mNode->FirstChild();
00114 LOG_EXIT
00115 return CSHTMLNode(mCurrentChild);
00116 }
00117
00118
00119
00120
00121
00122
00123
00124
00125 CSHTMLNode CSHTMLNode::lastChild()
00126 {
00127 static char *functionName="lastChild";
00128 LOG_ENTER
00129 if (!mNode)
00130 {
00131 LOG_EXIT
00132 return CSHTMLNode(0);
00133 }
00134 mCurrentChild = mNode->LastChild();
00135 LOG_EXIT
00136 return CSHTMLNode(mCurrentChild);
00137 }
00138
00139
00140
00141
00142
00143
00144
00145
00146 CSHTMLNode CSHTMLNode::previousChild()
00147 {
00148 static char *functionName="previousChild";
00149 LOG_ENTER
00150 if (mCurrentChild)
00151 {
00152 mCurrentChild = mCurrentChild->PreviousSibling();
00153 if (!mCurrentChild)
00154 {
00155 std::string message="No previous child found!\n";
00156 message += "Tag="+getTag();
00157 message += ", ";
00158 message += "Value="+getValue();
00159 LOG_DEBUG_MESSAGE(message)
00160 }
00161 }
00162 LOG_EXIT
00163 return CSHTMLNode(mCurrentChild);
00164 }
00165
00166
00167
00168
00169
00170
00171
00172
00173 CSHTMLNode CSHTMLNode::nextChild()
00174 {
00175 static char *functionName="nextChild";
00176 LOG_ENTER
00177 if (mCurrentChild)
00178 {
00179 mCurrentChild = mCurrentChild->NextSibling();
00180 if (!mCurrentChild)
00181 {
00182 std::string message="No next child found!\n";
00183 message += "Tag="+getTag();
00184 message += ", ";
00185 message += "Value="+getValue();
00186 LOG_DEBUG_MESSAGE(message)
00187 }
00188 }
00189 LOG_EXIT
00190 return CSHTMLNode(mCurrentChild);
00191 }
00192
00193
00194
00195
00196
00197
00198
00199 bool CSHTMLNode::isText()
00200 {
00201 return mNode->ToText() != 0;
00202 }
00203
00204
00205
00206
00207
00208
00209
00210 std::string CSHTMLNode::getValue()
00211 {
00212 static char *functionName="getValue";
00213 LOG_ENTER
00214 if (!mNode)
00215 {
00216 LOG_EXIT
00217 return std::string();
00218 }
00219 if (mNode->ToText() != 0)
00220 {
00221 LOG_EXIT
00222 std::string text = mNode->ToText()->Value();
00223 std::string entityFreeText = replaceHTMLEntities(text);
00224 return entityFreeText;
00225 }
00226 LOG_EXIT
00227 return std::string();
00228 }
00229
00230
00231
00232
00233
00234
00235 bool CSHTMLNode::isSingleTag()
00236 {
00237 static char *functionName="isSingleTag";
00238 LOG_ENTER
00239 if (!mNode)
00240 {
00241 LOG_EXIT
00242 return false;
00243 }
00244 if (mNode->ToElement() != 0)
00245 {
00246 LOG_EXIT
00247 return mNode->ToElement()->isSingleTagElement();
00248 }
00249 LOG_EXIT
00250 return false;
00251 }
00252
00253
00254
00255
00256
00257
00258 std::string CSHTMLNode::getTag()
00259 {
00260 static char *functionName="getTag";
00261 LOG_ENTER
00262 if (!mNode)
00263 {
00264 LOG_EXIT
00265 return std::string();
00266 }
00267 if (mNode->ToElement() != 0)
00268 {
00269 LOG_EXIT
00270 return mNode->ToElement()->Value();
00271 }
00272 LOG_EXIT
00273 return std::string();
00274 }
00275
00276
00277
00278
00279
00280
00281
00282 std::string CSHTMLNode::firstAttribut()
00283 {
00284 static char *functionName="firstAttribut";
00285 LOG_ENTER
00286 if (!mCurrentElement)
00287 {
00288 LOG_EXIT
00289 return std::string();
00290 }
00291 mCurrentAttribute = mCurrentElement->FirstAttribute();
00292 if (!mCurrentAttribute)
00293 {
00294 LOG_EXIT
00295 return std::string();
00296 }
00297 LOG_EXIT
00298 return mCurrentAttribute->Name();
00299 }
00300
00301
00302
00303
00304
00305
00306
00307 std::string CSHTMLNode::nextAttribut()
00308 {
00309 static char *functionName="nextAttribut";
00310 LOG_ENTER
00311 if (!mCurrentElement)
00312 {
00313 LOG_EXIT
00314 return std::string();
00315 }
00316 if (!mCurrentAttribute)
00317 {
00318 LOG_EXIT
00319 return std::string();
00320 }
00321 mCurrentAttribute = mCurrentAttribute->Next();
00322 if (!mCurrentAttribute)
00323 {
00324 LOG_EXIT
00325 return std::string();
00326 }
00327 LOG_EXIT
00328 return mCurrentAttribute->Name();
00329 }
00330
00331
00332
00333
00334
00335
00336
00337
00338 std::string CSHTMLNode::attributValue(const std::string &attribute)
00339 {
00340 static char *functionName="attributValue";
00341 LOG_ENTER
00342 const char *p;
00343 if (!mCurrentElement)
00344 {
00345 LOG_EXIT
00346 return std::string();
00347 }
00348
00349
00350 std::string upperAttribute;
00351
00352 for( p = attribute.c_str(); p < attribute.c_str() + strlen( attribute.c_str() ); p++ )
00353 {
00354 upperAttribute += toupper(*p);
00355 }
00356
00357 mCurrentAttribute = mCurrentElement->FirstAttribute();
00358 while (mCurrentAttribute)
00359 {
00360 std::string name = mCurrentAttribute->Name();
00361
00362
00363 std::string upperName;
00364 for( p = name.c_str(); p < name.c_str() + strlen( name.c_str() ); p++ )
00365 {
00366 upperName += toupper(*p);
00367 }
00368
00369
00370 if (strcmp(upperAttribute.c_str(), upperName.c_str()) == 0)
00371 {
00372
00373 LOG_EXIT
00374 return mCurrentAttribute->Value();
00375 }
00376 mCurrentAttribute = mCurrentAttribute->Next();
00377 }
00378 LOG_EXIT
00379 return std::string();
00380 }
00381
00382
00383
00384 std::string CSHTMLNode::replaceHTMLEntities(const std::string &text)
00385 {
00386 std::string newText = text;
00387
00388 if (mHTMLEntityMap.size() == 0)
00389 {
00390 mHTMLEntityMap.insert(HTMLEntityMap::value_type(" ", " "));
00391 mHTMLEntityMap.insert(HTMLEntityMap::value_type(""", "\""));
00392 mHTMLEntityMap.insert(HTMLEntityMap::value_type("<", "<"));
00393 mHTMLEntityMap.insert(HTMLEntityMap::value_type(">", ">"));
00394 mHTMLEntityMap.insert(HTMLEntityMap::value_type("<", "<"));
00395 mHTMLEntityMap.insert(HTMLEntityMap::value_type(">", ">"));
00396 mHTMLEntityMap.insert(HTMLEntityMap::value_type("&", "&"));
00397 mHTMLEntityMap.insert(HTMLEntityMap::value_type("©", "©"));
00398 mHTMLEntityMap.insert(HTMLEntityMap::value_type("®", "®"));
00399 mHTMLEntityMap.insert(HTMLEntityMap::value_type("¢", "¢"));
00400 mHTMLEntityMap.insert(HTMLEntityMap::value_type("°", "°"));
00401 mHTMLEntityMap.insert(HTMLEntityMap::value_type("²", "²"));
00402 mHTMLEntityMap.insert(HTMLEntityMap::value_type("»", "»"));
00403 mHTMLEntityMap.insert(HTMLEntityMap::value_type("«", "«"));
00404 mHTMLEntityMap.insert(HTMLEntityMap::value_type("¼", "¼"));
00405 mHTMLEntityMap.insert(HTMLEntityMap::value_type("½", "½"));
00406 mHTMLEntityMap.insert(HTMLEntityMap::value_type("¾", "¾"));
00407 mHTMLEntityMap.insert(HTMLEntityMap::value_type("±", "±"));
00408 mHTMLEntityMap.insert(HTMLEntityMap::value_type("ü", "ü"));
00409 mHTMLEntityMap.insert(HTMLEntityMap::value_type("ä", "ä"));
00410 mHTMLEntityMap.insert(HTMLEntityMap::value_type("ö", "ö"));
00411 mHTMLEntityMap.insert(HTMLEntityMap::value_type("Ø", "Ø"));
00412 mHTMLEntityMap.insert(HTMLEntityMap::value_type("¡", "¡"));
00413 mHTMLEntityMap.insert(HTMLEntityMap::value_type("ñ", "ñ"));
00414 mHTMLEntityMap.insert(HTMLEntityMap::value_type("ß", "ß"));
00415
00416
00417
00418 }
00419 std::string key;
00420 std::string value;
00421 HTMLEntityMap::iterator iter = mHTMLEntityMap.begin();
00422 while (iter != mHTMLEntityMap.end())
00423 {
00424 key = iter->first;
00425 value = iter->second;
00426 while (CSHelper::replace(newText, key, value))
00427 ;
00428 iter++;
00429 }
00430 return newText;
00431 }
00432
00433
00434
00435
00436
00437
00438
00439
00440 CSHTMLHelper::CSHTMLHelper(const std::string &htmlFilename)
00441 {
00442 static char *functionName="CSHTMLHelper";
00443 LOG_ENTER
00444 mError = CSHTML_OK;
00445 mXMLdoc = 0;
00446 mXMLdoc = new TiXmlDocument(htmlFilename);
00447 mXMLdoc->setIgnoreNoEndTag(true);
00448
00449 mXMLdoc->SetCondenseWhiteSpace(false);
00450 mXMLdoc->ClearError();
00451 if (!mXMLdoc->LoadFile() )
00452 {
00453 mError = CSHTML_DOCUMENT_LOADING_ERROR;
00454 std::string errorMessageString;
00455 errorMessageString = std::string("Error - while loading document \"" + htmlFilename +"\"!\n");
00456 errorMessageString += mXMLdoc->ErrorDesc();
00457 mErrorMessageString = mErrorMessageString + "\n"+ errorMessageString;
00458
00459 LOG_DEBUG_MESSAGE("HTML \"" + errorMessageString)
00460 LOG_EXIT
00461 return;
00462 }
00463 LOG_DEBUG_MESSAGE("HTML loaded!")
00464 LOG_EXIT
00465 }
00466
00467
00468
00469
00470 CSHTMLHelper::~CSHTMLHelper()
00471 {
00472 static char *functionName="~CSHTMLHelper";
00473 LOG_ENTER
00474 if (mXMLdoc)
00475 {
00476 delete (mXMLdoc);
00477 mXMLdoc = 0;
00478 }
00479 LOG_EXIT
00480 }
00481
00482
00483
00484
00485
00486
00487
00488 CSHTMLNode CSHTMLHelper::getRootNode()
00489 {
00490 return CSHTMLNode(mXMLdoc->FirstChildElement());
00491 }
00492
00493 int CSHTMLHelper::getError(void)
00494 {
00495 return mError;
00496 }
00497
00498 std::string CSHTMLHelper::getErrorMessage(void)
00499 {
00500 return mErrorMessageString;
00501 }
00502