<?php

// $Id: SearchHTMLParser.inc.php,v 1.11 2008/07/01 01:16:10 asmecher Exp $


import('search.SearchFileParser');
import('core.String');

/**
 * Search file parser for HTML input.
 *
 * Each read returns one chunk of the underlying file with HTML tags removed
 * and HTML entities converted by String::html2utf().
 */
class SearchHTMLParser extends SearchFileParser {

	/**
	 * Read the next chunk of the file as plain text.
	 *
	 * Reads at most one line (bounded by the 4096 length argument) from
	 * $this->fp, strips HTML tags from it, and passes the result through
	 * String::html2utf() to convert HTML entities.
	 *
	 * NOTE(review): $this->fp is presumably the handle opened by the parent
	 * SearchFileParser -- confirm against that class.
	 *
	 * @return mixed whatever String::html2utf() returns for the chunk read;
	 *  when the underlying read fails or hits end-of-file, the read
	 *  function's false is passed to html2utf() unchanged.
	 */
	function doRead() {
		if (function_exists('fgetss')) {
			// Original code path. fgetss() is deprecated since PHP 7.3
			// and no longer exists in PHP 8.0+, hence the guard.
			$line = fgetss($this->fp, 4096);
		} else {
			// PHP 8+ replacement: read the raw chunk, then strip the tags.
			// Caveat: strip_tags() handles each chunk on its own, so a tag
			// split across two reads is not recognised as a single tag.
			$line = fgets($this->fp, 4096);
			if ($line !== false) {
				$line = strip_tags($line);
			}
		}

		// Convert HTML entities to valid UTF-8 characters.
		// An alternative noted by the original author is html_entity_decode()
		// with the configured client charset: slightly (~10%) faster, but
		// not quite as accurate.
		$line = String::html2utf($line);

		return $line;
	}

}

?>
1.5.6