00001 <?php
00002
00022
00023
00024
// Search field identifiers. Each constant occupies its own bit so that
// several of them can be combined into a single integer mask.
// NOTE(review): the per-field meanings are inferred from the constant
// names only -- confirm against the search index/DAO code.
define('PAPER_SEARCH_AUTHOR',             0x00000001);
define('PAPER_SEARCH_TITLE',              0x00000002);
define('PAPER_SEARCH_ABSTRACT',           0x00000004);
define('PAPER_SEARCH_DISCIPLINE',         0x00000008);
define('PAPER_SEARCH_SUBJECT',            0x00000010);
define('PAPER_SEARCH_TYPE',               0x00000020);
define('PAPER_SEARCH_COVERAGE',           0x00000040);
define('PAPER_SEARCH_GALLEY_FILE',        0x00000080);
define('PAPER_SEARCH_SUPPLEMENTARY_FILE', 0x00000100);

// Combined mask (0x78) of the discipline, subject, type and coverage bits.
define(
	'PAPER_SEARCH_INDEX_TERMS',
	PAPER_SEARCH_DISCIPLINE | PAPER_SEARCH_SUBJECT | PAPER_SEARCH_TYPE | PAPER_SEARCH_COVERAGE
);
00035
00036 import('search.PaperSearchIndex');
00037
/**
 * Paper search: parses search queries into keyword structures, retrieves
 * the matching paper IDs, merges and ranks them, and loads the associated
 * objects for display.
 *
 * A parsed keyword structure is an array with three lists:
 *   '+' => phrases that must all match (results are intersected),
 *   ''  => optional phrases (their counts are added to the results),
 *   '-' => phrases whose matches are removed from the results.
 * Any list entry may itself be such a structure (a parenthesised group).
 *
 * Methods refer to each other using the PaperSearch::method() form.
 */
class PaperSearch {

	/**
	 * Parse a search query string into a keyword structure.
	 * The query is split into tokens, each with an optional leading "+" or
	 * "-" sign. A token is a double-quoted phrase, a single parenthesis, or
	 * a run of characters containing neither whitespace nor ")".
	 * @param $query string
	 * @return array keyword structure ('+' => ..., '' => ..., '-' => ...)
	 */
	function parseQuery($query) {
		// $matches[1] holds the sign of each token ('+', '-' or ''),
		// $matches[2] the token itself.
		$count = preg_match_all('/(\+|\-|)("[^"]+"|\(|\)|[^\s\)]+)/', $query, $matches);
		$pos = 0;
		$keywords = PaperSearch::_parseQuery($matches[1], $matches[2], $pos, $count);
		return $keywords;
	}

	/**
	 * Recursive worker for parseQuery(): consumes tokens starting at $pos
	 * until the token list is exhausted or a ")" closes the current group.
	 * @param $signTokens array sign ('+', '-' or '') of each token
	 * @param $tokens array the tokens themselves
	 * @param $pos int current token index; advanced as tokens are consumed
	 * @param $total int number of tokens
	 * @return array keyword structure for the current group
	 */
	function _parseQuery($signTokens, $tokens, &$pos, $total) {
		$return = array('+' => array(), '' => array(), '-' => array());
		$postBool = $preBool = '';

		// The boolean operators are localized; compare in lower case.
		$notOperator = String::strtolower(__('search.operator.not'));
		$andOperator = String::strtolower(__('search.operator.and'));
		$orOperator = String::strtolower(__('search.operator.or'));
		while ($pos < $total) {
			// An explicit sign wins; otherwise keep a pending sign (e.g.
			// the '-' set by a preceding NOT) or default to '+'.
			if (!empty($signTokens[$pos])) $sign = $signTokens[$pos];
			else if (empty($sign)) $sign = '+';
			$token = String::strtolower($tokens[$pos++]);
			switch ($token) {
				case $notOperator:
					// Negate the term that follows.
					$sign = '-';
					break;
				case ')':
					// End of the current group.
					return $return;
				case '(':
					// Parse the group recursively, then fall through so
					// that the group is stored like any other term.
					$token = PaperSearch::_parseQuery($signTokens, $tokens, $pos, $total);
				default:
					// Consume an AND/OR operator directly following
					// this term, if there is one.
					$postBool = '';
					if ($pos < $total) {
						$peek = String::strtolower($tokens[$pos]);
						if ($peek == $orOperator) {
							$postBool = 'or';
							$pos++;
						} else if ($peek == $andOperator) {
							$postBool = 'and';
							$pos++;
						}
					}
					// The operator after the term takes precedence over
					// the one that preceded it.
					$bool = empty($postBool) ? $preBool : $postBool;
					$preBool = $postBool;
					// Terms joined by OR go into the optional ('') list.
					if ($bool == 'or') $sign = '';
					if (is_array($token)) $k = $token;
					else $k = PaperSearchIndex::filterKeywords($token, true);
					if (!empty($k)) $return[$sign][] = $k;
					// The sign applies to a single term only.
					$sign = '';
					break;
			}
		}
		return $return;
	}

	/**
	 * Retrieve and merge the results for a set of per-type keyword
	 * structures.
	 * @param $conference object passed through to the search DAO
	 * @param $keywords array search type => keyword structure
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $resultCount int set to the number of papers in the returned array
	 * @return array paper ID => accumulated match count
	 */
	function &_getMergedArray(&$conference, &$keywords, $publishedFrom, $publishedTo, &$resultCount) {
		// Fall back to defaults when the settings are missing or not numeric.
		$resultsPerKeyword = Config::getVar('search', 'results_per_keyword');
		$resultCacheHours = Config::getVar('search', 'result_cache_hours');
		if (!is_numeric($resultsPerKeyword)) $resultsPerKeyword = 100;
		if (!is_numeric($resultCacheHours)) $resultCacheHours = 24;

		// Regroup by sign: every (type, sign) pair becomes a sub-structure
		// tagged with its type and placed in the top-level list of that sign.
		$mergedKeywords = array('+' => array(), '' => array(), '-' => array());
		foreach ($keywords as $type => $keyword) {
			if (!empty($keyword['+']))
				$mergedKeywords['+'][] = array('type' => $type, '+' => $keyword['+'], '' => array(), '-' => array());
			if (!empty($keyword['']))
				$mergedKeywords[''][] = array('type' => $type, '+' => array(), '' => $keyword[''], '-' => array());
			// The excluded phrases are collected as optional ('') matches of
			// a sub-structure; the union of their results is then removed
			// because the sub-structure itself sits in the '-' list.
			if (!empty($keyword['-']))
				$mergedKeywords['-'][] = array('type' => $type, '+' => array(), '' => $keyword['-'], '-' => array());
		}
		$mergedResults =& PaperSearch::_getMergedKeywordResults($conference, $mergedKeywords, null, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);

		$resultCount = count($mergedResults);
		return $mergedResults;
	}

	/**
	 * Evaluate one keyword structure: intersect the results of all '+'
	 * phrases, add the counts of the '' phrases, and remove the papers
	 * matched by the '-' phrases.
	 * @param $conference object passed through to the search DAO
	 * @param $keyword array keyword structure; an optional 'type' entry overrides $type
	 * @param $type mixed search type to use unless the structure carries its own
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $resultsPerKeyword int passed through to the search DAO
	 * @param $resultCacheHours int passed through to the search DAO
	 * @return array paper ID => accumulated match count
	 */
	function &_getMergedKeywordResults(&$conference, &$keyword, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
		// null means "no required phrase has been processed yet".
		$mergedResults = null;

		if (isset($keyword['type'])) {
			$type = $keyword['type'];
		}

		foreach ($keyword['+'] as $phrase) {
			$results =& PaperSearch::_getMergedPhraseResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
			// The comparison must be strict: array() == null is true in
			// PHP, so a loose check would treat an empty intersection as
			// "not started" and replace it with the next phrase's results.
			if ($mergedResults === null) {
				$mergedResults = $results;
			} else {
				// Keep only papers matched by every required phrase.
				foreach ($mergedResults as $paperId => $count) {
					if (isset($results[$paperId])) {
						$mergedResults[$paperId] += $results[$paperId];
					} else {
						unset($mergedResults[$paperId]);
					}
				}
			}
		}

		if ($mergedResults === null) {
			$mergedResults = array();
		}

		// Optional and excluded phrases cannot change an empty result that
		// was produced by required phrases, so skip them in that case.
		if (!empty($mergedResults) || empty($keyword['+'])) {
			foreach ($keyword[''] as $phrase) {
				$results =& PaperSearch::_getMergedPhraseResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
				foreach ($results as $paperId => $count) {
					if (isset($mergedResults[$paperId])) {
						$mergedResults[$paperId] += $count;
					} else if (empty($keyword['+'])) {
						// Optional phrases only introduce new papers when
						// there are no required phrases.
						$mergedResults[$paperId] = $count;
					}
				}
			}

			foreach ($keyword['-'] as $phrase) {
				$results =& PaperSearch::_getMergedPhraseResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
				foreach ($results as $paperId => $count) {
					if (isset($mergedResults[$paperId])) {
						unset($mergedResults[$paperId]);
					}
				}
			}
		}

		return $mergedResults;
	}

	/**
	 * Retrieve the results for a single entry of a keyword structure. A
	 * nested keyword structure is evaluated recursively; anything else is
	 * passed to PaperSearchDAO::getPhraseResults().
	 * @param $conference object passed through to the search DAO
	 * @param $phrase mixed phrase, or nested keyword structure (has a '+' entry)
	 * @param $type mixed search type
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $resultsPerKeyword int passed through to the search DAO
	 * @param $resultCacheHours int passed through to the search DAO
	 * @return array paper ID => match count
	 */
	function &_getMergedPhraseResults(&$conference, &$phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
		if (isset($phrase['+'])) {
			$mergedResults =& PaperSearch::_getMergedKeywordResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
			return $mergedResults;
		}

		$mergedResults = array();
		$paperSearchDao =& DAORegistry::getDAO('PaperSearchDAO');
		$results =& $paperSearchDao->getPhraseResults(
			$conference,
			$phrase,
			$publishedFrom,
			$publishedTo,
			$type,
			$resultsPerKeyword,
			$resultCacheHours
		);
		// Sum the counts per paper in case a paper appears in several rows.
		while (!$results->eof()) {
			$result =& $results->next();
			$paperId = $result['paper_id'];
			if (!isset($mergedResults[$paperId])) {
				$mergedResults[$paperId] = $result['count'];
			} else {
				$mergedResults[$paperId] += $result['count'];
			}
		}
		return $mergedResults;
	}

	/**
	 * Convert a (paper ID => count) array into a (frequency indicator =>
	 * paper ID) array sorted by descending indicator. The indicator is
	 * count * multiplier + position, so a higher count always ranks first
	 * and entries with equal counts still receive distinct keys.
	 * @param $mergedResults array paper ID => match count
	 * @param $resultCount int number of results, used as the multiplier
	 * @return array frequency indicator => paper ID, best match first
	 */
	function &_getSparseArray(&$mergedResults, $resultCount) {
		// The position ranges over 0..n-1, so the multiplier has to be at
		// least n; a smaller one would let two entries produce the same key
		// and silently drop a paper.
		$multiplier = max($resultCount, count($mergedResults));

		$results = array();
		$i = 0;
		foreach ($mergedResults as $paperId => $count) {
			$frequencyIndicator = ($multiplier * $count) + $i++;
			$results[$frequencyIndicator] = $paperId;
		}
		krsort($results);
		return $results;
	}

	/**
	 * Load the objects associated with a list of paper IDs.
	 * Papers are skipped when the paper, its published paper or its
	 * scheduled conference cannot be loaded, or when
	 * SchedConfAction::mayViewProceedings() denies access to the scheduled
	 * conference.
	 * @param $results array list of paper IDs
	 * @return array list of arrays with the entries 'paper',
	 *  'publishedPaper', 'schedConf', 'conference', 'schedConfAvailable'
	 *  and 'track'
	 */
	function &formatResults(&$results) {
		$paperDao =& DAORegistry::getDAO('PaperDAO');
		$publishedPaperDao =& DAORegistry::getDAO('PublishedPaperDAO');
		$schedConfDao =& DAORegistry::getDAO('SchedConfDAO');
		$conferenceDao =& DAORegistry::getDAO('ConferenceDAO');
		$trackDao =& DAORegistry::getDAO('TrackDAO');

		$publishedPaperCache = array();
		$paperCache = array();
		$schedConfCache = array();
		$schedConfAvailabilityCache = array();
		$conferenceCache = array();
		$trackCache = array();

		$returner = array();
		foreach ($results as $paperId) {
			// Get the paper, storing in cache if necessary.
			if (!isset($paperCache[$paperId])) {
				$publishedPaperCache[$paperId] =& $publishedPaperDao->getPublishedPaperByPaperId($paperId);
				$paperCache[$paperId] =& $paperDao->getPaper($paperId);
			}
			// Break the references left over from the previous iteration
			// before binding the variables to the current cache entries.
			unset($paper, $publishedPaper);
			$paper =& $paperCache[$paperId];
			$publishedPaper =& $publishedPaperCache[$paperId];

			if ($publishedPaper && $paper) {
				// Get the track, storing in cache if necessary.
				$trackId = $paper->getTrackId();
				if (!isset($trackCache[$trackId])) {
					$trackCache[$trackId] =& $trackDao->getTrack($trackId);
				}

				// Get the scheduled conference and its availability,
				// storing both in cache if necessary.
				$schedConfId = $publishedPaper->getSchedConfId();
				if (!isset($schedConfCache[$schedConfId])) {
					$schedConfCache[$schedConfId] =& $schedConfDao->getSchedConf($schedConfId);
					// Without a scheduled conference neither the conference
					// nor the availability can be determined: skip the paper.
					if (!$schedConfCache[$schedConfId]) continue;
					import('schedConf.SchedConfAction');
					$schedConfAvailabilityCache[$schedConfId] = SchedConfAction::mayViewProceedings($schedConfCache[$schedConfId]);
				}

				// Get the conference, storing in cache if necessary.
				$conferenceId = $schedConfCache[$schedConfId]->getConferenceId();
				if (!isset($conferenceCache[$conferenceId])) {
					$conferenceCache[$conferenceId] = $conferenceDao->getConference($conferenceId);
				}

				// Store the retrieved objects in the result array.
				if ($schedConfAvailabilityCache[$schedConfId]) {
					$returner[] = array(
						'paper' => &$paper,
						'publishedPaper' => &$publishedPaperCache[$paperId],
						'schedConf' => &$schedConfCache[$schedConfId],
						'conference' => &$conferenceCache[$conferenceId],
						'schedConfAvailable' => $schedConfAvailabilityCache[$schedConfId],
						'track' => &$trackCache[$trackId]
					);
				}
			}
		}
		return $returner;
	}

	/**
	 * Run a search and return one page of formatted results.
	 * @param $conference object passed through to the search DAO
	 * @param $keywords array search type => keyword structure (see parseQuery())
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $rangeInfo object optional paging information; when absent or
	 *  invalid, all results are returned as a single page
	 * @return VirtualArrayIterator over the arrays built by formatResults()
	 */
	function &retrieveResults(&$conference, &$keywords, $publishedFrom = null, $publishedTo = null, $rangeInfo = null) {
		// Fetch all the results from all the keywords into one array
		// (mergedResults), where mergedResults[paper_id]
		// = sum of all the occurrences for all keywords associated with
		// that paper ID.
		// resultCount is set to the number of papers in mergedResults.
		$mergedResults =& PaperSearch::_getMergedArray($conference, $keywords, $publishedFrom, $publishedTo, $resultCount);

		// Convert mergedResults into an array (frequencyIndicator =>
		// $paperId).
		// The frequencyIndicator is a synthetically-generated number,
		// where higher is better, indicating the quality of the match.
		// It is generated here in such a manner that matches with
		// identical frequency do not collide.
		$results =& PaperSearch::_getSparseArray($mergedResults, $resultCount);

		// Counted before paging and before unavailable papers are
		// filtered out by formatResults().
		$totalResults = count($results);

		// Use only the results for the specified page, if specified.
		if ($rangeInfo && $rangeInfo->isValid()) {
			$results = array_slice(
				$results,
				$rangeInfo->getCount() * ($rangeInfo->getPage()-1),
				$rangeInfo->getCount()
			);
			$page = $rangeInfo->getPage();
			$itemsPerPage = $rangeInfo->getCount();
		} else {
			$page = 1;
			$itemsPerPage = max($totalResults, 1);
		}

		// Take the range of results and retrieve the Paper, Conference,
		// and associated objects.
		$results =& PaperSearch::formatResults($results);

		// Return the appropriate iterator.
		import('core.VirtualArrayIterator');
		$returner = new VirtualArrayIterator($results, $totalResults, $page, $itemsPerPage);
		return $returner;
	}
}
00350
00351 ?>