classes/search/PaperSearch.inc.php

Go to the documentation of this file.
00001 <?php
00002 
00022 //$Id$
00023 
// Search types.
// Every searchable paper field owns a single bit, so several fields can be
// combined into one mask with bitwise OR.
define('PAPER_SEARCH_AUTHOR', 1 << 0);
define('PAPER_SEARCH_TITLE', 1 << 1);
define('PAPER_SEARCH_ABSTRACT', 1 << 2);
define('PAPER_SEARCH_DISCIPLINE', 1 << 3);
define('PAPER_SEARCH_SUBJECT', 1 << 4);
define('PAPER_SEARCH_TYPE', 1 << 5);
define('PAPER_SEARCH_COVERAGE', 1 << 6);
define('PAPER_SEARCH_GALLEY_FILE', 1 << 7);
define('PAPER_SEARCH_SUPPLEMENTARY_FILE', 1 << 8);

// Composite mask (0x78): discipline, subject, type and coverage together.
define(
	'PAPER_SEARCH_INDEX_TERMS',
	PAPER_SEARCH_DISCIPLINE | PAPER_SEARCH_SUBJECT | PAPER_SEARCH_TYPE | PAPER_SEARCH_COVERAGE
);
00035 
00036 import('search.PaperSearchIndex');
00037 
/**
 * Turns a free-text search query into a keyword tree and resolves that tree
 * into a ranked, paginated list of papers.
 *
 * The methods of this class call one another statically
 * (PaperSearch::method()).
 */
class PaperSearch {

	/**
	 * Parse a search query string into a keyword tree.
	 *
	 * The query is tokenized into (sign, token) pairs, where the sign is
	 * '+', '-' or empty and the token is a double-quoted phrase, a single
	 * parenthesis, or a run of non-whitespace characters.
	 *
	 * @param $query string
	 * @return array of the form
	 *  ('+' => required entries, '' => optional entries, '-' => excluded entries);
	 *  an entry produced by a parenthesized group is itself such an array.
	 */
	function parseQuery($query) {
		$count = preg_match_all('/(\+|\-|)("[^"]+"|\(|\)|[^\s\)]+)/', $query, $matches);
		$pos = 0;
		$keywords = PaperSearch::_parseQuery($matches[1], $matches[2], $pos, $count);
		return $keywords;
	}

	/**
	 * Recursive worker for parseQuery(); consumes tokens starting at $pos
	 * until the tokens run out or a closing parenthesis is read.
	 *
	 * @param $signTokens array sign ('+', '-' or '') of each token
	 * @param $tokens array the tokens themselves
	 * @param $pos int index of the next token to read; advanced in place
	 * @param $total int number of tokens
	 * @return array ('+' => array(), '' => array(), '-' => array())
	 */
	function _parseQuery($signTokens, $tokens, &$pos, $total) {
		$return = array('+' => array(), '' => array(), '-' => array());
		$postBool = $preBool = '';

		// The boolean operators are localized, so compare against the
		// lower-cased translations.
		$notOperator = String::strtolower(__('search.operator.not'));
		$andOperator = String::strtolower(__('search.operator.and'));
		$orOperator = String::strtolower(__('search.operator.or'));
		while ($pos < $total) {
			// An explicit sign wins; otherwise a term defaults to "required"
			// unless a sign is pending (e.g. '-' left behind by NOT).
			if (!empty($signTokens[$pos])) $sign = $signTokens[$pos];
			else if (empty($sign)) $sign = '+';
			$token = String::strtolower($tokens[$pos++]);
			switch ($token) {
				case $notOperator:
					// Applies to the term that follows.
					$sign = '-';
					break;
				case ')':
					// End of the current group.
					return $return;
				case '(':
					// Parse the group recursively, then deliberately fall
					// through so the resulting sub-tree is filed exactly
					// like a plain term.
					$token = PaperSearch::_parseQuery($signTokens, $tokens, $pos, $total);
				default:
					// Look ahead for an AND/OR following this term and
					// consume it.
					$postBool = '';
					if ($pos < $total) {
						$peek = String::strtolower($tokens[$pos]);
						if ($peek == $orOperator) {
							$postBool = 'or';
							$pos++;
						} else if ($peek == $andOperator) {
							$postBool = 'and';
							$pos++;
						}
					}
					// A term takes part in an OR if the operator appears
					// either directly after it or directly before it.
					$bool = empty($postBool) ? $preBool : $postBool;
					$preBool = $postBool;
					if ($bool == 'or') $sign = '';
					if (is_array($token)) $k = $token;
					else $k = PaperSearchIndex::filterKeywords($token, true);
					if (!empty($k)) $return[$sign][] = $k;
					$sign = '';
					break;
			}
		}
		return $return;
	}

	/**
	 * Resolve the per-search-type keyword trees into one merged result
	 * array.
	 *
	 * @param $conference object passed through to the search DAO
	 * @param $keywords array search type => keyword tree (see parseQuery)
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $resultCount int set to the number of merged results
	 * @return array paper ID => accumulated occurrence count
	 */
	function &_getMergedArray(&$conference, &$keywords, $publishedFrom, $publishedTo, &$resultCount) {
		$resultsPerKeyword = Config::getVar('search', 'results_per_keyword');
		$resultCacheHours = Config::getVar('search', 'result_cache_hours');
		if (!is_numeric($resultsPerKeyword)) $resultsPerKeyword = 100;
		if (!is_numeric($resultCacheHours)) $resultCacheHours = 24;

		// Wrap each type's required/optional/excluded lists in a typed
		// sub-tree so that a single top-level tree covers all types.
		$mergedKeywords = array('+' => array(), '' => array(), '-' => array());
		foreach ($keywords as $type => $keyword) {
			if (!empty($keyword['+']))
				$mergedKeywords['+'][] = array('type' => $type, '+' => $keyword['+'], '' => array(), '-' => array());
			if (!empty($keyword['']))
				$mergedKeywords[''][] = array('type' => $type, '+' => array(), '' => $keyword[''], '-' => array());
			// Excluded phrases go into the sub-tree's optional list: the
			// sub-tree then matches any paper containing one of them, and
			// the top level removes those papers.
			if (!empty($keyword['-']))
				$mergedKeywords['-'][] = array('type' => $type, '+' => array(), '' => $keyword['-'], '-' => array());
		}
		$mergedResults =& PaperSearch::_getMergedKeywordResults($conference, $mergedKeywords, null, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);

		$resultCount = count($mergedResults);
		return $mergedResults;
	}

	/**
	 * Evaluate one keyword tree: intersect the required phrases, add in the
	 * optional phrases, then remove the excluded phrases.
	 *
	 * @param $conference object passed through to the search DAO
	 * @param $keyword array ('+' => ..., '' => ..., '-' => ...), optionally
	 *  with a 'type' entry that overrides $type
	 * @param $type int search type inherited from the enclosing tree
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $resultsPerKeyword int passed through to the search DAO
	 * @param $resultCacheHours int passed through to the search DAO
	 * @return array paper ID => accumulated occurrence count
	 */
	function &_getMergedKeywordResults(&$conference, &$keyword, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
		// null means "no required phrase processed yet"; an empty array
		// means "the required phrases have no paper in common".
		$mergedResults = null;

		if (isset($keyword['type'])) {
			$type = $keyword['type'];
		}

		foreach ($keyword['+'] as $phrase) {
			$results =& PaperSearch::_getMergedPhraseResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
			// The comparison must be strict: an empty array is loosely
			// equal to null, and treating an empty intersection as "not
			// started" would let the next phrase replace it with its own
			// results.
			if ($mergedResults === null) {
				$mergedResults = $results;
			} else {
				// Keep only papers matched by every required phrase.
				foreach ($mergedResults as $paperId => $count) {
					if (isset($results[$paperId])) {
						$mergedResults[$paperId] += $results[$paperId];
					} else {
						unset($mergedResults[$paperId]);
					}
				}
			}
		}

		if ($mergedResults === null) {
			$mergedResults = array();
		}

		// If required phrases were given but nothing satisfied them all,
		// optional and excluded phrases cannot change the (empty) outcome.
		if (!empty($mergedResults) || empty($keyword['+'])) {
			foreach ($keyword[''] as $phrase) {
				$results =& PaperSearch::_getMergedPhraseResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
				foreach ($results as $paperId => $count) {
					if (isset($mergedResults[$paperId])) {
						$mergedResults[$paperId] += $count;
					} else if (empty($keyword['+'])) {
						// Optional phrases only introduce new papers when
						// there are no required phrases.
						$mergedResults[$paperId] = $count;
					}
				}
			}

			foreach ($keyword['-'] as $phrase) {
				$results =& PaperSearch::_getMergedPhraseResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
				foreach ($results as $paperId => $count) {
					if (isset($mergedResults[$paperId])) {
						unset($mergedResults[$paperId]);
					}
				}
			}
		}

		return $mergedResults;
	}

	/**
	 * Resolve a single entry of a keyword tree. An entry that is itself a
	 * keyword tree is evaluated recursively; anything else is handed to
	 * PaperSearchDAO::getPhraseResults() as a phrase.
	 *
	 * @param $conference object passed through to the search DAO
	 * @param $phrase array phrase or nested keyword tree
	 * @param $type int search type
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $resultsPerKeyword int passed through to the search DAO
	 * @param $resultCacheHours int passed through to the search DAO
	 * @return array paper ID => accumulated occurrence count
	 */
	function &_getMergedPhraseResults(&$conference, &$phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
		if (isset($phrase['+'])) {
			$mergedResults =& PaperSearch::_getMergedKeywordResults($conference, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
			return $mergedResults;
		}

		$mergedResults = array();
		$paperSearchDao =& DAORegistry::getDAO('PaperSearchDAO');
		$results =& $paperSearchDao->getPhraseResults(
			$conference,
			$phrase,
			$publishedFrom,
			$publishedTo,
			$type,
			$resultsPerKeyword,
			$resultCacheHours
		);
		// Sum the counts of all rows belonging to the same paper.
		while (!$results->eof()) {
			$result =& $results->next();
			$paperId = $result['paper_id'];
			if (!isset($mergedResults[$paperId])) {
				$mergedResults[$paperId] = $result['count'];
			} else {
				$mergedResults[$paperId] += $result['count'];
			}
		}
		return $mergedResults;
	}

	/**
	 * Convert a (paper ID => count) array into a (frequency indicator =>
	 * paper ID) array sorted by descending indicator.
	 *
	 * The indicator is count * $resultCount plus the entry's position, so
	 * entries with equal counts receive distinct keys as long as
	 * $resultCount is at least the number of entries.
	 *
	 * @param $mergedResults array paper ID => occurrence count
	 * @param $resultCount int number of entries in $mergedResults
	 * @return array frequency indicator => paper ID, best match first
	 */
	function &_getSparseArray(&$mergedResults, $resultCount) {
		$results = array();
		$i = 0;
		foreach ($mergedResults as $paperId => $count) {
			$frequencyIndicator = ($resultCount * $count) + $i++;
			$results[$frequencyIndicator] = $paperId;
		}
		krsort($results);
		return $results;
	}

	/**
	 * Load the objects belonging to a list of paper IDs.
	 *
	 * A paper is skipped when the paper, its published-paper record or its
	 * scheduled conference cannot be loaded, or when
	 * SchedConfAction::mayViewProceedings() denies access to its scheduled
	 * conference.
	 *
	 * @param $results array list of paper IDs
	 * @return array list of arrays with the keys 'paper', 'publishedPaper',
	 *  'schedConf', 'conference', 'schedConfAvailable' and 'track'
	 */
	function &formatResults(&$results) {
		$paperDao =& DAORegistry::getDAO('PaperDAO');
		$publishedPaperDao =& DAORegistry::getDAO('PublishedPaperDAO');
		$schedConfDao =& DAORegistry::getDAO('SchedConfDAO');
		$conferenceDao =& DAORegistry::getDAO('ConferenceDAO');
		$trackDao =& DAORegistry::getDAO('TrackDAO');

		$publishedPaperCache = array();
		$paperCache = array();
		$schedConfCache = array();
		$schedConfAvailabilityCache = array();
		$conferenceCache = array();
		$trackCache = array();

		$returner = array();
		foreach ($results as $paperId) {
			// Get the paper, storing in cache if necessary.
			if (!isset($paperCache[$paperId])) {
				$publishedPaperCache[$paperId] =& $publishedPaperDao->getPublishedPaperByPaperId($paperId);
				$paperCache[$paperId] =& $paperDao->getPaper($paperId);
			}
			// Break the previous iteration's reference bindings before
			// re-binding, so the cached entries are not overwritten.
			unset($paper, $publishedPaper, $schedConf);
			$paper =& $paperCache[$paperId];
			$publishedPaper =& $publishedPaperCache[$paperId];
			if (!$publishedPaper || !$paper) continue;

			// Get the track, storing in cache if necessary.
			$trackId = $paper->getTrackId();
			if (!isset($trackCache[$trackId])) {
				$trackCache[$trackId] =& $trackDao->getTrack($trackId);
			}

			// Get the scheduled conference and its availability, storing
			// in cache if necessary so that each scheduled conference is
			// loaded only once.
			$schedConfId = $publishedPaper->getSchedConfId();
			if (!isset($schedConfCache[$schedConfId])) {
				$schedConfCache[$schedConfId] =& $schedConfDao->getSchedConf($schedConfId);
				// Skip papers whose scheduled conference cannot be loaded.
				if (!$schedConfCache[$schedConfId]) continue;
				import('schedConf.SchedConfAction');
				$schedConfAvailabilityCache[$schedConfId] = SchedConfAction::mayViewProceedings($schedConfCache[$schedConfId]);
			}
			$schedConf =& $schedConfCache[$schedConfId];

			// Get the conference, storing in cache if necessary.
			$conferenceId = $schedConf->getConferenceId();
			if (!isset($conferenceCache[$conferenceId])) {
				$conferenceCache[$conferenceId] = $conferenceDao->getConference($conferenceId);
			}

			// Store the retrieved objects in the result array.
			if ($schedConfAvailabilityCache[$schedConfId]) {
				$returner[] = array(
					'paper' => &$paperCache[$paperId],
					'publishedPaper' => &$publishedPaperCache[$paperId],
					'schedConf' => &$schedConfCache[$schedConfId],
					'conference' => &$conferenceCache[$conferenceId],
					'schedConfAvailable' => $schedConfAvailabilityCache[$schedConfId],
					'track' => &$trackCache[$trackId]
				);
			}
		}
		return $returner;
	}

	/**
	 * Run a search and return one page of formatted results.
	 *
	 * @param $conference object passed through to the search DAO
	 * @param $keywords array search type => keyword tree (see parseQuery)
	 * @param $publishedFrom mixed passed through to the search DAO
	 * @param $publishedTo mixed passed through to the search DAO
	 * @param $rangeInfo object optional paging information; only used when
	 *  its isValid() returns true
	 * @return VirtualArrayIterator over arrays as built by formatResults()
	 */
	function &retrieveResults(&$conference, &$keywords, $publishedFrom = null, $publishedTo = null, $rangeInfo = null) {
		// Fetch all the results from all the keywords into one array
		// (mergedResults), where mergedResults[paper_id]
		// = sum of all the occurrences for all keywords associated with
		// that paper ID.
		// resultCount receives the number of entries in mergedResults.
		$mergedResults =& PaperSearch::_getMergedArray($conference, $keywords, $publishedFrom, $publishedTo, $resultCount);

		// Convert mergedResults into an array (frequencyIndicator =>
		// $paperId).
		// The frequencyIndicator is a synthetically-generated number,
		// where higher is better, indicating the quality of the match.
		// It is generated here in such a manner that matches with
		// identical frequency do not collide.
		$results =& PaperSearch::_getSparseArray($mergedResults, $resultCount);

		$totalResults = count($results);

		// Use only the results for the specified page, if specified.
		if ($rangeInfo && $rangeInfo->isValid()) {
			$results = array_slice(
				$results,
				$rangeInfo->getCount() * ($rangeInfo->getPage()-1),
				$rangeInfo->getCount()
			);
			$page = $rangeInfo->getPage();
			$itemsPerPage = $rangeInfo->getCount();
		} else {
			$page = 1;
			$itemsPerPage = max($totalResults, 1);
		}

		// Take the range of results and retrieve the Paper, Conference,
		// and associated objects.
		$results =& PaperSearch::formatResults($results);

		// Return the appropriate iterator.
		import('core.VirtualArrayIterator');
		$returner = new VirtualArrayIterator($results, $totalResults, $page, $itemsPerPage);
		return $returner;
	}
}
00350 
00351 ?>

Generated on 25 Jul 2013 for Open Conference Systems by  doxygen 1.4.7