00001 <?php
00002
00018
00019
00020
00021
// Search-field flags. Each of the first nine constants occupies its own bit,
// so several of them can be OR-ed together into a single integer mask.
define('ARTICLE_SEARCH_AUTHOR',             0x001);
define('ARTICLE_SEARCH_TITLE',              0x002);
define('ARTICLE_SEARCH_ABSTRACT',           0x004);
define('ARTICLE_SEARCH_DISCIPLINE',         0x008);
define('ARTICLE_SEARCH_SUBJECT',            0x010);
define('ARTICLE_SEARCH_TYPE',               0x020);
define('ARTICLE_SEARCH_COVERAGE',           0x040);
define('ARTICLE_SEARCH_GALLEY_FILE',        0x080);
define('ARTICLE_SEARCH_SUPPLEMENTARY_FILE', 0x100);

// Composite mask: discipline + subject + type + coverage (evaluates to 0x078).
define(
	'ARTICLE_SEARCH_INDEX_TERMS',
	ARTICLE_SEARCH_DISCIPLINE | ARTICLE_SEARCH_SUBJECT | ARTICLE_SEARCH_TYPE | ARTICLE_SEARCH_COVERAGE
);
00032
00033 import('search.ArticleSearchIndex');
00034
/**
 * Article search helper.
 *
 * Turns a free-text query into a keyword tree (parseQuery), resolves such
 * trees into per-article hit counts through ArticleSearchDAO, orders the
 * articles by hit count and wraps the requested page in a
 * VirtualArrayIterator (retrieveResults).
 *
 * The methods call one another as ArticleSearch::method().
 */
class ArticleSearch {

	/**
	 * Parse a search query string into a keyword tree.
	 *
	 * The query is tokenised into (sign, token) pairs, where the sign is an
	 * optional leading "+" or "-" and the token is a double-quoted phrase,
	 * a single parenthesis, or a run of characters containing no whitespace
	 * and no ")".
	 *
	 * @param $query string the raw query
	 * @return array of the form array('+' => array(...), '' => array(...), '-' => array(...));
	 *  each entry is either the value returned by
	 *  ArticleSearchIndex::filterKeywords() for one token or, for a
	 *  parenthesised group, a nested array of the same form
	 */
	function parseQuery($query) {
		$count = preg_match_all('/(\+|\-|)("[^"]+"|\(|\)|[^\s\)]+)/', $query, $matches);
		$pos = 0;
		$keywords = ArticleSearch::_parseQuery($matches[1], $matches[2], $pos, $count);
		return $keywords;
	}

	/**
	 * Recursive worker for parseQuery().
	 *
	 * Consumes tokens starting at $pos until the token list is exhausted or
	 * a ")" closes the current group.
	 *
	 * @param $signTokens array sign ("+", "-" or "") captured for each token
	 * @param $tokens array the tokens themselves
	 * @param $pos int index of the next token to read; advanced in place so
	 *  the caller resumes after the tokens consumed here
	 * @param $total int number of tokens
	 * @return array array('+' => ..., '' => ..., '-' => ...) for this group
	 */
	function _parseQuery($signTokens, $tokens, &$pos, $total) {
		$return = array('+' => array(), '' => array(), '-' => array());
		$postBool = $preBool = '';

		// The boolean operator words are locale-dependent.
		$notOperator = String::strtolower(Locale::translate('search.operator.not'));
		$andOperator = String::strtolower(Locale::translate('search.operator.and'));
		$orOperator = String::strtolower(Locale::translate('search.operator.or'));
		while ($pos < $total) {
			// An explicit +/- prefix wins. Otherwise keep a sign carried
			// over from a preceding NOT, or fall back to "+".
			if (!empty($signTokens[$pos])) $sign = $signTokens[$pos];
			else if (empty($sign)) $sign = '+';
			$token = String::strtolower($tokens[$pos++]);
			switch ($token) {
				case $notOperator:
					// Negate whatever token comes next.
					$sign = '-';
					break;
				case ')':
					// End of the current parenthesised group.
					return $return;
				case '(':
					// Parse the nested group, then deliberately fall through
					// so the group is filed like an ordinary keyword.
					$token = ArticleSearch::_parseQuery($signTokens, $tokens, $pos, $total);
				default:
					// Look ahead for a trailing AND/OR operator and consume it.
					$postBool = '';
					if ($pos < $total) {
						$peek = String::strtolower($tokens[$pos]);
						if ($peek == $orOperator) {
							$postBool = 'or';
							$pos++;
						} else if ($peek == $andOperator) {
							$postBool = 'and';
							$pos++;
						}
					}
					// The operator following this token takes precedence;
					// otherwise the one that preceded it applies.
					$bool = empty($postBool) ? $preBool : $postBool;
					$preBool = $postBool;
					// Either side of an OR is filed under '' rather than '+'/'-'.
					if ($bool == 'or') $sign = '';
					if (is_array($token)) $k = $token;
					else $k = ArticleSearchIndex::filterKeywords($token, true);
					if (!empty($k)) $return[$sign][] = $k;
					// Clear the sign so the next token starts from the default.
					$sign = '';
					break;
			}
		}
		return $return;
	}

	/**
	 * Resolve a set of per-type keyword trees into one map of hit counts.
	 *
	 * Each non-empty '+', '' and '-' list of every type is wrapped in its own
	 * sub-tree tagged with that type, and the combined tree is handed to
	 * _getMergedKeywordResults().
	 *
	 * @param $journal object passed through to ArticleSearchDAO::getPhraseResults()
	 * @param $keywords array search type => keyword tree (parseQuery() form)
	 * @param $publishedFrom mixed lower publication bound, passed through to the DAO
	 * @param $publishedTo mixed upper publication bound, passed through to the DAO
	 * @param $resultCount int output: number of articles in the returned map
	 * @return array article ID => accumulated hit count
	 */
	function &_getMergedArray(&$journal, &$keywords, $publishedFrom, $publishedTo, &$resultCount) {
		$resultsPerKeyword = Config::getVar('search', 'results_per_keyword');
		$resultCacheHours = Config::getVar('search', 'result_cache_hours');
		// Fall back to built-in defaults when the settings are missing or non-numeric.
		if (!is_numeric($resultsPerKeyword)) $resultsPerKeyword = 100;
		if (!is_numeric($resultCacheHours)) $resultCacheHours = 24;

		$mergedKeywords = array('+' => array(), '' => array(), '-' => array());
		foreach ($keywords as $type => $keyword) {
			if (!empty($keyword['+']))
				$mergedKeywords['+'][] = array('type' => $type, '+' => $keyword['+'], '' => array(), '-' => array());
			if (!empty($keyword['']))
				$mergedKeywords[''][] = array('type' => $type, '+' => array(), '' => $keyword[''], '-' => array());
			// Excluded phrases go into the '' slot of their sub-tree: the
			// sub-tree collects every article matching any of them, and the
			// outer '-' list then removes those articles.
			if (!empty($keyword['-']))
				$mergedKeywords['-'][] = array('type' => $type, '+' => array(), '' => $keyword['-'], '-' => array());
		}
		$mergedResults = &ArticleSearch::_getMergedKeywordResults($journal, $mergedKeywords, null, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);

		$resultCount = count($mergedResults);
		return $mergedResults;
	}

	/**
	 * Evaluate one keyword tree.
	 *
	 * '+' phrases are intersected (an article must match every one of them),
	 * '' phrases add to the counts (and contribute new articles only when
	 * there are no '+' phrases), and '-' phrases remove matching articles.
	 *
	 * @param $journal object passed through to the DAO
	 * @param $keyword array keyword tree; an optional 'type' entry overrides $type
	 * @param $type mixed search type inherited from the enclosing tree
	 * @param $publishedFrom mixed passed through to the DAO
	 * @param $publishedTo mixed passed through to the DAO
	 * @param $resultsPerKeyword int passed through to the DAO
	 * @param $resultCacheHours int passed through to the DAO
	 * @return array article ID => accumulated hit count
	 */
	function &_getMergedKeywordResults(&$journal, &$keyword, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
		$mergedResults = null;

		if (isset($keyword['type'])) {
			$type = $keyword['type'];
		}

		foreach ($keyword['+'] as $phrase) {
			$results = &ArticleSearch::_getMergedPhraseResults($journal, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
			// Strict comparison is essential: an empty array is loosely equal
			// to null, so "==" would mistake "a required phrase matched
			// nothing" for "no phrase processed yet" and let the next phrase
			// overwrite the (empty) intersection.
			if ($mergedResults === null) {
				$mergedResults = $results;
			} else {
				// Keep only articles that also match this phrase.
				foreach ($mergedResults as $articleId => $count) {
					if (isset($results[$articleId])) {
						$mergedResults[$articleId] += $results[$articleId];
					} else {
						unset($mergedResults[$articleId]);
					}
				}
			}
		}

		if ($mergedResults === null) {
			$mergedResults = array();
		}

		// Nothing more to do when required phrases exist but none survived.
		if (!empty($mergedResults) || empty($keyword['+'])) {
			foreach ($keyword[''] as $phrase) {
				$results = &ArticleSearch::_getMergedPhraseResults($journal, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
				foreach ($results as $articleId => $count) {
					if (isset($mergedResults[$articleId])) {
						$mergedResults[$articleId] += $count;
					} else if (empty($keyword['+'])) {
						// Optional phrases may introduce articles only when
						// there is no required set to stay within.
						$mergedResults[$articleId] = $count;
					}
				}
			}

			foreach ($keyword['-'] as $phrase) {
				$results = &ArticleSearch::_getMergedPhraseResults($journal, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
				foreach ($results as $articleId => $count) {
					if (isset($mergedResults[$articleId])) {
						unset($mergedResults[$articleId]);
					}
				}
			}
		}

		return $mergedResults;
	}

	/**
	 * Evaluate a single entry of a keyword tree.
	 *
	 * An entry that itself has a '+' key is a nested tree and is delegated
	 * to _getMergedKeywordResults(); anything else is looked up through
	 * ArticleSearchDAO::getPhraseResults().
	 *
	 * @param $journal object passed through to the DAO
	 * @param $phrase array nested keyword tree or a single phrase
	 * @param $type mixed search type
	 * @param $publishedFrom mixed passed through to the DAO
	 * @param $publishedTo mixed passed through to the DAO
	 * @param $resultsPerKeyword int passed through to the DAO
	 * @param $resultCacheHours int passed through to the DAO
	 * @return array article ID => hit count (summed over all returned rows)
	 */
	function &_getMergedPhraseResults(&$journal, &$phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
		if (isset($phrase['+'])) {
			$mergedResults = &ArticleSearch::_getMergedKeywordResults($journal, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
			return $mergedResults;
		}

		$mergedResults = array();
		$articleSearchDao = &DAORegistry::getDAO('ArticleSearchDAO');
		$results = &$articleSearchDao->getPhraseResults(
			$journal,
			$phrase,
			$publishedFrom,
			$publishedTo,
			$type,
			$resultsPerKeyword,
			$resultCacheHours
		);
		// The same article may appear in several rows; add the counts up.
		while (!$results->eof()) {
			$result = &$results->next();
			$articleId = $result['article_id'];
			if (!isset($mergedResults[$articleId])) {
				$mergedResults[$articleId] = $result['count'];
			} else {
				$mergedResults[$articleId] += $result['count'];
			}
		}
		return $mergedResults;
	}

	/**
	 * Convert an article ID => hit count map into a list of article IDs
	 * ordered by descending hit count.
	 *
	 * Each article receives the key ($resultCount * count) + position, which
	 * is unique as long as $resultCount is at least the number of entries;
	 * sorting the keys in reverse then orders the articles.
	 *
	 * @param $mergedResults array article ID => hit count
	 * @param $resultCount int number of entries in $mergedResults
	 * @return array sort key => article ID, highest key first
	 */
	function &_getSparseArray(&$mergedResults, $resultCount) {
		$results = array();
		$i = 0;
		foreach ($mergedResults as $articleId => $count) {
			$frequencyIndicator = ($resultCount * $count) + $i++;
			$results[$frequencyIndicator] = $articleId;
		}
		krsort($results);
		return $results;
	}

	/**
	 * Load the display objects for a list of article IDs.
	 *
	 * Article IDs for which either the article or its published-article
	 * record cannot be loaded are skipped.
	 *
	 * @param $results array list of article IDs
	 * @return array list of arrays with the keys 'article',
	 *  'publishedArticle', 'issue', 'journal', 'issueAvailable' and 'section'
	 */
	function &formatResults(&$results) {
		$articleDao = &DAORegistry::getDAO('ArticleDAO');
		$publishedArticleDao = &DAORegistry::getDAO('PublishedArticleDAO');
		$issueDao = &DAORegistry::getDAO('IssueDAO');
		$journalDao = &DAORegistry::getDAO('JournalDAO');
		$sectionDao = &DAORegistry::getDAO('SectionDAO');

		// Per-call caches so each object is fetched at most once.
		$publishedArticleCache = array();
		$articleCache = array();
		$issueCache = array();
		$issueAvailabilityCache = array();
		$journalCache = array();
		$sectionCache = array();

		$returner = array();
		foreach ($results as $articleId) {
			if (!isset($articleCache[$articleId])) {
				$publishedArticleCache[$articleId] = &$publishedArticleDao->getPublishedArticleByArticleId($articleId);
				$articleCache[$articleId] = &$articleDao->getArticle($articleId);
			}
			// Break the reference left over from the previous iteration so
			// rebinding $article cannot disturb entries already in $returner.
			unset($article);
			$article = &$articleCache[$articleId];
			$publishedArticle = &$publishedArticleCache[$articleId];

			// Skip entries whose objects could not be loaded. This check must
			// come before any method call on $article, otherwise a missing
			// article is a fatal error.
			if (!$publishedArticle || !$article) continue;

			$sectionId = $article->getSectionId();
			if (!isset($sectionCache[$sectionId])) {
				$sectionCache[$sectionId] = &$sectionDao->getSection($sectionId);
			}

			$journalId = $article->getJournalId();
			if (!isset($journalCache[$journalId])) {
				$journalCache[$journalId] = $journalDao->getJournal($journalId);
			}

			$issueId = $publishedArticle->getIssueId();
			if (!isset($issueCache[$issueId])) {
				unset($issue);
				$issue = &$issueDao->getIssueById($issueId);
				$issueCache[$issueId] = &$issue;
				import('issue.IssueAction');
				// NOTE(review): availability is cached per issue but computed
				// with the ID of the first article seen for that issue --
				// confirm the IssueAction checks do not vary per article.
				$issueAvailabilityCache[$issueId] = !IssueAction::subscriptionRequired($issue) || IssueAction::subscribedUser($journalCache[$journalId], $issueId, $articleId) || IssueAction::subscribedDomain($journalCache[$journalId], $issueId, $articleId);
			}

			$returner[] = array(
				'article' => &$article,
				'publishedArticle' => &$publishedArticleCache[$articleId],
				'issue' => &$issueCache[$issueId],
				'journal' => &$journalCache[$journalId],
				'issueAvailable' => $issueAvailabilityCache[$issueId],
				'section' => &$sectionCache[$sectionId]
			);
		}
		return $returner;
	}

	/**
	 * Run a search and return one page of formatted results.
	 *
	 * @param $journal object passed through to the DAO
	 * @param $keywords array search type => keyword tree (parseQuery() form)
	 * @param $publishedFrom mixed optional lower publication bound
	 * @param $publishedTo mixed optional upper publication bound
	 * @param $rangeInfo object optional paging information; used only when
	 *  its isValid() returns true, in which case getPage() (1-based) and
	 *  getCount() select the slice. Otherwise all results form one page.
	 * @return VirtualArrayIterator over the formatResults() entries of the
	 *  selected page, constructed with the total result count, the page
	 *  number and the page size
	 */
	function &retrieveResults(&$journal, &$keywords, $publishedFrom = null, $publishedTo = null, $rangeInfo = null) {
		// Article ID => hit count for everything matching the keywords.
		$mergedResults = &ArticleSearch::_getMergedArray($journal, $keywords, $publishedFrom, $publishedTo, $resultCount);

		// Article IDs ordered by descending hit count.
		$results = &ArticleSearch::_getSparseArray($mergedResults, $resultCount);

		$totalResults = count($results);

		// Cut out the requested page, if paging information was supplied.
		if ($rangeInfo && $rangeInfo->isValid()) {
			$results = array_slice(
				$results,
				$rangeInfo->getCount() * ($rangeInfo->getPage()-1),
				$rangeInfo->getCount()
			);
			$page = $rangeInfo->getPage();
			$itemsPerPage = $rangeInfo->getCount();
		} else {
			$page = 1;
			// Never report a page size of zero.
			$itemsPerPage = max($totalResults, 1);
		}

		// Only the articles on the selected page are loaded.
		$results =& ArticleSearch::formatResults($results);

		// Plain "new": assigning the result of "new" by reference is
		// deprecated in PHP 5 and a parse error from PHP 7 onwards.
		$returner = new VirtualArrayIterator($results, $totalResults, $page, $itemsPerPage);
		return $returner;
	}
}
00345
00346 ?>