• Main Page
  • Modules
  • Classes
  • Files
  • File List

classes/search/MonographSearch.inc.php

00001 <?php
00002 
// Search types. Each field is a single bit so that several fields can be
// combined into one search-type mask with bitwise OR.
define('MONOGRAPH_SEARCH_AUTHOR', 1 << 0);
define('MONOGRAPH_SEARCH_TITLE', 1 << 1);
define('MONOGRAPH_SEARCH_ABSTRACT', 1 << 2);
define('MONOGRAPH_SEARCH_DISCIPLINE', 1 << 3);
define('MONOGRAPH_SEARCH_SUBJECT', 1 << 4);
define('MONOGRAPH_SEARCH_TYPE', 1 << 5);
define('MONOGRAPH_SEARCH_COVERAGE', 1 << 6);
define('MONOGRAPH_SEARCH_GALLEY_FILE', 1 << 7);
define('MONOGRAPH_SEARCH_SUPPLEMENTARY_FILE', 1 << 8);

// Combined mask: discipline, subject, type and coverage (0x00000078).
define(
   'MONOGRAPH_SEARCH_INDEX_TERMS',
   MONOGRAPH_SEARCH_DISCIPLINE | MONOGRAPH_SEARCH_SUBJECT | MONOGRAPH_SEARCH_TYPE | MONOGRAPH_SEARCH_COVERAGE
);
00031 
00032 import('classes.search.MonographSearchIndex');
00033 
/**
 * Monograph search support: parses query strings into signed keyword groups,
 * merges per-phrase hit counts into a ranked list of monograph IDs, and
 * resolves those IDs into monograph/press/series objects.
 *
 * The methods are declared without the "static" keyword but are invoked
 * through MonographSearch:: inside this class.
 */
class MonographSearch {

   /**
    * Tokenize a query string and parse it into keyword groups.
    * A token is an optional "+" or "-" sign followed by a double-quoted
    * phrase, a single parenthesis, or a run of characters containing neither
    * whitespace nor ")".
    * @param $query string the raw query
    * @return array keyword groups keyed by '+', '' and '-' (see _parseQuery)
    */
   function parseQuery($query) {
      $count = preg_match_all('/(\+|\-|)("[^"]+"|\(|\)|[^\s\)]+)/', $query, $matches);
      $pos = 0;
      $keywords = MonographSearch::_parseQuery($matches[1], $matches[2], $pos, $count);
      return $keywords;
   }

   /**
    * Recursive worker for parseQuery.
    * Consumes tokens starting at $pos until the tokens run out or a ")" is
    * reached. A "(" starts a nested group, which is parsed recursively and
    * stored as a nested array.
    * @param $signTokens array sign ('+', '-' or '') captured for each token
    * @param $tokens array the token strings, parallel to $signTokens
    * @param $pos int current token index; advanced as tokens are consumed
    * @param $total int number of tokens
    * @return array array('+' => required terms, '' => optional terms,
    *  '-' => excluded terms); each term is either the non-empty result of
    *  MonographSearchIndex::filterKeywords() or a nested array of this shape
    */
   function _parseQuery($signTokens, $tokens, &$pos, $total) {
      $return = array('+' => array(), '' => array(), '-' => array());
      $postBool = $preBool = '';
      // Sign carried into the next term; '' means "no sign decided yet".
      $sign = '';

      $notOperator = String::strtolower(__('search.operator.not'));
      $andOperator = String::strtolower(__('search.operator.and'));
      $orOperator = String::strtolower(__('search.operator.or'));
      while ($pos < $total) {
         // An explicit +/- on the token wins; otherwise keep a pending sign
         // (e.g. the '-' set by a preceding NOT) or default to required.
         if (!empty($signTokens[$pos])) $sign = $signTokens[$pos];
         else if (empty($sign)) $sign = '+';
         $token = String::strtolower($tokens[$pos++]);
         switch ($token) {
            case $notOperator:
               // Negate the term that follows.
               $sign = '-';
               break;
            case ')':
               // End of the current (nested) group.
               return $return;
            case '(':
               $token = MonographSearch::_parseQuery($signTokens, $tokens, $pos, $total);
               // Intentional fall-through: the nested group is stored like a term.
            default:
               // Look ahead for a boolean operator following this term.
               $postBool = '';
               if ($pos < $total) {
                  $peek = String::strtolower($tokens[$pos]);
                  if ($peek == $orOperator) {
                     $postBool = 'or';
                     $pos++;
                  } else if ($peek == $andOperator) {
                     $postBool = 'and';
                     $pos++;
                  }
               }
               // The operator after the term takes precedence; otherwise the
               // operator that preceded it applies.
               $bool = empty($postBool) ? $preBool : $postBool;
               $preBool = $postBool;
               // Terms joined by OR are optional rather than required.
               if ($bool == 'or') $sign = '';
               if (is_array($token)) $k = $token;
               else $k = MonographSearchIndex::filterKeywords($token, true);
               if (!empty($k)) $return[$sign][] = $k;
               $sign = '';
               break;
         }
      }
      return $return;
   }

   /**
    * Merge the results for all keywords of all search types into one array.
    * Reads "results_per_keyword" and "result_cache_hours" from the "search"
    * configuration section, falling back to 100 and 24 when the configured
    * value is not numeric.
    * @param $press mixed passed through to the search DAO
    * @param $keywords array search type => parsed keyword groups
    *  (arrays keyed by '+', '' and '-')
    * @param $publishedFrom mixed passed through to the search DAO
    * @param $publishedTo mixed passed through to the search DAO
    * @param $resultCount int output; set to the number of merged results
    * @return array monograph ID => accumulated count
    */
   function &_getMergedArray(&$press, &$keywords, $publishedFrom, $publishedTo, &$resultCount) {
      $resultsPerKeyword = Config::getVar('search', 'results_per_keyword');
      $resultCacheHours = Config::getVar('search', 'result_cache_hours');
      if (!is_numeric($resultsPerKeyword)) $resultsPerKeyword = 100;
      if (!is_numeric($resultCacheHours)) $resultCacheHours = 24;

      // Wrap each type's keyword groups so that the type travels with them.
      $mergedKeywords = array('+' => array(), '' => array(), '-' => array());
      foreach ($keywords as $type => $keyword) {
         if (!empty($keyword['+']))
            $mergedKeywords['+'][] = array('type' => $type, '+' => $keyword['+'], '' => array(), '-' => array());
         if (!empty($keyword['']))
            $mergedKeywords[''][] = array('type' => $type, '+' => array(), '' => $keyword[''], '-' => array());
         // Excluded terms go into the optional slot of an excluded group, so
         // a match on any one of them removes the monograph.
         if (!empty($keyword['-']))
            $mergedKeywords['-'][] = array('type' => $type, '+' => array(), '' => $keyword['-'], '-' => array());
      }
      $mergedResults =& MonographSearch::_getMergedKeywordResults($press, $mergedKeywords, null, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);

      $resultCount = count($mergedResults);
      return $mergedResults;
   }

   /**
    * Merge the results of one keyword group.
    * '+' phrases are intersected (counts summed); '' phrases add to the
    * counts and only introduce new monographs when the group has no '+'
    * phrases; '-' phrases remove monographs. When '+' phrases exist but
    * produce no results, the '' and '-' phrases are not evaluated.
    * @param $press mixed passed through to the search DAO
    * @param $keyword array group keyed by '+', '' and '-', optionally with a
    *  'type' entry that overrides $type
    * @param $type mixed search type used when the group carries none
    * @param $publishedFrom mixed passed through to the search DAO
    * @param $publishedTo mixed passed through to the search DAO
    * @param $resultsPerKeyword int passed through to the search DAO
    * @param $resultCacheHours int passed through to the search DAO
    * @return array monograph ID => accumulated count
    */
   function &_getMergedKeywordResults(&$press, &$keyword, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
      $mergedResults = null;

      if (isset($keyword['type'])) {
         $type = $keyword['type'];
      }

      // Required phrases: keep only monographs matched by every phrase.
      foreach ($keyword['+'] as $phrase) {
         $results =& MonographSearch::_getMergedPhraseResults($press, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
         if ($mergedResults === null) {
            $mergedResults = $results;
         } else {
            foreach ($mergedResults as $monographId => $count) {
               if (isset($results[$monographId])) {
                  $mergedResults[$monographId] += $results[$monographId];
               } else {
                  unset($mergedResults[$monographId]);
               }
            }
         }
      }

      if ($mergedResults == null) {
         $mergedResults = array();
      }

      if (!empty($mergedResults) || empty($keyword['+'])) {
         // Optional phrases.
         foreach ($keyword[''] as $phrase) {
            $results =& MonographSearch::_getMergedPhraseResults($press, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
            foreach ($results as $monographId => $count) {
               if (isset($mergedResults[$monographId])) {
                  $mergedResults[$monographId] += $count;
               } else if (empty($keyword['+'])) {
                  $mergedResults[$monographId] = $count;
               }
            }
         }

         // Excluded phrases.
         foreach ($keyword['-'] as $phrase) {
            $results =& MonographSearch::_getMergedPhraseResults($press, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
            foreach ($results as $monographId => $count) {
               if (isset($mergedResults[$monographId])) {
                  unset($mergedResults[$monographId]);
               }
            }
         }
      }

      return $mergedResults;
   }

   /**
    * Get the results for a single phrase.
    * A phrase that has a '+' entry is a nested keyword group and is handed
    * to _getMergedKeywordResults; anything else is looked up through
    * MonographSearchDAO::getPhraseResults and its 'count' values are summed
    * per 'monograph_id'.
    * @param $press mixed passed through to the search DAO
    * @param $phrase mixed phrase, or nested keyword group
    * @param $type mixed search type
    * @param $publishedFrom mixed passed through to the search DAO
    * @param $publishedTo mixed passed through to the search DAO
    * @param $resultsPerKeyword int passed through to the search DAO
    * @param $resultCacheHours int passed through to the search DAO
    * @return array monograph ID => accumulated count
    */
   function &_getMergedPhraseResults(&$press, &$phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
      if (isset($phrase['+'])) {
         $mergedResults =& MonographSearch::_getMergedKeywordResults($press, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
         return $mergedResults;
      }

      $mergedResults = array();
      $monographSearchDao =& DAORegistry::getDAO('MonographSearchDAO');
      $results =& $monographSearchDao->getPhraseResults(
         $press,
         $phrase,
         $publishedFrom,
         $publishedTo,
         $type,
         $resultsPerKeyword,
         $resultCacheHours
      );
      while (!$results->eof()) {
         $result =& $results->next();
         $monographId = $result['monograph_id'];
         if (!isset($mergedResults[$monographId])) {
            $mergedResults[$monographId] = $result['count'];
         } else {
            $mergedResults[$monographId] += $result['count'];
         }
      }
      return $mergedResults;
   }

   /**
    * Convert merged results into an array (frequencyIndicator => monograph
    * ID) sorted by descending frequencyIndicator.
    * The indicator is (multiplier * count) + position, so a higher count
    * always ranks first and entries with equal counts still get distinct
    * keys.
    * @param $mergedResults array monograph ID => count
    * @param $resultCount int multiplier for the count; raised to the number
    *  of entries when smaller, because a smaller multiplier would let two
    *  entries share a key and silently drop one of them
    * @return array frequencyIndicator => monograph ID
    */
   function &_getSparseArray(&$mergedResults, $resultCount) {
      // Positions run from 0 to count-1, so the multiplier must be at least
      // the entry count for the generated keys to be unique.
      $multiplier = max($resultCount, count($mergedResults));

      $results = array();
      $i = 0;
      foreach ($mergedResults as $monographId => $count) {
         $frequencyIndicator = ($multiplier * $count) + $i++;
         $results[$frequencyIndicator] = $monographId;
      }
      krsort($results);
      return $results;
   }

   /**
    * Resolve monograph IDs into the objects needed to display results.
    * IDs for which MonographDAO::getById returns nothing are skipped.
    * Looked-up objects are kept in per-call caches keyed by ID.
    * @param $results array list of monograph IDs
    * @return array list of arrays with the keys 'press', 'monograph',
    *  'publishedMonograph' and 'seriesArrangment'
    */
   function &formatResults(&$results) {
      $pressDao =& DAORegistry::getDAO('PressDAO');
      $monographDao =& DAORegistry::getDAO('MonographDAO');
      $seriesDao =& DAORegistry::getDAO('SeriesDAO');
      $publishedMonographDao =& DAORegistry::getDAO('PublishedMonographDAO');

      $publishedMonographCache = array();
      $monographCache = array();
      $pressCache = array();
      $seriesCache = array();

      $returner = array();
      foreach ($results as $monographId) {
         // Get the monograph, storing in cache if necessary.
         if (!isset($monographCache[$monographId])) {
            $monographCache[$monographId] =& $monographDao->getById($monographId);
            $publishedMonographCache[$monographId] =& $publishedMonographDao->getById($monographId);
         }
         // Break the reference bindings left over from the previous
         // iteration before re-binding to this monograph's cache slots.
         unset($monograph, $publishedMonograph);
         $monograph =& $monographCache[$monographId];
         $publishedMonograph =& $publishedMonographCache[$monographId];

         if ($monograph) {
            // Get the series, storing in cache if necessary.
            $seriesId = $monograph->getSeriesId();
            if (!isset($seriesCache[$seriesId])) {
               $seriesCache[$seriesId] =& $seriesDao->getById($seriesId);
            }

            // Get the press, storing in cache if necessary.
            $pressId = $monograph->getPressId();
            if (!isset($pressCache[$pressId])) {
               $pressCache[$pressId] = $pressDao->getById($pressId);
            }

            // Store the retrieved objects in the result array.
            // NOTE(review): the 'seriesArrangment' spelling is a runtime key
            // and is left untouched; presumably callers read it as spelled.
            $returner[] = array(
               'press' => &$pressCache[$pressId],
               'monograph' => &$monograph,
               'publishedMonograph' => &$publishedMonograph,
               'seriesArrangment' => &$seriesCache[$seriesId]
            );
         }
      }
      return $returner;
   }

   /**
    * Run a search and return one page of formatted results.
    * @param $press mixed passed through to the search DAO
    * @param $keywords array search type => parsed keyword groups, as
    *  produced by parseQuery
    * @param $publishedFrom mixed passed through to the search DAO
    * @param $publishedTo mixed passed through to the search DAO
    * @param $rangeInfo object optional; when set and isValid(), getCount()
    *  and getPage() select the page of results to return
    * @return VirtualArrayIterator over the arrays built by formatResults
    */
   function &retrieveResults(&$press, &$keywords, $publishedFrom = null, $publishedTo = null, $rangeInfo = null) {
      // Fetch all the results from all the keywords into one array
      // (mergedResults), where mergedResults[monograph_id]
      // = sum of all the occurrences for all keywords associated with
      // that monograph ID.
      // resultCount is set to the number of entries in mergedResults.
      $mergedResults =& MonographSearch::_getMergedArray($press, $keywords, $publishedFrom, $publishedTo, $resultCount);

      // Convert mergedResults into an array (frequencyIndicator =>
      // $monographId).
      // The frequencyIndicator is a synthetically-generated number,
      // where higher is better, indicating the quality of the match.
      // It is generated here in such a manner that matches with
      // identical frequency do not collide.
      $results =& MonographSearch::_getSparseArray($mergedResults, $resultCount);

      $totalResults = count($results);

      // Use only the results for the specified page, if specified.
      if ($rangeInfo && $rangeInfo->isValid()) {
         $results = array_slice(
            $results,
            $rangeInfo->getCount() * ($rangeInfo->getPage()-1),
            $rangeInfo->getCount()
         );
         $page = $rangeInfo->getPage();
         $itemsPerPage = $rangeInfo->getCount();
      } else {
         // No paging requested: everything goes on a single page.
         $page = 1;
         $itemsPerPage = max($totalResults, 1);
      }

      // Take the range of results and retrieve the Monograph, Press,
      // and associated objects.
      $results =& MonographSearch::formatResults($results);

      // Return the appropriate iterator.
      import('lib.pkp.classes.core.VirtualArrayIterator');
      $returner = new VirtualArrayIterator($results, $totalResults, $page, $itemsPerPage);
      return $returner;
   }
}
00330 
00331 ?>

Generated on Mon Sep 17 2012 13:58:55 for Open Monograph Press by  doxygen 1.7.1