Open Journal Systems  3.3.0
SubmissionSearch.inc.php
1 <?php
2 
// Search types. Each type occupies its own bit so that several types can be
// combined into a single mask.
define('SUBMISSION_SEARCH_AUTHOR', 1 << 0);
define('SUBMISSION_SEARCH_TITLE', 1 << 1);
define('SUBMISSION_SEARCH_ABSTRACT', 1 << 2);
define('SUBMISSION_SEARCH_DISCIPLINE', 1 << 3);
define('SUBMISSION_SEARCH_SUBJECT', 1 << 4);
define('SUBMISSION_SEARCH_TYPE', 1 << 5);
define('SUBMISSION_SEARCH_COVERAGE', 1 << 6);
define('SUBMISSION_SEARCH_GALLEY_FILE', 1 << 7);
define('SUBMISSION_SEARCH_SUPPLEMENTARY_FILE', 1 << 8);
// Combined mask of discipline, subject, type and coverage (0x00000078).
define(
	'SUBMISSION_SEARCH_INDEX_TERMS',
	SUBMISSION_SEARCH_DISCIPLINE | SUBMISSION_SEARCH_SUBJECT | SUBMISSION_SEARCH_TYPE | SUBMISSION_SEARCH_COVERAGE
);

// Page size used by retrieveResults() when no valid range is supplied.
define('SUBMISSION_SEARCH_DEFAULT_RESULT_LIMIT', 20);
32 
33 import('lib.pkp.classes.search.SubmissionSearchIndex');
34 
35 abstract class SubmissionSearch {
/**
 * Constructor. Performs no initialization of its own.
 */
function __construct() {}
41 
/**
 * Parse a search query string into signed keyword groups.
 *
 * The query is tokenized into pairs of an optional sign ("+" or "-") and a
 * term, where a term is a double-quoted phrase, a single parenthesis, or a
 * run of characters containing neither whitespace nor ")". The token lists
 * are then handed to _parseQueryInternal().
 *
 * @param $query string The raw search query.
 * @return array Array with the keys '+', '' and '-', each holding a list
 *  of keyword entries (see _parseQueryInternal()).
 */
function _parseQuery($query) {
	$matches = array();
	$count = PKPString::regexp_match_all('/(\+|\-|)("[^"]+"|\(|\)|[^\s\)]+)/', $query, $matches);

	// The matcher may fail (e.g. on malformed input) or find nothing; in
	// that case there are no capture groups to read, so return the same
	// empty structure the parser yields for an empty token list.
	if (!$count || !isset($matches[1], $matches[2])) {
		return array('+' => array(), '' => array(), '-' => array());
	}

	$pos = 0;
	return $this->_parseQueryInternal($matches[1], $matches[2], $pos, $count);
}
53 
/**
 * Recursive worker for _parseQuery().
 *
 * Walks the token list starting at $pos and sorts every term into one of
 * three buckets: '+' (required), '' (optional) or '-' (excluded). An
 * opening parenthesis starts a nested group that is parsed recursively and
 * stored as a sub-array; a closing parenthesis ends the current group.
 * The localized NOT/AND/OR operator words are recognized
 * case-insensitively.
 *
 * @param $signTokens array Sign ("+", "-" or "") captured for each token.
 * @param $tokens array The terms, parallel to $signTokens.
 * @param $pos int Current read position; advanced by reference so that
 *  recursive calls continue where the nested group ended.
 * @param $total int Number of tokens.
 * @return array Array with the keys '+', '' and '-'.
 */
function _parseQueryInternal($signTokens, $tokens, &$pos, $total) {
	$parsed = array('+' => array(), '' => array(), '-' => array());
	$previousBool = '';
	$sign = '';

	$searchIndex = Application::getSubmissionSearchIndex();

	$opNot = PKPString::strtolower(__('search.operator.not'));
	$opAnd = PKPString::strtolower(__('search.operator.and'));
	$opOr = PKPString::strtolower(__('search.operator.or'));

	while ($pos < $total) {
		// An explicit sign on the token wins; otherwise keep a pending
		// sign (e.g. from a preceding NOT) or default to "required".
		if (!empty($signTokens[$pos])) {
			$sign = $signTokens[$pos];
		} elseif (empty($sign)) {
			$sign = '+';
		}

		$token = PKPString::strtolower($tokens[$pos]);
		$pos++;

		// NOT only flags the next term as excluded.
		if ($token == $opNot) {
			$sign = '-';
			continue;
		}

		// End of the current (nested) group.
		if ($token == ')') {
			return $parsed;
		}

		// Start of a nested group: the parsed sub-array becomes the term.
		if ($token == '(') {
			$token = $this->_parseQueryInternal($signTokens, $tokens, $pos, $total);
		}

		// Look ahead for a boolean operator following this term and
		// consume it if present.
		$followingBool = '';
		if ($pos < $total) {
			$next = PKPString::strtolower($tokens[$pos]);
			if ($next == $opOr) {
				$followingBool = 'or';
				$pos++;
			} elseif ($next == $opAnd) {
				$followingBool = 'and';
				$pos++;
			}
		}

		// The operator after the term takes precedence over the one
		// before it; terms joined by OR become optional.
		$bool = ($followingBool !== '') ? $followingBool : $previousBool;
		$previousBool = $followingBool;
		if ($bool == 'or') {
			$sign = '';
		}

		$k = is_array($token) ? $token : $searchIndex->filterKeywords($token, true);
		if (!empty($k)) {
			$parsed[$sign][] = $k;
		}
		$sign = '';
	}

	return $parsed;
}
106 
/**
 * Collect the results for all parsed keywords into one merged array.
 *
 * Regroups the per-search-type keyword sets into a single keyword
 * structure, tagging every group with its search type, and passes it to
 * _getMergedKeywordResults(). The per-keyword result limit and the cache
 * lifetime are read from the 'search' configuration section and default
 * to 100 and 24 when not numeric.
 *
 * @param $context object The context to search in.
 * @param $keywords array Parsed keywords, indexed by search type.
 * @param $publishedFrom mixed Lower publication date bound.
 * @param $publishedTo mixed Upper publication date bound.
 * @return array Merged results as returned by _getMergedKeywordResults().
 */
function _getMergedArray($context, &$keywords, $publishedFrom, $publishedTo) {
	$perKeywordLimit = Config::getVar('search', 'results_per_keyword');
	$cacheHours = Config::getVar('search', 'result_cache_hours');
	$perKeywordLimit = is_numeric($perKeywordLimit) ? $perKeywordLimit : 100;
	$cacheHours = is_numeric($cacheHours) ? $cacheHours : 24;

	// Maps each sign bucket of the parsed query to the slot of the type
	// group that receives its terms. Excluded ('-') terms are stored in
	// the '' slot of a group that itself lives in the outer '-' bucket.
	$signToSlot = array('+' => '+', '' => '', '-' => '');

	$grouped = array('+' => array(), '' => array(), '-' => array());
	foreach ($keywords as $type => $parsed) {
		foreach ($signToSlot as $sign => $slot) {
			if (empty($parsed[$sign])) {
				continue;
			}
			$entry = array('type' => $type, '+' => array(), '' => array(), '-' => array());
			$entry[$slot] = $parsed[$sign];
			$grouped[$sign][] = $entry;
		}
	}

	return $this->_getMergedKeywordResults($context, $grouped, null, $publishedFrom, $publishedTo, $perKeywordLimit, $cacheHours);
}
134 
/**
 * Merge the results of one keyword group.
 *
 * Required ('+') phrases are intersected, optional ('') phrases add to the
 * hit count (and contribute new hits only when the group has no required
 * phrases), and excluded ('-') phrases remove hits.
 *
 * @param $context object The context to search in.
 * @param $keyword array Keyword group with the keys '+', '' and '-', and
 *  optionally 'type', which overrides $type.
 * @param $type mixed Search type inherited from the enclosing group.
 * @param $publishedFrom mixed Lower publication date bound.
 * @param $publishedTo mixed Upper publication date bound.
 * @param $resultsPerKeyword int Result limit per keyword.
 * @param $resultCacheHours int Result cache lifetime in hours.
 * @return array Hits indexed by submission ID; each hit carries a 'count'.
 */
function _getMergedKeywordResults($context, &$keyword, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
	if (isset($keyword['type'])) {
		$type = $keyword['type'];
	}

	// Required phrases: keep only submissions matched by every phrase,
	// summing up their counts.
	$matches = null;
	foreach ($keyword['+'] as $phrase) {
		$hits = $this->_getMergedPhraseResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
		if ($matches === null) {
			$matches = $hits;
			continue;
		}
		foreach (array_keys($matches) as $submissionId) {
			if (!isset($hits[$submissionId])) {
				unset($matches[$submissionId]);
				continue;
			}
			$matches[$submissionId]['count'] += $hits[$submissionId]['count'];
		}
	}

	if ($matches == null) {
		$matches = array();
	}

	// Required phrases were given but nothing satisfied them all:
	// optional and excluded phrases cannot change the outcome.
	$hasRequired = !empty($keyword['+']);
	if (empty($matches) && $hasRequired) {
		return $matches;
	}

	// Optional phrases: boost existing hits; add new ones only when
	// there was no required phrase restricting the result set.
	foreach ($keyword[''] as $phrase) {
		$hits = $this->_getMergedPhraseResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
		foreach ($hits as $submissionId => $data) {
			if (isset($matches[$submissionId])) {
				$matches[$submissionId]['count'] += $data['count'];
			} elseif (!$hasRequired) {
				$matches[$submissionId] = $data;
			}
		}
	}

	// Excluded phrases: drop every submission they match.
	foreach ($keyword['-'] as $phrase) {
		$hits = $this->_getMergedPhraseResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
		foreach ($hits as $submissionId => $unused) {
			unset($matches[$submissionId]);
		}
	}

	return $matches;
}
188 
/**
 * Retrieve the results for a single phrase or nested keyword group.
 *
 * A phrase that carries a '+' key is a nested keyword group and is
 * resolved through _getMergedKeywordResults(); anything else is looked up
 * through the search DAO.
 *
 * @param $context object The context to search in.
 * @param $phrase array The phrase or nested keyword group.
 * @param $type mixed Search type.
 * @param $publishedFrom mixed Lower publication date bound.
 * @param $publishedTo mixed Upper publication date bound.
 * @param $resultsPerKeyword int Result limit per keyword.
 * @param $resultCacheHours int Result cache lifetime in hours.
 * @return array Results of the nested group or of the DAO lookup.
 */
function _getMergedPhraseResults($context, &$phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours) {
	if (isset($phrase['+'])) {
		return $this->_getMergedKeywordResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours);
	}

	return $this->getSearchDao()->getPhraseResults(
		$context,
		$phrase,
		$publishedFrom,
		$publishedTo,
		$type,
		$resultsPerKeyword,
		$resultCacheHours
	);
}
210 
/**
 * Run a search and return one page of formatted results.
 *
 * The 'SubmissionSearch::retrieveResults' hook is called first so that a
 * plug-in can provide the results. When the hook call returns false, the
 * built-in database search is used instead.
 *
 * @param $request object The request; used for the result ordering and
 *  the current user.
 * @param $context object The context to search in.
 * @param $keywords array Query strings, indexed by search type.
 * @param $error mixed Passed by reference to the hook.
 * @param $publishedFrom mixed Lower publication date bound.
 * @param $publishedTo mixed Upper publication date bound.
 * @param $rangeInfo object Optional paging information.
 * @param $exclude array Passed on to the hook and to getSparseArray().
 * @return VirtualArrayIterator Iterator over the requested result page.
 */
function retrieveResults($request, $context, $keywords, &$error, $publishedFrom = null, $publishedTo = null, $rangeInfo = null, $exclude = array()) {
	// Pagination: without a valid range, show the first page with the
	// default page size.
	$hasValidRange = $rangeInfo && $rangeInfo->isValid();
	$page = $hasValidRange ? $rangeInfo->getPage() : 1;
	$itemsPerPage = $hasValidRange ? $rangeInfo->getCount() : SUBMISSION_SEARCH_DEFAULT_RESULT_LIMIT;

	// Result set ordering.
	$ordering = $this->getResultSetOrdering($request);
	$orderBy = $ordering[0];
	$orderDir = $ordering[1];

	// Give search plug-ins the chance to provide ranked search results.
	$totalResults = null;
	$hookHandled = HookRegistry::call(
		'SubmissionSearch::retrieveResults',
		array(
			&$context, &$keywords, $publishedFrom, $publishedTo,
			$orderBy, $orderDir, $exclude, $page, $itemsPerPage,
			&$totalResults, &$error, &$results
		)
	);

	// No plug-in took over: fall back to the default database search.
	if ($hookHandled === false) {
		// Parse every query string into its keyword structure.
		foreach ($keywords as $searchType => $query) {
			$keywords[$searchType] = $this->_parseQuery($query);
		}

		// Fetch the results of all keywords into one array, where
		// $merged[submission_id] holds the sum of all the occurrences
		// for all keywords associated with that submission ID.
		$merged = $this->_getMergedArray($context, $keywords, $publishedFrom, $publishedTo);

		// Convert the merged results into an array
		// (frequencyIndicator => $submissionId). The frequency indicator
		// is a synthetically generated number, where higher is better,
		// built so that matches with identical frequency do not collide.
		$results = $this->getSparseArray($merged, $orderBy, $orderDir, $exclude);
		$totalResults = count($results);

		// Cut out the slice belonging to the requested page.
		$offset = $itemsPerPage * ($page - 1);
		$remaining = max($totalResults - $offset, 0);
		$length = min($itemsPerPage, $remaining);
		$results = ($length == 0) ? array() : array_slice($results, $offset, $length);
	}

	// Turn the page of results into the objects to be displayed.
	$results = $this->formatResults($results, $request->getUser());

	// Return the appropriate iterator.
	import('lib.pkp.classes.core.VirtualArrayIterator');
	return new VirtualArrayIterator($results, $totalResults, $page, $itemsPerPage);
}
297 
/**
 * Return the available options for the result set ordering direction.
 *
 * @return array Localized labels, indexed by the direction keys 'asc'
 *  and 'desc'.
 */
function getResultSetOrderingDirectionOptions() {
	return array(
		'asc' => __('search.results.orderDir.asc'),
		'desc' => __('search.results.orderDir.desc')
	);
}
309 
/**
 * Determine the result set ordering requested by the user.
 *
 * Reads the 'orderBy' and 'orderDir' user variables. An order field that
 * is missing or not among the keys of getResultSetOrderingOptions() falls
 * back to 'score'; a direction that is missing or not among the keys of
 * getResultSetOrderingDirectionOptions() falls back to
 * getDefaultOrderDir() for the chosen field.
 *
 * @param $request object The request to read the user variables from.
 * @return array Two-element array: order field, order direction.
 */
function getResultSetOrdering($request) {
	// Order field.
	$requestedField = $request->getUserVar('orderBy');
	$knownFields = array_keys($this->getResultSetOrderingOptions($request));
	$orderBy = (!is_null($requestedField) && in_array($requestedField, $knownFields))
		? $requestedField
		: 'score';

	// Ordering direction.
	$requestedDir = $request->getUserVar('orderDir');
	$knownDirs = array_keys($this->getResultSetOrderingDirectionOptions());
	$orderDir = (!is_null($requestedDir) && in_array($requestedDir, $knownDirs))
		? $requestedDir
		: $this->getDefaultOrderDir($orderBy);

	return array($orderBy, $orderDir);
}
335 
336  //
337  // Methods to be implemented by subclasses.
338  //
/**
 * Turn a page of raw search results into the data handed to the result
 * iterator. retrieveResults() calls this with the result slice of the
 * current page and the user of the request.
 *
 * @param $results array The results of the current page.
 * @param $user object Optional; retrieveResults() passes the request's user.
 * @return array The formatted results (NOTE(review): shape is defined by
 *  the subclass; confirm there).
 */
350  abstract function formatResults($results, $user = null);
351 
/**
 * Return the available result set ordering options. The array keys are
 * the values getResultSetOrdering() accepts for the 'orderBy' variable.
 *
 * @param $request object The current request.
 * @return array Ordering options, indexed by order field.
 */
357  abstract function getResultSetOrderingOptions($request);
358 
/**
 * Convert the merged search results into the ordered result array.
 * retrieveResults() counts and slices the returned array to build the
 * requested page.
 *
 * @param $unorderedResults array Merged results from _getMergedArray().
 * @param $orderBy string The order field.
 * @param $orderDir string The ordering direction.
 * @param $exclude array Passed through from retrieveResults()
 *  (presumably submissions to leave out; confirm in the subclass).
 * @return array The ordered results.
 */
363  abstract protected function getSparseArray($unorderedResults, $orderBy, $orderDir, $exclude);
364 
/**
 * Return the ordering direction to use for an order field when the
 * request does not name a valid direction.
 *
 * @param $orderBy string The order field.
 * @return string The ordering direction (the built-in direction options
 *  are keyed 'asc' and 'desc').
 */
370  abstract protected function getDefaultOrderDir($orderBy);
371 
/**
 * Return the DAO used to look up phrase results. The returned object
 * must provide the getPhraseResults() method called by
 * _getMergedPhraseResults().
 *
 * @return object The search DAO.
 */
376  abstract protected function getSearchDao();
377 }
378 
379 
SubmissionSearch\_getMergedArray
_getMergedArray($context, &$keywords, $publishedFrom, $publishedTo)
Definition: SubmissionSearch.inc.php:117
SubmissionSearch\_getMergedPhraseResults
_getMergedPhraseResults($context, &$phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours)
Definition: SubmissionSearch.inc.php:192
SubmissionSearch
Class for retrieving search results.
Definition: SubmissionSearch.inc.php:35
SubmissionSearch\getResultSetOrderingOptions
getResultSetOrderingOptions($request)
SubmissionSearch\retrieveResults
retrieveResults($request, $context, $keywords, &$error, $publishedFrom=null, $publishedTo=null, $rangeInfo=null, $exclude=array())
Definition: SubmissionSearch.inc.php:231
SubmissionSearch\getSparseArray
getSparseArray($unorderedResults, $orderBy, $orderDir, $exclude)
SubmissionSearch\getResultSetOrdering
getResultSetOrdering($request)
Definition: SubmissionSearch.inc.php:318
PKPString\regexp_match_all
static regexp_match_all($pattern, $subject, &$matches)
Definition: PKPString.inc.php:267
SubmissionSearch\_parseQuery
_parseQuery($query)
Definition: SubmissionSearch.inc.php:48
VirtualArrayIterator
Provides paging and iteration for "virtual" arrays – arrays for which only the current "page" is available.
Definition: VirtualArrayIterator.inc.php:20
SubmissionSearch\_getMergedKeywordResults
_getMergedKeywordResults($context, &$keyword, $type, $publishedFrom, $publishedTo, $resultsPerKeyword, $resultCacheHours)
Definition: SubmissionSearch.inc.php:138
SubmissionSearch\getSearchDao
getSearchDao()
SubmissionSearch\__construct
__construct()
Definition: SubmissionSearch.inc.php:39
Config\getVar
static getVar($section, $key, $default=null)
Definition: Config.inc.php:35
SubmissionSearch\formatResults
formatResults($results, $user=null)
SubmissionSearch\getResultSetOrderingDirectionOptions
getResultSetOrderingDirectionOptions()
Definition: SubmissionSearch.inc.php:303
PKPString\strtolower
static strtolower($string)
Definition: PKPString.inc.php:169
SubmissionSearch\getDefaultOrderDir
getDefaultOrderDir($orderBy)
Application\getSubmissionSearchIndex
static getSubmissionSearchIndex()
Definition: Application.inc.php:169
SubmissionSearch\_parseQueryInternal
_parseQueryInternal($signTokens, $tokens, &$pos, $total)
Definition: SubmissionSearch.inc.php:58
HookRegistry\call
static call($hookName, $args=null)
Definition: HookRegistry.inc.php:86