Open Journal Systems  3.3.0
SubmissionSearchIndex.inc.php
1 <?php
2 
16 import('lib.pkp.classes.search.SearchFileParser');
17 import('lib.pkp.classes.search.SearchHTMLParser');
18 import('lib.pkp.classes.search.SearchHelperParser');
19 
// Relative path of the stopword list that _loadStopwords() reads with file()
20 define('SEARCH_STOPWORDS_FILE', 'lib/pkp/registry/stopwords.txt');
21 
22 // Upper bound on keyword length: filterKeywords() truncates every kept word to this length
23 define('SEARCH_KEYWORD_MAX_LENGTH', 40);
24 
/**
 * Base class for maintaining a submission search index.
 *
 * Implements the keyword normalisation (punctuation stripping, lower-casing,
 * stopword removal) and declares the operations that concrete index
 * implementations must provide.
 */
abstract class SubmissionSearchIndex {
	/**
	 * Split a text into the list of keywords to be indexed or searched for.
	 *
	 * Punctuation is removed or turned into whitespace, the text is
	 * lower-cased and split on whitespace. Stopwords, words shorter than the
	 * configured [search] min_word_length and numeric words are dropped; the
	 * remaining words are truncated to SEARCH_KEYWORD_MAX_LENGTH.
	 *
	 * @param $text string|array Text to process; an array is joined with newlines first
	 * @param $allowWildcards boolean If true, "*" is converted to "%" instead of whitespace
	 * @return array Keywords in order of appearance (duplicates are kept)
	 */
	public function filterKeywords($text, $allowWildcards = false) {
		$minLength = Config::getVar('search', 'min_word_length');
		$stopwords = $this->_loadStopwords();

		// Join multiple lines into a single string
		if (is_array($text)) $text = join("\n", $text);

		// Remove punctuation: the first set is deleted outright (so the
		// surrounding characters are joined), the second set acts as a
		// word separator.
		$text = PKPString::regexp_replace('/[!"\#\$%\'\(\)\.\?@\[\]\^`\{\}~]/', '', $text);
		$text = PKPString::regexp_replace('/[\+,:;&\/<=>\|\\\]/', ' ', $text);
		$text = PKPString::regexp_replace('/[\*]/', $allowWildcards ? '%' : ' ', $text);
		$text = PKPString::strtolower($text);

		// Split into words
		$words = PKPString::regexp_split('/\s+/', $text);

		// FIXME Do not perform further filtering for some fields, e.g., author names?

		// Remove stopwords, too-short words and numbers. The stopword map
		// also contains the empty string, which discards the empty tokens
		// produced by leading/trailing whitespace.
		$keywords = array();
		foreach ($words as $k) {
			if (!isset($stopwords[$k]) && PKPString::strlen($k) >= $minLength && !is_numeric($k)) {
				$keywords[] = PKPString::substr($k, 0, SEARCH_KEYWORD_MAX_LENGTH);
			}
		}
		return $keywords;
	}

	/**
	 * Load the stopword list from SEARCH_STOPWORDS_FILE.
	 *
	 * Lines are trimmed; empty lines and lines starting with "#" are
	 * skipped. The result is cached in a static variable so the file is
	 * read at most once. If the file cannot be read, only the empty
	 * string is treated as a stopword.
	 *
	 * @return array Map of stopword => number of occurrences in the file, plus the empty string
	 */
	protected function _loadStopwords() {
		static $searchStopwords;

		if (!isset($searchStopwords)) {
			// file() returns false when the list cannot be read. Passing
			// that on to array_map() is a TypeError on PHP 8, so fall back
			// to an empty list explicitly.
			$lines = file(SEARCH_STOPWORDS_FILE);
			if ($lines === false) $lines = array();

			// Load stopwords only once per request
			$searchStopwords = array_count_values(
				array_filter(
					array_map('trim', $lines),
					function($a) {
						return !empty($a) && $a[0] != '#';
					}
				)
			);
			// Treat the empty string as a stopword so empty tokens are dropped
			$searchStopwords[''] = 1;
		}

		return $searchStopwords;
	}

	/**
	 * Must be implemented by concrete indexes.
	 * NOTE(review): judging by the name, this signals that a batch of
	 * submission changes is complete - confirm against the callers.
	 */
	abstract public function submissionChangesFinished();

	/**
	 * Must be implemented by concrete indexes.
	 * NOTE(review): judging by the name, this is invoked when a submission's
	 * metadata changed - confirm against the callers.
	 * @param $submission Submission
	 */
	abstract public function submissionMetadataChanged($submission);

	/**
	 * Must be implemented by concrete indexes.
	 * NOTE(review): judging by the name, this removes the indexed files of
	 * the given submission - confirm against the implementations.
	 * @param $submission Submission
	 */
	abstract public function clearSubmissionFiles($submission);
}
109 
SubmissionSearchIndex\filterKeywords
filterKeywords($text, $allowWildcards=false)
Definition: SubmissionSearchIndex.inc.php:32
PKPString\regexp_replace
static regexp_replace($pattern, $replacement, $subject, $limit=-1)
Definition: PKPString.inc.php:279
SubmissionSearchIndex\submissionMetadataChanged
submissionMetadataChanged($submission)
PKPString\substr
static substr($string, $start, $length=null)
Definition: PKPString.inc.php:160
PKPString\regexp_split
static regexp_split($pattern, $subject, $limit=-1)
Definition: PKPString.inc.php:302
SubmissionSearchIndex\_loadStopwords
_loadStopwords()
Definition: SubmissionSearchIndex.inc.php:65
PKPString\strlen
static strlen($string)
Definition: PKPString.inc.php:128
Config\getVar
static getVar($section, $key, $default=null)
Definition: Config.inc.php:35
SubmissionSearchIndex
Class to maintain a submission search index.
Definition: SubmissionSearchIndex.inc.php:25
PKPString\strtolower
static strtolower($string)
Definition: PKPString.inc.php:169
SubmissionSearchIndex\submissionChangesFinished
submissionChangesFinished()
SubmissionSearchIndex\clearSubmissionFiles
clearSubmissionFiles($submission)