Open Journal Systems  3.3.0
Core.inc.php
1 <?php
2 
// Location of the PKP library, relative to the application base directory.
define('PKP_LIB_PATH', implode(DIRECTORY_SEPARATOR, array('lib', 'pkp')));

// Location of the generated COUNTER robots list. Core::isUserAgentBot() uses
// it as its default user agent pattern file.
define('COUNTER_USER_AGENTS_FILE', implode(DIRECTORY_SEPARATOR, array(
	Core::getBaseDir(),
	PKP_LIB_PATH,
	'lib',
	'counterBots',
	'generated',
	'COUNTER_Robots_list.txt',
)));
24 
25 class Core {
26 
/**
 * @var array Starts out empty. None of the methods of this class read or
 * write it: isUserAgentBot() keeps its own function-local static of the
 * same name.
 */
public static $botRegexps = array();
29 
/**
 * Get the base directory of the application.
 * The value is derived from INDEX_FILE_LOCATION on the first call and
 * remembered for every later call.
 * @return string Directory that contains the index file
 */
static function getBaseDir() {
	static $baseDir = null;

	// Need to change if the index file moves
	if ($baseDir === null) {
		$baseDir = dirname(INDEX_FILE_LOCATION);
	}

	return $baseDir;
}
44 
/**
 * Sanitize a value that is going to be used as part of a file path.
 * Every character that is neither a word character (\w) nor a hyphen is
 * removed.
 * @param $var string Value to sanitize
 * @return string The result of PKPString::regexp_replace() on $var
 */
static function cleanFileVar($var) {
	$disallowedCharacters = '/[^\w\-]/';
	$sanitized = PKPString::regexp_replace($disallowedCharacters, '', $var);
	return $sanitized;
}
54 
/**
 * Format a timestamp as 'Y-m-d H:i:s'.
 * @param $ts int|null Unix timestamp; the current time is used when null
 * @return string
 */
static function getCurrentDate($ts = null) {
	if (!isset($ts)) {
		$ts = time();
	}
	return date('Y-m-d H:i:s', $ts);
}
63 
/**
 * Get the current time as seconds since the Unix epoch, including the
 * fractional (microsecond) part.
 * @return float
 */
static function microtime() {
	// Inside a method this name resolves to PHP's global microtime(); with
	// $as_float = true it returns the float directly, so the "usec sec"
	// string does not have to be split and added up by hand.
	return microtime(true);
}
72 
/**
 * Check whether PHP is running on Windows, judged by the first three
 * characters of PHP_OS compared case-insensitively against 'win'.
 * @return boolean
 */
static function isWindows() {
	$osPrefix = substr(PHP_OS, 0, 3);
	return strtolower_codesafe($osPrefix) == 'win';
}
80 
/**
 * Check whether a PHP extension is loaded.
 * @param $moduleName string Extension name as understood by extension_loaded()
 * @return boolean True when the extension is loaded, false otherwise
 */
static function checkGeneralPHPModule($moduleName) {
	return (bool) extension_loaded($moduleName);
}
92 
/**
 * Check the passed user agent against a list of bot regular expressions.
 * The list for each pattern file is obtained through the cache manager
 * (requested with CACHE_TYPE_FILE) and additionally remembered in a
 * function-local static, keyed by file name.
 * @param $userAgent string User agent string to test
 * @param $botRegexpsFile string Path of the file holding the bot patterns
 * @return boolean True as soon as one pattern matches, false otherwise
 */
static function isUserAgentBot($userAgent, $botRegexpsFile = COUNTER_USER_AGENTS_FILE) {
	static $regexpsByFile;

	// _botFileListCacheMiss() reads this registry entry to know which file
	// it has to load.
	Registry::set('currentUserAgentsFile', $botRegexpsFile);

	if (!isset($regexpsByFile[$botRegexpsFile])) {
		$cache = CacheManager::getManager()->getCache(
			'core',
			md5($botRegexpsFile),
			array('Core', '_botFileListCacheMiss'),
			CACHE_TYPE_FILE
		);
		$regexpsByFile[$botRegexpsFile] = $cache->getContents();
	}

	foreach ($regexpsByFile[$botRegexpsFile] as $pattern) {
		// The trailing "i" modifier makes the search case insensitive.
		if (PKPString::regexp_match($pattern . 'i', $userAgent)) {
			return true;
		}
	}

	return false;
}
121 
/**
 * Get the context paths present in the passed url information.
 * @param $urlInfo string Full url or just the path info
 * @param $isPathInfo boolean Whether $urlInfo is path info (true) or a url
 *  whose query string carries the context (false)
 * @param $contextList array Context names, used as query variable names;
 *  taken from the application when empty
 * @param $contextDepth int Number of context levels; taken from the
 *  application when empty
 * @param $userVars array Fallback variables used when the query string of
 *  $urlInfo does not carry a context name
 * @return array One cleaned path per context level; 'index' stands in for
 *  every level that is missing or empty. Empty array when the depth is 0.
 */
static function getContextPaths($urlInfo, $isPathInfo, $contextList = null, $contextDepth = null, $userVars = array()) {
	$contextPaths = array();
	// The application supplies the defaults below. Without this assignment
	// $application is undefined and the method calls on it fail.
	$application = Application::get();

	if (!$contextList) {
		$contextList = $application->getContextList();
	}
	if (!$contextDepth) {
		$contextDepth = $application->getContextDepth();
	}

	// Handle context depth 0
	if (!$contextDepth) return $contextPaths;

	if ($isPathInfo) {
		// Split the path info into its constituents. Limiting the explode
		// gathers all non-context path info in $contextPaths[$contextDepth],
		// which is then dropped (if present).
		$contextPaths = explode('/', trim($urlInfo, '/'), $contextDepth + 1);
		unset($contextPaths[$contextDepth]);
	} else {
		// Retrieve context from url query string
		foreach ($contextList as $key => $contextName) {
			$contextPaths[$key] = Core::_getUserVar($urlInfo, $contextName, $userVars);
		}
	}

	// Canonicalize and clean context paths
	for ($key = 0; $key < $contextDepth; $key++) {
		$contextPath = empty($contextPaths[$key]) ? 'index' : $contextPaths[$key];
		$contextPaths[$key] = Core::cleanFileVar($contextPath);
	}

	return $contextPaths;
}
173 
/**
 * Get the page present in the passed url information.
 * It is the first url component after the context levels, or the 'page'
 * query variable.
 * @param $urlInfo string Full url or just the path info
 * @param $isPathInfo boolean Whether $urlInfo is path info
 * @param $userVars array Fallback variables for the query string lookup
 * @return string Cleaned page name; an empty string is cleaned when no
 *  page was found
 */
static function getPage($urlInfo, $isPathInfo, $userVars = array()) {
	$page = Core::_getUrlComponents($urlInfo, $isPathInfo, 0, 'page', $userVars);
	if (is_null($page)) {
		$page = '';
	}
	return Core::cleanFileVar($page);
}
189 
/**
 * Get the operation present in the passed url information.
 * It is the second url component after the context levels, or the 'op'
 * query variable.
 * @param $urlInfo string Full url or just the path info
 * @param $isPathInfo boolean Whether $urlInfo is path info
 * @param $userVars array Fallback variables for the query string lookup
 * @return string Cleaned operation name; 'index' is used when the
 *  operation is missing or empty
 */
static function getOp($urlInfo, $isPathInfo, $userVars = array()) {
	$operation = Core::_getUrlComponents($urlInfo, $isPathInfo, 1, 'op', $userVars);
	if (empty($operation)) {
		$operation = 'index';
	}
	return Core::cleanFileVar($operation);
}
205 
/**
 * Get the arguments present in the passed url information.
 * They are every url component from the third one after the context
 * levels onwards, or the 'path' query variable.
 * @param $urlInfo string Full url or just the path info
 * @param $isPathInfo boolean Whether $urlInfo is path info
 * @param $userVars array Fallback variables for the query string lookup
 * @return array Always an array; empty when there are no arguments
 */
static function getArgs($urlInfo, $isPathInfo, $userVars = array()) {
	$args = Core::_getUrlComponents($urlInfo, $isPathInfo, 2, 'path', $userVars);
	return $args;
}
221 
/**
 * Remove the base url from the passed url, if any.
 * When a context specific base url was matched, the context path is also
 * prepended to the result if the url does not already start with it.
 *
 * Declared static: the method uses no instance state and calls its sibling
 * helpers statically. Calls made through an instance keep working.
 * @param $url string
 * @return string|boolean The url without base url, or false when no base
 *  url could be determined for it
 */
static function removeBaseUrl($url) {
	list($baseUrl, $contextPath) = Core::_getBaseUrlAndPath($url);

	if (!$baseUrl) return false;

	// Remove the base url only where it leads the url. A blanket
	// str_replace() would also cut it out of later parts of the url, e.g.
	// a query parameter that happens to carry an absolute url.
	if (strpos($url, $baseUrl) === 0) {
		// substr() yields false instead of '' on PHP < 8 when nothing is left.
		$url = (string) substr($url, strlen($baseUrl));
	}

	// If url doesn't have the entire protocol and host part,
	// remove any possible base url path from url.
	$baseUrlPath = parse_url($baseUrl, PHP_URL_PATH);
	// Loose comparison on purpose: a base url without path (null) has to
	// match an emptied url.
	if ($baseUrlPath == $url) {
		// Access to the base url, no context, the entire
		// url is part of the base url and we can return empty.
		$url = '';
	} else {
		// Handle case where index.php was removed by rewrite rules,
		// and we have base url followed by the args. Otherwise the base
		// url path is followed by a slash.
		if (strpos($url, $baseUrlPath . '?') === 0) {
			$replacement = '?';
		} else {
			$replacement = '/';
		}
		$baseSystemEscapedPath = preg_quote($baseUrlPath . $replacement, '/');
		$url = preg_replace('/^' . $baseSystemEscapedPath . '/', $replacement, $url);

		// Remove possible index.php page from url.
		$url = str_replace('/index.php', '', $url);
	}

	if ($contextPath) {
		// We found the contextPath using the base_url
		// config file settings. Check if the url starts
		// with the context path, if not, prepend it.
		if (strpos($url, '/' . $contextPath . '/') !== 0) {
			$url = '/' . $contextPath . $url;
		}
	}

	// Remove any possible trailing slashes.
	$url = rtrim($url, '/');

	return $url;
}
275 
/**
 * Find the base url that applies to the passed url, and the context path
 * it belongs to when it is a context specific base url.
 *
 * Declared static: the method uses no instance state and is called as
 * Core::_getBaseUrlAndPath() by removeBaseUrl().
 * @param $url string Url with or without host part
 * @return array Two elements: the base url (false when none could be
 *  determined) and the context path (false unless a context specific base
 *  url matched)
 */
static function _getBaseUrlAndPath($url) {
	$baseUrl = false;
	$contextPath = false;

	// Check for override base url settings.
	$contextBaseUrls = Config::getContextBaseUrls();

	if (empty($contextBaseUrls)) {
		$baseUrl = Config::getVar('general', 'base_url');
		return array($baseUrl, $contextPath);
	}

	// We are just interested in context base urls, remove the index one.
	unset($contextBaseUrls['index']);

	// Arrange them in length order, longest first, so we make sure
	// we get the correct one in case there's an overlapping
	// of contexts, eg.:
	// base_url[context1] = http://somesite.com/
	// base_url[context2] = http://somesite.com/context2
	$sortedBaseUrls = array_combine($contextBaseUrls, array_map('strlen', $contextBaseUrls));
	arsort($sortedBaseUrls);

	// The url never changes inside the loop, so its host is parsed once.
	$urlHost = parse_url($url, PHP_URL_HOST);

	foreach (array_keys($sortedBaseUrls) as $workingBaseUrl) {
		if (is_null($urlHost)) {
			// Check the base url without the host part.
			$baseUrlHost = parse_url($workingBaseUrl, PHP_URL_HOST);
			if (is_null($baseUrlHost)) break;
			$baseUrlToSearch = substr($workingBaseUrl, strpos($workingBaseUrl, $baseUrlHost) + strlen($baseUrlHost));
			// Base url with only host part, add trailing slash
			// so it can be checked below.
			if (!$baseUrlToSearch) $baseUrlToSearch = '/';
		} else {
			$baseUrlToSearch = $workingBaseUrl;
		}

		$baseUrlCheck = Core::_checkBaseUrl($baseUrlToSearch, $url);
		if (is_null($baseUrlCheck)) {
			// Can't decide. Stop searching.
			break;
		} else if ($baseUrlCheck === true) {
			$contextPath = array_search($workingBaseUrl, $contextBaseUrls, true);
			$baseUrl = $workingBaseUrl;
			break;
		}
	}

	return array($baseUrl, $contextPath);
}
334 
/**
 * Check whether the passed base url is the one contained in the passed url.
 *
 * Declared static: the method uses no instance state and is called as
 * Core::_checkBaseUrl() by _getBaseUrlAndPath().
 * @param $baseUrl string Full base url, or just its path part
 * @param $url string Url with or without host part
 * @return boolean|null True when the url starts with the base url, false
 *  when it does not (or when only one of the two has a host part), null
 *  when another context's base url plus context path equals this base url
 *  so that no decision is possible
 */
static function _checkBaseUrl($baseUrl, $url) {
	// Check if both base url and url have host
	// component or not.
	$baseUrlHasHost = (bool) parse_url($baseUrl, PHP_URL_HOST);
	$urlHasHost = (bool) parse_url($url, PHP_URL_HOST);
	if ($baseUrlHasHost !== $urlHasHost) return false;

	// Only read below, so a plain copy is enough; no reference into the
	// configuration data is kept.
	$contextBaseUrls = Config::getContextBaseUrls();

	// The base url has to lead the passed url, otherwise this cannot be
	// the right context path.
	if (strpos($url, $baseUrl) !== 0) return false;

	// Strict comparison: with "==" a url without any "/index.php"
	// (strpos() === false) compared equal to position 0, i.e. looked like
	// index.php right after a one-character base url.
	if (strpos($url, '/index.php') === strlen($baseUrl) - 1) {
		// index.php appears right after the base url,
		// no more possible paths.
		return true;
	}

	// Still have to check if there is no other context
	// base url that combined with its context path is
	// equal to this base url. If it exists, we can't
	// tell which base url is contained in url.
	foreach ($contextBaseUrls as $contextPath => $workingBaseUrl) {
		$urlToCheck = $workingBaseUrl . '/' . $contextPath;
		if (!$baseUrlHasHost) $urlToCheck = parse_url($urlToCheck, PHP_URL_PATH);
		if ($baseUrl == $urlToCheck) {
			return null;
		}
	}

	return true;
}
382 
/**
 * Cache miss callback for the bot regular expression list. Loads the
 * patterns from the file named by the 'currentUserAgentsFile' registry
 * entry, stores them in the passed cache and returns them.
 *
 * Blank lines and lines starting with '#' are skipped. A pattern that does
 * not start with '/' is wrapped in '/' delimiters.
 * @param $cache object Cache to fill; only setEntireCache() is called on it
 * @return array Bot regular expressions keyed by their line index in the
 *  file; empty when the file cannot be read
 */
static function _botFileListCacheMiss($cache) {
	$botRegexpsFile = Registry::get('currentUserAgentsFile');

	// file() returns false (after raising a warning) for an unreadable
	// file, and rejects an empty path outright on PHP 8. Treat both as an
	// empty pattern list rather than feeding a non-array into the loop.
	$lines = (is_string($botRegexpsFile) && $botRegexpsFile !== '') ? file($botRegexpsFile) : false;
	if ($lines === false) {
		$lines = array();
	}

	$delimiter = '/';
	$botRegexps = array();
	foreach ($lines as $lineIndex => $line) {
		$regexp = trim($line);

		// Skip blank lines and comments.
		if (empty($regexp) || $regexp[0] == '#') continue;

		if (strpos($regexp, $delimiter) !== 0) {
			// Make sure delimiters are in place.
			$regexp = $delimiter . $regexp . $delimiter;
		}
		$botRegexps[$lineIndex] = $regexp;
	}

	$cache->setEntireCache($botRegexps);
	return $botRegexps;
}
410 
/**
 * Get a variable from the query string of a url, falling back to the
 * passed user variables when the url does not carry it.
 * @param $url string Url whose query string is searched first
 * @param $varName string Name of the variable
 * @param $userVars array Fallback variables, keyed by name
 * @return mixed|null The value found (string or array, as produced by
 *  parse_str(), or whatever $userVars holds), null when it is in neither
 */
private static function _getUserVar($url, $varName, $userVars = array()) {
	// parse_url() gives null for a url without query string and false for
	// a seriously malformed url; parse_str() must only see real strings.
	$queryString = parse_url($url, PHP_URL_QUERY);
	$userVarsFromUrl = array();
	if (is_string($queryString)) {
		parse_str($queryString, $userVarsFromUrl);
	}

	if (isset($userVarsFromUrl[$varName])) {
		return $userVarsFromUrl[$varName];
	}

	// Try to retrieve from passed user vars, if any.
	if (!empty($userVars) && isset($userVars[$varName])) {
		return $userVars[$varName];
	}

	return null;
}
432 
/**
 * Get a url component (page, operation or arguments) from the passed url
 * information.
 * @param $urlInfo string Full url or just the path info
 * @param $isPathInfo boolean Whether $urlInfo is path info (true) or a url
 *  whose query string carries the component (false)
 * @param $offset int Position of the component after the context levels;
 *  only used for path info
 * @param $varName string Query variable name of the component; 'path'
 *  marks the array-valued arguments component
 * @param $userVars array Fallback variables for the query string lookup
 * @return mixed For 'path' always an array (possibly empty); otherwise the
 *  component value, or null when it is not present
 */
private static function _getUrlComponents($urlInfo, $isPathInfo, $offset, $varName = '', $userVars = array()) {
	$component = null;
	$isArrayComponent = ($varName === 'path');

	if ($isPathInfo) {
		// The context levels come first in the path info and have to be
		// skipped. Without this assignment $application is undefined and
		// the call on it fails.
		$application = Application::get();
		$contextDepth = $application->getContextDepth();

		$vars = explode('/', trim($urlInfo, '/'));
		$componentIndex = $contextDepth + $offset;
		if (count($vars) > $componentIndex) {
			$component = $isArrayComponent
				? array_slice($vars, $componentIndex)
				: $vars[$componentIndex];
		}
	} else {
		$component = Core::_getUserVar($urlInfo, $varName, $userVars);
	}

	if ($isArrayComponent) {
		// Arguments are always handed back as an array.
		if (empty($component)) $component = array();
		elseif (!is_array($component)) $component = array($component);
	}

	return $component;
}
474 }
475 
476 
PKPString\regexp_replace
static regexp_replace($pattern, $replacement, $subject, $limit=-1)
Definition: PKPString.inc.php:279
$application
$application
Definition: index.php:65
Core\isWindows
static isWindows()
Definition: Core.inc.php:80
Core\getArgs
static getArgs($urlInfo, $isPathInfo, $userVars=array())
Definition: Core.inc.php:221
Core\microtime
static microtime()
Definition: Core.inc.php:71
Core\_checkBaseUrl
_checkBaseUrl($baseUrl, $url)
Definition: Core.inc.php:350
Registry\set
static set($key, &$value)
Definition: Registry.inc.php:53
INDEX_FILE_LOCATION
const INDEX_FILE_LOCATION
Definition: index.php:64
Core\getContextPaths
static getContextPaths($urlInfo, $isPathInfo, $contextList=null, $contextDepth=null, $userVars=array())
Definition: Core.inc.php:137
Config\getContextBaseUrls
static & getContextBaseUrls()
Definition: Config.inc.php:94
CacheManager\getManager
static getManager()
Definition: CacheManager.inc.php:27
Registry\get
static & get($key, $createIfEmpty=false, $createWithDefault=null)
Definition: Registry.inc.php:35
Config\getVar
static getVar($section, $key, $default=null)
Definition: Config.inc.php:35
Core\$botRegexps
static $botRegexps
Definition: Core.inc.php:31
Core\getOp
static getOp($urlInfo, $isPathInfo, $userVars=array())
Definition: Core.inc.php:204
Seboettg\Collection\count
count()
Definition: ArrayListTrait.php:253
Core\isUserAgentBot
static isUserAgentBot($userAgent, $botRegexpsFile=COUNTER_USER_AGENTS_FILE)
Definition: Core.inc.php:103
CACHE_TYPE_FILE
const CACHE_TYPE_FILE
Definition: CacheManager.inc.php:19
Core\getPage
static getPage($urlInfo, $isPathInfo, $userVars=array())
Definition: Core.inc.php:188
strtolower_codesafe
strtolower_codesafe($str)
Definition: functions.inc.php:280
Core\getCurrentDate
static getCurrentDate($ts=null)
Definition: Core.inc.php:63
Core\checkGeneralPHPModule
static checkGeneralPHPModule($moduleName)
Definition: Core.inc.php:89
Core\getBaseDir
static getBaseDir()
Definition: Core.inc.php:37
Core\_getBaseUrlAndPath
_getBaseUrlAndPath($url)
Definition: Core.inc.php:286
PKPApplication\get
static get()
Definition: PKPApplication.inc.php:235
PKPString\regexp_match
static regexp_match($pattern, $subject)
Definition: PKPString.inc.php:245
Core\_botFileListCacheMiss
static _botFileListCacheMiss($cache)
Definition: Core.inc.php:391
Core\removeBaseUrl
removeBaseUrl($url)
Definition: Core.inc.php:233
Core\cleanFileVar
static cleanFileVar($var)
Definition: Core.inc.php:54
Core
Class containing system-wide functions.
Definition: Core.inc.php:25