Open Journal Systems  2.4.8
 All Classes Namespaces Functions Variables Groups Pages
Core.inc.php
1 <?php
2 
// Path of the bot user agent list, built from the application base directory.
define('USER_AGENTS_FILE', implode(DIRECTORY_SEPARATOR, array(Core::getBaseDir(), 'lib', 'pkp', 'registry', 'botAgents.txt')));
22 
23 class Core {
24 
26  static $botRegexps = array();
27 
/**
 * Get the base directory of the installation.
 * The value is the directory containing INDEX_FILE_LOCATION and is computed
 * once, then kept in a function-level static for later calls.
 * @return string
 */
function getBaseDir() {
	static $cachedBaseDir;

	if ($cachedBaseDir === null) {
		// Derived from the index file location; must be revisited if that file moves.
		$cachedBaseDir = dirname(INDEX_FILE_LOCATION);
	}

	return $cachedBaseDir;
}
42 
/**
 * Sanitize a variable.
 * When charset normalization is switched on, the client charset is UTF-8 and
 * the value is not already valid UTF-8, the value is normalized, its HTML
 * entities are decoded, invalid UTF-8 sequences are stripped and special HTML
 * characters are re-encoded. In every case ASCII control characters are
 * stripped and the result is trimmed.
 * @param $var string
 * @return string
 */
function cleanVar($var) {
	// Only normalize strings that are not UTF-8 already, and only when the system uses UTF-8.
	$needsNormalization = Config::getVar('i18n', 'charset_normalization') == 'On'
		&& strtolower_codesafe(Config::getVar('i18n', 'client_charset')) == 'utf-8'
		&& !String::utf8_is_valid($var);

	if ($needsNormalization) {
		$var = String::utf8_normalize($var);

		// Turn HTML entities into UTF-8 characters (no transcoding).
		$var = checkPhpVersion('5.0.0')
			? html_entity_decode($var, ENT_COMPAT, 'UTF-8')
			: String::html2utf($var);

		// Drop any invalid UTF-8 sequences.
		$var = String::utf8_bad_strip($var);

		// Re-encode special HTML characters; the double_encode flag is only passed from PHP 5.2.3 on.
		$var = checkPhpVersion('5.2.3')
			? htmlspecialchars($var, ENT_NOQUOTES, 'UTF-8', false)
			: htmlspecialchars($var, ENT_NOQUOTES, 'UTF-8');
	}

	// Drop any invalid ASCII control characters.
	$withoutControlChars = String::utf8_strip_ascii_ctrl($var);

	return trim($withoutControlChars);
}
78 
/**
 * Sanitize a value to be used in a file path.
 * Removes every character that is not a word character or a hyphen.
 * @param $var string
 * @return string
 */
function cleanFileVar($var) {
	$disallowedCharacters = '/[^\w\-]/';
	$cleaned = String::regexp_replace($disallowedCharacters, '', $var);
	return $cleaned;
}
88 
/**
 * Format a timestamp as 'Y-m-d H:i:s'.
 * @param $ts int optional timestamp; the current time is used when omitted or null
 * @return string
 */
function getCurrentDate($ts = null) {
	if (!isset($ts)) {
		$ts = time();
	}
	return date('Y-m-d H:i:s', $ts);
}
97 
/**
 * Get the current time as a float: whole seconds plus the fractional part,
 * both taken from the string returned by PHP's microtime().
 * @return float
 */
function microtime() {
	// The built-in returns "<fraction> <seconds>" as a space separated string.
	$parts = explode(' ', microtime());
	$fraction = (float) $parts[0];
	$seconds = (float) $parts[1];
	return $seconds + $fraction;
}
106 
/**
 * Get the operating system PHP reports through the PHP_OS constant.
 * @return string
 */
function serverPHPOS() {
	$operatingSystem = PHP_OS;
	return $operatingSystem;
}
114 
/**
 * Get the version of the running PHP interpreter, as reported by phpversion().
 * @return string
 */
function serverPHPVersion() {
	$version = phpversion();
	return $version;
}
122 
/**
 * Check whether the reported operating system name starts with "win"
 * (compared case-insensitively).
 * @return boolean
 */
function isWindows() {
	$osPrefix = substr(Core::serverPHPOS(), 0, 3);
	return strtolower_codesafe($osPrefix) == 'win';
}
130 
/**
 * Check whether the passed user agent matches one of the bot regular
 * expressions read from the given file.
 * The regexp list is fetched through a file cache keyed by the md5 of the
 * file name and is additionally kept in a function-level static per file.
 * @param $userAgent string
 * @param $botRegexpsFile string path of the file holding the regular expressions
 * @return boolean
 */
function isUserAgentBot($userAgent, $botRegexpsFile = USER_AGENTS_FILE) {
	static $regexpsByFile;

	// The cache miss callback reads the file name back from the registry.
	Registry::set('currentUserAgentsFile', $botRegexpsFile);

	if (!isset($regexpsByFile[$botRegexpsFile])) {
		$cacheManager =& CacheManager::getManager();
		$cache =& $cacheManager->getCache('core', md5($botRegexpsFile), array('Core', '_botFileListCacheMiss'), CACHE_TYPE_FILE);
		$regexpsByFile[$botRegexpsFile] = $cache->getContents();
	}

	$isBot = false;
	foreach ($regexpsByFile[$botRegexpsFile] as $botRegexp) {
		if (String::regexp_match($botRegexp, $userAgent)) {
			$isBot = true;
			break;
		}
	}

	return $isBot;
}
157 
/**
 * Get the context paths present in the passed url information.
 * @param $urlInfo string full url or just path info
 * @param $isPathInfo boolean whether $urlInfo is path info (true) or a url with a query string (false)
 * @param $contextList array (optional) context names; fetched from the application when empty
 * @param $contextDepth int (optional) number of contexts; fetched from the application when empty
 * @param $userVars array (optional) fallback values for query string variables
 * @return array one cleaned path per context level, 'index' where none was given
 */
function getContextPaths($urlInfo, $isPathInfo, $contextList = null, $contextDepth = null, $userVars = array()) {
	$application =& Application::getApplication();

	if (!$contextList) {
		$contextList = $application->getContextList();
	}
	if (!$contextDepth) {
		$contextDepth = $application->getContextDepth();
	}

	// An application without contexts has no context paths.
	if (!$contextDepth) return array();

	$contextPaths = array();
	if (!$isPathInfo) {
		// Read each context from the url query string (or the passed user vars).
		foreach ($contextList as $level => $contextName) {
			$contextPaths[$level] = Core::_getUserVar($urlInfo, $contextName, $userVars);
		}
	} else {
		// Limit the split so that everything after the context segments ends up
		// in one trailing element, then discard that element if present.
		$contextPaths = explode('/', trim($urlInfo, '/'), $contextDepth + 1);
		unset($contextPaths[$contextDepth]);
	}

	// Canonicalize and clean every context level.
	for ($level = 0; $level < $contextDepth; $level++) {
		$rawPath = empty($contextPaths[$level]) ? 'index' : $contextPaths[$level];
		$contextPaths[$level] = Core::cleanFileVar($rawPath);
	}

	return $contextPaths;
}
209 
/**
 * Get the page present in the passed url information.
 * @param $urlInfo string full url or just path info
 * @param $isPathInfo boolean whether $urlInfo is path info
 * @param $userVars array (optional) fallback values for query string variables
 * @return string the cleaned page, or an empty string when none was found
 */
function getPage($urlInfo, $isPathInfo, $userVars = array()) {
	$page = Core::_getUrlComponents($urlInfo, $isPathInfo, 0, 'page', $userVars);
	if (is_null($page)) {
		$page = '';
	}
	return Core::cleanFileVar($page);
}
225 
/**
 * Get the operation present in the passed url information.
 * @param $urlInfo string full url or just path info
 * @param $isPathInfo boolean whether $urlInfo is path info
 * @param $userVars array (optional) fallback values for query string variables
 * @return string the cleaned operation, 'index' when none was given
 */
function getOp($urlInfo, $isPathInfo, $userVars = array()) {
	$operation = Core::_getUrlComponents($urlInfo, $isPathInfo, 1, 'op', $userVars);
	if (empty($operation)) {
		$operation = 'index';
	}
	return Core::cleanFileVar($operation);
}
241 
/**
 * Get the arguments present in the passed url information, i.e. everything
 * after the page and operation components.
 * @param $urlInfo string full url or just path info
 * @param $isPathInfo boolean whether $urlInfo is path info
 * @param $userVars array (optional) fallback values for query string variables
 * @return array
 */
function getArgs($urlInfo, $isPathInfo, $userVars = array()) {
	$arguments = Core::_getUrlComponents($urlInfo, $isPathInfo, 2, 'path', $userVars);
	return $arguments;
}
257 
/**
 * Remove the base url (and a possible index.php segment) from the passed url.
 * When the matching base url belongs to a context-specific setting, the
 * result is guaranteed to start with that context path.
 * @param $url string
 * @return string|boolean the url without base url and trailing slashes,
 *  or false when no base url could be determined for it
 */
function removeBaseUrl($url) {
	list($baseUrl, $contextPath) = Core::_getBaseUrlAndPath($url);
	if (!$baseUrl) return false;

	// Strip the complete base url, if the url contains it.
	$url = str_replace($baseUrl, '', $url);

	// The url may lack protocol and host; in that case only the path part
	// of the base url can be stripped.
	$baseUrlPath = parse_url($baseUrl, PHP_URL_PATH);

	if ($baseUrlPath == $url) {
		// The whole url is the base url: no context, nothing left over.
		$url = '';
	} else {
		// When rewrite rules removed index.php the base url path is followed
		// directly by the query string; otherwise it is followed by a slash.
		$separator = (strpos($url, $baseUrlPath . '?') === 0) ? '?' : '/';
		$leadingBasePath = '/^' . preg_quote($baseUrlPath . $separator, '/') . '/';
		$url = preg_replace($leadingBasePath, $separator, $url);

		// Remove a possible index.php page from the url.
		$url = str_replace('/index.php', '', $url);
	}

	// The context path was found through the base_url config settings:
	// append it in front of the url unless the url already starts with it.
	if ($contextPath && strpos($url, '/' . $contextPath) !== 0) {
		$url = '/' . $contextPath . $url;
	}

	// Remove any trailing slashes.
	return rtrim($url, '/');
}
311 
/**
 * Find the base url and context path that apply to the passed url.
 * Without context-specific base urls the general base_url setting is
 * returned with no context path.
 * @param $url string
 * @return array (base url, context path); each entry is false when not found
 */
function _getBaseUrlAndPath($url) {
	$baseUrl = false;
	$contextPath = false;

	// Check for overriding, context-specific base url settings.
	$contextBaseUrls =& Config::getContextBaseUrls();

	if (empty($contextBaseUrls)) {
		$baseUrl = Config::getVar('general', 'base_url');
		return array($baseUrl, $contextPath);
	}

	// Try the longest base urls first so that overlapping settings resolve
	// to the most specific one, e.g.:
	// base_url[context1] = http://somesite.com/
	// base_url[context2] = http://somesite.com/context2
	$lengthsByBaseUrl = array_combine($contextBaseUrls, array_map('strlen', $contextBaseUrls));
	arsort($lengthsByBaseUrl);

	$urlHost = parse_url($url, PHP_URL_HOST);

	foreach ($lengthsByBaseUrl as $candidateBaseUrl => $candidateLength) {
		$baseUrlToSearch = $candidateBaseUrl;
		if (is_null($urlHost)) {
			// The url has no host: compare against the base url without its host part.
			$candidateHost = parse_url($candidateBaseUrl, PHP_URL_HOST);
			if (is_null($candidateHost)) break;
			$hostEnd = strpos($candidateBaseUrl, $candidateHost) + strlen($candidateHost);
			$baseUrlToSearch = substr($candidateBaseUrl, $hostEnd);
			// A base url made only of the host part is checked as the root path.
			if (!$baseUrlToSearch) $baseUrlToSearch = '/';
		}

		$checkResult = Core::_checkBaseUrl($baseUrlToSearch, $url);

		// Null means the check could not decide: stop searching.
		if (is_null($checkResult)) break;

		if ($checkResult === true) {
			$contextPath = array_search($candidateBaseUrl, $contextBaseUrls);
			$baseUrl = $candidateBaseUrl;
			break;
		}
	}

	return array($baseUrl, $contextPath);
}
366 
/**
 * Check whether the passed base url is the base url of the passed url.
 * @param $baseUrl string full base url or only its path part
 * @param $url string full url or only its path part
 * @return boolean|null true when the url starts with the base url, false
 *  when it does not (or when only one of the two has a host component),
 *  null when another context base url combined with its context path equals
 *  this base url, so that it cannot be decided which one the url contains
 */
function _checkBaseUrl($baseUrl, $url) {
	// Both values must either have a host component or lack it.
	$baseUrlHasHost = (bool) parse_url($baseUrl, PHP_URL_HOST);
	$urlHasHost = (bool) parse_url($url, PHP_URL_HOST);
	if ($baseUrlHasHost !== $urlHasHost) return false;

	// The url has to start with the base url to be a candidate at all.
	if (strpos($url, $baseUrl) !== 0) return false;

	// index.php right after a base url ending in a slash leaves no room for
	// another path. The comparison is strict so that a missing index.php
	// (strpos() === false) is never mistaken for position 0.
	$indexPosition = strpos($url, '/index.php');
	if ($indexPosition !== false && $indexPosition === strlen($baseUrl) - 1) {
		return true;
	}

	// Still have to check that no other context base url, combined with its
	// context path, equals this base url. If one exists we cannot tell which
	// base url is contained in the url.
	$contextBaseUrls =& Config::getContextBaseUrls();
	foreach ($contextBaseUrls as $contextPath => $workingBaseUrl) {
		$urlToCheck = $workingBaseUrl . '/' . $contextPath;
		if (!$baseUrlHasHost) $urlToCheck = parse_url($urlToCheck, PHP_URL_PATH);
		if ($baseUrl == $urlToCheck) {
			return null;
		}
	}

	return true;
}
414 
/**
 * Cache miss callback for the bot regular expression list.
 * Reads the file whose name is stored in the registry under
 * 'currentUserAgentsFile', keeps the lines accepted by
 * Core::_filterBotRegexps() in their normalized (trimmed, delimited) form,
 * stores them as the entire cache content and returns them.
 * @param $cache object cache object offering setEntireCache()
 * @return array normalized regular expressions, keyed by their line index
 */
function _botFileListCacheMiss(&$cache) {
	$lines = file(Registry::get('currentUserAgentsFile'));
	if (!is_array($lines)) {
		// An unreadable file yields an empty list instead of a non-array value.
		$lines = array();
	}

	// The filter normalizes its argument by reference. array_filter() does
	// not write such changes back to the array on current PHP versions, so
	// each line is passed as a real variable and the normalized value kept.
	$botRegexps = array();
	foreach ($lines as $lineIndex => $line) {
		if (Core::_filterBotRegexps($line)) {
			$botRegexps[$lineIndex] = $line;
		}
	}

	$cache->setEntireCache($botRegexps);
	return $botRegexps;
}
428 
/**
 * Filter callback for the lines of the bot regular expression file.
 * Trims the passed line in place and, when it is kept and does not start
 * with the '/' delimiter, wraps it in delimiters.
 * @param $regexp string line to check; modified by reference
 * @return boolean false for empty lines and lines starting with '#', true otherwise
 */
function _filterBotRegexps(&$regexp) {
	$regexp = trim($regexp);

	// Skip blank lines and comments.
	if (empty($regexp) || $regexp[0] == '#') {
		return false;
	}

	// Make sure delimiters are in place.
	$delimiter = '/';
	if (strpos($regexp, $delimiter) !== 0) {
		$regexp = $delimiter . $regexp . $delimiter;
	}

	return true;
}
447 
/**
 * Get the value of a variable from the query string of the passed url,
 * falling back to the passed user variables.
 * @param $url string
 * @param $varName string
 * @param $userVars array (optional) values used when the url does not define the variable
 * @return mixed the value found, or null when neither source defines it
 */
function _getUserVar($url, $varName, $userVars = array()) {
	// parse_url() yields null when the url has no query string and false
	// when the url is malformed; only a real query string is parsed.
	$queryString = parse_url($url, PHP_URL_QUERY);
	$userVarsFromUrl = array();
	if (is_string($queryString) && $queryString !== '') {
		parse_str($queryString, $userVarsFromUrl);
	}

	// The url takes precedence over the passed user vars.
	if (isset($userVarsFromUrl[$varName])) {
		return $userVarsFromUrl[$varName];
	}
	if (isset($userVars[$varName])) {
		return $userVars[$varName];
	}

	return null;
}
469 
/**
 * Get a url component (page, operation or arguments) from the passed url
 * information.
 * @param $urlInfo string full url or just path info
 * @param $isPathInfo boolean whether $urlInfo is path info
 * @param $offset int position of the component after the context segments
 * @param $varName string query string variable name; 'path' marks an array component
 * @param $userVars array (optional) fallback values for query string variables
 * @return mixed the component; always an array for 'path', otherwise the
 *  value found or null
 */
function _getUrlComponents($urlInfo, $isPathInfo, $offset, $varName = '', $userVars = array()) {
	$component = null;
	$isArrayComponent = ($varName == 'path');

	if (!$isPathInfo) {
		$component = Core::_getUserVar($urlInfo, $varName, $userVars);
	} else {
		$application = Application::getApplication();
		$componentIndex = $application->getContextDepth() + $offset;

		$segments = explode('/', trim($urlInfo, '/'));
		if (count($segments) > $componentIndex) {
			if (!$isArrayComponent) {
				$component = $segments[$componentIndex];
			} else {
				// Every remaining segment is an argument and gets cleaned individually.
				$component = array_slice($segments, $componentIndex);
				foreach ($component as $position => $segment) {
					$component[$position] = Core::cleanVar(get_magic_quotes_gpc() ? stripslashes($segment) : $segment);
				}
			}
		}
	}

	// Array components are always returned as an array.
	if ($isArrayComponent) {
		if (empty($component)) {
			$component = array();
		} elseif (!is_array($component)) {
			$component = array($component);
		}
	}

	return $component;
}
514 }
515 
516 ?>
serverPHPOS()
Definition: Core.inc.php:111
regexp_match($pattern, $subject)
Definition: String.inc.php:325
html2utf($str)
Definition: String.inc.php:716
serverPHPVersion()
Definition: Core.inc.php:119
getOp($urlInfo, $isPathInfo, $userVars=array())
Definition: Core.inc.php:237
getContextPaths($urlInfo, $isPathInfo, $contextList=null, $contextDepth=null, $userVars=array())
Definition: Core.inc.php:170
utf8_normalize($str)
Definition: String.inc.php:607
_botFileListCacheMiss(&$cache)
Definition: Core.inc.php:420
Class containing system-wide functions.
Definition: Core.inc.php:23
& getContextBaseUrls()
Definition: Config.inc.php:93
getCurrentDate($ts=null)
Definition: Core.inc.php:94
_getUrlComponents($urlInfo, $isPathInfo, $offset, $varName= '', $userVars=array())
Definition: Core.inc.php:481
microtime()
Definition: Core.inc.php:102
isUserAgentBot($userAgent, $botRegexpsFile=USER_AGENTS_FILE)
Definition: Core.inc.php:138
set($key, &$value)
regexp_replace($pattern, $replacement, $subject, $limit=-1)
Definition: String.inc.php:350
_filterBotRegexps(&$regexp)
Definition: Core.inc.php:434
utf8_is_valid($str)
Definition: String.inc.php:545
getVar($section, $key, $default=null)
Definition: Config.inc.php:34
utf8_strip_ascii_ctrl($str)
Definition: String.inc.php:597
cleanFileVar($var)
Definition: Core.inc.php:85
_checkBaseUrl($baseUrl, $url)
Definition: Core.inc.php:379
getBaseDir()
Definition: Core.inc.php:32
utf8_bad_strip($str)
Definition: String.inc.php:576
cleanVar($var)
Definition: Core.inc.php:49
getArgs($urlInfo, $isPathInfo, $userVars=array())
Definition: Core.inc.php:254
removeBaseUrl($url)
Definition: Core.inc.php:266
& get($key, $createIfEmpty=false, $createWithDefault=null)
_getBaseUrlAndPath($url)
Definition: Core.inc.php:320
isWindows()
Definition: Core.inc.php:127
_getUserVar($url, $varName, $userVars=array())
Definition: Core.inc.php:455
getPage($urlInfo, $isPathInfo, $userVars=array())
Definition: Core.inc.php:221