Open Monograph Press  1.1
 All Classes Namespaces Functions Variables Groups Pages
String.inc.php
1 <?php
2 
18 /*
19  * Perl-compatible regular expression (PCRE) constants:
20  * These are defined application-wide for consistency
21  */
22 
23 /*
24  * RFC-2396 URIs
25  *
26  * Thanks to the PEAR Validation package (Tomas V.V.Cox <cox@idecnet.com>,
27  * Pierre-Alain Joye <pajoye@php.net>, Amir Mohammad Saied <amir@php.net>)
28  *
29  * Originally published under the "New BSD License"
30  * http://www.opensource.org/licenses/bsd-license.php
31  */
// Delimiter-less PCRE fragment for URIs. Capturing groups, in order:
// scheme, user, hostname, IP address, port, path, query string, fragment.
define('PCRE_URI', implode('', array(
	'(?:([a-z][-+.a-z0-9]*):)?', // Scheme
	'(?://',
	'(?:((?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'();:\&=+$,])*)@)?', // User
	'(?:((?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)*[a-z](?:[a-z0-9]+)?\.?)', // Hostname
	'|([0-9]{1,3}(?:\.[0-9]{1,3}){3}))', // IP Address
	'(?::([0-9]*))?)', // Port
	'((?:/(?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'():@\&=+$,;])*)*/?)?', // Path
	'(?:\?([^#]*))?', // Query String
	'(?:\#((?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'();/?:@\&=+$,])*))?' // Fragment
)));

// Delimiter-less PCRE fragment for RFC-2822 email addresses.
define('PCRE_EMAIL_ADDRESS', implode('', array(
	// One or more atom characters.
	'[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]+',
	// Followed by zero or more dot separated sets of one or more atom characters.
	'(\.[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]+)*',
	// Followed by an "at" character.
	'@',
	// Followed by one or more dot-terminated domain labels.
	'(([a-z0-9]([-a-z0-9]*[a-z0-9]+)?){1,63}\.)+',
	// Must end with a final domain label (quantified {2,63}).
	'([a-z0-9]([-a-z0-9]*[a-z0-9]+)?){2,63}'
)));

// Camel case flavours: leading capital (class names) versus
// leading lower-case letter (method names).
define('CAMEL_CASE_HEAD_UP', 0x01);
define('CAMEL_CASE_HEAD_DOWN', 0x02);
54 
55 class String {
/**
 * Initialize the string wrapper library.
 *
 * Reads the client charset from the [i18n] configuration section,
 * configures mbstring when it is usable and defines the
 * ENABLE_MBSTRING and PCRE_UTF8 constants used by the other wrappers.
 * Calling this method more than once is harmless: constants that
 * already exist are left alone.
 */
static function init() {
	$clientCharset = strtolower_codesafe(Config::getVar('i18n', 'client_charset'));

	// Check if mbstring is installed (requires PHP >= 4.3.0)
	if (String::hasMBString()) {
		// mbstring routines are available
		if (!defined('ENABLE_MBSTRING')) {
			define('ENABLE_MBSTRING', true);
		}

		// Set up required ini settings for mbstring
		// FIXME Do any other mbstring settings need to be set?
		mb_internal_encoding($clientCharset);
		// Pass the code point as an integer (63 = question mark); a
		// numeric string is not an accepted argument on current PHP.
		mb_substitute_character(63);
	}

	// Define modifier to be used in regexp_* routines
	// FIXME Should non-UTF-8 encodings be supported with mbstring?
	if (!defined('PCRE_UTF8')) {
		if ($clientCharset == 'utf-8' && String::hasPCREUTF8()) {
			define('PCRE_UTF8', 'u');
		} else {
			define('PCRE_UTF8', '');
		}
	}
}
82 
/**
 * Check whether the mbstring extension is usable by this library.
 *
 * The result is computed once per request and cached in a static
 * variable.
 * @return boolean True when mbstring is loaded, provides all required
 *  functions and string function overloading is not active.
 */
static function hasMBString() {
	static $hasMBString;
	if (isset($hasMBString)) return $hasMBString;

	// If string overloading is active, it will break many of the
	// native implementations. mbstring.func_overload must be set
	// to 0, 1 or 4 in php.ini (string overloading disabled), so only
	// the MB_OVERLOAD_STRING bit of the setting is tested here.
	$stringOverloadActive = defined('MB_OVERLOAD_STRING')
		&& (((int) ini_get('mbstring.func_overload')) & MB_OVERLOAD_STRING);

	if ($stringOverloadActive) {
		$hasMBString = false;
	} else {
		$hasMBString = (
			extension_loaded('mbstring') &&
			function_exists('mb_strlen') &&
			function_exists('mb_strpos') &&
			function_exists('mb_strrpos') &&
			function_exists('mb_substr') &&
			function_exists('mb_strtolower') &&
			function_exists('mb_strtoupper') &&
			function_exists('mb_substr_count') &&
			function_exists('mb_send_mail')
		);
	}
	return $hasMBString;
}
113 
/**
 * Check whether the PCRE library accepts the "u" (UTF-8) modifier.
 * @return boolean
 */
static function hasPCREUTF8() {
	// The PCRE_UTF8 modifier is only supported on PHP >= 4.1.0 (*nix) or PHP >= 4.2.3 (win32).
	// Probe it by running an empty UTF-8 pattern with errors suppressed:
	// a truthy result means the modifier was accepted.
	$probeResult = @preg_match('//u', '');
	return $probeResult ? true : false;
}
127 
128  //
129  // Wrappers for basic string manipulation routines.
130  // See the phputf8 documentation for usage.
131  //
132 
/**
 * Wrapper for phputf8's utf8_strlen().
 * Loads the mbstring-backed implementation when ENABLE_MBSTRING is
 * defined, the native one otherwise.
 * @param $string string
 * @return mixed Result of utf8_strlen()
 */
static function strlen($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once './lib/pkp/lib/phputf8/utils/unicode.php';
		require_once './lib/pkp/lib/phputf8/native/core.php';
	} else {
		require_once './lib/pkp/lib/phputf8/mbstring/core.php';
	}
	$characterCount = utf8_strlen($string);
	return $characterCount;
}
147 
/**
 * Wrapper for phputf8's utf8_strpos().
 * Loads the mbstring-backed implementation when ENABLE_MBSTRING is
 * defined, the native one otherwise.
 * @param $haystack string
 * @param $needle string
 * @param $offset int Forwarded unchanged to utf8_strpos()
 * @return mixed Result of utf8_strpos()
 */
static function strpos($haystack, $needle, $offset = 0) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once './lib/pkp/lib/phputf8/utils/unicode.php';
		require_once './lib/pkp/lib/phputf8/native/core.php';
	} else {
		require_once './lib/pkp/lib/phputf8/mbstring/core.php';
	}
	$position = utf8_strpos($haystack, $needle, $offset);
	return $position;
}
164 
/**
 * Wrapper for phputf8's utf8_strrpos().
 * Loads the mbstring-backed implementation when ENABLE_MBSTRING is
 * defined, the native one otherwise.
 * @param $haystack string
 * @param $needle string
 * @return mixed Result of utf8_strrpos()
 */
static function strrpos($haystack, $needle) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once './lib/pkp/lib/phputf8/utils/unicode.php';
		require_once './lib/pkp/lib/phputf8/native/core.php';
	} else {
		require_once './lib/pkp/lib/phputf8/mbstring/core.php';
	}
	$position = utf8_strrpos($haystack, $needle);
	return $position;
}
180 
/**
 * Wrapper for phputf8's utf8_substr().
 * Loads the mbstring-backed implementation when ENABLE_MBSTRING is
 * defined, the native one otherwise.
 * @param $string string
 * @param $start int
 * @param $length int|false False (the default) means "no explicit length"
 * @return mixed Result of utf8_substr()
 */
static function substr($string, $start, $length = false) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once './lib/pkp/lib/phputf8/utils/unicode.php';
		require_once './lib/pkp/lib/phputf8/native/core.php';
		// The native implementation uses null rather than false
		// as its "no length given" default.
		$length = ($length === false) ? null : $length;
	} else {
		require_once './lib/pkp/lib/phputf8/mbstring/core.php';
	}
	return utf8_substr($string, $start, $length);
}
200 
/**
 * Replace part of a string, counting in characters when the mbstring
 * extension is loaded and falling back to PHP's substr_replace()
 * otherwise.
 * @param $string string
 * @param $replacement string
 * @param $start int Negative values count from the end of the string
 * @param $length int|null Null replaces up to the end of the string;
 *  negative values stop that many characters before the end
 * @return string
 */
static function substr_replace($string, $replacement, $start, $length = null) {
	// Without mbstring simply defer to the built-in function.
	if (extension_loaded('mbstring') !== true) {
		if (is_null($length)) {
			return substr_replace($string, $replacement, $start);
		}
		return substr_replace($string, $replacement, $start, $length);
	}

	$totalLength = String::strlen($string);

	// Normalize the start offset into the range [0, $totalLength].
	if ($start < 0) {
		$start = max(0, $totalLength + $start);
	} else if ($start > $totalLength) {
		$start = $totalLength;
	}

	// Normalize the length into a non-negative character count.
	if ($length < 0) {
		$length = max(0, $totalLength - $start + $length);
	} else if (is_null($length) || $length > $totalLength) {
		$length = $totalLength;
	}

	// Never replace past the end of the string.
	if ($start + $length > $totalLength) {
		$length = $totalLength - $start;
	}

	$head = String::substr($string, 0, $start);
	$tail = String::substr($string, $start + $length, $totalLength - $start - $length);
	return $head . $replacement . $tail;
}
233 
/**
 * Wrapper for phputf8's utf8_strtolower().
 * Loads the mbstring-backed implementation when ENABLE_MBSTRING is
 * defined, the native one otherwise.
 * @param $string string
 * @return mixed Result of utf8_strtolower()
 */
static function strtolower($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once './lib/pkp/lib/phputf8/utils/unicode.php';
		require_once './lib/pkp/lib/phputf8/native/core.php';
	} else {
		require_once './lib/pkp/lib/phputf8/mbstring/core.php';
	}
	$lowered = utf8_strtolower($string);
	return $lowered;
}
248 
/**
 * Wrapper for phputf8's utf8_strtoupper().
 * Loads the mbstring-backed implementation when ENABLE_MBSTRING is
 * defined, the native one otherwise.
 * @param $string string
 * @return mixed Result of utf8_strtoupper()
 */
static function strtoupper($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once './lib/pkp/lib/phputf8/utils/unicode.php';
		require_once './lib/pkp/lib/phputf8/native/core.php';
	} else {
		require_once './lib/pkp/lib/phputf8/mbstring/core.php';
	}
	$uppered = utf8_strtoupper($string);
	return $uppered;
}
263 
/**
 * Wrapper for phputf8's utf8_ucfirst().
 * Loads the mbstring-backed or native core first, then the ucfirst
 * helper itself.
 * @param $string string
 * @return mixed Result of utf8_ucfirst()
 */
static function ucfirst($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once './lib/pkp/lib/phputf8/utils/unicode.php';
		require_once './lib/pkp/lib/phputf8/native/core.php';
	} else {
		require_once './lib/pkp/lib/phputf8/mbstring/core.php';
	}
	// The helper is shared by both core implementations.
	require_once './lib/pkp/lib/phputf8/ucfirst.php';
	return utf8_ucfirst($string);
}
280 
/**
 * Count the occurrences of a substring, using mb_substr_count() when
 * ENABLE_MBSTRING is defined and PHP's substr_count() otherwise.
 * @param $haystack string
 * @param $needle string
 * @return int
 */
static function substr_count($haystack, $needle) {
	// mb_substr_count() requires PHP >= 4.3.0
	return defined('ENABLE_MBSTRING')
		? mb_substr_count($haystack, $needle)
		: substr_count($haystack, $needle);
}
294 
/**
 * Encode a string for use in a MIME mail header.
 * When ENABLE_MBSTRING is not defined the string is returned untouched.
 * @param $string string
 * @return string
 */
static function encode_mime_header($string) {
	if (!defined('ENABLE_MBSTRING')) {
		return $string;
	}
	// Base64 ("B") encoding in the current internal encoding; MAIL_EOL
	// is defined outside this file and used as the line separator.
	$charset = mb_internal_encoding();
	return mb_encode_mimeheader($string, $charset, 'B', MAIL_EOL);
}
307 
308  //
309  // Wrappers for PCRE-compatible regular expression routines.
310  // See the php.net documentation for usage.
311  //
312 
/**
 * Wrapper for preg_quote().
 * @param $string string Text to escape
 * @param $delimiter string Pattern delimiter to escape as well
 * @return string
 */
static function regexp_quote($string, $delimiter = '/') {
	$quoted = preg_quote($string, $delimiter);
	return $quoted;
}
322 
/**
 * UTF-8 aware wrapper around preg_grep().
 * @param $pattern string Delimited pattern; the PCRE_UTF8 modifier is appended
 * @param $input array Strings to filter
 * @return mixed Whatever preg_grep() returns
 */
static function regexp_grep($pattern, $input) {
	// The UTF-8 validators operate on single strings, so each array
	// element has to be checked (and cleaned) individually rather than
	// passing the whole array to them.
	if (PCRE_UTF8 && is_array($input)) {
		foreach ($input as $key => $value) {
			if (is_string($value) && !String::utf8_compliant($value)) {
				$input[$key] = String::utf8_bad_strip($value);
			}
		}
	}
	return preg_grep($pattern . PCRE_UTF8, $input);
}
333 
/**
 * UTF-8 aware wrapper around preg_match().
 * @param $pattern string Delimited pattern; the PCRE_UTF8 modifier is appended
 * @param $subject string
 * @return mixed Whatever preg_match() returns
 */
static function regexp_match($pattern, $subject) {
	// With the UTF-8 modifier in use, strip invalid byte sequences first.
	if (PCRE_UTF8) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	$utf8Pattern = $pattern . PCRE_UTF8;
	return preg_match($utf8Pattern, $subject);
}
344 
/**
 * UTF-8 aware wrapper around preg_match() that also returns the matches.
 * NOTE: This function exists because PHP < 5.x does not support
 * optional reference parameters.
 * @param $pattern string Delimited pattern; the PCRE_UTF8 modifier is appended
 * @param $subject string
 * @param $matches array Receives the matches (by reference)
 * @return mixed Whatever preg_match() returns
 */
static function regexp_match_get($pattern, $subject, &$matches) {
	// With the UTF-8 modifier in use, strip invalid byte sequences first.
	if (PCRE_UTF8) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	$utf8Pattern = $pattern . PCRE_UTF8;
	return preg_match($utf8Pattern, $subject, $matches);
}
357 
/**
 * UTF-8 aware wrapper around preg_match_all().
 * @param $pattern string Delimited pattern; the PCRE_UTF8 modifier is appended
 * @param $subject string
 * @param $matches array Receives the matches (by reference)
 * @return mixed Whatever preg_match_all() returns
 */
static function regexp_match_all($pattern, $subject, &$matches) {
	// With the UTF-8 modifier in use, strip invalid byte sequences first.
	if (PCRE_UTF8) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	$utf8Pattern = $pattern . PCRE_UTF8;
	return preg_match_all($utf8Pattern, $subject, $matches);
}
369 
/**
 * UTF-8 aware wrapper around preg_replace().
 * @param $pattern string Delimited pattern; the PCRE_UTF8 modifier is appended
 * @param $replacement string
 * @param $subject string
 * @param $limit int Maximum number of replacements (-1 for no limit)
 * @return mixed Whatever preg_replace() returns
 */
static function regexp_replace($pattern, $replacement, $subject, $limit = -1) {
	// With the UTF-8 modifier in use, strip invalid byte sequences first.
	if (PCRE_UTF8) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	$utf8Pattern = $pattern . PCRE_UTF8;
	return preg_replace($utf8Pattern, $replacement, $subject, $limit);
}
382 
/**
 * UTF-8 aware wrapper around preg_replace_callback().
 * @param $pattern string Delimited pattern; the PCRE_UTF8 modifier is appended
 * @param $callback callback Invoked for every match
 * @param $subject string
 * @param $limit int Maximum number of replacements (-1 for no limit)
 * @return mixed Whatever preg_replace_callback() returns
 */
static function regexp_replace_callback($pattern, $callback, $subject, $limit = -1) {
	// With the UTF-8 modifier in use, strip invalid byte sequences first.
	if (PCRE_UTF8) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	$utf8Pattern = $pattern . PCRE_UTF8;
	return preg_replace_callback($utf8Pattern, $callback, $subject, $limit);
}
395 
/**
 * UTF-8 aware wrapper around preg_split().
 * @param $pattern string Delimited pattern; the PCRE_UTF8 modifier is appended
 * @param $subject string
 * @param $limit int Maximum number of pieces (-1 for no limit)
 * @return mixed Whatever preg_split() returns
 */
static function regexp_split($pattern, $subject, $limit = -1) {
	// With the UTF-8 modifier in use, strip invalid byte sequences first.
	if (PCRE_UTF8) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	$utf8Pattern = $pattern . PCRE_UTF8;
	return preg_split($utf8Pattern, $subject, $limit);
}
407 
/**
 * Determine the MIME type of a file.
 * Uses the fileinfo extension when available and otherwise shells out
 * to the external "file" tool.
 * @param $filename string Path of the file to inspect
 * @return mixed MIME type without parameters (e.g. without "; charset=...")
 */
static function mime_content_type($filename) {
	if (function_exists('finfo_open')) {
		// The fileinfo handle is kept in the registry so it can be reused.
		$fileInfo =& Registry::get('fileInfo', true, null);
		if ($fileInfo === null) {
			$fileInfo = finfo_open(FILEINFO_MIME, Config::getVar('finfo', 'mime_database_path'));
		}
		if ($fileInfo !== false) {
			// Keep only the token before the first space or semicolon.
			$description = finfo_file($fileInfo, $filename);
			return strtok($description, ' ;');
		}
	}

	// Fall back on an external "file" tool
	$escapedFilename = escapeshellarg($filename);
	$result = trim(shell_exec("file --brief --mime $escapedFilename"));

	// Make sure we just return the mime type.
	$semicolonPosition = strpos($result, ';');
	if ($semicolonPosition !== false) {
		$result = trim(substr($result, 0, $semicolonPosition));
	}
	return $result;
}
433 
434 
/**
 * Run HTML through HTML Purifier, keeping only the markup listed in
 * the [security] allowed_html configuration setting.
 * @param $input string HTML to clean
 * @return string Purified HTML
 */
static function stripUnsafeHtml($input) {
	require_once('lib/pkp/lib/htmlpurifier/library/HTMLPurifier.path.php');
	require_once('HTMLPurifier.includes.php');

	// Build the purifier once and reuse it for subsequent calls.
	static $purifier;
	if (!isset($purifier)) {
		$purifierConfig = HTMLPurifier_Config::createDefault();
		$purifierConfig->set('Core.Encoding', Config::getVar('i18n', 'client_charset'));
		$purifierConfig->set('HTML.Doctype', 'XHTML 1.0 Transitional');
		$purifierConfig->set('HTML.Allowed', Config::getVar('security', 'allowed_html'));
		$purifierConfig->set('Cache.SerializerPath', 'cache');
		$purifier = new HTMLPurifier($purifierConfig);
	}

	$cleanHtml = $purifier->purify($input);
	return $cleanHtml;
}
455 
/**
 * Convert limited HTML into a plain-text representation.
 *
 * Paragraph tags, closing list items and line breaks become newlines,
 * opening list items become bullets; all remaining tags are stripped
 * and entities are decoded as UTF-8. Tag matching is case-insensitive
 * so that e.g. "<BR>" and "<P>" are treated like their lower-case
 * forms instead of being silently dropped by strip_tags().
 * @param $html string
 * @return string
 */
static function html2text($html) {
	$html = String::regexp_replace('/<\/?p>/i', "\n", $html);
	$html = String::regexp_replace('/<li>/i', '&bull; ', $html);
	$html = String::regexp_replace('/<\/li>/i', "\n", $html);
	// Accept "<br>", "<br/>" and "<br />".
	$html = String::regexp_replace('/<br\s*\/?>/i', "\n", $html);
	$html = html_entity_decode(strip_tags($html), ENT_COMPAT, 'UTF-8');
	return $html;
}
469 
470  //
471  // Wrappers for UTF-8 validation routines
472  // See the phputf8 documentation for usage.
473  //
474 
/**
 * Wrapper for phputf8's utf8_is_valid().
 * @param $str string
 * @return mixed Result of utf8_is_valid()
 */
static function utf8_is_valid($str) {
	require_once './lib/pkp/lib/phputf8/utils/validation.php';
	$isValid = utf8_is_valid($str);
	return $isValid;
}
484 
/**
 * Wrapper for phputf8's utf8_compliant().
 * @param $str string
 * @return mixed Result of utf8_compliant()
 */
static function utf8_compliant($str) {
	require_once './lib/pkp/lib/phputf8/utils/validation.php';
	$isCompliant = utf8_compliant($str);
	return $isCompliant;
}
495 
/**
 * Wrapper for phputf8's utf8_bad_find().
 * @param $str string
 * @return mixed Result of utf8_bad_find()
 */
static function utf8_bad_find($str) {
	require_once './lib/pkp/lib/phputf8/utils/bad.php';
	$badPosition = utf8_bad_find($str);
	return $badPosition;
}
505 
/**
 * Wrapper for phputf8's utf8_bad_strip().
 * @param $str string
 * @return mixed Result of utf8_bad_strip()
 */
static function utf8_bad_strip($str) {
	require_once './lib/pkp/lib/phputf8/utils/bad.php';
	$stripped = utf8_bad_strip($str);
	return $stripped;
}
515 
/**
 * Wrapper for phputf8's utf8_bad_replace().
 * @param $str string
 * @param $replace string Replacement forwarded to utf8_bad_replace()
 * @return mixed Result of utf8_bad_replace()
 */
static function utf8_bad_replace($str, $replace = '?') {
	require_once './lib/pkp/lib/phputf8/utils/bad.php';
	$repaired = utf8_bad_replace($str, $replace);
	return $repaired;
}
526 
/**
 * Wrapper for phputf8's utf8_strip_ascii_ctrl().
 * @param $str string
 * @return mixed Result of utf8_strip_ascii_ctrl()
 */
static function utf8_strip_ascii_ctrl($str) {
	require_once './lib/pkp/lib/phputf8/utils/ascii.php';
	$stripped = utf8_strip_ascii_ctrl($str);
	return $stripped;
}
536 
/**
 * Detect the encoding of a string and transcode it to UTF-8.
 * @param $str string Text in an unknown encoding
 * @return string Result of the final Transcoder pass to UTF-8
 */
static function utf8_normalize($str) {
	import('lib.pkp.classes.core.Transcoder');

	if (String::hasMBString()) {
		// NB: CP-1252 often segfaults; we've left it out here but it will detect as 'ISO-8859-1'
		$candidateEncodings = 'UTF-8, UTF-7, ASCII, ISO-8859-1, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP';
		$detected_encoding = mb_detect_encoding($str, $candidateEncodings, false);
	} else {
		// assume ISO-8859-1, PHP default
		$detected_encoding = 'ISO-8859-1';
		if (function_exists('iconv')) {
			// use iconv to detect CP-1252: the two conversions only
			// differ in length when the input is not plain ISO-8859-1
			$lengthAsCp1252 = strlen(iconv('CP1252', 'UTF-8', $str));
			$lengthAsLatin1 = strlen(iconv('ISO-8859-1', 'UTF-8', $str));
			if ($lengthAsCp1252 != $lengthAsLatin1) {
				$detected_encoding = 'CP1252';
			}
		}
	}

	// transcode CP-1252/ISO-8859-1 into HTML entities; this works because CP-1252 is mapped onto ISO-8859-1
	if (in_array($detected_encoding, array('ISO-8859-1', 'CP1252'), true)) {
		$entityTranscoder = new Transcoder('CP1252', 'HTML-ENTITIES');
		$str = $entityTranscoder->trans($str);
	}

	// transcode from the detected encoding to UTF-8
	$utf8Transcoder = new Transcoder($detected_encoding, 'UTF-8');
	return $utf8Transcoder->trans($str);
}
571 
/**
 * Wrapper for phputf8's utf8_to_ascii().
 * @param $str string
 * @return mixed Result of utf8_to_ascii()
 */
static function utf8_to_ascii($str) {
	require_once('./lib/pkp/lib/phputf8/utf8_to_ascii.php');
	$asciiText = utf8_to_ascii($str);
	return $asciiText;
}
581 
/**
 * Get the table mapping named HTML entities to their numeric
 * (decimal) character references.
 *
 * "&minus;", "&circ;" and "&tilde;" map to the code points assigned by
 * HTML 4.01 (U+2212, U+02C6, U+02DC) rather than to the similar
 * looking ASCII characters.
 * @return array Named entity => numeric entity
 */
static function getHTMLEntities () {
	// define the conversion table
	$html_entities = array(
		"&Aacute;" => "&#193;", "&aacute;" => "&#225;", "&Acirc;" => "&#194;",
		"&acirc;" => "&#226;", "&acute;" => "&#180;", "&AElig;" => "&#198;",
		"&aelig;" => "&#230;", "&Agrave;" => "&#192;", "&agrave;" => "&#224;",
		"&alefsym;" => "&#8501;", "&Alpha;" => "&#913;", "&alpha;" => "&#945;",
		"&amp;" => "&#38;", "&and;" => "&#8743;", "&ang;" => "&#8736;",
		"&apos;" => "&#39;", "&Aring;" => "&#197;", "&aring;" => "&#229;",
		"&asymp;" => "&#8776;", "&Atilde;" => "&#195;", "&atilde;" => "&#227;",
		"&Auml;" => "&#196;", "&auml;" => "&#228;", "&bdquo;" => "&#8222;",
		"&Beta;" => "&#914;", "&beta;" => "&#946;", "&brvbar;" => "&#166;",
		"&bull;" => "&#8226;", "&cap;" => "&#8745;", "&Ccedil;" => "&#199;",
		"&ccedil;" => "&#231;", "&cedil;" => "&#184;", "&cent;" => "&#162;",
		"&Chi;" => "&#935;", "&chi;" => "&#967;", "&circ;" => "&#710;",
		"&clubs;" => "&#9827;", "&cong;" => "&#8773;", "&copy;" => "&#169;",
		"&crarr;" => "&#8629;", "&cup;" => "&#8746;", "&curren;" => "&#164;",
		"&dagger;" => "&#8224;", "&Dagger;" => "&#8225;", "&darr;" => "&#8595;",
		"&dArr;" => "&#8659;", "&deg;" => "&#176;", "&Delta;" => "&#916;",
		"&delta;" => "&#948;", "&diams;" => "&#9830;", "&divide;" => "&#247;",
		"&Eacute;" => "&#201;", "&eacute;" => "&#233;", "&Ecirc;" => "&#202;",
		"&ecirc;" => "&#234;", "&Egrave;" => "&#200;", "&egrave;" => "&#232;",
		"&empty;" => "&#8709;", "&emsp;" => "&#8195;", "&ensp;" => "&#8194;",
		"&Epsilon;" => "&#917;", "&epsilon;" => "&#949;", "&equiv;" => "&#8801;",
		"&Eta;" => "&#919;", "&eta;" => "&#951;", "&ETH;" => "&#208;",
		"&eth;" => "&#240;", "&Euml;" => "&#203;", "&euml;" => "&#235;",
		"&euro;" => "&#8364;", "&exist;" => "&#8707;", "&fnof;" => "&#402;",
		"&forall;" => "&#8704;", "&frac12;" => "&#189;", "&frac14;" => "&#188;",
		"&frac34;" => "&#190;", "&frasl;" => "&#8260;", "&Gamma;" => "&#915;",
		"&gamma;" => "&#947;", "&ge;" => "&#8805;", "&gt;" => "&#62;",
		"&harr;" => "&#8596;", "&hArr;" => "&#8660;", "&hearts;" => "&#9829;",
		"&hellip;" => "&#8230;", "&Iacute;" => "&#205;", "&iacute;" => "&#237;",
		"&Icirc;" => "&#206;", "&icirc;" => "&#238;", "&iexcl;" => "&#161;",
		"&Igrave;" => "&#204;", "&igrave;" => "&#236;", "&image;" => "&#8465;",
		"&infin;" => "&#8734;", "&int;" => "&#8747;", "&Iota;" => "&#921;",
		"&iota;" => "&#953;", "&iquest;" => "&#191;", "&isin;" => "&#8712;",
		"&Iuml;" => "&#207;", "&iuml;" => "&#239;", "&Kappa;" => "&#922;",
		"&kappa;" => "&#954;", "&Lambda;" => "&#923;", "&lambda;" => "&#955;",
		"&lang;" => "&#9001;", "&laquo;" => "&#171;", "&larr;" => "&#8592;",
		"&lArr;" => "&#8656;", "&lceil;" => "&#8968;",
		"&ldquo;" => "&#8220;", "&le;" => "&#8804;", "&lfloor;" => "&#8970;",
		"&lowast;" => "&#8727;", "&loz;" => "&#9674;", "&lrm;" => "&#8206;",
		"&lsaquo;" => "&#8249;", "&lsquo;" => "&#8216;", "&lt;" => "&#60;",
		"&macr;" => "&#175;", "&mdash;" => "&#8212;", "&micro;" => "&#181;",
		"&middot;" => "&#183;", "&minus;" => "&#8722;", "&Mu;" => "&#924;",
		"&mu;" => "&#956;", "&nabla;" => "&#8711;", "&nbsp;" => "&#160;",
		"&ndash;" => "&#8211;", "&ne;" => "&#8800;", "&ni;" => "&#8715;",
		"&not;" => "&#172;", "&notin;" => "&#8713;", "&nsub;" => "&#8836;",
		"&Ntilde;" => "&#209;", "&ntilde;" => "&#241;", "&Nu;" => "&#925;",
		"&nu;" => "&#957;", "&Oacute;" => "&#211;", "&oacute;" => "&#243;",
		"&Ocirc;" => "&#212;", "&ocirc;" => "&#244;", "&OElig;" => "&#338;",
		"&oelig;" => "&#339;", "&Ograve;" => "&#210;", "&ograve;" => "&#242;",
		"&oline;" => "&#8254;", "&Omega;" => "&#937;", "&omega;" => "&#969;",
		"&Omicron;" => "&#927;", "&omicron;" => "&#959;", "&oplus;" => "&#8853;",
		"&or;" => "&#8744;", "&ordf;" => "&#170;", "&ordm;" => "&#186;",
		"&Oslash;" => "&#216;", "&oslash;" => "&#248;", "&Otilde;" => "&#213;",
		"&otilde;" => "&#245;", "&otimes;" => "&#8855;", "&Ouml;" => "&#214;",
		"&ouml;" => "&#246;", "&para;" => "&#182;", "&part;" => "&#8706;",
		"&permil;" => "&#8240;", "&perp;" => "&#8869;", "&Phi;" => "&#934;",
		"&phi;" => "&#966;", "&Pi;" => "&#928;", "&pi;" => "&#960;",
		"&piv;" => "&#982;", "&plusmn;" => "&#177;", "&pound;" => "&#163;",
		"&prime;" => "&#8242;", "&Prime;" => "&#8243;", "&prod;" => "&#8719;",
		"&prop;" => "&#8733;", "&Psi;" => "&#936;", "&psi;" => "&#968;",
		"&quot;" => "&#34;", "&radic;" => "&#8730;", "&rang;" => "&#9002;",
		"&raquo;" => "&#187;", "&rarr;" => "&#8594;", "&rArr;" => "&#8658;",
		"&rceil;" => "&#8969;", "&rdquo;" => "&#8221;", "&real;" => "&#8476;",
		"&reg;" => "&#174;", "&rfloor;" => "&#8971;", "&Rho;" => "&#929;",
		"&rho;" => "&#961;", "&rlm;" => "&#8207;", "&rsaquo;" => "&#8250;",
		"&rsquo;" => "&#8217;", "&sbquo;" => "&#8218;", "&Scaron;" => "&#352;",
		"&scaron;" => "&#353;", "&sdot;" => "&#8901;", "&sect;" => "&#167;",
		"&shy;" => "&#173;", "&Sigma;" => "&#931;", "&sigma;" => "&#963;",
		"&sigmaf;" => "&#962;", "&sim;" => "&#8764;", "&spades;" => "&#9824;",
		"&sub;" => "&#8834;", "&sube;" => "&#8838;", "&sum;" => "&#8721;",
		"&sup1;" => "&#185;", "&sup2;" => "&#178;", "&sup3;" => "&#179;",
		"&sup;" => "&#8835;", "&supe;" => "&#8839;", "&szlig;" => "&#223;",
		"&Tau;" => "&#932;", "&tau;" => "&#964;", "&there4;" => "&#8756;",
		"&Theta;" => "&#920;", "&theta;" => "&#952;", "&thetasym;" => "&#977;",
		"&thinsp;" => "&#8201;", "&THORN;" => "&#222;", "&thorn;" => "&#254;",
		"&tilde;" => "&#732;", "&times;" => "&#215;", "&trade;" => "&#8482;",
		"&Uacute;" => "&#218;", "&uacute;" => "&#250;", "&uarr;" => "&#8593;",
		"&uArr;" => "&#8657;", "&Ucirc;" => "&#219;", "&ucirc;" => "&#251;",
		"&Ugrave;" => "&#217;", "&ugrave;" => "&#249;", "&uml;" => "&#168;",
		"&upsih;" => "&#978;", "&Upsilon;" => "&#933;", "&upsilon;" => "&#965;",
		"&Uuml;" => "&#220;", "&uuml;" => "&#252;", "&weierp;" => "&#8472;",
		"&Xi;" => "&#926;", "&xi;" => "&#958;", "&Yacute;" => "&#221;",
		"&yacute;" => "&#253;", "&yen;" => "&#165;", "&yuml;" => "&#255;",
		"&Yuml;" => "&#376;", "&Zeta;" => "&#918;", "&zeta;" => "&#950;",
		"&zwj;" => "&#8205;", "&zwnj;" => "&#8204;"
	);

	return $html_entities;
}
680 
/**
 * Strip punctuation and spaces from both ends of a string.
 * @param $string string
 * @return string
 */
static function trimPunctuation($string) {
	// Space, comma, period, semicolon, colon, exclamation and question
	// mark, ampersand, round and square brackets, backslash and slash.
	$strippedCharacters = ' ,.;:!?&()[]\\/';
	return trim($string, $strippedCharacters);
}
689 
/**
 * Convert a title to title case.
 *
 * Every word is lower-cased; the first word and every word that is
 * not listed in the space-separated "common.titleSmallWords" locale
 * string additionally gets an upper-case first letter.
 * @param $title string
 * @return string
 */
static function titleCase($title) {
	AppLocale::requireComponents(LOCALE_COMPONENT_PKP_COMMON);
	$smallWords = explode(' ', __('common.titleSmallWords'));

	$words = explode(' ', $title);
	foreach ($words as $key => $word) {
		$lowerCaseWord = String::strtolower($word);
		if ($key == 0 || !in_array($lowerCaseWord, $smallWords)) {
			// Use the multibyte-aware wrapper: PHP's own ucfirst()
			// works on bytes and cannot capitalize non-ASCII letters.
			$words[$key] = String::ucfirst($lowerCaseWord);
		} else {
			$words[$key] = $lowerCaseWord;
		}
	}

	return implode(' ', $words);
}
711 
/**
 * Join title fields (e.g. title and subtitle) into one full title.
 *
 * Fields are joined with ": " unless the first field already ends in
 * one of "?", "!", "/" or "&", in which case a single space is used.
 * @param $fields array Title parts in display order
 * @return string The full title; empty string for an empty array
 */
static function concatTitleFields($fields) {
	// Nothing to join (also avoids reading a missing first element).
	if (empty($fields)) return '';

	// Characters after which no colon is inserted between
	// title and subtitle.
	$avoidColonChars = array('?', '!', '/', '&');

	// Inspect the last character of the first field to pick the
	// separator. reset() is used so that arrays that do not start
	// at index 0 work as well.
	$firstField = reset($fields);
	$lastCharacter = substr($firstField, -1, 1);
	$separator = in_array($lastCharacter, $avoidColonChars, true) ? ' ' : ': ';

	return join($separator, $fields);
}
735 
/**
 * Split a string on the first delimiter (in the given order) that
 * actually occurs in it.
 * @param $delimiters array Candidate delimiters, tried one by one
 * @param $input string
 * @return array The exploded parts, or array($input) when none of
 *  the delimiters occurs in the input
 */
static function iterativeExplode($delimiters, $input) {
	// Run through the delimiters and try them out
	// one by one.
	foreach ($delimiters as $delimiter) {
		// An empty delimiter can never split anything and would make
		// explode() fail, so it is skipped.
		if ((string) $delimiter === '') continue;

		if (strpos($input, $delimiter) !== false) {
			return explode($delimiter, $input);
		}
	}

	// If none of the delimiters works then return
	// the original string as an array.
	return array($input);
}
757 
758 
759 
/**
 * Convert a hyphen-separated string to camel case.
 * @param $string string E.g. "handler-class" or "my-op"
 * @param $type int CAMEL_CASE_HEAD_UP ("MyOp") or CAMEL_CASE_HEAD_DOWN ("myOp")
 * @return string
 */
static function camelize($string, $type = CAMEL_CASE_HEAD_UP) {
	assert($type == CAMEL_CASE_HEAD_UP || $type == CAMEL_CASE_HEAD_DOWN);

	// Transform "handler-class" to "HandlerClass" and "my-op" to "MyOp":
	// turn hyphens into spaces, capitalize each word, drop the spaces.
	$spacedWords = str_replace('-', ' ', $string);
	$capitalizedWords = ucwords($spacedWords);
	$string = str_replace(' ', '', $capitalizedWords);

	if ($type != CAMEL_CASE_HEAD_DOWN) {
		return $string;
	}

	// Transform "MyOp" to "myOp".
	// lcfirst() is PHP>5.3, so use workaround
	$head = strtolower(substr($string, 0, 1));
	return $head . substr($string, 1);
}
781 
/**
 * Convert a camel-cased string to a lower-case, hyphen-separated one.
 * @param $string string Non-empty string such as "myOp" or "MyOp"
 * @return string E.g. "my-op"
 */
static function uncamelize($string) {
	assert(!empty($string));

	// Transform "myOp" to "MyOp"
	$capitalized = ucfirst($string);

	// Collect the individual words: an upper-case letter followed by
	// any number of lower-case letters or digits.
	$wordMatches = array();
	String::regexp_match_all('/[A-Z][a-z0-9]*/', $capitalized, $wordMatches);

	// The words must account for every character of the input.
	assert(isset($wordMatches[0]) && !empty($wordMatches[0]) && strlen(implode('', $wordMatches[0])) == strlen($capitalized));

	// Insert hyphens between words and return the string in lowercase
	$hyphenated = implode('-', $wordMatches[0]);
	return strtolower($hyphenated);
}
800 
/**
 * Calculate the differences between two strings.
 *
 * The largest common substring of both strings is located and the
 * parts before and after it are diffed recursively.
 *
 * @param $originalString string
 * @param $editedString string
 * @return array A list of single-entry arrays in string order. The
 *  key of each entry tells what the text is: -1 for text that only
 *  occurs in the original string (deletion), 1 for text that only
 *  occurs in the edited string (addition) and 0 for text common to
 *  both. Two empty strings yield an empty array.
 */
static function diff($originalString, $editedString) {
	// Split strings into character arrays (multi-byte compatible).
	// The "s" modifier lets "." match line breaks as well; without it
	// newlines would be missing from the character arrays and all
	// positions would be shifted relative to String::substr() below.
	foreach(array('originalStringCharacters' => $originalString, 'editedStringCharacters' => $editedString) as $characterArrayName => $string) {
		${$characterArrayName} = array();
		String::regexp_match_all('/./s', $string, ${$characterArrayName});
		if (isset(${$characterArrayName}[0])) {
			${$characterArrayName} = ${$characterArrayName}[0];
		}
	}

	// Determine the length of the strings.
	$originalStringLength = count($originalStringCharacters);
	$editedStringLength = count($editedStringCharacters);

	// Is there anything to compare?
	if ($originalStringLength == 0 && $editedStringLength == 0) return array();

	// Is the original string empty?
	if ($originalStringLength == 0) {
		// Return the edited string as addition.
		return array(array(1 => $editedString));
	}

	// Is the edited string empty?
	if ($editedStringLength == 0) {
		// Return the original string as deletion.
		return array(array(-1 => $originalString));
	}

	// Initialize the local indices:
	// 1) Create a character index for the edited string
	//    (character => list of positions where it occurs).
	$characterIndex = array();
	for($characterPosition = 0; $characterPosition < $editedStringLength; $characterPosition++) {
		$characterIndex[$editedStringCharacters[$characterPosition]][] = $characterPosition;
	}
	// 2) Initialize the substring and the length index. Both are keyed
	//    by "<end position in original>-<end position in edited>".
	$substringIndex = $lengthIndex = array();

	// Iterate over the original string to identify
	// the largest common string.
	for($originalPosition = 0; $originalPosition < $originalStringLength; $originalPosition++) {
		// Find all occurrences of the original character
		// in the target string.
		$comparedCharacter = $originalStringCharacters[$originalPosition];

		// Do we have a commonality between the original string
		// and the edited string?
		if (isset($characterIndex[$comparedCharacter])) {
			// Loop over all commonalities.
			foreach($characterIndex[$comparedCharacter] as $editedPosition) {
				// Calculate the current and the preceding position
				// ids for indexation.
				$currentPosition = $originalPosition . '-' . $editedPosition;
				$previousPosition = ($originalPosition-1) . '-' . ($editedPosition-1);

				// Does the occurrence in the target string continue
				// an existing common substring or does it start
				// a new one?
				if (isset($substringIndex[$previousPosition])) {
					// This is a continuation of an existing common
					// substring...
					$newSubstring = $substringIndex[$previousPosition].$comparedCharacter;
					$newSubstringLength = String::strlen($newSubstring);

					// Move the substring in the substring index.
					$substringIndex[$currentPosition] = $newSubstring;
					unset($substringIndex[$previousPosition]);

					// Move the substring in the length index.
					$lengthIndex[$newSubstringLength][$currentPosition] = $newSubstring;
					unset($lengthIndex[$newSubstringLength - 1][$previousPosition]);
				} else {
					// Start a new common substring...
					// Add the substring to the substring index.
					$substringIndex[$currentPosition] = $comparedCharacter;

					// Add the substring to the length index.
					$lengthIndex[1][$currentPosition] = $comparedCharacter;
				}
			}
		}
	}

	// If we have no commonalities at all then mark the original
	// string as deleted and the edited string as added and
	// return.
	if (empty($lengthIndex)) {
		return array(
			array( -1 => $originalString ),
			array( 1 => $editedString )
		);
	}

	// Pop the largest common substrings from the length index.
	end($lengthIndex);
	$largestSubstringLength = key($lengthIndex);

	// Take the first common substring if we have more than
	// one substring with the same length.
	// FIXME: Find a better heuristic for this decision.
	reset($lengthIndex[$largestSubstringLength]);
	$largestSubstringPosition = key($lengthIndex[$largestSubstringLength]);
	list($largestSubstringEndOriginal, $largestSubstringEndEdited) = explode('-', $largestSubstringPosition);
	$largestSubstring = $lengthIndex[$largestSubstringLength][$largestSubstringPosition];

	// Add the largest common substring to the result set
	$diffResult = array(array( 0 => $largestSubstring ));

	// Prepend the diff of the substrings before the common substring
	// to the result diff (by recursion).
	$precedingSubstringOriginal = String::substr($originalString, 0, $largestSubstringEndOriginal-$largestSubstringLength+1);
	$precedingSubstringEdited = String::substr($editedString, 0, $largestSubstringEndEdited-$largestSubstringLength+1);
	$diffResult = array_merge(String::diff($precedingSubstringOriginal, $precedingSubstringEdited), $diffResult);

	// Append the diff of the substrings after the common substring
	// to the result diff (by recursion).
	$succeedingSubstringOriginal = String::substr($originalString, $largestSubstringEndOriginal+1);
	$succeedingSubstringEdited = String::substr($editedString, $largestSubstringEndEdited+1);
	$diffResult = array_merge($diffResult, String::diff($succeedingSubstringOriginal, $succeedingSubstringEdited));

	// Return the array representing the diff.
	return $diffResult;
}
948 
/**
 * Get the character that lies the given number of steps after "A".
 * @param $steps int Offset from "A" (0 gives "A", 1 gives "B", ...)
 * @return string A single character
 */
static function enumerateAlphabetically($steps) {
	$characterCode = ord('A') + $steps;
	return chr($characterCode);
}
957 }
958 
959 ?>
static strlen($string)
Definition: String.inc.php:137
static utf8_to_ascii($str)
Definition: String.inc.php:576
static utf8_bad_strip($str)
Definition: String.inc.php:510
static ucfirst($string)
Definition: String.inc.php:268
static strrpos($haystack, $needle)
Definition: String.inc.php:170
static substr($string, $start, $length=false)
Definition: String.inc.php:187
static regexp_replace($pattern, $replacement, $subject, $limit=-1)
Definition: String.inc.php:377
static trimPunctuation($string)
Definition: String.inc.php:685
static enumerateAlphabetically($steps)
Definition: String.inc.php:953
static iterativeExplode($delimiters, $input)
Definition: String.inc.php:743
static init()
Definition: String.inc.php:59
static requireComponents()
static utf8_bad_replace($str, $replace= '?')
Definition: String.inc.php:521
static getHTMLEntities()
Definition: String.inc.php:587
static strpos($haystack, $needle, $offset=0)
Definition: String.inc.php:154
static utf8_normalize($str)
Definition: String.inc.php:541
static regexp_grep($pattern, $input)
Definition: String.inc.php:328
static regexp_quote($string, $delimiter= '/')
Definition: String.inc.php:318
static regexp_replace_callback($pattern, $callback, $subject, $limit=-1)
Definition: String.inc.php:390
static regexp_match($pattern, $subject)
Definition: String.inc.php:339
static utf8_is_valid($str)
Definition: String.inc.php:479
String manipulation wrapper class.
Definition: String.inc.php:54
static strtoupper($string)
Definition: String.inc.php:253
static utf8_bad_find($str)
Definition: String.inc.php:500
static stripUnsafeHtml($input)
Definition: String.inc.php:440
static utf8_strip_ascii_ctrl($str)
Definition: String.inc.php:531
static getVar($section, $key, $default=null)
Definition: Config.inc.php:35
static titleCase($title)
Definition: String.inc.php:694
static regexp_split($pattern, $subject, $limit=-1)
Definition: String.inc.php:402
static mime_content_type($filename)
Definition: String.inc.php:412
static hasMBString()
Definition: String.inc.php:88
static regexp_match_get($pattern, $subject, &$matches)
Definition: String.inc.php:351
static diff($originalString, $editedString)
Definition: String.inc.php:824
static strtolower($string)
Definition: String.inc.php:238
static uncamelize($string)
Definition: String.inc.php:787
static camelize($string, $type=CAMEL_CASE_HEAD_UP)
Definition: String.inc.php:766
static concatTitleFields($fields)
Definition: String.inc.php:717
static & get($key, $createIfEmpty=false, $createWithDefault=null)
static substr_replace($string, $replacement, $start, $length=null)
Definition: String.inc.php:208
Multi-class transcoder; uses mbstring and iconv if available, otherwise falls back to built-in classe...
static substr_count($haystack, $needle)
Definition: String.inc.php:286
static regexp_match_all($pattern, $subject, &$matches)
Definition: String.inc.php:364
static html2text($html)
Definition: String.inc.php:460
static hasPCREUTF8()
Definition: String.inc.php:117
static utf8_compliant($str)
Definition: String.inc.php:490
static encode_mime_header($string)
Definition: String.inc.php:299