Open Journal Systems  3.3.0
StringHelper.php
1 <?php
3 /*
4  * citeproc-php
5  *
6  * @link http://github.com/seboettg/citeproc-php for the source repository
7  * @copyright Copyright (c) 2016 Sebastian Böttger.
8  * @license https://opensource.org/licenses/MIT
9  */
10 
11 namespace Seboettg\CiteProc\Util;
12 
15 
23 {
// Word lists used for title casing. keepLowerCase() reports a word as
// "keep lower case" when it is found in PREPOSITIONS, ARTICLES, CONJUNCTIONS
// or ADJECTIVES. All entries are lower-case ASCII.
24  const PREPOSITIONS = [
25  'on', 'in', 'at', 'since', 'for', 'ago', 'before', 'to', 'past', 'till', 'until', 'by', 'under', 'below',
26  'over', 'above', 'across', 'through', 'into', 'towards', 'onto', 'from', 'of', 'off', 'about', 'via'
27  ];
28 
// Articles that keepLowerCase() leaves un-capitalized.
29  const ARTICLES = [
30  'a', 'an', 'the'
31  ];
32 
// NOTE(review): ADVERBS is not referenced by any method visible in this file
// (keepLowerCase() does not consult it) - confirm whether that is intended.
// 'so' also appears in CONJUNCTIONS.
33  const ADVERBS = [
34  'yet', 'so', 'just', 'only'
35  ];
36 
// Conjunctions that keepLowerCase() leaves un-capitalized.
37  const CONJUNCTIONS = [
38  'nor', 'so', 'and', 'or'
39  ];
40 
// Consulted by keepLowerCase() together with the lists above.
41  const ADJECTIVES = [
42  'down', 'up'
43  ];
44 
// Candidate encodings handed to Mbstring::mb_detect_encoding() by mb_ucfirst();
// mb_ucfirst() only upper-cases the first character when the detected encoding
// is one of these entries.
45  const ISO_ENCODINGS = [
46  'ISO-8859-1',
47  'ISO-8859-2',
48  'ISO-8859-3',
49  'ISO-8859-4',
50  'ISO-8859-5',
51  'ISO-8859-6',
52  'ISO-8859-7',
53  'ISO-8859-8',
54  'ISO-8859-9',
55  'ISO-8859-10',
56  'ISO-8859-11',
57  'ISO-8859-13',
58  'ISO-8859-14',
59  'ISO-8859-15',
60  'ISO-8859-16'
61  ];
62 
// Typographic left double quotation mark (U+201C).
// NOTE(review): not referenced by any method visible in this file - presumably used by callers elsewhere.
66  const OPENING_QUOTE = "“";
67 
// Typographic right double quotation mark (U+201D).
71  const CLOSING_QUOTE = "”";
72 
/**
 * Upper-cases the first character of every word in the given text.
 *
 * Words are the pieces obtained by splitting on a single space character;
 * each piece is passed through ucfirst(), which is not multibyte-aware.
 *
 * @param string $text text whose words should be capitalized
 * @return string the pieces re-joined with single spaces
 */
public static function capitalizeAll($text)
{
    $capitalized = [];
    foreach (explode(" ", $text) as $piece) {
        $capitalized[] = ucfirst($piece);
    }

    return implode(" ", $capitalized);
}
87 
/**
 * Applies title capitalization to a string.
 *
 * Two passes are made:
 *  1. A lower-case ASCII letter that follows one of the punctuation marks
 *     . : / ; ? ! (optionally separated by one whitespace character) is
 *     upper-cased. preg_match() runs once, so at most one letter is changed.
 *  2. The string is split on spaces and every word is capitalized via
 *     mb_ucfirst() unless keepLowerCase() says it must stay as it is.
 *     Hyphenated words are split on "-" and each part is treated the same way
 *     before the compound itself is checked.
 *
 * Previously the callback applied to the hyphen-separated parts was empty, so
 * splitting and re-joining the compound had no effect and its parts were never
 * capitalized.
 *
 * @param string $titleString title to capitalize
 * @return string the capitalized title
 */
public static function capitalizeForTitle($titleString)
{
    if (preg_match('/(.+[^<>][\.:\/;\?\!]\s?)([a-z])(.+)/', $titleString, $match)) {
        $titleString = $match[1].StringHelper::mb_ucfirst($match[2]).$match[3];
    }

    // Capitalizes one token unless it belongs to the "keep lower case" words.
    $capitalize = function ($token) {
        return StringHelper::keepLowerCase($token) ? $token : StringHelper::mb_ucfirst($token);
    };

    $wordArray = explode(" ", $titleString);
    foreach ($wordArray as $index => $word) {
        $parts = explode("-", $word);
        if (count($parts) > 1) {
            // Treat every part of a hyphenated compound as a word of its own.
            $word = implode("-", array_map($capitalize, $parts));
        }
        $wordArray[$index] = $capitalize($word);
    }

    return implode(" ", $wordArray);
}
113 
/**
 * Tells whether a word must be left as it is when a title is capitalized.
 *
 * A word is kept when it is listed in PREPOSITIONS, ARTICLES, CONJUNCTIONS or
 * ADJECTIVES (exact, case-sensitive match), or when its first character is
 * not a Unicode letter and at least one more character follows.
 *
 * The pattern is anchored to the start of the word and evaluated in UTF-8
 * mode. Without the anchor any inner punctuation ("don't") matched; without
 * the "u" modifier the bytes of a multibyte letter were inspected one by one
 * and could be taken for non-letters.
 *
 * NOTE(review): the ADVERBS list is not consulted here - confirm that this is intended.
 *
 * @param string $word single word taken from a title
 * @return bool true when the word must not be capitalized
 */
public static function keepLowerCase($word)
{
    if (in_array($word, self::PREPOSITIONS, true)
        || in_array($word, self::ARTICLES, true)
        || in_array($word, self::CONJUNCTIONS, true)
        || in_array($word, self::ADJECTIVES, true)
    ) {
        return true;
    }

    // keep lower case if the first char is not an utf-8 letter
    return preg_match('/^[^\p{L}].+/u', $word) === 1;
}
127 
/**
 * Multibyte-aware counterpart of ucfirst().
 *
 * The string is split into its first character and the remainder using
 * $encoding. The first character is then upper-cased with the encoding that
 * Mbstring::mb_detect_encoding() picks from ISO_ENCODINGS; when no listed
 * encoding is detected the string is returned unchanged.
 *
 * @param string $string   text whose first character should be upper-cased
 * @param string $encoding encoding used to split off the first character
 * @return string
 */
public static function mb_ucfirst($string, $encoding = 'UTF-8')
{
    $length = mb_strlen($string, $encoding);
    $head = mb_substr($string, 0, 1, $encoding);
    $tail = mb_substr($string, 1, $length - 1, $encoding);

    // From here on the detected encoding replaces the one passed in.
    $detected = Mbstring::mb_detect_encoding($head, self::ISO_ENCODINGS, true);
    if (in_array($detected, self::ISO_ENCODINGS)) {
        return Mbstring::mb_strtoupper($head, $detected).$tail;
    }

    return $head.$tail;
}
144 
/**
 * Reverses a string character by character (multibyte-aware).
 *
 * Characters are read with mb_substr() using the internal mbstring encoding.
 * Iteration starts at the last valid index (length - 1); the previous start
 * at index "length" only appended an empty string.
 *
 * @param string $string text to reverse
 * @return string the reversed text
 */
public static function mb_strrev($string)
{
    $reversed = '';
    for ($index = mb_strlen($string) - 1; $index >= 0; --$index) {
        $reversed .= mb_substr($string, $index, 1);
    }

    return $reversed;
}
153 
/**
 * Joins strings with a delimiter without doubling the delimiter character(s).
 *
 * When the trimmed delimiter is not empty, every element that already ends
 * with it has that suffix removed before the elements are joined with the
 * original (untrimmed) delimiter. A whitespace-only delimiter triggers no
 * stripping.
 *
 * The suffix is compared directly instead of reversing both strings, and the
 * delimiter is tested against the empty string rather than with empty(), so
 * that a delimiter of "0" is handled as well.
 *
 * @param string   $delimiter      glue placed between the elements
 * @param string[] $arrayOfStrings elements to join
 * @return string
 */
public static function implodeAndPreventConsecutiveChars($delimiter, $arrayOfStrings)
{
    $delim = trim($delimiter);
    if ($delim !== '') {
        $delimLength = mb_strlen($delim);
        foreach ($arrayOfStrings as $key => $textPart) {
            // Drop a trailing delimiter so that joining does not duplicate it.
            if (mb_substr($textPart, -$delimLength) === $delim) {
                $arrayOfStrings[$key] = mb_substr($textPart, 0, mb_strlen($textPart) - $delimLength);
            }
        }
    }

    return implode($delimiter, $arrayOfStrings);
}
174 
/**
 * Turns a (given) name into initials.
 *
 * The name is split on "-" and every segment on " ". For each part:
 *  - if its first character is a Latin-script string (see isLatinString())
 *    and ctype_upper() accepts it, the character plus $initializeSign is appended;
 *  - if it is Latin but not accepted by ctype_upper(), the whole part is kept,
 *    surrounded by spaces;
 *  - otherwise the first character plus $initializeSign is appended.
 * Between hyphen-separated segments a "-" is re-inserted (after trimming
 * trailing whitespace) when the global option "initialize with hyphen" is set.
 *
 * @param string $string         name to initialize
 * @param string $initializeSign text appended after each initial
 * @return string
 */
public static function initializeBySpaceOrHyphen($string, $initializeSign)
{
    $initializeWithHyphen = CiteProc::getContext()->getGlobalOptions()->isInitializeWithHyphen();
    $hyphenSegments = explode("-", $string);
    $lastIndex = count($hyphenSegments) - 1;
    $result = "";

    foreach ($hyphenSegments as $index => $segment) {
        foreach (explode(" ", $segment) as $givenPart) {
            $firstLetter = mb_substr($givenPart, 0, 1, "UTF-8");
            if (StringHelper::isLatinString($firstLetter) && !ctype_upper($firstLetter)) {
                // Latin part that does not start with an upper-case letter: keep it in full.
                $result .= " ".$givenPart." ";
                continue;
            }
            $result .= $firstLetter.$initializeSign;
        }
        if ($initializeWithHyphen && $index < $lastIndex) {
            $result = rtrim($result)."-";
        }
    }

    return $result;
}
203 
/**
 * Converts a camelCase identifier to its lower-case, hyphen-separated form.
 *
 * A "-" is inserted before every ASCII upper-case letter, a single leading
 * "-" is removed again, and the result is lower-cased.
 *
 * @param string $string camelCase text
 * @return string e.g. "fooBar" becomes "foo-bar"
 */
public static function camelCase2Hyphen($string)
{
    $withHyphens = preg_replace("/([A-Z])/", "-$1", $string);
    $startsWithHyphen = strncmp($withHyphens, "-", 1) === 0;
    $trimmed = $startsWithHyphen ? substr($withHyphens, 1) : $withHyphens;

    return mb_strtolower($trimmed);
}
214 
/**
 * Checks whether a string is unchanged by mb_strtolower(), i.e. contains no
 * upper-case characters.
 *
 * @param string $string text to check
 * @return bool
 */
public static function checkLowerCaseString($string)
{
    $lowered = mb_strtolower($string);

    return $lowered === $string;
}
223 
/**
 * Checks whether a string is unchanged by mb_strtoupper(), i.e. contains no
 * lower-case characters.
 *
 * @param string $string text to check
 * @return bool
 */
public static function checkUpperCaseString($string)
{
    $uppered = mb_strtoupper($string);

    return $uppered === $string;
}
232 
/**
 * Replaces every straight apostrophe (') with the typographic one (’).
 *
 * @param string $string text to convert
 * @return string
 */
public static function clearApostrophes($string)
{
    $straightApostrophePattern = "/\'/";
    $typographicApostrophe = "’";

    return preg_replace($straightApostrophePattern, $typographicApostrophe, $string);
}
241 
/**
 * Replaces one pair of outer quotation marks in a text by inner quotation marks.
 *
 * The text is matched once against "<before><open><content><close><after>";
 * on a match the pair is swapped for $innerOpenQuote / $innerCloseQuote,
 * otherwise the text is returned unchanged. The content between the quotes
 * must be at least one character long.
 *
 * The quote arguments are escaped with preg_quote() before they are placed
 * into the pattern, so quotes that are regex metacharacters or the pattern
 * delimiter (for example "(", "[" or "/") are matched literally.
 *
 * @param string $text            text to process
 * @param string $outerOpenQuote  opening quote to look for
 * @param string $outerCloseQuote closing quote to look for
 * @param string $innerOpenQuote  replacement for the opening quote
 * @param string $innerCloseQuote replacement for the closing quote
 * @return string
 */
public static function replaceOuterQuotes(
    $text,
    $outerOpenQuote,
    $outerCloseQuote,
    $innerOpenQuote,
    $innerCloseQuote
) {
    $open = preg_quote($outerOpenQuote, '/');
    $close = preg_quote($outerCloseQuote, '/');

    if (preg_match("/(.*){$open}(.+){$close}(.*)/u", $text, $match)) {
        return $match[1].$innerOpenQuote.$match[2].$innerCloseQuote.$match[3];
    }

    return $text;
}
264 
/**
 * Checks whether a non-empty string consists only of characters from the
 * Unicode "Latin" and "Common" scripts (Common covers digits, punctuation
 * and whitespace).
 *
 * Uses preg_match() like the other script checks in this class; the pattern
 * is anchored, so a single match attempt is sufficient.
 *
 * @param string $string text to check
 * @return bool false for the empty string or when any other script occurs
 */
public static function isLatinString($string)
{
    return (bool) preg_match("/^[\p{Latin}\p{Common}]+$/u", $string);
}
274 
/**
 * Checks whether a non-empty string consists only of characters from the
 * Unicode "Cyrillic" and "Common" scripts.
 *
 * @param string $string text to check
 * @return bool false for the empty string or when any other script occurs
 */
public static function isCyrillicString($string)
{
    $cyrillicOnly = "/^[\p{Cyrillic}\p{Common}]+$/u";
    $matched = preg_match($cyrillicOnly, $string);

    return (bool) $matched;
}
283 
/**
 * Checks whether a string consists only of Han characters, whitespace and
 * punctuation. The pattern allows zero characters, so the empty string is
 * accepted as well.
 *
 * @param string $string text to check
 * @return bool
 */
public static function isAsianString($string)
{
    $hanOnly = "/^[\p{Han}\s\p{P}]*$/u";
    $matched = preg_match($hanOnly, $string);

    return (bool) $matched;
}
292 
/**
 * Strips all square, round and curly brackets from the given text.
 *
 * @param string $datePart text (e.g. a rendered date part) to clean
 * @return string the text without any of [ ] ( ) { }
 */
public static function removeBrackets($datePart)
{
    $brackets = ["[", "]", "(", ")", "{", "}"];
    $withoutBrackets = str_replace($brackets, "", $datePart);

    return $withoutBrackets;
}
302 }
Seboettg\CiteProc\Util\StringHelper\capitalizeAll
static capitalizeAll($text)
Definition: StringHelper.php:77
Seboettg\CiteProc\Util\StringHelper\isLatinString
static isLatinString($string)
Definition: StringHelper.php:269
Seboettg\CiteProc\Util\StringHelper\CLOSING_QUOTE
const CLOSING_QUOTE
Definition: StringHelper.php:71
Seboettg\CiteProc\Util\StringHelper\implodeAndPreventConsecutiveChars
static implodeAndPreventConsecutiveChars($delimiter, $arrayOfStrings)
Definition: StringHelper.php:159
Seboettg\CiteProc\Util\StringHelper\isCyrillicString
static isCyrillicString($string)
Definition: StringHelper.php:279
Seboettg\CiteProc\Util\StringHelper\ADVERBS
const ADVERBS
Definition: StringHelper.php:33
Seboettg\CiteProc\Util\StringHelper\PREPOSITIONS
const PREPOSITIONS
Definition: StringHelper.php:24
Seboettg\CiteProc\Util\StringHelper\keepLowerCase
static keepLowerCase($word)
Definition: StringHelper.php:118
Seboettg\CiteProc\Util\StringHelper\ARTICLES
const ARTICLES
Definition: StringHelper.php:29
Seboettg\CiteProc\Util\StringHelper\mb_ucfirst
static mb_ucfirst($string, $encoding='UTF-8')
Definition: StringHelper.php:133
Seboettg\CiteProc\Util\StringHelper\mb_strrev
static mb_strrev($string)
Definition: StringHelper.php:145
Seboettg\CiteProc\Util\StringHelper\replaceOuterQuotes
static replaceOuterQuotes( $text, $outerOpenQuote, $outerCloseQuote, $innerOpenQuote, $innerCloseQuote)
Definition: StringHelper.php:252
Seboettg\CiteProc\Util\StringHelper
Definition: StringHelper.php:22
Seboettg\CiteProc\CiteProc
Definition: CiteProc.php:32
Seboettg\CiteProc\Util\StringHelper\checkLowerCaseString
static checkLowerCaseString($string)
Definition: StringHelper.php:219
Symfony\Polyfill\Mbstring\Mbstring\mb_strtoupper
static mb_strtoupper($s, $encoding=null)
Definition: generic/citationStyleLanguage/lib/vendor/symfony/polyfill-mbstring/Mbstring.php:531
Seboettg\CiteProc\Util
Definition: CiteProcHelper.php:10
Symfony\Polyfill\Mbstring\Mbstring
Definition: generic/citationStyleLanguage/lib/vendor/symfony/polyfill-mbstring/Mbstring.php:67
Seboettg\CiteProc\Util\StringHelper\removeBrackets
static removeBrackets($datePart)
Definition: StringHelper.php:298
Seboettg\CiteProc\Util\StringHelper\OPENING_QUOTE
const OPENING_QUOTE
Definition: StringHelper.php:66
Seboettg\CiteProc\CiteProc\getContext
static getContext()
Definition: CiteProc.php:45
Seboettg\CiteProc\Util\StringHelper\capitalizeForTitle
static capitalizeForTitle($titleString)
Definition: StringHelper.php:92
Seboettg\CiteProc\Util\StringHelper\clearApostrophes
static clearApostrophes($string)
Definition: StringHelper.php:237
Seboettg\CiteProc\Util\StringHelper\CONJUNCTIONS
const CONJUNCTIONS
Definition: StringHelper.php:37
Seboettg\CiteProc\Util\StringHelper\initializeBySpaceOrHyphen
static initializeBySpaceOrHyphen($string, $initializeSign)
Definition: StringHelper.php:180
Seboettg\CiteProc\Util\StringHelper\ISO_ENCODINGS
const ISO_ENCODINGS
Definition: StringHelper.php:45
Seboettg\CiteProc\Util\StringHelper\camelCase2Hyphen
static camelCase2Hyphen($string)
Definition: StringHelper.php:208
Seboettg\CiteProc\Util\StringHelper\checkUpperCaseString
static checkUpperCaseString($string)
Definition: StringHelper.php:228
Seboettg\CiteProc\Util\StringHelper\isAsianString
static isAsianString($string)
Definition: StringHelper.php:288
Seboettg\CiteProc\Util\StringHelper\ADJECTIVES
const ADJECTIVES
Definition: StringHelper.php:41
Symfony\Polyfill\Mbstring\Mbstring\mb_detect_encoding
static mb_detect_encoding($str, $encodingList=null, $strict=false)
Definition: generic/citationStyleLanguage/lib/vendor/symfony/polyfill-mbstring/Mbstring.php:411