We are moving to Git Issues for bug tracking in future releases. During transition, content will be in both tools. If you'd like to file a new bug, please create an issue.

View | Details | Raw Unified | Return to bug 3089 | Differences between
and this patch

Collapse All | Expand All

(-)ocs2/classes/form/Form.inc.php (-19 / +2 lines)
 Lines 10-16    Link Here 
10
 *
10
 *
11
 * Class defining basic operations for handling HTML forms.
11
 * Class defining basic operations for handling HTML forms.
12
 *
12
 *
13
 * $Id: Form.inc.php,v 1.6 2007/05/11 03:20:45 mj Exp $
13
 * $Id: Form.inc.php,v 1.10 2007/06/08 15:59:59 mj Exp $
14
 */
14
 */
15
15
16
import('form.FormError');
16
import('form.FormError');
 Lines 88-112   class Form { Link Here 
88
	 */
88
	 */
89
	function setData($key, $value) {
89
	function setData($key, $value) {
90
90
91
		if (is_string($value)) {
91
		if (is_string($value)) $value = Core::cleanVar($value);
92
92
93
			// check for Windows-1252 encoding, and transliterate if necessary	
94
			if ($value === utf8_decode($value) && $value !== utf8_encode($value)) {
95
				// string is cp1252
96
				// transliterate cp1252->utf8 to work in utf-8 
97
				// utf8_decode to work in latin-1 (information may be lost)
98
				import('core.Transcoder');
99
				$trans =& new Transcoder('CP1252', 'UTF-8');
100
				$value = $trans->trans($value);
101
102
			} elseif ($value !== utf8_decode($value) && $value !== utf8_encode($value)) {
103
				// string is not within utf-8(?)
104
				// normalize to ASCII (lowest common encoding) - information will be lost
105
				import('core.Transcoder');
106
				$trans =& new Transcoder('UTF-8', 'ASCII');
107
				$value = $trans->trans($value);
108
			}
109
		}
110
		$this->_data[$key] = $value;
93
		$this->_data[$key] = $value;
111
	}
94
	}
(-)classes/core/Core.inc.php (-2 / +18 lines)
 Lines 37-44    Link Here 
37
	 * @param $stripHtml boolean optional, will encode HTML if set to true
37
	 * @param $stripHtml boolean optional, will encode HTML if set to true
38
	 * @return string
38
	 * @return string
39
	 */
39
	 */
40
	function cleanVar($var, $stripHtml = false) {
40
	function cleanVar($var) {
41
		return $stripHtml ? htmlspecialchars(trim($var), ENT_NOQUOTES, Config::getVar('i18n', 'client_charset')) : trim($var);
41
		// normalize existing HTML special characters to ASCII
42
		$var = strtr(trim($var), array("&amp;" => "&", "&quot" => '"', "&lt;" => "<", "&gt;" => ">"));
43
44
		// only process strings that are not UTF-8 already
45
		if ( !String::isUTF8($var) && Config::getVar('i18n', 'charset_normalization') == 'On' ) {
46
			import('core.Transcoder');
47
48
			// convert string to HTML entities (numeric and named)
49
			$trans =& new Transcoder('CP1252', 'HTML-ENTITIES');
50
			$var = $trans->trans($var);
51
52
			// convert UTF-8 entities back to UTF-8 characters
53
			$trans =& new Transcoder('HTML-ENTITIES', 'UTF-8');
54
			$var = $trans->trans($var);
55
		}		
56
57
		return $var;
42
	}
58
	}
43
	/**
59
	/**
(-)classes/core/String.inc.php (-30 / +267 lines)
 Lines 287-294    Link Here 
287
	}
287
	}
288
	/**
288
	/**
289
	 * Detect whether a string contains at least one valid non-ASCII multibyte UTF-8 sequence
290
	 * Does not require any multibyte PHP libraries
291
	 * @param $str string input string
292
	 * @return boolean
293
	 */
294
	function isUTF8 ($str) {
295
		// From http://w3.org/International/questions/qa-forms-utf-8.html
296
		return preg_match('%(?:
297
				[\xC2-\xDF][\x80-\xBF]								# non-overlong 2-byte
298
				|\xE0[\xA0-\xBF][\x80-\xBF]					# excluding overlongs
299
				|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}		# straight 3-byte
300
				|\xED[\x80-\x9F][\x80-\xBF]					# excluding surrogates
301
				|\xF0[\x90-\xBF][\x80-\xBF]{2}				# planes 1-3
302
				|[\xF1-\xF3][\x80-\xBF]{3}						# planes 4-15
303
				|\xF4[\x80-\x8F][\x80-\xBF]{2}				# plane 16
304
				)+%xs', $str);
305
	}
306
307
	/**
308
	 * Returns the UTF-8 string corresponding to the unicode value
309
	 * Does not require any multibyte PHP libraries
310
	 * (from php.net, courtesy - romans@void.lv)
311
	 * @param $num int Unicode code point value
312
	 * @return string UTF-8 byte sequence for the code point (empty string if out of range)
313
	 */
314
	function code2utf ($num) {
315
		if ($num < 128) return chr($num);
316
		if ($num < 2048) return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
317
		if ($num < 65536) return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
318
		if ($num < 2097152) return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
319
		return '';
320
	}
321
322
	/**
289
	 * Convert UTF-8 encoded characters in a string to escaped HTML entities
323
	 * Convert UTF-8 encoded characters in a string to escaped HTML entities
290
	 * This is a helper function for transcoding HTML (perhaps move to core?)
324
	 * This is a helper function for transcoding into HTML
291
	 * @param $input string input string
325
	 * @param $input string input string
292
	 * @return string
326
	 * @return string
293
	 */
327
	 */
 Lines 296-335    Link Here 
296
		$ret = "";
330
		$ret = "";
297
		$max = strlen($str);
331
		$max = strlen($str);
298
		$last = 0;  // keeps the index of the last regular character
332
		$last = 0;  // keeps the index of the last regular character
299
		
333
300
   for ($i=0; $i<$max; $i++) {
334
		for ($i=0; $i<$max; $i++) {
301
       $c = $str{$i};
335
			$c = $str{$i};
302
       $c1 = ord($c);
336
			$c1 = ord($c);
303
       if ($c1>>5 == 6) {  // 110x xxxx, 110 prefix for 2 bytes unicode
337
			if ($c1>>5 == 6) {										// 110x xxxx, 110 prefix for 2 bytes unicode
304
           $ret .= substr($str, $last, $i-$last); // append all the regular characters we've passed
338
				$ret .= substr($str, $last, $i-$last);			// append all the regular characters we've passed
305
           $c1 &= 31; // remove the 3 bit two bytes prefix
339
				$c1 &= 31;													// remove the 3 bit two bytes prefix
306
           $c2 = ord($str{++$i}); // the next byte
340
				$c2 = ord($str{++$i});								// the next byte
307
           $c2 &= 63;  // remove the 2 bit trailing byte prefix
341
				$c2 &= 63;													// remove the 2 bit trailing byte prefix
308
           $c2 |= (($c1 & 3) << 6); // last 2 bits of c1 become first 2 of c2
342
				$c2 |= (($c1 & 3) << 6);							// last 2 bits of c1 become first 2 of c2
309
           $c1 >>= 2; // c1 shifts 2 to the right
343
				$c1 >>= 2;													// c1 shifts 2 to the right
310
           $ret .= "&#" . ($c1 * 0x100 + $c2) . ";"; // this is the fastest string concatenation
344
				$ret .= "&#" . ($c1 * 0x100 + $c2) . ";";	// this is the fastest string concatenation
311
           $last = $i+1;     
345
				$last = $i+1;     
312
       }
346
			}
313
       elseif ($c1>>4 == 14) {  // 1110 xxxx, 110 prefix for 3 bytes unicode
347
			elseif ($c1>>4 == 14) { 								// 1110 xxxx, 110 prefix for 3 bytes unicode
314
           $ret .= substr($str, $last, $i-$last); // append all the regular characters we've passed
348
				$ret .= substr($str, $last, $i-$last);			// append all the regular characters we've passed
315
           $c2 = ord($str{++$i}); // the next byte
349
				$c2 = ord($str{++$i}); 								// the next byte
316
           $c3 = ord($str{++$i}); // the third byte
350
				$c3 = ord($str{++$i}); 								// the third byte
317
           $c1 &= 15; // remove the 4 bit three bytes prefix
351
				$c1 &= 15; 												// remove the 4 bit three bytes prefix
318
           $c2 &= 63;  // remove the 2 bit trailing byte prefix
352
				$c2 &= 63; 												// remove the 2 bit trailing byte prefix
319
           $c3 &= 63;  // remove the 2 bit trailing byte prefix
353
				$c3 &= 63; 												// remove the 2 bit trailing byte prefix
320
           $c3 |= (($c2 & 3) << 6); // last 2 bits of c2 become first 2 of c3
354
				$c3 |= (($c2 & 3) << 6);							// last 2 bits of c2 become first 2 of c3
321
           $c2 >>=2; //c2 shifts 2 to the right
355
				$c2 >>=2; 													//c2 shifts 2 to the right
322
           $c2 |= (($c1 & 15) << 4); // last 4 bits of c1 become first 4 of c2
356
				$c2 |= (($c1 & 15) << 4);							// last 4 bits of c1 become first 4 of c2
323
           $c1 >>= 4; // c1 shifts 4 to the right
357
				$c1 >>= 4; 												// c1 shifts 4 to the right
324
           $ret .= '&#' . (($c1 * 0x10000) + ($c2 * 0x100) + $c3) . ';'; // this is the fastest string concatenation
358
				$ret .= '&#' . (($c1 * 0x10000) + ($c2 * 0x100) + $c3) . ';'; // this is the fastest string concatenation
325
           $last = $i+1;     
359
				$last = $i+1;     
326
       }
360
			}
327
   }
361
		}
328
		$str=$ret . substr($str, $last, $i); // append the last batch of regular characters
362
		$str=$ret . substr($str, $last, $i); // append the last batch of regular characters
329
		return $str;   
363
		return $str;   
330
	}
364
	}
365
	/**
366
	 * Convert named and numeric HTML entities in a string to UTF-8 encoded characters
367
	 * This is a native alternative to the buggy html_entity_decode() using UTF8
368
	 * @param $str string input string
369
	 * @return string
370
	 */
371
	function html2utf($str) {
372
		// convert named entities to numeric entities
373
		$str = strtr($str, String::getHTMLEntities());
374
375
		// use PCRE-aware replace function to replace numeric entities
376
		$str = String::regexp_replace('~&#x([0-9a-f]+);~ei', 'String::code2utf(hexdec("\\1"))', $str);
377
		$str = String::regexp_replace('~&#([0-9]+);~e', 'String::code2utf(\\1)', $str);
378
379
		return $str;
380
	 }
381
382
	/**
383
	 * Convert numeric HTML entities in a string to their closest ASCII equivalents
384
	 * This is a helper function for transcoding into HTML/XML
385
	 * @param $str string input string
386
	 * @return string
387
	 */
388
	function html2ascii ($str) {
389
		// define the conversion table
390
		$entities = array(
391
			"&#126;" => "~",			"&#160;" => " ",				"&#161;" => "!",
392
			"&#166;" => "|",				"&#177;" => "+/-",		"&#178;" => "2",
393
			"&#179;" => "3",			"&#180;" => "'",				"&#185;" => "1",
394
			"&#188;" => "1/4",		"&#189;" => "1/2",		"&#190;" => "3/4",
395
			"&#191;" => "?",				"&#192;" => "A",			"&#193;" => "A",
396
			"&#194;" => "A",			"&#195;" => "A",			"&#196;" => "A",
397
			"&#197;" => "A",			"&#198;" => "AE",			"&#199;" => "C",
398
			"&#200;" => "E",			"&#201;" => "E",			"&#202;" => "E",
399
			"&#203;" => "E",			"&#204;" => "I",				"&#205;" => "I",
400
			"&#206;" => "I",				"&#207;" => "I",				"&#208;" => "D",
401
			"&#209;" => "N",			"&#210;" => "O",			"&#211;" => "O",
402
			"&#212;" => "O",			"&#213;" => "O",			"&#214;" => "O",
403
			"&#215;" => "x",			"&#216;" => "O",			"&#217;" => "U",
404
			"&#218;" => "U",			"&#220;" => "U",			"&#221;" => "Y",
405
			"&#224;" => "a",			"&#225;" => "a",			"&#226;" => "a",
406
			"&#227;" => "a",			"&#228;" => "a",			"&#229;" => "a",
407
			"&#230;" => "ae",			"&#231;" => "c",				"&#232;" => "e",
408
			"&#233;" => "e",			"&#234;" => "e",			"&#235;" => "e",
409
			"&#236;" => "i",				"&#237;" => "i",				"&#238;" => "i",
410
			"&#239;" => "i",				"&#240;" => "o",			"&#241;" => "n",
411
			"&#242;" => "o",			"&#243;" => "o",			"&#244;" => "o",
412
			"&#245;" => "o",			"&#246;" => "o",			"&#248;" => "o",
413
			"&#249;" => "u",			"&#250;" => "u",			"&#252;" => "u",
414
			"&#253;" => "y",				"&#255;" => "y",				"&#338;" => "OE",
415
			"&#339;" => "oe",			"&#352;" => "S",			"&#353;" => "s",
416
			"&#376;" => "Y",			"&#39;" => "'",				"&#402;" => "f",
417
			"&#45;" => "-",				"&#710;" => "^",			"&#732;" => "~",
418
			"&#8194;" => " ",			"&#8195;" => " ",			"&#8201;" => " ",
419
			"&#8211;" => "-",			"&#8212;" => "--",		"&#8216;" => "'",
420
			"&#8217;" => "'",			"&#8218;" => ",",			"&#8220;" => '"',
421
			"&#8221;" => '"',			"&#8222;" => ",,",			"&#8226;" => "*",
422
			"&#8230;" => "...",			"&#8240;" => "%o",		"&#8242;" => "'",
423
			"&#8243;" => "''",			"&#8482;" => "TM",		"&#8722;" => "-",
424
			"&#8727;" => "*",			"&#8743;" => "/\\",		"&#8744;" => "\/",
425
			"&#8764;" => "~",			"&#8901;" => "*",			"&#913;" => "A",
426
			"&#914;" => "B",			"&#917;" => "E",			"&#918;" => "Z",
427
			"&#919;" => "H",			"&#921;" => "|",				"&#922;" => "K",
428
			"&#924;" => "M",			"&#925;" => "N",			"&#927;" => "O",
429
			"&#929;" => "P",			"&#932;" => "T",			"&#933;" => "Y",
430
			"&#935;" => "X",			"&#94;" => "^",				"&#959;" => "o",
431
			"&#961;" => "p",			"&#962;" => "?",				"&#977;" => "?",
432
			"&#982;" => "?");
433
434
		return strtr($str, $entities);
435
	}
436
437
	/**
438
	 * Convert Windows CP-1252 numeric entities in a string to named HTML entities
439
	 * This is a helper function for transcoding into HTML/XML
440
	 * @param $str string input string
441
	 * @return string
442
	 */
443
	function cp1252ToEntities ($str) {
444
		// define the conversion table;  from: http://www.noqta.it/tc.html
445
		$cp1252 = array(	"&#128;" => "",						"&#129;" => "",
446
										"&#130;" => "&lsquor;",		"&#131;" => "&fnof;",
447
										"&#132;" => "&ldquor;",		"&#133;" => "&hellip;",
448
										"&#134;" => "&dagger;",		"&#135;" => "&Dagger;",
449
										"&#136;" => "",						"&#137;" => "&permil;",
450
										"&#138;" => "&Scaron;",		"&#139;" => "&lsaquo;",
451
										"&#140;" => "&OElig;",			"&#141;" => "",
452
										"&#142;" => "",						"&#143;" => "",
453
										"&#144;" => "",						"&#145;" => "&lsquo;",
454
										"&#146;" => "&rsquo;",			"&#147;" => "&ldquo;",
455
										"&#148;" => "&rdquo;",		"&#149;" => "&bull;",
456
										"&#150;" => "&ndash;",		"&#151;" => "&mdash;",
457
										"&#152;" => "&tilde;",			"&#153;" => "&trade;",
458
										"&#154;" => "&scaron;",		"&#155;" => "&rsaquo;",
459
										"&#156;" => "&oelig;",			"&#157;" => "",
460
										"&#158;" => "",						"&#159;" => "&Yuml;");
461
462
		// corrections to map to valid ISO entities
463
		$cp1252["&#130;"] = "&lsquo;";
464
		$cp1252["&#132;"] = "&ldquo;";
465
		$cp1252["&#146;"] = "&rsquo;";
466
		$cp1252["&#148;"] = "&rdquo;";
467
468
		return strtr($str, $cp1252);
469
	}
470
471
	/**
472
	 * Return an associative array of named->numeric HTML entities
473
	 * Required to support HTML functions without objects in PHP4/PHP5
474
	 * From php.net: function.get-html-translation-table.php
475
	 * @return array
476
	 */
477
	function getHTMLEntities () {
478
		// define the conversion table
479
		$html_entities = array(
480
			"&Aacute;" => "&#193;",			"&aacute;" => "&#225;",			"&Acirc;" => "&#194;",
481
			"&acirc;" => "&#226;",				"&acute;" => "&#180;",				"&AElig;" => "&#198;",
482
			"&aelig;" => "&#230;",				"&Agrave;" => "&#192;",			"&agrave;" => "&#224;",
483
			"&alefsym;" => "&#8501;",		"&Alpha;" => "&#913;",				"&alpha;" => "&#945;",
484
			"&amp;" => "&#38;",					"&and;" => "&#8743;",				"&ang;" => "&#8736;",
485
			"&apos;" => "&#39;",					"&Aring;" => "&#197;",				"&aring;" => "&#229;",
486
			"&asymp;" => "&#8776;",			"&Atilde;" => "&#195;",				"&atilde;" => "&#227;",
487
			"&Auml;" => "&#196;",				"&auml;" => "&#228;",				"&bdquo;" => "&#8222;",
488
			"&Beta;" => "&#914;",				"&beta;" => "&#946;",				"&brvbar;" => "&#166;",
489
			"&bull;" => "&#8226;",				"&cap;" => "&#8745;",				"&Ccedil;" => "&#199;",
490
			"&ccedil;" => "&#231;",				"&cedil;" => "&#184;",				"&cent;" => "&#162;",
491
			"&Chi;" => "&#935;",					"&chi;" => "&#967;",					"&circ;" => "&#94;",
492
			"&clubs;" => "&#9827;",			"&cong;" => "&#8773;",			"&copy;" => "&#169;",
493
			"&crarr;" => "&#8629;",			"&cup;" => "&#8746;",				"&curren;" => "&#164;",
494
			"&dagger;" => "&#8224;",		"&Dagger;" => "&#8225;",		"&darr;" => "&#8595;",
495
			"&dArr;" => "&#8659;",				"&deg;" => "&#176;",				"&Delta;" => "&#916;",
496
			"&delta;" => "&#948;",				"&diams;" => "&#9830;",			"&divide;" => "&#247;",
497
			"&Eacute;" => "&#201;",			"&eacute;" => "&#233;",			"&Ecirc;" => "&#202;",
498
			"&ecirc;" => "&#234;",				"&Egrave;" => "&#200;",			"&egrave;" => "&#232;",
499
			"&empty;" => "&#8709;",			"&emsp;" => "&#8195;",			"&ensp;" => "&#8194;",
500
			"&Epsilon;" => "&#917;",			"&epsilon;" => "&#949;",			"&equiv;" => "&#8801;",
501
			"&Eta;" => "&#919;",					"&eta;" => "&#951;",					"&ETH;" => "&#208;",
502
			"&eth;" => "&#240;",					"&Euml;" => "&#203;",				"&euml;" => "&#235;",
503
			"&euro;" => "&#8364;",				"&exist;" => "&#8707;",			"&fnof;" => "&#402;",
504
			"&forall;" => "&#8704;",			"&frac12;" => "&#189;",			"&frac14;" => "&#188;",
505
			"&frac34;" => "&#190;",			"&frasl;" => "&#8260;",				"&Gamma;" => "&#915;",
506
			"&gamma;" => "&#947;",			"&ge;" => "&#8805;",				"&gt;" => "&#62;",
507
			"&harr;" => "&#8596;",				"&hArr;" => "&#8660;",				"&hearts;" => "&#9829;",
508
			"&hellip;" => "&#8230;",			"&Iacute;" => "&#205;",				"&iacute;" => "&#237;",
509
			"&Icirc;" => "&#206;",				"&icirc;" => "&#238;",				"&iexcl;" => "&#161;",
510
			"&Igrave;" => "&#204;",			"&igrave;" => "&#236;",			"&image;" => "&#8465;",
511
			"&infin;" => "&#8734;",				"&int;" => "&#8747;",				"&Iota;" => "&#921;",
512
			"&iota;" => "&#953;",				"&iquest;" => "&#191;",			"&isin;" => "&#8712;",
513
			"&Iuml;" => "&#207;",				"&iuml;" => "&#239;",				"&Kappa;" => "&#922;",
514
			"&kappa;" => "&#954;",			"&Lambda;" => "&#923;",			"&lambda;" => "&#955;",
515
			"&lang;" => "&#9001;",				"&laquo;" => "&#171;",				"&larr;" => "&#8592;",
516
			"&lArr;" => "&#8656;",				"&lceil;" => "&#8968;",				
517
			"&ldquo;" => "&#8220;",			"&le;" => "&#8804;",					"&lfloor;" => "&#8970;",
518
			"&lowast;" => "&#8727;",			"&loz;" => "&#9674;",				"&lrm;" => "&#8206;",
519
			"&lsaquo;" => "&#8249;",			"&lsquo;" => "&#8216;",			"&lt;" => "&#60;",
520
			"&macr;" => "&#175;",				"&mdash;" => "&#8212;",			"&micro;" => "&#181;",
521
			"&middot;" => "&#183;",			"&minus;" => "&#45;",				"&Mu;" => "&#924;",
522
			"&mu;" => "&#956;",					"&nabla;" => "&#8711;",			"&nbsp;" => "&#160;",
523
			"&ndash;" => "&#8211;",			"&ne;" => "&#8800;",				"&ni;" => "&#8715;",
524
			"&not;" => "&#172;",					"&notin;" => "&#8713;",			"&nsub;" => "&#8836;",
525
			"&Ntilde;" => "&#209;",				"&ntilde;" => "&#241;",				"&Nu;" => "&#925;",
526
			"&nu;" => "&#957;",					"&Oacute;" => "&#211;",			"&oacute;" => "&#243;",
527
			"&Ocirc;" => "&#212;",				"&ocirc;" => "&#244;",				"&OElig;" => "&#338;",
528
			"&oelig;" => "&#339;",				"&Ograve;" => "&#210;",			"&ograve;" => "&#242;",
529
			"&oline;" => "&#8254;",			"&Omega;" => "&#937;",			"&omega;" => "&#969;",
530
			"&Omicron;" => "&#927;",		"&omicron;" => "&#959;",			"&oplus;" => "&#8853;",
531
			"&or;" => "&#8744;",					"&ordf;" => "&#170;",				"&ordm;" => "&#186;",
532
			"&Oslash;" => "&#216;",			"&oslash;" => "&#248;",			"&Otilde;" => "&#213;",
533
			"&otilde;" => "&#245;",				"&otimes;" => "&#8855;",			"&Ouml;" => "&#214;",
534
			"&ouml;" => "&#246;",				"&para;" => "&#182;",				"&part;" => "&#8706;",
535
			"&permil;" => "&#8240;",			"&perp;" => "&#8869;",				"&Phi;" => "&#934;",
536
			"&phi;" => "&#966;",					"&Pi;" => "&#928;",					"&pi;" => "&#960;",
537
			"&piv;" => "&#982;",					"&plusmn;" => "&#177;",			"&pound;" => "&#163;",
538
			"&prime;" => "&#8242;",			"&Prime;" => "&#8243;",			"&prod;" => "&#8719;",
539
			"&prop;" => "&#8733;",			"&Psi;" => "&#936;",					"&psi;" => "&#968;",
540
			"&quot;" => "&#34;",					"&radic;" => "&#8730;",			"&rang;" => "&#9002;",
541
			"&raquo;" => "&#187;",				"&rarr;" => "&#8594;",				"&rArr;" => "&#8658;",
542
			"&rceil;" => "&#8969;",				"&rdquo;" => "&#8221;",			"&real;" => "&#8476;",
543
			"&reg;" => "&#174;",					"&rfloor;" => "&#8971;",			"&Rho;" => "&#929;",
544
			"&rho;" => "&#961;",					"&rlm;" => "&#8207;",				"&rsaquo;" => "&#8250;",
545
			"&rsquo;" => "&#8217;",			"&sbquo;" => "&#8218;",			"&Scaron;" => "&#352;",
546
			"&scaron;" => "&#353;",			"&sdot;" => "&#8901;",				"&sect;" => "&#167;",
547
			"&shy;" => "&#173;",					"&Sigma;" => "&#931;",			"&sigma;" => "&#963;",
548
			"&sigmaf;" => "&#962;",			"&sim;" => "&#8764;",				"&spades;" => "&#9824;",
549
			"&sub;" => "&#8834;",				"&sube;" => "&#8838;",			"&sum;" => "&#8721;",
550
			"&sup1;" => "&#185;",				"&sup2;" => "&#178;",				"&sup3;" => "&#179;",
551
			"&sup;" => "&#8835;",				"&supe;" => "&#8839;",			"&szlig;" => "&#223;",
552
			"&Tau;" => "&#932;",				"&tau;" => "&#964;",					"&there4;" => "&#8756;",
553
			"&Theta;" => "&#920;",				"&theta;" => "&#952;",				"&thetasym;" => "&#977;",
554
			"&thinsp;" => "&#8201;",			"&THORN;" => "&#222;",			"&thorn;" => "&#254;",
555
			"&tilde;" => "&#126;",				"&times;" => "&#215;",				"&trade;" => "&#8482;",
556
			"&Uacute;" => "&#218;",			"&uacute;" => "&#250;",			"&uarr;" => "&#8593;",
557
			"&uArr;" => "&#8657;",				"&Ucirc;" => "&#219;",				"&ucirc;" => "&#251;",
558
			"&Ugrave;" => "&#217;",			"&ugrave;" => "&#249;",			"&uml;" => "&#168;",
559
			"&upsih;" => "&#978;",				"&Upsilon;" => "&#933;",			"&upsilon;" => "&#965;",
560
			"&Uuml;" => "&#220;",				"&uuml;" => "&#252;",				"&weierp;" => "&#8472;",
561
			"&Xi;" => "&#926;",					"&xi;" => "&#958;",					"&Yacute;" => "&#221;",
562
			"&yacute;" => "&#253;",			"&yen;" => "&#165;",					"&yuml;" => "&#255;",
563
			"&Yuml;" => "&#376;",				"&Zeta;" => "&#918;",				"&zeta;" => "&#950;",
564
			"&zwj;" => "&#8205;",				"&zwnj;" => "&#8204;");
565
566
		return $html_entities;
567
	}
331
}
568
}
332
?>
569
?>
(-)classes/core/Transcoder.inc.php (-7 / +53 lines)
 Lines 16-41    Link Here 
16
class Transcoder {
16
class Transcoder {
17
	var $fromEncoding;
17
	var $fromEncoding;
18
	var $toEncoding;
18
	var $toEncoding;
19
	function Transcoder($fromEncoding, $toEncoding) {
19
	var $translit;
20
	function Transcoder($fromEncoding, $toEncoding, $translit = false) {
20
		$this->fromEncoding = $fromEncoding;
21
		$this->fromEncoding = $fromEncoding;
21
		$this->toEncoding = $toEncoding;
22
		$this->toEncoding = $toEncoding;
23
		$this->translit = $translit;
22
	}
24
	}
23
	function trans($string) {
25
	function trans($string) {
24
		if (function_exists('iconv')) {
26
		// detect existence of encoding conversion libraries
27
		$mbstring = function_exists('mb_convert_encoding');
28
		$iconv = function_exists('iconv');
29
30
		// ===	special cases for HTML entities to handle various PHP platforms
31
		// 'HTML-ENTITIES' is not a valid encoding for iconv, so transcode manually
32
33
		if ($this->toEncoding == 'HTML-ENTITIES' && !$mbstring) {
34
35
			if ( strtoupper($this->fromEncoding) == 'UTF-8' ) {
36
				return String::utf2html($string);		// NB: this will return all numeric entities
37
			} else {
38
				// NB: old PHP versions may have issues with htmlentities()
39
				if ($string == html_entity_decode($string, ENT_COMPAT, $this->fromEncoding)) {
40
					return htmlentities($string, ENT_COMPAT, $this->fromEncoding);
41
				} else {
42
					return $string;
43
				}
44
			}
45
46
		} elseif ($this->fromEncoding == 'HTML-ENTITIES' && !$mbstring) {
47
48
			if ( strtoupper($this->toEncoding) == 'UTF-8' ) {
49
				// use built-in transcoding to UTF8
50
				return String::html2utf($string);
51
52
			} else {
53
				// NB: old PHP versions may have issues with html_entity_decode()
54
				return html_entity_decode($string, ENT_COMPAT, $this->toEncoding);
55
			}
56
57
		// === end special cases for HTML entities
58
59
		} elseif ($this->translit == true && $iconv) {
25
			// use the iconv library to transliterate
60
			// use the iconv library to transliterate
26
			return iconv($this->fromEncoding, $this->toEncoding . '//TRANSLIT', $string);
61
			return iconv($this->fromEncoding, $this->toEncoding . '//TRANSLIT', $string);
27
		} elseif (function_exists('mb_convert_encoding')) {
62
		} elseif ($this->translit == true && $this->fromEncoding == "UTF-8" && $this->toEncoding == "ASCII") {
28
			// fall back to using the multibyte library if necessary (no transliteration)
63
			// transliterate using built-in mapping
29
			return mb_convert_encoding($string, $this->toEncoding, $this->fromEncoding);
64
			return String::html2utf(String::html2ascii(String::utf2html($string)));
65
66
		// === end special cases for transliteration
67
68
		} elseif ($mbstring) {
69
			// use the multibyte library to transcode (no transliteration)
70
			// calling through call_user_func_array() with a by-reference argument is backwards-compatible and more reliable
71
			return call_user_func_array('mb_convert_encoding', array(&$string, $this->toEncoding, $this->fromEncoding));
72
73
		} elseif ($iconv) {
74
			// use the iconv library to transcode
75
			return iconv($this->fromEncoding, $this->toEncoding . '//IGNORE', $string);
30
		} else {
76
		} else {
31
			// fail gracefully by returning the original string unchanged
77
			// fail gracefully by returning the original string unchanged
32
			return $string;
78
			return $string;
33
		}
79
		}
34
	}
80
	}
35
}
81
}
36
?>
82
?>
(-)config.TEMPLATE.inc.php (+4 lines)
 Lines 131-136    Link Here 
131
; Must be set to "Off" if not supported by the database server
131
; Must be set to "Off" if not supported by the database server
132
database_charset = Off
132
database_charset = Off
133
; Enable character normalization to utf-8 (recommended)
134
; If disabled, strings will be passed through in their native encoding
135
charset_normalization = On
136
133
; Default time zone
137
; Default time zone
134
; default_timezone = 
138
; default_timezone = 

Return to bug 3089