138 lines
5 KiB
PHP
138 lines
5 KiB
PHP
|
<?php
/*
 * @package AJAX_Chat
 * @author Sebastian Tschan
 * @copyright (c) Sebastian Tschan
 * @license GNU Affero General Public License
 * @link https://blueimp.net/ajax/
 */

// Class to provide static encoding methods
/**
 * Static helpers for HTML entity encoding/decoding, charset conversion and
 * removal of unsafe control characters.
 *
 * All methods are static; the class holds no state.
 */
class AJAXChatEncoding {

    /**
     * Returns the map of HTML special characters to their entity replacements.
     *
     * The apostrophe is mapped to the numeric entity &#39; because the named
     * entity &apos; is not supported by Internet Explorer.
     *
     * @return array Map of raw character => HTML entity.
     */
    public static function getSpecialChars() {
        static $specialChars;
        if(!$specialChars) {
            $specialChars = array(
                '&' => '&amp;',
                '<' => '&lt;',
                '>' => '&gt;',
                "'" => '&#39;',
                '"' => '&quot;'
            );
        }
        return $specialChars;
    }

    /**
     * Returns the regular expression matching the characters stripped by
     * removeUnsafeCharacters().
     *
     * The character class covers NUL (0) plus the NO-WS-CTL set of RFC 2822:
     * decimal 1-8, 11-12, 14-31 and 127. Tab (9), LF (10) and CR (13) are
     * deliberately not matched.
     *
     * @return string PCRE pattern including delimiters.
     */
    public static function getRegExp_NO_WS_CTL() {
        // Single-quoted on purpose: the \x.. escapes are interpreted by PCRE, not by PHP.
        return '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/';
    }

    /**
     * Converts a string from one character set to another.
     *
     * Uses mbstring if available, then iconv, then the built-in
     * UTF-8 <-> ISO-8859-1 converters. If none of these can handle the
     * requested conversion the input is returned unchanged.
     *
     * @param string $str         String to convert.
     * @param string $charsetFrom Source charset name.
     * @param string $charsetTo   Target charset name.
     * @return string|false Converted string; iconv() may return false on failure.
     */
    public static function convertEncoding($str, $charsetFrom, $charsetTo) {
        if(function_exists('mb_convert_encoding')) {
            // Note the argument order: target charset comes before source charset.
            return mb_convert_encoding($str, $charsetTo, $charsetFrom);
        }
        if(function_exists('iconv')) {
            return iconv($charsetFrom, $charsetTo, $str);
        }
        // utf8_decode()/utf8_encode() are deprecated as of PHP 8.2, so only use them if present:
        if(($charsetFrom == 'UTF-8') && ($charsetTo == 'ISO-8859-1') && function_exists('utf8_decode')) {
            return utf8_decode($str);
        }
        if(($charsetFrom == 'ISO-8859-1') && ($charsetTo == 'UTF-8') && function_exists('utf8_encode')) {
            return utf8_encode($str);
        }
        return $str;
    }

    /**
     * Encodes a UTF-8 string for safe output in a document of the given charset.
     *
     * - UTF-8: only the special chars (&, <, >, ', ") are replaced by entities.
     * - ISO-8859-1 / ISO-8859-15: special chars and every code point from
     *   U+0100 upwards become numeric entities, then the result is converted.
     * - Any other charset: special chars and every code point from U+0080
     *   upwards become numeric entities, then the result is converted.
     *
     * NOTE(review): ISO-8859-15 lacks a few code points below U+0100 that
     * ISO-8859-1 has (e.g. U+00A4); those are not entity-encoded here and are
     * left to the charset converter - confirm whether that matters for callers.
     *
     * @param string $str            UTF-8 input string.
     * @param string $contentCharset Charset of the output document.
     * @return string|false Encoded string in $contentCharset (see convertEncoding()).
     */
    public static function htmlEncode($str, $contentCharset='UTF-8') {
        switch($contentCharset) {
            case 'UTF-8':
                return self::encodeSpecialChars($str);
            case 'ISO-8859-1':
            case 'ISO-8859-15':
                // Everything above the ISO-8859-1 range must become an entity:
                $firstEncodedCodePoint = 0x100;
                break;
            default:
                // Everything above the ASCII range must become an entity:
                $firstEncodedCodePoint = 0x80;
        }
        return self::convertEncoding(
            self::encodeEntities($str, 'UTF-8', self::getEntityConvmap($firstEncodedCodePoint)),
            'UTF-8',
            $contentCharset
        );
    }

    /**
     * Builds the mb_encode_numericentity() conversion map used by htmlEncode().
     *
     * Each group of four integers is (range start, range end, offset, mask).
     * The mask must cover all 21 bits of a Unicode code point; a 16 bit mask
     * would truncate supplementary-plane characters such as U+1F600 to a
     * wrong entity number.
     *
     * @param int $firstEncodedCodePoint Lowest non-special code point to encode.
     * @return array Flat conversion map.
     */
    private static function getEntityConvmap($firstEncodedCodePoint) {
        return array(
            0x26, 0x26, 0, 0x1FFFFF, // &
            0x3C, 0x3C, 0, 0x1FFFFF, // <
            0x3E, 0x3E, 0, 0x1FFFFF, // >
            0x27, 0x27, 0, 0x1FFFFF, // '
            0x22, 0x22, 0, 0x1FFFFF, // "
            $firstEncodedCodePoint, 0x10FFFF, 0, 0x1FFFFF // all remaining non-representable code points
        );
    }

    /**
     * Replaces &, <, >, ' and " by their HTML entities.
     *
     * @param string $str Input string.
     * @return string Encoded string.
     */
    public static function encodeSpecialChars($str) {
        // strtr() with a map works in a single pass, so the '&' of freshly
        // inserted entities is never encoded a second time.
        return strtr($str, self::getSpecialChars());
    }

    /**
     * Reverses encodeSpecialChars().
     *
     * @param string $str Input string containing special char entities.
     * @return string Decoded string.
     */
    public static function decodeSpecialChars($str) {
        return strtr($str, array_flip(self::getSpecialChars()));
    }

    /**
     * Encodes characters as HTML entities.
     *
     * With a conversion map and mbstring available, the characters selected
     * by the map are replaced by numeric entities. Otherwise htmlentities()
     * with ENT_QUOTES is used and the conversion map is ignored.
     *
     * @param string     $str      Input string.
     * @param string     $encoding Charset of $str.
     * @param array|null $convmap  mb_encode_numericentity() conversion map.
     * @return string Encoded string.
     */
    public static function encodeEntities($str, $encoding='UTF-8', $convmap=null) {
        if($convmap && function_exists('mb_encode_numericentity')) {
            return mb_encode_numericentity($str, $convmap, $encoding);
        }
        return htmlentities($str, ENT_QUOTES, $encoding);
    }

    /**
     * Decodes numeric and literal HTML entities.
     *
     * @param string     $str             Input string.
     * @param string     $encoding        Target charset for html_entity_decode().
     * @param array|null $htmlEntitiesMap Optional map of entity => replacement
     *                                    applied in addition to (or, in the
     *                                    fallback path, instead of) the
     *                                    built-in literal entity table.
     * @return string Decoded string.
     */
    public static function decodeEntities($str, $encoding='UTF-8', $htmlEntitiesMap=null) {
        // Due to PHP bug #25670, html_entity_decode does not work with UTF-8 for PHP versions < 5:
        if(function_exists('html_entity_decode') && version_compare(phpversion(), '5', '>=')) {
            // Replace numeric and literal entities:
            $str = html_entity_decode($str, ENT_QUOTES, $encoding);
            // Replace additional literal HTML entities if an HTML entities map is given:
            if($htmlEntitiesMap) {
                $str = strtr($str, $htmlEntitiesMap);
            }
            return $str;
        }

        // Fallback (e.g. html_entity_decode() disabled). Uses callbacks because
        // the /e modifier of preg_replace() no longer exists in PHP 7+.
        // Replace numeric entities:
        $str = preg_replace_callback('~&#([0-9]+);~', array(__CLASS__, 'decodeDecimalEntityMatch'), $str);
        $str = preg_replace_callback('~&#x([0-9a-f]+);~i', array(__CLASS__, 'decodeHexEntityMatch'), $str);
        // Replace literal entities:
        if(!$htmlEntitiesMap) {
            $htmlEntitiesMap = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES));
        }
        return strtr($str, $htmlEntitiesMap);
    }

    /**
     * preg_replace_callback() handler for decimal numeric entities.
     *
     * @param array $match Regex match; index 1 holds the decimal digits.
     * @return string UTF-8 character, or an empty string for invalid code points.
     */
    private static function decodeDecimalEntityMatch($match) {
        // The digit string is passed as is so that oversized numbers are
        // rejected by the range check instead of being wrapped by an int cast.
        return (string)self::unicodeChar($match[1]);
    }

    /**
     * preg_replace_callback() handler for hexadecimal numeric entities.
     *
     * @param array $match Regex match; index 1 holds the hex digits.
     * @return string UTF-8 character, or an empty string for invalid code points.
     */
    private static function decodeHexEntityMatch($match) {
        return (string)self::unicodeChar(hexdec($match[1]));
    }

    /**
     * Returns the UTF-8 byte sequence for a Unicode code point.
     *
     * @param int $c Unicode code point.
     * @return string|null UTF-8 encoded character, or null if $c is negative,
     *                     above U+10FFFF or a surrogate (U+D800-U+DFFF), none
     *                     of which can be represented in valid UTF-8.
     */
    public static function unicodeChar($c) {
        // Range check before the int cast so that huge floats/strings cannot wrap around:
        if($c < 0 || $c > 0x10FFFF || ($c >= 0xD800 && $c <= 0xDFFF)) {
            return null;
        }
        $c = (int)$c;
        if($c <= 0x7F) {
            // 1 byte: 0xxxxxxx
            return chr($c);
        }
        if($c <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            return chr(0xC0 | ($c >> 6))
                . chr(0x80 | ($c & 0x3F));
        }
        if($c <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            return chr(0xE0 | ($c >> 12))
                . chr(0x80 | (($c >> 6) & 0x3F))
                . chr(0x80 | ($c & 0x3F));
        }
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return chr(0xF0 | ($c >> 18))
            . chr(0x80 | (($c >> 12) & 0x3F))
            . chr(0x80 | (($c >> 6) & 0x3F))
            . chr(0x80 | ($c & 0x3F));
    }

    /**
     * Removes NUL and the non-whitespace control characters (NO-WS-CTL,
     * RFC 2822: decimal 1-8, 11-12, 14-31 and 127) from a string.
     *
     * @param string $str Input string.
     * @return string|null Cleaned string (null if preg_replace() fails).
     */
    public static function removeUnsafeCharacters($str) {
        return preg_replace(self::getRegExp_NO_WS_CTL(), '', $str);
    }

}
|
|||
|
?>
|