diff options
Diffstat (limited to 'plugins/Irc/extlib/phergie/Phergie/Plugin/Encoding.php')
-rw-r--r-- | plugins/Irc/extlib/phergie/Phergie/Plugin/Encoding.php | 182 |
1 files changed, 182 insertions, 0 deletions
<?php
/**
 * Phergie
 *
 * PHP version 5
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.
 * It is also available through the world-wide-web at this URL:
 * http://phergie.org/license
 *
 * @category  Phergie
 * @package   Phergie_Plugin_Encoding
 * @author    Phergie Development Team <team@phergie.org>
 * @copyright 2008-2010 Phergie Development Team (http://phergie.org)
 * @license   http://phergie.org/license New BSD License
 * @link      http://pear.phergie.org/package/Phergie_Plugin_Encoding
 */

/**
 * Handles decoding markup entities and converting text between character
 * encodings.
 *
 * @category Phergie
 * @package  Phergie_Plugin_Encoding
 * @author   Phergie Development Team <team@phergie.org>
 * @license  http://phergie.org/license New BSD License
 * @link     http://pear.phergie.org/package/Phergie_Plugin_Encoding
 */
class Phergie_Plugin_Encoding extends Phergie_Plugin_Abstract
{
    /**
     * Lookup table for entity conversions not supported by
     * html_entity_decode()
     *
     * Keys are entity names (matched case-insensitively by
     * decodeEntities()); values are numeric character codes. Values up to
     * 255 are emitted as a single raw byte; the ones in the 128-159 range
     * appear to be Windows-1252 byte values rather than Unicode code points
     * (NOTE(review): confirm before changing them). Values above 255 are
     * Unicode code points.
     *
     * @var array
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73409
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73410
     */
    protected static $entities = array(
        '&alpha;' => 913,
        '&apos;' => 39,
        '&beta;' => 914,
        '&bull;' => 149,
        '&chi;' => 935,
        '&circ;' => 94,
        '&delta;' => 916,
        '&epsilon;' => 917,
        '&eta;' => 919,
        '&fnof;' => 402,
        '&gamma;' => 915,
        '&iota;' => 921,
        '&kappa;' => 922,
        '&lambda;' => 923,
        '&ldquo;' => 147,
        '&lsaquo;' => 139,
        '&lsquo;' => 145,
        '&mdash;' => 151,
        '&minus;' => 45,
        '&mu;' => 924,
        '&ndash;' => 150,
        '&nu;' => 925,
        '&oelig;' => 140,
        '&omega;' => 937,
        '&omicron;' => 927,
        '&phi;' => 934,
        '&pi;' => 928,
        '&piv;' => 982,
        '&psi;' => 936,
        '&rdquo;' => 148,
        '&rho;' => 929,
        '&rsaquo;' => 155,
        '&rsquo;' => 146,
        '&scaron;' => 138,
        '&sigma;' => 931,
        '&sigmaf;' => 962,
        '&tau;' => 932,
        '&theta;' => 920,
        '&thetasym;' => 977,
        '&tilde;' => 126,
        '&trade;' => 153,
        '&upsih;' => 978,
        '&upsilon;' => 933,
        '&xi;' => 926,
        '&yuml;' => 159,
        '&zeta;' => 918,
    );

    /**
     * Decodes markup entities in a given string.
     *
     * Decoding happens in three passes: the class lookup table, then
     * html_entity_decode(), then any numeric (decimal or hexadecimal)
     * character references that are still present. The first and last
     * passes emit UTF-8 (or raw single bytes for table codes up to 255)
     * regardless of $charset; only html_entity_decode() honours $charset.
     *
     * @param string $string  String containing markup entities
     * @param string $charset Optional character set name to use in decoding
     *        entities, defaults to UTF-8
     *
     * @return string String with markup entities decoded
     */
    public function decodeEntities($string, $charset = 'UTF-8')
    {
        // chr() wraps its argument modulo 256, so codes above 255 must be
        // encoded as multi-byte UTF-8 instead of being passed to chr().
        $replacements = array();
        foreach (self::$entities as $code) {
            $replacements[] = $code > 255
                ? $this->codeToUtf8($code)
                : chr($code);
        }

        $string = str_ireplace(
            array_keys(self::$entities),
            $replacements,
            $string
        );
        $string = html_entity_decode($string, ENT_QUOTES, $charset);

        // Callbacks replace the former /e (eval) modifier, which was
        // removed in PHP 7 and evaluated the captured text as PHP code.
        $string = preg_replace_callback(
            '/&#0*([0-9]+);/',
            array($this, 'decodeDecimalEntity'),
            $string
        );
        $string = preg_replace_callback(
            '/&#x0*([a-f0-9]+);/i',
            array($this, 'decodeHexEntity'),
            $string
        );

        return $string;
    }

    /**
     * preg_replace_callback() handler for decimal character references.
     *
     * @param array $matches Regex matches; index 1 holds the decimal digits
     *        with leading zeros stripped
     *
     * @return string UTF-8 character, or an empty string if out of range
     */
    protected function decodeDecimalEntity($matches)
    {
        return $this->decodeNumericEntity($matches[1], 10);
    }

    /**
     * preg_replace_callback() handler for hexadecimal character references.
     *
     * @param array $matches Regex matches; index 1 holds the hex digits
     *        with leading zeros stripped
     *
     * @return string UTF-8 character, or an empty string if out of range
     */
    protected function decodeHexEntity($matches)
    {
        return $this->decodeNumericEntity($matches[1], 16);
    }

    /**
     * Converts the digits of a numeric character reference to UTF-8.
     *
     * @param string $digits Digits of the reference without leading zeros
     * @param int    $base   Numeric base of $digits (10 or 16)
     *
     * @return string UTF-8 character, or an empty string if out of range
     */
    protected function decodeNumericEntity($digits, $base)
    {
        // The largest code point is 1114111 / 10FFFF; anything longer than
        // seven digits is out of range in either base, and rejecting it
        // here keeps intval() clear of integer overflow.
        if (strlen($digits) > 7) {
            return '';
        }
        return $this->codeToUtf8(intval($digits, $base));
    }

    /**
     * Converts a given unicode to its UTF-8 equivalent.
     *
     * @param int $code Code to convert
     *
     * @return string Character corresponding to code, or an empty string
     *         if the code is negative or greater than 0x10FFFF
     */
    public function codeToUtf8($code)
    {
        $code = (int) $code;

        // Outside the Unicode range there is no valid UTF-8 encoding.
        if ($code < 0 || $code > 0x10FFFF) {
            return '';
        }

        // 1 byte, 7 bits
        if ($code < 0x80) {
            return chr($code);
        }

        // 2 bytes, 11 bits
        if ($code < 0x800) {
            return chr(0xC0 | ($code >> 6)) .
                chr(0x80 | ($code & 0x3F));
        }

        // 3 bytes, 16 bits
        if ($code < 0x10000) {
            return chr(0xE0 | ($code >> 12)) .
                chr(0x80 | (($code >> 6) & 0x3F)) .
                chr(0x80 | ($code & 0x3F));
        }

        // 4 bytes, 21 bits
        return chr(0xF0 | ($code >> 18)) .
            chr(0x80 | (($code >> 12) & 0x3F)) .
            chr(0x80 | (($code >> 6) & 0x3F)) .
            chr(0x80 | ($code & 0x3F));
    }

    /**
     * Transliterates characters in a given string where possible.
     *
     * Uses the PECL translit extension when its transliterate() function
     * exists, otherwise iconv() with //TRANSLIT, otherwise a regex that
     * reduces accented-letter entities to their base letters.
     *
     * @param string $string      String containing characters to
     *        transliterate
     * @param string $charsetFrom Optional character set of the string,
     *        defaults to UTF-8
     * @param string $charsetTo   Optional character set to which the string
     *        should be converted, defaults to ISO-8859-1
     *
     * @return string String with characters transliterated or the original
     *         string if transliteration was not possible
     */
    public function transliterate($string, $charsetFrom = 'UTF-8', $charsetTo = 'ISO-8859-1')
    {
        // @link http://pecl.php.net/package/translit
        if (function_exists('transliterate')) {
            $string = transliterate(
                $string,
                array('han_transliterate', 'diacritical_remove'),
                $charsetFrom,
                $charsetTo
            );
        } elseif (function_exists('iconv')) {
            // iconv() returns false on failure; keep the original string in
            // that case, as the documented contract promises.
            $converted = iconv($charsetFrom, $charsetTo . '//TRANSLIT', $string);
            if ($converted !== false) {
                $string = $converted;
            }
        } else {
            // @link http://stackoverflow.com/questions/1284535/php-transliteration/1285491#1285491
            $string = preg_replace(
                '~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i',
                '$1',
                htmlentities($string, ENT_COMPAT, $charsetFrom)
            );
        }
        return $string;
    }
}