diff options
author | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-05-01 15:12:12 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-05-01 15:12:12 -0400 |
commit | c9aa36da061816dee256a979c2ff8d2ee41824d9 (patch) | |
tree | 29f7002b80ee984b488bd047dbbd80b36bf892e9 /includes/normal/UtfNormalUtil.php | |
parent | b4274e0e33eafb5e9ead9d949ebf031a9fb8363b (diff) | |
parent | d1ba966140d7a60cd5ae4e8667ceb27c1a138592 (diff) |
Merge branch 'archwiki'
# Conflicts:
# skins/ArchLinux.php
# skins/ArchLinux/archlogo.gif
Diffstat (limited to 'includes/normal/UtfNormalUtil.php')
-rw-r--r-- | includes/normal/UtfNormalUtil.php | 42 |
1 file changed, 28 insertions, 14 deletions
diff --git a/includes/normal/UtfNormalUtil.php b/includes/normal/UtfNormalUtil.php index e8fec936..6c925dfa 100644 --- a/includes/normal/UtfNormalUtil.php +++ b/includes/normal/UtfNormalUtil.php @@ -4,7 +4,7 @@ * Should probably merge them for consistency. * * Copyright © 2004 Brion Vibber <brion@pobox.com> - * http://www.mediawiki.org/ + * https://www.mediawiki.org/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,16 +34,27 @@ * @public */ function codepointToUtf8( $codepoint ) { - if($codepoint < 0x80) return chr($codepoint); - if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) . - chr($codepoint & 0x3f | 0x80); - if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) . - chr($codepoint >> 6 & 0x3f | 0x80) . - chr($codepoint & 0x3f | 0x80); - if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) . - chr($codepoint >> 12 & 0x3f | 0x80) . - chr($codepoint >> 6 & 0x3f | 0x80) . - chr($codepoint & 0x3f | 0x80); + if ( $codepoint < 0x80 ) { + return chr( $codepoint ); + } + + if ( $codepoint < 0x800 ) { + return chr( $codepoint >> 6 & 0x3f | 0xc0 ) . + chr( $codepoint & 0x3f | 0x80 ); + } + + if ( $codepoint < 0x10000 ) { + return chr( $codepoint >> 12 & 0x0f | 0xe0 ) . + chr( $codepoint >> 6 & 0x3f | 0x80 ) . + chr( $codepoint & 0x3f | 0x80 ); + } + + if ( $codepoint < 0x110000 ) { + return chr( $codepoint >> 18 & 0x07 | 0xf0 ) . + chr( $codepoint >> 12 & 0x3f | 0x80 ) . + chr( $codepoint >> 6 & 0x3f | 0x80 ) . 
+ chr( $codepoint & 0x3f | 0x80 ); + } echo "Asked for code outside of range ($codepoint)\n"; die( -1 ); @@ -60,10 +71,11 @@ function codepointToUtf8( $codepoint ) { */ function hexSequenceToUtf8( $sequence ) { $utf = ''; - foreach( explode( ' ', $sequence ) as $hex ) { + foreach ( explode( ' ', $sequence ) as $hex ) { $n = hexdec( $hex ); $utf .= codepointToUtf8( $n ); } + return $utf; } @@ -80,6 +92,7 @@ function utf8ToHexSequence( $str ) { foreach ( preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) as $cp ) { $buf .= sprintf( '%04x ', utf8ToCodepoint( $cp ) ); } + return rtrim( $buf ); } @@ -107,6 +120,7 @@ function utf8ToCodepoint( $char ) { if ( $length != strlen( $char ) ) { return false; } + if ( $length == 1 ) { return ord( $char ); } @@ -116,7 +130,7 @@ function utf8ToCodepoint( $char ) { $z >>= $length; # Add in the free bits from subsequent bytes - for ( $i=1; $i < $length; $i++ ) { + for ( $i = 1; $i < $length; $i++ ) { $z <<= 6; $z |= ord( $char[$i] ) & 0x3f; } @@ -136,5 +150,5 @@ function escapeSingleString( $string ) { array( '\\' => '\\\\', '\'' => '\\\'' - )); + ) ); } |