diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2013-01-18 16:46:04 +0100 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2013-01-18 16:46:04 +0100 |
commit | 63601400e476c6cf43d985f3e7b9864681695ed4 (patch) | |
tree | f7846203a952e38aaf66989d0a4702779f549962 /maintenance/benchmarks/bench_utf8_title_check.php | |
parent | 8ff01378c9e0207f9169b81966a51def645b6a51 (diff) |
Update to MediaWiki 1.20.2
this update includes:
* adjusted Arch Linux skin
* updated FluxBBAuthPlugin
* patch for https://bugzilla.wikimedia.org/show_bug.cgi?id=44024
Diffstat (limited to 'maintenance/benchmarks/bench_utf8_title_check.php')
-rw-r--r-- | maintenance/benchmarks/bench_utf8_title_check.php | 126 |
1 files changed, 126 insertions, 0 deletions
<?php
/**
 * Benchmark for using a regexp vs. mb_check_encoding to check for UTF-8 encoding.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Benchmark
 */

require_once( __DIR__ . '/Benchmarker.php' );

/**
 * This little benchmark executes the regexp used in Language->checkTitleEncoding()
 * and compares its execution time against that of mb_check_encoding, if available.
 *
 * Three spellings of the same regexp are measured (capturing group,
 * non-capturing group, once-only group) next to mb_check_encoding(), each one
 * against every sample string in $data.
 *
 * @ingroup Benchmark
 */
class bench_utf8_title_check extends Benchmarker {

	/**
	 * @var bool True when mb_check_encoding() exists; execute() does nothing otherwise.
	 */
	private $canRun;

	/**
	 * @var array Percent-encoded sample titles; execute() passes them through
	 *   rawurldecode() before handing them to the benchmarked methods.
	 */
	private $data;

	/**
	 * Set up the sample titles and the description, and detect whether
	 * mb_check_encoding() is available.
	 */
	public function __construct() {
		parent::__construct();

		$this->data = array(
			"",
			"United States of America", // 7bit ASCII
			"S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
			// This comes from bug 36839
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
			. "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
			. "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
			. "Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C"
			. "Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C"
			. "Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C"
			. "Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C"
			. "Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C"
			. "Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C"
			. "Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C"
			. "Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C"
			. "Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C"
			. "Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C"
			. "Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis"
		);

		$this->canRun = function_exists( 'mb_check_encoding' );

		if ( $this->canRun ) {
			$this->mDescription = "Benchmark for using a regexp vs. mb_check_encoding to check for UTF-8 encoding.";
			mb_internal_encoding( 'UTF-8' );
		} else {
			$this->mDescription = "CANNOT RUN benchmark using mb_check_encoding: function not available.";
		}
	}

	/**
	 * Build one benchmark entry per (sample, method) pair, run them through
	 * bench() and print the formatted results.
	 *
	 * Returns without output when mb_check_encoding() is unavailable.
	 */
	public function execute() {
		if ( !$this->canRun ) {
			return;
		}

		// Methods under test, in the order they are registered for each sample.
		$methods = array(
			'use_regexp',
			'use_regexp_non_capturing',
			'use_regexp_once_only',
			'use_mb_check_encoding',
		);

		$benchmarks = array();
		foreach ( $this->data as $val ) {
			// Decode once per sample; every method receives the same decoded string.
			$decoded = rawurldecode( $val );
			foreach ( $methods as $method ) {
				$benchmarks[] = array(
					'function' => array( $this, $method ),
					'args' => array( $decoded ),
				);
			}
		}

		$this->bench( $benchmarks );
		print $this->getFormattedResults();
	}

	/**
	 * @var mixed Result of the most recent check, written by each use_* method.
	 */
	private $isutf8;

	/**
	 * Check $s with the regexp using a capturing subgroup.
	 *
	 * The pattern accepts one or more of: a byte in 0x00-0x7f, a 0xc0-0xdf
	 * lead byte plus one 0x80-0xbf byte, a 0xe0-0xef lead byte plus two, or a
	 * 0xf0-0xf7 lead byte plus three. The preg_match() result is stored in
	 * $this->isutf8.
	 *
	 * @param string $s String to check
	 */
	public function use_regexp( $s ) {
		$this->isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
	}

	/**
	 * Check $s with the same regexp as use_regexp(), written with a
	 * non-capturing subgroup. The preg_match() result is stored in $this->isutf8.
	 *
	 * @param string $s String to check
	 */
	public function use_regexp_non_capturing( $s ) {
		// Same as above with a non-capturing subgroup.
		$this->isutf8 = preg_match( '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
	}

	/**
	 * Check $s with the same regexp as use_regexp(), written with a
	 * once-only subgroup. The preg_match() result is stored in $this->isutf8.
	 *
	 * @param string $s String to check
	 */
	public function use_regexp_once_only( $s ) {
		// Same as above with a once-only subgroup.
		$this->isutf8 = preg_match( '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
	}

	/**
	 * Check $s with mb_check_encoding() against 'UTF-8' and store the result
	 * in $this->isutf8.
	 *
	 * @param string $s String to check
	 */
	public function use_mb_check_encoding( $s ) {
		$this->isutf8 = mb_check_encoding( $s, 'UTF-8' );
	}

}

$maintClass = 'bench_utf8_title_check';
require_once( RUN_MAINTENANCE_IF_MAIN );