diff options
author | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-05-01 15:30:02 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-05-01 15:30:02 -0400 |
commit | 1de335ad3f395ca6861085393ba366a9e3fb4a0d (patch) | |
tree | f1fdd326034e05177596851be6a7127615d81498 /includes/title/MediaWikiTitleCodec.php | |
parent | 9c75fa8ff6d4d38ef552c00fef5969fb154765e8 (diff) | |
parent | f6d65e533c62f6deb21342d4901ece24497b433e (diff) |
Merge commit 'f6d65'
# Conflicts:
# skins/ArchLinux/ArchLinux.php
Diffstat (limited to 'includes/title/MediaWikiTitleCodec.php')
-rw-r--r-- | includes/title/MediaWikiTitleCodec.php | 34 |
1 files changed, 32 insertions, 2 deletions
```diff
diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php
index 6ca0799c..20034b74 100644
--- a/includes/title/MediaWikiTitleCodec.php
+++ b/includes/title/MediaWikiTitleCodec.php
@@ -31,6 +31,7 @@
  * via parseTitle() or from a (semi)trusted source, such as the database.
  *
  * @see https://www.mediawiki.org/wiki/Requests_for_comment/TitleValue
+ * @since 1.23
  */
 class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
 	/**
@@ -229,7 +230,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
 		);
 		$dbkey = trim( $dbkey, '_' );
 
-		if ( strpos( $dbkey, UTF8_REPLACEMENT ) !== false ) {
+		if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
 			# Contained illegal UTF-8 sequences or forbidden Unicode chars.
 			throw new MalformedTitleException( 'Bad UTF-8 sequences found in title: ' . $text );
 		}
@@ -322,7 +323,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
 		}
 
 		# Reject illegal characters.
-		$rxTc = Title::getTitleInvalidRegex();
+		$rxTc = self::getTitleInvalidRegex();
 		if ( preg_match( $rxTc, $dbkey ) ) {
 			throw new MalformedTitleException( 'Illegal characters found in title: ' . $text );
 		}
@@ -397,4 +398,33 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser {
 
 		return $parts;
 	}
+
+	/**
+	 * Returns a simple regex that will match on characters and sequences invalid in titles.
+	 * Note that this doesn't pick up many things that could be wrong with titles, but that
+	 * replacing this regex with something valid will make many titles valid.
+	 * Previously Title::getTitleInvalidRegex()
+	 *
+	 * @return string Regex string
+	 * @since 1.25
+	 */
+	public static function getTitleInvalidRegex() {
+		static $rxTc = false;
+		if ( !$rxTc ) {
+			# Matching titles will be held as illegal.
+			$rxTc = '/' .
+				# Any character not allowed is forbidden...
+				'[^' . Title::legalChars() . ']' .
+				# URL percent encoding sequences interfere with the ability
+				# to round-trip titles -- you can't link to them consistently.
+				'|%[0-9A-Fa-f]{2}' .
+				# XML/HTML character references produce similar issues.
+				'|&[A-Za-z0-9\x80-\xff]+;' .
+				'|&#[0-9]+;' .
+				'|&#x[0-9A-Fa-f]+;' .
+				'/S';
+		}
+
+		return $rxTc;
+	}
 }
```