diff options
Diffstat (limited to 'includes/title/MediaWikiTitleCodec.php')
-rw-r--r-- | includes/title/MediaWikiTitleCodec.php | 34 |
1 files changed, 32 insertions, 2 deletions
diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php index 6ca0799c..20034b74 100644 --- a/includes/title/MediaWikiTitleCodec.php +++ b/includes/title/MediaWikiTitleCodec.php @@ -31,6 +31,7 @@ * via parseTitle() or from a (semi)trusted source, such as the database. * * @see https://www.mediawiki.org/wiki/Requests_for_comment/TitleValue + * @since 1.23 */ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { /** @@ -229,7 +230,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { ); $dbkey = trim( $dbkey, '_' ); - if ( strpos( $dbkey, UTF8_REPLACEMENT ) !== false ) { + if ( strpos( $dbkey, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) { # Contained illegal UTF-8 sequences or forbidden Unicode chars. throw new MalformedTitleException( 'Bad UTF-8 sequences found in title: ' . $text ); } @@ -322,7 +323,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { } # Reject illegal characters. - $rxTc = Title::getTitleInvalidRegex(); + $rxTc = self::getTitleInvalidRegex(); if ( preg_match( $rxTc, $dbkey ) ) { throw new MalformedTitleException( 'Illegal characters found in title: ' . $text ); } @@ -397,4 +398,33 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { return $parts; } + + /** + * Returns a simple regex that will match on characters and sequences invalid in titles. + * Note that this doesn't pick up many things that could be wrong with titles, but that + * replacing this regex with something valid will make many titles valid. + * Previously Title::getTitleInvalidRegex() + * + * @return string Regex string + * @since 1.25 + */ + public static function getTitleInvalidRegex() { + static $rxTc = false; + if ( !$rxTc ) { + # Matching titles will be held as illegal. + $rxTc = '/' . + # Any character not allowed is forbidden... + '[^' . Title::legalChars() . ']' . + # URL percent encoding sequences interfere with the ability + # to round-trip titles -- you can't link to them consistently. + '|%[0-9A-Fa-f]{2}' . + # XML/HTML character references produce similar issues. + '|&[A-Za-z0-9\x80-\xff]+;' . + '|&#[0-9]+;' . + '|&#x[0-9A-Fa-f]+;' . + '/S'; + } + + return $rxTc; + } } |