diff options
author | Craig Andrews <candrews@integralblue.com> | 2009-08-25 11:21:45 -0400 |
---|---|---|
committer | Craig Andrews <candrews@integralblue.com> | 2009-08-25 11:21:45 -0400 |
commit | 6a3a25b5a2b65e54841cc60c4f2254f6d7b6b54b (patch) | |
tree | 00e43400f3a117fe570a8c6bfe8a29b3333888c6 /lib | |
parent | 9ec39110201dc1b088786c8e31ca96847645d8bc (diff) |
Improved the URL tests, and improve the matcher so more tests are passed. The remaining failing tests I believe are incorrect.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/util.php | 24 |
1 files changed, 14 insertions, 10 deletions
diff --git a/lib/util.php b/lib/util.php index 2be4213e7..ee3fe5ddc 100644 --- a/lib/util.php +++ b/lib/util.php @@ -412,30 +412,34 @@ function common_render_text($text) function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. - '(?:^|[\s\(\)\[\]\{\}]+)'. - '('. + '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'. + '('. '(?:'. '(?:'. //Known protocols '(?:'. '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. '|'. '(?:mailto|aim|tel|xmpp):'. - ')[^\s\/]+'. + ')'. + '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns ')'. '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 '|(?:'. //IPv6 '(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))'. ')|(?:'. //DNS - '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'. - ')'. + '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.(?:museum|travel|onion|local|[a-z]{2,4})'. + ')(?![\pN\pL\-\_])'. ')'. '(?:'. - '$|(?:'. - '/[^\s\(\)\[\]\{\}]*'. - ')'. - ')'. + '(?:\:\d+)?'. //:port + '(?:/[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;]*)?'. // /path + '(?:\?[\pN\pL\$\!\(\)\.\-\_\+\/\=\&\;\/]*)?'. // ?query string + '(?:\#[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;\/\?\#]*)?'. // #fragment + ')(?<![\?\.\,\#\)])'. ')'. - '#ix'; + '#ixu'; return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text); } |