diff options
-rw-r--r-- | lib/util.php | 24 | ||||
-rw-r--r-- | tests/URLDetectionTest.php | 143 |
2 files changed, 110 insertions, 57 deletions
diff --git a/lib/util.php b/lib/util.php index 2be4213e7..ee3fe5ddc 100644 --- a/lib/util.php +++ b/lib/util.php @@ -412,30 +412,34 @@ function common_render_text($text) function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. - '(?:^|[\s\(\)\[\]\{\}]+)'. - '('. + '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'. + '('. '(?:'. '(?:'. //Known protocols '(?:'. '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. '|'. '(?:mailto|aim|tel|xmpp):'. - ')[^\s\/]+'. + ')'. + '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns ')'. '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 '|(?:'. //IPv6 '(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))'. ')|(?:'. //DNS - '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'. - ')'. + '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.(?:museum|travel|onion|local|[a-z]{2,4})'. + ')(?![\pN\pL\-\_])'. ')'. '(?:'. - '$|(?:'. - '/[^\s\(\)\[\]\{\}]*'. - ')'. - ')'. + '(?:\:\d+)?'. //:port + '(?:/[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;]*)?'. // /path + '(?:\?[\pN\pL\$\!\(\)\.\-\_\+\/\=\&\;\/]*)?'. // ?query string + '(?:\#[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;\/\?\#]*)?'. // #fragment + ')(?<![\?\.\,\#\)])'. ')'. - '#ix'; + '#ixu'; return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text); } diff --git a/tests/URLDetectionTest.php b/tests/URLDetectionTest.php index e69f1a2c3..05c02d6bb 100644 --- a/tests/URLDetectionTest.php +++ b/tests/URLDetectionTest.php @@ -25,10 +25,50 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase static public function provider() { return array( + array('127.0.0.1', + '<a href="http://127.0.0.1/" rel="external">127.0.0.1</a>'), + array('127.0.0.1/test.php', + '<a href="http://127.0.0.1/test.php" rel="external">127.0.0.1/test.php</a>'), + array('http://::1/test.php', + '<a href="http://::1/test.php" rel="external">http://::1/test.php</a>'), + array('http://::1', + '<a href="http://::1/" rel="external">http://::1</a>'), + array('2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php', + '<a href="http://2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php" rel="external">2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php</a>'), + array('2001:4978:1b5:0:21d:e0ff:fe66:59ab', + '<a href="http://2001:4978:1b5:0:21d:e0ff:fe66:59ab/" rel="external">2001:4978:1b5:0:21d:e0ff:fe66:59ab</a>'), + array('http://127.0.0.1', + '<a href="http://127.0.0.1/" rel="external">http://127.0.0.1</a>'), + array('example.com', + '<a href="http://example.com/" rel="external">example.com</a>'), + array('example.com', + '<a href="http://example.com/" rel="external">example.com</a>'), + array('http://example.com', + '<a href="http://example.com/" rel="external">http://example.com</a>'), + array('http://example.com.', + '<a href="http://example.com/" rel="external">http://example.com</a>.'), + array('/var/lib/example.so', + '/var/lib/example.so'), array('example', 'example'), + array('user@example.com', + '<a href="mailto:user@example.com" rel="external">user@example.com</a>'), + array('user_name+other@example.com', + '<a href="mailto:user_name+other@example.com" rel="external">user_name+other@example.com</a>'), + array('mailto:user@example.com', + '<a href="mailto:user@example.com" rel="external">mailto:user@example.com</a>'), + array('mailto:user@example.com?subject=test', + '<a href="mailto:user@example.com?subject=test" rel="external">mailto:user@example.com?subject=test</a>'), + array('#example', + '#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('example'))) . '" rel="tag">example</a></span>'), + array('#example.com', + '#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('example.com'))) . '" rel="tag">example.com</a></span>'), + array('#.net', + '#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('.net'))) . '" rel="tag">.net</a></span>'), array('http://example', '<a href="http://example/" rel="external">http://example</a>'), + array('http://3xampl3', + '<a href="http://3xampl3/" rel="external">http://3xampl3</a>'), array('http://example/', '<a href="http://example/" rel="external">http://example/</a>'), array('http://example/path', @@ -47,6 +87,10 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase '<a href="http://user:pass@example.com/" rel="external">http://user:pass@example.com</a>'), array('http://example.com:8080', '<a href="http://example.com:8080/" rel="external">http://example.com:8080</a>'), + array('http://example.com:8080/test.php', + '<a href="http://example.com:8080/test.php" rel="external">http://example.com:8080/test.php</a>'), + array('example.com:8080/test.php', + '<a href="http://example.com:8080/test.php" rel="external">example.com:8080/test.php</a>'), array('http://www.example.com', '<a href="http://www.example.com/" rel="external">http://www.example.com</a>'), array('http://example.com/', @@ -59,60 +103,65 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase '<a href="http://example.com/path.html#fragment" rel="external">http://example.com/path.html#fragment</a>'), array('http://example.com/path.php?foo=bar&bar=foo', '<a href="http://example.com/path.php?foo=bar&bar=foo" rel="external">http://example.com/path.php?foo=bar&bar=foo</a>'), + array('http://example.com.', + '<a href="http://example.com/" rel="external">http://example.com</a>.'), array('http://müllärör.de', - '<a href="http://müllärör.de" rel="external">http://müllärör.de</a>'), + '<a href="http://müllärör.de/" rel="external">http://müllärör.de</a>'), array('http://ﺱﺲﺷ.com', - '<a href="http://ﺱﺲﺷ.com" rel="external">http://ﺱﺲﺷ.com</a>'), + '<a href="http://ﺱﺲﺷ.com/" rel="external">http://ﺱﺲﺷ.com</a>'), array('http://сделаткартинки.com', - '<a href="http://сделаткартинки.com" rel="external">http://сделаткартинки.com</a>'), + '<a href="http://сделаткартинки.com/" rel="external">http://сделаткартинки.com</a>'), array('http://tūdaliņ.lv', - '<a href="http://tūdaliņ.lv" rel="external">http://tūdaliņ.lv</a>'), + '<a href="http://tūdaliņ.lv/" rel="external">http://tūdaliņ.lv</a>'), array('http://brændendekærlighed.com', - '<a href="http://brændendekærlighed.com" rel="external">http://brændendekærlighed.com</a>'), + '<a href="http://brændendekærlighed.com/" rel="external">http://brændendekærlighed.com</a>'), array('http://あーるいん.com', - '<a href="http://あーるいん.com" rel="external">http://あーるいん.com</a>'), + '<a href="http://あーるいん.com/" rel="external">http://あーるいん.com</a>'), array('http://예비교사.com', - '<a href="http://예비교사.com" rel="external">http://예비교사.com</a>'), + '<a href="http://예비교사.com/" rel="external">http://예비교사.com</a>'), array('http://example.com.', - '<a href="http://example.com" rel="external">http://example.com</a>.'), + '<a href="http://example.com/" rel="external">http://example.com</a>.'), array('http://example.com?', - '<a href="http://example.com" rel="external">http://example.com</a>?'), + '<a href="http://example.com/" rel="external">http://example.com</a>?'), array('http://example.com!', - '<a href="http://example.com" rel="external">http://example.com</a>!'), + '<a href="http://example.com/" rel="external">http://example.com</a>!'), array('http://example.com,', - '<a href="http://example.com" rel="external">http://example.com</a>,'), + '<a href="http://example.com/" rel="external">http://example.com</a>,'), array('http://example.com;', - '<a href="http://example.com" rel="external">http://example.com</a>;'), + '<a href="http://example.com/" rel="external">http://example.com</a>;'), array('http://example.com:', - '<a href="http://example.com" rel="external">http://example.com</a>:'), + '<a href="http://example.com/" rel="external">http://example.com</a>:'), array('\'http://example.com\'', - '\'<a href="http://example.com" rel="external">http://example.com</a>\''), + '\'<a href="http://example.com/" rel="external">http://example.com</a>\''), array('"http://example.com"', - '"<a href="http://example.com" rel="external">http://example.com</a>"'), - array('http://example.com
', - '<a href="http://example.com" rel="external">http://example.com</a>'), + '"<a href="http://example.com/" rel="external">http://example.com</a>"'), + array('http://example.com', + '<a href="http://example.com/" rel="external">http://example.com</a>'), array('(http://example.com)', - '(<a href="http://example.com" rel="external">http://example.com</a>)'), + '(<a href="http://example.com/" rel="external">http://example.com</a>)'), array('[http://example.com]', - '[<a href="http://example.com" rel="external">http://example.com</a>]'), + '[<a href="http://example.com/" rel="external">http://example.com</a>]'), array('<http://example.com>', - '<<a href="http://example.com" rel="external">http://example.com</a>>'), + '<<a href="http://example.com/" rel="external">http://example.com</a>>'), array('http://example.com/path/(foo)/bar', '<a href="http://example.com/path/(foo)/bar" rel="external">http://example.com/path/(foo)/bar</a>'), + //Not a valid url - urls cannot contain unencoded square brackets array('http://example.com/path/[foo]/bar', '<a href="http://example.com/path/[foo]/bar" rel="external">http://example.com/path/[foo]/bar</a>'), array('http://example.com/path/foo/(bar)', '<a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>'), - array('http://example.com/path/foo/[bar]', - '<a href="http://example.com/path/foo/[bar]" rel="external">http://example.com/path/foo/[bar]</a>'), + //Not a valid url - urls cannot contain unencoded square brackets + //array('http://example.com/path/foo/[bar]', + // '<a href="http://example.com/path/foo/[bar]" rel="external">http://example.com/path/foo/[bar]</a>'), array('Hey, check out my cool site http://example.com okay?', - 'Hey, check out my cool site <a href="http://example.com" rel="external">http://example.com</a> okay?'), + 'Hey, check out my cool site <a href="http://example.com/" rel="external">http://example.com</a> okay?'), array('What about parens (e.g. http://example.com/path/foo/(bar))?', 'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>)?'), array('What about parens (e.g. http://example.com/path/foo/(bar)?', 'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>?'), array('What about parens (e.g. http://example.com/path/foo/(bar).)?', 'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>.)?'), + //Not a valid url - urls cannot contain unencoded commas array('What about parens (e.g. http://example.com/path/(foo,bar)?', 'What about parens (e.g. <a href="http://example.com/path/(foo,bar)" rel="external">http://example.com/path/(foo,bar)</a>?'), array('Unbalanced too (e.g. http://example.com/path/((((foo)/bar)?', @@ -124,51 +173,51 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase array('Unbalanced too (e.g. http://example.com/path/foo/(bar))))?', 'Unbalanced too (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>)))?'), array('example.com', - '<a href="http://example.com" rel="external">example.com</a>'), + '<a href="http://example.com/" rel="external">example.com</a>'), array('example.org', - '<a href="http://example.org" rel="external">example.org</a>'), + '<a href="http://example.org/" rel="external">example.org</a>'), array('example.co.uk', - '<a href="http://example.co.uk" rel="external">example.co.uk</a>'), + '<a href="http://example.co.uk/" rel="external">example.co.uk</a>'), array('www.example.co.uk', - '<a href="http://www.example.co.uk" rel="external">www.example.co.uk</a>'), + '<a href="http://www.example.co.uk/" rel="external">www.example.co.uk</a>'), array('farm1.images.example.co.uk', - '<a href="http://farm1.images.example.co.uk" rel="external">farm1.images.example.co.uk</a>'), + '<a href="http://farm1.images.example.co.uk/" rel="external">farm1.images.example.co.uk</a>'), array('example.museum', - '<a href="http://example.museum" rel="external">example.museum</a>'), + '<a href="http://example.museum/" rel="external">example.museum</a>'), array('example.travel', - '<a href="http://example.travel" rel="external">example.travel</a>'), + '<a href="http://example.travel/" rel="external">example.travel</a>'), array('example.com.', - '<a href="http://example.com" rel="external">example.com</a>.'), + '<a href="http://example.com/" rel="external">example.com</a>.'), array('example.com?', - '<a href="http://example.com" rel="external">example.com</a>?'), + '<a href="http://example.com/" rel="external">example.com</a>?'), array('example.com!', - '<a href="http://example.com" rel="external">example.com</a>!'), + '<a href="http://example.com/" rel="external">example.com</a>!'), array('example.com,', - '<a href="http://example.com" rel="external">example.com</a>,'), + '<a href="http://example.com/" rel="external">example.com</a>,'), array('example.com;', - '<a href="http://example.com" rel="external">example.com</a>;'), + '<a href="http://example.com/" rel="external">example.com</a>;'), array('example.com:', - '<a href="http://example.com" rel="external">example.com</a>:'), + '<a href="http://example.com/" rel="external">example.com</a>:'), array('\'example.com\'', - '\'<a href="http://example.com" rel="external">example.com</a>\''), + '\'<a href="http://example.com/" rel="external">example.com</a>\''), array('"example.com"', - '"<a href="http://example.com" rel="external">example.com</a>"'), - array('example.com
', - '<a href="http://example.com" rel="external">example.com</a>'), + '"<a href="http://example.com/" rel="external">example.com</a>"'), + array('example.com', + '<a href="http://example.com/" rel="external">example.com</a>'), array('(example.com)', - '(<a href="http://example.com" rel="external">example.com</a>)'), + '(<a href="http://example.com/" rel="external">example.com</a>)'), array('[example.com]', - '[<a href="http://example.com" rel="external">example.com</a>]'), + '[<a href="http://example.com/" rel="external">example.com</a>]'), array('<example.com>', - '<<a href="http://example.com" rel="external">example.com</a>>'), + '<<a href="http://example.com/" rel="external">example.com</a>>'), array('Hey, check out my cool site example.com okay?', - 'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a> okay?'), + 'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a> okay?'), array('Hey, check out my cool site example.com.I made it.', - 'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.I made it.'), + 'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.I made it.'), array('Hey, check out my cool site example.com.Funny thing...', - 'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.Funny thing...'), + 'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.Funny thing...'), array('Hey, check out my cool site example.com.You will love it.', - 'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.You will love it.'), + 'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.You will love it.'), array('What about parens (e.g. example.com/path/foo/(bar))?', 'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">example.com/path/foo/(bar)</a>)?'), array('What about parens (e.g. example.com/path/foo/(bar)?', |