From 70ca03f33615f0d6504df91cd40fa04a228fa42f Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Fri, 21 Aug 2009 12:47:01 -0400 Subject: Use currying to call the url callbacks, and use preg_replace_callback This definitely looks neater than the string maniplation it replaces --- lib/util.php | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index 748c8332f..f732d8b55 100644 --- a/lib/util.php +++ b/lib/util.php @@ -445,24 +445,32 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { '(?:[:/][^\s]*)?'. ')'. '#ix'; - preg_match_all($regex, $text, $matches); - // Then clean up what the regex left behind - $offset = 0; - foreach($matches[1] as $url) { - // Call user specified func - if (empty($notice_id)) { - $modified_url = call_user_func($callback, $url); - } else { - $modified_url = call_user_func($callback, array($url, $notice_id)); - } - // Replace it! - $start = mb_strpos($text, $url, $offset); - $text = mb_substr($text, 0, $start).$modified_url.mb_substr($text, $start + mb_strlen($url), mb_strlen($text)); - $offset = $start + mb_strlen($modified_url); + $callback_helper = curry(callback_helper, 3); + return preg_replace_callback($regex, $callback_helper($callback,$notice_id) ,$text); +} + +function callback_helper($callback, $notice_id, $matches) { + if(empty($notice_id)){ + return $callback($matches[1],$notice_id); + }else{ + return $callback($matches[1]); } +} - return $text; +function curry($func, $arity) { + return create_function('', " + \$args = func_get_args(); + if(count(\$args) >= $arity) + return call_user_func_array('$func', \$args); + \$args = var_export(\$args, 1); + return create_function('',' + \$a = func_get_args(); + \$z = ' . \$args . '; + \$a = array_merge(\$z,\$a); + return call_user_func_array(\'$func\', \$a); + '); + "); } function common_linkify($url) { -- cgit v1.2.3-54-g00ecf From 871903a319aab5704504991208d992fadb72fdf7 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Fri, 21 Aug 2009 15:56:15 -0400 Subject: Linkifier support many more urls, and less mismatches --- lib/util.php | 77 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 32 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index f732d8b55..4e809029f 100644 --- a/lib/util.php +++ b/lib/util.php @@ -413,15 +413,16 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. '(?:^|\s+)('. - '(?:'. - '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. - '|'. - '(?:mailto|aim|tel|xmpp):'. - ')?'. - '(?:'. - '(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 - '|(?:'. - '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'. //IPv6 + '(?:'. //Known protocols + '(?:'. + '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. + '|'. + '(?:mailto|aim|tel|xmpp):'. + ')\S+'. + ')'. + '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 + '|(?:'. //IPv6 + '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'. '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}|'. '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}|'. '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}|'. @@ -438,39 +439,46 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,1}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. '(?:(?:[0-9a-f]{1,4}:){1,5}|:):(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. ':(?::[0-9a-f]{1,4}){1,5}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'. - ')|'. - '(?:[^.\s/:]+\.)+'. //DNS - '(?:museum|travel|onion|[a-z]{2,4})'. + ')|(?:'. //DNS + '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'. ')'. - '(?:[:/][^\s]*)?'. - ')'. + '(?:[:/]\S*)?'. + ')(?:$|\s+)'. '#ix'; - $callback_helper = curry(callback_helper, 3); - return preg_replace_callback($regex, $callback_helper($callback,$notice_id) ,$text); +//preg_match_all($regex,$text,$matches); +//print_r($matches); +//die("here"); + return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text); } -function callback_helper($callback, $notice_id, $matches) { +function callback_helper($matches, $callback, $notice_id) { + $spaces_left = (strlen($matches[0]) - strlen(ltrim($matches[0]))); + $spaces_right = (strlen($matches[0]) - strlen(rtrim($matches[0]))); if(empty($notice_id)){ - return $callback($matches[1],$notice_id); + $result = call_user_func_array($callback,$matches[1]); }else{ - return $callback($matches[1]); + $result = call_user_func_array($callback, array($matches[1],$notice_id) ); } + return str_repeat(' ',$spaces_left) . $result . str_repeat(' ',$spaces_right); } -function curry($func, $arity) { - return create_function('', " - \$args = func_get_args(); - if(count(\$args) >= $arity) - return call_user_func_array('$func', \$args); - \$args = var_export(\$args, 1); - return create_function('',' - \$a = func_get_args(); - \$z = ' . \$args . '; - \$a = array_merge(\$z,\$a); - return call_user_func_array(\'$func\', \$a); - '); - "); +function curry($fn) { + //TODO switch to a PHP 5.3 function closure based approach if PHP 5.3 is used + $args = func_get_args(); + array_shift($args); + $id = uniqid('_partial'); + $GLOBALS[$id] = array($fn, $args); + return create_function( + '', + ' + $args = func_get_args(); + return call_user_func_array( + $GLOBALS["'.$id.'"][0], + array_merge( + $args, + $GLOBALS["'.$id.'"][1])); + '); } function common_linkify($url) { @@ -478,6 +486,11 @@ function common_linkify($url) { // functions $url = htmlspecialchars_decode($url); + if(strpos($url, '@')!==false && strpos($url, ':')===false){ + //url is an email address without the mailto: protocol + return XMLStringer::estring('a', array('href' => "mailto:$url", 'rel' => 'external'), $url); + } + $canon = File_redirection::_canonUrl($url); $longurl_data = File_redirection::where($url); -- cgit v1.2.3-54-g00ecf From 0615fda25e81d686c7d7a67b4d0674716eeb5929 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Fri, 21 Aug 2009 20:01:33 -0400 Subject: URLs surrounded by (),{}, and [] are correctly handled now! --- lib/util.php | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index 4e809029f..575e796f1 100644 --- a/lib/util.php +++ b/lib/util.php @@ -412,13 +412,13 @@ function common_render_text($text) function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. - '(?:^|\s+)('. + '(?:^|[\s\(\)\[\]\{\}]+)('. '(?:'. //Known protocols '(?:'. '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. '|'. '(?:mailto|aim|tel|xmpp):'. - ')\S+'. + ')[^\s\(\)\[\]\{\}]+'. ')'. '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 '|(?:'. //IPv6 @@ -442,25 +442,23 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { ')|(?:'. //DNS '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'. ')'. - '(?:[:/]\S*)?'. - ')(?:$|\s+)'. + '([^\s\(\)\[\]\{\}]*)'. + ')'. '#ix'; - -//preg_match_all($regex,$text,$matches); -//print_r($matches); -//die("here"); return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text); } function callback_helper($matches, $callback, $notice_id) { - $spaces_left = (strlen($matches[0]) - strlen(ltrim($matches[0]))); - $spaces_right = (strlen($matches[0]) - strlen(rtrim($matches[0]))); + $pos = strpos($matches[0],$matches[1]); + $left = substr($matches[0],0,$pos); + $right = substr($matches[0],$pos+strlen($matches[1])); + if(empty($notice_id)){ $result = call_user_func_array($callback,$matches[1]); }else{ $result = call_user_func_array($callback, array($matches[1],$notice_id) ); } - return str_repeat(' ',$spaces_left) . $result . str_repeat(' ',$spaces_right); + return $left . $result . $right; } function curry($fn) { -- cgit v1.2.3-54-g00ecf From 579a41b56f3d11f77d837b286753fd7ac528590f Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Fri, 21 Aug 2009 21:11:23 -0400 Subject: Improve url finding more. Properly end urls when a space is caught. --- lib/util.php | 65 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index 575e796f1..091533afb 100644 --- a/lib/util.php +++ b/lib/util.php @@ -412,37 +412,44 @@ function common_render_text($text) function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. - '(?:^|[\s\(\)\[\]\{\}]+)('. - '(?:'. //Known protocols - '(?:'. - '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. - '|'. - '(?:mailto|aim|tel|xmpp):'. - ')[^\s\(\)\[\]\{\}]+'. + '(?:^|[\s\(\)\[\]\{\}]+)'. + '('. + '(?:'. + '(?:'. //Known protocols + '(?:'. + '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. + '|'. + '(?:mailto|aim|tel|xmpp):'. + ')[^\s\/]+'. + ')'. + '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 + '|(?:'. //IPv6 + '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'. + '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}|'. + '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}|'. + '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}|'. + '(?:[0-9a-f]{1,4}:){1,5}(?::[0-9a-f]{1,4}){1,2}|'. + '(?:[0-9a-f]{1,4}:){1,6}(?::[0-9a-f]{1,4}){1,1}|'. + '(?:(?:[0-9a-f]{1,4}:){1,7}|:):|'. + ':(?::[0-9a-f]{1,4}){1,7}|'. + '(?:(?:(?:[0-9a-f]{1,4}:){6})(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'. + '(?:(?:[0-9a-f]{1,4}:){5}[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'. + '(?:[0-9a-f]{1,4}:){5}:[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. + '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. + '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,3}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. + '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,2}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. + '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,1}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. + '(?:(?:[0-9a-f]{1,4}:){1,5}|:):(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. + ':(?::[0-9a-f]{1,4}){1,5}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'. + ')|(?:'. //DNS + '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'. + ')'. ')'. - '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 - '|(?:'. //IPv6 - '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'. - '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}|'. - '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}|'. - '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}|'. - '(?:[0-9a-f]{1,4}:){1,5}(?::[0-9a-f]{1,4}){1,2}|'. - '(?:[0-9a-f]{1,4}:){1,6}(?::[0-9a-f]{1,4}){1,1}|'. - '(?:(?:[0-9a-f]{1,4}:){1,7}|:):|'. - ':(?::[0-9a-f]{1,4}){1,7}|'. - '(?:(?:(?:[0-9a-f]{1,4}:){6})(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'. - '(?:(?:[0-9a-f]{1,4}:){5}[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'. - '(?:[0-9a-f]{1,4}:){5}:[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,3}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,2}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,1}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:(?:[0-9a-f]{1,4}:){1,5}|:):(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - ':(?::[0-9a-f]{1,4}){1,5}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'. - ')|(?:'. //DNS - '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'. + '(?:'. + '$|(?:'. + '/[^\s\(\)\[\]\{\}]*'. + ')'. ')'. - '([^\s\(\)\[\]\{\}]*)'. ')'. '#ix'; return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text); -- cgit v1.2.3-54-g00ecf From 86ba7b13c2c48280fe371b81ffa14a8b387cd61b Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Fri, 21 Aug 2009 22:58:43 -0400 Subject: Finally got the IPv6 regex right in the url finder --- lib/util.php | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index 091533afb..2be4213e7 100644 --- a/lib/util.php +++ b/lib/util.php @@ -423,24 +423,8 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { ')[^\s\/]+'. ')'. '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4 - '|(?:'. //IPv6 - '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'. - '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}|'. - '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}|'. - '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}|'. - '(?:[0-9a-f]{1,4}:){1,5}(?::[0-9a-f]{1,4}){1,2}|'. - '(?:[0-9a-f]{1,4}:){1,6}(?::[0-9a-f]{1,4}){1,1}|'. - '(?:(?:[0-9a-f]{1,4}:){1,7}|:):|'. - ':(?::[0-9a-f]{1,4}){1,7}|'. - '(?:(?:(?:[0-9a-f]{1,4}:){6})(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'. - '(?:(?:[0-9a-f]{1,4}:){5}[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'. - '(?:[0-9a-f]{1,4}:){5}:[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,3}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,2}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,1}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - '(?:(?:[0-9a-f]{1,4}:){1,5}|:):(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'. - ':(?::[0-9a-f]{1,4}){1,5}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'. + '|(?:'. //IPv6 + '(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))'. ')|(?:'. //DNS '\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'. ')'. -- cgit v1.2.3-54-g00ecf From 6a3a25b5a2b65e54841cc60c4f2254f6d7b6b54b Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 11:21:45 -0400 Subject: Improved the URL tests, and improve the matcher so more tests are passed. The remaining failing tests I believe are incorrect. --- lib/util.php | 24 ++++---- tests/URLDetectionTest.php | 143 ++++++++++++++++++++++++++++++--------------- 2 files changed, 110 insertions(+), 57 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index 2be4213e7..ee3fe5ddc 100644 --- a/lib/util.php +++ b/lib/util.php @@ -412,30 +412,34 @@ function common_render_text($text) function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. - '(?:^|[\s\(\)\[\]\{\}]+)'. - '('. + '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'. + '('. '(?:'. '(?:'. //Known protocols '(?:'. '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. '|'. '(?:mailto|aim|tel|xmpp):'. - ')[^\s\/]+'. + ')'. + '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '[\pN\pL\-\_\:\.]+(?127.0.0.1'), + array('127.0.0.1/test.php', + '127.0.0.1/test.php'), + array('http://::1/test.php', + 'http://::1/test.php'), + array('http://::1', + 'http://::1'), + array('2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php', + '2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php'), + array('2001:4978:1b5:0:21d:e0ff:fe66:59ab', + '2001:4978:1b5:0:21d:e0ff:fe66:59ab'), + array('http://127.0.0.1', + 'http://127.0.0.1'), + array('example.com', + 'example.com'), + array('example.com', + 'example.com'), + array('http://example.com', + 'http://example.com'), + array('http://example.com.', + 'http://example.com.'), + array('/var/lib/example.so', + '/var/lib/example.so'), array('example', 'example'), + array('user@example.com', + 'user@example.com'), + array('user_name+other@example.com', + 'user_name+other@example.com'), + array('mailto:user@example.com', + 'mailto:user@example.com'), + array('mailto:user@example.com?subject=test', + 'mailto:user@example.com?subject=test'), + array('#example', + '#'), + array('#example.com', + '#'), + array('#.net', + '#'), array('http://example', 'http://example'), + array('http://3xampl3', + 'http://3xampl3'), array('http://example/', 'http://example/'), array('http://example/path', @@ -47,6 +87,10 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase 'http://user:pass@example.com'), array('http://example.com:8080', 'http://example.com:8080'), + array('http://example.com:8080/test.php', + 'http://example.com:8080/test.php'), + array('example.com:8080/test.php', + 'example.com:8080/test.php'), array('http://www.example.com', 'http://www.example.com'), array('http://example.com/', @@ -59,60 +103,65 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase 'http://example.com/path.html#fragment'), array('http://example.com/path.php?foo=bar&bar=foo', 'http://example.com/path.php?foo=bar&bar=foo'), + array('http://example.com.', + 'http://example.com.'), array('http://müllärör.de', - 'http://müllärör.de'), + 'http://müllärör.de'), array('http://ﺱﺲﺷ.com', - 'http://ﺱﺲﺷ.com'), + 'http://ﺱﺲﺷ.com'), array('http://сделаткартинки.com', - 'http://сделаткартинки.com'), + 'http://сделаткартинки.com'), array('http://tūdaliņ.lv', - 'http://tūdaliņ.lv'), + 'http://tūdaliņ.lv'), array('http://brændendekærlighed.com', - 'http://brændendekærlighed.com'), + 'http://brændendekærlighed.com'), array('http://あーるいん.com', - 'http://あーるいん.com'), + 'http://あーるいん.com'), array('http://예비교사.com', - 'http://예비교사.com'), + 'http://예비교사.com'), array('http://example.com.', - 'http://example.com.'), + 'http://example.com.'), array('http://example.com?', - 'http://example.com?'), + 'http://example.com?'), array('http://example.com!', - 'http://example.com!'), + 'http://example.com!'), array('http://example.com,', - 'http://example.com,'), + 'http://example.com,'), array('http://example.com;', - 'http://example.com;'), + 'http://example.com;'), array('http://example.com:', - 'http://example.com:'), + 'http://example.com:'), array('\'http://example.com\'', - '\'http://example.com\''), + '\'http://example.com\''), array('"http://example.com"', - '"http://example.com"'), - array('http://example.com ', - 'http://example.com'), + '"http://example.com"'), + array('http://example.com', + 'http://example.com'), array('(http://example.com)', - '(http://example.com)'), + '(http://example.com)'), array('[http://example.com]', - '[http://example.com]'), + '[http://example.com]'), array('', - '<http://example.com>'), + '<http://example.com>'), array('http://example.com/path/(foo)/bar', 'http://example.com/path/(foo)/bar'), + //Not a valid url - urls cannot contain unencoded square brackets array('http://example.com/path/[foo]/bar', 'http://example.com/path/[foo]/bar'), array('http://example.com/path/foo/(bar)', 'http://example.com/path/foo/(bar)'), - array('http://example.com/path/foo/[bar]', - 'http://example.com/path/foo/[bar]'), + //Not a valid url - urls cannot contain unencoded square brackets + //array('http://example.com/path/foo/[bar]', + // 'http://example.com/path/foo/[bar]'), array('Hey, check out my cool site http://example.com okay?', - 'Hey, check out my cool site http://example.com okay?'), + 'Hey, check out my cool site http://example.com okay?'), array('What about parens (e.g. http://example.com/path/foo/(bar))?', 'What about parens (e.g. http://example.com/path/foo/(bar))?'), array('What about parens (e.g. http://example.com/path/foo/(bar)?', 'What about parens (e.g. http://example.com/path/foo/(bar)?'), array('What about parens (e.g. http://example.com/path/foo/(bar).)?', 'What about parens (e.g. http://example.com/path/foo/(bar).)?'), + //Not a valid url - urls cannot contain unencoded commas array('What about parens (e.g. http://example.com/path/(foo,bar)?', 'What about parens (e.g. http://example.com/path/(foo,bar)?'), array('Unbalanced too (e.g. http://example.com/path/((((foo)/bar)?', @@ -124,51 +173,51 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase array('Unbalanced too (e.g. http://example.com/path/foo/(bar))))?', 'Unbalanced too (e.g. http://example.com/path/foo/(bar))))?'), array('example.com', - 'example.com'), + 'example.com'), array('example.org', - 'example.org'), + 'example.org'), array('example.co.uk', - 'example.co.uk'), + 'example.co.uk'), array('www.example.co.uk', - 'www.example.co.uk'), + 'www.example.co.uk'), array('farm1.images.example.co.uk', - 'farm1.images.example.co.uk'), + 'farm1.images.example.co.uk'), array('example.museum', - 'example.museum'), + 'example.museum'), array('example.travel', - 'example.travel'), + 'example.travel'), array('example.com.', - 'example.com.'), + 'example.com.'), array('example.com?', - 'example.com?'), + 'example.com?'), array('example.com!', - 'example.com!'), + 'example.com!'), array('example.com,', - 'example.com,'), + 'example.com,'), array('example.com;', - 'example.com;'), + 'example.com;'), array('example.com:', - 'example.com:'), + 'example.com:'), array('\'example.com\'', - '\'example.com\''), + '\'example.com\''), array('"example.com"', - '"example.com"'), - array('example.com ', - 'example.com'), + '"example.com"'), + array('example.com', + 'example.com'), array('(example.com)', - '(example.com)'), + '(example.com)'), array('[example.com]', - '[example.com]'), + '[example.com]'), array('', - '<example.com>'), + '<example.com>'), array('Hey, check out my cool site example.com okay?', - 'Hey, check out my cool site example.com okay?'), + 'Hey, check out my cool site example.com okay?'), array('Hey, check out my cool site example.com.I made it.', - 'Hey, check out my cool site example.com.I made it.'), + 'Hey, check out my cool site example.com.I made it.'), array('Hey, check out my cool site example.com.Funny thing...', - 'Hey, check out my cool site example.com.Funny thing...'), + 'Hey, check out my cool site example.com.Funny thing...'), array('Hey, check out my cool site example.com.You will love it.', - 'Hey, check out my cool site example.com.You will love it.'), + 'Hey, check out my cool site example.com.You will love it.'), array('What about parens (e.g. example.com/path/foo/(bar))?', 'What about parens (e.g. example.com/path/foo/(bar))?'), array('What about parens (e.g. example.com/path/foo/(bar)?', -- cgit v1.2.3-54-g00ecf From 210bc4248b5e0c8bd577ac51c2f8df9ac05166ae Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 14:12:31 -0400 Subject: All tests pass except for those that require matching parens or brackets --- lib/util.php | 40 +++++++++++++++++++++++++--------------- tests/URLDetectionTest.php | 13 ++++++++++--- 2 files changed, 35 insertions(+), 18 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index ee3fe5ddc..36d1e4c0a 100644 --- a/lib/util.php +++ b/lib/util.php @@ -413,45 +413,55 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'. - '('. + '(?P'. '(?:'. '(?:'. //Known protocols '(?:'. - '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. + '(?:(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://)'. '|'. - '(?:mailto|aim|tel|xmpp):'. + '(?:(?:mailto|aim|tel|xmpp):)'. + ')'. + '(?:[\pN\pL\-\_\+]+(?::[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '(?:'. + '(?:'. + '\[[\pN\pL\-\_\:\.]+(?http://127.0.0.1'), array('127.0.0.1', '127.0.0.1'), + array('127.0.0.1:99', + '127.0.0.1:99'), array('127.0.0.1/test.php', '127.0.0.1/test.php'), + array('http://[::1]:99/test.php', + 'http://[::1]:99/test.php'), array('http://::1/test.php', 'http://::1/test.php'), array('http://::1', 'http://::1'), array('2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php', '2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php'), + array('[2001:4978:1b5:0:21d:e0ff:fe66:59ab]:99/test.php', + '[2001:4978:1b5:0:21d:e0ff:fe66:59ab]:99/test.php'), array('2001:4978:1b5:0:21d:e0ff:fe66:59ab', '2001:4978:1b5:0:21d:e0ff:fe66:59ab'), array('http://127.0.0.1', @@ -145,14 +153,13 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase '<http://example.com>'), array('http://example.com/path/(foo)/bar', 'http://example.com/path/(foo)/bar'), - //Not a valid url - urls cannot contain unencoded square brackets array('http://example.com/path/[foo]/bar', 'http://example.com/path/[foo]/bar'), array('http://example.com/path/foo/(bar)', 'http://example.com/path/foo/(bar)'), //Not a valid url - urls cannot contain unencoded square brackets - //array('http://example.com/path/foo/[bar]', - // 'http://example.com/path/foo/[bar]'), + array('http://example.com/path/foo/[bar]', + 'http://example.com/path/foo/[bar]'), array('Hey, check out my cool site http://example.com okay?', 'Hey, check out my cool site http://example.com okay?'), array('What about parens (e.g. http://example.com/path/foo/(bar))?', -- cgit v1.2.3-54-g00ecf From ff836eb38adbff2ca01e076ac66e6fc3d5833506 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 14:19:05 -0400 Subject: Add UTF-8 encodings of the IDN TLDs --- lib/util.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index 36d1e4c0a..d4e2841cd 100644 --- a/lib/util.php +++ b/lib/util.php @@ -437,7 +437,7 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.'. //tld list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt, also added local, loc, and onion - '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|XN--11B5BS3A9AJ6G|XN--80AKHBYKNJ4F|XN--9T4B11YI5A|XN--DEBA0AD|XN--G6W251D|XN--HGBK6AJ7F53BBA|XN--HLCJ6AYA9ESC7A|XN--JXALPDLP|XN--KGBECHTV|XN--ZCKZAH|YE|YT|YU|ZA|ZM|ZW|local|loc|onion)'. + '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|测试|XN--11B5BS3A9AJ6G|परीक्षा|XN--80AKHBYKNJ4F|испытание|XN--9T4B11YI5A|테스트|XN--DEBA0AD|טעסט|XN--G6W251D|測試|XN--HGBK6AJ7F53BBA|آزمایشی|XN--HLCJ6AYA9ESC7A|பரிட்சை|XN--JXALPDLP|δοκιμή|XN--KGBECHTV|إختبار|XN--ZCKZAH|テスト|YE|YT|YU|ZA|ZM|ZW|local|loc|onion)'. ')(?![\pN\pL\-\_])'. ')'. '(?:'. -- cgit v1.2.3-54-g00ecf From 31329c33aee7f3d4b5c1dd80f32fcd45e7877850 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 16:41:44 -0400 Subject: Handle grouping symbols ()[]{} correctly. Now passing all tests! --- lib/util.php | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index d4e2841cd..29eb6cbbc 100644 --- a/lib/util.php +++ b/lib/util.php @@ -445,7 +445,7 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { '(?:/[\pN\pL$\[\]\,\!\(\)\.\-\_\+\/\=\&\;]*)?'. // /path '(?:\?[\pN\pL\$\[\]\,\!\(\)\.\-\_\+\/\=\&\;\/]*)?'. // ?query string '(?:\#[\pN\pL$\[\]\,\!\(\)\.\-\_\+\/\=\&\;\/\?\#]*)?'. // #fragment - ')(?'(', + 'right'=>')' + ), + array( + 'left'=>'[', + 'right'=>']' + ), + array( + 'left'=>'{', + 'right'=>'}' + ) + ); + $cannotEndWith=array('.','?',',','#'); + $original_url=$url; + do{ + $original_url=$url; + foreach($groupSymbolSets as $groupSymbolSet){ + if(substr($url,-1)==$groupSymbolSet['right']){ + $group_left_count = substr_count($url,$groupSymbolSet['left']); + $group_right_count = substr_count($url,$groupSymbolSet['right']); + if($group_left_count<$group_right_count){ + $right-=1; + $url=substr($url,0,-1); + } + } + } + if(in_array(substr($url,-1),$cannotEndWith)){ + $right-=1; + $url=substr($url,0,-1); + } + }while($original_url!=$url); + + if(empty($notice_id)){ - $result = call_user_func_array($callback,$matches['url']); + $result = call_user_func_array($callback,$url); }else{ - $result = call_user_func_array($callback, array($matches['url'],$notice_id) ); + $result = call_user_func_array($callback, array($url,$notice_id) ); } - return $left . $result . $right; + return substr($matches[0],0,$left) . $result . substr($matches[0],$right); } function curry($fn) { -- cgit v1.2.3-54-g00ecf From a2117961be463d566c8f3c6ccd9b8e840f171804 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 17:52:16 -0400 Subject: Allow ({['" to preceded #tags --- lib/util.php | 2 +- tests/HashTagDetectionTest.php | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'lib/util.php') diff --git a/lib/util.php b/lib/util.php index 29eb6cbbc..7c1e21913 100644 --- a/lib/util.php +++ b/lib/util.php @@ -404,7 +404,7 @@ function common_render_text($text) $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r); $r = common_replace_urls_callback($r, 'common_linkify'); - $r = preg_replace('/(^|\(|\[|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r); + $r = preg_replace('/(^|\"\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r); // XXX: machine tags return $r; } diff --git a/tests/HashTagDetectionTest.php b/tests/HashTagDetectionTest.php index 55e1f65bf..901e0611c 100644 --- a/tests/HashTagDetectionTest.php +++ b/tests/HashTagDetectionTest.php @@ -27,8 +27,20 @@ class HashTagDetectionTest extends PHPUnit_Framework_TestCase return array( array('hello', 'hello'), - array('#hello', - '#'), + array('#hello people', + '# people'), + array('"#hello" people', + '"#" people'), + array('say "#hello" people', + 'say "#" people'), + array('say (#hello) people', + 'say (#) people'), + array('say [#hello] people', + 'say [#] people'), + array('say {#hello} people', + 'say {#} people'), + array('say \'#hello\' people', + 'say \'#\' people'), ); } } -- cgit v1.2.3-54-g00ecf