From 6a3a25b5a2b65e54841cc60c4f2254f6d7b6b54b Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 11:21:45 -0400 Subject: Improved the URL tests, and improve the matcher so more tests are passed. The remaining failing tests I believe are incorrect. --- lib/util.php | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/util.php b/lib/util.php index 2be4213e7..ee3fe5ddc 100644 --- a/lib/util.php +++ b/lib/util.php @@ -412,30 +412,34 @@ function common_render_text($text) function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. - '(?:^|[\s\(\)\[\]\{\}]+)'. - '('. + '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'. + '('. '(?:'. '(?:'. //Known protocols '(?:'. '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. '|'. '(?:mailto|aim|tel|xmpp):'. - ')[^\s\/]+'. + ')'. + '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '[\pN\pL\-\_\:\.]+(? Date: Tue, 25 Aug 2009 14:12:31 -0400 Subject: All tests pass except for those that require matching parens or brackets --- lib/util.php | 40 +++++++++++++++++++++++++--------------- tests/URLDetectionTest.php | 13 ++++++++++--- 2 files changed, 35 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/util.php b/lib/util.php index ee3fe5ddc..36d1e4c0a 100644 --- a/lib/util.php +++ b/lib/util.php @@ -413,45 +413,55 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { // Start off with a regex $regex = '#'. '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'. - '('. + '(?P'. '(?:'. '(?:'. //Known protocols '(?:'. - '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'. + '(?:(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://)'. '|'. - '(?:mailto|aim|tel|xmpp):'. + '(?:(?:mailto|aim|tel|xmpp):)'. + ')'. + '(?:[\pN\pL\-\_\+]+(?::[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ + '(?:'. + '(?:'. + '\[[\pN\pL\-\_\:\.]+(?http://127.0.0.1'), array('127.0.0.1', '127.0.0.1'), + array('127.0.0.1:99', + '127.0.0.1:99'), array('127.0.0.1/test.php', '127.0.0.1/test.php'), + array('http://[::1]:99/test.php', + 'http://[::1]:99/test.php'), array('http://::1/test.php', 'http://::1/test.php'), array('http://::1', 'http://::1'), array('2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php', '2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php'), + array('[2001:4978:1b5:0:21d:e0ff:fe66:59ab]:99/test.php', + '[2001:4978:1b5:0:21d:e0ff:fe66:59ab]:99/test.php'), array('2001:4978:1b5:0:21d:e0ff:fe66:59ab', '2001:4978:1b5:0:21d:e0ff:fe66:59ab'), array('http://127.0.0.1', @@ -145,14 +153,13 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase '<http://example.com>'), array('http://example.com/path/(foo)/bar', 'http://example.com/path/(foo)/bar'), - //Not a valid url - urls cannot contain unencoded square brackets array('http://example.com/path/[foo]/bar', 'http://example.com/path/[foo]/bar'), array('http://example.com/path/foo/(bar)', 'http://example.com/path/foo/(bar)'), //Not a valid url - urls cannot contain unencoded square brackets - //array('http://example.com/path/foo/[bar]', - // 'http://example.com/path/foo/[bar]'), + array('http://example.com/path/foo/[bar]', + 'http://example.com/path/foo/[bar]'), array('Hey, check out my cool site http://example.com okay?', 'Hey, check out my cool site http://example.com okay?'), array('What about parens (e.g. http://example.com/path/foo/(bar))?', -- cgit v1.2.3-54-g00ecf From ff836eb38adbff2ca01e076ac66e6fc3d5833506 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 14:19:05 -0400 Subject: Add UTF-8 encodings of the IDN TLDs --- lib/util.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/util.php b/lib/util.php index 36d1e4c0a..d4e2841cd 100644 --- a/lib/util.php +++ b/lib/util.php @@ -437,7 +437,7 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { '(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@ '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.'. //tld list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt, also added local, loc, and onion - '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|XN--11B5BS3A9AJ6G|XN--80AKHBYKNJ4F|XN--9T4B11YI5A|XN--DEBA0AD|XN--G6W251D|XN--HGBK6AJ7F53BBA|XN--HLCJ6AYA9ESC7A|XN--JXALPDLP|XN--KGBECHTV|XN--ZCKZAH|YE|YT|YU|ZA|ZM|ZW|local|loc|onion)'. + '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|测试|XN--11B5BS3A9AJ6G|परीक्षा|XN--80AKHBYKNJ4F|испытание|XN--9T4B11YI5A|테스트|XN--DEBA0AD|טעסט|XN--G6W251D|測試|XN--HGBK6AJ7F53BBA|آزمایشی|XN--HLCJ6AYA9ESC7A|பரிட்சை|XN--JXALPDLP|δοκιμή|XN--KGBECHTV|إختبار|XN--ZCKZAH|テスト|YE|YT|YU|ZA|ZM|ZW|local|loc|onion)'. ')(?![\pN\pL\-\_])'. ')'. '(?:'. -- cgit v1.2.3-54-g00ecf From 31329c33aee7f3d4b5c1dd80f32fcd45e7877850 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 16:41:44 -0400 Subject: Handle grouping symbols ()[]{} correctly. Now passing all tests! --- lib/util.php | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/util.php b/lib/util.php index d4e2841cd..29eb6cbbc 100644 --- a/lib/util.php +++ b/lib/util.php @@ -445,7 +445,7 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { '(?:/[\pN\pL$\[\]\,\!\(\)\.\-\_\+\/\=\&\;]*)?'. // /path '(?:\?[\pN\pL\$\[\]\,\!\(\)\.\-\_\+\/\=\&\;\/]*)?'. // ?query string '(?:\#[\pN\pL$\[\]\,\!\(\)\.\-\_\+\/\=\&\;\/\?\#]*)?'. // #fragment - ')(?'(', + 'right'=>')' + ), + array( + 'left'=>'[', + 'right'=>']' + ), + array( + 'left'=>'{', + 'right'=>'}' + ) + ); + $cannotEndWith=array('.','?',',','#'); + $original_url=$url; + do{ + $original_url=$url; + foreach($groupSymbolSets as $groupSymbolSet){ + if(substr($url,-1)==$groupSymbolSet['right']){ + $group_left_count = substr_count($url,$groupSymbolSet['left']); + $group_right_count = substr_count($url,$groupSymbolSet['right']); + if($group_left_count<$group_right_count){ + $right-=1; + $url=substr($url,0,-1); + } + } + } + if(in_array(substr($url,-1),$cannotEndWith)){ + $right-=1; + $url=substr($url,0,-1); + } + }while($original_url!=$url); + + if(empty($notice_id)){ - $result = call_user_func_array($callback,$matches['url']); + $result = call_user_func_array($callback,$url); }else{ - $result = call_user_func_array($callback, array($matches['url'],$notice_id) ); + $result = call_user_func_array($callback, array($url,$notice_id) ); } - return $left . $result . $right; + return substr($matches[0],0,$left) . $result . substr($matches[0],$right); } function curry($fn) { -- cgit v1.2.3-54-g00ecf From fc89c345ed9fa7a72ce72e3b467a575580050b4f Mon Sep 17 00:00:00 2001 From: Sarven Capadisli Date: Tue, 25 Aug 2009 21:48:55 +0000 Subject: userdesign.go.js was incorrectly removed in commit 304db1d30b4ad96f8a2ca500d224bb1609588fed --- lib/designsettings.php | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/designsettings.php b/lib/designsettings.php index a48ec9d22..b86265971 100644 --- a/lib/designsettings.php +++ b/lib/designsettings.php @@ -326,6 +326,7 @@ class DesignSettingsAction extends AccountSettingsAction $this->script('js/farbtastic/farbtastic.js'); $this->script('js/farbtastic/farbtastic.go.js'); + $this->script('js/userdesign.go.js'); } /** -- cgit v1.2.3-54-g00ecf From a2117961be463d566c8f3c6ccd9b8e840f171804 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Tue, 25 Aug 2009 17:52:16 -0400 Subject: Allow ({['" to preceded #tags --- lib/util.php | 2 +- tests/HashTagDetectionTest.php | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/util.php b/lib/util.php index 29eb6cbbc..7c1e21913 100644 --- a/lib/util.php +++ b/lib/util.php @@ -404,7 +404,7 @@ function common_render_text($text) $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r); $r = common_replace_urls_callback($r, 'common_linkify'); - $r = preg_replace('/(^|\(|\[|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r); + $r = preg_replace('/(^|\"\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r); // XXX: machine tags return $r; } diff --git a/tests/HashTagDetectionTest.php b/tests/HashTagDetectionTest.php index 55e1f65bf..901e0611c 100644 --- a/tests/HashTagDetectionTest.php +++ b/tests/HashTagDetectionTest.php @@ -27,8 +27,20 @@ class HashTagDetectionTest extends PHPUnit_Framework_TestCase return array( array('hello', 'hello'), - array('#hello', - '#'), + array('#hello people', + '# people'), + array('"#hello" people', + '"#" people'), + array('say "#hello" people', + 'say "#" people'), + array('say (#hello) people', + 'say (#) people'), + array('say [#hello] people', + 'say [#] people'), + array('say {#hello} people', + 'say {#} people'), + array('say \'#hello\' people', + 'say \'#\' people'), ); } } -- cgit v1.2.3-54-g00ecf