// Patch: "heuristics for paren linking (thanks @brion!)"
// Author: Evan Prodromou, darcs timestamp 20081102090241.
//
// Call-site change in ./lib/util.php (hunk at line 714): URLs are now matched
// without excluding ')' and each match is handed to the callback below:
//
//     $r = preg_replace_callback('@https?://[^\]>\s]+@', 'common_render_uri_thingy', $r);

/**
 * Callback for preg_replace_callback(): separates a matched URI from the
 * punctuation and unbalanced closing brackets that trail it.
 *
 * Two heuristics are applied repeatedly until neither changes the URI:
 *  - a trailing run of , . : " ' is moved out of the URI;
 *  - a final ')' or ']' is moved out of the URI when the URI contains more
 *    of that closer than of its matching opener (the bracket was presumably
 *    opened in the surrounding text).
 *
 * @param array $matches Match array; only $matches[0] (the full match) is read.
 *
 * @return string The URI followed by the text that was split off it.
 */
function common_render_uri_thingy($matches)
{
    $uri = $matches[0];
    $trailer = '';

    $pairs = array(
        ']' => '[', // technically disallowed in URIs, but used in Java docs
        ')' => '(', // far too frequent in Wikipedia and MSDN
    );

    // Repeat so that mixed endings such as "foo.)" or "foo))." are fully
    // peeled off. Every pass that sets $changed shortens $uri, so this ends.
    do {
        $changed = false;

        // Lazy (.*?) so the whole trailing punctuation run is captured; a
        // greedy (.*) would leave only the last character for the second group.
        // A separate $parts array keeps the $matches parameter intact.
        if (preg_match('/^(.*?)([,.:"\']+)$/', $uri, $parts)) {
            $uri = $parts[1];
            $trailer = $parts[2] . $trailer;
            $changed = true;
        }

        if ($uri !== '') {
            $final = substr($uri, -1);
            if (isset($pairs[$final])) {
                $openers = substr_count($uri, $pairs[$final]);
                $closers = substr_count($uri, $final);
                if ($closers > $openers) {
                    // Assume the bracket was opened outside the URI
                    $uri = (string) substr($uri, 0, -1);
                    $trailer = $final . $trailer;
                    $changed = true;
                }
            }
        }
    } while ($changed);

    // NOTE(review): the two empty string literals look like link markup that
    // was lost when this patch was extracted -- confirm against lib/util.php.
    return '' . $uri . '' . $trailer;
}