From e0c3bf5a75094937315aa09dd9635833f9eb2925 Mon Sep 17 00:00:00 2001
From: zach
Date: Tue, 4 Nov 2008 03:40:27 -0500
Subject: Better UTF-8 escaped entity handling. Fixed bad chars in all RSS feeds.

darcs-hash:20081104084027-462f3-ea7ab93938358bf90a1c1851d6f665973beae767.gz
---
 lib/util.php | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'lib/util.php')

diff --git a/lib/util.php b/lib/util.php
index 88841d3f6..23abb1426 100644
--- a/lib/util.php
+++ b/lib/util.php
@@ -708,7 +708,7 @@ function common_render_content($text, $notice) {
 }
 
 function common_render_text($text) {
-    $r = htmlentities($text, ENT_NOQUOTES, 'UTF-8');
+    $r = htmlspecialchars($text);
 
     $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r);
     $r = preg_replace_callback('@https?://[^\]>\s]+@', 'common_render_uri_thingy', $r);
@@ -745,6 +745,29 @@ function common_render_uri_thingy($matches) {
     return '' . $uri . '' . $trailer;
 }
 
+/**
+ * Clean a string for use in XML output (per the commit subject: RSS feeds).
+ *
+ * Byte sequences that are not valid UTF-8 are dropped, then every control
+ * (Cc), format (Cf) and surrogate (Cs) character is replaced with '*'.
+ * No entity escaping is performed here.
+ *
+ * @param string $str text to clean, expected to be UTF-8
+ * @return string|null cleaned text, or null if preg_replace() reports an error
+ */
+function common_xml_safe_str($str) {
+    // Strip invalid UTF-8 first: with the /u modifier preg_replace() returns
+    // null for a malformed subject, which would discard the whole string.
+    $clean = iconv('UTF-8', 'UTF-8//IGNORE', $str);
+    if ($clean === false) {
+        // iconv() may fail instead of ignoring bad bytes; keep the raw input then.
+        $clean = $str;
+    }
+
+    // Replace control, formatting, and surrogate characters with '*', a la Twitter
+    return preg_replace('/[\p{Cc}\p{Cf}\p{Cs}]/u', '*', $clean);
+}
+
 function common_tag_link($tag) {
     $canonical = common_canonical_tag($tag);
     $url = common_local_url('tag', array('tag' => $canonical));
-- 
cgit v1.2.3-54-g00ecf