From e0c3bf5a75094937315aa09dd9635833f9eb2925 Mon Sep 17 00:00:00 2001 From: zach Date: Tue, 4 Nov 2008 03:40:27 -0500 Subject: Better UTF-8 escaped entity handling. Fixed bad chars in all RSS feeds. darcs-hash:20081104084027-462f3-ea7ab93938358bf90a1c1851d6f665973beae767.gz --- lib/rssaction.php | 2 +- lib/twitterapi.php | 16 ++++++++-------- lib/util.php | 9 ++++++++- 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/rssaction.php b/lib/rssaction.php index 76859a876..0152f25bc 100644 --- a/lib/rssaction.php +++ b/lib/rssaction.php @@ -123,7 +123,7 @@ class Rss10Action extends Action { $nurl = common_local_url('shownotice', array('notice' => $notice->id)); $creator_uri = common_profile_uri($profile); common_element_start('item', array('rdf:about' => $notice->uri)); - $title = $profile->nickname . ': ' . $notice->content; + $title = $profile->nickname . ': ' . common_xml_safe_str($notice->content); common_element('title', NULL, $title); common_element('link', NULL, $nurl); common_element('description', NULL, $profile->nickname."'s status on ".common_exact_date($notice->created)); diff --git a/lib/twitterapi.php b/lib/twitterapi.php index d4b6fff4f..378716eaa 100644 --- a/lib/twitterapi.php +++ b/lib/twitterapi.php @@ -60,7 +60,7 @@ class TwitterapiAction extends Action { $profile = $notice->getProfile(); $twitter_status = array(); - $twitter_status['text'] = $notice->content; + $twitter_status['text'] = common_xml_safe_str($notice->content); $twitter_status['truncated'] = 'false'; # Not possible on Laconica $twitter_status['created_at'] = $this->date_twitter($notice->created); $twitter_status['in_reply_to_status_id'] = ($notice->reply_to) ? intval($notice->reply_to) : NULL; @@ -91,8 +91,8 @@ class TwitterapiAction extends Action { $server = common_config('site', 'server'); $entry = array(); - - $entry['content'] = $profile->nickname . ': ' . $notice->content; + + $entry['content'] = $profile->nickname . ': ' . 
common_xml_safe_str($notice->content); $entry['title'] = $entry['content']; $entry['link'] = common_local_url('shownotice', array('notice' => $notice->id)); $entry['published'] = common_date_iso8601($notice->created); @@ -115,14 +115,14 @@ class TwitterapiAction extends Action { $entry['title'] = sprintf('Message from %s to %s', $message->getFrom()->nickname, $message->getTo()->nickname); - $entry['content'] = $message->content; + $entry['content'] = common_xml_safe_str($message->content); $entry['link'] = $message->uri; $entry['published'] = common_date_iso8601($message->created); $entry['id'] = "tag:$server,2008:$entry[link]"; $entry['updated'] = $entry['published']; # RSS Item specific - $entry['description'] = $message->content; + $entry['description'] = $entry['content']; $entry['pubDate'] = common_date_rfc2822($message->created); $entry['guid'] = $entry['link']; @@ -137,8 +137,8 @@ class TwitterapiAction extends Action { $to_profile = $message->getTo(); $twitter_dm['id'] = $message->id; - $twitter_dm['sender_id'] = $message->from_profile; - $twitter_dm['text'] = $message->content; + $twitter_dm['sender_id'] = $message->from_profile; + $twitter_dm['text'] = common_xml_safe_str($message->content); $twitter_dm['recipient_id'] = $message->to_profile; $twitter_dm['created_at'] = $this->date_twitter($message->created); $twitter_dm['sender_screen_name'] = $from_profile->nickname; @@ -569,5 +569,5 @@ class TwitterapiAction extends Action { } return $source_name; } - + } \ No newline at end of file diff --git a/lib/util.php b/lib/util.php index 88841d3f6..23abb1426 100644 --- a/lib/util.php +++ b/lib/util.php @@ -708,7 +708,7 @@ function common_render_content($text, $notice) { } function common_render_text($text) { - $r = htmlentities($text, ENT_NOQUOTES, 'UTF-8'); + $r = htmlspecialchars($text); $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r); $r = preg_replace_callback('@https?://[^\]>\s]+@', 'common_render_uri_thingy', $r); @@ -745,6 +745,13 @@ 
function common_render_uri_thingy($matches) {
     return '' . $uri . '' . $trailer;
 }
 
+// Returns $str with invalid UTF-8 dropped and control, formatting, and surrogate characters replaced by '*', ala Twitter.
+function common_xml_safe_str($str) {
+    // Clean the bytes first: a malformed sequence makes the /u regex below fail and return NULL instead of text.
+    $str = iconv('UTF-8', 'UTF-8//IGNORE', $str);
+    return preg_replace('/[\p{Cc}\p{Cf}\p{Cs}]/u', '*', $str);
+}
+
 function common_tag_link($tag) {
     $canonical = common_canonical_tag($tag);
     $url = common_local_url('tag', array('tag' => $canonical));
-- 
cgit v1.2.3-54-g00ecf