From 8fd0059bf69ed16ed4efad7b8e16dc2afda32e18 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 23 Apr 2010 15:40:48 -0700 Subject: Test cases and fixes for Atom and RSS content decoding. Fix extraction of Atom <content type="text"> and <content type="html">; we were failing to escape plaintext source data to HTML, and doing an extraneous double-deescape on HTML source resulting in breakage of notices containing text that looks like HTML. Only <content type="xhtml"> was working correctly previously. Fixes for RSS2 content processing: we were failing to load <content:encoded> at all due to using wrong element name, and were applying an extraneous de-escape for <description> rather than the escaping that is required to turn plaintext into HTML. (Per spec, <description> must be plaintext.) --- lib/activityutils.php | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'lib/activityutils.php') diff --git a/lib/activityutils.php b/lib/activityutils.php index a7e99fb11..401fd7fc2 100644 --- a/lib/activityutils.php +++ b/lib/activityutils.php @@ -213,11 +213,19 @@ class ActivityUtils // slavishly following http://atompub.org/rfc4287.html#rfc.section.4.1.3.3 if (empty($type) || $type == 'text') { - return $el->textContent; + // We have plaintext saved as the XML text content. + // Since we want HTML, we need to escape any special chars. + return htmlspecialchars($el->textContent); } else if ($type == 'html') { + // We have HTML saved as the XML text content. + // No additional processing required once we've got it. $text = $el->textContent; - return htmlspecialchars_decode($text, ENT_QUOTES); + return $text; } else if ($type == 'xhtml') { + // Per spec, the <content type="xhtml"> contains a single + // HTML <div>
with XHTML namespace on it as a child node. + // We need to pull all of that <div>
's child nodes and + // serialize them back to an (X)HTML source fragment. $divEl = ActivityUtils::child($el, 'div', 'http://www.w3.org/1999/xhtml'); if (empty($divEl)) { return null; -- cgit v1.2.3-54-g00ecf