From 8fd0059bf69ed16ed4efad7b8e16dc2afda32e18 Mon Sep 17 00:00:00 2001
From: Brion Vibber <brion@pobox.com>
Date: Fri, 23 Apr 2010 15:40:48 -0700
Subject: Test cases and fixes for Atom and RSS content decoding.

Fix extraction of Atom <content type="text"> and <content type="html">; we were failing to escape plaintext source data to HTML, and doing an extraneous double-deescape on HTML source resulting in breakage of notices containing text that looks like HTML. Only <content type="xhtml"> was working correctly previously.
Fixes for RSS2 content processing: we were failing to load <content:encoded> at all due to using wrong element name, and were applying an extraneous de-escape for <description> rather than the escaping that is required to turn plaintext into HTML. (Per spec, <description> must be plaintext.)
---
 lib/activityutils.php | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'lib/activityutils.php')
diff --git a/lib/activityutils.php b/lib/activityutils.php
index a7e99fb11..401fd7fc2 100644
--- a/lib/activityutils.php
+++ b/lib/activityutils.php
@@ -213,11 +213,19 @@ class ActivityUtils
         // slavishly following http://atompub.org/rfc4287.html#rfc.section.4.1.3.3
 
         if (empty($type) || $type == 'text') {
-            return $el->textContent;
+            // We have plaintext saved as the XML text content.
+            // Since we want HTML, we need to escape any special chars.
+            return htmlspecialchars($el->textContent);
         } else if ($type == 'html') {
+            // We have HTML saved as the XML text content.
+            // No additional processing required once we've got it.
             $text = $el->textContent;
-            return htmlspecialchars_decode($text, ENT_QUOTES);
+            return $text;
         } else if ($type == 'xhtml') {
+            // Per spec, the <content type="xhtml"> contains a single
+            // HTML <div> with XHTML namespace on it as a child node.
+            // We need to pull all of that <div>'s child nodes and
+            // serialize them back to an (X)HTML source fragment.
             $divEl = ActivityUtils::child($el, 'div', 'http://www.w3.org/1999/xhtml');
             if (empty($divEl)) {
                 return null;
-- 
cgit v1.2.3-54-g00ecf