From 8fd0059bf69ed16ed4efad7b8e16dc2afda32e18 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 23 Apr 2010 15:40:48 -0700 Subject: Test cases and fixes for Atom and RSS content decoding. Fix extraction of Atom <content type="text"> and <content type="html">; we were failing to escape plaintext source data to HTML, and doing an extraneous double-deescape on HTML source resulting in breakage of notices containing text that looks like HTML. Only <content type="xhtml"> was working correctly previously. Fixes for RSS2 content processing: we were failing to load <content:encoded> at all due to using wrong element name, and were applying an extraneous de-escape for <description> rather than the escaping that is required to turn plaintext into HTML. (Per spec, <description> must be plaintext.) --- lib/activity.php | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'lib/activity.php') diff --git a/lib/activity.php b/lib/activity.php index 5d6230c6d..27f09ab4d 100644 --- a/lib/activity.php +++ b/lib/activity.php @@ -83,6 +83,7 @@ class Activity const CREATOR = 'creator'; const CONTENTNS = 'http://purl.org/rss/1.0/modules/content/'; + const ENCODED = 'encoded'; public $actor; // an ActivityObject public $verb; // a string (the URL) @@ -268,14 +269,21 @@ class Activity $this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, self::RSS); - $contentEl = ActivityUtils::child($item, ActivityUtils::CONTENT, self::CONTENTNS); + $contentEl = ActivityUtils::child($item, self::ENCODED, self::CONTENTNS); if (!empty($contentEl)) { - $this->content = htmlspecialchars_decode($contentEl->textContent, ENT_QUOTES); + // XML node's text content is HTML; no further processing needed. + $this->content = $contentEl->textContent; } else { $descriptionEl = ActivityUtils::child($item, self::DESCRIPTION, self::RSS); if (!empty($descriptionEl)) { - $this->content = htmlspecialchars_decode($descriptionEl->textContent, ENT_QUOTES); + // Per spec, <description> must be plaintext. + // In practice, often there's HTML... 
but these days good + // feeds are using <content:encoded> which is explicitly + // real HTML. + // We'll treat this following spec, and do HTML escaping + // to convert from plaintext to HTML. + $this->content = htmlspecialchars($descriptionEl->textContent); } } -- cgit v1.2.3-54-g00ecf