From 05e3768e6a833d99a5180d4306d68f59e2d8f8c9 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 19 Mar 2010 09:48:39 -0500 Subject: Parse RSS items as activities First steps to parsing RSS items as activities. RSS feeds don't seem to have enough data to make good remote profiles, but this may work with some "hints". --- lib/activity.php | 272 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 239 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/activity.php b/lib/activity.php index c67d090f7..5b304020d 100644 --- a/lib/activity.php +++ b/lib/activity.php @@ -643,38 +643,11 @@ class ActivityObject ); if ($element->tagName == 'author') { - - $this->type = self::PERSON; // XXX: is this fair? - $this->title = $this->_childContent($element, self::NAME); - $this->id = $this->_childContent($element, self::URI); - - if (empty($this->id)) { - $email = $this->_childContent($element, self::EMAIL); - if (!empty($email)) { - // XXX: acct: ? - $this->id = 'mailto:'.$email; - } - } - + $this->_fromAuthor($element); + } else if ($element->tagName == 'item') { + $this->_fromRssItem($element); } else { - - $this->type = $this->_childContent($element, Activity::OBJECTTYPE, - Activity::SPEC); - - if (empty($this->type)) { - $this->type = ActivityObject::NOTE; - } - - $this->id = $this->_childContent($element, self::ID); - $this->title = $this->_childContent($element, self::TITLE); - $this->summary = $this->_childContent($element, self::SUMMARY); - - $this->source = $this->_getSource($element); - - $this->content = ActivityUtils::getContent($element); - - $this->link = ActivityUtils::getPermalink($element); - + $this->_fromAtomEntry($element); } // Some per-type attributes... @@ -697,6 +670,72 @@ class ActivityObject } } + private function _fromAuthor($element) + { + $this->type = self::PERSON; // XXX: is this fair? 
+ $this->title = $this->_childContent($element, self::NAME); + $this->id = $this->_childContent($element, self::URI); + + if (empty($this->id)) { + $email = $this->_childContent($element, self::EMAIL); + if (!empty($email)) { + // XXX: acct: ? + $this->id = 'mailto:'.$email; + } + } + } + + private function _fromAtomEntry($element) + { + $this->type = $this->_childContent($element, Activity::OBJECTTYPE, + Activity::SPEC); + + if (empty($this->type)) { + $this->type = ActivityObject::NOTE; + } + + $this->id = $this->_childContent($element, self::ID); + $this->title = $this->_childContent($element, self::TITLE); + $this->summary = $this->_childContent($element, self::SUMMARY); + + $this->source = $this->_getSource($element); + + $this->content = ActivityUtils::getContent($element); + + $this->link = ActivityUtils::getPermalink($element); + } + + // @fixme rationalize with Activity::_fromRssItem() + + private function _fromRssItem($item) + { + $this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, Activity::RSS); + + $contentEl = ActivityUtils::child($item, ActivityUtils::CONTENT, Activity::CONTENTNS); + + if (!empty($contentEl)) { + $this->content = htmlspecialchars_decode($contentEl->textContent, ENT_QUOTES); + } else { + $descriptionEl = ActivityUtils::child($item, Activity::DESCRIPTION, Activity::RSS); + if (!empty($descriptionEl)) { + $this->content = htmlspecialchars_decode($descriptionEl->textContent, ENT_QUOTES); + } + } + + $this->link = ActivityUtils::childContent($item, ActivityUtils::LINK, Activity::RSS); + + $guidEl = ActivityUtils::child($item, Activity::GUID, Activity::RSS); + + if (!empty($guidEl)) { + $this->id = $guidEl->textContent; + + if ($guidEl->hasAttribute('isPermaLink') && $guidEl->getAttribute('isPermaLink') != 'false') { + // guid is flagged as a permalink (same test as Activity::_fromRssItem), so it overwrites the link read above + $this->link = $this->id; + } + } + } + private function _childContent($element, $tag, $namespace=ActivityUtils::ATOM) { return ActivityUtils::childContent($element, $tag, $namespace); @@ -1051,6 +1090,21 @@ class Activity const PUBLISHED 
= 'published'; const UPDATED = 'updated'; + const RSS = null; // no namespace! + + const PUBDATE = 'pubDate'; + const DESCRIPTION = 'description'; + const GUID = 'guid'; + const SELF = 'self'; + const IMAGE = 'image'; + const URL = 'url'; + + const DC = 'http://purl.org/dc/elements/1.1/'; + + const CREATOR = 'creator'; + + const CONTENTNS = 'http://purl.org/rss/1.0/modules/content/'; + public $actor; // an ActivityObject public $verb; // a string (the URL) public $object; // an ActivityObject @@ -1081,8 +1135,6 @@ class Activity return; } - $this->entry = $entry; - // Insist on a feed's root DOMElement; don't allow a DOMDocument if ($feed instanceof DOMDocument) { throw new ClientException( @@ -1090,8 +1142,22 @@ class Activity ); } + $this->entry = $entry; $this->feed = $feed; + if ($entry->namespaceURI == Activity::ATOM && + $entry->localName == 'entry') { + $this->_fromAtomEntry($entry, $feed); + } else if ($entry->namespaceURI == Activity::RSS && + $entry->localName == 'item') { + $this->_fromRssItem($entry, $feed); + } else { + throw new Exception("Unknown DOM element: {$entry->namespaceURI} {$entry->localName}"); + } + } + + function _fromAtomEntry($entry, $feed) + { $pubEl = $this->_child($entry, self::PUBLISHED, self::ATOM); if (!empty($pubEl)) { @@ -1177,6 +1243,69 @@ class Activity } } + function _fromRssItem($item, $rss) + { + $verbEl = $this->_child($item, self::VERB); + + if (!empty($verbEl)) { + $this->verb = trim($verbEl->textContent); + } else { + $this->verb = ActivityVerb::POST; + // XXX: do other implied stuff here + } + + $pubDateEl = $this->_child($item, self::PUBDATE, self::RSS); + + if (!empty($pubDateEl)) { + $this->time = strtotime($pubDateEl->textContent); + } + + $authorEl = $this->_child($item, self::AUTHOR, self::RSS); + + if (!empty($authorEl)) { + $this->actor = $this->_fromRssAuthor($authorEl); + } else { + $dcCreatorEl = $this->_child($item, self::CREATOR, self::DC); + if (!empty($dcCreatorEl)) { + $this->actor = 
$this->_fromDcCreator($dcCreatorEl); + } else if (!empty($rss)) { + $this->actor = $this->_fromRss($rss); + } + } + + $this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, self::RSS); + + $contentEl = ActivityUtils::child($item, ActivityUtils::CONTENT, self::CONTENTNS); + + if (!empty($contentEl)) { + $this->content = htmlspecialchars_decode($contentEl->textContent, ENT_QUOTES); + } else { + $descriptionEl = ActivityUtils::child($item, self::DESCRIPTION, self::RSS); + if (!empty($descriptionEl)) { + $this->content = htmlspecialchars_decode($descriptionEl->textContent, ENT_QUOTES); + } + } + + $this->link = ActivityUtils::childContent($item, ActivityUtils::LINK, self::RSS); + + // @fixme enclosures + // @fixme thumbnails... maybe + + $guidEl = ActivityUtils::child($item, self::GUID, self::RSS); + + if (!empty($guidEl)) { + $this->id = $guidEl->textContent; + + if ($guidEl->hasAttribute('isPermaLink') && $guidEl->getAttribute('isPermaLink') != 'false') { + // overwrites + $this->link = $this->id; + } + } + + $this->object = new ActivityObject($item); + $this->context = new ActivityContext($item); + } + /** * Returns an Atom based on this activity * @@ -1249,6 +1378,83 @@ class Activity return $xs->getString(); } + function _fromRssAuthor($el) + { + $text = $el->textContent; + + if (preg_match('/^(.*?) \((.*)\)$/', $text, $match)) { + $email = $match[1]; + $name = $match[2]; + } else if (preg_match('/^(.*?) 
<(.*)>$/', $text, $match)) { + $name = $match[1]; + $email = $match[2]; + } else if (preg_match('/.*@.*/', $text)) { + $email = $text; + $name = null; + } else { + $name = $text; + $email = null; + } + + // Not really enough info + + $actor = new ActivityObject(); + + $actor->element = $el; + + $actor->type = ActivityObject::PERSON; + $actor->title = $name; + + if (!empty($email)) { + $actor->id = 'mailto:'.$email; + } + + return $actor; + } + + function _fromDcCreator($el) + { + // Not really enough info + + $text = $el->textContent; + + $actor = new ActivityObject(); + + $actor->element = $el; + + $actor->title = $text; + $actor->type = ActivityObject::PERSON; + + return $actor; + } + + function _fromRss($el) + { + $actor = new ActivityObject(); + + $actor->element = $el; + + $actor->type = ActivityObject::PERSON; // @fixme guess better + + $actor->title = ActivityUtils::childContent($el, ActivityObject::TITLE, self::RSS); + $actor->link = ActivityUtils::childContent($el, ActivityUtils::LINK, self::RSS); + $actor->id = ActivityUtils::getLink($el, self::SELF); + + $desc = ActivityUtils::childContent($el, self::DESCRIPTION, self::RSS); + + if (!empty($desc)) { + $actor->content = htmlspecialchars_decode($desc, ENT_QUOTES); + } + + $imageEl = ActivityUtils::child($el, self::IMAGE, self::RSS); + + if (!empty($imageEl)) { + $actor->avatarLinks[] = ActivityUtils::childContent($imageEl, self::URL, self::RSS); + } + + return $actor; + } + private function _child($element, $tag, $namespace=self::SPEC) { return ActivityUtils::child($element, $tag, $namespace); -- cgit v1.2.3-54-g00ecf From 13b1acdd084289d1f1a0c02c51633bdd92b46cf1 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 19 Mar 2010 15:28:25 -0500 Subject: only search elements for links --- lib/activity.php | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/activity.php b/lib/activity.php index 5b304020d..8546b474d 100644 --- a/lib/activity.php +++ b/lib/activity.php @@ -347,6 
+347,11 @@ class ActivityUtils $els = $element->childNodes; foreach ($els as $link) { + + if (!($link instanceof DOMElement)) { + continue; + } + if ($link->localName == self::LINK && $link->namespaceURI == self::ATOM) { $linkRel = $link->getAttribute(self::REL); -- cgit v1.2.3-54-g00ecf From 1292230e380300e194f0695f95fe1554cd7863a5 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 19 Mar 2010 15:41:48 -0500 Subject: move ActivityObject constructors from Activity to ActivityObject --- lib/activity.php | 160 +++++++++++++++++++++++++++---------------------------- 1 file changed, 80 insertions(+), 80 deletions(-) (limited to 'lib') diff --git a/lib/activity.php b/lib/activity.php index 8546b474d..dcd079c7a 100644 --- a/lib/activity.php +++ b/lib/activity.php @@ -741,6 +741,83 @@ class ActivityObject } } + public static function fromRssAuthor($el) + { + $text = $el->textContent; + + if (preg_match('/^(.*?) \((.*)\)$/', $text, $match)) { + $email = $match[1]; + $name = $match[2]; + } else if (preg_match('/^(.*?) 
<(.*)>$/', $text, $match)) { + $name = $match[1]; + $email = $match[2]; + } else if (preg_match('/.*@.*/', $text)) { + $email = $text; + $name = null; + } else { + $name = $text; + $email = null; + } + + // Not really enough info + + $obj = new ActivityObject(); + + $obj->element = $el; + + $obj->type = ActivityObject::PERSON; + $obj->title = $name; + + if (!empty($email)) { + $obj->id = 'mailto:'.$email; + } + + return $obj; + } + + public static function fromDcCreator($el) + { + // Not really enough info + + $text = $el->textContent; + + $obj = new ActivityObject(); + + $obj->element = $el; + + $obj->title = $text; + $obj->type = ActivityObject::PERSON; + + return $obj; + } + + public static function fromRssChannel($el) + { + $obj = new ActivityObject(); + + $obj->element = $el; + + $obj->type = ActivityObject::PERSON; // @fixme guess better + // RSS tag/namespace constants are declared on Activity, so they are referenced as Activity::, not self:: + $obj->title = ActivityUtils::childContent($el, ActivityObject::TITLE, Activity::RSS); + $obj->link = ActivityUtils::childContent($el, ActivityUtils::LINK, Activity::RSS); + $obj->id = ActivityUtils::getLink($el, Activity::SELF); + + $desc = ActivityUtils::childContent($el, Activity::DESCRIPTION, Activity::RSS); + + if (!empty($desc)) { + $obj->content = htmlspecialchars_decode($desc, ENT_QUOTES); + } + + $imageEl = ActivityUtils::child($el, Activity::IMAGE, Activity::RSS); + + if (!empty($imageEl)) { + $obj->avatarLinks[] = ActivityUtils::childContent($imageEl, Activity::URL, Activity::RSS); + } + + return $obj; + } + private function _childContent($element, $tag, $namespace=ActivityUtils::ATOM) { return ActivityUtils::childContent($element, $tag, $namespace); @@ -1268,13 +1345,13 @@ class Activity $authorEl = $this->_child($item, self::AUTHOR, self::RSS); if (!empty($authorEl)) { - $this->actor = $this->_fromRssAuthor($authorEl); + $this->actor = ActivityObject::fromRssAuthor($authorEl); } else { $dcCreatorEl = $this->_child($item, self::CREATOR, self::DC); if (!empty($dcCreatorEl)) { - $this->actor = $this->_fromDcCreator($dcCreatorEl); + $this->actor = 
ActivityObject::fromDcCreator($dcCreatorEl); } else if (!empty($rss)) { - $this->actor = $this->_fromRss($rss); + $this->actor = ActivityObject::fromRssChannel($rss); } } @@ -1383,83 +1460,6 @@ class Activity return $xs->getString(); } - function _fromRssAuthor($el) - { - $text = $el->textContent; - - if (preg_match('/^(.*?) \((.*)\)$/', $text, $match)) { - $email = $match[1]; - $name = $match[2]; - } else if (preg_match('/^(.*?) <(.*)>$/', $text, $match)) { - $name = $match[1]; - $email = $match[2]; - } else if (preg_match('/.*@.*/', $text)) { - $email = $text; - $name = null; - } else { - $name = $text; - $email = null; - } - - // Not really enough info - - $actor = new ActivityObject(); - - $actor->element = $el; - - $actor->type = ActivityObject::PERSON; - $actor->title = $name; - - if (!empty($email)) { - $actor->id = 'mailto:'.$email; - } - - return $actor; - } - - function _fromDcCreator($el) - { - // Not really enough info - - $text = $el->textContent; - - $actor = new ActivityObject(); - - $actor->element = $el; - - $actor->title = $text; - $actor->type = ActivityObject::PERSON; - - return $actor; - } - - function _fromRss($el) - { - $actor = new ActivityObject(); - - $actor->element = $el; - - $actor->type = ActivityObject::PERSON; // @fixme guess better - - $actor->title = ActivityUtils::childContent($el, ActivityObject::TITLE, self::RSS); - $actor->link = ActivityUtils::childContent($el, ActivityUtils::LINK, self::RSS); - $actor->id = ActivityUtils::getLink($el, self::SELF); - - $desc = ActivityUtils::childContent($el, self::DESCRIPTION, self::RSS); - - if (!empty($desc)) { - $actor->content = htmlspecialchars_decode($desc, ENT_QUOTES); - } - - $imageEl = ActivityUtils::child($el, self::IMAGE, self::RSS); - - if (!empty($imageEl)) { - $actor->avatarLinks[] = ActivityUtils::childContent($imageEl, self::URL, self::RSS); - } - - return $actor; - } - private function _child($element, $tag, $namespace=self::SPEC) { return ActivityUtils::child($element, $tag, 
$namespace); -- cgit v1.2.3-54-g00ecf