From dc09453a77f33c4dfdff306321ce93cf5fbd2d57 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 8 Feb 2010 11:06:03 -0800 Subject: First steps on converting FeedSub into the pub/sub basis for OStatus communications: * renamed FeedSub plugin to OStatus * now setting avatar on subscriptions * general fixes for subscription * integrated PuSH hub to handle only user timelines on canonical ID url; sends updates directly * set $config['feedsub']['nohub'] = true to test w/ foreign feeds that don't have hubs (won't actually receive updates though) * a few bits of code documentation * HMAC support for verified distributions (safest if sub setup is on HTTPS) And a couple core changes: * minimizing HTML output for exceptions in API requests to aid in debugging * fix for rel=self link in apitimelineuser when id given This does not not yet include any of the individual subscription management (Salmon notifications for sub/unsub, etc) nor a nice UI for user subscriptions. Needs some further cleanup to treat posts as status updates instead of link references. --- plugins/OStatus/lib/feeddiscovery.php | 221 ++++++++++++++++++++ plugins/OStatus/lib/feedmunger.php | 270 +++++++++++++++++++++++++ plugins/OStatus/lib/hubdistribqueuehandler.php | 87 ++++++++ plugins/OStatus/lib/huboutqueuehandler.php | 52 +++++ plugins/OStatus/lib/hubverifyqueuehandler.php | 53 +++++ 5 files changed, 683 insertions(+) create mode 100644 plugins/OStatus/lib/feeddiscovery.php create mode 100644 plugins/OStatus/lib/feedmunger.php create mode 100644 plugins/OStatus/lib/hubdistribqueuehandler.php create mode 100644 plugins/OStatus/lib/huboutqueuehandler.php create mode 100644 plugins/OStatus/lib/hubverifyqueuehandler.php (limited to 'plugins/OStatus/lib') diff --git a/plugins/OStatus/lib/feeddiscovery.php b/plugins/OStatus/lib/feeddiscovery.php new file mode 100644 index 000000000..9bc7892fb --- /dev/null +++ b/plugins/OStatus/lib/feeddiscovery.php @@ -0,0 +1,221 @@ +. + */ + +/** + * @package FeedSubPlugin + * @maintainer Brion Vibber + */ + +if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } + +class FeedSubBadURLException extends FeedSubException +{ +} + +class FeedSubBadResponseException extends FeedSubException +{ +} + +class FeedSubEmptyException extends FeedSubException +{ +} + +class FeedSubBadHTMLException extends FeedSubException +{ +} + +class FeedSubUnrecognizedTypeException extends FeedSubException +{ +} + +class FeedSubNoFeedException extends FeedSubException +{ +} + +/** + * Given a web page or feed URL, discover the final location of the feed + * and return its current contents. + * + * @example + * $feed = new FeedDiscovery(); + * if ($feed->discoverFromURL($url)) { + * print $feed->uri; + * print $feed->type; + * processFeed($feed->body); + * } + */ +class FeedDiscovery +{ + public $uri; + public $type; + public $body; + + + public function feedMunger() + { + require_once 'XML/Feed/Parser.php'; + $feed = new XML_Feed_Parser($this->body, false, false, true); // @fixme + return new FeedMunger($feed, $this->uri); + } + + /** + * @param string $url + * @param bool $htmlOk pass false here if you don't want to follow web pages. + * @return string with validated URL + * @throws FeedSubBadURLException + * @throws FeedSubBadHtmlException + * @throws FeedSubNoFeedException + * @throws FeedSubEmptyException + * @throws FeedSubUnrecognizedTypeException + */ + function discoverFromURL($url, $htmlOk=true) + { + try { + $client = new HTTPClient(); + $response = $client->get($url); + } catch (HTTP_Request2_Exception $e) { + throw new FeedSubBadURLException($e); + } + + if ($htmlOk) { + $type = $response->getHeader('Content-Type'); + $isHtml = preg_match('!^(text/html|application/xhtml\+xml)!i', $type); + if ($isHtml) { + $target = $this->discoverFromHTML($response->getUrl(), $response->getBody()); + if (!$target) { + throw new FeedSubNoFeedException($url); + } + return $this->discoverFromURL($target, false); + } + } + + return $this->initFromResponse($response); + } + + function initFromResponse($response) + { + if (!$response->isOk()) { + throw new FeedSubBadResponseException($response->getCode()); + } + + $sourceurl = $response->getUrl(); + $body = $response->getBody(); + if (!$body) { + throw new FeedSubEmptyException($sourceurl); + } + + $type = $response->getHeader('Content-Type'); + if (preg_match('!^(text/xml|application/xml|application/(rss|atom)\+xml)!i', $type)) { + $this->uri = $sourceurl; + $this->type = $type; + $this->body = $body; + return true; + } else { + common_log(LOG_WARNING, "Unrecognized feed type $type for $sourceurl"); + throw new FeedSubUnrecognizedTypeException($type); + } + } + + /** + * @param string $url source URL, used to resolve relative links + * @param string $body HTML body text + * @return mixed string with URL or false if no target found + */ + function discoverFromHTML($url, $body) + { + // DOMDocument::loadHTML may throw warnings on unrecognized elements. + $old = error_reporting(error_reporting() & ~E_WARNING); + $dom = new DOMDocument(); + $ok = $dom->loadHTML($body); + error_reporting($old); + + if (!$ok) { + throw new FeedSubBadHtmlException(); + } + + // Autodiscovery links may be relative to the page's URL or + $base = false; + $nodes = $dom->getElementsByTagName('base'); + for ($i = 0; $i < $nodes->length; $i++) { + $node = $nodes->item($i); + if ($node->hasAttributes()) { + $href = $node->attributes->getNamedItem('href'); + if ($href) { + $base = trim($href->value); + } + } + } + if ($base) { + $base = $this->resolveURI($base, $url); + } else { + $base = $url; + } + + // Ok... now on to the links! + // @fixme merge with the munger link checks + $nodes = $dom->getElementsByTagName('link'); + for ($i = 0; $i < $nodes->length; $i++) { + $node = $nodes->item($i); + if ($node->hasAttributes()) { + $rel = $node->attributes->getNamedItem('rel'); + $type = $node->attributes->getNamedItem('type'); + $href = $node->attributes->getNamedItem('href'); + if ($rel && $type && $href) { + $rel = trim($rel->value); + $type = trim($type->value); + $href = trim($href->value); + + $feedTypes = array( + 'application/rss+xml', + 'application/atom+xml', + ); + if (trim($rel) == 'alternate' && in_array($type, $feedTypes)) { + return $this->resolveURI($href, $base); + } + } + } + } + + return false; + } + + /** + * Resolve a possibly relative URL against some absolute base URL + * @param string $rel relative or absolute URL + * @param string $base absolute URL + * @return string absolute URL, or original URL if could not be resolved. + */ + function resolveURI($rel, $base) + { + require_once "Net/URL2.php"; + try { + $relUrl = new Net_URL2($rel); + if ($relUrl->isAbsolute()) { + return $rel; + } + $baseUrl = new Net_URL2($base); + $absUrl = $baseUrl->resolve($relUrl); + return $absUrl->getURL(); + } catch (Exception $e) { + common_log(LOG_WARNING, 'Unable to resolve relative link "' . + $rel . '" against base "' . $base . '": ' . $e->getMessage()); + return $rel; + } + } +} diff --git a/plugins/OStatus/lib/feedmunger.php b/plugins/OStatus/lib/feedmunger.php new file mode 100644 index 000000000..eeb8d2df3 --- /dev/null +++ b/plugins/OStatus/lib/feedmunger.php @@ -0,0 +1,270 @@ +. + */ + +/** + * @package FeedSubPlugin + * @maintainer Brion Vibber + */ + +if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } + +class FeedSubPreviewNotice extends Notice +{ + protected $fetched = true; + + function __construct($profile) + { + $this->profile = $profile; + $this->profile_id = 0; + } + + function getProfile() + { + return $this->profile; + } + + function find() + { + return true; + } + + function fetch() + { + $got = $this->fetched; + $this->fetched = false; + return $got; + } +} + +class FeedSubPreviewProfile extends Profile +{ + function getAvatar($width, $height=null) + { + return new FeedSubPreviewAvatar($width, $height, $this->avatar); + } +} + +class FeedSubPreviewAvatar extends Avatar +{ + function __construct($width, $height, $remote) + { + $this->remoteImage = $remote; + } + + function displayUrl() { + return $this->remoteImage; + } +} + +class FeedMunger +{ + /** + * @param XML_Feed_Parser $feed + */ + function __construct($feed, $url=null) + { + $this->feed = $feed; + $this->url = $url; + } + + function feedinfo() + { + $feedinfo = new Feedinfo(); + $feedinfo->feeduri = $this->url; + $feedinfo->homeuri = $this->feed->link; + $feedinfo->huburi = $this->getHubLink(); + return $feedinfo; + } + + function getAtomLink($item, $attribs=array()) + { + // XML_Feed_Parser gets confused by multiple elements. + $dom = $item->model; + + // Note that RSS feeds would embed an so this should work for both. + /// http://code.google.com/p/pubsubhubbub/wiki/RssFeeds + // + $links = $dom->getElementsByTagNameNS('http://www.w3.org/2005/Atom', 'link'); + for ($i = 0; $i < $links->length; $i++) { + $node = $links->item($i); + if ($node->hasAttributes()) { + $href = $node->attributes->getNamedItem('href'); + if ($href) { + $matches = 0; + foreach ($attribs as $name => $val) { + $attrib = $node->attributes->getNamedItem($name); + if ($attrib && $attrib->value == $val) { + $matches++; + } + } + if ($matches == count($attribs)) { + return $href->value; + } + } + } + } + return false; + } + + function getRssLink($item) + { + // XML_Feed_Parser gets confused by multiple elements. + $dom = $item->model; + + // Note that RSS feeds would embed an so this should work for both. + /// http://code.google.com/p/pubsubhubbub/wiki/RssFeeds + // + $links = $dom->getElementsByTagName('link'); + for ($i = 0; $i < $links->length; $i++) { + $node = $links->item($i); + if (!$node->hasAttributes()) { + return $node->textContent; + } + } + return false; + } + + function getAltLink($item) + { + // Check for an atom link... + $link = $this->getAtomLink($item, array('rel' => 'alternate', 'type' => 'text/html')); + if (!$link) { + $link = $this->getRssLink($item); + } + return $link; + } + + function getHubLink() + { + return $this->getAtomLink($this->feed, array('rel' => 'hub')); + } + + /** + * Get an appropriate avatar image source URL, if available. + * @return mixed string or false + */ + function getAvatar() + { + $logo = $this->feed->logo; + if ($logo) { + return $logo; + } + $icon = $this->feed->icon; + if ($icon) { + return $icon; + } + return common_path('plugins/OStatus/images/48px-Feed-icon.svg.png'); + } + + function profile($preview=false) + { + if ($preview) { + $profile = new FeedSubPreviewProfile(); + } else { + $profile = new Profile(); + } + + // @todo validate/normalize nick? + $profile->nickname = $this->feed->title; + $profile->fullname = $this->feed->title; + $profile->homepage = $this->getAltLink($this->feed); + $profile->bio = $this->feed->description; + $profile->profileurl = $this->getAltLink($this->feed); + + if ($preview) { + $profile->avatar = $this->getAvatar(); + } + + // @todo tags from categories + // @todo lat/lon/location? + + return $profile; + } + + function notice($index=1, $preview=false) + { + $entry = $this->feed->getEntryByOffset($index); + if (!$entry) { + return null; + } + + if ($preview) { + $notice = new FeedSubPreviewNotice($this->profile(true)); + $notice->id = -1; + } else { + $notice = new Notice(); + } + + $link = $this->getAltLink($entry); + if (empty($link)) { + if (preg_match('!^https?://!', $entry->id)) { + $link = $entry->id; + common_log(LOG_DEBUG, "No link on entry, using URL from id: $link"); + } + } + $notice->uri = $link; + $notice->url = $link; + $notice->content = $this->noticeFromEntry($entry); + $notice->rendered = common_render_content($notice->content, $notice); + $notice->created = common_sql_date($entry->updated); // @fixme + $notice->is_local = Notice::GATEWAY; + $notice->source = 'feed'; + + return $notice; + } + + /** + * @param XML_Feed_Type $entry + * @return string notice text, within post size limit + */ + function noticeFromEntry($entry) + { + $title = $entry->title; + $link = $entry->link; + + // @todo We can get entries like this: + // $cats = $entry->getCategory('category', array(0, true)); + // but it feels like an awful hack. If it's accessible cleanly, + // try adding #hashtags from the categories/tags on a post. + + // @todo Should we force a language here? + $format = _m('New post: "%1$s" %2$s'); + $title = $entry->title; + $link = $this->getAltLink($entry); + $out = sprintf($format, $title, $link); + + // Trim link if needed... + $max = Notice::maxContent(); + if (mb_strlen($out) > $max) { + $link = common_shorten_url($link); + $out = sprintf($format, $title, $link); + } + + // Trim title if needed... + if (mb_strlen($out) > $max) { + $ellipsis = "\xe2\x80\xa6"; // U+2026 HORIZONTAL ELLIPSIS + $used = mb_strlen($out) - mb_strlen($title); + $available = $max - $used - mb_strlen($ellipsis); + $title = mb_substr($title, 0, $available) . $ellipsis; + $out = sprintf($format, $title, $link); + } + + return $out; + } +} diff --git a/plugins/OStatus/lib/hubdistribqueuehandler.php b/plugins/OStatus/lib/hubdistribqueuehandler.php new file mode 100644 index 000000000..126f1355f --- /dev/null +++ b/plugins/OStatus/lib/hubdistribqueuehandler.php @@ -0,0 +1,87 @@ +. + */ + +/** + * Send a PuSH subscription verification from our internal hub. + * Queue up final distribution for + * @package Hub + * @author Brion Vibber + */ +class HubDistribQueueHandler extends QueueHandler +{ + function transport() + { + return 'hubdistrib'; + } + + function handle($notice) + { + assert($notice instanceof Notice); + + // See if there's any PuSH subscriptions, including OStatus clients. + // @fixme handle group subscriptions as well + // http://identi.ca/api/statuses/user_timeline/1.atom + $feed = common_local_url('ApiTimelineUser', + array('id' => $notice->profile_id, + 'format' => 'atom')); + $sub = new HubSub(); + $sub->topic = $feed; + if ($sub->find()) { + common_log(LOG_INFO, "Preparing $sub->N PuSH distribution(s) for $feed"); + $qm = QueueManager::get(); + $atom = $this->userFeedForNotice($notice); + while ($sub->fetch()) { + common_log(LOG_INFO, "Prepping PuSH distribution to $sub->callback for $feed"); + $data = array('sub' => clone($sub), + 'atom' => $atom); + $qm->enqueue($data, 'hubout'); + } + } else { + common_log(LOG_INFO, "No PuSH subscribers for $feed"); + } + } + + /** + * Build a single-item version of the sending user's Atom feed. + * @param Notice $notice + * @return string + */ + function userFeedForNotice($notice) + { + // @fixme this feels VERY hacky... + // should probably be a cleaner way to do it + + ob_start(); + $api = new ApiTimelineUserAction(); + $api->prepare(array('id' => $notice->profile_id, + 'format' => 'atom', + 'max_id' => $notice->id, + 'since_id' => $notice->id - 1)); + $api->showTimeline(); + $feed = ob_get_clean(); + + // ...and override the content-type back to something normal... eww! + // hope there's no other headers that got set while we weren't looking. + header('Content-Type: text/html; charset=utf-8'); + + common_log(LOG_DEBUG, $feed); + return $feed; + } +} + diff --git a/plugins/OStatus/lib/huboutqueuehandler.php b/plugins/OStatus/lib/huboutqueuehandler.php new file mode 100644 index 000000000..cb44ad2c4 --- /dev/null +++ b/plugins/OStatus/lib/huboutqueuehandler.php @@ -0,0 +1,52 @@ +. + */ + +/** + * Send a raw PuSH atom update from our internal hub. + * @package Hub + * @author Brion Vibber + */ +class HubOutQueueHandler extends QueueHandler +{ + function transport() + { + return 'hubout'; + } + + function handle($data) + { + $sub = $data['sub']; + $atom = $data['atom']; + + assert($sub instanceof HubSub); + assert(is_string($atom)); + + try { + $sub->push($atom); + } catch (Exception $e) { + common_log(LOG_ERR, "Failed PuSH to $sub->callback for $sub->topic: " . + $e->getMessage()); + // @fixme Reschedule a later delivery? + // Currently we have no way to do this other than 'send NOW' + } + + return true; + } +} + diff --git a/plugins/OStatus/lib/hubverifyqueuehandler.php b/plugins/OStatus/lib/hubverifyqueuehandler.php new file mode 100644 index 000000000..125d13a77 --- /dev/null +++ b/plugins/OStatus/lib/hubverifyqueuehandler.php @@ -0,0 +1,53 @@ +. + */ + +/** + * Send a PuSH subscription verification from our internal hub. + * @package Hub + * @author Brion Vibber + */ +class HubVerifyQueueHandler extends QueueHandler +{ + function transport() + { + return 'hubverify'; + } + + function handle($data) + { + $sub = $data['sub']; + $mode = $data['mode']; + + assert($sub instanceof HubSub); + assert($mode === 'subscribe' || $mode === 'unsubscribe'); + + common_log(LOG_INFO, __METHOD__ . ": $mode $sub->callback $sub->topic"); + try { + $sub->verify($mode); + } catch (Exception $e) { + common_log(LOG_ERR, "Failed PuSH $mode verify to $sub->callback for $sub->topic: " . + $e->getMessage()); + // @fixme schedule retry? + // @fixme just kill it? + } + + return true; + } +} + -- cgit v1.2.3-54-g00ecf