diff options
author | Brion Vibber <brion@pobox.com> | 2009-11-20 09:42:19 -0800 |
---|---|---|
committer | Brion Vibber <brion@pobox.com> | 2009-11-20 09:42:19 -0800 |
commit | b189c9500c3ea5362edfcbf818c2f847b4892da5 (patch) | |
tree | 760ed684d9595b872ec3258ff338c5cf22e27bf0 /plugins/FeedSub/feeddiscovery.php | |
parent | d645db38ae4ff4179f7fc31d4501d48054c4b8e6 (diff) |
Initial functional version of feed subscription plugin, currently supporting only PuSH-enabled feeds.
Diffstat (limited to 'plugins/FeedSub/feeddiscovery.php')
-rw-r--r-- | plugins/FeedSub/feeddiscovery.php | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/plugins/FeedSub/feeddiscovery.php b/plugins/FeedSub/feeddiscovery.php new file mode 100644 index 000000000..35edaca33 --- /dev/null +++ b/plugins/FeedSub/feeddiscovery.php @@ -0,0 +1,209 @@ +<?php +/* + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2009, StatusNet, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * @package FeedSubPlugin + * @maintainer Brion Vibber <brion@status.net> + */ + +if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } + +class FeedSubBadURLException extends FeedSubException +{ +} + +class FeedSubBadResponseException extends FeedSubException +{ +} + +class FeedSubEmptyException extends FeedSubException +{ +} + +class FeedSubBadHTMLException extends FeedSubException +{ +} + +class FeedSubUnrecognizedTypeException extends FeedSubException +{ +} + +class FeedSubNoFeedException extends FeedSubException +{ +} + +class FeedDiscovery +{ + public $uri; + public $type; + public $body; + + + public function feedMunger() + { + require_once 'XML/Feed/Parser.php'; + $feed = new XML_Feed_Parser($this->body, false, false, true); // @fixme + return new FeedMunger($feed, $this->uri); + } + + /** + * @param string $url + * @param bool $htmlOk + * @return string with validated URL + * @throws FeedSubBadURLException + * @throws FeedSubBadHtmlException + * @throws FeedSubNoFeedException + * @throws FeedSubEmptyException + * @throws FeedSubUnrecognizedTypeException + */ + function discoverFromURL($url, $htmlOk=true) + { + try { + $client = new HTTPClient(); + $response = $client->get($url); + } catch (HTTP_Request2_Exception $e) { + throw new FeedSubBadURLException($e); + } + + if ($htmlOk) { + $type = $response->getHeader('Content-Type'); + $isHtml = preg_match('!^(text/html|application/xhtml\+xml)!i', $type); + if ($isHtml) { + $target = $this->discoverFromHTML($response->getUrl(), $response->getBody()); + if (!$target) { + throw new FeedSubNoFeedException($url); + } + return $this->discoverFromURL($target, false); + } + } + + return $this->initFromResponse($response); + } + + function initFromResponse($response) + { + if (!$response->isOk()) { + throw new FeedSubBadResponseException($response->getCode()); + } + + $sourceurl = $response->getUrl(); + $body = $response->getBody(); + if (!$body) { + throw new FeedSubEmptyException($sourceurl); + } + + $type = $response->getHeader('Content-Type'); + if (preg_match('!^(text/xml|application/xml|application/(rss|atom)\+xml)!i', $type)) { + $this->uri = $sourceurl; + $this->type = $type; + $this->body = $body; + return true; + } else { + common_log(LOG_WARNING, "Unrecognized feed type $type for $sourceurl"); + throw new FeedSubUnrecognizedTypeException($type); + } + } + + /** + * @param string $url source URL, used to resolve relative links + * @param string $body HTML body text + * @return mixed string with URL or false if no target found + */ + function discoverFromHTML($url, $body) + { + // DOMDocument::loadHTML may throw warnings on unrecognized elements. + $old = error_reporting(error_reporting() & ~E_WARNING); + $dom = new DOMDocument(); + $ok = $dom->loadHTML($body); + error_reporting($old); + + if (!$ok) { + throw new FeedSubBadHtmlException(); + } + + // Autodiscovery links may be relative to the page's URL or <base href> + $base = false; + $nodes = $dom->getElementsByTagName('base'); + for ($i = 0; $i < $nodes->length; $i++) { + $node = $nodes->item($i); + if ($node->hasAttributes()) { + $href = $node->attributes->getNamedItem('href'); + if ($href) { + $base = trim($href->value); + } + } + } + if ($base) { + $base = $this->resolveURI($base, $url); + } else { + $base = $url; + } + + // Ok... now on to the links! + // @fixme merge with the munger link checks + $nodes = $dom->getElementsByTagName('link'); + for ($i = 0; $i < $nodes->length; $i++) { + $node = $nodes->item($i); + if ($node->hasAttributes()) { + $rel = $node->attributes->getNamedItem('rel'); + $type = $node->attributes->getNamedItem('type'); + $href = $node->attributes->getNamedItem('href'); + if ($rel && $type && $href) { + $rel = trim($rel->value); + $type = trim($type->value); + $href = trim($href->value); + + $feedTypes = array( + 'application/rss+xml', + 'application/atom+xml', + ); + if (trim($rel) == 'alternate' && in_array($type, $feedTypes)) { + return $this->resolveURI($href, $base); + } + } + } + } + + return false; + } + + /** + * Resolve a possibly relative URL against some absolute base URL + * @param string $rel relative or absolute URL + * @param string $base absolute URL + * @return string absolute URL, or original URL if could not be resolved. + */ + function resolveURI($rel, $base) + { + require_once "Net/URL2.php"; + try { + $relUrl = new Net_URL2($rel); + if ($relUrl->isAbsolute()) { + return $rel; + } + $baseUrl = new Net_URL2($base); + $absUrl = $baseUrl->resolve($relUrl); + return $absUrl->getURL(); + } catch (Exception $e) { + common_log(LOG_WARNING, 'Unable to resolve relative link "' . + $rel . '" against base "' . $base . '": ' . $e->getMessage()); + return $rel; + } + } +} |