diff options
author | Brion Vibber <brion@pobox.com> | 2010-11-15 11:25:38 -0800 |
---|---|---|
committer | Brion Vibber <brion@pobox.com> | 2010-11-15 11:25:38 -0800 |
commit | c8445299c71c17e8f652584a05aac65ef7b55ac5 (patch) | |
tree | 27b9f0d0ce4b2c2e0ef36272cf776860c0a865c5 | |
parent | 1c90d09ec8bfbbbad80c74c3feb5bf70fd76d926 (diff) |
Swap the Services_oEmbed wrapper in oEmbedHelper out for doing it ourselves...
- workaround for providers that are skimpy on their data, such as missing width/height or thumbnail_width/thumbnail_height
- workaround for YFrog listing "image" instead of "photo" type
- generally more lax about formatting: if it comes back and looks kinda ok, we'll take it.
- discovery uses system HTML parser, should be more robust if the links include things like ampersands with proper HTML-level escaping
-rw-r--r-- | lib/oembedhelper.php | 173 |
1 files changed, 125 insertions, 48 deletions
diff --git a/lib/oembedhelper.php b/lib/oembedhelper.php index ef2b59214..0ffea468b 100644 --- a/lib/oembedhelper.php +++ b/lib/oembedhelper.php @@ -20,11 +20,10 @@ if (!defined('STATUSNET')) { exit(1); } -require_once INSTALLDIR.'/extlib/Services/oEmbed.php'; /** - * Utility class to wrap Services_oEmbed: + * Utility class to wrap basic oEmbed lookups. * * Blacklisted hosts will use an alternate lookup method: * - Twitpic @@ -89,43 +88,134 @@ class oEmbedHelper $api = self::$apiMap[$host]; common_log(LOG_INFO, 'QQQ: going to: ' . $api); } else { - $api = false; common_log(LOG_INFO, 'QQQ: no map for ' . $host); + try { + $api = self::discover($url); + } catch (Exception $e) { + // Discovery failed... fall back to oohembed if enabled. + $oohembed = common_config('oohembed', 'endpoint'); + if ($oohembed) { + $api = $oohembed; + } else { + throw $e; + } + } + common_log(LOG_INFO, 'QQQ: going to: ' . $api); } return self::getObjectFrom($api, $url, $params); } /** - * Actually do an oEmbed lookup to a particular API endpoint, - * or to the autodiscovered target, or to oohembed. + * Perform basic discovery. + * @return string + */ + static function discover($url) + { + // @fixme ideally skip this for non-HTML stuff! + $body = self::http($url); + return self::discoverFromHTML($url, $body); + } + + /** + * Partially ripped from OStatus' FeedDiscovery class. * - * @param mixed $api string or false: oEmbed API endpoint URL + * @param string $url source URL, used to resolve relative links + * @param string $body HTML body text + * @return mixed string with URL or false if no target found + */ + static function discoverFromHTML($url, $body) + { + // DOMDocument::loadHTML may throw warnings on unrecognized elements, + // and notices on unrecognized namespaces. + $old = error_reporting(error_reporting() & ~(E_WARNING | E_NOTICE)); + $dom = new DOMDocument(); + $ok = $dom->loadHTML($body); + error_reporting($old); + + if (!$ok) { + throw new oEmbedHelper_BadHtmlException(); + } + + // Ok... now on to the links! + $feeds = array( + 'application/json+oembed' => false, + ); + + $nodes = $dom->getElementsByTagName('link'); + for ($i = 0; $i < $nodes->length; $i++) { + $node = $nodes->item($i); + if ($node->hasAttributes()) { + $rel = $node->attributes->getNamedItem('rel'); + $type = $node->attributes->getNamedItem('type'); + $href = $node->attributes->getNamedItem('href'); + if ($rel && $type && $href) { + $rel = array_filter(explode(" ", $rel->value)); + $type = trim($type->value); + $href = trim($href->value); + + if (in_array('alternate', $rel) && array_key_exists($type, $feeds) && empty($feeds[$type])) { + // Save the first feed found of each type... + $feeds[$type] = $href; + } + } + } + } + + // Return the highest-priority feed found + foreach ($feeds as $type => $url) { + if ($url) { + return $url; + } + } + + return false; + throw new oEmbedHelper_DiscoveryException(); + } + + /** + * Actually do an oEmbed lookup to a particular API endpoint. + * + * @param string $api oEmbed API endpoint URL * @param string $url target URL to look up info about * @param array $params * @return object */ - static protected function getObjectFrom($api, $url, $params=array()) + static function getObjectFrom($api, $url, $params=array()) { - $options = array(); - if ($api) { - $options[Services_oEmbed::OPTION_API] = $api; + $params['url'] = $url; + $params['format'] = 'json'; + $data = self::json($api, $params); + return self::normalize($data); + } + + /** + * Normalize oEmbed format. + * + * @param object $orig + * @return object + */ + static function normalize($orig) + { + $data = clone($orig); + + if (empty($data->type)) { + throw new Exception('Invalid oEmbed data: no type field.'); } - try { - $oEmbed = new Services_oEmbed_Tweaked($url, $options); - } catch (Services_oEmbed_Exception_NoSupport $e) { - // Discovery failed... fall back to oohembed if enabled. - $oohembed = common_config('oohembed', 'endpoint'); - if ($oohembed) { - $options[Services_oEmbed::OPTION_API] = $oohembed; - $oEmbed = new Services_oEmbed_Tweaked($url, $options); - } else { - throw $e; + if ($data->type == 'image') { + // YFrog does this. + $data->type = 'photo'; + } + + if (isset($data->thumbnail_url)) { + if (!isset($data->thumbnail_width)) { + // !?!?! + $data->thumbnail_width = common_config('attachments', 'thumb_width'); + $data->thumbnail_height = common_config('attachments', 'thumb_height'); } } - // And.... let's go look it up! - return $oEmbed->getObject($params); + return $data; } /** @@ -212,35 +302,22 @@ class oEmbedHelper } } -class Services_oEmbed_Tweaked extends Services_oEmbed +class oEmbedHelper_Exception extends Exception +{ +} + +class oEmbedHelper_BadHtmlException extends oEmbedHelper_Exception { - protected function discover($url) + function __construct($previous=null) { - $api = parent::discover($url); - if (strpos($api, '?') !== false) { - // Services_oEmbed doesn't expect to find existing params - // on its API endpoint, which may surprise you since the - // spec says discovery URLs should include parameters... :) - // - // Appending a '&' on the end keeps the later-appended '?' - // from breaking whatever the first parameters was. - return $api . '&'; - } - return $api; + return parent::__construct('Bad HTML in discovery data.', 0, $previous); } +} - public function getObject(array $params = array()) +class oEmbedHelper_DiscoveryException extends oEmbedHelper_Exception +{ + function __construct($previous=null) { - $api = $this->options[self::OPTION_API]; - if (strpos($api, '?') !== false) { - // The Services_oEmbed code appends a '?' on the end, which breaks - // the next parameter which may be something important like - // maxwidth. - // - // Sticking this bogus entry into our parameters gets us past it. - $params = array_merge(array('statusnet' => 1), $params); - } - return parent::getObject($params); + return parent::__construct('No oEmbed discovery data.', 0, $previous); } - -}
\ No newline at end of file +} |