diff options
Diffstat (limited to 'plugins/OStatus/lib/discoveryhints.php')
-rw-r--r-- | plugins/OStatus/lib/discoveryhints.php | 229 |
1 files changed, 134 insertions, 95 deletions
diff --git a/plugins/OStatus/lib/discoveryhints.php b/plugins/OStatus/lib/discoveryhints.php index 4da2ec0f1..1bb0ad2ae 100644 --- a/plugins/OStatus/lib/discoveryhints.php +++ b/plugins/OStatus/lib/discoveryhints.php @@ -63,54 +63,12 @@ class DiscoveryHints { static function hcardHints($body, $url) { - common_debug("starting tidy"); - - $body = self::_tidy($body, $url); - - common_debug("done with tidy"); - - set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/plugins/OStatus/extlib/hkit/'); - require_once('hkit.class.php'); - - // hKit code is not clean for notices and warnings - $old = error_reporting(); - error_reporting($old & ~E_NOTICE & ~E_WARNING); - - $h = new hKit; - $hcards = $h->getByString('hcard', $body); - - error_reporting($old); - - if (empty($hcards)) { - return array(); - } - - if (count($hcards) == 1) { - $hcard = $hcards[0]; - } else { - foreach ($hcards as $try) { - if (array_key_exists('url', $try)) { - if (is_string($try['url']) && $try['url'] == $url) { - $hcard = $try; - break; - } else if (is_array($try['url'])) { - foreach ($try['url'] as $tryurl) { - if ($tryurl == $url) { - $hcard = $try; - break 2; - } - } - } - } - } - // last chance; grab the first one - if (empty($hcard)) { - $hcard = $hcards[0]; - } - } + $hcard = self::_hcard($body, $url); $hints = array(); + // XXX: don't copy stuff into an array and then copy it again + if (array_key_exists('nickname', $hcard)) { $hints['nickname'] = $hcard['nickname']; } @@ -122,7 +80,7 @@ class DiscoveryHints { } if (array_key_exists('photo', $hcard)) { - $hints['avatar'] = $hcard['photo']; + $hints['avatar'] = $hcard['photo'][0]; } if (array_key_exists('note', $hcard)) { @@ -149,61 +107,142 @@ class DiscoveryHints { return $hints; } - /** - * hKit needs well-formed XML for its parsing. - * We'll take the HTML body here and normalize it to XML. - * - * @param string $body HTML document source, possibly not-well-formed - * @param string $url source URL - * @return string well-formed XML document source - * @throws Exception if HTML parsing failed. - */ - private static function _tidy($body, $url) + static function _hcard($body, $url) { - if (empty($body)) { - throw new Exception("Empty HTML could not be parsed."); - } - $dom = new DOMDocument(); + // DOMDocument::loadHTML may throw warnings on unrecognized elements. + + $old = error_reporting(error_reporting() & ~E_WARNING); - // Some HTML errors will trigger warnings, but still work. - $old = error_reporting(); - error_reporting($old & ~E_WARNING); - - $ok = $dom->loadHTML($body); + $doc = new DOMDocument(); + $doc->loadHTML($body); error_reporting($old); - - if ($ok) { - // hKit doesn't give us a chance to pass the source URL for - // resolving relative links, such as the avatar photo on a - // Google profile. We'll slip it into a <base> tag if there's - // not already one present. - $bases = $dom->getElementsByTagName('base'); - if ($bases && $bases->length >= 1) { - $base = $bases->item(0); - if ($base->hasAttribute('href')) { - $base->setAttribute('href', $url); - } - } else { - $base = $dom->createElement('base'); - $base->setAttribute('href', $url); - $heads = $dom->getElementsByTagName('head'); - if ($heads || $heads->length) { - $head = $heads->item(0); - } else { - $head = $dom->createElement('head'); - $root = $dom->documentRoot; - if ($root->firstChild) { - $root->insertBefore($head, $root->firstChild); - } else { - $root->appendChild($head); - } - } - $head->appendChild($base); + + $xp = new DOMXPath($doc); + + $hcardNodes = self::_getChildrenByClass($doc->documentElement, 'vcard', $xp); + + $hcards = array(); + + for ($i = 0; $i < $hcardNodes->length; $i++) { + + $hcardNode = $hcardNodes->item($i); + + $hcard = self::_hcardFromNode($hcardNode, $xp, $url); + + $hcards[] = $hcard; + } + + $repr = null; + + foreach ($hcards as $hcard) { + if (in_array($url, $hcard['url'])) { + $repr = $hcard; + break; } - return $dom->saveXML(); + } + + if (!is_null($repr)) { + return $repr; + } else if (count($hcards) > 0) { + return $hcards[0]; } else { - throw new Exception("Invalid HTML could not be parsed."); + return null; } } + + function _getChildrenByClass($el, $cls, $xp) + { + // borrowed from hkit. Thanks dudes! + + $qry = ".//*[contains(concat(' ',normalize-space(@class),' '),' $cls ')]"; + + $nodes = $xp->query($qry, $el); + + return $nodes; + } + + function _hcardFromNode($hcardNode, $xp, $base) + { + $hcard = array(); + + $hcard['url'] = array(); + + $urlNodes = self::_getChildrenByClass($hcardNode, 'url', $xp); + + for ($j = 0; $j < $urlNodes->length; $j++) { + + $urlNode = $urlNodes->item($j); + + if ($urlNode->hasAttribute('href')) { + $url = $urlNode->getAttribute('href'); + } else { + $url = $urlNode->textContent; + } + + $hcard['url'][] = self::_rel2abs($url, $base); + } + + $hcard['photo'] = array(); + + $photoNodes = self::_getChildrenByClass($hcardNode, 'photo', $xp); + + for ($j = 0; $j < $photoNodes->length; $j++) { + $photoNode = $photoNodes->item($j); + if ($photoNode->hasAttribute('src')) { + $url = $photoNode->getAttribute('src'); + } else if ($photoNode->hasAttribute('href')) { + $url = $photoNode->getAttribute('href'); + } else { + $url = $photoNode->textContent; + } + $hcard['photo'][] = self::_rel2abs($url, $base); + } + + $singles = array('nickname', 'note', 'fn', 'n', 'adr'); + + foreach ($singles as $single) { + + $nodes = self::_getChildrenByClass($hcardNode, $single, $xp); + + if ($nodes->length > 0) { + $node = $nodes->item(0); + $hcard[$single] = $node->textContent; + } + } + + return $hcard; + } + + // XXX: this is a first pass; we probably need + // to handle things like ../ and ./ and so on + + static function _rel2abs($rel, $wrt) + { + $parts = parse_url($rel); + + if ($parts === false) { + return false; + } + + // If it's got a scheme, use it + + if ($parts['scheme'] != '') { + return $rel; + } + + $w = parse_url($wrt); + + $base = $w['scheme'].'://'.$w['host']; + + if ($rel[0] == '/') { + return $base.$rel; + } + + $wp = explode('/', $w['path']); + + array_pop($wp); + + return $base.implode('/', $wp).'/'.$rel; + } } |