diff options
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/OStatus/lib/discoveryhints.php | 91 | ||||
-rw-r--r-- | plugins/OStatus/lib/linkheader.php | 24 |
2 files changed, 71 insertions, 44 deletions
diff --git a/plugins/OStatus/lib/discoveryhints.php b/plugins/OStatus/lib/discoveryhints.php index db13793dd..4da2ec0f1 100644 --- a/plugins/OStatus/lib/discoveryhints.php +++ b/plugins/OStatus/lib/discoveryhints.php @@ -65,17 +65,22 @@ class DiscoveryHints { { common_debug("starting tidy"); - $body = self::_tidy($body); + $body = self::_tidy($body, $url); common_debug("done with tidy"); set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/plugins/OStatus/extlib/hkit/'); require_once('hkit.class.php'); - $h = new hKit; + // hKit code is not clean for notices and warnings + $old = error_reporting(); + error_reporting($old & ~E_NOTICE & ~E_WARNING); + $h = new hKit; $hcards = $h->getByString('hcard', $body); + error_reporting($old); + if (empty($hcards)) { return array(); } @@ -144,39 +149,61 @@ class DiscoveryHints { return $hints; } - private static function _tidy($body) + /** + * hKit needs well-formed XML for its parsing. + * We'll take the HTML body here and normalize it to XML. + * + * @param string $body HTML document source, possibly not-well-formed + * @param string $url source URL + * @return string well-formed XML document source + * @throws Exception if HTML parsing failed. + */ + private static function _tidy($body, $url) { - if (function_exists('tidy_parse_string')) { - common_debug("Tidying with extension"); - $text = tidy_parse_string($body); - $text = tidy_clean_repair($text); - return $body; - } else if ($fullpath = self::_findProgram('tidy')) { - common_debug("Tidying with program $fullpath"); - $tempfile = tempnam('/tmp', 'snht'); // statusnet hcard tidy - file_put_contents($tempfile, $source); - exec("$fullpath -utf8 -indent -asxhtml -numeric -bare -quiet $tempfile", $tidy); - unlink($tempfile); - return implode("\n", $tidy); - } else { - common_debug("Not tidying."); - return $body; + if (empty($body)) { + throw new Exception("Empty HTML could not be parsed."); } - } - - private static function _findProgram($name) - { - $path = $_ENV['PATH']; - - $parts = explode(':', $path); - - foreach ($parts as $part) { - $fullpath = $part . '/' . $name; - if (is_executable($fullpath)) { - return $fullpath; + $dom = new DOMDocument(); + + // Some HTML errors will trigger warnings, but still work. + $old = error_reporting(); + error_reporting($old & ~E_WARNING); + + $ok = $dom->loadHTML($body); + + error_reporting($old); + + if ($ok) { + // hKit doesn't give us a chance to pass the source URL for + // resolving relative links, such as the avatar photo on a + // Google profile. We'll slip it into a <base> tag if there's + // not already one present. + $bases = $dom->getElementsByTagName('base'); + if ($bases && $bases->length >= 1) { + $base = $bases->item(0); + if ($base->hasAttribute('href')) { + $base->setAttribute('href', $url); + } + } else { + $base = $dom->createElement('base'); + $base->setAttribute('href', $url); + $heads = $dom->getElementsByTagName('head'); + if ($heads || $heads->length) { + $head = $heads->item(0); + } else { + $head = $dom->createElement('head'); + $root = $dom->documentRoot; + if ($root->firstChild) { + $root->insertBefore($head, $root->firstChild); + } else { + $root->appendChild($head); + } + } + $head->appendChild($base); } + return $dom->saveXML(); + } else { + throw new Exception("Invalid HTML could not be parsed."); } - - return null; } } diff --git a/plugins/OStatus/lib/linkheader.php b/plugins/OStatus/lib/linkheader.php index 2f6c66dc9..afcd66d26 100644 --- a/plugins/OStatus/lib/linkheader.php +++ b/plugins/OStatus/lib/linkheader.php @@ -43,21 +43,21 @@ class LinkHeader static function getLink($response, $rel=null, $type=null) { $headers = $response->getHeader('Link'); + if ($headers) { + // Can get an array or string, so try to simplify the path + if (!is_array($headers)) { + $headers = array($headers); + } - // Can get an array or string, so try to simplify the path - if (!is_array($headers)) { - $headers = array($headers); - } - - foreach ($headers as $header) { - $lh = new LinkHeader($header); + foreach ($headers as $header) { + $lh = new LinkHeader($header); - if ((is_null($rel) || $lh->rel == $rel) && - (is_null($type) || $lh->type == $type)) { - return $lh->href; + if ((is_null($rel) || $lh->rel == $rel) && + (is_null($type) || $lh->type == $type)) { + return $lh->href; + } } } - return null; } -}
\ No newline at end of file +} |