diff options
author | Zach Copley <zach@status.net> | 2010-03-18 17:11:06 -0700 |
---|---|---|
committer | Zach Copley <zach@status.net> | 2010-03-18 17:11:06 -0700 |
commit | a6c9445f0de1a74ffc81dda12eaba6ee36410716 (patch) | |
tree | 873d6a67626ef324c0102153c6a027291767b878 /plugins/OStatus/lib/discoveryhints.php | |
parent | 051bee988caa4a8d3648fae9f963cb35d26c13c5 (diff) | |
parent | 1301877dfe89c57c182246c0d7ba0ff6335fd17b (diff) |
Merge branch 'testing' of gitorious.org:statusnet/mainline into testing
* 'testing' of gitorious.org:statusnet/mainline:
OStatus discover fixes:
Remove xpm support (no one really uses it, and IMAGETYPE_XPM is undefined, causing warnings)
Fix notice warning about unused var -- was renamed during refactoring.
Diffstat (limited to 'plugins/OStatus/lib/discoveryhints.php')
-rw-r--r-- | plugins/OStatus/lib/discoveryhints.php | 91 |
1 files changed, 59 insertions, 32 deletions
diff --git a/plugins/OStatus/lib/discoveryhints.php b/plugins/OStatus/lib/discoveryhints.php index db13793dd..4da2ec0f1 100644 --- a/plugins/OStatus/lib/discoveryhints.php +++ b/plugins/OStatus/lib/discoveryhints.php @@ -65,17 +65,22 @@ class DiscoveryHints { { common_debug("starting tidy"); - $body = self::_tidy($body); + $body = self::_tidy($body, $url); common_debug("done with tidy"); set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/plugins/OStatus/extlib/hkit/'); require_once('hkit.class.php'); - $h = new hKit; + // hKit code is not clean for notices and warnings + $old = error_reporting(); + error_reporting($old & ~E_NOTICE & ~E_WARNING); + $h = new hKit; $hcards = $h->getByString('hcard', $body); + error_reporting($old); + if (empty($hcards)) { return array(); } @@ -144,39 +149,61 @@ class DiscoveryHints { return $hints; } - private static function _tidy($body) + /** + * hKit needs well-formed XML for its parsing. + * We'll take the HTML body here and normalize it to XML. + * + * @param string $body HTML document source, possibly not-well-formed + * @param string $url source URL + * @return string well-formed XML document source + * @throws Exception if HTML parsing failed. + */ + private static function _tidy($body, $url) { - if (function_exists('tidy_parse_string')) { - common_debug("Tidying with extension"); - $text = tidy_parse_string($body); - $text = tidy_clean_repair($text); - return $body; - } else if ($fullpath = self::_findProgram('tidy')) { - common_debug("Tidying with program $fullpath"); - $tempfile = tempnam('/tmp', 'snht'); // statusnet hcard tidy - file_put_contents($tempfile, $source); - exec("$fullpath -utf8 -indent -asxhtml -numeric -bare -quiet $tempfile", $tidy); - unlink($tempfile); - return implode("\n", $tidy); - } else { - common_debug("Not tidying."); - return $body; + if (empty($body)) { + throw new Exception("Empty HTML could not be parsed."); } - } - - private static function _findProgram($name) - { - $path = $_ENV['PATH']; - - $parts = explode(':', $path); - - foreach ($parts as $part) { - $fullpath = $part . '/' . $name; - if (is_executable($fullpath)) { - return $fullpath; + $dom = new DOMDocument(); + + // Some HTML errors will trigger warnings, but still work. + $old = error_reporting(); + error_reporting($old & ~E_WARNING); + + $ok = $dom->loadHTML($body); + + error_reporting($old); + + if ($ok) { + // hKit doesn't give us a chance to pass the source URL for + // resolving relative links, such as the avatar photo on a + // Google profile. We'll slip it into a <base> tag if there's + // not already one present. + $bases = $dom->getElementsByTagName('base'); + if ($bases && $bases->length >= 1) { + $base = $bases->item(0); + if ($base->hasAttribute('href')) { + $base->setAttribute('href', $url); + } + } else { + $base = $dom->createElement('base'); + $base->setAttribute('href', $url); + $heads = $dom->getElementsByTagName('head'); + if ($heads || $heads->length) { + $head = $heads->item(0); + } else { + $head = $dom->createElement('head'); + $root = $dom->documentRoot; + if ($root->firstChild) { + $root->insertBefore($head, $root->firstChild); + } else { + $root->appendChild($head); + } + } + $head->appendChild($base); } + return $dom->saveXML(); + } else { + throw new Exception("Invalid HTML could not be parsed."); } - - return null; } } |