summaryrefslogtreecommitdiff
path: root/plugins/OStatus/lib/discoveryhints.php
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/OStatus/lib/discoveryhints.php')
-rw-r--r--plugins/OStatus/lib/discoveryhints.php91
1 files changed, 59 insertions, 32 deletions
diff --git a/plugins/OStatus/lib/discoveryhints.php b/plugins/OStatus/lib/discoveryhints.php
index db13793dd..4da2ec0f1 100644
--- a/plugins/OStatus/lib/discoveryhints.php
+++ b/plugins/OStatus/lib/discoveryhints.php
@@ -65,17 +65,22 @@ class DiscoveryHints {
{
common_debug("starting tidy");
- $body = self::_tidy($body);
+ $body = self::_tidy($body, $url);
common_debug("done with tidy");
set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/plugins/OStatus/extlib/hkit/');
require_once('hkit.class.php');
- $h = new hKit;
+ // hKit code is not clean for notices and warnings
+ $old = error_reporting();
+ error_reporting($old & ~E_NOTICE & ~E_WARNING);
+ $h = new hKit;
$hcards = $h->getByString('hcard', $body);
+ error_reporting($old);
+
if (empty($hcards)) {
return array();
}
@@ -144,39 +149,61 @@ class DiscoveryHints {
return $hints;
}
- private static function _tidy($body)
+ /**
+ * hKit needs well-formed XML for its parsing.
+ * We'll take the HTML body here and normalize it to XML.
+ *
+ * @param string $body HTML document source, possibly not-well-formed
+ * @param string $url source URL
+ * @return string well-formed XML document source
+ * @throws Exception if HTML parsing failed.
+ */
+ private static function _tidy($body, $url)
{
- if (function_exists('tidy_parse_string')) {
- common_debug("Tidying with extension");
- $text = tidy_parse_string($body);
- $text = tidy_clean_repair($text);
- return $body;
- } else if ($fullpath = self::_findProgram('tidy')) {
- common_debug("Tidying with program $fullpath");
- $tempfile = tempnam('/tmp', 'snht'); // statusnet hcard tidy
- file_put_contents($tempfile, $source);
- exec("$fullpath -utf8 -indent -asxhtml -numeric -bare -quiet $tempfile", $tidy);
- unlink($tempfile);
- return implode("\n", $tidy);
- } else {
- common_debug("Not tidying.");
- return $body;
+ if (empty($body)) {
+ throw new Exception("Empty HTML could not be parsed.");
}
- }
-
- private static function _findProgram($name)
- {
- $path = $_ENV['PATH'];
-
- $parts = explode(':', $path);
-
- foreach ($parts as $part) {
- $fullpath = $part . '/' . $name;
- if (is_executable($fullpath)) {
- return $fullpath;
+ $dom = new DOMDocument();
+
+ // Some HTML errors will trigger warnings, but still work.
+ $old = error_reporting();
+ error_reporting($old & ~E_WARNING);
+
+ $ok = $dom->loadHTML($body);
+
+ error_reporting($old);
+
+ if ($ok) {
+ // hKit doesn't give us a chance to pass the source URL for
+ // resolving relative links, such as the avatar photo on a
+ // Google profile. We'll slip it into a <base> tag if there's
+ // not already one present.
+ $bases = $dom->getElementsByTagName('base');
+ if ($bases && $bases->length >= 1) {
+ $base = $bases->item(0);
+ if ($base->hasAttribute('href')) {
+ $base->setAttribute('href', $url);
+ }
+ } else {
+ $base = $dom->createElement('base');
+ $base->setAttribute('href', $url);
+ $heads = $dom->getElementsByTagName('head');
+ if ($heads || $heads->length) {
+ $head = $heads->item(0);
+ } else {
+ $head = $dom->createElement('head');
+ $root = $dom->documentRoot;
+ if ($root->firstChild) {
+ $root->insertBefore($head, $root->firstChild);
+ } else {
+ $root->appendChild($head);
+ }
+ }
+ $head->appendChild($base);
}
+ return $dom->saveXML();
+ } else {
+ throw new Exception("Invalid HTML could not be parsed.");
}
-
- return null;
}
}