summaryrefslogtreecommitdiff
path: root/plugins/OStatus/lib
diff options
context:
space:
mode:
authorEvan Prodromou <evan@status.net>2010-03-16 11:25:18 -0500
committerEvan Prodromou <evan@status.net>2010-03-16 11:25:18 -0500
commitf21f78364a9cbde2ca535a3983b384707ad097ae (patch)
treea9e0836d7aaa42d7a0f272213d11cbccef9e354b /plugins/OStatus/lib
parentc1e96cbdefa66e66815c421378b9452d7c8d5548 (diff)
Change the workflow to get better discovery
Tried to re-structure the workflow of discovery to get more and richer data and hints.
Diffstat (limited to 'plugins/OStatus/lib')
-rw-r--r--plugins/OStatus/lib/discovery.php78
-rw-r--r--plugins/OStatus/lib/discoveryhints.php182
-rw-r--r--plugins/OStatus/lib/feeddiscovery.php4
-rw-r--r--plugins/OStatus/lib/linkheader.php63
4 files changed, 267 insertions, 60 deletions
diff --git a/plugins/OStatus/lib/discovery.php b/plugins/OStatus/lib/discovery.php
index f8449b309..6d245677a 100644
--- a/plugins/OStatus/lib/discovery.php
+++ b/plugins/OStatus/lib/discovery.php
@@ -40,7 +40,7 @@ class Discovery
const PROFILEPAGE = 'http://webfinger.net/rel/profile-page';
const UPDATESFROM = 'http://schemas.google.com/g/2010#updates-from';
const HCARD = 'http://microformats.org/profile/hcard';
-
+
public $methods = array();
public function __construct()
@@ -50,12 +50,11 @@ class Discovery
$this->registerMethod('Discovery_LRDD_Link_HTML');
}
-
public function registerMethod($class)
{
$this->methods[] = $class;
}
-
+
/**
* Given a "user id" make sure it's normalized to either a webfinger
* acct: uri or a profile HTTP URL.
@@ -78,7 +77,7 @@ class Discovery
public static function isWebfinger($user_id)
{
$uri = Discovery::normalize($user_id);
-
+
return (substr($uri, 0, 5) == 'acct:');
}
@@ -99,7 +98,7 @@ class Discovery
} else {
$xrd_uri = $link['href'];
}
-
+
$xrd = $this->fetchXrd($xrd_uri);
if ($xrd) {
return $xrd;
@@ -114,14 +113,13 @@ class Discovery
if (!is_array($links)) {
return false;
}
-
+
foreach ($links as $link) {
if ($link['rel'] == $service) {
return $link;
}
}
}
-
public static function applyTemplate($template, $id)
{
@@ -130,7 +128,6 @@ class Discovery
return $template;
}
-
public static function fetchXrd($url)
{
try {
@@ -171,7 +168,7 @@ class Discovery_LRDD_Host_Meta implements Discovery_LRDD
if ($xrd->host != $domain) {
return false;
}
-
+
return $xrd->links;
}
}
@@ -187,7 +184,7 @@ class Discovery_LRDD_Link_Header implements Discovery_LRDD
} catch (HTTP_Request2_Exception $e) {
return false;
}
-
+
if ($response->getStatus() != 200) {
return false;
}
@@ -196,51 +193,17 @@ class Discovery_LRDD_Link_Header implements Discovery_LRDD
if (!$link_header) {
// return false;
}
-
+
return Discovery_LRDD_Link_Header::parseHeader($link_header);
}
protected static function parseHeader($header)
{
- preg_match('/^<[^>]+>/', $header, $uri_reference);
- //if (empty($uri_reference)) return;
-
- $links = array();
-
- $link_uri = trim($uri_reference[0], '<>');
- $link_rel = array();
- $link_type = null;
-
- // remove uri-reference from header
- $header = substr($header, strlen($uri_reference[0]));
-
- // parse link-params
- $params = explode(';', $header);
-
- foreach ($params as $param) {
- if (empty($param)) continue;
- list($param_name, $param_value) = explode('=', $param, 2);
- $param_name = trim($param_name);
- $param_value = preg_replace('(^"|"$)', '', trim($param_value));
-
- // for now we only care about 'rel' and 'type' link params
- // TODO do something with the other links-params
- switch ($param_name) {
- case 'rel':
- $link_rel = trim($param_value);
- break;
-
- case 'type':
- $link_type = trim($param_value);
- }
- }
-
- $links[] = array(
- 'href' => $link_uri,
- 'rel' => $link_rel,
- 'type' => $link_type);
+ $lh = new LinkHeader($header);
- return $links;
+ return array('href' => $lh->href,
+ 'rel' => $lh->rel,
+ 'type' => $lh->type);
}
}
@@ -262,49 +225,48 @@ class Discovery_LRDD_Link_HTML implements Discovery_LRDD
return Discovery_LRDD_Link_HTML::parse($response->getBody());
}
-
public function parse($html)
{
$links = array();
-
+
preg_match('/<head(\s[^>]*)?>(.*?)<\/head>/is', $html, $head_matches);
$head_html = $head_matches[2];
-
+
preg_match_all('/<link\s[^>]*>/i', $head_html, $link_matches);
-
+
foreach ($link_matches[0] as $link_html) {
$link_url = null;
$link_rel = null;
$link_type = null;
-
+
preg_match('/\srel=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $rel_matches);
if ( isset($rel_matches[3]) ) {
$link_rel = $rel_matches[3];
} else if ( isset($rel_matches[1]) ) {
$link_rel = $rel_matches[1];
}
-
+
preg_match('/\shref=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $href_matches);
if ( isset($href_matches[3]) ) {
$link_uri = $href_matches[3];
} else if ( isset($href_matches[1]) ) {
$link_uri = $href_matches[1];
}
-
+
preg_match('/\stype=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $type_matches);
if ( isset($type_matches[3]) ) {
$link_type = $type_matches[3];
} else if ( isset($type_matches[1]) ) {
$link_type = $type_matches[1];
}
-
+
$links[] = array(
'href' => $link_url,
'rel' => $link_rel,
'type' => $link_type,
);
}
-
+
return $links;
}
}
diff --git a/plugins/OStatus/lib/discoveryhints.php b/plugins/OStatus/lib/discoveryhints.php
new file mode 100644
index 000000000..db13793dd
--- /dev/null
+++ b/plugins/OStatus/lib/discoveryhints.php
@@ -0,0 +1,182 @@
+<?php
+/*
+ * StatusNet - the distributed open-source microblogging tool
+ * Copyright (C) 2010, StatusNet, Inc.
+ *
+ * Some utilities for generating hint data
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Utilities that turn discovery results (XRD links, hCard markup) into a
+ * flat associative array of "hints" about a remote profile.
+ *
+ * Hint keys produced by this class: profileurl, salmon, feedurl, hcardurl,
+ * nickname, fullname, avatar, bio, location, homepage.
+ */
+class DiscoveryHints {
+
+    /**
+     * Build hints from the links of an XRD document.
+     *
+     * @param object $xrd XRD object; only its ->links list is read, and each
+     *                    link is read through its 'rel' and 'href' keys
+     *
+     * @return array hints; may contain profileurl, salmon, feedurl, hcardurl
+     */
+    public static function fromXRD($xrd)
+    {
+        $hints = array();
+
+        foreach ($xrd->links as $link) {
+            switch ($link['rel']) {
+            case Discovery::PROFILEPAGE:
+                $hints['profileurl'] = $link['href'];
+                break;
+            case Salmon::NS_REPLIES:
+                $hints['salmon'] = $link['href'];
+                break;
+            case Discovery::UPDATESFROM:
+                $hints['feedurl'] = $link['href'];
+                break;
+            case Discovery::HCARD:
+                $hints['hcardurl'] = $link['href'];
+                break;
+            default:
+                // Unknown relation: nothing to record.
+                break;
+            }
+        }
+
+        return $hints;
+    }
+
+    /**
+     * Fetch a page over HTTP and extract hCard hints from its body.
+     *
+     * @param string $url URL of the page expected to carry an hCard
+     *
+     * @return array|null hints (see hcardHints()), or null when the
+     *                    response is not OK
+     */
+    public static function fromHcardUrl($url)
+    {
+        $client = new HTTPClient();
+        $client->setHeader('Accept', 'text/html,application/xhtml+xml');
+        $response = $client->get($url);
+
+        if (!$response->isOk()) {
+            return null;
+        }
+
+        // Pass the response's own URL (presumably the post-redirect one;
+        // TODO confirm against HTTPClient) so hCard matching uses it.
+        return self::hcardHints($response->getBody(),
+                                $response->getUrl());
+    }
+
+    /**
+     * Extract profile hints from the hCard(s) found in an HTML document.
+     *
+     * The markup is tidied first, then parsed with hKit. When several
+     * hCards are present, the one whose 'url' equals $url wins; otherwise
+     * the first one is used.
+     *
+     * @param string $body HTML source
+     * @param string $url  URL the document was retrieved from
+     *
+     * @return array hints; may contain nickname, fullname, avatar, bio,
+     *               location, homepage. Empty when no hCard is found.
+     */
+    public static function hcardHints($body, $url)
+    {
+        common_debug("starting tidy");
+
+        $body = self::_tidy($body);
+
+        common_debug("done with tidy");
+
+        // hKit is loaded through the include path; add its directory only
+        // once so repeated calls do not keep growing the include path.
+        $hkitdir = INSTALLDIR . '/plugins/OStatus/extlib/hkit/';
+        if (!in_array($hkitdir, explode(PATH_SEPARATOR, get_include_path()))) {
+            set_include_path(get_include_path() . PATH_SEPARATOR . $hkitdir);
+        }
+        require_once('hkit.class.php');
+
+        $h = new hKit;
+
+        $hcards = $h->getByString('hcard', $body);
+
+        if (empty($hcards)) {
+            return array();
+        }
+
+        $hcard = self::_chooseHcard($hcards, $url);
+
+        $hints = array();
+
+        if (array_key_exists('nickname', $hcard)) {
+            $hints['nickname'] = $hcard['nickname'];
+        }
+
+        if (array_key_exists('fn', $hcard)) {
+            $hints['fullname'] = $hcard['fn'];
+        } else if (array_key_exists('n', $hcard)) {
+            // 'n' is normally a list of name parts, but tolerate a plain
+            // string the same way 'adr' and 'url' are tolerated below.
+            if (is_array($hcard['n'])) {
+                $hints['fullname'] = implode(' ', $hcard['n']);
+            } else if (is_string($hcard['n'])) {
+                $hints['fullname'] = $hcard['n'];
+            }
+        }
+
+        if (array_key_exists('photo', $hcard)) {
+            $hints['avatar'] = $hcard['photo'];
+        }
+
+        if (array_key_exists('note', $hcard)) {
+            $hints['bio'] = $hcard['note'];
+        }
+
+        if (array_key_exists('adr', $hcard)) {
+            if (is_string($hcard['adr'])) {
+                $hints['location'] = $hcard['adr'];
+            } else if (is_array($hcard['adr'])) {
+                $hints['location'] = implode(' ', $hcard['adr']);
+            }
+        }
+
+        if (array_key_exists('url', $hcard)) {
+            if (is_string($hcard['url'])) {
+                $hints['homepage'] = $hcard['url'];
+            } else if (is_array($hcard['url']) && count($hcard['url']) > 0) {
+                // HACK get the last one; that's how our hcards look
+                $urls = $hcard['url'];
+                $hints['homepage'] = end($urls);
+            }
+        }
+
+        return $hints;
+    }
+
+    /**
+     * Pick the hCard that describes the page at $url.
+     *
+     * @param array  $hcards non-empty list of hCards as returned by hKit
+     * @param string $url    URL to match against each card's 'url' value(s)
+     *
+     * @return array the matching hCard, or the first one when none matches
+     */
+    private static function _chooseHcard($hcards, $url)
+    {
+        if (count($hcards) == 1) {
+            return $hcards[0];
+        }
+
+        foreach ($hcards as $try) {
+            if (!array_key_exists('url', $try)) {
+                continue;
+            }
+            if (is_string($try['url'])) {
+                if ($try['url'] == $url) {
+                    return $try;
+                }
+            } else if (is_array($try['url'])) {
+                foreach ($try['url'] as $tryurl) {
+                    if ($tryurl == $url) {
+                        return $try;
+                    }
+                }
+            }
+        }
+
+        // last chance; grab the first one
+        return $hcards[0];
+    }
+
+    /**
+     * Clean up HTML so that it can be parsed as XHTML.
+     *
+     * Uses the tidy extension when available, otherwise a 'tidy' binary
+     * found on the PATH; when neither is usable the input is returned
+     * unchanged.
+     *
+     * @param string $body HTML source
+     *
+     * @return string tidied markup, or $body when tidying was not possible
+     */
+    private static function _tidy($body)
+    {
+        if (function_exists('tidy_parse_string')) {
+            common_debug("Tidying with extension");
+            // Mirror the -utf8 -asxhtml -numeric switches of the
+            // command-line fallback below.
+            $config = array('output-xhtml'     => true,
+                            'numeric-entities' => true);
+            $doc = tidy_parse_string($body, $config, 'utf8');
+            if (!is_object($doc)) {
+                return $body;
+            }
+            // tidy_clean_repair() only reports success; the repaired
+            // markup has to be read back with tidy_get_output().
+            tidy_clean_repair($doc);
+            return tidy_get_output($doc);
+        }
+
+        $fullpath = self::_findProgram('tidy');
+
+        if ($fullpath) {
+            common_debug("Tidying with program $fullpath");
+            $tempfile = tempnam(sys_get_temp_dir(), 'snht'); // statusnet hcard tidy
+            if ($tempfile === false) {
+                return $body;
+            }
+            file_put_contents($tempfile, $body);
+            $tidy = array(); // exec() appends to an existing array
+            exec(escapeshellarg($fullpath) .
+                 ' -utf8 -indent -asxhtml -numeric -bare -quiet ' .
+                 escapeshellarg($tempfile), $tidy);
+            unlink($tempfile);
+            return implode("\n", $tidy);
+        }
+
+        common_debug("Not tidying.");
+        return $body;
+    }
+
+    /**
+     * Locate an executable by name in the directories of the PATH.
+     *
+     * @param string $name program name, e.g. 'tidy'
+     *
+     * @return string|null full path of the first executable match, or null
+     */
+    private static function _findProgram($name)
+    {
+        // $_ENV is only populated when variables_order contains 'E', so
+        // fall back to getenv().
+        $path = isset($_ENV['PATH']) ? $_ENV['PATH'] : getenv('PATH');
+
+        if (empty($path)) {
+            return null;
+        }
+
+        foreach (explode(PATH_SEPARATOR, $path) as $part) {
+            if ($part === '') {
+                continue;
+            }
+            $fullpath = $part . '/' . $name;
+            if (is_executable($fullpath)) {
+                return $fullpath;
+            }
+        }
+
+        return null;
+    }
+}
diff --git a/plugins/OStatus/lib/feeddiscovery.php b/plugins/OStatus/lib/feeddiscovery.php
index ff76b229e..f9ea3e713 100644
--- a/plugins/OStatus/lib/feeddiscovery.php
+++ b/plugins/OStatus/lib/feeddiscovery.php
@@ -117,7 +117,7 @@ class FeedDiscovery
return $this->discoverFromURL($target, false);
}
}
-
+
return $this->initFromResponse($response);
}
@@ -202,7 +202,7 @@ class FeedDiscovery
'application/atom+xml' => false,
'application/rss+xml' => false,
);
-
+
$nodes = $dom->getElementsByTagName('link');
for ($i = 0; $i < $nodes->length; $i++) {
$node = $nodes->item($i);
diff --git a/plugins/OStatus/lib/linkheader.php b/plugins/OStatus/lib/linkheader.php
new file mode 100644
index 000000000..2f6c66dc9
--- /dev/null
+++ b/plugins/OStatus/lib/linkheader.php
@@ -0,0 +1,63 @@
+<?php
+
+/**
+ * Parser for a single HTTP Link header value of the form
+ * <uri-reference>; param=value; param="value"
+ *
+ * Only the 'rel' and 'type' parameters are kept.
+ */
+class LinkHeader
+{
+    /** @var string|null target URI taken from between the angle brackets */
+    public $href;
+    /** @var string|null same value as $href; set because earlier code stored the URI under this name */
+    public $uri;
+    /** @var string|array value of the 'rel' parameter; empty array when absent */
+    public $rel;
+    /** @var string|null value of the 'type' parameter; null when absent */
+    public $type;
+
+    /**
+     * Parse one link-value.
+     *
+     * @param string $str link-value, e.g. '<http://example.com/>; rel="lrdd"'
+     */
+    function __construct($str)
+    {
+        $str = (string)$str;
+
+        $this->href = null;
+        $this->uri  = null;
+        $this->rel  = array();
+        $this->type = null;
+
+        if (preg_match('/^<[^>]+>/', $str, $uri_reference)) {
+            $this->href = trim($uri_reference[0], '<>');
+            $this->uri  = $this->href;
+
+            // remove uri-reference from header
+            $str = substr($str, strlen($uri_reference[0]));
+        }
+
+        // parse link-params
+        $params = explode(';', $str);
+
+        foreach ($params as $param) {
+            if (empty($param)) continue;
+            // A parameter without '=' gets an empty value instead of
+            // triggering an undefined-offset notice.
+            list($param_name, $param_value) = array_pad(explode('=', $param, 2), 2, '');
+            $param_name = trim($param_name);
+            $param_value = preg_replace('(^"|"$)', '', trim($param_value));
+
+            // for now we only care about 'rel' and 'type' link params
+            // TODO do something with the other links-params
+            switch ($param_name) {
+            case 'rel':
+                $this->rel = trim($param_value);
+                break;
+
+            case 'type':
+                $this->type = trim($param_value);
+                break;
+            }
+        }
+    }
+
+    /**
+     * Find the first Link header of a response matching a rel and/or type.
+     *
+     * @param object $response object exposing getHeader('Link'), which may
+     *                         return a string or an array of strings
+     * @param string $rel      required 'rel' value, or null for any
+     * @param string $type     required 'type' value, or null for any
+     *
+     * @return string|null href of the first matching link, or null
+     */
+    static function getLink($response, $rel=null, $type=null)
+    {
+        $headers = $response->getHeader('Link');
+
+        // Can get an array or string, so try to simplify the path
+        if (!is_array($headers)) {
+            $headers = array($headers);
+        }
+
+        foreach ($headers as $header) {
+            if (!is_string($header) || $header === '') {
+                continue;
+            }
+
+            // One header line may carry several comma-separated
+            // link-values; a new one always starts with '<'.
+            foreach (preg_split('/,\s*(?=<)/', $header) as $value) {
+                $lh = new LinkHeader($value);
+
+                if ((is_null($rel) || $lh->rel == $rel) &&
+                    (is_null($type) || $lh->type == $type)) {
+                    return $lh->href;
+                }
+            }
+        }
+
+        return null;
+    }
+} \ No newline at end of file