From dc09453a77f33c4dfdff306321ce93cf5fbd2d57 Mon Sep 17 00:00:00 2001
From: Brion Vibber <brion@pobox.com>
Date: Mon, 8 Feb 2010 11:06:03 -0800
Subject: First steps on converting FeedSub into the pub/sub basis for OStatus
 communications: * renamed FeedSub plugin to OStatus * now setting avatar on
 subscriptions * general fixes for subscription * integrated PuSH hub to
 handle only user timelines on canonical ID url; sends updates directly * set
 $config['feedsub']['nohub'] = true to test w/ foreign feeds that don't have
 hubs (won't actually receive updates though) * a few bits of code
 documentation * HMAC support for verified distributions (safest if sub setup
 is on HTTPS)

And a couple core changes:
* minimizing HTML output for exceptions in API requests to aid in debugging
* fix for rel=self link in apitimelineuser when id given

This does not yet include any of the individual subscription management (Salmon notifications for sub/unsub, etc.) nor a nice UI for user subscriptions.
Needs some further cleanup to treat posts as status updates instead of link references.
---
 plugins/OStatus/extlib/XML/Feed/Parser/Type.php | 467 ++++++++++++++++++++++++
 1 file changed, 467 insertions(+)
 create mode 100644 plugins/OStatus/extlib/XML/Feed/Parser/Type.php

(limited to 'plugins/OStatus/extlib/XML/Feed/Parser/Type.php')

diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/Type.php b/plugins/OStatus/extlib/XML/Feed/Parser/Type.php
new file mode 100644
index 000000000..75052619b
--- /dev/null
+++ b/plugins/OStatus/extlib/XML/Feed/Parser/Type.php
@@ -0,0 +1,467 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * Abstract class providing common methods for XML_Feed_Parser feeds.
+ *
+ * PHP versions 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category   XML
+ * @package    XML_Feed_Parser
+ * @author     James Stewart <james@jystewart.net>
+ * @copyright  2005 James Stewart <james@jystewart.net>
+ * @license    http://www.gnu.org/copyleft/lesser.html  GNU LGPL 2.1
+ * @version    CVS: $Id: Type.php,v 1.25 2008/03/08 18:39:09 jystewart Exp $
+ * @link       http://pear.php.net/package/XML_Feed_Parser/
+ */
+
+/**
+ * This abstract class provides some general methods that are likely to be
+ * implemented exactly the same way for all feed types.
+ *
+ * @package XML_Feed_Parser
+ * @author  James Stewart <james@jystewart.net>
+ * @version Release: 1.0.3
+ */
+abstract class XML_Feed_Parser_Type
+{
+    /**
+     * Where we store our DOM object for this feed 
+     * @var DOMDocument
+     */
+    public $model;
+
+    /**
+     * For iteration we'll want a count of the number of entries 
+     * @var int
+     */
+    public $numberEntries;
+
+    /**
+     * Where we store our entry objects once instantiated 
+     * @var array
+     */
+    public $entries = array();
+
+    /**
+     * Map entry IDs to the entry objects getEntryByOffset() has instantiated
+     */
+    public $idMappings = array();
+
+    /**
+     * Proxy to allow use of element names as method names
+     *
+     * The call is first translated through $this->compatMap, then looked up
+     * (case-insensitively if need be) in $this->map, whose first entry names
+     * the get* method that does the work. Neither map is declared in this class.
+     *
+     * @param   string  $call - the method attempted
+     * @param   array   $arguments - arguments to that method
+     * @return  mixed   result of the get* method, or false if nothing matched
+     */
+    function __call($call, $arguments = array())
+    {
+        if (! is_array($arguments)) {
+            $arguments = array();
+        }
+
+        if (isset($this->compatMap[$call])) {
+            $tempMap = $this->compatMap;
+            $tempcall = array_pop($tempMap[$call]); // last element is the name to call instead
+            if (! empty($tempMap)) { // NOTE(review): tests the whole map copy, not $tempMap[$call]
+                $arguments = array_merge($arguments, $tempMap[$call]); // the rest become extra arguments
+            }
+            $call = $tempcall;
+        }
+
+        /* To be helpful, we allow a case-insensitive search for this method */
+        if (! isset($this->map[$call])) {
+            foreach (array_keys($this->map) as $key) {
+                if (strtoupper($key) == strtoupper($call)) {
+                    $call = $key;
+                    break;
+                }
+            }
+        }
+
+        if (empty($this->map[$call])) {
+            return false;
+        }
+
+        $method = 'get' . $this->map[$call][0];
+        if ($method == 'getLink') { // getLink takes positional arguments, not ($call, $arguments)
+            $offset = empty($arguments[0]) ? 0 : $arguments[0];
+            $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
+            $params = isset($arguments[2]) ? $arguments[2] : array();
+            return $this->getLink($offset, $attribute, $params);
+        }
+        if (method_exists($this, $method)) {
+            return $this->$method($call, $arguments);
+        }
+
+        return false;
+    }
+
+    /**
+     * Proxy to allow use of element names as attribute names
+     *
+     * Reading $feed->name on a property that is not accessible is routed to
+     * __call('name') with no arguments, so it yields whatever that returns.
+     *
+     * @param   string  $value - the variable required
+     * @return  mixed   whatever __call() returns for $value; false if unmapped
+     */
+    function __get($value)
+    {
+        return $this->__call($value, array());
+    }
+
+    /**
+     * Utility function to help us resolve xml:base values
+     *
+     * A link that starts with a protocol is returned untouched. A link that
+     * starts with a / is appended to the scheme and host of the base. A link
+     * that starts with ../ steps up the path of the base. Anything else
+     * replaces the last path segment of the base.
+     *
+     * @param   string  $base - the base to add the link to
+     * @param   string  $link
+     * @return  string  the combined URI
+     */
+    function combineBases($base, $link)
+    {
+        if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
+            return $link;
+        } else if (preg_match('/^\//', $link)) {
+            /* Keep only scheme://host of the base; the link supplies the path */
+            preg_match('/^[A-Za-z]+:\/\/[^\/?#]*/', $base, $results);
+            $firstLayer = empty($results) ? '' : $results[0];
+            return $firstLayer . $link;
+        } else if (preg_match('/^\.\.\//', $link)) {
+            /* Step up link to find place to be (one extra pop drops the file name) */
+            preg_match('/^((\.\.\/)+)(.*)$/', $link, $bases);
+            $suffix = $bases[3];
+            $count = preg_match_all('/\.\.\//', $bases[1], $steps);
+            $url = explode("/", $base);
+            for ($i = 0; $i <= $count; $i++) {
+                array_pop($url);
+            }
+            return implode("/", $url) . "/" . $suffix;
+        } else if (preg_match('/^(?!\/$)/', $base)) {
+            $base = preg_replace('/(.*\/).*$/', '$1', $base)  ;
+            return $base . $link;
+        } else {
+            /* Just stick it on the end */
+            return $base . $link;
+        }
+    }
+
+    /**
+     * Resolve a link against the base URI of the node it was found on
+     *
+     * Links that already start with a protocol are returned unchanged; all
+     * others are handed to combineBases() together with $element->baseURI.
+     *
+     * @param   string      $link
+     * @param   DOMNode     $element    node whose baseURI applies to the link
+     * @return  string
+     */
+    function addBase($link, $element)
+    {
+        $isAbsolute = preg_match('/^[A-Za-z]+:\/\//', $link);
+        if (! $isAbsolute) {
+            return $this->combineBases($element->baseURI, $link);
+        }
+
+        return $link;
+    }
+
+    /**
+     * Get an entry by its position in the feed, starting from zero
+     *
+     * Entries are built on first request as $this->itemClass instances, cached
+     * in $this->entries and, when they have an id, in $this->idMappings.
+     *
+     * @param   int $offset
+     * @return  object  instance of $this->itemClass
+     * @throws  XML_Feed_Parser_Exception   when no entry exists at $offset
+     */
+    function getEntryByOffset($offset)
+    {
+        if (isset($this->entries[$offset])) {
+            return $this->entries[$offset];
+        }
+
+        $nodes = $this->model->getElementsByTagName($this->itemElement);
+        if ($nodes->length <= $offset) {
+            throw new XML_Feed_Parser_Exception('No entries found');
+        }
+        $node = $nodes->item($offset);
+        $entry = new $this->itemClass($node, $this, $node->baseURI);
+        $this->entries[$offset] = $entry;
+        if ($id = $entry->id) {
+            $this->idMappings[$id] = $entry;
+        }
+        return $entry;
+    }
+
+    /**
+     * Return a date in seconds since epoch.
+     *
+     * Reads the first element named $method and passes its text to PHP's
+     * strtotime, giving the number of seconds since 1970-01-01 00:00:00.
+     *
+     * @link    http://php.net/strtotime
+     * @param   string  $method     The name of the date construct we want
+     * @param   array   $arguments  Included for compatibility with our __call usage
+     * @return  int|false   false if the element is missing, empty or unparseable
+     */
+    protected function getDate($method, $arguments)
+    {
+        $matches = $this->model->getElementsByTagName($method);
+        $raw = $matches->length == 0 ? null : $matches->item(0)->nodeValue;
+        if (empty($raw)) {
+            return false;
+        }
+        return strtotime($raw);
+    }
+
+    /**
+     * Get the text of the first element named $method.
+     *
+     * @param   string  $method     The name of the text construct we want
+     * @param   array   $arguments  Included for compatibility with our __call usage
+     * @return  string|false        false when no such element exists
+     */
+    protected function getText($method, $arguments = array())
+    {
+        $found = $this->model->getElementsByTagName($method);
+        if ($found->length < 1) {
+            return false;
+        }
+
+        return $found->item(0)->nodeValue;
+    }
+
+    /**
+     * Retrieve category data from 'subject' elements.
+     *
+     * RSS1/1.1 has no single way of declaring a category as RSS2 and Atom
+     * do. The usual approach is the Dublin Core namespace, e.g. delicious
+     * uses both <dc:subject>PEAR</dc:subject> and <taxo:topics><rdf:Bag>
+     * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
+     * to declare a categorisation of 'PEAR'.
+     *
+     * Only elements named 'subject' are read here; with arg 1 set, every
+     * subject is returned, not just those from the offset onwards.
+     *
+     * @param   string  $call       for compatibility with our overloading
+     * @param   array   $arguments  arg 0 is the offset, arg 1 is whether to return as array
+     * @return  string|array|false  false when the offset is past the last subject
+     */
+    protected function getCategory($call, $arguments)
+    {
+        $offset = empty($arguments[0]) ? 0 : $arguments[0];
+        $asArray = ! empty($arguments[1]);
+        $subjects = $this->model->getElementsByTagName('subject');
+        if ($offset >= $subjects->length) {
+            return false;
+        }
+        if (! $asArray) {
+            return $subjects->item($offset)->nodeValue;
+        }
+        $values = array();
+        foreach ($subjects as $subject) {
+            $values[] = $subject->nodeValue;
+        }
+        return $values;
+    }
+
+    /**
+     * Count occurrences of an element
+     *
+     * Reports how many elements named $type getElementsByTagName() finds
+     * in $this->model.
+     *
+     * @param   string  $type   the element we want to get a count of
+     * @return  int
+     */
+    protected function count($type)
+    {
+        $found = $this->model->getElementsByTagName($type);
+        if (! $found) {
+            return 0;
+        }
+        return $found->length;
+    }
+
+    /**
+     * Serialise the attributes of an XHTML node, resolving xml:base.
+     *
+     * src and href values are made absolute via addBase() and written back to
+     * the node; attributes named 'base' are left out of the output. Values are
+     * encoded with ENT_QUOTES so a quote cannot end the attribute early.
+     *
+     * @param   DOMElement $node    The DOM node we are iterating over
+     * @return  string  ' name="value" ...' with a leading space, or ''
+     */
+    function processXHTMLAttributes($node) {
+        $return = '';
+        foreach ($node->attributes as $attribute) {
+            if ($attribute->name == 'src' or $attribute->name == 'href') {
+                $attribute->value = $this->addBase(htmlentities($attribute->value, NULL, 'utf-8'), $attribute);
+            }
+            if ($attribute->name == 'base') {
+                continue;
+            }
+            $return .= $attribute->name . '="' . htmlentities($attribute->value, ENT_QUOTES, 'utf-8') .'" ';
+        }
+        if (! empty($return)) {
+            return ' ' . trim($return);
+        }
+        return '';
+    }
+
+    /**
+     * Convert a node's value to UTF-8 and normalise its HTML entities.
+     *
+     * @param   DOMNode $node   node whose nodeValue is processed
+     * @return  string  the value entity-decoded, then entity-encoded again
+     */
+    function processEntitiesForNodeValue($node)
+    {
+        /* Looked up before branching: the non-iconv fallback needs it too */
+        $current_encoding = $node->ownerDocument->encoding;
+        if (function_exists('iconv')) {
+            $value = iconv($current_encoding, 'UTF-8', $node->nodeValue);
+        } else if (strtolower((string) $current_encoding) == 'iso-8859-1') {
+            $value = utf8_encode($node->nodeValue);
+        } else {
+            $value = $node->nodeValue;
+        }
+        $decoded = html_entity_decode($value, NULL, 'UTF-8');
+        return htmlentities($decoded, NULL, 'UTF-8');
+    }
+
+    /**
+     * Part of our xml:base processing code
+     *
+     * Recursively serialises a node: elements become an opening tag (with
+     * attributes from processXHTMLAttributes), their serialised children and
+     * a closing tag; text nodes become their entity-encoded value. Any other
+     * node type contributes only its children.
+     *
+     * @param   DOMNode $node   The DOM node we are processing
+     * @return  string
+     */
+    function traverseNode($node)
+    {
+        $isElement = $node instanceof DOMElement;
+        $pieces = array();
+        /* Opening tag, attributes included */
+        if ($isElement) {
+            $attributes = $this->processXHTMLAttributes($node);
+            $pieces[] = '<' . $node->tagName . $attributes . '>';
+        }
+
+        /* Children, in document order */
+        if ($node->hasChildNodes()) {
+            foreach ($node->childNodes as $child) {
+                $pieces[] = $this->traverseNode($child);
+            }
+        }
+
+        /* Text nodes contribute their entity-encoded value */
+        if ($node instanceof DOMText) {
+            $pieces[] = $this->processEntitiesForNodeValue($node);
+        }
+
+        /* Closing tag */
+        if ($isElement) {
+            $pieces[] = '</' . $node->tagName . '>';
+        }
+        return implode('', $pieces);
+    }
+
+    /**
+     * Get content from RSS feeds (atom has its own implementation)
+     *
+     * Looks for an 'encoded' element first (the content module's way of
+     * carrying full content) and falls back to 'description'. Text children
+     * of the element found are taken verbatim; any other child is serialised
+     * as XML via SimpleXML.
+     *
+     * @return  string|false    false when neither element is present
+     */
+    protected function getContent()
+    {
+        foreach (array('encoded', 'description') as $tagName) {
+            $matches = $this->model->getElementsByTagName($tagName);
+            if ($matches->length == 0) {
+                continue;
+            }
+            $first = $matches->item(0);
+            if (! $first->hasChildNodes()) {
+                /* No children to serialise */
+                return $first->nodeValue;
+            }
+
+            $value = '';
+            foreach ($first->childNodes as $child) {
+                if ($child instanceof DOMText) {
+                    $value .= $child->nodeValue;
+                    continue;
+                }
+                $value .= simplexml_import_dom($child)->asXML();
+            }
+            return $value;
+        }
+        return false;
+    }
+
+    /**
+     * Checks whether more than $offset elements named $name exist in the model.
+     *
+     * @param   string  $name       element name to look for
+     * @param   int     $offset     zero-based position that must exist
+     * @return  bool
+     */
+    function hasKey($name, $offset = 0)
+    {
+        $matches = $this->model->getElementsByTagName($name);
+        return $offset < $matches->length;
+    }
+
+    /**
+     * Return an XML serialization of the feed, should it be required. Most
+     * users, however, will already hold the serialization they instantiated
+     * the object from.
+     *
+     * @return  string  XML serialization of $this->model
+     */
+    function __toString()
+    {
+        /* Serialise through SimpleXML */
+        return simplexml_import_dom($this->model)->asXML();
+    }
+    
+    /**
+     * Get directory holding RNG schemas. Method is based on that
+     * found in Contact_AddressBook.
+     *
+     * @return string XML_Feed_Parser/schemas beneath PEAR's configured data_dir.
+     * @access public
+     * @static
+     */
+    static function getSchemaDir()
+    {
+        require_once 'PEAR/Config.php';
+        $pear = new PEAR_Config();
+        return $pear->get('data_dir') . '/XML_Feed_Parser/schemas';
+    }
+}
+
+?>
\ No newline at end of file
-- 
cgit v1.2.3-54-g00ecf