From dc09453a77f33c4dfdff306321ce93cf5fbd2d57 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 8 Feb 2010 11:06:03 -0800 Subject: First steps on converting FeedSub into the pub/sub basis for OStatus communications: * renamed FeedSub plugin to OStatus * now setting avatar on subscriptions * general fixes for subscription * integrated PuSH hub to handle only user timelines on canonical ID url; sends updates directly * set $config['feedsub']['nohub'] = true to test w/ foreign feeds that don't have hubs (won't actually receive updates though) * a few bits of code documentation * HMAC support for verified distributions (safest if sub setup is on HTTPS) And a couple core changes: * minimizing HTML output for exceptions in API requests to aid in debugging * fix for rel=self link in apitimelineuser when id given This does not not yet include any of the individual subscription management (Salmon notifications for sub/unsub, etc) nor a nice UI for user subscriptions. Needs some further cleanup to treat posts as status updates instead of link references. --- plugins/OStatus/extlib/XML/Feed/Parser/Type.php | 467 ++++++++++++++++++++++++ 1 file changed, 467 insertions(+) create mode 100644 plugins/OStatus/extlib/XML/Feed/Parser/Type.php (limited to 'plugins/OStatus/extlib/XML/Feed/Parser/Type.php') diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/Type.php b/plugins/OStatus/extlib/XML/Feed/Parser/Type.php new file mode 100644 index 000000000..75052619b --- /dev/null +++ b/plugins/OStatus/extlib/XML/Feed/Parser/Type.php @@ -0,0 +1,467 @@ + + * @copyright 2005 James Stewart + * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 + * @version CVS: $Id: Type.php,v 1.25 2008/03/08 18:39:09 jystewart Exp $ + * @link http://pear.php.net/package/XML_Feed_Parser/ + */ + +/** + * This abstract class provides some general methods that are likely to be + * implemented exactly the same way for all feed types. + * + * @package XML_Feed_Parser + * @author James Stewart + * @version Release: 1.0.3 + */ +abstract class XML_Feed_Parser_Type +{ + /** + * Where we store our DOM object for this feed + * @var DOMDocument + */ + public $model; + + /** + * For iteration we'll want a count of the number of entries + * @var int + */ + public $numberEntries; + + /** + * Where we store our entry objects once instantiated + * @var array + */ + public $entries = array(); + + /** + * Store mappings between entry IDs and their position in the feed + */ + public $idMappings = array(); + + /** + * Proxy to allow use of element names as method names + * + * We are not going to provide methods for every entry type so this + * function will allow for a lot of mapping. We rely pretty heavily + * on this to handle our mappings between other feed types and atom. + * + * @param string $call - the method attempted + * @param array $arguments - arguments to that method + * @return mixed + */ + function __call($call, $arguments = array()) + { + if (! is_array($arguments)) { + $arguments = array(); + } + + if (isset($this->compatMap[$call])) { + $tempMap = $this->compatMap; + $tempcall = array_pop($tempMap[$call]); + if (! empty($tempMap)) { + $arguments = array_merge($arguments, $tempMap[$call]); + } + $call = $tempcall; + } + + /* To be helpful, we allow a case-insensitive search for this method */ + if (! isset($this->map[$call])) { + foreach (array_keys($this->map) as $key) { + if (strtoupper($key) == strtoupper($call)) { + $call = $key; + break; + } + } + } + + if (empty($this->map[$call])) { + return false; + } + + $method = 'get' . $this->map[$call][0]; + if ($method == 'getLink') { + $offset = empty($arguments[0]) ? 0 : $arguments[0]; + $attribute = empty($arguments[1]) ? 'href' : $arguments[1]; + $params = isset($arguments[2]) ? $arguments[2] : array(); + return $this->getLink($offset, $attribute, $params); + } + if (method_exists($this, $method)) { + return $this->$method($call, $arguments); + } + + return false; + } + + /** + * Proxy to allow use of element names as attribute names + * + * For many elements variable-style access will be desirable. This function + * provides for that. + * + * @param string $value - the variable required + * @return mixed + */ + function __get($value) + { + return $this->__call($value, array()); + } + + /** + * Utility function to help us resolve xml:base values + * + * We have other methods which will traverse the DOM and work out the different + * xml:base declarations we need to be aware of. We then need to combine them. + * If a declaration starts with a protocol then we restart the string. If it + * starts with a / then we add on to the domain name. Otherwise we simply tag + * it on to the end. + * + * @param string $base - the base to add the link to + * @param string $link + */ + function combineBases($base, $link) + { + if (preg_match('/^[A-Za-z]+:\/\//', $link)) { + return $link; + } else if (preg_match('/^\//', $link)) { + /* Extract domain and suffix link to that */ + preg_match('/^([A-Za-z]+:\/\/.*)?\/*/', $base, $results); + $firstLayer = $results[0]; + return $firstLayer . "/" . $link; + } else if (preg_match('/^\.\.\//', $base)) { + /* Step up link to find place to be */ + preg_match('/^((\.\.\/)+)(.*)$/', $link, $bases); + $suffix = $bases[3]; + $count = preg_match_all('/\.\.\//', $bases[1], $steps); + $url = explode("/", $base); + for ($i = 0; $i <= $count; $i++) { + array_pop($url); + } + return implode("/", $url) . "/" . $suffix; + } else if (preg_match('/^(?!\/$)/', $base)) { + $base = preg_replace('/(.*\/).*$/', '$1', $base) ; + return $base . $link; + } else { + /* Just stick it on the end */ + return $base . $link; + } + } + + /** + * Determine whether we need to apply our xml:base rules + * + * Gets us the xml:base data and then processes that with regard + * to our current link. + * + * @param string + * @param DOMElement + * @return string + */ + function addBase($link, $element) + { + if (preg_match('/^[A-Za-z]+:\/\//', $link)) { + return $link; + } + + return $this->combineBases($element->baseURI, $link); + } + + /** + * Get an entry by its position in the feed, starting from zero + * + * As well as allowing the items to be iterated over we want to allow + * users to be able to access a specific entry. This is one of two ways of + * doing that, the other being by ID. + * + * @param int $offset + * @return XML_Feed_Parser_RSS1Element + */ + function getEntryByOffset($offset) + { + if (! isset($this->entries[$offset])) { + $entries = $this->model->getElementsByTagName($this->itemElement); + if ($entries->length > $offset) { + $xmlBase = $entries->item($offset)->baseURI; + $this->entries[$offset] = new $this->itemClass( + $entries->item($offset), $this, $xmlBase); + if ($id = $this->entries[$offset]->id) { + $this->idMappings[$id] = $this->entries[$offset]; + } + } else { + throw new XML_Feed_Parser_Exception('No entries found'); + } + } + + return $this->entries[$offset]; + } + + /** + * Return a date in seconds since epoch. + * + * Get a date construct. We use PHP's strtotime to return it as a unix datetime, which + * is the number of seconds since 1970-01-01 00:00:00. + * + * @link http://php.net/strtotime + * @param string $method The name of the date construct we want + * @param array $arguments Included for compatibility with our __call usage + * @return int|false datetime + */ + protected function getDate($method, $arguments) + { + $time = $this->model->getElementsByTagName($method); + if ($time->length == 0 || empty($time->item(0)->nodeValue)) { + return false; + } + return strtotime($time->item(0)->nodeValue); + } + + /** + * Get a text construct. + * + * @param string $method The name of the text construct we want + * @param array $arguments Included for compatibility with our __call usage + * @return string + */ + protected function getText($method, $arguments = array()) + { + $tags = $this->model->getElementsByTagName($method); + if ($tags->length > 0) { + $value = $tags->item(0)->nodeValue; + return $value; + } + return false; + } + + /** + * Apply various rules to retrieve category data. + * + * There is no single way of declaring a category in RSS1/1.1 as there is in RSS2 + * and Atom. Instead the usual approach is to use the dublin core namespace to + * declare categories. For example delicious use both: + * PEAR and: + * + * to declare a categorisation of 'PEAR'. + * + * We need to be sensitive to this where possible. + * + * @param string $call for compatibility with our overloading + * @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array + * @return string|array|false + */ + protected function getCategory($call, $arguments) + { + $categories = $this->model->getElementsByTagName('subject'); + $offset = empty($arguments[0]) ? 0 : $arguments[0]; + $array = empty($arguments[1]) ? false : true; + if ($categories->length <= $offset) { + return false; + } + if ($array) { + $list = array(); + foreach ($categories as $category) { + array_push($list, $category->nodeValue); + } + return $list; + } + return $categories->item($offset)->nodeValue; + } + + /** + * Count occurrences of an element + * + * This function will tell us how many times the element $type + * appears at this level of the feed. + * + * @param string $type the element we want to get a count of + * @return int + */ + protected function count($type) + { + if ($tags = $this->model->getElementsByTagName($type)) { + return $tags->length; + } + return 0; + } + + /** + * Part of our xml:base processing code + * + * We need a couple of methods to access XHTML content stored in feeds. + * This is because we dereference all xml:base references before returning + * the element. This method handles the attributes. + * + * @param DOMElement $node The DOM node we are iterating over + * @return string + */ + function processXHTMLAttributes($node) { + $return = ''; + foreach ($node->attributes as $attribute) { + if ($attribute->name == 'src' or $attribute->name == 'href') { + $attribute->value = $this->addBase(htmlentities($attribute->value, NULL, 'utf-8'), $attribute); + } + if ($attribute->name == 'base') { + continue; + } + $return .= $attribute->name . '="' . htmlentities($attribute->value, NULL, 'utf-8') .'" '; + } + if (! empty($return)) { + return ' ' . trim($return); + } + return ''; + } + + /** + * Convert HTML entities based on the current character set. + * + * @param String + * @return String + */ + function processEntitiesForNodeValue($node) + { + if (function_exists('iconv')) { + $current_encoding = $node->ownerDocument->encoding; + $value = iconv($current_encoding, 'UTF-8', $node->nodeValue); + } else if ($current_encoding == 'iso-8859-1') { + $value = utf8_encode($node->nodeValue); + } else { + $value = $node->nodeValue; + } + + $decoded = html_entity_decode($value, NULL, 'UTF-8'); + return htmlentities($decoded, NULL, 'UTF-8'); + } + + /** + * Part of our xml:base processing code + * + * We need a couple of methods to access XHTML content stored in feeds. + * This is because we dereference all xml:base references before returning + * the element. This method recurs through the tree descending from the node + * and builds our string. + * + * @param DOMElement $node The DOM node we are processing + * @return string + */ + function traverseNode($node) + { + $content = ''; + + /* Add the opening of this node to the content */ + if ($node instanceof DOMElement) { + $content .= '<' . $node->tagName . + $this->processXHTMLAttributes($node) . '>'; + } + + /* Process children */ + if ($node->hasChildNodes()) { + foreach ($node->childNodes as $child) { + $content .= $this->traverseNode($child); + } + } + + if ($node instanceof DOMText) { + $content .= $this->processEntitiesForNodeValue($node); + } + + /* Add the closing of this node to the content */ + if ($node instanceof DOMElement) { + $content .= 'tagName . '>'; + } + + return $content; + } + + /** + * Get content from RSS feeds (atom has its own implementation) + * + * The official way to include full content in an RSS1 entry is to use + * the content module's element 'encoded', and RSS2 feeds often duplicate that. + * Often, however, the 'description' element is used instead. We will offer that + * as a fallback. Atom uses its own approach and overrides this method. + * + * @return string|false + */ + protected function getContent() + { + $options = array('encoded', 'description'); + foreach ($options as $element) { + $test = $this->model->getElementsByTagName($element); + if ($test->length == 0) { + continue; + } + if ($test->item(0)->hasChildNodes()) { + $value = ''; + foreach ($test->item(0)->childNodes as $child) { + if ($child instanceof DOMText) { + $value .= $child->nodeValue; + } else { + $simple = simplexml_import_dom($child); + $value .= $simple->asXML(); + } + } + return $value; + } else if ($test->length > 0) { + return $test->item(0)->nodeValue; + } + } + return false; + } + + /** + * Checks if this element has a particular child element. + * + * @param String + * @param Integer + * @return bool + **/ + function hasKey($name, $offset = 0) + { + $search = $this->model->getElementsByTagName($name); + return $search->length > $offset; + } + + /** + * Return an XML serialization of the feed, should it be required. Most + * users however, will already have a serialization that they used when + * instantiating the object. + * + * @return string XML serialization of element + */ + function __toString() + { + $simple = simplexml_import_dom($this->model); + return $simple->asXML(); + } + + /** + * Get directory holding RNG schemas. Method is based on that + * found in Contact_AddressBook. + * + * @return string PEAR data directory. + * @access public + * @static + */ + static function getSchemaDir() + { + require_once 'PEAR/Config.php'; + $config = new PEAR_Config; + return $config->get('data_dir') . '/XML_Feed_Parser/schemas'; + } +} + +?> \ No newline at end of file -- cgit v1.2.3-54-g00ecf