diff options
Diffstat (limited to 'extlib/Net/URL2.php')
-rw-r--r-- | extlib/Net/URL2.php | 813 |
1 files changed, 813 insertions, 0 deletions
diff --git a/extlib/Net/URL2.php b/extlib/Net/URL2.php new file mode 100644 index 000000000..7a654aed8 --- /dev/null +++ b/extlib/Net/URL2.php @@ -0,0 +1,813 @@ +<?php +// +-----------------------------------------------------------------------+ +// | Copyright (c) 2007-2008, Christian Schmidt, Peytz & Co. A/S | +// | All rights reserved. | +// | | +// | Redistribution and use in source and binary forms, with or without | +// | modification, are permitted provided that the following conditions | +// | are met: | +// | | +// | o Redistributions of source code must retain the above copyright | +// | notice, this list of conditions and the following disclaimer. | +// | o Redistributions in binary form must reproduce the above copyright | +// | notice, this list of conditions and the following disclaimer in the | +// | documentation and/or other materials provided with the distribution.| +// | o The names of the authors may not be used to endorse or promote | +// | products derived from this software without specific prior written | +// | permission. | +// | | +// | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | +// | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | +// | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | +// | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | +// | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | +// | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | +// | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | +// | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | +// | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | +// | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | +// | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | +// | | +// +-----------------------------------------------------------------------+ +// | Author: Christian Schmidt <schmidt at php dot net> | +// +-----------------------------------------------------------------------+ +// +// $Id: URL2.php,v 1.10 2008/04/26 21:57:08 schmidt Exp $ +// +// Net_URL2 Class (PHP5 Only) + +// This code is released under the BSD License - http://www.opensource.org/licenses/bsd-license.php +/** + * @license BSD License + */ +class Net_URL2 +{ + /** + * Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default + * is true. + */ + const OPTION_STRICT = 'strict'; + + /** + * Represent arrays in query using PHP's [] notation. Default is true. + */ + const OPTION_USE_BRACKETS = 'use_brackets'; + + /** + * URL-encode query variable keys. Default is true. + */ + const OPTION_ENCODE_KEYS = 'encode_keys'; + + /** + * Query variable separators when parsing the query string. Every character + * is considered a separator. Default is specified by the + * arg_separator.input php.ini setting (this defaults to "&"). + */ + const OPTION_SEPARATOR_INPUT = 'input_separator'; + + /** + * Query variable separator used when generating the query string. Default + * is specified by the arg_separator.output php.ini setting (this defaults + * to "&"). + */ + const OPTION_SEPARATOR_OUTPUT = 'output_separator'; + + /** + * Default options corresponds to how PHP handles $_GET. + */ + private $options = array( + self::OPTION_STRICT => true, + self::OPTION_USE_BRACKETS => true, + self::OPTION_ENCODE_KEYS => true, + self::OPTION_SEPARATOR_INPUT => 'x&', + self::OPTION_SEPARATOR_OUTPUT => 'x&', + ); + + /** + * @var string|bool + */ + private $scheme = false; + + /** + * @var string|bool + */ + private $userinfo = false; + + /** + * @var string|bool + */ + private $host = false; + + /** + * @var int|bool + */ + private $port = false; + + /** + * @var string + */ + private $path = ''; + + /** + * @var string|bool + */ + private $query = false; + + /** + * @var string|bool + */ + private $fragment = false; + + /** + * @param string $url an absolute or relative URL + * @param array $options + */ + public function __construct($url, $options = null) + { + $this->setOption(self::OPTION_SEPARATOR_INPUT, + ini_get('arg_separator.input')); + $this->setOption(self::OPTION_SEPARATOR_OUTPUT, + ini_get('arg_separator.output')); + if (is_array($options)) { + foreach ($options as $optionName => $value) { + $this->setOption($optionName); + } + } + + if (preg_match('@^([a-z][a-z0-9.+-]*):@i', $url, $reg)) { + $this->scheme = $reg[1]; + $url = substr($url, strlen($reg[0])); + } + + if (preg_match('@^//([^/#?]+)@', $url, $reg)) { + $this->setAuthority($reg[1]); + $url = substr($url, strlen($reg[0])); + } + + $i = strcspn($url, '?#'); + $this->path = substr($url, 0, $i); + $url = substr($url, $i); + + if (preg_match('@^\?([^#]*)@', $url, $reg)) { + $this->query = $reg[1]; + $url = substr($url, strlen($reg[0])); + } + + if ($url) { + $this->fragment = substr($url, 1); + } + } + + /** + * Returns the scheme, e.g. "http" or "urn", or false if there is no + * scheme specified, i.e. if this is a relative URL. + * + * @return string|bool + */ + public function getScheme() + { + return $this->scheme; + } + + /** + * @param string|bool $scheme + * + * @return void + * @see getScheme() + */ + public function setScheme($scheme) + { + $this->scheme = $scheme; + } + + /** + * Returns the user part of the userinfo part (the part preceding the first + * ":"), or false if there is no userinfo part. + * + * @return string|bool + */ + public function getUser() + { + return $this->userinfo !== false ? preg_replace('@:.*$@', '', $this->userinfo) : false; + } + + /** + * Returns the password part of the userinfo part (the part after the first + * ":"), or false if there is no userinfo part (i.e. the URL does not + * contain "@" in front of the hostname) or the userinfo part does not + * contain ":". + * + * @return string|bool + */ + public function getPassword() + { + return $this->userinfo !== false ? substr(strstr($this->userinfo, ':'), 1) : false; + } + + /** + * Returns the userinfo part, or false if there is none, i.e. if the + * authority part does not contain "@". + * + * @return string|bool + */ + public function getUserinfo() + { + return $this->userinfo; + } + + /** + * Sets the userinfo part. If two arguments are passed, they are combined + * in the userinfo part as username ":" password. + * + * @param string|bool $userinfo userinfo or username + * @param string|bool $password + * + * @return void + */ + public function setUserinfo($userinfo, $password = false) + { + $this->userinfo = $userinfo; + if ($password !== false) { + $this->userinfo .= ':' . $password; + } + } + + /** + * Returns the host part, or false if there is no authority part, e.g. + * relative URLs. + * + * @return string|bool + */ + public function getHost() + { + return $this->host; + } + + /** + * @param string|bool $host + * + * @return void + */ + public function setHost($host) + { + $this->host = $host; + } + + /** + * Returns the port number, or false if there is no port number specified, + * i.e. if the default port is to be used. + * + * @return int|bool + */ + public function getPort() + { + return $this->port; + } + + /** + * @param int|bool $port + * + * @return void + */ + public function setPort($port) + { + $this->port = intval($port); + } + + /** + * Returns the authority part, i.e. [ userinfo "@" ] host [ ":" port ], or + * false if there is no authority none. + * + * @return string|bool + */ + public function getAuthority() + { + if (!$this->host) { + return false; + } + + $authority = ''; + + if ($this->userinfo !== false) { + $authority .= $this->userinfo . '@'; + } + + $authority .= $this->host; + + if ($this->port !== false) { + $authority .= ':' . $this->port; + } + + return $authority; + } + + /** + * @param string|false $authority + * + * @return void + */ + public function setAuthority($authority) + { + $this->user = false; + $this->pass = false; + $this->host = false; + $this->port = false; + if (preg_match('@^(([^\@]+)\@)?([^:]+)(:(\d*))?$@', $authority, $reg)) { + if ($reg[1]) { + $this->userinfo = $reg[2]; + } + + $this->host = $reg[3]; + if (isset($reg[5])) { + $this->port = intval($reg[5]); + } + } + } + + /** + * Returns the path part (possibly an empty string). + * + * @return string + */ + public function getPath() + { + return $this->path; + } + + /** + * @param string $path + * + * @return void + */ + public function setPath($path) + { + $this->path = $path; + } + + /** + * Returns the query string (excluding the leading "?"), or false if "?" + * isn't present in the URL. + * + * @return string|bool + * @see self::getQueryVariables() + */ + public function getQuery() + { + return $this->query; + } + + /** + * @param string|bool $query + * + * @return void + * @see self::setQueryVariables() + */ + public function setQuery($query) + { + $this->query = $query; + } + + /** + * Returns the fragment name, or false if "#" isn't present in the URL. + * + * @return string|bool + */ + public function getFragment() + { + return $this->fragment; + } + + /** + * @param string|bool $fragment + * + * @return void + */ + public function setFragment($fragment) + { + $this->fragment = $fragment; + } + + /** + * Returns the query string like an array as the variables would appear in + * $_GET in a PHP script. + * + * @return array + */ + public function getQueryVariables() + { + $pattern = '/[' . + preg_quote($this->getOption(self::OPTION_SEPARATOR_INPUT), '/') . + ']/'; + $parts = preg_split($pattern, $this->query, -1, PREG_SPLIT_NO_EMPTY); + $return = array(); + + foreach ($parts as $part) { + if (strpos($part, '=') !== false) { + list($key, $value) = explode('=', $part, 2); + } else { + $key = $part; + $value = null; + } + + if ($this->getOption(self::OPTION_ENCODE_KEYS)) { + $key = rawurldecode($key); + } + $value = rawurldecode($value); + + if ($this->getOption(self::OPTION_USE_BRACKETS) && + preg_match('#^(.*)\[([0-9a-z_-]*)\]#i', $key, $matches)) { + + $key = $matches[1]; + $idx = $matches[2]; + + // Ensure is an array + if (empty($return[$key]) || !is_array($return[$key])) { + $return[$key] = array(); + } + + // Add data + if ($idx === '') { + $return[$key][] = $value; + } else { + $return[$key][$idx] = $value; + } + } elseif (!$this->getOption(self::OPTION_USE_BRACKETS) + && !empty($return[$key]) + ) { + $return[$key] = (array) $return[$key]; + $return[$key][] = $value; + } else { + $return[$key] = $value; + } + } + + return $return; + } + + /** + * @param array $array (name => value) array + * + * @return void + */ + public function setQueryVariables(array $array) + { + if (!$array) { + $this->query = false; + } else { + foreach ($array as $name => $value) { + if ($this->getOption(self::OPTION_ENCODE_KEYS)) { + $name = rawurlencode($name); + } + + if (is_array($value)) { + foreach ($value as $k => $v) { + $parts[] = $this->getOption(self::OPTION_USE_BRACKETS) + ? sprintf('%s[%s]=%s', $name, $k, $v) + : ($name . '=' . $v); + } + } elseif (!is_null($value)) { + $parts[] = $name . '=' . $value; + } else { + $parts[] = $name; + } + } + $this->query = implode($this->getOption(self::OPTION_SEPARATOR_OUTPUT), + $parts); + } + } + + /** + * @param string $name + * @param mixed $value + * + * @return array + */ + public function setQueryVariable($name, $value) + { + $array = $this->getQueryVariables(); + $array[$name] = $value; + $this->setQueryVariables($array); + } + + /** + * @param string $name + * + * @return void + */ + public function unsetQueryVariable($name) + { + $array = $this->getQueryVariables(); + unset($array[$name]); + $this->setQueryVariables($array); + } + + /** + * Returns a string representation of this URL. + * + * @return string + */ + public function getURL() + { + // See RFC 3986, section 5.3 + $url = ""; + + if ($this->scheme !== false) { + $url .= $this->scheme . ':'; + } + + $authority = $this->getAuthority(); + if ($authority !== false) { + $url .= '//' . $authority; + } + $url .= $this->path; + + if ($this->query !== false) { + $url .= '?' . $this->query; + } + + if ($this->fragment !== false) { + $url .= '#' . $this->fragment; + } + + return $url; + } + + /** + * Returns a normalized string representation of this URL. This is useful + * for comparison of URLs. + * + * @return string + */ + public function getNormalizedURL() + { + $url = clone $this; + $url->normalize(); + return $url->getUrl(); + } + + /** + * Returns a normalized Net_URL2 instance. + * + * @return Net_URL2 + */ + public function normalize() + { + // See RFC 3886, section 6 + + // Schemes are case-insensitive + if ($this->scheme) { + $this->scheme = strtolower($this->scheme); + } + + // Hostnames are case-insensitive + if ($this->host) { + $this->host = strtolower($this->host); + } + + // Remove default port number for known schemes (RFC 3986, section 6.2.3) + if ($this->port && + $this->scheme && + $this->port == getservbyname($this->scheme, 'tcp')) { + + $this->port = false; + } + + // Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1) + foreach (array('userinfo', 'host', 'path') as $part) { + if ($this->$part) { + $this->$part = preg_replace('/%[0-9a-f]{2}/ie', 'strtoupper("\0")', $this->$part); + } + } + + // Path segment normalization (RFC 3986, section 6.2.2.3) + $this->path = self::removeDotSegments($this->path); + + // Scheme based normalization (RFC 3986, section 6.2.3) + if ($this->host && !$this->path) { + $this->path = '/'; + } + } + + /** + * Returns whether this instance represents an absolute URL. + * + * @return bool + */ + public function isAbsolute() + { + return (bool) $this->scheme; + } + + /** + * Returns an Net_URL2 instance representing an absolute URL relative to + * this URL. + * + * @param Net_URL2|string $reference relative URL + * + * @return Net_URL2 + */ + public function resolve($reference) + { + if (is_string($reference)) { + $reference = new self($reference); + } + if (!$this->isAbsolute()) { + throw new Exception('Base-URL must be absolute'); + } + + // A non-strict parser may ignore a scheme in the reference if it is + // identical to the base URI's scheme. + if (!$this->getOption(self::OPTION_STRICT) && $reference->scheme == $this->scheme) { + $reference->scheme = false; + } + + $target = new self(''); + if ($reference->scheme !== false) { + $target->scheme = $reference->scheme; + $target->setAuthority($reference->getAuthority()); + $target->path = self::removeDotSegments($reference->path); + $target->query = $reference->query; + } else { + $authority = $reference->getAuthority(); + if ($authority !== false) { + $target->setAuthority($authority); + $target->path = self::removeDotSegments($reference->path); + $target->query = $reference->query; + } else { + if ($reference->path == '') { + $target->path = $this->path; + if ($reference->query !== false) { + $target->query = $reference->query; + } else { + $target->query = $this->query; + } + } else { + if (substr($reference->path, 0, 1) == '/') { + $target->path = self::removeDotSegments($reference->path); + } else { + // Merge paths (RFC 3986, section 5.2.3) + if ($this->host !== false && $this->path == '') { + $target->path = '/' . $this->path; + } else { + $i = strrpos($this->path, '/'); + if ($i !== false) { + $target->path = substr($this->path, 0, $i + 1); + } + $target->path .= $reference->path; + } + $target->path = self::removeDotSegments($target->path); + } + $target->query = $reference->query; + } + $target->setAuthority($this->getAuthority()); + } + $target->scheme = $this->scheme; + } + + $target->fragment = $reference->fragment; + + return $target; + } + + /** + * Removes dots as described in RFC 3986, section 5.2.4, e.g. + * "/foo/../bar/baz" => "/bar/baz" + * + * @param string $path a path + * + * @return string a path + */ + private static function removeDotSegments($path) + { + $output = ''; + + // Make sure not to be trapped in an infinite loop due to a bug in this + // method + $j = 0; + while ($path && $j++ < 100) { + // Step A + if (substr($path, 0, 2) == './') { + $path = substr($path, 2); + } elseif (substr($path, 0, 3) == '../') { + $path = substr($path, 3); + + // Step B + } elseif (substr($path, 0, 3) == '/./' || $path == '/.') { + $path = '/' . substr($path, 3); + + // Step C + } elseif (substr($path, 0, 4) == '/../' || $path == '/..') { + $path = '/' . substr($path, 4); + $i = strrpos($output, '/'); + $output = $i === false ? '' : substr($output, 0, $i); + + // Step D + } elseif ($path == '.' || $path == '..') { + $path = ''; + + // Step E + } else { + $i = strpos($path, '/'); + if ($i === 0) { + $i = strpos($path, '/', 1); + } + if ($i === false) { + $i = strlen($path); + } + $output .= substr($path, 0, $i); + $path = substr($path, $i); + } + } + + return $output; + } + + /** + * Returns a Net_URL2 instance representing the canonical URL of the + * currently executing PHP script. + * + * @return string + */ + public static function getCanonical() + { + if (!isset($_SERVER['REQUEST_METHOD'])) { + // ALERT - no current URL + throw new Exception('Script was not called through a webserver'); + } + + // Begin with a relative URL + $url = new self($_SERVER['PHP_SELF']); + $url->scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; + $url->host = $_SERVER['SERVER_NAME']; + $port = intval($_SERVER['SERVER_PORT']); + if ($url->scheme == 'http' && $port != 80 || + $url->scheme == 'https' && $port != 443) { + + $url->port = $port; + } + return $url; + } + + /** + * Returns the URL used to retrieve the current request. + * + * @return string + */ + public static function getRequestedURL() + { + return self::getRequested()->getUrl(); + } + + /** + * Returns a Net_URL2 instance representing the URL used to retrieve the + * current request. + * + * @return Net_URL2 + */ + public static function getRequested() + { + if (!isset($_SERVER['REQUEST_METHOD'])) { + // ALERT - no current URL + throw new Exception('Script was not called through a webserver'); + } + + // Begin with a relative URL + $url = new self($_SERVER['REQUEST_URI']); + $url->scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; + // Set host and possibly port + $url->setAuthority($_SERVER['HTTP_HOST']); + return $url; + } + + /** + * Sets the specified option. + * + * @param string $optionName a self::OPTION_ constant + * @param mixed $value option value + * + * @return void + * @see self::OPTION_STRICT + * @see self::OPTION_USE_BRACKETS + * @see self::OPTION_ENCODE_KEYS + */ + function setOption($optionName, $value) + { + if (!array_key_exists($optionName, $this->options)) { + return false; + } + $this->options[$optionName] = $value; + } + + /** + * Returns the value of the specified option. + * + * @param string $optionName The name of the option to retrieve + * + * @return mixed + */ + function getOption($optionName) + { + return isset($this->options[$optionName]) + ? $this->options[$optionName] : false; + } +} |