diff options
Diffstat (limited to 'includes/libs')
34 files changed, 2046 insertions, 1132 deletions
diff --git a/includes/libs/BufferingStatsdDataFactory.php b/includes/libs/BufferingStatsdDataFactory.php index ea5b09dc..100d2a4e 100644 --- a/includes/libs/BufferingStatsdDataFactory.php +++ b/includes/libs/BufferingStatsdDataFactory.php @@ -20,6 +20,8 @@ * @file */ +use Liuggio\StatsdClient\Entity\StatsdData; +use Liuggio\StatsdClient\Entity\StatsdDataInterface; use Liuggio\StatsdClient\Factory\StatsdDataFactory; /** @@ -38,11 +40,28 @@ class BufferingStatsdDataFactory extends StatsdDataFactory { $this->prefix = $prefix; } - public function produceStatsdData( $key, $value = 1, $metric = self::STATSD_METRIC_COUNT ) { - $this->buffer[] = $entity = $this->produceStatsdDataEntity(); + /** + * Normalize a metric key for StatsD + * + * Replace occurences of '::' with dots and any other non-alphanumeric + * characters with underscores. Combine runs of dots or underscores. + * Then trim leading or trailing dots or underscores. + * + * @param string $key + * @since 1.26 + */ + private static function normalizeMetricKey( $key ) { + $key = preg_replace( '/[:.]+/', '.', $key ); + $key = preg_replace( '/[^a-z0-9.]+/i', '_', $key ); + $key = trim( $key, '_.' ); + return str_replace( array( '._', '_.' ), '.', $key ); + } + + public function produceStatsdData( $key, $value = 1, $metric = StatsdDataInterface::STATSD_METRIC_COUNT ) { + $entity = $this->produceStatsdDataEntity(); if ( $key !== null ) { - $prefixedKey = ltrim( $this->prefix . '.' . $key, '.' ); - $entity->setKey( $prefixedKey ); + $key = self::normalizeMetricKey( "{$this->prefix}.{$key}" ); + $entity->setKey( $key ); } if ( $value !== null ) { $entity->setValue( $value ); @@ -50,9 +69,16 @@ class BufferingStatsdDataFactory extends StatsdDataFactory { if ( $metric !== null ) { $entity->setMetric( $metric ); } + // Don't bother buffering a counter update with a delta of zero. 
+ if ( !( $metric === StatsdDataInterface::STATSD_METRIC_COUNT && !$value ) ) { + $this->buffer[] = $entity; + } return $entity; } + /** + * @return StatsdData[] + */ public function getBuffer() { return $this->buffer; } diff --git a/includes/libs/CSSMin.php b/includes/libs/CSSMin.php index ffe26a96..5a8c4c7b 100644 --- a/includes/libs/CSSMin.php +++ b/includes/libs/CSSMin.php @@ -31,6 +31,9 @@ class CSSMin { /* Constants */ + /** @var string Strip marker for comments. **/ + const PLACEHOLDER = "\x7fPLACEHOLDER\x7f"; + /** * Internet Explorer data URI length limit. See encodeImageAsDataURI(). */ @@ -57,13 +60,15 @@ class CSSMin { /* Static Methods */ /** - * Gets a list of local file paths which are referenced in a CSS style sheet + * Gets a list of local file paths which are referenced in a CSS style sheet. * - * This function will always return an empty array if the second parameter is not given or null - * for backwards-compatibility. + * If you wish non-existent files to be listed too, use getAllLocalFileReferences(). * - * @param string $source CSS data to remap - * @param string $path File path where the source was read from (optional) + * For backwards-compatibility, if the second parameter is not given or null, + * this function will return an empty array instead of erroring out. + * + * @param string $source CSS stylesheet source to process + * @param string $path File path where the source was read from * @return array List of local file references */ public static function getLocalFileReferences( $source, $path = null ) { @@ -71,11 +76,31 @@ class CSSMin { return array(); } + $files = self::getAllLocalFileReferences( $source, $path ); + + // Skip non-existent files + $files = array_filter( $files, function ( $file ) { + return file_exists( $file ); + } ); + + return $files; + } + + /** + * Gets a list of local file paths which are referenced in a CSS style sheet, including + * non-existent files. 
+ * + * @param string $source CSS stylesheet source to process + * @param string $path File path where the source was read from + * @return array List of local file references + */ + public static function getAllLocalFileReferences( $source, $path ) { + $stripped = preg_replace( '/' . self::COMMENT_REGEX . '/s', '', $source ); $path = rtrim( $path, '/' ) . '/'; $files = array(); $rFlags = PREG_OFFSET_CAPTURE | PREG_SET_ORDER; - if ( preg_match_all( '/' . self::URL_REGEX . '/', $source, $matches, $rFlags ) ) { + if ( preg_match_all( '/' . self::URL_REGEX . '/', $stripped, $matches, $rFlags ) ) { foreach ( $matches as $match ) { $url = $match['file'][0]; @@ -84,13 +109,7 @@ class CSSMin { break; } - $file = $path . $url; - // Skip non-existent files - if ( file_exists( $file ) ) { - break; - } - - $files[] = $file; + $files[] = $path . $url; } } return $files; @@ -232,19 +251,22 @@ class CSSMin { $remote = substr( $remote, 0, -1 ); } + // Disallow U+007F DELETE, which is illegal anyway, and which + // we use for comment placeholders. + $source = str_replace( "\x7f", "?", $source ); + // Replace all comments by a placeholder so they will not interfere with the remapping. // Warning: This will also catch on anything looking like the start of a comment between // quotation marks (e.g. "foo /* bar"). $comments = array(); - $placeholder = uniqid( '', true ); $pattern = '/(?!' . CSSMin::EMBED_REGEX . ')(' . CSSMin::COMMENT_REGEX . ')/s'; $source = preg_replace_callback( $pattern, - function ( $match ) use ( &$comments, $placeholder ) { + function ( $match ) use ( &$comments ) { $comments[] = $match[ 0 ]; - return $placeholder . ( count( $comments ) - 1 ) . 'x'; + return CSSMin::PLACEHOLDER . ( count( $comments ) - 1 ) . 
'x'; }, $source ); @@ -257,13 +279,13 @@ class CSSMin { $source = preg_replace_callback( $pattern, - function ( $matchOuter ) use ( $local, $remote, $embedData, $placeholder ) { + function ( $matchOuter ) use ( $local, $remote, $embedData ) { $rule = $matchOuter[0]; // Check for global @embed comment and remove it. Allow other comments to be present // before @embed (they have been replaced with placeholders at this point). $embedAll = false; - $rule = preg_replace( '/^((?:\s+|' . $placeholder . '(\d+)x)*)' . CSSMin::EMBED_REGEX . '\s*/', '$1', $rule, 1, $embedAll ); + $rule = preg_replace( '/^((?:\s+|' . CSSMin::PLACEHOLDER . '(\d+)x)*)' . CSSMin::EMBED_REGEX . '\s*/', '$1', $rule, 1, $embedAll ); // Build two versions of current rule: with remapped URLs // and with embedded data: URIs (where possible). @@ -328,7 +350,7 @@ class CSSMin { }, $source ); // Re-insert comments - $pattern = '/' . $placeholder . '(\d+)x/'; + $pattern = '/' . CSSMin::PLACEHOLDER . '(\d+)x/'; $source = preg_replace_callback( $pattern, function( $match ) use ( &$comments ) { return $comments[ $match[1] ]; }, $source ); @@ -393,16 +415,16 @@ class CSSMin { if ( $local === false ) { // Assume that all paths are relative to $remote, and make them absolute - return $remote . '/' . $url; + $url = $remote . '/' . $url; } else { // We drop the query part here and instead make the path relative to $remote $url = "{$remote}/{$file}"; // Path to the actual file on the filesystem $localFile = "{$local}/{$file}"; if ( file_exists( $localFile ) ) { - // Add version parameter as a time-stamp in ISO 8601 format, - // using Z for the timezone, meaning GMT - $url .= '?' . gmdate( 'Y-m-d\TH:i:s\Z', round( filemtime( $localFile ), -2 ) ); + // Add version parameter as the first five hex digits + // of the MD5 hash of the file's contents. + $url .= '?' . 
substr( md5_file( $localFile ), 0, 5 ); if ( $embed ) { $data = self::encodeImageAsDataURI( $localFile ); if ( $data !== false ) { @@ -412,8 +434,11 @@ class CSSMin { } // If any of these conditions failed (file missing, we don't want to embed it // or it's not embeddable), return the URL (possibly with ?timestamp part) - return $url; } + if ( function_exists( 'wfRemoveDotSegments' ) ) { + $url = wfRemoveDotSegments( $url ); + } + return $url; } /** diff --git a/includes/libs/HashRing.php b/includes/libs/HashRing.php index 2022b225..e7a10997 100644 --- a/includes/libs/HashRing.php +++ b/includes/libs/HashRing.php @@ -223,8 +223,8 @@ class HashRing { * @return array List of locations * @throws UnexpectedValueException */ - public function getLiveLocations( $item ) { - return $this->getLiveRing()->getLocations( $item ); + public function getLiveLocations( $item, $limit ) { + return $this->getLiveRing()->getLocations( $item, $limit ); } /** diff --git a/includes/libs/HttpStatus.php b/includes/libs/HttpStatus.php index 809bfdf5..442298a6 100644 --- a/includes/libs/HttpStatus.php +++ b/includes/libs/HttpStatus.php @@ -26,11 +26,10 @@ class HttpStatus { /** - * Get the message associated with HTTP response code $code + * Get the message associated with an HTTP response status code * - * @param $code Integer: status code - * @return String or null: message or null if $code is not in the list of - * messages + * @param int $code Status code + * @return string|null Message, or null if $code is not known */ public static function getMessage( $code ) { static $statusMessage = array( @@ -88,4 +87,25 @@ class HttpStatus { return isset( $statusMessage[$code] ) ? 
$statusMessage[$code] : null; } + /** + * Output an HTTP status code header + * + * @since 1.26 + * @param int $code Status code + */ + public static function header( $code ) { + static $version = null; + $message = self::getMessage( $code ); + if ( $message === null ) { + trigger_error( "Unknown HTTP status code $code", E_USER_WARNING ); + return false; + } + + if ( $version === null ) { + $version = isset( $_SERVER['SERVER_PROTOCOL'] ) && $_SERVER['SERVER_PROTOCOL'] === 'HTTP/1.0' ? '1.0' : '1.1'; + } + + header( "HTTP/$version $code $message" ); + } + } diff --git a/includes/libs/IPSet.php b/includes/libs/IPSet.php deleted file mode 100644 index c1c841e6..00000000 --- a/includes/libs/IPSet.php +++ /dev/null @@ -1,276 +0,0 @@ -<?php -/** - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @author Brandon Black <blblack@gmail.com> - */ - -/** - * Matches IP addresses against a set of CIDR specifications - * - * Usage: - * // At startup, calculate the optimized data structure for the set: - * $ipset = new IPSet( $wgSquidServersNoPurge ); - * // runtime check against cached set (returns bool): - * $allowme = $ipset->match( $ip ); - * - * In rough benchmarking, this takes about 80% more time than - * in_array() checks on a short (a couple hundred at most) array - * of addresses. It's fast either way at those levels, though, - * and IPSet would scale better than in_array if the array were - * much larger. - * - * For mixed-family CIDR sets, however, this code gives well over - * 100x speedup vs iterating IP::isInRange() over an array - * of CIDR specs. - * - * The basic implementation is two separate binary trees - * (IPv4 and IPv6) as nested php arrays with keys named 0 and 1. - * The values false and true are terminal match-fail and match-success, - * otherwise the value is a deeper node in the tree. - * - * A simple depth-compression scheme is also implemented: whole-byte - * tree compression at whole-byte boundaries only, where no branching - * occurs during that whole byte of depth. A compressed node has - * keys 'comp' (the byte to compare) and 'next' (the next node to - * recurse into if 'comp' matched successfully). 
- * - * For example, given these inputs: - * 25.0.0.0/9 - * 25.192.0.0/10 - * - * The v4 tree would look like: - * root4 => array( - * 'comp' => 25, - * 'next' => array( - * 0 => true, - * 1 => array( - * 0 => false, - * 1 => true, - * ), - * ), - * ); - * - * (multi-byte compression nodes were attempted as well, but were - * a net loss in my test scenarios due to additional match complexity) - * - * @since 1.24 - */ -class IPSet { - /** @var array $root4: the root of the IPv4 matching tree */ - private $root4 = array( false, false ); - - /** @var array $root6: the root of the IPv6 matching tree */ - private $root6 = array( false, false ); - - /** - * __construct() instantiate the object from an array of CIDR specs - * - * @param array $cfg array of IPv[46] CIDR specs as strings - * @return IPSet new IPSet object - * - * Invalid input network/mask values in $cfg will result in issuing - * E_WARNING and/or E_USER_WARNING and the bad values being ignored. - */ - public function __construct( array $cfg ) { - foreach ( $cfg as $cidr ) { - $this->addCidr( $cidr ); - } - - self::recOptimize( $this->root4 ); - self::recCompress( $this->root4, 0, 24 ); - self::recOptimize( $this->root6 ); - self::recCompress( $this->root6, 0, 120 ); - } - - /** - * Add a single CIDR spec to the internal matching trees - * - * @param string $cidr string CIDR spec, IPv[46], optional /mask (def all-1's) - */ - private function addCidr( $cidr ) { - // v4 or v6 check - if ( strpos( $cidr, ':' ) === false ) { - $node =& $this->root4; - $defMask = '32'; - } else { - $node =& $this->root6; - $defMask = '128'; - } - - // Default to all-1's mask if no netmask in the input - if ( strpos( $cidr, '/' ) === false ) { - $net = $cidr; - $mask = $defMask; - } else { - list( $net, $mask ) = explode( '/', $cidr, 2 ); - if ( !ctype_digit( $mask ) || intval( $mask ) > $defMask ) { - trigger_error( "IPSet: Bad mask '$mask' from '$cidr', ignored", E_USER_WARNING ); - return; - } - } - $mask = intval( $mask ); // 
explicit integer convert, checked above - - // convert $net to an array of integer bytes, length 4 or 16: - $raw = inet_pton( $net ); - if ( $raw === false ) { - return; // inet_pton() sends an E_WARNING for us - } - $rawOrd = array_map( 'ord', str_split( $raw ) ); - - // special-case: zero mask overwrites the whole tree with a pair of terminal successes - if ( $mask == 0 ) { - $node = array( true, true ); - return; - } - - // iterate the bits of the address while walking the tree structure for inserts - $curBit = 0; - while ( 1 ) { - $maskShift = 7 - ( $curBit & 7 ); - $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift]; - ++$curBit; - if ( $node === true ) { - // already added a larger supernet, no need to go deeper - return; - } elseif ( $curBit == $mask ) { - // this may wipe out deeper subnets from earlier - $node = true; - return; - } elseif ( $node === false ) { - // create new subarray to go deeper - $node = array( false, false ); - } - } - } - - /** - * Match an IP address against the set - * - * @param string $ip string IPv[46] address - * @return bool true is match success, false is match failure - * - * If $ip is unparseable, inet_pton may issue an E_WARNING to that effect - */ - public function match( $ip ) { - $raw = inet_pton( $ip ); - if ( $raw === false ) { - return false; // inet_pton() sends an E_WARNING for us - } - - $rawOrd = array_map( 'ord', str_split( $raw ) ); - if ( count( $rawOrd ) == 4 ) { - $node =& $this->root4; - } else { - $node =& $this->root6; - } - - $curBit = 0; - while ( 1 ) { - if ( isset( $node['comp'] ) ) { - // compressed node, matches 1 whole byte on a byte boundary - if ( $rawOrd[$curBit >> 3] != $node['comp'] ) { - return false; - } - $curBit += 8; - $node =& $node['next']; - } else { - // uncompressed node, walk in the correct direction for the current bit-value - $maskShift = 7 - ( $curBit & 7 ); - $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift]; - ++$curBit; - } 
- - if ( $node === true || $node === false ) { - return $node; - } - } - } - - /** - * Recursively merges adjacent nets into larger supernets - * - * @param array &$node Tree node to optimize, by-reference - * - * e.g.: 8.0.0.0/8 + 9.0.0.0/8 -> 8.0.0.0/7 - */ - private static function recOptimize( &$node ) { - if ( $node[0] !== false && $node[0] !== true && self::recOptimize( $node[0] ) ) { - $node[0] = true; - } - if ( $node[1] !== false && $node[1] !== true && self::recOptimize( $node[1] ) ) { - $node[1] = true; - } - if ( $node[0] === true && $node[1] === true ) { - return true; - } - return false; - } - - /** - * Recursively compresses a tree - * - * @param array &$node Tree node to compress, by-reference - * @param integer $curBit current depth in the tree - * @param integer $maxCompStart maximum depth at which compression can start, family-specific - * - * This is a very simplistic compression scheme: if we go through a whole - * byte of address starting at a byte boundary with no real branching - * other than immediate false-vs-(node|true), compress that subtree down to a single - * byte-matching node. 
- * The $maxCompStart check elides recursing the final 7 levels of depth (family-dependent) - */ - private static function recCompress( &$node, $curBit, $maxCompStart ) { - if ( !( $curBit & 7 ) ) { // byte boundary, check for depth-8 single path(s) - $byte = 0; - $cnode =& $node; - $i = 8; - while ( $i-- ) { - if ( $cnode[0] === false ) { - $byte |= 1 << $i; - $cnode =& $cnode[1]; - } elseif ( $cnode[1] === false ) { - $cnode =& $cnode[0]; - } else { - // partial-byte branching, give up - break; - } - } - if ( $i == -1 ) { // means we did not exit the while() via break - $node = array( - 'comp' => $byte, - 'next' => &$cnode, - ); - $curBit += 8; - if ( $cnode !== true ) { - self::recCompress( $cnode, $curBit, $maxCompStart ); - } - return; - } - } - - ++$curBit; - if ( $curBit <= $maxCompStart ) { - if ( $node[0] !== false && $node[0] !== true ) { - self::recCompress( $node[0], $curBit, $maxCompStart ); - } - if ( $node[1] !== false && $node[1] !== true ) { - self::recCompress( $node[1], $curBit, $maxCompStart ); - } - } - } -} diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php index 2990782c..141a5153 100644 --- a/includes/libs/JavaScriptMinifier.php +++ b/includes/libs/JavaScriptMinifier.php @@ -565,6 +565,14 @@ class JavaScriptMinifier { $out .= ' '; $lineLength++; } + if ( + $type === self::TYPE_LITERAL + && ( $token === 'true' || $token === 'false' ) + && ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION ) + && $last !== '.' + ) { + $token = ( $token === 'true' ) ? 
'!0' : '!1'; + } $out .= $token; $lineLength += $end - $pos; // += strlen( $token ) diff --git a/includes/libs/MapCacheLRU.php b/includes/libs/MapCacheLRU.php index 0b6db32e..a0230bee 100644 --- a/includes/libs/MapCacheLRU.php +++ b/includes/libs/MapCacheLRU.php @@ -20,6 +20,7 @@ * @file * @ingroup Cache */ +use Wikimedia\Assert\Assert; /** * Handles a simple LRU key/value map with a maximum number of entries @@ -41,9 +42,9 @@ class MapCacheLRU { * @throws Exception When $maxCacheKeys is not an int or =< 0. */ public function __construct( $maxKeys ) { - if ( !is_int( $maxKeys ) || $maxKeys < 1 ) { - throw new Exception( __METHOD__ . " must be given an integer and >= 1" ); - } + Assert::parameterType( 'integer', $maxKeys, '$maxKeys' ); + Assert::parameter( $maxKeys >= 1, '$maxKeys', 'must be >= 1' ); + $this->maxCacheKeys = $maxKeys; } diff --git a/includes/libs/MultiHttpClient.php b/includes/libs/MultiHttpClient.php index fb2daa69..5555cbcb 100644 --- a/includes/libs/MultiHttpClient.php +++ b/includes/libs/MultiHttpClient.php @@ -58,8 +58,8 @@ class MultiHttpClient { /** * @param array $options - * - connTimeout : default connection timeout - * - reqTimeout : default request timeout + * - connTimeout : default connection timeout (seconds) + * - reqTimeout : default request timeout (seconds) * - proxy : HTTP proxy to use * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) * - maxConnsPerHost : maximum number of concurrent connections (per host) @@ -72,7 +72,9 @@ class MultiHttpClient { throw new Exception( "Cannot find CA bundle: " . 
$this->caBundlePath ); } } - static $opts = array( 'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost', 'proxy' ); + static $opts = array( + 'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost', 'proxy' + ); foreach ( $opts as $key ) { if ( isset( $options[$key] ) ) { $this->$key = $options[$key]; @@ -84,19 +86,19 @@ class MultiHttpClient { * Execute an HTTP(S) request * * This method returns a response map of: - * - code : HTTP response code or 0 if there was a serious cURL error - * - reason : HTTP response reason (empty if there was a serious cURL error) - * - headers : <header name/value associative array> - * - body : HTTP response body or resource (if "stream" was set) + * - code : HTTP response code or 0 if there was a serious cURL error + * - reason : HTTP response reason (empty if there was a serious cURL error) + * - headers : <header name/value associative array> + * - body : HTTP response body or resource (if "stream" was set) * - error : Any cURL error string - * The map also stores integer-indexed copies of these values. This lets callers do: - * <code> + * The map also stores integer-indexed copies of these values. This lets callers do: + * @code * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req ); - * </code> + * @endcode * @param array $req HTTP request array * @param array $opts - * - connTimeout : connection timeout per request - * - reqTimeout : post-connection timeout per request + * - connTimeout : connection timeout per request (seconds) + * - reqTimeout : post-connection timeout per request (seconds) * @return array Response array for request */ final public function run( array $req, array $opts = array() ) { @@ -114,17 +116,17 @@ class MultiHttpClient { * - body : HTTP response body or resource (if "stream" was set) * - error : Any cURL error string * The map also stores integer-indexed copies of these values. 
This lets callers do: - * <code> + * @code * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $req['response']; - * </code> + * @endcode * All headers in the 'headers' field are normalized to use lower case names. * This is true for the request headers and the response headers. Integer-indexed * method/URL entries will also be changed to use the corresponding string keys. * * @param array $reqs Map of HTTP request arrays * @param array $opts - * - connTimeout : connection timeout per request - * - reqTimeout : post-connection timeout per request + * - connTimeout : connection timeout per request (seconds) + * - reqTimeout : post-connection timeout per request (seconds) * - usePipelining : whether to use HTTP pipelining if possible * - maxConnsPerHost : maximum number of concurrent connections (per host) * @return array $reqs With response array populated for each @@ -189,6 +191,7 @@ class MultiHttpClient { // @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS) $batches = array_chunk( $indexes, $this->maxConnsPerHost ); + $infos = array(); foreach ( $batches as $batch ) { // Attach all cURL handles for this batch @@ -201,6 +204,10 @@ class MultiHttpClient { // Do any available work... do { $mrc = curl_multi_exec( $chm, $active ); + $info = curl_multi_info_read( $chm ); + if ( $info !== false ) { + $infos[(int)$info['handle']] = $info; + } } while ( $mrc == CURLM_CALL_MULTI_PERFORM ); // Wait (if possible) for available work... if ( $active > 0 && $mrc == CURLM_OK ) { @@ -216,10 +223,20 @@ class MultiHttpClient { foreach ( $reqs as $index => &$req ) { $ch = $handles[$index]; curl_multi_remove_handle( $chm, $ch ); - if ( curl_errno( $ch ) !== 0 ) { - $req['response']['error'] = "(curl error: " . - curl_errno( $ch ) . ") " . 
curl_error( $ch ); + + if ( isset( $infos[(int)$ch] ) ) { + $info = $infos[(int)$ch]; + $errno = $info['result']; + if ( $errno !== 0 ) { + $req['response']['error'] = "(curl error: $errno)"; + if ( function_exists( 'curl_strerror' ) ) { + $req['response']['error'] .= " " . curl_strerror( $errno ); + } + } + } else { + $req['response']['error'] = "(curl error: no status set)"; } + // For convenience with the list() operator $req['response'][0] = $req['response']['code']; $req['response'][1] = $req['response']['reason']; @@ -318,6 +335,19 @@ class MultiHttpClient { ); } elseif ( $req['method'] === 'POST' ) { curl_setopt( $ch, CURLOPT_POST, 1 ); + // Don't interpret POST parameters starting with '@' as file uploads, because this + // makes it impossible to POST plain values starting with '@' (and causes security + // issues potentially exposing the contents of local files). + // The PHP manual says this option was introduced in PHP 5.5 defaults to true in PHP 5.6, + // but we support lower versions, and the option doesn't exist in HHVM 5.6.99. + if ( defined( 'CURLOPT_SAFE_UPLOAD' ) ) { + curl_setopt( $ch, CURLOPT_SAFE_UPLOAD, true ); + } else if ( is_array( $req['body'] ) ) { + // In PHP 5.2 and later, '@' is interpreted as a file upload if POSTFIELDS + // is an array, but not if it's a string. So convert $req['body'] to a string + // for safety. + $req['body'] = wfArrayToCgi( $req['body'] ); + } curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] ); } else { if ( is_resource( $req['body'] ) || $req['body'] !== '' ) { diff --git a/includes/libs/ObjectFactory.php b/includes/libs/ObjectFactory.php index ec8c36a1..1cb544b8 100644 --- a/includes/libs/ObjectFactory.php +++ b/includes/libs/ObjectFactory.php @@ -49,6 +49,13 @@ class ObjectFactory { * constructor/callable. This behavior can be suppressed by adding * closure_expansion => false to the specification. 
* + * The specification may also contain a 'calls' key that describes method + * calls to make on the newly created object before returning it. This + * pattern is often known as "setter injection". The value of this key is + * expected to be an associative array with method names as keys and + * argument lists as values. The argument list will be expanded (or not) + * in the same way as the 'args' key for the main object. + * * @param array $spec Object specification * @return object * @throws InvalidArgumentException when object specification does not @@ -58,18 +65,11 @@ class ObjectFactory { */ public static function getObjectFromSpec( $spec ) { $args = isset( $spec['args'] ) ? $spec['args'] : array(); + $expandArgs = !isset( $spec['closure_expansion'] ) || + $spec['closure_expansion'] === true; - if ( !isset( $spec['closure_expansion'] ) || - $spec['closure_expansion'] === true - ) { - $args = array_map( function ( $value ) { - if ( is_object( $value ) && $value instanceof Closure ) { - // If an argument is a Closure, call it. - return $value(); - } else { - return $value; - } - }, $args ); + if ( $expandArgs ) { + $args = static::expandClosures( $args ); } if ( isset( $spec['class'] ) ) { @@ -88,6 +88,33 @@ class ObjectFactory { ); } + if ( isset( $spec['calls'] ) && is_array( $spec['calls'] ) ) { + // Call additional methods on the newly created object + foreach ( $spec['calls'] as $method => $margs ) { + if ( $expandArgs ) { + $margs = static::expandClosures( $margs ); + } + call_user_func_array( array( $obj, $method ), $margs ); + } + } + return $obj; } + + /** + * Iterate a list and call any closures it contains. + * + * @param array $list List of things + * @return array List with any Closures replaced with their output + */ + protected static function expandClosures( $list ) { + return array_map( function ( $value ) { + if ( is_object( $value ) && $value instanceof Closure ) { + // If $value is a Closure, call it. 
+ return $value(); + } else { + return $value; + } + }, $list ); + } } diff --git a/includes/libs/ProcessCacheLRU.php b/includes/libs/ProcessCacheLRU.php index 8d80eb38..b55ff9da 100644 --- a/includes/libs/ProcessCacheLRU.php +++ b/includes/libs/ProcessCacheLRU.php @@ -20,6 +20,7 @@ * @file * @ingroup Cache */ +use Wikimedia\Assert\Assert; /** * Handles per process caching of items @@ -128,9 +129,9 @@ class ProcessCacheLRU { * @throws UnexpectedValueException */ public function resize( $maxKeys ) { - if ( !is_int( $maxKeys ) || $maxKeys < 1 ) { - throw new UnexpectedValueException( __METHOD__ . " must be given an integer >= 1" ); - } + Assert::parameterType( 'integer', $maxKeys, '$maxKeys' ); + Assert::parameter( $maxKeys >= 1, '$maxKeys', 'must be >= 1' ); + $this->maxCacheKeys = $maxKeys; while ( count( $this->cache ) > $this->maxCacheKeys ) { reset( $this->cache ); diff --git a/includes/libs/ReplacementArray.php b/includes/libs/ReplacementArray.php index 7fdb3093..b6faa378 100644 --- a/includes/libs/ReplacementArray.php +++ b/includes/libs/ReplacementArray.php @@ -76,7 +76,7 @@ class ReplacementArray { * @param array $data */ public function mergeArray( $data ) { - $this->data = array_merge( $this->data, $data ); + $this->data = $data + $this->data; $this->fss = false; } @@ -84,7 +84,7 @@ class ReplacementArray { * @param ReplacementArray $other */ public function merge( ReplacementArray $other ) { - $this->data = array_merge( $this->data, $other->data ); + $this->data = $other->data + $this->data; $this->fss = false; } @@ -111,7 +111,10 @@ class ReplacementArray { * @return string */ public function replace( $subject ) { - if ( function_exists( 'fss_prep_replace' ) ) { + if ( + function_exists( 'fss_prep_replace' ) && + version_compare( PHP_VERSION, '5.5.0' ) < 0 + ) { if ( $this->fss === false ) { $this->fss = fss_prep_replace( $this->data ); } diff --git a/includes/libs/RiffExtractor.php b/includes/libs/RiffExtractor.php new file mode 100644 index 
00000000..f987c59d --- /dev/null +++ b/includes/libs/RiffExtractor.php @@ -0,0 +1,100 @@ +<?php +/** + * Extractor for the Resource Interchange File Format + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @author Bryan Tong Minh + * @ingroup Media + */ + +class RiffExtractor { + public static function findChunksFromFile( $filename, $maxChunks = -1 ) { + $file = fopen( $filename, 'rb' ); + $info = self::findChunks( $file, $maxChunks ); + fclose( $file ); + return $info; + } + + public static function findChunks( $file, $maxChunks = -1 ) { + $riff = fread( $file, 4 ); + if ( $riff !== 'RIFF' ) { + return false; + } + + // Next four bytes are fileSize + $fileSize = fread( $file, 4 ); + if ( !$fileSize || strlen( $fileSize ) != 4 ) { + return false; + } + + // Next four bytes are the FourCC + $fourCC = fread( $file, 4 ); + if ( !$fourCC || strlen( $fourCC ) != 4 ) { + return false; + } + + // Create basic info structure + $info = array( + 'fileSize' => self::extractUInt32( $fileSize ), + 'fourCC' => $fourCC, + 'chunks' => array(), + ); + $numberOfChunks = 0; + + // Find out the chunks + while ( !feof( $file ) && !( $numberOfChunks >= $maxChunks && $maxChunks >= 0 ) ) { + $chunkStart = ftell( $file ); + + $chunkFourCC = fread( $file, 4 ); + 
if ( !$chunkFourCC || strlen( $chunkFourCC ) != 4 ) { + return $info; + } + + $chunkSize = fread( $file, 4 ); + if ( !$chunkSize || strlen( $chunkSize ) != 4 ) { + return $info; + } + $intChunkSize = self::extractUInt32( $chunkSize ); + + // Add chunk info to the info structure + $info['chunks'][] = array( + 'fourCC' => $chunkFourCC, + 'start' => $chunkStart, + 'size' => $intChunkSize + ); + + // Uneven chunks have padding bytes + $padding = $intChunkSize % 2; + // Seek to the next chunk + fseek( $file, $intChunkSize + $padding, SEEK_CUR ); + + } + + return $info; + } + + /** + * Extract a little-endian uint32 from a 4 byte string + * @param string $string 4-byte string + * @return int + */ + public static function extractUInt32( $string ) { + $unpacked = unpack( 'V', $string ); + return $unpacked[1]; + } +}; diff --git a/includes/libs/SamplingStatsdClient.php b/includes/libs/SamplingStatsdClient.php new file mode 100644 index 00000000..f7afdb5e --- /dev/null +++ b/includes/libs/SamplingStatsdClient.php @@ -0,0 +1,133 @@ +<?php +/** + * Copyright 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +use Liuggio\StatsdClient\StatsdClient; +use Liuggio\StatsdClient\Entity\StatsdData; +use Liuggio\StatsdClient\Entity\StatsdDataInterface; + +/** + * A statsd client that applies the sampling rate to the data items before sending them. + * + * @since 1.26 + */ +class SamplingStatsdClient extends StatsdClient { + /** + * Sets sampling rate for all items in $data. + * The sample rate specified in a StatsdData entity overrides the sample rate specified here. + * + * {@inheritDoc} + */ + public function appendSampleRate( $data, $sampleRate = 1 ) { + if ( $sampleRate < 1 ) { + array_walk( $data, function( $item ) use ( $sampleRate ) { + /** @var $item StatsdData */ + if ( $item->getSampleRate() === 1 ) { + $item->setSampleRate( $sampleRate ); + } + } ); + } + + return $data; + } + + /** + * Sample the metrics according to their sample rate and send the remaining ones. + * + * {@inheritDoc} + */ + public function send( $data, $sampleRate = 1 ) { + if ( !is_array( $data ) ) { + $data = array( $data ); + } + if ( !$data ) { + return; + } + foreach ( $data as $item ) { + if ( !( $item instanceof StatsdDataInterface ) ) { + throw new InvalidArgumentException( + 'SamplingStatsdClient does not accept stringified messages' ); + } + } + + // add sampling + if ( $sampleRate < 1 ) { + $data = $this->appendSampleRate( $data, $sampleRate ); + } + $data = $this->sampleData( $data ); + + $messages = array_map( 'strval', $data ); + + // reduce number of packets + if ( $this->getReducePacket() ) { + $data = $this->reduceCount( $data ); + } + //failures in any of this should be silently ignored if .. 
+ $written = 0; + try { + $fp = $this->getSender()->open(); + if ( !$fp ) { + return; + } + foreach ( $messages as $message ) { + $written += $this->getSender()->write( $fp, $message ); + } + $this->getSender()->close( $fp ); + } catch ( Exception $e ) { + $this->throwException( $e ); + } + + return $written; + } + + /** + * Throw away some of the data according to the sample rate. + * @param StatsdDataInterface[] $data + * @return array + * @throws LogicException + */ + protected function sampleData( $data ) { + $newData = array(); + $mt_rand_max = mt_getrandmax(); + foreach ( $data as $item ) { + $samplingRate = $item->getSampleRate(); + if ( $samplingRate <= 0.0 || $samplingRate > 1.0 ) { + throw new LogicException( 'Sampling rate shall be within ]0, 1]' ); + } + if ( + $samplingRate === 1 || + ( mt_rand() / $mt_rand_max <= $samplingRate ) + ) { + $newData[] = $item; + } + } + return $newData; + } + + /** + * {@inheritDoc} + */ + protected function throwException( Exception $exception ) { + if ( !$this->getFailSilently() ) { + throw $exception; + } + } +} diff --git a/includes/libs/ScopedPHPTimeout.php b/includes/libs/ScopedPHPTimeout.php deleted file mode 100644 index d1493c30..00000000 --- a/includes/libs/ScopedPHPTimeout.php +++ /dev/null @@ -1,84 +0,0 @@ -<?php -/** - * Expansion of the PHP execution time limit feature for a function call. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - */ - -/** - * Class to expand PHP execution time for a function call. - * Use this when performing changes that should not be interrupted. - * - * On construction, set_time_limit() is called and set to $seconds. - * If the client aborts the connection, PHP will continue to run. - * When the object goes out of scope, the timer is restarted, with - * the original time limit minus the time the object existed. - */ -class ScopedPHPTimeout { - protected $startTime; // float; seconds - protected $oldTimeout; // integer; seconds - protected $oldIgnoreAbort; // boolean - - protected static $stackDepth = 0; // integer - protected static $totalCalls = 0; // integer - protected static $totalElapsed = 0; // float; seconds - - /* Prevent callers in infinite loops from running forever */ - const MAX_TOTAL_CALLS = 1000000; - const MAX_TOTAL_TIME = 300; // seconds - - /** - * @param $seconds integer - */ - public function __construct( $seconds ) { - if ( ini_get( 'max_execution_time' ) > 0 ) { // CLI uses 0 - if ( self::$totalCalls >= self::MAX_TOTAL_CALLS ) { - trigger_error( "Maximum invocations of " . __CLASS__ . " exceeded." ); - } elseif ( self::$totalElapsed >= self::MAX_TOTAL_TIME ) { - trigger_error( "Time limit within invocations of " . __CLASS__ . " exceeded." ); - } elseif ( self::$stackDepth > 0 ) { // recursion guard - trigger_error( "Resursive invocation of " . __CLASS__ . " attempted." ); - } else { - $this->oldIgnoreAbort = ignore_user_abort( true ); - $this->oldTimeout = ini_set( 'max_execution_time', $seconds ); - $this->startTime = microtime( true ); - ++self::$stackDepth; - ++self::$totalCalls; // proof against < 1us scopes - } - } - } - - /** - * Restore the original timeout. 
- * This does not account for the timer value on __construct(). - */ - public function __destruct() { - if ( $this->oldTimeout ) { - $elapsed = microtime( true ) - $this->startTime; - // Note: a limit of 0 is treated as "forever" - set_time_limit( max( 1, $this->oldTimeout - (int)$elapsed ) ); - // If each scoped timeout is for less than one second, we end up - // restoring the original timeout without any decrease in value. - // Thus web scripts in an infinite loop can run forever unless we - // take some measures to prevent this. Track total time and calls. - self::$totalElapsed += $elapsed; - --self::$stackDepth; - ignore_user_abort( $this->oldIgnoreAbort ); - } - } -} diff --git a/includes/libs/XmlTypeCheck.php b/includes/libs/XmlTypeCheck.php index 6d01986d..34afb689 100644 --- a/includes/libs/XmlTypeCheck.php +++ b/includes/libs/XmlTypeCheck.php @@ -39,6 +39,13 @@ class XmlTypeCheck { public $filterMatch = false; /** + * Will contain the type of filter hit if the optional element filter returned + * a match at some point. + * @var mixed + */ + public $filterMatchType = false; + + /** * Name of the document's root element, including any namespace * as an expanded URL. */ @@ -173,7 +180,7 @@ class XmlTypeCheck { // First, move through anything that isn't an element, and // handle any processing instructions with the callback do { - if( !$this->readNext( $reader ) ) { + if ( !$this->readNext( $reader ) ) { // Hit the end of the document before any elements $this->wellFormed = false; return; @@ -294,17 +301,20 @@ class XmlTypeCheck { list( $name, $attribs ) = array_pop( $this->elementDataContext ); $data = array_pop( $this->elementData ); $this->stackDepth--; + $callbackReturn = false; - if ( is_callable( $this->filterCallback ) - && call_user_func( + if ( is_callable( $this->filterCallback ) ) { + $callbackReturn = call_user_func( $this->filterCallback, $name, $attribs, $data - ) - ) { - // Filter hit + ); + } + if ( $callbackReturn ) { + // Filter hit! 
$this->filterMatch = true; + $this->filterMatchType = $callbackReturn; } } @@ -321,15 +331,18 @@ class XmlTypeCheck { * @param $data */ private function processingInstructionHandler( $target, $data ) { + $callbackReturn = false; if ( $this->parserOptions['processing_instruction_handler'] ) { - if ( call_user_func( + $callbackReturn = call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data - ) ) { - // Filter hit! - $this->filterMatch = true; - } + ); + } + if ( $callbackReturn ) { + // Filter hit! + $this->filterMatch = true; + $this->filterMatchType = $callbackReturn; } } } diff --git a/includes/libs/composer/ComposerLock.php b/includes/libs/composer/ComposerLock.php index 9c7bf2f9..22c33191 100644 --- a/includes/libs/composer/ComposerLock.php +++ b/includes/libs/composer/ComposerLock.php @@ -30,6 +30,9 @@ class ComposerLock { $deps[$installed['name']] = array( 'version' => ComposerJson::normalizeVersion( $installed['version'] ), 'type' => $installed['type'], + 'licenses' => isset( $installed['license'] ) ? $installed['license'] : array(), + 'authors' => isset( $installed['authors'] ) ? $installed['authors'] : array(), + 'description' => isset( $installed['description'] ) ? $installed['description']: '', ); } diff --git a/includes/libs/eventrelayer/EventRelayer.php b/includes/libs/eventrelayer/EventRelayer.php new file mode 100644 index 00000000..f95ba3f0 --- /dev/null +++ b/includes/libs/eventrelayer/EventRelayer.php @@ -0,0 +1,65 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @author Aaron Schulz + */ + +/** + * Base class for reliable event relays + */ +abstract class EventRelayer { + /** + * @param array $params + */ + public function __construct( array $params ) { + } + + /** + * @param string $channel + * @param array $event Event data map + * @return bool Success + */ + final public function notify( $channel, $event ) { + return $this->doNotify( $channel, array( $event ) ); + } + + /** + * @param string $channel + * @param array $events List of event data maps + * @return bool Success + */ + final public function notifyMulti( $channel, $events ) { + return $this->doNotify( $channel, $events ); + } + + /** + * @param string $channel + * @param array $events List of event data maps + * @return bool Success + */ + abstract protected function doNotify( $channel, array $events ); +} + +/** + * No-op class for publishing messages into a PubSub system + */ +class EventRelayerNull extends EventRelayer { + public function doNotify( $channel, array $events ) { + return true; + } +} diff --git a/includes/libs/eventrelayer/EventRelayerMCRD.php b/includes/libs/eventrelayer/EventRelayerMCRD.php new file mode 100644 index 00000000..1e8b2a40 --- /dev/null +++ b/includes/libs/eventrelayer/EventRelayerMCRD.php @@ -0,0 +1,66 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @author Aaron Schulz + */ + +/** + * Relayed that uses the mcrelaypushd server + */ +class EventRelayerMCRD extends EventRelayer { + /** @var MultiHttpClient */ + protected $http; + /** @var string */ + protected $baseUrl; + + /** + * Additional params include 'mcrdConfig', which is a map of: + * - url : The base URL of the service (without paths) + * @param array $params + */ + public function __construct( array $params ) { + parent::__construct( $params ); + + $this->baseUrl = $params['mcrdConfig']['url']; + + $httpConfig = isset( $params['httpConfig'] ) ? $params['httpConfig'] : array(); + if ( !isset( $httpConfig['connTimeout'] ) ) { + $httpConfig['connTimeout'] = 1; + } + if ( !isset( $httpConfig['reqTimeout'] ) ) { + $httpConfig['reqTimeout'] = .25; + } + + $this->http = new MultiHttpClient( $httpConfig ); + } + + protected function doNotify( $channel, array $events ) { + if ( !count( $events ) ) { + return true; + } + + $response = $this->http->run( array( + 'url' => "{$this->baseUrl}/relayer/api/v1.0/" . 
rawurlencode( $channel ), + 'method' => 'POST', + 'body' => json_encode( array( 'events' => $events ) ), + 'headers' => array( 'content-type' => 'application/json' ) + ) ); + + return $response['code'] == 201; + } +} diff --git a/includes/libs/normal/UtfNormal.php b/includes/libs/normal/UtfNormal.php deleted file mode 100644 index c9c05a07..00000000 --- a/includes/libs/normal/UtfNormal.php +++ /dev/null @@ -1,129 +0,0 @@ -<?php -/** - * Unicode normalization routines - * - * Copyright © 2004 Brion Vibber <brion@pobox.com> - * https://www.mediawiki.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup UtfNormal - */ - -/** - * @defgroup UtfNormal UtfNormal - */ - -use UtfNormal\Validator; - -/** - * Unicode normalization routines for working with UTF-8 strings. - * Currently assumes that input strings are valid UTF-8! - * - * Not as fast as I'd like, but should be usable for most purposes. - * UtfNormal::toNFC() will bail early if given ASCII text or text - * it can quickly determine is already normalized. - * - * All functions can be called static. 
- * - * See description of forms at http://www.unicode.org/reports/tr15/ - * - * @deprecated since 1.25, use UtfNormal\Validator directly - * @ingroup UtfNormal - */ -class UtfNormal { - /** - * The ultimate convenience function! Clean up invalid UTF-8 sequences, - * and convert to normal form C, canonical composition. - * - * Fast return for pure ASCII strings; some lesser optimizations for - * strings containing only known-good characters. Not as fast as toNFC(). - * - * @param string $string a UTF-8 string - * @return string a clean, shiny, normalized UTF-8 string - */ - static function cleanUp( $string ) { - return Validator::cleanUp( $string ); - } - - /** - * Convert a UTF-8 string to normal form C, canonical composition. - * Fast return for pure ASCII strings; some lesser optimizations for - * strings containing only known-good characters. - * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form C - */ - static function toNFC( $string ) { - return Validator::toNFC( $string ); - } - - /** - * Convert a UTF-8 string to normal form D, canonical decomposition. - * Fast return for pure ASCII strings. - * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form D - */ - static function toNFD( $string ) { - return Validator::toNFD( $string ); - } - - /** - * Convert a UTF-8 string to normal form KC, compatibility composition. - * This may cause irreversible information loss, use judiciously. - * Fast return for pure ASCII strings. - * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form KC - */ - static function toNFKC( $string ) { - return Validator::toNFKC( $string ); - } - - /** - * Convert a UTF-8 string to normal form KD, compatibility decomposition. - * This may cause irreversible information loss, use judiciously. - * Fast return for pure ASCII strings. 
- * - * @param string $string a valid UTF-8 string. Input is not validated. - * @return string a UTF-8 string in normal form KD - */ - static function toNFKD( $string ) { - return Validator::toNFKD( $string ); - } - - /** - * Returns true if the string is _definitely_ in NFC. - * Returns false if not or uncertain. - * @param string $string a valid UTF-8 string. Input is not validated. - * @return bool - */ - static function quickIsNFC( $string ) { - return Validator::quickIsNFC( $string ); - } - - /** - * Returns true if the string is _definitely_ in NFC. - * Returns false if not or uncertain. - * @param string $string a UTF-8 string, altered on output to be valid UTF-8 safe for XML. - * @return bool - */ - static function quickIsNFCVerify( &$string ) { - return Validator::quickIsNFCVerify( $string ); - } -} diff --git a/includes/libs/normal/UtfNormalDefines.php b/includes/libs/normal/UtfNormalDefines.php deleted file mode 100644 index b8e44c77..00000000 --- a/includes/libs/normal/UtfNormalDefines.php +++ /dev/null @@ -1,186 +0,0 @@ -<?php -/** - * Backwards-compatability constants which are now provided by the - * UtfNormal library. They are hardcoded here since they are needed - * before the composer autoloader is initialized. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup UtfNormal - */ - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_FIRST', 0xac00 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LAST', 0xd7a3 ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LBASE', 0x1100 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_VBASE', 0x1161 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_TBASE', 0x11a7 ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LCOUNT', 19 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_VCOUNT', 21 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_TCOUNT', 28 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_NCOUNT', UNICODE_HANGUL_VCOUNT * UNICODE_HANGUL_TCOUNT ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_LEND', UNICODE_HANGUL_LBASE + UNICODE_HANGUL_LCOUNT - 1 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_VEND', UNICODE_HANGUL_VBASE + UNICODE_HANGUL_VCOUNT - 1 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_HANGUL_TEND', UNICODE_HANGUL_TBASE + UNICODE_HANGUL_TCOUNT - 1 ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_SURROGATE_FIRST', 0xd800 ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_SURROGATE_LAST', 0xdfff ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UNICODE_MAX', 0x10ffff ); -/** - * @deprecated since 1.25, use UtfNormal\Constants 
instead - */ -define( 'UNICODE_REPLACEMENT', 0xfffd ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_FIRST', "\xea\xb0\x80" /*codepointToUtf8( UNICODE_HANGUL_FIRST )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_LAST', "\xed\x9e\xa3" /*codepointToUtf8( UNICODE_HANGUL_LAST )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_LBASE', "\xe1\x84\x80" /*codepointToUtf8( UNICODE_HANGUL_LBASE )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_VBASE', "\xe1\x85\xa1" /*codepointToUtf8( UNICODE_HANGUL_VBASE )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_TBASE', "\xe1\x86\xa7" /*codepointToUtf8( UNICODE_HANGUL_TBASE )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_LEND', "\xe1\x84\x92" /*codepointToUtf8( UNICODE_HANGUL_LEND )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_VEND', "\xe1\x85\xb5" /*codepointToUtf8( UNICODE_HANGUL_VEND )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HANGUL_TEND', "\xe1\x87\x82" /*codepointToUtf8( UNICODE_HANGUL_TEND )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_SURROGATE_FIRST', "\xed\xa0\x80" /*codepointToUtf8( UNICODE_SURROGATE_FIRST )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_SURROGATE_LAST', "\xed\xbf\xbf" /*codepointToUtf8( UNICODE_SURROGATE_LAST )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_MAX', "\xf4\x8f\xbf\xbf" /*codepointToUtf8( UNICODE_MAX )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_REPLACEMENT', "\xef\xbf\xbd" /*codepointToUtf8( UNICODE_REPLACEMENT )*/ ); 
-#define( 'UTF8_REPLACEMENT', '!' ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_OVERLONG_A', "\xc1\xbf" ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_OVERLONG_B', "\xe0\x9f\xbf" ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_OVERLONG_C', "\xf0\x8f\xbf\xbf" ); - -# These two ranges are illegal -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FDD0', "\xef\xb7\x90" /*codepointToUtf8( 0xfdd0 )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FDEF', "\xef\xb7\xaf" /*codepointToUtf8( 0xfdef )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FFFE', "\xef\xbf\xbe" /*codepointToUtf8( 0xfffe )*/ ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_FFFF', "\xef\xbf\xbf" /*codepointToUtf8( 0xffff )*/ ); - -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_HEAD', false ); -/** - * @deprecated since 1.25, use UtfNormal\Constants instead - */ -define( 'UTF8_TAIL', true ); diff --git a/includes/libs/normal/UtfNormalUtil.php b/includes/libs/normal/UtfNormalUtil.php deleted file mode 100644 index ad9a2b9a..00000000 --- a/includes/libs/normal/UtfNormalUtil.php +++ /dev/null @@ -1,99 +0,0 @@ -<?php -/** - * Some of these functions are adapted from places in MediaWiki. - * Should probably merge them for consistency. - * - * Copyright © 2004 Brion Vibber <brion@pobox.com> - * https://www.mediawiki.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup UtfNormal - */ - - -use UtfNormal\Utils; -/** - * Return UTF-8 sequence for a given Unicode code point. - * - * @param $codepoint Integer: - * @return String - * @throws InvalidArgumentException if fed out of range data. - * @public - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function codepointToUtf8( $codepoint ) { - return Utils::codepointToUtf8( $codepoint ); -} - -/** - * Take a series of space-separated hexadecimal numbers representing - * Unicode code points and return a UTF-8 string composed of those - * characters. Used by UTF-8 data generation and testing routines. - * - * @param $sequence String - * @return String - * @throws InvalidArgumentException if fed out of range data. - * @private - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function hexSequenceToUtf8( $sequence ) { - return Utils::hexSequenceToUtf8( $sequence ); -} - -/** - * Take a UTF-8 string and return a space-separated series of hex - * numbers representing Unicode code points. For debugging. - * - * @fixme this is private but extensions + maint scripts are using it - * @param string $str UTF-8 string. 
- * @return string - * @private - */ -function utf8ToHexSequence( $str ) { - $buf = ''; - foreach ( preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) as $cp ) { - $buf .= sprintf( '%04x ', UtfNormal\Utils::utf8ToCodepoint( $cp ) ); - } - - return rtrim( $buf ); -} - -/** - * Determine the Unicode codepoint of a single-character UTF-8 sequence. - * Does not check for invalid input data. - * - * @param $char String - * @return Integer - * @public - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function utf8ToCodepoint( $char ) { - return Utils::utf8ToCodepoint( $char ); -} - -/** - * Escape a string for inclusion in a PHP single-quoted string literal. - * - * @param string $string string to be escaped. - * @return String: escaped string. - * @public - * @deprecated since 1.25, use UtfNormal\Utils directly - */ -function escapeSingleString( $string ) { - return Utils::escapeSingleString( $string ); -} diff --git a/includes/libs/objectcache/APCBagOStuff.php b/includes/libs/objectcache/APCBagOStuff.php index eaf11557..35e05e80 100644 --- a/includes/libs/objectcache/APCBagOStuff.php +++ b/includes/libs/objectcache/APCBagOStuff.php @@ -27,43 +27,88 @@ * @ingroup Cache */ class APCBagOStuff extends BagOStuff { - public function get( $key, &$casToken = null ) { - $val = apc_fetch( $key ); + + /** + * @var bool If true, trust the APC implementation to serialize and + * deserialize objects correctly. If false, (de-)serialize in PHP. + */ + protected $nativeSerialize; + + /** + * @var string String to append to each APC key. This may be changed + * whenever the handling of values is changed, to prevent existing code + * from encountering older values which it cannot handle. + */ + const KEY_SUFFIX = ':2'; + + /** + * Constructor + * + * Available parameters are: + * - nativeSerialize: If true, pass objects to apc_store(), and trust it + * to serialize them correctly. If false, serialize + * all values in PHP. 
+ * + * @param array $params + */ + public function __construct( array $params = array() ) { + parent::__construct( $params ); + + if ( isset( $params['nativeSerialize'] ) ) { + $this->nativeSerialize = $params['nativeSerialize']; + } elseif ( extension_loaded( 'apcu' ) && ini_get( 'apc.serializer' ) === 'default' ) { + // APCu has a memory corruption bug when the serializer is set to 'default'. + // See T120267, and upstream bug reports: + // - https://github.com/krakjoe/apcu/issues/38 + // - https://github.com/krakjoe/apcu/issues/35 + // - https://github.com/krakjoe/apcu/issues/111 + $this->logger->warning( + 'The APCu extension is loaded and the apc.serializer INI setting ' . + 'is set to "default". This can cause memory corruption! ' . + 'You should change apc.serializer to "php" instead. ' . + 'See <https://github.com/krakjoe/apcu/issues/38>.' + ); + $this->nativeSerialize = false; + } else { + $this->nativeSerialize = true; + } + } + + public function get( $key, &$casToken = null, $flags = 0 ) { + $val = apc_fetch( $key . self::KEY_SUFFIX ); $casToken = $val; - if ( is_string( $val ) ) { - if ( $this->isInteger( $val ) ) { - $val = intval( $val ); - } else { - $val = unserialize( $val ); - } + if ( is_string( $val ) && !$this->nativeSerialize ) { + $val = $this->isInteger( $val ) + ? intval( $val ) + : unserialize( $val ); } return $val; } public function set( $key, $value, $exptime = 0 ) { - if ( !$this->isInteger( $value ) ) { + if ( !$this->nativeSerialize && !$this->isInteger( $value ) ) { $value = serialize( $value ); } - apc_store( $key, $value, $exptime ); + apc_store( $key . self::KEY_SUFFIX, $value, $exptime ); return true; } public function delete( $key ) { - apc_delete( $key ); + apc_delete( $key . self::KEY_SUFFIX ); return true; } public function incr( $key, $value = 1 ) { - return apc_inc( $key, $value ); + return apc_inc( $key . 
self::KEY_SUFFIX, $value ); } public function decr( $key, $value = 1 ) { - return apc_dec( $key, $value ); + return apc_dec( $key . self::KEY_SUFFIX, $value ); } } diff --git a/includes/libs/objectcache/BagOStuff.php b/includes/libs/objectcache/BagOStuff.php index 0b791e5a..ddbe8eaa 100644 --- a/includes/libs/objectcache/BagOStuff.php +++ b/includes/libs/objectcache/BagOStuff.php @@ -1,7 +1,5 @@ <?php /** - * Classes to cache objects in PHP accelerators, SQL database or DBA files - * * Copyright © 2003-2004 Brion Vibber <brion@pobox.com> * https://www.mediawiki.org/ * @@ -37,29 +35,34 @@ use Psr\Log\NullLogger; * the PHP memcached client. * * backends for local hash array and SQL table included: - * <code> + * @code * $bag = new HashBagOStuff(); * $bag = new SqlBagOStuff(); # connect to db first - * </code> + * @endcode * * @ingroup Cache */ abstract class BagOStuff implements LoggerAwareInterface { - private $debugMode = false; - + /** @var array[] Lock tracking */ + protected $locks = array(); + /** @var integer */ protected $lastError = self::ERR_NONE; - /** - * @var LoggerInterface - */ + /** @var LoggerInterface */ protected $logger; + /** @var bool */ + private $debugMode = false; + /** Possible values for getLastError() */ const ERR_NONE = 0; // no error const ERR_NO_RESPONSE = 1; // no response const ERR_UNREACHABLE = 2; // can't connect const ERR_UNEXPECTED = 3; // response gave some error + /** Bitfield constants for get()/getMulti() */ + const READ_LATEST = 1; // use latest data for replicated stores + public function __construct( array $params = array() ) { if ( isset( $params['logger'] ) ) { $this->setLogger( $params['logger'] ); @@ -87,9 +90,10 @@ abstract class BagOStuff implements LoggerAwareInterface { * Get an item with the given key. Returns false if it does not exist. 
* @param string $key * @param mixed $casToken [optional] + * @param integer $flags Bitfield; supports READ_LATEST [optional] * @return mixed Returns false on failure */ - abstract public function get( $key, &$casToken = null ); + abstract public function get( $key, &$casToken = null, $flags = 0 ); /** * Set an item. @@ -109,18 +113,20 @@ abstract class BagOStuff implements LoggerAwareInterface { /** * Merge changes into the existing cache value (possibly creating a new one). - * The callback function returns the new value given the current value (possibly false), - * and takes the arguments: (this BagOStuff object, cache key, current value). + * The callback function returns the new value given the current value + * (which will be false if not present), and takes the arguments: + * (this BagOStuff, cache key, current value). * * @param string $key * @param callable $callback Callback method to be executed * @param int $exptime Either an interval in seconds or a unix timestamp for expiry * @param int $attempts The amount of times to attempt a merge in case of failure * @return bool Success + * @throws InvalidArgumentException */ public function merge( $key, $callback, $exptime = 0, $attempts = 10 ) { if ( !is_callable( $callback ) ) { - throw new Exception( "Got invalid callback." ); + throw new InvalidArgumentException( "Got invalid callback." 
); } return $this->mergeViaLock( $key, $callback, $exptime, $attempts ); @@ -137,11 +143,17 @@ abstract class BagOStuff implements LoggerAwareInterface { */ protected function mergeViaCas( $key, $callback, $exptime = 0, $attempts = 10 ) { do { + $this->clearLastError(); $casToken = null; // passed by reference $currentValue = $this->get( $key, $casToken ); + if ( $this->getLastError() ) { + return false; // don't spam retries (retry only on races) + } + // Derive the new value from the old value $value = call_user_func( $callback, $this, $key, $currentValue ); + $this->clearLastError(); if ( $value === false ) { $success = true; // do nothing } elseif ( $currentValue === false ) { @@ -151,6 +163,9 @@ abstract class BagOStuff implements LoggerAwareInterface { // Try to update the key, failing if it gets changed in the meantime $success = $this->cas( $casToken, $key, $value, $exptime ); } + if ( $this->getLastError() ) { + return false; // IO error; don't spam retries + } } while ( !$success && --$attempts ); return $success; @@ -164,6 +179,7 @@ abstract class BagOStuff implements LoggerAwareInterface { * @param mixed $value * @param int $exptime Either an interval in seconds or a unix timestamp for expiry * @return bool Success + * @throws Exception */ protected function cas( $casToken, $key, $value, $exptime = 0 ) { throw new Exception( "CAS is not implemented in " . 
__CLASS__ ); @@ -183,14 +199,18 @@ abstract class BagOStuff implements LoggerAwareInterface { return false; } + $this->clearLastError(); $currentValue = $this->get( $key ); - // Derive the new value from the old value - $value = call_user_func( $callback, $this, $key, $currentValue ); - - if ( $value === false ) { - $success = true; // do nothing + if ( $this->getLastError() ) { + $success = false; } else { - $success = $this->set( $key, $value, $exptime ); // set the new value + // Derive the new value from the old value + $value = call_user_func( $callback, $this, $key, $currentValue ); + if ( $value === false ) { + $success = true; // do nothing + } else { + $success = $this->set( $key, $value, $exptime ); // set the new value + } } if ( !$this->unlock( $key ) ) { @@ -202,48 +222,116 @@ abstract class BagOStuff implements LoggerAwareInterface { } /** + * Acquire an advisory lock on a key string + * + * Note that if reentry is enabled, duplicate calls ignore $expiry + * * @param string $key - * @param int $timeout Lock wait timeout [optional] - * @param int $expiry Lock expiry [optional] + * @param int $timeout Lock wait timeout; 0 for non-blocking [optional] + * @param int $expiry Lock expiry [optional]; 1 day maximum + * @param string $rclass Allow reentry if set and the current lock used this value * @return bool Success */ - public function lock( $key, $timeout = 6, $expiry = 6 ) { + public function lock( $key, $timeout = 6, $expiry = 6, $rclass = '' ) { + // Avoid deadlocks and allow lock reentry if specified + if ( isset( $this->locks[$key] ) ) { + if ( $rclass != '' && $this->locks[$key]['class'] === $rclass ) { + ++$this->locks[$key]['depth']; + return true; + } else { + return false; + } + } + + $expiry = min( $expiry ?: INF, 86400 ); + $this->clearLastError(); $timestamp = microtime( true ); // starting UNIX timestamp if ( $this->add( "{$key}:lock", 1, $expiry ) ) { - return true; - } elseif ( $this->getLastError() ) { - return false; + $locked = true; 
+ } elseif ( $this->getLastError() || $timeout <= 0 ) { + $locked = false; // network partition or non-blocking + } else { + $uRTT = ceil( 1e6 * ( microtime( true ) - $timestamp ) ); // estimate RTT (us) + $sleep = 2 * $uRTT; // rough time to do get()+set() + + $attempts = 0; // failed attempts + do { + if ( ++$attempts >= 3 && $sleep <= 5e5 ) { + // Exponentially back off after failed attempts to avoid network spam. + // About 2*$uRTT*(2^n-1) us of "sleep" happen for the next n attempts. + $sleep *= 2; + } + usleep( $sleep ); // back off + $this->clearLastError(); + $locked = $this->add( "{$key}:lock", 1, $expiry ); + if ( $this->getLastError() ) { + $locked = false; // network partition + break; + } + } while ( !$locked && ( microtime( true ) - $timestamp ) < $timeout ); } - $uRTT = ceil( 1e6 * ( microtime( true ) - $timestamp ) ); // estimate RTT (us) - $sleep = 2 * $uRTT; // rough time to do get()+set() - - $locked = false; // lock acquired - $attempts = 0; // failed attempts - do { - if ( ++$attempts >= 3 && $sleep <= 5e5 ) { - // Exponentially back off after failed attempts to avoid network spam. - // About 2*$uRTT*(2^n-1) us of "sleep" happen for the next n attempts. 
- $sleep *= 2; - } - usleep( $sleep ); // back off - $this->clearLastError(); - $locked = $this->add( "{$key}:lock", 1, $expiry ); - if ( $this->getLastError() ) { - return false; - } - } while ( !$locked && ( microtime( true ) - $timestamp ) < $timeout ); + if ( $locked ) { + $this->locks[$key] = array( 'class' => $rclass, 'depth' => 1 ); + } return $locked; } /** + * Release an advisory lock on a key string + * * @param string $key * @return bool Success */ public function unlock( $key ) { - return $this->delete( "{$key}:lock" ); + if ( isset( $this->locks[$key] ) && --$this->locks[$key]['depth'] <= 0 ) { + unset( $this->locks[$key] ); + + return $this->delete( "{$key}:lock" ); + } + + return true; + } + + /** + * Get a lightweight exclusive self-unlocking lock + * + * Note that the same lock cannot be acquired twice. + * + * This is useful for task de-duplication or to avoid obtrusive + * (though non-corrupting) DB errors like INSERT key conflicts + * or deadlocks when using LOCK IN SHARE MODE. 
+ * + * @param string $key + * @param int $timeout Lock wait timeout; 0 for non-blocking [optional] + * @param int $expiry Lock expiry [optional]; 1 day maximum + * @param string $rclass Allow reentry if set and the current lock used this value + * @return ScopedCallback|null Returns null on failure + * @since 1.26 + */ + final public function getScopedLock( $key, $timeout = 6, $expiry = 30, $rclass = '' ) { + $expiry = min( $expiry ?: INF, 86400 ); + + if ( !$this->lock( $key, $timeout, $expiry, $rclass ) ) { + return null; + } + + $lSince = microtime( true ); // lock timestamp + // PHP 5.3: Can't use $this in a closure + $that = $this; + $logger = $this->logger; + + return new ScopedCallback( function() use ( $that, $logger, $key, $lSince, $expiry ) { + $latency = .050; // latency skew (err towards keeping lock present) + $age = ( microtime( true ) - $lSince + $latency ); + if ( ( $age + $latency ) >= $expiry ) { + $logger->warning( "Lock for $key held too long ($age sec)." ); + return; // expired; it's not "safe" to delete the key + } + $that->unlock( $key ); + } ); } /** @@ -260,14 +348,13 @@ abstract class BagOStuff implements LoggerAwareInterface { return false; } - /* *** Emulated functions *** */ - /** * Get an associative array containing the item for each of the keys that have items. 
* @param array $keys List of strings + * @param integer $flags Bitfield; supports READ_LATEST [optional] * @return array */ - public function getMulti( array $keys ) { + public function getMulti( array $keys, $flags = 0 ) { $res = array(); foreach ( $keys as $key ) { $val = $this->get( $key ); @@ -334,7 +421,7 @@ abstract class BagOStuff implements LoggerAwareInterface { * Decrease stored value of $key by $value while preserving its TTL * @param string $key * @param int $value - * @return int + * @return int|bool New value or false on failure */ public function decr( $key, $value = 1 ) { return $this->incr( $key, - $value ); @@ -384,6 +471,24 @@ abstract class BagOStuff implements LoggerAwareInterface { } /** + * Modify a cache update operation array for EventRelayer::notify() + * + * This is used for relayed writes, e.g. for broadcasting a change + * to multiple data-centers. If the array contains a 'val' field + * then the command involves setting a key to that value. Note that + * for simplicity, 'val' is always a simple scalar value. This method + * is used to possibly serialize the value and add any cache-specific + * key/values needed for the relayer daemon (e.g. memcached flags). 
+ * + * @param array $event + * @return array + * @since 1.26 + */ + public function modifySimpleRelayEvent( array $event ) { + return $event; + } + + /** * @param string $text */ protected function debug( $text ) { diff --git a/includes/libs/objectcache/EmptyBagOStuff.php b/includes/libs/objectcache/EmptyBagOStuff.php index 4ccf2707..55e84b05 100644 --- a/includes/libs/objectcache/EmptyBagOStuff.php +++ b/includes/libs/objectcache/EmptyBagOStuff.php @@ -27,7 +27,7 @@ * @ingroup Cache */ class EmptyBagOStuff extends BagOStuff { - public function get( $key, &$casToken = null ) { + public function get( $key, &$casToken = null, $flags = 0 ) { return false; } diff --git a/includes/libs/objectcache/HashBagOStuff.php b/includes/libs/objectcache/HashBagOStuff.php index 2c8b05a5..b685e41f 100644 --- a/includes/libs/objectcache/HashBagOStuff.php +++ b/includes/libs/objectcache/HashBagOStuff.php @@ -48,7 +48,7 @@ class HashBagOStuff extends BagOStuff { return true; } - public function get( $key, &$casToken = null ) { + public function get( $key, &$casToken = null, $flags = 0 ) { if ( !isset( $this->bag[$key] ) ) { return false; } @@ -68,20 +68,8 @@ class HashBagOStuff extends BagOStuff { } function delete( $key ) { - if ( !isset( $this->bag[$key] ) ) { - return false; - } - unset( $this->bag[$key] ); return true; } - - public function lock( $key, $timeout = 6, $expiry = 6 ) { - return true; - } - - function unlock( $key ) { - return true; - } } diff --git a/includes/libs/objectcache/ReplicatedBagOStuff.php b/includes/libs/objectcache/ReplicatedBagOStuff.php new file mode 100644 index 00000000..9e80e9fd --- /dev/null +++ b/includes/libs/objectcache/ReplicatedBagOStuff.php @@ -0,0 +1,129 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + * @author Aaron Schulz + */ + +/** + * A cache class that directs writes to one set of servers and reads to + * another. This assumes that the servers used for reads are setup to slave + * those that writes go to. This can easily be used with redis for example. + * + * In the WAN scenario (e.g. multi-datacenter case), this is useful when + * writes are rare or they usually take place in the primary datacenter. + * + * @ingroup Cache + * @since 1.26 + */ +class ReplicatedBagOStuff extends BagOStuff { + /** @var BagOStuff */ + protected $writeStore; + /** @var BagOStuff */ + protected $readStore; + + /** + * Constructor. Parameters are: + * - writeFactory : ObjectFactory::getObjectFromSpec parameters yeilding BagOStuff. + * This object will be used for writes (e.g. the master DB). + * - readFactory : ObjectFactory::getObjectFromSpec parameters yeilding BagOStuff. + * This object will be used for reads (e.g. a slave DB). + * + * @param array $params + * @throws InvalidArgumentException + */ + public function __construct( $params ) { + parent::__construct( $params ); + + if ( !isset( $params['writeFactory'] ) ) { + throw new InvalidArgumentException( + __METHOD__ . ': the "writeFactory" parameter is required' ); + } + if ( !isset( $params['readFactory'] ) ) { + throw new InvalidArgumentException( + __METHOD__ . 
': the "readFactory" parameter is required' ); + } + + $this->writeStore = ( $params['writeFactory'] instanceof BagOStuff ) + ? $params['writeFactory'] + : ObjectFactory::getObjectFromSpec( $params['writeFactory'] ); + $this->readStore = ( $params['readFactory'] instanceof BagOStuff ) + ? $params['readFactory'] + : ObjectFactory::getObjectFromSpec( $params['readFactory'] ); + } + + public function setDebug( $debug ) { + $this->writeStore->setDebug( $debug ); + $this->readStore->setDebug( $debug ); + } + + public function get( $key, &$casToken = null, $flags = 0 ) { + return ( $flags & self::READ_LATEST ) + ? $this->writeStore->get( $key, $casToken, $flags ) + : $this->readStore->get( $key, $casToken, $flags ); + } + + public function getMulti( array $keys, $flags = 0 ) { + return ( $flags & self::READ_LATEST ) + ? $this->writeStore->getMulti( $keys, $flags ) + : $this->readStore->getMulti( $keys, $flags ); + } + + public function set( $key, $value, $exptime = 0 ) { + return $this->writeStore->set( $key, $value, $exptime ); + } + + public function delete( $key ) { + return $this->writeStore->delete( $key ); + } + + public function add( $key, $value, $exptime = 0 ) { + return $this->writeStore->add( $key, $value, $exptime ); + } + + public function incr( $key, $value = 1 ) { + return $this->writeStore->incr( $key, $value ); + } + + public function decr( $key, $value = 1 ) { + return $this->writeStore->decr( $key, $value ); + } + + public function lock( $key, $timeout = 6, $expiry = 6, $rclass = '' ) { + return $this->writeStore->lock( $key, $timeout, $expiry, $rclass ); + } + + public function unlock( $key ) { + return $this->writeStore->unlock( $key ); + } + + public function merge( $key, $callback, $exptime = 0, $attempts = 10 ) { + return $this->writeStore->merge( $key, $callback, $exptime, $attempts ); + } + + public function getLastError() { + return ( $this->writeStore->getLastError() != self::ERR_NONE ) + ? 
$this->writeStore->getLastError() + : $this->readStore->getLastError(); + } + + public function clearLastError() { + $this->writeStore->clearLastError(); + $this->readStore->clearLastError(); + } +} diff --git a/includes/libs/objectcache/WANObjectCache.php b/includes/libs/objectcache/WANObjectCache.php new file mode 100644 index 00000000..2d921a70 --- /dev/null +++ b/includes/libs/objectcache/WANObjectCache.php @@ -0,0 +1,746 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + * @author Aaron Schulz + */ + +/** + * Multi-datacenter aware caching interface + * + * All operations go to the local cache, except the delete() + * and touchCheckKey(), which broadcast to all clusters. + * This class is intended for caching data from primary stores. + * If the get() method does not return a value, then the caller + * should query the new value and backfill the cache using set(). + * When the source data changes, the delete() method should be called. + * Since delete() is expensive, it should be avoided. 
One can do so if: + * - a) The object cached is immutable; or + * - b) Validity is checked against the source after get(); or + * - c) Using a modest TTL is reasonably correct and performant + * Consider using getWithSetCallback() instead of the get()/set() cycle. + * + * Instances of this class must be configured to point to a valid + * PubSub endpoint, and there must be listeners on the cache servers + * that subscribe to the endpoint and update the caches. + * + * Broadcasted operations like delete() and touchCheckKey() are done + * synchronously in the local cluster, but are relayed asynchronously. + * This means that callers in other datacenters will see older values + * for a however many milliseconds the datacenters are apart. As with + * any cache, this should not be relied on for cases where reads are + * used to determine writes to source (e.g. non-cache) data stores. + * + * All values are wrapped in metadata arrays. Keys use a "WANCache:" prefix + * to avoid collisions with keys that are not wrapped as metadata arrays. 
The + * prefixes are as follows: + * - a) "WANCache:v" : used for regular value keys + * - b) "WANCache:s" : used for temporarily storing values of tombstoned keys + * - c) "WANCache:t" : used for storing timestamp "check" keys + * + * @ingroup Cache + * @since 1.26 + */ +class WANObjectCache { + /** @var BagOStuff The local cluster cache */ + protected $cache; + /** @var string Cache pool name */ + protected $pool; + /** @var EventRelayer */ + protected $relayer; + + /** @var int */ + protected $lastRelayError = self::ERR_NONE; + + /** Seconds to tombstone keys on delete() */ + const HOLDOFF_TTL = 10; + /** Seconds to keep dependency purge keys around */ + const CHECK_KEY_TTL = 31536000; // 1 year + /** Seconds to keep lock keys around */ + const LOCK_TTL = 5; + /** Default remaining TTL at which to consider pre-emptive regeneration */ + const LOW_TTL = 10; + /** Default TTL for temporarily caching tombstoned keys */ + const TEMP_TTL = 5; + + /** Idiom for set()/getWithSetCallback() TTL */ + const TTL_NONE = 0; + /** Idiom for getWithSetCallback() callbacks to avoid calling set() */ + const TTL_UNCACHEABLE = -1; + + /** Cache format version number */ + const VERSION = 1; + + /** Fields of value holder arrays */ + const FLD_VERSION = 0; + const FLD_VALUE = 1; + const FLD_TTL = 2; + const FLD_TIME = 3; + + /** Possible values for getLastError() */ + const ERR_NONE = 0; // no error + const ERR_NO_RESPONSE = 1; // no response + const ERR_UNREACHABLE = 2; // can't connect + const ERR_UNEXPECTED = 3; // response gave some error + const ERR_RELAY = 4; // relay broadcast failed + + const VALUE_KEY_PREFIX = 'WANCache:v:'; + const STASH_KEY_PREFIX = 'WANCache:s:'; + const TIME_KEY_PREFIX = 'WANCache:t:'; + + const PURGE_VAL_PREFIX = 'PURGED:'; + + /** + * @param array $params + * - cache : BagOStuff object + * - pool : pool name + * - relayer : EventRelayer object + */ + public function __construct( array $params ) { + $this->cache = $params['cache']; + $this->pool = 
$params['pool']; + $this->relayer = $params['relayer']; + } + + /** + * @return WANObjectCache Cache that wraps EmptyBagOStuff + */ + public static function newEmpty() { + return new self( array( + 'cache' => new EmptyBagOStuff(), + 'pool' => 'empty', + 'relayer' => new EventRelayerNull( array() ) + ) ); + } + + /** + * Fetch the value of a key from cache + * + * If passed in, $curTTL is set to the remaining TTL (current time left): + * - a) INF; if the key exists, has no TTL, and is not expired by $checkKeys + * - b) float (>=0); if the key exists, has a TTL, and is not expired by $checkKeys + * - c) float (<0); if the key is tombstoned or existing but expired by $checkKeys + * - d) null; if the key does not exist and is not tombstoned + * + * If a key is tombstoned, $curTTL will reflect the time since delete(). + * + * The timestamp of $key will be checked against the last-purge timestamp + * of each of $checkKeys. Those $checkKeys not in cache will have the last-purge + * initialized to the current timestamp. If any of $checkKeys have a timestamp + * greater than that of $key, then $curTTL will reflect how long ago $key + * became invalid. Callers can use $curTTL to know when the value is stale. + * The $checkKeys parameter allow mass invalidations by updating a single key: + * - a) Each "check" key represents "last purged" of some source data + * - b) Callers pass in relevant "check" keys as $checkKeys in get() + * - c) When the source data that "check" keys represent changes, + * the touchCheckKey() method is called on them + * + * For keys that are hot/expensive, consider using getWithSetCallback() instead. 
+ * + * @param string $key Cache key + * @param mixed $curTTL Approximate TTL left on the key if present [returned] + * @param array $checkKeys List of "check" keys + * @return mixed Value of cache key or false on failure + */ + final public function get( $key, &$curTTL = null, array $checkKeys = array() ) { + $curTTLs = array(); + $values = $this->getMulti( array( $key ), $curTTLs, $checkKeys ); + $curTTL = isset( $curTTLs[$key] ) ? $curTTLs[$key] : null; + + return isset( $values[$key] ) ? $values[$key] : false; + } + + /** + * Fetch the value of several keys from cache + * + * @see WANObjectCache::get() + * + * @param array $keys List of cache keys + * @param array $curTTLs Map of (key => approximate TTL left) for existing keys [returned] + * @param array $checkKeys List of "check" keys + * @return array Map of (key => value) for keys that exist + */ + final public function getMulti( + array $keys, &$curTTLs = array(), array $checkKeys = array() + ) { + $result = array(); + $curTTLs = array(); + + $vPrefixLen = strlen( self::VALUE_KEY_PREFIX ); + $valueKeys = self::prefixCacheKeys( $keys, self::VALUE_KEY_PREFIX ); + $checkKeys = self::prefixCacheKeys( $checkKeys, self::TIME_KEY_PREFIX ); + + // Fetch all of the raw values + $wrappedValues = $this->cache->getMulti( array_merge( $valueKeys, $checkKeys ) ); + $now = microtime( true ); + + // Get/initialize the timestamp of all the "check" keys + $checkKeyTimes = array(); + foreach ( $checkKeys as $checkKey ) { + $timestamp = isset( $wrappedValues[$checkKey] ) + ? self::parsePurgeValue( $wrappedValues[$checkKey] ) + : false; + if ( !is_float( $timestamp ) ) { + // Key is not set or invalid; regenerate + $this->cache->add( $checkKey, + self::PURGE_VAL_PREFIX . 
$now, self::CHECK_KEY_TTL ); + $timestamp = $now; + } + + $checkKeyTimes[] = $timestamp; + } + + // Get the main cache value for each key and validate them + foreach ( $valueKeys as $vKey ) { + if ( !isset( $wrappedValues[$vKey] ) ) { + continue; // not found + } + + $key = substr( $vKey, $vPrefixLen ); // unprefix + + list( $value, $curTTL ) = $this->unwrap( $wrappedValues[$vKey], $now ); + if ( $value !== false ) { + $result[$key] = $value; + foreach ( $checkKeyTimes as $checkKeyTime ) { + // Force dependant keys to be invalid for a while after purging + // to reduce race conditions involving stale data getting cached + $safeTimestamp = $checkKeyTime + self::HOLDOFF_TTL; + if ( $safeTimestamp >= $wrappedValues[$vKey][self::FLD_TIME] ) { + $curTTL = min( $curTTL, $checkKeyTime - $now ); + } + } + } + + $curTTLs[$key] = $curTTL; + } + + return $result; + } + + /** + * Set the value of a key from cache + * + * Simply calling this method when source data changes is not valid because + * the changes do not replicate to the other WAN sites. In that case, delete() + * should be used instead. This method is intended for use on cache misses. + * + * @param string $key Cache key + * @param mixed $value + * @param integer $ttl Seconds to live [0=forever] + * @return bool Success + */ + final public function set( $key, $value, $ttl = 0 ) { + $key = self::VALUE_KEY_PREFIX . $key; + $wrapped = $this->wrap( $value, $ttl ); + + $func = function ( $cache, $key, $cWrapped ) use ( $wrapped ) { + return ( is_string( $cWrapped ) ) + ? false // key is tombstoned; do nothing + : $wrapped; + }; + + return $this->cache->merge( $key, $func, $ttl, 1 ); + } + + /** + * Purge a key from all clusters + * + * This should only be called when the underlying data (being cached) + * changes in a significant way. This deletes the key and starts a hold-off + * period where the key cannot be written to for a few seconds (HOLDOFF_TTL). 
+ * This is done to avoid the following race condition: + * a) Some DB data changes and delete() is called on a corresponding key + * b) A request refills the key with a stale value from a lagged DB + * c) The stale value is stuck there until the key is expired/evicted + * + * This is implemented by storing a special "tombstone" value at the cache + * key that this class recognizes; get() calls will return false for the key + * and any set() calls will refuse to replace tombstone values at the key. + * For this to always avoid writing stale values, the following must hold: + * a) Replication lag is bounded to being less than HOLDOFF_TTL; or + * b) If lag is higher, the DB will have gone into read-only mode already + * + * If called twice on the same key, then the last hold-off TTL takes + * precedence. For idempotence, the $ttl should not vary for different + * delete() calls on the same key. Also note that lowering $ttl reduces + * the effective range of the 'lockTSE' parameter to getWithSetCallback(). + * + * @param string $key Cache key + * @param integer $ttl How long to block writes to the key [seconds] + * @return bool True if the item was purged or not found, false on failure + */ + final public function delete( $key, $ttl = self::HOLDOFF_TTL ) { + $key = self::VALUE_KEY_PREFIX . $key; + // Avoid indefinite key salting for sanity + $ttl = max( $ttl, 1 ); + // Update the local cluster immediately + $ok = $this->cache->set( $key, self::PURGE_VAL_PREFIX . microtime( true ), $ttl ); + // Publish the purge to all clusters + return $this->relayPurge( $key, $ttl ) && $ok; + } + + /** + * Fetch the value of a timestamp "check" key + * + * The key will be *initialized* to the current time if not set, + * so only call this method if this behavior is actually desired + * + * The timestamp can be used to check whether a cached value is valid. + * Callers should not assume that this returns the same timestamp in + * all datacenters due to relay delays. 
+ * + * The level of staleness can roughly be estimated from this key, but + * if the key was evicted from cache, such calculations may show the + * time since expiry as ~0 seconds. + * + * Note that "check" keys won't collide with other regular keys + * + * @param string $key + * @return float UNIX timestamp of the key + */ + final public function getCheckKeyTime( $key ) { + $key = self::TIME_KEY_PREFIX . $key; + + $time = self::parsePurgeValue( $this->cache->get( $key ) ); + if ( $time === false ) { + // Casting assures identical floats for the next getCheckKeyTime() calls + $time = (string)microtime( true ); + $this->cache->add( $key, self::PURGE_VAL_PREFIX . $time, self::CHECK_KEY_TTL ); + $time = (float)$time; + } + + return $time; + } + + /** + * Purge a "check" key from all clusters, invalidating keys that use it + * + * This should only be called when the underlying data (being cached) + * changes in a significant way, and it is impractical to call delete() + * on all keys that should be changed. When get() is called on those + * keys, the relevant "check" keys must be supplied for this to work. + * + * The "check" key essentially represents a last-modified field. + * It is set in the future a few seconds when this is called, to + * avoid race conditions where dependent keys get updated with a + * stale value (e.g. from a DB slave). + * + * This is typically useful for keys with static names or some cases + * dynamically generated names where a low number of combinations exist. + * When a few important keys get a large number of hits, a high cache + * time is usually desired as well as lockTSE logic. The resetCheckKey() + * method is less appropriate in such cases since the "time since expiry" + * cannot be inferred. 
+ * + * Note that "check" keys won't collide with other regular keys + * + * @see WANObjectCache::get() + * + * @param string $key Cache key + * @return bool True if the item was purged or not found, false on failure + */ + final public function touchCheckKey( $key ) { + $key = self::TIME_KEY_PREFIX . $key; + // Update the local cluster immediately + $ok = $this->cache->set( $key, + self::PURGE_VAL_PREFIX . microtime( true ), self::CHECK_KEY_TTL ); + // Publish the purge to all clusters + return $this->relayPurge( $key, self::CHECK_KEY_TTL ) && $ok; + } + + /** + * Delete a "check" key from all clusters, invalidating keys that use it + * + * This is similar to touchCheckKey() in that keys using it via + * getWithSetCallback() will be invalidated. The differences are: + * a) The timestamp will be deleted from all caches and lazily + * re-initialized when accessed (rather than set everywhere) + * b) Thus, dependent keys will be known to be invalid, but not + * for how long (they are treated as "just" purged), which + * effects any lockTSE logic in getWithSetCallback() + * The advantage is that this does not place high TTL keys on every cache + * server, making it better for code that will cache many different keys + * and either does not use lockTSE or uses a low enough TTL anyway. + * + * This is typically useful for keys with dynamically generated names + * where a high number of combinations exist. + * + * Note that "check" keys won't collide with other regular keys + * + * @see WANObjectCache::touchCheckKey() + * @see WANObjectCache::get() + * + * @param string $key Cache key + * @return bool True if the item was purged or not found, false on failure + */ + final public function resetCheckKey( $key ) { + $key = self::TIME_KEY_PREFIX . 
$key; + // Update the local cluster immediately + $ok = $this->cache->delete( $key ); + // Publish the purge to all clusters + return $this->relayDelete( $key ) && $ok; + } + + /** + * Method to fetch/regenerate cache keys + * + * On cache miss, the key will be set to the callback result, + * unless the callback returns false. The arguments supplied are: + * (current value or false, &$ttl) + * The callback function returns the new value given the current + * value (false if not present). Preemptive re-caching and $checkKeys + * can result in a non-false current value. The TTL of the new value + * can be set dynamically by altering $ttl in the callback (by reference). + * + * Usually, callbacks ignore the current value, but it can be used + * to maintain "most recent X" values that come from time or sequence + * based source data, provided that the "as of" id/time is tracked. + * + * Usage of $checkKeys is similar to get()/getMulti(). However, + * rather than the caller having to inspect a "current time left" + * variable (e.g. $curTTL, $curTTLs), a cache regeneration will be + * triggered using the callback. + * + * The simplest way to avoid stampedes for hot keys is to use + * the 'lockTSE' option in $opts. If cache purges are needed, also: + * a) Pass $key into $checkKeys + * b) Use touchCheckKey( $key ) instead of delete( $key ) + * Following this pattern lets the old cache be used until a + * single thread updates it as needed. Also consider tweaking + * the 'lowTTL' parameter. + * + * Example usage: + * @code + * $key = wfMemcKey( 'cat-recent-actions', $catId ); + * // Function that derives the new key value given the old value + * $callback = function( $cValue, &$ttl ) { ... 
}; + * // Get the key value from cache or from source on cache miss; + * // try to only let one cluster thread manage doing cache updates + * $opts = array( 'lockTSE' => 5, 'lowTTL' => 10 ); + * $value = $cache->getWithSetCallback( $key, $callback, 60, array(), $opts ); + * @endcode + * + * Example usage: + * @code + * $key = wfMemcKey( 'cat-state', $catId ); + * // The "check" keys that represent things the value depends on; + * // Calling touchCheckKey() on them invalidates "cat-state" + * $checkKeys = array( + * wfMemcKey( 'water-bowls', $houseId ), + * wfMemcKey( 'food-bowls', $houseId ), + * wfMemcKey( 'people-present', $houseId ) + * ); + * // Function that derives the new key value + * $callback = function() { ... }; + * // Get the key value from cache or from source on cache miss; + * // try to only let one cluster thread manage doing cache updates + * $opts = array( 'lockTSE' => 5, 'lowTTL' => 10 ); + * $value = $cache->getWithSetCallback( $key, $callback, 60, $checkKeys, $opts ); + * @endcode + * + * @see WANObjectCache::get() + * + * @param string $key Cache key + * @param integer $ttl Seconds to live for key updates. Special values are: + * - WANObjectCache::TTL_NONE : Cache forever + * - WANObjectCache::TTL_UNCACHEABLE: Do not cache at all + * @param callable $callback Value generation function + * @param array $opts Options map: + * - checkKeys: List of "check" keys. + * - lowTTL: Consider pre-emptive updates when the current TTL (sec) of the key is less than + * this. It becomes more likely over time, becoming a certainty once the key is expired. + * Default: WANObjectCache::LOW_TTL seconds. + * - lockTSE: If the key is tombstoned or expired (by checkKeys) less than this many seconds + * ago, then try to have a single thread handle cache regeneration at any given time. + * Other threads will try to use stale values if possible. 
If, on miss, the time since + * expiration is low, the assumption is that the key is hot and that a stampede is worth + * avoiding. Setting this above WANObjectCache::HOLDOFF_TTL makes no difference. The + * higher this is set, the higher the worst-case staleness can be. + * Use WANObjectCache::TSE_NONE to disable this logic. Default: WANObjectCache::TSE_NONE. + * - tempTTL : TTL of the temp key used to cache values while a key is tombstoned. + * This avoids excessive regeneration of hot keys on delete() but may + * result in stale values. + * @return mixed Value to use for the key + */ + final public function getWithSetCallback( + $key, $ttl, $callback, array $opts = array(), $oldOpts = array() + ) { + // Back-compat with 1.26: Swap $ttl and $callback + if ( is_int( $callback ) ) { + $temp = $ttl; + $ttl = $callback; + $callback = $temp; + } + // Back-compat with 1.26: $checkKeys as separate parameter + if ( $oldOpts || ( is_array( $opts ) && isset( $opts[0] ) ) ) { + $checkKeys = $opts; + $opts = $oldOpts; + } else { + $checkKeys = isset( $opts['checkKeys'] ) ? $opts['checkKeys'] : array(); + } + + $lowTTL = isset( $opts['lowTTL'] ) ? $opts['lowTTL'] : min( self::LOW_TTL, $ttl ); + $lockTSE = isset( $opts['lockTSE'] ) ? $opts['lockTSE'] : -1; + $tempTTL = isset( $opts['tempTTL'] ) ? 
$opts['tempTTL'] : self::TEMP_TTL; + + // Get the current key value + $curTTL = null; + $cValue = $this->get( $key, $curTTL, $checkKeys ); // current value + $value = $cValue; // return value + + // Determine if a regeneration is desired + if ( $value !== false && $curTTL > 0 && !$this->worthRefresh( $curTTL, $lowTTL ) ) { + return $value; + } + + // A deleted key with a negative TTL left must be tombstoned + $isTombstone = ( $curTTL !== null && $value === false ); + // Assume a key is hot if requested soon after invalidation + $isHot = ( $curTTL !== null && $curTTL <= 0 && abs( $curTTL ) <= $lockTSE ); + + $lockAcquired = false; + if ( $isHot ) { + // Acquire a cluster-local non-blocking lock + if ( $this->cache->lock( $key, 0, self::LOCK_TTL ) ) { + // Lock acquired; this thread should update the key + $lockAcquired = true; + } elseif ( $value !== false ) { + // If it cannot be acquired; then the stale value can be used + return $value; + } + } + + if ( !$lockAcquired && ( $isTombstone || $isHot ) ) { + // Use the stash value for tombstoned keys to reduce regeneration load. + // For hot keys, either another thread has the lock or the lock failed; + // use the stash value from the last thread that regenerated it. + $value = $this->cache->get( self::STASH_KEY_PREFIX . $key ); + if ( $value !== false ) { + return $value; + } + } + + if ( !is_callable( $callback ) ) { + throw new InvalidArgumentException( "Invalid cache miss callback provided." ); + } + + // Generate the new value from the callback... + $value = call_user_func_array( $callback, array( $cValue, &$ttl ) ); + // When delete() is called, writes are write-holed by the tombstone, + // so use a special stash key to pass the new value around threads. + if ( $value !== false && ( $isHot || $isTombstone ) && $ttl >= 0 ) { + $this->cache->set( self::STASH_KEY_PREFIX . 
$key, $value, $tempTTL ); + } + + if ( $lockAcquired ) { + $this->cache->unlock( $key ); + } + + if ( $value !== false && $ttl >= 0 ) { + // Update the cache; this will fail if the key is tombstoned + $this->set( $key, $value, $ttl ); + } + + return $value; + } + + /** + * Get the "last error" registered; clearLastError() should be called manually + * @return int ERR_* constant for the "last error" registry + */ + final public function getLastError() { + if ( $this->lastRelayError ) { + // If the cache and the relayer failed, focus on the latter. + // An update not making it to the relayer means it won't show up + // in other DCs (nor will consistent re-hashing see up-to-date values). + // On the other hand, if just the cache update failed, then it should + // eventually be applied by the relayer. + return $this->lastRelayError; + } + + $code = $this->cache->getLastError(); + switch ( $code ) { + case BagOStuff::ERR_NONE: + return self::ERR_NONE; + case BagOStuff::ERR_NO_RESPONSE: + return self::ERR_NO_RESPONSE; + case BagOStuff::ERR_UNREACHABLE: + return self::ERR_UNREACHABLE; + default: + return self::ERR_UNEXPECTED; + } + } + + /** + * Clear the "last error" registry + */ + final public function clearLastError() { + $this->cache->clearLastError(); + $this->lastRelayError = self::ERR_NONE; + } + + /** + * Do the actual async bus purge of a key + * + * This must set the key to "PURGED:<UNIX timestamp>" + * + * @param string $key Cache key + * @param integer $ttl How long to keep the tombstone [seconds] + * @return bool Success + */ + protected function relayPurge( $key, $ttl ) { + $event = $this->cache->modifySimpleRelayEvent( array( + 'cmd' => 'set', + 'key' => $key, + 'val' => 'PURGED:$UNIXTIME$', + 'ttl' => max( $ttl, 1 ), + 'sbt' => true, // substitute $UNIXTIME$ with actual microtime + ) ); + + $ok = $this->relayer->notify( "{$this->pool}:purge", $event ); + if ( !$ok ) { + $this->lastRelayError = self::ERR_RELAY; + } + + return $ok; + } + + /** + * Do the 
actual async bus delete of a key + * + * @param string $key Cache key + * @return bool Success + */ + protected function relayDelete( $key ) { + $event = $this->cache->modifySimpleRelayEvent( array( + 'cmd' => 'delete', + 'key' => $key, + ) ); + + $ok = $this->relayer->notify( "{$this->pool}:purge", $event ); + if ( !$ok ) { + $this->lastRelayError = self::ERR_RELAY; + } + + return $ok; + } + + /** + * Check if a key should be regenerated (using random probability) + * + * This returns false if $curTTL >= $lowTTL. Otherwise, the chance + * of returning true increases steadily from 0% to 100% as the $curTTL + * moves from $lowTTL to 0 seconds. This handles widely varying + * levels of cache access traffic. + * + * @param float $curTTL Approximate TTL left on the key if present + * @param float $lowTTL Consider a refresh when $curTTL is less than this + * @return bool + */ + protected function worthRefresh( $curTTL, $lowTTL ) { + if ( $curTTL >= $lowTTL ) { + return false; + } elseif ( $curTTL <= 0 ) { + return true; + } + + $chance = ( 1 - $curTTL / $lowTTL ); + + return mt_rand( 1, 1e9 ) <= 1e9 * $chance; + } + + /** + * Do not use this method outside WANObjectCache + * + * @param mixed $value + * @param integer $ttl [0=forever] + * @return array + */ + protected function wrap( $value, $ttl ) { + return array( + self::FLD_VERSION => self::VERSION, + self::FLD_VALUE => $value, + self::FLD_TTL => $ttl, + self::FLD_TIME => microtime( true ) + ); + } + + /** + * Do not use this method outside WANObjectCache + * + * @param array|string|bool $wrapped + * @param float $now Current Unix timestamp (preferably pre-query) + * @return array (mixed; false if absent/invalid, current time left) + */ + protected function unwrap( $wrapped, $now ) { + // Check if the value is a tombstone + $purgeTimestamp = self::parsePurgeValue( $wrapped ); + if ( is_float( $purgeTimestamp ) ) { + // Purged values should always have a negative current $ttl + $curTTL = min( -0.000001, 
$purgeTimestamp - $now ); + return array( false, $curTTL ); + } + + if ( !is_array( $wrapped ) // not found + || !isset( $wrapped[self::FLD_VERSION] ) // wrong format + || $wrapped[self::FLD_VERSION] !== self::VERSION // wrong version + ) { + return array( false, null ); + } + + if ( $wrapped[self::FLD_TTL] > 0 ) { + // Get the approximate time left on the key + $age = $now - $wrapped[self::FLD_TIME]; + $curTTL = max( $wrapped[self::FLD_TTL] - $age, 0.0 ); + } else { + // Key had no TTL, so the time left is unbounded + $curTTL = INF; + } + + return array( $wrapped[self::FLD_VALUE], $curTTL ); + } + + /** + * @param array $keys + * @param string $prefix + * @return string[] + */ + protected static function prefixCacheKeys( array $keys, $prefix ) { + $res = array(); + foreach ( $keys as $key ) { + $res[] = $prefix . $key; + } + + return $res; + } + + /** + * @param string $value String like "PURGED:<timestamp>" + * @return float|bool UNIX timestamp or false on failure + */ + protected static function parsePurgeValue( $value ) { + $m = array(); + if ( is_string( $value ) && + preg_match( '/^' . self::PURGE_VAL_PREFIX . 
'([^:]+)$/', $value, $m ) + ) { + return (float)$m[1]; + } else { + return false; + } + } +} diff --git a/includes/libs/objectcache/WinCacheBagOStuff.php b/includes/libs/objectcache/WinCacheBagOStuff.php index 53625746..c480aa08 100644 --- a/includes/libs/objectcache/WinCacheBagOStuff.php +++ b/includes/libs/objectcache/WinCacheBagOStuff.php @@ -28,15 +28,7 @@ * @ingroup Cache */ class WinCacheBagOStuff extends BagOStuff { - - /** - * Get a value from the WinCache object cache - * - * @param string $key Cache key - * @param int $casToken [optional] Cas token - * @return mixed - */ - public function get( $key, &$casToken = null ) { + public function get( $key, &$casToken = null, $flags = 0 ) { $val = wincache_ucache_get( $key ); $casToken = $val; @@ -48,14 +40,6 @@ class WinCacheBagOStuff extends BagOStuff { return $val; } - /** - * Store a value in the WinCache object cache - * - * @param string $key Cache key - * @param mixed $value Value to store - * @param int $expire Expiration time - * @return bool - */ public function set( $key, $value, $expire = 0 ) { $result = wincache_ucache_set( $key, serialize( $value ), $expire ); @@ -64,25 +48,10 @@ class WinCacheBagOStuff extends BagOStuff { return ( is_array( $result ) && $result === array() ) || $result; } - /** - * Store a value in the WinCache object cache, race condition-safe - * - * @param int $casToken Cas token - * @param string $key Cache key - * @param int $value Object to store - * @param int $exptime Expiration time - * @return bool - */ protected function cas( $casToken, $key, $value, $exptime = 0 ) { return wincache_ucache_cas( $key, $casToken, serialize( $value ) ); } - /** - * Remove a value from the WinCache object cache - * - * @param string $key Cache key - * @return bool - */ public function delete( $key ) { wincache_ucache_delete( $key ); diff --git a/includes/libs/objectcache/XCacheBagOStuff.php b/includes/libs/objectcache/XCacheBagOStuff.php index cfee9236..9dbff6f1 100644 --- 
a/includes/libs/objectcache/XCacheBagOStuff.php +++ b/includes/libs/objectcache/XCacheBagOStuff.php @@ -28,14 +28,7 @@ * @ingroup Cache */ class XCacheBagOStuff extends BagOStuff { - /** - * Get a value from the XCache object cache - * - * @param string $key Cache key - * @param mixed $casToken Cas token - * @return mixed - */ - public function get( $key, &$casToken = null ) { + public function get( $key, &$casToken = null, $flags = 0 ) { $val = xcache_get( $key ); if ( is_string( $val ) ) { @@ -51,14 +44,6 @@ class XCacheBagOStuff extends BagOStuff { return $val; } - /** - * Store a value in the XCache object cache - * - * @param string $key Cache key - * @param mixed $value Object to store - * @param int $expire Expiration time - * @return bool - */ public function set( $key, $value, $expire = 0 ) { if ( !$this->isInteger( $value ) ) { $value = serialize( $value ); @@ -68,12 +53,6 @@ class XCacheBagOStuff extends BagOStuff { return true; } - /** - * Remove a value from the XCache object cache - * - * @param string $key Cache key - * @return bool - */ public function delete( $key ) { xcache_unset( $key ); return true; diff --git a/includes/libs/virtualrest/ParsoidVirtualRESTService.php b/includes/libs/virtualrest/ParsoidVirtualRESTService.php index 32a27f79..43dfab3c 100644 --- a/includes/libs/virtualrest/ParsoidVirtualRESTService.php +++ b/includes/libs/virtualrest/ParsoidVirtualRESTService.php @@ -24,21 +24,27 @@ */ class ParsoidVirtualRESTService extends VirtualRESTService { /** - * Example requests: - * GET /local/v1/page/$title/html/$oldid - * * $oldid is optional - * POST /local/v1/transform/html/to/wikitext/$title/$oldid + * Example Parsoid v3 requests: + * GET /local/v3/page/html/$title/{$revision} + * * $revision is optional + * POST /local/v3/transform/html/to/wikitext/{$title}{/$revision} * * body: array( 'html' => ... ) - * * $title and $oldid are optional - * POST /local/v1/transform/wikitext/to/html/$title - * * body: array( 'wikitext' => ... 
) or array( 'wikitext' => ..., 'body' => true/false ) + * * $title and $revision are optional + * POST /local/v3/transform/wikitext/to/html/{$title}{/$revision} + * * body: array( 'wikitext' => ... ) or array( 'wikitext' => ..., 'bodyOnly' => true/false ) * * $title is optional + * * $revision is optional + * + * There are also deprecated "v1" requests; see onParsoid1Request + * for details. * @param array $params Key/value map * - url : Parsoid server URL - * - prefix : Parsoid prefix for this wiki + * - domain : Wiki domain to use * - timeout : Parsoid timeout (optional) * - forwardCookies : Cookies to forward to Parsoid, or false. (optional) * - HTTPProxy : Parsoid HTTP proxy (optional) + * - restbaseCompat : whether to parse URL as if they were meant for RESTBase + * boolean (optional) */ public function __construct( array $params ) { // for backwards compatibility: @@ -46,7 +52,30 @@ class ParsoidVirtualRESTService extends VirtualRESTService { $params['url'] = $params['URL']; unset( $params['URL'] ); } - parent::__construct( $params ); + // set up defaults and merge them with the given params + $mparams = array_merge( array( + 'name' => 'parsoid', + 'url' => 'http://localhost:8000/', + 'prefix' => 'localhost', + 'domain' => 'localhost', + 'forwardCookies' => false, + 'HTTPProxy' => null, + ), $params ); + // Ensure that the url parameter has a trailing slash. + $mparams['url'] = preg_replace( + '#/?$#', + '/', + $mparams['url'] + ); + // Ensure the correct domain format: strip protocol, port, + // and trailing slash if present. This lets us use + // $wgCanonicalServer as a default value, which is very convenient. 
+ $mparams['domain'] = preg_replace( + '/^(https?:\/\/)?([^\/:]+?)(:\d+)?\/?$/', + '$2', + $mparams['domain'] + ); + parent::__construct( $mparams ); } public function onRequests( array $reqs, Closure $idGeneratorFunc ) { @@ -56,71 +85,143 @@ class ParsoidVirtualRESTService extends VirtualRESTService { list( $targetWiki, // 'local' - $version, // 'v1' - $reqType // 'page' or 'transform' + $version, // 'v3' ('v1' for restbase compatibility) + $reqType, // 'page' or 'transform' + $format, // 'html' or 'wikitext' + // $title (optional) + // $revision (optional) ) = $parts; + if ( isset( $this->params['restbaseCompat'] ) && $this->params['restbaseCompat'] ) { + if ( $version !== 'v1' ) { + throw new Exception( "Only RESTBase v1 API is supported." ); + } + # Map RESTBase v1 API to Parsoid v3 API (pretty easy) + $req['url'] = preg_replace( '#^local/v1/#', 'local/v3/', $req['url'] ); + } elseif ( $version !== 'v3' ) { + $result[$key] = $this->onParsoid1Request( $req, $idGeneratorFunc ); + continue; + } if ( $targetWiki !== 'local' ) { + throw new Exception( "Only 'local' target wiki is currently supported" ); - } elseif ( $version !== 'v1' ) { - throw new Exception( "Only version 1 exists" ); - } elseif ( $reqType !== 'page' && $reqType !== 'transform' ) { - throw new Exception( "Request type must be either 'page' or 'transform'" ); } - - $req['url'] = $this->params['url'] . '/' . urlencode( $this->params['prefix'] ) . '/'; - - if ( $reqType === 'page' ) { - $title = $parts[3]; - if ( $parts[4] !== 'html' ) { - throw new Exception( "Only 'html' output format is currently supported" ); - } - if ( isset( $parts[5] ) ) { - $req['url'] .= $title . '?oldid=' . 
$parts[5]; - } else { - $req['url'] .= $title; - } - } elseif ( $reqType === 'transform' ) { - if ( $parts[4] !== 'to' ) { - throw new Exception( "Part index 4 is not 'to'" ); - } - - if ( isset( $parts[6] ) ) { - $req['url'] .= $parts[6]; - } - - if ( $parts[3] === 'html' & $parts[5] === 'wikitext' ) { - if ( !isset( $req['body']['html'] ) ) { - throw new Exception( "You must set an 'html' body key for this request" ); - } - if ( isset( $parts[7] ) ) { - $req['body']['oldid'] = $parts[7]; - } - } elseif ( $parts[3] == 'wikitext' && $parts[5] == 'html' ) { - if ( !isset( $req['body']['wikitext'] ) ) { - throw new Exception( "You must set a 'wikitext' body key for this request" ); - } - $req['body']['wt'] = $req['body']['wikitext']; - unset( $req['body']['wikitext'] ); - } else { - throw new Exception( "Transformation unsupported" ); - } + if ( $reqType !== 'page' && $reqType !== 'transform' ) { + throw new Exception( "Request action must be either 'page' or 'transform'" ); } - - if ( isset( $this->params['HTTPProxy'] ) && $this->params['HTTPProxy'] ) { + if ( $format !== 'html' && $format !== 'wikitext' ) { + throw new Exception( "Request format must be either 'html' or 'wt'" ); + } + // replace /local/ with the current domain + $req['url'] = preg_replace( '#^local/#', $this->params['domain'] . '/', $req['url'] ); + // and prefix it with the service URL + $req['url'] = $this->params['url'] . $req['url']; + // set the appropriate proxy, timeout and headers + if ( $this->params['HTTPProxy'] ) { $req['proxy'] = $this->params['HTTPProxy']; } - if ( isset( $this->params['timeout'] ) ) { + if ( $this->params['timeout'] != null ) { $req['reqTimeout'] = $this->params['timeout']; } - - // Forward cookies - if ( isset( $this->params['forwardCookies'] ) ) { + if ( $this->params['forwardCookies'] ) { $req['headers']['Cookie'] = $this->params['forwardCookies']; } - $result[$key] = $req; } return $result; } + + /** + * Remap a Parsoid v1 request to a Parsoid v3 request. 
+ * + * Example Parsoid v1 requests: + * GET /local/v1/page/$title/html/$oldid + * * $oldid is optional + * POST /local/v1/transform/html/to/wikitext/$title/$oldid + * * body: array( 'html' => ... ) + * * $title and $oldid are optional + * POST /local/v1/transform/wikitext/to/html/$title + * * body: array( 'wikitext' => ... ) or array( 'wikitext' => ..., 'body' => true/false ) + * * $title is optional + * + * NOTE: the POST APIs aren't "real" Parsoid v1 APIs, they are just what + * Visual Editor "pretends" the V1 API is like. A previous version of + * ParsoidVirtualRESTService translated these to the "real" Parsoid v1 + * API. We now translate these to the "real" Parsoid v3 API. + */ + public function onParsoid1Request( array $req, Closure $idGeneratorFunc ) { + + $parts = explode( '/', $req['url'] ); + list( + $targetWiki, // 'local' + $version, // 'v1' + $reqType // 'page' or 'transform' + ) = $parts; + if ( $targetWiki !== 'local' ) { + throw new Exception( "Only 'local' target wiki is currently supported" ); + } elseif ( $version !== 'v1' ) { + throw new Exception( "Only v1 and v3 are supported." ); + } elseif ( $reqType !== 'page' && $reqType !== 'transform' ) { + throw new Exception( "Request type must be either 'page' or 'transform'" ); + } + $req['url'] = $this->params['url'] . $this->params['domain'] . '/v3/'; + if ( $reqType === 'page' ) { + $title = $parts[3]; + if ( $parts[4] !== 'html' ) { + throw new Exception( "Only 'html' output format is currently supported" ); + } + $req['url'] .= 'page/html/' . $title; + if ( isset( $parts[5] ) ) { + $req['url'] .= '/' . $parts[5]; + } elseif ( isset( $req['query']['oldid'] ) && $req['query']['oldid'] ) { + $req['url'] .= '/' . $req['query']['oldid']; + unset( $req['query']['oldid'] ); + } + } elseif ( $reqType === 'transform' ) { + $req['url'] .= 'transform/'. $parts[3] . '/to/' . $parts[5]; + // the title + if ( isset( $parts[6] ) ) { + $req['url'] .= '/' . 
$parts[6]; + } + // revision id + if ( isset( $parts[7] ) ) { + $req['url'] .= '/' . $parts[7]; + } elseif ( isset( $req['body']['oldid'] ) && $req['body']['oldid'] ) { + $req['url'] .= '/' . $req['body']['oldid']; + unset( $req['body']['oldid'] ); + } + if ( $parts[4] !== 'to' ) { + throw new Exception( "Part index 4 is not 'to'" ); + } + if ( $parts[3] === 'html' && $parts[5] === 'wikitext' ) { + if ( !isset( $req['body']['html'] ) ) { + throw new Exception( "You must set an 'html' body key for this request" ); + } + } elseif ( $parts[3] == 'wikitext' && $parts[5] == 'html' ) { + if ( !isset( $req['body']['wikitext'] ) ) { + throw new Exception( "You must set a 'wikitext' body key for this request" ); + } + if ( isset( $req['body']['body'] ) ) { + $req['body']['bodyOnly'] = $req['body']['body']; + unset( $req['body']['body'] ); + } + } else { + throw new Exception( "Transformation unsupported" ); + } + } + // set the appropriate proxy, timeout and headers + if ( $this->params['HTTPProxy'] ) { + $req['proxy'] = $this->params['HTTPProxy']; + } + if ( $this->params['timeout'] != null ) { + $req['reqTimeout'] = $this->params['timeout']; + } + if ( $this->params['forwardCookies'] ) { + $req['headers']['Cookie'] = $this->params['forwardCookies']; + } + + return $req; + + } + } diff --git a/includes/libs/virtualrest/RestbaseVirtualRESTService.php b/includes/libs/virtualrest/RestbaseVirtualRESTService.php index 8fe5b921..3a7bc587 100644 --- a/includes/libs/virtualrest/RestbaseVirtualRESTService.php +++ b/includes/libs/virtualrest/RestbaseVirtualRESTService.php @@ -1,6 +1,6 @@ <?php /** - * Virtual HTTP service client for Restbase + * Virtual HTTP service client for RESTBase * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,23 +19,23 @@ */ /** - * Virtual REST service for Restbase + * Virtual REST service for RESTBase * @since 1.25 */ class RestbaseVirtualRESTService 
extends VirtualRESTService { /** - * Example requests: - * GET /local/v1/page/{title}/html{/revision} + * Example RESTBase v1 requests: + * GET /local/v1/page/html/{title}{/revision} * POST /local/v1/transform/html/to/wikitext{/title}{/revision} * * body: array( 'html' => ... ) * POST /local/v1/transform/wikitext/to/html{/title}{/revision} * * body: array( 'wikitext' => ... ) or array( 'wikitext' => ..., 'bodyOnly' => true/false ) * * @param array $params Key/value map - * - url : Restbase server URL + * - url : RESTBase server URL * - domain : Wiki domain to use * - timeout : request timeout in seconds (optional) - * - forwardCookies : cookies to forward to Restbase/Parsoid (as a Cookie + * - forwardCookies : cookies to forward to RESTBase/Parsoid (as a Cookie * header string) or false (optional) * Note: forwardCookies will in the future be a boolean * only, signifing request cookies should be forwarded @@ -48,18 +48,27 @@ class RestbaseVirtualRESTService extends VirtualRESTService { public function __construct( array $params ) { // set up defaults and merge them with the given params $mparams = array_merge( array( - 'url' => 'http://localhost:7231', + 'name' => 'restbase', + 'url' => 'http://localhost:7231/', 'domain' => 'localhost', 'timeout' => 100, 'forwardCookies' => false, 'HTTPProxy' => null, 'parsoidCompat' => false ), $params ); - // ensure the correct domain format + // Ensure that the url parameter has a trailing slash. + $mparams['url'] = preg_replace( + '#/?$#', + '/', + $mparams['url'] + ); + // Ensure the correct domain format: strip protocol, port, + // and trailing slash if present. This lets us use + // $wgCanonicalServer as a default value, which is very convenient. 
$mparams['domain'] = preg_replace( - '/^(https?:\/\/)?([^\/:]+?)(\/|:\d+\/?)?$/', - '$2', - $mparams['domain'] + '/^(https?:\/\/)?([^\/:]+?)(:\d+)?\/?$/', + '$2', + $mparams['domain'] ); parent::__construct( $mparams ); } @@ -73,7 +82,7 @@ class RestbaseVirtualRESTService extends VirtualRESTService { $result = array(); foreach ( $reqs as $key => $req ) { // replace /local/ with the current domain - $req['url'] = preg_replace( '/^\/local\//', '/' . $this->params['domain'] . '/', $req['url'] ); + $req['url'] = preg_replace( '#^local/#', $this->params['domain'] . '/', $req['url'] ); // and prefix it with the service URL $req['url'] = $this->params['url'] . $req['url']; // set the appropriate proxy, timeout and headers @@ -94,83 +103,164 @@ class RestbaseVirtualRESTService extends VirtualRESTService { } /** - * Remaps Parsoid requests to Restbase paths + * Remaps Parsoid v1/v3 requests to RESTBase v1 requests. */ public function onParsoidRequests( array $reqs, Closure $idGeneratorFunc ) { $result = array(); foreach ( $reqs as $key => $req ) { $parts = explode( '/', $req['url'] ); - list( - $targetWiki, // 'local' - $version, // 'v1' - $reqType // 'page' or 'transform' - ) = $parts; - if ( $targetWiki !== 'local' ) { - throw new Exception( "Only 'local' target wiki is currently supported" ); - } elseif ( $reqType !== 'page' && $reqType !== 'transform' ) { - throw new Exception( "Request type must be either 'page' or 'transform'" ); + if ( $parts[1] === 'v3' ) { + $result[$key] = $this->onParsoid3Request( $req, $idGeneratorFunc ); + } elseif ( $parts[1] === 'v1' ) { + $result[$key] = $this->onParsoid1Request( $req, $idGeneratorFunc ); + } else { + throw new Exception( "Only v1 and v3 are supported." ); } - $req['url'] = $this->params['url'] . '/' . $this->params['domain'] . '/v1/' . $reqType . 
'/'; - if ( $reqType === 'page' ) { - $title = $parts[3]; - if ( $parts[4] !== 'html' ) { - throw new Exception( "Only 'html' output format is currently supported" ); - } - $req['url'] .= 'html/' . $title; - if ( isset( $parts[5] ) ) { - $req['url'] .= '/' . $parts[5]; - } elseif ( isset( $req['query']['oldid'] ) && $req['query']['oldid'] ) { - $req['url'] .= '/' . $req['query']['oldid']; - unset( $req['query']['oldid'] ); - } - } elseif ( $reqType === 'transform' ) { - // from / to transform - $req['url'] .= $parts[3] . '/to/' . $parts[5]; - // the title - if ( isset( $parts[6] ) ) { - $req['url'] .= '/' . $parts[6]; - } - // revision id - if ( isset( $parts[7] ) ) { - $req['url'] .= '/' . $parts[7]; - } elseif ( isset( $req['body']['oldid'] ) && $req['body']['oldid'] ) { - $req['url'] .= '/' . $req['body']['oldid']; - unset( $req['body']['oldid'] ); - } - if ( $parts[4] !== 'to' ) { - throw new Exception( "Part index 4 is not 'to'" ); - } - if ( $parts[3] === 'html' & $parts[5] === 'wikitext' ) { - if ( !isset( $req['body']['html'] ) ) { - throw new Exception( "You must set an 'html' body key for this request" ); - } - } elseif ( $parts[3] == 'wikitext' && $parts[5] == 'html' ) { - if ( !isset( $req['body']['wikitext'] ) ) { - throw new Exception( "You must set a 'wikitext' body key for this request" ); - } - if ( isset( $req['body']['body'] ) ) { - $req['body']['bodyOnly'] = $req['body']['body']; - unset( $req['body']['body'] ); - } - } else { - throw new Exception( "Transformation unsupported" ); - } + } + + return $result; + + } + + /** + * Remap a Parsoid v1 request to a RESTBase v1 request. + * + * Example Parsoid v1 requests: + * GET /local/v1/page/$title/html/$oldid + * * $oldid is optional + * POST /local/v1/transform/html/to/wikitext/$title/$oldid + * * body: array( 'html' => ... ) + * * $title and $oldid are optional + * POST /local/v1/transform/wikitext/to/html/$title + * * body: array( 'wikitext' => ... 
) or array( 'wikitext' => ..., 'body' => true/false ) + * * $title is optional + * + * NOTE: the POST APIs aren't "real" Parsoid v1 APIs, they are just what + * Visual Editor "pretends" the V1 API is like. (See + * ParsoidVirtualRESTService.) + */ + public function onParsoid1Request( array $req, Closure $idGeneratorFunc ) { + $parts = explode( '/', $req['url'] ); + list( + $targetWiki, // 'local' + $version, // 'v1' + $reqType // 'page' or 'transform' + ) = $parts; + if ( $targetWiki !== 'local' ) { + throw new Exception( "Only 'local' target wiki is currently supported" ); + } elseif ( $version !== 'v1' ) { + throw new Exception( "Version mismatch: should not happen." ); + } elseif ( $reqType !== 'page' && $reqType !== 'transform' ) { + throw new Exception( "Request type must be either 'page' or 'transform'" ); + } + $req['url'] = $this->params['url'] . $this->params['domain'] . '/v1/' . $reqType . '/'; + if ( $reqType === 'page' ) { + $title = $parts[3]; + if ( $parts[4] !== 'html' ) { + throw new Exception( "Only 'html' output format is currently supported" ); } - // set the appropriate proxy, timeout and headers - if ( $this->params['HTTPProxy'] ) { - $req['proxy'] = $this->params['HTTPProxy']; + $req['url'] .= 'html/' . $title; + if ( isset( $parts[5] ) ) { + $req['url'] .= '/' . $parts[5]; + } elseif ( isset( $req['query']['oldid'] ) && $req['query']['oldid'] ) { + $req['url'] .= '/' . $req['query']['oldid']; + unset( $req['query']['oldid'] ); } - if ( $this->params['timeout'] != null ) { - $req['reqTimeout'] = $this->params['timeout']; + } elseif ( $reqType === 'transform' ) { + // from / to transform + $req['url'] .= $parts[3] . '/to/' . $parts[5]; + // the title + if ( isset( $parts[6] ) ) { + $req['url'] .= '/' . $parts[6]; } - if ( $this->params['forwardCookies'] ) { - $req['headers']['Cookie'] = $this->params['forwardCookies']; + // revision id + if ( isset( $parts[7] ) ) { + $req['url'] .= '/' . 
$parts[7]; + } elseif ( isset( $req['body']['oldid'] ) && $req['body']['oldid'] ) { + $req['url'] .= '/' . $req['body']['oldid']; + unset( $req['body']['oldid'] ); + } + if ( $parts[4] !== 'to' ) { + throw new Exception( "Part index 4 is not 'to'" ); + } + if ( $parts[3] === 'html' && $parts[5] === 'wikitext' ) { + if ( !isset( $req['body']['html'] ) ) { + throw new Exception( "You must set an 'html' body key for this request" ); + } + } elseif ( $parts[3] == 'wikitext' && $parts[5] == 'html' ) { + if ( !isset( $req['body']['wikitext'] ) ) { + throw new Exception( "You must set a 'wikitext' body key for this request" ); + } + if ( isset( $req['body']['body'] ) ) { + $req['body']['bodyOnly'] = $req['body']['body']; + unset( $req['body']['body'] ); + } + } else { + throw new Exception( "Transformation unsupported" ); } - $result[$key] = $req; + } + // set the appropriate proxy, timeout and headers + if ( $this->params['HTTPProxy'] ) { + $req['proxy'] = $this->params['HTTPProxy']; + } + if ( $this->params['timeout'] != null ) { + $req['reqTimeout'] = $this->params['timeout']; + } + if ( $this->params['forwardCookies'] ) { + $req['headers']['Cookie'] = $this->params['forwardCookies']; } - return $result; + return $req; + + } + + /** + * Remap a Parsoid v3 request to a RESTBase v1 request. + * + * Example Parsoid v3 requests: + * GET /local/v3/page/html/$title/{$revision} + * * $revision is optional + * POST /local/v3/transform/html/to/wikitext/{$title}{/$revision} + * * body: array( 'html' => ... ) + * * $title and $revision are optional + * POST /local/v3/transform/wikitext/to/html/{$title}{/$revision} + * * body: array( 'wikitext' => ... 
) or array( 'wikitext' => ..., 'bodyOnly' => true/false ) + * * $title is optional + * * $revision is optional + */ + public function onParsoid3Request( array $req, Closure $idGeneratorFunc ) { + + $parts = explode( '/', $req['url'] ); + list( + $targetWiki, // 'local' + $version, // 'v3' + $action, // 'transform' or 'page' + $format, // 'html' or 'wikitext' + // $title, // optional + // $revision, // optional + ) = $parts; + if ( $targetWiki !== 'local' ) { + throw new Exception( "Only 'local' target wiki is currently supported" ); + } elseif ( $version !== 'v3' ) { + throw new Exception( "Version mismatch: should not happen." ); + } + // replace /local/ with the current domain, change v3 to v1, + $req['url'] = preg_replace( '#^local/v3/#', $this->params['domain'] . '/v1/', $req['url'] ); + // and prefix it with the service URL + $req['url'] = $this->params['url'] . $req['url']; + // set the appropriate proxy, timeout and headers + if ( $this->params['HTTPProxy'] ) { + $req['proxy'] = $this->params['HTTPProxy']; + } + if ( $this->params['timeout'] != null ) { + $req['reqTimeout'] = $this->params['timeout']; + } + if ( $this->params['forwardCookies'] ) { + $req['headers']['Cookie'] = $this->params['forwardCookies']; + } + + return $req; } diff --git a/includes/libs/virtualrest/SwiftVirtualRESTService.php b/includes/libs/virtualrest/SwiftVirtualRESTService.php index 011dabe0..88b0e1f1 100644 --- a/includes/libs/virtualrest/SwiftVirtualRESTService.php +++ b/includes/libs/virtualrest/SwiftVirtualRESTService.php @@ -45,7 +45,11 @@ class SwiftVirtualRESTService extends VirtualRESTService { * - swiftAuthTTL : Swift authentication TTL (seconds) */ public function __construct( array $params ) { - parent::__construct( $params ); + // set up defaults and merge them with the given params + $mparams = array_merge( array( + 'name' => 'swift' + ), $params ); + parent::__construct( $mparams ); } /** diff --git a/includes/libs/virtualrest/VirtualRESTService.php 
b/includes/libs/virtualrest/VirtualRESTService.php index 05c2afc1..01a4ea6e 100644 --- a/includes/libs/virtualrest/VirtualRESTService.php +++ b/includes/libs/virtualrest/VirtualRESTService.php @@ -45,6 +45,17 @@ abstract class VirtualRESTService { } /** + * Return the name of this service, in a form suitable for error + * reporting or debugging. + * + * @return string The name of the service behind this VRS object. + */ + public function getName() { + return isset( $this->params['name'] ) ? $this->params['name'] : + get_class( $this ); + } + + /** * Prepare virtual HTTP(S) requests (for this service) for execution * * This method should mangle any of the $reqs entry fields as needed: @@ -84,8 +95,8 @@ abstract class VirtualRESTService { * * This method may mangle any of the $reqs entry 'response' fields as needed: * - code : perform any code normalization [as needed] - * - reason : perform any reason normalization [as needed] - * - headers : perform any header normalization [as needed] + * - reason : perform any reason normalization [as needed] + * - headers : perform any header normalization [as needed] * * This method can also remove some of the requests as well as add new ones * (using $idGenerator to set each of the entries' array keys). For any existing diff --git a/includes/libs/virtualrest/VirtualRESTServiceClient.php b/includes/libs/virtualrest/VirtualRESTServiceClient.php index e8bb38d8..519da431 100644 --- a/includes/libs/virtualrest/VirtualRESTServiceClient.php +++ b/includes/libs/virtualrest/VirtualRESTServiceClient.php @@ -127,9 +127,9 @@ class VirtualRESTServiceClient { * - body : HTTP response body or resource (if "stream" was set) * - error : Any cURL error string * The map also stores integer-indexed copies of these values. 
This lets callers do: - * <code> - * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $client->run( $req ); - * </code> + * @code + * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $client->run( $req ); + * @endcode * @param array $req Virtual HTTP request maps * @return array Response array for request */ @@ -148,9 +148,9 @@ class VirtualRESTServiceClient { * - body : HTTP response body or resource (if "stream" was set) * - error : Any cURL error string * The map also stores integer-indexed copies of these values. This lets callers do: - * <code> - * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $responses[0]; - * </code> + * @code + * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $responses[0]; + * @endcode * * @param array $reqs Map of Virtual HTTP request maps * @return array $reqs Map of corresponding response values with the same keys/order |