summaryrefslogtreecommitdiff
path: root/includes/libs/MultiHttpClient.php
diff options
context:
space:
mode:
authorPierre Schmitz <pierre@archlinux.de>2014-12-27 15:41:37 +0100
committerPierre Schmitz <pierre@archlinux.de>2014-12-31 11:43:28 +0100
commitc1f9b1f7b1b77776192048005dcc66dcf3df2bfb (patch)
tree2b38796e738dd74cb42ecd9bfd151803108386bc /includes/libs/MultiHttpClient.php
parentb88ab0086858470dd1f644e64cb4e4f62bb2be9b (diff)
Update to MediaWiki 1.24.1
Diffstat (limited to 'includes/libs/MultiHttpClient.php')
-rw-r--r--includes/libs/MultiHttpClient.php389
1 files changed, 389 insertions, 0 deletions
diff --git a/includes/libs/MultiHttpClient.php b/includes/libs/MultiHttpClient.php
new file mode 100644
index 00000000..8c982c43
--- /dev/null
+++ b/includes/libs/MultiHttpClient.php
@@ -0,0 +1,389 @@
+<?php
+/**
+ * HTTP service client
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Class to handle concurrent HTTP requests
+ *
+ * HTTP request maps are arrays that use the following format:
+ * - method : GET/HEAD/PUT/POST/DELETE
+ * - url : HTTP/HTTPS URL
+ * - query : <query parameter field/value associative array> (uses RFC 3986)
+ * - headers : <header name/value associative array>
+ * - body : source to get the HTTP request body from;
+ * this can simply be a string (always), a resource for
+ * PUT requests, and a field/value array for POST request;
+ * array bodies are encoded as multipart/form-data and strings
+ * use application/x-www-form-urlencoded (headers sent automatically)
+ * - stream : resource to stream the HTTP response body to
+ * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
+ *
+ * @author Aaron Schulz
+ * @since 1.23
+ */
+class MultiHttpClient {
+ /** @var resource */
+ protected $multiHandle = null; // curl_multi handle
+ /** @var string|null SSL certificates path */
+ protected $caBundlePath;
+ /** @var integer */
+ protected $connTimeout = 10;
+ /** @var integer */
+ protected $reqTimeout = 300;
+ /** @var bool */
+ protected $usePipelining = false;
+ /** @var integer */
+ protected $maxConnsPerHost = 50;
+
+ /**
+ * @param array $options
+ * - connTimeout : default connection timeout
+ * - reqTimeout : default request timeout
+ * - usePipelining : whether to use HTTP pipelining if possible (for all hosts)
+ * - maxConnsPerHost : maximum number of concurrent connections (per host)
+ */
+ public function __construct( array $options ) {
+ if ( isset( $options['caBundlePath'] ) ) {
+ $this->caBundlePath = $options['caBundlePath'];
+ if ( !file_exists( $this->caBundlePath ) ) {
+ throw new Exception( "Cannot find CA bundle: " . $this->caBundlePath );
+ }
+ }
+ static $opts = array( 'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost' );
+ foreach ( $opts as $key ) {
+ if ( isset( $options[$key] ) ) {
+ $this->$key = $options[$key];
+ }
+ }
+ }
+
+ /**
+ * Execute an HTTP(S) request
+ *
+ * This method returns a response map of:
+ * - code : HTTP response code or 0 if there was a serious cURL error
+ * - reason : HTTP response reason (empty if there was a serious cURL error)
+ * - headers : <header name/value associative array>
+ * - body : HTTP response body or resource (if "stream" was set)
+ * - err : Any cURL error string
+ * The map also stores integer-indexed copies of these values. This lets callers do:
+ * <code>
+ * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req );
+ * </code>
+ * @param array $req HTTP request array
+ * @param array $opts
+ * - connTimeout : connection timeout per request
+ * - reqTimeout : post-connection timeout per request
+ * @return array Response array for request
+ */
+ final public function run( array $req, array $opts = array() ) {
+ $req = $this->runMulti( array( $req ), $opts );
+ return $req[0]['response'];
+ }
+
+ /**
+ * Execute a set of HTTP(S) requests concurrently
+ *
+ * The maps are returned by this method with the 'response' field set to a map of:
+ * - code : HTTP response code or 0 if there was a serious cURL error
+ * - reason : HTTP response reason (empty if there was a serious cURL error)
+ * - headers : <header name/value associative array>
+ * - body : HTTP response body or resource (if "stream" was set)
+ * - err : Any cURL error string
+ * The map also stores integer-indexed copies of these values. This lets callers do:
+ * <code>
+ * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $req['response'];
+ * </code>
+ * All headers in the 'headers' field are normalized to use lower case names.
+ * This is true for the request headers and the response headers. Integer-indexed
+ * method/URL entries will also be changed to use the corresponding string keys.
+ *
+ * @param array $reqs Map of HTTP request arrays
+ * @param array $opts
+ * - connTimeout : connection timeout per request
+ * - reqTimeout : post-connection timeout per request
+ * - usePipelining : whether to use HTTP pipelining if possible
+ * - maxConnsPerHost : maximum number of concurrent connections (per host)
+ * @return array $reqs With response array populated for each
+ */
+ public function runMulti( array $reqs, array $opts = array() ) {
+ $chm = $this->getCurlMulti();
+
+ // Normalize $reqs and add all of the required cURL handles...
+ $handles = array();
+ foreach ( $reqs as $index => &$req ) {
+ $req['response'] = array(
+ 'code' => 0,
+ 'reason' => '',
+ 'headers' => array(),
+ 'body' => '',
+ 'error' => ''
+ );
+ if ( isset( $req[0] ) ) {
+ $req['method'] = $req[0]; // short-form
+ unset( $req[0] );
+ }
+ if ( isset( $req[1] ) ) {
+ $req['url'] = $req[1]; // short-form
+ unset( $req[1] );
+ }
+ if ( !isset( $req['method'] ) ) {
+ throw new Exception( "Request has no 'method' field set." );
+ } elseif ( !isset( $req['url'] ) ) {
+ throw new Exception( "Request has no 'url' field set." );
+ }
+ $req['query'] = isset( $req['query'] ) ? $req['query'] : array();
+ $headers = array(); // normalized headers
+ if ( isset( $req['headers'] ) ) {
+ foreach ( $req['headers'] as $name => $value ) {
+ $headers[strtolower( $name )] = $value;
+ }
+ }
+ $req['headers'] = $headers;
+ if ( !isset( $req['body'] ) ) {
+ $req['body'] = '';
+ $req['headers']['content-length'] = 0;
+ }
+ $handles[$index] = $this->getCurlHandle( $req, $opts );
+ if ( count( $reqs ) > 1 ) {
+ // https://github.com/guzzle/guzzle/issues/349
+ curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true );
+ }
+ }
+ unset( $req ); // don't assign over this by accident
+
+ $indexes = array_keys( $reqs );
+ if ( function_exists( 'curl_multi_setopt' ) ) { // PHP 5.5
+ if ( isset( $opts['usePipelining'] ) ) {
+ curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] );
+ }
+ if ( isset( $opts['maxConnsPerHost'] ) ) {
+ // Keep these sockets around as they may be needed later in the request
+ curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] );
+ }
+ }
+
+ // @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS)
+ $batches = array_chunk( $indexes, $this->maxConnsPerHost );
+
+ foreach ( $batches as $batch ) {
+ // Attach all cURL handles for this batch
+ foreach ( $batch as $index ) {
+ curl_multi_add_handle( $chm, $handles[$index] );
+ }
+ // Execute the cURL handles concurrently...
+ $active = null; // handles still being processed
+ do {
+ // Do any available work...
+ do {
+ $mrc = curl_multi_exec( $chm, $active );
+ } while ( $mrc == CURLM_CALL_MULTI_PERFORM );
+ // Wait (if possible) for available work...
+ if ( $active > 0 && $mrc == CURLM_OK ) {
+ if ( curl_multi_select( $chm, 10 ) == -1 ) {
+ // PHP bug 63411; http://curl.haxx.se/libcurl/c/curl_multi_fdset.html
+ usleep( 5000 ); // 5ms
+ }
+ }
+ } while ( $active > 0 && $mrc == CURLM_OK );
+ }
+
+ // Remove all of the added cURL handles and check for errors...
+ foreach ( $reqs as $index => &$req ) {
+ $ch = $handles[$index];
+ curl_multi_remove_handle( $chm, $ch );
+ if ( curl_errno( $ch ) !== 0 ) {
+ $req['response']['error'] = "(curl error: " .
+ curl_errno( $ch ) . ") " . curl_error( $ch );
+ }
+ // For convenience with the list() operator
+ $req['response'][0] = $req['response']['code'];
+ $req['response'][1] = $req['response']['reason'];
+ $req['response'][2] = $req['response']['headers'];
+ $req['response'][3] = $req['response']['body'];
+ $req['response'][4] = $req['response']['error'];
+ curl_close( $ch );
+ // Close any string wrapper file handles
+ if ( isset( $req['_closeHandle'] ) ) {
+ fclose( $req['_closeHandle'] );
+ unset( $req['_closeHandle'] );
+ }
+ }
+ unset( $req ); // don't assign over this by accident
+
+ // Restore the default settings
+ if ( function_exists( 'curl_multi_setopt' ) ) { // PHP 5.5
+ curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining );
+ curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
+ }
+
+ return $reqs;
+ }
+
+ /**
+ * @param array $req HTTP request map
+ * @param array $opts
+ * - connTimeout : default connection timeout
+ * - reqTimeout : default request timeout
+ * @return resource
+ */
+ protected function getCurlHandle( array &$req, array $opts = array() ) {
+ $ch = curl_init();
+
+ curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT,
+ isset( $opts['connTimeout'] ) ? $opts['connTimeout'] : $this->connTimeout );
+ curl_setopt( $ch, CURLOPT_TIMEOUT,
+ isset( $opts['reqTimeout'] ) ? $opts['reqTimeout'] : $this->reqTimeout );
+ curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
+ curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 );
+ curl_setopt( $ch, CURLOPT_HEADER, 0 );
+ if ( !is_null( $this->caBundlePath ) ) {
+ curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true );
+ curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath );
+ }
+ curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
+
+ $url = $req['url'];
+ // PHP_QUERY_RFC3986 is PHP 5.4+ only
+ $query = str_replace(
+ array( '+', '%7E' ),
+ array( '%20', '~' ),
+ http_build_query( $req['query'], '', '&' )
+ );
+ if ( $query != '' ) {
+ $url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
+ }
+ curl_setopt( $ch, CURLOPT_URL, $url );
+
+ curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] );
+ if ( $req['method'] === 'HEAD' ) {
+ curl_setopt( $ch, CURLOPT_NOBODY, 1 );
+ }
+
+ if ( $req['method'] === 'PUT' ) {
+ curl_setopt( $ch, CURLOPT_PUT, 1 );
+ if ( is_resource( $req['body'] ) ) {
+ curl_setopt( $ch, CURLOPT_INFILE, $req['body'] );
+ if ( isset( $req['headers']['content-length'] ) ) {
+ curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] );
+ } elseif ( isset( $req['headers']['transfer-encoding'] ) &&
+ $req['headers']['transfer-encoding'] === 'chunks'
+ ) {
+ curl_setopt( $ch, CURLOPT_UPLOAD, true );
+ } else {
+ throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." );
+ }
+ } elseif ( $req['body'] !== '' ) {
+ $fp = fopen( "php://temp", "wb+" );
+ fwrite( $fp, $req['body'], strlen( $req['body'] ) );
+ rewind( $fp );
+ curl_setopt( $ch, CURLOPT_INFILE, $fp );
+ curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) );
+ $req['_closeHandle'] = $fp; // remember to close this later
+ } else {
+ curl_setopt( $ch, CURLOPT_INFILESIZE, 0 );
+ }
+ curl_setopt( $ch, CURLOPT_READFUNCTION,
+ function ( $ch, $fd, $length ) {
+ $data = fread( $fd, $length );
+ $len = strlen( $data );
+ return $data;
+ }
+ );
+ } elseif ( $req['method'] === 'POST' ) {
+ curl_setopt( $ch, CURLOPT_POST, 1 );
+ curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] );
+ } else {
+ if ( is_resource( $req['body'] ) || $req['body'] !== '' ) {
+ throw new Exception( "HTTP body specified for a non PUT/POST request." );
+ }
+ $req['headers']['content-length'] = 0;
+ }
+
+ $headers = array();
+ foreach ( $req['headers'] as $name => $value ) {
+ if ( strpos( $name, ': ' ) ) {
+ throw new Exception( "Headers cannot have ':' in the name." );
+ }
+ $headers[] = $name . ': ' . trim( $value );
+ }
+ curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );
+
+ curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
+ function ( $ch, $header ) use ( &$req ) {
+ $length = strlen( $header );
+ $matches = array();
+ if ( preg_match( "/^(HTTP\/1\.[01]) (\d{3}) (.*)/", $header, $matches ) ) {
+ $req['response']['code'] = (int)$matches[2];
+ $req['response']['reason'] = trim( $matches[3] );
+ return $length;
+ }
+ if ( strpos( $header, ":" ) === false ) {
+ return $length;
+ }
+ list( $name, $value ) = explode( ":", $header, 2 );
+ $req['response']['headers'][strtolower( $name )] = trim( $value );
+ return $length;
+ }
+ );
+
+ if ( isset( $req['stream'] ) ) {
+ // Don't just use CURLOPT_FILE as that might give:
+ // curl_setopt(): cannot represent a stream of type Output as a STDIO FILE*
+ // The callback here handles both normal files and php://temp handles.
+ curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
+ function ( $ch, $data ) use ( &$req ) {
+ return fwrite( $req['stream'], $data );
+ }
+ );
+ } else {
+ curl_setopt( $ch, CURLOPT_WRITEFUNCTION,
+ function ( $ch, $data ) use ( &$req ) {
+ $req['response']['body'] .= $data;
+ return strlen( $data );
+ }
+ );
+ }
+
+ return $ch;
+ }
+
+ /**
+ * @return resource
+ */
+ protected function getCurlMulti() {
+ if ( !$this->multiHandle ) {
+ $cmh = curl_multi_init();
+ if ( function_exists( 'curl_multi_setopt' ) ) { // PHP 5.5
+ curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining );
+ curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost );
+ }
+ $this->multiHandle = $cmh;
+ }
+ return $this->multiHandle;
+ }
+
+ function __destruct() {
+ if ( $this->multiHandle ) {
+ curl_multi_close( $this->multiHandle );
+ }
+ }
+}