From 08aa4418c30cfc18ccc69a0f0f9cb9e17be6c196 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Mon, 12 Aug 2013 09:28:15 +0200 Subject: Update to MediaWiki 1.21.1 --- includes/externalstore/ExternalStore.php | 178 +++++++++++++++++++++++ includes/externalstore/ExternalStoreDB.php | 181 ++++++++++++++++++++++++ includes/externalstore/ExternalStoreHttp.php | 43 ++++++ includes/externalstore/ExternalStoreMedium.php | 60 ++++++++ includes/externalstore/ExternalStoreMwstore.php | 72 ++++++++++ 5 files changed, 534 insertions(+) create mode 100644 includes/externalstore/ExternalStore.php create mode 100644 includes/externalstore/ExternalStoreDB.php create mode 100644 includes/externalstore/ExternalStoreHttp.php create mode 100644 includes/externalstore/ExternalStoreMedium.php create mode 100644 includes/externalstore/ExternalStoreMwstore.php (limited to 'includes/externalstore') diff --git a/includes/externalstore/ExternalStore.php b/includes/externalstore/ExternalStore.php new file mode 100644 index 00000000..4ca193d4 --- /dev/null +++ b/includes/externalstore/ExternalStore.php @@ -0,0 +1,178 @@ +:///". The protocol is used + * to determine what ExternalStoreMedium class is used. The location identifies + * particular storage instances or database clusters for store class to use. + * + * When an object is inserted into a store, the calling code uses a partial URL of + * the form "://" and receives the full object URL on success. + * This is useful since object names can be sequential IDs, UUIDs, or hashes. + * Callers are not responsible for unique name generation. + * + * External repositories might be populated by maintenance/async + * scripts, thus partial moving of data may be possible, as well + * as the possibility to have any storage format (i.e. for archives). + * + * @ingroup ExternalStorage + */ +class ExternalStore { + /** + * Get an external store object of the given type, with the given parameters + * + * @param string $proto Type of external storage, should be a value in $wgExternalStores + * @param array $params Associative array of ExternalStoreMedium parameters + * @return ExternalStoreMedium|bool The store class or false on error + */ + public static function getStoreObject( $proto, array $params = array() ) { + global $wgExternalStores; + + if ( !$wgExternalStores || !in_array( $proto, $wgExternalStores ) ) { + return false; // protocol not enabled + } + + $class = 'ExternalStore' . ucfirst( $proto ); + // Any custom modules should be added to $wgAutoLoadClasses for on-demand loading + return MWInit::classExists( $class ) ? new $class( $params ) : false; + } + + /** + * Fetch data from given URL + * + * @param string $url The URL of the text to get + * @param array $params Associative array of ExternalStoreMedium parameters + * @return string|bool The text stored or false on error + * @throws MWException + */ + public static function fetchFromURL( $url, array $params = array() ) { + $parts = explode( '://', $url, 2 ); + if ( count( $parts ) != 2 ) { + return false; // invalid URL + } + + list( $proto, $path ) = $parts; + if ( $path == '' ) { // bad URL + return false; + } + + $store = self::getStoreObject( $proto, $params ); + if ( $store === false ) { + return false; + } + + return $store->fetchFromURL( $url ); + } + + /** + * Store a data item to an external store, identified by a partial URL + * The protocol part is used to identify the class, the rest is passed to the + * class itself as a parameter. + * + * @param string $url A partial external store URL ("://") + * @param $data string + * @param array $params Associative array of ExternalStoreMedium parameters + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + public static function insert( $url, $data, array $params = array() ) { + $parts = explode( '://', $url, 2 ); + if ( count( $parts ) != 2 ) { + return false; // invalid URL + } + + list( $proto, $path ) = $parts; + if ( $path == '' ) { // bad URL + return false; + } + + $store = self::getStoreObject( $proto, $params ); + if ( $store === false ) { + return false; + } else { + return $store->store( $path, $data ); + } + } + + /** + * Like insert() above, but does more of the work for us. + * This function does not need a url param, it builds it by + * itself. It also fails-over to the next possible clusters. + * + * @param $data string + * @param array $params Associative array of ExternalStoreMedium parameters + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + public static function insertToDefault( $data, array $params = array() ) { + global $wgDefaultExternalStore; + + $error = false; + $tryStores = (array)$wgDefaultExternalStore; + while ( count( $tryStores ) > 0 ) { + $index = mt_rand( 0, count( $tryStores ) - 1 ); + $storeUrl = $tryStores[$index]; + wfDebug( __METHOD__ . ": trying $storeUrl\n" ); + list( $proto, $path ) = explode( '://', $storeUrl, 2 ); + $store = self::getStoreObject( $proto, $params ); + if ( $store === false ) { + throw new MWException( "Invalid external storage protocol - $storeUrl" ); + } + try { + $url = $store->store( $path, $data ); // Try to save the object + } catch ( MWException $error ) { + $url = false; + } + if ( strlen( $url ) ) { + return $url; // Done! + } else { + unset( $tryStores[$index] ); // Don't try this one again! + $tryStores = array_values( $tryStores ); // Must have consecutive keys + wfDebugLog( 'ExternalStorage', + "Unable to store text to external storage $storeUrl" ); + } + } + // All stores failed + if ( $error ) { + throw $error; // rethrow the last error + } else { + throw new MWException( "Unable to store text to external storage" ); + } + } + + /** + * @param $data string + * @param $wiki string + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + public static function insertToForeignDefault( $data, $wiki ) { + return self::insertToDefault( $data, array( 'wiki' => $wiki ) ); + } +} diff --git a/includes/externalstore/ExternalStoreDB.php b/includes/externalstore/ExternalStoreDB.php new file mode 100644 index 00000000..196e7f2c --- /dev/null +++ b/includes/externalstore/ExternalStoreDB.php @@ -0,0 +1,181 @@ +fetchBlob( $cluster, $id, $itemID ); + + if ( $itemID !== false && $ret !== false ) { + return $ret->getItem( $itemID ); + } + return $ret; + } + + /** + * @see ExternalStoreMedium::store() + */ + public function store( $cluster, $data ) { + $dbw = $this->getMaster( $cluster ); + $id = $dbw->nextSequenceValue( 'blob_blob_id_seq' ); + $dbw->insert( $this->getTable( $dbw ), + array( 'blob_id' => $id, 'blob_text' => $data ), + __METHOD__ ); + $id = $dbw->insertId(); + if ( !$id ) { + throw new MWException( __METHOD__.': no insert ID' ); + } + if ( $dbw->getFlag( DBO_TRX ) ) { + $dbw->commit( __METHOD__ ); + } + return "DB://$cluster/$id"; + } + + /** + * Get a LoadBalancer for the specified cluster + * + * @param string $cluster cluster name + * @return LoadBalancer object + */ + function &getLoadBalancer( $cluster ) { + $wiki = isset( $this->params['wiki'] ) ? $this->params['wiki'] : false; + + return wfGetLBFactory()->getExternalLB( $cluster, $wiki ); + } + + /** + * Get a slave database connection for the specified cluster + * + * @param string $cluster cluster name + * @return DatabaseBase object + */ + function &getSlave( $cluster ) { + global $wgDefaultExternalStore; + + $wiki = isset( $this->params['wiki'] ) ? $this->params['wiki'] : false; + $lb =& $this->getLoadBalancer( $cluster ); + + if ( !in_array( "DB://" . $cluster, (array)$wgDefaultExternalStore ) ) { + wfDebug( "read only external store" ); + $lb->allowLagged( true ); + } else { + wfDebug( "writable external store" ); + } + + return $lb->getConnection( DB_SLAVE, array(), $wiki ); + } + + /** + * Get a master database connection for the specified cluster + * + * @param string $cluster cluster name + * @return DatabaseBase object + */ + function &getMaster( $cluster ) { + $wiki = isset( $this->params['wiki'] ) ? $this->params['wiki'] : false; + $lb =& $this->getLoadBalancer( $cluster ); + return $lb->getConnection( DB_MASTER, array(), $wiki ); + } + + /** + * Get the 'blobs' table name for this database + * + * @param $db DatabaseBase + * @return String: table name ('blobs' by default) + */ + function getTable( &$db ) { + $table = $db->getLBInfo( 'blobs table' ); + if ( is_null( $table ) ) { + $table = 'blobs'; + } + return $table; + } + + /** + * Fetch a blob item out of the database; a cache of the last-loaded + * blob will be kept so that multiple loads out of a multi-item blob + * can avoid redundant database access and decompression. + * @param $cluster + * @param $id + * @param $itemID + * @return mixed + * @private + */ + function &fetchBlob( $cluster, $id, $itemID ) { + /** + * One-step cache variable to hold base blobs; operations that + * pull multiple revisions may often pull multiple times from + * the same blob. By keeping the last-used one open, we avoid + * redundant unserialization and decompression overhead. + */ + static $externalBlobCache = array(); + + $cacheID = ( $itemID === false ) ? "$cluster/$id" : "$cluster/$id/"; + if( isset( $externalBlobCache[$cacheID] ) ) { + wfDebugLog( 'ExternalStoreDB-cache', "ExternalStoreDB::fetchBlob cache hit on $cacheID\n" ); + return $externalBlobCache[$cacheID]; + } + + wfDebugLog( 'ExternalStoreDB-cache', "ExternalStoreDB::fetchBlob cache miss on $cacheID\n" ); + + $dbr =& $this->getSlave( $cluster ); + $ret = $dbr->selectField( $this->getTable( $dbr ), 'blob_text', array( 'blob_id' => $id ), __METHOD__ ); + if ( $ret === false ) { + wfDebugLog( 'ExternalStoreDB', "ExternalStoreDB::fetchBlob master fallback on $cacheID\n" ); + // Try the master + $dbw =& $this->getMaster( $cluster ); + $ret = $dbw->selectField( $this->getTable( $dbw ), 'blob_text', array( 'blob_id' => $id ), __METHOD__ ); + if( $ret === false) { + wfDebugLog( 'ExternalStoreDB', "ExternalStoreDB::fetchBlob master failed to find $cacheID\n" ); + } + } + if( $itemID !== false && $ret !== false ) { + // Unserialise object; caller extracts item + $ret = unserialize( $ret ); + } + + $externalBlobCache = array( $cacheID => &$ret ); + return $ret; + } +} diff --git a/includes/externalstore/ExternalStoreHttp.php b/includes/externalstore/ExternalStoreHttp.php new file mode 100644 index 00000000..345c17be --- /dev/null +++ b/includes/externalstore/ExternalStoreHttp.php @@ -0,0 +1,43 @@ +params = $params; + } + + /** + * Fetch data from given external store URL + * + * @param string $url An external store URL + * @return string|bool The text stored or false on error + * @throws MWException + */ + abstract public function fetchFromURL( $url ); + + /** + * Insert a data item into a given location + * + * @param string $location the location name + * @param string $data the data item + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + abstract public function store( $location, $data ); +} diff --git a/includes/externalstore/ExternalStoreMwstore.php b/includes/externalstore/ExternalStoreMwstore.php new file mode 100644 index 00000000..0911cca1 --- /dev/null +++ b/includes/externalstore/ExternalStoreMwstore.php @@ -0,0 +1,72 @@ +backendFromPath( $url ); + if ( $be instanceof FileBackend ) { + // We don't need "latest" since objects are immutable and + // backends should at least have "read-after-create" consistency. + return $be->getFileContents( array( 'src' => $url ) ); + } + return false; + } + + /** + * @see ExternalStoreMedium::store() + */ + public function store( $backend, $data ) { + $be = FileBackendGroup::singleton()->get( $backend ); + if ( $be instanceof FileBackend ) { + // Get three random base 36 characters to act as shard directories + $rand = wfBaseConvert( mt_rand( 0, 46655 ), 10, 36, 3 ); + // Make sure ID is roughly lexicographically increasing for performance + $id = str_pad( UIDGenerator::newTimestampedUID128( 32 ), 26, '0', STR_PAD_LEFT ); + // Segregate items by wiki ID for the sake of bookkeeping + $wiki = isset( $this->params['wiki'] ) ? $this->params['wiki'] : wfWikiID(); + + $url = $be->getContainerStoragePath( 'data' ) . '/' . + rawurlencode( $wiki ) . "/{$rand[0]}/{$rand[1]}/{$rand[2]}/{$id}"; + + $be->prepare( array( 'dir' => dirname( $url ), 'noAccess' => 1, 'noListing' => 1 ) ); + if ( $be->create( array( 'dst' => $url, 'content' => $data ) )->isOK() ) { + return $url; + } + } + return false; + } +} -- cgit v1.2.3-54-g00ecf