diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2013-01-18 16:46:04 +0100 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2013-01-18 16:46:04 +0100 |
commit | 63601400e476c6cf43d985f3e7b9864681695ed4 (patch) | |
tree | f7846203a952e38aaf66989d0a4702779f549962 /includes/filebackend | |
parent | 8ff01378c9e0207f9169b81966a51def645b6a51 (diff) |
Update to MediaWiki 1.20.2
this update includes:
* adjusted Arch Linux skin
* updated FluxBBAuthPlugin
* patch for https://bugzilla.wikimedia.org/show_bug.cgi?id=44024
Diffstat (limited to 'includes/filebackend')
18 files changed, 9804 insertions, 0 deletions
diff --git a/includes/filebackend/FSFile.php b/includes/filebackend/FSFile.php new file mode 100644 index 00000000..e07c99d4 --- /dev/null +++ b/includes/filebackend/FSFile.php @@ -0,0 +1,252 @@ +<?php +/** + * Non-directory file on the file system. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + */ + +/** + * Class representing a non-directory file on the file system + * + * @ingroup FileBackend + */ +class FSFile { + protected $path; // path to file + + /** + * Sets up the file object + * + * @param $path string Path to temporary file on local disk + * @throws MWException + */ + public function __construct( $path ) { + if ( FileBackend::isStoragePath( $path ) ) { + throw new MWException( __METHOD__ . " given storage path `$path`." 
); + } + $this->path = $path; + } + + /** + * Returns the file system path + * + * @return String + */ + public function getPath() { + return $this->path; + } + + /** + * Checks if the file exists + * + * @return bool + */ + public function exists() { + return is_file( $this->path ); + } + + /** + * Get the file size in bytes + * + * @return int|bool + */ + public function getSize() { + return filesize( $this->path ); + } + + /** + * Get the file's last-modified timestamp + * + * @return string|bool TS_MW timestamp or false on failure + */ + public function getTimestamp() { + wfSuppressWarnings(); + $timestamp = filemtime( $this->path ); + wfRestoreWarnings(); + if ( $timestamp !== false ) { + $timestamp = wfTimestamp( TS_MW, $timestamp ); + } + return $timestamp; + } + + /** + * Guess the MIME type from the file contents alone + * + * @return string + */ + public function getMimeType() { + return MimeMagic::singleton()->guessMimeType( $this->path, false ); + } + + /** + * Get an associative array containing information about + * a file with the given storage path. + * + * @param $ext Mixed: the file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. 
+ * + * @return array + */ + public function getProps( $ext = true ) { + wfProfileIn( __METHOD__ ); + wfDebug( __METHOD__.": Getting file info for $this->path\n" ); + + $info = self::placeholderProps(); + $info['fileExists'] = $this->exists(); + + if ( $info['fileExists'] ) { + $magic = MimeMagic::singleton(); + + # get the file extension + if ( $ext === true ) { + $ext = self::extensionFromPath( $this->path ); + } + + # mime type according to file contents + $info['file-mime'] = $this->getMimeType(); + # logical mime type + $info['mime'] = $magic->improveTypeFromExtension( $info['file-mime'], $ext ); + + list( $info['major_mime'], $info['minor_mime'] ) = File::splitMime( $info['mime'] ); + $info['media_type'] = $magic->getMediaType( $this->path, $info['mime'] ); + + # Get size in bytes + $info['size'] = $this->getSize(); + + # Height, width and metadata + $handler = MediaHandler::getHandler( $info['mime'] ); + if ( $handler ) { + $tempImage = (object)array(); + $info['metadata'] = $handler->getMetadata( $tempImage, $this->path ); + $gis = $handler->getImageSize( $tempImage, $this->path, $info['metadata'] ); + if ( is_array( $gis ) ) { + $info = $this->extractImageSizeInfo( $gis ) + $info; + } + } + $info['sha1'] = $this->getSha1Base36(); + + wfDebug(__METHOD__.": $this->path loaded, {$info['size']} bytes, {$info['mime']}.\n"); + } else { + wfDebug(__METHOD__.": $this->path NOT FOUND!\n"); + } + + wfProfileOut( __METHOD__ ); + return $info; + } + + /** + * Placeholder file properties to use for files that don't exist + * + * @return Array + */ + public static function placeholderProps() { + $info = array(); + $info['fileExists'] = false; + $info['mime'] = null; + $info['media_type'] = MEDIATYPE_UNKNOWN; + $info['metadata'] = ''; + $info['sha1'] = ''; + $info['width'] = 0; + $info['height'] = 0; + $info['bits'] = 0; + return $info; + } + + /** + * Extract image size information + * + * @param $gis array + * @return Array + */ + protected function 
extractImageSizeInfo( array $gis ) { + $info = array(); + # NOTE: $gis[2] contains a code for the image type. This is no longer used. + $info['width'] = $gis[0]; + $info['height'] = $gis[1]; + if ( isset( $gis['bits'] ) ) { + $info['bits'] = $gis['bits']; + } else { + $info['bits'] = 0; + } + return $info; + } + + /** + * Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case + * encoding, zero padded to 31 digits. + * + * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 + * fairly neatly. + * + * @return bool|string False on failure + */ + public function getSha1Base36() { + wfProfileIn( __METHOD__ ); + + wfSuppressWarnings(); + $hash = sha1_file( $this->path ); + wfRestoreWarnings(); + if ( $hash !== false ) { + $hash = wfBaseConvert( $hash, 16, 36, 31 ); + } + + wfProfileOut( __METHOD__ ); + return $hash; + } + + /** + * Get the final file extension from a file system path + * + * @param $path string + * @return string + */ + public static function extensionFromPath( $path ) { + $i = strrpos( $path, '.' ); + return strtolower( $i ? substr( $path, $i + 1 ) : '' ); + } + + /** + * Get an associative array containing information about a file in the local filesystem. + * + * @param $path String: absolute local filesystem path + * @param $ext Mixed: the file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. + * + * @return array + */ + public static function getPropsFromPath( $path, $ext = true ) { + $fsFile = new self( $path ); + return $fsFile->getProps( $ext ); + } + + /** + * Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case + * encoding, zero padded to 31 digits. + * + * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 + * fairly neatly. 
+ * + * @param $path string + * + * @return bool|string False on failure + */ + public static function getSha1Base36FromPath( $path ) { + $fsFile = new self( $path ); + return $fsFile->getSha1Base36(); + } +} diff --git a/includes/filebackend/FSFileBackend.php b/includes/filebackend/FSFileBackend.php new file mode 100644 index 00000000..93495340 --- /dev/null +++ b/includes/filebackend/FSFileBackend.php @@ -0,0 +1,986 @@ +<?php +/** + * File system based backend. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * @brief Class for a file system (FS) based file backend. + * + * All "containers" each map to a directory under the backend's base directory. + * For backwards-compatibility, some container paths can be set to custom paths. + * The wiki ID will not be used in any custom paths, so this should be avoided. + * + * Having directories with thousands of files will diminish performance. + * Sharding can be accomplished by using FileRepo-style hash paths. + * + * Status messages should avoid mentioning the internal FS paths. + * PHP warnings are assumed to be logged rather than output. 
+ * + * @ingroup FileBackend + * @since 1.19 + */ +class FSFileBackend extends FileBackendStore { + protected $basePath; // string; directory holding the container directories + /** @var Array Map of container names to root paths */ + protected $containerPaths = array(); // for custom container paths + protected $fileMode; // integer; file permission mode + protected $fileOwner; // string; required OS username to own files + protected $currentUser; // string; OS username running this script + + protected $hadWarningErrors = array(); + + /** + * @see FileBackendStore::__construct() + * Additional $config params include: + * - basePath : File system directory that holds containers. + * - containerPaths : Map of container names to custom file system directories. + * This should only be used for backwards-compatibility. + * - fileMode : Octal UNIX file permissions to use on files stored. + */ + public function __construct( array $config ) { + parent::__construct( $config ); + + // Remove any possible trailing slash from directories + if ( isset( $config['basePath'] ) ) { + $this->basePath = rtrim( $config['basePath'], '/' ); // remove trailing slash + } else { + $this->basePath = null; // none; containers must have explicit paths + } + + if ( isset( $config['containerPaths'] ) ) { + $this->containerPaths = (array)$config['containerPaths']; + foreach ( $this->containerPaths as &$path ) { + $path = rtrim( $path, '/' ); // remove trailing slash + } + } + + $this->fileMode = isset( $config['fileMode'] ) ? 
$config['fileMode'] : 0644; + if ( isset( $config['fileOwner'] ) && function_exists( 'posix_getuid' ) ) { + $this->fileOwner = $config['fileOwner']; + $info = posix_getpwuid( posix_getuid() ); + $this->currentUser = $info['name']; // cache this, assuming it doesn't change + } + } + + /** + * @see FileBackendStore::resolveContainerPath() + * @param $container string + * @param $relStoragePath string + * @return null|string + */ + protected function resolveContainerPath( $container, $relStoragePath ) { + // Check that container has a root directory + if ( isset( $this->containerPaths[$container] ) || isset( $this->basePath ) ) { + // Check for sane relative paths (assume the base paths are OK) + if ( $this->isLegalRelPath( $relStoragePath ) ) { + return $relStoragePath; + } + } + return null; + } + + /** + * Sanity check a relative file system path for validity + * + * @param $path string Normalized relative path + * @return bool + */ + protected function isLegalRelPath( $path ) { + // Check for file names longer than 255 chars + if ( preg_match( '![^/]{256}!', $path ) ) { // ext3/NTFS + return false; + } + if ( wfIsWindows() ) { // NTFS + return !preg_match( '![:*?"<>|]!', $path ); + } else { + return true; + } + } + + /** + * Given the short (unresolved) and full (resolved) name of + * a container, return the file system path of the container. 
+ * + * @param $shortCont string + * @param $fullCont string + * @return string|null + */ + protected function containerFSRoot( $shortCont, $fullCont ) { + if ( isset( $this->containerPaths[$shortCont] ) ) { + return $this->containerPaths[$shortCont]; + } elseif ( isset( $this->basePath ) ) { + return "{$this->basePath}/{$fullCont}"; + } + return null; // no container base path defined + } + + /** + * Get the absolute file system path for a storage path + * + * @param $storagePath string Storage path + * @return string|null + */ + protected function resolveToFSPath( $storagePath ) { + list( $fullCont, $relPath ) = $this->resolveStoragePathReal( $storagePath ); + if ( $relPath === null ) { + return null; // invalid + } + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $storagePath ); + $fsPath = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + if ( $relPath != '' ) { + $fsPath .= "/{$relPath}"; + } + return $fsPath; + } + + /** + * @see FileBackendStore::isPathUsableInternal() + * @return bool + */ + public function isPathUsableInternal( $storagePath ) { + $fsPath = $this->resolveToFSPath( $storagePath ); + if ( $fsPath === null ) { + return false; // invalid + } + $parentDir = dirname( $fsPath ); + + if ( file_exists( $fsPath ) ) { + $ok = is_file( $fsPath ) && is_writable( $fsPath ); + } else { + $ok = is_dir( $parentDir ) && is_writable( $parentDir ); + } + + if ( $this->fileOwner !== null && $this->currentUser !== $this->fileOwner ) { + $ok = false; + trigger_error( __METHOD__ . ": PHP process owner is not '{$this->fileOwner}'." 
); + } + + return $ok; + } + + /** + * @see FileBackendStore::doStoreInternal() + * @return Status + */ + protected function doStoreInternal( array $params ) { + $status = Status::newGood(); + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + if ( !empty( $params['async'] ) ) { // deferred + $cmd = implode( ' ', array( wfIsWindows() ? 'COPY' : 'cp', + wfEscapeShellArg( $this->cleanPathSlashes( $params['src'] ) ), + wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) + ) ); + $status->value = new FSFileOpHandle( $this, $params, 'Store', $cmd, $dest ); + } else { // immediate write + $ok = copy( $params['src'], $dest ); + // In some cases (at least over NFS), copy() returns true when it fails + if ( !$ok || ( filesize( $params['src'] ) !== filesize( $dest ) ) ) { + if ( $ok ) { // PHP bug + unlink( $dest ); // remove broken file + trigger_error( __METHOD__ . ": copy() failed but returned true." 
); + } + $status->fatal( 'backend-fail-store', $params['src'], $params['dst'] ); + return $status; + } + $this->chmod( $dest ); + } + + return $status; + } + + /** + * @see FSFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseStore( $errors, Status $status, array $params, $cmd ) { + if ( $errors !== '' && !( wfIsWindows() && $errors[0] === " " ) ) { + $status->fatal( 'backend-fail-store', $params['src'], $params['dst'] ); + trigger_error( "$cmd\n$errors", E_USER_WARNING ); // command output + } + } + + /** + * @see FileBackendStore::doCopyInternal() + * @return Status + */ + protected function doCopyInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + if ( !empty( $params['async'] ) ) { // deferred + $cmd = implode( ' ', array( wfIsWindows() ? 'COPY' : 'cp', + wfEscapeShellArg( $this->cleanPathSlashes( $source ) ), + wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) + ) ); + $status->value = new FSFileOpHandle( $this, $params, 'Copy', $cmd, $dest ); + } else { // immediate write + $ok = copy( $source, $dest ); + // In some cases (at least over NFS), copy() returns true when it fails + if ( !$ok || ( filesize( $source ) !== filesize( $dest ) ) ) { + if ( $ok ) { // PHP bug + unlink( $dest ); // remove broken file + trigger_error( __METHOD__ . ": copy() failed but returned true." 
); + } + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } + $this->chmod( $dest ); + } + + return $status; + } + + /** + * @see FSFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseCopy( $errors, Status $status, array $params, $cmd ) { + if ( $errors !== '' && !( wfIsWindows() && $errors[0] === " " ) ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + trigger_error( "$cmd\n$errors", E_USER_WARNING ); // command output + } + } + + /** + * @see FileBackendStore::doMoveInternal() + * @return Status + */ + protected function doMoveInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + // Windows does not support moving over existing files + if ( wfIsWindows() ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + if ( !empty( $params['async'] ) ) { // deferred + $cmd = implode( ' ', array( wfIsWindows() ? 
'MOVE' : 'mv', + wfEscapeShellArg( $this->cleanPathSlashes( $source ) ), + wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) + ) ); + $status->value = new FSFileOpHandle( $this, $params, 'Move', $cmd ); + } else { // immediate write + $ok = rename( $source, $dest ); + clearstatcache(); // file no longer at source + if ( !$ok ) { + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + return $status; + } + } + + return $status; + } + + /** + * @see FSFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseMove( $errors, Status $status, array $params, $cmd ) { + if ( $errors !== '' && !( wfIsWindows() && $errors[0] === " " ) ) { + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + trigger_error( "$cmd\n$errors", E_USER_WARNING ); // command output + } + } + + /** + * @see FileBackendStore::doDeleteInternal() + * @return Status + */ + protected function doDeleteInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + if ( !is_file( $source ) ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } + return $status; // do nothing; either OK or bad status + } + + if ( !empty( $params['async'] ) ) { // deferred + $cmd = implode( ' ', array( wfIsWindows() ? 
'DEL' : 'unlink', + wfEscapeShellArg( $this->cleanPathSlashes( $source ) ) + ) ); + $status->value = new FSFileOpHandle( $this, $params, 'Delete', $cmd ); + } else { // immediate write + $ok = unlink( $source ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + return $status; + } + } + + return $status; + } + + /** + * Convert the captured output of an async delete command into a Status failure. + * Dispatched by name as '_getResponse' . $call, so delete op handles must use the call name 'Delete'. + * + * @see FSFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseDelete( $errors, Status $status, array $params, $cmd ) { + if ( $errors !== '' && !( wfIsWindows() && $errors[0] === " " ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + trigger_error( "$cmd\n$errors", E_USER_WARNING ); // command output + } + } + + /** + * @see FileBackendStore::doCreateInternal() + * @return Status + */ + protected function doCreateInternal( array $params ) { + $status = Status::newGood(); + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + if ( !empty( $params['async'] ) ) { // deferred + $tempFile = TempFSFile::factory( 'create_', 'tmp' ); + if ( !$tempFile ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } + $bytes = file_put_contents( $tempFile->getPath(), $params['content'] ); + if ( $bytes === false ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } + $cmd = implode( ' ', array( wfIsWindows() ? 
'COPY' : 'cp', + wfEscapeShellArg( $this->cleanPathSlashes( $tempFile->getPath() ) ), + wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) + ) ); + $status->value = new FSFileOpHandle( $this, $params, 'Create', $cmd, $dest ); + $tempFile->bind( $status->value ); + } else { // immediate write + $bytes = file_put_contents( $dest, $params['content'] ); + if ( $bytes === false ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } + $this->chmod( $dest ); + } + + return $status; + } + + /** + * @see FSFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseCreate( $errors, Status $status, array $params, $cmd ) { + if ( $errors !== '' && !( wfIsWindows() && $errors[0] === " " ) ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + trigger_error( "$cmd\n$errors", E_USER_WARNING ); // command output + } + } + + /** + * @see FileBackendStore::doPrepareInternal() + * @return Status + */ + protected function doPrepareInternal( $fullCont, $dirRel, array $params ) { + $status = Status::newGood(); + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + $existed = is_dir( $dir ); // already there? + if ( !wfMkdirParents( $dir ) ) { // make directory and its parents + $status->fatal( 'directorycreateerror', $params['dir'] ); // fails on races + } elseif ( !is_writable( $dir ) ) { + $status->fatal( 'directoryreadonlyerror', $params['dir'] ); + } elseif ( !is_readable( $dir ) ) { + $status->fatal( 'directorynotreadableerror', $params['dir'] ); + } + if ( is_dir( $dir ) && !$existed ) { + // Respect any 'noAccess' or 'noListing' flags... 
+ $status->merge( $this->doSecureInternal( $fullCont, $dirRel, $params ) ); + } + return $status; + } + + /** + * @see FileBackendStore::doSecureInternal() + * @return Status + */ + protected function doSecureInternal( $fullCont, $dirRel, array $params ) { + $status = Status::newGood(); + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + // Seed new directories with a blank index.html, to prevent crawling... + if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) { + $bytes = file_put_contents( "{$dir}/index.html", $this->indexHtmlPrivate() ); + if ( $bytes === false ) { + $status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' ); + return $status; + } + } + // Add a .htaccess file to the root of the container... + if ( !empty( $params['noAccess'] ) && !file_exists( "{$contRoot}/.htaccess" ) ) { + $bytes = file_put_contents( "{$contRoot}/.htaccess", $this->htaccessPrivate() ); + if ( $bytes === false ) { + $storeDir = "mwstore://{$this->name}/{$shortCont}"; + $status->fatal( 'backend-fail-create', "{$storeDir}/.htaccess" ); + return $status; + } + } + return $status; + } + + /** + * @see FileBackendStore::doPublishInternal() + * @return Status + */ + protected function doPublishInternal( $fullCont, $dirRel, array $params ) { + $status = Status::newGood(); + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + // Unseed new directories with a blank index.html, to allow crawling... 
+ if ( !empty( $params['listing'] ) && is_file( "{$dir}/index.html" ) ) { + $exists = ( file_get_contents( "{$dir}/index.html" ) === $this->indexHtmlPrivate() ); + if ( $exists && !unlink( "{$dir}/index.html" ) ) { // reverse secure() + $status->fatal( 'backend-fail-delete', $params['dir'] . '/index.html' ); + return $status; + } + } + // Remove the .htaccess file from the root of the container... + if ( !empty( $params['access'] ) && is_file( "{$contRoot}/.htaccess" ) ) { + $exists = ( file_get_contents( "{$contRoot}/.htaccess" ) === $this->htaccessPrivate() ); + if ( $exists && !unlink( "{$contRoot}/.htaccess" ) ) { // reverse secure() + $storeDir = "mwstore://{$this->name}/{$shortCont}"; + $status->fatal( 'backend-fail-delete', "{$storeDir}/.htaccess" ); + return $status; + } + } + return $status; + } + + /** + * @see FileBackendStore::doCleanInternal() + * @return Status + */ + protected function doCleanInternal( $fullCont, $dirRel, array $params ) { + $status = Status::newGood(); + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + wfSuppressWarnings(); + if ( is_dir( $dir ) ) { + rmdir( $dir ); // remove directory if empty + } + wfRestoreWarnings(); + return $status; + } + + /** + * @see FileBackendStore::doGetFileStat() + * @return array|bool|null Map with 'mtime' and 'size'; false if the path is invalid or no such file exists; null on failure + */ + protected function doGetFileStat( array $params ) { + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + return false; // invalid storage path + } + + $this->trapWarnings(); // don't trust 'false' if there were errors + $stat = is_file( $source ) ? 
stat( $source ) : false; // regular files only + $hadError = $this->untrapWarnings(); + + if ( $stat ) { + return array( + 'mtime' => wfTimestamp( TS_MW, $stat['mtime'] ), + 'size' => $stat['size'] + ); + } elseif ( !$hadError ) { + return false; // file does not exist + } else { + return null; // failure + } + } + + /** + * @see FileBackendStore::doClearCache() + */ + protected function doClearCache( array $paths = null ) { + clearstatcache(); // clear the PHP file stat cache + } + + /** + * @see FileBackendStore::doDirectoryExists() + * @return bool|null + */ + protected function doDirectoryExists( $fullCont, $dirRel, array $params ) { + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + + $this->trapWarnings(); // don't trust 'false' if there were errors + $exists = is_dir( $dir ); + $hadError = $this->untrapWarnings(); + + return $hadError ? null : $exists; + } + + /** + * @see FileBackendStore::getDirectoryListInternal() + * @return Array|null + */ + public function getDirectoryListInternal( $fullCont, $dirRel, array $params ) { + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + $exists = is_dir( $dir ); + if ( !$exists ) { + wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" ); + return array(); // nothing under this dir + } elseif ( !is_readable( $dir ) ) { + wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" ); + return null; // bad permissions? 
+ } + return new FSFileBackendDirList( $dir, $params ); + } + + /** + * @see FileBackendStore::getFileListInternal() + * @return array|FSFileBackendFileList|null + */ + public function getFileListInternal( $fullCont, $dirRel, array $params ) { + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + $exists = is_dir( $dir ); + if ( !$exists ) { + wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" ); + return array(); // nothing under this dir + } elseif ( !is_readable( $dir ) ) { + wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" ); + return null; // bad permissions? + } + return new FSFileBackendFileList( $dir, $params ); + } + + /** + * @see FileBackendStore::getLocalReference() + * @return FSFile|null + */ + public function getLocalReference( array $params ) { + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + return null; + } + return new FSFile( $source ); + } + + /** + * @see FileBackendStore::getLocalCopy() + * @return null|TempFSFile + */ + public function getLocalCopy( array $params ) { + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + return null; + } + + // Create a new temporary file with the same extension... 
+ $ext = FileBackend::extensionFromPath( $params['src'] ); + $tmpFile = TempFSFile::factory( 'localcopy_', $ext ); + if ( !$tmpFile ) { + return null; + } + $tmpPath = $tmpFile->getPath(); + + // Copy the source file over the temp file + $ok = copy( $source, $tmpPath ); + if ( !$ok ) { + return null; + } + + $this->chmod( $tmpPath ); + + return $tmpFile; + } + + /** + * @see FileBackendStore::directoriesAreVirtual() + * @return bool + */ + protected function directoriesAreVirtual() { + return false; + } + + /** + * @see FileBackendStore::doExecuteOpHandlesInternal() + * @return Array List of corresponding Status objects + */ + protected function doExecuteOpHandlesInternal( array $fileOpHandles ) { + $statuses = array(); + + $pipes = array(); + foreach ( $fileOpHandles as $index => $fileOpHandle ) { + $pipes[$index] = popen( "{$fileOpHandle->cmd} 2>&1", 'r' ); + } + + $errs = array(); + foreach ( $pipes as $index => $pipe ) { + // Result will be empty on success in *NIX. On Windows, + // it may be something like " 1 file(s) [copied|moved].". + $errs[$index] = stream_get_contents( $pipe ); + fclose( $pipe ); + } + + foreach ( $fileOpHandles as $index => $fileOpHandle ) { + $status = Status::newGood(); + $function = '_getResponse' . 
$fileOpHandle->call; + $this->$function( $errs[$index], $status, $fileOpHandle->params, $fileOpHandle->cmd ); + $statuses[$index] = $status; + if ( $status->isOK() && $fileOpHandle->chmodPath ) { + $this->chmod( $fileOpHandle->chmodPath ); + } + } + + clearstatcache(); // files changed + return $statuses; + } + + /** + * Chmod a file, suppressing the warnings + * + * @param $path string Absolute file system path + * @return bool Success + */ + protected function chmod( $path ) { + wfSuppressWarnings(); + $ok = chmod( $path, $this->fileMode ); + wfRestoreWarnings(); + + return $ok; + } + + /** + * Return the text of an index.html file to hide directory listings + * + * @return string + */ + protected function indexHtmlPrivate() { + return ''; + } + + /** + * Return the text of a .htaccess file to make a directory private + * + * @return string + */ + protected function htaccessPrivate() { + return "Deny from all\n"; + } + + /** + * Clean up directory separators for the given OS + * + * @param $path string FS path + * @return string + */ + protected function cleanPathSlashes( $path ) { + return wfIsWindows() ? 
strtr( $path, '/', '\\' ) : $path; + } + + /** + * Listen for E_WARNING errors and track whether any happen + * + * @return bool + */ + protected function trapWarnings() { + $this->hadWarningErrors[] = false; // push to stack + set_error_handler( array( $this, 'handleWarning' ), E_WARNING ); + return false; // invoke normal PHP error handler + } + + /** + * Stop listening for E_WARNING errors and return true if any happened + * + * @return bool + */ + protected function untrapWarnings() { + restore_error_handler(); // restore previous handler + return array_pop( $this->hadWarningErrors ); // pop from stack + } + + /** + * @return bool + */ + private function handleWarning() { + $this->hadWarningErrors[count( $this->hadWarningErrors ) - 1] = true; + return true; // suppress from PHP handler + } +} + +/** + * @see FileBackendStoreOpHandle + */ +class FSFileOpHandle extends FileBackendStoreOpHandle { + public $cmd; // string; shell command + public $chmodPath; // string; file to chmod + + /** + * @param $backend + * @param $params array + * @param $call + * @param $cmd + * @param $chmodPath null + */ + public function __construct( $backend, array $params, $call, $cmd, $chmodPath = null ) { + $this->backend = $backend; + $this->params = $params; + $this->call = $call; + $this->cmd = $cmd; + $this->chmodPath = $chmodPath; + } +} + +/** + * Wrapper around RecursiveDirectoryIterator/DirectoryIterator that + * catches exceptions or does any custom behavior that we may want. + * Do not use this class from places outside FSFileBackend. 
+ * + * @ingroup FileBackend + */ +abstract class FSFileBackendList implements Iterator { + /** @var Iterator */ + protected $iter; + protected $suffixStart; // integer + protected $pos = 0; // integer + /** @var Array */ + protected $params = array(); + + /** + * @param $dir string file system directory + * @param $params array + */ + public function __construct( $dir, array $params ) { + $dir = realpath( $dir ); // normalize + $this->suffixStart = strlen( $dir ) + 1; // size of "path/to/dir/" + $this->params = $params; + + try { + $this->iter = $this->initIterator( $dir ); + } catch ( UnexpectedValueException $e ) { + $this->iter = null; // bad permissions? deleted? + } + } + + /** + * Return an appropriate iterator object to wrap + * + * @param $dir string file system directory + * @return Iterator + */ + protected function initIterator( $dir ) { + if ( !empty( $this->params['topOnly'] ) ) { // non-recursive + # Get an iterator that will get direct sub-nodes + return new DirectoryIterator( $dir ); + } else { // recursive + # Get an iterator that will return leaf nodes (non-directories) + # RecursiveDirectoryIterator extends FilesystemIterator. + # FilesystemIterator::SKIP_DOTS default is inconsistent in PHP 5.3.x. 
+ $flags = FilesystemIterator::CURRENT_AS_SELF | FilesystemIterator::SKIP_DOTS; + return new RecursiveIteratorIterator( + new RecursiveDirectoryIterator( $dir, $flags ), + RecursiveIteratorIterator::CHILD_FIRST // include dirs + ); + } + } + + /** + * @see Iterator::key() + * @return integer + */ + public function key() { + return $this->pos; + } + + /** + * @see Iterator::current() + * @return string|bool String or false + */ + public function current() { + return $this->getRelPath( $this->iter->current()->getPathname() ); + } + + /** + * @see Iterator::next() + * @return void + */ + public function next() { + try { + $this->iter->next(); + $this->filterViaNext(); + } catch ( UnexpectedValueException $e ) { + $this->iter = null; + } + ++$this->pos; + } + + /** + * @see Iterator::rewind() + * @return void + */ + public function rewind() { + $this->pos = 0; + try { + $this->iter->rewind(); + $this->filterViaNext(); + } catch ( UnexpectedValueException $e ) { + $this->iter = null; + } + } + + /** + * @see Iterator::valid() + * @return bool + */ + public function valid() { + return $this->iter && $this->iter->valid(); + } + + /** + * Filter out items by advancing to the next ones + */ + protected function filterViaNext() {} + + /** + * Return only the relative path and normalize slashes to FileBackend-style. + * Uses the "real path" since the suffix is based upon that. 
+ * + * @param $path string + * @return string + */ + protected function getRelPath( $path ) { + return strtr( substr( realpath( $path ), $this->suffixStart ), '\\', '/' ); + } +} + +class FSFileBackendDirList extends FSFileBackendList { + protected function filterViaNext() { + while ( $this->iter->valid() ) { + if ( $this->iter->current()->isDot() || !$this->iter->current()->isDir() ) { + $this->iter->next(); // skip non-directories and dot files + } else { + break; + } + } + } +} + +class FSFileBackendFileList extends FSFileBackendList { + protected function filterViaNext() { + while ( $this->iter->valid() ) { + if ( !$this->iter->current()->isFile() ) { + $this->iter->next(); // skip non-files and dot files + } else { + break; + } + } + } +} diff --git a/includes/filebackend/FileBackend.php b/includes/filebackend/FileBackend.php new file mode 100644 index 00000000..76c761b0 --- /dev/null +++ b/includes/filebackend/FileBackend.php @@ -0,0 +1,1173 @@ +<?php +/** + * @defgroup FileBackend File backend + * @ingroup FileRepo + * + * File backend is used to interact with file storage systems, + * such as the local file system, NFS, or cloud storage systems. + */ + +/** + * Base class for all file backends. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * @brief Base class for all file backend classes (including multi-write backends). + * + * This class defines the methods as abstract that subclasses must implement. + * Outside callers can assume that all backends will have these functions. + * + * All "storage paths" are of the format "mwstore://<backend>/<container>/<path>". + * The "<path>" portion is a relative path that uses UNIX file system (FS) + * notation, though any particular backend may not actually be using a local + * filesystem. Therefore, the relative paths are only virtual. + * + * Backend contents are stored under wiki-specific container names by default. + * For legacy reasons, this has no effect for the FS backend class, and per-wiki + * segregation must be done by setting the container paths appropriately. + * + * FS-based backends are somewhat more restrictive due to the existence of real + * directory files; a regular file cannot have the same name as a directory. Other + * backends with virtual directories may not have this limitation. Callers should + * store files in such a way that no files and directories are under the same path. + * + * Methods of subclasses should avoid throwing exceptions at all costs. + * As a corollary, external dependencies should be kept to a minimum. + * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileBackend { + protected $name; // string; unique backend name + protected $wikiId; // string; unique wiki name + protected $readOnly; // string; read-only explanation message + protected $parallelize; // string; when to do operations in parallel + protected $concurrency; // integer; how many operations can be done in parallel + + /** @var LockManager */ + protected $lockManager; + /** @var FileJournal */ + protected $fileJournal; + + /** + * Create a new backend instance from configuration. 
+ * This should only be called from within FileBackendGroup. + * + * $config includes: + * - name : The unique name of this backend. + * This should consist of alphanumeric, '-', and '_' characters. + * This name should not be changed after use. + * - wikiId : Prefix to container names that is unique to this wiki. + * It should only consist of alphanumeric, '-', and '_' characters. + * - lockManager : Registered name of a file lock manager to use. + * - fileJournal : File journal configuration; see FileJournal::factory(). + * Journals simply log changes to files stored in the backend. + * - readOnly : Write operations are disallowed if this is a non-empty string. + * It should be an explanation for the backend being read-only. + * - parallelize : When to do file operations in parallel (when possible). + * Allowed values are "implicit", "explicit" and "off". + * - concurrency : How many file operations can be done in parallel. + * + * @param $config Array + * @throws MWException + */ + public function __construct( array $config ) { + $this->name = $config['name']; + if ( !preg_match( '!^[a-zA-Z0-9-_]{1,255}$!', $this->name ) ) { + throw new MWException( "Backend name `{$this->name}` is invalid." ); + } + $this->wikiId = isset( $config['wikiId'] ) + ? $config['wikiId'] + : wfWikiID(); // e.g. "my_wiki-en_" + $this->lockManager = ( $config['lockManager'] instanceof LockManager ) + ? $config['lockManager'] + : LockManagerGroup::singleton()->get( $config['lockManager'] ); + $this->fileJournal = isset( $config['fileJournal'] ) + ? ( ( $config['fileJournal'] instanceof FileJournal ) + ? $config['fileJournal'] + : FileJournal::factory( $config['fileJournal'], $this->name ) ) + : FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name ); + $this->readOnly = isset( $config['readOnly'] ) + ? (string)$config['readOnly'] + : ''; + $this->parallelize = isset( $config['parallelize'] ) + ? 
(string)$config['parallelize'] + : 'off'; + $this->concurrency = isset( $config['concurrency'] ) + ? (int)$config['concurrency'] + : 50; + } + + /** + * Get the unique backend name. + * We may have multiple different backends of the same type. + * For example, we can have two Swift backends using different proxies. + * + * @return string + */ + final public function getName() { + return $this->name; + } + + /** + * Get the wiki identifier used for this backend (possibly empty) + * + * @return string + * @since 1.20 + */ + final public function getWikiId() { + return $this->wikiId; + } + + /** + * Check if this backend is read-only + * + * @return bool + */ + final public function isReadOnly() { + return ( $this->readOnly != '' ); + } + + /** + * Get an explanatory message if this backend is read-only + * + * @return string|bool Returns false if the backend is not read-only + */ + final public function getReadOnlyReason() { + return ( $this->readOnly != '' ) ? $this->readOnly : false; + } + + /** + * This is the main entry point into the backend for write operations. + * Callers supply an ordered list of operations to perform as a transaction. + * Files will be locked, the stat cache cleared, and then the operations attempted. + * If any serious errors occur, all attempted operations will be rolled back. + * + * $ops is an array of arrays. The outer array holds a list of operations. + * Each inner array is a set of key value pairs that specify an operation. + * + * Supported operations and their parameters. 
The supported actions are: + * - create + * - store + * - copy + * - move + * - delete + * - null + * + * a) Create a new file in storage with the contents of a string + * @code + * array( + * 'op' => 'create', + * 'dst' => <storage path>, + * 'content' => <string of new file contents>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean>, + * 'disposition' => <Content-Disposition header value> + * ); + * @endcode + * + * b) Copy a file system file into storage + * @code + * array( + * 'op' => 'store', + * 'src' => <file system path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean>, + * 'disposition' => <Content-Disposition header value> + * ) + * @endcode + * + * c) Copy a file within storage + * @code + * array( + * 'op' => 'copy', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean>, + * 'disposition' => <Content-Disposition header value> + * ) + * @endcode + * + * d) Move a file within storage + * @code + * array( + * 'op' => 'move', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean>, + * 'disposition' => <Content-Disposition header value> + * ) + * @endcode + * + * e) Delete a file within storage + * @code + * array( + * 'op' => 'delete', + * 'src' => <storage path>, + * 'ignoreMissingSource' => <boolean> + * ) + * @endcode + * + * f) Do nothing (no-op) + * @code + * array( + * 'op' => 'null', + * ) + * @endcode + * + * Boolean flags for operations (operation-specific): + * - ignoreMissingSource : The operation will simply succeed and do + * nothing if the source file does not exist. + * - overwrite : Any destination file will be overwritten. + * - overwriteSame : An error will not be given if a file already + * exists at the destination that has the same + * contents as the new contents to be written there. 
+ * - disposition : When supplied, the backend will add a Content-Disposition + * header when GETs/HEADs of the destination file are made. + * Backends that don't support file metadata will ignore this. + * See http://tools.ietf.org/html/rfc6266 (since 1.20). + * + * $opts is an associative array of boolean flags, including: + * - force : Operation precondition errors no longer trigger an abort. + * Any remaining operations are still attempted. Unexpected + * failures may still cause remaining operations to be aborted. + * - nonLocking : No locks are acquired for the operations. + * This can increase performance for non-critical writes. + * This has no effect unless the 'force' flag is set. + * - allowStale : Don't require the latest available data. + * This can increase performance for non-critical writes. + * This has no effect unless the 'force' flag is set. + * - nonJournaled : Don't log this operation batch in the file journal. + * This limits the ability of recovery scripts. + * - parallelize : Try to do operations in parallel when possible. + * - bypassReadOnly : Allow writes in read-only mode (since 1.20). + * - preserveCache : Don't clear the process cache before checking files. + * This should only be used if all entries in the process + * cache were added after the files were already locked (since 1.20). + * + * @remarks Remarks on locking: + * File system paths given to operations should refer to files that are + * already locked or otherwise safe from modification from other processes. + * Normally these files will be new temp files, which should be adequate. + * + * @par Return value: + * + * This returns a Status, which contains all warnings and fatals that occurred + * during the operation. The 'failCount', 'successCount', and 'success' members + * will reflect each operation attempted. + * + * The status will be "OK" unless: + * - a) unexpected operation errors occurred (network partitions, disk full...) 
+ * - b) significant operation errors occurred and 'force' was not set + * + * @param $ops Array List of operations to execute in order + * @param $opts Array Batch operation options + * @return Status + */ + final public function doOperations( array $ops, array $opts = array() ) { + if ( empty( $opts['bypassReadOnly'] ) && $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + if ( empty( $opts['force'] ) ) { // sanity + unset( $opts['nonLocking'] ); + unset( $opts['allowStale'] ); + } + $opts['concurrency'] = 1; // off + if ( $this->parallelize === 'implicit' ) { + if ( !isset( $opts['parallelize'] ) || $opts['parallelize'] ) { + $opts['concurrency'] = $this->concurrency; + } + } elseif ( $this->parallelize === 'explicit' ) { + if ( !empty( $opts['parallelize'] ) ) { + $opts['concurrency'] = $this->concurrency; + } + } + return $this->doOperationsInternal( $ops, $opts ); + } + + /** + * @see FileBackend::doOperations() + */ + abstract protected function doOperationsInternal( array $ops, array $opts ); + + /** + * Same as doOperations() except it takes a single operation. + * If you are doing a batch of operations that should either + * all succeed or all fail, then use that function instead. + * + * @see FileBackend::doOperations() + * + * @param $op Array Operation + * @param $opts Array Operation options + * @return Status + */ + final public function doOperation( array $op, array $opts = array() ) { + return $this->doOperations( array( $op ), $opts ); + } + + /** + * Performs a single create operation. + * This sets $params['op'] to 'create' and passes it to doOperation(). 
+ * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function create( array $params, array $opts = array() ) { + return $this->doOperation( array( 'op' => 'create' ) + $params, $opts ); + } + + /** + * Performs a single store operation. + * This sets $params['op'] to 'store' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function store( array $params, array $opts = array() ) { + return $this->doOperation( array( 'op' => 'store' ) + $params, $opts ); + } + + /** + * Performs a single copy operation. + * This sets $params['op'] to 'copy' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function copy( array $params, array $opts = array() ) { + return $this->doOperation( array( 'op' => 'copy' ) + $params, $opts ); + } + + /** + * Performs a single move operation. + * This sets $params['op'] to 'move' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function move( array $params, array $opts = array() ) { + return $this->doOperation( array( 'op' => 'move' ) + $params, $opts ); + } + + /** + * Performs a single delete operation. + * This sets $params['op'] to 'delete' and passes it to doOperation(). 
+ * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function delete( array $params, array $opts = array() ) { + return $this->doOperation( array( 'op' => 'delete' ) + $params, $opts ); + } + + /** + * Perform a set of independent file operations on some files. + * + * This does no locking, nor journaling, and possibly no stat calls. + * Any destination files that already exist will be overwritten. + * This should *only* be used on non-original files, like cache files. + * + * Supported operations and their parameters: + * - create + * - store + * - copy + * - move + * - delete + * - null + * + * a) Create a new file in storage with the contents of a string + * @code + * array( + * 'op' => 'create', + * 'dst' => <storage path>, + * 'content' => <string of new file contents>, + * 'disposition' => <Content-Disposition header value> + * ) + * @endcode + * b) Copy a file system file into storage + * @code + * array( + * 'op' => 'store', + * 'src' => <file system path>, + * 'dst' => <storage path>, + * 'disposition' => <Content-Disposition header value> + * ) + * @endcode + * c) Copy a file within storage + * @code + * array( + * 'op' => 'copy', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'disposition' => <Content-Disposition header value> + * ) + * @endcode + * d) Move a file within storage + * @code + * array( + * 'op' => 'move', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'disposition' => <Content-Disposition header value> + * ) + * @endcode + * e) Delete a file within storage + * @code + * array( + * 'op' => 'delete', + * 'src' => <storage path>, + * 'ignoreMissingSource' => <boolean> + * ) + * @endcode + * f) Do nothing (no-op) + * @code + * array( + * 'op' => 'null', + * ) + * @endcode + * + * @par Boolean flags for operations (operation-specific): + * - ignoreMissingSource : The operation will simply succeed and 
do + * nothing if the source file does not exist. + * - disposition : When supplied, the backend will add a Content-Disposition + * header when GETs/HEADs of the destination file are made. + * Backends that don't support file metadata will ignore this. + * See http://tools.ietf.org/html/rfc6266 (since 1.20). + * + * $opts is an associative of boolean flags, including: + * - bypassReadOnly : Allow writes in read-only mode (since 1.20) + * + * @par Return value: + * This returns a Status, which contains all warnings and fatals that occurred + * during the operation. The 'failCount', 'successCount', and 'success' members + * will reflect each operation attempted for the given files. The status will be + * considered "OK" as long as no fatal errors occurred. + * + * @param $ops Array Set of operations to execute + * @param $opts Array Batch operation options + * @return Status + * @since 1.20 + */ + final public function doQuickOperations( array $ops, array $opts = array() ) { + if ( empty( $opts['bypassReadOnly'] ) && $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + foreach ( $ops as &$op ) { + $op['overwrite'] = true; // avoids RTTs in key/value stores + } + return $this->doQuickOperationsInternal( $ops ); + } + + /** + * @see FileBackend::doQuickOperations() + * @since 1.20 + */ + abstract protected function doQuickOperationsInternal( array $ops ); + + /** + * Same as doQuickOperations() except it takes a single operation. + * If you are doing a batch of operations, then use that function instead. + * + * @see FileBackend::doQuickOperations() + * + * @param $op Array Operation + * @return Status + * @since 1.20 + */ + final public function doQuickOperation( array $op ) { + return $this->doQuickOperations( array( $op ) ); + } + + /** + * Performs a single quick create operation. + * This sets $params['op'] to 'create' and passes it to doQuickOperation(). 
+ * + * @see FileBackend::doQuickOperation() + * + * @param $params Array Operation parameters + * @return Status + * @since 1.20 + */ + final public function quickCreate( array $params ) { + return $this->doQuickOperation( array( 'op' => 'create' ) + $params ); + } + + /** + * Performs a single quick store operation. + * This sets $params['op'] to 'store' and passes it to doQuickOperation(). + * + * @see FileBackend::doQuickOperation() + * + * @param $params Array Operation parameters + * @return Status + * @since 1.20 + */ + final public function quickStore( array $params ) { + return $this->doQuickOperation( array( 'op' => 'store' ) + $params ); + } + + /** + * Performs a single quick copy operation. + * This sets $params['op'] to 'copy' and passes it to doQuickOperation(). + * + * @see FileBackend::doQuickOperation() + * + * @param $params Array Operation parameters + * @return Status + * @since 1.20 + */ + final public function quickCopy( array $params ) { + return $this->doQuickOperation( array( 'op' => 'copy' ) + $params ); + } + + /** + * Performs a single quick move operation. + * This sets $params['op'] to 'move' and passes it to doQuickOperation(). + * + * @see FileBackend::doQuickOperation() + * + * @param $params Array Operation parameters + * @return Status + * @since 1.20 + */ + final public function quickMove( array $params ) { + return $this->doQuickOperation( array( 'op' => 'move' ) + $params ); + } + + /** + * Performs a single quick delete operation. + * This sets $params['op'] to 'delete' and passes it to doQuickOperation(). + * + * @see FileBackend::doQuickOperation() + * + * @param $params Array Operation parameters + * @return Status + * @since 1.20 + */ + final public function quickDelete( array $params ) { + return $this->doQuickOperation( array( 'op' => 'delete' ) + $params ); + } + + /** + * Concatenate a list of storage files into a single file system file. 
+ * The target path should refer to a file that is already locked or + * otherwise safe from modification from other processes. Normally, + * the file will be a new temp file, which should be adequate. + * + * @param $params Array Operation parameters + * $params include: + * - srcs : ordered source storage paths (e.g. chunk1, chunk2, ...) + * - dst : file system path to 0-byte temp file + * @return Status + */ + abstract public function concatenate( array $params ); + + /** + * Prepare a storage directory for usage. + * This will create any required containers and parent directories. + * Backends using key/value stores only need to create the container. + * + * The 'noAccess' and 'noListing' parameters work the same as in secure(), + * except they are only applied *if* the directory/container had to be created. + * These flags should always be set for directories that have private files. + * + * @param $params Array + * $params include: + * - dir : storage directory + * - noAccess : try to deny file access (since 1.20) + * - noListing : try to deny file listing (since 1.20) + * - bypassReadOnly : allow writes in read-only mode (since 1.20) + * @return Status + */ + final public function prepare( array $params ) { + if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doPrepare( $params ); + } + + /** + * @see FileBackend::prepare() + */ + abstract protected function doPrepare( array $params ); + + /** + * Take measures to block web access to a storage directory and + * the container it belongs to. FS backends might add .htaccess + * files whereas key/value store backends might revoke container + * access to the storage user representing end-users in web requests. + * This is not guaranteed to actually do anything. 
+ * + * @param $params Array + * $params include: + * - dir : storage directory + * - noAccess : try to deny file access + * - noListing : try to deny file listing + * - bypassReadOnly : allow writes in read-only mode (since 1.20) + * @return Status + */ + final public function secure( array $params ) { + if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doSecure( $params ); + } + + /** + * @see FileBackend::secure() + */ + abstract protected function doSecure( array $params ); + + /** + * Remove measures to block web access to a storage directory and + * the container it belongs to. FS backends might remove .htaccess + * files whereas key/value store backends might grant container + * access to the storage user representing end-users in web requests. + * This essentially can undo the result of secure() calls. + * + * @param $params Array + * $params include: + * - dir : storage directory + * - access : try to allow file access + * - listing : try to allow file listing + * - bypassReadOnly : allow writes in read-only mode (since 1.20) + * @return Status + * @since 1.20 + */ + final public function publish( array $params ) { + if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doPublish( $params ); + } + + /** + * @see FileBackend::publish() + */ + abstract protected function doPublish( array $params ); + + /** + * Delete a storage directory if it is empty. + * Backends using key/value stores may do nothing unless the directory + * is that of an empty container, in which case it will be deleted. 
+ * + * @param $params Array + * $params include: + * - dir : storage directory + * - recursive : recursively delete empty subdirectories first (since 1.20) + * - bypassReadOnly : allow writes in read-only mode (since 1.20) + * @return Status + */ + final public function clean( array $params ) { + if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doClean( $params ); + } + + /** + * @see FileBackend::clean() + */ + abstract protected function doClean( array $params ); + + /** + * Check if a file exists at a storage path in the backend. + * This returns false if only a directory exists at the path. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return bool|null Returns null on failure + */ + abstract public function fileExists( array $params ); + + /** + * Get the last-modified timestamp of the file at a storage path. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return string|bool TS_MW timestamp or false on failure + */ + abstract public function getFileTimestamp( array $params ); + + /** + * Get the contents of a file at a storage path in the backend. + * This should be avoided for potentially large files. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return string|bool Returns false on failure + */ + abstract public function getFileContents( array $params ); + + /** + * Get the size (bytes) of a file at a storage path in the backend. 
+ * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return integer|bool Returns false on failure + */ + abstract public function getFileSize( array $params ); + + /** + * Get quick information about a file at a storage path in the backend. + * If the file does not exist, then this returns false. + * Otherwise, the result is an associative array that includes: + * - mtime : the last-modified timestamp (TS_MW) + * - size : the file size (bytes) + * Additional values may be included for internal use only. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return Array|bool|null Returns null on failure + */ + abstract public function getFileStat( array $params ); + + /** + * Get a SHA-1 hash of the file at a storage path in the backend. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return string|bool Hash string or false on failure + */ + abstract public function getFileSha1Base36( array $params ); + + /** + * Get the properties of the file at a storage path in the backend. + * Returns FSFile::placeholderProps() on failure. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return Array + */ + abstract public function getFileProps( array $params ); + + /** + * Stream the file at a storage path in the backend. + * If the file does not exist, an HTTP 404 error will be given. + * Appropriate HTTP headers (Status, Content-Type, Content-Length) + * will be sent if streaming began, while none will be sent otherwise. + * Implementations should flush the output buffer before sending data. 
+ * + * @param $params Array + * $params include: + * - src : source storage path + * - headers : list of additional HTTP headers to send on success + * - latest : use the latest available data + * @return Status + */ + abstract public function streamFile( array $params ); + + /** + * Returns a file system file, identical to the file at a storage path. + * The file returned is either: + * - a) A local copy of the file at a storage path in the backend. + * The temporary copy will have the same extension as the source. + * - b) An original of the file at a storage path in the backend. + * Temporary files may be purged when the file object falls out of scope. + * + * Write operations should *never* be done on this file as some backends + * may do internal tracking or may be instances of FileBackendMultiWrite. + * In that latter case, there are copies of the file that must stay in sync. + * Additionally, further calls to this function may return the same file. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return FSFile|null Returns null on failure + */ + abstract public function getLocalReference( array $params ); + + /** + * Get a local copy on disk of the file at a storage path in the backend. + * The temporary copy will have the same file extension as the source. + * Temporary files may be purged when the file object falls out of scope. + * + * @param $params Array + * $params include: + * - src : source storage path + * - latest : use the latest available data + * @return TempFSFile|null Returns null on failure + */ + abstract public function getLocalCopy( array $params ); + + /** + * Check if a directory exists at a given storage path. + * Backends using key/value stores will check if the path is a + * virtual directory, meaning there are files under the given directory. + * + * Storage backends with eventual consistency might return stale data. 
+ * + * @param $params array + * $params include: + * - dir : storage directory + * @return bool|null Returns null on failure + * @since 1.20 + */ + abstract public function directoryExists( array $params );

	/**
	 * Get an iterator over *all* directories under a storage directory.
	 *
	 * For a directory of the form "mwstore://backend/container", every
	 * directory in the container is listed. For a directory of the form
	 * "mwstore://backend/container/dir", the directories under that
	 * directory are listed. Each result is a storage directory relative
	 * to the given directory.
	 *
	 * Storage backends with eventual consistency might return stale data.
	 *
	 * @param $params array
	 * $params include:
	 *   - dir     : storage directory
	 *   - topOnly : only return direct child dirs of the directory
	 * @return Traversable|Array|null Returns null on failure
	 * @since 1.20
	 */
	abstract public function getDirectoryList( array $params );

	/**
	 * Variant of FileBackend::getDirectoryList() restricted to the
	 * directories immediately under the given directory.
	 *
	 * Storage backends with eventual consistency might return stale data.
	 *
	 * @param $params array
	 * $params include:
	 *   - dir : storage directory
	 * @return Traversable|Array|null Returns null on failure
	 * @since 1.20
	 */
	final public function getTopDirectoryList( array $params ) {
		// Array union: the forced 'topOnly' flag wins over any caller-supplied value
		$listParams = array( 'topOnly' => true ) + $params;
		return $this->getDirectoryList( $listParams );
	}

+ /** + * Get an iterator to list *all* stored files under a storage directory. + * If the directory is of the form "mwstore://backend/container", + * then all files in the container will be listed. + * If the directory is of form "mwstore://backend/container/dir", + * then all files under that directory will be listed. + * Results will be storage paths relative to the given directory. + * + * Storage backends with eventual consistency might return stale data. 
+ * + * @param $params array + * $params include: + * - dir : storage directory + * - topOnly : only return direct child files of the directory (since 1.20) + * @return Traversable|Array|null Returns null on failure + */ + abstract public function getFileList( array $params );

	/**
	 * Variant of FileBackend::getFileList() restricted to the files
	 * immediately under the given directory.
	 *
	 * Storage backends with eventual consistency might return stale data.
	 *
	 * @param $params array
	 * $params include:
	 *   - dir : storage directory
	 * @return Traversable|Array|null Returns null on failure
	 * @since 1.20
	 */
	final public function getTopFileList( array $params ) {
		// Array union: the forced 'topOnly' flag wins over any caller-supplied value
		$listParams = array( 'topOnly' => true ) + $params;
		return $this->getFileList( $listParams );
	}

	/**
	 * Preload persistent file stat and property cache into in-process cache.
	 * Intended for callers that are about to stat a known, large list of files.
	 * This base implementation does nothing.
	 *
	 * @param $paths Array Storage paths
	 * @return void
	 */
	public function preloadCache( array $paths ) {}

	/**
	 * Invalidate any in-process file stat and property cache.
	 * When $paths is given, only the cache for those files is to be cleared.
	 * This base implementation does nothing.
	 *
	 * @param $paths Array Storage paths (optional)
	 * @return void
	 */
	public function clearCache( array $paths = null ) {}

	/**
	 * Lock the files at the given storage paths in the backend.
	 * This will either lock all the files or none (on failure).
	 * The request is passed straight to this backend's lock manager.
	 *
	 * Callers should consider using getScopedFileLocks() instead.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function lockFiles( array $paths, $type ) {
		$lockStatus = $this->lockManager->lock( $paths, $type );
		return $lockStatus;
	}

+ /** + * Unlock the files at the given storage paths in the backend. 
+ * + * @param $paths Array Storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function unlockFiles( array $paths, $type ) { + return $this->lockManager->unlock( $paths, $type ); + }

	/**
	 * Lock the files at the given storage paths in the backend.
	 * This will either lock all the files or none (on failure).
	 * On failure, the status object will be updated with errors.
	 *
	 * Once the return value goes out of scope, the locks will be released
	 * and the status updated. Unlock fatals will not change the status
	 * "OK" value.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @param $status Status Status to update on lock/unlock
	 * @return ScopedLock|null Returns null on failure
	 */
	final public function getScopedFileLocks( array $paths, $type, Status $status ) {
		$scopedLock = ScopedLock::factory( $this->lockManager, $paths, $type, $status );
		return $scopedLock;
	}

	/**
	 * Get an array of scoped locks needed for a batch of file operations.
	 *
	 * FileBackend::doOperations() normally takes care of locking, unless
	 * the 'nonLocking' param is passed in. Use this function when the files
	 * must stay locked for a broader scope than the file changes themselves,
	 * for example until related DB metadata has been updated as well.
	 *
	 * @see FileBackend::doOperations()
	 *
	 * @param $ops Array List of file operations to FileBackend::doOperations()
	 * @param $status Status Status to update on lock/unlock
	 * @return Array List of ScopedFileLocks or null values
	 * @since 1.20
	 */
	abstract public function getScopedLocksForOps( array $ops, Status $status );

+ /** + * Get the root storage path of this backend. + * All container paths are "subdirectories" of this path. 
+ * + * @return string Storage path + * @since 1.20 + */ + final public function getRootStoragePath() { + return "mwstore://{$this->name}"; + }

	/**
	 * Get the file journal object for this backend
	 *
	 * @return FileJournal
	 */
	final public function getJournal() {
		return $this->fileJournal;
	}

	/**
	 * Check if a given path is a "mwstore://" path.
	 * Only the prefix is tested; this does not do any further validation
	 * or any existence checks.
	 *
	 * @param $path string
	 * @return bool
	 */
	final public static function isStoragePath( $path ) {
		return ( strpos( $path, 'mwstore://' ) === 0 );
	}

	/**
	 * Split a storage path into a backend name, a container name,
	 * and a relative file path. The relative path may be the empty string.
	 * This does not do any path normalization or traversal checks.
	 *
	 * @param $storagePath string
	 * @return Array (backend, container, rel object) or (null, null, null)
	 */
	final public static function splitStoragePath( $storagePath ) {
		if ( self::isStoragePath( $storagePath ) ) {
			// Remove the "mwstore://" prefix (10 characters) and split into at most 3 parts
			$parts = explode( '/', substr( $storagePath, 10 ), 3 );
			// Both the backend and the container name must be non-empty
			if ( count( $parts ) >= 2 && $parts[0] != '' && $parts[1] != '' ) {
				if ( count( $parts ) == 3 ) {
					return $parts; // e.g. "backend/container/path"
				} else {
					return array( $parts[0], $parts[1], '' ); // e.g. "backend/container"
				}
			}
		}
		return array( null, null, null );
	}

	/**
	 * Normalize a storage path by cleaning up directory separators.
	 * Returns null if the path is not of the format of a valid storage path,
	 * or if its relative part is rejected by normalizeContainerPath().
	 *
	 * @param $storagePath string
	 * @return string|null
	 */
	final public static function normalizeStoragePath( $storagePath ) {
		list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
		if ( $relPath !== null ) { // the path could be split
			$relPath = self::normalizeContainerPath( $relPath );
			if ( $relPath !== null ) {
				// Avoid a trailing slash when the path names the container itself
				return ( $relPath != '' )
					? "mwstore://{$backend}/{$container}/{$relPath}"
					: "mwstore://{$backend}/{$container}";
			}
		}
		return null;
	}

	/**
	 * Get the parent storage directory of a storage path.
	 * This returns a path like "mwstore://backend/container",
	 * "mwstore://backend/container/...", or null if there is no parent.
	 *
	 * @param $storagePath string
	 * @return string|null
	 */
	final public static function parentStoragePath( $storagePath ) {
		$storagePath = dirname( $storagePath );
		// The parent only counts if it still has a backend and a container part
		list( $b, $cont, $rel ) = self::splitStoragePath( $storagePath );
		return ( $rel === null ) ? null : $storagePath;
	}

	/**
	 * Get the final extension from a storage or FS path.
	 *
	 * The extension is the lower-cased text after the last "." of the path.
	 * An empty string is returned when the path has no ".", when its only
	 * "." is the very first character, or when the last "." belongs to a
	 * directory name (i.e. a directory separator follows it), so that
	 * "mwstore://backend/wiki.public/File" has no extension instead of
	 * "public/file".
	 *
	 * @param $path string
	 * @return string
	 */
	final public static function extensionFromPath( $path ) {
		$i = strrpos( $path, '.' );
		if ( !$i ) {
			return ''; // no dot at all, or only a leading dot
		}
		$ext = (string)substr( $path, $i + 1 );
		if ( strpbrk( $ext, '/\\' ) !== false ) {
			return ''; // the dot is part of a directory name, not of the file name
		}
		return strtolower( $ext );
	}

	/**
	 * Check if a relative path has no directory traversals
	 *
	 * @param $path string
	 * @return bool
	 * @since 1.20
	 */
	final public static function isPathTraversalFree( $path ) {
		return ( self::normalizeContainerPath( $path ) !== null );
	}

	/**
	 * Build a Content-Disposition header value per RFC 6266.
	 *
	 * @param $type string One of (attachment, inline)
	 * @param $filename string Suggested file name (should not contain slashes)
	 * @return string
	 * @throws MWException If $type is neither "inline" nor "attachment"
	 * @since 1.20
	 */
	final public static function makeContentDisposition( $type, $filename = '' ) {
		$parts = array();

		$type = strtolower( $type );
		if ( !in_array( $type, array( 'inline', 'attachment' ) ) ) {
			throw new MWException( "Invalid Content-Disposition type '$type'." );
		}
		$parts[] = $type;

		if ( strlen( $filename ) ) {
			// Only the base name is sent, percent-encoded in the "filename*" form
			$parts[] = "filename*=UTF-8''" . rawurlencode( basename( $filename ) );
		}

		return implode( ';', $parts );
	}

	/**
	 * Validate and normalize a relative storage path.
	 * Null is returned if the path involves directory traversal.
	 * Traversal is insecure for FS backends and broken for others.
	 *
	 * A "." or ".." path segment is rejected wherever it occurs: as the
	 * whole path, at the start, in the middle, or at the end of the path.
	 *
	 * @param $path string Storage path relative to a container
	 * @return string|null
	 */
	final protected static function normalizeContainerPath( $path ) {
		// Normalize directory separators
		$path = strtr( $path, '\\', '/' );
		// Collapse any consecutive directory separators
		$path = preg_replace( '![/]{2,}!', '/', $path );
		// Remove any leading directory separator
		$path = ltrim( $path, '/' );
		// Reject "." and ".." segments; the cheap strpos() skips dot-free paths
		if ( strpos( $path, '.' ) !== false ) {
			if (
				$path === '.' ||
				$path === '..' ||
				strpos( $path, './' ) === 0 ||
				strpos( $path, '../' ) === 0 ||
				strpos( $path, '/./' ) !== false ||
				strpos( $path, '/../' ) !== false ||
				// Trailing segments such as "dir/." and "dir/.." have no slash after them
				substr( $path, -2 ) === '/.' ||
				substr( $path, -3 ) === '/..'
			) {
				return null;
			}
		}
		return $path;
	}
+} diff --git a/includes/filebackend/FileBackendGroup.php b/includes/filebackend/FileBackendGroup.php new file mode 100644 index 00000000..8bbc96d0 --- /dev/null +++ b/includes/filebackend/FileBackendGroup.php @@ -0,0 +1,187 @@ +<?php +/** + * File backend registration handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */

/**
 * Registry of the file backends known to this wiki.
 *
 * Backend configurations are registered by name and the backend objects
 * themselves are only constructed the first time they are requested.
 *
 * @ingroup FileBackend
 * @since 1.19
 */
class FileBackendGroup {
	/**
	 * @var FileBackendGroup
	 */
	protected static $instance = null;

	/** @var Array (name => ('class' => string, 'config' => array, 'instance' => object)) */
	protected $backends = array();

	protected function __construct() {}

	/**
	 * Get the shared group, creating it from the global configuration
	 * on first use.
	 *
	 * @return FileBackendGroup
	 */
	public static function singleton() {
		if ( self::$instance == null ) {
			$group = new self();
			// Publish the instance before it reads the globals
			self::$instance = $group;
			$group->initFromGlobals();
		}
		return self::$instance;
	}

	/**
	 * Drop the shared group so that the next singleton() call rebuilds it
	 *
	 * @return void
	 */
	public static function destroySingleton() {
		self::$instance = null;
	}

	/**
	 * Register file backends from the global variables:
	 * first those listed in $wgFileBackends, then a b/c FSFileBackend
	 * for every file repo whose backend name is not registered yet.
	 *
	 * @return void
	 */
	protected function initFromGlobals() {
		global $wgLocalFileRepo, $wgForeignFileRepos, $wgFileBackends;

		// Explicitly defined backends are registered first
		$this->register( $wgFileBackends );

		// Derive b/c backends from the file repo settings...
		$implicitConfigs = array();
		$repoInfos = array_merge( $wgForeignFileRepos, array( $wgLocalFileRepo ) );
		foreach ( $repoInfos as $repoInfo ) {
			$backendName = $repoInfo['backend'];
			// Skip repos that carry a backend object or name a known backend
			if ( is_object( $backendName ) || isset( $this->backends[$backendName] ) ) {
				continue;
			}
			$repoName = $repoInfo['name'];
			// Settings that used to be FSRepo members, with their defaults
			$directory = $repoInfo['directory'];
			$deletedDir = false; // deletion disabled
			if ( isset( $repoInfo['deletedDir'] ) ) {
				$deletedDir = $repoInfo['deletedDir'];
			}
			$thumbDir = "{$directory}/thumb";
			if ( isset( $repoInfo['thumbDir'] ) ) {
				$thumbDir = $repoInfo['thumbDir'];
			}
			$fileMode = 0644;
			if ( isset( $repoInfo['fileMode'] ) ) {
				$fileMode = $repoInfo['fileMode'];
			}
			// One container per repo zone, each mapped to a local directory
			$containerPaths = array(
				"{$repoName}-public" => "{$directory}",
				"{$repoName}-thumb" => $thumbDir,
				"{$repoName}-deleted" => $deletedDir,
				"{$repoName}-temp" => "{$directory}/temp"
			);
			$implicitConfigs[] = array(
				'name' => $backendName,
				'class' => 'FSFileBackend',
				'lockManager' => 'fsLockManager',
				'containerPaths' => $containerPaths,
				'fileMode' => $fileMode,
			);
		}

		// Implicitly defined backends are registered last
		$this->register( $implicitConfigs );
	}

	/**
	 * Register an array of file backend configurations.
	 * No backend object is created here; see get().
	 *
	 * @param $configs Array
	 * @return void
	 * @throws MWException If a configuration lacks a name or a class
	 */
	protected function register( array $configs ) {
		foreach ( $configs as $backendConfig ) {
			if ( !isset( $backendConfig['name'] ) ) {
				throw new MWException( "Cannot register a backend with no name." );
			}
			$name = $backendConfig['name'];
			if ( !isset( $backendConfig['class'] ) ) {
				throw new MWException( "Cannot register backend `{$name}` with no class." );
			}
			$entry = array(
				'class' => $backendConfig['class'],
				'config' => null,
				'instance' => null
			);
			// The class name is stored separately; the backend won't need it
			unset( $backendConfig['class'] );
			$entry['config'] = $backendConfig;
			$this->backends[$name] = $entry;
		}
	}

	/**
	 * Get the backend object with a given name, constructing it from
	 * its registered configuration on first use.
	 *
	 * @param $name string
	 * @return FileBackend
	 * @throws MWException If no backend of that name is registered
	 */
	public function get( $name ) {
		if ( !isset( $this->backends[$name] ) ) {
			throw new MWException( "No backend defined with the name `$name`." );
		}
		if ( isset( $this->backends[$name]['instance'] ) ) {
			return $this->backends[$name]['instance'];
		}
		// First request: build the backend and remember it
		$backendClass = $this->backends[$name]['class'];
		$backendConfig = $this->backends[$name]['config'];
		$this->backends[$name]['instance'] = new $backendClass( $backendConfig );
		return $this->backends[$name]['instance'];
	}

	/**
	 * Get the config array for a backend object with a given name.
	 * The 'class' entry removed at registration time is included again.
	 *
	 * @param $name string
	 * @return Array
	 * @throws MWException If no backend of that name is registered
	 */
	public function config( $name ) {
		if ( !isset( $this->backends[$name] ) ) {
			throw new MWException( "No backend defined with the name `$name`." );
		}
		$registered = $this->backends[$name];
		return array( 'class' => $registered['class'] ) + $registered['config'];
	}

	/**
	 * Get an appropriate backend object from a storage path
	 *
	 * @param $storagePath string
	 * @return FileBackend|null Backend or null on failure
	 */
	public function backendFromPath( $storagePath ) {
		$pathParts = FileBackend::splitStoragePath( $storagePath );
		$backendName = $pathParts[0]; // null when the path could not be split
		if ( $backendName === null || !isset( $this->backends[$backendName] ) ) {
			return null;
		}
		return $this->get( $backendName );
	}
}
diff --git a/includes/filebackend/FileBackendMultiWrite.php b/includes/filebackend/FileBackendMultiWrite.php new file mode 100644 index 00000000..4be03231 --- /dev/null +++ b/includes/filebackend/FileBackendMultiWrite.php @@ -0,0 +1,689 @@ +<?php +/** + * Proxy backend that mirrors writes to several internal backends. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */

/**
 * @brief Proxy backend that mirrors writes to several internal backends.
 *
 * This class defines a multi-write backend. Multiple backends can be
 * registered to this proxy backend and it will act as a single backend.
 * Use this when all access to those backends is through this proxy backend.
 * Exactly one of the backends must be declared the "master" backend.
 *
 * Only use this class when transitioning from one storage system to another.
 *
 * Read operations are only done on the 'master' backend for consistency.
 * Write operations are performed on all backends, in the order defined.
 * If an operation fails on one backend it will be rolled back from the others.
 *
 * @ingroup FileBackend
 * @since 1.19
 */
class FileBackendMultiWrite extends FileBackend {
	/** @var Array Prioritized list of FileBackendStore objects */
	protected $backends = array(); // array of (backend index => backends)
	protected $masterIndex = -1; // integer; index of master backend
	protected $syncChecks = 0; // integer; bitfield
	protected $autoResync = false; // boolean

	/** @var Array */
	protected $noPushDirConts = array();
	protected $noPushQuickOps = false; // boolean

	/* Possible internal backend consistency checks */
	const CHECK_SIZE = 1;
	const CHECK_TIME = 2;
	const CHECK_SHA1 = 4;

	/**
	 * Construct a proxy backend that consists of several internal backends.
	 * Locking, journaling, and read-only checks are handled by the proxy backend.
	 *
	 * Additional $config params include:
	 *   - backends       : Array of backend config and multi-backend settings.
	 *                      Each value is the config used in the constructor of a
	 *                      FileBackendStore class, but with these additional settings:
	 *                        - class         : The name of the backend class
	 *                        - isMultiMaster : This must be set for one backend.
	 *                        - template:     : If given a backend name, this will use
	 *                                          the config of that backend as a template.
	 *                                          Values specified here take precedence.
	 *   - syncChecks     : Integer bitfield of internal backend sync checks to perform.
	 *                      Possible bits include the FileBackendMultiWrite::CHECK_* constants.
	 *                      There are constants for SIZE, TIME, and SHA1.
	 *                      The checks are done before allowing any file operations.
	 *   - autoResync     : Automatically resync the clone backends to the master backend
	 *                      when pre-operation sync checks fail. This should only be used
	 *                      if the master backend is stable and not missing any files.
	 *   - noPushQuickOps : (hack) Only apply doQuickOperations() to the master backend.
	 *   - noPushDirConts : (hack) Only apply directory functions to the master backend.
	 *
	 * @param $config Array
	 * @throws MWException On duplicate backend names, a missing backend class,
	 *   more than one master backend, or no master backend at all
	 */
	public function __construct( array $config ) {
		parent::__construct( $config );
		$this->syncChecks = isset( $config['syncChecks'] )
			? $config['syncChecks']
			: self::CHECK_SIZE;
		$this->autoResync = !empty( $config['autoResync'] );
		$this->noPushQuickOps = isset( $config['noPushQuickOps'] )
			? $config['noPushQuickOps']
			: false;
		$this->noPushDirConts = isset( $config['noPushDirConts'] )
			? $config['noPushDirConts']
			: array();
		// Construct backends here rather than via registration
		// to keep these backends hidden from outside the proxy.
		$namesUsed = array();
		// A dedicated loop variable keeps the proxy's own $config intact
		foreach ( $config['backends'] as $index => $backendConfig ) {
			if ( isset( $backendConfig['template'] ) ) {
				// Config is just a modified version of a registered backend's.
				// This should only be used when that config is used only by this backend.
				$backendConfig = $backendConfig +
					FileBackendGroup::singleton()->config( $backendConfig['template'] );
			}
			$name = $backendConfig['name'];
			if ( isset( $namesUsed[$name] ) ) { // don't break FileOp predicates
				throw new MWException( "Two or more backends defined with the name $name." );
			}
			$namesUsed[$name] = 1;
			// Alter certain sub-backend settings for sanity
			unset( $backendConfig['readOnly'] ); // use proxy backend setting
			unset( $backendConfig['fileJournal'] ); // use proxy backend journal
			$backendConfig['wikiId'] = $this->wikiId; // use the proxy backend wiki ID
			$backendConfig['lockManager'] = 'nullLockManager'; // lock under proxy backend
			if ( !empty( $backendConfig['isMultiMaster'] ) ) {
				if ( $this->masterIndex >= 0 ) {
					throw new MWException( 'More than one master backend defined.' );
				}
				$this->masterIndex = $index; // this is the "master"
				$backendConfig['fileJournal'] = $this->fileJournal; // log under proxy backend
			}
			// Create sub-backend object
			if ( !isset( $backendConfig['class'] ) ) {
				throw new MWException( 'No class given for a backend config.' );
			}
			$class = $backendConfig['class'];
			$this->backends[$index] = new $class( $backendConfig );
		}
		if ( $this->masterIndex < 0 ) { // need backends and must have a master
			throw new MWException( 'No master backend defined.' );
		}
	}

	/**
	 * Run a batch of operations on the master backend and, if that raised
	 * no errors, on every clone backend as well.
	 *
	 * Before anything is changed, the affected paths are locked (unless
	 * 'nonLocking' is set), checked for usability on all backends and checked
	 * for consistency between the backends (optionally resyncing the clones).
	 *
	 * @see FileBackend::doOperationsInternal()
	 * @param $ops Array List of operations; same format as doOperations()
	 * @param $opts Array Batch options
	 * @return Status
	 */
	final protected function doOperationsInternal( array $ops, array $opts ) {
		$status = Status::newGood();

		$mbe = $this->backends[$this->masterIndex]; // convenience

		// Get the paths to lock from the master backend
		$realOps = $this->substOpBatchPaths( $ops, $mbe );
		$paths = $mbe->getPathsToLockForOpsInternal( $mbe->getOperationsInternal( $realOps ) );
		// Get the paths under the proxy backend's name
		$paths['sh'] = $this->unsubstPaths( $paths['sh'] );
		$paths['ex'] = $this->unsubstPaths( $paths['ex'] );
		if ( empty( $opts['nonLocking'] ) ) {
			// Try to lock those files for the scope of this function.
			// The two variables are never read, but they must stay alive:
			// the locks are released when the scoped locks go out of scope.
			$scopeLockS = $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status );
			$scopeLockE = $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status );
			if ( !$status->isOK() ) {
				return $status; // abort
			}
		}
		// Clear any cache entries (after locks acquired)
		$this->clearCache();
		$opts['preserveCache'] = true; // only locked files are cached
		// Get the list of paths to read/write...
		$relevantPaths = $this->fileStoragePathsForOps( $ops );
		// Check if the paths are valid and accessible on all backends...
		$status->merge( $this->accessibilityCheck( $relevantPaths ) );
		if ( !$status->isOK() ) {
			return $status; // abort
		}
		// Do a consistency check to see if the backends are consistent...
		$syncStatus = $this->consistencyCheck( $relevantPaths );
		if ( !$syncStatus->isOK() ) {
			wfDebugLog( 'FileOperation', get_class( $this ) .
				" failed sync check: " . FormatJson::encode( $relevantPaths ) );
			// Try to resync the clone backends to the master on the spot...
			if ( !$this->autoResync || !$this->resyncFiles( $relevantPaths )->isOK() ) {
				$status->merge( $syncStatus );
				return $status; // abort
			}
		}
		// Actually attempt the operation batch on the master backend...
		$masterStatus = $mbe->doOperations( $realOps, $opts );
		$status->merge( $masterStatus );
		// Propagate the operations to the clone backends if there were no fatal errors.
		// If $ops only had one operation, this might avoid backend inconsistencies.
		// This also avoids inconsistency for expected errors (like "file already exists").
		if ( !count( $masterStatus->getErrorsArray() ) ) {
			foreach ( $this->backends as $index => $backend ) {
				if ( $index !== $this->masterIndex ) { // not done already
					$realOps = $this->substOpBatchPaths( $ops, $backend );
					$status->merge( $backend->doOperations( $realOps, $opts ) );
				}
			}
		}
		// Make 'success', 'successCount', and 'failCount' fields reflect
		// the overall operation, rather than all the batches for each backend.
		// Do this by only using success values from the master backend's batch.
		$status->success = $masterStatus->success;
		$status->successCount = $masterStatus->successCount;
		$status->failCount = $masterStatus->failCount;

		return $status;
	}

	/**
	 * Check that a set of files are consistent across all internal backends.
	 * Which properties are compared depends on the configured syncChecks bits.
	 *
	 * @param $paths Array List of storage paths
	 * @return Status Has a 'backend-fail-synced' fatal for each mismatch found
	 */
	public function consistencyCheck( array $paths ) {
		$status = Status::newGood();
		if ( $this->syncChecks == 0 || count( $this->backends ) <= 1 ) {
			return $status; // skip checks
		}

		$mBackend = $this->backends[$this->masterIndex];
		foreach ( $paths as $path ) {
			$params = array( 'src' => $path, 'latest' => true );
			$mParams = $this->substOpPaths( $params, $mBackend );
			// Stat the file on the 'master' backend
			$mStat = $mBackend->getFileStat( $mParams );
			if ( $this->syncChecks & self::CHECK_SHA1 ) {
				$mSha1 = $mBackend->getFileSha1Base36( $mParams );
			} else {
				$mSha1 = false;
			}
			// Check if all clone backends agree with the master...
			foreach ( $this->backends as $index => $cBackend ) {
				if ( $index === $this->masterIndex ) {
					continue; // master
				}
				$cParams = $this->substOpPaths( $params, $cBackend );
				$cStat = $cBackend->getFileStat( $cParams );
				if ( $mStat ) { // file is in master
					if ( !$cStat ) { // file should exist
						$status->fatal( 'backend-fail-synced', $path );
						continue;
					}
					if ( $this->syncChecks & self::CHECK_SIZE ) {
						if ( $cStat['size'] != $mStat['size'] ) { // wrong size
							$status->fatal( 'backend-fail-synced', $path );
							continue;
						}
					}
					if ( $this->syncChecks & self::CHECK_TIME ) {
						$mTs = wfTimestamp( TS_UNIX, $mStat['mtime'] );
						$cTs = wfTimestamp( TS_UNIX, $cStat['mtime'] );
						// Tolerate up to 30 seconds of difference between backends
						if ( abs( $mTs - $cTs ) > 30 ) { // outdated file somewhere
							$status->fatal( 'backend-fail-synced', $path );
							continue;
						}
					}
					if ( $this->syncChecks & self::CHECK_SHA1 ) {
						if ( $cBackend->getFileSha1Base36( $cParams ) !== $mSha1 ) { // wrong SHA1
							$status->fatal( 'backend-fail-synced', $path );
							continue;
						}
					}
				} else { // file is not in master
					if ( $cStat ) { // file should not exist
						$status->fatal( 'backend-fail-synced', $path );
					}
				}
			}
		}

		return $status;
	}

	/**
	 * Check that a set of file paths are usable across all internal backends
	 *
	 * @param $paths Array List of storage paths
	 * @return Status Has a 'backend-fail-usable' fatal for each unusable path
	 */
	public function accessibilityCheck( array $paths ) {
		$status = Status::newGood();
		if ( count( $this->backends ) <= 1 ) {
			return $status; // skip checks
		}

		foreach ( $paths as $path ) {
			foreach ( $this->backends as $backend ) {
				$realPath = $this->substPaths( $path, $backend );
				if ( !$backend->isPathUsableInternal( $realPath ) ) {
					$status->fatal( 'backend-fail-usable', $path );
				}
			}
		}

		return $status;
	}

	/**
	 * Check that a set of files are consistent across all internal backends
	 * and re-synchronize those files against the "multi master" if needed.
	 *
	 * Files are compared by SHA-1. A file present on the master is stored
	 * over the clone's copy; a file the master reports as non-existent is
	 * deleted from the clone.
	 *
	 * @param $paths Array List of storage paths
	 * @return Status
	 */
	public function resyncFiles( array $paths ) {
		$status = Status::newGood();

		$mBackend = $this->backends[$this->masterIndex];
		foreach ( $paths as $path ) {
			$mPath = $this->substPaths( $path, $mBackend );
			$mSha1 = $mBackend->getFileSha1Base36( array( 'src' => $mPath ) );
			$mExist = $mBackend->fileExists( array( 'src' => $mPath ) );
			// Check if the master backend is available...
			if ( $mExist === null ) {
				$status->fatal( 'backend-fail-internal', $this->name );
			}
			// Check if all clone backends agree with the master...
			foreach ( $this->backends as $index => $cBackend ) {
				if ( $index === $this->masterIndex ) {
					continue; // master
				}
				$cPath = $this->substPaths( $path, $cBackend );
				$cSha1 = $cBackend->getFileSha1Base36( array( 'src' => $cPath ) );
				if ( $mSha1 === $cSha1 ) {
					// already synced; nothing to do
				} elseif ( $mSha1 ) { // file is in master
					$fsFile = $mBackend->getLocalReference( array( 'src' => $mPath ) );
					if ( !$fsFile ) {
						// getLocalReference() returns null on failure; report
						// the problem instead of calling getPath() on null
						$status->fatal( 'backend-fail-internal', $this->name );
						continue;
					}
					$status->merge( $cBackend->quickStore(
						array( 'src' => $fsFile->getPath(), 'dst' => $cPath )
					) );
				} elseif ( $mExist === false ) { // file is not in master
					$status->merge( $cBackend->quickDelete( array( 'src' => $cPath ) ) );
				}
			}
		}

		return $status;
	}

	/**
	 * Get a list of file storage paths to read or write for a list of operations.
	 * The 'src', 'srcs' and 'dst' parameters of each operation are collected;
	 * values that are not storage paths are dropped, as are duplicates.
	 *
	 * @param $ops Array Same format as doOperations()
	 * @return Array List of storage paths to files (does not include directories)
	 */
	protected function fileStoragePathsForOps( array $ops ) {
		$paths = array();
		foreach ( $ops as $op ) {
			if ( isset( $op['src'] ) ) {
				$paths[] = $op['src'];
			}
			if ( isset( $op['srcs'] ) ) {
				$paths = array_merge( $paths, $op['srcs'] );
			}
			if ( isset( $op['dst'] ) ) {
				$paths[] = $op['dst'];
			}
		}
		return array_unique( array_filter( $paths, 'FileBackend::isStoragePath' ) );
	}

+ /** + * Substitute the backend name in storage path 
parameters + * for a set of operations with that of a given internal backend. + * + * @param $ops Array List of file operation arrays + * @param $backend FileBackendStore + * @return Array + */ + protected function substOpBatchPaths( array $ops, FileBackendStore $backend ) { + $newOps = array(); // operations + foreach ( $ops as $op ) { + $newOp = $op; // operation + foreach ( array( 'src', 'srcs', 'dst', 'dir' ) as $par ) { + if ( isset( $newOp[$par] ) ) { // string or array + $newOp[$par] = $this->substPaths( $newOp[$par], $backend ); + } + } + $newOps[] = $newOp; + } + return $newOps; + }

	/**
	 * Same as substOpBatchPaths() but for a single operation
	 *
	 * @param $ops array File operation array
	 * @param $backend FileBackendStore
	 * @return Array
	 */
	protected function substOpPaths( array $ops, FileBackendStore $backend ) {
		$newOps = $this->substOpBatchPaths( array( $ops ), $backend );
		return $newOps[0];
	}

	/**
	 * Substitute the backend of storage paths with an internal backend's name.
	 * Only a leading "mwstore://<proxy name>/" prefix is rewritten.
	 *
	 * @param $paths Array|string List of paths or single string path
	 * @param $backend FileBackendStore
	 * @return Array|string
	 */
	protected function substPaths( $paths, FileBackendStore $backend ) {
		return preg_replace(
			'!^mwstore://' . preg_quote( $this->name ) . '/!',
			StringUtils::escapeRegexReplacement( "mwstore://{$backend->getName()}/" ),
			$paths // string or array
		);
	}

	/**
	 * Substitute the backend of internal storage paths with the proxy backend's name.
	 * Whatever backend name follows "mwstore://" is replaced.
	 *
	 * @param $paths Array|string List of paths or single string path
	 * @return Array|string
	 */
	protected function unsubstPaths( $paths ) {
		return preg_replace(
			'!^mwstore://([^/]+)!',
			StringUtils::escapeRegexReplacement( "mwstore://{$this->name}" ),
			$paths // string or array
		);
	}

	/**
	 * Run quick operations on the master backend and, unless the
	 * noPushQuickOps setting is on, on every clone backend as well.
	 *
	 * @see FileBackend::doQuickOperationsInternal()
	 * @param $ops Array List of file operation arrays
	 * @return Status
	 */
	protected function doQuickOperationsInternal( array $ops ) {
		$status = Status::newGood();
		// Do the operations on the master backend; setting Status fields...
		$realOps = $this->substOpBatchPaths( $ops, $this->backends[$this->masterIndex] );
		$masterStatus = $this->backends[$this->masterIndex]->doQuickOperations( $realOps );
		$status->merge( $masterStatus );
		// Propagate the operations to the clone backends...
		if ( !$this->noPushQuickOps ) {
			foreach ( $this->backends as $index => $backend ) {
				if ( $index !== $this->masterIndex ) { // not done already
					$realOps = $this->substOpBatchPaths( $ops, $backend );
					$status->merge( $backend->doQuickOperations( $realOps ) );
				}
			}
		}
		// Make 'success', 'successCount', and 'failCount' fields reflect
		// the overall operation, rather than all the batches for each backend.
		// Do this by only using success values from the master backend's batch.
		$status->success = $masterStatus->success;
		$status->successCount = $masterStatus->successCount;
		$status->failCount = $masterStatus->failCount;
		return $status;
	}

	/**
	 * Tell whether directory changes for the container of a storage path
	 * go to all backends, i.e. the container is not listed in noPushDirConts.
	 *
	 * @param $path string Storage path
	 * @return bool Path container should have dir changes pushed to all backends
	 */
	protected function replicateContainerDirChanges( $path ) {
		list( $b, $shortCont, $r ) = self::splitStoragePath( $path );
		return !in_array( $shortCont, $this->noPushDirConts );
	}

	/**
	 * Prepare a directory on the master backend and, when directory changes
	 * for its container are replicated, on the clone backends too.
	 *
	 * @see FileBackend::doPrepare()
	 * @param $params array
	 * @return Status
	 */
	protected function doPrepare( array $params ) {
		$status = Status::newGood();
		$replicate = $this->replicateContainerDirChanges( $params['dir'] );
		foreach ( $this->backends as $index => $backend ) {
			// Strict comparison, as elsewhere in this class: a loose "==" can
			// match a string key against the integer index 0 on older PHP
			if ( $replicate || $index === $this->masterIndex ) {
				$realParams = $this->substOpPaths( $params, $backend );
				$status->merge( $backend->doPrepare( $realParams ) );
			}
		}
		return $status;
	}

	/**
	 * Secure a directory on the master backend and, when directory changes
	 * for its container are replicated, on the clone backends too.
	 *
	 * @see FileBackend::doSecure()
	 * @param $params array
	 * @return Status
	 */
	protected function doSecure( array $params ) {
		$status = Status::newGood();
		$replicate = $this->replicateContainerDirChanges( $params['dir'] );
		foreach ( $this->backends as $index => $backend ) {
			// Strict comparison, as elsewhere in this class: a loose "==" can
			// match a string key against the integer index 0 on older PHP
			if ( $replicate || $index === $this->masterIndex ) {
				$realParams = $this->substOpPaths( $params, $backend );
				$status->merge( $backend->doSecure( $realParams ) );
			}
		}
		return $status;
	}

	/**
	 * Publish a directory on the master backend and, when directory changes
	 * for its container are replicated, on the clone backends too.
	 *
	 * @see FileBackend::doPublish()
	 * @param $params array
	 * @return Status
	 */
	protected function doPublish( array $params ) {
		$status = Status::newGood();
		$replicate = $this->replicateContainerDirChanges( $params['dir'] );
		foreach ( $this->backends as $index => $backend ) {
			// Strict comparison, as elsewhere in this class: a loose "==" can
			// match a string key against the integer index 0 on older PHP
			if ( $replicate || $index === $this->masterIndex ) {
				$realParams = $this->substOpPaths( $params, $backend );
				$status->merge( $backend->doPublish( $realParams ) );
			}
		}
		return $status;
	}

+ /** + * @see FileBackend::doClean() + * @param $params 
array + * @return Status + */ + protected function doClean( array $params ) { + $status = Status::newGood(); + $replicate = $this->replicateContainerDirChanges( $params['dir'] ); + foreach ( $this->backends as $index => $backend ) { + if ( $replicate || $index == $this->masterIndex ) { + $realParams = $this->substOpPaths( $params, $backend ); + $status->merge( $backend->doClean( $realParams ) ); + } + } + return $status; + } + + /** + * @see FileBackend::concatenate() + * @param $params array + * @return Status + */ + public function concatenate( array $params ) { + // We are writing to an FS file, so we don't need to do this per-backend + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->concatenate( $realParams ); + } + + /** + * @see FileBackend::fileExists() + * @param $params array + */ + public function fileExists( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->fileExists( $realParams ); + } + + /** + * @see FileBackend::getFileTimestamp() + * @param $params array + * @return bool|string + */ + public function getFileTimestamp( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileTimestamp( $realParams ); + } + + /** + * @see FileBackend::getFileSize() + * @param $params array + * @return bool|int + */ + public function getFileSize( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileSize( $realParams ); + } + + /** + * @see FileBackend::getFileStat() + * @param $params array + * @return Array|bool|null + */ + public function getFileStat( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return 
$this->backends[$this->masterIndex]->getFileStat( $realParams ); + } + + /** + * @see FileBackend::getFileContents() + * @param $params array + * @return bool|string + */ + public function getFileContents( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileContents( $realParams ); + } + + /** + * @see FileBackend::getFileSha1Base36() + * @param $params array + * @return bool|string + */ + public function getFileSha1Base36( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileSha1Base36( $realParams ); + } + + /** + * @see FileBackend::getFileProps() + * @param $params array + * @return Array + */ + public function getFileProps( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileProps( $realParams ); + } + + /** + * @see FileBackend::streamFile() + * @param $params array + * @return \Status + */ + public function streamFile( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->streamFile( $realParams ); + } + + /** + * @see FileBackend::getLocalReference() + * @param $params array + * @return FSFile|null + */ + public function getLocalReference( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getLocalReference( $realParams ); + } + + /** + * @see FileBackend::getLocalCopy() + * @param $params array + * @return null|TempFSFile + */ + public function getLocalCopy( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getLocalCopy( $realParams ); + } + + /** + * @see 
FileBackend::directoryExists() + * @param $params array + * @return bool|null + */ + public function directoryExists( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->directoryExists( $realParams ); + } + + /** + * @see FileBackend::getSubdirectoryList() + * @param $params array + * @return Array|null|Traversable + */ + public function getDirectoryList( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getDirectoryList( $realParams ); + } + + /** + * @see FileBackend::getFileList() + * @param $params array + * @return Array|null|\Traversable + */ + public function getFileList( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileList( $realParams ); + } + + /** + * @see FileBackend::clearCache() + */ + public function clearCache( array $paths = null ) { + foreach ( $this->backends as $backend ) { + $realPaths = is_array( $paths ) ? 
$this->substPaths( $paths, $backend ) : null; + $backend->clearCache( $realPaths ); + } + } + + /** + * @see FileBackend::getScopedLocksForOps() + */ + public function getScopedLocksForOps( array $ops, Status $status ) { + $fileOps = $this->backends[$this->masterIndex]->getOperationsInternal( $ops ); + // Get the paths to lock from the master backend + $paths = $this->backends[$this->masterIndex]->getPathsToLockForOpsInternal( $fileOps ); + // Get the paths under the proxy backend's name + $paths['sh'] = $this->unsubstPaths( $paths['sh'] ); + $paths['ex'] = $this->unsubstPaths( $paths['ex'] ); + return array( + $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status ), + $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status ) + ); + } +} diff --git a/includes/filebackend/FileBackendStore.php b/includes/filebackend/FileBackendStore.php new file mode 100644 index 00000000..083dfea9 --- /dev/null +++ b/includes/filebackend/FileBackendStore.php @@ -0,0 +1,1766 @@ +<?php +/** + * Base class for all backends using particular storage medium. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * @brief Base class for all backends using particular storage medium. + * + * This class defines the methods as abstract that subclasses must implement. + * Outside callers should *not* use functions with "Internal" in the name. + * + * The FileBackend operations are implemented using basic functions + * such as storeInternal(), copyInternal(), deleteInternal() and the like. + * This class is also responsible for path resolution and sanitization. + * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileBackendStore extends FileBackend { + /** @var BagOStuff */ + protected $memCache; + /** @var ProcessCacheLRU */ + protected $cheapCache; // Map of paths to small (RAM/disk) cache items + /** @var ProcessCacheLRU */ + protected $expensiveCache; // Map of paths to large (RAM/disk) cache items + + /** @var Array Map of container names to sharding settings */ + protected $shardViaHashLevels = array(); // (container name => config array) + + protected $maxFileSize = 4294967296; // integer bytes (4GiB) + + /** + * @see FileBackend::__construct() + * + * @param $config Array + */ + public function __construct( array $config ) { + parent::__construct( $config ); + $this->memCache = new EmptyBagOStuff(); // disabled by default + $this->cheapCache = new ProcessCacheLRU( 300 ); + $this->expensiveCache = new ProcessCacheLRU( 5 ); + } + + /** + * Get the maximum allowable file size given backend + * medium restrictions and basic performance constraints. + * Do not call this function from places outside FileBackend and FileOp. + * + * @return integer Bytes + */ + final public function maxFileSizeInternal() { + return $this->maxFileSize; + } + + /** + * Check if a file can be created at a given storage path. + * FS backends should check if the parent directory exists and the file is writable. 
+ * Backends using key/value stores should check if the container exists. + * + * @param $storagePath string + * @return bool + */ + abstract public function isPathUsableInternal( $storagePath ); + + /** + * Create a file in the backend with the given contents. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * - content : the raw file contents + * - dst : destination storage path + * - overwrite : overwrite any file that exists at the destination + * - disposition : Content-Disposition header value for the destination + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * + * @param $params Array + * @return Status + */ + final public function createInternal( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + if ( strlen( $params['content'] ) > $this->maxFileSizeInternal() ) { + $status = Status::newFatal( 'backend-fail-maxsize', + $params['dst'], $this->maxFileSizeInternal() ); + } else { + $status = $this->doCreateInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->deleteFileCache( $params['dst'] ); // persistent cache + } + } + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::createInternal() + */ + abstract protected function doCreateInternal( array $params ); + + /** + * Store a file into the backend from a file on disk. + * Do not call this function from places outside FileBackend and FileOp. 
+ * + * $params include: + * - src : source path on disk + * - dst : destination storage path + * - overwrite : overwrite any file that exists at the destination + * - disposition : Content-Disposition header value for the destination + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * + * @param $params Array + * @return Status + */ + final public function storeInternal( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + if ( filesize( $params['src'] ) > $this->maxFileSizeInternal() ) { + $status = Status::newFatal( 'backend-fail-maxsize', + $params['dst'], $this->maxFileSizeInternal() ); + } else { + $status = $this->doStoreInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->deleteFileCache( $params['dst'] ); // persistent cache + } + } + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::storeInternal() + */ + abstract protected function doStoreInternal( array $params ); + + /** + * Copy a file from one storage path to another in the backend. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * - src : source storage path + * - dst : destination storage path + * - overwrite : overwrite any file that exists at the destination + * - disposition : Content-Disposition header value for the destination + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * + * @param $params Array + * @return Status + */ + final public function copyInternal( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . 
$this->name ); + $status = $this->doCopyInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->deleteFileCache( $params['dst'] ); // persistent cache + } + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::copyInternal() + */ + abstract protected function doCopyInternal( array $params ); + + /** + * Delete a file at the storage path. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * - src : source storage path + * - ignoreMissingSource : do nothing if the source file does not exist + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * + * @param $params Array + * @return Status + */ + final public function deleteInternal( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = $this->doDeleteInternal( $params ); + $this->clearCache( array( $params['src'] ) ); + $this->deleteFileCache( $params['src'] ); // persistent cache + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::deleteInternal() + */ + abstract protected function doDeleteInternal( array $params ); + + /** + * Move a file from one storage path to another in the backend. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * - src : source storage path + * - dst : destination storage path + * - overwrite : overwrite any file that exists at the destination + * - disposition : Content-Disposition header value for the destination + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. 
+ * + * @param $params Array + * @return Status + */ + final public function moveInternal( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = $this->doMoveInternal( $params ); + $this->clearCache( array( $params['src'], $params['dst'] ) ); + $this->deleteFileCache( $params['src'] ); // persistent cache + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->deleteFileCache( $params['dst'] ); // persistent cache + } + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::moveInternal() + * @return Status + */ + protected function doMoveInternal( array $params ) { + unset( $params['async'] ); // two steps, won't work here :) + // Copy source to dest + $status = $this->copyInternal( $params ); + if ( $status->isOK() ) { + // Delete source (only fails due to races or medium going down) + $status->merge( $this->deleteInternal( array( 'src' => $params['src'] ) ) ); + $status->setResult( true, $status->value ); // ignore delete() errors + } + return $status; + } + + /** + * No-op file operation that does nothing. + * Do not call this function from places outside FileBackend and FileOp. + * + * @param $params Array + * @return Status + */ + final public function nullInternal( array $params ) { + return Status::newGood(); + } + + /** + * @see FileBackend::concatenate() + * @return Status + */ + final public function concatenate( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = Status::newGood(); + + // Try to lock the source files for the scope of this function + $scopeLockS = $this->getScopedFileLocks( $params['srcs'], LockManager::LOCK_UW, $status ); + if ( $status->isOK() ) { + // Actually do the file concatenation... 
+ $start_time = microtime( true ); + $status->merge( $this->doConcatenate( $params ) ); + $sec = microtime( true ) - $start_time; + if ( !$status->isOK() ) { + wfDebugLog( 'FileOperation', get_class( $this ) . " failed to concatenate " . + count( $params['srcs'] ) . " file(s) [$sec sec]" ); + } + } + + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::concatenate() + * @return Status + */ + protected function doConcatenate( array $params ) { + $status = Status::newGood(); + $tmpPath = $params['dst']; // convenience + + // Check that the specified temp file is valid... + wfSuppressWarnings(); + $ok = ( is_file( $tmpPath ) && !filesize( $tmpPath ) ); + wfRestoreWarnings(); + if ( !$ok ) { // not present or not empty + $status->fatal( 'backend-fail-opentemp', $tmpPath ); + return $status; + } + + // Build up the temp file using the source chunks (in order)... + $tmpHandle = fopen( $tmpPath, 'ab' ); + if ( $tmpHandle === false ) { + $status->fatal( 'backend-fail-opentemp', $tmpPath ); + return $status; + } + foreach ( $params['srcs'] as $virtualSource ) { + // Get a local FS version of the chunk + $tmpFile = $this->getLocalReference( array( 'src' => $virtualSource ) ); + if ( !$tmpFile ) { + $status->fatal( 'backend-fail-read', $virtualSource ); + return $status; + } + // Get a handle to the local FS version + $sourceHandle = fopen( $tmpFile->getPath(), 'r' ); + if ( $sourceHandle === false ) { + fclose( $tmpHandle ); + $status->fatal( 'backend-fail-read', $virtualSource ); + return $status; + } + // Append chunk to file (pass chunk size to avoid magic quotes) + if ( !stream_copy_to_stream( $sourceHandle, $tmpHandle ) ) { + fclose( $sourceHandle ); + fclose( $tmpHandle ); + $status->fatal( 'backend-fail-writetemp', $tmpPath ); + return $status; + } + fclose( $sourceHandle ); + } + if ( !fclose( $tmpHandle ) ) { + $status->fatal( 'backend-fail-closetemp', $tmpPath ); + return $status; + 
} + + clearstatcache(); // temp file changed + + return $status; + } + + /** + * @see FileBackend::doPrepare() + * @return Status + */ + final protected function doPrepare( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + + $status = Status::newGood(); + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; // invalid storage path + } + + if ( $shard !== null ) { // confined to a single container/shard + $status->merge( $this->doPrepareInternal( $fullCont, $dir, $params ) ); + } else { // directory is on several shards + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { + $status->merge( $this->doPrepareInternal( "{$fullCont}{$suffix}", $dir, $params ) ); + } + } + + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::doPrepare() + * @return Status + */ + protected function doPrepareInternal( $container, $dir, array $params ) { + return Status::newGood(); + } + + /** + * @see FileBackend::doSecure() + * @return Status + */ + final protected function doSecure( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = Status::newGood(); + + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); + wfProfileOut( __METHOD__ . '-' . 
$this->name ); + wfProfileOut( __METHOD__ ); + return $status; // invalid storage path + } + + if ( $shard !== null ) { // confined to a single container/shard + $status->merge( $this->doSecureInternal( $fullCont, $dir, $params ) ); + } else { // directory is on several shards + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { + $status->merge( $this->doSecureInternal( "{$fullCont}{$suffix}", $dir, $params ) ); + } + } + + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::doSecure() + * @return Status + */ + protected function doSecureInternal( $container, $dir, array $params ) { + return Status::newGood(); + } + + /** + * @see FileBackend::doPublish() + * @return Status + */ + final protected function doPublish( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = Status::newGood(); + + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; // invalid storage path + } + + if ( $shard !== null ) { // confined to a single container/shard + $status->merge( $this->doPublishInternal( $fullCont, $dir, $params ) ); + } else { // directory is on several shards + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { + $status->merge( $this->doPublishInternal( "{$fullCont}{$suffix}", $dir, $params ) ); + } + } + + wfProfileOut( __METHOD__ . '-' . 
$this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::doPublish() + * @return Status + */ + protected function doPublishInternal( $container, $dir, array $params ) { + return Status::newGood(); + } + + /** + * @see FileBackend::doClean() + * @return Status + */ + final protected function doClean( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = Status::newGood(); + + // Recursive: first delete all empty subdirs recursively + if ( !empty( $params['recursive'] ) && !$this->directoriesAreVirtual() ) { + $subDirsRel = $this->getTopDirectoryList( array( 'dir' => $params['dir'] ) ); + if ( $subDirsRel !== null ) { // no errors + foreach ( $subDirsRel as $subDirRel ) { + $subDir = $params['dir'] . "/{$subDirRel}"; // full path + $status->merge( $this->doClean( array( 'dir' => $subDir ) + $params ) ); + } + } + } + + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; // invalid storage path + } + + // Attempt to lock this directory... + $filesLockEx = array( $params['dir'] ); + $scopedLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status ); + if ( !$status->isOK() ) { + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; // abort + } + + if ( $shard !== null ) { // confined to a single container/shard + $status->merge( $this->doCleanInternal( $fullCont, $dir, $params ) ); + $this->deleteContainerCache( $fullCont ); // purge cache + } else { // directory is on several shards + wfDebug( __METHOD__ . 
": iterating over all container shards.\n" ); + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { + $status->merge( $this->doCleanInternal( "{$fullCont}{$suffix}", $dir, $params ) ); + $this->deleteContainerCache( "{$fullCont}{$suffix}" ); // purge cache + } + } + + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::doClean() + * @return Status + */ + protected function doCleanInternal( $container, $dir, array $params ) { + return Status::newGood(); + } + + /** + * @see FileBackend::fileExists() + * @return bool|null + */ + final public function fileExists( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $stat = $this->getFileStat( $params ); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return ( $stat === null ) ? null : (bool)$stat; // null => failure + } + + /** + * @see FileBackend::getFileTimestamp() + * @return bool + */ + final public function getFileTimestamp( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $stat = $this->getFileStat( $params ); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $stat ? $stat['mtime'] : false; + } + + /** + * @see FileBackend::getFileSize() + * @return bool + */ + final public function getFileSize( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $stat = $this->getFileStat( $params ); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $stat ? 
$stat['size'] : false; + } + + /** + * @see FileBackend::getFileStat() + * @return bool + */ + final public function getFileStat( array $params ) { + $path = self::normalizeStoragePath( $params['src'] ); + if ( $path === null ) { + return false; // invalid storage path + } + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $latest = !empty( $params['latest'] ); // use latest data? + if ( !$this->cheapCache->has( $path, 'stat' ) ) { + $this->primeFileCache( array( $path ) ); // check persistent cache + } + if ( $this->cheapCache->has( $path, 'stat' ) ) { + $stat = $this->cheapCache->get( $path, 'stat' ); + // If we want the latest data, check that this cached + // value was in fact fetched with the latest available data. + if ( !$latest || $stat['latest'] ) { + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $stat; + } + } + wfProfileIn( __METHOD__ . '-miss' ); + wfProfileIn( __METHOD__ . '-miss-' . $this->name ); + $stat = $this->doGetFileStat( $params ); + wfProfileOut( __METHOD__ . '-miss-' . $this->name ); + wfProfileOut( __METHOD__ . '-miss' ); + if ( is_array( $stat ) ) { // don't cache negatives + $stat['latest'] = $latest; + $this->cheapCache->set( $path, 'stat', $stat ); + $this->setFileCache( $path, $stat ); // update persistent cache + if ( isset( $stat['sha1'] ) ) { // some backends store SHA-1 as metadata + $this->cheapCache->set( $path, 'sha1', + array( 'hash' => $stat['sha1'], 'latest' => $latest ) ); + } + } else { + wfDebug( __METHOD__ . ": File $path does not exist.\n" ); + } + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $stat; + } + + /** + * @see FileBackendStore::getFileStat() + */ + abstract protected function doGetFileStat( array $params ); + + /** + * @see FileBackend::getFileContents() + * @return bool|string + */ + public function getFileContents( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . 
$this->name ); + $tmpFile = $this->getLocalReference( $params ); + if ( !$tmpFile ) { + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return false; + } + wfSuppressWarnings(); + $data = file_get_contents( $tmpFile->getPath() ); + wfRestoreWarnings(); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $data; + } + + /** + * @see FileBackend::getFileSha1Base36() + * @return bool|string + */ + final public function getFileSha1Base36( array $params ) { + $path = self::normalizeStoragePath( $params['src'] ); + if ( $path === null ) { + return false; // invalid storage path + } + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $latest = !empty( $params['latest'] ); // use latest data? + if ( $this->cheapCache->has( $path, 'sha1' ) ) { + $stat = $this->cheapCache->get( $path, 'sha1' ); + // If we want the latest data, check that this cached + // value was in fact fetched with the latest available data. + if ( !$latest || $stat['latest'] ) { + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $stat['hash']; + } + } + wfProfileIn( __METHOD__ . '-miss' ); + wfProfileIn( __METHOD__ . '-miss-' . $this->name ); + $hash = $this->doGetFileSha1Base36( $params ); + wfProfileOut( __METHOD__ . '-miss-' . $this->name ); + wfProfileOut( __METHOD__ . '-miss' ); + if ( $hash ) { // don't cache negatives + $this->cheapCache->set( $path, 'sha1', + array( 'hash' => $hash, 'latest' => $latest ) ); + } + wfProfileOut( __METHOD__ . '-' . 
$this->name ); + wfProfileOut( __METHOD__ ); + return $hash; + } + + /** + * @see FileBackendStore::getFileSha1Base36() + * @return bool|string + */ + protected function doGetFileSha1Base36( array $params ) { + $fsFile = $this->getLocalReference( $params ); + if ( !$fsFile ) { + return false; + } else { + return $fsFile->getSha1Base36(); + } + } + + /** + * @see FileBackend::getFileProps() + * @return Array + */ + final public function getFileProps( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $fsFile = $this->getLocalReference( $params ); + $props = $fsFile ? $fsFile->getProps() : FSFile::placeholderProps(); + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $props; + } + + /** + * @see FileBackend::getLocalReference() + * @return TempFSFile|null + */ + public function getLocalReference( array $params ) { + $path = self::normalizeStoragePath( $params['src'] ); + if ( $path === null ) { + return null; // invalid storage path + } + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $latest = !empty( $params['latest'] ); // use latest data? + if ( $this->expensiveCache->has( $path, 'localRef' ) ) { + $val = $this->expensiveCache->get( $path, 'localRef' ); + // If we want the latest data, check that this cached + // value was in fact fetched with the latest available data. + if ( !$latest || $val['latest'] ) { + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $val['object']; + } + } + $tmpFile = $this->getLocalCopy( $params ); + if ( $tmpFile ) { // don't cache negatives + $this->expensiveCache->set( $path, 'localRef', + array( 'object' => $tmpFile, 'latest' => $latest ) ); + } + wfProfileOut( __METHOD__ . '-' . 
$this->name ); + wfProfileOut( __METHOD__ ); + return $tmpFile; + } + + /** + * @see FileBackend::streamFile() + * @return Status + */ + final public function streamFile( array $params ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = Status::newGood(); + + $info = $this->getFileStat( $params ); + if ( !$info ) { // let StreamFile handle the 404 + $status->fatal( 'backend-fail-notexists', $params['src'] ); + } + + // Set output buffer and HTTP headers for stream + $extraHeaders = isset( $params['headers'] ) ? $params['headers'] : array(); + $res = StreamFile::prepareForStream( $params['src'], $info, $extraHeaders ); + if ( $res == StreamFile::NOT_MODIFIED ) { + // do nothing; client cache is up to date + } elseif ( $res == StreamFile::READY_STREAM ) { + wfProfileIn( __METHOD__ . '-send' ); + wfProfileIn( __METHOD__ . '-send-' . $this->name ); + $status = $this->doStreamFile( $params ); + wfProfileOut( __METHOD__ . '-send-' . $this->name ); + wfProfileOut( __METHOD__ . '-send' ); + } else { + $status->fatal( 'backend-fail-stream', $params['src'] ); + } + + wfProfileOut( __METHOD__ . '-' . 
$this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::streamFile() + * @return Status + */ + protected function doStreamFile( array $params ) { + $status = Status::newGood(); + + $fsFile = $this->getLocalReference( $params ); + if ( !$fsFile ) { + $status->fatal( 'backend-fail-stream', $params['src'] ); + } elseif ( !readfile( $fsFile->getPath() ) ) { + $status->fatal( 'backend-fail-stream', $params['src'] ); + } + + return $status; + } + + /** + * @see FileBackend::directoryExists() + * @return bool|null + */ + final public function directoryExists( array $params ) { + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { + return false; // invalid storage path + } + if ( $shard !== null ) { // confined to a single container/shard + return $this->doDirectoryExists( $fullCont, $dir, $params ); + } else { // directory is on several shards + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + $res = false; // response + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { + $exists = $this->doDirectoryExists( "{$fullCont}{$suffix}", $dir, $params ); + if ( $exists ) { + $res = true; + break; // found one! + } elseif ( $exists === null ) { // error? 
+ $res = null; // if we don't find anything, it is indeterminate + } + } + return $res; + } + } + + /** + * @see FileBackendStore::directoryExists() + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return bool|null + */ + abstract protected function doDirectoryExists( $container, $dir, array $params ); + + /** + * @see FileBackend::getDirectoryList() + * @return Traversable|Array|null Returns null on failure + */ + final public function getDirectoryList( array $params ) { + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { // invalid storage path + return null; + } + if ( $shard !== null ) { + // File listing is confined to a single container/shard + return $this->getDirectoryListInternal( $fullCont, $dir, $params ); + } else { + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + // File listing spans multiple containers/shards + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + return new FileBackendStoreShardDirIterator( $this, + $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); + } + } + + /** + * Do not call this function from places outside FileBackend + * + * @see FileBackendStore::getDirectoryList() + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return Traversable|Array|null Returns null on failure + */ + abstract public function getDirectoryListInternal( $container, $dir, array $params ); + + /** + * @see FileBackend::getFileList() + * @return Traversable|Array|null Returns null on failure + */ + final public function getFileList( array $params ) { + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); + if ( $dir === null ) { // invalid storage path + return null; + } + if ( $shard !== null ) { + // File listing is confined 
to a single container/shard + return $this->getFileListInternal( $fullCont, $dir, $params ); + } else { + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); + // File listing spans multiple containers/shards + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + return new FileBackendStoreShardFileIterator( $this, + $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); + } + } + + /** + * Do not call this function from places outside FileBackend + * + * @see FileBackendStore::getFileList() + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return Traversable|Array|null Returns null on failure + */ + abstract public function getFileListInternal( $container, $dir, array $params ); + + /** + * Return a list of FileOp objects from a list of operations. + * Do not call this function from places outside FileBackend. + * + * The result must have the same number of items as the input. + * An exception is thrown if an unsupported operation is requested. + * + * @param $ops Array Same format as doOperations() + * @return Array List of FileOp objects + * @throws MWException + */ + final public function getOperationsInternal( array $ops ) { + $supportedOps = array( + 'store' => 'StoreFileOp', + 'copy' => 'CopyFileOp', + 'move' => 'MoveFileOp', + 'delete' => 'DeleteFileOp', + 'create' => 'CreateFileOp', + 'null' => 'NullFileOp' + ); + + $performOps = array(); // array of FileOp objects + // Build up ordered array of FileOps... + foreach ( $ops as $operation ) { + $opName = $operation['op']; + if ( isset( $supportedOps[$opName] ) ) { + $class = $supportedOps[$opName]; + // Get params for this operation + $params = $operation; + // Append the FileOp class + $performOps[] = new $class( $this, $params ); + } else { + throw new MWException( "Operation '$opName' is not supported." 
); + } + } + + return $performOps; + } + + /** + * Get a list of storage paths to lock for a list of operations + * Returns an array with 'sh' (shared) and 'ex' (exclusive) keys, + * each corresponding to a list of storage paths to be locked. + * + * @param $performOps Array List of FileOp objects + * @return Array ('sh' => list of paths, 'ex' => list of paths) + */ + final public function getPathsToLockForOpsInternal( array $performOps ) { + // Build up a list of files to lock... + $paths = array( 'sh' => array(), 'ex' => array() ); + foreach ( $performOps as $fileOp ) { + $paths['sh'] = array_merge( $paths['sh'], $fileOp->storagePathsRead() ); + $paths['ex'] = array_merge( $paths['ex'], $fileOp->storagePathsChanged() ); + } + // Optimization: if doing an EX lock anyway, don't also set an SH one + $paths['sh'] = array_diff( $paths['sh'], $paths['ex'] ); + // Get a shared lock on the parent directory of each path changed + $paths['sh'] = array_merge( $paths['sh'], array_map( 'dirname', $paths['ex'] ) ); + + return $paths; + } + + /** + * @see FileBackend::getScopedLocksForOps() + * @return Array + */ + public function getScopedLocksForOps( array $ops, Status $status ) { + $paths = $this->getPathsToLockForOpsInternal( $this->getOperationsInternal( $ops ) ); + return array( + $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status ), + $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status ) + ); + } + + /** + * @see FileBackend::doOperationsInternal() + * @return Status + */ + final protected function doOperationsInternal( array $ops, array $opts ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = Status::newGood(); + + // Build up a list of FileOps... + $performOps = $this->getOperationsInternal( $ops ); + + // Acquire any locks as needed... + if ( empty( $opts['nonLocking'] ) ) { + // Build up a list of files to lock... 
+ $paths = $this->getPathsToLockForOpsInternal( $performOps ); + // Try to lock those files for the scope of this function... + $scopeLockS = $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status ); + $scopeLockE = $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status ); + if ( !$status->isOK() ) { + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; // abort + } + } + + // Clear any file cache entries (after locks acquired) + if ( empty( $opts['preserveCache'] ) ) { + $this->clearCache(); + } + + // Load from the persistent file and container caches + $this->primeFileCache( $performOps ); + $this->primeContainerCache( $performOps ); + + // Actually attempt the operation batch... + $subStatus = FileOpBatch::attempt( $performOps, $opts, $this->fileJournal ); + + // Merge errors into status fields + $status->merge( $subStatus ); + $status->success = $subStatus->success; // not done in merge() + + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackend::doQuickOperationsInternal() + * @return Status + * @throws MWException + */ + final protected function doQuickOperationsInternal( array $ops ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + $status = Status::newGood(); + + $supportedOps = array( 'create', 'store', 'copy', 'move', 'delete', 'null' ); + $async = ( $this->parallelize === 'implicit' ); + $maxConcurrency = $this->concurrency; // throttle + + $statuses = array(); // array of (index => Status) + $fileOpHandles = array(); // list of (index => handle) arrays + $curFileOpHandles = array(); // current handle batch + // Perform the sync-only ops and build up op handles for the async ops... + foreach ( $ops as $index => $params ) { + if ( !in_array( $params['op'], $supportedOps ) ) { + wfProfileOut( __METHOD__ . '-' . 
$this->name ); + wfProfileOut( __METHOD__ ); + throw new MWException( "Operation '{$params['op']}' is not supported." ); + } + $method = $params['op'] . 'Internal'; // e.g. "storeInternal" + $subStatus = $this->$method( array( 'async' => $async ) + $params ); + if ( $subStatus->value instanceof FileBackendStoreOpHandle ) { // async + if ( count( $curFileOpHandles ) >= $maxConcurrency ) { + $fileOpHandles[] = $curFileOpHandles; // push this batch + $curFileOpHandles = array(); + } + $curFileOpHandles[$index] = $subStatus->value; // keep index + } else { // error or completed + $statuses[$index] = $subStatus; // keep index + } + } + if ( count( $curFileOpHandles ) ) { + $fileOpHandles[] = $curFileOpHandles; // last batch + } + // Do all the async ops that can be done concurrently... + foreach ( $fileOpHandles as $fileHandleBatch ) { + $statuses = $statuses + $this->executeOpHandlesInternal( $fileHandleBatch ); + } + // Marshall and merge all the responses... + foreach ( $statuses as $index => $subStatus ) { + $status->merge( $subStatus ); + if ( $subStatus->isOK() ) { + $status->success[$index] = true; + ++$status->successCount; + } else { + $status->success[$index] = false; + ++$status->failCount; + } + } + + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * Execute a list of FileBackendStoreOpHandle handles in parallel. + * The resulting Status object fields will correspond + * to the order in which the handles where given. + * + * @param $handles Array List of FileBackendStoreOpHandle objects + * @return Array Map of Status objects + * @throws MWException + */ + final public function executeOpHandlesInternal( array $fileOpHandles ) { + wfProfileIn( __METHOD__ ); + wfProfileIn( __METHOD__ . '-' . $this->name ); + foreach ( $fileOpHandles as $fileOpHandle ) { + if ( !( $fileOpHandle instanceof FileBackendStoreOpHandle ) ) { + throw new MWException( "Given a non-FileBackendStoreOpHandle object." 
); + } elseif ( $fileOpHandle->backend->getName() !== $this->getName() ) { + throw new MWException( "Given a FileBackendStoreOpHandle for the wrong backend." ); + } + } + $res = $this->doExecuteOpHandlesInternal( $fileOpHandles ); + foreach ( $fileOpHandles as $fileOpHandle ) { + $fileOpHandle->closeResources(); + } + wfProfileOut( __METHOD__ . '-' . $this->name ); + wfProfileOut( __METHOD__ ); + return $res; + } + + /** + * @see FileBackendStore::executeOpHandlesInternal() + * @return Array List of corresponding Status objects + */ + protected function doExecuteOpHandlesInternal( array $fileOpHandles ) { + foreach ( $fileOpHandles as $fileOpHandle ) { // OK if empty + throw new MWException( "This backend supports no asynchronous operations." ); + } + return array(); + } + + /** + * @see FileBackend::preloadCache() + */ + final public function preloadCache( array $paths ) { + $fullConts = array(); // full container names + foreach ( $paths as $path ) { + list( $fullCont, $r, $s ) = $this->resolveStoragePath( $path ); + $fullConts[] = $fullCont; + } + // Load from the persistent file and container caches + $this->primeContainerCache( $fullConts ); + $this->primeFileCache( $paths ); + } + + /** + * @see FileBackend::clearCache() + */ + final public function clearCache( array $paths = null ) { + if ( is_array( $paths ) ) { + $paths = array_map( 'FileBackend::normalizeStoragePath', $paths ); + $paths = array_filter( $paths, 'strlen' ); // remove nulls + } + if ( $paths === null ) { + $this->cheapCache->clear(); + $this->expensiveCache->clear(); + } else { + foreach ( $paths as $path ) { + $this->cheapCache->clear( $path ); + $this->expensiveCache->clear( $path ); + } + } + $this->doClearCache( $paths ); + } + + /** + * Clears any additional stat caches for storage paths + * + * @see FileBackend::clearCache() + * + * @param $paths Array Storage paths (optional) + * @return void + */ + protected function doClearCache( array $paths = null ) {} + + /** + * Is this a 
key/value store where directories are just virtual?
+	 * Virtual directories exist insofar as files exist that are
+	 * prefixed with the directory path followed by a forward slash.
+	 *
+	 * @return bool
+	 */
+	abstract protected function directoriesAreVirtual();
+
+	/**
+	 * Check if a container name is valid.
+	 * This checks for length and illegal characters.
+	 *
+	 * @param $container string
+	 * @return bool True only if the entire name matches the allowed pattern
+	 */
+	final protected static function isValidContainerName( $container ) {
+		// This accounts for Swift and S3 restrictions while leaving room
+		// for things like '.xxx' (hex shard chars) or '.seg' (segments).
+		// This disallows directory separators or traversal characters.
+		// Note that matching strings URL encode to the same string;
+		// in Swift, the length restriction is *after* URL encoding.
+		//
+		// The "D" modifier makes "$" match only at the very end of the
+		// subject, so a name followed by a newline is rejected. The cast
+		// turns preg_match()'s 1/0/false into the documented boolean.
+		return (bool)preg_match( '/^[a-z0-9][a-z0-9_-]{0,199}$/Di', $container );
+	}
+
+	/**
+	 * Splits a storage path into an internal container name,
+	 * an internal relative file name, and a container shard suffix.
+	 * Any shard suffix is already appended to the internal container name.
+	 * This also checks that the storage path is valid and within this backend.
+	 *
+	 * If the container is sharded but a suffix could not be determined,
+	 * this means that the path can only refer to a directory and can only
+	 * be scanned by looking in all the container shards.
+	 *
+	 * @param $storagePath string
+	 * @return Array (container, path, container suffix) or (null, null, null) if invalid
+	 */
+	final protected function resolveStoragePath( $storagePath ) {
+		$invalid = array( null, null, null );
+
+		list( $backendName, $shortCont, $rawRelPath ) = self::splitStoragePath( $storagePath );
+		if ( $backendName !== $this->name ) {
+			return $invalid; // not a path for this backend
+		}
+
+		$normRelPath = self::normalizeContainerPath( $rawRelPath );
+		if ( $normRelPath === null ) {
+			return $invalid;
+		}
+
+		// The shard suffix is derived from the normalized path, before the
+		// backend-specific hook below gets a chance to rewrite that path
+		$suffix = $this->getContainerShard( $shortCont, $normRelPath );
+
+		// Backend-specific validation/sanitizing of the relative path
+		$finalRelPath = $this->resolveContainerPath( $shortCont, $normRelPath );
+		if ( $finalRelPath === null ) {
+			return $invalid;
+		}
+
+		// Container name with any wiki ID prefix, but no shard suffix yet
+		$prefixedCont = $this->fullContainerName( $shortCont );
+		if ( !self::isValidContainerName( $prefixedCont ) ) {
+			return $invalid;
+		}
+
+		// Backend-specific validation/sanitizing of the suffixed container name
+		$finalCont = $this->resolveContainerName( "{$prefixedCont}{$suffix}" );
+
+		return ( $finalCont === null )
+			? $invalid
+			: array( $finalCont, $finalRelPath, $suffix );
+	}
+
+	/**
+	 * Variant of resolveStoragePath() that only accepts paths which map to
+	 * exactly one container shard. If the shard suffix is unknown (null),
+	 * both returned values are null.
+	 *
+	 * @see FileBackendStore::resolveStoragePath()
+	 *
+	 * @param $storagePath string
+	 * @return Array (container, path) or (null, null) if invalid
+	 */
+	final protected function resolveStoragePathReal( $storagePath ) {
+		// $resolved is (container, relative path, shard suffix)
+		$resolved = $this->resolveStoragePath( $storagePath );
+
+		return ( $resolved[2] === null )
+			? array( null, null )
+			: array( $resolved[0], $resolved[1] );
+	}
+
+	/**
+	 * Get the container name shard suffix for a given path.
+	 * Any empty suffix means the container is not sharded.
+	 *
+	 * @param $container string Container name
+	 * @param $relPath string Storage path relative to the container
+	 * @return string|null Returns null if shard could not be determined
+	 */
+	final protected function getContainerShard( $container, $relPath ) {
+		list( $levels, $base, $repeat ) = $this->getContainerHashLevels( $container );
+		if ( $levels != 1 && $levels != 2 ) {
+			return ''; // no sharding
+		}
+
+		// One hash character: base 36 uses 0-9a-z, anything else is treated as hex
+		$digit = ( $base == 36 ) ? '[0-9a-z]' : '[0-9a-f]';
+
+		// Regex for the hash directory portion of a path. Concatenating
+		// its capture groups yields the shard name.
+		if ( $levels === 1 ) {
+			// 16 or 36 shards per container
+			$shardRegex = "({$digit})";
+		} elseif ( $repeat ) {
+			// 256 or 1296 shards, verbose hash dir format (e.g. "a/ab/abc")
+			$shardRegex = "{$digit}/({$digit}{2})";
+		} else {
+			// 256 or 1296 shards, short hash dir format (e.g. "a/b/c")
+			$shardRegex = "({$digit})/({$digit})";
+		}
+
+		// Directories may sit above the hash dirs so that FileRepo layouts
+		// work (e.g. "archive/a/ab" or "temp/a/ab"). They must be 2+ chars
+		// long so that they cannot be confused with a hash directory.
+		$matches = array();
+		$found = preg_match(
+			'!^(?:[^/]{2,}/)*' . $shardRegex . '(?:/|$)!', $relPath, $matches );
+		if ( !$found ) {
+			return null; // failed to match
+		}
+
+		array_shift( $matches ); // drop the full match; keep only the captures
+		return '.' . implode( '', $matches );
+	}
+
+	/**
+	 * Tell whether a storage path resolves to one specific shard.
+	 * Container dirs like "a", where the container shards on "x/xy",
+	 * can reside on several shards. Such paths are tricky to handle.
+	 *
+	 * @param $storagePath string Storage path
+	 * @return bool
+	 */
+	final public function isSingleShardPathInternal( $storagePath ) {
+		// Index 2 of the resolved tuple is the shard suffix (null if unknown)
+		$resolved = $this->resolveStoragePath( $storagePath );
+		return $resolved[2] !== null;
+	}
+
+	/**
+	 * Get the sharding config for a container.
+	 * If greater than 0, then all file storage paths within
+	 * the container are required to be hashed accordingly.
+	 *
+	 * @param $container string
+	 * @return Array (integer levels, integer base, repeat flag) or (0, 0, false)
+	 */
+	final protected function getContainerHashLevels( $container ) {
+		$unsharded = array( 0, 0, false ); // no sharding
+
+		if ( !isset( $this->shardViaHashLevels[$container] ) ) {
+			return $unsharded;
+		}
+		$settings = $this->shardViaHashLevels[$container];
+
+		// Only 1 or 2 hash levels are supported
+		$levels = (int)$settings['levels'];
+		if ( $levels != 1 && $levels != 2 ) {
+			return $unsharded;
+		}
+
+		// Only hexadecimal and base 36 hash characters are supported
+		$base = (int)$settings['base'];
+		if ( $base != 16 && $base != 36 ) {
+			return $unsharded;
+		}
+
+		return array( $levels, $base, $settings['repeat'] );
+	}
+
+	/**
+	 * List every shard suffix of a container, each as "." followed by
+	 * the shard index converted to the container's hash base.
+	 * The list is empty if the container is not sharded.
+	 *
+	 * @param $container string
+	 * @return Array
+	 */
+	final protected function getContainerSuffixes( $container ) {
+		list( $digits, $base ) = $this->getContainerHashLevels( $container );
+		if ( $digits <= 0 ) {
+			return array();
+		}
+
+		$suffixes = array();
+		$shardCount = pow( $base, $digits );
+		for ( $n = 0; $n < $shardCount; ++$n ) {
+			$suffixes[] = '.' . wfBaseConvert( $n, 10, $base, $digits );
+		}
+		return $suffixes;
+	}
+
+	/**
+	 * Get the container name with the wiki ID prepended (as "<wiki ID>-<name>").
+	 * The name is returned unchanged if this backend has no wiki ID.
+	 *
+	 * @param $container string
+	 * @return string
+	 */
+	final protected function fullContainerName( $container ) {
+		return ( $this->wikiId != '' )
+			? "{$this->wikiId}-$container"
+			: $container;
+	}
+
+	/**
+	 * Resolve a container name, checking if it's allowed by the backend.
+	 * This base implementation accepts every name and returns it as is.
+	 * Subclasses can override this to be more restrictive or to encode
+	 * illegal characters.
+	 *
+	 * @param $container string
+	 * @return string|null
+	 */
+	protected function resolveContainerName( $container ) {
+		return $container;
+	}
+
+	/**
+	 * Resolve a relative storage path, checking if it's allowed by the backend.
+	 * This is intended for internal use, such as encoding illegal chars or perhaps
+	 * getting absolute paths (e.g. FS based backends).
Note that the relative path
+	 * may be the empty string (e.g. the path is simply to the container).
+	 *
+	 * @param $container string Container name
+	 * @param $relStoragePath string Storage path relative to the container
+	 * @return string|null Path or null if not valid
+	 */
+	protected function resolveContainerPath( $container, $relStoragePath ) {
+		// Base implementation: every relative path is accepted unchanged
+		return $relStoragePath;
+	}
+
+	/**
+	 * Build the cache key under which info about a container is stored
+	 *
+	 * @param $container string Resolved container name
+	 * @return string
+	 */
+	private function containerCacheKey( $container ) {
+		return wfMemcKey( 'backend', $this->getName(), 'container', $container );
+	}
+
+	/**
+	 * Store info about a container in the cache, kept for 14 days
+	 *
+	 * @param $container string Resolved container name
+	 * @param $val mixed Information to cache
+	 */
+	final protected function setContainerCache( $container, $val ) {
+		$ttl = 14*86400; // seconds
+		// NOTE(review): add() presumably leaves an existing key (such as the
+		// 'PURGED' marker from deleteContainerCache()) alone -- confirm
+		$this->memCache->add( $this->containerCacheKey( $container ), $val, $ttl );
+	}
+
+	/**
+	 * Delete the cached info for a container.
+	 * Rather than removing the key, a 'PURGED' marker is stored for 300
+	 * seconds; the key is salted this way to prevent race conditions.
+	 *
+	 * @param $container string Resolved container name
+	 */
+	final protected function deleteContainerCache( $container ) {
+		$purged = $this->memCache->set( $this->containerCacheKey( $container ), 'PURGED', 300 );
+		if ( !$purged ) {
+			trigger_error( "Unable to delete stat cache for container $container." );
+		}
+	}
+
+	/**
+	 * Look up, in one batch, the cached info of every container referred to
+	 * by the given items, and pass what was found to doPrimeContainerCache().
+	 *
+	 * @param $items Array Full container names, storage paths, or FileOp objects
+	 * @return void
+	 */
+	final protected function primeContainerCache( array $items ) {
+		wfProfileIn( __METHOD__ );
+		wfProfileIn( __METHOD__ . '-' . $this->name );
+
+		$storagePaths = array(); // list of storage paths
+		$keyToContainer = array(); // (cache key => resolved container name)
+
+		// Sort the items into storage paths and container names...
+		foreach ( $items as $item ) {
+			if ( $item instanceof FileOp ) {
+				$storagePaths = array_merge( $storagePaths,
+					$item->storagePathsRead(), $item->storagePathsChanged() );
+				continue;
+			}
+			if ( self::isStoragePath( $item ) ) {
+				$storagePaths[] = $item;
+				continue;
+			}
+			if ( is_string( $item ) ) { // full container name
+				$keyToContainer[$this->containerCacheKey( $item )] = $item;
+			}
+		}
+
+		// Map each storage path to the cache key of its container...
+		foreach ( $storagePaths as $storagePath ) {
+			$resolved = $this->resolveStoragePath( $storagePath );
+			$fullCont = $resolved[0];
+			if ( $fullCont === null ) {
+				continue; // not a valid path for this backend
+			}
+			$keyToContainer[$this->containerCacheKey( $fullCont )] = $fullCont;
+		}
+
+		// Fetch all the cache entries at once and index them by container...
+		$infoByContainer = array(); // (resolved container name => cache value)
+		$cached = $this->memCache->getMulti( array_keys( $keyToContainer ) );
+		foreach ( $cached as $key => $info ) {
+			$infoByContainer[$keyToContainer[$key]] = $info;
+		}
+
+		// Only array values are handed on (this drops e.g. 'PURGED' markers)
+		$this->doPrimeContainerCache( array_filter( $infoByContainer, 'is_array' ) );
+
+		wfProfileOut( __METHOD__ . '-' . $this->name );
+		wfProfileOut( __METHOD__ );
+	}
+
+	/**
+	 * Hook for subclasses to fill their process cache from a map of
+	 * resolved container names to cached info. Does nothing by default.
+	 * Only containers that actually exist should appear in the map.
+	 *
+	 * @param $containerInfo Array Map of resolved container names to cached info
+	 * @return void
+	 */
+	protected function doPrimeContainerCache( array $containerInfo ) {}
+
+	/**
+	 * Build the cache key under which the stat info of a file is stored.
+	 * The storage path goes into the key as its SHA-1 hash.
+	 *
+	 * @param $path string Storage path
+	 * @return string
+	 */
+	private function fileCacheKey( $path ) {
+		return wfMemcKey( 'backend', $this->getName(), 'file', sha1( $path ) );
+	}
+
+	/**
+	 * Set the cached stat info for a file path.
+	 * Negatives (404s) are not cached.
By not caching negatives, we can skip cache
+	 * salting for the case when a file is created at a path where there was none before.
+	 *
+	 * @param $path string Storage path
+	 * @param $val mixed Information to cache
+	 */
+	final protected function setFileCache( $path, $val ) {
+		$this->memCache->add( $this->fileCacheKey( $path ), $val, 7*86400 );
+	}
+
+	/**
+	 * Delete the cached stat info for a file path.
+	 * The cache key is salted for a while to prevent race conditions.
+	 *
+	 * @param $path string Storage path
+	 */
+	final protected function deleteFileCache( $path ) {
+		if ( !$this->memCache->set( $this->fileCacheKey( $path ), 'PURGED', 300 ) ) {
+			trigger_error( "Unable to delete stat cache for file $path." );
+		}
+	}
+
+	/**
+	 * Do a batch lookup from cache for file stats for all paths
+	 * used in a list of storage paths or FileOp objects.
+	 *
+	 * Paths are normalized before they are used as keys, and paths that
+	 * fail normalization or do not belong to this backend are ignored.
+	 *
+	 * @param $items Array List of storage paths or FileOps
+	 * @return void
+	 */
+	final protected function primeFileCache( array $items ) {
+		wfProfileIn( __METHOD__ );
+		wfProfileIn( __METHOD__ . '-' . $this->name );
+
+		$paths = array(); // list of storage paths
+		$pathNames = array(); // (cache key => normalized storage path)
+		// Get all the paths from the items...
+		foreach ( $items as $item ) {
+			if ( $item instanceof FileOp ) {
+				$paths = array_merge( $paths, $item->storagePathsRead() );
+				$paths = array_merge( $paths, $item->storagePathsChanged() );
+			} elseif ( self::isStoragePath( $item ) ) {
+				$paths[] = $item;
+			}
+		}
+		// Get all the corresponding cache keys for paths...
+		foreach ( $paths as $path ) {
+			// Key everything by the normalized path. That is the form which
+			// clearCache() and the cheapCache readers of this class (e.g.
+			// getFileSha1Base36()) use, so entries primed under the raw
+			// path could be missed by lookups and survive a cache clear.
+			// NOTE(review): assumes setFileCache() callers also pass
+			// normalized paths -- confirm.
+			$path = self::normalizeStoragePath( $path );
+			if ( $path === null ) {
+				continue; // invalid storage path
+			}
+			list( $cont, $rel, $s ) = $this->resolveStoragePath( $path );
+			if ( $rel !== null ) { // valid path for this backend
+				$pathNames[$this->fileCacheKey( $path )] = $path;
+			}
+		}
+		// Get all cache entries for these file cache keys...
+		$values = $this->memCache->getMulti( array_keys( $pathNames ) );
+		foreach ( $values as $cacheKey => $val ) {
+			if ( is_array( $val ) ) { // skips misses and 'PURGED' markers
+				$path = $pathNames[$cacheKey];
+				$this->cheapCache->set( $path, 'stat', $val );
+				if ( isset( $val['sha1'] ) ) { // some backends store SHA-1 as metadata
+					$this->cheapCache->set( $path, 'sha1',
+						array( 'hash' => $val['sha1'], 'latest' => $val['latest'] ) );
+				}
+			}
+		}
+
+		wfProfileOut( __METHOD__ . '-' . $this->name );
+		wfProfileOut( __METHOD__ );
+	}
+}
+
+/**
+ * FileBackendStore helper class for performing asynchronous file operations.
+ *
+ * For example, calling FileBackendStore::createInternal() with the "async"
+ * param flag may result in a Status that contains this object as a value.
+ * This class is largely backend-specific and is mostly just "magic" to be
+ * passed to FileBackendStore::executeOpHandlesInternal().
+ */
+abstract class FileBackendStoreOpHandle {
+	/** @var Array */
+	public $params = array(); // params to caller functions
+	/** @var FileBackendStore */
+	public $backend;
+	/** @var Array */
+	public $resourcesToClose = array();
+
+	public $call; // string; name that identifies the function called
+
+	/**
+	 * Close all open file handles listed in $resourcesToClose
+	 *
+	 * @return void
+	 */
+	public function closeResources() {
+		array_map( 'fclose', $this->resourcesToClose );
+	}
+}
+
+/**
+ * FileBackendStore helper function to handle listings that span container shards.
+ * Do not use this class from places outside of FileBackendStore.
+ *
+ * @ingroup FileBackend
+ */
+abstract class FileBackendStoreShardListIterator implements Iterator {
+	/** @var FileBackendStore */
+	protected $backend;
+	/** @var Array */
+	protected $params;
+	/** @var Array */
+	protected $shardSuffixes;
+	protected $container; // string; full container name
+	protected $directory; // string; resolved relative path
+
+	/** @var Traversable|Array|null Listing of the current shard */
+	protected $iter;
+	protected $curShard = 0; // integer
+	protected $pos = 0; // integer
+
+	/** @var Array */
+	protected $multiShardPaths = array(); // (rel path => 1)
+
+	/**
+	 * @param $backend FileBackendStore
+	 * @param $container string Full storage container name
+	 * @param $dir string Storage directory relative to container
+	 * @param $suffixes Array List of container shard suffixes
+	 * @param $params Array
+	 */
+	public function __construct(
+		FileBackendStore $backend, $container, $dir, array $suffixes, array $params
+	) {
+		$this->backend = $backend;
+		$this->container = $container;
+		$this->directory = $dir;
+		$this->shardSuffixes = $suffixes;
+		$this->params = $params;
+	}
+
+	/**
+	 * @see Iterator::key()
+	 * @return integer
+	 */
+	public function key() {
+		return $this->pos;
+	}
+
+	/**
+	 * @see Iterator::valid()
+	 * @return bool
+	 */
+	public function valid() {
+		if ( $this->iter instanceof Iterator ) {
+			return $this->iter->valid();
+		} elseif ( is_array( $this->iter ) ) {
+			return ( current( $this->iter ) !== false ); // no paths can have this value
+		}
+		return false; // some failure?
+	}
+
+	/**
+	 * @see Iterator::current()
+	 * @return string|bool String or false
+	 */
+	public function current() {
+		if ( $this->iter instanceof Iterator ) {
+			return $this->iter->current();
+		} elseif ( is_array( $this->iter ) ) {
+			return current( $this->iter );
+		}
+		return false; // the shard listing failed; there is no current item
+	}
+
+	/**
+	 * @see Iterator::next()
+	 * @return void
+	 */
+	public function next() {
+		++$this->pos;
+		$this->advanceShardList();
+		$this->skipToListableItem();
+	}
+
+	/**
+	 * @see Iterator::rewind()
+	 * @return void
+	 */
+	public function rewind() {
+		$this->pos = 0;
+		$this->curShard = 0;
+		$this->setIteratorFromCurrentShard();
+		$this->skipToListableItem();
+	}
+
+	/**
+	 * Move the listing of the current shard forward by one item.
+	 * Does nothing if that listing is neither an Iterator nor an array.
+	 */
+	private function advanceShardList() {
+		if ( $this->iter instanceof Iterator ) {
+			$this->iter->next();
+		} elseif ( is_array( $this->iter ) ) {
+			next( $this->iter );
+		}
+	}
+
+	/**
+	 * Starting at the current item, settle on the first item that should
+	 * be listed, moving on to later shards as the earlier ones run out.
+	 */
+	private function skipToListableItem() {
+		do {
+			$continue = false; // keep scanning shards?
+			$this->filterViaNext(); // filter out duplicates
+			// Find the next non-empty shard if no elements are left
+			if ( !$this->valid() ) {
+				$this->nextShardIteratorIfNotValid();
+				$continue = $this->valid(); // re-filter unless we ran out of shards
+			}
+		} while ( $continue );
+	}
+
+	/**
+	 * Filter out duplicate items by advancing to the next ones
+	 */
+	protected function filterViaNext() {
+		while ( $this->valid() ) {
+			// Go through current() so that array listings work as well;
+			// calling $this->iter->current() directly is a fatal error
+			// when the shard listing is a plain array.
+			$rel = $this->current(); // path relative to given directory
+			$path = $this->params['dir'] . "/{$rel}"; // full storage path
+			if ( $this->backend->isSingleShardPathInternal( $path ) ) {
+				break; // path is only on one shard; no issue with duplicates
+			} elseif ( isset( $this->multiShardPaths[$rel] ) ) {
+				// Don't keep listing paths that are on multiple shards
+				$this->advanceShardList();
+			} else {
+				$this->multiShardPaths[$rel] = 1;
+				break;
+			}
+		}
+	}
+
+	/**
+	 * If the list iterator for this container shard is out of items,
+	 * then move on to the next container that has items.
+	 * If there are none, then it advances to the last container.
+	 */
+	protected function nextShardIteratorIfNotValid() {
+		while ( !$this->valid() && ++$this->curShard < count( $this->shardSuffixes ) ) {
+			$this->setIteratorFromCurrentShard();
+		}
+	}
+
+	/**
+	 * Set the list iterator to that of the current container shard
+	 */
+	protected function setIteratorFromCurrentShard() {
+		$this->iter = $this->listFromShard(
+			$this->container . $this->shardSuffixes[$this->curShard],
+			$this->directory, $this->params );
+		// Start loading results so that current() works
+		if ( $this->iter ) {
+			( $this->iter instanceof Iterator ) ? $this->iter->rewind() : reset( $this->iter );
+		}
+	}
+
+	/**
+	 * Get the list for a given container shard
+	 *
+	 * @param $container string Resolved container name
+	 * @param $dir string Resolved path relative to container
+	 * @param $params Array
+	 * @return Traversable|Array|null
+	 */
+	abstract protected function listFromShard( $container, $dir, array $params );
+}
+
+/**
+ * Iterator for listing directories
+ */
+class FileBackendStoreShardDirIterator extends FileBackendStoreShardListIterator {
+	/**
+	 * @see FileBackendStoreShardListIterator::listFromShard()
+	 * @return Array|null|Traversable
+	 */
+	protected function listFromShard( $container, $dir, array $params ) {
+		return $this->backend->getDirectoryListInternal( $container, $dir, $params );
+	}
+}
+
+/**
+ * Iterator for listing regular files
+ */
+class FileBackendStoreShardFileIterator extends FileBackendStoreShardListIterator {
+	/**
+	 * @see FileBackendStoreShardListIterator::listFromShard()
+	 * @return Array|null|Traversable
+	 */
+	protected function listFromShard( $container, $dir, array $params ) {
+		return $this->backend->getFileListInternal( $container, $dir, $params );
+	}
+}
diff --git a/includes/filebackend/FileOp.php b/includes/filebackend/FileOp.php
new file mode 100644
index 00000000..7c43c489
--- /dev/null
+++ b/includes/filebackend/FileOp.php
@@ -0,0 +1,764 @@
+<?php
+/**
+ * Helper class for representing operations with
transaction support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * FileBackend helper class for representing operations. + * Do not use this class from places outside FileBackend. + * + * Methods called from FileOpBatch::attempt() should avoid throwing + * exceptions at all costs. FileOp objects should be lightweight in order + * to support large arrays in memory and serialization. 
+ *
+ * @ingroup FileBackend
+ * @since 1.19
+ */
+abstract class FileOp {
+	/** @var Array */
+	protected $params = array();
+	/** @var FileBackendStore */
+	protected $backend;
+
+	protected $state = self::STATE_NEW; // integer
+	protected $failed = false; // boolean
+	protected $async = false; // boolean
+	protected $useLatest = true; // boolean
+	protected $batchId; // string
+
+	protected $sourceSha1; // string
+	protected $destSameAsSource; // boolean
+
+	/* Object life-cycle */
+	const STATE_NEW = 1;
+	const STATE_CHECKED = 2;
+	const STATE_ATTEMPTED = 3;
+
+	/**
+	 * Build a new file operation transaction.
+	 *
+	 * Only the parameters named by allowedParams() are kept; anything
+	 * else in $params is ignored.
+	 *
+	 * @param $backend FileBackendStore
+	 * @param $params Array
+	 * @throws MWException If a required parameter is missing (or null)
+	 */
+	final public function __construct( FileBackendStore $backend, array $params ) {
+		$this->backend = $backend;
+		list( $required, $optional ) = $this->allowedParams();
+		foreach ( $required as $name ) {
+			if ( isset( $params[$name] ) ) {
+				$this->params[$name] = $params[$name];
+			} else {
+				throw new MWException( "File operation missing parameter '$name'." );
+			}
+		}
+		foreach ( $optional as $name ) {
+			if ( isset( $params[$name] ) ) {
+				$this->params[$name] = $params[$name];
+			}
+		}
+		// Do not assign $params wholesale here; that would undo the filtering above
+	}
+
+	/**
+	 * Set the batch UUID this operation belongs to
+	 *
+	 * @param $batchId string
+	 * @return void
+	 */
+	final public function setBatchId( $batchId ) {
+		$this->batchId = $batchId;
+	}
+
+	/**
+	 * Whether to allow stale data for file reads and stat checks
+	 *
+	 * @param $allowStale bool
+	 * @return void
+	 */
+	final public function allowStaleReads( $allowStale ) {
+		$this->useLatest = !$allowStale;
+	}
+
+	/**
+	 * Get the value of the parameter with the given name
+	 *
+	 * @param $name string
+	 * @return mixed Returns null if the parameter is not set
+	 */
+	final public function getParam( $name ) {
+		return isset( $this->params[$name] ) ? $this->params[$name] : null;
+	}
+
+	/**
+	 * Check if this operation failed precheck() or attempt()
+	 *
+	 * @return bool
+	 */
+	final public function failed() {
+		return $this->failed;
+	}
+
+	/**
+	 * Get a new empty predicates array for precheck()
+	 *
+	 * @return Array
+	 */
+	final public static function newPredicates() {
+		return array( 'exists' => array(), 'sha1' => array() );
+	}
+
+	/**
+	 * Get a new empty dependency tracking array for paths read/written to
+	 *
+	 * @return Array
+	 */
+	final public static function newDependencies() {
+		return array( 'read' => array(), 'write' => array() );
+	}
+
+	/**
+	 * Update a dependency tracking array to account for this operation
+	 *
+	 * @param $deps Array Prior path reads/writes; format of FileOp::newDependencies()
+	 * @return Array
+	 */
+	final public function applyDependencies( array $deps ) {
+		$deps['read'] += array_fill_keys( $this->storagePathsRead(), 1 );
+		$deps['write'] += array_fill_keys( $this->storagePathsChanged(), 1 );
+		return $deps;
+	}
+
+	/**
+	 * Check if this operation reads or changes paths that prior operations
+	 * listed in $deps have read or written
+	 *
+	 * @param $deps Array Prior path reads/writes; format of FileOp::newDependencies()
+	 * @return boolean
+	 */
+	final public function dependsOn( array $deps ) {
+		foreach ( $this->storagePathsChanged() as $path ) {
+			if ( isset( $deps['read'][$path] ) || isset( $deps['write'][$path] ) ) {
+				return true; // "output" or "anti" dependency
+			}
+		}
+		foreach ( $this->storagePathsRead() as $path ) {
+			if ( isset( $deps['write'][$path] ) ) {
+				return true; // "flow" dependency
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Get the file journal entries for this file operation
+	 *
+	 * @param $oPredicates Array Pre-op info about files (format of FileOp::newPredicates)
+	 * @param $nPredicates Array Post-op info about files (format of FileOp::newPredicates)
+	 * @return Array
+	 */
+	final public function getJournalEntries( array $oPredicates, array $nPredicates ) {
+		$nullEntries = array();
+		$updateEntries = array();
+		$deleteEntries = array();
+		$pathsUsed = array_merge( $this->storagePathsRead(), $this->storagePathsChanged() );
+		foreach ( $pathsUsed as $path ) {
+			$nullEntries[] = array( // assertion for recovery
+				'op' => 'null',
+				'path' => $path,
+				'newSha1' => $this->fileSha1( $path, $oPredicates )
+			);
+		}
+		foreach ( $this->storagePathsChanged() as $path ) {
+			if ( $nPredicates['sha1'][$path] === false ) { // deleted
+				$deleteEntries[] = array(
+					'op' => 'delete',
+					'path' => $path,
+					'newSha1' => ''
+				);
+			} else { // created/updated
+				$updateEntries[] = array(
+					'op' => $this->fileExists( $path, $oPredicates ) ? 'update' : 'create',
+					'path' => $path,
+					'newSha1' => $nPredicates['sha1'][$path]
+				);
+			}
+		}
+		return array_merge( $nullEntries, $updateEntries, $deleteEntries );
+	}
+
+	/**
+	 * Check preconditions of the operation without writing anything
+	 *
+	 * @param $predicates Array
+	 * @return Status
+	 */
+	final public function precheck( array &$predicates ) {
+		if ( $this->state !== self::STATE_NEW ) {
+			return Status::newFatal( 'fileop-fail-state', self::STATE_NEW, $this->state );
+		}
+		$this->state = self::STATE_CHECKED;
+		$status = $this->doPrecheck( $predicates );
+		if ( !$status->isOK() ) {
+			$this->failed = true;
+		}
+		return $status;
+	}
+
+	/**
+	 * @return Status
+	 */
+	protected function doPrecheck( array &$predicates ) {
+		return Status::newGood();
+	}
+
+	/**
+	 * Attempt the operation
+	 *
+	 * @return Status
+	 */
+	final public function attempt() {
+		if ( $this->state !== self::STATE_CHECKED ) {
+			return Status::newFatal( 'fileop-fail-state', self::STATE_CHECKED, $this->state );
+		} elseif ( $this->failed ) { // failed precheck
+			return Status::newFatal( 'fileop-fail-attempt-precheck' );
+		}
+		$this->state = self::STATE_ATTEMPTED;
+		$status = $this->doAttempt();
+		if ( !$status->isOK() ) {
+			$this->failed = true;
+			$this->logFailure( 'attempt' );
+		}
+		return $status;
+	}
+
+	/**
+	 * @return Status
+	 */
+	protected function doAttempt() {
+		return Status::newGood();
+	}
+
+	/**
+	 * Attempt the operation in the background
+	 *
+	 * @return Status
+	 */
+	final public function attemptAsync() {
+		$this->async = true;
+		$result = $this->attempt();
+		$this->async = false;
+		return $result;
+	}
+
+	/**
+	 * Get the names of the parameters this operation accepts
+	 *
+	 * @return Array (required params list, optional params list)
+	 */
+	protected function allowedParams() {
+		return array( array(), array() );
+	}
+
+	/**
+	 * Adjust params to FileBackendStore internal file calls
+	 *
+	 * @param $params Array
+	 * @return Array $params with 'async' set from this operation's async flag
+	 */
+	protected function setFlags( array $params ) {
+		// With "+" the left operand wins, so any 'async' key in $params is ignored
+		return array( 'async' => $this->async ) + $params;
+	}
+
+	/**
+	 * Get a list of storage paths read from for this operation
+	 *
+	 * @return Array
+	 */
+	final public function storagePathsRead() {
+		return array_map( 'FileBackend::normalizeStoragePath', $this->doStoragePathsRead() );
+	}
+
+	/**
+	 * @see FileOp::storagePathsRead()
+	 * @return Array
+	 */
+	protected function doStoragePathsRead() {
+		return array();
+	}
+
+	/**
+	 * Get a list of storage paths written to for this operation
+	 *
+	 * @return Array
+	 */
+	final public function storagePathsChanged() {
+		return array_map( 'FileBackend::normalizeStoragePath', $this->doStoragePathsChanged() );
+	}
+
+	/**
+	 * @see FileOp::storagePathsChanged()
+	 * @return Array
+	 */
+	protected function doStoragePathsChanged() {
+		return array();
+	}
+
+	/**
+	 * Check for errors with regards to the destination file already existing.
+	 * This also updates the destSameAsSource and sourceSha1 member variables.
+	 * A bad status will be returned if there is no chance it can be overwritten.
+	 *
+	 * @param $predicates Array
+	 * @return Status
+	 */
+	protected function precheckDestExistence( array $predicates ) {
+		$status = Status::newGood();
+		// Get hash of source file/string and the destination file
+		$this->sourceSha1 = $this->getSourceSha1Base36(); // FS file or data string
+		if ( $this->sourceSha1 === null ) { // file in storage?
+			$this->sourceSha1 = $this->fileSha1( $this->params['src'], $predicates );
+		}
+		$this->destSameAsSource = false;
+		if ( $this->fileExists( $this->params['dst'], $predicates ) ) {
+			if ( $this->getParam( 'overwrite' ) ) {
+				return $status; // OK
+			} elseif ( $this->getParam( 'overwriteSame' ) ) {
+				$dhash = $this->fileSha1( $this->params['dst'], $predicates );
+				// Check if hashes are valid and match each other...
+				if ( !strlen( $this->sourceSha1 ) || !strlen( $dhash ) ) {
+					$status->fatal( 'backend-fail-hashes' );
+				} elseif ( $this->sourceSha1 !== $dhash ) {
+					// Give an error if the files are not identical
+					$status->fatal( 'backend-fail-notsame', $this->params['dst'] );
+				} else {
+					$this->destSameAsSource = true; // OK
+				}
+				return $status; // do nothing; either OK or bad status
+			} else {
+				$status->fatal( 'backend-fail-alreadyexists', $this->params['dst'] );
+				return $status;
+			}
+		}
+		return $status;
+	}
+
+	/**
+	 * precheckDestExistence() helper function to get the source file SHA-1.
+	 * Subclasses should override this iff the source is not in storage.
+	 *
+	 * @return string|bool|null Null if the source is in storage (N/A); false on failure
+	 */
+	protected function getSourceSha1Base36() {
+		return null; // N/A
+	}
+
+	/**
+	 * Check if a file will exist in storage when this operation is attempted
+	 *
+	 * @param $source string Storage path
+	 * @param $predicates Array
+	 * @return bool
+	 */
+	final protected function fileExists( $source, array $predicates ) {
+		if ( isset( $predicates['exists'][$source] ) ) {
+			return $predicates['exists'][$source]; // previous op assures this
+		} else {
+			$params = array( 'src' => $source, 'latest' => $this->useLatest );
+			return $this->backend->fileExists( $params );
+		}
+	}
+
+	/**
+	 * Get the SHA-1 of a file in storage when this operation is attempted
+	 *
+	 * @param $source string Storage path
+	 * @param $predicates Array
+	 * @return string|bool False on failure
+	 */
+	final protected function fileSha1( $source, array $predicates ) {
+		if ( isset( $predicates['sha1'][$source] ) ) {
+			return $predicates['sha1'][$source]; // previous op assures this
+		} else {
+			$params = array( 'src' => $source, 'latest' => $this->useLatest );
+			return $this->backend->getFileSha1Base36( $params );
+		}
+	}
+
+	/**
+	 * Get the backend this operation is for
+	 *
+	 * @return FileBackendStore
+	 */
+	public function getBackend() {
+		return $this->backend;
+	}
+
+	/**
+	 * Log a file operation failure and preserve any temp files
+	 *
+	 * @param $action string
+	 * @return void
+	 */
+	final public function logFailure( $action ) {
+		$params = $this->params;
+		$params['failedAction'] = $action;
+		try {
+			wfDebugLog( 'FileOperation', get_class( $this ) .
+				" failed (batch #{$this->batchId}): " . FormatJson::encode( $params ) );
+		} catch ( Exception $e ) {
+			// bad config? debug log error?
+		}
+	}
+}
+
+/**
+ * Store a file into the backend from a file on the file system.
+ * Parameters for this operation are outlined in FileBackend::doOperations().
+ */
+class StoreFileOp extends FileOp {
+	/**
+	 * @return array (required params list, optional params list)
+	 */
+	protected function allowedParams() {
+		$required = array( 'src', 'dst' );
+		$optional = array( 'overwrite', 'overwriteSame', 'disposition' );
+		return array( $required, $optional );
+	}
+
+	/**
+	 * Verify the local source file and the destination path, then record
+	 * the expected post-operation state of the destination in $predicates.
+	 *
+	 * @param $predicates array
+	 * @return Status
+	 */
+	protected function doPrecheck( array &$predicates ) {
+		$src = $this->params['src'];
+		$dst = $this->params['dst'];
+		$status = Status::newGood();
+		// The source must be a regular file on the file system
+		if ( !is_file( $src ) ) {
+			$status->fatal( 'backend-fail-notexists', $src );
+			return $status;
+		}
+		// The source must not exceed the backend size limit
+		if ( filesize( $src ) > $this->backend->maxFileSizeInternal() ) {
+			$status->fatal( 'backend-fail-maxsize', $dst, $this->backend->maxFileSizeInternal() );
+			$status->fatal( 'backend-fail-store', $src, $dst );
+			return $status;
+		}
+		// The destination must be a path a file can be placed at
+		if ( !$this->backend->isPathUsableInternal( $dst ) ) {
+			$status->fatal( 'backend-fail-usable', $dst );
+			$status->fatal( 'backend-fail-store', $src, $dst );
+			return $status;
+		}
+		// Deal with a destination file that already exists
+		$status->merge( $this->precheckDestExistence( $predicates ) );
+		if ( $status->isOK() ) {
+			// Later operations may assume the destination holds the source content
+			$predicates['exists'][$dst] = true;
+			$predicates['sha1'][$dst] = $this->sourceSha1;
+		}
+		return $status; // safe to call attempt()
+	}
+
+	/**
+	 * @return Status
+	 */
+	protected function doAttempt() {
+		if ( $this->destSameAsSource ) {
+			return Status::newGood(); // destination already has this content
+		}
+		// Store the file at the destination
+		return $this->backend->storeInternal( $this->setFlags( $this->params ) );
+	}
+
+	/**
+	 * @return bool|string Base 36 SHA-1 of the local source file; false if it cannot be hashed
+	 */
+	protected function getSourceSha1Base36() {
+		wfSuppressWarnings();
+		$hex = sha1_file( $this->params['src'] );
+		wfRestoreWarnings();
+		return ( $hex === false ) ? false : wfBaseConvert( $hex, 16, 36, 31 );
+	}
+
+	/**
+	 * @return array
+	 */
+	protected function doStoragePathsChanged() {
+		return array( $this->params['dst'] );
+	}
+}
+
+/**
+ * Create a file in the backend with the given content.
+ * Parameters for this operation are outlined in FileBackend::doOperations().
+ */
+class CreateFileOp extends FileOp {
+	/**
+	 * @return array (required params list, optional params list)
+	 */
+	protected function allowedParams() {
+		$required = array( 'content', 'dst' );
+		$optional = array( 'overwrite', 'overwriteSame', 'disposition' );
+		return array( $required, $optional );
+	}
+
+	/**
+	 * Verify the content size and the destination path, then record
+	 * the expected post-operation state of the destination in $predicates.
+	 *
+	 * @param $predicates array
+	 * @return Status
+	 */
+	protected function doPrecheck( array &$predicates ) {
+		$dst = $this->params['dst'];
+		$status = Status::newGood();
+		// The content must not exceed the backend size limit
+		if ( strlen( $this->getParam( 'content' ) ) > $this->backend->maxFileSizeInternal() ) {
+			$status->fatal( 'backend-fail-maxsize', $dst, $this->backend->maxFileSizeInternal() );
+			$status->fatal( 'backend-fail-create', $dst );
+			return $status;
+		}
+		// The destination must be a path a file can be placed at
+		if ( !$this->backend->isPathUsableInternal( $dst ) ) {
+			$status->fatal( 'backend-fail-usable', $dst );
+			$status->fatal( 'backend-fail-create', $dst );
+			return $status;
+		}
+		// Deal with a destination file that already exists
+		$status->merge( $this->precheckDestExistence( $predicates ) );
+		if ( $status->isOK() ) {
+			// Later operations may assume the destination holds this content
+			$predicates['exists'][$dst] = true;
+			$predicates['sha1'][$dst] = $this->sourceSha1;
+		}
+		return $status; // safe to call attempt()
+	}
+
+	/**
+	 * @return Status
+	 */
+	protected function doAttempt() {
+		if ( $this->destSameAsSource ) {
+			return Status::newGood(); // destination already has this content
+		}
+		// Create the file at the destination
+		return $this->backend->createInternal( $this->setFlags( $this->params ) );
+	}
+
+	/**
+	 * @return bool|String Base 36 SHA-1 of the 'content' parameter
+	 */
+	protected function getSourceSha1Base36() {
+		$hex = sha1( $this->params['content'] );
+		return wfBaseConvert( $hex, 16, 36, 31 );
+	}
+
+	/**
+	 * @return array
+	 */
+	protected function doStoragePathsChanged() {
+		return array( $this->params['dst'] );
+	}
+}
+
+/**
+ * Copy a file from one storage path to another in the backend.
+ * Parameters for this operation are outlined in FileBackend::doOperations().
+ */
+class CopyFileOp extends FileOp {
+	/**
+	 * @return array (required params list, optional params list)
+	 */
+	protected function allowedParams() {
+		$required = array( 'src', 'dst' );
+		$optional = array( 'overwrite', 'overwriteSame', 'disposition' );
+		return array( $required, $optional );
+	}
+
+	/**
+	 * Verify the source file and the destination path, then record
+	 * the expected post-operation state of the destination in $predicates.
+	 *
+	 * @param $predicates array
+	 * @return Status
+	 */
+	protected function doPrecheck( array &$predicates ) {
+		$src = $this->params['src'];
+		$dst = $this->params['dst'];
+		$status = Status::newGood();
+		// The source must exist by the time this operation runs
+		if ( !$this->fileExists( $src, $predicates ) ) {
+			$status->fatal( 'backend-fail-notexists', $src );
+			return $status;
+		}
+		// The destination must be a path a file can be placed at
+		if ( !$this->backend->isPathUsableInternal( $dst ) ) {
+			$status->fatal( 'backend-fail-usable', $dst );
+			$status->fatal( 'backend-fail-copy', $src, $dst );
+			return $status;
+		}
+		// Deal with a destination file that already exists
+		$status->merge( $this->precheckDestExistence( $predicates ) );
+		if ( $status->isOK() ) {
+			// Later operations may assume the destination holds the source content
+			$predicates['exists'][$dst] = true;
+			$predicates['sha1'][$dst] = $this->sourceSha1;
+		}
+		return $status; // safe to call attempt()
+	}
+
+	/**
+	 * @return Status
+	 */
+	protected function doAttempt() {
+		// Nothing to do if the paths are the same or the destination already matches
+		if ( $this->params['src'] === $this->params['dst'] || $this->destSameAsSource ) {
+			return Status::newGood();
+		}
+		// Copy the file into the destination
+		return $this->backend->copyInternal( $this->setFlags( $this->params ) );
+	}
+
+	/**
+	 * @return array
+	 */
+	protected function doStoragePathsRead() {
+		return array( $this->params['src'] );
+	}
+
+	/**
+	 * @return array
+	 */
+	protected function doStoragePathsChanged() {
+		return array( $this->params['dst'] );
+	}
+}
+
+/**
+ * Move a file from one storage path to another in the backend.
+ * Parameters for this operation are outlined in FileBackend::doOperations().
+ */
+class MoveFileOp extends FileOp {
+	/**
+	 * @return array (required params list, optional params list)
+	 */
+	protected function allowedParams() {
+		$required = array( 'src', 'dst' );
+		$optional = array( 'overwrite', 'overwriteSame', 'disposition' );
+		return array( $required, $optional );
+	}
+
+	/**
+	 * Verify the source file and the destination path, then record the
+	 * expected post-operation state of both paths in $predicates.
+	 *
+	 * @param $predicates array
+	 * @return Status
+	 */
+	protected function doPrecheck( array &$predicates ) {
+		$src = $this->params['src'];
+		$dst = $this->params['dst'];
+		$status = Status::newGood();
+		// The source must exist by the time this operation runs
+		if ( !$this->fileExists( $src, $predicates ) ) {
+			$status->fatal( 'backend-fail-notexists', $src );
+			return $status;
+		}
+		// The destination must be a path a file can be placed at
+		if ( !$this->backend->isPathUsableInternal( $dst ) ) {
+			$status->fatal( 'backend-fail-usable', $dst );
+			$status->fatal( 'backend-fail-move', $src, $dst );
+			return $status;
+		}
+		// Deal with a destination file that already exists
+		$status->merge( $this->precheckDestExistence( $predicates ) );
+		if ( $status->isOK() ) {
+			// Source entries are written first so that the destination
+			// entries take precedence when both paths are the same
+			$predicates['exists'][$src] = false;
+			$predicates['sha1'][$src] = false;
+			$predicates['exists'][$dst] = true;
+			$predicates['sha1'][$dst] = $this->sourceSha1;
+		}
+		return $status; // safe to call attempt()
+	}
+
+	/**
+	 * @return Status
+	 */
+	protected function doAttempt() {
+		$src = $this->params['src'];
+		// Do nothing if the src/dst paths are the same
+		if ( $src === $this->params['dst'] ) {
+			return Status::newGood();
+		}
+		if ( $this->destSameAsSource ) {
+			// Just delete source as the destination needs no changes
+			$params = array( 'src' => $src );
+			return $this->backend->deleteInternal( $this->setFlags( $params ) );
+		}
+		// Move the file into the destination
+		return $this->backend->moveInternal( $this->setFlags( $this->params ) );
+	}
+
+	/**
+	 * @return array
+	 */
+	protected function doStoragePathsRead() {
+		return array( $this->params['src'] );
+	}
+
+	/**
+	 * @return array
+	 */
+	protected function doStoragePathsChanged() {
+		return array( $this->params['src'], $this->params['dst'] );
+	}
+}
+
+/**
+ * Delete a file at the given storage path from the backend.
+ * Parameters for this operation are outlined in FileBackend::doOperations().
+ */
+class DeleteFileOp extends FileOp {
+	protected $needsDelete = true; // boolean; cleared by doPrecheck() if the source is already gone
+
+	/**
+	 * @return array (required params list, optional params list)
+	 */
+	protected function allowedParams() {
+		return array( array( 'src' ), array( 'ignoreMissingSource' ) );
+	}
+
+	/**
+	 * Verify the source file, then record in $predicates that it will be gone.
+	 *
+	 * @param array $predicates
+	 * @return Status
+	 */
+	protected function doPrecheck( array &$predicates ) {
+		$src = $this->params['src'];
+		$status = Status::newGood();
+		if ( !$this->fileExists( $src, $predicates ) ) {
+			// A missing source is an error unless the caller opted out
+			if ( !$this->getParam( 'ignoreMissingSource' ) ) {
+				$status->fatal( 'backend-fail-notexists', $src );
+				return $status;
+			}
+			$this->needsDelete = false;
+		}
+		// Update file existence predicates
+		$predicates['exists'][$src] = false;
+		$predicates['sha1'][$src] = false;
+		return $status; // safe to call attempt()
+	}
+
+	/**
+	 * @return Status
+	 */
+	protected function doAttempt() {
+		if ( !$this->needsDelete ) {
+			return Status::newGood(); // source was already missing
+		}
+		// Delete the source file
+		return $this->backend->deleteInternal( $this->setFlags( $this->params ) );
+	}
+
+	/**
+	 * @return array
+	 */
+	protected function doStoragePathsChanged() {
+		return array( $this->params['src'] );
+	}
+}
+
+/**
+ * Placeholder operation that has no params and does nothing
+ */
+class NullFileOp extends FileOp {}
diff --git a/includes/filebackend/FileOpBatch.php b/includes/filebackend/FileOpBatch.php
new file mode 100644
index 00000000..33558725
--- /dev/null
+++ b/includes/filebackend/FileOpBatch.php
@@ -0,0 +1,240 @@
+<?php
+/**
+ * Helper
class for representing batch file operations.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup FileBackend
+ * @author Aaron Schulz
+ */
+
+/**
+ * Helper class for representing batch file operations.
+ * Do not use this class from places outside FileBackend.
+ *
+ * Methods should avoid throwing exceptions at all costs.
+ *
+ * @ingroup FileBackend
+ * @since 1.20
+ */
+class FileOpBatch {
+	/* Timeout related parameters */
+	const MAX_BATCH_SIZE = 1000; // integer
+
+	/**
+	 * Attempt to perform a series of file operations.
+	 * Callers are responsible for handling file locking.
+	 *
+	 * $opts is an array of options, including:
+	 * - force        : Errors that would normally cause a rollback do not.
+	 *                  The remaining operations are still attempted if any fail.
+	 * - allowStale   : Don't require the latest available data.
+	 *                  This can increase performance for non-critical writes.
+	 *                  This has no effect unless the 'force' flag is set.
+	 * - nonJournaled : Don't log this operation batch in the file journal.
+	 * - concurrency  : Try to do this many operations in parallel when possible.
+	 *
+	 * The resulting Status will be "OK" unless:
+	 *   - a) unexpected operation errors occurred (network partitions, disk full...)
+	 *   - b) significant operation errors occurred and 'force' was not set
+	 *
+	 * @param $performOps Array List of FileOp operations
+	 * @param $opts Array Batch operation options
+	 * @param $journal FileJournal Journal to log operations to
+	 * @return Status
+	 */
+	public static function attempt( array $performOps, array $opts, FileJournal $journal ) {
+		wfProfileIn( __METHOD__ );
+		$status = Status::newGood();
+
+		$n = count( $performOps );
+		if ( $n > self::MAX_BATCH_SIZE ) {
+			$status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE );
+			wfProfileOut( __METHOD__ );
+			return $status;
+		}
+
+		$batchId = $journal->getTimestampedUUID();
+		$allowStale = !empty( $opts['allowStale'] );
+		$ignoreErrors = !empty( $opts['force'] );
+		$journaled = empty( $opts['nonJournaled'] );
+		$maxConcurrency = isset( $opts['concurrency'] ) ? $opts['concurrency'] : 1;
+
+		$entries = array(); // file journal entry list
+		$predicates = FileOp::newPredicates(); // account for previous ops in prechecks
+		$curBatch = array(); // concurrent FileOp sub-batch accumulation
+		$curBatchDeps = FileOp::newDependencies(); // paths used in FileOp sub-batch
+		$pPerformOps = array(); // ordered list of concurrent FileOp sub-batches
+		$lastBackend = null; // last op backend name
+		// Do pre-checks for each operation; abort on failure...
+		foreach ( $performOps as $index => $fileOp ) {
+			$backendName = $fileOp->getBackend()->getName();
+			$fileOp->setBatchId( $batchId ); // transaction ID
+			$fileOp->allowStaleReads( $allowStale ); // consistency level
+			// Decide if this op can be done concurrently within this sub-batch
+			// or if a new concurrent sub-batch must be started after this one...
+			if ( $fileOp->dependsOn( $curBatchDeps )
+				|| count( $curBatch ) >= $maxConcurrency
+				|| ( $backendName !== $lastBackend && count( $curBatch ) )
+			) {
+				$pPerformOps[] = $curBatch; // push this batch
+				$curBatch = array(); // start a new sub-batch
+				$curBatchDeps = FileOp::newDependencies();
+			}
+			$lastBackend = $backendName;
+			$curBatch[$index] = $fileOp; // keep index
+			// Update list of affected paths in this batch
+			$curBatchDeps = $fileOp->applyDependencies( $curBatchDeps );
+			// Simulate performing the operation...
+			$oldPredicates = $predicates;
+			$subStatus = $fileOp->precheck( $predicates ); // updates $predicates
+			$status->merge( $subStatus );
+			if ( $subStatus->isOK() ) {
+				if ( $journaled ) { // journal log entries
+					$entries = array_merge( $entries,
+						$fileOp->getJournalEntries( $oldPredicates, $predicates ) );
+				}
+			} else { // operation failed?
+				$status->success[$index] = false;
+				++$status->failCount;
+				if ( !$ignoreErrors ) {
+					wfProfileOut( __METHOD__ );
+					return $status; // abort
+				}
+			}
+		}
+		// Push the last sub-batch
+		if ( count( $curBatch ) ) {
+			$pPerformOps[] = $curBatch;
+		}
+
+		// Log the operations in the file journal...
+		if ( count( $entries ) ) {
+			$subStatus = $journal->logChangeBatch( $entries, $batchId );
+			if ( !$subStatus->isOK() ) {
+				wfProfileOut( __METHOD__ );
+				return $subStatus; // abort
+			}
+		}
+
+		if ( $ignoreErrors ) { // treat precheck() fatals as mere warnings
+			$status->setResult( true, $status->value );
+		}
+
+		// Attempt each operation (in parallel if allowed and possible)...
+		if ( count( $pPerformOps ) < count( $performOps ) ) {
+			self::runBatchParallel( $pPerformOps, $status );
+		} else {
+			self::runBatchSeries( $performOps, $status );
+		}
+
+		wfProfileOut( __METHOD__ );
+		return $status;
+	}
+
+	/**
+	 * Attempt a list of file operations in series.
+	 * This will abort remaining ops on failure.
+	 *
+	 * @param $performOps Array List of FileOp operations (any keys)
+	 * @param $status Status Updated in place with the per-operation results
+	 * @return bool Success
+	 */
+	protected static function runBatchSeries( array $performOps, Status $status ) {
+		$aborted = false;
+		foreach ( $performOps as $index => $fileOp ) {
+			if ( $aborted ) {
+				// We can't continue (even with $ignoreErrors) as $predicates is wrong.
+				// Log the remaining ops as failed for recovery. Walking the array
+				// itself (rather than counting up from $index) keeps this correct
+				// when the keys are not contiguous integers.
+				$fileOp->logFailure( 'attempt_aborted' );
+				continue;
+			}
+			if ( $fileOp->failed() ) {
+				continue; // nothing to do
+			}
+			$subStatus = $fileOp->attempt();
+			$status->merge( $subStatus );
+			if ( $subStatus->isOK() ) {
+				$status->success[$index] = true;
+				++$status->successCount;
+			} else {
+				$status->success[$index] = false;
+				++$status->failCount;
+				$aborted = true; // set abort flag; we can't continue
+			}
+		}
+		return !$aborted;
+	}
+
+	/**
+	 * Attempt a list of file operations sub-batches in series.
+	 *
+	 * The operations *in* each sub-batch will be done in parallel.
+	 * The caller is responsible for making sure the operations
+	 * within any given sub-batch do not depend on each other.
+	 * This will abort remaining ops on failure.
+	 *
+	 * @param $pPerformOps Array List of sub-batches (arrays of FileOp operations)
+	 * @param $status Status Updated in place with the per-operation results
+	 * @return Status The given $status object
+	 */
+	protected static function runBatchParallel( array $pPerformOps, Status $status ) {
+		$aborted = false;
+		foreach ( $pPerformOps as $performOpsBatch ) {
+			if ( $aborted ) { // check batch op abort flag...
+				// We can't continue (even with $ignoreErrors) as $predicates is wrong.
+				// Log the remaining ops as failed for recovery...
+				foreach ( $performOpsBatch as $fileOp ) {
+					$fileOp->logFailure( 'attempt_aborted' );
+				}
+				continue;
+			}
+			$statuses = array();
+			$opHandles = array();
+			// Get the backend; all sub-batch ops belong to a single backend
+			$backend = reset( $performOpsBatch )->getBackend();
+			// If attemptAsync() returns synchronously, it was either an
+			// error Status or the backend just doesn't support async ops.
+			foreach ( $performOpsBatch as $i => $fileOp ) {
+				if ( !$fileOp->failed() ) { // failed => already has Status
+					$subStatus = $fileOp->attemptAsync();
+					if ( $subStatus->value instanceof FileBackendStoreOpHandle ) {
+						$opHandles[$i] = $subStatus->value; // deferred
+					} else {
+						$statuses[$i] = $subStatus; // done already
+					}
+				}
+			}
+			// Try to do all the operations concurrently...
+			$statuses = $statuses + $backend->executeOpHandlesInternal( $opHandles );
+			// Marshall and merge all the responses (blocking)...
+			foreach ( $performOpsBatch as $i => $fileOp ) {
+				if ( !$fileOp->failed() ) { // failed => already has Status
+					$subStatus = $statuses[$i];
+					$status->merge( $subStatus );
+					if ( $subStatus->isOK() ) {
+						$status->success[$i] = true;
+						++$status->successCount;
+					} else {
+						$status->success[$i] = false;
+						++$status->failCount;
+						$aborted = true; // set abort flag; we can't continue
+					}
+				}
+			}
+		}
+		return $status;
+	}
+}
diff --git a/includes/filebackend/SwiftFileBackend.php b/includes/filebackend/SwiftFileBackend.php
new file mode 100644
index 00000000..b6f0aa60
--- /dev/null
+++ b/includes/filebackend/SwiftFileBackend.php
@@ -0,0 +1,1544 @@
+<?php
+/**
+ * OpenStack Swift based file backend.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + * @author Russ Nelson + * @author Aaron Schulz + */ + +/** + * @brief Class for an OpenStack Swift based file backend. + * + * This requires the SwiftCloudFiles MediaWiki extension, which includes + * the php-cloudfiles library (https://github.com/rackspace/php-cloudfiles). + * php-cloudfiles requires the curl, fileinfo, and mb_string PHP extensions. + * + * Status messages should avoid mentioning the Swift account name. + * Likewise, error suppression should be used to avoid path disclosure. + * + * @ingroup FileBackend + * @since 1.19 + */ +class SwiftFileBackend extends FileBackendStore { + /** @var CF_Authentication */ + protected $auth; // Swift authentication handler + protected $authTTL; // integer seconds + protected $swiftAnonUser; // string; username to handle unauthenticated requests + protected $swiftUseCDN; // boolean; whether CloudFiles CDN is enabled + protected $swiftCDNExpiry; // integer; how long to cache things in the CDN + protected $swiftCDNPurgable; // boolean; whether object CDN purging is enabled + + /** @var CF_Connection */ + protected $conn; // Swift connection handle + protected $sessionStarted = 0; // integer UNIX timestamp + + /** @var CloudFilesException */ + protected $connException; + protected $connErrorTime = 0; // UNIX timestamp + + /** @var BagOStuff */ + protected $srvCache; + + /** @var ProcessCacheLRU */ + protected $connContainerCache; // container object cache + + /** + * @see FileBackendStore::__construct() + * Additional $config params include: + * - swiftAuthUrl : Swift authentication server URL + * - swiftUser : Swift user used by MediaWiki (account:username) + * - swiftKey : Swift authentication key for the above user + * - swiftAuthTTL : Swift authentication TTL (seconds) + * - swiftAnonUser : Swift user used for end-user requests (account:username). + * If set, then views of public containers are assumed to go + * through this user. 
If not set, then public containers are + * accessible to unauthenticated requests via ".r:*" in the ACL. + * - swiftUseCDN : Whether a Cloud Files Content Delivery Network is set up + * - swiftCDNExpiry : How long (in seconds) to store content in the CDN. + * If files may likely change, this should probably not exceed + * a few days. For example, deletions may take this long to apply. + * If object purging is enabled, however, this is not an issue. + * - swiftCDNPurgable : Whether object purge requests are allowed by the CDN. + * - shardViaHashLevels : Map of container names to sharding config with: + * - base : base of hash characters, 16 or 36 + * - levels : the number of hash levels (and digits) + * - repeat : hash subdirectories are prefixed with all the + * parent hash directory names (e.g. "a/ab/abc") + * - cacheAuthInfo : Whether to cache authentication tokens in APC, XCache, ect. + * If those are not available, then the main cache will be used. + * This is probably insecure in shared hosting environments. + */ + public function __construct( array $config ) { + parent::__construct( $config ); + if ( !MWInit::classExists( 'CF_Constants' ) ) { + throw new MWException( 'SwiftCloudFiles extension not installed.' ); + } + // Required settings + $this->auth = new CF_Authentication( + $config['swiftUser'], + $config['swiftKey'], + null, // account; unused + $config['swiftAuthUrl'] + ); + // Optional settings + $this->authTTL = isset( $config['swiftAuthTTL'] ) + ? $config['swiftAuthTTL'] + : 5 * 60; // some sane number + $this->swiftAnonUser = isset( $config['swiftAnonUser'] ) + ? $config['swiftAnonUser'] + : ''; + $this->shardViaHashLevels = isset( $config['shardViaHashLevels'] ) + ? $config['shardViaHashLevels'] + : ''; + $this->swiftUseCDN = isset( $config['swiftUseCDN'] ) + ? $config['swiftUseCDN'] + : false; + $this->swiftCDNExpiry = isset( $config['swiftCDNExpiry'] ) + ? 
$config['swiftCDNExpiry']
+			: 12*3600; // 12 hours is safe (tokens last 24 hours per http://docs.openstack.org)
+		$this->swiftCDNPurgable = isset( $config['swiftCDNPurgable'] )
+			? $config['swiftCDNPurgable']
+			: true;
+		// Cache container information to mask latency
+		$this->memCache = wfGetMainCache();
+		// Process cache for container info
+		$this->connContainerCache = new ProcessCacheLRU( 300 );
+		// Cache auth token information to avoid RTTs
+		if ( !empty( $config['cacheAuthInfo'] ) ) {
+			if ( php_sapi_name() === 'cli' ) {
+				$this->srvCache = wfGetMainCache(); // preferably memcached
+			} else {
+				try { // look for APC, XCache, WinCache, etc...
+					$this->srvCache = ObjectCache::newAccelerator( array() );
+				} catch ( Exception $e ) {
+					// Deliberately ignored: srvCache stays unset and the
+					// EmptyBagOStuff fallback below takes over.
+				}
+			}
+		}
+		$this->srvCache = $this->srvCache ? $this->srvCache : new EmptyBagOStuff();
+	}
+
+	/**
+	 * Validate a container-relative path against the limits this backend enforces.
+	 *
+	 * @see FileBackendStore::resolveContainerPath()
+	 * @param $container string Container name (not inspected here)
+	 * @param $relStoragePath string Path relative to the container
+	 * @return string|null The path unchanged, or null if it is not valid UTF-8
+	 *   or its URL-encoded form is longer than 1024 bytes
+	 */
+	protected function resolveContainerPath( $container, $relStoragePath ) {
+		// mb_string required by CF
+		if ( !mb_check_encoding( $relStoragePath, 'UTF-8' ) ) {
+			return null; // not UTF-8, makes it hard to use CF and the swift HTTP API
+		}
+		if ( strlen( urlencode( $relStoragePath ) ) > 1024 ) {
+			return null; // too long for Swift
+		}
+		return $relStoragePath;
+	}
+
+	/**
+	 * @see FileBackendStore::isPathUsableInternal()
+	 * @return bool
+	 */
+	public function isPathUsableInternal( $storagePath ) {
+		list( $container, $rel ) = $this->resolveStoragePathReal( $storagePath );
+		if ( $rel === null ) {
+			return false; // invalid
+		}
+
+		try {
+			$this->getContainer( $container );
+			return true; // container exists
+		} catch ( NoSuchContainerException $e ) {
+			// no such container: not usable
+		} catch ( CloudFilesException $e ) { // some other exception?
+			$this->handleException( $e, null, __METHOD__, array( 'path' => $storagePath ) );
+		}
+
+		return false;
+	}
+
+	/**
+	 * Shorten a Content-Disposition value by dropping trailing ';'-separated
+	 * parts until the re-joined string is at most 255 bytes long.
+	 *
+	 * @param $disposition string Content-Disposition header value
+	 * @return string Truncated Content-Disposition header value to meet Swift limits
+	 */
+	protected function truncDisp( $disposition ) {
+		$kept = '';
+		foreach ( explode( ';', $disposition ) as $segment ) {
+			$segment = trim( $segment );
+			$candidate = ( $kept !== '' ) ? "{$kept};{$segment}" : $segment;
+			if ( strlen( $candidate ) > 255 ) {
+				break; // too long; sigh
+			}
+			$kept = $candidate;
+		}
+		return $kept;
+	}
+
+	/**
+	 * @see FileBackendStore::doCreateInternal()
+	 * @return Status
+	 */
+	protected function doCreateInternal( array $params ) {
+		$status = Status::newGood();
+
+		list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] );
+		if ( $dstRel === null ) {
+			$status->fatal( 'backend-fail-invalidpath', $params['dst'] );
+			return $status;
+		}
+
+		// (a) Check the destination container and object
+		try {
+			$dContObj = $this->getContainer( $dstCont );
+			if ( empty( $params['overwrite'] ) &&
+				$this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) )
+			{
+				$status->fatal( 'backend-fail-alreadyexists', $params['dst'] );
+				return $status;
+			}
+		} catch ( NoSuchContainerException $e ) {
+			$status->fatal( 'backend-fail-create', $params['dst'] );
+			return $status;
+		} catch ( CloudFilesException $e ) { // some other exception?
+ $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + // Manually set the ETag (https://github.com/rackspace/php-cloudfiles/issues/59). + // The MD5 here will be checked within Swift against its own MD5. + $obj->set_etag( md5( $params['content'] ) ); + // Use the same content type as StreamFile for security + $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); + if ( !strlen( $obj->content_type ) ) { // special case + $obj->content_type = 'unknown/unknown'; + } + // Set the Content-Disposition header if requested + if ( isset( $params['disposition'] ) ) { + $obj->headers['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + } + if ( !empty( $params['async'] ) ) { // deferred + $op = $obj->write_async( $params['content'] ); + $status->value = new SwiftFileOpHandle( $this, $params, 'Create', $op ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $status->value->affectedObjects[] = $obj; + } + } else { // actually write the object in Swift + $obj->write( $params['content'] ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->purgeCDNCache( array( $obj ) ); + } + } + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see SwiftFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseCreate( CF_Async_Op $cfOp, Status $status, array $params ) { + try { + $cfOp->getLastResponse(); + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } + } + + /** + * @see FileBackendStore::doStoreInternal() + * @return Status + */ + protected function doStoreInternal( array $params ) { + $status = Status::newGood(); + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the destination container and object + try { + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, __METHOD__, $params ); + return $status; + } + + // (b) Get a SHA-1 hash of the object + $sha1Hash = sha1_file( $params['src'] ); + if ( $sha1Hash === false ) { // source doesn't exist? + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } + $sha1Hash = wfBaseConvert( $sha1Hash, 16, 36, 31 ); + + // (c) Actually store the object + try { + // Create a fresh CF_Object with no fields preloaded. + // We don't want to preserve headers, metadata, and such. + $obj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD + // Note: metadata keys stored as [Upper case char][[Lower case char]...] 
+ $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + // The MD5 here will be checked within Swift against its own MD5. + $obj->set_etag( md5_file( $params['src'] ) ); + // Use the same content type as StreamFile for security + $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); + if ( !strlen( $obj->content_type ) ) { // special case + $obj->content_type = 'unknown/unknown'; + } + // Set the Content-Disposition header if requested + if ( isset( $params['disposition'] ) ) { + $obj->headers['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + } + if ( !empty( $params['async'] ) ) { // deferred + wfSuppressWarnings(); + $fp = fopen( $params['src'], 'rb' ); + wfRestoreWarnings(); + if ( !$fp ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } else { + $op = $obj->write_async( $fp, filesize( $params['src'] ), true ); + $status->value = new SwiftFileOpHandle( $this, $params, 'Store', $op ); + $status->value->resourcesToClose[] = $fp; + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $status->value->affectedObjects[] = $obj; + } + } + } else { // actually write the object in Swift + $obj->load_from_filename( $params['src'], true ); // calls $obj->write() + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->purgeCDNCache( array( $obj ) ); + } + } + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } catch ( IOException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see SwiftFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseStore( CF_Async_Op $cfOp, Status $status, array $params ) { + try { + $cfOp->getLastResponse(); + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } catch ( IOException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } + } + + /** + * @see FileBackendStore::doCopyInternal() + * @return Status + */ + protected function doCopyInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the source/destination containers and destination object + try { + $sContObj = $this->getContainer( $srcCont ); + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, $status, __METHOD__, $params ); + return $status; + } + + // (b) Actually copy the file to the destination + try { + $dstObj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD + $hdrs = array(); // source file headers to override with new values + if ( isset( $params['disposition'] ) ) { + $hdrs['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + } + if ( !empty( $params['async'] ) ) { // deferred + $op = $sContObj->copy_object_to_async( $srcRel, $dContObj, $dstRel, null, $hdrs ); + $status->value = new SwiftFileOpHandle( $this, $params, 'Copy', $op ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $status->value->affectedObjects[] = $dstObj; + } + } else { // actually write the object in Swift + $sContObj->copy_object_to( $srcRel, $dContObj, $dstRel, null, $hdrs ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->purgeCDNCache( array( $dstObj ) ); + } + } + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( NoSuchObjectException $e ) { // source object does not exist + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see SwiftFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseCopy( CF_Async_Op $cfOp, Status $status, array $params ) { + try { + $cfOp->getLastResponse(); + } catch ( NoSuchObjectException $e ) { // source object does not exist + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } + } + + /** + * @see FileBackendStore::doMoveInternal() + * @return Status + */ + protected function doMoveInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the source/destination containers and destination object + try { + $sContObj = $this->getContainer( $srcCont ); + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + return $status; + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, $status, __METHOD__, $params ); + return $status; + } + + // (b) Actually move the file to the destination + try { + $srcObj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + $dstObj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD + $hdrs = array(); // source file headers to override with new values + if ( isset( $params['disposition'] ) ) { + $hdrs['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + } + if ( !empty( $params['async'] ) ) { // deferred + $op = $sContObj->move_object_to_async( $srcRel, $dContObj, $dstRel, null, $hdrs ); + $status->value = new SwiftFileOpHandle( $this, $params, 'Move', $op ); + $status->value->affectedObjects[] = $srcObj; + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $status->value->affectedObjects[] = $dstObj; + } + } else { // actually write the object in Swift + $sContObj->move_object_to( $srcRel, $dContObj, $dstRel, null, $hdrs ); + $this->purgeCDNCache( array( $srcObj ) ); + if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + $this->purgeCDNCache( array( $dstObj ) ); + } + } + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( NoSuchObjectException $e ) { // source object does not exist + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see SwiftFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseMove( CF_Async_Op $cfOp, Status $status, array $params ) { + try { + $cfOp->getLastResponse(); + } catch ( NoSuchObjectException $e ) { // source object does not exist + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + } + } + + /** + * @see FileBackendStore::doDeleteInternal() + * @return Status + */ + protected function doDeleteInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + try { + $sContObj = $this->getContainer( $srcCont ); + $srcObj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + if ( !empty( $params['async'] ) ) { // deferred + $op = $sContObj->delete_object_async( $srcRel ); + $status->value = new SwiftFileOpHandle( $this, $params, 'Delete', $op ); + $status->value->affectedObjects[] = $srcObj; + } else { // actually write the object in Swift + $sContObj->delete_object( $srcRel ); + $this->purgeCDNCache( array( $srcObj ) ); + } + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } catch ( NoSuchObjectException $e ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see SwiftFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseDelete( CF_Async_Op $cfOp, Status $status, array $params ) { + try { + $cfOp->getLastResponse(); + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } catch ( NoSuchObjectException $e ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } + } + } + + /** + * @see FileBackendStore::doPrepareInternal() + * @return Status + */ + protected function doPrepareInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + // (a) Check if container already exists + try { + $contObj = $this->getContainer( $fullCont ); + // NoSuchContainerException not thrown: container must exist + return $status; // already exists + } catch ( NoSuchContainerException $e ) { + // NoSuchContainerException thrown: container does not exist + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, __METHOD__, $params ); + return $status; + } + + // (b) Create container as needed + try { + $contObj = $this->createContainer( $fullCont ); + if ( !empty( $params['noAccess'] ) ) { + // Make container private to end-users... + $status->merge( $this->doSecureInternal( $fullCont, $dir, $params ) ); + } else { + // Make container public to end-users... + $status->merge( $this->doPublishInternal( $fullCont, $dir, $params ) ); + } + if ( $this->swiftUseCDN ) { // Rackspace style CDN + $contObj->make_public( $this->swiftCDNExpiry ); + } + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( CloudFilesException $e ) { // some other exception? 
+			$this->handleException( $e, $status, __METHOD__, $params );
+			return $status;
+		}
+
+		return $status;
+	}
+
+	/**
+	 * Restrict a container to the backend's own Swift user when the
+	 * 'noAccess' parameter is set; otherwise do nothing.
+	 *
+	 * @see FileBackendStore::doSecureInternal()
+	 * @return Status
+	 */
+	protected function doSecureInternal( $fullCont, $dir, array $params ) {
+		$status = Status::newGood();
+
+		if ( !empty( $params['noAccess'] ) ) {
+			// Restrict container from end-users...
+			try {
+				// doPrepareInternal() should have been called,
+				// so the Swift container should already exist...
+				$contObj = $this->getContainer( $fullCont ); // normally a cache hit
+				// NoSuchContainerException not thrown: container must exist
+
+				// Make container private to end-users: the same single-user
+				// list is used for both the read and the write ACL.
+				$ownerOnly = array( $this->auth->username );
+				$aclStatus = $this->setContainerAccess( $contObj, $ownerOnly, $ownerOnly );
+				$status->merge( $aclStatus );
+				if ( $this->swiftUseCDN && $contObj->is_public() ) { // Rackspace style CDN
+					$contObj->make_private();
+				}
+			} catch ( CDNNotEnabledException $e ) {
+				// CDN not enabled; nothing to see here
+			} catch ( CloudFilesException $e ) { // some other exception?
+				$this->handleException( $e, $status, __METHOD__, $params );
+			}
+		}
+
+		return $status;
+	}
+
+	/**
+	 * @see FileBackendStore::doPublishInternal()
+	 * @return Status
+	 */
+	protected function doPublishInternal( $fullCont, $dir, array $params ) {
+		$status = Status::newGood();
+
+		// Unrestrict container from end-users...
+		try {
+			// doPrepareInternal() should have been called,
+			// so the Swift container should already exist...
+			$contObj = $this->getContainer( $fullCont ); // normally a cache hit
+			// NoSuchContainerException not thrown: container must exist
+
+			// Make container public to end-users...
+ if ( $this->swiftAnonUser != '' ) { + $status->merge( $this->setContainerAccess( + $contObj, + array( $this->auth->username, $this->swiftAnonUser ), // read + array( $this->auth->username, $this->swiftAnonUser ) // write + ) ); + } else { + $status->merge( $this->setContainerAccess( + $contObj, + array( $this->auth->username, '.r:*' ), // read + array( $this->auth->username ) // write + ) ); + } + if ( $this->swiftUseCDN && !$contObj->is_public() ) { // Rackspace style CDN + $contObj->make_public(); + } + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doCleanInternal() + * @return Status + */ + protected function doCleanInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + // Only containers themselves can be removed, all else is virtual + if ( $dir != '' ) { + return $status; // nothing to do + } + + // (a) Check the container + try { + $contObj = $this->getContainer( $fullCont, true ); + } catch ( NoSuchContainerException $e ) { + return $status; // ok, nothing to do + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, __METHOD__, $params ); + return $status; + } + + // (b) Delete the container if empty + if ( $contObj->object_count == 0 ) { + try { + $this->deleteContainer( $fullCont ); + } catch ( NoSuchContainerException $e ) { + return $status; // race? + } catch ( NonEmptyContainerException $e ) { + return $status; // race? consistency delay? + } catch ( CloudFilesException $e ) { // some other exception? 
+				$this->handleException( $e, $status, __METHOD__, $params );
+				return $status;
+			}
+		}
+
+		return $status;
+	}
+
+	/**
+	 * Fetch the object named by $params['src'] and report its basic attributes.
+	 *
+	 * @see FileBackendStore::doGetFileStat()
+	 * @param $params Array Must include 'src' (storage path)
+	 * @return array|bool|null Map with 'mtime', 'size' and 'sha1' on success;
+	 *   false if the path is invalid or the container/object does not exist;
+	 *   null if some other CloudFiles error occurred
+	 */
+	protected function doGetFileStat( array $params ) {
+		list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] );
+		if ( $srcRel === null ) {
+			return false; // invalid storage path
+		}
+
+		$stat = false;
+		try {
+			$contObj = $this->getContainer( $srcCont );
+			$srcObj = $contObj->get_object( $srcRel, $this->headersFromParams( $params ) );
+			// Ensures $srcObj->metadata['Sha1base36'] is set (to false on failure)
+			$this->addMissingMetadata( $srcObj, $params['src'] );
+			$stat = array(
+				// Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW
+				'mtime' => wfTimestamp( TS_MW, $srcObj->last_modified ),
+				'size' => (int)$srcObj->content_length,
+				'sha1' => $srcObj->metadata['Sha1base36']
+			);
+		} catch ( NoSuchContainerException $e ) {
+			// container missing: leave $stat as false
+		} catch ( NoSuchObjectException $e ) {
+			// object missing: leave $stat as false
+		} catch ( CloudFilesException $e ) { // some other exception?
+			$stat = null;
+			$this->handleException( $e, null, __METHOD__, $params );
+		}
+
+		return $stat;
+	}
+
+	/**
+	 * Fill in any missing object metadata and save it to Swift
+	 *
+	 * @param $obj CF_Object
+	 * @param $path string Storage path to object
+	 * @return bool Success
+	 * @throws Exception cloudfiles exceptions
+	 */
+	protected function addMissingMetadata( CF_Object $obj, $path ) {
+		if ( isset( $obj->metadata['Sha1base36'] ) ) {
+			return true; // nothing to do
+		}
+		wfProfileIn( __METHOD__ );
+		$status = Status::newGood();
+		$scopeLockS = $this->getScopedFileLocks( array( $path ), LockManager::LOCK_UW, $status );
+		if ( $status->isOK() ) {
+			# Do not stat the file in getLocalCopy() to avoid infinite loops
+			$tmpFile = $this->getLocalCopy( array( 'src' => $path, 'latest' => 1, 'nostat' => 1 ) );
+			if ( $tmpFile ) {
+				$hash = $tmpFile->getSha1Base36();
+				if ( $hash !== false ) {
+					$obj->metadata['Sha1base36'] = $hash;
+					$obj->sync_metadata(); // save to Swift
+					wfProfileOut( __METHOD__ );
+					
return true; // success + } + } + } + $obj->metadata['Sha1base36'] = false; + wfProfileOut( __METHOD__ ); + return false; // failed + } + + /** + * @see FileBackend::getFileContents() + * @return bool|null|string + */ + public function getFileContents( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return false; // invalid storage path + } + + if ( !$this->fileExists( $params ) ) { + return null; + } + + $data = false; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + $data = $obj->read( $this->headersFromParams( $params ) ); + } catch ( NoSuchContainerException $e ) { + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, null, __METHOD__, $params ); + } + + return $data; + } + + /** + * @see FileBackendStore::doDirectoryExists() + * @return bool|null + */ + protected function doDirectoryExists( $fullCont, $dir, array $params ) { + try { + $container = $this->getContainer( $fullCont ); + $prefix = ( $dir == '' ) ? null : "{$dir}/"; + return ( count( $container->list_objects( 1, null, $prefix ) ) > 0 ); + } catch ( NoSuchContainerException $e ) { + return false; + } catch ( CloudFilesException $e ) { // some other exception? 
+ $this->handleException( $e, null, __METHOD__, + array( 'cont' => $fullCont, 'dir' => $dir ) ); + } + + return null; // error + } + + /** + * @see FileBackendStore::getDirectoryListInternal() + * @return SwiftFileBackendDirList + */ + public function getDirectoryListInternal( $fullCont, $dir, array $params ) { + return new SwiftFileBackendDirList( $this, $fullCont, $dir, $params ); + } + + /** + * @see FileBackendStore::getFileListInternal() + * @return SwiftFileBackendFileList + */ + public function getFileListInternal( $fullCont, $dir, array $params ) { + return new SwiftFileBackendFileList( $this, $fullCont, $dir, $params ); + } + + /** + * Do not call this function outside of SwiftFileBackendFileList + * + * @param $fullCont string Resolved container name + * @param $dir string Resolved storage directory with no trailing slash + * @param $after string|null Storage path of file to list items after + * @param $limit integer Max number of items to list + * @param $params Array Includes flag for 'topOnly' + * @return Array List of relative paths of dirs directly under $dir + */ + public function getDirListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { + $dirs = array(); + if ( $after === INF ) { + return $dirs; // nothing more + } + wfProfileIn( __METHOD__ . '-' . $this->name ); + + try { + $container = $this->getContainer( $fullCont ); + $prefix = ( $dir == '' ) ? 
null : "{$dir}/"; + // Non-recursive: only list dirs right under $dir + if ( !empty( $params['topOnly'] ) ) { + $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); + foreach ( $objects as $object ) { // files and dirs + if ( substr( $object, -1 ) === '/' ) { + $dirs[] = $object; // directories end in '/' + } + } + // Recursive: list all dirs under $dir and its subdirs + } else { + // Get directory from last item of prior page + $lastDir = $this->getParentDir( $after ); // must be first page + $objects = $container->list_objects( $limit, $after, $prefix ); + foreach ( $objects as $object ) { // files + $objectDir = $this->getParentDir( $object ); // directory of object + if ( $objectDir !== false ) { // file has a parent dir + // Swift stores paths in UTF-8, using binary sorting. + // See function "create_container_table" in common/db.py. + // If a directory is not "greater" than the last one, + // then it was already listed by the calling iterator. + if ( strcmp( $objectDir, $lastDir ) > 0 ) { + $pDir = $objectDir; + do { // add dir and all its parent dirs + $dirs[] = "{$pDir}/"; + $pDir = $this->getParentDir( $pDir ); + } while ( $pDir !== false // sanity + && strcmp( $pDir, $lastDir ) > 0 // not done already + && strlen( $pDir ) > strlen( $dir ) // within $dir + ); + } + $lastDir = $objectDir; + } + } + } + if ( count( $objects ) < $limit ) { + $after = INF; // avoid a second RTT + } else { + $after = end( $objects ); // update last item + } + } catch ( NoSuchContainerException $e ) { + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, null, __METHOD__, + array( 'cont' => $fullCont, 'dir' => $dir ) ); + } + + wfProfileOut( __METHOD__ . '-' . $this->name ); + return $dirs; + } + + protected function getParentDir( $path ) { + return ( strpos( $path, '/' ) !== false ) ? 
dirname( $path ) : false; + } + + /** + * Do not call this function outside of SwiftFileBackendFileList + * + * @param $fullCont string Resolved container name + * @param $dir string Resolved storage directory with no trailing slash + * @param $after string|null Storage path of file to list items after + * @param $limit integer Max number of items to list + * @param $params Array Includes flag for 'topOnly' + * @return Array List of relative paths of files under $dir + */ + public function getFileListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { + $files = array(); + if ( $after === INF ) { + return $files; // nothing more + } + wfProfileIn( __METHOD__ . '-' . $this->name ); + + try { + $container = $this->getContainer( $fullCont ); + $prefix = ( $dir == '' ) ? null : "{$dir}/"; + // Non-recursive: only list files right under $dir + if ( !empty( $params['topOnly'] ) ) { // files and dirs + $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); + foreach ( $objects as $object ) { + if ( substr( $object, -1 ) !== '/' ) { + $files[] = $object; // directories end in '/' + } + } + // Recursive: list all files under $dir and its subdirs + } else { // files + $objects = $container->list_objects( $limit, $after, $prefix ); + $files = $objects; + } + if ( count( $objects ) < $limit ) { + $after = INF; // avoid a second RTT + } else { + $after = end( $objects ); // update last item + } + } catch ( NoSuchContainerException $e ) { + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, null, __METHOD__, + array( 'cont' => $fullCont, 'dir' => $dir ) ); + } + + wfProfileOut( __METHOD__ . '-' . 
$this->name ); + return $files; + } + + /** + * @see FileBackendStore::doGetFileSha1base36() + * @return bool + */ + protected function doGetFileSha1base36( array $params ) { + $stat = $this->getFileStat( $params ); + if ( $stat ) { + return $stat['sha1']; + } else { + return false; + } + } + + /** + * @see FileBackendStore::doStreamFile() + * @return Status + */ + protected function doStreamFile( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + } + + try { + $cont = $this->getContainer( $srcCont ); + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-stream', $params['src'] ); + return $status; + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, __METHOD__, $params ); + return $status; + } + + try { + $output = fopen( 'php://output', 'wb' ); + $obj = new CF_Object( $cont, $srcRel, false, false ); // skip HEAD + $obj->stream( $output, $this->headersFromParams( $params ) ); + } catch ( NoSuchObjectException $e ) { + $status->fatal( 'backend-fail-stream', $params['src'] ); + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::getLocalCopy() + * @return null|TempFSFile + */ + public function getLocalCopy( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return null; + } + + // Blindly create a tmp file and stream to it, catching any exception if the file does + // not exist. Also, doing a stat here will cause infinite loops when filling metadata. 
+ $tmpFile = null; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + // Get source file extension + $ext = FileBackend::extensionFromPath( $srcRel ); + // Create a new temporary file... + $tmpFile = TempFSFile::factory( 'localcopy_', $ext ); + if ( $tmpFile ) { + $handle = fopen( $tmpFile->getPath(), 'wb' ); + if ( $handle ) { + $obj->stream( $handle, $this->headersFromParams( $params ) ); + fclose( $handle ); + } else { + $tmpFile = null; // couldn't open temp file + } + } + } catch ( NoSuchContainerException $e ) { + $tmpFile = null; + } catch ( NoSuchObjectException $e ) { + $tmpFile = null; + } catch ( CloudFilesException $e ) { // some other exception? + $tmpFile = null; + $this->handleException( $e, null, __METHOD__, $params ); + } + + return $tmpFile; + } + + /** + * @see FileBackendStore::directoriesAreVirtual() + * @return bool + */ + protected function directoriesAreVirtual() { + return true; + } + + /** + * Get headers to send to Swift when reading a file based + * on a FileBackend params array, e.g. that of getLocalCopy(). + * $params is currently only checked for a 'latest' flag. 
+ * + * @param $params Array + * @return Array + */ + protected function headersFromParams( array $params ) { + $hdrs = array(); + if ( !empty( $params['latest'] ) ) { + $hdrs[] = 'X-Newest: true'; + } + return $hdrs; + } + + /** + * @see FileBackendStore::doExecuteOpHandlesInternal() + * @return Array List of corresponding Status objects + */ + protected function doExecuteOpHandlesInternal( array $fileOpHandles ) { + $statuses = array(); + + $cfOps = array(); // list of CF_Async_Op objects + foreach ( $fileOpHandles as $index => $fileOpHandle ) { + $cfOps[$index] = $fileOpHandle->cfOp; + } + $batch = new CF_Async_Op_Batch( $cfOps ); + + $cfOps = $batch->execute(); + foreach ( $cfOps as $index => $cfOp ) { + $status = Status::newGood(); + try { // catch exceptions; update status + $function = '_getResponse' . $fileOpHandles[$index]->call; + $this->$function( $cfOp, $status, $fileOpHandles[$index]->params ); + $this->purgeCDNCache( $fileOpHandles[$index]->affectedObjects ); + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, + __CLASS__ . ":$function", $fileOpHandles[$index]->params ); + } + $statuses[$index] = $status; + } + + return $statuses; + } + + /** + * Set read/write permissions for a Swift container. + * + * $readGrps is a list of the possible criteria for a request to have + * access to read a container. Each item is one of the following formats: + * - account:user : Grants access if the request is by the given user + * - .r:<regex> : Grants access if the request is from a referrer host that + * matches the expression and the request is not for a listing. + * Setting this to '*' effectively makes a container public. + * - .rlistings:<regex> : Grants access if the request is from a referrer host that + * matches the expression and the request for a listing. + * + * $writeGrps is a list of the possible criteria for a request to have + * access to write to a container. 
Each item is of the following format: + * - account:user : Grants access if the request is by the given user + * + * @see http://swift.openstack.org/misc.html#acls + * + * In general, we don't allow listings to end-users. It's not useful, isn't well-defined + * (lists are truncated to 10000 item with no way to page), and is just a performance risk. + * + * @param $contObj CF_Container Swift container + * @param $readGrps Array List of read access routes + * @param $writeGrps Array List of write access routes + * @return Status + */ + protected function setContainerAccess( + CF_Container $contObj, array $readGrps, array $writeGrps + ) { + $creds = $contObj->cfs_auth->export_credentials(); + + $url = $creds['storage_url'] . '/' . rawurlencode( $contObj->name ); + + // Note: 10 second timeout consistent with php-cloudfiles + $req = MWHttpRequest::factory( $url, array( 'method' => 'POST', 'timeout' => 10 ) ); + $req->setHeader( 'X-Auth-Token', $creds['auth_token'] ); + $req->setHeader( 'X-Container-Read', implode( ',', $readGrps ) ); + $req->setHeader( 'X-Container-Write', implode( ',', $writeGrps ) ); + + return $req->execute(); // should return 204 + } + + /** + * Purge the CDN cache of affected objects if CDN caching is enabled. + * This is for Rackspace/Akamai CDNs. 
+ * + * @param $objects Array List of CF_Object items + * @return void + */ + public function purgeCDNCache( array $objects ) { + if ( $this->swiftUseCDN && $this->swiftCDNPurgable ) { + foreach ( $objects as $object ) { + try { + $object->purge_from_cdn(); + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( CloudFilesException $e ) { + $this->handleException( $e, null, __METHOD__, + array( 'cont' => $object->container->name, 'obj' => $object->name ) ); + } + } + } + } + + /** + * Get an authenticated connection handle to the Swift proxy + * + * @return CF_Connection|bool False on failure + * @throws CloudFilesException + */ + protected function getConnection() { + if ( $this->connException instanceof CloudFilesException ) { + if ( ( time() - $this->connErrorTime ) < 60 ) { + throw $this->connException; // failed last attempt; don't bother + } else { // actually retry this time + $this->connException = null; + $this->connErrorTime = 0; + } + } + // Session keys expire after a while, so we renew them periodically + $reAuth = ( ( time() - $this->sessionStarted ) > $this->authTTL ); + // Authenticate with proxy and get a session key... 
+ if ( !$this->conn || $reAuth ) { + $this->sessionStarted = 0; + $this->connContainerCache->clear(); + $cacheKey = $this->getCredsCacheKey( $this->auth->username ); + $creds = $this->srvCache->get( $cacheKey ); // credentials + if ( is_array( $creds ) ) { // cache hit + $this->auth->load_cached_credentials( + $creds['auth_token'], $creds['storage_url'], $creds['cdnm_url'] ); + $this->sessionStarted = time() - ceil( $this->authTTL/2 ); // skew for worst case + } else { // cache miss + try { + $this->auth->authenticate(); + $creds = $this->auth->export_credentials(); + $this->srvCache->add( $cacheKey, $creds, ceil( $this->authTTL/2 ) ); // cache + $this->sessionStarted = time(); + } catch ( CloudFilesException $e ) { + $this->connException = $e; // don't keep re-trying + $this->connErrorTime = time(); + throw $e; // throw it back + } + } + if ( $this->conn ) { // re-authorizing? + $this->conn->close(); // close active cURL handles in CF_Http object + } + $this->conn = new CF_Connection( $this->auth ); + } + return $this->conn; + } + + /** + * Close the connection to the Swift proxy + * + * @return void + */ + protected function closeConnection() { + if ( $this->conn ) { + $this->conn->close(); // close active cURL handles in CF_Http object + $this->sessionStarted = 0; + $this->connContainerCache->clear(); + } + } + + /** + * Get the cache key for a container + * + * @param $username string + * @return string + */ + private function getCredsCacheKey( $username ) { + return wfMemcKey( 'backend', $this->getName(), 'usercreds', $username ); + } + + /** + * @see FileBackendStore::doClearCache() + */ + protected function doClearCache( array $paths = null ) { + $this->connContainerCache->clear(); // clear container object cache + } + + /** + * Get a Swift container object, possibly from process cache. + * Use $reCache if the file count or byte count is needed. 
+ * + * @param $container string Container name + * @param $bypassCache bool Bypass all caches and load from Swift + * @return CF_Container + * @throws CloudFilesException + */ + protected function getContainer( $container, $bypassCache = false ) { + $conn = $this->getConnection(); // Swift proxy connection + if ( $bypassCache ) { // purge cache + $this->connContainerCache->clear( $container ); + } elseif ( !$this->connContainerCache->has( $container, 'obj' ) ) { + $this->primeContainerCache( array( $container ) ); // check persistent cache + } + if ( !$this->connContainerCache->has( $container, 'obj' ) ) { + $contObj = $conn->get_container( $container ); + // NoSuchContainerException not thrown: container must exist + $this->connContainerCache->set( $container, 'obj', $contObj ); // cache it + if ( !$bypassCache ) { + $this->setContainerCache( $container, // update persistent cache + array( 'bytes' => $contObj->bytes_used, 'count' => $contObj->object_count ) + ); + } + } + return $this->connContainerCache->get( $container, 'obj' ); + } + + /** + * Create a Swift container + * + * @param $container string Container name + * @return CF_Container + * @throws CloudFilesException + */ + protected function createContainer( $container ) { + $conn = $this->getConnection(); // Swift proxy connection + $contObj = $conn->create_container( $container ); + $this->connContainerCache->set( $container, 'obj', $contObj ); // cache + return $contObj; + } + + /** + * Delete a Swift container + * + * @param $container string Container name + * @return void + * @throws CloudFilesException + */ + protected function deleteContainer( $container ) { + $conn = $this->getConnection(); // Swift proxy connection + $this->connContainerCache->clear( $container ); // purge + $conn->delete_container( $container ); + } + + /** + * @see FileBackendStore::doPrimeContainerCache() + * @return void + */ + protected function doPrimeContainerCache( array $containerInfo ) { + try { + $conn = 
$this->getConnection(); // Swift proxy connection + foreach ( $containerInfo as $container => $info ) { + $contObj = new CF_Container( $conn->cfs_auth, $conn->cfs_http, + $container, $info['count'], $info['bytes'] ); + $this->connContainerCache->set( $container, 'obj', $contObj ); + } + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, null, __METHOD__, array() ); + } + } + + /** + * Log an unexpected exception for this backend. + * This also sets the Status object to have a fatal error. + * + * @param $e Exception + * @param $status Status|null + * @param $func string + * @param $params Array + * @return void + */ + protected function handleException( Exception $e, $status, $func, array $params ) { + if ( $status instanceof Status ) { + if ( $e instanceof AuthenticationException ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } else { + $status->fatal( 'backend-fail-internal', $this->name ); + } + } + if ( $e->getMessage() ) { + trigger_error( "$func: " . $e->getMessage(), E_USER_WARNING ); + } + if ( $e instanceof InvalidResponseException ) { // possibly a stale token + $this->srvCache->delete( $this->getCredsCacheKey( $this->auth->username ) ); + $this->closeConnection(); // force a re-connect and re-auth next time + } + wfDebugLog( 'SwiftBackend', + get_class( $e ) . " in '{$func}' (given '" . FormatJson::encode( $params ) . "')" . + ( $e->getMessage() ? ": {$e->getMessage()}" : "" ) + ); + } +} + +/** + * @see FileBackendStoreOpHandle + */ +class SwiftFileOpHandle extends FileBackendStoreOpHandle { + /** @var CF_Async_Op */ + public $cfOp; + /** @var Array */ + public $affectedObjects = array(); + + public function __construct( $backend, array $params, $call, CF_Async_Op $cfOp ) { + $this->backend = $backend; + $this->params = $params; + $this->call = $call; + $this->cfOp = $cfOp; + } +} + +/** + * SwiftFileBackend helper class to page through listings. 
+ * Swift also has a listing limit of 10,000 objects for sanity. + * Do not use this class from places outside SwiftFileBackend. + * + * @ingroup FileBackend + */ +abstract class SwiftFileBackendList implements Iterator { + /** @var Array */ + protected $bufferIter = array(); + protected $bufferAfter = null; // string; list items *after* this path + protected $pos = 0; // integer + /** @var Array */ + protected $params = array(); + + /** @var SwiftFileBackend */ + protected $backend; + protected $container; // string; container name + protected $dir; // string; storage directory + protected $suffixStart; // integer + + const PAGE_SIZE = 9000; // file listing buffer size + + /** + * @param $backend SwiftFileBackend + * @param $fullCont string Resolved container name + * @param $dir string Resolved directory relative to container + * @param $params Array + */ + public function __construct( SwiftFileBackend $backend, $fullCont, $dir, array $params ) { + $this->backend = $backend; + $this->container = $fullCont; + $this->dir = $dir; + if ( substr( $this->dir, -1 ) === '/' ) { + $this->dir = substr( $this->dir, 0, -1 ); // remove trailing slash + } + if ( $this->dir == '' ) { // whole container + $this->suffixStart = 0; + } else { // dir within container + $this->suffixStart = strlen( $this->dir ) + 1; // size of "path/to/dir/" + } + $this->params = $params; + } + + /** + * @see Iterator::key() + * @return integer + */ + public function key() { + return $this->pos; + } + + /** + * @see Iterator::next() + * @return void + */ + public function next() { + // Advance to the next file in the page + next( $this->bufferIter ); + ++$this->pos; + // Check if there are no files left in this page and + // advance to the next page if this page was not empty. 
+ if ( !$this->valid() && count( $this->bufferIter ) ) { + $this->bufferIter = $this->pageFromList( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE, $this->params + ); // updates $this->bufferAfter + } + } + + /** + * @see Iterator::rewind() + * @return void + */ + public function rewind() { + $this->pos = 0; + $this->bufferAfter = null; + $this->bufferIter = $this->pageFromList( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE, $this->params + ); // updates $this->bufferAfter + } + + /** + * @see Iterator::valid() + * @return bool + */ + public function valid() { + if ( $this->bufferIter === null ) { + return false; // some failure? + } else { + return ( current( $this->bufferIter ) !== false ); // no paths can have this value + } + } + + /** + * Get the given list portion (page) + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $after string|null + * @param $limit integer + * @param $params Array + * @return Traversable|Array|null Returns null on failure + */ + abstract protected function pageFromList( $container, $dir, &$after, $limit, array $params ); +} + +/** + * Iterator for listing directories + */ +class SwiftFileBackendDirList extends SwiftFileBackendList { + /** + * @see Iterator::current() + * @return string|bool String (relative path) or false + */ + public function current() { + return substr( current( $this->bufferIter ), $this->suffixStart, -1 ); + } + + /** + * @see SwiftFileBackendList::pageFromList() + * @return Array|null + */ + protected function pageFromList( $container, $dir, &$after, $limit, array $params ) { + return $this->backend->getDirListPageInternal( $container, $dir, $after, $limit, $params ); + } +} + +/** + * Iterator for listing regular files + */ +class SwiftFileBackendFileList extends SwiftFileBackendList { + /** + * @see Iterator::current() + * @return string|bool String (relative path) or false + */ + public 
function current() { + return substr( current( $this->bufferIter ), $this->suffixStart ); + } + + /** + * @see SwiftFileBackendList::pageFromList() + * @return Array|null + */ + protected function pageFromList( $container, $dir, &$after, $limit, array $params ) { + return $this->backend->getFileListPageInternal( $container, $dir, $after, $limit, $params ); + } +} diff --git a/includes/filebackend/TempFSFile.php b/includes/filebackend/TempFSFile.php new file mode 100644 index 00000000..5032bf68 --- /dev/null +++ b/includes/filebackend/TempFSFile.php @@ -0,0 +1,121 @@ +<?php +/** + * Location holder of files stored temporarily + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileBackend + */ + +/** + * This class is used to hold the location and do limited manipulation + * of files stored temporarily (this will be whatever wfTempDir() returns) + * + * @ingroup FileBackend + */ +class TempFSFile extends FSFile { + protected $canDelete = false; // bool; garbage collect the temp file + + /** @var Array of active temp files to purge on shutdown */ + protected static $instances = array(); + + /** + * Make a new temporary file on the file system. + * Temporary files may be purged when the file object falls out of scope. 
+ * + * @param $prefix string + * @param $extension string + * @return TempFSFile|null + */ + public static function factory( $prefix, $extension = '' ) { + wfProfileIn( __METHOD__ ); + $base = wfTempDir() . '/' . $prefix . wfRandomString( 12 ); + $ext = ( $extension != '' ) ? ".{$extension}" : ""; + for ( $attempt = 1; true; $attempt++ ) { + $path = "{$base}-{$attempt}{$ext}"; + wfSuppressWarnings(); + $newFileHandle = fopen( $path, 'x' ); + wfRestoreWarnings(); + if ( $newFileHandle ) { + fclose( $newFileHandle ); + break; // got it + } + if ( $attempt >= 5 ) { + wfProfileOut( __METHOD__ ); + return null; // give up + } + } + $tmpFile = new self( $path ); + $tmpFile->canDelete = true; // safely instantiated + wfProfileOut( __METHOD__ ); + return $tmpFile; + } + + /** + * Purge this file off the file system + * + * @return bool Success + */ + public function purge() { + $this->canDelete = false; // done + wfSuppressWarnings(); + $ok = unlink( $this->path ); + wfRestoreWarnings(); + return $ok; + } + + /** + * Clean up the temporary file only after an object goes out of scope + * + * @param $object Object + * @return void + */ + public function bind( $object ) { + if ( is_object( $object ) ) { + $object->tempFSFileReferences[] = $this; + } + } + + /** + * Set flag to not clean up after the temporary file + * + * @return void + */ + public function preserve() { + $this->canDelete = false; + } + + /** + * Set flag clean up after the temporary file + * + * @return void + */ + public function autocollect() { + $this->canDelete = true; + } + + /** + * Cleans up after the temporary file by deleting it + */ + function __destruct() { + if ( $this->canDelete ) { + wfSuppressWarnings(); + unlink( $this->path ); + wfRestoreWarnings(); + } + } +} diff --git a/includes/filebackend/filejournal/DBFileJournal.php b/includes/filebackend/filejournal/DBFileJournal.php new file mode 100644 index 00000000..f6268c25 --- /dev/null +++ b/includes/filebackend/filejournal/DBFileJournal.php 
@@ -0,0 +1,152 @@ +<?php +/** + * Version of FileJournal that logs to a DB table. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileJournal + * @author Aaron Schulz + */ + +/** + * Version of FileJournal that logs to a DB table + * @since 1.20 + */ +class DBFileJournal extends FileJournal { + /** @var DatabaseBase */ + protected $dbw; + + protected $wiki = false; // string; wiki DB name + + /** + * Construct a new instance from configuration. 
+ * $config includes: + * 'wiki' : wiki name to use for LoadBalancer + * + * @param $config Array + */ + protected function __construct( array $config ) { + parent::__construct( $config ); + + $this->wiki = $config['wiki']; + } + + /** + * @see FileJournal::logChangeBatch() + * @return Status + */ + protected function doLogChangeBatch( array $entries, $batchId ) { + $status = Status::newGood(); + + try { + $dbw = $this->getMasterDB(); + } catch ( DBError $e ) { + $status->fatal( 'filejournal-fail-dbconnect', $this->backend ); + return $status; + } + + $now = wfTimestamp( TS_UNIX ); + + $data = array(); + foreach ( $entries as $entry ) { + $data[] = array( + 'fj_batch_uuid' => $batchId, + 'fj_backend' => $this->backend, + 'fj_op' => $entry['op'], + 'fj_path' => $entry['path'], + 'fj_new_sha1' => $entry['newSha1'], + 'fj_timestamp' => $dbw->timestamp( $now ) + ); + } + + try { + $dbw->insert( 'filejournal', $data, __METHOD__ ); + } catch ( DBError $e ) { + $status->fatal( 'filejournal-fail-dbquery', $this->backend ); + return $status; + } + + return $status; + } + + /** + * @see FileJournal::doGetChangeEntries() + * @return Array + * @throws DBError + */ + protected function doGetChangeEntries( $start, $limit ) { + $dbw = $this->getMasterDB(); + + $res = $dbw->select( 'filejournal', '*', + array( + 'fj_backend' => $this->backend, + 'fj_id >= ' . $dbw->addQuotes( (int)$start ) ), // $start may be 0 + __METHOD__, + array_merge( array( 'ORDER BY' => 'fj_id ASC' ), + $limit ? 
array( 'LIMIT' => $limit ) : array() ) + ); + + $entries = array(); + foreach ( $res as $row ) { + $item = array(); + foreach ( (array)$row as $key => $value ) { + $item[substr( $key, 3 )] = $value; // "fj_op" => "op" + } + $entries[] = $item; + } + + return $entries; + } + + /** + * @see FileJournal::purgeOldLogs() + * @return Status + * @throws DBError + */ + protected function doPurgeOldLogs() { + $status = Status::newGood(); + if ( $this->ttlDays <= 0 ) { + return $status; // nothing to do + } + + $dbw = $this->getMasterDB(); + $dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays ); + + $dbw->delete( 'filejournal', + array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ), + __METHOD__ + ); + + return $status; + } + + /** + * Get a master connection to the logging DB + * + * @return DatabaseBase + * @throws DBError + */ + protected function getMasterDB() { + if ( !$this->dbw ) { + // Get a separate connection in autocommit mode + $lb = wfGetLBFactory()->newMainLB(); + $this->dbw = $lb->getConnection( DB_MASTER, array(), $this->wiki ); + $this->dbw->clearFlag( DBO_TRX ); + } + return $this->dbw; + } +} diff --git a/includes/filebackend/filejournal/FileJournal.php b/includes/filebackend/filejournal/FileJournal.php new file mode 100644 index 00000000..ce029bbe --- /dev/null +++ b/includes/filebackend/filejournal/FileJournal.php @@ -0,0 +1,196 @@ +<?php +/** + * @defgroup FileJournal File journal + * @ingroup FileBackend + */ + +/** + * File operation journaling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup FileJournal + * @author Aaron Schulz + */ + +/** + * @brief Class for handling file operation journaling. + * + * Subclasses should avoid throwing exceptions at all costs. + * + * @ingroup FileJournal + * @since 1.20 + */ +abstract class FileJournal { + protected $backend; // string + protected $ttlDays; // integer + + /** + * Construct a new instance from configuration. + * $config includes: + * 'ttlDays' : days to keep log entries around (false means "forever") + * + * @param $config Array + */ + protected function __construct( array $config ) { + $this->ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false; + } + + /** + * Create an appropriate FileJournal object from config + * + * @param $config Array + * @param $backend string A registered file backend name + * @throws MWException + * @return FileJournal + */ + final public static function factory( array $config, $backend ) { + $class = $config['class']; + $jrn = new $class( $config ); + if ( !$jrn instanceof self ) { + throw new MWException( "Class given is not an instance of FileJournal." ); + } + $jrn->backend = $backend; + return $jrn; + } + + /** + * Get a statistically unique ID string + * + * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars> + */ + final public function getTimestampedUUID() { + $s = ''; + for ( $i = 0; $i < 5; $i++ ) { + $s .= mt_rand( 0, 2147483647 ); + } + $s = wfBaseConvert( sha1( $s ), 16, 36, 31 ); + return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 ); + } + + /** + * Log changes made by a batch file operation. 
+ * $entries is an array of log entries, each of which contains: + * op : Basic operation name (create, store, copy, delete) + * path : The storage path of the file + * newSha1 : The final base 36 SHA-1 of the file + * Note that 'false' should be used as the SHA-1 for non-existing files. + * + * @param $entries Array List of file operations (each an array of parameters) + * @param $batchId string UUID string that identifies the operation batch + * @return Status + */ + final public function logChangeBatch( array $entries, $batchId ) { + if ( !count( $entries ) ) { + return Status::newGood(); + } + return $this->doLogChangeBatch( $entries, $batchId ); + } + + /** + * @see FileJournal::logChangeBatch() + * + * @param $entries Array List of file operations (each an array of parameters) + * @param $batchId string UUID string that identifies the operation batch + * @return Status + */ + abstract protected function doLogChangeBatch( array $entries, $batchId ); + + /** + * Get an array of file change log entries. + * A starting change ID and/or limit can be specified. + * + * The result as a list of associative arrays, each having: + * id : unique, monotonic, ID for this change + * batch_uuid : UUID for an operation batch + * backend : the backend name + * op : primitive operation (create,update,delete,null) + * path : affected storage path + * new_sha1 : base 36 sha1 of the new file had the operation succeeded + * timestamp : TS_MW timestamp of the batch change + + * Also, $next is updated to the ID of the next entry. + * + * @param $start integer Starting change ID or null + * @param $limit integer Maximum number of items to return + * @param &$next string + * @return Array + */ + final public function getChangeEntries( $start = null, $limit = 0, &$next = null ) { + $entries = $this->doGetChangeEntries( $start, $limit ? 
$limit + 1 : 0 ); + if ( $limit && count( $entries ) > $limit ) { + $last = array_pop( $entries ); // remove the extra entry + $next = $last['id']; // update for next call + } else { + $next = null; // end of list + } + return $entries; + } + + /** + * @see FileJournal::getChangeEntries() + * @return Array + */ + abstract protected function doGetChangeEntries( $start, $limit ); + + /** + * Purge any old log entries + * + * @return Status + */ + final public function purgeOldLogs() { + return $this->doPurgeOldLogs(); + } + + /** + * @see FileJournal::purgeOldLogs() + * @return Status + */ + abstract protected function doPurgeOldLogs(); +} + +/** + * Simple version of FileJournal that does nothing + * @since 1.20 + */ +class NullFileJournal extends FileJournal { + /** + * @see FileJournal::logChangeBatch() + * @param $entries array + * @param $batchId string + * @return Status + */ + protected function doLogChangeBatch( array $entries, $batchId ) { + return Status::newGood(); + } + + /** + * @see FileJournal::doGetChangeEntries() + * @return Array + */ + protected function doGetChangeEntries( $start, $limit ) { + return array(); + } + + /** + * @see FileJournal::purgeOldLogs() + * @return Status + */ + protected function doPurgeOldLogs() { + return Status::newGood(); + } +} diff --git a/includes/filebackend/lockmanager/DBLockManager.php b/includes/filebackend/lockmanager/DBLockManager.php new file mode 100644 index 00000000..a8fe258b --- /dev/null +++ b/includes/filebackend/lockmanager/DBLockManager.php @@ -0,0 +1,374 @@ +<?php +/** + * Version of LockManager based on using DB table locks. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup LockManager + */ + +/** + * Version of LockManager based on using DB table locks. + * This is meant for multi-wiki systems that may share files. + * All locks are blocking, so it might be useful to set a small + * lock-wait timeout via server config to curtail deadlocks. + * + * All lock requests for a resource, identified by a hash string, will map + * to one bucket. Each bucket maps to one or several peer DBs, each on their + * own server, all having the filelocks.sql tables (with row-level locking). + * A majority of peer DBs must agree for a lock to be acquired. + * + * Caching is used to avoid hitting servers that are down. + * + * @ingroup LockManager + * @since 1.19 + */ +class DBLockManager extends QuorumLockManager { + /** @var Array Map of DB names to server config */ + protected $dbServers; // (DB name => server config array) + /** @var BagOStuff */ + protected $statusCache; + + protected $lockExpiry; // integer number of seconds + protected $safeDelay; // integer number of seconds + + protected $session = 0; // random integer + /** @var Array Map Database connections (DB name => Database) */ + protected $conns = array(); + + /** + * Construct a new instance from configuration. + * + * $config paramaters include: + * - dbServers : Associative array of DB names to server configuration. 
+ * Configuration is an associative array that includes: + * - host : DB server name + * - dbname : DB name + * - type : DB type (mysql,postgres,...) + * - user : DB user + * - password : DB user password + * - tablePrefix : DB table prefix + * - flags : DB flags (see DatabaseBase) + * - dbsByBucket : Array of 1-16 consecutive integer keys, starting from 0, + * each having an odd-numbered list of DB names (peers) as values. + * Any DB named 'localDBMaster' will automatically use the DB master + * settings for this wiki (without the need for a dbServers entry). + * - lockExpiry : Lock timeout (seconds) for dropped connections. [optional] + * This tells the DB server how long to wait before assuming + * connection failure and releasing all the locks for a session. + * + * @param Array $config + */ + public function __construct( array $config ) { + parent::__construct( $config ); + + $this->dbServers = isset( $config['dbServers'] ) + ? $config['dbServers'] + : array(); // likely just using 'localDBMaster' + // Sanitize srvsByBucket config to prevent PHP errors + $this->srvsByBucket = array_filter( $config['dbsByBucket'], 'is_array' ); + $this->srvsByBucket = array_values( $this->srvsByBucket ); // consecutive + + if ( isset( $config['lockExpiry'] ) ) { + $this->lockExpiry = $config['lockExpiry']; + } else { + $met = ini_get( 'max_execution_time' ); + $this->lockExpiry = $met ? $met : 60; // use some sane amount if 0 + } + $this->safeDelay = ( $this->lockExpiry <= 0 ) + ? 60 // pick a safe-ish number to match DB timeout default + : $this->lockExpiry; // cover worst case + + foreach ( $this->srvsByBucket as $bucket ) { + if ( count( $bucket ) > 1 ) { // multiple peers + // Tracks peers that couldn't be queried recently to avoid lengthy + // connection timeouts. This is useless if each bucket has one peer. + try { + $this->statusCache = ObjectCache::newAccelerator( array() ); + } catch ( MWException $e ) { + trigger_error( __CLASS__ . 
+ " using multiple DB peers without apc, xcache, or wincache." ); + } + break; + } + } + + $this->session = wfRandomString( 31 ); + } + + /** + * Get a connection to a lock DB and acquire locks on $paths. + * This does not use GET_LOCK() per http://bugs.mysql.com/bug.php?id=1118. + * + * @see QuorumLockManager::getLocksOnServer() + * @return Status + */ + protected function getLocksOnServer( $lockSrv, array $paths, $type ) { + $status = Status::newGood(); + + if ( $type == self::LOCK_EX ) { // writer locks + try { + $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + # Build up values for INSERT clause + $data = array(); + foreach ( $keys as $key ) { + $data[] = array( 'fle_key' => $key ); + } + # Wait on any existing writers and block new ones if we get in + $db = $this->getConnection( $lockSrv ); // checked in isServerUp() + $db->insert( 'filelocks_exclusive', $data, __METHOD__ ); + } catch ( DBError $e ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + } + } + + return $status; + } + + /** + * @see QuorumLockManager::freeLocksOnServer() + * @return Status + */ + protected function freeLocksOnServer( $lockSrv, array $paths, $type ) { + return Status::newGood(); // not supported + } + + /** + * @see QuorumLockManager::releaseAllLocks() + * @return Status + */ + protected function releaseAllLocks() { + $status = Status::newGood(); + + foreach ( $this->conns as $lockDb => $db ) { + if ( $db->trxLevel() ) { // in transaction + try { + $db->rollback( __METHOD__ ); // finish transaction and kill any rows + } catch ( DBError $e ) { + $status->fatal( 'lockmanager-fail-db-release', $lockDb ); + } + } + } + + return $status; + } + + /** + * @see QuorumLockManager::isServerUp() + * @return bool + */ + protected function isServerUp( $lockSrv ) { + if ( !$this->cacheCheckFailures( $lockSrv ) ) { + return false; // recent failure to connect + } + try { + $this->getConnection( $lockSrv ); + } catch ( 
DBError $e ) { + $this->cacheRecordFailure( $lockSrv ); + return false; // failed to connect + } + return true; + } + + /** + * Get (or reuse) a connection to a lock DB + * + * @param $lockDb string + * @return DatabaseBase + * @throws DBError + */ + protected function getConnection( $lockDb ) { + if ( !isset( $this->conns[$lockDb] ) ) { + $db = null; + if ( $lockDb === 'localDBMaster' ) { + $lb = wfGetLBFactory()->newMainLB(); + $db = $lb->getConnection( DB_MASTER ); + } elseif ( isset( $this->dbServers[$lockDb] ) ) { + $config = $this->dbServers[$lockDb]; + $db = DatabaseBase::factory( $config['type'], $config ); + } + if ( !$db ) { + return null; // config error? + } + $this->conns[$lockDb] = $db; + $this->conns[$lockDb]->clearFlag( DBO_TRX ); + # If the connection drops, try to avoid letting the DB rollback + # and release the locks before the file operations are finished. + # This won't handle the case of DB server restarts however. + $options = array(); + if ( $this->lockExpiry > 0 ) { + $options['connTimeout'] = $this->lockExpiry; + } + $this->conns[$lockDb]->setSessionOptions( $options ); + $this->initConnection( $lockDb, $this->conns[$lockDb] ); + } + if ( !$this->conns[$lockDb]->trxLevel() ) { + $this->conns[$lockDb]->begin( __METHOD__ ); // start transaction + } + return $this->conns[$lockDb]; + } + + /** + * Do additional initialization for new lock DB connection + * + * @param $lockDb string + * @param $db DatabaseBase + * @return void + * @throws DBError + */ + protected function initConnection( $lockDb, DatabaseBase $db ) {} + + /** + * Checks if the DB has not recently had connection/query errors. + * This just avoids wasting time on doomed connection attempts. + * + * @param $lockDb string + * @return bool + */ + protected function cacheCheckFailures( $lockDb ) { + return ( $this->statusCache && $this->safeDelay > 0 ) + ? 
!$this->statusCache->get( $this->getMissKey( $lockDb ) ) + : true; + } + + /** + * Log a lock request failure to the cache + * + * @param $lockDb string + * @return bool Success + */ + protected function cacheRecordFailure( $lockDb ) { + return ( $this->statusCache && $this->safeDelay > 0 ) + ? $this->statusCache->set( $this->getMissKey( $lockDb ), 1, $this->safeDelay ) + : true; + } + + /** + * Get a cache key for recent query misses for a DB + * + * @param $lockDb string + * @return string + */ + protected function getMissKey( $lockDb ) { + $lockDb = ( $lockDb === 'localDBMaster' ) ? wfWikiID() : $lockDb; // non-relative + return 'dblockmanager:downservers:' . str_replace( ' ', '_', $lockDb ); + } + + /** + * Make sure remaining locks get cleared for sanity + */ + function __destruct() { + foreach ( $this->conns as $db ) { + if ( $db->trxLevel() ) { // in transaction + try { + $db->rollback( __METHOD__ ); // finish transaction and kill any rows + } catch ( DBError $e ) { + // oh well + } + } + $db->close(); + } + } +} + +/** + * MySQL version of DBLockManager that supports shared locks. + * All locks are non-blocking, which avoids deadlocks. + * + * @ingroup LockManager + */ +class MySqlLockManager extends DBLockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + /** + * @param $lockDb string + * @param $db DatabaseBase + */ + protected function initConnection( $lockDb, DatabaseBase $db ) { + # Let this transaction see lock rows from other transactions + $db->query( "SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;" ); + } + + /** + * Get a connection to a lock DB and acquire locks on $paths. + * This does not use GET_LOCK() per http://bugs.mysql.com/bug.php?id=1118. 
+ * + * @see DBLockManager::getLocksOnServer() + * @return Status + */ + protected function getLocksOnServer( $lockSrv, array $paths, $type ) { + $status = Status::newGood(); + + $db = $this->getConnection( $lockSrv ); // checked in isServerUp() + $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + # Build up values for INSERT clause + $data = array(); + foreach ( $keys as $key ) { + $data[] = array( 'fls_key' => $key, 'fls_session' => $this->session ); + } + # Block new writers... + $db->insert( 'filelocks_shared', $data, __METHOD__, array( 'IGNORE' ) ); + # Actually do the locking queries... + if ( $type == self::LOCK_SH ) { // reader locks + # Bail if there are any existing writers... + $blocked = $db->selectField( 'filelocks_exclusive', '1', + array( 'fle_key' => $keys ), + __METHOD__ + ); + # Prospective writers that haven't yet updated filelocks_exclusive + # will recheck filelocks_shared after doing so and bail due to our entry. + } else { // writer locks + $encSession = $db->addQuotes( $this->session ); + # Bail if there are any existing writers... + # The may detect readers, but the safe check for them is below. + # Note: if two writers come at the same time, both bail :) + $blocked = $db->selectField( 'filelocks_shared', '1', + array( 'fls_key' => $keys, "fls_session != $encSession" ), + __METHOD__ + ); + if ( !$blocked ) { + # Build up values for INSERT clause + $data = array(); + foreach ( $keys as $key ) { + $data[] = array( 'fle_key' => $key ); + } + # Block new readers/writers... + $db->insert( 'filelocks_exclusive', $data, __METHOD__ ); + # Bail if there are any existing readers... 
+ $blocked = $db->selectField( 'filelocks_shared', '1', + array( 'fls_key' => $keys, "fls_session != $encSession" ), + __METHOD__ + ); + } + } + + if ( $blocked ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + } + + return $status; + } +} diff --git a/includes/filebackend/lockmanager/FSLockManager.php b/includes/filebackend/lockmanager/FSLockManager.php new file mode 100644 index 00000000..9a6206fd --- /dev/null +++ b/includes/filebackend/lockmanager/FSLockManager.php @@ -0,0 +1,255 @@ +<?php +/** + * Simple version of LockManager based on using FS lock files. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup LockManager + */ + +/** + * Simple version of LockManager based on using FS lock files. + * All locks are non-blocking, which avoids deadlocks. + * + * This should work fine for small sites running off one server. + * Do not use this with 'lockDirectory' set to an NFS mount unless the + * NFS client is at least version 2.6.12. Otherwise, the BSD flock() + * locks will be ignored; see http://nfs.sourceforge.net/#section_d. 
+ * + * @ingroup LockManager + * @since 1.19 + */ +class FSLockManager extends LockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + protected $lockDir; // global dir for all servers + + /** @var Array Map of (locked key => lock type => lock file handle) */ + protected $handles = array(); + + /** + * Construct a new instance from configuration. + * + * $config includes: + * - lockDirectory : Directory containing the lock files + * + * @param array $config + */ + function __construct( array $config ) { + parent::__construct( $config ); + + $this->lockDir = $config['lockDirectory']; + } + + /** + * @see LockManager::doLock() + * @param $paths array + * @param $type int + * @return Status + */ + protected function doLock( array $paths, $type ) { + $status = Status::newGood(); + + $lockedPaths = array(); // files locked in this attempt + foreach ( $paths as $path ) { + $status->merge( $this->doSingleLock( $path, $type ) ); + if ( $status->isOK() ) { + $lockedPaths[] = $path; + } else { + // Abort and unlock everything + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } + } + + return $status; + } + + /** + * @see LockManager::doUnlock() + * @param $paths array + * @param $type int + * @return Status + */ + protected function doUnlock( array $paths, $type ) { + $status = Status::newGood(); + + foreach ( $paths as $path ) { + $status->merge( $this->doSingleUnlock( $path, $type ) ); + } + + return $status; + } + + /** + * Lock a single resource key + * + * @param $path string + * @param $type integer + * @return Status + */ + protected function doSingleLock( $path, $type ) { + $status = Status::newGood(); + + if ( isset( $this->locksHeld[$path][$type] ) ) { + ++$this->locksHeld[$path][$type]; + } elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { + 
$this->locksHeld[$path][$type] = 1; + } else { + wfSuppressWarnings(); + $handle = fopen( $this->getLockPath( $path ), 'a+' ); + wfRestoreWarnings(); + if ( !$handle ) { // lock dir missing? + wfMkdirParents( $this->lockDir ); + $handle = fopen( $this->getLockPath( $path ), 'a+' ); // try again + } + if ( $handle ) { + // Either a shared or exclusive lock + $lock = ( $type == self::LOCK_SH ) ? LOCK_SH : LOCK_EX; + if ( flock( $handle, $lock | LOCK_NB ) ) { + // Record this lock as active + $this->locksHeld[$path][$type] = 1; + $this->handles[$path][$type] = $handle; + } else { + fclose( $handle ); + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + } else { + $status->fatal( 'lockmanager-fail-openlock', $path ); + } + } + + return $status; + } + + /** + * Unlock a single resource key + * + * @param $path string + * @param $type integer + * @return Status + */ + protected function doSingleUnlock( $path, $type ) { + $status = Status::newGood(); + + if ( !isset( $this->locksHeld[$path] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } elseif ( !isset( $this->locksHeld[$path][$type] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } else { + $handlesToClose = array(); + --$this->locksHeld[$path][$type]; + if ( $this->locksHeld[$path][$type] <= 0 ) { + unset( $this->locksHeld[$path][$type] ); + // If a LOCK_SH comes in while we have a LOCK_EX, we don't + // actually add a handler, so check for handler existence. 
+ if ( isset( $this->handles[$path][$type] ) ) { + if ( $type === self::LOCK_EX + && isset( $this->locksHeld[$path][self::LOCK_SH] ) + && !isset( $this->handles[$path][self::LOCK_SH] ) ) + { + // EX lock came first: move this handle to the SH one + $this->handles[$path][self::LOCK_SH] = $this->handles[$path][$type]; + } else { + // Mark this handle to be unlocked and closed + $handlesToClose[] = $this->handles[$path][$type]; + } + unset( $this->handles[$path][$type] ); + } + } + if ( !count( $this->locksHeld[$path] ) ) { + unset( $this->locksHeld[$path] ); // no locks on this path + } + // Unlock handles to release locks and delete + // any lock files that end up with no locks on them... + if ( wfIsWindows() ) { + // Windows: for any process, including this one, + // calling unlink() on a locked file will fail + $status->merge( $this->closeLockHandles( $path, $handlesToClose ) ); + $status->merge( $this->pruneKeyLockFiles( $path ) ); + } else { + // Unix: unlink() can be used on files currently open by this + // process and we must do so in order to avoid race conditions + $status->merge( $this->pruneKeyLockFiles( $path ) ); + $status->merge( $this->closeLockHandles( $path, $handlesToClose ) ); + } + } + + return $status; + } + + /** + * @param $path string + * @param $handlesToClose array + * @return Status + */ + private function closeLockHandles( $path, array $handlesToClose ) { + $status = Status::newGood(); + foreach ( $handlesToClose as $handle ) { + if ( !flock( $handle, LOCK_UN ) ) { + $status->fatal( 'lockmanager-fail-releaselock', $path ); + } + if ( !fclose( $handle ) ) { + $status->warning( 'lockmanager-fail-closelock', $path ); + } + } + return $status; + } + + /** + * @param $path string + * @return Status + */ + private function pruneKeyLockFiles( $path ) { + $status = Status::newGood(); + if ( !isset( $this->locksHeld[$path] ) ) { + # No locks are held for the lock file anymore + if ( !unlink( $this->getLockPath( $path ) ) ) { + $status->warning( 
'lockmanager-fail-deletelock', $path ); + } + unset( $this->handles[$path] ); + } + return $status; + } + + /** + * Get the path to the lock file for a key + * @param $path string + * @return string + */ + protected function getLockPath( $path ) { + $hash = self::sha1Base36( $path ); + return "{$this->lockDir}/{$hash}.lock"; + } + + /** + * Make sure remaining locks get cleared for sanity + */ + function __destruct() { + while ( count( $this->locksHeld ) ) { + foreach ( $this->locksHeld as $path => $locks ) { + $this->doSingleUnlock( $path, self::LOCK_EX ); + $this->doSingleUnlock( $path, self::LOCK_SH ); + } + } + } +} diff --git a/includes/filebackend/lockmanager/LSLockManager.php b/includes/filebackend/lockmanager/LSLockManager.php new file mode 100644 index 00000000..89428182 --- /dev/null +++ b/includes/filebackend/lockmanager/LSLockManager.php @@ -0,0 +1,218 @@ +<?php +/** + * Version of LockManager based on using lock daemon servers. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup LockManager + */ + +/** + * Manage locks using a lock daemon server. + * + * Version of LockManager based on using lock daemon servers. + * This is meant for multi-wiki systems that may share files. 
+ * All locks are non-blocking, which avoids deadlocks. + * + * All lock requests for a resource, identified by a hash string, will map + * to one bucket. Each bucket maps to one or several peer servers, each + * running LockServerDaemon.php, listening on a designated TCP port. + * A majority of peers must agree for a lock to be acquired. + * + * @ingroup LockManager + * @since 1.19 + */ +class LSLockManager extends QuorumLockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + /** @var Array Map of server names to server config */ + protected $lockServers; // (server name => server config array) + + /** @var Array Map Server connections (server name => resource) */ + protected $conns = array(); + + protected $connTimeout; // float number of seconds + protected $session = ''; // random SHA-1 string + + /** + * Construct a new instance from configuration. + * + * $config paramaters include: + * - lockServers : Associative array of server names to configuration. + * Configuration is an associative array that includes: + * - host : IP address/hostname + * - port : TCP port + * - authKey : Secret string the lock server uses + * - srvsByBucket : Array of 1-16 consecutive integer keys, starting from 0, + * each having an odd-numbered list of server names (peers) as values. + * - connTimeout : Lock server connection attempt timeout. 
[optional] + * + * @param Array $config + */ + public function __construct( array $config ) { + parent::__construct( $config ); + + $this->lockServers = $config['lockServers']; + // Sanitize srvsByBucket config to prevent PHP errors + $this->srvsByBucket = array_filter( $config['srvsByBucket'], 'is_array' ); + $this->srvsByBucket = array_values( $this->srvsByBucket ); // consecutive + + if ( isset( $config['connTimeout'] ) ) { + $this->connTimeout = $config['connTimeout']; + } else { + $this->connTimeout = 3; // use some sane amount + } + + $this->session = wfRandomString( 32 ); // 128 bits + } + + /** + * @see QuorumLockManager::getLocksOnServer() + * @return Status + */ + protected function getLocksOnServer( $lockSrv, array $paths, $type ) { + $status = Status::newGood(); + + // Send out the command and get the response... + $type = ( $type == self::LOCK_SH ) ? 'SH' : 'EX'; + $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + $response = $this->sendCommand( $lockSrv, 'ACQUIRE', $type, $keys ); + + if ( $response !== 'ACQUIRED' ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + } + + return $status; + } + + /** + * @see QuorumLockManager::freeLocksOnServer() + * @return Status + */ + protected function freeLocksOnServer( $lockSrv, array $paths, $type ) { + $status = Status::newGood(); + + // Send out the command and get the response... + $type = ( $type == self::LOCK_SH ) ? 
'SH' : 'EX'; + $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + $response = $this->sendCommand( $lockSrv, 'RELEASE', $type, $keys ); + + if ( $response !== 'RELEASED' ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-releaselock', $path ); + } + } + + return $status; + } + + /** + * @see QuorumLockManager::releaseAllLocks() + * @return Status + */ + protected function releaseAllLocks() { + $status = Status::newGood(); + + foreach ( $this->conns as $lockSrv => $conn ) { + $response = $this->sendCommand( $lockSrv, 'RELEASE_ALL', '', array() ); + if ( $response !== 'RELEASED_ALL' ) { + $status->fatal( 'lockmanager-fail-svr-release', $lockSrv ); + } + } + + return $status; + } + + /** + * @see QuorumLockManager::isServerUp() + * @return bool + */ + protected function isServerUp( $lockSrv ) { + return (bool)$this->getConnection( $lockSrv ); + } + + /** + * Send a command and get back the response + * + * @param $lockSrv string + * @param $action string + * @param $type string + * @param $values Array + * @return string|bool + */ + protected function sendCommand( $lockSrv, $action, $type, $values ) { + $conn = $this->getConnection( $lockSrv ); + if ( !$conn ) { + return false; // no connection + } + $authKey = $this->lockServers[$lockSrv]['authKey']; + // Build of the command as a flat string... + $values = implode( '|', $values ); + $key = sha1( $this->session . $action . $type . $values . $authKey ); + // Send out the command... + if ( fwrite( $conn, "{$this->session}:$key:$action:$type:$values\n" ) === false ) { + return false; + } + // Get the response... 
+ $response = fgets( $conn ); + if ( $response === false ) { + return false; + } + return trim( $response ); + } + + /** + * Get (or reuse) a connection to a lock server + * + * @param $lockSrv string + * @return resource + */ + protected function getConnection( $lockSrv ) { + if ( !isset( $this->conns[$lockSrv] ) ) { + $cfg = $this->lockServers[$lockSrv]; + wfSuppressWarnings(); + $errno = $errstr = ''; + $conn = fsockopen( $cfg['host'], $cfg['port'], $errno, $errstr, $this->connTimeout ); + wfRestoreWarnings(); + if ( $conn === false ) { + return null; + } + $sec = floor( $this->connTimeout ); + $usec = floor( ( $this->connTimeout - floor( $this->connTimeout ) ) * 1e6 ); + stream_set_timeout( $conn, $sec, $usec ); + $this->conns[$lockSrv] = $conn; + } + return $this->conns[$lockSrv]; + } + + /** + * Make sure remaining locks get cleared for sanity + */ + function __destruct() { + $this->releaseAllLocks(); + foreach ( $this->conns as $conn ) { + fclose( $conn ); + } + } +} diff --git a/includes/filebackend/lockmanager/LockManager.php b/includes/filebackend/lockmanager/LockManager.php new file mode 100644 index 00000000..07853f87 --- /dev/null +++ b/includes/filebackend/lockmanager/LockManager.php @@ -0,0 +1,425 @@ +<?php +/** + * @defgroup LockManager Lock management + * @ingroup FileBackend + */ + +/** + * Resource locking handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup LockManager + * @author Aaron Schulz + */ + +/** + * @brief Class for handling resource locking. + * + * Locks on resource keys can either be shared or exclusive. + * + * Implementations must keep track of what is locked by this proccess + * in-memory and support nested locking calls (using reference counting). + * At least LOCK_UW and LOCK_EX must be implemented. LOCK_SH can be a no-op. + * Locks should either be non-blocking or have low wait timeouts. + * + * Subclasses should avoid throwing exceptions at all costs. + * + * @ingroup LockManager + * @since 1.19 + */ +abstract class LockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_EX, // subclasses may use self::LOCK_SH + self::LOCK_EX => self::LOCK_EX + ); + + /** @var Array Map of (resource path => lock type => count) */ + protected $locksHeld = array(); + + /* Lock types; stronger locks have higher values */ + const LOCK_SH = 1; // shared lock (for reads) + const LOCK_UW = 2; // shared lock (for reads used to write elsewhere) + const LOCK_EX = 3; // exclusive lock (for writes) + + /** + * Construct a new instance from configuration + * + * @param $config Array + */ + public function __construct( array $config ) {} + + /** + * Lock the resources at the given abstract paths + * + * @param $paths Array List of resource names + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function lock( array $paths, $type = self::LOCK_EX ) { + wfProfileIn( __METHOD__ ); + $status = $this->doLock( array_unique( $paths ), $this->lockTypeMap[$type] ); + wfProfileOut( __METHOD__ 
); + return $status; + } + + /** + * Unlock the resources at the given abstract paths + * + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function unlock( array $paths, $type = self::LOCK_EX ) { + wfProfileIn( __METHOD__ ); + $status = $this->doUnlock( array_unique( $paths ), $this->lockTypeMap[$type] ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * Get the base 36 SHA-1 of a string, padded to 31 digits + * + * @param $path string + * @return string + */ + final protected static function sha1Base36( $path ) { + return wfBaseConvert( sha1( $path ), 16, 36, 31 ); + } + + /** + * Lock resources with the given keys and lock type + * + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @return string + */ + abstract protected function doLock( array $paths, $type ); + + /** + * Unlock resources with the given keys and lock type + * + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @return string + */ + abstract protected function doUnlock( array $paths, $type ); +} + +/** + * Self-releasing locks + * + * LockManager helper class to handle scoped locks, which + * release when an object is destroyed or goes out of scope. 
+ * + * @ingroup LockManager + * @since 1.19 + */ +class ScopedLock { + /** @var LockManager */ + protected $manager; + /** @var Status */ + protected $status; + /** @var Array List of resource paths*/ + protected $paths; + + protected $type; // integer lock type + + /** + * @param $manager LockManager + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @param $status Status + */ + protected function __construct( + LockManager $manager, array $paths, $type, Status $status + ) { + $this->manager = $manager; + $this->paths = $paths; + $this->status = $status; + $this->type = $type; + } + + /** + * Get a ScopedLock object representing a lock on resource paths. + * Any locks are released once this object goes out of scope. + * The status object is updated with any errors or warnings. + * + * @param $manager LockManager + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @param $status Status + * @return ScopedLock|null Returns null on failure + */ + public static function factory( + LockManager $manager, array $paths, $type, Status $status + ) { + $lockStatus = $manager->lock( $paths, $type ); + $status->merge( $lockStatus ); + if ( $lockStatus->isOK() ) { + return new self( $manager, $paths, $type, $status ); + } + return null; + } + + function __destruct() { + $wasOk = $this->status->isOK(); + $this->status->merge( $this->manager->unlock( $this->paths, $this->type ) ); + if ( $wasOk ) { + // Make sure status is OK, despite any unlockFiles() fatals + $this->status->setResult( true, $this->status->value ); + } + } +} + +/** + * Version of LockManager that uses a quorum from peer servers for locks. + * The resource space can also be sharded into separate peer groups. 
/**
 * Version of LockManager that uses a quorum from peer servers for locks.
 * The resource space can also be sharded into separate peer groups.
 *
 * @ingroup LockManager
 * @since 1.20
 */
abstract class QuorumLockManager extends LockManager {
	/** @var Array Map of bucket indexes to peer server lists */
	protected $srvsByBucket = array(); // (bucket index => (lsrv1, lsrv2, ...))

	/**
	 * Lock the given paths, grouping the ones that need a real request by bucket.
	 *
	 * @see LockManager::doLock()
	 * @param $paths array
	 * @param $type int
	 * @return Status
	 */
	final protected function doLock( array $paths, $type ) {
		$status = Status::newGood();

		// Work out which paths actually need a request: (bucket => paths)
		$pathsToLock = array();
		foreach ( $paths as $path ) {
			if ( isset( $this->locksHeld[$path][$type] ) ) {
				// Already held with this type; just bump the reference count
				++$this->locksHeld[$path][$type];
				continue;
			}
			if ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) {
				// An exclusive lock is already held; record the new type locally
				$this->locksHeld[$path][$type] = 1;
				continue;
			}
			$pathsToLock[$this->getBucketFromKey( $path )][] = $path;
		}

		// Paths locked during this call, kept for rollback on failure
		$lockedPaths = array();
		foreach ( $pathsToLock as $bucket => $bucketPaths ) {
			$status->merge( $this->doLockingRequestBucket( $bucket, $bucketPaths, $type ) );
			if ( !$status->isOK() ) {
				// Undo the buckets that were locked earlier in this call
				$status->merge( $this->doUnlock( $lockedPaths, $type ) );
				return $status;
			}
			foreach ( $bucketPaths as $path ) {
				$this->locksHeld[$path][$type] = 1; // locked
			}
			$lockedPaths = array_merge( $lockedPaths, $bucketPaths );
		}

		return $status;
	}

	/**
	 * Drop one reference per path and release the locks whose count hits zero.
	 *
	 * @see LockManager::doUnlock()
	 * @param $paths array
	 * @param $type int
	 * @return Status
	 */
	final protected function doUnlock( array $paths, $type ) {
		$status = Status::newGood();

		$pathsToUnlock = array(); // (bucket => paths)
		foreach ( $paths as $path ) {
			if ( !isset( $this->locksHeld[$path][$type] ) ) {
				$status->warning( 'lockmanager-notlocked', $path );
				continue;
			}
			// Reference count the locks held and release locks when zero
			--$this->locksHeld[$path][$type];
			if ( $this->locksHeld[$path][$type] <= 0 ) {
				unset( $this->locksHeld[$path][$type] );
				$bucket = $this->getBucketFromKey( $path );
				$pathsToUnlock[$bucket][] = $path;
			}
			if ( !count( $this->locksHeld[$path] ) ) {
				unset( $this->locksHeld[$path] ); // no SH or EX locks left for key
			}
		}

		// Remove these specific locks if possible, or at least release
		// all locks once this process is no longer holding any.
		foreach ( $pathsToUnlock as $bucket => $bucketPaths ) {
			$status->merge( $this->doUnlockingRequestBucket( $bucket, $bucketPaths, $type ) );
		}
		if ( !count( $this->locksHeld ) ) {
			$status->merge( $this->releaseAllLocks() );
		}

		return $status;
	}

	/**
	 * Attempt to acquire locks with the peers for a bucket.
	 * This is all or nothing; if any key is locked then this totally fails.
	 *
	 * @param $bucket integer
	 * @param $paths Array List of resource keys to lock
	 * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH
	 * @return Status
	 */
	final protected function doLockingRequestBucket( $bucket, array $paths, $type ) {
		$status = Status::newGood();

		$yesVotes = 0; // peers that granted the locks
		$votesLeft = count( $this->srvsByBucket[$bucket] ); // peers not yet asked
		$quorum = floor( $votesLeft/2 + 1 ); // simple majority
		// Ask each peer, in order, until the outcome is decided
		foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) {
			if ( !$this->isServerUp( $lockSrv ) ) {
				// An unreachable peer cannot vote
				--$votesLeft;
				$status->warning( 'lockmanager-fail-svr-acquire', $lockSrv );
				continue;
			}
			$status->merge( $this->getLocksOnServer( $lockSrv, $paths, $type ) );
			if ( !$status->isOK() ) {
				return $status; // vetoed; resource locked
			}
			++$yesVotes;
			if ( $yesVotes >= $quorum ) {
				return $status; // lock obtained
			}
			--$votesLeft;
			if ( $quorum - $yesVotes > $votesLeft ) {
				break; // the remaining peers cannot make up the difference
			}
		}
		// Falling out of the loop means the quorum was not met
		$status->setResult( false );

		return $status;
	}

	/**
	 * Attempt to release locks with the peers for a bucket
	 *
	 * @param $bucket integer
	 * @param $paths Array List of resource keys to lock
	 * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH
	 * @return Status
	 */
	final protected function doUnlockingRequestBucket( $bucket, array $paths, $type ) {
		$status = Status::newGood();

		foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) {
			if ( $this->isServerUp( $lockSrv ) ) {
				// Attempt to release the lock on this peer
				$status->merge( $this->freeLocksOnServer( $lockSrv, $paths, $type ) );
			} else {
				$status->fatal( 'lockmanager-fail-svr-release', $lockSrv );
			}
		}

		return $status;
	}

	/**
	 * Get the bucket for resource path.
	 * This should avoid throwing any exceptions.
	 *
	 * @param $path string
	 * @return integer
	 */
	protected function getBucketFromKey( $path ) {
		$prefix = substr( sha1( $path ), 0, 2 ); // first 2 hex chars (8 bits)
		return hexdec( $prefix ) % count( $this->srvsByBucket );
	}

	/**
	 * Check if a lock server is up
	 *
	 * @param $lockSrv string
	 * @return bool
	 */
	abstract protected function isServerUp( $lockSrv );

	/**
	 * Get a connection to a lock server and acquire locks on $paths
	 *
	 * @param $lockSrv string
	 * @param $paths array
	 * @param $type integer
	 * @return Status
	 */
	abstract protected function getLocksOnServer( $lockSrv, array $paths, $type );

	/**
	 * Get a connection to a lock server and release locks on $paths.
	 *
	 * Subclasses must effectively implement this or releaseAllLocks().
	 *
	 * @param $lockSrv string
	 * @param $paths array
	 * @param $type integer
	 * @return Status
	 */
	abstract protected function freeLocksOnServer( $lockSrv, array $paths, $type );

	/**
	 * Release all locks that this session is holding.
	 *
	 * Subclasses must effectively implement this or freeLocksOnServer().
	 *
	 * @return Status
	 */
	abstract protected function releaseAllLocks();
}

/**
 * Simple version of LockManager that does nothing
 * @since 1.19
 */
class NullLockManager extends LockManager {
	/**
	 * Always reports success without locking anything.
	 *
	 * @see LockManager::doLock()
	 * @param $paths array
	 * @param $type int
	 * @return Status
	 */
	protected function doLock( array $paths, $type ) {
		return Status::newGood();
	}

	/**
	 * Always reports success without unlocking anything.
	 *
	 * @see LockManager::doUnlock()
	 * @param $paths array
	 * @param $type int
	 * @return Status
	 */
	protected function doUnlock( array $paths, $type ) {
		return Status::newGood();
	}
}
/**
 * Class to handle file lock manager registration
 *
 * @ingroup LockManager
 * @author Aaron Schulz
 * @since 1.19
 */
class LockManagerGroup {
	/**
	 * @var LockManagerGroup
	 */
	protected static $instance = null;

	/** @var Array of (name => ('class' =>, 'config' =>, 'instance' =>)) */
	protected $managers = array();

	protected function __construct() {}

	/**
	 * Get the shared group, creating it from the globals on first use.
	 *
	 * @return LockManagerGroup
	 */
	public static function singleton() {
		if ( self::$instance === null ) {
			self::$instance = new self();
			self::$instance->initFromGlobals();
		}
		return self::$instance;
	}

	/**
	 * Forget the shared group so that the next singleton() call
	 * builds a fresh one.
	 */
	public static function destroySingleton() {
		self::$instance = null;
	}

	/**
	 * Register the lock managers listed in $wgLockManagers
	 *
	 * @return void
	 */
	protected function initFromGlobals() {
		global $wgLockManagers;

		$this->register( $wgLockManagers );
	}

	/**
	 * Register an array of file lock manager configurations
	 *
	 * @param $configs Array
	 * @return void
	 * @throws MWException
	 */
	protected function register( array $configs ) {
		foreach ( $configs as $settings ) {
			if ( !isset( $settings['name'] ) ) {
				throw new MWException( "Cannot register a lock manager with no name." );
			}
			$name = $settings['name'];
			if ( !isset( $settings['class'] ) ) {
				throw new MWException( "Cannot register lock manager `{$name}` with no class." );
			}
			$class = $settings['class'];
			unset( $settings['class'] ); // lock manager won't need this
			// The instance is created lazily by get()
			$this->managers[$name] = array(
				'class' => $class,
				'config' => $settings,
				'instance' => null
			);
		}
	}

	/**
	 * Get the lock manager object with a given name
	 *
	 * @param $name string
	 * @return LockManager
	 * @throws MWException
	 */
	public function get( $name ) {
		if ( !isset( $this->managers[$name] ) ) {
			throw new MWException( "No lock manager defined with the name `$name`." );
		}
		if ( isset( $this->managers[$name]['instance'] ) ) {
			return $this->managers[$name]['instance'];
		}
		// First request for this manager: build it now and remember it
		$class = $this->managers[$name]['class'];
		$config = $this->managers[$name]['config'];
		$this->managers[$name]['instance'] = new $class( $config );
		return $this->managers[$name]['instance'];
	}

	/**
	 * Get the default lock manager configured for the site.
	 * Returns NullLockManager if no lock manager could be found.
	 *
	 * @return LockManager
	 */
	public function getDefault() {
		if ( isset( $this->managers['default'] ) ) {
			return $this->get( 'default' );
		}
		return new NullLockManager( array() );
	}

	/**
	 * Get the default lock manager configured for the site
	 * or at least some other effective configured lock manager.
	 * Throws an exception if no lock manager could be found.
	 *
	 * @return LockManager
	 * @throws MWException
	 */
	public function getAny() {
		$name = isset( $this->managers['default'] ) ? 'default' : 'fsLockManager';
		return $this->get( $name );
	}
}
/**
 * Manage locks using memcached servers.
 *
 * Version of LockManager based on using memcached servers.
 * This is meant for multi-wiki systems that may share files.
 * All locks are non-blocking, which avoids deadlocks.
 *
 * All lock requests for a resource, identified by a hash string, will map
 * to one bucket. Each bucket maps to one or several peer servers, each running memcached.
 * A majority of peers must agree for a lock to be acquired.
 *
 * @ingroup LockManager
 * @since 1.20
 */
class MemcLockManager extends QuorumLockManager {
	/** @var Array Mapping of lock types to the type actually used */
	protected $lockTypeMap = array(
		self::LOCK_SH => self::LOCK_SH,
		self::LOCK_UW => self::LOCK_SH,
		self::LOCK_EX => self::LOCK_EX
	);

	/** @var Array Map server names to MemcachedBagOStuff objects */
	protected $bagOStuffs = array();
	/** @var Array */
	protected $serversUp = array(); // (server name => bool)

	protected $lockExpiry; // integer; maximum time locks can be held
	protected $session = ''; // string; random SHA-1 UUID
	protected $wikiId = ''; // string

	/**
	 * Construct a new instance from configuration.
	 *
	 * $config parameters include:
	 *   - lockServers  : Associative array of server names to "<IP>:<port>" strings.
	 *   - srvsByBucket : Array of 1-16 consecutive integer keys, starting from 0,
	 *                    each having an odd-numbered list of server names (peers) as values.
	 *   - memcConfig   : Configuration array for ObjectCache::newFromParams. [optional]
	 *                    If set, this must use one of the memcached classes.
	 *   - wikiId       : Wiki ID string that all resources are relative to. [optional]
	 *
	 * @param Array $config
	 * @throws MWException If a configured cache is not a MemcachedBagOStuff
	 */
	public function __construct( array $config ) {
		parent::__construct( $config );

		// Sanitize srvsByBucket config to prevent PHP errors
		$this->srvsByBucket = array_filter( $config['srvsByBucket'], 'is_array' );
		$this->srvsByBucket = array_values( $this->srvsByBucket ); // consecutive

		$memcConfig = isset( $config['memcConfig'] )
			? $config['memcConfig']
			: array( 'class' => 'MemcachedPhpBagOStuff' );

		// One cache object per lock server, each pointing at a single address
		foreach ( $config['lockServers'] as $name => $address ) {
			$params = array( 'servers' => array( $address ) ) + $memcConfig;
			$cache = ObjectCache::newFromParams( $params );
			if ( $cache instanceof MemcachedBagOStuff ) {
				$this->bagOStuffs[$name] = $cache;
			} else {
				throw new MWException(
					'Only MemcachedBagOStuff classes are supported by MemcLockManager.' );
			}
		}

		$this->wikiId = isset( $config['wikiId'] ) ? $config['wikiId'] : wfWikiID();

		$met = ini_get( 'max_execution_time' ); // this is 0 in CLI mode
		$this->lockExpiry = $met ? 2*(int)$met : 2*3600;

		$this->session = wfRandomString( 32 );
	}

	/**
	 * Try to register this session in the lock records of all $paths on one server.
	 * The records are only written if none of the paths has a conflicting lock.
	 *
	 * @see QuorumLockManager::getLocksOnServer()
	 * @param $lockSrv string
	 * @param $paths array
	 * @param $type integer
	 * @return Status
	 */
	protected function getLocksOnServer( $lockSrv, array $paths, $type ) {
		$status = Status::newGood();

		$memc = $this->getCache( $lockSrv );
		$keys = array_map( array( $this, 'recordKeyForPath' ), $paths ); // lock records

		// Lock all of the active lock record keys...
		if ( !$this->acquireMutexes( $memc, $keys ) ) {
			foreach ( $paths as $path ) {
				$status->fatal( 'lockmanager-fail-acquirelock', $path );
			}
			// The caller merges the result, so the fatals must be returned
			return $status;
		}

		// Fetch all the existing lock records...
		$lockRecords = $memc->getMulti( $keys );

		$now = time();
		// Check if the requested locks conflict with existing ones...
		foreach ( $paths as $path ) {
			$locksKey = $this->recordKeyForPath( $path );
			$locksHeld = isset( $lockRecords[$locksKey] )
				? $lockRecords[$locksKey]
				: array( self::LOCK_SH => array(), self::LOCK_EX => array() ); // init
			// An exclusive lock held by another session blocks any request
			foreach ( $locksHeld[self::LOCK_EX] as $session => $expiry ) {
				if ( $expiry < $now ) { // stale?
					unset( $locksHeld[self::LOCK_EX][$session] );
				} elseif ( $session !== $this->session ) {
					$status->fatal( 'lockmanager-fail-acquirelock', $path );
				}
			}
			// Shared locks held by other sessions only block exclusive requests
			if ( $type === self::LOCK_EX ) {
				foreach ( $locksHeld[self::LOCK_SH] as $session => $expiry ) {
					if ( $expiry < $now ) { // stale?
						unset( $locksHeld[self::LOCK_SH][$session] );
					} elseif ( $session !== $this->session ) {
						$status->fatal( 'lockmanager-fail-acquirelock', $path );
					}
				}
			}
			if ( $status->isOK() ) {
				// Register the session in the lock record array
				$locksHeld[$type][$this->session] = $now + $this->lockExpiry;
				// We will update this record if none of the other locks conflict
				$lockRecords[$locksKey] = $locksHeld;
			}
		}

		// If there were no lock conflicts, update all the lock records...
		if ( $status->isOK() ) {
			foreach ( $lockRecords as $locksKey => $locksHeld ) {
				$memc->set( $locksKey, $locksHeld );
				wfDebug( __METHOD__ . ": acquired lock on key $locksKey.\n" );
			}
		}

		// Unlock all of the active lock record keys...
		$this->releaseMutexes( $memc, $keys );

		return $status;
	}

	/**
	 * Remove this session from the lock records of all $paths on one server.
	 *
	 * @see QuorumLockManager::freeLocksOnServer()
	 * @param $lockSrv string
	 * @param $paths array
	 * @param $type integer
	 * @return Status
	 */
	protected function freeLocksOnServer( $lockSrv, array $paths, $type ) {
		$status = Status::newGood();

		$memc = $this->getCache( $lockSrv );
		$keys = array_map( array( $this, 'recordKeyForPath' ), $paths ); // lock records

		// Lock all of the active lock record keys...
		if ( !$this->acquireMutexes( $memc, $keys ) ) {
			foreach ( $paths as $path ) {
				$status->fatal( 'lockmanager-fail-releaselock', $path );
			}
			// The caller merges the result, so the fatals must be returned
			return $status;
		}

		// Fetch all the existing lock records...
		$lockRecords = $memc->getMulti( $keys );

		// Remove the requested locks from all records...
		foreach ( $paths as $path ) {
			$locksKey = $this->recordKeyForPath( $path ); // lock record
			if ( !isset( $lockRecords[$locksKey] ) ) {
				continue; // nothing to do
			}
			$locksHeld = $lockRecords[$locksKey];
			if ( is_array( $locksHeld ) && isset( $locksHeld[$type] ) ) {
				unset( $locksHeld[$type][$this->session] );
				$ok = $memc->set( $locksKey, $locksHeld );
			} else {
				$ok = true;
			}
			if ( !$ok ) {
				$status->fatal( 'lockmanager-fail-releaselock', $path );
			}
			wfDebug( __METHOD__ . ": released lock on key $locksKey.\n" );
		}

		// Unlock all of the active lock record keys...
		$this->releaseMutexes( $memc, $keys );

		return $status;
	}

	/**
	 * @see QuorumLockManager::releaseAllLocks()
	 * @return Status
	 */
	protected function releaseAllLocks() {
		return Status::newGood(); // not supported
	}

	/**
	 * @see QuorumLockManager::isServerUp()
	 * @param $lockSrv string
	 * @return bool
	 */
	protected function isServerUp( $lockSrv ) {
		return (bool)$this->getCache( $lockSrv );
	}

	/**
	 * Get the MemcachedBagOStuff object for a $lockSrv
	 *
	 * The first request for a server writes a short-lived ping key; the
	 * outcome is remembered in $this->serversUp for later calls.
	 *
	 * @param $lockSrv string Server name
	 * @return MemcachedBagOStuff|null
	 */
	protected function getCache( $lockSrv ) {
		$memc = null;
		if ( isset( $this->bagOStuffs[$lockSrv] ) ) {
			$memc = $this->bagOStuffs[$lockSrv];
			if ( !isset( $this->serversUp[$lockSrv] ) ) {
				$this->serversUp[$lockSrv] = $memc->set( 'MemcLockManager:ping', 1, 1 );
				if ( !$this->serversUp[$lockSrv] ) {
					trigger_error( __METHOD__ . ": Could not contact $lockSrv.", E_USER_WARNING );
				}
			}
			if ( !$this->serversUp[$lockSrv] ) {
				return null; // server appears to be down
			}
		}
		return $memc;
	}

	/**
	 * Get the memcached key of the lock record for a path
	 *
	 * @param $path string
	 * @return string
	 */
	protected function recordKeyForPath( $path ) {
		$hash = LockManager::sha1Base36( $path );
		list( $db, $prefix ) = wfSplitWikiID( $this->wikiId );
		return wfForeignMemcKey( $db, $prefix, __CLASS__, 'locks', $hash );
	}

	/**
	 * Acquire the "<key>:mutex" entries guarding a set of lock records.
	 * This is all or nothing; on failure any mutexes obtained are released.
	 *
	 * @param $memc MemcachedBagOStuff
	 * @param $keys Array List of keys to acquire
	 * @return bool
	 */
	protected function acquireMutexes( MemcachedBagOStuff $memc, array $keys ) {
		$lockedKeys = array();

		// Acquire the keys in lexicographical order, to avoid deadlock problems.
		// If P1 is waiting to acquire a key P2 has, P2 can't also be waiting for a key P1 has.
		sort( $keys );

		// Try to quickly loop to acquire the keys, but back off after a few rounds.
		// This reduces memcached spam, especially in the rare case where a server acquires
		// some lock keys and dies without releasing them. The mutex keys are added with a
		// 180 second expiry, so they eventually go away on their own.
		$rounds = 0;
		$start = microtime( true );
		do {
			if ( ( ++$rounds % 4 ) == 0 ) {
				usleep( 1000*50 ); // 50 ms
			}
			foreach ( array_diff( $keys, $lockedKeys ) as $key ) {
				if ( $memc->add( "$key:mutex", 1, 180 ) ) { // lock record
					$lockedKeys[] = $key;
				} else {
					// Stop here and retry this key in the next round. Moving on to
					// later keys would acquire them out of order and defeat the
					// ordering that the deadlock avoidance relies on.
					break;
				}
			}
		} while ( count( $lockedKeys ) < count( $keys ) && ( microtime( true ) - $start ) <= 6 );

		if ( count( $lockedKeys ) != count( $keys ) ) {
			$this->releaseMutexes( $memc, $lockedKeys ); // failed; release what was locked
			return false;
		}

		return true;
	}

	/**
	 * Delete the "<key>:mutex" entries for a set of lock records
	 *
	 * @param $memc MemcachedBagOStuff
	 * @param $keys Array List of acquired keys
	 * @return void
	 */
	protected function releaseMutexes( MemcachedBagOStuff $memc, array $keys ) {
		foreach ( $keys as $key ) {
			$memc->delete( "$key:mutex" );
		}
	}

	/**
	 * Make sure remaining locks get cleared for sanity
	 */
	public function __destruct() {
		// doUnlock() only drops one reference per call, so loop until nothing is left
		while ( count( $this->locksHeld ) ) {
			foreach ( $this->locksHeld as $path => $locks ) {
				$this->doUnlock( array( $path ), self::LOCK_EX );
				$this->doUnlock( array( $path ), self::LOCK_SH );
			}
		}
	}
}