diff options
Diffstat (limited to 'includes/filerepo/backend')
-rw-r--r-- | includes/filerepo/backend/FSFile.php | 233 | ||||
-rw-r--r-- | includes/filerepo/backend/FSFileBackend.php | 600 | ||||
-rw-r--r-- | includes/filerepo/backend/FileBackend.php | 1739 | ||||
-rw-r--r-- | includes/filerepo/backend/FileBackendGroup.php | 156 | ||||
-rw-r--r-- | includes/filerepo/backend/FileBackendMultiWrite.php | 420 | ||||
-rw-r--r-- | includes/filerepo/backend/FileOp.php | 697 | ||||
-rw-r--r-- | includes/filerepo/backend/SwiftFileBackend.php | 877 | ||||
-rw-r--r-- | includes/filerepo/backend/TempFSFile.php | 92 | ||||
-rw-r--r-- | includes/filerepo/backend/lockmanager/DBLockManager.php | 469 | ||||
-rw-r--r-- | includes/filerepo/backend/lockmanager/FSLockManager.php | 202 | ||||
-rw-r--r-- | includes/filerepo/backend/lockmanager/LSLockManager.php | 295 | ||||
-rw-r--r-- | includes/filerepo/backend/lockmanager/LockManager.php | 182 | ||||
-rw-r--r-- | includes/filerepo/backend/lockmanager/LockManagerGroup.php | 89 |
13 files changed, 6051 insertions, 0 deletions
diff --git a/includes/filerepo/backend/FSFile.php b/includes/filerepo/backend/FSFile.php new file mode 100644 index 00000000..54dd1359 --- /dev/null +++ b/includes/filerepo/backend/FSFile.php @@ -0,0 +1,233 @@ +<?php +/** + * @file + * @ingroup FileBackend + */ + +/** + * Class representing a non-directory file on the file system + * + * @ingroup FileBackend + */ +class FSFile { + protected $path; // path to file + + /** + * Sets up the file object + * + * @param String $path Path to temporary file on local disk + */ + public function __construct( $path ) { + if ( FileBackend::isStoragePath( $path ) ) { + throw new MWException( __METHOD__ . " given storage path `$path`." ); + } + $this->path = $path; + } + + /** + * Returns the file system path + * + * @return String + */ + public function getPath() { + return $this->path; + } + + /** + * Checks if the file exists + * + * @return bool + */ + public function exists() { + return is_file( $this->path ); + } + + /** + * Get the file size in bytes + * + * @return int|false + */ + public function getSize() { + return filesize( $this->path ); + } + + /** + * Get the file's last-modified timestamp + * + * @return string|false TS_MW timestamp or false on failure + */ + public function getTimestamp() { + wfSuppressWarnings(); + $timestamp = filemtime( $this->path ); + wfRestoreWarnings(); + if ( $timestamp !== false ) { + $timestamp = wfTimestamp( TS_MW, $timestamp ); + } + return $timestamp; + } + + /** + * Guess the MIME type from the file contents alone + * + * @return string + */ + public function getMimeType() { + return MimeMagic::singleton()->guessMimeType( $this->path, false ); + } + + /** + * Get an associative array containing information about + * a file with the given storage path. + * + * @param $ext Mixed: the file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. 
+ * + * @return array + */ + public function getProps( $ext = true ) { + wfProfileIn( __METHOD__ ); + wfDebug( __METHOD__.": Getting file info for $this->path\n" ); + + $info = self::placeholderProps(); + $info['fileExists'] = $this->exists(); + + if ( $info['fileExists'] ) { + $magic = MimeMagic::singleton(); + + # get the file extension + if ( $ext === true ) { + $ext = self::extensionFromPath( $this->path ); + } + + # mime type according to file contents + $info['file-mime'] = $this->getMimeType(); + # logical mime type + $info['mime'] = $magic->improveTypeFromExtension( $info['file-mime'], $ext ); + + list( $info['major_mime'], $info['minor_mime'] ) = File::splitMime( $info['mime'] ); + $info['media_type'] = $magic->getMediaType( $this->path, $info['mime'] ); + + # Get size in bytes + $info['size'] = $this->getSize(); + + # Height, width and metadata + $handler = MediaHandler::getHandler( $info['mime'] ); + if ( $handler ) { + $tempImage = (object)array(); + $info['metadata'] = $handler->getMetadata( $tempImage, $this->path ); + $gis = $handler->getImageSize( $tempImage, $this->path, $info['metadata'] ); + if ( is_array( $gis ) ) { + $info = $this->extractImageSizeInfo( $gis ) + $info; + } + } + $info['sha1'] = $this->getSha1Base36(); + + wfDebug(__METHOD__.": $this->path loaded, {$info['size']} bytes, {$info['mime']}.\n"); + } else { + wfDebug(__METHOD__.": $this->path NOT FOUND!\n"); + } + + wfProfileOut( __METHOD__ ); + return $info; + } + + /** + * Placeholder file properties to use for files that don't exist + * + * @return Array + */ + public static function placeholderProps() { + $info = array(); + $info['fileExists'] = false; + $info['mime'] = null; + $info['media_type'] = MEDIATYPE_UNKNOWN; + $info['metadata'] = ''; + $info['sha1'] = ''; + $info['width'] = 0; + $info['height'] = 0; + $info['bits'] = 0; + return $info; + } + + /** + * Exract image size information + * + * @return Array + */ + protected function extractImageSizeInfo( array $gis ) { + 
$info = array(); + # NOTE: $gis[2] contains a code for the image type. This is no longer used. + $info['width'] = $gis[0]; + $info['height'] = $gis[1]; + if ( isset( $gis['bits'] ) ) { + $info['bits'] = $gis['bits']; + } else { + $info['bits'] = 0; + } + return $info; + } + + /** + * Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case + * encoding, zero padded to 31 digits. + * + * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 + * fairly neatly. + * + * @return false|string False on failure + */ + public function getSha1Base36() { + wfProfileIn( __METHOD__ ); + + wfSuppressWarnings(); + $hash = sha1_file( $this->path ); + wfRestoreWarnings(); + if ( $hash !== false ) { + $hash = wfBaseConvert( $hash, 16, 36, 31 ); + } + + wfProfileOut( __METHOD__ ); + return $hash; + } + + /** + * Get the final file extension from a file system path + * + * @param $path string + * @return string + */ + public static function extensionFromPath( $path ) { + $i = strrpos( $path, '.' ); + return strtolower( $i ? substr( $path, $i + 1 ) : '' ); + } + + /** + * Get an associative array containing information about a file in the local filesystem. + * + * @param $path String: absolute local filesystem path + * @param $ext Mixed: the file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. + * + * @return array + */ + public static function getPropsFromPath( $path, $ext = true ) { + $fsFile = new self( $path ); + return $fsFile->getProps( $ext ); + } + + /** + * Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case + * encoding, zero padded to 31 digits. + * + * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 + * fairly neatly. 
+ * + * @param $path string + * + * @return false|string False on failure + */ + public static function getSha1Base36FromPath( $path ) { + $fsFile = new self( $path ); + return $fsFile->getSha1Base36(); + } +} diff --git a/includes/filerepo/backend/FSFileBackend.php b/includes/filerepo/backend/FSFileBackend.php new file mode 100644 index 00000000..1a4c44ad --- /dev/null +++ b/includes/filerepo/backend/FSFileBackend.php @@ -0,0 +1,600 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * Class for a file system (FS) based file backend. + * + * All "containers" each map to a directory under the backend's base directory. + * For backwards-compatibility, some container paths can be set to custom paths. + * The wiki ID will not be used in any custom paths, so this should be avoided. + * + * Having directories with thousands of files will diminish performance. + * Sharding can be accomplished by using FileRepo-style hash paths. + * + * Status messages should avoid mentioning the internal FS paths. + * PHP warnings are assumed to be logged rather than output. + * + * @ingroup FileBackend + * @since 1.19 + */ +class FSFileBackend extends FileBackendStore { + protected $basePath; // string; directory holding the container directories + /** @var Array Map of container names to root paths */ + protected $containerPaths = array(); // for custom container paths + protected $fileMode; // integer; file permission mode + + protected $hadWarningErrors = array(); + + /** + * @see FileBackendStore::__construct() + * Additional $config params include: + * basePath : File system directory that holds containers. + * containerPaths : Map of container names to custom file system directories. + * This should only be used for backwards-compatibility. + * fileMode : Octal UNIX file permissions to use on files stored. 
+ */ + public function __construct( array $config ) { + parent::__construct( $config ); + + // Remove any possible trailing slash from directories + if ( isset( $config['basePath'] ) ) { + $this->basePath = rtrim( $config['basePath'], '/' ); // remove trailing slash + } else { + $this->basePath = null; // none; containers must have explicit paths + } + + if ( isset( $config['containerPaths'] ) ) { + $this->containerPaths = (array)$config['containerPaths']; + foreach ( $this->containerPaths as &$path ) { + $path = rtrim( $path, '/' ); // remove trailing slash + } + } + + $this->fileMode = isset( $config['fileMode'] ) + ? $config['fileMode'] + : 0644; + } + + /** + * @see FileBackendStore::resolveContainerPath() + */ + protected function resolveContainerPath( $container, $relStoragePath ) { + // Check that container has a root directory + if ( isset( $this->containerPaths[$container] ) || isset( $this->basePath ) ) { + // Check for sane relative paths (assume the base paths are OK) + if ( $this->isLegalRelPath( $relStoragePath ) ) { + return $relStoragePath; + } + } + return null; + } + + /** + * Sanity check a relative file system path for validity + * + * @param $path string Normalized relative path + * @return bool + */ + protected function isLegalRelPath( $path ) { + // Check for file names longer than 255 chars + if ( preg_match( '![^/]{256}!', $path ) ) { // ext3/NTFS + return false; + } + if ( wfIsWindows() ) { // NTFS + return !preg_match( '![:*?"<>|]!', $path ); + } else { + return true; + } + } + + /** + * Given the short (unresolved) and full (resolved) name of + * a container, return the file system path of the container. 
+ * + * @param $shortCont string + * @param $fullCont string + * @return string|null + */ + protected function containerFSRoot( $shortCont, $fullCont ) { + if ( isset( $this->containerPaths[$shortCont] ) ) { + return $this->containerPaths[$shortCont]; + } elseif ( isset( $this->basePath ) ) { + return "{$this->basePath}/{$fullCont}"; + } + return null; // no container base path defined + } + + /** + * Get the absolute file system path for a storage path + * + * @param $storagePath string Storage path + * @return string|null + */ + protected function resolveToFSPath( $storagePath ) { + list( $fullCont, $relPath ) = $this->resolveStoragePathReal( $storagePath ); + if ( $relPath === null ) { + return null; // invalid + } + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $storagePath ); + $fsPath = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + if ( $relPath != '' ) { + $fsPath .= "/{$relPath}"; + } + return $fsPath; + } + + /** + * @see FileBackendStore::isPathUsableInternal() + */ + public function isPathUsableInternal( $storagePath ) { + $fsPath = $this->resolveToFSPath( $storagePath ); + if ( $fsPath === null ) { + return false; // invalid + } + $parentDir = dirname( $fsPath ); + + if ( file_exists( $fsPath ) ) { + $ok = is_file( $fsPath ) && is_writable( $fsPath ); + } else { + $ok = is_dir( $parentDir ) && is_writable( $parentDir ); + } + + return $ok; + } + + /** + * @see FileBackendStore::doStoreInternal() + */ + protected function doStoreInternal( array $params ) { + $status = Status::newGood(); + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); 
+ return $status; + } + } + + $ok = copy( $params['src'], $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-store', $params['src'], $params['dst'] ); + return $status; + } + + $this->chmod( $dest ); + + return $status; + } + + /** + * @see FileBackendStore::doCopyInternal() + */ + protected function doCopyInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + $ok = copy( $source, $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } + + $this->chmod( $dest ); + + return $status; + } + + /** + * @see FileBackendStore::doMoveInternal() + */ + protected function doMoveInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + // Windows does not support moving over existing files + if ( wfIsWindows() ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } + } else { + $status->fatal( 
'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + $ok = rename( $source, $dest ); + clearstatcache(); // file no longer at source + if ( !$ok ) { + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + return $status; + } + + return $status; + } + + /** + * @see FileBackendStore::doDeleteInternal() + */ + protected function doDeleteInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + if ( !is_file( $source ) ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } + return $status; // do nothing; either OK or bad status + } + + $ok = unlink( $source ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + return $status; + } + + return $status; + } + + /** + * @see FileBackendStore::doCreateInternal() + */ + protected function doCreateInternal( array $params ) { + $status = Status::newGood(); + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + $bytes = file_put_contents( $dest, $params['content'] ); + if ( $bytes === false ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } + + $this->chmod( $dest ); + + return $status; + } + + /** + * @see FileBackendStore::doPrepareInternal() + */ + protected function doPrepareInternal( $fullCont, $dirRel, array $params ) { + $status = Status::newGood(); + list( $b, $shortCont, $r 
) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + if ( !wfMkdirParents( $dir ) ) { // make directory and its parents + $status->fatal( 'directorycreateerror', $params['dir'] ); + } elseif ( !is_writable( $dir ) ) { + $status->fatal( 'directoryreadonlyerror', $params['dir'] ); + } elseif ( !is_readable( $dir ) ) { + $status->fatal( 'directorynotreadableerror', $params['dir'] ); + } + return $status; + } + + /** + * @see FileBackendStore::doSecureInternal() + */ + protected function doSecureInternal( $fullCont, $dirRel, array $params ) { + $status = Status::newGood(); + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + // Seed new directories with a blank index.html, to prevent crawling... + if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) { + $bytes = file_put_contents( "{$dir}/index.html", '' ); + if ( $bytes === false ) { // writing '' returns 0 bytes on success; only false means failure + $status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' ); + return $status; + } + } + // Add a .htaccess file to the root of the container... 
+ if ( !empty( $params['noAccess'] ) ) { + if ( !file_exists( "{$contRoot}/.htaccess" ) ) { + $bytes = file_put_contents( "{$contRoot}/.htaccess", "Deny from all\n" ); + if ( !$bytes ) { + $storeDir = "mwstore://{$this->name}/{$shortCont}"; + $status->fatal( 'backend-fail-create', "{$storeDir}/.htaccess" ); + return $status; + } + } + } + return $status; + } + + /** + * @see FileBackendStore::doCleanInternal() + */ + protected function doCleanInternal( $fullCont, $dirRel, array $params ) { + $status = Status::newGood(); + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + wfSuppressWarnings(); + if ( is_dir( $dir ) ) { + rmdir( $dir ); // remove directory if empty + } + wfRestoreWarnings(); + return $status; + } + + /** + * @see FileBackendStore::doFileExists() + */ + protected function doGetFileStat( array $params ) { + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + return false; // invalid storage path + } + + $this->trapWarnings(); // don't trust 'false' if there were errors + $stat = is_file( $source ) ? 
stat( $source ) : false; // regular files only + $hadError = $this->untrapWarnings(); + + if ( $stat ) { + return array( + 'mtime' => wfTimestamp( TS_MW, $stat['mtime'] ), + 'size' => $stat['size'] + ); + } elseif ( !$hadError ) { + return false; // file does not exist + } else { + return null; // failure + } + } + + /** + * @see FileBackendStore::doClearCache() + */ + protected function doClearCache( array $paths = null ) { + clearstatcache(); // clear the PHP file stat cache + } + + /** + * @see FileBackendStore::getFileListInternal() + */ + public function getFileListInternal( $fullCont, $dirRel, array $params ) { + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; + $exists = is_dir( $dir ); + if ( !$exists ) { + wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" ); + return array(); // nothing under this dir + } + $readable = is_readable( $dir ); + if ( !$readable ) { + wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" ); + return null; // bad permissions? + } + return new FSFileBackendFileList( $dir ); + } + + /** + * @see FileBackendStore::getLocalReference() + */ + public function getLocalReference( array $params ) { + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + return null; + } + return new FSFile( $source ); + } + + /** + * @see FileBackendStore::getLocalCopy() + */ + public function getLocalCopy( array $params ) { + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + return null; + } + + // Create a new temporary file with the same extension... + $ext = FileBackend::extensionFromPath( $params['src'] ); + $tmpFile = TempFSFile::factory( wfBaseName( $source ) . 
'_', $ext ); + if ( !$tmpFile ) { + return null; + } + $tmpPath = $tmpFile->getPath(); + + // Copy the source file over the temp file + $ok = copy( $source, $tmpPath ); + if ( !$ok ) { + return null; + } + + $this->chmod( $tmpPath ); + + return $tmpFile; + } + + /** + * Chmod a file, suppressing the warnings + * + * @param $path string Absolute file system path + * @return bool Success + */ + protected function chmod( $path ) { + wfSuppressWarnings(); + $ok = chmod( $path, $this->fileMode ); + wfRestoreWarnings(); + + return $ok; + } + + /** + * Listen for E_WARNING errors and track whether any happen + * + * @return bool + */ + protected function trapWarnings() { + $this->hadWarningErrors[] = false; // push to stack + set_error_handler( array( $this, 'handleWarning' ), E_WARNING ); + return false; // invoke normal PHP error handler + } + + /** + * Stop listening for E_WARNING errors and return true if any happened + * + * @return bool + */ + protected function untrapWarnings() { + restore_error_handler(); // restore previous handler + return array_pop( $this->hadWarningErrors ); // pop from stack + } + + private function handleWarning() { + $this->hadWarningErrors[count( $this->hadWarningErrors ) - 1] = true; + return true; // suppress from PHP handler + } +} + +/** + * Wrapper around RecursiveDirectoryIterator that catches + * exception or does any custom behavoir that we may want. + * Do not use this class from places outside FSFileBackend. 
+ * + * @ingroup FileBackend + */ +class FSFileBackendFileList implements Iterator { + /** @var RecursiveIteratorIterator */ + protected $iter; + protected $suffixStart; // integer + protected $pos = 0; // integer + + /** + * @param $dir string file system directory + */ + public function __construct( $dir ) { + $dir = realpath( $dir ); // normalize + $this->suffixStart = strlen( $dir ) + 1; // size of "path/to/dir/" + try { + # Get an iterator that will return leaf nodes (non-directories) + if ( MWInit::classExists( 'FilesystemIterator' ) ) { // PHP >= 5.3 + # RecursiveDirectoryIterator extends FilesystemIterator. + # FilesystemIterator::SKIP_DOTS default is inconsistent in PHP 5.3.x. + $flags = FilesystemIterator::CURRENT_AS_FILEINFO | FilesystemIterator::SKIP_DOTS; + $this->iter = new RecursiveIteratorIterator( + new RecursiveDirectoryIterator( $dir, $flags ) ); + } else { // PHP < 5.3 + # RecursiveDirectoryIterator extends DirectoryIterator + $this->iter = new RecursiveIteratorIterator( + new RecursiveDirectoryIterator( $dir ) ); + } + } catch ( UnexpectedValueException $e ) { + $this->iter = null; // bad permissions? deleted? 
+ } + } + + public function current() { + // Return only the relative path and normalize slashes to FileBackend-style + // Make sure to use the realpath since the suffix is based upon that + return str_replace( '\\', '/', + substr( realpath( $this->iter->current() ), $this->suffixStart ) ); + } + + public function key() { + return $this->pos; + } + + public function next() { + try { + $this->iter->next(); + } catch ( UnexpectedValueException $e ) { + $this->iter = null; + } + ++$this->pos; + } + + public function rewind() { + $this->pos = 0; + try { + $this->iter->rewind(); + } catch ( UnexpectedValueException $e ) { + $this->iter = null; + } + } + + public function valid() { + return $this->iter && $this->iter->valid(); + } +} diff --git a/includes/filerepo/backend/FileBackend.php b/includes/filerepo/backend/FileBackend.php new file mode 100644 index 00000000..9433bcb4 --- /dev/null +++ b/includes/filerepo/backend/FileBackend.php @@ -0,0 +1,1739 @@ +<?php +/** + * @defgroup FileBackend File backend + * @ingroup FileRepo + * + * This module regroup classes meant for MediaWiki to interacts with + */ + +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * Base class for all file backend classes (including multi-write backends). + * + * This class defines the methods as abstract that subclasses must implement. + * Outside callers can assume that all backends will have these functions. + * + * All "storage paths" are of the format "mwstore://backend/container/path". + * The paths use UNIX file system (FS) notation, though any particular backend may + * not actually be using a local filesystem. Therefore, the paths are only virtual. + * + * Backend contents are stored under wiki-specific container names by default. + * For legacy reasons, this has no effect for the FS backend class, and per-wiki + * segregation must be done by setting the container paths appropriately. 
+ * + * FS-based backends are somewhat more restrictive due to the existence of real + * directory files; a regular file cannot have the same name as a directory. Other + * backends with virtual directories may not have this limitation. Callers should + * store files in such a way that no files and directories are under the same path. + * + * Methods should avoid throwing exceptions at all costs. + * As a corollary, external dependencies should be kept to a minimum. + * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileBackend { + protected $name; // string; unique backend name + protected $wikiId; // string; unique wiki name + protected $readOnly; // string; read-only explanation message + /** @var LockManager */ + protected $lockManager; + + /** + * Create a new backend instance from configuration. + * This should only be called from within FileBackendGroup. + * + * $config includes: + * 'name' : The unique name of this backend. + * This should consist of alphanumberic, '-', and '_' characters. + * This name should not be changed after use. + * 'wikiId' : Prefix to container names that is unique to this wiki. + * This should consist of alphanumberic, '-', and '_' characters. + * 'lockManager' : Registered name of a file lock manager to use. + * 'readOnly' : Write operations are disallowed if this is a non-empty string. + * It should be an explanation for the backend being read-only. + * + * @param $config Array + */ + public function __construct( array $config ) { + $this->name = $config['name']; + if ( !preg_match( '!^[a-zA-Z0-9-_]{1,255}$!', $this->name ) ) { + throw new MWException( "Backend name `{$this->name}` is invalid." ); + } + $this->wikiId = isset( $config['wikiId'] ) + ? $config['wikiId'] + : wfWikiID(); // e.g. "my_wiki-en_" + $this->lockManager = ( $config['lockManager'] instanceof LockManager ) + ? 
$config['lockManager'] + : LockManagerGroup::singleton()->get( $config['lockManager'] ); + $this->readOnly = isset( $config['readOnly'] ) + ? (string)$config['readOnly'] + : ''; + } + + /** + * Get the unique backend name. + * We may have multiple different backends of the same type. + * For example, we can have two Swift backends using different proxies. + * + * @return string + */ + final public function getName() { + return $this->name; + } + + /** + * Check if this backend is read-only + * + * @return bool + */ + final public function isReadOnly() { + return ( $this->readOnly != '' ); + } + + /** + * Get an explanatory message if this backend is read-only + * + * @return string|false Returns falls if the backend is not read-only + */ + final public function getReadOnlyReason() { + return ( $this->readOnly != '' ) ? $this->readOnly : false; + } + + /** + * This is the main entry point into the backend for write operations. + * Callers supply an ordered list of operations to perform as a transaction. + * Files will be locked, the stat cache cleared, and then the operations attempted. + * If any serious errors occur, all attempted operations will be rolled back. + * + * $ops is an array of arrays. The outer array holds a list of operations. + * Each inner array is a set of key value pairs that specify an operation. 
+ * + * Supported operations and their parameters: + * a) Create a new file in storage with the contents of a string + * array( + * 'op' => 'create', + * 'dst' => <storage path>, + * 'content' => <string of new file contents>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * b) Copy a file system file into storage + * array( + * 'op' => 'store', + * 'src' => <file system path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * c) Copy a file within storage + * array( + * 'op' => 'copy', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * d) Move a file within storage + * array( + * 'op' => 'move', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * e) Delete a file within storage + * array( + * 'op' => 'delete', + * 'src' => <storage path>, + * 'ignoreMissingSource' => <boolean> + * ) + * f) Do nothing (no-op) + * array( + * 'op' => 'null', + * ) + * + * Boolean flags for operations (operation-specific): + * 'ignoreMissingSource' : The operation will simply succeed and do + * nothing if the source file does not exist. + * 'overwrite' : Any destination file will be overwritten. + * 'overwriteSame' : An error will not be given if a file already + * exists at the destination that has the same + * contents as the new contents to be written there. + * + * $opts is an associative of boolean flags, including: + * 'force' : Errors that would normally cause a rollback do not. + * The remaining operations are still attempted if any fail. + * 'nonLocking' : No locks are acquired for the operations. + * This can increase performance for non-critical writes. + * This has no effect unless the 'force' flag is set. + * 'allowStale' : Don't require the latest available data. + * This can increase performance for non-critical writes. 
+ * This has no effect unless the 'force' flag is set. + * + * Remarks on locking: + * File system paths given to operations should refer to files that are + * already locked or otherwise safe from modification from other processes. + * Normally these files will be new temp files, which should be adequate. + * + * Return value: + * This returns a Status, which contains all warnings and fatals that occured + * during the operation. The 'failCount', 'successCount', and 'success' members + * will reflect each operation attempted. The status will be "OK" unless: + * a) unexpected operation errors occurred (network partitions, disk full...) + * b) significant operation errors occured and 'force' was not set + * + * @param $ops Array List of operations to execute in order + * @param $opts Array Batch operation options + * @return Status + */ + final public function doOperations( array $ops, array $opts = array() ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + if ( empty( $opts['force'] ) ) { // sanity + unset( $opts['nonLocking'] ); + unset( $opts['allowStale'] ); + } + return $this->doOperationsInternal( $ops, $opts ); + } + + /** + * @see FileBackend::doOperations() + */ + abstract protected function doOperationsInternal( array $ops, array $opts ); + + /** + * Same as doOperations() except it takes a single operation. + * If you are doing a batch of operations that should either + * all succeed or all fail, then use that function instead. + * + * @see FileBackend::doOperations() + * + * @param $op Array Operation + * @param $opts Array Operation options + * @return Status + */ + final public function doOperation( array $op, array $opts = array() ) { + return $this->doOperations( array( $op ), $opts ); + } + + /** + * Performs a single create operation. + * This sets $params['op'] to 'create' and passes it to doOperation(). 
+ * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function create( array $params, array $opts = array() ) { + $params['op'] = 'create'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single store operation. + * This sets $params['op'] to 'store' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function store( array $params, array $opts = array() ) { + $params['op'] = 'store'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single copy operation. + * This sets $params['op'] to 'copy' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function copy( array $params, array $opts = array() ) { + $params['op'] = 'copy'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single move operation. + * This sets $params['op'] to 'move' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function move( array $params, array $opts = array() ) { + $params['op'] = 'move'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single delete operation. + * This sets $params['op'] to 'delete' and passes it to doOperation(). 
+ * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function delete( array $params, array $opts = array() ) { + $params['op'] = 'delete'; + return $this->doOperation( $params, $opts ); + } + + /** + * Concatenate a list of storage files into a single file system file. + * The target path should refer to a file that is already locked or + * otherwise safe from modification from other processes. Normally, + * the file will be a new temp file, which should be adequate. + * $params include: + * srcs : ordered source storage paths (e.g. chunk1, chunk2, ...) + * dst : file system path to 0-byte temp file + * + * @param $params Array Operation parameters + * @return Status + */ + abstract public function concatenate( array $params ); + + /** + * Prepare a storage directory for usage. + * This will create any required containers and parent directories. + * Backends using key/value stores only need to create the container. + * + * $params include: + * dir : storage directory + * + * @param $params Array + * @return Status + */ + final public function prepare( array $params ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doPrepare( $params ); + } + + /** + * @see FileBackend::prepare() + */ + abstract protected function doPrepare( array $params ); + + /** + * Take measures to block web access to a storage directory and + * the container it belongs to. FS backends might add .htaccess + * files whereas key/value store backends might restrict container + * access to the auth user that represents end-users in web request. + * This is not guaranteed to actually do anything. 
+ * + * $params include: + * dir : storage directory + * noAccess : try to deny file access + * noListing : try to deny file listing + * + * @param $params Array + * @return Status + */ + final public function secure( array $params ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + $status = $this->doPrepare( $params ); // dir must exist to restrict it + if ( $status->isOK() ) { + $status->merge( $this->doSecure( $params ) ); + } + return $status; + } + + /** + * @see FileBackend::secure() + */ + abstract protected function doSecure( array $params ); + + /** + * Delete a storage directory if it is empty. + * Backends using key/value stores may do nothing unless the directory + * is that of an empty container, in which case it should be deleted. + * + * $params include: + * dir : storage directory + * + * @param $params Array + * @return Status + */ + final public function clean( array $params ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doClean( $params ); + } + + /** + * @see FileBackend::clean() + */ + abstract protected function doClean( array $params ); + + /** + * Check if a file exists at a storage path in the backend. + * This returns false if only a directory exists at the path. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return bool|null Returns null on failure + */ + abstract public function fileExists( array $params ); + + /** + * Get the last-modified timestamp of the file at a storage path. 
+ * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return string|false TS_MW timestamp or false on failure + */ + abstract public function getFileTimestamp( array $params ); + + /** + * Get the contents of a file at a storage path in the backend. + * This should be avoided for potentially large files. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return string|false Returns false on failure + */ + abstract public function getFileContents( array $params ); + + /** + * Get the size (bytes) of a file at a storage path in the backend. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return integer|false Returns false on failure + */ + abstract public function getFileSize( array $params ); + + /** + * Get quick information about a file at a storage path in the backend. + * If the file does not exist, then this returns false. + * Otherwise, the result is an associative array that includes: + * mtime : the last-modified timestamp (TS_MW) + * size : the file size (bytes) + * Additional values may be included for internal use only. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return Array|false|null Returns null on failure + */ + abstract public function getFileStat( array $params ); + + /** + * Get a SHA-1 hash of the file at a storage path in the backend. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return string|false Hash string or false on failure + */ + abstract public function getFileSha1Base36( array $params ); + + /** + * Get the properties of the file at a storage path in the backend. + * Returns FSFile::placeholderProps() on failure. 
+ * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return Array + */ + abstract public function getFileProps( array $params ); + + /** + * Stream the file at a storage path in the backend. + * If the file does not exists, a 404 error will be given. + * Appropriate HTTP headers (Status, Content-Type, Content-Length) + * must be sent if streaming began, while none should be sent otherwise. + * Implementations should flush the output buffer before sending data. + * + * $params include: + * src : source storage path + * headers : additional HTTP headers to send on success + * latest : use the latest available data + * + * @param $params Array + * @return Status + */ + abstract public function streamFile( array $params ); + + /** + * Returns a file system file, identical to the file at a storage path. + * The file returned is either: + * a) A local copy of the file at a storage path in the backend. + * The temporary copy will have the same extension as the source. + * b) An original of the file at a storage path in the backend. + * Temporary files may be purged when the file object falls out of scope. + * + * Write operations should *never* be done on this file as some backends + * may do internal tracking or may be instances of FileBackendMultiWrite. + * In that later case, there are copies of the file that must stay in sync. + * Additionally, further calls to this function may return the same file. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return FSFile|null Returns null on failure + */ + abstract public function getLocalReference( array $params ); + + /** + * Get a local copy on disk of the file at a storage path in the backend. + * The temporary copy will have the same file extension as the source. + * Temporary files may be purged when the file object falls out of scope. 
+ * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return TempFSFile|null Returns null on failure + */ + abstract public function getLocalCopy( array $params ); + + /** + * Get an iterator to list out all stored files under a storage directory. + * If the directory is of the form "mwstore://backend/container", + * then all files in the container should be listed. + * If the directory is of form "mwstore://backend/container/dir", + * then all files under that container directory should be listed. + * Results should be storage paths relative to the given directory. + * + * Storage backends with eventual consistency might return stale data. + * + * $params include: + * dir : storage path directory + * + * @return Traversable|Array|null Returns null on failure + */ + abstract public function getFileList( array $params ); + + /** + * Invalidate any in-process file existence and property cache. + * If $paths is given, then only the cache for those files will be cleared. + * + * @param $paths Array Storage paths (optional) + * @return void + */ + public function clearCache( array $paths = null ) {} + + /** + * Lock the files at the given storage paths in the backend. + * This will either lock all the files or none (on failure). + * + * Callers should consider using getScopedFileLocks() instead. + * + * @param $paths Array Storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function lockFiles( array $paths, $type ) { + return $this->lockManager->lock( $paths, $type ); + } + + /** + * Unlock the files at the given storage paths in the backend. 
*
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function unlockFiles( array $paths, $type ) {
		return $this->lockManager->unlock( $paths, $type );
	}

	/**
	 * Lock the files at the given storage paths in the backend.
	 * This will either lock all the files or none (on failure).
	 * On failure, the status object will be updated with errors.
	 *
	 * Once the return value goes out of scope, the locks will be released and
	 * the status updated. Unlock fatals will not change the status "OK" value.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @param $status Status Status to update on lock/unlock
	 * @return ScopedLock|null Returns null on failure
	 */
	final public function getScopedFileLocks( array $paths, $type, Status $status ) {
		return ScopedLock::factory( $this->lockManager, $paths, $type, $status );
	}

	/**
	 * Check if a given path is a "mwstore://" path.
	 * This does not do any further validation or any existence checks.
	 *
	 * @param $path string
	 * @return bool
	 */
	final public static function isStoragePath( $path ) {
		return ( strpos( $path, 'mwstore://' ) === 0 );
	}

	/**
	 * Split a storage path into a backend name, a container name,
	 * and a relative file path. The relative path may be the empty string.
	 * This does not do any path normalization or traversal checks.
	 *
	 * @param $storagePath string
	 * @return Array (backend, container, rel object) or (null, null, null)
	 */
	final public static function splitStoragePath( $storagePath ) {
		if ( self::isStoragePath( $storagePath ) ) {
			// Remove the "mwstore://" prefix (10 bytes) and split the path
			$parts = explode( '/', substr( $storagePath, 10 ), 3 );
			if ( count( $parts ) >= 2 && $parts[0] != '' && $parts[1] != '' ) {
				if ( count( $parts ) == 3 ) {
					return $parts; // e.g. "backend/container/path"
				} else {
					return array( $parts[0], $parts[1], '' ); // e.g. "backend/container"
				}
			}
		}
		return array( null, null, null );
	}

	/**
	 * Normalize a storage path by cleaning up directory separators.
	 * Returns null if the path is not of the format of a valid storage path
	 * or if its container-relative part involves directory traversal.
	 *
	 * @param $storagePath string
	 * @return string|null
	 */
	final public static function normalizeStoragePath( $storagePath ) {
		list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
		if ( $relPath !== null ) { // syntactically valid "mwstore://" path
			$relPath = self::normalizeContainerPath( $relPath );
			if ( $relPath !== null ) {
				return ( $relPath != '' )
					? "mwstore://{$backend}/{$container}/{$relPath}"
					: "mwstore://{$backend}/{$container}";
			}
		}
		return null;
	}

	/**
	 * Validate and normalize a relative storage path.
	 * Null is returned if the path involves directory traversal.
	 * Traversal is insecure for FS backends and broken for others.
	 *
	 * A "." or ".." segment is rejected wherever it appears: as the whole
	 * path, as the first segment, in the middle, or as the last segment
	 * (e.g. "a/b/.."), which matters for directory paths.
	 *
	 * @param $path string Storage path relative to a container
	 * @return string|null Normalized path, or null if traversal was detected
	 */
	final protected static function normalizeContainerPath( $path ) {
		// Normalize directory separators
		$path = strtr( $path, '\\', '/' );
		// Collapse any consecutive directory separators
		$path = preg_replace( '![/]{2,}!', '/', $path );
		// Remove any leading directory separator
		$path = ltrim( $path, '/' );
		// Use the same traversal protection as Title::secureAndSplit()
		if ( strpos( $path, '.' ) !== false ) { // cheap pre-check
			if (
				$path === '.' ||
				$path === '..' ||
				strpos( $path, './' ) === 0 ||
				strpos( $path, '../' ) === 0 ||
				strpos( $path, '/./' ) !== false ||
				strpos( $path, '/../' ) !== false ||
				substr( $path, -2 ) === '/.' || // trailing "." segment
				substr( $path, -3 ) === '/..' // trailing ".." segment
			) {
				return null;
			}
		}
		return $path;
	}

	/**
	 * Get the parent storage directory of a storage path.
	 * This returns a path like "mwstore://backend/container",
	 * "mwstore://backend/container/...", or null if there is no parent.
+ * + * @param $storagePath string + * @return string|null + */ + final public static function parentStoragePath( $storagePath ) { + $storagePath = dirname( $storagePath ); + list( $b, $cont, $rel ) = self::splitStoragePath( $storagePath ); + return ( $rel === null ) ? null : $storagePath; + } + + /** + * Get the final extension from a storage or FS path + * + * @param $path string + * @return string + */ + final public static function extensionFromPath( $path ) { + $i = strrpos( $path, '.' ); + return strtolower( $i ? substr( $path, $i + 1 ) : '' ); + } +} + +/** + * @brief Base class for all backends associated with a particular storage medium. + * + * This class defines the methods as abstract that subclasses must implement. + * Outside callers should *not* use functions with "Internal" in the name. + * + * The FileBackend operations are implemented using basic functions + * such as storeInternal(), copyInternal(), deleteInternal() and the like. + * This class is also responsible for path resolution and sanitization. + * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileBackendStore extends FileBackend { + /** @var Array Map of paths to small (RAM/disk) cache items */ + protected $cache = array(); // (storage path => key => value) + protected $maxCacheSize = 100; // integer; max paths with entries + /** @var Array Map of paths to large (RAM/disk) cache items */ + protected $expensiveCache = array(); // (storage path => key => value) + protected $maxExpensiveCacheSize = 10; // integer; max paths with entries + + /** @var Array Map of container names to sharding settings */ + protected $shardViaHashLevels = array(); // (container name => config array) + + protected $maxFileSize = 1000000000; // integer bytes (1GB) + + /** + * Get the maximum allowable file size given backend + * medium restrictions and basic performance constraints. + * Do not call this function from places outside FileBackend and FileOp. 
+ * + * @return integer Bytes + */ + final public function maxFileSizeInternal() { + return $this->maxFileSize; + } + + /** + * Check if a file can be created at a given storage path. + * FS backends should check if the parent directory exists and the file is writable. + * Backends using key/value stores should check if the container exists. + * + * @param $storagePath string + * @return bool + */ + abstract public function isPathUsableInternal( $storagePath ); + + /** + * Create a file in the backend with the given contents. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * content : the raw file contents + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function createInternal( array $params ) { + wfProfileIn( __METHOD__ ); + if ( strlen( $params['content'] ) > $this->maxFileSizeInternal() ) { + $status = Status::newFatal( 'backend-fail-create', $params['dst'] ); + } else { + $status = $this->doCreateInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + } + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::createInternal() + */ + abstract protected function doCreateInternal( array $params ); + + /** + * Store a file into the backend from a file on disk. + * Do not call this function from places outside FileBackend and FileOp. 
+ * + * $params include: + * src : source path on disk + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function storeInternal( array $params ) { + wfProfileIn( __METHOD__ ); + if ( filesize( $params['src'] ) > $this->maxFileSizeInternal() ) { + $status = Status::newFatal( 'backend-fail-store', $params['dst'] ); + } else { + $status = $this->doStoreInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + } + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::storeInternal() + */ + abstract protected function doStoreInternal( array $params ); + + /** + * Copy a file from one storage path to another in the backend. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * src : source storage path + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function copyInternal( array $params ) { + wfProfileIn( __METHOD__ ); + $status = $this->doCopyInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::copyInternal() + */ + abstract protected function doCopyInternal( array $params ); + + /** + * Delete a file at the storage path. + * Do not call this function from places outside FileBackend and FileOp. 
+ * + * $params include: + * src : source storage path + * ignoreMissingSource : do nothing if the source file does not exist + * + * @param $params Array + * @return Status + */ + final public function deleteInternal( array $params ) { + wfProfileIn( __METHOD__ ); + $status = $this->doDeleteInternal( $params ); + $this->clearCache( array( $params['src'] ) ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::deleteInternal() + */ + abstract protected function doDeleteInternal( array $params ); + + /** + * Move a file from one storage path to another in the backend. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * src : source storage path + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function moveInternal( array $params ) { + wfProfileIn( __METHOD__ ); + $status = $this->doMoveInternal( $params ); + $this->clearCache( array( $params['src'], $params['dst'] ) ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::moveInternal() + */ + protected function doMoveInternal( array $params ) { + // Copy source to dest + $status = $this->copyInternal( $params ); + if ( $status->isOK() ) { + // Delete source (only fails due to races or medium going down) + $status->merge( $this->deleteInternal( array( 'src' => $params['src'] ) ) ); + $status->setResult( true, $status->value ); // ignore delete() errors + } + return $status; + } + + /** + * @see FileBackend::concatenate() + */ + final public function concatenate( array $params ) { + wfProfileIn( __METHOD__ ); + $status = Status::newGood(); + + // Try to lock the source files for the scope of this function + $scopeLockS = $this->getScopedFileLocks( $params['srcs'], LockManager::LOCK_UW, $status ); + if ( $status->isOK() ) { + // Actually do the concatenation + $status->merge( 
$this->doConcatenate( $params ) );
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Default implementation of concatenate(): append every source chunk,
	 * in the order given, to the caller-supplied local temp file.
	 *
	 * $params include:
	 *     srcs : ordered source storage paths (e.g. chunk1, chunk2, ...)
	 *     dst  : file system path to an existing 0-byte temp file
	 *
	 * The destination handle is closed on every exit path.
	 *
	 * @see FileBackendStore::concatenate()
	 *
	 * @param $params Array
	 * @return Status
	 */
	protected function doConcatenate( array $params ) {
		$status = Status::newGood();
		$tmpPath = $params['dst']; // convenience

		// Check that the specified temp file is valid...
		wfSuppressWarnings();
		$ok = ( is_file( $tmpPath ) && !filesize( $tmpPath ) );
		wfRestoreWarnings();
		if ( !$ok ) { // not present or not empty
			$status->fatal( 'backend-fail-opentemp', $tmpPath );
			return $status;
		}

		// Build up the temp file using the source chunks (in order)...
		$tmpHandle = fopen( $tmpPath, 'ab' );
		if ( $tmpHandle === false ) {
			$status->fatal( 'backend-fail-opentemp', $tmpPath );
			return $status;
		}
		foreach ( $params['srcs'] as $virtualSource ) {
			// Get a local FS version of the chunk
			$tmpFile = $this->getLocalReference( array( 'src' => $virtualSource ) );
			if ( !$tmpFile ) {
				fclose( $tmpHandle ); // don't leak the destination handle
				$status->fatal( 'backend-fail-read', $virtualSource );
				return $status;
			}
			// Get a handle to the local FS version
			$sourceHandle = fopen( $tmpFile->getPath(), 'rb' );
			if ( $sourceHandle === false ) {
				fclose( $tmpHandle );
				$status->fatal( 'backend-fail-read', $virtualSource );
				return $status;
			}
			// Append chunk to file. Only a strict false is an error;
			// a return value of 0 just means the chunk was empty.
			if ( stream_copy_to_stream( $sourceHandle, $tmpHandle ) === false ) {
				fclose( $sourceHandle );
				fclose( $tmpHandle );
				$status->fatal( 'backend-fail-writetemp', $tmpPath );
				return $status;
			}
			fclose( $sourceHandle );
		}
		if ( !fclose( $tmpHandle ) ) {
			$status->fatal( 'backend-fail-closetemp', $tmpPath );
			return $status;
		}

		clearstatcache(); // temp file changed

		return $status;
	}

	/**
	 * @see FileBackend::doPrepare()
	 */
	final protected function doPrepare( array $params ) {
		wfProfileIn( __METHOD__ );

		$status = Status::newGood();
		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir
=== null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
			wfProfileOut( __METHOD__ );
			return $status; // invalid storage path
		}

		if ( $shard !== null ) { // confined to a single container/shard
			$status->merge( $this->doPrepareInternal( $fullCont, $dir, $params ) );
		} else { // directory is on several shards
			wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
			list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
			foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
				$status->merge( $this->doPrepareInternal( "{$fullCont}{$suffix}", $dir, $params ) );
			}
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Per-container hook for doPrepare(); the default does nothing.
	 *
	 * @see FileBackendStore::doPrepare()
	 */
	protected function doPrepareInternal( $container, $dir, array $params ) {
		return Status::newGood();
	}

	/**
	 * @see FileBackend::doSecure()
	 */
	final protected function doSecure( array $params ) {
		wfProfileIn( __METHOD__ );
		$status = Status::newGood();

		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir === null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
			wfProfileOut( __METHOD__ );
			return $status; // invalid storage path
		}

		if ( $shard !== null ) { // confined to a single container/shard
			$status->merge( $this->doSecureInternal( $fullCont, $dir, $params ) );
		} else { // directory is on several shards
			wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
			list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
			foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
				$status->merge( $this->doSecureInternal( "{$fullCont}{$suffix}", $dir, $params ) );
			}
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Per-container hook for doSecure(); the default does nothing.
	 *
	 * @see FileBackendStore::doSecure()
	 */
	protected function doSecureInternal( $container, $dir, array $params ) {
		return Status::newGood();
	}

	/**
	 * @see FileBackend::doClean()
	 */
	final protected function doClean( array $params ) {
		wfProfileIn( __METHOD__ );
		$status = Status::newGood();

		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir === null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
			wfProfileOut( __METHOD__ );
			return $status; // invalid storage path
		}

		// Attempt to lock this directory...
		$filesLockEx = array( $params['dir'] );
		// The scoped lock is held only for as long as this variable lives
		$scopedLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			wfProfileOut( __METHOD__ );
			return $status; // abort
		}

		if ( $shard !== null ) { // confined to a single container/shard
			$status->merge( $this->doCleanInternal( $fullCont, $dir, $params ) );
		} else { // directory is on several shards
			wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
			list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
			foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
				$status->merge( $this->doCleanInternal( "{$fullCont}{$suffix}", $dir, $params ) );
			}
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Per-container hook for doClean(); the default does nothing.
	 *
	 * @see FileBackendStore::doClean()
	 */
	protected function doCleanInternal( $container, $dir, array $params ) {
		return Status::newGood();
	}

	/**
	 * @see FileBackend::fileExists()
	 */
	final public function fileExists( array $params ) {
		wfProfileIn( __METHOD__ );
		$stat = $this->getFileStat( $params );
		wfProfileOut( __METHOD__ );
		return ( $stat === null ) ? null : (bool)$stat; // null => failure
	}

	/**
	 * @see FileBackend::getFileTimestamp()
	 */
	final public function getFileTimestamp( array $params ) {
		wfProfileIn( __METHOD__ );
		$stat = $this->getFileStat( $params );
		wfProfileOut( __METHOD__ );
		return $stat ? $stat['mtime'] : false;
	}

	/**
	 * @see FileBackend::getFileSize()
	 */
	final public function getFileSize( array $params ) {
		wfProfileIn( __METHOD__ );
		$stat = $this->getFileStat( $params );
		wfProfileOut( __METHOD__ );
		return $stat ? $stat['size'] : false;
	}

	/**
	 * Get stat info for a file, using the in-process cache when possible.
	 *
	 * Cache entries are keyed by the normalized storage path and record
	 * whether they were fetched with the "latest" flag, so a request for
	 * the latest data is never served from a possibly stale entry.
	 *
	 * @see FileBackend::getFileStat()
	 *
	 * @param $params Array
	 * @return Array|false|null False for an invalid storage path; otherwise
	 *   whatever doGetFileStat() returned
	 */
	final public function getFileStat( array $params ) {
		wfProfileIn( __METHOD__ );
		$path = self::normalizeStoragePath( $params['src'] );
		if ( $path === null ) {
			wfProfileOut( __METHOD__ ); // keep profiler calls balanced
			return false; // invalid storage path
		}
		$latest = !empty( $params['latest'] );
		if ( isset( $this->cache[$path]['stat'] ) ) {
			// If we want the latest data, check that this cached
			// value was in fact fetched with the latest available data.
			if ( !$latest || $this->cache[$path]['stat']['latest'] ) {
				wfProfileOut( __METHOD__ );
				return $this->cache[$path]['stat'];
			}
		}
		$stat = $this->doGetFileStat( $params );
		if ( is_array( $stat ) ) { // don't cache negatives
			$this->trimCache(); // limit memory
			$this->cache[$path]['stat'] = $stat;
			$this->cache[$path]['stat']['latest'] = $latest;
		}
		wfProfileOut( __METHOD__ );
		return $stat;
	}

	/**
	 * @see FileBackendStore::getFileStat()
	 */
	abstract protected function doGetFileStat( array $params );

	/**
	 * @see FileBackend::getFileContents()
	 */
	public function getFileContents( array $params ) {
		wfProfileIn( __METHOD__ );
		$tmpFile = $this->getLocalReference( $params );
		if ( !$tmpFile ) {
			wfProfileOut( __METHOD__ );
			return false;
		}
		wfSuppressWarnings();
		$data = file_get_contents( $tmpFile->getPath() );
		wfRestoreWarnings();
		wfProfileOut( __METHOD__ );
		return $data;
	}

	/**
	 * Get the SHA-1 (base 36) of a file, using the in-process cache.
	 *
	 * The cache is keyed by the normalized storage path, the same key
	 * that getFileStat() uses, so that equivalent spellings of one path
	 * share (and invalidate) a single entry.
	 *
	 * @see FileBackend::getFileSha1Base36()
	 *
	 * @param $params Array
	 * @return string|false Hash string or false on failure
	 */
	final public function getFileSha1Base36( array $params ) {
		wfProfileIn( __METHOD__ );
		$path = self::normalizeStoragePath( $params['src'] );
		if ( $path === null ) {
			wfProfileOut( __METHOD__ );
			return false; // invalid storage path
		}
		if ( isset( $this->cache[$path]['sha1'] ) ) {
			wfProfileOut( __METHOD__ );
			return $this->cache[$path]['sha1'];
		}
		$hash = $this->doGetFileSha1Base36( $params );
		if ( $hash ) { // don't cache negatives
			$this->trimCache(); // limit memory
			$this->cache[$path]['sha1'] = $hash;
		}
		wfProfileOut( __METHOD__ );
		return $hash;
	}

	/**
	 * @see FileBackendStore::getFileSha1Base36()
	 */
	protected function doGetFileSha1Base36( array $params ) {
		$fsFile = $this->getLocalReference( $params );
		if ( !$fsFile ) {
			return false;
		} else {
			return $fsFile->getSha1Base36();
		}
	}

	/**
	 * @see FileBackend::getFileProps()
	 */
	final public function getFileProps( array $params ) {
		wfProfileIn( __METHOD__ );
		$fsFile = $this->getLocalReference( $params );
		$props = $fsFile ?
$fsFile->getProps() : FSFile::placeholderProps();
		wfProfileOut( __METHOD__ );
		return $props;
	}

	/**
	 * Get a local file for a storage path, reusing a cached one if present.
	 *
	 * The cache is keyed by the normalized storage path, the same key
	 * that getFileStat() uses, so that equivalent spellings of one path
	 * share a single cached local file.
	 *
	 * @see FileBackend::getLocalReference()
	 *
	 * @param $params Array
	 * @return FSFile|null Returns null on failure or for an invalid path
	 */
	public function getLocalReference( array $params ) {
		wfProfileIn( __METHOD__ );
		$path = self::normalizeStoragePath( $params['src'] );
		if ( $path === null ) {
			wfProfileOut( __METHOD__ );
			return null; // invalid storage path
		}
		if ( isset( $this->expensiveCache[$path]['localRef'] ) ) {
			wfProfileOut( __METHOD__ );
			return $this->expensiveCache[$path]['localRef'];
		}
		$tmpFile = $this->getLocalCopy( $params );
		if ( $tmpFile ) { // don't cache negatives
			$this->trimExpensiveCache(); // limit memory
			$this->expensiveCache[$path]['localRef'] = $tmpFile;
		}
		wfProfileOut( __METHOD__ );
		return $tmpFile;
	}

	/**
	 * @see FileBackend::streamFile()
	 */
	final public function streamFile( array $params ) {
		wfProfileIn( __METHOD__ );
		$status = Status::newGood();

		$info = $this->getFileStat( $params );
		if ( !$info ) { // let StreamFile handle the 404
			$status->fatal( 'backend-fail-notexists', $params['src'] );
		}

		// Set output buffer and HTTP headers for stream
		$extraHeaders = isset( $params['headers'] ) ?
$params['headers'] : array();
		$res = StreamFile::prepareForStream( $params['src'], $info, $extraHeaders );
		if ( $res == StreamFile::NOT_MODIFIED ) {
			// do nothing; client cache is up to date
		} elseif ( $res == StreamFile::READY_STREAM ) {
			$status = $this->doStreamFile( $params );
		} else {
			$status->fatal( 'backend-fail-stream', $params['src'] );
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Default implementation of the streaming step: write the contents of
	 * a local reference of the file to the output.
	 *
	 * @see FileBackendStore::streamFile()
	 *
	 * @param $params Array
	 * @return Status
	 */
	protected function doStreamFile( array $params ) {
		$status = Status::newGood();

		$fsFile = $this->getLocalReference( $params );
		// readfile() returns the number of bytes read, which is 0 for an
		// empty file; only a strict false means that reading failed.
		if ( !$fsFile || readfile( $fsFile->getPath() ) === false ) {
			$status->fatal( 'backend-fail-stream', $params['src'] );
		}

		return $status;
	}

	/**
	 * @copydoc FileBackend::getFileList()
	 */
	final public function getFileList( array $params ) {
		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir === null ) { // invalid storage path
			return null;
		}
		if ( $shard !== null ) {
			// File listing is confined to a single container/shard
			return $this->getFileListInternal( $fullCont, $dir, $params );
		} else {
			wfDebug( __METHOD__ .
": iterating over all container shards.\n" ); + // File listing spans multiple containers/shards + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + return new FileBackendStoreShardListIterator( $this, + $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); + } + } + + /** + * Do not call this function from places outside FileBackend + * + * @see FileBackendStore::getFileList() + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return Traversable|Array|null + */ + abstract public function getFileListInternal( $container, $dir, array $params ); + + /** + * Get the list of supported operations and their corresponding FileOp classes. + * + * @return Array + */ + protected function supportedOperations() { + return array( + 'store' => 'StoreFileOp', + 'copy' => 'CopyFileOp', + 'move' => 'MoveFileOp', + 'delete' => 'DeleteFileOp', + 'create' => 'CreateFileOp', + 'null' => 'NullFileOp' + ); + } + + /** + * Return a list of FileOp objects from a list of operations. + * Do not call this function from places outside FileBackend. + * + * The result must have the same number of items as the input. + * An exception is thrown if an unsupported operation is requested. + * + * @param $ops Array Same format as doOperations() + * @return Array List of FileOp objects + * @throws MWException + */ + final public function getOperations( array $ops ) { + $supportedOps = $this->supportedOperations(); + + $performOps = array(); // array of FileOp objects + // Build up ordered array of FileOps... + foreach ( $ops as $operation ) { + $opName = $operation['op']; + if ( isset( $supportedOps[$opName] ) ) { + $class = $supportedOps[$opName]; + // Get params for this operation + $params = $operation; + // Append the FileOp class + $performOps[] = new $class( $this, $params ); + } else { + throw new MWException( "Operation `$opName` is not supported." 
); + } + } + + return $performOps; + } + + /** + * @see FileBackend::doOperationsInternal() + */ + protected function doOperationsInternal( array $ops, array $opts ) { + wfProfileIn( __METHOD__ ); + $status = Status::newGood(); + + // Build up a list of FileOps... + $performOps = $this->getOperations( $ops ); + + // Acquire any locks as needed... + if ( empty( $opts['nonLocking'] ) ) { + // Build up a list of files to lock... + $filesLockEx = $filesLockSh = array(); + foreach ( $performOps as $fileOp ) { + $filesLockSh = array_merge( $filesLockSh, $fileOp->storagePathsRead() ); + $filesLockEx = array_merge( $filesLockEx, $fileOp->storagePathsChanged() ); + } + // Optimization: if doing an EX lock anyway, don't also set an SH one + $filesLockSh = array_diff( $filesLockSh, $filesLockEx ); + // Get a shared lock on the parent directory of each path changed + $filesLockSh = array_merge( $filesLockSh, array_map( 'dirname', $filesLockEx ) ); + // Try to lock those files for the scope of this function... + $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status ); + $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status ); + if ( !$status->isOK() ) { + wfProfileOut( __METHOD__ ); + return $status; // abort + } + } + + // Clear any cache entries (after locks acquired) + $this->clearCache(); + + // Actually attempt the operation batch... 
$subStatus = FileOp::attemptBatch( $performOps, $opts );

		// Merge errors into status fields
		$status->merge( $subStatus );
		$status->success = $subStatus->success; // not done in merge()

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Drop cached entries for the given storage paths, or for every
	 * path when $paths is null, then let doClearCache() do the same
	 * for any backend-specific caches.
	 *
	 * @see FileBackend::clearCache()
	 *
	 * @param $paths Array|null Storage paths (optional)
	 * @return void
	 */
	final public function clearCache( array $paths = null ) {
		if ( is_array( $paths ) ) {
			$paths = array_map( 'FileBackend::normalizeStoragePath', $paths );
			$paths = array_filter( $paths, 'strlen' ); // remove nulls
		}
		if ( $paths === null ) {
			$this->cache = array();
			$this->expensiveCache = array();
		} else {
			foreach ( $paths as $path ) {
				unset( $this->cache[$path] );
				unset( $this->expensiveCache[$path] );
			}
		}
		$this->doClearCache( $paths );
	}

	/**
	 * Clears any additional stat caches for storage paths
	 *
	 * @see FileBackend::clearCache()
	 *
	 * @param $paths Array Storage paths (optional)
	 * @return void
	 */
	protected function doClearCache( array $paths = null ) {}

	/**
	 * Prune the inexpensive cache if it is too big to add an item.
	 * At most one entry (the first one in the array) is evicted per call.
	 *
	 * @return void
	 */
	protected function trimCache() {
		if ( count( $this->cache ) >= $this->maxCacheSize ) {
			reset( $this->cache );
			unset( $this->cache[key( $this->cache )] );
		}
	}

	/**
	 * Prune the expensive cache if it is too big to add an item.
	 * At most one entry (the first one in the array) is evicted per call.
	 *
	 * @return void
	 */
	protected function trimExpensiveCache() {
		if ( count( $this->expensiveCache ) >= $this->maxExpensiveCacheSize ) {
			reset( $this->expensiveCache );
			unset( $this->expensiveCache[key( $this->expensiveCache )] );
		}
	}

	/**
	 * Check if a container name is valid.
	 * This checks for length (1 to 200 characters) and illegal characters:
	 * the name must start with a letter or digit and may otherwise contain
	 * only letters, digits, '-' and '_'.
	 *
	 * @param $container string
	 * @return bool
	 */
	final protected static function isValidContainerName( $container ) {
		// This accounts for Swift and S3 restrictions while leaving room
		// for things like '.xxx' (hex shard chars) or '.seg' (segments).
		// This disallows directory separators or traversal characters.
		// Note that matching strings URL encode to the same string;
		// in Swift, the length restriction is *after* URL encoding.
		// The 'D' modifier makes "$" match only at the very end of the subject;
		// without it a name with a trailing newline ("foo\n") would be accepted.
		// Comparing against 1 yields a real boolean, as documented.
		return preg_match( '/^[a-z0-9][a-z0-9-_]{0,199}$/iD', $container ) === 1;
	}

	/**
	 * Splits a storage path into an internal container name,
	 * an internal relative file name, and a container shard suffix.
	 * Any shard suffix is already appended to the internal container name.
	 * This also checks that the storage path is valid and within this backend.
	 *
	 * If the container is sharded but a suffix could not be determined,
	 * this means that the path can only refer to a directory and can only
	 * be scanned by looking in all the container shards.
	 *
	 * @param $storagePath string
	 * @return Array (container, path, container suffix) or (null, null, null) if invalid
	 */
	final protected function resolveStoragePath( $storagePath ) {
		list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
		if ( $backend === $this->name ) { // must be for this backend
			$relPath = self::normalizeContainerPath( $relPath );
			if ( $relPath !== null ) {
				// Get shard for the normalized path if this container is sharded
				$cShard = $this->getContainerShard( $container, $relPath );
				// Validate and sanitize the relative path (backend-specific)
				$relPath = $this->resolveContainerPath( $container, $relPath );
				if ( $relPath !== null ) {
					// Prepend any wiki ID prefix to the container name
					$container = $this->fullContainerName( $container );
					if ( self::isValidContainerName( $container ) ) {
						// Validate and sanitize the container name (backend-specific)
						$container = $this->resolveContainerName( "{$container}{$cShard}" );
						if ( $container !== null ) {
							return array( $container, $relPath, $cShard );
						}
					}
				}
			}
		}
		return array( null, null, null );
	}

	/**
	 * Like resolveStoragePath() except null values are returned if
	 * the container is sharded
and the shard could not be determined.
	 *
	 * @see FileBackendStore::resolveStoragePath()
	 *
	 * @param $storagePath string
	 * @return Array (container, path) or (null, null) if invalid
	 */
	final protected function resolveStoragePathReal( $storagePath ) {
		list( $container, $relPath, $cShard ) = $this->resolveStoragePath( $storagePath );
		if ( $cShard !== null ) {
			return array( $container, $relPath );
		}
		return array( null, null );
	}

	/**
	 * Get the container name shard suffix for a given path.
	 * Any empty suffix means the container is not sharded.
	 *
	 * @param $container string Container name
	 * @param $relPath string Storage path relative to the container
	 * @return string|null Returns null if shard could not be determined
	 */
	final protected function getContainerShard( $container, $relPath ) {
		list( $levels, $base, $repeat ) = $this->getContainerHashLevels( $container );
		if ( $levels == 1 || $levels == 2 ) {
			// Hash characters are either base 16 or 36
			$char = ( $base == 36 ) ? '[0-9a-z]' : '[0-9a-f]';
			// Get a regex that represents the shard portion of paths.
			// The concatenation of the captures gives us the shard.
			if ( $levels === 1 ) { // 16 or 36 shards per container
				$hashDirRegex = '(' . $char . ')';
			} else { // 256 or 1296 shards per container
				if ( $repeat ) { // verbose hash dir format (e.g. "a/ab/abc")
					$hashDirRegex = $char . '/(' . $char . '{2})';
				} else { // short hash dir format (e.g. "a/b/c")
					$hashDirRegex = '(' . $char . ')/(' . $char . ')';
				}
			}
			// Allow certain directories to be above the hash dirs so as
			// to work with FileRepo (e.g. "archive/a/ab" or "temp/a/ab").
			// They must be 2+ chars to avoid any hash directory ambiguity.
			$m = array();
			if ( preg_match( "!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $m ) ) {
				return '.' . implode( '', array_slice( $m, 1 ) );
			}
			return null; // failed to match
		}
		return ''; // no sharding
	}

	/**
	 * Get the sharding config for a container.
	 * If greater than 0, then all file storage paths within
	 * the container are required to be hashed accordingly.
	 *
	 * A config entry that lacks 'levels' or 'base', or whose values are
	 * outside the supported set (levels 1 or 2, base 16 or 36), is treated
	 * as "no sharding".
	 *
	 * @param $container string
	 * @return Array (integer levels, integer base, boolean repeat flag) or (0, 0, false)
	 */
	final protected function getContainerHashLevels( $container ) {
		if ( isset( $this->shardViaHashLevels[$container] ) ) {
			$config = $this->shardViaHashLevels[$container];
			// Guard each key so that a partial config entry does not raise
			// undefined-index notices; a missing value simply fails validation.
			$hashLevels = isset( $config['levels'] ) ? (int)$config['levels'] : 0;
			if ( $hashLevels == 1 || $hashLevels == 2 ) {
				$hashBase = isset( $config['base'] ) ? (int)$config['base'] : 0;
				if ( $hashBase == 16 || $hashBase == 36 ) {
					// 'repeat' is optional; absent means the short hash dir format
					return array( $hashLevels, $hashBase, !empty( $config['repeat'] ) );
				}
			}
		}
		return array( 0, 0, false ); // no sharding
	}

	/**
	 * Get a list of full container shard suffixes for a container
	 *
	 * @param $container string
	 * @return Array List of suffixes, each starting with '.'; empty if not sharded
	 */
	final protected function getContainerSuffixes( $container ) {
		$shards = array();
		list( $digits, $base ) = $this->getContainerHashLevels( $container );
		if ( $digits > 0 ) {
			$numShards = pow( $base, $digits );
			for ( $index = 0; $index < $numShards; $index++ ) {
				$shards[] = '.' . wfBaseConvert( $index, 10, $base, $digits );
			}
		}
		return $shards;
	}

	/**
	 * Get the full container name, including the wiki ID prefix
	 *
	 * @param $container string
	 * @return string
	 */
	final protected function fullContainerName( $container ) {
		if ( $this->wikiId != '' ) {
			return "{$this->wikiId}-$container";
		} else {
			return $container;
		}
	}

	/**
	 * Resolve a container name, checking if it's allowed by the backend.
	 * This is intended for internal use, such as encoding illegal chars.
	 * Subclasses can override this to be more restrictive.
+ * + * @param $container string + * @return string|null + */ + protected function resolveContainerName( $container ) { + return $container; + } + + /** + * Resolve a relative storage path, checking if it's allowed by the backend. + * This is intended for internal use, such as encoding illegal chars or perhaps + * getting absolute paths (e.g. FS based backends). Note that the relative path + * may be the empty string (e.g. the path is simply to the container). + * + * @param $container string Container name + * @param $relStoragePath string Storage path relative to the container + * @return string|null Path or null if not valid + */ + protected function resolveContainerPath( $container, $relStoragePath ) { + return $relStoragePath; + } +} + +/** + * FileBackendStore helper function to handle file listings that span container shards. + * Do not use this class from places outside of FileBackendStore. + * + * @ingroup FileBackend + */ +class FileBackendStoreShardListIterator implements Iterator { + /* @var FileBackendStore */ + protected $backend; + /* @var Array */ + protected $params; + /* @var Array */ + protected $shardSuffixes; + protected $container; // string + protected $directory; // string + + /* @var Traversable */ + protected $iter; + protected $curShard = 0; // integer + protected $pos = 0; // integer + + /** + * @param $backend FileBackendStore + * @param $container string Full storage container name + * @param $dir string Storage directory relative to container + * @param $suffixes Array List of container shard suffixes + * @param $params Array + */ + public function __construct( + FileBackendStore $backend, $container, $dir, array $suffixes, array $params + ) { + $this->backend = $backend; + $this->container = $container; + $this->directory = $dir; + $this->shardSuffixes = $suffixes; + $this->params = $params; + } + + public function current() { + if ( is_array( $this->iter ) ) { + return current( $this->iter ); + } else { + return 
$this->iter->current(); + } + } + + public function key() { + return $this->pos; + } + + public function next() { + ++$this->pos; + if ( is_array( $this->iter ) ) { + next( $this->iter ); + } else { + $this->iter->next(); + } + // Find the next non-empty shard if no elements are left + $this->nextShardIteratorIfNotValid(); + } + + /** + * If the iterator for this container shard is out of items, + * then move on to the next container that has items. + * If there are none, then it advances to the last container. + */ + protected function nextShardIteratorIfNotValid() { + while ( !$this->valid() ) { + if ( ++$this->curShard >= count( $this->shardSuffixes ) ) { + break; // no more container shards + } + $this->setIteratorFromCurrentShard(); + } + } + + protected function setIteratorFromCurrentShard() { + $suffix = $this->shardSuffixes[$this->curShard]; + $this->iter = $this->backend->getFileListInternal( + "{$this->container}{$suffix}", $this->directory, $this->params ); + } + + public function rewind() { + $this->pos = 0; + $this->curShard = 0; + $this->setIteratorFromCurrentShard(); + // Find the next non-empty shard if this one has no elements + $this->nextShardIteratorIfNotValid(); + } + + public function valid() { + if ( $this->iter == null ) { + return false; // some failure? 
+ } elseif ( is_array( $this->iter ) ) { + return ( current( $this->iter ) !== false ); // no paths can have this value + } else { + return $this->iter->valid(); + } + } +} diff --git a/includes/filerepo/backend/FileBackendGroup.php b/includes/filerepo/backend/FileBackendGroup.php new file mode 100644 index 00000000..73815cfb --- /dev/null +++ b/includes/filerepo/backend/FileBackendGroup.php @@ -0,0 +1,156 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * Class to handle file backend registration + * + * @ingroup FileBackend + * @since 1.19 + */ +class FileBackendGroup { + /** + * @var FileBackendGroup + */ + protected static $instance = null; + + /** @var Array (name => ('class' => string, 'config' => array, 'instance' => object)) */ + protected $backends = array(); + + protected function __construct() {} + protected function __clone() {} + + /** + * @return FileBackendGroup + */ + public static function singleton() { + if ( self::$instance == null ) { + self::$instance = new self(); + self::$instance->initFromGlobals(); + } + return self::$instance; + } + + /** + * Destroy the singleton instance + * + * @return void + */ + public static function destroySingleton() { + self::$instance = null; + } + + /** + * Register file backends from the global variables + * + * @return void + */ + protected function initFromGlobals() { + global $wgLocalFileRepo, $wgForeignFileRepos, $wgFileBackends; + + // Register explicitly defined backends + $this->register( $wgFileBackends ); + + $autoBackends = array(); + // Automatically create b/c backends for file repos... + $repos = array_merge( $wgForeignFileRepos, array( $wgLocalFileRepo ) ); + foreach ( $repos as $info ) { + $backendName = $info['backend']; + if ( is_object( $backendName ) || isset( $this->backends[$backendName] ) ) { + continue; // already defined (or set to the object for some reason) + } + $repoName = $info['name']; + // Local vars that used to be FSRepo members... 
+ $directory = $info['directory']; + $deletedDir = isset( $info['deletedDir'] ) + ? $info['deletedDir'] + : false; // deletion disabled + $thumbDir = isset( $info['thumbDir'] ) + ? $info['thumbDir'] + : "{$directory}/thumb"; + $fileMode = isset( $info['fileMode'] ) + ? $info['fileMode'] + : 0644; + // Get the FS backend configuration + $autoBackends[] = array( + 'name' => $backendName, + 'class' => 'FSFileBackend', + 'lockManager' => 'fsLockManager', + 'containerPaths' => array( + "{$repoName}-public" => "{$directory}", + "{$repoName}-thumb" => $thumbDir, + "{$repoName}-deleted" => $deletedDir, + "{$repoName}-temp" => "{$directory}/temp" + ), + 'fileMode' => $fileMode, + ); + } + + // Register implicitly defined backends + $this->register( $autoBackends ); + } + + /** + * Register an array of file backend configurations + * + * @param $configs Array + * @return void + * @throws MWException + */ + protected function register( array $configs ) { + foreach ( $configs as $config ) { + if ( !isset( $config['name'] ) ) { + throw new MWException( "Cannot register a backend with no name." ); + } + $name = $config['name']; + if ( !isset( $config['class'] ) ) { + throw new MWException( "Cannot register backend `{$name}` with no class." ); + } + $class = $config['class']; + + unset( $config['class'] ); // backend won't need this + $this->backends[$name] = array( + 'class' => $class, + 'config' => $config, + 'instance' => null + ); + } + } + + /** + * Get the backend object with a given name + * + * @param $name string + * @return FileBackend + * @throws MWException + */ + public function get( $name ) { + if ( !isset( $this->backends[$name] ) ) { + throw new MWException( "No backend defined with the name `$name`." 
); + } + // Lazy-load the actual backend instance + if ( !isset( $this->backends[$name]['instance'] ) ) { + $class = $this->backends[$name]['class']; + $config = $this->backends[$name]['config']; + $this->backends[$name]['instance'] = new $class( $config ); + } + return $this->backends[$name]['instance']; + } + + /** + * Get an appropriate backend object from a storage path + * + * @param $storagePath string + * @return FileBackend|null Backend or null on failure + */ + public function backendFromPath( $storagePath ) { + list( $backend, $c, $p ) = FileBackend::splitStoragePath( $storagePath ); + if ( $backend !== null && isset( $this->backends[$backend] ) ) { + return $this->get( $backend ); + } + return null; + } +} diff --git a/includes/filerepo/backend/FileBackendMultiWrite.php b/includes/filerepo/backend/FileBackendMultiWrite.php new file mode 100644 index 00000000..c0f1ac57 --- /dev/null +++ b/includes/filerepo/backend/FileBackendMultiWrite.php @@ -0,0 +1,420 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * This class defines a multi-write backend. Multiple backends can be + * registered to this proxy backend and it will act as a single backend. + * Use this when all access to those backends is through this proxy backend. + * At least one of the backends must be declared the "master" backend. + * + * Only use this class when transitioning from one storage system to another. + * + * Read operations are only done on the 'master' backend for consistency. + * Write operations are performed on all backends, in the order defined. + * If an operation fails on one backend it will be rolled back from the others. 
+ * + * @ingroup FileBackend + * @since 1.19 + */ +class FileBackendMultiWrite extends FileBackend { + /** @var Array Prioritized list of FileBackendStore objects */ + protected $backends = array(); // array of (backend index => backends) + protected $masterIndex = -1; // integer; index of master backend + protected $syncChecks = 0; // integer bitfield + + /* Possible internal backend consistency checks */ + const CHECK_SIZE = 1; + const CHECK_TIME = 2; + + /** + * Construct a proxy backend that consists of several internal backends. + * Additional $config params include: + * 'backends' : Array of backend config and multi-backend settings. + * Each value is the config used in the constructor of a + * FileBackendStore class, but with these additional settings: + * 'class' : The name of the backend class + * 'isMultiMaster' : This must be set for one backend. + * 'syncChecks' : Integer bitfield of internal backend sync checks to perform. + * Possible bits include self::CHECK_SIZE and self::CHECK_TIME. + * The checks are done before allowing any file operations. + * @param $config Array + */ + public function __construct( array $config ) { + parent::__construct( $config ); + $namesUsed = array(); + // Construct backends here rather than via registration + // to keep these backends hidden from outside the proxy. + foreach ( $config['backends'] as $index => $config ) { + $name = $config['name']; + if ( isset( $namesUsed[$name] ) ) { // don't break FileOp predicates + throw new MWException( "Two or more backends defined with the name $name." ); + } + $namesUsed[$name] = 1; + if ( !isset( $config['class'] ) ) { + throw new MWException( 'No class given for a backend config.' ); + } + $class = $config['class']; + $this->backends[$index] = new $class( $config ); + if ( !empty( $config['isMultiMaster'] ) ) { + if ( $this->masterIndex >= 0 ) { + throw new MWException( 'More than one master backend defined.' 
); + } + $this->masterIndex = $index; + } + } + if ( $this->masterIndex < 0 ) { // need backends and must have a master + throw new MWException( 'No master backend defined.' ); + } + $this->syncChecks = isset( $config['syncChecks'] ) + ? $config['syncChecks'] + : self::CHECK_SIZE; + } + + /** + * @see FileBackend::doOperationsInternal() + */ + final protected function doOperationsInternal( array $ops, array $opts ) { + $status = Status::newGood(); + + $performOps = array(); // list of FileOp objects + $filesRead = $filesChanged = array(); // storage paths used + // Build up a list of FileOps. The list will have all the ops + // for one backend, then all the ops for the next, and so on. + // These batches of ops are all part of a continuous array. + // Also build up a list of files read/changed... + foreach ( $this->backends as $index => $backend ) { + $backendOps = $this->substOpBatchPaths( $ops, $backend ); + // Add on the operation batch for this backend + $performOps = array_merge( $performOps, $backend->getOperations( $backendOps ) ); + if ( $index == 0 ) { // first batch + // Get the files used for these operations. Each backend has a batch of + // the same operations, so we only need to get them from the first batch. + foreach ( $performOps as $fileOp ) { + $filesRead = array_merge( $filesRead, $fileOp->storagePathsRead() ); + $filesChanged = array_merge( $filesChanged, $fileOp->storagePathsChanged() ); + } + // Get the paths under the proxy backend's name + $filesRead = $this->unsubstPaths( $filesRead ); + $filesChanged = $this->unsubstPaths( $filesChanged ); + } + } + + // Try to lock those files for the scope of this function... 
+ if ( empty( $opts['nonLocking'] ) ) { + $filesLockSh = array_diff( $filesRead, $filesChanged ); // optimization + $filesLockEx = $filesChanged; + // Get a shared lock on the parent directory of each path changed + $filesLockSh = array_merge( $filesLockSh, array_map( 'dirname', $filesLockEx ) ); + // Try to lock those files for the scope of this function... + $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status ); + $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status ); + if ( !$status->isOK() ) { + return $status; // abort + } + } + + // Clear any cache entries (after locks acquired) + $this->clearCache(); + + // Do a consistency check to see if the backends agree + if ( count( $this->backends ) > 1 ) { + $status->merge( $this->consistencyCheck( array_merge( $filesRead, $filesChanged ) ) ); + if ( !$status->isOK() ) { + return $status; // abort + } + } + + // Actually attempt the operation batch... + $subStatus = FileOp::attemptBatch( $performOps, $opts ); + + $success = array(); + $failCount = $successCount = 0; + // Make 'success', 'successCount', and 'failCount' fields reflect + // the overall operation, rather than all the batches for each backend. + // Do this by only using success values from the master backend's batch. 
+ $batchStart = $this->masterIndex * count( $ops ); + $batchEnd = $batchStart + count( $ops ) - 1; + for ( $i = $batchStart; $i <= $batchEnd; $i++ ) { + if ( !isset( $subStatus->success[$i] ) ) { + break; // failed out before trying this op + } elseif ( $subStatus->success[$i] ) { + ++$successCount; + } else { + ++$failCount; + } + $success[] = $subStatus->success[$i]; + } + $subStatus->success = $success; + $subStatus->successCount = $successCount; + $subStatus->failCount = $failCount; + + // Merge errors into status fields + $status->merge( $subStatus ); + $status->success = $subStatus->success; // not done in merge() + + return $status; + } + + /** + * Check that a set of files are consistent across all internal backends + * + * @param $paths Array + * @return Status + */ + public function consistencyCheck( array $paths ) { + $status = Status::newGood(); + if ( $this->syncChecks == 0 ) { + return $status; // skip checks + } + + $mBackend = $this->backends[$this->masterIndex]; + foreach ( array_unique( $paths ) as $path ) { + $params = array( 'src' => $path, 'latest' => true ); + // Stat the file on the 'master' backend + $mStat = $mBackend->getFileStat( $this->substOpPaths( $params, $mBackend ) ); + // Check of all clone backends agree with the master... 
+ foreach ( $this->backends as $index => $cBackend ) { + if ( $index === $this->masterIndex ) { + continue; // master + } + $cStat = $cBackend->getFileStat( $this->substOpPaths( $params, $cBackend ) ); + if ( $mStat ) { // file is in master + if ( !$cStat ) { // file should exist + $status->fatal( 'backend-fail-synced', $path ); + continue; + } + if ( $this->syncChecks & self::CHECK_SIZE ) { + if ( $cStat['size'] != $mStat['size'] ) { // wrong size + $status->fatal( 'backend-fail-synced', $path ); + continue; + } + } + if ( $this->syncChecks & self::CHECK_TIME ) { + $mTs = wfTimestamp( TS_UNIX, $mStat['mtime'] ); + $cTs = wfTimestamp( TS_UNIX, $cStat['mtime'] ); + if ( abs( $mTs - $cTs ) > 30 ) { // outdated file somewhere + $status->fatal( 'backend-fail-synced', $path ); + continue; + } + } + } else { // file is not in master + if ( $cStat ) { // file should not exist + $status->fatal( 'backend-fail-synced', $path ); + } + } + } + } + + return $status; + } + + /** + * Substitute the backend name in storage path parameters + * for a set of operations with that of a given internal backend. 
+ * + * @param $ops Array List of file operation arrays + * @param $backend FileBackendStore + * @return Array + */ + protected function substOpBatchPaths( array $ops, FileBackendStore $backend ) { + $newOps = array(); // operations + foreach ( $ops as $op ) { + $newOp = $op; // operation + foreach ( array( 'src', 'srcs', 'dst', 'dir' ) as $par ) { + if ( isset( $newOp[$par] ) ) { // string or array + $newOp[$par] = $this->substPaths( $newOp[$par], $backend ); + } + } + $newOps[] = $newOp; + } + return $newOps; + } + + /** + * Same as substOpBatchPaths() but for a single operation + * + * @param $op File operation array + * @param $backend FileBackendStore + * @return Array + */ + protected function substOpPaths( array $ops, FileBackendStore $backend ) { + $newOps = $this->substOpBatchPaths( array( $ops ), $backend ); + return $newOps[0]; + } + + /** + * Substitute the backend of storage paths with an internal backend's name + * + * @param $paths Array|string List of paths or single string path + * @param $backend FileBackendStore + * @return Array|string + */ + protected function substPaths( $paths, FileBackendStore $backend ) { + return preg_replace( + '!^mwstore://' . preg_quote( $this->name ) . 
'/!', + StringUtils::escapeRegexReplacement( "mwstore://{$backend->getName()}/" ), + $paths // string or array + ); + } + + /** + * Substitute the backend of internal storage paths with the proxy backend's name + * + * @param $paths Array|string List of paths or single string path + * @return Array|string + */ + protected function unsubstPaths( $paths ) { + return preg_replace( + '!^mwstore://([^/]+)!', + StringUtils::escapeRegexReplacement( "mwstore://{$this->name}" ), + $paths // string or array + ); + } + + /** + * @see FileBackend::doPrepare() + */ + public function doPrepare( array $params ) { + $status = Status::newGood(); + foreach ( $this->backends as $backend ) { + $realParams = $this->substOpPaths( $params, $backend ); + $status->merge( $backend->doPrepare( $realParams ) ); + } + return $status; + } + + /** + * @see FileBackend::doSecure() + */ + public function doSecure( array $params ) { + $status = Status::newGood(); + foreach ( $this->backends as $backend ) { + $realParams = $this->substOpPaths( $params, $backend ); + $status->merge( $backend->doSecure( $realParams ) ); + } + return $status; + } + + /** + * @see FileBackend::doClean() + */ + public function doClean( array $params ) { + $status = Status::newGood(); + foreach ( $this->backends as $backend ) { + $realParams = $this->substOpPaths( $params, $backend ); + $status->merge( $backend->doClean( $realParams ) ); + } + return $status; + } + + /** + * @see FileBackend::getFileList() + */ + public function concatenate( array $params ) { + // We are writing to an FS file, so we don't need to do this per-backend + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->concatenate( $realParams ); + } + + /** + * @see FileBackend::fileExists() + */ + public function fileExists( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return 
$this->backends[$this->masterIndex]->fileExists( $realParams ); + } + + /** + * @see FileBackend::getFileTimestamp() + */ + public function getFileTimestamp( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileTimestamp( $realParams ); + } + + /** + * @see FileBackend::getFileSize() + */ + public function getFileSize( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileSize( $realParams ); + } + + /** + * @see FileBackend::getFileStat() + */ + public function getFileStat( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileStat( $realParams ); + } + + /** + * @see FileBackend::getFileContents() + */ + public function getFileContents( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileContents( $realParams ); + } + + /** + * @see FileBackend::getFileSha1Base36() + */ + public function getFileSha1Base36( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileSha1Base36( $realParams ); + } + + /** + * @see FileBackend::getFileProps() + */ + public function getFileProps( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileProps( $realParams ); + } + + /** + * @see FileBackend::streamFile() + */ + public function streamFile( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->streamFile( $realParams ); + } + + /** + * @see FileBackend::getLocalReference() + */ + 
public function getLocalReference( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getLocalReference( $realParams ); + } + + /** + * @see FileBackend::getLocalCopy() + */ + public function getLocalCopy( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getLocalCopy( $realParams ); + } + + /** + * @see FileBackend::getFileList() + */ + public function getFileList( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileList( $realParams ); + } + + /** + * @see FileBackend::clearCache() + */ + public function clearCache( array $paths = null ) { + foreach ( $this->backends as $backend ) { + $realPaths = is_array( $paths ) ? $this->substPaths( $paths, $backend ) : null; + $backend->clearCache( $realPaths ); + } + } +} diff --git a/includes/filerepo/backend/FileOp.php b/includes/filerepo/backend/FileOp.php new file mode 100644 index 00000000..5844c9f2 --- /dev/null +++ b/includes/filerepo/backend/FileOp.php @@ -0,0 +1,697 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * Helper class for representing operations with transaction support. + * Do not use this class from places outside FileBackend. + * + * Methods called from attemptBatch() should avoid throwing exceptions at all costs. + * FileOp objects should be lightweight in order to support large arrays in memory. 
+ * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileOp { + /** @var Array */ + protected $params = array(); + /** @var FileBackendStore */ + protected $backend; + + protected $state = self::STATE_NEW; // integer + protected $failed = false; // boolean + protected $useLatest = true; // boolean + + protected $sourceSha1; // string + protected $destSameAsSource; // boolean + + /* Object life-cycle */ + const STATE_NEW = 1; + const STATE_CHECKED = 2; + const STATE_ATTEMPTED = 3; + + /* Timeout related parameters */ + const MAX_BATCH_SIZE = 1000; + const TIME_LIMIT_SEC = 300; // 5 minutes + + /** + * Build a new file operation transaction + * + * Every required parameter from allowedParams() must be set in $params. + * Note that the complete $params array is stored afterwards, so keys + * not listed by allowedParams() are retained as well. + * + * @param $backend FileBackendStore + * @param $params Array + * @throws MWException + */ + final public function __construct( FileBackendStore $backend, array $params ) { + $this->backend = $backend; + list( $required, $optional ) = $this->allowedParams(); + foreach ( $required as $name ) { + if ( isset( $params[$name] ) ) { + $this->params[$name] = $params[$name]; + } else { + throw new MWException( "File operation missing parameter '$name'." ); + } + } + foreach ( $optional as $name ) { + if ( isset( $params[$name] ) ) { + $this->params[$name] = $params[$name]; + } + } + $this->params = $params; + } + + /** + * Allow stale data for file reads and existence checks + * + * @return void + */ + final protected function allowStaleReads() { + $this->useLatest = false; + } + + /** + * Attempt a series of file operations. + * Callers are responsible for handling file locking. + * + * $opts is an array of options, including: + * 'force' : Errors that would normally cause a rollback do not. + * The remaining operations are still attempted if any fail. + * 'allowStale' : Don't require the latest available data. + * This can increase performance for non-critical writes. + * This has no effect unless the 'force' flag is set. 
+ * + * The resulting Status will be "OK" unless: + * a) unexpected operation errors occurred (network partitions, disk full...) + * b) significant operation errors occurred and 'force' was not set + * + * @param $performOps Array List of FileOp operations + * @param $opts Array Batch operation options + * @return Status + */ + final public static function attemptBatch( array $performOps, array $opts ) { + $status = Status::newGood(); + + $allowStale = !empty( $opts['allowStale'] ); + $ignoreErrors = !empty( $opts['force'] ); + + $n = count( $performOps ); + if ( $n > self::MAX_BATCH_SIZE ) { + $status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE ); + return $status; + } + + $predicates = FileOp::newPredicates(); // account for previous op in prechecks + // Do pre-checks for each operation; abort on failure... + foreach ( $performOps as $index => $fileOp ) { + if ( $allowStale ) { + $fileOp->allowStaleReads(); // allow potentially stale reads + } + $subStatus = $fileOp->precheck( $predicates ); + $status->merge( $subStatus ); + if ( !$subStatus->isOK() ) { // operation failed? + $status->success[$index] = false; + ++$status->failCount; + if ( !$ignoreErrors ) { + return $status; // abort + } + } + } + + if ( $ignoreErrors ) { + # Treat all precheck() fatals as merely warnings + $status->setResult( true, $status->value ); + } + + // Restart PHP's execution timer and set the timeout to a safe amount. + // This handles cases where the operations take a long time or where we are + // already running low on time left. The old timeout is restored afterwards. + # @TODO: re-enable this for when the number of batches is high. + #$scopedTimeLimit = new FileOpScopedPHPTimeout( self::TIME_LIMIT_SEC ); + + // Attempt each operation... 
+ foreach ( $performOps as $index => $fileOp ) { + if ( $fileOp->failed() ) { + continue; // nothing to do + } + $subStatus = $fileOp->attempt(); + $status->merge( $subStatus ); + if ( $subStatus->isOK() ) { + $status->success[$index] = true; + ++$status->successCount; + } else { + $status->success[$index] = false; + ++$status->failCount; + // We can't continue (even with $ignoreErrors) as $predicates is wrong. + // Log the remaining ops as failed for recovery... + for ( $i = ($index + 1); $i < count( $performOps ); $i++ ) { + $performOps[$i]->logFailure( 'attempt_aborted' ); + } + return $status; // bail out + } + } + + return $status; + } + + /** + * Get the value of the parameter with the given name + * + * @param $name string + * @return mixed Returns null if the parameter is not set + */ + final public function getParam( $name ) { + return isset( $this->params[$name] ) ? $this->params[$name] : null; + } + + /** + * Check if this operation failed precheck() or attempt() + * + * @return bool + */ + final public function failed() { + return $this->failed; + } + + /** + * Get a new empty predicates array for precheck() + * + * @return Array + */ + final public static function newPredicates() { + return array( 'exists' => array(), 'sha1' => array() ); + } + + /** + * Check preconditions of the operation without writing anything + * + * @param $predicates Array + * @return Status + */ + final public function precheck( array &$predicates ) { + if ( $this->state !== self::STATE_NEW ) { + return Status::newFatal( 'fileop-fail-state', self::STATE_NEW, $this->state ); + } + $this->state = self::STATE_CHECKED; + $status = $this->doPrecheck( $predicates ); + if ( !$status->isOK() ) { + $this->failed = true; + } + return $status; + } + + /** + * Attempt the operation, backing up files as needed; this must be reversible + * + * @return Status + */ + final public function attempt() { + if ( $this->state !== self::STATE_CHECKED ) { + return Status::newFatal( 
'fileop-fail-state', self::STATE_CHECKED, $this->state ); + } elseif ( $this->failed ) { // failed precheck + return Status::newFatal( 'fileop-fail-attempt-precheck' ); + } + $this->state = self::STATE_ATTEMPTED; + $status = $this->doAttempt(); + if ( !$status->isOK() ) { + $this->failed = true; + $this->logFailure( 'attempt' ); + } + return $status; + } + + /** + * Get the file operation parameters + * + * @return Array (required params list, optional params list) + */ + protected function allowedParams() { + return array( array(), array() ); + } + + /** + * Get a list of storage paths read from for this operation + * + * @return Array + */ + public function storagePathsRead() { + return array(); + } + + /** + * Get a list of storage paths written to for this operation + * + * @return Array + */ + public function storagePathsChanged() { + return array(); + } + + /** + * @return Status + */ + protected function doPrecheck( array &$predicates ) { + return Status::newGood(); + } + + /** + * @return Status + */ + protected function doAttempt() { + return Status::newGood(); + } + + /** + * Check for errors with regards to the destination file already existing. + * This also updates the destSameAsSource and sourceSha1 member variables. + * A bad status will be returned if there is no chance it can be overwritten. + * + * @param $predicates Array + * @return Status + */ + protected function precheckDestExistence( array $predicates ) { + $status = Status::newGood(); + // Get hash of source file/string and the destination file + $this->sourceSha1 = $this->getSourceSha1Base36(); // FS file or data string + if ( $this->sourceSha1 === null ) { // file in storage? 
+ $this->sourceSha1 = $this->fileSha1( $this->params['src'], $predicates ); + } + $this->destSameAsSource = false; + if ( $this->fileExists( $this->params['dst'], $predicates ) ) { + if ( $this->getParam( 'overwrite' ) ) { + return $status; // OK + } elseif ( $this->getParam( 'overwriteSame' ) ) { + $dhash = $this->fileSha1( $this->params['dst'], $predicates ); + // Check if hashes are valid and match each other... + if ( !strlen( $this->sourceSha1 ) || !strlen( $dhash ) ) { + $status->fatal( 'backend-fail-hashes' ); + } elseif ( $this->sourceSha1 !== $dhash ) { + // Give an error if the files are not identical + $status->fatal( 'backend-fail-notsame', $this->params['dst'] ); + } else { + $this->destSameAsSource = true; // OK + } + return $status; // do nothing; either OK or bad status + } else { + $status->fatal( 'backend-fail-alreadyexists', $this->params['dst'] ); + return $status; + } + } + return $status; + } + + /** + * precheckDestExistence() helper function to get the source file SHA-1. + * Subclasses should override this iff the source is not in storage. 
+ * + * @return string|false|null Returns false on failure; the default implementation + * returns null, which makes precheckDestExistence() hash the 'src' storage path + */ + protected function getSourceSha1Base36() { + return null; // N/A + } + + /** + * Check if a file will exist in storage when this operation is attempted + * + * @param $source string Storage path + * @param $predicates Array + * @return bool + */ + final protected function fileExists( $source, array $predicates ) { + if ( isset( $predicates['exists'][$source] ) ) { + return $predicates['exists'][$source]; // previous op assures this + } else { + $params = array( 'src' => $source, 'latest' => $this->useLatest ); + return $this->backend->fileExists( $params ); + } + } + + /** + * Get the SHA-1 of a file in storage when this operation is attempted + * + * @param $source string Storage path + * @param $predicates Array + * @return string|false + */ + final protected function fileSha1( $source, array $predicates ) { + if ( isset( $predicates['sha1'][$source] ) ) { + return $predicates['sha1'][$source]; // previous op assures this + } else { + $params = array( 'src' => $source, 'latest' => $this->useLatest ); + return $this->backend->getFileSha1Base36( $params ); + } + } + + /** + * Log a file operation failure and preserve any temp files + * + * @param $action string + * @return void + */ + final protected function logFailure( $action ) { + $params = $this->params; + $params['failedAction'] = $action; + try { + wfDebugLog( 'FileOperation', + get_class( $this ) . ' failed:' . serialize( $params ) ); + } catch ( Exception $e ) { + // bad config? debug log error? + } + } +} + +/** + * FileOp helper class to expand PHP execution time for a function. + * On construction, the max_execution_time ini setting is set to $seconds + * (unless one of the constructor's guard conditions applies). + * When the object goes out of scope, the timer is restarted, with + * the original time limit minus the time the object existed. 
+ */ +class FileOpScopedPHPTimeout { + protected $startTime; // float; seconds + protected $oldTimeout; // integer; seconds + + protected static $stackDepth = 0; // integer + protected static $totalCalls = 0; // integer + protected static $totalElapsed = 0; // float; seconds + + /* Prevent callers in infinite loops from running forever */ + const MAX_TOTAL_CALLS = 1000000; + const MAX_TOTAL_TIME = 300; // seconds + + /** + * @param $seconds integer + */ + public function __construct( $seconds ) { + if ( ini_get( 'max_execution_time' ) > 0 ) { // CLI uses 0 + if ( self::$totalCalls >= self::MAX_TOTAL_CALLS ) { + trigger_error( "Maximum invocations of " . __CLASS__ . " exceeded." ); + } elseif ( self::$totalElapsed >= self::MAX_TOTAL_TIME ) { + trigger_error( "Time limit within invocations of " . __CLASS__ . " exceeded." ); + } elseif ( self::$stackDepth > 0 ) { // recursion guard + trigger_error( "Resursive invocation of " . __CLASS__ . " attempted." ); + } else { + $this->oldTimeout = ini_set( 'max_execution_time', $seconds ); + $this->startTime = microtime( true ); + ++self::$stackDepth; + ++self::$totalCalls; // proof against < 1us scopes + } + } + } + + /** + * Restore the original timeout. + * This does not account for the timer value on __construct(). + */ + public function __destruct() { + if ( $this->oldTimeout ) { + $elapsed = microtime( true ) - $this->startTime; + // Note: a limit of 0 is treated as "forever" + set_time_limit( max( 1, $this->oldTimeout - (int)$elapsed ) ); + // If each scoped timeout is for less than one second, we end up + // restoring the original timeout without any decrease in value. + // Thus web scripts in an infinite loop can run forever unless we + // take some measures to prevent this. Track total time and calls. + self::$totalElapsed += $elapsed; + --self::$stackDepth; + } + } +} + +/** + * Store a file into the backend from a file on the file system. 
+ * Parameters similar to FileBackendStore::storeInternal(), which include: + * src : source path on file system + * dst : destination storage path + * overwrite : override any existing file at destination + * overwriteSame : do nothing and pass if an identical file exists at destination + */ +class StoreFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists on the file system + if ( !is_file( $this->params['src'] ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if the source file is too big + } elseif ( filesize( $this->params['src'] ) > $this->backend->maxFileSizeInternal() ) { + $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Store the file at the destination + if ( !$this->destSameAsSource ) { + $status->merge( $this->backend->storeInternal( $this->params ) ); + } + return $status; + } + + protected function getSourceSha1Base36() { + wfSuppressWarnings(); + $hash = sha1_file( $this->params['src'] ); + wfRestoreWarnings(); + if ( $hash !== false ) { + $hash = wfBaseConvert( $hash, 16, 36, 31 ); + } + 
return $hash; + } + + public function storagePathsChanged() { + return array( $this->params['dst'] ); + } +} + +/** + * Create a file in the backend with the given content. + * Parameters similar to FileBackendStore::createInternal(), which include: + * content : the raw file contents + * dst : destination storage path + * overwrite : override any existing file at destination + * overwriteSame : do nothing and pass if an identical file exists at destination + */ +class CreateFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'content', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source data is too big + if ( strlen( $this->getParam( 'content' ) ) > $this->backend->maxFileSizeInternal() ) { + $status->fatal( 'backend-fail-create', $this->params['dst'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-create', $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Create the file at the destination + if ( !$this->destSameAsSource ) { + $status->merge( $this->backend->createInternal( $this->params ) ); + } + return $status; + } + + protected function getSourceSha1Base36() { + return wfBaseConvert( sha1( $this->params['content'] ), 16, 36, 31 ); + } + + public function storagePathsChanged() { + return array( $this->params['dst'] ); + } +} + +/** + * Copy a file from one storage path to 
another in the backend. + * Parameters similar to FileBackendStore::copyInternal(), which include: + * src : source storage path + * dst : destination storage path + * overwrite : override any existing file at destination + * overwriteSame : do nothing and pass if an identical file exists at destination + */ +class CopyFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists + if ( !$this->fileExists( $this->params['src'], $predicates ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-copy', $this->params['src'], $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Do nothing if the src/dst paths are the same + if ( $this->params['src'] !== $this->params['dst'] ) { + // Copy the file into the destination + if ( !$this->destSameAsSource ) { + $status->merge( $this->backend->copyInternal( $this->params ) ); + } + } + return $status; + } + + public function storagePathsRead() { + return array( $this->params['src'] ); + } + + public function storagePathsChanged() { + return array( $this->params['dst'] ); + } +} + +/** + * Move a file from one storage path to another in the backend. 
+ * Parameters similar to FileBackendStore::moveInternal(), which include: + * src : source storage path + * dst : destination storage path + * overwrite : override any existing file at destination + * overwriteSame : do nothing and pass if an identical file exists at destination + */ +class MoveFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists + if ( !$this->fileExists( $this->params['src'], $predicates ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-move', $this->params['src'], $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['src']] = false; + $predicates['sha1'][$this->params['src']] = false; + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Do nothing if the src/dst paths are the same + if ( $this->params['src'] !== $this->params['dst'] ) { + if ( !$this->destSameAsSource ) { + // Move the file into the destination + $status->merge( $this->backend->moveInternal( $this->params ) ); + } else { + // Just delete source as the destination needs no changes + $params = array( 'src' => $this->params['src'] ); + $status->merge( $this->backend->deleteInternal( $params ) ); + } + } + return $status; + } + + public function storagePathsRead() { + return 
array( $this->params['src'] ); + } + + public function storagePathsChanged() { + return array( $this->params['dst'] ); + } +} + +/** + * Delete a file at the given storage path from the backend. + * Parameters similar to FileBackendStore::deleteInternal(), which include: + * src : source storage path + * ignoreMissingSource : don't return an error if the file does not exist + */ +class DeleteFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src' ), array( 'ignoreMissingSource' ) ); + } + + protected $needsDelete = true; + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists + if ( !$this->fileExists( $this->params['src'], $predicates ) ) { + if ( !$this->getParam( 'ignoreMissingSource' ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + } + $this->needsDelete = false; + } + // Update file existence predicates + $predicates['exists'][$this->params['src']] = false; + $predicates['sha1'][$this->params['src']] = false; + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + if ( $this->needsDelete ) { + // Delete the source file + $status->merge( $this->backend->deleteInternal( $this->params ) ); + } + return $status; + } + + public function storagePathsChanged() { + return array( $this->params['src'] ); + } +} + +/** + * Placeholder operation that has no params and does nothing + */ +class NullFileOp extends FileOp {} diff --git a/includes/filerepo/backend/SwiftFileBackend.php b/includes/filerepo/backend/SwiftFileBackend.php new file mode 100644 index 00000000..a287f488 --- /dev/null +++ b/includes/filerepo/backend/SwiftFileBackend.php @@ -0,0 +1,877 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Russ Nelson + * @author Aaron Schulz + */ + +/** + * Class for an OpenStack Swift based file backend. 
+ * + * This requires the SwiftCloudFiles MediaWiki extension, which includes + * the php-cloudfiles library (https://github.com/rackspace/php-cloudfiles). + * php-cloudfiles requires the curl, fileinfo, and mb_string PHP extensions. + * + * Status messages should avoid mentioning the Swift account name. + * Likewise, error suppression should be used to avoid path disclosure. + * + * @ingroup FileBackend + * @since 1.19 + */ +class SwiftFileBackend extends FileBackendStore { + /** @var CF_Authentication */ + protected $auth; // Swift authentication handler + protected $authTTL; // integer seconds + protected $swiftAnonUser; // string; username to handle unauthenticated requests + protected $maxContCacheSize = 100; // integer; max containers with entries + + /** @var CF_Connection */ + protected $conn; // Swift connection handle + protected $connStarted = 0; // integer UNIX timestamp + protected $connContainers = array(); // container object cache + + /** + * @see FileBackendStore::__construct() + * Additional $config params include: + * swiftAuthUrl : Swift authentication server URL + * swiftUser : Swift user used by MediaWiki (account:username) + * swiftKey : Swift authentication key for the above user + * swiftAuthTTL : Swift authentication TTL (seconds) + * swiftAnonUser : Swift user used for end-user requests (account:username) + * shardViaHashLevels : Map of container names to sharding config with: + * 'base' : base of hash characters, 16 or 36 + * 'levels' : the number of hash levels (and digits) + * 'repeat' : hash subdirectories are prefixed with all the + * parent hash directory names (e.g. "a/ab/abc") + */ + public function __construct( array $config ) { + parent::__construct( $config ); + // Required settings + $this->auth = new CF_Authentication( + $config['swiftUser'], + $config['swiftKey'], + null, // account; unused + $config['swiftAuthUrl'] + ); + // Optional settings + $this->authTTL = isset( $config['swiftAuthTTL'] ) + ? 
$config['swiftAuthTTL'] + : 120; // some sane number + $this->swiftAnonUser = isset( $config['swiftAnonUser'] ) + ? $config['swiftAnonUser'] + : ''; + $this->shardViaHashLevels = isset( $config['shardViaHashLevels'] ) + ? $config['shardViaHashLevels'] + : ''; + } + + /** + * @see FileBackendStore::resolveContainerPath() + */ + protected function resolveContainerPath( $container, $relStoragePath ) { + if ( strlen( urlencode( $relStoragePath ) ) > 1024 ) { + return null; // too long for Swift + } + return $relStoragePath; + } + + /** + * @see FileBackendStore::isPathUsableInternal() + */ + public function isPathUsableInternal( $storagePath ) { + list( $container, $rel ) = $this->resolveStoragePathReal( $storagePath ); + if ( $rel === null ) { + return false; // invalid + } + + try { + $this->getContainer( $container ); + return true; // container exists + } catch ( NoSuchContainerException $e ) { + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? 
+ $this->logException( $e, __METHOD__, array( 'path' => $storagePath ) ); + } + + return false; + } + + /** + * @see FileBackendStore::doCreateInternal() + */ + protected function doCreateInternal( array $params ) { + $status = Status::newGood(); + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the destination container and object + try { + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Get a SHA-1 hash of the object + $sha1Hash = wfBaseConvert( sha1( $params['content'] ), 16, 36, 31 ); + + // (c) Actually create the object + try { + // Create a fresh CF_Object with no fields preloaded. + // We don't want to preserve headers, metadata, and such. + $obj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD + // Note: metadata keys stored as [Upper case char][[Lower case char]...] + $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + // Manually set the ETag (https://github.com/rackspace/php-cloudfiles/issues/59). + // The MD5 here will be checked within Swift against its own MD5. 
+ $obj->set_etag( md5( $params['content'] ) ); + // Use the same content type as StreamFile for security + $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); + // Actually write the object in Swift + $obj->write( $params['content'] ); + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doStoreInternal() + */ + protected function doStoreInternal( array $params ) { + $status = Status::newGood(); + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the destination container and object + try { + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Get a SHA-1 hash of the object + $sha1Hash = sha1_file( $params['src'] ); + if ( $sha1Hash === false ) { // source doesn't exist? 
+ $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } + $sha1Hash = wfBaseConvert( $sha1Hash, 16, 36, 31 ); + + // (c) Actually store the object + try { + // Create a fresh CF_Object with no fields preloaded. + // We don't want to preserve headers, metadata, and such. + $obj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD + // Note: metadata keys stored as [Upper case char][[Lower case char]...] + $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + // The MD5 here will be checked within Swift against its own MD5. + $obj->set_etag( md5_file( $params['src'] ) ); + // Use the same content type as StreamFile for security + $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); + // Actually write the object in Swift + $obj->load_from_filename( $params['src'], True ); // calls $obj->write() + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } catch ( IOException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doCopyInternal() + */ + protected function doCopyInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the source/destination containers and destination object + try { + $sContObj = $this->getContainer( $srcCont ); + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Actually copy the file to the destination + try { + $sContObj->copy_object_to( $srcRel, $dContObj, $dstRel ); + } catch ( NoSuchObjectException $e ) { // source object does not exist + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doDeleteInternal() + */ + protected function doDeleteInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + try { + $sContObj = $this->getContainer( $srcCont ); + $sContObj->delete_object( $srcRel ); + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } catch ( NoSuchObjectException $e ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doPrepareInternal() + */ + protected function doPrepareInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + // (a) Check if container already exists + try { + $contObj = $this->getContainer( $fullCont ); + // NoSuchContainerException not thrown: container must exist + return $status; // already exists + } catch ( NoSuchContainerException $e ) { + // NoSuchContainerException thrown: container does not exist + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Create container as needed + try { + $contObj = $this->createContainer( $fullCont ); + if ( $this->swiftAnonUser != '' ) { + // Make container public to end-users... + $status->merge( $this->setContainerAccess( + $contObj, + array( $this->auth->username, $this->swiftAnonUser ), // read + array( $this->auth->username ) // write + ) ); + } + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + return $status; + } + + /** + * @see FileBackendStore::doSecureInternal() + */ + protected function doSecureInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + if ( $this->swiftAnonUser != '' ) { + // Restrict container from end-users... + try { + // doPrepareInternal() should have been called, + // so the Swift container should already exist... + $contObj = $this->getContainer( $fullCont ); // normally a cache hit + // NoSuchContainerException not thrown: container must exist + if ( !isset( $contObj->mw_wasSecured ) ) { + $status->merge( $this->setContainerAccess( + $contObj, + array( $this->auth->username ), // read + array( $this->auth->username ) // write + ) ); + // @TODO: when php-cloudfiles supports container + // metadata, we can make use of that to avoid RTTs + $contObj->mw_wasSecured = true; // avoid useless RTTs + } + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + } + + return $status; + } + + /** + * @see FileBackendStore::doCleanInternal() + */ + protected function doCleanInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + // Only containers themselves can be removed, all else is virtual + if ( $dir != '' ) { + return $status; // nothing to do + } + + // (a) Check the container + try { + $contObj = $this->getContainer( $fullCont, true ); + } catch ( NoSuchContainerException $e ) { + return $status; // ok, nothing to do + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Delete the container if empty + if ( $contObj->object_count == 0 ) { + try { + $this->deleteContainer( $fullCont ); + } catch ( NoSuchContainerException $e ) { + return $status; // race? + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + } + + return $status; + } + + /** + * @see FileBackendStore::doGetFileStat() + */ + protected function doGetFileStat( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return false; // invalid storage path + } + + $stat = false; + try { + $contObj = $this->getContainer( $srcCont ); + $srcObj = $contObj->get_object( $srcRel, $this->headersFromParams( $params ) ); + $this->addMissingMetadata( $srcObj, $params['src'] ); + $stat = array( + // Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW + 'mtime' => wfTimestamp( TS_MW, $srcObj->last_modified ), + 'size' => $srcObj->content_length, + 'sha1' => $srcObj->metadata['Sha1base36'] + ); + } catch ( NoSuchContainerException $e ) { + } catch ( NoSuchObjectException $e ) { + } catch ( InvalidResponseException $e ) { + $stat = null; + } catch ( Exception $e ) { // some other exception? 
+ $stat = null; + $this->logException( $e, __METHOD__, $params ); + } + + return $stat; + } + + /** + * Fill in any missing object metadata and save it to Swift + * + * @param $obj CF_Object + * @param $path string Storage path to object + * @return bool Success + * @throws Exception cloudfiles exceptions + */ + protected function addMissingMetadata( CF_Object $obj, $path ) { + if ( isset( $obj->metadata['Sha1base36'] ) ) { + return true; // nothing to do + } + $status = Status::newGood(); + $scopeLockS = $this->getScopedFileLocks( array( $path ), LockManager::LOCK_UW, $status ); + if ( $status->isOK() ) { + $tmpFile = $this->getLocalCopy( array( 'src' => $path, 'latest' => 1 ) ); + if ( $tmpFile ) { + $hash = $tmpFile->getSha1Base36(); + if ( $hash !== false ) { + $obj->metadata['Sha1base36'] = $hash; + $obj->sync_metadata(); // save to Swift + return true; // success + } + } + } + $obj->metadata['Sha1base36'] = false; + return false; // failed + } + + /** + * @see FileBackend::getFileContents() + */ + public function getFileContents( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return false; // invalid storage path + } + + if ( !$this->fileExists( $params ) ) { + return null; + } + + $data = false; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD request + $data = $obj->read( $this->headersFromParams( $params ) ); + } catch ( NoSuchContainerException $e ) { + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? 
+ $this->logException( $e, __METHOD__, $params ); + } + + return $data; + } + + /** + * @see FileBackendStore::getFileListInternal() + */ + public function getFileListInternal( $fullCont, $dir, array $params ) { + return new SwiftFileBackendFileList( $this, $fullCont, $dir ); + } + + /** + * Do not call this function outside of SwiftFileBackendFileList + * + * @param $fullCont string Resolved container name + * @param $dir string Resolved storage directory with no trailing slash + * @param $after string Storage path of file to list items after + * @param $limit integer Max number of items to list + * @return Array + */ + public function getFileListPageInternal( $fullCont, $dir, $after, $limit ) { + $files = array(); + + try { + $container = $this->getContainer( $fullCont ); + $prefix = ( $dir == '' ) ? null : "{$dir}/"; + $files = $container->list_objects( $limit, $after, $prefix ); + } catch ( NoSuchContainerException $e ) { + } catch ( NoSuchObjectException $e ) { + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? 
+ $this->logException( $e, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); + } + + return $files; + } + + /** + * @see FileBackendStore::doGetFileSha1base36() + */ + public function doGetFileSha1base36( array $params ) { + $stat = $this->getFileStat( $params ); + if ( $stat ) { + return $stat['sha1']; + } else { + return false; + } + } + + /** + * Stream the Swift object at $params['src'] to php://output. + * + * @see FileBackendStore::doStreamFile() + * @param $params Array Reads 'src'; also passed to headersFromParams() + * @return Status + */ + protected function doStreamFile( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; // invalid storage path: do not look up a null container + } + + try { + $cont = $this->getContainer( $srcCont ); + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-stream', $params['src'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-stream', $params['src'] ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + try { + $output = fopen( 'php://output', 'wb' ); + $obj = new CF_Object( $cont, $srcRel, false, false ); // skip HEAD request + $obj->stream( $output, $this->headersFromParams( $params ) ); + } catch ( InvalidResponseException $e ) { // 404? connection problem? + $status->fatal( 'backend-fail-stream', $params['src'] ); + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-stream', $params['src'] ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::getLocalCopy() + */ + public function getLocalCopy( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return null; + } + + if ( !$this->fileExists( $params ) ) { + return null; + } + + $tmpFile = null; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + // Get source file extension + $ext = FileBackend::extensionFromPath( $srcRel ); + // Create a new temporary file... + $tmpFile = TempFSFile::factory( wfBaseName( $srcRel ) . '_', $ext ); + if ( $tmpFile ) { + $handle = fopen( $tmpFile->getPath(), 'wb' ); + if ( $handle ) { + $obj->stream( $handle, $this->headersFromParams( $params ) ); + fclose( $handle ); + } else { + $tmpFile = null; // couldn't open temp file + } + } + } catch ( NoSuchContainerException $e ) { + $tmpFile = null; + } catch ( InvalidResponseException $e ) { + $tmpFile = null; + } catch ( Exception $e ) { // some other exception? + $tmpFile = null; + $this->logException( $e, __METHOD__, $params ); + } + + return $tmpFile; + } + + /** + * Get headers to send to Swift when reading a file based + * on a FileBackend params array, e.g. that of getLocalCopy(). + * $params is currently only checked for a 'latest' flag. 
+ * + * @param $params Array + * @return Array + */ + protected function headersFromParams( array $params ) { + $hdrs = array(); + if ( !empty( $params['latest'] ) ) { + $hdrs[] = 'X-Newest: true'; + } + return $hdrs; + } + + /** + * Set read/write permissions for a Swift container + * + * @param $contObj CF_Container Swift container + * @param $readGrps Array Swift users who can read (account:user) + * @param $writeGrps Array Swift users who can write (account:user) + * @return Status + */ + protected function setContainerAccess( + CF_Container $contObj, array $readGrps, array $writeGrps + ) { + $creds = $contObj->cfs_auth->export_credentials(); + + $url = $creds['storage_url'] . '/' . rawurlencode( $contObj->name ); + + // Note: 10 second timeout consistent with php-cloudfiles + $req = new CurlHttpRequest( $url, array( 'method' => 'POST', 'timeout' => 10 ) ); + $req->setHeader( 'X-Auth-Token', $creds['auth_token'] ); + $req->setHeader( 'X-Container-Read', implode( ',', $readGrps ) ); + $req->setHeader( 'X-Container-Write', implode( ',', $writeGrps ) ); + + return $req->execute(); // should return 204 + } + + /** + * Get a connection to the Swift proxy + * + * @return CF_Connection|false + * @throws InvalidResponseException + */ + protected function getConnection() { + if ( $this->conn === false ) { + throw new InvalidResponseException; // failed last attempt + } + // Session keys expire after a while, so we renew them periodically + if ( $this->conn && ( time() - $this->connStarted ) > $this->authTTL ) { + $this->conn->close(); // close active cURL connections + $this->conn = null; + } + // Authenticate with proxy and get a session key... 
+ if ( $this->conn === null ) { + $this->connContainers = array(); + try { + $this->auth->authenticate(); + $this->conn = new CF_Connection( $this->auth ); + $this->connStarted = time(); + } catch ( AuthenticationException $e ) { + $this->conn = false; // don't keep re-trying + } catch ( InvalidResponseException $e ) { + $this->conn = false; // don't keep re-trying + } + } + if ( !$this->conn ) { + throw new InvalidResponseException; // auth/connection problem + } + return $this->conn; + } + + /** + * @see FileBackendStore::doClearCache() + */ + protected function doClearCache( array $paths = null ) { + $this->connContainers = array(); // clear container object cache + } + + /** + * Get a Swift container object, possibly from process cache. + * Use $reCache if the file count or byte count is needed. + * + * @param $container string Container name + * @param $reCache bool Refresh the process cache + * @return CF_Container + */ + protected function getContainer( $container, $reCache = false ) { + $conn = $this->getConnection(); // Swift proxy connection + if ( $reCache ) { + unset( $this->connContainers[$container] ); // purge cache + } + if ( !isset( $this->connContainers[$container] ) ) { + $contObj = $conn->get_container( $container ); + // NoSuchContainerException not thrown: container must exist + if ( count( $this->connContainers ) >= $this->maxContCacheSize ) { // trim cache? 
+ reset( $this->connContainers ); + $key = key( $this->connContainers ); + unset( $this->connContainers[$key] ); + } + $this->connContainers[$container] = $contObj; // cache it + } + return $this->connContainers[$container]; + } + + /** + * Create a Swift container + * + * @param $container string Container name + * @return CF_Container + */ + protected function createContainer( $container ) { + $conn = $this->getConnection(); // Swift proxy connection + $contObj = $conn->create_container( $container ); + $this->connContainers[$container] = $contObj; // cache it + return $contObj; + } + + /** + * Delete a Swift container + * + * @param $container string Container name + * @return void + */ + protected function deleteContainer( $container ) { + $conn = $this->getConnection(); // Swift proxy connection + $conn->delete_container( $container ); + unset( $this->connContainers[$container] ); // purge cache + } + + /** + * Log an unexpected exception for this backend + * + * @param $e Exception + * @param $func string + * @param $params Array + * @return void + */ + protected function logException( Exception $e, $func, array $params ) { + wfDebugLog( 'SwiftBackend', + get_class( $e ) . " in '{$func}' (given '" . serialize( $params ) . "')" . + ( $e instanceof InvalidResponseException + ? ": {$e->getMessage()}" + : "" + ) + ); + } +} + +/** + * SwiftFileBackend helper class to page through object listings. + * Swift also has a listing limit of 10,000 objects for sanity. + * Do not use this class from places outside SwiftFileBackend. 
+ * + * @ingroup FileBackend + */ +class SwiftFileBackendFileList implements Iterator { + /** @var Array */ + protected $bufferIter = array(); + protected $bufferAfter = null; // string; list items *after* this path + protected $pos = 0; // integer + + /** @var SwiftFileBackend */ + protected $backend; + protected $container; // + protected $dir; // string storage directory + protected $suffixStart; // integer + + const PAGE_SIZE = 5000; // file listing buffer size + + /** + * @param $backend SwiftFileBackend + * @param $fullCont string Resolved container name + * @param $dir string Resolved directory relative to container + */ + public function __construct( SwiftFileBackend $backend, $fullCont, $dir ) { + $this->backend = $backend; + $this->container = $fullCont; + $this->dir = $dir; + if ( substr( $this->dir, -1 ) === '/' ) { + $this->dir = substr( $this->dir, 0, -1 ); // remove trailing slash + } + if ( $this->dir == '' ) { // whole container + $this->suffixStart = 0; + } else { // dir within container + $this->suffixStart = strlen( $this->dir ) + 1; // size of "path/to/dir/" + } + } + + public function current() { + return substr( current( $this->bufferIter ), $this->suffixStart ); + } + + public function key() { + return $this->pos; + } + + public function next() { + // Advance to the next file in the page + next( $this->bufferIter ); + ++$this->pos; + // Check if there are no files left in this page and + // advance to the next page if this page was not empty. 
+ if ( !$this->valid() && count( $this->bufferIter ) ) { + $this->bufferAfter = end( $this->bufferIter ); + $this->bufferIter = $this->backend->getFileListPageInternal( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE + ); + } + } + + public function rewind() { + $this->pos = 0; + $this->bufferAfter = null; + $this->bufferIter = $this->backend->getFileListPageInternal( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE + ); + } + + public function valid() { + return ( current( $this->bufferIter ) !== false ); // no paths can have this value + } +} diff --git a/includes/filerepo/backend/TempFSFile.php b/includes/filerepo/backend/TempFSFile.php new file mode 100644 index 00000000..7843d6cd --- /dev/null +++ b/includes/filerepo/backend/TempFSFile.php @@ -0,0 +1,92 @@ +<?php +/** + * @file + * @ingroup FileBackend + */ + +/** + * This class is used to hold the location and do limited manipulation + * of files stored temporarily (usually this will be $wgTmpDirectory) + * + * @ingroup FileBackend + */ +class TempFSFile extends FSFile { + protected $canDelete = false; // bool; garbage collect the temp file + + /** @var Array of active temp files to purge on shutdown */ + protected static $instances = array(); + + /** + * Make a new temporary file on the file system. + * Temporary files may be purged when the file object falls out of scope. + * + * @param $prefix string + * @param $extension string + * @return TempFSFile|null + */ + public static function factory( $prefix, $extension = '' ) { + $base = wfTempDir() . '/' . $prefix . dechex( mt_rand( 0, 99999999 ) ); + $ext = ( $extension != '' ) ? 
".{$extension}" : ""; + for ( $attempt = 1; true; $attempt++ ) { + $path = "{$base}-{$attempt}{$ext}"; + wfSuppressWarnings(); + $newFileHandle = fopen( $path, 'x' ); + wfRestoreWarnings(); + if ( $newFileHandle ) { + fclose( $newFileHandle ); + break; // got it + } + if ( $attempt >= 15 ) { + return null; // give up + } + } + $tmpFile = new self( $path ); + $tmpFile->canDelete = true; // safely instantiated + return $tmpFile; + } + + /** + * Purge this file off the file system + * + * @return bool Success + */ + public function purge() { + $this->canDelete = false; // done + wfSuppressWarnings(); + $ok = unlink( $this->path ); + wfRestoreWarnings(); + return $ok; + } + + /** + * Clean up the temporary file only after an object goes out of scope + * + * @param $object Object + * @return void + */ + public function bind( $object ) { + if ( is_object( $object ) ) { + $object->tempFSFileReferences[] = $this; + } + } + + /** + * Set flag to not clean up after the temporary file + * + * @return void + */ + public function preserve() { + $this->canDelete = false; + } + + /** + * Cleans up after the temporary file by deleting it + */ + function __destruct() { + if ( $this->canDelete ) { + wfSuppressWarnings(); + unlink( $this->path ); + wfRestoreWarnings(); + } + } +} diff --git a/includes/filerepo/backend/lockmanager/DBLockManager.php b/includes/filerepo/backend/lockmanager/DBLockManager.php new file mode 100644 index 00000000..045056ea --- /dev/null +++ b/includes/filerepo/backend/lockmanager/DBLockManager.php @@ -0,0 +1,469 @@ +<?php + +/** + * Version of LockManager based on using DB table locks. + * This is meant for multi-wiki systems that may share files. + * All locks are blocking, so it might be useful to set a small + * lock-wait timeout via server config to curtail deadlocks. + * + * All lock requests for a resource, identified by a hash string, will map + * to one bucket. 
Each bucket maps to one or several peer DBs, each on their + * own server, all having the filelocks.sql tables (with row-level locking). + * A majority of peer DBs must agree for a lock to be acquired. + * + * Caching is used to avoid hitting servers that are down. + * + * @ingroup LockManager + * @since 1.19 + */ +class DBLockManager extends LockManager { + /** @var Array Map of DB names to server config */ + protected $dbServers; // (DB name => server config array) + /** @var Array Map of bucket indexes to peer DB lists */ + protected $dbsByBucket; // (bucket index => (ldb1, ldb2, ...)) + /** @var BagOStuff */ + protected $statusCache; + + protected $lockExpiry; // integer number of seconds + protected $safeDelay; // integer number of seconds + + protected $session = 0; // random integer + /** @var Array Map of Database connections (DB name => Database) */ + protected $conns = array(); + + /** + * Construct a new instance from configuration. + * + * $config parameters include: + * 'dbServers' : Associative array of DB names to server configuration. + * Configuration is an associative array that includes: + * 'host' - DB server name + * 'dbname' - DB name + * 'type' - DB type (mysql,postgres,...) + * 'user' - DB user + * 'password' - DB user password + * 'tablePrefix' - DB table prefix + * 'flags' - DB flags (see DatabaseBase) + * 'dbsByBucket' : Array of 1-16 consecutive integer keys, starting from 0, + * each having an odd-numbered list of DB names (peers) as values. + * Any DB named 'localDBMaster' will automatically use the DB master + * settings for this wiki (without the need for a dbServers entry). + * 'lockExpiry' : Lock timeout (seconds) for dropped connections. [optional] + * This tells the DB server how long to wait before assuming + * connection failure and releasing all the locks for a session. + * + * @param Array $config + */ + public function __construct( array $config ) { + $this->dbServers = isset( $config['dbServers'] ) + ? 
$config['dbServers'] + : array(); // likely just using 'localDBMaster' + // Sanitize dbsByBucket config to prevent PHP errors + $this->dbsByBucket = array_filter( $config['dbsByBucket'], 'is_array' ); + $this->dbsByBucket = array_values( $this->dbsByBucket ); // consecutive + + if ( isset( $config['lockExpiry'] ) ) { + $this->lockExpiry = $config['lockExpiry']; + } else { + $met = ini_get( 'max_execution_time' ); + $this->lockExpiry = $met ? $met : 60; // use some sane amount if 0 + } + $this->safeDelay = ( $this->lockExpiry <= 0 ) + ? 60 // pick a safe-ish number to match DB timeout default + : $this->lockExpiry; // cover worst case + + foreach ( $this->dbsByBucket as $bucket ) { + if ( count( $bucket ) > 1 ) { + // Tracks peers that couldn't be queried recently to avoid lengthy + // connection timeouts. This is useless if each bucket has one peer. + $this->statusCache = wfGetMainCache(); + break; + } + } + + $this->session = ''; + for ( $i = 0; $i < 5; $i++ ) { + $this->session .= mt_rand( 0, 2147483647 ); + } + $this->session = wfBaseConvert( sha1( $this->session ), 16, 36, 31 ); + } + + /** + * @see LockManager::doLock() + */ + protected function doLock( array $paths, $type ) { + $status = Status::newGood(); + + $pathsToLock = array(); + // Get locks that need to be acquired (buckets => locks)... + foreach ( $paths as $path ) { + if ( isset( $this->locksHeld[$path][$type] ) ) { + ++$this->locksHeld[$path][$type]; + } elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { + $this->locksHeld[$path][$type] = 1; + } else { + $bucket = $this->getBucketFromKey( $path ); + $pathsToLock[$bucket][] = $path; + } + } + + $lockedPaths = array(); // files locked in this attempt + // Attempt to acquire these locks... + foreach ( $pathsToLock as $bucket => $paths ) { + // Try to acquire the locks for this bucket + $res = $this->doLockingQueryAll( $bucket, $paths, $type ); + if ( $res === 'cantacquire' ) { + // Resources already locked by another process. 
+ // Abort and unlock everything we just locked. + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } elseif ( $res !== true ) { + // Couldn't contact any DBs for this bucket. + // Abort and unlock everything we just locked. + $status->fatal( 'lockmanager-fail-db-bucket', $bucket ); + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } + // Record these locks as active + foreach ( $paths as $path ) { + $this->locksHeld[$path][$type] = 1; // locked + } + // Keep track of what locks were made in this attempt + $lockedPaths = array_merge( $lockedPaths, $paths ); + } + + return $status; + } + + /** + * @see LockManager::doUnlock() + */ + protected function doUnlock( array $paths, $type ) { + $status = Status::newGood(); + + foreach ( $paths as $path ) { + if ( !isset( $this->locksHeld[$path] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } elseif ( !isset( $this->locksHeld[$path][$type] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } else { + --$this->locksHeld[$path][$type]; + if ( $this->locksHeld[$path][$type] <= 0 ) { + unset( $this->locksHeld[$path][$type] ); + } + if ( !count( $this->locksHeld[$path] ) ) { + unset( $this->locksHeld[$path] ); // no SH or EX locks left for key + } + } + } + + // Reference count the locks held and COMMIT when zero + if ( !count( $this->locksHeld ) ) { + $status->merge( $this->finishLockTransactions() ); + } + + return $status; + } + + /** + * Get a connection to a lock DB and acquire locks on $paths. + * This does not use GET_LOCK() per http://bugs.mysql.com/bug.php?id=1118. 
+ * + * @param $lockDb string + * @param $paths Array + * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @return bool Resources able to be locked + * @throws DBError + */ + protected function doLockingQuery( $lockDb, array $paths, $type ) { + if ( $type == self::LOCK_EX ) { // writer locks + $db = $this->getConnection( $lockDb ); + if ( !$db ) { + return false; // bad config + } + $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + # Build up values for INSERT clause + $data = array(); + foreach ( $keys as $key ) { + $data[] = array( 'fle_key' => $key ); + } + # Wait on any existing writers and block new ones if we get in + $db->insert( 'filelocks_exclusive', $data, __METHOD__ ); + } + return true; + } + + /** + * Attempt to acquire locks with the peers for a bucket. + * This should avoid throwing any exceptions. + * + * @param $bucket integer + * @param $paths Array List of resource keys to lock + * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @return bool|string One of (true, 'cantacquire', 'dberrors') + */ + protected function doLockingQueryAll( $bucket, array $paths, $type ) { + $yesVotes = 0; // locks made on trustable DBs + $votesLeft = count( $this->dbsByBucket[$bucket] ); // remaining DBs + $quorum = floor( $votesLeft/2 + 1 ); // simple majority + // Get votes for each DB, in order, until we have enough... 
+ foreach ( $this->dbsByBucket[$bucket] as $lockDb ) { + // Check that DB is not *known* to be down + if ( $this->cacheCheckFailures( $lockDb ) ) { + try { + // Attempt to acquire the lock on this DB + if ( !$this->doLockingQuery( $lockDb, $paths, $type ) ) { + return 'cantacquire'; // vetoed; resource locked + } + ++$yesVotes; // success for this peer + if ( $yesVotes >= $quorum ) { + return true; // lock obtained + } + } catch ( DBConnectionError $e ) { + $this->cacheRecordFailure( $lockDb ); + } catch ( DBError $e ) { + if ( $this->lastErrorIndicatesLocked( $lockDb ) ) { + return 'cantacquire'; // vetoed; resource locked + } + } + } + --$votesLeft; + $votesNeeded = $quorum - $yesVotes; + if ( $votesNeeded > $votesLeft ) { + // In "trust cache" mode we don't have to meet the quorum + break; // short-circuit + } + } + // At this point, we must not have met the quorum + return 'dberrors'; // not enough votes to ensure correctness + } + + /** + * Get (or reuse) a connection to a lock DB + * + * @param $lockDb string + * @return Database|null Null if $lockDb has no usable configuration + * @throws DBError + */ + protected function getConnection( $lockDb ) { + if ( !isset( $this->conns[$lockDb] ) ) { + $db = null; + if ( $lockDb === 'localDBMaster' ) { + $lb = wfGetLBFactory()->newMainLB(); + $db = $lb->getConnection( DB_MASTER ); + } elseif ( isset( $this->dbServers[$lockDb] ) ) { + $config = $this->dbServers[$lockDb]; + $db = DatabaseBase::factory( $config['type'], $config ); + } + if ( !$db ) { + return null; // config error? + } + $this->conns[$lockDb] = $db; + $this->conns[$lockDb]->clearFlag( DBO_TRX ); + # If the connection drops, try to avoid letting the DB rollback + # and release the locks before the file operations are finished. + # This won't handle the case of DB server restarts however. 
+ $options = array(); + if ( $this->lockExpiry > 0 ) { + $options['connTimeout'] = $this->lockExpiry; + } + $this->conns[$lockDb]->setSessionOptions( $options ); + $this->initConnection( $lockDb, $this->conns[$lockDb] ); + } + if ( !$this->conns[$lockDb]->trxLevel() ) { + $this->conns[$lockDb]->begin(); // start transaction + } + return $this->conns[$lockDb]; + } + + /** + * Do additional initialization for new lock DB connection + * + * @param $lockDb string + * @param $db DatabaseBase + * @return void + * @throws DBError + */ + protected function initConnection( $lockDb, DatabaseBase $db ) {} + + /** + * Roll back the open transaction on every lock DB connection, discarding its lock rows. + * This should avoid throwing any exceptions. + * + * @return Status + */ + protected function finishLockTransactions() { + $status = Status::newGood(); + foreach ( $this->conns as $lockDb => $db ) { + if ( $db->trxLevel() ) { // in transaction + try { + $db->rollback(); // finish transaction and kill any rows + } catch ( DBError $e ) { + $status->fatal( 'lockmanager-fail-db-release', $lockDb ); + } + } + } + return $status; + } + + /** + * Check if the last DB error for $lockDb indicates + * that a requested resource was locked by another process. + * This should avoid throwing any exceptions. + * + * @param $lockDb string + * @return bool + */ + protected function lastErrorIndicatesLocked( $lockDb ) { + if ( isset( $this->conns[$lockDb] ) ) { // sanity + $db = $this->conns[$lockDb]; + return ( $db->wasDeadlock() || $db->wasLockTimeout() ); + } + return false; + } + + /** + * Checks if the DB has not recently had connection/query errors. + * This just avoids wasting time on doomed connection attempts. 
+ * + * @param $lockDb string + * @return bool + */ + protected function cacheCheckFailures( $lockDb ) { + if ( $this->statusCache && $this->safeDelay > 0 ) { + $path = $this->getMissKey( $lockDb ); + $misses = $this->statusCache->get( $path ); + return !$misses; + } + return true; + } + + /** + * Log a lock request failure to the cache + * + * @param $lockDb string + * @return bool Success + */ + protected function cacheRecordFailure( $lockDb ) { + if ( $this->statusCache && $this->safeDelay > 0 ) { + $path = $this->getMissKey( $lockDb ); + $misses = $this->statusCache->get( $path ); + if ( $misses ) { + return $this->statusCache->incr( $path ); + } else { + return $this->statusCache->add( $path, 1, $this->safeDelay ); + } + } + return true; + } + + /** + * Get a cache key for recent query misses for a DB + * + * @param $lockDb string + * @return string + */ + protected function getMissKey( $lockDb ) { + return 'lockmanager:querymisses:' . str_replace( ' ', '_', $lockDb ); + } + + /** + * Get the bucket for resource path. + * This should avoid throwing any exceptions. + * + * @param $path string + * @return integer + */ + protected function getBucketFromKey( $path ) { + $prefix = substr( sha1( $path ), 0, 2 ); // first 2 hex chars (8 bits) + return intval( base_convert( $prefix, 16, 10 ) ) % count( $this->dbsByBucket ); + } + + /** + * Make sure remaining locks get cleared for sanity + */ + function __destruct() { + foreach ( $this->conns as $lockDb => $db ) { + if ( $db->trxLevel() ) { // in transaction + try { + $db->rollback(); // finish transaction and kill any rows + } catch ( DBError $e ) { + // oh well + } + } + $db->close(); + } + } +} + +/** + * MySQL version of DBLockManager that supports shared locks. + * All locks are non-blocking, which avoids deadlocks. 
/**
 * MySQL version of DBLockManager that supports shared locks.
 * All locks are non-blocking, which avoids deadlocks.
 *
 * Lock state is kept as rows in two tables: 'filelocks_shared' (one row per
 * key and session) and 'filelocks_exclusive' (one row per key).
 *
 * @ingroup LockManager
 */
class MySqlLockManager extends DBLockManager {
	/** @var Array Mapping of lock types to the type actually used */
	protected $lockTypeMap = array(
		self::LOCK_SH => self::LOCK_SH,
		self::LOCK_UW => self::LOCK_SH,
		self::LOCK_EX => self::LOCK_EX
	);

	/**
	 * Switch the session to READ UNCOMMITTED isolation.
	 *
	 * @param $lockDb string
	 * @param $db DatabaseBase
	 * @return void
	 */
	protected function initConnection( $lockDb, DatabaseBase $db ) {
		# Let this transaction see lock rows from other transactions
		$db->query( "SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;" );
	}

	/**
	 * Try to take locks of the given type on $paths using the $lockDb connection.
	 * The order of the insert/select statements below is what makes the
	 * reader/writer checks safe; do not reorder them.
	 *
	 * @param $lockDb string
	 * @param $paths Array List of resource paths
	 * @param $type integer LOCK_SH for reader locks; anything else is treated as a writer lock
	 * @return bool True if nothing blocked the request; false if blocked or no connection
	 */
	protected function doLockingQuery( $lockDb, array $paths, $type ) {
		$db = $this->getConnection( $lockDb );
		if ( !$db ) {
			return false;
		}
		// Paths are identified in the tables by their base 36 SHA-1
		$keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) );
		# Build up values for INSERT clause
		$data = array();
		foreach ( $keys as $key ) {
			$data[] = array( 'fls_key' => $key, 'fls_session' => $this->session );
		}
		# Block new writers...
		$db->insert( 'filelocks_shared', $data, __METHOD__, array( 'IGNORE' ) );
		# Actually do the locking queries...
		if ( $type == self::LOCK_SH ) { // reader locks
			# Bail if there are any existing writers...
			$blocked = $db->selectField( 'filelocks_exclusive', '1',
				array( 'fle_key' => $keys ),
				__METHOD__
			);
			# Prospective writers that haven't yet updated filelocks_exclusive
			# will recheck filelocks_shared after doing so and bail due to our entry.
		} else { // writer locks
			$encSession = $db->addQuotes( $this->session );
			# Bail if there are any existing writers...
			# This may detect readers, but the safe check for them is below.
			# Note: if two writers come at the same time, both bail :)
			$blocked = $db->selectField( 'filelocks_shared', '1',
				array( 'fls_key' => $keys, "fls_session != $encSession" ),
				__METHOD__
			);
			if ( !$blocked ) {
				# Build up values for INSERT clause
				$data = array();
				foreach ( $keys as $key ) {
					$data[] = array( 'fle_key' => $key );
				}
				# Block new readers/writers...
				$db->insert( 'filelocks_exclusive', $data, __METHOD__ );
				# Bail if there are any existing readers...
				$blocked = $db->selectField( 'filelocks_shared', '1',
					array( 'fls_key' => $keys, "fls_session != $encSession" ),
					__METHOD__
				);
			}
		}
		return !$blocked;
	}
}
/**
 * Simple version of LockManager based on using FS lock files.
 * All locks are non-blocking, which avoids deadlocks.
 *
 * This should work fine for small sites running off one server.
 * Do not use this with 'lockDirectory' set to an NFS mount unless the
 * NFS client is at least version 2.6.12. Otherwise, the BSD flock()
 * locks will be ignored; see http://nfs.sourceforge.net/#section_d.
 *
 * @ingroup LockManager
 * @since 1.19
 */
class FSLockManager extends LockManager {
	/** @var Array Mapping of lock types to the type actually used */
	protected $lockTypeMap = array(
		self::LOCK_SH => self::LOCK_SH,
		self::LOCK_UW => self::LOCK_SH,
		self::LOCK_EX => self::LOCK_EX
	);

	protected $lockDir; // global dir for all servers

	/** @var Array Map of (locked key => lock type => lock file handle) */
	protected $handles = array();

	/**
	 * Construct a new instance from configuration.
	 *
	 * $config includes:
	 *     'lockDirectory' : Directory containing the lock files
	 *
	 * @param array $config
	 */
	function __construct( array $config ) {
		parent::__construct( $config );
		$this->lockDir = $config['lockDirectory'];
	}

	/**
	 * Lock every path in $paths, or none of them.
	 * On the first failure, all paths locked so far in this call are
	 * unlocked again and the failing status is returned.
	 *
	 * @param $paths Array List of resource paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	protected function doLock( array $paths, $type ) {
		$status = Status::newGood();

		$lockedPaths = array(); // files locked in this attempt
		foreach ( $paths as $path ) {
			$status->merge( $this->doSingleLock( $path, $type ) );
			if ( $status->isOK() ) {
				$lockedPaths[] = $path;
			} else {
				// Abort and unlock everything
				$status->merge( $this->doUnlock( $lockedPaths, $type ) );
				return $status;
			}
		}

		return $status;
	}

	/**
	 * Unlock every path in $paths, merging the per-path results.
	 *
	 * @param $paths Array List of resource paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	protected function doUnlock( array $paths, $type ) {
		$status = Status::newGood();

		foreach ( $paths as $path ) {
			$status->merge( $this->doSingleUnlock( $path, $type ) );
		}

		return $status;
	}

	/**
	 * Lock a single resource key
	 *
	 * @param $path string
	 * @param $type integer
	 * @return Status
	 */
	protected function doSingleLock( $path, $type ) {
		$status = Status::newGood();

		if ( isset( $this->locksHeld[$path][$type] ) ) {
			// Nested request for a lock type already held: just count it
			++$this->locksHeld[$path][$type];
		} elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) {
			// This process already holds the exclusive lock on the file,
			// so record the new type without opening another handle
			$this->locksHeld[$path][$type] = 1;
		} else {
			wfSuppressWarnings();
			$handle = fopen( $this->getLockPath( $path ), 'a+' );
			wfRestoreWarnings();
			if ( !$handle ) { // lock dir missing?
				wfMkdirParents( $this->lockDir );
				$handle = fopen( $this->getLockPath( $path ), 'a+' ); // try again
			}
			if ( $handle ) {
				// Either a shared or exclusive lock
				$lock = ( $type == self::LOCK_SH ) ? LOCK_SH : LOCK_EX;
				// LOCK_NB: fail immediately instead of waiting for the lock
				if ( flock( $handle, $lock | LOCK_NB ) ) {
					// Record this lock as active
					$this->locksHeld[$path][$type] = 1;
					$this->handles[$path][$type] = $handle;
				} else {
					fclose( $handle );
					$status->fatal( 'lockmanager-fail-acquirelock', $path );
				}
			} else {
				$status->fatal( 'lockmanager-fail-openlock', $path );
			}
		}

		return $status;
	}

	/**
	 * Unlock a single resource key
	 *
	 * Decrements the reference count for ($path, $type). When it reaches
	 * zero, the matching handle (if any) is unlocked and closed, and the
	 * lock file is deleted once no lock type remains held for $path.
	 *
	 * @param $path string
	 * @param $type integer
	 * @return Status
	 */
	protected function doSingleUnlock( $path, $type ) {
		$status = Status::newGood();

		if ( !isset( $this->locksHeld[$path] ) ) {
			$status->warning( 'lockmanager-notlocked', $path );
		} elseif ( !isset( $this->locksHeld[$path][$type] ) ) {
			$status->warning( 'lockmanager-notlocked', $path );
		} else {
			$handlesToClose = array();
			--$this->locksHeld[$path][$type];
			if ( $this->locksHeld[$path][$type] <= 0 ) {
				unset( $this->locksHeld[$path][$type] );
				// If a LOCK_SH comes in while we have a LOCK_EX, we don't
				// actually add a handler, so check for handler existence.
				if ( isset( $this->handles[$path][$type] ) ) {
					// Mark this handle to be unlocked and closed
					$handlesToClose[] = $this->handles[$path][$type];
					unset( $this->handles[$path][$type] );
				}
			}
			// Unlock handles to release locks and delete
			// any lock files that end up with no locks on them...
			if ( wfIsWindows() ) {
				// Windows: for any process, including this one,
				// calling unlink() on a locked file will fail
				$status->merge( $this->closeLockHandles( $path, $handlesToClose ) );
				$status->merge( $this->pruneKeyLockFiles( $path ) );
			} else {
				// Unix: unlink() can be used on files currently open by this
				// process and we must do so in order to avoid race conditions
				$status->merge( $this->pruneKeyLockFiles( $path ) );
				$status->merge( $this->closeLockHandles( $path, $handlesToClose ) );
			}
		}

		return $status;
	}

	/**
	 * Release the flock() on each handle and close it.
	 *
	 * @param $path string Resource path, used only in status messages
	 * @param $handlesToClose Array List of lock file handles
	 * @return Status Fatal if a lock could not be released, warning if a
	 *   handle could not be closed
	 */
	private function closeLockHandles( $path, array $handlesToClose ) {
		$status = Status::newGood();
		foreach ( $handlesToClose as $handle ) {
			wfSuppressWarnings();
			if ( !flock( $handle, LOCK_UN ) ) {
				$status->fatal( 'lockmanager-fail-releaselock', $path );
			}
			if ( !fclose( $handle ) ) {
				$status->warning( 'lockmanager-fail-closelock', $path );
			}
			wfRestoreWarnings();
		}
		return $status;
	}

	/**
	 * Delete the lock file for $path, and forget the path, once this
	 * process holds no lock of any type on it.
	 *
	 * @param $path string
	 * @return Status Warning if the lock file could not be deleted
	 */
	private function pruneKeyLockFiles( $path ) {
		$status = Status::newGood();
		if ( !count( $this->locksHeld[$path] ) ) {
			wfSuppressWarnings();
			# No locks are held for the lock file anymore
			if ( !unlink( $this->getLockPath( $path ) ) ) {
				$status->warning( 'lockmanager-fail-deletelock', $path );
			}
			wfRestoreWarnings();
			unset( $this->locksHeld[$path] );
			unset( $this->handles[$path] );
		}
		return $status;
	}

	/**
	 * Get the path to the lock file for a key
	 * @param $path string
	 * @return string
	 */
	protected function getLockPath( $path ) {
		$hash = self::sha1Base36( $path );
		return "{$this->lockDir}/{$hash}.lock";
	}

	/**
	 * Make sure remaining locks get cleared for sanity.
	 *
	 * Locks are reference counted, so each held type is released as many
	 * times as it was acquired. A single unlock per type would leave nested
	 * locks (count > 1) recorded as held and their lock files undeleted.
	 */
	function __destruct() {
		// $locks is a snapshot of the counts taken before any unlocking
		foreach ( $this->locksHeld as $path => $locks ) {
			// Exclusive first, then shared, as before
			foreach ( array( self::LOCK_EX, self::LOCK_SH ) as $type ) {
				$count = isset( $locks[$type] ) ? $locks[$type] : 0;
				for ( ; $count > 0; --$count ) {
					$this->doSingleUnlock( $path, $type );
				}
			}
		}
	}
}
/**
 * Manage locks using a lock daemon server.
 *
 * Version of LockManager based on using lock daemon servers.
 * This is meant for multi-wiki systems that may share files.
 * All locks are non-blocking, which avoids deadlocks.
 *
 * All lock requests for a resource, identified by a hash string, will map
 * to one bucket. Each bucket maps to one or several peer servers, each
 * running LockServerDaemon.php, listening on a designated TCP port.
 * A majority of peers must agree for a lock to be acquired.
 *
 * @ingroup LockManager
 * @since 1.19
 */
class LSLockManager extends LockManager {
	/** @var Array Mapping of lock types to the type actually used */
	protected $lockTypeMap = array(
		self::LOCK_SH => self::LOCK_SH,
		self::LOCK_UW => self::LOCK_SH,
		self::LOCK_EX => self::LOCK_EX
	);

	/** @var Array Map of server names to server config */
	protected $lockServers; // (server name => server config array)
	/** @var Array Map of bucket indexes to peer server lists */
	protected $srvsByBucket; // (bucket index => (lsrv1, lsrv2, ...))

	/** @var Array Map Server connections (server name => resource) */
	protected $conns = array();

	protected $connTimeout; // float number of seconds
	protected $session = ''; // random SHA-1 string

	/**
	 * Construct a new instance from configuration.
	 *
	 * $config parameters include:
	 *     'lockServers'  : Associative array of server names to configuration.
	 *                      Configuration is an associative array that includes:
	 *                      'host'    - IP address/hostname
	 *                      'port'    - TCP port
	 *                      'authKey' - Secret string the lock server uses
	 *     'srvsByBucket' : Array of 1-16 consecutive integer keys, starting from 0,
	 *                      each having an odd-numbered list of server names (peers) as values.
	 *     'connTimeout'  : Lock server connection attempt timeout. [optional]
	 *
	 * @param Array $config
	 */
	public function __construct( array $config ) {
		// Run the base constructor, as FSLockManager does
		parent::__construct( $config );

		$this->lockServers = $config['lockServers'];
		// Sanitize srvsByBucket config to prevent PHP errors
		$this->srvsByBucket = array_filter( $config['srvsByBucket'], 'is_array' );
		$this->srvsByBucket = array_values( $this->srvsByBucket ); // consecutive

		if ( isset( $config['connTimeout'] ) ) {
			$this->connTimeout = $config['connTimeout'];
		} else {
			$this->connTimeout = 3; // use some sane amount
		}

		// Session ID: base 36 SHA-1 of five concatenated mt_rand() values
		$this->session = '';
		for ( $i = 0; $i < 5; $i++ ) {
			$this->session .= mt_rand( 0, 2147483647 );
		}
		$this->session = wfBaseConvert( sha1( $this->session ), 16, 36, 31 );
	}

	/**
	 * Lock every path in $paths, or none of them.
	 *
	 * Paths this process already holds only get their reference count
	 * raised; the rest are grouped by bucket and requested from the peers.
	 * If any bucket fails, everything done in this call is undone,
	 * including the reference counts raised for already-held paths.
	 *
	 * @param $paths Array List of resource paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	protected function doLock( array $paths, $type ) {
		$status = Status::newGood();

		$pathsToLock = array(); // (bucket => paths needing a server request)
		$lockedPaths = array(); // paths locked or re-counted in this attempt
		// Get locks that need to be acquired (buckets => locks)...
		foreach ( $paths as $path ) {
			if ( isset( $this->locksHeld[$path][$type] ) ) {
				++$this->locksHeld[$path][$type];
				$lockedPaths[] = $path; // must be undone if a later bucket fails
			} elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) {
				$this->locksHeld[$path][$type] = 1;
				$lockedPaths[] = $path; // must be undone if a later bucket fails
			} else {
				$bucket = $this->getBucketFromKey( $path );
				$pathsToLock[$bucket][] = $path;
			}
		}

		// Attempt to acquire these locks...
		foreach ( $pathsToLock as $bucket => $bucketPaths ) {
			// Try to acquire the locks for this bucket
			$res = $this->doLockingRequestAll( $bucket, $bucketPaths, $type );
			if ( $res !== true ) {
				// Either the resources are locked by another process
				// ('cantacquire') or not enough peers voted ('srverrors').
				// Abort and unlock everything we just locked.
				foreach ( $bucketPaths as $path ) {
					$status->fatal( 'lockmanager-fail-acquirelock', $path );
				}
				$status->merge( $this->doUnlock( $lockedPaths, $type ) );
				return $status;
			}
			// Record these locks as active
			foreach ( $bucketPaths as $path ) {
				$this->locksHeld[$path][$type] = 1; // locked
			}
			// Keep track of what locks were made in this attempt
			$lockedPaths = array_merge( $lockedPaths, $bucketPaths );
		}

		return $status;
	}

	/**
	 * Lower the reference counts for $paths. Once this process holds no
	 * locks at all, a release is sent on every open server connection.
	 *
	 * @param $paths Array List of resource paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	protected function doUnlock( array $paths, $type ) {
		$status = Status::newGood();

		foreach ( $paths as $path ) {
			if ( !isset( $this->locksHeld[$path] ) ) {
				$status->warning( 'lockmanager-notlocked', $path );
			} elseif ( !isset( $this->locksHeld[$path][$type] ) ) {
				$status->warning( 'lockmanager-notlocked', $path );
			} else {
				--$this->locksHeld[$path][$type];
				if ( $this->locksHeld[$path][$type] <= 0 ) {
					unset( $this->locksHeld[$path][$type] );
				}
				if ( !count( $this->locksHeld[$path] ) ) {
					unset( $this->locksHeld[$path] ); // no SH or EX locks left for key
				}
			}
		}

		// Reference count the locks held and release locks when zero
		if ( !count( $this->locksHeld ) ) {
			$status->merge( $this->releaseLocks() );
		}

		return $status;
	}

	/**
	 * Get a connection to a lock server and acquire locks on $paths
	 *
	 * @param $lockSrv string
	 * @param $paths Array
	 * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH
	 * @return bool Resources able to be locked; also false when the server
	 *   could not be reached or gave any response other than 'ACQUIRED'
	 */
	protected function doLockingRequest( $lockSrv, array $paths, $type ) {
		if ( $type == self::LOCK_SH ) { // reader locks
			$type = 'SH';
		} elseif ( $type == self::LOCK_EX ) { // writer locks
			$type = 'EX';
		} else {
			return true; // ok...
		}

		// Send out the command and get the response...
		$keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) );
		$response = $this->sendCommand( $lockSrv, 'ACQUIRE', $type, $keys );

		return ( $response === 'ACQUIRED' );
	}

	/**
	 * Send a command and get back the response
	 *
	 * The line sent is "<session>:<key>:<action>:<type>:<values>", where
	 * <values> is the '|'-joined list and <key> is the SHA-1 of the session,
	 * action, type, values and the server's authKey concatenated.
	 *
	 * @param $lockSrv string
	 * @param $action string
	 * @param $type string
	 * @param $values Array
	 * @return string|false Trimmed response line, or false if there is no
	 *   connection or the write/read failed
	 */
	protected function sendCommand( $lockSrv, $action, $type, $values ) {
		$conn = $this->getConnection( $lockSrv );
		if ( !$conn ) {
			return false; // no connection
		}
		$authKey = $this->lockServers[$lockSrv]['authKey'];
		// Build of the command as a flat string...
		$values = implode( '|', $values );
		$key = sha1( $this->session . $action . $type . $values . $authKey );
		// Send out the command...
		if ( fwrite( $conn, "{$this->session}:$key:$action:$type:$values\n" ) === false ) {
			return false;
		}
		// Get the response...
		$response = fgets( $conn );
		if ( $response === false ) {
			return false;
		}
		return trim( $response );
	}

	/**
	 * Attempt to acquire locks with the peers for a bucket
	 *
	 * Peers are asked in order until a simple majority has agreed. The
	 * first peer that does not grant the lock ends the attempt with
	 * 'cantacquire'; because doLockingRequest() also returns false for an
	 * unreachable peer, that case is reported as 'cantacquire' too.
	 *
	 * @param $bucket integer
	 * @param $paths Array List of resource keys to lock
	 * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH
	 * @return bool|string One of (true, 'cantacquire', 'srverrors')
	 */
	protected function doLockingRequestAll( $bucket, array $paths, $type ) {
		$yesVotes = 0; // locks made on trustable servers
		$votesLeft = count( $this->srvsByBucket[$bucket] ); // remaining peers
		$quorum = floor( $votesLeft/2 + 1 ); // simple majority
		// Get votes for each peer, in order, until we have enough...
		foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) {
			// Attempt to acquire the lock on this peer
			if ( !$this->doLockingRequest( $lockSrv, $paths, $type ) ) {
				return 'cantacquire'; // vetoed; resource locked
			}
			++$yesVotes; // success for this peer
			if ( $yesVotes >= $quorum ) {
				return true; // lock obtained
			}
			--$votesLeft;
			$votesNeeded = $quorum - $yesVotes;
			if ( $votesNeeded > $votesLeft ) {
				// In "trust cache" mode we don't have to meet the quorum
				break; // short-circuit
			}
		}
		// At this point, we must not have met the quorum
		return 'srverrors'; // not enough votes to ensure correctness
	}

	/**
	 * Get (or reuse) a connection to a lock server
	 *
	 * New sockets are opened with connTimeout as the connect timeout and
	 * the same value as the stream read/write timeout.
	 *
	 * @param $lockSrv string
	 * @return resource|null Null if the connection could not be opened
	 */
	protected function getConnection( $lockSrv ) {
		if ( !isset( $this->conns[$lockSrv] ) ) {
			$cfg = $this->lockServers[$lockSrv];
			wfSuppressWarnings();
			$errno = $errstr = '';
			$conn = fsockopen( $cfg['host'], $cfg['port'], $errno, $errstr, $this->connTimeout );
			wfRestoreWarnings();
			if ( $conn === false ) {
				return null;
			}
			// Split the float timeout into whole seconds and microseconds
			$sec = floor( $this->connTimeout );
			$usec = floor( ( $this->connTimeout - floor( $this->connTimeout ) ) * 1e6 );
			stream_set_timeout( $conn, $sec, $usec );
			$this->conns[$lockSrv] = $conn;
		}
		return $this->conns[$lockSrv];
	}

	/**
	 * Release all locks that this session is holding
	 *
	 * Sends RELEASE_ALL on every connection opened so far.
	 *
	 * @return Status Fatal for each server that did not answer 'RELEASED_ALL'
	 */
	protected function releaseLocks() {
		$status = Status::newGood();
		foreach ( $this->conns as $lockSrv => $conn ) {
			$response = $this->sendCommand( $lockSrv, 'RELEASE_ALL', '', array() );
			if ( $response !== 'RELEASED_ALL' ) {
				$status->fatal( 'lockmanager-fail-svr-release', $lockSrv );
			}
		}
		return $status;
	}

	/**
	 * Get the bucket for resource path.
	 * This should avoid throwing any exceptions.
	 *
	 * @param $path string
	 * @return integer
	 */
	protected function getBucketFromKey( $path ) {
		$prefix = substr( sha1( $path ), 0, 2 ); // first 2 hex chars (8 bits)
		return intval( base_convert( $prefix, 16, 10 ) ) % count( $this->srvsByBucket );
	}

	/**
	 * Make sure remaining locks get cleared for sanity
	 */
	function __destruct() {
		$this->releaseLocks();
		foreach ( $this->conns as $conn ) {
			fclose( $conn );
		}
	}
}
/**
 * Base class for resource locking.
 *
 * Locks on resource keys can either be shared or exclusive.
 *
 * Implementations must keep track of what is locked by this process
 * in-memory and support nested locking calls (using reference counting).
 * At least LOCK_UW and LOCK_EX must be implemented. LOCK_SH can be a no-op.
 * Locks should either be non-blocking or have low wait timeouts.
 *
 * Subclasses should avoid throwing exceptions at all costs.
 *
 * @ingroup LockManager
 * @since 1.19
 */
abstract class LockManager {
	/* Lock types; stronger locks have higher values */
	const LOCK_SH = 1; // shared lock (for reads)
	const LOCK_UW = 2; // shared lock (for reads used to write elsewhere)
	const LOCK_EX = 3; // exclusive lock (for writes)

	/** @var Array Mapping of lock types to the type actually used */
	protected $lockTypeMap = array(
		self::LOCK_SH => self::LOCK_SH,
		self::LOCK_UW => self::LOCK_EX, // subclasses may use self::LOCK_SH
		self::LOCK_EX => self::LOCK_EX
	);

	/** @var Array Map of (resource path => lock type => count) */
	protected $locksHeld = array();

	/**
	 * Construct a new instance from configuration.
	 * The base class reads nothing from $config.
	 *
	 * @param $config Array
	 */
	public function __construct( array $config ) {}

	/**
	 * Lock the resources at the given abstract paths.
	 * Duplicate paths are dropped and $type is translated through
	 * $lockTypeMap before the request is handed to doLock().
	 *
	 * @param $paths Array List of resource names
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function lock( array $paths, $type = self::LOCK_EX ) {
		$uniquePaths = array_unique( $paths );
		$effectiveType = $this->lockTypeMap[$type];
		return $this->doLock( $uniquePaths, $effectiveType );
	}

	/**
	 * Unlock the resources at the given abstract paths.
	 * Duplicate paths are dropped and $type is translated through
	 * $lockTypeMap before the request is handed to doUnlock().
	 *
	 * @param $paths Array List of storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function unlock( array $paths, $type = self::LOCK_EX ) {
		$uniquePaths = array_unique( $paths );
		$effectiveType = $this->lockTypeMap[$type];
		return $this->doUnlock( $uniquePaths, $effectiveType );
	}

	/**
	 * Get the base 36 SHA-1 of a string, padded to 31 digits
	 *
	 * @param $path string
	 * @return string
	 */
	final protected static function sha1Base36( $path ) {
		$hexDigest = sha1( $path );
		return wfBaseConvert( $hexDigest, 16, 36, 31 );
	}

	/**
	 * Lock resources with the given keys and lock type.
	 * The result is returned unchanged by lock().
	 *
	 * @param $paths Array List of storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	abstract protected function doLock( array $paths, $type );

	/**
	 * Unlock resources with the given keys and lock type.
	 * The result is returned unchanged by unlock().
	 *
	 * @param $paths Array List of storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	abstract protected function doUnlock( array $paths, $type );
}
/**
 * Self releasing locks
 *
 * LockManager helper class to handle scoped locks, which
 * release when an object is destroyed or goes out of scope.
 *
 * @ingroup LockManager
 * @since 1.19
 */
class ScopedLock {
	/** @var LockManager */
	protected $manager;
	/** @var Status */
	protected $status;
	/** @var Array List of resource paths */
	protected $paths;

	protected $type; // integer lock type

	/**
	 * Instances are created through factory() only.
	 *
	 * @param $manager LockManager
	 * @param $paths Array List of storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @param $status Status
	 */
	protected function __construct(
		LockManager $manager, array $paths, $type, Status $status
	) {
		$this->manager = $manager;
		$this->paths = $paths;
		$this->status = $status;
		$this->type = $type;
	}

	/** Cloning is not available from outside the class */
	protected function __clone() {}

	/**
	 * Lock the given paths and wrap the lock in a ScopedLock.
	 * The lock is released when the returned object is destroyed.
	 * The outcome of the lock attempt is merged into $status.
	 *
	 * @param $manager LockManager
	 * @param $paths Array List of storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @param $status Status
	 * @return ScopedLock|null Returns null on failure
	 */
	public static function factory(
		LockManager $manager, array $paths, $type, Status $status
	) {
		$attempt = $manager->lock( $paths, $type );
		$status->merge( $attempt );
		if ( !$attempt->isOK() ) {
			return null;
		}
		return new self( $manager, $paths, $type, $status );
	}

	/**
	 * Unlock the paths and merge the outcome into the shared status.
	 * A status that was OK beforehand is forced back to OK afterwards.
	 */
	function __destruct() {
		$okBeforeUnlock = $this->status->isOK();
		$unlockStatus = $this->manager->unlock( $this->paths, $this->type );
		$this->status->merge( $unlockStatus );
		if ( $okBeforeUnlock ) {
			// Keep the status OK even if unlock() reported fatals
			$this->status->setResult( true, $this->status->value );
		}
	}
}
/**
 * LockManager implementation that locks nothing: every request
 * immediately reports success.
 * @since 1.19
 */
class NullLockManager extends LockManager {
	/**
	 * @param $paths Array Ignored
	 * @param $type integer Ignored
	 * @return Status Always a good status
	 */
	protected function doLock( array $paths, $type ) {
		$ok = Status::newGood();
		return $ok;
	}

	/**
	 * @param $paths Array Ignored
	 * @param $type integer Ignored
	 * @return Status Always a good status
	 */
	protected function doUnlock( array $paths, $type ) {
		$ok = Status::newGood();
		return $ok;
	}
}
/**
 * Class to handle file lock manager registration
 *
 * @ingroup LockManager
 * @author Aaron Schulz
 * @since 1.19
 */
class LockManagerGroup {

	/**
	 * @var LockManagerGroup
	 */
	protected static $instance = null;

	/** @var Array of (name => ('class' =>, 'config' =>, 'instance' =>)) */
	protected $managers = array();

	protected function __construct() {}
	protected function __clone() {}

	/**
	 * Get the shared instance, creating it and registering the globally
	 * configured lock managers on first use.
	 *
	 * @return LockManagerGroup
	 */
	public static function singleton() {
		if ( self::$instance === null ) {
			$group = new self();
			self::$instance = $group;
			$group->initFromGlobals();
		}
		return self::$instance;
	}

	/**
	 * Register the lock managers listed in $wgLockManagers
	 *
	 * @return void
	 */
	protected function initFromGlobals() {
		global $wgLockManagers;

		$this->register( $wgLockManagers );
	}

	/**
	 * Register an array of file lock manager configurations.
	 * Each entry needs a 'name' and a 'class'; the remaining keys are kept
	 * as the configuration passed to the manager's constructor.
	 *
	 * @param $configs Array
	 * @return void
	 * @throws MWException
	 */
	protected function register( array $configs ) {
		foreach ( $configs as $settings ) {
			if ( !isset( $settings['name'] ) ) {
				throw new MWException( "Cannot register a lock manager with no name." );
			}
			$name = $settings['name'];
			if ( !isset( $settings['class'] ) ) {
				throw new MWException( "Cannot register lock manager `{$name}` with no class." );
			}
			$managerClass = $settings['class'];
			unset( $settings['class'] ); // lock manager won't need this
			$this->managers[$name] = array(
				'class'    => $managerClass,
				'config'   => $settings,
				'instance' => null // created on first get()
			);
		}
	}

	/**
	 * Get the lock manager object with a given name.
	 * The object is created on the first request and reused afterwards.
	 *
	 * @param $name string
	 * @return LockManager
	 * @throws MWException
	 */
	public function get( $name ) {
		if ( !isset( $this->managers[$name] ) ) {
			throw new MWException( "No lock manager defined with the name `$name`." );
		}
		if ( isset( $this->managers[$name]['instance'] ) ) {
			return $this->managers[$name]['instance'];
		}
		// First request: build the manager from its registered class and config
		$managerClass = $this->managers[$name]['class'];
		$settings = $this->managers[$name]['config'];
		$this->managers[$name]['instance'] = new $managerClass( $settings );
		return $this->managers[$name]['instance'];
	}
}