diff options
Diffstat (limited to 'includes/filebackend')
22 files changed, 3194 insertions, 2041 deletions
diff --git a/includes/filebackend/FSFile.php b/includes/filebackend/FSFile.php index e07c99d4..8f0a1334 100644 --- a/includes/filebackend/FSFile.php +++ b/includes/filebackend/FSFile.php @@ -28,11 +28,12 @@ */ class FSFile { protected $path; // path to file + protected $sha1Base36; // file SHA-1 in base 36 /** * Sets up the file object * - * @param $path string Path to temporary file on local disk + * @param string $path Path to temporary file on local disk * @throws MWException */ public function __construct( $path ) { @@ -86,8 +87,8 @@ class FSFile { /** * Guess the MIME type from the file contents alone - * - * @return string + * + * @return string */ public function getMimeType() { return MimeMagic::singleton()->guessMimeType( $this->path, false ); @@ -97,14 +98,14 @@ class FSFile { * Get an associative array containing information about * a file with the given storage path. * - * @param $ext Mixed: the file extension, or true to extract it from the filename. + * @param Mixed $ext: the file extension, or true to extract it from the filename. * Set it to false to ignore the extension. * * @return array */ public function getProps( $ext = true ) { wfProfileIn( __METHOD__ ); - wfDebug( __METHOD__.": Getting file info for $this->path\n" ); + wfDebug( __METHOD__ . ": Getting file info for $this->path\n" ); $info = self::placeholderProps(); $info['fileExists'] = $this->exists(); @@ -131,7 +132,7 @@ class FSFile { # Height, width and metadata $handler = MediaHandler::getHandler( $info['mime'] ); if ( $handler ) { - $tempImage = (object)array(); + $tempImage = (object)array(); // XXX (hack for File object) $info['metadata'] = $handler->getMetadata( $tempImage, $this->path ); $gis = $handler->getImageSize( $tempImage, $this->path, $info['metadata'] ); if ( is_array( $gis ) ) { @@ -140,9 +141,9 @@ class FSFile { } $info['sha1'] = $this->getSha1Base36(); - wfDebug(__METHOD__.": $this->path loaded, {$info['size']} bytes, {$info['mime']}.\n"); + wfDebug( __METHOD__ . 
": $this->path loaded, {$info['size']} bytes, {$info['mime']}.\n" ); } else { - wfDebug(__METHOD__.": $this->path NOT FOUND!\n"); + wfDebug( __METHOD__ . ": $this->path NOT FOUND!\n" ); } wfProfileOut( __METHOD__ ); @@ -170,7 +171,7 @@ class FSFile { /** * Exract image size information * - * @param $gis array + * @param array $gis * @return Array */ protected function extractImageSizeInfo( array $gis ) { @@ -193,26 +194,33 @@ class FSFile { * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 * fairly neatly. * + * @param bool $recache * @return bool|string False on failure */ - public function getSha1Base36() { + public function getSha1Base36( $recache = false ) { wfProfileIn( __METHOD__ ); + if ( $this->sha1Base36 !== null && !$recache ) { + wfProfileOut( __METHOD__ ); + return $this->sha1Base36; + } + wfSuppressWarnings(); - $hash = sha1_file( $this->path ); + $this->sha1Base36 = sha1_file( $this->path ); wfRestoreWarnings(); - if ( $hash !== false ) { - $hash = wfBaseConvert( $hash, 16, 36, 31 ); + + if ( $this->sha1Base36 !== false ) { + $this->sha1Base36 = wfBaseConvert( $this->sha1Base36, 16, 36, 31 ); } wfProfileOut( __METHOD__ ); - return $hash; + return $this->sha1Base36; } /** * Get the final file extension from a file system path - * - * @param $path string + * + * @param string $path * @return string */ public static function extensionFromPath( $path ) { @@ -223,10 +231,9 @@ class FSFile { /** * Get an associative array containing information about a file in the local filesystem. * - * @param $path String: absolute local filesystem path - * @param $ext Mixed: the file extension, or true to extract it from the filename. + * @param string $path absolute local filesystem path + * @param Mixed $ext: the file extension, or true to extract it from the filename. * Set it to false to ignore the extension. 
- * * @return array */ public static function getPropsFromPath( $path, $ext = true ) { @@ -241,8 +248,7 @@ class FSFile { * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 * fairly neatly. * - * @param $path string - * + * @param string $path * @return bool|string False on failure */ public static function getSha1Base36FromPath( $path ) { diff --git a/includes/filebackend/FSFileBackend.php b/includes/filebackend/FSFileBackend.php index 93495340..6d642162 100644 --- a/includes/filebackend/FSFileBackend.php +++ b/includes/filebackend/FSFileBackend.php @@ -46,6 +46,7 @@ class FSFileBackend extends FileBackendStore { protected $fileOwner; // string; required OS username to own files protected $currentUser; // string; OS username running this script + /** @var Array */ protected $hadWarningErrors = array(); /** @@ -69,7 +70,7 @@ class FSFileBackend extends FileBackendStore { if ( isset( $config['containerPaths'] ) ) { $this->containerPaths = (array)$config['containerPaths']; foreach ( $this->containerPaths as &$path ) { - $path = rtrim( $path, '/' ); // remove trailing slash + $path = rtrim( $path, '/' ); // remove trailing slash } } @@ -81,12 +82,6 @@ class FSFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::resolveContainerPath() - * @param $container string - * @param $relStoragePath string - * @return null|string - */ protected function resolveContainerPath( $container, $relStoragePath ) { // Check that container has a root directory if ( isset( $this->containerPaths[$container] ) || isset( $this->basePath ) ) { @@ -101,7 +96,7 @@ class FSFileBackend extends FileBackendStore { /** * Sanity check a relative file system path for validity * - * @param $path string Normalized relative path + * @param string $path Normalized relative path * @return bool */ protected function isLegalRelPath( $path ) { @@ -120,8 +115,8 @@ class FSFileBackend extends FileBackendStore { * Given the short (unresolved) and full (resolved) 
name of * a container, return the file system path of the container. * - * @param $shortCont string - * @param $fullCont string + * @param string $shortCont + * @param string $fullCont * @return string|null */ protected function containerFSRoot( $shortCont, $fullCont ) { @@ -136,7 +131,7 @@ class FSFileBackend extends FileBackendStore { /** * Get the absolute file system path for a storage path * - * @param $storagePath string Storage path + * @param string $storagePath Storage path * @return string|null */ protected function resolveToFSPath( $storagePath ) { @@ -144,7 +139,7 @@ class FSFileBackend extends FileBackendStore { if ( $relPath === null ) { return null; // invalid } - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $storagePath ); + list( , $shortCont, ) = FileBackend::splitStoragePath( $storagePath ); $fsPath = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid if ( $relPath != '' ) { $fsPath .= "/{$relPath}"; @@ -152,10 +147,6 @@ class FSFileBackend extends FileBackendStore { return $fsPath; } - /** - * @see FileBackendStore::isPathUsableInternal() - * @return bool - */ public function isPathUsableInternal( $storagePath ) { $fsPath = $this->resolveToFSPath( $storagePath ); if ( $fsPath === null ) { @@ -177,11 +168,7 @@ class FSFileBackend extends FileBackendStore { return $ok; } - /** - * @see FileBackendStore::doStoreInternal() - * @return Status - */ - protected function doStoreInternal( array $params ) { + protected function doCreateInternal( array $params ) { $status = Status::newGood(); $dest = $this->resolveToFSPath( $params['dst'] ); @@ -190,27 +177,70 @@ class FSFileBackend extends FileBackendStore { return $status; } - if ( file_exists( $dest ) ) { - if ( !empty( $params['overwrite'] ) ) { - $ok = unlink( $dest ); - if ( !$ok ) { - $status->fatal( 'backend-fail-delete', $params['dst'] ); - return $status; - } - } else { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + if ( !empty( $params['async'] 
) ) { // deferred + $tempFile = TempFSFile::factory( 'create_', 'tmp' ); + if ( !$tempFile ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); return $status; } + $this->trapWarnings(); + $bytes = file_put_contents( $tempFile->getPath(), $params['content'] ); + $this->untrapWarnings(); + if ( $bytes === false ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } + $cmd = implode( ' ', array( + wfIsWindows() ? 'COPY /B /Y' : 'cp', // (binary, overwrite) + wfEscapeShellArg( $this->cleanPathSlashes( $tempFile->getPath() ) ), + wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) + ) ); + $status->value = new FSFileOpHandle( $this, $params, 'Create', $cmd, $dest ); + $tempFile->bind( $status->value ); + } else { // immediate write + $this->trapWarnings(); + $bytes = file_put_contents( $dest, $params['content'] ); + $this->untrapWarnings(); + if ( $bytes === false ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } + $this->chmod( $dest ); + } + + return $status; + } + + /** + * @see FSFileBackend::doExecuteOpHandlesInternal() + */ + protected function _getResponseCreate( $errors, Status $status, array $params, $cmd ) { + if ( $errors !== '' && !( wfIsWindows() && $errors[0] === " " ) ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + trigger_error( "$cmd\n$errors", E_USER_WARNING ); // command output + } + } + + protected function doStoreInternal( array $params ) { + $status = Status::newGood(); + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; } if ( !empty( $params['async'] ) ) { // deferred - $cmd = implode( ' ', array( wfIsWindows() ? 'COPY' : 'cp', + $cmd = implode( ' ', array( + wfIsWindows() ? 
'COPY /B /Y' : 'cp', // (binary, overwrite) wfEscapeShellArg( $this->cleanPathSlashes( $params['src'] ) ), wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) ) ); $status->value = new FSFileOpHandle( $this, $params, 'Store', $cmd, $dest ); } else { // immediate write + $this->trapWarnings(); $ok = copy( $params['src'], $dest ); + $this->untrapWarnings(); // In some cases (at least over NFS), copy() returns true when it fails if ( !$ok || ( filesize( $params['src'] ) !== filesize( $dest ) ) ) { if ( $ok ) { // PHP bug @@ -236,10 +266,6 @@ class FSFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doCopyInternal() - * @return Status - */ protected function doCopyInternal( array $params ) { $status = Status::newGood(); @@ -255,31 +281,30 @@ class FSFileBackend extends FileBackendStore { return $status; } - if ( file_exists( $dest ) ) { - if ( !empty( $params['overwrite'] ) ) { - $ok = unlink( $dest ); - if ( !$ok ) { - $status->fatal( 'backend-fail-delete', $params['dst'] ); - return $status; - } - } else { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); - return $status; + if ( !is_file( $source ) ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-copy', $params['src'] ); } + return $status; // do nothing; either OK or bad status } if ( !empty( $params['async'] ) ) { // deferred - $cmd = implode( ' ', array( wfIsWindows() ? 'COPY' : 'cp', + $cmd = implode( ' ', array( + wfIsWindows() ? 'COPY /B /Y' : 'cp', // (binary, overwrite) wfEscapeShellArg( $this->cleanPathSlashes( $source ) ), wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) ) ); $status->value = new FSFileOpHandle( $this, $params, 'Copy', $cmd, $dest ); } else { // immediate write - $ok = copy( $source, $dest ); + $this->trapWarnings(); + $ok = ( $source === $dest ) ? 
true : copy( $source, $dest ); + $this->untrapWarnings(); // In some cases (at least over NFS), copy() returns true when it fails if ( !$ok || ( filesize( $source ) !== filesize( $dest ) ) ) { if ( $ok ) { // PHP bug + $this->trapWarnings(); unlink( $dest ); // remove broken file + $this->untrapWarnings(); trigger_error( __METHOD__ . ": copy() failed but returned true." ); } $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); @@ -301,10 +326,6 @@ class FSFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doMoveInternal() - * @return Status - */ protected function doMoveInternal( array $params ) { $status = Status::newGood(); @@ -320,30 +341,24 @@ class FSFileBackend extends FileBackendStore { return $status; } - if ( file_exists( $dest ) ) { - if ( !empty( $params['overwrite'] ) ) { - // Windows does not support moving over existing files - if ( wfIsWindows() ) { - $ok = unlink( $dest ); - if ( !$ok ) { - $status->fatal( 'backend-fail-delete', $params['dst'] ); - return $status; - } - } - } else { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); - return $status; + if ( !is_file( $source ) ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-move', $params['src'] ); } + return $status; // do nothing; either OK or bad status } if ( !empty( $params['async'] ) ) { // deferred - $cmd = implode( ' ', array( wfIsWindows() ? 'MOVE' : 'mv', + $cmd = implode( ' ', array( + wfIsWindows() ? 'MOVE /Y' : 'mv', // (overwrite) wfEscapeShellArg( $this->cleanPathSlashes( $source ) ), wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) ) ); $status->value = new FSFileOpHandle( $this, $params, 'Move', $cmd ); } else { // immediate write - $ok = rename( $source, $dest ); + $this->trapWarnings(); + $ok = ( $source === $dest ) ? 
true : rename( $source, $dest ); + $this->untrapWarnings(); clearstatcache(); // file no longer at source if ( !$ok ) { $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); @@ -364,10 +379,6 @@ class FSFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doDeleteInternal() - * @return Status - */ protected function doDeleteInternal( array $params ) { $status = Status::newGood(); @@ -385,12 +396,15 @@ class FSFileBackend extends FileBackendStore { } if ( !empty( $params['async'] ) ) { // deferred - $cmd = implode( ' ', array( wfIsWindows() ? 'DEL' : 'unlink', + $cmd = implode( ' ', array( + wfIsWindows() ? 'DEL' : 'unlink', wfEscapeShellArg( $this->cleanPathSlashes( $source ) ) ) ); $status->value = new FSFileOpHandle( $this, $params, 'Copy', $cmd ); } else { // immediate write + $this->trapWarnings(); $ok = unlink( $source ); + $this->untrapWarnings(); if ( !$ok ) { $status->fatal( 'backend-fail-delete', $params['src'] ); return $status; @@ -410,174 +424,96 @@ class FSFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doCreateInternal() - * @return Status - */ - protected function doCreateInternal( array $params ) { - $status = Status::newGood(); - - $dest = $this->resolveToFSPath( $params['dst'] ); - if ( $dest === null ) { - $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); - return $status; - } - - if ( file_exists( $dest ) ) { - if ( !empty( $params['overwrite'] ) ) { - $ok = unlink( $dest ); - if ( !$ok ) { - $status->fatal( 'backend-fail-delete', $params['dst'] ); - return $status; - } - } else { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); - return $status; - } - } - - if ( !empty( $params['async'] ) ) { // deferred - $tempFile = TempFSFile::factory( 'create_', 'tmp' ); - if ( !$tempFile ) { - $status->fatal( 'backend-fail-create', $params['dst'] ); - return $status; - } - $bytes = file_put_contents( $tempFile->getPath(), $params['content'] ); - if ( $bytes === 
false ) { - $status->fatal( 'backend-fail-create', $params['dst'] ); - return $status; - } - $cmd = implode( ' ', array( wfIsWindows() ? 'COPY' : 'cp', - wfEscapeShellArg( $this->cleanPathSlashes( $tempFile->getPath() ) ), - wfEscapeShellArg( $this->cleanPathSlashes( $dest ) ) - ) ); - $status->value = new FSFileOpHandle( $this, $params, 'Create', $cmd, $dest ); - $tempFile->bind( $status->value ); - } else { // immediate write - $bytes = file_put_contents( $dest, $params['content'] ); - if ( $bytes === false ) { - $status->fatal( 'backend-fail-create', $params['dst'] ); - return $status; - } - $this->chmod( $dest ); - } - - return $status; - } - - /** - * @see FSFileBackend::doExecuteOpHandlesInternal() - */ - protected function _getResponseCreate( $errors, Status $status, array $params, $cmd ) { - if ( $errors !== '' && !( wfIsWindows() && $errors[0] === " " ) ) { - $status->fatal( 'backend-fail-create', $params['dst'] ); - trigger_error( "$cmd\n$errors", E_USER_WARNING ); // command output - } - } - - /** - * @see FileBackendStore::doPrepareInternal() - * @return Status - */ protected function doPrepareInternal( $fullCont, $dirRel, array $params ) { $status = Status::newGood(); - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = FileBackend::splitStoragePath( $params['dir'] ); $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; $existed = is_dir( $dir ); // already there? - if ( !wfMkdirParents( $dir ) ) { // make directory and its parents + // Create the directory and its parents as needed... 
+ $this->trapWarnings(); + if ( !wfMkdirParents( $dir ) ) { $status->fatal( 'directorycreateerror', $params['dir'] ); // fails on races } elseif ( !is_writable( $dir ) ) { $status->fatal( 'directoryreadonlyerror', $params['dir'] ); } elseif ( !is_readable( $dir ) ) { $status->fatal( 'directorynotreadableerror', $params['dir'] ); } + $this->untrapWarnings(); + // Respect any 'noAccess' or 'noListing' flags... if ( is_dir( $dir ) && !$existed ) { - // Respect any 'noAccess' or 'noListing' flags... $status->merge( $this->doSecureInternal( $fullCont, $dirRel, $params ) ); } return $status; } - /** - * @see FileBackendStore::doSecureInternal() - * @return Status - */ protected function doSecureInternal( $fullCont, $dirRel, array $params ) { $status = Status::newGood(); - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = FileBackend::splitStoragePath( $params['dir'] ); $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; // Seed new directories with a blank index.html, to prevent crawling... if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) { + $this->trapWarnings(); $bytes = file_put_contents( "{$dir}/index.html", $this->indexHtmlPrivate() ); + $this->untrapWarnings(); if ( $bytes === false ) { $status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' ); - return $status; } } // Add a .htaccess file to the root of the container... 
if ( !empty( $params['noAccess'] ) && !file_exists( "{$contRoot}/.htaccess" ) ) { + $this->trapWarnings(); $bytes = file_put_contents( "{$contRoot}/.htaccess", $this->htaccessPrivate() ); + $this->untrapWarnings(); if ( $bytes === false ) { $storeDir = "mwstore://{$this->name}/{$shortCont}"; $status->fatal( 'backend-fail-create', "{$storeDir}/.htaccess" ); - return $status; } } return $status; } - /** - * @see FileBackendStore::doPublishInternal() - * @return Status - */ protected function doPublishInternal( $fullCont, $dirRel, array $params ) { $status = Status::newGood(); - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = FileBackend::splitStoragePath( $params['dir'] ); $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; // Unseed new directories with a blank index.html, to allow crawling... if ( !empty( $params['listing'] ) && is_file( "{$dir}/index.html" ) ) { $exists = ( file_get_contents( "{$dir}/index.html" ) === $this->indexHtmlPrivate() ); + $this->trapWarnings(); if ( $exists && !unlink( "{$dir}/index.html" ) ) { // reverse secure() $status->fatal( 'backend-fail-delete', $params['dir'] . '/index.html' ); - return $status; } + $this->untrapWarnings(); } // Remove the .htaccess file from the root of the container... 
if ( !empty( $params['access'] ) && is_file( "{$contRoot}/.htaccess" ) ) { $exists = ( file_get_contents( "{$contRoot}/.htaccess" ) === $this->htaccessPrivate() ); + $this->trapWarnings(); if ( $exists && !unlink( "{$contRoot}/.htaccess" ) ) { // reverse secure() $storeDir = "mwstore://{$this->name}/{$shortCont}"; $status->fatal( 'backend-fail-delete', "{$storeDir}/.htaccess" ); - return $status; } + $this->untrapWarnings(); } return $status; } - /** - * @see FileBackendStore::doCleanInternal() - * @return Status - */ protected function doCleanInternal( $fullCont, $dirRel, array $params ) { $status = Status::newGood(); - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = FileBackend::splitStoragePath( $params['dir'] ); $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; - wfSuppressWarnings(); + $this->trapWarnings(); if ( is_dir( $dir ) ) { rmdir( $dir ); // remove directory if empty } - wfRestoreWarnings(); + $this->untrapWarnings(); return $status; } - /** - * @see FileBackendStore::doFileExists() - * @return array|bool|null - */ protected function doGetFileStat( array $params ) { $source = $this->resolveToFSPath( $params['src'] ); if ( $source === null ) { @@ -591,7 +527,7 @@ class FSFileBackend extends FileBackendStore { if ( $stat ) { return array( 'mtime' => wfTimestamp( TS_MW, $stat['mtime'] ), - 'size' => $stat['size'] + 'size' => $stat['size'] ); } elseif ( !$hadError ) { return false; // file does not exist @@ -607,12 +543,8 @@ class FSFileBackend extends FileBackendStore { clearstatcache(); // clear the PHP file stat cache } - /** - * @see FileBackendStore::doDirectoryExists() - * @return bool|null - */ protected function doDirectoryExists( $fullCont, $dirRel, array $params ) { - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = FileBackend::splitStoragePath( 
$params['dir'] ); $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; @@ -628,7 +560,7 @@ class FSFileBackend extends FileBackendStore { * @return Array|null */ public function getDirectoryListInternal( $fullCont, $dirRel, array $params ) { - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = FileBackend::splitStoragePath( $params['dir'] ); $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid $dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot; $exists = is_dir( $dir ); @@ -644,10 +576,10 @@ class FSFileBackend extends FileBackendStore { /** * @see FileBackendStore::getFileListInternal() - * @return array|FSFileBackendFileList|null + * @return Array|FSFileBackendFileList|null */ public function getFileListInternal( $fullCont, $dirRel, array $params ) { - list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = FileBackend::splitStoragePath( $params['dir'] ); $contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid $dir = ( $dirRel != '' ) ? 
"{$contRoot}/{$dirRel}" : $contRoot; $exists = is_dir( $dir ); @@ -661,59 +593,57 @@ class FSFileBackend extends FileBackendStore { return new FSFileBackendFileList( $dir, $params ); } - /** - * @see FileBackendStore::getLocalReference() - * @return FSFile|null - */ - public function getLocalReference( array $params ) { - $source = $this->resolveToFSPath( $params['src'] ); - if ( $source === null ) { - return null; - } - return new FSFile( $source ); - } + protected function doGetLocalReferenceMulti( array $params ) { + $fsFiles = array(); // (path => FSFile) - /** - * @see FileBackendStore::getLocalCopy() - * @return null|TempFSFile - */ - public function getLocalCopy( array $params ) { - $source = $this->resolveToFSPath( $params['src'] ); - if ( $source === null ) { - return null; + foreach ( $params['srcs'] as $src ) { + $source = $this->resolveToFSPath( $src ); + if ( $source === null || !is_file( $source ) ) { + $fsFiles[$src] = null; // invalid path or file does not exist + } else { + $fsFiles[$src] = new FSFile( $source ); + } } - // Create a new temporary file with the same extension... - $ext = FileBackend::extensionFromPath( $params['src'] ); - $tmpFile = TempFSFile::factory( 'localcopy_', $ext ); - if ( !$tmpFile ) { - return null; - } - $tmpPath = $tmpFile->getPath(); + return $fsFiles; + } - // Copy the source file over the temp file - $ok = copy( $source, $tmpPath ); - if ( !$ok ) { - return null; - } + protected function doGetLocalCopyMulti( array $params ) { + $tmpFiles = array(); // (path => TempFSFile) - $this->chmod( $tmpPath ); + foreach ( $params['srcs'] as $src ) { + $source = $this->resolveToFSPath( $src ); + if ( $source === null ) { + $tmpFiles[$src] = null; // invalid path + } else { + // Create a new temporary file with the same extension... 
+ $ext = FileBackend::extensionFromPath( $src ); + $tmpFile = TempFSFile::factory( 'localcopy_', $ext ); + if ( !$tmpFile ) { + $tmpFiles[$src] = null; + } else { + $tmpPath = $tmpFile->getPath(); + // Copy the source file over the temp file + $this->trapWarnings(); + $ok = copy( $source, $tmpPath ); + $this->untrapWarnings(); + if ( !$ok ) { + $tmpFiles[$src] = null; + } else { + $this->chmod( $tmpPath ); + $tmpFiles[$src] = $tmpFile; + } + } + } + } - return $tmpFile; + return $tmpFiles; } - /** - * @see FileBackendStore::directoriesAreVirtual() - * @return bool - */ protected function directoriesAreVirtual() { return false; } - /** - * @see FileBackendStore::doExecuteOpHandlesInternal() - * @return Array List of corresponding Status objects - */ protected function doExecuteOpHandlesInternal( array $fileOpHandles ) { $statuses = array(); @@ -747,13 +677,13 @@ class FSFileBackend extends FileBackendStore { /** * Chmod a file, suppressing the warnings * - * @param $path string Absolute file system path + * @param string $path Absolute file system path * @return bool Success */ protected function chmod( $path ) { - wfSuppressWarnings(); + $this->trapWarnings(); $ok = chmod( $path, $this->fileMode ); - wfRestoreWarnings(); + $this->untrapWarnings(); return $ok; } @@ -779,7 +709,7 @@ class FSFileBackend extends FileBackendStore { /** * Clean up directory separators for the given OS * - * @param $path string FS path + * @param string $path FS path * @return string */ protected function cleanPathSlashes( $path ) { @@ -789,12 +719,11 @@ class FSFileBackend extends FileBackendStore { /** * Listen for E_WARNING errors and track whether any happen * - * @return bool + * @return void */ protected function trapWarnings() { $this->hadWarningErrors[] = false; // push to stack set_error_handler( array( $this, 'handleWarning' ), E_WARNING ); - return false; // invoke normal PHP error handler } /** @@ -808,9 +737,13 @@ class FSFileBackend extends FileBackendStore { } /** + * 
@param integer $errno + * @param string $errstr * @return bool + * @access private */ - private function handleWarning() { + public function handleWarning( $errno, $errstr ) { + wfDebugLog( 'FSFileBackend', $errstr ); // more detailed error logging $this->hadWarningErrors[count( $this->hadWarningErrors ) - 1] = true; return true; // suppress from PHP handler } @@ -824,13 +757,15 @@ class FSFileOpHandle extends FileBackendStoreOpHandle { public $chmodPath; // string; file to chmod /** - * @param $backend - * @param $params array - * @param $call - * @param $cmd - * @param $chmodPath null + * @param FSFileBackend $backend + * @param array $params + * @param string $call + * @param string $cmd + * @param integer|null $chmodPath */ - public function __construct( $backend, array $params, $call, $cmd, $chmodPath = null ) { + public function __construct( + FSFileBackend $backend, array $params, $call, $cmd, $chmodPath = null + ) { $this->backend = $backend; $this->params = $params; $this->call = $call; @@ -855,16 +790,19 @@ abstract class FSFileBackendList implements Iterator { protected $params = array(); /** - * @param $dir string file system directory - * @param $params array + * @param string $dir file system directory + * @param array $params */ public function __construct( $dir, array $params ) { - $dir = realpath( $dir ); // normalize - $this->suffixStart = strlen( $dir ) + 1; // size of "path/to/dir/" + $path = realpath( $dir ); // normalize + if ( $path === false ) { + $path = $dir; + } + $this->suffixStart = strlen( $path ) + 1; // size of "path/to/dir/" $this->params = $params; try { - $this->iter = $this->initIterator( $dir ); + $this->iter = $this->initIterator( $path ); } catch ( UnexpectedValueException $e ) { $this->iter = null; // bad permissions? deleted? 
} @@ -873,7 +811,7 @@ abstract class FSFileBackendList implements Iterator { /** * Return an appropriate iterator object to wrap * - * @param $dir string file system directory + * @param string $dir file system directory * @return Iterator */ protected function initIterator( $dir ) { @@ -916,8 +854,8 @@ abstract class FSFileBackendList implements Iterator { try { $this->iter->next(); $this->filterViaNext(); - } catch ( UnexpectedValueException $e ) { - $this->iter = null; + } catch ( UnexpectedValueException $e ) { // bad permissions? deleted? + throw new FileBackendError( "File iterator gave UnexpectedValueException." ); } ++$this->pos; } @@ -931,8 +869,8 @@ abstract class FSFileBackendList implements Iterator { try { $this->iter->rewind(); $this->filterViaNext(); - } catch ( UnexpectedValueException $e ) { - $this->iter = null; + } catch ( UnexpectedValueException $e ) { // bad permissions? deleted? + throw new FileBackendError( "File iterator gave UnexpectedValueException." ); } } @@ -953,11 +891,15 @@ abstract class FSFileBackendList implements Iterator { * Return only the relative path and normalize slashes to FileBackend-style. * Uses the "real path" since the suffix is based upon that. 
* - * @param $path string + * @param string $path * @return string */ - protected function getRelPath( $path ) { - return strtr( substr( realpath( $path ), $this->suffixStart ), '\\', '/' ); + protected function getRelPath( $dir ) { + $path = realpath( $dir ); + if ( $path === false ) { + $path = $dir; + } + return strtr( substr( $path, $this->suffixStart ), '\\', '/' ); } } diff --git a/includes/filebackend/FileBackend.php b/includes/filebackend/FileBackend.php index 76c761b0..f586578b 100644 --- a/includes/filebackend/FileBackend.php +++ b/includes/filebackend/FileBackend.php @@ -1,7 +1,6 @@ <?php /** * @defgroup FileBackend File backend - * @ingroup FileRepo * * File backend is used to interact with file storage systems, * such as the local file system, NFS, or cloud storage systems. @@ -37,14 +36,18 @@ * Outside callers can assume that all backends will have these functions. * * All "storage paths" are of the format "mwstore://<backend>/<container>/<path>". - * The "<path>" portion is a relative path that uses UNIX file system (FS) - * notation, though any particular backend may not actually be using a local - * filesystem. Therefore, the relative paths are only virtual. + * The "backend" portion is unique name for MediaWiki to refer to a backend, while + * the "container" portion is a top-level directory of the backend. The "path" portion + * is a relative path that uses UNIX file system (FS) notation, though any particular + * backend may not actually be using a local filesystem. Therefore, the relative paths + * are only virtual. * * Backend contents are stored under wiki-specific container names by default. - * For legacy reasons, this has no effect for the FS backend class, and per-wiki - * segregation must be done by setting the container paths appropriately. + * Global (qualified) backends are achieved by configuring the "wiki ID" to a constant. 
+ * For legacy reasons, the FSFileBackend class allows manually setting the paths of + * containers to ones that do not respect the "wiki ID". * + * In key/value stores, the container is the only hierarchy (the rest is emulated). * FS-based backends are somewhat more restrictive due to the existence of real * directory files; a regular file cannot have the same name as a directory. Other * backends with virtual directories may not have this limitation. Callers should @@ -75,9 +78,13 @@ abstract class FileBackend { * $config includes: * - name : The unique name of this backend. * This should consist of alphanumberic, '-', and '_' characters. - * This name should not be changed after use. - * - wikiId : Prefix to container names that is unique to this wiki. + * This name should not be changed after use (e.g. with journaling). + * Note that the name is *not* used in actual container names. + * - wikiId : Prefix to container names that is unique to this backend. + * If not provided, this defaults to the current wiki ID. * It should only consist of alphanumberic, '-', and '_' characters. + * This ID is what avoids collisions if multiple logical backends + * use the same storage system, so this should be set carefully. * - lockManager : Registered name of a file lock manager to use. * - fileJournal : File journal configuration; see FileJournal::factory(). * Journals simply log changes to files stored in the backend. @@ -87,7 +94,7 @@ abstract class FileBackend { * Allowed values are "implicit", "explicit" and "off". * - concurrency : How many file operations can be done in parallel. * - * @param $config Array + * @param array $config * @throws MWException */ public function __construct( array $config ) { @@ -100,7 +107,7 @@ abstract class FileBackend { : wfWikiID(); // e.g. "my_wiki-en_" $this->lockManager = ( $config['lockManager'] instanceof LockManager ) ? 
$config['lockManager'] - : LockManagerGroup::singleton()->get( $config['lockManager'] ); + : LockManagerGroup::singleton( $this->wikiId )->get( $config['lockManager'] ); $this->fileJournal = isset( $config['fileJournal'] ) ? ( ( $config['fileJournal'] instanceof FileJournal ) ? $config['fileJournal'] @@ -129,7 +136,8 @@ abstract class FileBackend { } /** - * Get the wiki identifier used for this backend (possibly empty) + * Get the wiki identifier used for this backend (possibly empty). + * Note that this might *not* be in the same format as wfWikiID(). * * @return string * @since 1.20 @@ -171,6 +179,7 @@ abstract class FileBackend { * - copy * - move * - delete + * - describe (since 1.21) * - null * * a) Create a new file in storage with the contents of a string @@ -181,7 +190,7 @@ abstract class FileBackend { * 'content' => <string of new file contents>, * 'overwrite' => <boolean>, * 'overwriteSame' => <boolean>, - * 'disposition' => <Content-Disposition header value> + * 'headers' => <HTTP header name/value map> # since 1.21 * ); * @endcode * @@ -193,7 +202,7 @@ abstract class FileBackend { * 'dst' => <storage path>, * 'overwrite' => <boolean>, * 'overwriteSame' => <boolean>, - * 'disposition' => <Content-Disposition header value> + * 'headers' => <HTTP header name/value map> # since 1.21 * ) * @endcode * @@ -205,7 +214,8 @@ abstract class FileBackend { * 'dst' => <storage path>, * 'overwrite' => <boolean>, * 'overwriteSame' => <boolean>, - * 'disposition' => <Content-Disposition header value> + * 'ignoreMissingSource' => <boolean>, # since 1.21 + * 'headers' => <HTTP header name/value map> # since 1.21 * ) * @endcode * @@ -217,7 +227,8 @@ abstract class FileBackend { * 'dst' => <storage path>, * 'overwrite' => <boolean>, * 'overwriteSame' => <boolean>, - * 'disposition' => <Content-Disposition header value> + * 'ignoreMissingSource' => <boolean>, # since 1.21 + * 'headers' => <HTTP header name/value map> # since 1.21 * ) * @endcode * @@ -230,7 +241,16 @@ 
abstract class FileBackend { * ) * @endcode * - * f) Do nothing (no-op) + * f) Update metadata for a file within storage + * @code + * array( + * 'op' => 'describe', + * 'src' => <storage path>, + * 'headers' => <HTTP header name/value map> + * ) + * @endcode + * + * g) Do nothing (no-op) * @code * array( * 'op' => 'null', @@ -241,31 +261,35 @@ abstract class FileBackend { * - ignoreMissingSource : The operation will simply succeed and do * nothing if the source file does not exist. * - overwrite : Any destination file will be overwritten. - * - overwriteSame : An error will not be given if a file already - * exists at the destination that has the same - * contents as the new contents to be written there. - * - disposition : When supplied, the backend will add a Content-Disposition - * header when GETs/HEADs of the destination file are made. - * Backends that don't support file metadata will ignore this. - * See http://tools.ietf.org/html/rfc6266 (since 1.20). + * - overwriteSame : If a file already exists at the destination with the + * same contents, then do nothing to the destination file + * instead of giving an error. This does not compare headers. + * This option is ignored if 'overwrite' is already provided. + * - headers : If supplied, the result of merging these headers with any + * existing source file headers (replacing conflicting ones) + * will be set as the destination file headers. Headers are + * deleted if their value is set to the empty string. When a + * file has headers they are included in responses to GET and + * HEAD requests to the backing store for that file. + * Header values should be no larger than 255 bytes, except for + * Content-Disposition. The system might ignore or truncate any + * headers that are too long to store (exact limits will vary). + * Backends that don't support metadata ignore this. (since 1.21) * * $opts is an associative of boolean flags, including: * - force : Operation precondition errors no longer trigger an abort. 
* Any remaining operations are still attempted. Unexpected - * failures may still cause remaning operations to be aborted. + * failures may still cause remaining operations to be aborted. * - nonLocking : No locks are acquired for the operations. * This can increase performance for non-critical writes. * This has no effect unless the 'force' flag is set. - * - allowStale : Don't require the latest available data. - * This can increase performance for non-critical writes. - * This has no effect unless the 'force' flag is set. * - nonJournaled : Don't log this operation batch in the file journal. * This limits the ability of recovery scripts. * - parallelize : Try to do operations in parallel when possible. - * - bypassReadOnly : Allow writes in read-only mode (since 1.20). + * - bypassReadOnly : Allow writes in read-only mode. (since 1.20) * - preserveCache : Don't clear the process cache before checking files. * This should only be used if all entries in the process - * cache were added after the files were already locked (since 1.20). + * cache were added after the files were already locked. (since 1.20) * * @remarks Remarks on locking: * File system paths given to operations should refer to files that are @@ -282,28 +306,26 @@ abstract class FileBackend { * - a) unexpected operation errors occurred (network partitions, disk full...) 
* - b) significant operation errors occurred and 'force' was not set * - * @param $ops Array List of operations to execute in order - * @param $opts Array Batch operation options + * @param array $ops List of operations to execute in order + * @param array $opts Batch operation options * @return Status */ final public function doOperations( array $ops, array $opts = array() ) { if ( empty( $opts['bypassReadOnly'] ) && $this->isReadOnly() ) { return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); } + if ( !count( $ops ) ) { + return Status::newGood(); // nothing to do + } if ( empty( $opts['force'] ) ) { // sanity unset( $opts['nonLocking'] ); - unset( $opts['allowStale'] ); } - $opts['concurrency'] = 1; // off - if ( $this->parallelize === 'implicit' ) { - if ( !isset( $opts['parallelize'] ) || $opts['parallelize'] ) { - $opts['concurrency'] = $this->concurrency; - } - } elseif ( $this->parallelize === 'explicit' ) { - if ( !empty( $opts['parallelize'] ) ) { - $opts['concurrency'] = $this->concurrency; + foreach ( $ops as &$op ) { + if ( isset( $op['disposition'] ) ) { // b/c (MW 1.20) + $op['headers']['Content-Disposition'] = $op['disposition']; } } + $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore client aborts return $this->doOperationsInternal( $ops, $opts ); } @@ -319,8 +341,8 @@ abstract class FileBackend { * * @see FileBackend::doOperations() * - * @param $op Array Operation - * @param $opts Array Operation options + * @param array $op Operation + * @param array $opts Operation options * @return Status */ final public function doOperation( array $op, array $opts = array() ) { @@ -333,8 +355,8 @@ abstract class FileBackend { * * @see FileBackend::doOperation() * - * @param $params Array Operation parameters - * @param $opts Array Operation options + * @param array $params Operation parameters + * @param array $opts Operation options * @return Status */ final public function create( array $params, array $opts = array() 
) { @@ -347,8 +369,8 @@ abstract class FileBackend { * * @see FileBackend::doOperation() * - * @param $params Array Operation parameters - * @param $opts Array Operation options + * @param array $params Operation parameters + * @param array $opts Operation options * @return Status */ final public function store( array $params, array $opts = array() ) { @@ -361,8 +383,8 @@ abstract class FileBackend { * * @see FileBackend::doOperation() * - * @param $params Array Operation parameters - * @param $opts Array Operation options + * @param array $params Operation parameters + * @param array $opts Operation options * @return Status */ final public function copy( array $params, array $opts = array() ) { @@ -375,8 +397,8 @@ abstract class FileBackend { * * @see FileBackend::doOperation() * - * @param $params Array Operation parameters - * @param $opts Array Operation options + * @param array $params Operation parameters + * @param array $opts Operation options * @return Status */ final public function move( array $params, array $opts = array() ) { @@ -389,8 +411,8 @@ abstract class FileBackend { * * @see FileBackend::doOperation() * - * @param $params Array Operation parameters - * @param $opts Array Operation options + * @param array $params Operation parameters + * @param array $opts Operation options * @return Status */ final public function delete( array $params, array $opts = array() ) { @@ -398,6 +420,21 @@ abstract class FileBackend { } /** + * Performs a single describe operation. + * This sets $params['op'] to 'describe' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param array $params Operation parameters + * @param array $opts Operation options + * @return Status + * @since 1.21 + */ + final public function describe( array $params, array $opts = array() ) { + return $this->doOperation( array( 'op' => 'describe' ) + $params, $opts ); + } + + /** * Perform a set of independent file operations on some files. 
* * This does no locking, nor journaling, and possibly no stat calls. @@ -410,6 +447,7 @@ abstract class FileBackend { * - copy * - move * - delete + * - describe (since 1.21) * - null * * a) Create a new file in storage with the contents of a string @@ -418,36 +456,42 @@ abstract class FileBackend { * 'op' => 'create', * 'dst' => <storage path>, * 'content' => <string of new file contents>, - * 'disposition' => <Content-Disposition header value> + * 'headers' => <HTTP header name/value map> # since 1.21 * ) * @endcode + * * b) Copy a file system file into storage * @code * array( * 'op' => 'store', * 'src' => <file system path>, * 'dst' => <storage path>, - * 'disposition' => <Content-Disposition header value> + * 'headers' => <HTTP header name/value map> # since 1.21 * ) * @endcode + * * c) Copy a file within storage * @code * array( * 'op' => 'copy', * 'src' => <storage path>, * 'dst' => <storage path>, - * 'disposition' => <Content-Disposition header value> + * 'ignoreMissingSource' => <boolean>, # since 1.21 + * 'headers' => <HTTP header name/value map> # since 1.21 * ) * @endcode + * * d) Move a file within storage * @code * array( * 'op' => 'move', * 'src' => <storage path>, * 'dst' => <storage path>, - * 'disposition' => <Content-Disposition header value> + * 'ignoreMissingSource' => <boolean>, # since 1.21 + * 'headers' => <HTTP header name/value map> # since 1.21 * ) * @endcode + * * e) Delete a file within storage * @code * array( @@ -456,7 +500,17 @@ abstract class FileBackend { * 'ignoreMissingSource' => <boolean> * ) * @endcode - * f) Do nothing (no-op) + * + * f) Update metadata for a file within storage + * @code + * array( + * 'op' => 'describe', + * 'src' => <storage path>, + * 'headers' => <HTTP header name/value map> + * ) + * @endcode + * + * g) Do nothing (no-op) * @code * array( * 'op' => 'null', @@ -466,10 +520,15 @@ abstract class FileBackend { * @par Boolean flags for operations (operation-specific): * - ignoreMissingSource : The operation 
will simply succeed and do * nothing if the source file does not exist. - * - disposition : When supplied, the backend will add a Content-Disposition - * header when GETs/HEADs of the destination file are made. - * Backends that don't support file metadata will ignore this. - * See http://tools.ietf.org/html/rfc6266 (since 1.20). + * - headers : If supplied with a header name/value map, the backend will + * reply with these headers when GETs/HEADs of the destination + * file are made. Header values should be smaller than 256 bytes. + * Content-Disposition headers can be longer, though the system + * might ignore or truncate ones that are too long to store. + * Existing headers will remain, but these will replace any + * conflicting previous headers, and headers will be removed + * if they are set to an empty string. + * Backends that don't support metadata ignore this. (since 1.21) * * $opts is an associative of boolean flags, including: * - bypassReadOnly : Allow writes in read-only mode (since 1.20) @@ -480,8 +539,8 @@ abstract class FileBackend { * will reflect each operation attempted for the given files. The status will be * considered "OK" as long as no fatal errors occurred. 
* - * @param $ops Array Set of operations to execute - * @param $opts Array Batch operation options + * @param array $ops Set of operations to execute + * @param array $opts Batch operation options * @return Status * @since 1.20 */ @@ -489,9 +548,16 @@ abstract class FileBackend { if ( empty( $opts['bypassReadOnly'] ) && $this->isReadOnly() ) { return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); } + if ( !count( $ops ) ) { + return Status::newGood(); // nothing to do + } foreach ( $ops as &$op ) { $op['overwrite'] = true; // avoids RTTs in key/value stores + if ( isset( $op['disposition'] ) ) { // b/c (MW 1.20) + $op['headers']['Content-Disposition'] = $op['disposition']; + } } + $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore client aborts return $this->doQuickOperationsInternal( $ops ); } @@ -507,7 +573,7 @@ abstract class FileBackend { * * @see FileBackend::doQuickOperations() * - * @param $op Array Operation + * @param array $op Operation * @return Status * @since 1.20 */ @@ -521,7 +587,7 @@ abstract class FileBackend { * * @see FileBackend::doQuickOperation() * - * @param $params Array Operation parameters + * @param array $params Operation parameters * @return Status * @since 1.20 */ @@ -535,7 +601,7 @@ abstract class FileBackend { * * @see FileBackend::doQuickOperation() * - * @param $params Array Operation parameters + * @param array $params Operation parameters * @return Status * @since 1.20 */ @@ -549,7 +615,7 @@ abstract class FileBackend { * * @see FileBackend::doQuickOperation() * - * @param $params Array Operation parameters + * @param array $params Operation parameters * @return Status * @since 1.20 */ @@ -563,7 +629,7 @@ abstract class FileBackend { * * @see FileBackend::doQuickOperation() * - * @param $params Array Operation parameters + * @param array $params Operation parameters * @return Status * @since 1.20 */ @@ -577,7 +643,7 @@ abstract class FileBackend { * * @see FileBackend::doQuickOperation() * 
- * @param $params Array Operation parameters + * @param array $params Operation parameters * @return Status * @since 1.20 */ @@ -586,15 +652,30 @@ abstract class FileBackend { } /** + * Performs a single quick describe operation. + * This sets $params['op'] to 'describe' and passes it to doQuickOperation(). + * + * @see FileBackend::doQuickOperation() + * + * @param array $params Operation parameters + * @return Status + * @since 1.21 + */ + final public function quickDescribe( array $params ) { + return $this->doQuickOperation( array( 'op' => 'describe' ) + $params ); + } + + /** * Concatenate a list of storage files into a single file system file. * The target path should refer to a file that is already locked or * otherwise safe from modification from other processes. Normally, * the file will be a new temp file, which should be adequate. * - * @param $params Array Operation parameters + * @param array $params Operation parameters * $params include: - * - srcs : ordered source storage paths (e.g. chunk1, chunk2, ...) - * - dst : file system path to 0-byte temp file + * - srcs : ordered source storage paths (e.g. chunk1, chunk2, ...) + * - dst : file system path to 0-byte temp file + * - parallelize : try to do operations in parallel when possible * @return Status */ abstract public function concatenate( array $params ); @@ -607,8 +688,10 @@ abstract class FileBackend { * The 'noAccess' and 'noListing' parameters works the same as in secure(), * except they are only applied *if* the directory/container had to be created. * These flags should always be set for directories that have private files. + * However, setting them is not guaranteed to actually do anything. + * Additional server configuration may be needed to achieve the desired effect. 
* - * @param $params Array + * @param array $params * $params include: * - dir : storage directory * - noAccess : try to deny file access (since 1.20) @@ -620,6 +703,7 @@ abstract class FileBackend { if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); } + $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore client aborts return $this->doPrepare( $params ); } @@ -633,9 +717,11 @@ abstract class FileBackend { * the container it belongs to. FS backends might add .htaccess * files whereas key/value store backends might revoke container * access to the storage user representing end-users in web requests. - * This is not guaranteed to actually do anything. * - * @param $params Array + * This is not guaranteed to actually make files or listings publically hidden. + * Additional server configuration may be needed to achieve the desired effect. + * + * @param array $params * $params include: * - dir : storage directory * - noAccess : try to deny file access @@ -647,6 +733,7 @@ abstract class FileBackend { if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); } + $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore client aborts return $this->doSecure( $params ); } @@ -662,7 +749,10 @@ abstract class FileBackend { * access to the storage user representing end-users in web requests. * This essentially can undo the result of secure() calls. * - * @param $params Array + * This is not guaranteed to actually make files or listings publically viewable. + * Additional server configuration may be needed to achieve the desired effect. 
+ * + * @param array $params * $params include: * - dir : storage directory * - access : try to allow file access @@ -675,6 +765,7 @@ abstract class FileBackend { if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); } + $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore client aborts return $this->doPublish( $params ); } @@ -688,7 +779,7 @@ abstract class FileBackend { * Backends using key/value stores may do nothing unless the directory * is that of an empty container, in which case it will be deleted. * - * @param $params Array + * @param array $params * $params include: * - dir : storage directory * - recursive : recursively delete empty subdirectories first (since 1.20) @@ -699,6 +790,7 @@ abstract class FileBackend { if ( empty( $params['bypassReadOnly'] ) && $this->isReadOnly() ) { return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); } + $scope = $this->getScopedPHPBehaviorForOps(); // try to ignore client aborts return $this->doClean( $params ); } @@ -708,10 +800,27 @@ abstract class FileBackend { abstract protected function doClean( array $params ); /** + * Enter file operation scope. + * This just makes PHP ignore user aborts/disconnects until the return + * value leaves scope. This returns null and does nothing in CLI mode. + * + * @return ScopedCallback|null + */ + final protected function getScopedPHPBehaviorForOps() { + if ( php_sapi_name() != 'cli' ) { // http://bugs.php.net/bug.php?id=47540 + $old = ignore_user_abort( true ); // avoid half-finished operations + return new ScopedCallback( function() use ( $old ) { + ignore_user_abort( $old ); + } ); + } + return null; + } + + /** * Check if a file exists at a storage path in the backend. * This returns false if only a directory exists at the path. 
* - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data @@ -722,7 +831,7 @@ abstract class FileBackend { /** * Get the last-modified timestamp of the file at a storage path. * - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data @@ -734,18 +843,40 @@ abstract class FileBackend { * Get the contents of a file at a storage path in the backend. * This should be avoided for potentially large files. * - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data * @return string|bool Returns false on failure */ - abstract public function getFileContents( array $params ); + final public function getFileContents( array $params ) { + $contents = $this->getFileContentsMulti( + array( 'srcs' => array( $params['src'] ) ) + $params ); + + return $contents[$params['src']]; + } + + /** + * Like getFileContents() except it takes an array of storage paths + * and returns a map of storage paths to strings (or null on failure). + * The map keys (paths) are in the same order as the provided list of paths. + * + * @see FileBackend::getFileContents() + * + * @param array $params + * $params include: + * - srcs : list of source storage paths + * - latest : use the latest available data + * - parallelize : try to do operations in parallel when possible + * @return Array Map of (path name => string or false on failure) + * @since 1.20 + */ + abstract public function getFileContentsMulti( array $params ); /** * Get the size (bytes) of a file at a storage path in the backend. 
* - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data @@ -761,7 +892,7 @@ abstract class FileBackend { * - size : the file size (bytes) * Additional values may be included for internal use only. * - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data @@ -772,7 +903,7 @@ abstract class FileBackend { /** * Get a SHA-1 hash of the file at a storage path in the backend. * - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data @@ -782,13 +913,13 @@ abstract class FileBackend { /** * Get the properties of the file at a storage path in the backend. - * Returns FSFile::placeholderProps() on failure. + * This gives the result of FSFile::getProps() on a local copy of the file. * - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data - * @return Array + * @return Array Returns FSFile::placeholderProps() on failure */ abstract public function getFileProps( array $params ); @@ -799,7 +930,7 @@ abstract class FileBackend { * will be sent if streaming began, while none will be sent otherwise. * Implementations should flush the output buffer before sending data. * - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - headers : list of additional HTTP headers to send on success @@ -821,26 +952,89 @@ abstract class FileBackend { * In that later case, there are copies of the file that must stay in sync. * Additionally, further calls to this function may return the same file. 
* - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data * @return FSFile|null Returns null on failure */ - abstract public function getLocalReference( array $params ); + final public function getLocalReference( array $params ) { + $fsFiles = $this->getLocalReferenceMulti( + array( 'srcs' => array( $params['src'] ) ) + $params ); + + return $fsFiles[$params['src']]; + } + + /** + * Like getLocalReference() except it takes an array of storage paths + * and returns a map of storage paths to FSFile objects (or null on failure). + * The map keys (paths) are in the same order as the provided list of paths. + * + * @see FileBackend::getLocalReference() + * + * @param array $params + * $params include: + * - srcs : list of source storage paths + * - latest : use the latest available data + * - parallelize : try to do operations in parallel when possible + * @return Array Map of (path name => FSFile or null on failure) + * @since 1.20 + */ + abstract public function getLocalReferenceMulti( array $params ); /** * Get a local copy on disk of the file at a storage path in the backend. * The temporary copy will have the same file extension as the source. * Temporary files may be purged when the file object falls out of scope. * - * @param $params Array + * @param array $params * $params include: * - src : source storage path * - latest : use the latest available data * @return TempFSFile|null Returns null on failure */ - abstract public function getLocalCopy( array $params ); + final public function getLocalCopy( array $params ) { + $tmpFiles = $this->getLocalCopyMulti( + array( 'srcs' => array( $params['src'] ) ) + $params ); + + return $tmpFiles[$params['src']]; + } + + /** + * Like getLocalCopy() except it takes an array of storage paths and + * returns a map of storage paths to TempFSFile objects (or null on failure). 
+ * The map keys (paths) are in the same order as the provided list of paths. + * + * @see FileBackend::getLocalCopy() + * + * @param array $params + * $params include: + * - srcs : list of source storage paths + * - latest : use the latest available data + * - parallelize : try to do operations in parallel when possible + * @return Array Map of (path name => TempFSFile or null on failure) + * @since 1.20 + */ + abstract public function getLocalCopyMulti( array $params ); + + /** + * Return an HTTP URL to a given file that requires no authentication to use. + * The URL may be pre-authenticated (via some token in the URL) and temporary. + * This will return null if the backend cannot make an HTTP URL for the file. + * + * This is useful for key/value stores when using scripts that seek around + * large files and those scripts (and the backend) support HTTP Range headers. + * Otherwise, one would need to use getLocalReference(), which involves loading + * the entire file on to local disk. + * + * @param array $params + * $params include: + * - src : source storage path + * - ttl : lifetime (seconds) if pre-authenticated; default is 1 day + * @return string|null + * @since 1.21 + */ + abstract public function getFileHttpUrl( array $params ); /** * Check if a directory exists at a given storage path. @@ -849,7 +1043,7 @@ abstract class FileBackend { * * Storage backends with eventual consistency might return stale data. * - * @param $params array + * @param array $params * $params include: * - dir : storage directory * @return bool|null Returns null on failure @@ -867,7 +1061,9 @@ abstract class FileBackend { * * Storage backends with eventual consistency might return stale data. * - * @param $params array + * Failures during iteration can result in FileBackendError exceptions (since 1.22). 
+ * + * @param array $params * $params include: * - dir : storage directory * - topOnly : only return direct child dirs of the directory @@ -882,7 +1078,9 @@ abstract class FileBackend { * * Storage backends with eventual consistency might return stale data. * - * @param $params array + * Failures during iteration can result in FileBackendError exceptions (since 1.22). + * + * @param array $params * $params include: * - dir : storage directory * @return Traversable|Array|null Returns null on failure @@ -902,10 +1100,13 @@ abstract class FileBackend { * * Storage backends with eventual consistency might return stale data. * - * @param $params array + * Failures during iteration can result in FileBackendError exceptions (since 1.22). + * + * @param array $params * $params include: - * - dir : storage directory - * - topOnly : only return direct child files of the directory (since 1.20) + * - dir : storage directory + * - topOnly : only return direct child files of the directory (since 1.20) + * - adviseStat : set to true if stat requests will be made on the files (since 1.22) * @return Traversable|Array|null Returns null on failure */ abstract public function getFileList( array $params ); @@ -916,9 +1117,12 @@ abstract class FileBackend { * * Storage backends with eventual consistency might return stale data. * - * @param $params array + * Failures during iteration can result in FileBackendError exceptions (since 1.22). + * + * @param array $params * $params include: - * - dir : storage directory + * - dir : storage directory + * - adviseStat : set to true if stat requests will be made on the files (since 1.22) * @return Traversable|Array|null Returns null on failure * @since 1.20 */ @@ -930,7 +1134,7 @@ abstract class FileBackend { * Preload persistent file stat and property cache into in-process cache. * This should be used when stat calls will be made on a known list of a many files. 
* - * @param $paths Array Storage paths + * @param array $paths Storage paths * @return void */ public function preloadCache( array $paths ) {} @@ -939,7 +1143,7 @@ abstract class FileBackend { * Invalidate any in-process file stat and property cache. * If $paths is given, then only the cache for those files will be cleared. * - * @param $paths Array Storage paths (optional) + * @param array $paths Storage paths (optional) * @return void */ public function clearCache( array $paths = null ) {} @@ -950,22 +1154,24 @@ abstract class FileBackend { * * Callers should consider using getScopedFileLocks() instead. * - * @param $paths Array Storage paths - * @param $type integer LockManager::LOCK_* constant + * @param array $paths Storage paths + * @param integer $type LockManager::LOCK_* constant * @return Status */ final public function lockFiles( array $paths, $type ) { + $paths = array_map( 'FileBackend::normalizeStoragePath', $paths ); return $this->lockManager->lock( $paths, $type ); } /** * Unlock the files at the given storage paths in the backend. * - * @param $paths Array Storage paths - * @param $type integer LockManager::LOCK_* constant + * @param array $paths Storage paths + * @param integer $type LockManager::LOCK_* constant * @return Status */ final public function unlockFiles( array $paths, $type ) { + $paths = array_map( 'FileBackend::normalizeStoragePath', $paths ); return $this->lockManager->unlock( $paths, $type ); } @@ -977,12 +1183,21 @@ abstract class FileBackend { * Once the return value goes out scope, the locks will be released and * the status updated. Unlock fatals will not change the status "OK" value. 
* - * @param $paths Array Storage paths - * @param $type integer LockManager::LOCK_* constant - * @param $status Status Status to update on lock/unlock + * @see ScopedLock::factory() + * + * @param array $paths List of storage paths or map of lock types to path lists + * @param integer|string $type LockManager::LOCK_* constant or "mixed" + * @param Status $status Status to update on lock/unlock * @return ScopedLock|null Returns null on failure */ final public function getScopedFileLocks( array $paths, $type, Status $status ) { + if ( $type === 'mixed' ) { + foreach ( $paths as &$typePaths ) { + $typePaths = array_map( 'FileBackend::normalizeStoragePath', $typePaths ); + } + } else { + $paths = array_map( 'FileBackend::normalizeStoragePath', $paths ); + } return ScopedLock::factory( $this->lockManager, $paths, $type, $status ); } @@ -997,8 +1212,8 @@ abstract class FileBackend { * * @see FileBackend::doOperations() * - * @param $ops Array List of file operations to FileBackend::doOperations() - * @param $status Status Status to update on lock/unlock + * @param array $ops List of file operations to FileBackend::doOperations() + * @param Status $status Status to update on lock/unlock * @return Array List of ScopedFileLocks or null values * @since 1.20 */ @@ -1016,6 +1231,17 @@ abstract class FileBackend { } /** + * Get the storage path for the given container for this backend + * + * @param string $container Container name + * @return string Storage path + * @since 1.21 + */ + final public function getContainerStoragePath( $container ) { + return $this->getRootStoragePath() . "/{$container}"; + } + + /** * Get the file journal object for this backend * * @return FileJournal @@ -1028,7 +1254,7 @@ abstract class FileBackend { * Check if a given path is a "mwstore://" path. * This does not do any further validation or any existence checks. 
* - * @param $path string + * @param string $path * @return bool */ final public static function isStoragePath( $path ) { @@ -1040,7 +1266,7 @@ abstract class FileBackend { * and a relative file path. The relative path may be the empty string. * This does not do any path normalization or traversal checks. * - * @param $storagePath string + * @param string $storagePath * @return Array (backend, container, rel object) or (null, null, null) */ final public static function splitStoragePath( $storagePath ) { @@ -1062,7 +1288,7 @@ abstract class FileBackend { * Normalize a storage path by cleaning up directory separators. * Returns null if the path is not of the format of a valid storage path. * - * @param $storagePath string + * @param string $storagePath * @return string|null */ final public static function normalizeStoragePath( $storagePath ) { @@ -1083,19 +1309,19 @@ abstract class FileBackend { * This returns a path like "mwstore://backend/container", * "mwstore://backend/container/...", or null if there is no parent. * - * @param $storagePath string + * @param string $storagePath * @return string|null */ final public static function parentStoragePath( $storagePath ) { $storagePath = dirname( $storagePath ); - list( $b, $cont, $rel ) = self::splitStoragePath( $storagePath ); + list( , , $rel ) = self::splitStoragePath( $storagePath ); return ( $rel === null ) ? null : $storagePath; } /** * Get the final extension from a storage or FS path * - * @param $path string + * @param string $path * @return string */ final public static function extensionFromPath( $path ) { @@ -1106,7 +1332,7 @@ abstract class FileBackend { /** * Check if a relative path has no directory traversals * - * @param $path string + * @param string $path * @return bool * @since 1.20 */ @@ -1117,8 +1343,9 @@ abstract class FileBackend { /** * Build a Content-Disposition header value per RFC 6266. 
* - * @param $type string One of (attachment, inline) - * @param $filename string Suggested file name (should not contain slashes) + * @param string $type One of (attachment, inline) + * @param string $filename Suggested file name (should not contain slashes) + * @throws MWException * @return string * @since 1.20 */ @@ -1145,7 +1372,7 @@ abstract class FileBackend { * * This uses the same traversal protection as Title::secureAndSplit(). * - * @param $path string Storage path relative to a container + * @param string $path Storage path relative to a container * @return string|null */ final protected static function normalizeContainerPath( $path ) { @@ -1171,3 +1398,9 @@ abstract class FileBackend { return $path; } } + +/** + * @ingroup FileBackend + * @since 1.22 + */ +class FileBackendError extends MWException {} diff --git a/includes/filebackend/FileBackendGroup.php b/includes/filebackend/FileBackendGroup.php index 8bbc96d0..be8a2076 100644 --- a/includes/filebackend/FileBackendGroup.php +++ b/includes/filebackend/FileBackendGroup.php @@ -87,21 +87,25 @@ class FileBackendGroup { $thumbDir = isset( $info['thumbDir'] ) ? $info['thumbDir'] : "{$directory}/thumb"; + $transcodedDir = isset( $info['transcodedDir'] ) + ? $info['transcodedDir'] + : "{$directory}/transcoded"; $fileMode = isset( $info['fileMode'] ) ? 
$info['fileMode'] : 0644; // Get the FS backend configuration $autoBackends[] = array( - 'name' => $backendName, - 'class' => 'FSFileBackend', - 'lockManager' => 'fsLockManager', + 'name' => $backendName, + 'class' => 'FSFileBackend', + 'lockManager' => 'fsLockManager', 'containerPaths' => array( - "{$repoName}-public" => "{$directory}", - "{$repoName}-thumb" => $thumbDir, + "{$repoName}-public" => "{$directory}", + "{$repoName}-thumb" => $thumbDir, + "{$repoName}-transcoded" => $transcodedDir, "{$repoName}-deleted" => $deletedDir, - "{$repoName}-temp" => "{$directory}/temp" + "{$repoName}-temp" => "{$directory}/temp" ), - 'fileMode' => $fileMode, + 'fileMode' => $fileMode, ); } @@ -112,7 +116,7 @@ class FileBackendGroup { /** * Register an array of file backend configurations * - * @param $configs Array + * @param Array $configs * @return void * @throws MWException */ @@ -122,15 +126,17 @@ class FileBackendGroup { throw new MWException( "Cannot register a backend with no name." ); } $name = $config['name']; - if ( !isset( $config['class'] ) ) { + if ( isset( $this->backends[$name] ) ) { + throw new MWException( "Backend with name `{$name}` already registered." ); + } elseif ( !isset( $config['class'] ) ) { throw new MWException( "Cannot register backend `{$name}` with no class." 
); } $class = $config['class']; unset( $config['class'] ); // backend won't need this $this->backends[$name] = array( - 'class' => $class, - 'config' => $config, + 'class' => $class, + 'config' => $config, 'instance' => null ); } @@ -139,7 +145,7 @@ class FileBackendGroup { /** * Get the backend object with a given name * - * @param $name string + * @param string $name * @return FileBackend * @throws MWException */ @@ -159,7 +165,7 @@ class FileBackendGroup { /** * Get the config array for a backend object with a given name * - * @param $name string + * @param string $name * @return Array * @throws MWException */ @@ -174,11 +180,11 @@ class FileBackendGroup { /** * Get an appropriate backend object from a storage path * - * @param $storagePath string + * @param string $storagePath * @return FileBackend|null Backend or null on failure */ public function backendFromPath( $storagePath ) { - list( $backend, $c, $p ) = FileBackend::splitStoragePath( $storagePath ); + list( $backend, , ) = FileBackend::splitStoragePath( $storagePath ); if ( $backend !== null && isset( $this->backends[$backend] ) ) { return $this->get( $backend ); } diff --git a/includes/filebackend/FileBackendMultiWrite.php b/includes/filebackend/FileBackendMultiWrite.php index 4be03231..97584a71 100644 --- a/includes/filebackend/FileBackendMultiWrite.php +++ b/includes/filebackend/FileBackendMultiWrite.php @@ -62,7 +62,7 @@ class FileBackendMultiWrite extends FileBackend { * Additional $config params include: * - backends : Array of backend config and multi-backend settings. * Each value is the config used in the constructor of a - * FileBackendStore class, but with these additional settings: + * FileBackendStore class, but with these additional settings: * - class : The name of the backend class * - isMultiMaster : This must be set for one backend. 
* - template: : If given a backend name, this will use @@ -75,10 +75,13 @@ class FileBackendMultiWrite extends FileBackend { * - autoResync : Automatically resync the clone backends to the master backend * when pre-operation sync checks fail. This should only be used * if the master backend is stable and not missing any files. + * Use "conservative" to limit resyncing to copying newer master + * backend files over older (or non-existing) clone backend files. + * Cases that cannot be handled will result in operation abortion. * - noPushQuickOps : (hack) Only apply doQuickOperations() to the master backend. * - noPushDirConts : (hack) Only apply directory functions to the master backend. * - * @param $config Array + * @param Array $config * @throws MWException */ public function __construct( array $config ) { @@ -86,7 +89,9 @@ class FileBackendMultiWrite extends FileBackend { $this->syncChecks = isset( $config['syncChecks'] ) ? $config['syncChecks'] : self::CHECK_SIZE; - $this->autoResync = !empty( $config['autoResync'] ); + $this->autoResync = isset( $config['autoResync'] ) + ? $config['autoResync'] + : false; $this->noPushQuickOps = isset( $config['noPushQuickOps'] ) ? $config['noPushQuickOps'] : false; @@ -131,26 +136,15 @@ class FileBackendMultiWrite extends FileBackend { } } - /** - * @see FileBackend::doOperationsInternal() - * @return Status - */ final protected function doOperationsInternal( array $ops, array $opts ) { $status = Status::newGood(); $mbe = $this->backends[$this->masterIndex]; // convenience - // Get the paths to lock from the master backend - $realOps = $this->substOpBatchPaths( $ops, $mbe ); - $paths = $mbe->getPathsToLockForOpsInternal( $mbe->getOperationsInternal( $realOps ) ); - // Get the paths under the proxy backend's name - $paths['sh'] = $this->unsubstPaths( $paths['sh'] ); - $paths['ex'] = $this->unsubstPaths( $paths['ex'] ); // Try to lock those files for the scope of this function... 
if ( empty( $opts['nonLocking'] ) ) { // Try to lock those files for the scope of this function... - $scopeLockS = $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status ); - $scopeLockE = $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status ); + $scopeLock = $this->getScopedLocksForOps( $ops, $status ); if ( !$status->isOK() ) { return $status; // abort } @@ -177,12 +171,14 @@ class FileBackendMultiWrite extends FileBackend { } } // Actually attempt the operation batch on the master backend... + $realOps = $this->substOpBatchPaths( $ops, $mbe ); $masterStatus = $mbe->doOperations( $realOps, $opts ); $status->merge( $masterStatus ); - // Propagate the operations to the clone backends if there were no fatal errors. - // If $ops only had one operation, this might avoid backend inconsistencies. - // This also avoids inconsistency for expected errors (like "file already exists"). - if ( !count( $masterStatus->getErrorsArray() ) ) { + // Propagate the operations to the clone backends if there were no unexpected errors + // and if there were either no expected errors or if the 'force' option was used. + // However, if nothing succeeded at all, then don't replicate any of the operations. + // If $ops only had one operation, this might avoid backend sync inconsistencies. 
+ if ( $masterStatus->isOK() && $masterStatus->successCount > 0 ) { foreach ( $this->backends as $index => $backend ) { if ( $index !== $this->masterIndex ) { // not done already $realOps = $this->substOpBatchPaths( $ops, $backend ); @@ -203,7 +199,7 @@ class FileBackendMultiWrite extends FileBackend { /** * Check that a set of files are consistent across all internal backends * - * @param $paths Array List of storage paths + * @param array $paths List of storage paths * @return Status */ public function consistencyCheck( array $paths ) { @@ -269,7 +265,7 @@ class FileBackendMultiWrite extends FileBackend { /** * Check that a set of file paths are usable across all internal backends * - * @param $paths Array List of storage paths + * @param array $paths List of storage paths * @return Status */ public function accessibilityCheck( array $paths ) { @@ -294,7 +290,7 @@ class FileBackendMultiWrite extends FileBackend { * Check that a set of files are consistent across all internal backends * and re-synchronize those files againt the "multi master" if needed. * - * @param $paths Array List of storage paths + * @param array $paths List of storage paths * @return Status */ public function resyncFiles( array $paths ) { @@ -302,12 +298,12 @@ class FileBackendMultiWrite extends FileBackend { $mBackend = $this->backends[$this->masterIndex]; foreach ( $paths as $path ) { - $mPath = $this->substPaths( $path, $mBackend ); - $mSha1 = $mBackend->getFileSha1Base36( array( 'src' => $mPath ) ); - $mExist = $mBackend->fileExists( array( 'src' => $mPath ) ); - // Check if the master backend is available... 
- if ( $mExist === null ) { + $mPath = $this->substPaths( $path, $mBackend ); + $mSha1 = $mBackend->getFileSha1Base36( array( 'src' => $mPath, 'latest' => true ) ); + $mStat = $mBackend->getFileStat( array( 'src' => $mPath, 'latest' => true ) ); + if ( $mStat === null || ( $mSha1 !== false && !$mStat ) ) { // sanity $status->fatal( 'backend-fail-internal', $this->name ); + continue; // file is not available on the master backend... } // Check of all clone backends agree with the master... foreach ( $this->backends as $index => $cBackend ) { @@ -315,15 +311,31 @@ class FileBackendMultiWrite extends FileBackend { continue; // master } $cPath = $this->substPaths( $path, $cBackend ); - $cSha1 = $cBackend->getFileSha1Base36( array( 'src' => $cPath ) ); + $cSha1 = $cBackend->getFileSha1Base36( array( 'src' => $cPath, 'latest' => true ) ); + $cStat = $cBackend->getFileStat( array( 'src' => $cPath, 'latest' => true ) ); + if ( $cStat === null || ( $cSha1 !== false && !$cStat ) ) { // sanity + $status->fatal( 'backend-fail-internal', $cBackend->getName() ); + continue; // file is not available on the clone backend... 
+ } if ( $mSha1 === $cSha1 ) { // already synced; nothing to do - } elseif ( $mSha1 ) { // file is in master - $fsFile = $mBackend->getLocalReference( array( 'src' => $mPath ) ); + } elseif ( $mSha1 !== false ) { // file is in master + if ( $this->autoResync === 'conservative' + && $cStat && $cStat['mtime'] > $mStat['mtime'] ) + { + $status->fatal( 'backend-fail-synced', $path ); + continue; // don't rollback data + } + $fsFile = $mBackend->getLocalReference( + array( 'src' => $mPath, 'latest' => true ) ); $status->merge( $cBackend->quickStore( array( 'src' => $fsFile->getPath(), 'dst' => $cPath ) ) ); - } elseif ( $mExist === false ) { // file is not in master + } elseif ( $mStat === false ) { // file is not in master + if ( $this->autoResync === 'conservative' ) { + $status->fatal( 'backend-fail-synced', $path ); + continue; // don't delete data + } $status->merge( $cBackend->quickDelete( array( 'src' => $cPath ) ) ); } } @@ -335,14 +347,20 @@ class FileBackendMultiWrite extends FileBackend { /** * Get a list of file storage paths to read or write for a list of operations * - * @param $ops Array Same format as doOperations() + * @param array $ops Same format as doOperations() * @return Array List of storage paths to files (does not include directories) */ protected function fileStoragePathsForOps( array $ops ) { $paths = array(); foreach ( $ops as $op ) { if ( isset( $op['src'] ) ) { - $paths[] = $op['src']; + // For things like copy/move/delete with "ignoreMissingSource" and there + // is no source file, nothing should happen and there should be no errors. 
+ if ( empty( $op['ignoreMissingSource'] ) + || $this->fileExists( array( 'src' => $op['src'] ) ) ) + { + $paths[] = $op['src']; + } } if ( isset( $op['srcs'] ) ) { $paths = array_merge( $paths, $op['srcs'] ); @@ -351,15 +369,15 @@ class FileBackendMultiWrite extends FileBackend { $paths[] = $op['dst']; } } - return array_unique( array_filter( $paths, 'FileBackend::isStoragePath' ) ); + return array_values( array_unique( array_filter( $paths, 'FileBackend::isStoragePath' ) ) ); } /** * Substitute the backend name in storage path parameters * for a set of operations with that of a given internal backend. * - * @param $ops Array List of file operation arrays - * @param $backend FileBackendStore + * @param array $ops List of file operation arrays + * @param FileBackendStore $backend * @return Array */ protected function substOpBatchPaths( array $ops, FileBackendStore $backend ) { @@ -379,8 +397,8 @@ class FileBackendMultiWrite extends FileBackend { /** * Same as substOpBatchPaths() but for a single operation * - * @param $ops array File operation array - * @param $backend FileBackendStore + * @param array $ops File operation array + * @param FileBackendStore $backend * @return Array */ protected function substOpPaths( array $ops, FileBackendStore $backend ) { @@ -391,8 +409,8 @@ class FileBackendMultiWrite extends FileBackend { /** * Substitute the backend of storage paths with an internal backend's name * - * @param $paths Array|string List of paths or single string path - * @param $backend FileBackendStore + * @param array|string $paths List of paths or single string path + * @param FileBackendStore $backend * @return Array|string */ protected function substPaths( $paths, FileBackendStore $backend ) { @@ -406,7 +424,7 @@ class FileBackendMultiWrite extends FileBackend { /** * Substitute the backend of internal storage paths with the proxy backend's name * - * @param $paths Array|string List of paths or single string path + * @param array|string $paths List of paths 
or single string path * @return Array|string */ protected function unsubstPaths( $paths ) { @@ -417,10 +435,6 @@ class FileBackendMultiWrite extends FileBackend { ); } - /** - * @see FileBackend::doQuickOperationsInternal() - * @return Status - */ protected function doQuickOperationsInternal( array $ops ) { $status = Status::newGood(); // Do the operations on the master backend; setting Status fields... @@ -446,18 +460,14 @@ class FileBackendMultiWrite extends FileBackend { } /** - * @param $path string Storage path + * @param string $path Storage path * @return bool Path container should have dir changes pushed to all backends */ protected function replicateContainerDirChanges( $path ) { - list( $b, $shortCont, $r ) = self::splitStoragePath( $path ); + list( , $shortCont, ) = self::splitStoragePath( $path ); return !in_array( $shortCont, $this->noPushDirConts ); } - /** - * @see FileBackend::doPrepare() - * @return Status - */ protected function doPrepare( array $params ) { $status = Status::newGood(); $replicate = $this->replicateContainerDirChanges( $params['dir'] ); @@ -470,11 +480,6 @@ class FileBackendMultiWrite extends FileBackend { return $status; } - /** - * @see FileBackend::doSecure() - * @param $params array - * @return Status - */ protected function doSecure( array $params ) { $status = Status::newGood(); $replicate = $this->replicateContainerDirChanges( $params['dir'] ); @@ -487,11 +492,6 @@ class FileBackendMultiWrite extends FileBackend { return $status; } - /** - * @see FileBackend::doPublish() - * @param $params array - * @return Status - */ protected function doPublish( array $params ) { $status = Status::newGood(); $replicate = $this->replicateContainerDirChanges( $params['dir'] ); @@ -504,11 +504,6 @@ class FileBackendMultiWrite extends FileBackend { return $status; } - /** - * @see FileBackend::doClean() - * @param $params array - * @return Status - */ protected function doClean( array $params ) { $status = Status::newGood(); $replicate = 
$this->replicateContainerDirChanges( $params['dir'] ); @@ -521,149 +516,100 @@ class FileBackendMultiWrite extends FileBackend { return $status; } - /** - * @see FileBackend::concatenate() - * @param $params array - * @return Status - */ public function concatenate( array $params ) { // We are writing to an FS file, so we don't need to do this per-backend $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->concatenate( $realParams ); } - /** - * @see FileBackend::fileExists() - * @param $params array - */ public function fileExists( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->fileExists( $realParams ); } - /** - * @see FileBackend::getFileTimestamp() - * @param $params array - * @return bool|string - */ public function getFileTimestamp( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->getFileTimestamp( $realParams ); } - /** - * @see FileBackend::getFileSize() - * @param $params array - * @return bool|int - */ public function getFileSize( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->getFileSize( $realParams ); } - /** - * @see FileBackend::getFileStat() - * @param $params array - * @return Array|bool|null - */ public function getFileStat( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->getFileStat( $realParams ); } - /** - * @see FileBackend::getFileContents() - * @param $params array - * @return bool|string - */ - public function getFileContents( array $params ) { + public function getFileContentsMulti( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); - 
return $this->backends[$this->masterIndex]->getFileContents( $realParams ); + $contentsM = $this->backends[$this->masterIndex]->getFileContentsMulti( $realParams ); + + $contents = array(); // (path => FSFile) mapping using the proxy backend's name + foreach ( $contentsM as $path => $data ) { + $contents[$this->unsubstPaths( $path )] = $data; + } + return $contents; } - /** - * @see FileBackend::getFileSha1Base36() - * @param $params array - * @return bool|string - */ public function getFileSha1Base36( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->getFileSha1Base36( $realParams ); } - /** - * @see FileBackend::getFileProps() - * @param $params array - * @return Array - */ public function getFileProps( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->getFileProps( $realParams ); } - /** - * @see FileBackend::streamFile() - * @param $params array - * @return \Status - */ public function streamFile( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->streamFile( $realParams ); } - /** - * @see FileBackend::getLocalReference() - * @param $params array - * @return FSFile|null - */ - public function getLocalReference( array $params ) { + public function getLocalReferenceMulti( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + $fsFilesM = $this->backends[$this->masterIndex]->getLocalReferenceMulti( $realParams ); + + $fsFiles = array(); // (path => FSFile) mapping using the proxy backend's name + foreach ( $fsFilesM as $path => $fsFile ) { + $fsFiles[$this->unsubstPaths( $path )] = $fsFile; + } + return $fsFiles; + } + + public function getLocalCopyMulti( array $params ) { $realParams = $this->substOpPaths( $params, 
$this->backends[$this->masterIndex] ); - return $this->backends[$this->masterIndex]->getLocalReference( $realParams ); + $tempFilesM = $this->backends[$this->masterIndex]->getLocalCopyMulti( $realParams ); + + $tempFiles = array(); // (path => TempFSFile) mapping using the proxy backend's name + foreach ( $tempFilesM as $path => $tempFile ) { + $tempFiles[$this->unsubstPaths( $path )] = $tempFile; + } + return $tempFiles; } - /** - * @see FileBackend::getLocalCopy() - * @param $params array - * @return null|TempFSFile - */ - public function getLocalCopy( array $params ) { + public function getFileHttpUrl( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); - return $this->backends[$this->masterIndex]->getLocalCopy( $realParams ); + return $this->backends[$this->masterIndex]->getFileHttpUrl( $realParams ); } - /** - * @see FileBackend::directoryExists() - * @param $params array - * @return bool|null - */ public function directoryExists( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->directoryExists( $realParams ); } - /** - * @see FileBackend::getSubdirectoryList() - * @param $params array - * @return Array|null|Traversable - */ public function getDirectoryList( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->getDirectoryList( $realParams ); } - /** - * @see FileBackend::getFileList() - * @param $params array - * @return Array|null|\Traversable - */ public function getFileList( array $params ) { $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); return $this->backends[$this->masterIndex]->getFileList( $realParams ); } - /** - * @see FileBackend::clearCache() - */ public function clearCache( array $paths = null ) { foreach ( $this->backends as $backend ) { $realPaths = is_array( $paths ) ? 
$this->substPaths( $paths, $backend ) : null; @@ -671,19 +617,17 @@ class FileBackendMultiWrite extends FileBackend { } } - /** - * @see FileBackend::getScopedLocksForOps() - */ public function getScopedLocksForOps( array $ops, Status $status ) { - $fileOps = $this->backends[$this->masterIndex]->getOperationsInternal( $ops ); + $realOps = $this->substOpBatchPaths( $ops, $this->backends[$this->masterIndex] ); + $fileOps = $this->backends[$this->masterIndex]->getOperationsInternal( $realOps ); // Get the paths to lock from the master backend $paths = $this->backends[$this->masterIndex]->getPathsToLockForOpsInternal( $fileOps ); // Get the paths under the proxy backend's name - $paths['sh'] = $this->unsubstPaths( $paths['sh'] ); - $paths['ex'] = $this->unsubstPaths( $paths['ex'] ); - return array( - $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status ), - $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status ) + $pbPaths = array( + LockManager::LOCK_UW => $this->unsubstPaths( $paths[LockManager::LOCK_UW] ), + LockManager::LOCK_EX => $this->unsubstPaths( $paths[LockManager::LOCK_EX] ) ); + // Actually acquire the locks + return array( $this->getScopedFileLocks( $pbPaths, 'mixed', $status ) ); } } diff --git a/includes/filebackend/FileBackendStore.php b/includes/filebackend/FileBackendStore.php index 083dfea9..0921e99f 100644 --- a/includes/filebackend/FileBackendStore.php +++ b/includes/filebackend/FileBackendStore.php @@ -38,26 +38,43 @@ abstract class FileBackendStore extends FileBackend { /** @var BagOStuff */ protected $memCache; - /** @var ProcessCacheLRU */ - protected $cheapCache; // Map of paths to small (RAM/disk) cache items - /** @var ProcessCacheLRU */ - protected $expensiveCache; // Map of paths to large (RAM/disk) cache items + /** @var ProcessCacheLRU Map of paths to small (RAM/disk) cache items */ + protected $cheapCache; + /** @var ProcessCacheLRU Map of paths to large (RAM/disk) cache items */ + protected 
$expensiveCache; - /** @var Array Map of container names to sharding settings */ - protected $shardViaHashLevels = array(); // (container name => config array) + /** @var Array Map of container names to sharding config */ + protected $shardViaHashLevels = array(); + + /** @var callback Method to get the MIME type of files */ + protected $mimeCallback; protected $maxFileSize = 4294967296; // integer bytes (4GiB) + const CACHE_TTL = 10; // integer; TTL in seconds for process cache entries + const CACHE_CHEAP_SIZE = 300; // integer; max entries in "cheap cache" + const CACHE_EXPENSIVE_SIZE = 5; // integer; max entries in "expensive cache" + /** * @see FileBackend::__construct() + * Additional $config params include: + * - mimeCallback : Callback that takes (storage path, content, file system path) and + * returns the MIME type of the file or 'unknown/unknown'. The file + * system path parameter should be used if the content one is null. * - * @param $config Array + * @param array $config */ public function __construct( array $config ) { parent::__construct( $config ); - $this->memCache = new EmptyBagOStuff(); // disabled by default - $this->cheapCache = new ProcessCacheLRU( 300 ); - $this->expensiveCache = new ProcessCacheLRU( 5 ); + $this->mimeCallback = isset( $config['mimeCallback'] ) + ? $config['mimeCallback'] + : function( $storagePath, $content, $fsPath ) { + // @TODO: handle the case of extension-less files using the contents + return StreamFile::contentTypeFromPath( $storagePath ) ?: 'unknown/unknown'; + }; + $this->memCache = new EmptyBagOStuff(); // disabled by default + $this->cheapCache = new ProcessCacheLRU( self::CACHE_CHEAP_SIZE ); + $this->expensiveCache = new ProcessCacheLRU( self::CACHE_EXPENSIVE_SIZE ); } /** @@ -72,124 +89,126 @@ abstract class FileBackendStore extends FileBackend { } /** - * Check if a file can be created at a given storage path. - * FS backends should check if the parent directory exists and the file is writable. 
+ * Check if a file can be created or changed at a given storage path. + * FS backends should check if the parent directory exists, files can be + * written under it, and that any file already there is writable. * Backends using key/value stores should check if the container exists. * - * @param $storagePath string + * @param string $storagePath * @return bool */ abstract public function isPathUsableInternal( $storagePath ); /** * Create a file in the backend with the given contents. + * This will overwrite any file that exists at the destination. * Do not call this function from places outside FileBackend and FileOp. * * $params include: - * - content : the raw file contents - * - dst : destination storage path - * - overwrite : overwrite any file that exists at the destination - * - disposition : Content-Disposition header value for the destination - * - async : Status will be returned immediately if supported. - * If the status is OK, then its value field will be - * set to a FileBackendStoreOpHandle object. + * - content : the raw file contents + * - dst : destination storage path + * - headers : HTTP header name/value map + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * - dstExists : Whether a file exists at the destination (optimization). + * Callers can use "false" if no existing file is being changed. * - * @param $params Array + * @param array $params * @return Status */ final public function createInternal( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . 
"-{$this->name}" ); if ( strlen( $params['content'] ) > $this->maxFileSizeInternal() ) { $status = Status::newFatal( 'backend-fail-maxsize', $params['dst'], $this->maxFileSizeInternal() ); } else { $status = $this->doCreateInternal( $params ); $this->clearCache( array( $params['dst'] ) ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + if ( !isset( $params['dstExists'] ) || $params['dstExists'] ) { $this->deleteFileCache( $params['dst'] ); // persistent cache } } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } /** * @see FileBackendStore::createInternal() + * @return Status */ abstract protected function doCreateInternal( array $params ); /** * Store a file into the backend from a file on disk. + * This will overwrite any file that exists at the destination. * Do not call this function from places outside FileBackend and FileOp. * * $params include: - * - src : source path on disk - * - dst : destination storage path - * - overwrite : overwrite any file that exists at the destination - * - disposition : Content-Disposition header value for the destination - * - async : Status will be returned immediately if supported. - * If the status is OK, then its value field will be - * set to a FileBackendStoreOpHandle object. + * - src : source path on disk + * - dst : destination storage path + * - headers : HTTP header name/value map + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * - dstExists : Whether a file exists at the destination (optimization). + * Callers can use "false" if no existing file is being changed. * - * @param $params Array + * @param array $params * @return Status */ final public function storeInternal( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . 
"-{$this->name}" ); if ( filesize( $params['src'] ) > $this->maxFileSizeInternal() ) { $status = Status::newFatal( 'backend-fail-maxsize', $params['dst'], $this->maxFileSizeInternal() ); } else { $status = $this->doStoreInternal( $params ); $this->clearCache( array( $params['dst'] ) ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + if ( !isset( $params['dstExists'] ) || $params['dstExists'] ) { $this->deleteFileCache( $params['dst'] ); // persistent cache } } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } /** * @see FileBackendStore::storeInternal() + * @return Status */ abstract protected function doStoreInternal( array $params ); /** * Copy a file from one storage path to another in the backend. + * This will overwrite any file that exists at the destination. * Do not call this function from places outside FileBackend and FileOp. * * $params include: - * - src : source storage path - * - dst : destination storage path - * - overwrite : overwrite any file that exists at the destination - * - disposition : Content-Disposition header value for the destination - * - async : Status will be returned immediately if supported. - * If the status is OK, then its value field will be - * set to a FileBackendStoreOpHandle object. + * - src : source storage path + * - dst : destination storage path + * - ignoreMissingSource : do nothing if the source file does not exist + * - headers : HTTP header name/value map + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * - dstExists : Whether a file exists at the destination (optimization). + * Callers can use "false" if no existing file is being changed. * - * @param $params Array + * @param array $params * @return Status */ final public function copyInternal( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . 
$this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = $this->doCopyInternal( $params ); $this->clearCache( array( $params['dst'] ) ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + if ( !isset( $params['dstExists'] ) || $params['dstExists'] ) { $this->deleteFileCache( $params['dst'] ); // persistent cache } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } /** * @see FileBackendStore::copyInternal() + * @return Status */ abstract protected function doCopyInternal( array $params ); @@ -204,52 +223,50 @@ abstract class FileBackendStore extends FileBackend { * If the status is OK, then its value field will be * set to a FileBackendStoreOpHandle object. * - * @param $params Array + * @param array $params * @return Status */ final public function deleteInternal( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = $this->doDeleteInternal( $params ); $this->clearCache( array( $params['src'] ) ); $this->deleteFileCache( $params['src'] ); // persistent cache - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } /** * @see FileBackendStore::deleteInternal() + * @return Status */ abstract protected function doDeleteInternal( array $params ); /** * Move a file from one storage path to another in the backend. + * This will overwrite any file that exists at the destination. * Do not call this function from places outside FileBackend and FileOp. * * $params include: - * - src : source storage path - * - dst : destination storage path - * - overwrite : overwrite any file that exists at the destination - * - disposition : Content-Disposition header value for the destination - * - async : Status will be returned immediately if supported. 
- * If the status is OK, then its value field will be - * set to a FileBackendStoreOpHandle object. + * - src : source storage path + * - dst : destination storage path + * - ignoreMissingSource : do nothing if the source file does not exist + * - headers : HTTP header name/value map + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * - dstExists : Whether a file exists at the destination (optimization). + * Callers can use "false" if no existing file is being changed. * - * @param $params Array + * @param array $params * @return Status */ final public function moveInternal( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = $this->doMoveInternal( $params ); $this->clearCache( array( $params['src'], $params['dst'] ) ); $this->deleteFileCache( $params['src'] ); // persistent cache - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated + if ( !isset( $params['dstExists'] ) || $params['dstExists'] ) { $this->deleteFileCache( $params['dst'] ); // persistent cache } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -259,10 +276,12 @@ abstract class FileBackendStore extends FileBackend { */ protected function doMoveInternal( array $params ) { unset( $params['async'] ); // two steps, won't work here :) + $nsrc = FileBackend::normalizeStoragePath( $params['src'] ); + $ndst = FileBackend::normalizeStoragePath( $params['dst'] ); // Copy source to dest $status = $this->copyInternal( $params ); - if ( $status->isOK() ) { - // Delete source (only fails due to races or medium going down) + if ( $nsrc !== $ndst && $status->isOK() ) { + // Delete source (only fails due to races or network problems) $status->merge( $this->deleteInternal( array( 'src' => $params['src'] ) ) ); $status->setResult( true, $status->value ); // ignore delete() errors } @@ -270,23 +289,52 @@ abstract class FileBackendStore extends FileBackend { } /** - * No-op file operation that does nothing. + * Alter metadata for a file at the storage path. * Do not call this function from places outside FileBackend and FileOp. * - * @param $params Array + * $params include: + * - src : source storage path + * - headers : HTTP header name/value map + * - async : Status will be returned immediately if supported. + * If the status is OK, then its value field will be + * set to a FileBackendStoreOpHandle object. + * + * @param array $params * @return Status */ - final public function nullInternal( array $params ) { + final public function describeInternal( array $params ) { + $section = new ProfileSection( __METHOD__ . 
"-{$this->name}" ); + if ( count( $params['headers'] ) ) { + $status = $this->doDescribeInternal( $params ); + $this->clearCache( array( $params['src'] ) ); + $this->deleteFileCache( $params['src'] ); // persistent cache + } else { + $status = Status::newGood(); // nothing to do + } + return $status; + } + + /** + * @see FileBackendStore::describeInternal() + * @return Status + */ + protected function doDescribeInternal( array $params ) { return Status::newGood(); } /** - * @see FileBackend::concatenate() + * No-op file operation that does nothing. + * Do not call this function from places outside FileBackend and FileOp. + * + * @param array $params * @return Status */ + final public function nullInternal( array $params ) { + return Status::newGood(); + } + final public function concatenate( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); // Try to lock the source files for the scope of this function @@ -302,8 +350,6 @@ abstract class FileBackendStore extends FileBackend { } } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -314,31 +360,41 @@ abstract class FileBackendStore extends FileBackend { protected function doConcatenate( array $params ) { $status = Status::newGood(); $tmpPath = $params['dst']; // convenience + unset( $params['latest'] ); // sanity // Check that the specified temp file is valid... wfSuppressWarnings(); - $ok = ( is_file( $tmpPath ) && !filesize( $tmpPath ) ); + $ok = ( is_file( $tmpPath ) && filesize( $tmpPath ) == 0 ); wfRestoreWarnings(); if ( !$ok ) { // not present or not empty $status->fatal( 'backend-fail-opentemp', $tmpPath ); return $status; } - // Build up the temp file using the source chunks (in order)... + // Get local FS versions of the chunks needed for the concatenation... 
+ $fsFiles = $this->getLocalReferenceMulti( $params ); + foreach ( $fsFiles as $path => &$fsFile ) { + if ( !$fsFile ) { // chunk failed to download? + $fsFile = $this->getLocalReference( array( 'src' => $path ) ); + if ( !$fsFile ) { // retry failed? + $status->fatal( 'backend-fail-read', $path ); + return $status; + } + } + } + unset( $fsFile ); // unset reference so we can reuse $fsFile + + // Get a handle for the destination temp file $tmpHandle = fopen( $tmpPath, 'ab' ); if ( $tmpHandle === false ) { $status->fatal( 'backend-fail-opentemp', $tmpPath ); return $status; } - foreach ( $params['srcs'] as $virtualSource ) { - // Get a local FS version of the chunk - $tmpFile = $this->getLocalReference( array( 'src' => $virtualSource ) ); - if ( !$tmpFile ) { - $status->fatal( 'backend-fail-read', $virtualSource ); - return $status; - } + + // Build up the temp file using the source chunks (in order)... + foreach ( $fsFiles as $virtualSource => $fsFile ) { // Get a handle to the local FS version - $sourceHandle = fopen( $tmpFile->getPath(), 'r' ); + $sourceHandle = fopen( $fsFile->getPath(), 'rb' ); if ( $sourceHandle === false ) { fclose( $tmpHandle ); $status->fatal( 'backend-fail-read', $virtualSource ); @@ -363,20 +419,13 @@ abstract class FileBackendStore extends FileBackend { return $status; } - /** - * @see FileBackend::doPrepare() - * @return Status - */ final protected function doPrepare( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); - + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); if ( $dir === null ) { $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); return $status; // invalid storage path } @@ -384,14 +433,12 @@ abstract class FileBackendStore extends FileBackend { $status->merge( $this->doPrepareInternal( $fullCont, $dir, $params ) ); } else { // directory is on several shards wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); - list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = self::splitStoragePath( $params['dir'] ); foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { $status->merge( $this->doPrepareInternal( "{$fullCont}{$suffix}", $dir, $params ) ); } } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -403,20 +450,13 @@ abstract class FileBackendStore extends FileBackend { return Status::newGood(); } - /** - * @see FileBackend::doSecure() - * @return Status - */ final protected function doSecure( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); if ( $dir === null ) { $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; // invalid storage path } @@ -424,14 +464,12 @@ abstract class FileBackendStore extends FileBackend { $status->merge( $this->doSecureInternal( $fullCont, $dir, $params ) ); } else { // directory is on several shards wfDebug( __METHOD__ . 
": iterating over all container shards.\n" ); - list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = self::splitStoragePath( $params['dir'] ); foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { $status->merge( $this->doSecureInternal( "{$fullCont}{$suffix}", $dir, $params ) ); } } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -443,20 +481,13 @@ abstract class FileBackendStore extends FileBackend { return Status::newGood(); } - /** - * @see FileBackend::doPublish() - * @return Status - */ final protected function doPublish( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); if ( $dir === null ) { $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; // invalid storage path } @@ -464,14 +495,12 @@ abstract class FileBackendStore extends FileBackend { $status->merge( $this->doPublishInternal( $fullCont, $dir, $params ) ); } else { // directory is on several shards wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); - list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = self::splitStoragePath( $params['dir'] ); foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { $status->merge( $this->doPublishInternal( "{$fullCont}{$suffix}", $dir, $params ) ); } } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -483,13 +512,8 @@ abstract class FileBackendStore extends FileBackend { return Status::newGood(); } - /** - * @see FileBackend::doClean() - * @return Status - */ final protected function doClean( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); // Recursive: first delete all empty subdirs recursively @@ -500,14 +524,13 @@ abstract class FileBackendStore extends FileBackend { $subDir = $params['dir'] . "/{$subDirRel}"; // full path $status->merge( $this->doClean( array( 'dir' => $subDir ) + $params ) ); } + unset( $subDirsRel ); // free directory for rmdir() on Windows (for FS backends) } } list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); if ( $dir === null ) { $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; // invalid storage path } @@ -515,8 +538,6 @@ abstract class FileBackendStore extends FileBackend { $filesLockEx = array( $params['dir'] ); $scopedLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status ); if ( !$status->isOK() ) { - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; // abort } @@ -525,15 +546,13 @@ abstract class FileBackendStore extends FileBackend { $this->deleteContainerCache( $fullCont ); // purge cache } else { // directory is on several shards wfDebug( __METHOD__ . 
": iterating over all container shards.\n" ); - list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = self::splitStoragePath( $params['dir'] ); foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { $status->merge( $this->doCleanInternal( "{$fullCont}{$suffix}", $dir, $params ) ); $this->deleteContainerCache( "{$fullCont}{$suffix}" ); // purge cache } } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -545,68 +564,46 @@ abstract class FileBackendStore extends FileBackend { return Status::newGood(); } - /** - * @see FileBackend::fileExists() - * @return bool|null - */ final public function fileExists( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $stat = $this->getFileStat( $params ); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return ( $stat === null ) ? null : (bool)$stat; // null => failure } - /** - * @see FileBackend::getFileTimestamp() - * @return bool - */ final public function getFileTimestamp( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $stat = $this->getFileStat( $params ); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $stat ? $stat['mtime'] : false; } - /** - * @see FileBackend::getFileSize() - * @return bool - */ final public function getFileSize( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $stat = $this->getFileStat( $params ); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $stat ? 
$stat['size'] : false; } - /** - * @see FileBackend::getFileStat() - * @return bool - */ final public function getFileStat( array $params ) { $path = self::normalizeStoragePath( $params['src'] ); if ( $path === null ) { return false; // invalid storage path } - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $latest = !empty( $params['latest'] ); // use latest data? - if ( !$this->cheapCache->has( $path, 'stat' ) ) { + if ( !$this->cheapCache->has( $path, 'stat', self::CACHE_TTL ) ) { $this->primeFileCache( array( $path ) ); // check persistent cache } - if ( $this->cheapCache->has( $path, 'stat' ) ) { + if ( $this->cheapCache->has( $path, 'stat', self::CACHE_TTL ) ) { $stat = $this->cheapCache->get( $path, 'stat' ); // If we want the latest data, check that this cached // value was in fact fetched with the latest available data. - if ( !$latest || $stat['latest'] ) { - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); - return $stat; + if ( is_array( $stat ) ) { + if ( !$latest || $stat['latest'] ) { + return $stat; + } + } elseif ( in_array( $stat, array( 'NOT_EXIST', 'NOT_EXIST_LATEST' ) ) ) { + if ( !$latest || $stat === 'NOT_EXIST_LATEST' ) { + return false; + } } } wfProfileIn( __METHOD__ . '-miss' ); @@ -614,7 +611,7 @@ abstract class FileBackendStore extends FileBackend { $stat = $this->doGetFileStat( $params ); wfProfileOut( __METHOD__ . '-miss-' . $this->name ); wfProfileOut( __METHOD__ . 
'-miss' ); - if ( is_array( $stat ) ) { // don't cache negatives + if ( is_array( $stat ) ) { // file exists $stat['latest'] = $latest; $this->cheapCache->set( $path, 'stat', $stat ); $this->setFileCache( $path, $stat ); // update persistent cache @@ -622,11 +619,14 @@ abstract class FileBackendStore extends FileBackend { $this->cheapCache->set( $path, 'sha1', array( 'hash' => $stat['sha1'], 'latest' => $latest ) ); } - } else { + } elseif ( $stat === false ) { // file does not exist + $this->cheapCache->set( $path, 'stat', $latest ? 'NOT_EXIST_LATEST' : 'NOT_EXIST' ); + $this->cheapCache->set( $path, 'sha1', // the SHA-1 must be false too + array( 'hash' => false, 'latest' => $latest ) ); wfDebug( __METHOD__ . ": File $path does not exist.\n" ); + } else { // an error occurred + wfDebug( __METHOD__ . ": Could not stat file $path.\n" ); } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $stat; } @@ -635,46 +635,41 @@ abstract class FileBackendStore extends FileBackend { */ abstract protected function doGetFileStat( array $params ); + public function getFileContentsMulti( array $params ) { + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); + + $params = $this->setConcurrencyFlags( $params ); + $contents = $this->doGetFileContentsMulti( $params ); + + return $contents; + } + /** - * @see FileBackend::getFileContents() - * @return bool|string + * @see FileBackendStore::getFileContentsMulti() + * @return Array */ - public function getFileContents( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); - $tmpFile = $this->getLocalReference( $params ); - if ( !$tmpFile ) { - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); - return false; + protected function doGetFileContentsMulti( array $params ) { + $contents = array(); + foreach ( $this->doGetLocalReferenceMulti( $params ) as $path => $fsFile ) { + wfSuppressWarnings(); + $contents[$path] = $fsFile ? file_get_contents( $fsFile->getPath() ) : false; + wfRestoreWarnings(); } - wfSuppressWarnings(); - $data = file_get_contents( $tmpFile->getPath() ); - wfRestoreWarnings(); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); - return $data; + return $contents; } - /** - * @see FileBackend::getFileSha1Base36() - * @return bool|string - */ final public function getFileSha1Base36( array $params ) { $path = self::normalizeStoragePath( $params['src'] ); if ( $path === null ) { return false; // invalid storage path } - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $latest = !empty( $params['latest'] ); // use latest data? - if ( $this->cheapCache->has( $path, 'sha1' ) ) { + if ( $this->cheapCache->has( $path, 'sha1', self::CACHE_TTL ) ) { $stat = $this->cheapCache->get( $path, 'sha1' ); // If we want the latest data, check that this cached // value was in fact fetched with the latest available data. if ( !$latest || $stat['latest'] ) { - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $stat['hash']; } } @@ -683,12 +678,7 @@ abstract class FileBackendStore extends FileBackend { $hash = $this->doGetFileSha1Base36( $params ); wfProfileOut( __METHOD__ . '-miss-' . $this->name ); wfProfileOut( __METHOD__ . '-miss' ); - if ( $hash ) { // don't cache negatives - $this->cheapCache->set( $path, 'sha1', - array( 'hash' => $hash, 'latest' => $latest ) ); - } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); + $this->cheapCache->set( $path, 'sha1', array( 'hash' => $hash, 'latest' => $latest ) ); return $hash; } @@ -705,59 +695,80 @@ abstract class FileBackendStore extends FileBackend { } } - /** - * @see FileBackend::getFileProps() - * @return Array - */ final public function getFileProps( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $fsFile = $this->getLocalReference( $params ); $props = $fsFile ? $fsFile->getProps() : FSFile::placeholderProps(); - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $props; } - /** - * @see FileBackend::getLocalReference() - * @return TempFSFile|null - */ - public function getLocalReference( array $params ) { - $path = self::normalizeStoragePath( $params['src'] ); - if ( $path === null ) { - return null; // invalid storage path - } - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + final public function getLocalReferenceMulti( array $params ) { + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); + + $params = $this->setConcurrencyFlags( $params ); + + $fsFiles = array(); // (path => FSFile) $latest = !empty( $params['latest'] ); // use latest data? - if ( $this->expensiveCache->has( $path, 'localRef' ) ) { - $val = $this->expensiveCache->get( $path, 'localRef' ); - // If we want the latest data, check that this cached - // value was in fact fetched with the latest available data. - if ( !$latest || $val['latest'] ) { - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); - return $val['object']; + // Reuse any files already in process cache... 
+ foreach ( $params['srcs'] as $src ) { + $path = self::normalizeStoragePath( $src ); + if ( $path === null ) { + $fsFiles[$src] = null; // invalid storage path + } elseif ( $this->expensiveCache->has( $path, 'localRef' ) ) { + $val = $this->expensiveCache->get( $path, 'localRef' ); + // If we want the latest data, check that this cached + // value was in fact fetched with the latest available data. + if ( !$latest || $val['latest'] ) { + $fsFiles[$src] = $val['object']; + } } } - $tmpFile = $this->getLocalCopy( $params ); - if ( $tmpFile ) { // don't cache negatives - $this->expensiveCache->set( $path, 'localRef', - array( 'object' => $tmpFile, 'latest' => $latest ) ); + // Fetch local references of any remaning files... + $params['srcs'] = array_diff( $params['srcs'], array_keys( $fsFiles ) ); + foreach ( $this->doGetLocalReferenceMulti( $params ) as $path => $fsFile ) { + $fsFiles[$path] = $fsFile; + if ( $fsFile ) { // update the process cache... + $this->expensiveCache->set( $path, 'localRef', + array( 'object' => $fsFile, 'latest' => $latest ) ); + } } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); - return $tmpFile; + + return $fsFiles; } /** - * @see FileBackend::streamFile() - * @return Status + * @see FileBackendStore::getLocalReferenceMulti() + * @return Array + */ + protected function doGetLocalReferenceMulti( array $params ) { + return $this->doGetLocalCopyMulti( $params ); + } + + final public function getLocalCopyMulti( array $params ) { + $section = new ProfileSection( __METHOD__ . 
"-{$this->name}" ); + + $params = $this->setConcurrencyFlags( $params ); + $tmpFiles = $this->doGetLocalCopyMulti( $params ); + + return $tmpFiles; + } + + /** + * @see FileBackendStore::getLocalCopyMulti() + * @return Array + */ + abstract protected function doGetLocalCopyMulti( array $params ); + + /** + * @see FileBackend::getFileHttpUrl() + * @return string|null */ + public function getFileHttpUrl( array $params ) { + return null; // not supported + } + final public function streamFile( array $params ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); $info = $this->getFileStat( $params ); @@ -776,12 +787,18 @@ abstract class FileBackendStore extends FileBackend { $status = $this->doStreamFile( $params ); wfProfileOut( __METHOD__ . '-send-' . $this->name ); wfProfileOut( __METHOD__ . '-send' ); + if ( !$status->isOK() ) { + // Per bug 41113, nasty things can happen if bad cache entries get + // stuck in cache. It's also possible that this error can come up + // with simple race conditions. Clear out the stat cache to be safe. + $this->clearCache( array( $params['src'] ) ); + $this->deleteFileCache( $params['src'] ); + trigger_error( "Bad stat cache or race condition for file {$params['src']}." ); + } } else { $status->fatal( 'backend-fail-stream', $params['src'] ); } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -802,10 +819,6 @@ abstract class FileBackendStore extends FileBackend { return $status; } - /** - * @see FileBackend::directoryExists() - * @return bool|null - */ final public function directoryExists( array $params ) { list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); if ( $dir === null ) { @@ -815,7 +828,7 @@ abstract class FileBackendStore extends FileBackend { return $this->doDirectoryExists( $fullCont, $dir, $params ); } else { // directory is on several shards wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); - list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = self::splitStoragePath( $params['dir'] ); $res = false; // response foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { $exists = $this->doDirectoryExists( "{$fullCont}{$suffix}", $dir, $params ); @@ -833,17 +846,13 @@ abstract class FileBackendStore extends FileBackend { /** * @see FileBackendStore::directoryExists() * - * @param $container string Resolved container name - * @param $dir string Resolved path relative to container - * @param $params Array + * @param string $container Resolved container name + * @param string $dir Resolved path relative to container + * @param array $params * @return bool|null */ abstract protected function doDirectoryExists( $container, $dir, array $params ); - /** - * @see FileBackend::getDirectoryList() - * @return Traversable|Array|null Returns null on failure - */ final public function getDirectoryList( array $params ) { list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); if ( $dir === null ) { // invalid storage path @@ -855,7 +864,7 @@ abstract class FileBackendStore extends FileBackend { } else { wfDebug( __METHOD__ . 
": iterating over all container shards.\n" ); // File listing spans multiple containers/shards - list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = self::splitStoragePath( $params['dir'] ); return new FileBackendStoreShardDirIterator( $this, $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); } @@ -866,17 +875,13 @@ abstract class FileBackendStore extends FileBackend { * * @see FileBackendStore::getDirectoryList() * - * @param $container string Resolved container name - * @param $dir string Resolved path relative to container - * @param $params Array + * @param string $container Resolved container name + * @param string $dir Resolved path relative to container + * @param array $params * @return Traversable|Array|null Returns null on failure */ abstract public function getDirectoryListInternal( $container, $dir, array $params ); - /** - * @see FileBackend::getFileList() - * @return Traversable|Array|null Returns null on failure - */ final public function getFileList( array $params ) { list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); if ( $dir === null ) { // invalid storage path @@ -888,7 +893,7 @@ abstract class FileBackendStore extends FileBackend { } else { wfDebug( __METHOD__ . 
": iterating over all container shards.\n" ); // File listing spans multiple containers/shards - list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + list( , $shortCont, ) = self::splitStoragePath( $params['dir'] ); return new FileBackendStoreShardFileIterator( $this, $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); } @@ -899,9 +904,9 @@ abstract class FileBackendStore extends FileBackend { * * @see FileBackendStore::getFileList() * - * @param $container string Resolved container name - * @param $dir string Resolved path relative to container - * @param $params Array + * @param string $container Resolved container name + * @param string $dir Resolved path relative to container + * @param array $params * @return Traversable|Array|null Returns null on failure */ abstract public function getFileListInternal( $container, $dir, array $params ); @@ -913,18 +918,19 @@ abstract class FileBackendStore extends FileBackend { * The result must have the same number of items as the input. * An exception is thrown if an unsupported operation is requested. 
* - * @param $ops Array Same format as doOperations() + * @param array $ops Same format as doOperations() * @return Array List of FileOp objects * @throws MWException */ final public function getOperationsInternal( array $ops ) { $supportedOps = array( - 'store' => 'StoreFileOp', - 'copy' => 'CopyFileOp', - 'move' => 'MoveFileOp', - 'delete' => 'DeleteFileOp', - 'create' => 'CreateFileOp', - 'null' => 'NullFileOp' + 'store' => 'StoreFileOp', + 'copy' => 'CopyFileOp', + 'move' => 'MoveFileOp', + 'delete' => 'DeleteFileOp', + 'create' => 'CreateFileOp', + 'describe' => 'DescribeFileOp', + 'null' => 'NullFileOp' ); $performOps = array(); // array of FileOp objects @@ -947,11 +953,13 @@ abstract class FileBackendStore extends FileBackend { /** * Get a list of storage paths to lock for a list of operations - * Returns an array with 'sh' (shared) and 'ex' (exclusive) keys, - * each corresponding to a list of storage paths to be locked. + * Returns an array with LockManager::LOCK_UW (shared locks) and + * LockManager::LOCK_EX (exclusive locks) keys, each corresponding + * to a list of storage paths to be locked. All returned paths are + * normalized. * - * @param $performOps Array List of FileOp objects - * @return Array ('sh' => list of paths, 'ex' => list of paths) + * @param array $performOps List of FileOp objects + * @return Array (LockManager::LOCK_UW => path list, LockManager::LOCK_EX => path list) */ final public function getPathsToLockForOpsInternal( array $performOps ) { // Build up a list of files to lock... 
@@ -965,30 +973,24 @@ abstract class FileBackendStore extends FileBackend { // Get a shared lock on the parent directory of each path changed $paths['sh'] = array_merge( $paths['sh'], array_map( 'dirname', $paths['ex'] ) ); - return $paths; + return array( + LockManager::LOCK_UW => $paths['sh'], + LockManager::LOCK_EX => $paths['ex'] + ); } - /** - * @see FileBackend::getScopedLocksForOps() - * @return Array - */ public function getScopedLocksForOps( array $ops, Status $status ) { $paths = $this->getPathsToLockForOpsInternal( $this->getOperationsInternal( $ops ) ); - return array( - $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status ), - $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status ) - ); + return array( $this->getScopedFileLocks( $paths, 'mixed', $status ) ); } - /** - * @see FileBackend::doOperationsInternal() - * @return Status - */ final protected function doOperationsInternal( array $ops, array $opts ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); + // Fix up custom header name/value pairs... + $ops = array_map( array( $this, 'stripInvalidHeadersFromOp' ), $ops ); + // Build up a list of FileOps... $performOps = $this->getOperationsInternal( $ops ); @@ -997,11 +999,8 @@ abstract class FileBackendStore extends FileBackend { // Build up a list of files to lock... $paths = $this->getPathsToLockForOpsInternal( $performOps ); // Try to lock those files for the scope of this function... - $scopeLockS = $this->getScopedFileLocks( $paths['sh'], LockManager::LOCK_UW, $status ); - $scopeLockE = $this->getScopedFileLocks( $paths['ex'], LockManager::LOCK_EX, $status ); + $scopeLock = $this->getScopedFileLocks( $paths, 'mixed', $status ); if ( !$status->isOK() ) { - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); return $status; // abort } } @@ -1016,29 +1015,28 @@ abstract class FileBackendStore extends FileBackend { $this->primeContainerCache( $performOps ); // Actually attempt the operation batch... + $opts = $this->setConcurrencyFlags( $opts ); $subStatus = FileOpBatch::attempt( $performOps, $opts, $this->fileJournal ); // Merge errors into status fields $status->merge( $subStatus ); $status->success = $subStatus->success; // not done in merge() - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $status; } - /** - * @see FileBackend::doQuickOperationsInternal() - * @return Status - * @throws MWException - */ final protected function doQuickOperationsInternal( array $ops ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $status = Status::newGood(); + // Fix up custom header name/value pairs... + $ops = array_map( array( $this, 'stripInvalidHeadersFromOp' ), $ops ); + + // Clear any file cache entries + $this->clearCache(); + $supportedOps = array( 'create', 'store', 'copy', 'move', 'delete', 'null' ); - $async = ( $this->parallelize === 'implicit' ); + $async = ( $this->parallelize === 'implicit' && count( $ops ) > 1 ); $maxConcurrency = $this->concurrency; // throttle $statuses = array(); // array of (index => Status) @@ -1047,8 +1045,6 @@ abstract class FileBackendStore extends FileBackend { // Perform the sync-only ops and build up op handles for the async ops... foreach ( $ops as $index => $params ) { if ( !in_array( $params['op'], $supportedOps ) ) { - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); throw new MWException( "Operation '{$params['op']}' is not supported." ); } $method = $params['op'] . 'Internal'; // e.g. "storeInternal" @@ -1082,8 +1078,6 @@ abstract class FileBackendStore extends FileBackend { } } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); return $status; } @@ -1092,13 +1086,12 @@ abstract class FileBackendStore extends FileBackend { * The resulting Status object fields will correspond * to the order in which the handles where given. * - * @param $handles Array List of FileBackendStoreOpHandle objects + * @param array $handles List of FileBackendStoreOpHandle objects * @return Array Map of Status objects * @throws MWException */ final public function executeOpHandlesInternal( array $fileOpHandles ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); foreach ( $fileOpHandles as $fileOpHandle ) { if ( !( $fileOpHandle instanceof FileBackendStoreOpHandle ) ) { throw new MWException( "Given a non-FileBackendStoreOpHandle object." ); @@ -1110,13 +1103,13 @@ abstract class FileBackendStore extends FileBackend { foreach ( $fileOpHandles as $fileOpHandle ) { $fileOpHandle->closeResources(); } - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); return $res; } /** * @see FileBackendStore::executeOpHandlesInternal() + * @param array $fileOpHandles + * @throws MWException * @return Array List of corresponding Status objects */ protected function doExecuteOpHandlesInternal( array $fileOpHandles ) { @@ -1127,12 +1120,34 @@ abstract class FileBackendStore extends FileBackend { } /** - * @see FileBackend::preloadCache() + * Strip long HTTP headers from a file operation. + * Most headers are just numbers, but some are allowed to be long. + * This function is useful for cleaning up headers and avoiding backend + * specific errors, especially in the middle of batch file operations. 
+ * + * @param array $op Same format as doOperation() + * @return Array */ + protected function stripInvalidHeadersFromOp( array $op ) { + static $longs = array( 'Content-Disposition' ); + if ( isset( $op['headers'] ) ) { // op sets HTTP headers + foreach ( $op['headers'] as $name => $value ) { + $maxHVLen = in_array( $name, $longs ) ? INF : 255; + if ( strlen( $name ) > 255 || strlen( $value ) > $maxHVLen ) { + trigger_error( "Header '$name: $value' is too long." ); + unset( $op['headers'][$name] ); + } elseif ( !strlen( $value ) ) { + $op['headers'][$name] = ''; // null/false => "" + } + } + } + return $op; + } + final public function preloadCache( array $paths ) { $fullConts = array(); // full container names foreach ( $paths as $path ) { - list( $fullCont, $r, $s ) = $this->resolveStoragePath( $path ); + list( $fullCont, , ) = $this->resolveStoragePath( $path ); $fullConts[] = $fullCont; } // Load from the persistent file and container caches @@ -1140,9 +1155,6 @@ abstract class FileBackendStore extends FileBackend { $this->primeFileCache( $paths ); } - /** - * @see FileBackend::clearCache() - */ final public function clearCache( array $paths = null ) { if ( is_array( $paths ) ) { $paths = array_map( 'FileBackend::normalizeStoragePath', $paths ); @@ -1165,7 +1177,7 @@ abstract class FileBackendStore extends FileBackend { * * @see FileBackend::clearCache() * - * @param $paths Array Storage paths (optional) + * @param array $paths Storage paths (optional) * @return void */ protected function doClearCache( array $paths = null ) {} @@ -1183,7 +1195,7 @@ abstract class FileBackendStore extends FileBackend { * Check if a container name is valid. * This checks for for length and illegal characters. 
* - * @param $container string + * @param string $container * @return bool */ final protected static function isValidContainerName( $container ) { @@ -1205,7 +1217,7 @@ abstract class FileBackendStore extends FileBackend { * this means that the path can only refer to a directory and can only * be scanned by looking in all the container shards. * - * @param $storagePath string + * @param string $storagePath * @return Array (container, path, container suffix) or (null, null, null) if invalid */ final protected function resolveStoragePath( $storagePath ) { @@ -1235,16 +1247,22 @@ abstract class FileBackendStore extends FileBackend { /** * Like resolveStoragePath() except null values are returned if - * the container is sharded and the shard could not be determined. + * the container is sharded and the shard could not be determined + * or if the path ends with '/'. The later case is illegal for FS + * backends and can confuse listings for object store backends. + * + * This function is used when resolving paths that must be valid + * locations for files. Directory and listing functions should + * generally just use resolveStoragePath() instead. * * @see FileBackendStore::resolveStoragePath() * - * @param $storagePath string + * @param string $storagePath * @return Array (container, path) or (null, null) if invalid */ final protected function resolveStoragePathReal( $storagePath ) { list( $container, $relPath, $cShard ) = $this->resolveStoragePath( $storagePath ); - if ( $cShard !== null ) { + if ( $cShard !== null && substr( $relPath, -1 ) !== '/' ) { return array( $container, $relPath ); } return array( null, null ); @@ -1254,8 +1272,8 @@ abstract class FileBackendStore extends FileBackend { * Get the container name shard suffix for a given path. * Any empty suffix means the container is not sharded. 
* - * @param $container string Container name - * @param $relPath string Storage path relative to the container + * @param string $container Container name + * @param string $relPath Storage path relative to the container * @return string|null Returns null if shard could not be determined */ final protected function getContainerShard( $container, $relPath ) { @@ -1291,11 +1309,11 @@ abstract class FileBackendStore extends FileBackend { * Container dirs like "a", where the container shards on "x/xy", * can reside on several shards. Such paths are tricky to handle. * - * @param $storagePath string Storage path + * @param string $storagePath Storage path * @return bool */ final public function isSingleShardPathInternal( $storagePath ) { - list( $c, $r, $shard ) = $this->resolveStoragePath( $storagePath ); + list( , , $shard ) = $this->resolveStoragePath( $storagePath ); return ( $shard !== null ); } @@ -1304,7 +1322,7 @@ abstract class FileBackendStore extends FileBackend { * If greater than 0, then all file storage paths within * the container are required to be hashed accordingly. 
* - * @param $container string + * @param string $container * @return Array (integer levels, integer base, repeat flag) or (0, 0, false) */ final protected function getContainerHashLevels( $container ) { @@ -1324,7 +1342,7 @@ abstract class FileBackendStore extends FileBackend { /** * Get a list of full container shard suffixes for a container * - * @param $container string + * @param string $container * @return Array */ final protected function getContainerSuffixes( $container ) { @@ -1342,7 +1360,7 @@ abstract class FileBackendStore extends FileBackend { /** * Get the full container name, including the wiki ID prefix * - * @param $container string + * @param string $container * @return string */ final protected function fullContainerName( $container ) { @@ -1358,7 +1376,7 @@ abstract class FileBackendStore extends FileBackend { * This is intended for internal use, such as encoding illegal chars. * Subclasses can override this to be more restrictive. * - * @param $container string + * @param string $container * @return string|null */ protected function resolveContainerName( $container ) { @@ -1371,8 +1389,8 @@ abstract class FileBackendStore extends FileBackend { * getting absolute paths (e.g. FS based backends). Note that the relative path * may be the empty string (e.g. the path is simply to the container). 
* - * @param $container string Container name - * @param $relStoragePath string Storage path relative to the container + * @param string $container Container name + * @param string $relStoragePath Storage path relative to the container * @return string|null Path or null if not valid */ protected function resolveContainerPath( $container, $relStoragePath ) { @@ -1382,7 +1400,7 @@ abstract class FileBackendStore extends FileBackend { /** * Get the cache key for a container * - * @param $container string Resolved container name + * @param string $container Resolved container name * @return string */ private function containerCacheKey( $container ) { @@ -1392,18 +1410,20 @@ abstract class FileBackendStore extends FileBackend { /** * Set the cached info for a container * - * @param $container string Resolved container name - * @param $val mixed Information to cache + * @param string $container Resolved container name + * @param array $val Information to cache + * @return void */ - final protected function setContainerCache( $container, $val ) { - $this->memCache->add( $this->containerCacheKey( $container ), $val, 14*86400 ); + final protected function setContainerCache( $container, array $val ) { + $this->memCache->add( $this->containerCacheKey( $container ), $val, 14 * 86400 ); } /** * Delete the cached info for a container. * The cache key is salted for a while to prevent race conditions. * - * @param $container string Resolved container name + * @param string $container Resolved container name + * @return void */ final protected function deleteContainerCache( $container ) { if ( !$this->memCache->set( $this->containerCacheKey( $container ), 'PURGED', 300 ) ) { @@ -1414,13 +1434,13 @@ abstract class FileBackendStore extends FileBackend { /** * Do a batch lookup from cache for container stats for all containers * used in a list of container names, storage paths, or FileOp objects. + * This loads the persistent cache values into the process cache. 
* - * @param $items Array + * @param Array $items * @return void */ final protected function primeContainerCache( array $items ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $paths = array(); // list of storage paths $contNames = array(); // (cache key => resolved container name) @@ -1437,7 +1457,7 @@ abstract class FileBackendStore extends FileBackend { } // Get all the corresponding cache keys for paths... foreach ( $paths as $path ) { - list( $fullCont, $r, $s ) = $this->resolveStoragePath( $path ); + list( $fullCont, , ) = $this->resolveStoragePath( $path ); if ( $fullCont !== null ) { // valid path for this backend $contNames[$this->containerCacheKey( $fullCont )] = $fullCont; } @@ -1452,9 +1472,6 @@ abstract class FileBackendStore extends FileBackend { // Populate the container process cache for the backend... $this->doPrimeContainerCache( array_filter( $contInfo, 'is_array' ) ); - - wfProfileOut( __METHOD__ . '-' . $this->name ); - wfProfileOut( __METHOD__ ); } /** @@ -1462,7 +1479,7 @@ abstract class FileBackendStore extends FileBackend { * resolved container names and their corresponding cached info. * Only containers that actually exist should appear in the map. * - * @param $containerInfo Array Map of resolved container names to cached info + * @param array $containerInfo Map of resolved container names to cached info * @return void */ protected function doPrimeContainerCache( array $containerInfo ) {} @@ -1470,7 +1487,7 @@ abstract class FileBackendStore extends FileBackend { /** * Get the cache key for a file path * - * @param $path string Storage path + * @param string $path Normalized storage path * @return string */ private function fileCacheKey( $path ) { @@ -1482,20 +1499,34 @@ abstract class FileBackendStore extends FileBackend { * Negatives (404s) are not cached. 
By not caching negatives, we can skip cache * salting for the case when a file is created at a path were there was none before. * - * @param $path string Storage path - * @param $val mixed Information to cache + * @param string $path Storage path + * @param array $val Stat information to cache + * @return void */ - final protected function setFileCache( $path, $val ) { - $this->memCache->add( $this->fileCacheKey( $path ), $val, 7*86400 ); + final protected function setFileCache( $path, array $val ) { + $path = FileBackend::normalizeStoragePath( $path ); + if ( $path === null ) { + return; // invalid storage path + } + $age = time() - wfTimestamp( TS_UNIX, $val['mtime'] ); + $ttl = min( 7 * 86400, max( 300, floor( .1 * $age ) ) ); + $this->memCache->add( $this->fileCacheKey( $path ), $val, $ttl ); } /** * Delete the cached stat info for a file path. * The cache key is salted for a while to prevent race conditions. + * Since negatives (404s) are not cached, this does not need to be called when + * a file is created at a path were there was none before. * - * @param $path string Storage path + * @param string $path Storage path + * @return void */ final protected function deleteFileCache( $path ) { + $path = FileBackend::normalizeStoragePath( $path ); + if ( $path === null ) { + return; // invalid storage path + } if ( !$this->memCache->set( $this->fileCacheKey( $path ), 'PURGED', 300 ) ) { trigger_error( "Unable to delete stat cache for file $path." ); } @@ -1504,13 +1535,13 @@ abstract class FileBackendStore extends FileBackend { /** * Do a batch lookup from cache for file stats for all paths * used in a list of storage paths or FileOp objects. + * This loads the persistent cache values into the process cache. * - * @param $items Array List of storage paths or FileOps + * @param array $items List of storage paths or FileOps * @return void */ final protected function primeFileCache( array $items ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__ . '-' . 
$this->name ); + $section = new ProfileSection( __METHOD__ . "-{$this->name}" ); $paths = array(); // list of storage paths $pathNames = array(); // (cache key => storage path) @@ -1520,12 +1551,14 @@ abstract class FileBackendStore extends FileBackend { $paths = array_merge( $paths, $item->storagePathsRead() ); $paths = array_merge( $paths, $item->storagePathsChanged() ); } elseif ( self::isStoragePath( $item ) ) { - $paths[] = $item; + $paths[] = FileBackend::normalizeStoragePath( $item ); } } + // Get rid of any paths that failed normalization... + $paths = array_filter( $paths, 'strlen' ); // remove nulls // Get all the corresponding cache keys for paths... foreach ( $paths as $path ) { - list( $cont, $rel, $s ) = $this->resolveStoragePath( $path ); + list( , $rel, ) = $this->resolveStoragePath( $path ); if ( $rel !== null ) { // valid path for this backend $pathNames[$this->fileCacheKey( $path )] = $path; } @@ -1542,9 +1575,38 @@ abstract class FileBackendStore extends FileBackend { } } } + } + + /** + * Set the 'concurrency' option from a list of operation options + * + * @param array $opts Map of operation options + * @return Array + */ + final protected function setConcurrencyFlags( array $opts ) { + $opts['concurrency'] = 1; // off + if ( $this->parallelize === 'implicit' ) { + if ( !isset( $opts['parallelize'] ) || $opts['parallelize'] ) { + $opts['concurrency'] = $this->concurrency; + } + } elseif ( $this->parallelize === 'explicit' ) { + if ( !empty( $opts['parallelize'] ) ) { + $opts['concurrency'] = $this->concurrency; + } + } + return $opts; + } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); - wfProfileOut( __METHOD__ ); + /** + * Get the content type to use in HEAD/GET requests for a file + * + * @param string $storagePath + * @param string|null $content File data + * @param string|null $fsPath File system path + * @return MIME type + */ + protected function getContentType( $storagePath, $content, $fsPath ) { + return call_user_func_array( $this->mimeCallback, func_get_args() ); } } @@ -1582,30 +1644,24 @@ abstract class FileBackendStoreOpHandle { * * @ingroup FileBackend */ -abstract class FileBackendStoreShardListIterator implements Iterator { +abstract class FileBackendStoreShardListIterator extends FilterIterator { /** @var FileBackendStore */ protected $backend; /** @var Array */ protected $params; - /** @var Array */ - protected $shardSuffixes; + protected $container; // string; full container name protected $directory; // string; resolved relative path - /** @var Traversable */ - protected $iter; - protected $curShard = 0; // integer - protected $pos = 0; // integer - /** @var Array */ protected $multiShardPaths = array(); // (rel path => 1) /** - * @param $backend FileBackendStore - * @param $container string Full storage container name - * @param $dir string Storage directory relative to container - * @param $suffixes Array List of container shard suffixes - * @param $params Array + * @param FileBackendStore $backend + * @param string $container Full storage container name + * @param string $dir Storage directory relative to container + * @param array $suffixes List of container shard suffixes + * @param array $params */ public function __construct( FileBackendStore $backend, $container, $dir, array $suffixes, array $params @@ -1613,142 +1669,56 @@ abstract class FileBackendStoreShardListIterator implements Iterator { $this->backend = $backend; $this->container = $container; $this->directory = $dir; - $this->shardSuffixes = $suffixes; $this->params = $params; - } - - /** - * @see Iterator::key() - * @return integer - */ - 
public function key() { - return $this->pos; - } - /** - * @see Iterator::valid() - * @return bool - */ - public function valid() { - if ( $this->iter instanceof Iterator ) { - return $this->iter->valid(); - } elseif ( is_array( $this->iter ) ) { - return ( current( $this->iter ) !== false ); // no paths can have this value + $iter = new AppendIterator(); + foreach ( $suffixes as $suffix ) { + $iter->append( $this->listFromShard( $this->container . $suffix ) ); } - return false; // some failure? - } - /** - * @see Iterator::current() - * @return string|bool String or false - */ - public function current() { - return ( $this->iter instanceof Iterator ) - ? $this->iter->current() - : current( $this->iter ); + parent::__construct( $iter ); } - /** - * @see Iterator::next() - * @return void - */ - public function next() { - ++$this->pos; - ( $this->iter instanceof Iterator ) ? $this->iter->next() : next( $this->iter ); - do { - $continue = false; // keep scanning shards? - $this->filterViaNext(); // filter out duplicates - // Find the next non-empty shard if no elements are left - if ( !$this->valid() ) { - $this->nextShardIteratorIfNotValid(); - $continue = $this->valid(); // re-filter unless we ran out of shards - } - } while ( $continue ); - } - - /** - * @see Iterator::rewind() - * @return void - */ - public function rewind() { - $this->pos = 0; - $this->curShard = 0; - $this->setIteratorFromCurrentShard(); - do { - $continue = false; // keep scanning shards? 
- $this->filterViaNext(); // filter out duplicates - // Find the next non-empty shard if no elements are left - if ( !$this->valid() ) { - $this->nextShardIteratorIfNotValid(); - $continue = $this->valid(); // re-filter unless we ran out of shards - } - } while ( $continue ); - } - - /** - * Filter out duplicate items by advancing to the next ones - */ - protected function filterViaNext() { - while ( $this->valid() ) { - $rel = $this->iter->current(); // path relative to given directory - $path = $this->params['dir'] . "/{$rel}"; // full storage path - if ( $this->backend->isSingleShardPathInternal( $path ) ) { - break; // path is only on one shard; no issue with duplicates - } elseif ( isset( $this->multiShardPaths[$rel] ) ) { - // Don't keep listing paths that are on multiple shards - ( $this->iter instanceof Iterator ) ? $this->iter->next() : next( $this->iter ); - } else { - $this->multiShardPaths[$rel] = 1; - break; - } - } - } - - /** - * If the list iterator for this container shard is out of items, - * then move on to the next container that has items. - * If there are none, then it advances to the last container. - */ - protected function nextShardIteratorIfNotValid() { - while ( !$this->valid() && ++$this->curShard < count( $this->shardSuffixes ) ) { - $this->setIteratorFromCurrentShard(); + public function accept() { + $rel = $this->getInnerIterator()->current(); // path relative to given directory + $path = $this->params['dir'] . 
"/{$rel}"; // full storage path + if ( $this->backend->isSingleShardPathInternal( $path ) ) { + return true; // path is only on one shard; no issue with duplicates + } elseif ( isset( $this->multiShardPaths[$rel] ) ) { + // Don't keep listing paths that are on multiple shards + return false; + } else { + $this->multiShardPaths[$rel] = 1; + return true; } } - /** - * Set the list iterator to that of the current container shard - */ - protected function setIteratorFromCurrentShard() { - $this->iter = $this->listFromShard( - $this->container . $this->shardSuffixes[$this->curShard], - $this->directory, $this->params ); - // Start loading results so that current() works - if ( $this->iter ) { - ( $this->iter instanceof Iterator ) ? $this->iter->rewind() : reset( $this->iter ); - } + public function rewind() { + parent::rewind(); + $this->multiShardPaths = array(); } /** * Get the list for a given container shard * - * @param $container string Resolved container name - * @param $dir string Resolved path relative to container - * @param $params Array - * @return Traversable|Array|null + * @param string $container Resolved container name + * @return Iterator */ - abstract protected function listFromShard( $container, $dir, array $params ); + abstract protected function listFromShard( $container ); } /** * Iterator for listing directories */ class FileBackendStoreShardDirIterator extends FileBackendStoreShardListIterator { - /** - * @see FileBackendStoreShardListIterator::listFromShard() - * @return Array|null|Traversable - */ - protected function listFromShard( $container, $dir, array $params ) { - return $this->backend->getDirectoryListInternal( $container, $dir, $params ); + protected function listFromShard( $container ) { + $list = $this->backend->getDirectoryListInternal( + $container, $this->directory, $this->params ); + if ( $list === null ) { + return new ArrayIterator( array() ); + } else { + return is_array( $list ) ? 
new ArrayIterator( $list ) : $list; + } } } @@ -1756,11 +1726,13 @@ class FileBackendStoreShardDirIterator extends FileBackendStoreShardListIterator * Iterator for listing regular files */ class FileBackendStoreShardFileIterator extends FileBackendStoreShardListIterator { - /** - * @see FileBackendStoreShardListIterator::listFromShard() - * @return Array|null|Traversable - */ - protected function listFromShard( $container, $dir, array $params ) { - return $this->backend->getFileListInternal( $container, $dir, $params ); + protected function listFromShard( $container ) { + $list = $this->backend->getFileListInternal( + $container, $this->directory, $this->params ); + if ( $list === null ) { + return new ArrayIterator( array() ); + } else { + return is_array( $list ) ? new ArrayIterator( $list ) : $list; + } } } diff --git a/includes/filebackend/FileOp.php b/includes/filebackend/FileOp.php index 7c43c489..fe833084 100644 --- a/includes/filebackend/FileOp.php +++ b/includes/filebackend/FileOp.php @@ -42,11 +42,12 @@ abstract class FileOp { protected $state = self::STATE_NEW; // integer protected $failed = false; // boolean protected $async = false; // boolean - protected $useLatest = true; // boolean protected $batchId; // string + protected $doOperation = true; // boolean; operation is not a no-op protected $sourceSha1; // string - protected $destSameAsSource; // boolean + protected $overwriteSameCase; // boolean + protected $destExists; // boolean /* Object life-cycle */ const STATE_NEW = 1; @@ -54,54 +55,81 @@ abstract class FileOp { const STATE_ATTEMPTED = 3; /** - * Build a new file operation transaction + * Build a new batch file operation transaction * - * @param $backend FileBackendStore - * @param $params Array + * @param FileBackendStore $backend + * @param Array $params * @throws MWException */ final public function __construct( FileBackendStore $backend, array $params ) { $this->backend = $backend; list( $required, $optional ) = $this->allowedParams(); + 
// @todo normalizeAnyStoragePaths() calls are overzealous, use a parameter list foreach ( $required as $name ) { if ( isset( $params[$name] ) ) { - $this->params[$name] = $params[$name]; + // Normalize paths so the paths to the same file have the same string + $this->params[$name] = self::normalizeAnyStoragePaths( $params[$name] ); } else { throw new MWException( "File operation missing parameter '$name'." ); } } foreach ( $optional as $name ) { if ( isset( $params[$name] ) ) { - $this->params[$name] = $params[$name]; + // Normalize paths so the paths to the same file have the same string + $this->params[$name] = self::normalizeAnyStoragePaths( $params[$name] ); } } $this->params = $params; } /** - * Set the batch UUID this operation belongs to + * Normalize $item or anything in $item that is a valid storage path * - * @param $batchId string - * @return void + * @param string $item|array + * @return string|Array + */ + protected function normalizeAnyStoragePaths( $item ) { + if ( is_array( $item ) ) { + $res = array(); + foreach ( $item as $k => $v ) { + $k = self::normalizeIfValidStoragePath( $k ); + $v = self::normalizeIfValidStoragePath( $v ); + $res[$k] = $v; + } + return $res; + } else { + return self::normalizeIfValidStoragePath( $item ); + } + } + + /** + * Normalize a string if it is a valid storage path + * + * @param string $path + * @return string */ - final public function setBatchId( $batchId ) { - $this->batchId = $batchId; + protected static function normalizeIfValidStoragePath( $path ) { + if ( FileBackend::isStoragePath( $path ) ) { + $res = FileBackend::normalizeStoragePath( $path ); + return ( $res !== null ) ? 
$res : $path; + } + return $path; } /** - * Whether to allow stale data for file reads and stat checks + * Set the batch UUID this operation belongs to * - * @param $allowStale bool + * @param string $batchId * @return void */ - final public function allowStaleReads( $allowStale ) { - $this->useLatest = !$allowStale; + final public function setBatchId( $batchId ) { + $this->batchId = $batchId; } /** * Get the value of the parameter with the given name * - * @param $name string + * @param string $name * @return mixed Returns null if the parameter is not set */ final public function getParam( $name ) { @@ -138,11 +166,11 @@ abstract class FileOp { /** * Update a dependency tracking array to account for this operation * - * @param $deps Array Prior path reads/writes; format of FileOp::newPredicates() + * @param array $deps Prior path reads/writes; format of FileOp::newPredicates() * @return Array */ final public function applyDependencies( array $deps ) { - $deps['read'] += array_fill_keys( $this->storagePathsRead(), 1 ); + $deps['read'] += array_fill_keys( $this->storagePathsRead(), 1 ); $deps['write'] += array_fill_keys( $this->storagePathsChanged(), 1 ); return $deps; } @@ -150,7 +178,7 @@ abstract class FileOp { /** * Check if this operation changes files listed in $paths * - * @param $paths Array Prior path reads/writes; format of FileOp::newPredicates() + * @param array $paths Prior path reads/writes; format of FileOp::newPredicates() * @return boolean */ final public function dependsOn( array $deps ) { @@ -170,33 +198,36 @@ abstract class FileOp { /** * Get the file journal entries for this file operation * - * @param $oPredicates Array Pre-op info about files (format of FileOp::newPredicates) - * @param $nPredicates Array Post-op info about files (format of FileOp::newPredicates) + * @param array $oPredicates Pre-op info about files (format of FileOp::newPredicates) + * @param array $nPredicates Post-op info about files (format of FileOp::newPredicates) * 
@return Array */ final public function getJournalEntries( array $oPredicates, array $nPredicates ) { + if ( !$this->doOperation ) { + return array(); // this is a no-op + } $nullEntries = array(); $updateEntries = array(); $deleteEntries = array(); $pathsUsed = array_merge( $this->storagePathsRead(), $this->storagePathsChanged() ); - foreach ( $pathsUsed as $path ) { + foreach ( array_unique( $pathsUsed ) as $path ) { $nullEntries[] = array( // assertion for recovery - 'op' => 'null', - 'path' => $path, + 'op' => 'null', + 'path' => $path, 'newSha1' => $this->fileSha1( $path, $oPredicates ) ); } foreach ( $this->storagePathsChanged() as $path ) { if ( $nPredicates['sha1'][$path] === false ) { // deleted $deleteEntries[] = array( - 'op' => 'delete', - 'path' => $path, + 'op' => 'delete', + 'path' => $path, 'newSha1' => '' ); } else { // created/updated $updateEntries[] = array( - 'op' => $this->fileExists( $path, $oPredicates ) ? 'update' : 'create', - 'path' => $path, + 'op' => $this->fileExists( $path, $oPredicates ) ? 'update' : 'create', + 'path' => $path, 'newSha1' => $nPredicates['sha1'][$path] ); } @@ -205,9 +236,11 @@ abstract class FileOp { } /** - * Check preconditions of the operation without writing anything + * Check preconditions of the operation without writing anything. + * This must update $predicates for each path that the op can change + * except when a failing status object is returned. 
* - * @param $predicates Array + * @param Array $predicates * @return Status */ final public function precheck( array &$predicates ) { @@ -241,10 +274,14 @@ abstract class FileOp { return Status::newFatal( 'fileop-fail-attempt-precheck' ); } $this->state = self::STATE_ATTEMPTED; - $status = $this->doAttempt(); - if ( !$status->isOK() ) { - $this->failed = true; - $this->logFailure( 'attempt' ); + if ( $this->doOperation ) { + $status = $this->doAttempt(); + if ( !$status->isOK() ) { + $this->failed = true; + $this->logFailure( 'attempt' ); + } + } else { // no-op + $status = Status::newGood(); } return $status; } @@ -280,7 +317,7 @@ abstract class FileOp { /** * Adjust params to FileBackendStore internal file calls * - * @param $params Array + * @param Array $params * @return Array (required params list, optional params list) */ protected function setFlags( array $params ) { @@ -292,15 +329,7 @@ abstract class FileOp { * * @return Array */ - final public function storagePathsRead() { - return array_map( 'FileBackend::normalizeStoragePath', $this->doStoragePathsRead() ); - } - - /** - * @see FileOp::storagePathsRead() - * @return Array - */ - protected function doStoragePathsRead() { + public function storagePathsRead() { return array(); } @@ -309,24 +338,16 @@ abstract class FileOp { * * @return Array */ - final public function storagePathsChanged() { - return array_map( 'FileBackend::normalizeStoragePath', $this->doStoragePathsChanged() ); - } - - /** - * @see FileOp::storagePathsChanged() - * @return Array - */ - protected function doStoragePathsChanged() { + public function storagePathsChanged() { return array(); } /** * Check for errors with regards to the destination file already existing. - * This also updates the destSameAsSource and sourceSha1 member variables. + * Also set the destExists, overwriteSameCase and sourceSha1 member variables. * A bad status will be returned if there is no chance it can be overwritten. 
* - * @param $predicates Array + * @param Array $predicates * @return Status */ protected function precheckDestExistence( array $predicates ) { @@ -336,8 +357,9 @@ abstract class FileOp { if ( $this->sourceSha1 === null ) { // file in storage? $this->sourceSha1 = $this->fileSha1( $this->params['src'], $predicates ); } - $this->destSameAsSource = false; - if ( $this->fileExists( $this->params['dst'], $predicates ) ) { + $this->overwriteSameCase = false; + $this->destExists = $this->fileExists( $this->params['dst'], $predicates ); + if ( $this->destExists ) { if ( $this->getParam( 'overwrite' ) ) { return $status; // OK } elseif ( $this->getParam( 'overwriteSame' ) ) { @@ -349,7 +371,7 @@ abstract class FileOp { // Give an error if the files are not identical $status->fatal( 'backend-fail-notsame', $this->params['dst'] ); } else { - $this->destSameAsSource = true; // OK + $this->overwriteSameCase = true; // OK } return $status; // do nothing; either OK or bad status } else { @@ -362,7 +384,7 @@ abstract class FileOp { /** * precheckDestExistence() helper function to get the source file SHA-1. - * Subclasses should overwride this iff the source is not in storage. + * Subclasses should overwride this if the source is not in storage. 
* * @return string|bool Returns false on failure */ @@ -373,15 +395,15 @@ abstract class FileOp { /** * Check if a file will exist in storage when this operation is attempted * - * @param $source string Storage path - * @param $predicates Array + * @param string $source Storage path + * @param Array $predicates * @return bool */ final protected function fileExists( $source, array $predicates ) { if ( isset( $predicates['exists'][$source] ) ) { return $predicates['exists'][$source]; // previous op assures this } else { - $params = array( 'src' => $source, 'latest' => $this->useLatest ); + $params = array( 'src' => $source, 'latest' => true ); return $this->backend->fileExists( $params ); } } @@ -389,15 +411,17 @@ abstract class FileOp { /** * Get the SHA-1 of a file in storage when this operation is attempted * - * @param $source string Storage path - * @param $predicates Array + * @param string $source Storage path + * @param Array $predicates * @return string|bool False on failure */ final protected function fileSha1( $source, array $predicates ) { if ( isset( $predicates['sha1'][$source] ) ) { return $predicates['sha1'][$source]; // previous op assures this + } elseif ( isset( $predicates['exists'][$source] ) && !$predicates['exists'][$source] ) { + return false; // previous op assures this } else { - $params = array( 'src' => $source, 'latest' => $this->useLatest ); + $params = array( 'src' => $source, 'latest' => true ); return $this->backend->getFileSha1Base36( $params ); } } @@ -414,7 +438,7 @@ abstract class FileOp { /** * Log a file operation failure and preserve any temp files * - * @param $action string + * @param string $action * @return void */ final public function logFailure( $action ) { @@ -430,42 +454,32 @@ abstract class FileOp { } /** - * Store a file into the backend from a file on the file system. + * Create a file in the backend with the given content. * Parameters for this operation are outlined in FileBackend::doOperations(). 
*/ -class StoreFileOp extends FileOp { - /** - * @return array - */ +class CreateFileOp extends FileOp { protected function allowedParams() { - return array( array( 'src', 'dst' ), - array( 'overwrite', 'overwriteSame', 'disposition' ) ); + return array( array( 'content', 'dst' ), + array( 'overwrite', 'overwriteSame', 'headers' ) ); } - /** - * @param $predicates array - * @return Status - */ protected function doPrecheck( array &$predicates ) { $status = Status::newGood(); - // Check if the source file exists on the file system - if ( !is_file( $this->params['src'] ) ) { - $status->fatal( 'backend-fail-notexists', $this->params['src'] ); - return $status; - // Check if the source file is too big - } elseif ( filesize( $this->params['src'] ) > $this->backend->maxFileSizeInternal() ) { + // Check if the source data is too big + if ( strlen( $this->getParam( 'content' ) ) > $this->backend->maxFileSizeInternal() ) { $status->fatal( 'backend-fail-maxsize', $this->params['dst'], $this->backend->maxFileSizeInternal() ); - $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); + $status->fatal( 'backend-fail-create', $this->params['dst'] ); return $status; - // Check if a file can be placed at the destination + // Check if a file can be placed/changed at the destination } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { $status->fatal( 'backend-fail-usable', $this->params['dst'] ); - $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); + $status->fatal( 'backend-fail-create', $this->params['dst'] ); return $status; } // Check if destination file exists $status->merge( $this->precheckDestExistence( $predicates ) ); + $this->params['dstExists'] = $this->destExists; // see FileBackendStore::setFileCache() if ( $status->isOK() ) { // Update file existence predicates $predicates['exists'][$this->params['dst']] = true; @@ -474,61 +488,54 @@ class StoreFileOp extends FileOp { return $status; 
// safe to call attempt() } - /** - * @return Status - */ protected function doAttempt() { - // Store the file at the destination - if ( !$this->destSameAsSource ) { - return $this->backend->storeInternal( $this->setFlags( $this->params ) ); + if ( !$this->overwriteSameCase ) { + // Create the file at the destination + return $this->backend->createInternal( $this->setFlags( $this->params ) ); } return Status::newGood(); } - /** - * @return bool|string - */ protected function getSourceSha1Base36() { - wfSuppressWarnings(); - $hash = sha1_file( $this->params['src'] ); - wfRestoreWarnings(); - if ( $hash !== false ) { - $hash = wfBaseConvert( $hash, 16, 36, 31 ); - } - return $hash; + return wfBaseConvert( sha1( $this->params['content'] ), 16, 36, 31 ); } - protected function doStoragePathsChanged() { + public function storagePathsChanged() { return array( $this->params['dst'] ); } } /** - * Create a file in the backend with the given content. + * Store a file into the backend from a file on the file system. * Parameters for this operation are outlined in FileBackend::doOperations(). 
*/ -class CreateFileOp extends FileOp { +class StoreFileOp extends FileOp { protected function allowedParams() { - return array( array( 'content', 'dst' ), - array( 'overwrite', 'overwriteSame', 'disposition' ) ); + return array( array( 'src', 'dst' ), + array( 'overwrite', 'overwriteSame', 'headers' ) ); } protected function doPrecheck( array &$predicates ) { $status = Status::newGood(); - // Check if the source data is too big - if ( strlen( $this->getParam( 'content' ) ) > $this->backend->maxFileSizeInternal() ) { + // Check if the source file exists on the file system + if ( !is_file( $this->params['src'] ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if the source file is too big + } elseif ( filesize( $this->params['src'] ) > $this->backend->maxFileSizeInternal() ) { $status->fatal( 'backend-fail-maxsize', $this->params['dst'], $this->backend->maxFileSizeInternal() ); - $status->fatal( 'backend-fail-create', $this->params['dst'] ); + $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); return $status; - // Check if a file can be placed at the destination + // Check if a file can be placed/changed at the destination } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { $status->fatal( 'backend-fail-usable', $this->params['dst'] ); - $status->fatal( 'backend-fail-create', $this->params['dst'] ); + $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); return $status; } // Check if destination file exists $status->merge( $this->precheckDestExistence( $predicates ) ); + $this->params['dstExists'] = $this->destExists; // see FileBackendStore::setFileCache() if ( $status->isOK() ) { // Update file existence predicates $predicates['exists'][$this->params['dst']] = true; @@ -537,28 +544,25 @@ class CreateFileOp extends FileOp { return $status; // safe to call attempt() } - /** - * @return Status - */ protected function doAttempt() 
{ - if ( !$this->destSameAsSource ) { - // Create the file at the destination - return $this->backend->createInternal( $this->setFlags( $this->params ) ); + if ( !$this->overwriteSameCase ) { + // Store the file at the destination + return $this->backend->storeInternal( $this->setFlags( $this->params ) ); } return Status::newGood(); } - /** - * @return bool|String - */ protected function getSourceSha1Base36() { - return wfBaseConvert( sha1( $this->params['content'] ), 16, 36, 31 ); + wfSuppressWarnings(); + $hash = sha1_file( $this->params['src'] ); + wfRestoreWarnings(); + if ( $hash !== false ) { + $hash = wfBaseConvert( $hash, 16, 36, 31 ); + } + return $hash; } - /** - * @return array - */ - protected function doStoragePathsChanged() { + public function storagePathsChanged() { return array( $this->params['dst'] ); } } @@ -568,25 +572,26 @@ class CreateFileOp extends FileOp { * Parameters for this operation are outlined in FileBackend::doOperations(). */ class CopyFileOp extends FileOp { - /** - * @return array - */ protected function allowedParams() { return array( array( 'src', 'dst' ), - array( 'overwrite', 'overwriteSame', 'disposition' ) ); + array( 'overwrite', 'overwriteSame', 'ignoreMissingSource', 'headers' ) ); } - /** - * @param $predicates array - * @return Status - */ protected function doPrecheck( array &$predicates ) { $status = Status::newGood(); // Check if the source file exists if ( !$this->fileExists( $this->params['src'], $predicates ) ) { - $status->fatal( 'backend-fail-notexists', $this->params['src'] ); - return $status; - // Check if a file can be placed at the destination + if ( $this->getParam( 'ignoreMissingSource' ) ) { + $this->doOperation = false; // no-op + // Update file existence predicates (cache 404s) + $predicates['exists'][$this->params['src']] = false; + $predicates['sha1'][$this->params['src']] = false; + return $status; // nothing to do + } else { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + 
return $status; + } + // Check if a file can be placed/changed at the destination } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { $status->fatal( 'backend-fail-usable', $this->params['dst'] ); $status->fatal( 'backend-fail-copy', $this->params['src'], $this->params['dst'] ); @@ -594,6 +599,7 @@ class CopyFileOp extends FileOp { } // Check if destination file exists $status->merge( $this->precheckDestExistence( $predicates ) ); + $this->params['dstExists'] = $this->destExists; // see FileBackendStore::setFileCache() if ( $status->isOK() ) { // Update file existence predicates $predicates['exists'][$this->params['dst']] = true; @@ -602,31 +608,27 @@ class CopyFileOp extends FileOp { return $status; // safe to call attempt() } - /** - * @return Status - */ protected function doAttempt() { - // Do nothing if the src/dst paths are the same - if ( $this->params['src'] !== $this->params['dst'] ) { - // Copy the file into the destination - if ( !$this->destSameAsSource ) { - return $this->backend->copyInternal( $this->setFlags( $this->params ) ); - } + if ( $this->overwriteSameCase ) { + $status = Status::newGood(); // nothing to do + } elseif ( $this->params['src'] === $this->params['dst'] ) { + // Just update the destination file headers + $headers = $this->getParam( 'headers' ) ?: array(); + $status = $this->backend->describeInternal( $this->setFlags( array( + 'src' => $this->params['dst'], 'headers' => $headers + ) ) ); + } else { + // Copy the file to the destination + $status = $this->backend->copyInternal( $this->setFlags( $this->params ) ); } - return Status::newGood(); + return $status; } - /** - * @return array - */ - protected function doStoragePathsRead() { + public function storagePathsRead() { return array( $this->params['src'] ); } - /** - * @return array - */ - protected function doStoragePathsChanged() { + public function storagePathsChanged() { return array( $this->params['dst'] ); } } @@ -636,25 +638,26 @@ class CopyFileOp 
extends FileOp { * Parameters for this operation are outlined in FileBackend::doOperations(). */ class MoveFileOp extends FileOp { - /** - * @return array - */ protected function allowedParams() { return array( array( 'src', 'dst' ), - array( 'overwrite', 'overwriteSame', 'disposition' ) ); + array( 'overwrite', 'overwriteSame', 'ignoreMissingSource', 'headers' ) ); } - /** - * @param $predicates array - * @return Status - */ protected function doPrecheck( array &$predicates ) { $status = Status::newGood(); // Check if the source file exists if ( !$this->fileExists( $this->params['src'], $predicates ) ) { - $status->fatal( 'backend-fail-notexists', $this->params['src'] ); - return $status; - // Check if a file can be placed at the destination + if ( $this->getParam( 'ignoreMissingSource' ) ) { + $this->doOperation = false; // no-op + // Update file existence predicates (cache 404s) + $predicates['exists'][$this->params['src']] = false; + $predicates['sha1'][$this->params['src']] = false; + return $status; // nothing to do + } else { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + } + // Check if a file can be placed/changed at the destination } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { $status->fatal( 'backend-fail-usable', $this->params['dst'] ); $status->fatal( 'backend-fail-move', $this->params['src'], $this->params['dst'] ); @@ -662,6 +665,7 @@ class MoveFileOp extends FileOp { } // Check if destination file exists $status->merge( $this->precheckDestExistence( $predicates ) ); + $this->params['dstExists'] = $this->destExists; // see FileBackendStore::setFileCache() if ( $status->isOK() ) { // Update file existence predicates $predicates['exists'][$this->params['src']] = false; @@ -672,35 +676,35 @@ class MoveFileOp extends FileOp { return $status; // safe to call attempt() } - /** - * @return Status - */ protected function doAttempt() { - // Do nothing if the src/dst paths are the same - 
if ( $this->params['src'] !== $this->params['dst'] ) { - if ( !$this->destSameAsSource ) { - // Move the file into the destination - return $this->backend->moveInternal( $this->setFlags( $this->params ) ); + if ( $this->overwriteSameCase ) { + if ( $this->params['src'] === $this->params['dst'] ) { + // Do nothing to the destination (which is also the source) + $status = Status::newGood(); } else { - // Just delete source as the destination needs no changes - $params = array( 'src' => $this->params['src'] ); - return $this->backend->deleteInternal( $this->setFlags( $params ) ); + // Just delete the source as the destination file needs no changes + $status = $this->backend->deleteInternal( $this->setFlags( + array( 'src' => $this->params['src'] ) + ) ); } + } elseif ( $this->params['src'] === $this->params['dst'] ) { + // Just update the destination file headers + $headers = $this->getParam( 'headers' ) ?: array(); + $status = $this->backend->describeInternal( $this->setFlags( + array( 'src' => $this->params['dst'], 'headers' => $headers ) + ) ); + } else { + // Move the file to the destination + $status = $this->backend->moveInternal( $this->setFlags( $this->params ) ); } - return Status::newGood(); + return $status; } - /** - * @return array - */ - protected function doStoragePathsRead() { + public function storagePathsRead() { return array( $this->params['src'] ); } - /** - * @return array - */ - protected function doStoragePathsChanged() { + public function storagePathsChanged() { return array( $this->params['src'], $this->params['dst'] ); } } @@ -710,28 +714,29 @@ class MoveFileOp extends FileOp { * Parameters for this operation are outlined in FileBackend::doOperations(). 
*/ class DeleteFileOp extends FileOp { - /** - * @return array - */ protected function allowedParams() { return array( array( 'src' ), array( 'ignoreMissingSource' ) ); } - protected $needsDelete = true; - - /** - * @param array $predicates - * @return Status - */ protected function doPrecheck( array &$predicates ) { $status = Status::newGood(); // Check if the source file exists if ( !$this->fileExists( $this->params['src'], $predicates ) ) { - if ( !$this->getParam( 'ignoreMissingSource' ) ) { + if ( $this->getParam( 'ignoreMissingSource' ) ) { + $this->doOperation = false; // no-op + // Update file existence predicates (cache 404s) + $predicates['exists'][$this->params['src']] = false; + $predicates['sha1'][$this->params['src']] = false; + return $status; // nothing to do + } else { $status->fatal( 'backend-fail-notexists', $this->params['src'] ); return $status; } - $this->needsDelete = false; + // Check if a file can be placed/changed at the source + } elseif ( !$this->backend->isPathUsableInternal( $this->params['src'] ) ) { + $status->fatal( 'backend-fail-usable', $this->params['src'] ); + $status->fatal( 'backend-fail-delete', $this->params['src'] ); + return $status; } // Update file existence predicates $predicates['exists'][$this->params['src']] = false; @@ -739,21 +744,51 @@ class DeleteFileOp extends FileOp { return $status; // safe to call attempt() } - /** - * @return Status - */ protected function doAttempt() { - if ( $this->needsDelete ) { - // Delete the source file - return $this->backend->deleteInternal( $this->setFlags( $this->params ) ); + // Delete the source file + return $this->backend->deleteInternal( $this->setFlags( $this->params ) ); + } + + public function storagePathsChanged() { + return array( $this->params['src'] ); + } +} + +/** + * Change metadata for a file at the given storage path in the backend. + * Parameters for this operation are outlined in FileBackend::doOperations(). 
+ */ +class DescribeFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src' ), array( 'headers' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists + if ( !$this->fileExists( $this->params['src'], $predicates ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if a file can be placed/changed at the source + } elseif ( !$this->backend->isPathUsableInternal( $this->params['src'] ) ) { + $status->fatal( 'backend-fail-usable', $this->params['src'] ); + $status->fatal( 'backend-fail-describe', $this->params['src'] ); + return $status; } - return Status::newGood(); + // Update file existence predicates + $predicates['exists'][$this->params['src']] = + $this->fileExists( $this->params['src'], $predicates ); + $predicates['sha1'][$this->params['src']] = + $this->fileSha1( $this->params['src'], $predicates ); + return $status; // safe to call attempt() } - /** - * @return array - */ - protected function doStoragePathsChanged() { + protected function doAttempt() { + // Update the source file's metadata + return $this->backend->describeInternal( $this->setFlags( $this->params ) ); + } + + public function storagePathsChanged() { return array( $this->params['src'] ); } } diff --git a/includes/filebackend/FileOpBatch.php b/includes/filebackend/FileOpBatch.php index 33558725..785c0bc9 100644 --- a/includes/filebackend/FileOpBatch.php +++ b/includes/filebackend/FileOpBatch.php @@ -42,9 +42,6 @@ class FileOpBatch { * $opts is an array of options, including: * - force : Errors that would normally cause a rollback do not. * The remaining operations are still attempted if any fail. - * - allowStale : Don't require the latest available data. - * This can increase performance for non-critical writes. - * This has no effect unless the 'force' flag is set. 
* - nonJournaled : Don't log this operation batch in the file journal. * - concurrency : Try to do this many operations in parallel when possible. * @@ -52,9 +49,9 @@ class FileOpBatch { * - a) unexpected operation errors occurred (network partitions, disk full...) * - b) significant operation errors occurred and 'force' was not set * - * @param $performOps Array List of FileOp operations - * @param $opts Array Batch operation options - * @param $journal FileJournal Journal to log operations to + * @param array $performOps List of FileOp operations + * @param array $opts Batch operation options + * @param FileJournal $journal Journal to log operations to * @return Status */ public static function attempt( array $performOps, array $opts, FileJournal $journal ) { @@ -69,7 +66,6 @@ class FileOpBatch { } $batchId = $journal->getTimestampedUUID(); - $allowStale = !empty( $opts['allowStale'] ); $ignoreErrors = !empty( $opts['force'] ); $journaled = empty( $opts['nonJournaled'] ); $maxConcurrency = isset( $opts['concurrency'] ) ? $opts['concurrency'] : 1; @@ -84,7 +80,6 @@ class FileOpBatch { foreach ( $performOps as $index => $fileOp ) { $backendName = $fileOp->getBackend()->getName(); $fileOp->setBatchId( $batchId ); // transaction ID - $fileOp->allowStaleReads( $allowStale ); // consistency level // Decide if this op can be done concurrently within this sub-batch // or if a new concurrent sub-batch must be started after this one... if ( $fileOp->dependsOn( $curBatchDeps ) @@ -136,49 +131,13 @@ class FileOpBatch { } // Attempt each operation (in parallel if allowed and possible)... - if ( count( $pPerformOps ) < count( $performOps ) ) { - self::runBatchParallel( $pPerformOps, $status ); - } else { - self::runBatchSeries( $performOps, $status ); - } + self::runParallelBatches( $pPerformOps, $status ); wfProfileOut( __METHOD__ ); return $status; } /** - * Attempt a list of file operations in series. - * This will abort remaining ops on failure. 
- * - * @param $performOps Array - * @param $status Status - * @return bool Success - */ - protected static function runBatchSeries( array $performOps, Status $status ) { - foreach ( $performOps as $index => $fileOp ) { - if ( $fileOp->failed() ) { - continue; // nothing to do - } - $subStatus = $fileOp->attempt(); - $status->merge( $subStatus ); - if ( $subStatus->isOK() ) { - $status->success[$index] = true; - ++$status->successCount; - } else { - $status->success[$index] = false; - ++$status->failCount; - // We can't continue (even with $ignoreErrors) as $predicates is wrong. - // Log the remaining ops as failed for recovery... - for ( $i = ($index + 1); $i < count( $performOps ); $i++ ) { - $performOps[$i]->logFailure( 'attempt_aborted' ); - } - return false; // bail out - } - } - return true; - } - - /** * Attempt a list of file operations sub-batches in series. * * The operations *in* each sub-batch will be done in parallel. @@ -186,12 +145,12 @@ class FileOpBatch { * within any given sub-batch do not depend on each other. * This will abort remaining ops on failure. * - * @param $pPerformOps Array - * @param $status Status + * @param Array $pPerformOps + * @param Status $status * @return bool Success */ - protected static function runBatchParallel( array $pPerformOps, Status $status ) { - $aborted = false; + protected static function runParallelBatches( array $pPerformOps, Status $status ) { + $aborted = false; // set to true on unexpected errors foreach ( $pPerformOps as $performOpsBatch ) { if ( $aborted ) { // check batch op abort flag... // We can't continue (even with $ignoreErrors) as $predicates is wrong. @@ -205,11 +164,16 @@ class FileOpBatch { $opHandles = array(); // Get the backend; all sub-batch ops belong to a single backend $backend = reset( $performOpsBatch )->getBackend(); - // If attemptAsync() returns synchronously, it was either an - // error Status or the backend just doesn't support async ops. 
+ // Get the operation handles or actually do it if there is just one. + // If attemptAsync() returns a Status, it was either due to an error + // or the backend does not support async ops and did it synchronously. foreach ( $performOpsBatch as $i => $fileOp ) { if ( !$fileOp->failed() ) { // failed => already has Status - $subStatus = $fileOp->attemptAsync(); + // If the batch is just one operation, it's faster to avoid + // pipelining as that can involve creating new TCP connections. + $subStatus = ( count( $performOpsBatch ) > 1 ) + ? $fileOp->attemptAsync() + : $fileOp->attempt(); if ( $subStatus->value instanceof FileBackendStoreOpHandle ) { $opHandles[$i] = $subStatus->value; // deferred } else { diff --git a/includes/filebackend/README b/includes/filebackend/README new file mode 100644 index 00000000..569f3376 --- /dev/null +++ b/includes/filebackend/README @@ -0,0 +1,208 @@ +/*! +\ingroup FileBackend +\page file_backend_design File backend design + +Some notes on the FileBackend architecture. + +\section intro Introduction + +To abstract away the differences among different types of storage media, +MediaWiki is providing an interface known as FileBackend. Any MediaWiki +interaction with stored files should thus use a FileBackend object. + +Different types of backing storage media are supported (ranging from local +file system to distributed object stores). The types include: + +* FSFileBackend (used for mounted file systems) +* SwiftFileBackend (used for Swift or Ceph Rados+RGW object stores) +* FileBackendMultiWrite (useful for transitioning from one backend to another) + +Configuration documentation for each type of backend is to be found in their +__construct() inline documentation. + + +\section setup Setup + +File backends are registered in LocalSettings.php via the global variable +$wgFileBackends. To access one of those defined backends, one would use +FileBackendStore::get( <name> ) which will bring back a FileBackend object +handle. 
Such handles are reused for any subsequent get() call (via singleton). +The FileBackends objects are caching request calls such as file stats, +SHA1 requests or TCP connection handles. + +\par Note: +Some backends may require additional PHP extensions to be enabled or can rely on a +MediaWiki extension. This is often the case when a FileBackend subclass makes use of an +upstream client API for communicating with the backing store. + + +\section fileoperations File operations + +The MediaWiki FileBackend API supports various operations on either files or +directories. See FileBackend.php for full documentation for each function. + + +\subsection reading Reading + +The following basic operations are supported for reading from a backend: + +On files: +* stat a file for basic information (timestamp, size) +* read a file into a string or several files into a map of path names to strings +* download a file or set of files to a temporary file (on a mounted file system) +* get the SHA1 hash of a file +* get various properties of a file (stat information, content time, mime information, ...) + +On directories: +* get a list of files directly under a directory +* get a recursive list of files under a directory +* get a list of directories directly under a directory +* get a recursive list of directories under a directory + +\par Note: +Backend handles should return directory listings as iterators, although in some cases +they may just be simple arrays (which can still be iterated over). Iterators allow for +callers to traverse a large number of file listings without consuming excessive RAM in +the process. Either the memory consumed is flatly bounded (if the iterator does paging) +or it is proportional to the depth of the portion of the directory tree being traversed +(if the iterator works via recursion). 
+ + +\subsection writing Writing + +The following basic operations are supported for writing or changing in the backend: + +On files: +* store (copying a mounted file system file into storage) +* create (creating a file within storage from a string) +* copy (within storage) +* move (within storage) +* delete (within storage) +* lock/unlock (lock or unlock a file in storage) + +The following operations are supported for writing directories in the backend: +* prepare (create parent container and directories for a path) +* secure (try to lock-down access to a container) +* publish (try to reverse the effects of secure) +* clean (remove empty containers or directories) + + +\subsection invokingoperation Invoking an operation + +Generally, callers should use doOperations() or doQuickOperations() when doing +batches of changes, rather than making a suite of single operation calls. This +makes the system tolerate high latency much better by pipelining operations +when possible. + +doOperations() should be used for working on important original data, i.e. when +consistency is important. It will only pipeline operations that do not +depend on each other. It is best if the operations that do not depend on each +other occur in consecutive groups. This function can also log file changes to +a journal (see FileJournal), which can be used to sync two backend instances. +One might use this function for user uploads of files, for example. + +doQuickOperations() is more geared toward ephemeral items that can be easily +regenerated from original data. It will always pipeline without checking for +dependencies within the operation batch. One might use this function for +creating and purging generated thumbnails of original files for example. + + +\section consistency Consistency + +Not all backing stores are sequentially consistent by default. 
Various FileBackend +functions offer a "latest" option that can be passed in to assure (or try to assure) +that the latest version of the file is read. Some backing stores are consistent by +default, but callers should always assume that without this option, stale data may +be read. This is actually true for stores that have eventual consistency. + +Note that file listing functions have no "latest" flag, and thus some systems may +return stale data. Thus callers should avoid assuming that listings contain changes +made by the current client or any other client from a very short time ago. For example, +creating a file under a directory and then immediately doing a file listing operation +on that directory may result in a listing that does not include that file. + + +\section locking Locking + +Locking is effective if and only if a proper lock manager is registered and is +actually being used by the backend. Lock managers can be registered in LocalSettings.php +using the $wgLockManagers global configuration variable. + +For object stores, locking is not generally useful for avoiding partially +written or read objects, since most stores use Multi Version Concurrency +Control (MVCC) to avoid this. However, locking can be important when: +* One or more operations must be done without objects changing in the meantime. +* It can also be useful when a file read is used to determine a file write or DB change. + For example, doOperations() first checks that there will be no "file already exists" + or "file does not exist" type errors before attempting an operation batch. This works + by stating the files first, and is only safe if the files are locked in the meantime. + +When locking, callers should use the latest available file data for reads. +Also, one should always lock the file *before* reading it, not after.
If stale data is +used to determine a write, there will be some data corruption, even when reads of the +original file finally start returning the updated data without needing the "latest" +option (eventual consistency). The "scoped" lock functions are preferable since +there is not the problem of forgetting to unlock due to early returns or exceptions. + +Since acquiring locks can fail, and lock managers can be non-blocking, callers should: +* Acquire all required locks up front +* Be prepared for the case where locks fail to be acquired +* Possibly retry acquiring certain locks + +MVCC is also a useful pattern to use on top of the backend interface, because operations +are not atomic, even with doOperations(), so doing complex batch file changes or changing +files and updating a database row can result in partially written "transactions". Thus one +should avoid changing files once they have been stored, except perhaps with ephemeral data +that are tolerant of some degree of inconsistency. + +Callers can use their own locking (e.g. SELECT FOR UPDATE) if it is more convenient, but +note that all callers that change any of the files should then go through functions that +acquire these locks. For example, if a caller just directly uses the file backend store() +function, it will ignore any custom "FOR UPDATE" locks, which can cause problems. + +\section objectstore Object stores + +Support for object stores (like Amazon S3/Swift) drives much of the API and design +decisions of FileBackend, but using any POSIX compliant file systems works fine. +The system essentially stores "files" in "containers". For a mounted file system +as a backing store, "files" will just be files under directories. For an object store +as a backing store, the "files" will be objects stored in actual containers. + + +\section file_obj_diffs File system and Object store differences + +An advantage of object stores is the reduced Round-Trip Times.
This is +achieved by avoiding the need to create each parent directory before placing a +file somewhere. It gets worse the deeper the directory hierarchy is. Another +advantage of object stores is that object listings tend to use databases, which +scale better than the linked list directories that file systems sometimes use. +File systems like btrfs and xfs use tree structures, which scale better. +For both object stores and file systems, using "/" in filenames will allow for the +intuitive use of directory functions. For example, creating a file in Swift +called "container/a/b/file1" will mean that: +- a "directory listing" of "container/a" will contain "b", +- and a "file listing" of "b" will contain "file1" + +This means that switching from an object store to a file system and vice versa +using the FileBackend interface will generally be harmless. However, one must be +aware of some important differences: + +* In a file system, you cannot have a file and a directory within the same path + whereas it is possible in an object store. Calling code should avoid any layouts + which allow files and directories at the same path. +* Some file systems have file name length restrictions or overall path length + restrictions that others do not. The same goes with object stores which might + have a maximum object length or a limitation regarding the number of files + under a container or volume. +* Latency varies among systems, certain access patterns may not be tolerable for + certain backends but may hold up for others. Some backend subclasses use + MediaWiki's object caching for serving stat requests, which can greatly + reduce latency. Making sure that the backend has pipelining (see the + "parallelize" and "concurrency" settings) enabled can also mask latency in + batch operation scenarios. +* File systems may implement directories as linked-lists or other structures + with poor scalability, so calling code should use layouts that shard the data.
+ Instead of storing files like "container/file.txt", one can store files like + "container/<x>/<y>/file.txt". It is best if "sharding" is optional or configurable. + +*/ diff --git a/includes/filebackend/SwiftFileBackend.php b/includes/filebackend/SwiftFileBackend.php index b6f0aa60..db090a98 100644 --- a/includes/filebackend/SwiftFileBackend.php +++ b/includes/filebackend/SwiftFileBackend.php @@ -24,7 +24,7 @@ */ /** - * @brief Class for an OpenStack Swift based file backend. + * @brief Class for an OpenStack Swift (or Ceph RGW) based file backend. * * This requires the SwiftCloudFiles MediaWiki extension, which includes * the php-cloudfiles library (https://github.com/rackspace/php-cloudfiles). @@ -40,11 +40,16 @@ class SwiftFileBackend extends FileBackendStore { /** @var CF_Authentication */ protected $auth; // Swift authentication handler protected $authTTL; // integer seconds + protected $swiftTempUrlKey; // string; shared secret value for making temp urls protected $swiftAnonUser; // string; username to handle unauthenticated requests protected $swiftUseCDN; // boolean; whether CloudFiles CDN is enabled protected $swiftCDNExpiry; // integer; how long to cache things in the CDN protected $swiftCDNPurgable; // boolean; whether object CDN purging is enabled + // Rados Gateway specific options + protected $rgwS3AccessKey; // string; S3 access key + protected $rgwS3SecretKey; // string; S3 authentication key + /** @var CF_Connection */ protected $conn; // Swift connection handle protected $sessionStarted = 0; // integer UNIX timestamp @@ -66,6 +71,8 @@ class SwiftFileBackend extends FileBackendStore { * - swiftUser : Swift user used by MediaWiki (account:username) * - swiftKey : Swift authentication key for the above user * - swiftAuthTTL : Swift authentication TTL (seconds) + * - swiftTempUrlKey : Swift "X-Account-Meta-Temp-URL-Key" value on the account. + * Do not set this until it has been set in the backend.
* - swiftAnonUser : Swift user used for end-user requests (account:username). * If set, then views of public containers are assumed to go * through this user. If not set, then public containers are @@ -84,10 +91,20 @@ class SwiftFileBackend extends FileBackendStore { * - cacheAuthInfo : Whether to cache authentication tokens in APC, XCache, ect. * If those are not available, then the main cache will be used. * This is probably insecure in shared hosting environments. + * - rgwS3AccessKey : Ragos Gateway S3 "access key" value on the account. + * Do not set this until it has been set in the backend. + * This is used for generating expiring pre-authenticated URLs. + * Only use this when using rgw and to work around + * http://tracker.newdream.net/issues/3454. + * - rgwS3SecretKey : Ragos Gateway S3 "secret key" value on the account. + * Do not set this until it has been set in the backend. + * This is used for generating expiring pre-authenticated URLs. + * Only use this when using rgw and to work around + * http://tracker.newdream.net/issues/3454. */ public function __construct( array $config ) { parent::__construct( $config ); - if ( !MWInit::classExists( 'CF_Constants' ) ) { + if ( !class_exists( 'CF_Constants' ) ) { throw new MWException( 'SwiftCloudFiles extension not installed.' ); } // Required settings @@ -104,6 +121,9 @@ class SwiftFileBackend extends FileBackendStore { $this->swiftAnonUser = isset( $config['swiftAnonUser'] ) ? $config['swiftAnonUser'] : ''; + $this->swiftTempUrlKey = isset( $config['swiftTempUrlKey'] ) + ? $config['swiftTempUrlKey'] + : ''; $this->shardViaHashLevels = isset( $config['shardViaHashLevels'] ) ? $config['shardViaHashLevels'] : ''; @@ -112,17 +132,23 @@ class SwiftFileBackend extends FileBackendStore { : false; $this->swiftCDNExpiry = isset( $config['swiftCDNExpiry'] ) ? 
$config['swiftCDNExpiry'] - : 12*3600; // 12 hours is safe (tokens last 24 hours per http://docs.openstack.org) + : 12 * 3600; // 12 hours is safe (tokens last 24 hours per http://docs.openstack.org) $this->swiftCDNPurgable = isset( $config['swiftCDNPurgable'] ) ? $config['swiftCDNPurgable'] : true; + $this->rgwS3AccessKey = isset( $config['rgwS3AccessKey'] ) + ? $config['rgwS3AccessKey'] + : ''; + $this->rgwS3SecretKey = isset( $config['rgwS3SecretKey'] ) + ? $config['rgwS3SecretKey'] + : ''; // Cache container information to mask latency $this->memCache = wfGetMainCache(); // Process cache for container info $this->connContainerCache = new ProcessCacheLRU( 300 ); // Cache auth token information to avoid RTTs if ( !empty( $config['cacheAuthInfo'] ) ) { - if ( php_sapi_name() === 'cli' ) { + if ( PHP_SAPI === 'cli' ) { $this->srvCache = wfGetMainCache(); // preferrably memcached } else { try { // look for APC, XCache, WinCache, ect... @@ -146,10 +172,6 @@ class SwiftFileBackend extends FileBackendStore { return $relStoragePath; } - /** - * @see FileBackendStore::isPathUsableInternal() - * @return bool - */ public function isPathUsableInternal( $storagePath ) { list( $container, $rel ) = $this->resolveStoragePathReal( $storagePath ); if ( $rel === null ) { @@ -168,14 +190,26 @@ class SwiftFileBackend extends FileBackendStore { } /** - * @param $disposition string Content-Disposition header value + * @param array $headers + * @return array + */ + protected function sanitizeHdrs( array $headers ) { + // By default, Swift has annoyingly low maximum header value limits + if ( isset( $headers['Content-Disposition'] ) ) { + $headers['Content-Disposition'] = $this->truncDisp( $headers['Content-Disposition'] ); + } + return $headers; + } + + /** + * @param string $disposition Content-Disposition header value * @return string Truncated Content-Disposition header value to meet Swift limits */ protected function truncDisp( $disposition ) { $res = ''; foreach ( explode( ';', 
$disposition ) as $part ) { $part = trim( $part ); - $new = ( $res === '' ) ? $part : "{$res};{$part}"; + $new = ( $res === '' ) ? $part : "{$res};{$part}"; if ( strlen( $new ) <= 255 ) { $res = $new; } else { @@ -185,10 +219,6 @@ class SwiftFileBackend extends FileBackendStore { return $res; } - /** - * @see FileBackendStore::doCreateInternal() - * @return Status - */ protected function doCreateInternal( array $params ) { $status = Status::newGood(); @@ -201,12 +231,6 @@ class SwiftFileBackend extends FileBackendStore { // (a) Check the destination container and object try { $dContObj = $this->getContainer( $dstCont ); - if ( empty( $params['overwrite'] ) && - $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) - { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); - return $status; - } } catch ( NoSuchContainerException $e ) { $status->fatal( 'backend-fail-create', $params['dst'] ); return $status; @@ -223,31 +247,23 @@ class SwiftFileBackend extends FileBackendStore { // Create a fresh CF_Object with no fields preloaded. // We don't want to preserve headers, metadata, and such. $obj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD - // Note: metadata keys stored as [Upper case char][[Lower case char]...] - $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + $obj->setMetadataValues( array( 'Sha1base36' => $sha1Hash ) ); // Manually set the ETag (https://github.com/rackspace/php-cloudfiles/issues/59). // The MD5 here will be checked within Swift against its own MD5. 
$obj->set_etag( md5( $params['content'] ) ); // Use the same content type as StreamFile for security - $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); - if ( !strlen( $obj->content_type ) ) { // special case - $obj->content_type = 'unknown/unknown'; - } - // Set the Content-Disposition header if requested - if ( isset( $params['disposition'] ) ) { - $obj->headers['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + $obj->content_type = $this->getContentType( $params['dst'], $params['content'], null ); + // Set any other custom headers if requested + if ( isset( $params['headers'] ) ) { + $obj->headers += $this->sanitizeHdrs( $params['headers'] ); } if ( !empty( $params['async'] ) ) { // deferred $op = $obj->write_async( $params['content'] ); $status->value = new SwiftFileOpHandle( $this, $params, 'Create', $op ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated - $status->value->affectedObjects[] = $obj; - } + $status->value->affectedObjects[] = $obj; } else { // actually write the object in Swift $obj->write( $params['content'] ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated - $this->purgeCDNCache( array( $obj ) ); - } + $this->purgeCDNCache( array( $obj ) ); } } catch ( CDNNotEnabledException $e ) { // CDN not enabled; nothing to see here @@ -271,10 +287,6 @@ class SwiftFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doStoreInternal() - * @return Status - */ protected function doStoreInternal( array $params ) { $status = Status::newGood(); @@ -287,12 +299,6 @@ class SwiftFileBackend extends FileBackendStore { // (a) Check the destination container and object try { $dContObj = $this->getContainer( $dstCont ); - if ( empty( $params['overwrite'] ) && - $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) - { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); - return $status; - } } catch ( NoSuchContainerException $e ) { 
$status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); return $status; @@ -302,7 +308,9 @@ class SwiftFileBackend extends FileBackendStore { } // (b) Get a SHA-1 hash of the object + wfSuppressWarnings(); $sha1Hash = sha1_file( $params['src'] ); + wfRestoreWarnings(); if ( $sha1Hash === false ) { // source doesn't exist? $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); return $status; @@ -314,18 +322,14 @@ class SwiftFileBackend extends FileBackendStore { // Create a fresh CF_Object with no fields preloaded. // We don't want to preserve headers, metadata, and such. $obj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD - // Note: metadata keys stored as [Upper case char][[Lower case char]...] - $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + $obj->setMetadataValues( array( 'Sha1base36' => $sha1Hash ) ); // The MD5 here will be checked within Swift against its own MD5. $obj->set_etag( md5_file( $params['src'] ) ); // Use the same content type as StreamFile for security - $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); - if ( !strlen( $obj->content_type ) ) { // special case - $obj->content_type = 'unknown/unknown'; - } - // Set the Content-Disposition header if requested - if ( isset( $params['disposition'] ) ) { - $obj->headers['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + $obj->content_type = $this->getContentType( $params['dst'], null, $params['src'] ); + // Set any other custom headers if requested + if ( isset( $params['headers'] ) ) { + $obj->headers += $this->sanitizeHdrs( $params['headers'] ); } if ( !empty( $params['async'] ) ) { // deferred wfSuppressWarnings(); @@ -337,15 +341,11 @@ class SwiftFileBackend extends FileBackendStore { $op = $obj->write_async( $fp, filesize( $params['src'] ), true ); $status->value = new SwiftFileOpHandle( $this, $params, 'Store', $op ); $status->value->resourcesToClose[] = $fp; - if ( !empty( $params['overwrite'] ) ) 
{ // file possibly mutated - $status->value->affectedObjects[] = $obj; - } + $status->value->affectedObjects[] = $obj; } } else { // actually write the object in Swift $obj->load_from_filename( $params['src'], true ); // calls $obj->write() - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated - $this->purgeCDNCache( array( $obj ) ); - } + $this->purgeCDNCache( array( $obj ) ); } } catch ( CDNNotEnabledException $e ) { // CDN not enabled; nothing to see here @@ -373,10 +373,6 @@ class SwiftFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doCopyInternal() - * @return Status - */ protected function doCopyInternal( array $params ) { $status = Status::newGood(); @@ -396,14 +392,10 @@ class SwiftFileBackend extends FileBackendStore { try { $sContObj = $this->getContainer( $srcCont ); $dContObj = $this->getContainer( $dstCont ); - if ( empty( $params['overwrite'] ) && - $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) - { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); - return $status; - } } catch ( NoSuchContainerException $e ) { - $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + if ( empty( $params['ignoreMissingSource'] ) || isset( $sContObj ) ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } return $status; } catch ( CloudFilesException $e ) { // some other exception? 
$this->handleException( $e, $status, __METHOD__, $params ); @@ -414,25 +406,24 @@ class SwiftFileBackend extends FileBackendStore { try { $dstObj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD $hdrs = array(); // source file headers to override with new values - if ( isset( $params['disposition'] ) ) { - $hdrs['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + // Set any other custom headers if requested + if ( isset( $params['headers'] ) ) { + $hdrs += $this->sanitizeHdrs( $params['headers'] ); } if ( !empty( $params['async'] ) ) { // deferred $op = $sContObj->copy_object_to_async( $srcRel, $dContObj, $dstRel, null, $hdrs ); $status->value = new SwiftFileOpHandle( $this, $params, 'Copy', $op ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated - $status->value->affectedObjects[] = $dstObj; - } + $status->value->affectedObjects[] = $dstObj; } else { // actually write the object in Swift $sContObj->copy_object_to( $srcRel, $dContObj, $dstRel, null, $hdrs ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated - $this->purgeCDNCache( array( $dstObj ) ); - } + $this->purgeCDNCache( array( $dstObj ) ); } } catch ( CDNNotEnabledException $e ) { // CDN not enabled; nothing to see here } catch ( NoSuchObjectException $e ) { // source object does not exist - $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } } catch ( CloudFilesException $e ) { // some other exception? 
$this->handleException( $e, $status, __METHOD__, $params ); } @@ -451,10 +442,6 @@ class SwiftFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doMoveInternal() - * @return Status - */ protected function doMoveInternal( array $params ) { $status = Status::newGood(); @@ -474,14 +461,10 @@ class SwiftFileBackend extends FileBackendStore { try { $sContObj = $this->getContainer( $srcCont ); $dContObj = $this->getContainer( $dstCont ); - if ( empty( $params['overwrite'] ) && - $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) - { - $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); - return $status; - } } catch ( NoSuchContainerException $e ) { - $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + if ( empty( $params['ignoreMissingSource'] ) || isset( $sContObj ) ) { + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + } return $status; } catch ( CloudFilesException $e ) { // some other exception? 
$this->handleException( $e, $status, __METHOD__, $params ); @@ -493,27 +476,26 @@ class SwiftFileBackend extends FileBackendStore { $srcObj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD $dstObj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD $hdrs = array(); // source file headers to override with new values - if ( isset( $params['disposition'] ) ) { - $hdrs['Content-Disposition'] = $this->truncDisp( $params['disposition'] ); + // Set any other custom headers if requested + if ( isset( $params['headers'] ) ) { + $hdrs += $this->sanitizeHdrs( $params['headers'] ); } if ( !empty( $params['async'] ) ) { // deferred $op = $sContObj->move_object_to_async( $srcRel, $dContObj, $dstRel, null, $hdrs ); $status->value = new SwiftFileOpHandle( $this, $params, 'Move', $op ); $status->value->affectedObjects[] = $srcObj; - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated - $status->value->affectedObjects[] = $dstObj; - } + $status->value->affectedObjects[] = $dstObj; } else { // actually write the object in Swift $sContObj->move_object_to( $srcRel, $dContObj, $dstRel, null, $hdrs ); $this->purgeCDNCache( array( $srcObj ) ); - if ( !empty( $params['overwrite'] ) ) { // file possibly mutated - $this->purgeCDNCache( array( $dstObj ) ); - } + $this->purgeCDNCache( array( $dstObj ) ); } } catch ( CDNNotEnabledException $e ) { // CDN not enabled; nothing to see here } catch ( NoSuchObjectException $e ) { // source object does not exist - $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-move', $params['src'], $params['dst'] ); + } } catch ( CloudFilesException $e ) { // some other exception? 
$this->handleException( $e, $status, __METHOD__, $params ); } @@ -532,10 +514,6 @@ class SwiftFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doDeleteInternal() - * @return Status - */ protected function doDeleteInternal( array $params ) { $status = Status::newGood(); @@ -559,7 +537,9 @@ class SwiftFileBackend extends FileBackendStore { } catch ( CDNNotEnabledException $e ) { // CDN not enabled; nothing to see here } catch ( NoSuchContainerException $e ) { - $status->fatal( 'backend-fail-delete', $params['src'] ); + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } } catch ( NoSuchObjectException $e ) { if ( empty( $params['ignoreMissingSource'] ) ) { $status->fatal( 'backend-fail-delete', $params['src'] ); @@ -586,16 +566,45 @@ class SwiftFileBackend extends FileBackendStore { } } - /** - * @see FileBackendStore::doPrepareInternal() - * @return Status - */ + protected function doDescribeInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + try { + $sContObj = $this->getContainer( $srcCont ); + // Get the latest version of the current metadata + $srcObj = $sContObj->get_object( $srcRel, + $this->headersFromParams( array( 'latest' => true ) ) ); + // Merge in the metadata updates... 
+ if ( isset( $params['headers'] ) ) { + $srcObj->headers = $this->sanitizeHdrs( $params['headers'] ) + $srcObj->headers; + } + $srcObj->sync_metadata(); // save to Swift + $this->purgeCDNCache( array( $srcObj ) ); + } catch ( CDNNotEnabledException $e ) { + // CDN not enabled; nothing to see here + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-describe', $params['src'] ); + } catch ( NoSuchObjectException $e ) { + $status->fatal( 'backend-fail-describe', $params['src'] ); + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, $status, __METHOD__, $params ); + } + + return $status; + } + protected function doPrepareInternal( $fullCont, $dir, array $params ) { $status = Status::newGood(); // (a) Check if container already exists try { - $contObj = $this->getContainer( $fullCont ); + $this->getContainer( $fullCont ); // NoSuchContainerException not thrown: container must exist return $status; // already exists } catch ( NoSuchContainerException $e ) { @@ -703,10 +712,6 @@ class SwiftFileBackend extends FileBackendStore { return $status; } - /** - * @see FileBackendStore::doCleanInternal() - * @return Status - */ protected function doCleanInternal( $fullCont, $dir, array $params ) { $status = Status::newGood(); @@ -742,10 +747,6 @@ class SwiftFileBackend extends FileBackendStore { return $status; } - /** - * @see FileBackendStore::doFileExists() - * @return array|bool|null - */ protected function doGetFileStat( array $params ) { list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); if ( $srcRel === null ) { @@ -760,8 +761,8 @@ class SwiftFileBackend extends FileBackendStore { $stat = array( // Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW 'mtime' => wfTimestamp( TS_MW, $srcObj->last_modified ), - 'size' => (int)$srcObj->content_length, - 'sha1' => $srcObj->metadata['Sha1base36'] + 'size' => (int)$srcObj->content_length, + 'sha1' => $srcObj->getMetadataValue( 
'Sha1base36' ) ); } catch ( NoSuchContainerException $e ) { } catch ( NoSuchObjectException $e ) { @@ -776,61 +777,103 @@ class SwiftFileBackend extends FileBackendStore { /** * Fill in any missing object metadata and save it to Swift * - * @param $obj CF_Object - * @param $path string Storage path to object + * @param CF_Object $obj + * @param string $path Storage path to object * @return bool Success * @throws Exception cloudfiles exceptions */ protected function addMissingMetadata( CF_Object $obj, $path ) { - if ( isset( $obj->metadata['Sha1base36'] ) ) { + if ( $obj->getMetadataValue( 'Sha1base36' ) !== null ) { return true; // nothing to do } wfProfileIn( __METHOD__ ); + trigger_error( "$path was not stored with SHA-1 metadata.", E_USER_WARNING ); $status = Status::newGood(); $scopeLockS = $this->getScopedFileLocks( array( $path ), LockManager::LOCK_UW, $status ); if ( $status->isOK() ) { - # Do not stat the file in getLocalCopy() to avoid infinite loops - $tmpFile = $this->getLocalCopy( array( 'src' => $path, 'latest' => 1, 'nostat' => 1 ) ); + $tmpFile = $this->getLocalCopy( array( 'src' => $path, 'latest' => 1 ) ); if ( $tmpFile ) { $hash = $tmpFile->getSha1Base36(); if ( $hash !== false ) { - $obj->metadata['Sha1base36'] = $hash; + $obj->setMetadataValues( array( 'Sha1base36' => $hash ) ); $obj->sync_metadata(); // save to Swift wfProfileOut( __METHOD__ ); return true; // success } } } - $obj->metadata['Sha1base36'] = false; + trigger_error( "Unable to set SHA-1 metadata for $path", E_USER_WARNING ); + $obj->setMetadataValues( array( 'Sha1base36' => false ) ); wfProfileOut( __METHOD__ ); return false; // failed } - /** - * @see FileBackend::getFileContents() - * @return bool|null|string - */ - public function getFileContents( array $params ) { - list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); - if ( $srcRel === null ) { - return false; // invalid storage path - } + protected function doGetFileContentsMulti( array $params ) { + 
$contents = array(); - if ( !$this->fileExists( $params ) ) { - return null; - } + $ep = array_diff_key( $params, array( 'srcs' => 1 ) ); // for error logging + // Blindly create tmp files and stream to them, catching any exception if the file does + // not exist. Doing stats here is useless and will loop infinitely in addMissingMetadata(). + foreach ( array_chunk( $params['srcs'], $params['concurrency'] ) as $pathBatch ) { + $cfOps = array(); // (path => CF_Async_Op) - $data = false; - try { - $sContObj = $this->getContainer( $srcCont ); - $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD - $data = $obj->read( $this->headersFromParams( $params ) ); - } catch ( NoSuchContainerException $e ) { - } catch ( CloudFilesException $e ) { // some other exception? - $this->handleException( $e, null, __METHOD__, $params ); + foreach ( $pathBatch as $path ) { // each path in this concurrent batch + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $path ); + if ( $srcRel === null ) { + $contents[$path] = false; + continue; + } + $data = false; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + // Create a new temporary memory file... + $handle = fopen( 'php://temp', 'wb' ); + if ( $handle ) { + $headers = $this->headersFromParams( $params ); + if ( count( $pathBatch ) > 1 ) { + $cfOps[$path] = $obj->stream_async( $handle, $headers ); + $cfOps[$path]->_file_handle = $handle; // close this later + } else { + $obj->stream( $handle, $headers ); + rewind( $handle ); // start from the beginning + $data = stream_get_contents( $handle ); + fclose( $handle ); + } + } else { + $data = false; + } + } catch ( NoSuchContainerException $e ) { + $data = false; + } catch ( NoSuchObjectException $e ) { + $data = false; + } catch ( CloudFilesException $e ) { // some other exception? 
+ $data = false; + $this->handleException( $e, null, __METHOD__, array( 'src' => $path ) + $ep ); + } + $contents[$path] = $data; + } + + $batch = new CF_Async_Op_Batch( $cfOps ); + $cfOps = $batch->execute(); + foreach ( $cfOps as $path => $cfOp ) { + try { + $cfOp->getLastResponse(); + rewind( $cfOp->_file_handle ); // start from the beginning + $contents[$path] = stream_get_contents( $cfOp->_file_handle ); + } catch ( NoSuchContainerException $e ) { + $contents[$path] = false; + } catch ( NoSuchObjectException $e ) { + $contents[$path] = false; + } catch ( CloudFilesException $e ) { // some other exception? + $contents[$path] = false; + $this->handleException( $e, null, __METHOD__, array( 'src' => $path ) + $ep ); + } + fclose( $cfOp->_file_handle ); // close open handle + } } - return $data; + return $contents; } /** @@ -871,27 +914,28 @@ class SwiftFileBackend extends FileBackendStore { /** * Do not call this function outside of SwiftFileBackendFileList * - * @param $fullCont string Resolved container name - * @param $dir string Resolved storage directory with no trailing slash - * @param $after string|null Storage path of file to list items after - * @param $limit integer Max number of items to list - * @param $params Array Includes flag for 'topOnly' - * @return Array List of relative paths of dirs directly under $dir + * @param string $fullCont Resolved container name + * @param string $dir Resolved storage directory with no trailing slash + * @param string|null $after Storage path of file to list items after + * @param integer $limit Max number of items to list + * @param array $params Parameters for getDirectoryList() + * @return Array List of resolved paths of directories directly under $dir + * @throws FileBackendError */ public function getDirListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { $dirs = array(); if ( $after === INF ) { return $dirs; // nothing more } - wfProfileIn( __METHOD__ . '-' . 
$this->name ); + $section = new ProfileSection( __METHOD__ . '-' . $this->name ); try { $container = $this->getContainer( $fullCont ); $prefix = ( $dir == '' ) ? null : "{$dir}/"; // Non-recursive: only list dirs right under $dir if ( !empty( $params['topOnly'] ) ) { $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); - foreach ( $objects as $object ) { // files and dirs + foreach ( $objects as $object ) { // files and directories if ( substr( $object, -1 ) === '/' ) { $dirs[] = $object; // directories end in '/' } @@ -903,7 +947,7 @@ class SwiftFileBackend extends FileBackendStore { $objects = $container->list_objects( $limit, $after, $prefix ); foreach ( $objects as $object ) { // files $objectDir = $this->getParentDir( $object ); // directory of object - if ( $objectDir !== false ) { // file has a parent dir + if ( $objectDir !== false && $objectDir !== $dir ) { // Swift stores paths in UTF-8, using binary sorting. // See function "create_container_table" in common/db.py. // If a directory is not "greater" than the last one, @@ -922,6 +966,7 @@ class SwiftFileBackend extends FileBackendStore { } } } + // Page on the unfiltered directory listing (what is returned may be filtered) if ( count( $objects ) < $limit ) { $after = INF; // avoid a second RTT } else { @@ -931,9 +976,9 @@ class SwiftFileBackend extends FileBackendStore { } catch ( CloudFilesException $e ) { // some other exception? $this->handleException( $e, null, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); + throw new FileBackendError( "Got " . get_class( $e ) . " exception." ); } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); return $dirs; } @@ -944,36 +989,52 @@ class SwiftFileBackend extends FileBackendStore { /** * Do not call this function outside of SwiftFileBackendFileList * - * @param $fullCont string Resolved container name - * @param $dir string Resolved storage directory with no trailing slash - * @param $after string|null Storage path of file to list items after - * @param $limit integer Max number of items to list - * @param $params Array Includes flag for 'topOnly' - * @return Array List of relative paths of files under $dir + * @param string $fullCont Resolved container name + * @param string $dir Resolved storage directory with no trailing slash + * @param string|null $after Storage path of file to list items after + * @param integer $limit Max number of items to list + * @param array $params Parameters for getDirectoryList() + * @return Array List of resolved paths of files under $dir + * @throws FileBackendError */ public function getFileListPageInternal( $fullCont, $dir, &$after, $limit, array $params ) { $files = array(); if ( $after === INF ) { return $files; // nothing more } - wfProfileIn( __METHOD__ . '-' . $this->name ); + $section = new ProfileSection( __METHOD__ . '-' . $this->name ); try { $container = $this->getContainer( $fullCont ); $prefix = ( $dir == '' ) ? 
null : "{$dir}/"; // Non-recursive: only list files right under $dir if ( !empty( $params['topOnly'] ) ) { // files and dirs - $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); - foreach ( $objects as $object ) { - if ( substr( $object, -1 ) !== '/' ) { - $files[] = $object; // directories end in '/' + if ( !empty( $params['adviseStat'] ) ) { + $limit = min( $limit, self::CACHE_CHEAP_SIZE ); + // Note: get_objects() does not include directories + $objects = $this->loadObjectListing( $params, $dir, + $container->get_objects( $limit, $after, $prefix, null, '/' ) ); + $files = $objects; + } else { + $objects = $container->list_objects( $limit, $after, $prefix, null, '/' ); + foreach ( $objects as $object ) { // files and directories + if ( substr( $object, -1 ) !== '/' ) { + $files[] = $object; // directories end in '/' + } } } // Recursive: list all files under $dir and its subdirs } else { // files - $objects = $container->list_objects( $limit, $after, $prefix ); + if ( !empty( $params['adviseStat'] ) ) { + $limit = min( $limit, self::CACHE_CHEAP_SIZE ); + $objects = $this->loadObjectListing( $params, $dir, + $container->get_objects( $limit, $after, $prefix ) ); + } else { + $objects = $container->list_objects( $limit, $after, $prefix ); + } $files = $objects; } + // Page on the unfiltered object listing (what is returned may be filtered) if ( count( $objects ) < $limit ) { $after = INF; // avoid a second RTT } else { @@ -983,29 +1044,57 @@ class SwiftFileBackend extends FileBackendStore { } catch ( CloudFilesException $e ) { // some other exception? $this->handleException( $e, null, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); + throw new FileBackendError( "Got " . get_class( $e ) . " exception." ); } - wfProfileOut( __METHOD__ . '-' . 
$this->name ); return $files; } /** - * @see FileBackendStore::doGetFileSha1base36() - * @return bool + * Load a list of objects that belong under $dir into stat cache + * and return a list of the names of the objects in the same order. + * + * @param array $params Parameters for getDirectoryList() + * @param string $dir Resolved container directory path + * @param array $cfObjects List of CF_Object items + * @return array List of object names */ + private function loadObjectListing( array $params, $dir, array $cfObjects ) { + $names = array(); + $storageDir = rtrim( $params['dir'], '/' ); + $suffixStart = ( $dir === '' ) ? 0 : strlen( $dir ) + 1; // size of "path/to/dir/" + // Iterate over the list *backwards* as this primes the stat cache, which is LRU. + // If this fills the cache and the caller stats an uncached file before stating + // the ones on the listing, there would be zero cache hits if this went forwards. + for ( end( $cfObjects ); key( $cfObjects ) !== null; prev( $cfObjects ) ) { + $object = current( $cfObjects ); + $path = "{$storageDir}/" . 
substr( $object->name, $suffixStart ); + $val = array( + // Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW + 'mtime' => wfTimestamp( TS_MW, $object->last_modified ), + 'size' => (int)$object->content_length, + 'latest' => false // eventually consistent + ); + $this->cheapCache->set( $path, 'stat', $val ); + $names[] = $object->name; + } + return array_reverse( $names ); // keep the paths in original order + } + protected function doGetFileSha1base36( array $params ) { $stat = $this->getFileStat( $params ); if ( $stat ) { + if ( !isset( $stat['sha1'] ) ) { + // Stat entries filled by file listings don't include SHA1 + $this->clearCache( array( $params['src'] ) ); + $stat = $this->getFileStat( $params ); + } return $stat['sha1']; } else { return false; } } - /** - * @see FileBackendStore::doStreamFile() - * @return Status - */ protected function doStreamFile( array $params ) { $status = Status::newGood(); @@ -1037,51 +1126,120 @@ class SwiftFileBackend extends FileBackendStore { return $status; } - /** - * @see FileBackendStore::getLocalCopy() - * @return null|TempFSFile - */ - public function getLocalCopy( array $params ) { - list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); - if ( $srcRel === null ) { - return null; - } + protected function doGetLocalCopyMulti( array $params ) { + $tmpFiles = array(); - // Blindly create a tmp file and stream to it, catching any exception if the file does - // not exist. Also, doing a stat here will cause infinite loops when filling metadata. - $tmpFile = null; - try { - $sContObj = $this->getContainer( $srcCont ); - $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD - // Get source file extension - $ext = FileBackend::extensionFromPath( $srcRel ); - // Create a new temporary file... 
- $tmpFile = TempFSFile::factory( 'localcopy_', $ext ); - if ( $tmpFile ) { - $handle = fopen( $tmpFile->getPath(), 'wb' ); - if ( $handle ) { - $obj->stream( $handle, $this->headersFromParams( $params ) ); - fclose( $handle ); - } else { - $tmpFile = null; // couldn't open temp file + $ep = array_diff_key( $params, array( 'srcs' => 1 ) ); // for error logging + // Blindly create tmp files and stream to them, catching any exception if the file does + // not exist. Doing a stat here is useless causes infinite loops in addMissingMetadata(). + foreach ( array_chunk( $params['srcs'], $params['concurrency'] ) as $pathBatch ) { + $cfOps = array(); // (path => CF_Async_Op) + + foreach ( $pathBatch as $path ) { // each path in this concurrent batch + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $path ); + if ( $srcRel === null ) { + $tmpFiles[$path] = null; + continue; + } + $tmpFile = null; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + // Get source file extension + $ext = FileBackend::extensionFromPath( $path ); + // Create a new temporary file... + $tmpFile = TempFSFile::factory( 'localcopy_', $ext ); + if ( $tmpFile ) { + $handle = fopen( $tmpFile->getPath(), 'wb' ); + if ( $handle ) { + $headers = $this->headersFromParams( $params ); + if ( count( $pathBatch ) > 1 ) { + $cfOps[$path] = $obj->stream_async( $handle, $headers ); + $cfOps[$path]->_file_handle = $handle; // close this later + } else { + $obj->stream( $handle, $headers ); + fclose( $handle ); + } + } else { + $tmpFile = null; + } + } + } catch ( NoSuchContainerException $e ) { + $tmpFile = null; + } catch ( NoSuchObjectException $e ) { + $tmpFile = null; + } catch ( CloudFilesException $e ) { // some other exception? 
+ $tmpFile = null; + $this->handleException( $e, null, __METHOD__, array( 'src' => $path ) + $ep ); } + $tmpFiles[$path] = $tmpFile; + } + + $batch = new CF_Async_Op_Batch( $cfOps ); + $cfOps = $batch->execute(); + foreach ( $cfOps as $path => $cfOp ) { + try { + $cfOp->getLastResponse(); + } catch ( NoSuchContainerException $e ) { + $tmpFiles[$path] = null; + } catch ( NoSuchObjectException $e ) { + $tmpFiles[$path] = null; + } catch ( CloudFilesException $e ) { // some other exception? + $tmpFiles[$path] = null; + $this->handleException( $e, null, __METHOD__, array( 'src' => $path ) + $ep ); + } + fclose( $cfOp->_file_handle ); // close open handle } - } catch ( NoSuchContainerException $e ) { - $tmpFile = null; - } catch ( NoSuchObjectException $e ) { - $tmpFile = null; - } catch ( CloudFilesException $e ) { // some other exception? - $tmpFile = null; - $this->handleException( $e, null, __METHOD__, $params ); } - return $tmpFile; + return $tmpFiles; + } + + public function getFileHttpUrl( array $params ) { + if ( $this->swiftTempUrlKey != '' || + ( $this->rgwS3AccessKey != '' && $this->rgwS3SecretKey != '' ) ) + { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return null; // invalid path + } + try { + $ttl = isset( $params['ttl'] ) ? $params['ttl'] : 86400; + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + if ( $this->swiftTempUrlKey != '' ) { + return $obj->get_temp_url( $this->swiftTempUrlKey, $ttl, "GET" ); + } else { // give S3 API URL for rgw + $expires = time() + $ttl; + // Path for signature starts with the bucket + $spath = '/' . rawurlencode( $srcCont ) . '/' . 
+ str_replace( '%2F', '/', rawurlencode( $srcRel ) ); + // Calculate the hash + $signature = base64_encode( hash_hmac( + 'sha1', + "GET\n\n\n{$expires}\n{$spath}", + $this->rgwS3SecretKey, + true // raw + ) ); + // See http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html. + // Note: adding a newline for empty CanonicalizedAmzHeaders does not work. + return wfAppendQuery( + str_replace( '/swift/v1', '', // S3 API is the rgw default + $sContObj->cfs_http->getStorageUrl() . $spath ), + array( + 'Signature' => $signature, + 'Expires' => $expires, + 'AWSAccessKeyId' => $this->rgwS3AccessKey ) + ); + } + } catch ( NoSuchContainerException $e ) { + } catch ( CloudFilesException $e ) { // some other exception? + $this->handleException( $e, null, __METHOD__, $params ); + } + } + return null; } - /** - * @see FileBackendStore::directoriesAreVirtual() - * @return bool - */ protected function directoriesAreVirtual() { return true; } @@ -1091,7 +1249,7 @@ class SwiftFileBackend extends FileBackendStore { * on a FileBackend params array, e.g. that of getLocalCopy(). * $params is currently only checked for a 'latest' flag. * - * @param $params Array + * @param array $params * @return Array */ protected function headersFromParams( array $params ) { @@ -1102,10 +1260,6 @@ class SwiftFileBackend extends FileBackendStore { return $hdrs; } - /** - * @see FileBackendStore::doExecuteOpHandlesInternal() - * @return Array List of corresponding Status objects - */ protected function doExecuteOpHandlesInternal( array $fileOpHandles ) { $statuses = array(); @@ -1118,8 +1272,8 @@ class SwiftFileBackend extends FileBackendStore { $cfOps = $batch->execute(); foreach ( $cfOps as $index => $cfOp ) { $status = Status::newGood(); + $function = '_getResponse' . $fileOpHandles[$index]->call; try { // catch exceptions; update status - $function = '_getResponse' . 
$fileOpHandles[$index]->call; $this->$function( $cfOp, $status, $fileOpHandles[$index]->params ); $this->purgeCDNCache( $fileOpHandles[$index]->affectedObjects ); } catch ( CloudFilesException $e ) { // some other exception? @@ -1137,12 +1291,12 @@ class SwiftFileBackend extends FileBackendStore { * * $readGrps is a list of the possible criteria for a request to have * access to read a container. Each item is one of the following formats: - * - account:user : Grants access if the request is by the given user - * - .r:<regex> : Grants access if the request is from a referrer host that - * matches the expression and the request is not for a listing. - * Setting this to '*' effectively makes a container public. - * - .rlistings:<regex> : Grants access if the request is from a referrer host that - * matches the expression and the request for a listing. + * - account:user : Grants access if the request is by the given user + * - ".r:<regex>" : Grants access if the request is from a referrer host that + * matches the expression and the request is not for a listing. + * Setting this to '*' effectively makes a container public. + * -".rlistings:<regex>" : Grants access if the request is from a referrer host that + * matches the expression and the request is for a listing. * * $writeGrps is a list of the possible criteria for a request to have * access to write to a container. Each item is of the following format: @@ -1153,9 +1307,9 @@ class SwiftFileBackend extends FileBackendStore { * In general, we don't allow listings to end-users. It's not useful, isn't well-defined * (lists are truncated to 10000 item with no way to page), and is just a performance risk. 
* - * @param $contObj CF_Container Swift container - * @param $readGrps Array List of read access routes - * @param $writeGrps Array List of write access routes + * @param CF_Container $contObj Swift container + * @param array $readGrps List of read access routes + * @param array $writeGrps List of write access routes * @return Status */ protected function setContainerAccess( @@ -1178,7 +1332,7 @@ class SwiftFileBackend extends FileBackendStore { * Purge the CDN cache of affected objects if CDN caching is enabled. * This is for Rackspace/Akamai CDNs. * - * @param $objects Array List of CF_Object items + * @param array $objects List of CF_Object items * @return void */ public function purgeCDNCache( array $objects ) { @@ -1199,8 +1353,9 @@ class SwiftFileBackend extends FileBackendStore { /** * Get an authenticated connection handle to the Swift proxy * - * @return CF_Connection|bool False on failure * @throws CloudFilesException + * @throws CloudFilesException|Exception + * @return CF_Connection|bool False on failure */ protected function getConnection() { if ( $this->connException instanceof CloudFilesException ) { @@ -1222,12 +1377,12 @@ class SwiftFileBackend extends FileBackendStore { if ( is_array( $creds ) ) { // cache hit $this->auth->load_cached_credentials( $creds['auth_token'], $creds['storage_url'], $creds['cdnm_url'] ); - $this->sessionStarted = time() - ceil( $this->authTTL/2 ); // skew for worst case + $this->sessionStarted = time() - ceil( $this->authTTL / 2 ); // skew for worst case } else { // cache miss try { $this->auth->authenticate(); $creds = $this->auth->export_credentials(); - $this->srvCache->add( $cacheKey, $creds, ceil( $this->authTTL/2 ) ); // cache + $this->srvCache->add( $cacheKey, $creds, ceil( $this->authTTL / 2 ) ); // cache $this->sessionStarted = time(); } catch ( CloudFilesException $e ) { $this->connException = $e; // don't keep re-trying @@ -1251,6 +1406,7 @@ class SwiftFileBackend extends FileBackendStore { protected function 
closeConnection() { if ( $this->conn ) { $this->conn->close(); // close active cURL handles in CF_Http object + $this->conn = null; $this->sessionStarted = 0; $this->connContainerCache->clear(); } @@ -1259,7 +1415,7 @@ class SwiftFileBackend extends FileBackendStore { /** * Get the cache key for a container * - * @param $username string + * @param string $username * @return string */ private function getCredsCacheKey( $username ) { @@ -1267,18 +1423,11 @@ class SwiftFileBackend extends FileBackendStore { } /** - * @see FileBackendStore::doClearCache() - */ - protected function doClearCache( array $paths = null ) { - $this->connContainerCache->clear(); // clear container object cache - } - - /** * Get a Swift container object, possibly from process cache. * Use $reCache if the file count or byte count is needed. * - * @param $container string Container name - * @param $bypassCache bool Bypass all caches and load from Swift + * @param string $container Container name + * @param bool $bypassCache Bypass all caches and load from Swift * @return CF_Container * @throws CloudFilesException */ @@ -1305,7 +1454,7 @@ class SwiftFileBackend extends FileBackendStore { /** * Create a Swift container * - * @param $container string Container name + * @param string $container Container name * @return CF_Container * @throws CloudFilesException */ @@ -1319,7 +1468,7 @@ class SwiftFileBackend extends FileBackendStore { /** * Delete a Swift container * - * @param $container string Container name + * @param string $container Container name * @return void * @throws CloudFilesException */ @@ -1329,10 +1478,6 @@ class SwiftFileBackend extends FileBackendStore { $conn->delete_container( $container ); } - /** - * @see FileBackendStore::doPrimeContainerCache() - * @return void - */ protected function doPrimeContainerCache( array $containerInfo ) { try { $conn = $this->getConnection(); // Swift proxy connection @@ -1350,10 +1495,10 @@ class SwiftFileBackend extends FileBackendStore { * Log an 
unexpected exception for this backend. * This also sets the Status object to have a fatal error. * - * @param $e Exception - * @param $status Status|null - * @param $func string - * @param $params Array + * @param Exception $e + * @param Status $status|null + * @param string $func + * @param array $params * @return void */ protected function handleException( Exception $e, $status, $func, array $params ) { @@ -1387,7 +1532,15 @@ class SwiftFileOpHandle extends FileBackendStoreOpHandle { /** @var Array */ public $affectedObjects = array(); - public function __construct( $backend, array $params, $call, CF_Async_Op $cfOp ) { + /** + * @param SwiftFileBackend $backend + * @param array $params + * @param string $call + * @param CF_Async_Op $cfOp + */ + public function __construct( + SwiftFileBackend $backend, array $params, $call, CF_Async_Op $cfOp + ) { $this->backend = $backend; $this->params = $params; $this->call = $call; @@ -1419,10 +1572,10 @@ abstract class SwiftFileBackendList implements Iterator { const PAGE_SIZE = 9000; // file listing buffer size /** - * @param $backend SwiftFileBackend - * @param $fullCont string Resolved container name - * @param $dir string Resolved directory relative to container - * @param $params Array + * @param SwiftFileBackend $backend + * @param string $fullCont Resolved container name + * @param string $dir Resolved directory relative to container + * @param array $params */ public function __construct( SwiftFileBackend $backend, $fullCont, $dir, array $params ) { $this->backend = $backend; @@ -1491,12 +1644,12 @@ abstract class SwiftFileBackendList implements Iterator { /** * Get the given list portion (page) * - * @param $container string Resolved container name - * @param $dir string Resolved path relative to container - * @param $after string|null - * @param $limit integer - * @param $params Array - * @return Traversable|Array|null Returns null on failure + * @param string $container Resolved container name + * @param string 
$dir Resolved path relative to container + * @param string $after|null + * @param integer $limit + * @param array $params + * @return Traversable|Array */ abstract protected function pageFromList( $container, $dir, &$after, $limit, array $params ); } @@ -1515,7 +1668,7 @@ class SwiftFileBackendDirList extends SwiftFileBackendList { /** * @see SwiftFileBackendList::pageFromList() - * @return Array|null + * @return Array */ protected function pageFromList( $container, $dir, &$after, $limit, array $params ) { return $this->backend->getDirListPageInternal( $container, $dir, $after, $limit, $params ); @@ -1536,7 +1689,7 @@ class SwiftFileBackendFileList extends SwiftFileBackendList { /** * @see SwiftFileBackendList::pageFromList() - * @return Array|null + * @return Array */ protected function pageFromList( $container, $dir, &$after, $limit, array $params ) { return $this->backend->getFileListPageInternal( $container, $dir, $after, $limit, $params ); diff --git a/includes/filebackend/TempFSFile.php b/includes/filebackend/TempFSFile.php index 5032bf68..8266e420 100644 --- a/includes/filebackend/TempFSFile.php +++ b/includes/filebackend/TempFSFile.php @@ -37,8 +37,8 @@ class TempFSFile extends FSFile { * Make a new temporary file on the file system. * Temporary files may be purged when the file object falls out of scope. 
* - * @param $prefix string - * @param $extension string + * @param string $prefix + * @param string $extension * @return TempFSFile|null */ public static function factory( $prefix, $extension = '' ) { @@ -81,31 +81,38 @@ class TempFSFile extends FSFile { /** * Clean up the temporary file only after an object goes out of scope * - * @param $object Object - * @return void + * @param Object $object + * @return TempFSFile This object */ public function bind( $object ) { if ( is_object( $object ) ) { + if ( !isset( $object->tempFSFileReferences ) ) { + // Init first since $object might use __get() and return only a copy variable + $object->tempFSFileReferences = array(); + } $object->tempFSFileReferences[] = $this; } + return $this; } /** * Set flag to not clean up after the temporary file * - * @return void + * @return TempFSFile This object */ public function preserve() { $this->canDelete = false; + return $this; } /** * Set flag clean up after the temporary file * - * @return void + * @return TempFSFile This object */ public function autocollect() { $this->canDelete = true; + return $this; } /** diff --git a/includes/filebackend/filejournal/DBFileJournal.php b/includes/filebackend/filejournal/DBFileJournal.php index f6268c25..9250aa5e 100644 --- a/includes/filebackend/filejournal/DBFileJournal.php +++ b/includes/filebackend/filejournal/DBFileJournal.php @@ -65,16 +65,19 @@ class DBFileJournal extends FileJournal { foreach ( $entries as $entry ) { $data[] = array( 'fj_batch_uuid' => $batchId, - 'fj_backend' => $this->backend, - 'fj_op' => $entry['op'], - 'fj_path' => $entry['path'], - 'fj_new_sha1' => $entry['newSha1'], - 'fj_timestamp' => $dbw->timestamp( $now ) + 'fj_backend' => $this->backend, + 'fj_op' => $entry['op'], + 'fj_path' => $entry['path'], + 'fj_new_sha1' => $entry['newSha1'], + 'fj_timestamp' => $dbw->timestamp( $now ) ); } try { $dbw->insert( 'filejournal', $data, __METHOD__ ); + if ( mt_rand( 0, 99 ) == 0 ) { + $this->purgeOldLogs(); // occasionally 
delete old logs + } } catch ( DBError $e ) { $status->fatal( 'filejournal-fail-dbquery', $this->backend ); return $status; @@ -84,6 +87,35 @@ class DBFileJournal extends FileJournal { } /** + * @see FileJournal::doGetCurrentPosition() + * @return integer|false + */ + protected function doGetCurrentPosition() { + $dbw = $this->getMasterDB(); + + return $dbw->selectField( 'filejournal', 'MAX(fj_id)', + array( 'fj_backend' => $this->backend ), + __METHOD__ + ); + } + + /** + * @see FileJournal::doGetPositionAtTime() + * @param $time integer|string timestamp + * @return integer|false + */ + protected function doGetPositionAtTime( $time ) { + $dbw = $this->getMasterDB(); + + $encTimestamp = $dbw->addQuotes( $dbw->timestamp( $time ) ); + return $dbw->selectField( 'filejournal', 'fj_id', + array( 'fj_backend' => $this->backend, "fj_timestamp <= $encTimestamp" ), + __METHOD__, + array( 'ORDER BY' => 'fj_timestamp DESC' ) + ); + } + + /** * @see FileJournal::doGetChangeEntries() * @return Array * @throws DBError diff --git a/includes/filebackend/filejournal/FileJournal.php b/includes/filebackend/filejournal/FileJournal.php index ce029bbe..a1b7a459 100644 --- a/includes/filebackend/filejournal/FileJournal.php +++ b/includes/filebackend/filejournal/FileJournal.php @@ -54,7 +54,7 @@ abstract class FileJournal { * Create an appropriate FileJournal object from config * * @param $config Array - * @param $backend string A registered file backend name + * @param string $backend A registered file backend name * @throws MWException * @return FileJournal */ @@ -85,13 +85,13 @@ abstract class FileJournal { /** * Log changes made by a batch file operation. * $entries is an array of log entries, each of which contains: - * op : Basic operation name (create, store, copy, delete) + * op : Basic operation name (create, update, delete) * path : The storage path of the file * newSha1 : The final base 36 SHA-1 of the file * Note that 'false' should be used as the SHA-1 for non-existing files. 
* - * @param $entries Array List of file operations (each an array of parameters) - * @param $batchId string UUID string that identifies the operation batch + * @param array $entries List of file operations (each an array of parameters) + * @param string $batchId UUID string that identifies the operation batch * @return Status */ final public function logChangeBatch( array $entries, $batchId ) { @@ -104,13 +104,45 @@ abstract class FileJournal { /** * @see FileJournal::logChangeBatch() * - * @param $entries Array List of file operations (each an array of parameters) - * @param $batchId string UUID string that identifies the operation batch + * @param array $entries List of file operations (each an array of parameters) + * @param string $batchId UUID string that identifies the operation batch * @return Status */ abstract protected function doLogChangeBatch( array $entries, $batchId ); /** + * Get the position ID of the latest journal entry + * + * @return integer|false + */ + final public function getCurrentPosition() { + return $this->doGetCurrentPosition(); + } + + /** + * @see FileJournal::getCurrentPosition() + * @return integer|false + */ + abstract protected function doGetCurrentPosition(); + + /** + * Get the position ID of the latest journal entry at some point in time + * + * @param $time integer|string timestamp + * @return integer|false + */ + final public function getPositionAtTime( $time ) { + return $this->doGetPositionAtTime( $time ); + } + + /** + * @see FileJournal::getPositionAtTime() + * @param $time integer|string timestamp + * @return integer|false + */ + abstract protected function doGetPositionAtTime( $time ); + + /** * Get an array of file change log entries. * A starting change ID and/or limit can be specified. 
* @@ -169,7 +201,7 @@ abstract class FileJournal { */ class NullFileJournal extends FileJournal { /** - * @see FileJournal::logChangeBatch() + * @see FileJournal::doLogChangeBatch() * @param $entries array * @param $batchId string * @return Status @@ -179,6 +211,23 @@ class NullFileJournal extends FileJournal { } /** + * @see FileJournal::doGetCurrentPosition() + * @return integer|false + */ + protected function doGetCurrentPosition() { + return false; + } + + /** + * @see FileJournal::doGetPositionAtTime() + * @param $time integer|string timestamp + * @return integer|false + */ + protected function doGetPositionAtTime( $time ) { + return false; + } + + /** * @see FileJournal::doGetChangeEntries() * @return Array */ @@ -187,7 +236,7 @@ class NullFileJournal extends FileJournal { } /** - * @see FileJournal::purgeOldLogs() + * @see FileJournal::doPurgeOldLogs() * @return Status */ protected function doPurgeOldLogs() { diff --git a/includes/filebackend/lockmanager/DBLockManager.php b/includes/filebackend/lockmanager/DBLockManager.php index a8fe258b..3e934ba5 100644 --- a/includes/filebackend/lockmanager/DBLockManager.php +++ b/includes/filebackend/lockmanager/DBLockManager.php @@ -22,10 +22,9 @@ */ /** - * Version of LockManager based on using DB table locks. + * Version of LockManager based on using named/row DB locks. + * * This is meant for multi-wiki systems that may share files. - * All locks are blocking, so it might be useful to set a small - * lock-wait timeout via server config to curtail deadlocks. * * All lock requests for a resource, identified by a hash string, will map * to one bucket. 
Each bucket maps to one or several peer DBs, each on their @@ -37,7 +36,7 @@ * @ingroup LockManager * @since 1.19 */ -class DBLockManager extends QuorumLockManager { +abstract class DBLockManager extends QuorumLockManager { /** @var Array Map of DB names to server config */ protected $dbServers; // (DB name => server config array) /** @var BagOStuff */ @@ -67,11 +66,12 @@ class DBLockManager extends QuorumLockManager { * each having an odd-numbered list of DB names (peers) as values. * Any DB named 'localDBMaster' will automatically use the DB master * settings for this wiki (without the need for a dbServers entry). + * Only use 'localDBMaster' if the domain is a valid wiki ID. * - lockExpiry : Lock timeout (seconds) for dropped connections. [optional] * This tells the DB server how long to wait before assuming * connection failure and releasing all the locks for a session. * - * @param Array $config + * @param array $config */ public function __construct( array $config ) { parent::__construct( $config ); @@ -110,63 +110,17 @@ class DBLockManager extends QuorumLockManager { $this->session = wfRandomString( 31 ); } - /** - * Get a connection to a lock DB and acquire locks on $paths. - * This does not use GET_LOCK() per http://bugs.mysql.com/bug.php?id=1118. 
- * - * @see QuorumLockManager::getLocksOnServer() - * @return Status - */ - protected function getLocksOnServer( $lockSrv, array $paths, $type ) { + // @TODO: change this code to work in one batch + protected function getLocksOnServer( $lockSrv, array $pathsByType ) { $status = Status::newGood(); - - if ( $type == self::LOCK_EX ) { // writer locks - try { - $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); - # Build up values for INSERT clause - $data = array(); - foreach ( $keys as $key ) { - $data[] = array( 'fle_key' => $key ); - } - # Wait on any existing writers and block new ones if we get in - $db = $this->getConnection( $lockSrv ); // checked in isServerUp() - $db->insert( 'filelocks_exclusive', $data, __METHOD__ ); - } catch ( DBError $e ) { - foreach ( $paths as $path ) { - $status->fatal( 'lockmanager-fail-acquirelock', $path ); - } - } + foreach ( $pathsByType as $type => $paths ) { + $status->merge( $this->doGetLocksOnServer( $lockSrv, $paths, $type ) ); } - return $status; } - /** - * @see QuorumLockManager::freeLocksOnServer() - * @return Status - */ - protected function freeLocksOnServer( $lockSrv, array $paths, $type ) { - return Status::newGood(); // not supported - } - - /** - * @see QuorumLockManager::releaseAllLocks() - * @return Status - */ - protected function releaseAllLocks() { - $status = Status::newGood(); - - foreach ( $this->conns as $lockDb => $db ) { - if ( $db->trxLevel() ) { // in transaction - try { - $db->rollback( __METHOD__ ); // finish transaction and kill any rows - } catch ( DBError $e ) { - $status->fatal( 'lockmanager-fail-db-release', $lockDb ); - } - } - } - - return $status; + protected function freeLocksOnServer( $lockSrv, array $pathsByType ) { + return Status::newGood(); } /** @@ -197,8 +151,8 @@ class DBLockManager extends QuorumLockManager { if ( !isset( $this->conns[$lockDb] ) ) { $db = null; if ( $lockDb === 'localDBMaster' ) { - $lb = wfGetLBFactory()->newMainLB(); - $db = 
$lb->getConnection( DB_MASTER ); + $lb = wfGetLBFactory()->getMainLB( $this->domain ); + $db = $lb->getConnection( DB_MASTER, array(), $this->domain ); } elseif ( isset( $this->dbServers[$lockDb] ) ) { $config = $this->dbServers[$lockDb]; $db = DatabaseBase::factory( $config['type'], $config ); @@ -274,14 +228,8 @@ class DBLockManager extends QuorumLockManager { * Make sure remaining locks get cleared for sanity */ function __destruct() { + $this->releaseAllLocks(); foreach ( $this->conns as $db ) { - if ( $db->trxLevel() ) { // in transaction - try { - $db->rollback( __METHOD__ ); // finish transaction and kill any rows - } catch ( DBError $e ) { - // oh well - } - } $db->close(); } } @@ -317,31 +265,42 @@ class MySqlLockManager extends DBLockManager { * @see DBLockManager::getLocksOnServer() * @return Status */ - protected function getLocksOnServer( $lockSrv, array $paths, $type ) { + protected function doGetLocksOnServer( $lockSrv, array $paths, $type ) { $status = Status::newGood(); $db = $this->getConnection( $lockSrv ); // checked in isServerUp() - $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + + $keys = array(); // list of hash keys for the paths + $data = array(); // list of rows to insert + $checkEXKeys = array(); // list of hash keys that this has no EX lock on # Build up values for INSERT clause - $data = array(); - foreach ( $keys as $key ) { + foreach ( $paths as $path ) { + $key = $this->sha1Base36Absolute( $path ); + $keys[] = $key; $data[] = array( 'fls_key' => $key, 'fls_session' => $this->session ); + if ( !isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { + $checkEXKeys[] = $key; + } } - # Block new writers... + + # Block new writers (both EX and SH locks leave entries here)... $db->insert( 'filelocks_shared', $data, __METHOD__, array( 'IGNORE' ) ); # Actually do the locking queries... if ( $type == self::LOCK_SH ) { // reader locks + $blocked = false; # Bail if there are any existing writers... 
- $blocked = $db->selectField( 'filelocks_exclusive', '1', - array( 'fle_key' => $keys ), - __METHOD__ - ); - # Prospective writers that haven't yet updated filelocks_exclusive - # will recheck filelocks_shared after doing so and bail due to our entry. + if ( count( $checkEXKeys ) ) { + $blocked = $db->selectField( 'filelocks_exclusive', '1', + array( 'fle_key' => $checkEXKeys ), + __METHOD__ + ); + } + # Other prospective writers that haven't yet updated filelocks_exclusive + # will recheck filelocks_shared after doing so and bail due to this entry. } else { // writer locks $encSession = $db->addQuotes( $this->session ); # Bail if there are any existing writers... - # The may detect readers, but the safe check for them is below. + # This may detect readers, but the safe check for them is below. # Note: if two writers come at the same time, both bail :) $blocked = $db->selectField( 'filelocks_shared', '1', array( 'fls_key' => $keys, "fls_session != $encSession" ), @@ -371,4 +330,103 @@ class MySqlLockManager extends DBLockManager { return $status; } + + /** + * @see QuorumLockManager::releaseAllLocks() + * @return Status + */ + protected function releaseAllLocks() { + $status = Status::newGood(); + + foreach ( $this->conns as $lockDb => $db ) { + if ( $db->trxLevel() ) { // in transaction + try { + $db->rollback( __METHOD__ ); // finish transaction and kill any rows + } catch ( DBError $e ) { + $status->fatal( 'lockmanager-fail-db-release', $lockDb ); + } + } + } + + return $status; + } +} + +/** + * PostgreSQL version of DBLockManager that supports shared locks. + * All locks are non-blocking, which avoids deadlocks. 
+ * + * @ingroup LockManager + */ +class PostgreSqlLockManager extends DBLockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + protected function doGetLocksOnServer( $lockSrv, array $paths, $type ) { + $status = Status::newGood(); + if ( !count( $paths ) ) { + return $status; // nothing to lock + } + + $db = $this->getConnection( $lockSrv ); // checked in isServerUp() + $bigints = array_unique( array_map( + function( $key ) { + return wfBaseConvert( substr( $key, 0, 15 ), 16, 10 ); + }, + array_map( array( $this, 'sha1Base16Absolute' ), $paths ) + ) ); + + // Try to acquire all the locks... + $fields = array(); + foreach ( $bigints as $bigint ) { + $fields[] = ( $type == self::LOCK_SH ) + ? "pg_try_advisory_lock_shared({$db->addQuotes( $bigint )}) AS K$bigint" + : "pg_try_advisory_lock({$db->addQuotes( $bigint )}) AS K$bigint"; + } + $res = $db->query( 'SELECT ' . implode( ', ', $fields ), __METHOD__ ); + $row = (array)$res->fetchObject(); + + if ( in_array( 'f', $row ) ) { + // Release any acquired locks if some could not be acquired... + $fields = array(); + foreach ( $row as $kbigint => $ok ) { + if ( $ok === 't' ) { // locked + $bigint = substr( $kbigint, 1 ); // strip off the "K" + $fields[] = ( $type == self::LOCK_SH ) + ? "pg_advisory_unlock_shared({$db->addQuotes( $bigint )})" + : "pg_advisory_unlock({$db->addQuotes( $bigint )})"; + } + } + if ( count( $fields ) ) { + $db->query( 'SELECT ' . 
implode( ', ', $fields ), __METHOD__ ); + } + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + } + + return $status; + } + + /** + * @see QuorumLockManager::releaseAllLocks() + * @return Status + */ + protected function releaseAllLocks() { + $status = Status::newGood(); + + foreach ( $this->conns as $lockDb => $db ) { + try { + $db->query( "SELECT pg_advisory_unlock_all()", __METHOD__ ); + } catch ( DBError $e ) { + $status->fatal( 'lockmanager-fail-db-release', $lockDb ); + } + } + + return $status; + } } diff --git a/includes/filebackend/lockmanager/FSLockManager.php b/includes/filebackend/lockmanager/FSLockManager.php index 9a6206fd..eacba704 100644 --- a/includes/filebackend/lockmanager/FSLockManager.php +++ b/includes/filebackend/lockmanager/FSLockManager.php @@ -43,7 +43,7 @@ class FSLockManager extends LockManager { protected $lockDir; // global dir for all servers - /** @var Array Map of (locked key => lock type => lock file handle) */ + /** @var Array Map of (locked key => lock file handle) */ protected $handles = array(); /** @@ -115,12 +115,16 @@ class FSLockManager extends LockManager { } elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { $this->locksHeld[$path][$type] = 1; } else { - wfSuppressWarnings(); - $handle = fopen( $this->getLockPath( $path ), 'a+' ); - wfRestoreWarnings(); - if ( !$handle ) { // lock dir missing? - wfMkdirParents( $this->lockDir ); - $handle = fopen( $this->getLockPath( $path ), 'a+' ); // try again + if ( isset( $this->handles[$path] ) ) { + $handle = $this->handles[$path]; + } else { + wfSuppressWarnings(); + $handle = fopen( $this->getLockPath( $path ), 'a+' ); + wfRestoreWarnings(); + if ( !$handle ) { // lock dir missing? 
+ wfMkdirParents( $this->lockDir ); + $handle = fopen( $this->getLockPath( $path ), 'a+' ); // try again + } } if ( $handle ) { // Either a shared or exclusive lock @@ -128,7 +132,7 @@ class FSLockManager extends LockManager { if ( flock( $handle, $lock | LOCK_NB ) ) { // Record this lock as active $this->locksHeld[$path][$type] = 1; - $this->handles[$path][$type] = $handle; + $this->handles[$path] = $handle; } else { fclose( $handle ); $status->fatal( 'lockmanager-fail-acquirelock', $path ); @@ -160,24 +164,13 @@ class FSLockManager extends LockManager { --$this->locksHeld[$path][$type]; if ( $this->locksHeld[$path][$type] <= 0 ) { unset( $this->locksHeld[$path][$type] ); - // If a LOCK_SH comes in while we have a LOCK_EX, we don't - // actually add a handler, so check for handler existence. - if ( isset( $this->handles[$path][$type] ) ) { - if ( $type === self::LOCK_EX - && isset( $this->locksHeld[$path][self::LOCK_SH] ) - && !isset( $this->handles[$path][self::LOCK_SH] ) ) - { - // EX lock came first: move this handle to the SH one - $this->handles[$path][self::LOCK_SH] = $this->handles[$path][$type]; - } else { - // Mark this handle to be unlocked and closed - $handlesToClose[] = $this->handles[$path][$type]; - } - unset( $this->handles[$path][$type] ); - } } if ( !count( $this->locksHeld[$path] ) ) { unset( $this->locksHeld[$path] ); // no locks on this path + if ( isset( $this->handles[$path] ) ) { + $handlesToClose[] = $this->handles[$path]; + unset( $this->handles[$path] ); + } } // Unlock handles to release locks and delete // any lock files that end up with no locks on them... 
@@ -237,8 +230,7 @@ class FSLockManager extends LockManager { * @return string */ protected function getLockPath( $path ) { - $hash = self::sha1Base36( $path ); - return "{$this->lockDir}/{$hash}.lock"; + return "{$this->lockDir}/{$this->sha1Base36Absolute( $path )}.lock"; } /** diff --git a/includes/filebackend/lockmanager/LSLockManager.php b/includes/filebackend/lockmanager/LSLockManager.php index 89428182..97de8dca 100644 --- a/includes/filebackend/lockmanager/LSLockManager.php +++ b/includes/filebackend/lockmanager/LSLockManager.php @@ -66,7 +66,7 @@ class LSLockManager extends QuorumLockManager { * each having an odd-numbered list of server names (peers) as values. * - connTimeout : Lock server connection attempt timeout. [optional] * - * @param Array $config + * @param array $config */ public function __construct( array $config ) { parent::__construct( $config ); @@ -94,7 +94,7 @@ class LSLockManager extends QuorumLockManager { // Send out the command and get the response... $type = ( $type == self::LOCK_SH ) ? 'SH' : 'EX'; - $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + $keys = array_unique( array_map( array( $this, 'sha1Base36Absolute' ), $paths ) ); $response = $this->sendCommand( $lockSrv, 'ACQUIRE', $type, $keys ); if ( $response !== 'ACQUIRED' ) { @@ -115,7 +115,7 @@ class LSLockManager extends QuorumLockManager { // Send out the command and get the response... $type = ( $type == self::LOCK_SH ) ? 'SH' : 'EX'; - $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + $keys = array_unique( array_map( array( $this, 'sha1Base36Absolute' ), $paths ) ); $response = $this->sendCommand( $lockSrv, 'RELEASE', $type, $keys ); if ( $response !== 'RELEASED' ) { @@ -169,7 +169,7 @@ class LSLockManager extends QuorumLockManager { $authKey = $this->lockServers[$lockSrv]['authKey']; // Build of the command as a flat string... $values = implode( '|', $values ); - $key = sha1( $this->session . $action . $type . $values . 
$authKey ); + $key = hash_hmac( 'sha1', "{$this->session}\n{$action}\n{$type}\n{$values}", $authKey ); // Send out the command... if ( fwrite( $conn, "{$this->session}:$key:$action:$type:$values\n" ) === false ) { return false; diff --git a/includes/filebackend/lockmanager/LockManager.php b/includes/filebackend/lockmanager/LockManager.php index 07853f87..dad8a624 100644 --- a/includes/filebackend/lockmanager/LockManager.php +++ b/includes/filebackend/lockmanager/LockManager.php @@ -53,7 +53,10 @@ abstract class LockManager { /** @var Array Map of (resource path => lock type => count) */ protected $locksHeld = array(); - /* Lock types; stronger locks have higher values */ + protected $domain; // string; domain (usually wiki ID) + protected $lockTTL; // integer; maximum time locks can be held + + /** Lock types; stronger locks have higher values */ const LOCK_SH = 1; // shared lock (for reads) const LOCK_UW = 2; // shared lock (for reads used to write elsewhere) const LOCK_EX = 3; // exclusive lock (for writes) @@ -61,341 +64,185 @@ abstract class LockManager { /** * Construct a new instance from configuration * + * $config paramaters include: + * - domain : Domain (usually wiki ID) that all resources are relative to [optional] + * - lockTTL : Age (in seconds) at which resource locks should expire. + * This only applies if locks are not tied to a connection/process. + * * @param $config Array */ - public function __construct( array $config ) {} + public function __construct( array $config ) { + $this->domain = isset( $config['domain'] ) ? 
$config['domain'] : wfWikiID(); + if ( isset( $config['lockTTL'] ) ) { + $this->lockTTL = max( 1, $config['lockTTL'] ); + } elseif ( PHP_SAPI === 'cli' ) { + $this->lockTTL = 2 * 3600; + } else { + $met = ini_get( 'max_execution_time' ); // this is 0 in CLI mode + $this->lockTTL = max( 5 * 60, 2 * (int)$met ); + } + } /** * Lock the resources at the given abstract paths * - * @param $paths Array List of resource names + * @param array $paths List of resource names * @param $type integer LockManager::LOCK_* constant + * @param integer $timeout Timeout in seconds (0 means non-blocking) (since 1.21) * @return Status */ - final public function lock( array $paths, $type = self::LOCK_EX ) { - wfProfileIn( __METHOD__ ); - $status = $this->doLock( array_unique( $paths ), $this->lockTypeMap[$type] ); - wfProfileOut( __METHOD__ ); - return $status; + final public function lock( array $paths, $type = self::LOCK_EX, $timeout = 0 ) { + return $this->lockByType( array( $type => $paths ), $timeout ); } /** - * Unlock the resources at the given abstract paths + * Lock the resources at the given abstract paths * - * @param $paths Array List of storage paths - * @param $type integer LockManager::LOCK_* constant + * @param array $pathsByType Map of LockManager::LOCK_* constants to lists of paths + * @param integer $timeout Timeout in seconds (0 means non-blocking) (since 1.21) * @return Status + * @since 1.22 */ - final public function unlock( array $paths, $type = self::LOCK_EX ) { + final public function lockByType( array $pathsByType, $timeout = 0 ) { wfProfileIn( __METHOD__ ); - $status = $this->doUnlock( array_unique( $paths ), $this->lockTypeMap[$type] ); + $status = Status::newGood(); + $pathsByType = $this->normalizePathsByType( $pathsByType ); + $msleep = array( 0, 50, 100, 300, 500 ); // retry backoff times + $start = microtime( true ); + do { + $status = $this->doLockByType( $pathsByType ); + $elapsed = microtime( true ) - $start; + if ( $status->isOK() || $elapsed >= 
$timeout || $elapsed < 0 ) { + break; // success, timeout, or clock set back + } + usleep( 1e3 * ( next( $msleep ) ?: 1000 ) ); // use 1 sec after enough times + $elapsed = microtime( true ) - $start; + } while ( $elapsed < $timeout && $elapsed >= 0 ); wfProfileOut( __METHOD__ ); return $status; } /** - * Get the base 36 SHA-1 of a string, padded to 31 digits + * Unlock the resources at the given abstract paths * - * @param $path string - * @return string + * @param array $paths List of paths + * @param $type integer LockManager::LOCK_* constant + * @return Status */ - final protected static function sha1Base36( $path ) { - return wfBaseConvert( sha1( $path ), 16, 36, 31 ); + final public function unlock( array $paths, $type = self::LOCK_EX ) { + return $this->unlockByType( array( $type => $paths ) ); } /** - * Lock resources with the given keys and lock type + * Unlock the resources at the given abstract paths * - * @param $paths Array List of storage paths - * @param $type integer LockManager::LOCK_* constant - * @return string + * @param array $pathsByType Map of LockManager::LOCK_* constants to lists of paths + * @return Status + * @since 1.22 */ - abstract protected function doLock( array $paths, $type ); + final public function unlockByType( array $pathsByType ) { + wfProfileIn( __METHOD__ ); + $pathsByType = $this->normalizePathsByType( $pathsByType ); + $status = $this->doUnlockByType( $pathsByType ); + wfProfileOut( __METHOD__ ); + return $status; + } /** - * Unlock resources with the given keys and lock type + * Get the base 36 SHA-1 of a string, padded to 31 digits. + * Before hashing, the path will be prefixed with the domain ID. + * This should be used interally for lock key or file names. 
* - * @param $paths Array List of storage paths - * @param $type integer LockManager::LOCK_* constant + * @param $path string * @return string */ - abstract protected function doUnlock( array $paths, $type ); -} - -/** - * Self-releasing locks - * - * LockManager helper class to handle scoped locks, which - * release when an object is destroyed or goes out of scope. - * - * @ingroup LockManager - * @since 1.19 - */ -class ScopedLock { - /** @var LockManager */ - protected $manager; - /** @var Status */ - protected $status; - /** @var Array List of resource paths*/ - protected $paths; - - protected $type; // integer lock type - - /** - * @param $manager LockManager - * @param $paths Array List of storage paths - * @param $type integer LockManager::LOCK_* constant - * @param $status Status - */ - protected function __construct( - LockManager $manager, array $paths, $type, Status $status - ) { - $this->manager = $manager; - $this->paths = $paths; - $this->status = $status; - $this->type = $type; + final protected function sha1Base36Absolute( $path ) { + return wfBaseConvert( sha1( "{$this->domain}:{$path}" ), 16, 36, 31 ); } /** - * Get a ScopedLock object representing a lock on resource paths. - * Any locks are released once this object goes out of scope. - * The status object is updated with any errors or warnings. + * Get the base 16 SHA-1 of a string, padded to 31 digits. + * Before hashing, the path will be prefixed with the domain ID. + * This should be used interally for lock key or file names. 
* - * @param $manager LockManager - * @param $paths Array List of storage paths - * @param $type integer LockManager::LOCK_* constant - * @param $status Status - * @return ScopedLock|null Returns null on failure + * @param $path string + * @return string */ - public static function factory( - LockManager $manager, array $paths, $type, Status $status - ) { - $lockStatus = $manager->lock( $paths, $type ); - $status->merge( $lockStatus ); - if ( $lockStatus->isOK() ) { - return new self( $manager, $paths, $type, $status ); - } - return null; - } - - function __destruct() { - $wasOk = $this->status->isOK(); - $this->status->merge( $this->manager->unlock( $this->paths, $this->type ) ); - if ( $wasOk ) { - // Make sure status is OK, despite any unlockFiles() fatals - $this->status->setResult( true, $this->status->value ); - } + final protected function sha1Base16Absolute( $path ) { + return sha1( "{$this->domain}:{$path}" ); } -} - -/** - * Version of LockManager that uses a quorum from peer servers for locks. - * The resource space can also be sharded into separate peer groups. - * - * @ingroup LockManager - * @since 1.20 - */ -abstract class QuorumLockManager extends LockManager { - /** @var Array Map of bucket indexes to peer server lists */ - protected $srvsByBucket = array(); // (bucket index => (lsrv1, lsrv2, ...)) /** - * @see LockManager::doLock() - * @param $paths array - * @param $type int - * @return Status + * Normalize the $paths array by converting LOCK_UW locks into the + * appropriate type and removing any duplicated paths for each lock type. + * + * @param array $paths Map of LockManager::LOCK_* constants to lists of paths + * @return Array + * @since 1.22 */ - final protected function doLock( array $paths, $type ) { - $status = Status::newGood(); - - $pathsToLock = array(); // (bucket => paths) - // Get locks that need to be acquired (buckets => locks)... 
- foreach ( $paths as $path ) { - if ( isset( $this->locksHeld[$path][$type] ) ) { - ++$this->locksHeld[$path][$type]; - } elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { - $this->locksHeld[$path][$type] = 1; - } else { - $bucket = $this->getBucketFromKey( $path ); - $pathsToLock[$bucket][] = $path; - } - } - - $lockedPaths = array(); // files locked in this attempt - // Attempt to acquire these locks... - foreach ( $pathsToLock as $bucket => $paths ) { - // Try to acquire the locks for this bucket - $status->merge( $this->doLockingRequestBucket( $bucket, $paths, $type ) ); - if ( !$status->isOK() ) { - $status->merge( $this->doUnlock( $lockedPaths, $type ) ); - return $status; - } - // Record these locks as active - foreach ( $paths as $path ) { - $this->locksHeld[$path][$type] = 1; // locked - } - // Keep track of what locks were made in this attempt - $lockedPaths = array_merge( $lockedPaths, $paths ); + final protected function normalizePathsByType( array $pathsByType ) { + $res = array(); + foreach ( $pathsByType as $type => $paths ) { + $res[$this->lockTypeMap[$type]] = array_unique( $paths ); } - - return $status; + return $res; } /** - * @see LockManager::doUnlock() - * @param $paths array - * @param $type int + * @see LockManager::lockByType() + * @param array $paths Map of LockManager::LOCK_* constants to lists of paths * @return Status + * @since 1.22 */ - final protected function doUnlock( array $paths, $type ) { + protected function doLockByType( array $pathsByType ) { $status = Status::newGood(); - - $pathsToUnlock = array(); - foreach ( $paths as $path ) { - if ( !isset( $this->locksHeld[$path][$type] ) ) { - $status->warning( 'lockmanager-notlocked', $path ); + $lockedByType = array(); // map of (type => paths) + foreach ( $pathsByType as $type => $paths ) { + $status->merge( $this->doLock( $paths, $type ) ); + if ( $status->isOK() ) { + $lockedByType[$type] = $paths; } else { - --$this->locksHeld[$path][$type]; - // Reference count the 
locks held and release locks when zero - if ( $this->locksHeld[$path][$type] <= 0 ) { - unset( $this->locksHeld[$path][$type] ); - $bucket = $this->getBucketFromKey( $path ); - $pathsToUnlock[$bucket][] = $path; - } - if ( !count( $this->locksHeld[$path] ) ) { - unset( $this->locksHeld[$path] ); // no SH or EX locks left for key + // Release the subset of locks that were acquired + foreach ( $lockedByType as $type => $paths ) { + $status->merge( $this->doUnlock( $paths, $type ) ); } + break; } } - - // Remove these specific locks if possible, or at least release - // all locks once this process is currently not holding any locks. - foreach ( $pathsToUnlock as $bucket => $paths ) { - $status->merge( $this->doUnlockingRequestBucket( $bucket, $paths, $type ) ); - } - if ( !count( $this->locksHeld ) ) { - $status->merge( $this->releaseAllLocks() ); - } - return $status; } /** - * Attempt to acquire locks with the peers for a bucket. - * This is all or nothing; if any key is locked then this totally fails. + * Lock resources with the given keys and lock type * - * @param $bucket integer - * @param $paths Array List of resource keys to lock - * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @param array $paths List of paths + * @param $type integer LockManager::LOCK_* constant * @return Status */ - final protected function doLockingRequestBucket( $bucket, array $paths, $type ) { - $status = Status::newGood(); - - $yesVotes = 0; // locks made on trustable servers - $votesLeft = count( $this->srvsByBucket[$bucket] ); // remaining peers - $quorum = floor( $votesLeft/2 + 1 ); // simple majority - // Get votes for each peer, in order, until we have enough... - foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) { - if ( !$this->isServerUp( $lockSrv ) ) { - --$votesLeft; - $status->warning( 'lockmanager-fail-svr-acquire', $lockSrv ); - continue; // server down? 
- } - // Attempt to acquire the lock on this peer - $status->merge( $this->getLocksOnServer( $lockSrv, $paths, $type ) ); - if ( !$status->isOK() ) { - return $status; // vetoed; resource locked - } - ++$yesVotes; // success for this peer - if ( $yesVotes >= $quorum ) { - return $status; // lock obtained - } - --$votesLeft; - $votesNeeded = $quorum - $yesVotes; - if ( $votesNeeded > $votesLeft ) { - break; // short-circuit - } - } - // At this point, we must not have met the quorum - $status->setResult( false ); - - return $status; - } + abstract protected function doLock( array $paths, $type ); /** - * Attempt to release locks with the peers for a bucket - * - * @param $bucket integer - * @param $paths Array List of resource keys to lock - * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @see LockManager::unlockByType() + * @param array $paths Map of LockManager::LOCK_* constants to lists of paths * @return Status + * @since 1.22 */ - final protected function doUnlockingRequestBucket( $bucket, array $paths, $type ) { + protected function doUnlockByType( array $pathsByType ) { $status = Status::newGood(); - - foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) { - if ( !$this->isServerUp( $lockSrv ) ) { - $status->fatal( 'lockmanager-fail-svr-release', $lockSrv ); - // Attempt to release the lock on this peer - } else { - $status->merge( $this->freeLocksOnServer( $lockSrv, $paths, $type ) ); - } + foreach ( $pathsByType as $type => $paths ) { + $status->merge( $this->doUnlock( $paths, $type ) ); } - return $status; } /** - * Get the bucket for resource path. - * This should avoid throwing any exceptions. 
- * - * @param $path string - * @return integer - */ - protected function getBucketFromKey( $path ) { - $prefix = substr( sha1( $path ), 0, 2 ); // first 2 hex chars (8 bits) - return (int)base_convert( $prefix, 16, 10 ) % count( $this->srvsByBucket ); - } - - /** - * Check if a lock server is up - * - * @param $lockSrv string - * @return bool - */ - abstract protected function isServerUp( $lockSrv ); - - /** - * Get a connection to a lock server and acquire locks on $paths - * - * @param $lockSrv string - * @param $paths array - * @param $type integer - * @return Status - */ - abstract protected function getLocksOnServer( $lockSrv, array $paths, $type ); - - /** - * Get a connection to a lock server and release locks on $paths. - * - * Subclasses must effectively implement this or releaseAllLocks(). - * - * @param $lockSrv string - * @param $paths array - * @param $type integer - * @return Status - */ - abstract protected function freeLocksOnServer( $lockSrv, array $paths, $type ); - - /** - * Release all locks that this session is holding. - * - * Subclasses must effectively implement this or freeLocksOnServer(). 
+ * Unlock resources with the given keys and lock type * + * @param array $paths List of paths + * @param $type integer LockManager::LOCK_* constant * @return Status */ - abstract protected function releaseAllLocks(); + abstract protected function doUnlock( array $paths, $type ); } /** @@ -403,22 +250,10 @@ abstract class QuorumLockManager extends LockManager { * @since 1.19 */ class NullLockManager extends LockManager { - /** - * @see LockManager::doLock() - * @param $paths array - * @param $type int - * @return Status - */ protected function doLock( array $paths, $type ) { return Status::newGood(); } - /** - * @see LockManager::doUnlock() - * @param $paths array - * @param $type int - * @return Status - */ protected function doUnlock( array $paths, $type ) { return Status::newGood(); } diff --git a/includes/filebackend/lockmanager/LockManagerGroup.php b/includes/filebackend/lockmanager/LockManagerGroup.php index 8c8c940a..9aff2415 100644 --- a/includes/filebackend/lockmanager/LockManagerGroup.php +++ b/includes/filebackend/lockmanager/LockManagerGroup.php @@ -29,33 +29,41 @@ * @since 1.19 */ class LockManagerGroup { - /** - * @var LockManagerGroup - */ - protected static $instance = null; + /** @var Array (domain => LockManager) */ + protected static $instances = array(); + + protected $domain; // string; domain (usually wiki ID) - /** @var Array of (name => ('class' =>, 'config' =>, 'instance' =>)) */ + /** @var Array of (name => ('class' => ..., 'config' => ..., 'instance' => ...)) */ protected $managers = array(); - protected function __construct() {} + /** + * @param string $domain Domain (usually wiki ID) + */ + protected function __construct( $domain ) { + $this->domain = $domain; + } /** + * @param string $domain Domain (usually wiki ID) * @return LockManagerGroup */ - public static function singleton() { - if ( self::$instance == null ) { - self::$instance = new self(); - self::$instance->initFromGlobals(); + public static function singleton( $domain = 
false ) { + $domain = ( $domain === false ) ? wfWikiID() : $domain; + if ( !isset( self::$instances[$domain] ) ) { + self::$instances[$domain] = new self( $domain ); + self::$instances[$domain]->initFromGlobals(); } - return self::$instance; + return self::$instances[$domain]; } /** - * Destroy the singleton instance, so that a new one will be created next - * time singleton() is called. + * Destroy the singleton instances + * + * @return void */ - public static function destroySingleton() { - self::$instance = null; + public static function destroySingletons() { + self::$instances = array(); } /** @@ -78,6 +86,7 @@ class LockManagerGroup { */ protected function register( array $configs ) { foreach ( $configs as $config ) { + $config['domain'] = $this->domain; if ( !isset( $config['name'] ) ) { throw new MWException( "Cannot register a lock manager with no name." ); } @@ -88,8 +97,8 @@ class LockManagerGroup { $class = $config['class']; unset( $config['class'] ); // lock manager won't need this $this->managers[$name] = array( - 'class' => $class, - 'config' => $config, + 'class' => $class, + 'config' => $config, 'instance' => null ); } @@ -116,6 +125,21 @@ class LockManagerGroup { } /** + * Get the config array for a lock manager object with a given name + * + * @param $name string + * @return Array + * @throws MWException + */ + public function config( $name ) { + if ( !isset( $this->managers[$name] ) ) { + throw new MWException( "No lock manager defined with the name `$name`." ); + } + $class = $this->managers[$name]['class']; + return array( 'class' => $class ) + $this->managers[$name]['config']; + } + + /** * Get the default lock manager configured for the site. * Returns NullLockManager if no lock manager could be found. 
* diff --git a/includes/filebackend/lockmanager/MemcLockManager.php b/includes/filebackend/lockmanager/MemcLockManager.php index 57c0463d..5eab03ee 100644 --- a/includes/filebackend/lockmanager/MemcLockManager.php +++ b/includes/filebackend/lockmanager/MemcLockManager.php @@ -28,8 +28,8 @@ * This is meant for multi-wiki systems that may share files. * All locks are non-blocking, which avoids deadlocks. * - * All lock requests for a resource, identified by a hash string, will map - * to one bucket. Each bucket maps to one or several peer servers, each running memcached. + * All lock requests for a resource, identified by a hash string, will map to one + * bucket. Each bucket maps to one or several peer servers, each running memcached. * A majority of peers must agree for a lock to be acquired. * * @ingroup LockManager @@ -48,9 +48,7 @@ class MemcLockManager extends QuorumLockManager { /** @var Array */ protected $serversUp = array(); // (server name => bool) - protected $lockExpiry; // integer; maximum time locks can be held - protected $session = ''; // string; random SHA-1 UUID - protected $wikiId = ''; // string + protected $session = ''; // string; random UUID /** * Construct a new instance from configuration. @@ -61,9 +59,9 @@ class MemcLockManager extends QuorumLockManager { * each having an odd-numbered list of server names (peers) as values. * - memcConfig : Configuration array for ObjectCache::newFromParams. [optional] * If set, this must use one of the memcached classes. - * - wikiId : Wiki ID string that all resources are relative to. [optional] * - * @param Array $config + * @param array $config + * @throws MWException */ public function __construct( array $config ) { parent::__construct( $config ); @@ -87,19 +85,47 @@ class MemcLockManager extends QuorumLockManager { } } - $this->wikiId = isset( $config['wikiId'] ) ? 
$config['wikiId'] : wfWikiID(); + $this->session = wfRandomString( 32 ); + } - $met = ini_get( 'max_execution_time' ); // this is 0 in CLI mode - $this->lockExpiry = $met ? 2*(int)$met : 2*3600; + // @TODO: change this code to work in one batch + protected function getLocksOnServer( $lockSrv, array $pathsByType ) { + $status = Status::newGood(); - $this->session = wfRandomString( 32 ); + $lockedPaths = array(); + foreach ( $pathsByType as $type => $paths ) { + $status->merge( $this->doGetLocksOnServer( $lockSrv, $paths, $type ) ); + if ( $status->isOK() ) { + $lockedPaths[$type] = isset( $lockedPaths[$type] ) + ? array_merge( $lockedPaths[$type], $paths ) + : $paths; + } else { + foreach ( $lockedPaths as $type => $paths ) { + $status->merge( $this->doFreeLocksOnServer( $lockSrv, $paths, $type ) ); + } + break; + } + } + + return $status; + } + + // @TODO: change this code to work in one batch + protected function freeLocksOnServer( $lockSrv, array $pathsByType ) { + $status = Status::newGood(); + + foreach ( $pathsByType as $type => $paths ) { + $status->merge( $this->doFreeLocksOnServer( $lockSrv, $paths, $type ) ); + } + + return $status; } /** * @see QuorumLockManager::getLocksOnServer() * @return Status */ - protected function getLocksOnServer( $lockSrv, array $paths, $type ) { + protected function doGetLocksOnServer( $lockSrv, array $paths, $type ) { $status = Status::newGood(); $memc = $this->getCache( $lockSrv ); @@ -110,7 +136,7 @@ class MemcLockManager extends QuorumLockManager { foreach ( $paths as $path ) { $status->fatal( 'lockmanager-fail-acquirelock', $path ); } - return; + return $status; } // Fetch all the existing lock records... @@ -121,8 +147,8 @@ class MemcLockManager extends QuorumLockManager { foreach ( $paths as $path ) { $locksKey = $this->recordKeyForPath( $path ); $locksHeld = isset( $lockRecords[$locksKey] ) - ? $lockRecords[$locksKey] - : array( self::LOCK_SH => array(), self::LOCK_EX => array() ); // init + ? 
self::sanitizeLockArray( $lockRecords[$locksKey] ) + : self::newLockArray(); // init foreach ( $locksHeld[self::LOCK_EX] as $session => $expiry ) { if ( $expiry < $now ) { // stale? unset( $locksHeld[self::LOCK_EX][$session] ); @@ -141,7 +167,7 @@ class MemcLockManager extends QuorumLockManager { } if ( $status->isOK() ) { // Register the session in the lock record array - $locksHeld[$type][$this->session] = $now + $this->lockExpiry; + $locksHeld[$type][$this->session] = $now + $this->lockTTL; // We will update this record if none of the other locks conflict $lockRecords[$locksKey] = $locksHeld; } @@ -149,9 +175,15 @@ class MemcLockManager extends QuorumLockManager { // If there were no lock conflicts, update all the lock records... if ( $status->isOK() ) { - foreach ( $lockRecords as $locksKey => $locksHeld ) { - $memc->set( $locksKey, $locksHeld ); - wfDebug( __METHOD__ . ": acquired lock on key $locksKey.\n" ); + foreach ( $paths as $path ) { + $locksKey = $this->recordKeyForPath( $path ); + $locksHeld = $lockRecords[$locksKey]; + $ok = $memc->set( $locksKey, $locksHeld, 7 * 86400 ); + if ( !$ok ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } else { + wfDebug( __METHOD__ . 
": acquired lock on key $locksKey.\n" ); + } } } @@ -165,7 +197,7 @@ class MemcLockManager extends QuorumLockManager { * @see QuorumLockManager::freeLocksOnServer() * @return Status */ - protected function freeLocksOnServer( $lockSrv, array $paths, $type ) { + protected function doFreeLocksOnServer( $lockSrv, array $paths, $type ) { $status = Status::newGood(); $memc = $this->getCache( $lockSrv ); @@ -186,17 +218,22 @@ class MemcLockManager extends QuorumLockManager { foreach ( $paths as $path ) { $locksKey = $this->recordKeyForPath( $path ); // lock record if ( !isset( $lockRecords[$locksKey] ) ) { + $status->warning( 'lockmanager-fail-releaselock', $path ); continue; // nothing to do } - $locksHeld = $lockRecords[$locksKey]; - if ( is_array( $locksHeld ) && isset( $locksHeld[$type] ) ) { - unset( $locksHeld[$type][$this->session] ); - $ok = $memc->set( $locksKey, $locksHeld ); + $locksHeld = self::sanitizeLockArray( $lockRecords[$locksKey] ); + if ( isset( $locksHeld[$type][$this->session] ) ) { + unset( $locksHeld[$type][$this->session] ); // unregister this session + if ( $locksHeld === self::newLockArray() ) { + $ok = $memc->delete( $locksKey ); + } else { + $ok = $memc->set( $locksKey, $locksHeld ); + } + if ( !$ok ) { + $status->fatal( 'lockmanager-fail-releaselock', $path ); + } } else { - $ok = true; - } - if ( !$ok ) { - $status->fatal( 'lockmanager-fail-releaselock', $path ); + $status->warning( 'lockmanager-fail-releaselock', $path ); } wfDebug( __METHOD__ . 
": released lock on key $locksKey.\n" ); } @@ -226,7 +263,7 @@ class MemcLockManager extends QuorumLockManager { /** * Get the MemcachedBagOStuff object for a $lockSrv * - * @param $lockSrv string Server name + * @param string $lockSrv Server name * @return MemcachedBagOStuff|null */ protected function getCache( $lockSrv ) { @@ -234,7 +271,7 @@ class MemcLockManager extends QuorumLockManager { if ( isset( $this->bagOStuffs[$lockSrv] ) ) { $memc = $this->bagOStuffs[$lockSrv]; if ( !isset( $this->serversUp[$lockSrv] ) ) { - $this->serversUp[$lockSrv] = $memc->set( 'MemcLockManager:ping', 1, 1 ); + $this->serversUp[$lockSrv] = $memc->set( __CLASS__ . ':ping', 1, 1 ); if ( !$this->serversUp[$lockSrv] ) { trigger_error( __METHOD__ . ": Could not contact $lockSrv.", E_USER_WARNING ); } @@ -251,14 +288,32 @@ class MemcLockManager extends QuorumLockManager { * @return string */ protected function recordKeyForPath( $path ) { - $hash = LockManager::sha1Base36( $path ); - list( $db, $prefix ) = wfSplitWikiID( $this->wikiId ); - return wfForeignMemcKey( $db, $prefix, __CLASS__, 'locks', $hash ); + return implode( ':', array( __CLASS__, 'locks', $this->sha1Base36Absolute( $path ) ) ); + } + + /** + * @return Array An empty lock structure for a key + */ + protected static function newLockArray() { + return array( self::LOCK_SH => array(), self::LOCK_EX => array() ); + } + + /** + * @param $a array + * @return Array An empty lock structure for a key + */ + protected static function sanitizeLockArray( $a ) { + if ( is_array( $a ) && isset( $a[self::LOCK_EX] ) && isset( $a[self::LOCK_SH] ) ) { + return $a; + } else { + trigger_error( __METHOD__ . 
": reset invalid lock array.", E_USER_WARNING ); + return self::newLockArray(); + } } /** * @param $memc MemcachedBagOStuff - * @param $keys Array List of keys to acquire + * @param array $keys List of keys to acquire * @return bool */ protected function acquireMutexes( MemcachedBagOStuff $memc, array $keys ) { @@ -275,7 +330,7 @@ class MemcLockManager extends QuorumLockManager { $start = microtime( true ); do { if ( ( ++$rounds % 4 ) == 0 ) { - usleep( 1000*50 ); // 50 ms + usleep( 1000 * 50 ); // 50 ms } foreach ( array_diff( $keys, $lockedKeys ) as $key ) { if ( $memc->add( "$key:mutex", 1, 180 ) ) { // lock record @@ -284,10 +339,10 @@ class MemcLockManager extends QuorumLockManager { continue; // acquire in order } } - } while ( count( $lockedKeys ) < count( $keys ) && ( microtime( true ) - $start ) <= 6 ); + } while ( count( $lockedKeys ) < count( $keys ) && ( microtime( true ) - $start ) <= 3 ); if ( count( $lockedKeys ) != count( $keys ) ) { - $this->releaseMutexes( $lockedKeys ); // failed; release what was locked + $this->releaseMutexes( $memc, $lockedKeys ); // failed; release what was locked return false; } @@ -296,7 +351,7 @@ class MemcLockManager extends QuorumLockManager { /** * @param $memc MemcachedBagOStuff - * @param $keys Array List of acquired keys + * @param array $keys List of acquired keys * @return void */ protected function releaseMutexes( MemcachedBagOStuff $memc, array $keys ) { diff --git a/includes/filebackend/lockmanager/QuorumLockManager.php b/includes/filebackend/lockmanager/QuorumLockManager.php new file mode 100644 index 00000000..8356d32a --- /dev/null +++ b/includes/filebackend/lockmanager/QuorumLockManager.php @@ -0,0 +1,246 @@ +<?php +/** + * Version of LockManager that uses a quorum from peer servers for locks. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup LockManager + */ + +/** + * Version of LockManager that uses a quorum from peer servers for locks. + * The resource space can also be sharded into separate peer groups. + * + * @ingroup LockManager + * @since 1.20 + */ +abstract class QuorumLockManager extends LockManager { + /** @var Array Map of bucket indexes to peer server lists */ + protected $srvsByBucket = array(); // (bucket index => (lsrv1, lsrv2, ...)) + /** @var Array Map of degraded buckets */ + protected $degradedBuckets = array(); // (buckey index => UNIX timestamp) + + final protected function doLock( array $paths, $type ) { + return $this->doLockByType( array( $type => $paths ) ); + } + + final protected function doUnlock( array $paths, $type ) { + return $this->doUnlockByType( array( $type => $paths ) ); + } + + protected function doLockByType( array $pathsByType ) { + $status = Status::newGood(); + + $pathsToLock = array(); // (bucket => type => paths) + // Get locks that need to be acquired (buckets => locks)... 
+ foreach ( $pathsByType as $type => $paths ) { + foreach ( $paths as $path ) { + if ( isset( $this->locksHeld[$path][$type] ) ) { + ++$this->locksHeld[$path][$type]; + } else { + $bucket = $this->getBucketFromPath( $path ); + $pathsToLock[$bucket][$type][] = $path; + } + } + } + + $lockedPaths = array(); // files locked in this attempt (type => paths) + // Attempt to acquire these locks... + foreach ( $pathsToLock as $bucket => $pathsToLockByType ) { + // Try to acquire the locks for this bucket + $status->merge( $this->doLockingRequestBucket( $bucket, $pathsToLockByType ) ); + if ( !$status->isOK() ) { + $status->merge( $this->doUnlockByType( $lockedPaths ) ); + return $status; + } + // Record these locks as active + foreach ( $pathsToLockByType as $type => $paths ) { + foreach ( $paths as $path ) { + $this->locksHeld[$path][$type] = 1; // locked + // Keep track of what locks were made in this attempt + $lockedPaths[$type][] = $path; + } + } + } + + return $status; + } + + protected function doUnlockByType( array $pathsByType ) { + $status = Status::newGood(); + + $pathsToUnlock = array(); // (bucket => type => paths) + foreach ( $pathsByType as $type => $paths ) { + foreach ( $paths as $path ) { + if ( !isset( $this->locksHeld[$path][$type] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } else { + --$this->locksHeld[$path][$type]; + // Reference count the locks held and release locks when zero + if ( $this->locksHeld[$path][$type] <= 0 ) { + unset( $this->locksHeld[$path][$type] ); + $bucket = $this->getBucketFromPath( $path ); + $pathsToUnlock[$bucket][$type][] = $path; + } + if ( !count( $this->locksHeld[$path] ) ) { + unset( $this->locksHeld[$path] ); // no SH or EX locks left for key + } + } + } + } + + // Remove these specific locks if possible, or at least release + // all locks once this process is currently not holding any locks. 
+ foreach ( $pathsToUnlock as $bucket => $pathsToUnlockByType ) { + $status->merge( $this->doUnlockingRequestBucket( $bucket, $pathsToUnlockByType ) ); + } + if ( !count( $this->locksHeld ) ) { + $status->merge( $this->releaseAllLocks() ); + $this->degradedBuckets = array(); // safe to retry the normal quorum + } + + return $status; + } + + /** + * Attempt to acquire locks with the peers for a bucket. + * This is all or nothing; if any key is locked then this totally fails. + * + * @param $bucket integer + * @param array $pathsByType Map of LockManager::LOCK_* constants to lists of paths + * @return Status + */ + final protected function doLockingRequestBucket( $bucket, array $pathsByType ) { + $status = Status::newGood(); + + $yesVotes = 0; // locks made on trustable servers + $votesLeft = count( $this->srvsByBucket[$bucket] ); // remaining peers + $quorum = floor( $votesLeft / 2 + 1 ); // simple majority + // Get votes for each peer, in order, until we have enough... + foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) { + if ( !$this->isServerUp( $lockSrv ) ) { + --$votesLeft; + $status->warning( 'lockmanager-fail-svr-acquire', $lockSrv ); + $this->degradedBuckets[$bucket] = time(); + continue; // server down? 
+ } + // Attempt to acquire the lock on this peer + $status->merge( $this->getLocksOnServer( $lockSrv, $pathsByType ) ); + if ( !$status->isOK() ) { + return $status; // vetoed; resource locked + } + ++$yesVotes; // success for this peer + if ( $yesVotes >= $quorum ) { + return $status; // lock obtained + } + --$votesLeft; + $votesNeeded = $quorum - $yesVotes; + if ( $votesNeeded > $votesLeft ) { + break; // short-circuit + } + } + // At this point, we must not have met the quorum + $status->setResult( false ); + + return $status; + } + + /** + * Attempt to release locks with the peers for a bucket + * + * @param $bucket integer + * @param array $pathsByType Map of LockManager::LOCK_* constants to lists of paths + * @return Status + */ + final protected function doUnlockingRequestBucket( $bucket, array $pathsByType ) { + $status = Status::newGood(); + + $yesVotes = 0; // locks freed on trustable servers + $votesLeft = count( $this->srvsByBucket[$bucket] ); // remaining peers + $quorum = floor( $votesLeft / 2 + 1 ); // simple majority + $isDegraded = isset( $this->degradedBuckets[$bucket] ); // not the normal quorum? + foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) { + if ( !$this->isServerUp( $lockSrv ) ) { + $status->warning( 'lockmanager-fail-svr-release', $lockSrv ); + // Attempt to release the lock on this peer + } else { + $status->merge( $this->freeLocksOnServer( $lockSrv, $pathsByType ) ); + ++$yesVotes; // success for this peer + // Normally the first peers form the quorum, and the others are ignored. + // Ignore them in this case, but not when an alternative quorum was used. + if ( $yesVotes >= $quorum && !$isDegraded ) { + break; // lock released + } + } + } + // Set a bad status if the quorum was not met. + // Assumes the same "up" servers as during the acquire step. + $status->setResult( $yesVotes >= $quorum ); + + return $status; + } + + /** + * Get the bucket for resource path. + * This should avoid throwing any exceptions. 
+ * + * @param $path string + * @return integer + */ + protected function getBucketFromPath( $path ) { + $prefix = substr( sha1( $path ), 0, 2 ); // first 2 hex chars (8 bits) + return (int)base_convert( $prefix, 16, 10 ) % count( $this->srvsByBucket ); + } + + /** + * Check if a lock server is up. + * This should process cache results to reduce RTT. + * + * @param $lockSrv string + * @return bool + */ + abstract protected function isServerUp( $lockSrv ); + + /** + * Get a connection to a lock server and acquire locks + * + * @param $lockSrv string + * @param array $pathsByType Map of LockManager::LOCK_* constants to lists of paths + * @return Status + */ + abstract protected function getLocksOnServer( $lockSrv, array $pathsByType ); + + /** + * Get a connection to a lock server and release locks on $paths. + * + * Subclasses must effectively implement this or releaseAllLocks(). + * + * @param $lockSrv string + * @param array $pathsByType Map of LockManager::LOCK_* constants to lists of paths + * @return Status + */ + abstract protected function freeLocksOnServer( $lockSrv, array $pathsByType ); + + /** + * Release all locks that this session is holding. + * + * Subclasses must effectively implement this or freeLocksOnServer(). + * + * @return Status + */ + abstract protected function releaseAllLocks(); +} diff --git a/includes/filebackend/lockmanager/RedisLockManager.php b/includes/filebackend/lockmanager/RedisLockManager.php new file mode 100644 index 00000000..43b0198a --- /dev/null +++ b/includes/filebackend/lockmanager/RedisLockManager.php @@ -0,0 +1,288 @@ +<?php +/** + * Version of LockManager based on using redis servers. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup LockManager + */ + +/** + * Manage locks using redis servers. + * + * Version of LockManager based on using redis servers. + * This is meant for multi-wiki systems that may share files. + * All locks are non-blocking, which avoids deadlocks. + * + * All lock requests for a resource, identified by a hash string, will map to one + * bucket. Each bucket maps to one or several peer servers, each running redis. + * A majority of peers must agree for a lock to be acquired. + * + * This class requires Redis 2.6 as it makes use Lua scripts for fast atomic operations. + * + * @ingroup LockManager + * @since 1.22 + */ +class RedisLockManager extends QuorumLockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + /** @var RedisConnectionPool */ + protected $redisPool; + /** @var Array Map server names to hostname/IP and port numbers */ + protected $lockServers = array(); + + protected $session = ''; // string; random UUID + + /** + * Construct a new instance from configuration. + * + * $config paramaters include: + * - lockServers : Associative array of server names to "<IP>:<port>" strings. + * - srvsByBucket : Array of 1-16 consecutive integer keys, starting from 0, + * each having an odd-numbered list of server names (peers) as values. 
+ * - redisConfig : Configuration for RedisConnectionPool::__construct(). + * + * @param Array $config + * @throws MWException + */ + public function __construct( array $config ) { + parent::__construct( $config ); + + $this->lockServers = $config['lockServers']; + // Sanitize srvsByBucket config to prevent PHP errors + $this->srvsByBucket = array_filter( $config['srvsByBucket'], 'is_array' ); + $this->srvsByBucket = array_values( $this->srvsByBucket ); // consecutive + + $config['redisConfig']['serializer'] = 'none'; + $this->redisPool = RedisConnectionPool::singleton( $config['redisConfig'] ); + + $this->session = wfRandomString( 32 ); + } + + // @TODO: change this code to work in one batch + protected function getLocksOnServer( $lockSrv, array $pathsByType ) { + $status = Status::newGood(); + + $lockedPaths = array(); + foreach ( $pathsByType as $type => $paths ) { + $status->merge( $this->doGetLocksOnServer( $lockSrv, $paths, $type ) ); + if ( $status->isOK() ) { + $lockedPaths[$type] = isset( $lockedPaths[$type] ) + ? 
array_merge( $lockedPaths[$type], $paths ) + : $paths; + } else { + foreach ( $lockedPaths as $type => $paths ) { + $status->merge( $this->doFreeLocksOnServer( $lockSrv, $paths, $type ) ); + } + break; + } + } + + return $status; + } + + // @TODO: change this code to work in one batch + protected function freeLocksOnServer( $lockSrv, array $pathsByType ) { + $status = Status::newGood(); + + foreach ( $pathsByType as $type => $paths ) { + $status->merge( $this->doFreeLocksOnServer( $lockSrv, $paths, $type ) ); + } + + return $status; + } + + protected function doGetLocksOnServer( $lockSrv, array $paths, $type ) { + $status = Status::newGood(); + + $server = $this->lockServers[$lockSrv]; + $conn = $this->redisPool->getConnection( $server ); + if ( !$conn ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + return $status; + } + + $keys = array_map( array( $this, 'recordKeyForPath' ), $paths ); // lock records + + try { + static $script = +<<<LUA + if ARGV[1] ~= 'EX' and ARGV[1] ~= 'SH' then + return redis.error_reply('Unrecognized lock type given (must be EX or SH)') + end + local failed = {} + -- Check that all the locks can be acquired + for i,resourceKey in ipairs(KEYS) do + local keyIsFree = true + local currentLocks = redis.call('hKeys',resourceKey) + for i,lockKey in ipairs(currentLocks) do + local _, _, type, session = string.find(lockKey,"(%w+):(%w+)") + -- Check any locks that are not owned by this session + if session ~= ARGV[2] then + local lockTimestamp = redis.call('hGet',resourceKey,lockKey) + if 1*lockTimestamp < ( ARGV[4] - ARGV[3] ) then + -- Lock is stale, so just prune it out + redis.call('hDel',resourceKey,lockKey) + elseif ARGV[1] == 'EX' or type == 'EX' then + keyIsFree = false + break + end + end + end + if not keyIsFree then + failed[#failed+1] = resourceKey + end + end + -- If all locks could be acquired, then do so + if #failed == 0 then + for i,resourceKey in ipairs(KEYS) do + 
redis.call('hSet',resourceKey,ARGV[1] .. ':' .. ARGV[2],ARGV[4]) + -- In addition to invalidation logic, be sure to garbage collect + redis.call('expire',resourceKey,ARGV[3]) + end + end + return failed +LUA; + $res = $conn->luaEval( $script, + array_merge( + $keys, // KEYS[0], KEYS[1],...KEYS[N] + array( + $type === self::LOCK_SH ? 'SH' : 'EX', // ARGV[1] + $this->session, // ARGV[2] + $this->lockTTL, // ARGV[3] + time() // ARGV[4] + ) + ), + count( $keys ) # number of first argument(s) that are keys + ); + } catch ( RedisException $e ) { + $res = false; + $this->redisPool->handleException( $server, $conn, $e ); + } + + if ( $res === false ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + } else { + $pathsByKey = array_combine( $keys, $paths ); + foreach ( $res as $key ) { + $status->fatal( 'lockmanager-fail-acquirelock', $pathsByKey[$key] ); + } + } + + return $status; + } + + protected function doFreeLocksOnServer( $lockSrv, array $paths, $type ) { + $status = Status::newGood(); + + $server = $this->lockServers[$lockSrv]; + $conn = $this->redisPool->getConnection( $server ); + if ( !$conn ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-releaselock', $path ); + } + return $status; + } + + $keys = array_map( array( $this, 'recordKeyForPath' ), $paths ); // lock records + + try { + static $script = +<<<LUA + if ARGV[1] ~= 'EX' and ARGV[1] ~= 'SH' then + return redis.error_reply('Unrecognized lock type given (must be EX or SH)') + end + local failed = {} + for i,resourceKey in ipairs(KEYS) do + local released = redis.call('hDel',resourceKey,ARGV[1] .. ':' .. 
ARGV[2]) + if released > 0 then + -- Remove the whole structure if it is now empty + if redis.call('hLen',resourceKey) == 0 then + redis.call('del',resourceKey) + end + else + failed[#failed+1] = resourceKey + end + end + return failed +LUA; + $res = $conn->luaEval( $script, + array_merge( + $keys, // KEYS[0], KEYS[1],...KEYS[N] + array( + $type === self::LOCK_SH ? 'SH' : 'EX', // ARGV[1] + $this->session // ARGV[2] + ) + ), + count( $keys ) # number of first argument(s) that are keys + ); + } catch ( RedisException $e ) { + $res = false; + $this->redisPool->handleException( $server, $conn, $e ); + } + + if ( $res === false ) { + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-releaselock', $path ); + } + } else { + $pathsByKey = array_combine( $keys, $paths ); + foreach ( $res as $key ) { + $status->fatal( 'lockmanager-fail-releaselock', $pathsByKey[$key] ); + } + } + + return $status; + } + + protected function releaseAllLocks() { + return Status::newGood(); // not supported + } + + protected function isServerUp( $lockSrv ) { + return (bool)$this->redisPool->getConnection( $this->lockServers[$lockSrv] ); + } + + /** + * @param $path string + * @return string + */ + protected function recordKeyForPath( $path ) { + return implode( ':', array( __CLASS__, 'locks', $this->sha1Base36Absolute( $path ) ) ); + } + + /** + * Make sure remaining locks get cleared for sanity + */ + function __destruct() { + while ( count( $this->locksHeld ) ) { + foreach ( $this->locksHeld as $path => $locks ) { + $this->doUnlock( array( $path ), self::LOCK_EX ); + $this->doUnlock( array( $path ), self::LOCK_SH ); + } + } + } +} diff --git a/includes/filebackend/lockmanager/ScopedLock.php b/includes/filebackend/lockmanager/ScopedLock.php new file mode 100644 index 00000000..5faad4a6 --- /dev/null +++ b/includes/filebackend/lockmanager/ScopedLock.php @@ -0,0 +1,104 @@ +<?php +/** + * Resource locking handling. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup LockManager + * @author Aaron Schulz + */ + +/** + * Self-releasing locks + * + * LockManager helper class to handle scoped locks, which + * release when an object is destroyed or goes out of scope. + * + * @ingroup LockManager + * @since 1.19 + */ +class ScopedLock { + /** @var LockManager */ + protected $manager; + /** @var Status */ + protected $status; + /** @var Array Map of lock types to resource paths */ + protected $pathsByType; + + /** + * @param LockManager $manager + * @param array $pathsByType Map of lock types to path lists + * @param Status $status + */ + protected function __construct( LockManager $manager, array $pathsByType, Status $status ) { + $this->manager = $manager; + $this->pathsByType = $pathsByType; + $this->status = $status; + } + + /** + * Get a ScopedLock object representing a lock on resource paths. + * Any locks are released once this object goes out of scope. + * The status object is updated with any errors or warnings. + * + * $type can be "mixed" and $paths can be a map of types to paths (since 1.22). + * Otherwise $type should be an integer and $paths should be a list of paths. 
+ * + * @param LockManager $manager + * @param array $paths List of storage paths or map of lock types to path lists + * @param integer|string $type LockManager::LOCK_* constant or "mixed" + * @param Status $status + * @param integer $timeout Timeout in seconds (0 means non-blocking) (since 1.22) + * @return ScopedLock|null Returns null on failure + */ + public static function factory( + LockManager $manager, array $paths, $type, Status $status, $timeout = 0 + ) { + $pathsByType = is_integer( $type ) ? array( $type => $paths ) : $paths; + $lockStatus = $manager->lockByType( $pathsByType, $timeout ); + $status->merge( $lockStatus ); + if ( $lockStatus->isOK() ) { + return new self( $manager, $pathsByType, $status ); + } + return null; + } + + /** + * Release a scoped lock and set any errors in the attatched Status object. + * This is useful for early release of locks before function scope is destroyed. + * This is the same as setting the lock object to null. + * + * @param ScopedLock $lock + * @return void + * @since 1.21 + */ + public static function release( ScopedLock &$lock = null ) { + $lock = null; + } + + /** + * Release the locks when this goes out of scope + */ + function __destruct() { + $wasOk = $this->status->isOK(); + $this->status->merge( $this->manager->unlockByType( $this->pathsByType ) ); + if ( $wasOk ) { + // Make sure status is OK, despite any unlockFiles() fatals + $this->status->setResult( true, $this->status->value ); + } + } +} |