diff options
Diffstat (limited to 'includes/filerepo')
28 files changed, 7810 insertions, 1364 deletions
diff --git a/includes/filerepo/FSRepo.php b/includes/filerepo/FSRepo.php index 2610ac6e..22dbdefc 100644 --- a/includes/filerepo/FSRepo.php +++ b/includes/filerepo/FSRepo.php @@ -7,710 +7,50 @@ */ /** - * A repository for files accessible via the local filesystem. Does not support - * database access or registration. + * A repository for files accessible via the local filesystem. + * Does not support database access or registration. + * + * This is a mostly a legacy class. New uses should not be added. + * * @ingroup FileRepo + * @deprecated since 1.19 */ class FSRepo extends FileRepo { - var $directory, $deletedDir, $deletedHashLevels, $fileMode; - var $fileFactory = array( 'UnregisteredLocalFile', 'newFromTitle' ); - var $oldFileFactory = false; - var $pathDisclosureProtection = 'simple'; - - function __construct( $info ) { - parent::__construct( $info ); - - // Required settings - $this->directory = $info['directory']; - $this->url = $info['url']; - - // Optional settings - $this->hashLevels = isset( $info['hashLevels'] ) ? $info['hashLevels'] : 2; - $this->deletedHashLevels = isset( $info['deletedHashLevels'] ) ? - $info['deletedHashLevels'] : $this->hashLevels; - $this->deletedDir = isset( $info['deletedDir'] ) ? $info['deletedDir'] : false; - $this->fileMode = isset( $info['fileMode'] ) ? $info['fileMode'] : 0644; - if ( isset( $info['thumbDir'] ) ) { - $this->thumbDir = $info['thumbDir']; - } else { - $this->thumbDir = "{$this->directory}/thumb"; - } - if ( isset( $info['thumbUrl'] ) ) { - $this->thumbUrl = $info['thumbUrl']; - } else { - $this->thumbUrl = "{$this->url}/thumb"; - } - } - - /** - * Get the public root directory of the repository. 
- */ - function getRootDirectory() { - return $this->directory; - } - - /** - * Get the public root URL of the repository - */ - function getRootUrl() { - return $this->url; - } - - /** - * Returns true if the repository uses a multi-level directory structure - */ - function isHashed() { - return (bool)$this->hashLevels; - } - - /** - * Get the local directory corresponding to one of the three basic zones - * - * @param $zone string - * - * @return string - */ - function getZonePath( $zone ) { - switch ( $zone ) { - case 'public': - return $this->directory; - case 'temp': - return "{$this->directory}/temp"; - case 'deleted': - return $this->deletedDir; - case 'thumb': - return $this->thumbDir; - default: - return false; - } - } - - /** - * @see FileRepo::getZoneUrl() - * - * @param $zone string - * - * @return url - */ - function getZoneUrl( $zone ) { - switch ( $zone ) { - case 'public': - return $this->url; - case 'temp': - return "{$this->url}/temp"; - case 'deleted': - return parent::getZoneUrl( $zone ); // no public URL - case 'thumb': - return $this->thumbUrl; - default: - return parent::getZoneUrl( $zone ); - } - } - - /** - * Get a URL referring to this repository, with the private mwrepo protocol. - * The suffix, if supplied, is considered to be unencoded, and will be - * URL-encoded before being returned. - * - * @param $suffix string - * - * @return string - */ - function getVirtualUrl( $suffix = false ) { - $path = 'mwrepo://' . $this->name; - if ( $suffix !== false ) { - $path .= '/' . 
rawurlencode( $suffix ); - } - return $path; - } - - /** - * Get the local path corresponding to a virtual URL - * - * @param $url string - * - * @return string - */ - function resolveVirtualUrl( $url ) { - if ( substr( $url, 0, 9 ) != 'mwrepo://' ) { - throw new MWException( __METHOD__.': unknown protocol' ); - } - - $bits = explode( '/', substr( $url, 9 ), 3 ); - if ( count( $bits ) != 3 ) { - throw new MWException( __METHOD__.": invalid mwrepo URL: $url" ); - } - list( $repo, $zone, $rel ) = $bits; - if ( $repo !== $this->name ) { - throw new MWException( __METHOD__.": fetching from a foreign repo is not supported" ); - } - $base = $this->getZonePath( $zone ); - if ( !$base ) { - throw new MWException( __METHOD__.": invalid zone: $zone" ); - } - return $base . '/' . rawurldecode( $rel ); - } - - /** - * Store a batch of files - * - * @param $triplets Array: (src,zone,dest) triplets as per store() - * @param $flags Integer: bitwise combination of the following flags: - * self::DELETE_SOURCE Delete the source file after upload - * self::OVERWRITE Overwrite an existing destination file instead of failing - * self::OVERWRITE_SAME Overwrite the file if the destination exists and has the - * same contents as the source - */ - function storeBatch( $triplets, $flags = 0 ) { - wfDebug( __METHOD__ . ': Storing ' . count( $triplets ) . 
- " triplets; flags: {$flags}\n" ); - - // Try creating directories - if ( !wfMkdirParents( $this->directory ) ) { - return $this->newFatal( 'upload_directory_missing', $this->directory ); - } - if ( !is_writable( $this->directory ) ) { - return $this->newFatal( 'upload_directory_read_only', $this->directory ); - } - - // Validate each triplet - $status = $this->newGood(); - foreach ( $triplets as $i => $triplet ) { - list( $srcPath, $dstZone, $dstRel ) = $triplet; - - // Resolve destination path - $root = $this->getZonePath( $dstZone ); - if ( !$root ) { - throw new MWException( "Invalid zone: $dstZone" ); - } - if ( !$this->validateFilename( $dstRel ) ) { - throw new MWException( 'Validation error in $dstRel' ); - } - $dstPath = "$root/$dstRel"; - $dstDir = dirname( $dstPath ); - - // Create destination directories for this triplet - if ( !is_dir( $dstDir ) ) { - if ( !wfMkdirParents( $dstDir ) ) { - return $this->newFatal( 'directorycreateerror', $dstDir ); - } - if ( $dstZone == 'deleted' ) { - $this->initDeletedDir( $dstDir ); - } - } - - // Resolve source - if ( self::isVirtualUrl( $srcPath ) ) { - $srcPath = $triplets[$i][0] = $this->resolveVirtualUrl( $srcPath ); - } - if ( !is_file( $srcPath ) ) { - // Make a list of files that don't exist for return to the caller - $status->fatal( 'filenotfound', $srcPath ); - continue; - } - - // Check overwriting - if ( !( $flags & self::OVERWRITE ) && file_exists( $dstPath ) ) { - if ( $flags & self::OVERWRITE_SAME ) { - $hashSource = sha1_file( $srcPath ); - $hashDest = sha1_file( $dstPath ); - if ( $hashSource != $hashDest ) { - $status->fatal( 'fileexistserror', $dstPath ); - } - } else { - $status->fatal( 'fileexistserror', $dstPath ); - } - } - } - - // Windows does not support moving over existing files, so explicitly delete them - $deleteDest = wfIsWindows() && ( $flags & self::OVERWRITE ); - - // Abort now on failure - if ( !$status->ok ) { - return $status; - } - - // Execute the store operation for each 
triplet - foreach ( $triplets as $i => $triplet ) { - list( $srcPath, $dstZone, $dstRel ) = $triplet; - $root = $this->getZonePath( $dstZone ); - $dstPath = "$root/$dstRel"; - $good = true; - - if ( $flags & self::DELETE_SOURCE ) { - if ( $deleteDest ) { - unlink( $dstPath ); - } - if ( !rename( $srcPath, $dstPath ) ) { - $status->error( 'filerenameerror', $srcPath, $dstPath ); - $good = false; - } - } else { - if ( !copy( $srcPath, $dstPath ) ) { - $status->error( 'filecopyerror', $srcPath, $dstPath ); - $good = false; - } - if ( !( $flags & self::SKIP_VALIDATION ) ) { - wfSuppressWarnings(); - $hashSource = sha1_file( $srcPath ); - $hashDest = sha1_file( $dstPath ); - wfRestoreWarnings(); - - if ( $hashDest === false || $hashSource !== $hashDest ) { - wfDebug( __METHOD__ . ': File copy validation failed: ' . - "$srcPath ($hashSource) to $dstPath ($hashDest)\n" ); - - $status->error( 'filecopyerror', $srcPath, $dstPath ); - $good = false; - } - } - } - if ( $good ) { - $this->chmod( $dstPath ); - $status->successCount++; - } else { - $status->failCount++; - } - $status->success[$i] = $good; - } - return $status; - } - - /** - * Deletes a batch of files. Each file can be a (zone, rel) pairs, a - * virtual url or a real path. 
It will try to delete each file, but - * ignores any errors that may occur - * - * @param $pairs array List of files to delete - */ - function cleanupBatch( $files ) { - foreach ( $files as $file ) { - if ( is_array( $file ) ) { - // This is a pair, extract it - list( $zone, $rel ) = $file; - $root = $this->getZonePath( $zone ); - $path = "$root/$rel"; - } else { - if ( self::isVirtualUrl( $file ) ) { - // This is a virtual url, resolve it - $path = $this->resolveVirtualUrl( $file ); - } else { - // This is a full file name - $path = $file; - } - } - - wfSuppressWarnings(); - unlink( $path ); - wfRestoreWarnings(); - } - } - - function append( $srcPath, $toAppendPath, $flags = 0 ) { - $status = $this->newGood(); - - // Resolve the virtual URL - if ( self::isVirtualUrl( $toAppendPath ) ) { - $toAppendPath = $this->resolveVirtualUrl( $toAppendPath ); - } - // Make sure the files are there - if ( !is_file( $toAppendPath ) ) - $status->fatal( 'filenotfound', $toAppendPath ); - - if ( !is_file( $srcPath ) ) - $status->fatal( 'filenotfound', $srcPath ); - - if ( !$status->isOk() ) return $status; - - // Do the append - $chunk = file_get_contents( $srcPath ); - if( $chunk === false ) { - $status->fatal( 'fileappenderrorread', $srcPath ); - } - - if( $status->isOk() ) { - if ( file_put_contents( $toAppendPath, $chunk, FILE_APPEND ) ) { - $status->value = $toAppendPath; - } else { - $status->fatal( 'fileappenderror', $srcPath, $toAppendPath); - } - } - - if ( $flags & self::DELETE_SOURCE ) { - unlink( $srcPath ); + function __construct( array $info ) { + if ( !isset( $info['backend'] ) ) { + // B/C settings... + $directory = $info['directory']; + $deletedDir = isset( $info['deletedDir'] ) + ? $info['deletedDir'] + : false; + $thumbDir = isset( $info['thumbDir'] ) + ? $info['thumbDir'] + : "{$directory}/thumb"; + $fileMode = isset( $info['fileMode'] ) + ? 
$info['fileMode'] + : 0644; + + $repoName = $info['name']; + // Get the FS backend configuration + $backend = new FSFileBackend( array( + 'name' => $info['name'] . '-backend', + 'lockManager' => 'fsLockManager', + 'containerPaths' => array( + "{$repoName}-public" => "{$directory}", + "{$repoName}-temp" => "{$directory}/temp", + "{$repoName}-thumb" => $thumbDir, + "{$repoName}-deleted" => $deletedDir + ), + 'fileMode' => $fileMode, + ) ); + // Update repo config to use this backend + $info['backend'] = $backend; } - return $status; - } - - /* We can actually append to the files, so no-op needed here. */ - function appendFinish( $toAppendPath ) {} - - /** - * Checks existence of specified array of files. - * - * @param $files Array: URLs of files to check - * @param $flags Integer: bitwise combination of the following flags: - * self::FILES_ONLY Mark file as existing only if it is a file (not directory) - * @return Either array of files and existence flags, or false - */ - function fileExistsBatch( $files, $flags = 0 ) { - if ( !file_exists( $this->directory ) || !is_readable( $this->directory ) ) { - return false; - } - $result = array(); - foreach ( $files as $key => $file ) { - if ( self::isVirtualUrl( $file ) ) { - $file = $this->resolveVirtualUrl( $file ); - } - if( $flags & self::FILES_ONLY ) { - $result[$key] = is_file( $file ); - } else { - $result[$key] = file_exists( $file ); - } - } - - return $result; - } - - /** - * Take all available measures to prevent web accessibility of new deleted - * directories, in case the user has not configured offline storage - */ - protected function initDeletedDir( $dir ) { - // Add a .htaccess file to the root of the deleted zone - $root = $this->getZonePath( 'deleted' ); - if ( !file_exists( "$root/.htaccess" ) ) { - file_put_contents( "$root/.htaccess", "Deny from all\n" ); - } - // Seed new directories with a blank index.html, to prevent crawling - file_put_contents( "$dir/index.html", '' ); - } - - /** - * Pick a 
random name in the temp zone and store a file to it. - * @param $originalName String: the base name of the file as specified - * by the user. The file extension will be maintained. - * @param $srcPath String: the current location of the file. - * @return FileRepoStatus object with the URL in the value. - */ - function storeTemp( $originalName, $srcPath ) { - $date = gmdate( "YmdHis" ); - $hashPath = $this->getHashPath( $originalName ); - $dstRel = "$hashPath$date!$originalName"; - $dstUrlRel = $hashPath . $date . '!' . rawurlencode( $originalName ); - - $result = $this->store( $srcPath, 'temp', $dstRel ); - $result->value = $this->getVirtualUrl( 'temp' ) . '/' . $dstUrlRel; - return $result; - } - - /** - * Remove a temporary file or mark it for garbage collection - * @param $virtualUrl String: the virtual URL returned by storeTemp - * @return Boolean: true on success, false on failure - */ - function freeTemp( $virtualUrl ) { - $temp = "mwrepo://{$this->name}/temp"; - if ( substr( $virtualUrl, 0, strlen( $temp ) ) != $temp ) { - wfDebug( __METHOD__.": Invalid virtual URL\n" ); - return false; - } - $path = $this->resolveVirtualUrl( $virtualUrl ); - wfSuppressWarnings(); - $success = unlink( $path ); - wfRestoreWarnings(); - return $success; - } - - /** - * Publish a batch of files - * @param $triplets Array: (source,dest,archive) triplets as per publish() - * @param $flags Integer: bitfield, may be FileRepo::DELETE_SOURCE to indicate - * that the source files should be deleted if possible - */ - function publishBatch( $triplets, $flags = 0 ) { - // Perform initial checks - if ( !wfMkdirParents( $this->directory ) ) { - return $this->newFatal( 'upload_directory_missing', $this->directory ); - } - if ( !is_writable( $this->directory ) ) { - return $this->newFatal( 'upload_directory_read_only', $this->directory ); - } - $status = $this->newGood( array() ); - foreach ( $triplets as $i => $triplet ) { - list( $srcPath, $dstRel, $archiveRel ) = $triplet; - - if ( 
substr( $srcPath, 0, 9 ) == 'mwrepo://' ) { - $triplets[$i][0] = $srcPath = $this->resolveVirtualUrl( $srcPath ); - } - if ( !$this->validateFilename( $dstRel ) ) { - throw new MWException( 'Validation error in $dstRel' ); - } - if ( !$this->validateFilename( $archiveRel ) ) { - throw new MWException( 'Validation error in $archiveRel' ); - } - $dstPath = "{$this->directory}/$dstRel"; - $archivePath = "{$this->directory}/$archiveRel"; - - $dstDir = dirname( $dstPath ); - $archiveDir = dirname( $archivePath ); - // Abort immediately on directory creation errors since they're likely to be repetitive - if ( !is_dir( $dstDir ) && !wfMkdirParents( $dstDir ) ) { - return $this->newFatal( 'directorycreateerror', $dstDir ); - } - if ( !is_dir( $archiveDir ) && !wfMkdirParents( $archiveDir ) ) { - return $this->newFatal( 'directorycreateerror', $archiveDir ); - } - if ( !is_file( $srcPath ) ) { - // Make a list of files that don't exist for return to the caller - $status->fatal( 'filenotfound', $srcPath ); - } - } - - if ( !$status->ok ) { - return $status; - } - - foreach ( $triplets as $i => $triplet ) { - list( $srcPath, $dstRel, $archiveRel ) = $triplet; - $dstPath = "{$this->directory}/$dstRel"; - $archivePath = "{$this->directory}/$archiveRel"; - - // Archive destination file if it exists - if( is_file( $dstPath ) ) { - // Check if the archive file exists - // This is a sanity check to avoid data loss. In UNIX, the rename primitive - // unlinks the destination file if it exists. DB-based synchronisation in - // publishBatch's caller should prevent races. In Windows there's no - // problem because the rename primitive fails if the destination exists. 
- if ( is_file( $archivePath ) ) { - $success = false; - } else { - wfSuppressWarnings(); - $success = rename( $dstPath, $archivePath ); - wfRestoreWarnings(); - } - - if( !$success ) { - $status->error( 'filerenameerror',$dstPath, $archivePath ); - $status->failCount++; - continue; - } else { - wfDebug(__METHOD__.": moved file $dstPath to $archivePath\n"); - } - $status->value[$i] = 'archived'; - } else { - $status->value[$i] = 'new'; - } - - $good = true; - wfSuppressWarnings(); - if ( $flags & self::DELETE_SOURCE ) { - if ( !rename( $srcPath, $dstPath ) ) { - $status->error( 'filerenameerror', $srcPath, $dstPath ); - $good = false; - } - } else { - if ( !copy( $srcPath, $dstPath ) ) { - $status->error( 'filecopyerror', $srcPath, $dstPath ); - $good = false; - } - } - wfRestoreWarnings(); - - if ( $good ) { - $status->successCount++; - wfDebug(__METHOD__.": wrote tempfile $srcPath to $dstPath\n"); - // Thread-safe override for umask - $this->chmod( $dstPath ); - } else { - $status->failCount++; - } - } - return $status; - } - - /** - * Move a group of files to the deletion archive. - * If no valid deletion archive is configured, this may either delete the - * file or throw an exception, depending on the preference of the repository. - * - * @param $sourceDestPairs Array of source/destination pairs. Each element - * is a two-element array containing the source file path relative to the - * public root in the first element, and the archive file path relative - * to the deleted zone root in the second element. 
- * @return FileRepoStatus - */ - function deleteBatch( $sourceDestPairs ) { - $status = $this->newGood(); - if ( !$this->deletedDir ) { - throw new MWException( __METHOD__.': no valid deletion archive directory' ); - } - - /** - * Validate filenames and create archive directories - */ - foreach ( $sourceDestPairs as $pair ) { - list( $srcRel, $archiveRel ) = $pair; - if ( !$this->validateFilename( $srcRel ) ) { - throw new MWException( __METHOD__.':Validation error in $srcRel' ); - } - if ( !$this->validateFilename( $archiveRel ) ) { - throw new MWException( __METHOD__.':Validation error in $archiveRel' ); - } - $archivePath = "{$this->deletedDir}/$archiveRel"; - $archiveDir = dirname( $archivePath ); - if ( !is_dir( $archiveDir ) ) { - if ( !wfMkdirParents( $archiveDir ) ) { - $status->fatal( 'directorycreateerror', $archiveDir ); - continue; - } - $this->initDeletedDir( $archiveDir ); - } - // Check if the archive directory is writable - // This doesn't appear to work on NTFS - if ( !is_writable( $archiveDir ) ) { - $status->fatal( 'filedelete-archive-read-only', $archiveDir ); - } - } - if ( !$status->ok ) { - // Abort early - return $status; - } - - /** - * Move the files - * We're now committed to returning an OK result, which will lead to - * the files being moved in the DB also. 
- */ - foreach ( $sourceDestPairs as $pair ) { - list( $srcRel, $archiveRel ) = $pair; - $srcPath = "{$this->directory}/$srcRel"; - $archivePath = "{$this->deletedDir}/$archiveRel"; - $good = true; - if ( file_exists( $archivePath ) ) { - # A file with this content hash is already archived - wfSuppressWarnings(); - $good = unlink( $srcPath ); - wfRestoreWarnings(); - if ( !$good ) { - $status->error( 'filedeleteerror', $srcPath ); - } - } else{ - wfSuppressWarnings(); - $good = rename( $srcPath, $archivePath ); - wfRestoreWarnings(); - if ( !$good ) { - $status->error( 'filerenameerror', $srcPath, $archivePath ); - } else { - $this->chmod( $archivePath ); - } - } - if ( $good ) { - $status->successCount++; - } else { - $status->failCount++; - } - } - return $status; - } - - /** - * Get a relative path for a deletion archive key, - * e.g. s/z/a/ for sza251lrxrc1jad41h5mgilp8nysje52.jpg - */ - function getDeletedHashPath( $key ) { - $path = ''; - for ( $i = 0; $i < $this->deletedHashLevels; $i++ ) { - $path .= $key[$i] . '/'; - } - return $path; - } - - /** - * Call a callback function for every file in the repository. - * Uses the filesystem even in child classes. - */ - function enumFilesInFS( $callback ) { - $numDirs = 1 << ( $this->hashLevels * 4 ); - for ( $flatIndex = 0; $flatIndex < $numDirs; $flatIndex++ ) { - $hexString = sprintf( "%0{$this->hashLevels}x", $flatIndex ); - $path = $this->directory; - for ( $hexPos = 0; $hexPos < $this->hashLevels; $hexPos++ ) { - $path .= '/' . substr( $hexString, 0, $hexPos + 1 ); - } - if ( !file_exists( $path ) || !is_dir( $path ) ) { - continue; - } - $dir = opendir( $path ); - if ($dir) { - while ( false !== ( $name = readdir( $dir ) ) ) { - call_user_func( $callback, $path . '/' . 
$name ); - } - closedir( $dir ); - } - } - } - - /** - * Call a callback function for every file in the repository - * May use either the database or the filesystem - */ - function enumFiles( $callback ) { - $this->enumFilesInFS( $callback ); - } - - /** - * Get properties of a file with a given virtual URL - * The virtual URL must refer to this repo - */ - function getFileProps( $virtualUrl ) { - $path = $this->resolveVirtualUrl( $virtualUrl ); - return File::getPropsFromPath( $path ); - } - - /** - * Path disclosure protection functions - * - * Get a callback function to use for cleaning error message parameters - */ - function getErrorCleanupFunction() { - switch ( $this->pathDisclosureProtection ) { - case 'simple': - $callback = array( $this, 'simpleClean' ); - break; - default: - $callback = parent::getErrorCleanupFunction(); - } - return $callback; - } + parent::__construct( $info ); - function simpleClean( $param ) { - if ( !isset( $this->simpleCleanPairs ) ) { - global $IP; - $this->simpleCleanPairs = array( - $this->directory => 'public', - "{$this->directory}/temp" => 'temp', - $IP => '$IP', - dirname( __FILE__ ) => '$IP/extensions/WebStore', - ); - if ( $this->deletedDir ) { - $this->simpleCleanPairs[$this->deletedDir] = 'deleted'; - } + if ( !( $this->backend instanceof FSFileBackend ) ) { + throw new MWException( "FSRepo only supports FSFileBackend." ); } - return strtr( $param, $this->simpleCleanPairs ); } - - /** - * Chmod a file, supressing the warnings. - * @param $path String: the path to change - */ - protected function chmod( $path ) { - wfSuppressWarnings(); - chmod( $path, $this->fileMode ); - wfRestoreWarnings(); - } - } diff --git a/includes/filerepo/FileRepo.php b/includes/filerepo/FileRepo.php index 843f09a9..8d4f2bd9 100644 --- a/includes/filerepo/FileRepo.php +++ b/includes/filerepo/FileRepo.php @@ -1,5 +1,13 @@ <?php /** + * @defgroup FileRepo File Repository + * + * @brief This module handles how MediaWiki interacts with filesystems. 
+ * + * @details + */ + +/** * Base code for file repositories. * * @file @@ -7,61 +15,244 @@ */ /** - * Base class for file repositories. - * Do not instantiate, use a derived class. + * Base class for file repositories * * @ingroup FileRepo */ -abstract class FileRepo { +class FileRepo { const FILES_ONLY = 1; + const DELETE_SOURCE = 1; const OVERWRITE = 2; const OVERWRITE_SAME = 4; - const SKIP_VALIDATION = 8; + const SKIP_LOCKING = 8; + + /** @var FileBackend */ + protected $backend; + /** @var Array Map of zones to config */ + protected $zones = array(); var $thumbScriptUrl, $transformVia404; var $descBaseUrl, $scriptDirUrl, $scriptExtension, $articleUrl; var $fetchDescription, $initialCapital; - var $pathDisclosureProtection = 'paranoid'; - var $descriptionCacheExpiry, $hashLevels, $url, $thumbUrl; + var $pathDisclosureProtection = 'simple'; // 'paranoid' + var $descriptionCacheExpiry, $url, $thumbUrl; + var $hashLevels, $deletedHashLevels; /** * Factory functions for creating new files * Override these in the base class */ - var $fileFactory = false, $oldFileFactory = false; + var $fileFactory = array( 'UnregisteredLocalFile', 'newFromTitle' ); + var $oldFileFactory = false; var $fileFactoryKey = false, $oldFileFactoryKey = false; - function __construct( $info ) { + function __construct( Array $info = null ) { + // Verify required settings presence + if( + $info === null + || !array_key_exists( 'name', $info ) + || !array_key_exists( 'backend', $info ) + ) { + throw new MWException( __CLASS__ . 
" requires an array of options having both 'name' and 'backend' keys.\n" ); + } + // Required settings $this->name = $info['name']; + if ( $info['backend'] instanceof FileBackend ) { + $this->backend = $info['backend']; // useful for testing + } else { + $this->backend = FileBackendGroup::singleton()->get( $info['backend'] ); + } - // Optional settings - $this->initialCapital = MWNamespace::isCapitalized( NS_FILE ); - foreach ( array( 'descBaseUrl', 'scriptDirUrl', 'articleUrl', 'fetchDescription', - 'thumbScriptUrl', 'initialCapital', 'pathDisclosureProtection', - 'descriptionCacheExpiry', 'hashLevels', 'url', 'thumbUrl', 'scriptExtension' ) - as $var ) - { + // Optional settings that can have no value + $optionalSettings = array( + 'descBaseUrl', 'scriptDirUrl', 'articleUrl', 'fetchDescription', + 'thumbScriptUrl', 'pathDisclosureProtection', 'descriptionCacheExpiry', + 'scriptExtension' + ); + foreach ( $optionalSettings as $var ) { if ( isset( $info[$var] ) ) { $this->$var = $info[$var]; } } + + // Optional settings that have a default + $this->initialCapital = isset( $info['initialCapital'] ) + ? $info['initialCapital'] + : MWNamespace::isCapitalized( NS_FILE ); + $this->url = isset( $info['url'] ) + ? $info['url'] + : false; // a subclass may set the URL (e.g. ForeignAPIRepo) + if ( isset( $info['thumbUrl'] ) ) { + $this->thumbUrl = $info['thumbUrl']; + } else { + $this->thumbUrl = $this->url ? "{$this->url}/thumb" : false; + } + $this->hashLevels = isset( $info['hashLevels'] ) + ? $info['hashLevels'] + : 2; + $this->deletedHashLevels = isset( $info['deletedHashLevels'] ) + ? $info['deletedHashLevels'] + : $this->hashLevels; $this->transformVia404 = !empty( $info['transformVia404'] ); + $this->zones = isset( $info['zones'] ) + ? $info['zones'] + : array(); + // Give defaults for the basic zones... 
+ foreach ( array( 'public', 'thumb', 'temp', 'deleted' ) as $zone ) { + if ( !isset( $this->zones[$zone] ) ) { + $this->zones[$zone] = array( + 'container' => "{$this->name}-{$zone}", + 'directory' => '' // container root + ); + } + } + } + + /** + * Get the file backend instance + * + * @return FileBackend + */ + public function getBackend() { + return $this->backend; + } + + /** + * Prepare a single zone or list of zones for usage. + * See initDeletedDir() for additional setup needed for the 'deleted' zone. + * + * @param $doZones Array Only do a particular zones + * @return Status + */ + protected function initZones( $doZones = array() ) { + $status = $this->newGood(); + foreach ( (array)$doZones as $zone ) { + $root = $this->getZonePath( $zone ); + if ( $root === null ) { + throw new MWException( "No '$zone' zone defined in the {$this->name} repo." ); + } + } + return $status; + } + + /** + * Take all available measures to prevent web accessibility of new deleted + * directories, in case the user has not configured offline storage + * + * @param $dir string + * @return void + */ + protected function initDeletedDir( $dir ) { + $this->backend->secure( // prevent web access & dir listings + array( 'dir' => $dir, 'noAccess' => true, 'noListing' => true ) ); } /** * Determine if a string is an mwrepo:// URL * * @param $url string - * * @return bool */ - static function isVirtualUrl( $url ) { + public static function isVirtualUrl( $url ) { return substr( $url, 0, 9 ) == 'mwrepo://'; } /** + * Get a URL referring to this repository, with the private mwrepo protocol. + * The suffix, if supplied, is considered to be unencoded, and will be + * URL-encoded before being returned. + * + * @param $suffix string + * @return string + */ + public function getVirtualUrl( $suffix = false ) { + $path = 'mwrepo://' . $this->name; + if ( $suffix !== false ) { + $path .= '/' . 
rawurlencode( $suffix ); + } + return $path; + } + + /** + * Get the URL corresponding to one of the four basic zones + * + * @param $zone String: one of: public, deleted, temp, thumb + * @return String or false + */ + public function getZoneUrl( $zone ) { + switch ( $zone ) { + case 'public': + return $this->url; + case 'temp': + return "{$this->url}/temp"; + case 'deleted': + return false; // no public URL + case 'thumb': + return $this->thumbUrl; + default: + return false; + } + } + + /** + * Get the backend storage path corresponding to a virtual URL + * + * @param $url string + * @return string + */ + function resolveVirtualUrl( $url ) { + if ( substr( $url, 0, 9 ) != 'mwrepo://' ) { + throw new MWException( __METHOD__.': unknown protocol' ); + } + $bits = explode( '/', substr( $url, 9 ), 3 ); + if ( count( $bits ) != 3 ) { + throw new MWException( __METHOD__.": invalid mwrepo URL: $url" ); + } + list( $repo, $zone, $rel ) = $bits; + if ( $repo !== $this->name ) { + throw new MWException( __METHOD__.": fetching from a foreign repo is not supported" ); + } + $base = $this->getZonePath( $zone ); + if ( !$base ) { + throw new MWException( __METHOD__.": invalid zone: $zone" ); + } + return $base . '/' . 
rawurldecode( $rel ); + } + + /** + * The the storage container and base path of a zone + * + * @param $zone string + * @return Array (container, base path) or (null, null) + */ + protected function getZoneLocation( $zone ) { + if ( !isset( $this->zones[$zone] ) ) { + return array( null, null ); // bogus + } + return array( $this->zones[$zone]['container'], $this->zones[$zone]['directory'] ); + } + + /** + * Get the storage path corresponding to one of the zones + * + * @param $zone string + * @return string|null + */ + public function getZonePath( $zone ) { + list( $container, $base ) = $this->getZoneLocation( $zone ); + if ( $container === null || $base === null ) { + return null; + } + $backendName = $this->backend->getName(); + if ( $base != '' ) { // may not be set + $base = "/{$base}"; + } + return "mwstore://$backendName/{$container}{$base}"; + } + + /** * Create a new File object from the local repository * * @param $title Mixed: Title object or string @@ -70,15 +261,12 @@ abstract class FileRepo { * instance of the repository's old file class instead of a * current file. Repositories not supporting version control * should return false if this parameter is set. - * - * @return File + * @return File|null A File, or null if passed an invalid Title */ - function newFile( $title, $time = false ) { - if ( !( $title instanceof Title ) ) { - $title = Title::makeTitleSafe( NS_FILE, $title ); - if ( !is_object( $title ) ) { - return null; - } + public function newFile( $title, $time = false ) { + $title = File::normalizeTitle( $title ); + if ( !$title ) { + return null; } if ( $time ) { if ( $this->oldFileFactory ) { @@ -107,17 +295,14 @@ abstract class FileRepo { * private: If true, return restricted (deleted) files if the current * user is allowed to view them. Otherwise, such files will not * be found. - * * @return File|false */ - function findFile( $title, $options = array() ) { - $time = isset( $options['time'] ) ? 
$options['time'] : false; - if ( !($title instanceof Title) ) { - $title = Title::makeTitleSafe( NS_FILE, $title ); - if ( !is_object( $title ) ) { - return false; - } + public function findFile( $title, $options = array() ) { + $title = File::normalizeTitle( $title ); + if ( !$title ) { + return false; } + $time = isset( $options['time'] ) ? $options['time'] : false; # First try the current version of the file to see if it precedes the timestamp $img = $this->newFile( $title ); if ( !$img ) { @@ -143,12 +328,12 @@ abstract class FileRepo { return false; } $redir = $this->checkRedirect( $title ); - if( $redir && $title->getNamespace() == NS_FILE) { + if ( $redir && $title->getNamespace() == NS_FILE) { $img = $this->newFile( $redir ); - if( !$img ) { + if ( !$img ) { return false; } - if( $img->exists() ) { + if ( $img->exists() ) { $img->redirectedFrom( $title->getDBkey() ); return $img; } @@ -158,14 +343,16 @@ abstract class FileRepo { /** * Find many files at once. + * * @param $items An array of titles, or an array of findFile() options with * the "title" option giving the title. Example: * * $findItem = array( 'title' => $title, 'private' => true ); * $findBatch = array( $findItem ); * $repo->findFiles( $findBatch ); + * @return array */ - function findFiles( $items ) { + public function findFiles( $items ) { $result = array(); foreach ( $items as $item ) { if ( is_array( $item ) ) { @@ -191,8 +378,9 @@ abstract class FileRepo { * * @param $sha1 String base 36 SHA-1 hash * @param $options Option array, same as findFile(). + * @return File|false */ - function findFileFromKey( $sha1, $options = array() ) { + public function findFileFromKey( $sha1, $options = array() ) { $time = isset( $options['time'] ) ? $options['time'] : false; # First try to find a matching current version of a file... 
@@ -219,35 +407,59 @@ abstract class FileRepo { } /** - * Get the URL of thumb.php + * Get an array or iterator of file objects for files that have a given + * SHA-1 content hash. + * + * STUB */ - function getThumbScriptUrl() { - return $this->thumbScriptUrl; + public function findBySha1( $hash ) { + return array(); } /** - * Get the URL corresponding to one of the four basic zones - * @param $zone String: one of: public, deleted, temp, thumb - * @return String or false + * Get the public root URL of the repository + * + * @return string|false */ - function getZoneUrl( $zone ) { - return false; + public function getRootUrl() { + return $this->url; + } + + /** + * Returns true if the repository uses a multi-level directory structure + * + * @return string + */ + public function isHashed() { + return (bool)$this->hashLevels; + } + + /** + * Get the URL of thumb.php + * + * @return string + */ + public function getThumbScriptUrl() { + return $this->thumbScriptUrl; } /** * Returns true if the repository can transform files via a 404 handler + * + * @return bool */ - function canTransformVia404() { + public function canTransformVia404() { return $this->transformVia404; } /** * Get the name of an image from its title object + * * @param $title Title */ - function getNameFromTitle( $title ) { + public function getNameFromTitle( Title $title ) { + global $wgContLang; if ( $this->initialCapital != MWNamespace::isCapitalized( NS_FILE ) ) { - global $wgContLang; $name = $title->getUserCaseDBKey(); if ( $this->initialCapital ) { $name = $wgContLang->ucfirst( $name ); @@ -258,6 +470,31 @@ abstract class FileRepo { return $name; } + /** + * Get the public zone root storage directory of the repository + * + * @return string + */ + public function getRootDirectory() { + return $this->getZonePath( 'public' ); + } + + /** + * Get a relative path including trailing slash, e.g. 
f/fa/ + * If the repo is not hashed, returns an empty string + * + * @param $name string + * @return string + */ + public function getHashPath( $name ) { + return self::getHashPathForLevel( $name, $this->hashLevels ); + } + + /** + * @param $name + * @param $levels + * @return string + */ static function getHashPathForLevel( $name, $levels ) { if ( $levels == 0 ) { return ''; @@ -272,17 +509,20 @@ abstract class FileRepo { } /** - * Get a relative path including trailing slash, e.g. f/fa/ - * If the repo is not hashed, returns an empty string + * Get the number of hash directory levels + * + * @return integer */ - function getHashPath( $name ) { - return self::getHashPathForLevel( $name, $this->hashLevels ); + public function getHashLevels() { + return $this->hashLevels; } /** * Get the name of this repository, as specified by $info['name]' to the constructor + * + * @return string */ - function getName() { + public function getName() { return $this->name; } @@ -291,11 +531,14 @@ abstract class FileRepo { * * @param $query mixed Query string to append * @param $entry string Entry point; defaults to index - * @return string + * @return string|false */ - function makeUrl( $query = '', $entry = 'index' ) { - $ext = isset( $this->scriptExtension ) ? $this->scriptExtension : '.php'; - return wfAppendQuery( "{$this->scriptDirUrl}/{$entry}{$ext}", $query ); + public function makeUrl( $query = '', $entry = 'index' ) { + if ( isset( $this->scriptDirUrl ) ) { + $ext = isset( $this->scriptExtension ) ? $this->scriptExtension : '.php'; + return wfAppendQuery( "{$this->scriptDirUrl}/{$entry}{$ext}", $query ); + } + return false; } /** @@ -306,8 +549,11 @@ abstract class FileRepo { * * In particular, it uses the article paths as specified to the repository * constructor, whereas local repositories use the local Title functions. 
+ * + * @param $name string + * @return string */ - function getDescriptionUrl( $name ) { + public function getDescriptionUrl( $name ) { $encName = wfUrlencode( $name ); if ( !is_null( $this->descBaseUrl ) ) { # "http://example.com/wiki/Image:" @@ -337,10 +583,12 @@ abstract class FileRepo { * MediaWiki this means action=render. This should only be called by the * repository's file class, since it may return invalid results. User code * should use File::getDescriptionText(). + * * @param $name String: name of image to fetch * @param $lang String: language to fetch it in, if any. + * @return string */ - function getDescriptionRenderUrl( $name, $lang = null ) { + public function getDescriptionRenderUrl( $name, $lang = null ) { $query = 'action=render'; if ( !is_null( $lang ) ) { $query .= '&uselang=' . $lang; @@ -362,19 +610,21 @@ abstract class FileRepo { /** * Get the URL of the stylesheet to apply to description pages - * @return string + * + * @return string|false */ - function getDescriptionStylesheetUrl() { - if ( $this->scriptDirUrl ) { + public function getDescriptionStylesheetUrl() { + if ( isset( $this->scriptDirUrl ) ) { return $this->makeUrl( 'title=MediaWiki:Filepage.css&' . wfArrayToCGI( Skin::getDynamicStylesheetQuery() ) ); } + return false; } /** * Store a file to a given destination. 
* - * @param $srcPath String: source path or virtual URL + * @param $srcPath String: source FS path, storage path, or virtual URL * @param $dstZone String: destination zone * @param $dstRel String: destination relative path * @param $flags Integer: bitwise combination of the following flags: @@ -382,9 +632,10 @@ abstract class FileRepo { * self::OVERWRITE Overwrite an existing destination file instead of failing * self::OVERWRITE_SAME Overwrite the file if the destination exists and has the * same contents as the source + * self::SKIP_LOCKING Skip any file locking when doing the store * @return FileRepoStatus */ - function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) { + public function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) { $status = $this->storeBatch( array( array( $srcPath, $dstZone, $dstRel ) ), $flags ); if ( $status->successCount == 0 ) { $status->ok = false; @@ -395,65 +646,236 @@ abstract class FileRepo { /** * Store a batch of files * - * @param $triplets Array: (src,zone,dest) triplets as per store() - * @param $flags Integer: flags as per store + * @param $triplets Array: (src, dest zone, dest rel) triplets as per store() + * @param $flags Integer: bitwise combination of the following flags: + * self::DELETE_SOURCE Delete the source file after upload + * self::OVERWRITE Overwrite an existing destination file instead of failing + * self::OVERWRITE_SAME Overwrite the file if the destination exists and has the + * same contents as the source + * self::SKIP_LOCKING Skip any file locking when doing the store + * @return FileRepoStatus */ - abstract function storeBatch( $triplets, $flags = 0 ); + public function storeBatch( $triplets, $flags = 0 ) { + $backend = $this->backend; // convenience + + $status = $this->newGood(); + + $operations = array(); + $sourceFSFilesToDelete = array(); // cleanup for disk source files + // Validate each triplet and get the store operation... 
+ foreach ( $triplets as $triplet ) { + list( $srcPath, $dstZone, $dstRel ) = $triplet; + wfDebug( __METHOD__ + . "( \$src='$srcPath', \$dstZone='$dstZone', \$dstRel='$dstRel' )\n" + ); + + // Resolve destination path + $root = $this->getZonePath( $dstZone ); + if ( !$root ) { + throw new MWException( "Invalid zone: $dstZone" ); + } + if ( !$this->validateFilename( $dstRel ) ) { + throw new MWException( 'Validation error in $dstRel' ); + } + $dstPath = "$root/$dstRel"; + $dstDir = dirname( $dstPath ); + // Create destination directories for this triplet + if ( !$backend->prepare( array( 'dir' => $dstDir ) )->isOK() ) { + return $this->newFatal( 'directorycreateerror', $dstDir ); + } + + if ( $dstZone == 'deleted' ) { + $this->initDeletedDir( $dstDir ); + } + + // Resolve source to a storage path if virtual + if ( self::isVirtualUrl( $srcPath ) ) { + $srcPath = $this->resolveVirtualUrl( $srcPath ); + } + + // Get the appropriate file operation + if ( FileBackend::isStoragePath( $srcPath ) ) { + $opName = ( $flags & self::DELETE_SOURCE ) ? 'move' : 'copy'; + } else { + $opName = 'store'; + if ( $flags & self::DELETE_SOURCE ) { + $sourceFSFilesToDelete[] = $srcPath; + } + } + $operations[] = array( + 'op' => $opName, + 'src' => $srcPath, + 'dst' => $dstPath, + 'overwrite' => $flags & self::OVERWRITE, + 'overwriteSame' => $flags & self::OVERWRITE_SAME, + ); + } + + // Execute the store operation for each triplet + $opts = array( 'force' => true ); + if ( $flags & self::SKIP_LOCKING ) { + $opts['nonLocking'] = true; + } + $status->merge( $backend->doOperations( $operations, $opts ) ); + // Cleanup for disk source files... + foreach ( $sourceFSFilesToDelete as $file ) { + wfSuppressWarnings(); + unlink( $file ); // FS cleanup + wfRestoreWarnings(); + } + + return $status; + } + + /** + * Deletes a batch of files. + * Each file can be a (zone, rel) pair, virtual url, storage path, or FS path. + * It will try to delete each file, but ignores any errors that may occur. 
+ * + * @param $pairs array List of files to delete + * @param $flags Integer: bitwise combination of the following flags: + * self::SKIP_LOCKING Skip any file locking when doing the deletions + * @return void + */ + public function cleanupBatch( $files, $flags = 0 ) { + $operations = array(); + $sourceFSFilesToDelete = array(); // cleanup for disk source files + foreach ( $files as $file ) { + if ( is_array( $file ) ) { + // This is a pair, extract it + list( $zone, $rel ) = $file; + $root = $this->getZonePath( $zone ); + $path = "$root/$rel"; + } else { + if ( self::isVirtualUrl( $file ) ) { + // This is a virtual url, resolve it + $path = $this->resolveVirtualUrl( $file ); + } else { + // This is a full file name + $path = $file; + } + } + // Get a file operation if needed + if ( FileBackend::isStoragePath( $path ) ) { + $operations[] = array( + 'op' => 'delete', + 'src' => $path, + ); + } else { + $sourceFSFilesToDelete[] = $path; + } + } + // Actually delete files from storage... + $opts = array( 'force' => true ); + if ( $flags & self::SKIP_LOCKING ) { + $opts['nonLocking'] = true; + } + $this->backend->doOperations( $operations, $opts ); + // Cleanup for disk source files... + foreach ( $sourceFSFilesToDelete as $file ) { + wfSuppressWarnings(); + unlink( $file ); // FS cleanup + wfRestoreWarnings(); + } + } /** * Pick a random name in the temp zone and store a file to it. - * Returns a FileRepoStatus object with the URL in the value. + * Returns a FileRepoStatus object with the file Virtual URL in the value, + * file can later be disposed using FileRepo::freeTemp(). + * * * @param $originalName String: the base name of the file as specified * by the user. The file extension will be maintained. * @param $srcPath String: the current location of the file. + * @return FileRepoStatus object with the URL in the value. 
*/ - abstract function storeTemp( $originalName, $srcPath ); + public function storeTemp( $originalName, $srcPath ) { + $date = gmdate( "YmdHis" ); + $hashPath = $this->getHashPath( $originalName ); + $dstRel = "{$hashPath}{$date}!{$originalName}"; + $dstUrlRel = $hashPath . $date . '!' . rawurlencode( $originalName ); + $result = $this->store( $srcPath, 'temp', $dstRel, self::SKIP_LOCKING ); + $result->value = $this->getVirtualUrl( 'temp' ) . '/' . $dstUrlRel; + return $result; + } /** - * Append the contents of the source path to the given file, OR queue - * the appending operation in anticipation of a later appendFinish() call. - * @param $srcPath String: location of the source file - * @param $toAppendPath String: path to append to. - * @param $flags Integer: bitfield, may be FileRepo::DELETE_SOURCE to indicate - * that the source file should be deleted if possible - * @return mixed Status or false + * Concatenate a list of files into a target file location. + * + * @param $srcPaths Array Ordered list of source virtual URLs/storage paths + * @param $dstPath String Target file system path + * @param $flags Integer: bitwise combination of the following flags: + * self::DELETE_SOURCE Delete the source files + * @return FileRepoStatus */ - abstract function append( $srcPath, $toAppendPath, $flags = 0 ); + function concatenate( $srcPaths, $dstPath, $flags = 0 ) { + $status = $this->newGood(); - /** - * Finish the append operation. - * @param $toAppendPath String: path to append to. 
- * @return mixed Status or false - */ - abstract function appendFinish( $toAppendPath ); + $sources = array(); + $deleteOperations = array(); // post-concatenate ops + foreach ( $srcPaths as $srcPath ) { + // Resolve source to a storage path if virtual + $source = $this->resolveToStoragePath( $srcPath ); + $sources[] = $source; // chunk to merge + if ( $flags & self::DELETE_SOURCE ) { + $deleteOperations[] = array( 'op' => 'delete', 'src' => $source ); + } + } + + // Concatenate the chunks into one FS file + $params = array( 'srcs' => $sources, 'dst' => $dstPath ); + $status->merge( $this->backend->concatenate( $params ) ); + if ( !$status->isOK() ) { + return $status; + } + + // Delete the sources if required + if ( $deleteOperations ) { + $opts = array( 'force' => true ); + $status->merge( $this->backend->doOperations( $deleteOperations, $opts ) ); + } + + // Make sure status is OK, despite any $deleteOperations fatals + $status->setResult( true ); + + return $status; + } /** * Remove a temporary file or mark it for garbage collection - * @param $virtualUrl String: the virtual URL returned by storeTemp + * + * @param $virtualUrl String: the virtual URL returned by FileRepo::storeTemp() * @return Boolean: true on success, false on failure - * STUB */ - function freeTemp( $virtualUrl ) { - return true; + public function freeTemp( $virtualUrl ) { + $temp = "mwrepo://{$this->name}/temp"; + if ( substr( $virtualUrl, 0, strlen( $temp ) ) != $temp ) { + wfDebug( __METHOD__.": Invalid temp virtual URL\n" ); + return false; + } + $path = $this->resolveVirtualUrl( $virtualUrl ); + $op = array( 'op' => 'delete', 'src' => $path ); + $status = $this->backend->doOperation( $op ); + return $status->isOK(); } /** - * Copy or move a file either from the local filesystem or from an mwrepo:// - * virtual URL, into this repository at the specified destination location. 
+ * Copy or move a file either from a storage path, virtual URL, + * or FS path, into this repository at the specified destination location. * * Returns a FileRepoStatus object. On success, the value contains "new" or * "archived", to indicate whether the file was new with that name. * - * @param $srcPath String: the source path or URL + * @param $srcPath String: the source FS path, storage path, or URL * @param $dstRel String: the destination relative path - * @param $archiveRel String: rhe relative path where the existing file is to + * @param $archiveRel String: the relative path where the existing file is to * be archived, if there is one. Relative to the public zone root. * @param $flags Integer: bitfield, may be FileRepo::DELETE_SOURCE to indicate * that the source file should be deleted if possible */ - function publish( $srcPath, $dstRel, $archiveRel, $flags = 0 ) { + public function publish( $srcPath, $dstRel, $archiveRel, $flags = 0 ) { $status = $this->publishBatch( array( array( $srcPath, $dstRel, $archiveRel ) ), $flags ); if ( $status->successCount == 0 ) { $status->ok = false; @@ -468,13 +890,123 @@ abstract class FileRepo { /** * Publish a batch of files - * @param $triplets Array: (source,dest,archive) triplets as per publish() + * + * @param $triplets Array: (source, dest, archive) triplets as per publish() * @param $flags Integer: bitfield, may be FileRepo::DELETE_SOURCE to indicate * that the source files should be deleted if possible + * @return FileRepoStatus */ - abstract function publishBatch( $triplets, $flags = 0 ); + public function publishBatch( $triplets, $flags = 0 ) { + $backend = $this->backend; // convenience + + // Try creating directories + $status = $this->initZones( 'public' ); + if ( !$status->isOK() ) { + return $status; + } + + $status = $this->newGood( array() ); + + $operations = array(); + $sourceFSFilesToDelete = array(); // cleanup for disk source files + // Validate each triplet and get the store operation... 
+ foreach ( $triplets as $i => $triplet ) { + list( $srcPath, $dstRel, $archiveRel ) = $triplet; + // Resolve source to a storage path if virtual + if ( substr( $srcPath, 0, 9 ) == 'mwrepo://' ) { + $srcPath = $this->resolveVirtualUrl( $srcPath ); + } + if ( !$this->validateFilename( $dstRel ) ) { + throw new MWException( 'Validation error in $dstRel' ); + } + if ( !$this->validateFilename( $archiveRel ) ) { + throw new MWException( 'Validation error in $archiveRel' ); + } + + $publicRoot = $this->getZonePath( 'public' ); + $dstPath = "$publicRoot/$dstRel"; + $archivePath = "$publicRoot/$archiveRel"; + + $dstDir = dirname( $dstPath ); + $archiveDir = dirname( $archivePath ); + // Abort immediately on directory creation errors since they're likely to be repetitive + if ( !$backend->prepare( array( 'dir' => $dstDir ) )->isOK() ) { + return $this->newFatal( 'directorycreateerror', $dstDir ); + } + if ( !$backend->prepare( array( 'dir' => $archiveDir ) )->isOK() ) { + return $this->newFatal( 'directorycreateerror', $archiveDir ); + } + + // Archive destination file if it exists + if ( $backend->fileExists( array( 'src' => $dstPath ) ) ) { + // Check if the archive file exists + // This is a sanity check to avoid data loss. In UNIX, the rename primitive + // unlinks the destination file if it exists. DB-based synchronisation in + // publishBatch's caller should prevent races. In Windows there's no + // problem because the rename primitive fails if the destination exists. 
+ if ( $backend->fileExists( array( 'src' => $archivePath ) ) ) { + $operations[] = array( 'op' => 'null' ); + continue; + } else { + $operations[] = array( + 'op' => 'move', + 'src' => $dstPath, + 'dst' => $archivePath + ); + } + $status->value[$i] = 'archived'; + } else { + $status->value[$i] = 'new'; + } + // Copy (or move) the source file to the destination + if ( FileBackend::isStoragePath( $srcPath ) ) { + if ( $flags & self::DELETE_SOURCE ) { + $operations[] = array( + 'op' => 'move', + 'src' => $srcPath, + 'dst' => $dstPath + ); + } else { + $operations[] = array( + 'op' => 'copy', + 'src' => $srcPath, + 'dst' => $dstPath + ); + } + } else { // FS source path + $operations[] = array( + 'op' => 'store', + 'src' => $srcPath, + 'dst' => $dstPath + ); + if ( $flags & self::DELETE_SOURCE ) { + $sourceFSFilesToDelete[] = $srcPath; + } + } + } + + // Execute the operations for each triplet + $opts = array( 'force' => true ); + $status->merge( $backend->doOperations( $operations, $opts ) ); + // Cleanup for disk source files... + foreach ( $sourceFSFilesToDelete as $file ) { + wfSuppressWarnings(); + unlink( $file ); // FS cleanup + wfRestoreWarnings(); + } - function fileExists( $file, $flags = 0 ) { + return $status; + } + + /** + * Checks existence of a a file + * + * @param $file Virtual URL (or storage path) of file to check + * @param $flags Integer: bitwise combination of the following flags: + * self::FILES_ONLY Mark file as existing only if it is a file (not directory) + * @return bool + */ + public function fileExists( $file, $flags = 0 ) { $result = $this->fileExistsBatch( array( $file ), $flags ); return $result[0]; } @@ -482,12 +1014,44 @@ abstract class FileRepo { /** * Checks existence of an array of files. 
* - * @param $files Array: URLs (or paths) of files to check + * @param $files Array: Virtual URLs (or storage paths) of files to check * @param $flags Integer: bitwise combination of the following flags: * self::FILES_ONLY Mark file as existing only if it is a file (not directory) * @return Either array of files and existence flags, or false */ - abstract function fileExistsBatch( $files, $flags = 0 ); + public function fileExistsBatch( $files, $flags = 0 ) { + $result = array(); + foreach ( $files as $key => $file ) { + if ( self::isVirtualUrl( $file ) ) { + $file = $this->resolveVirtualUrl( $file ); + } + if ( FileBackend::isStoragePath( $file ) ) { + $result[$key] = $this->backend->fileExists( array( 'src' => $file ) ); + } else { + if ( $flags & self::FILES_ONLY ) { + $result[$key] = is_file( $file ); // FS only + } else { + $result[$key] = file_exists( $file ); // FS only + } + } + } + + return $result; + } + + /** + * Move a file to the deletion archive. + * If no valid deletion archive exists, this may either delete the file + * or throw an exception, depending on the preference of the repository + * + * @param $srcRel Mixed: relative path for the file to be deleted + * @param $archiveRel Mixed: relative path for the archive location. + * Relative to a private archive directory. + * @return FileRepoStatus object + */ + public function delete( $srcRel, $archiveRel ) { + return $this->deleteBatch( array( array( $srcRel, $archiveRel ) ) ); + } /** * Move a group of files to the deletion archive. @@ -495,7 +1059,7 @@ abstract class FileRepo { * If no valid deletion archive is configured, this may either delete the * file or throw an exception, depending on the preference of the repository. 
* - * The overwrite policy is determined by the repository -- currently FSRepo + * The overwrite policy is determined by the repository -- currently LocalRepo * assumes a naming scheme in the deleted zone based on content hash, as * opposed to the public zone which is assumed to be unique. * @@ -505,41 +1069,210 @@ abstract class FileRepo { * to the deleted zone root in the second element. * @return FileRepoStatus */ - abstract function deleteBatch( $sourceDestPairs ); + public function deleteBatch( $sourceDestPairs ) { + $backend = $this->backend; // convenience + + // Try creating directories + $status = $this->initZones( array( 'public', 'deleted' ) ); + if ( !$status->isOK() ) { + return $status; + } + + $status = $this->newGood(); + + $operations = array(); + // Validate filenames and create archive directories + foreach ( $sourceDestPairs as $pair ) { + list( $srcRel, $archiveRel ) = $pair; + if ( !$this->validateFilename( $srcRel ) ) { + throw new MWException( __METHOD__.':Validation error in $srcRel' ); + } + if ( !$this->validateFilename( $archiveRel ) ) { + throw new MWException( __METHOD__.':Validation error in $archiveRel' ); + } + + $publicRoot = $this->getZonePath( 'public' ); + $srcPath = "{$publicRoot}/$srcRel"; + + $deletedRoot = $this->getZonePath( 'deleted' ); + $archivePath = "{$deletedRoot}/{$archiveRel}"; + $archiveDir = dirname( $archivePath ); // does not touch FS + + // Create destination directories + if ( !$backend->prepare( array( 'dir' => $archiveDir ) )->isOK() ) { + return $this->newFatal( 'directorycreateerror', $archiveDir ); + } + $this->initDeletedDir( $archiveDir ); + + $operations[] = array( + 'op' => 'move', + 'src' => $srcPath, + 'dst' => $archivePath, + // We may have 2+ identical files being deleted, + // all of which will map to the same destination file + 'overwriteSame' => true // also see bug 31792 + ); + } + + // Move the files by execute the operations for each pair. 
+ // We're now committed to returning an OK result, which will + // lead to the files being moved in the DB also. + $opts = array( 'force' => true ); + $status->merge( $backend->doOperations( $operations, $opts ) ); + + return $status; + } /** - * Move a file to the deletion archive. - * If no valid deletion archive exists, this may either delete the file - * or throw an exception, depending on the preference of the repository - * @param $srcRel Mixed: relative path for the file to be deleted - * @param $archiveRel Mixed: relative path for the archive location. - * Relative to a private archive directory. - * @return FileRepoStatus object + * Get a relative path for a deletion archive key, + * e.g. s/z/a/ for sza251lrxrc1jad41h5mgilp8nysje52.jpg + * + * @return string */ - function delete( $srcRel, $archiveRel ) { - return $this->deleteBatch( array( array( $srcRel, $archiveRel ) ) ); + public function getDeletedHashPath( $key ) { + $path = ''; + for ( $i = 0; $i < $this->deletedHashLevels; $i++ ) { + $path .= $key[$i] . '/'; + } + return $path; + } + + /** + * If a path is a virtual URL, resolve it to a storage path. + * Otherwise, just return the path as it is. + * + * @param $path string + * @return string + * @throws MWException + */ + protected function resolveToStoragePath( $path ) { + if ( $this->isVirtualUrl( $path ) ) { + return $this->resolveVirtualUrl( $path ); + } + return $path; + } + + /** + * Get a local FS copy of a file with a given virtual URL/storage path. + * Temporary files may be purged when the file object falls out of scope. 
+ * + * @param $virtualUrl string + * @return TempFSFile|null Returns null on failure + */ + public function getLocalCopy( $virtualUrl ) { + $path = $this->resolveToStoragePath( $virtualUrl ); + return $this->backend->getLocalCopy( array( 'src' => $path ) ); } /** - * Get properties of a file with a given virtual URL - * The virtual URL must refer to this repo - * Properties should ultimately be obtained via File::getPropsFromPath() + * Get a local FS file with a given virtual URL/storage path. + * The file is either an original or a copy. It should not be changed. + * Temporary files may be purged when the file object falls out of scope. + * + * @param $virtualUrl string + * @return FSFile|null Returns null on failure. */ - abstract function getFileProps( $virtualUrl ); + public function getLocalReference( $virtualUrl ) { + $path = $this->resolveToStoragePath( $virtualUrl ); + return $this->backend->getLocalReference( array( 'src' => $path ) ); + } /** - * Call a callback function for every file in the repository - * May use either the database or the filesystem - * STUB + * Get properties of a file with a given virtual URL/storage path. + * Properties should ultimately be obtained via FSFile::getProps(). + * + * @param $virtualUrl string + * @return Array */ - function enumFiles( $callback ) { - throw new MWException( 'enumFiles is not supported by ' . 
get_class( $this ) ); + public function getFileProps( $virtualUrl ) { + $path = $this->resolveToStoragePath( $virtualUrl ); + return $this->backend->getFileProps( array( 'src' => $path ) ); + } + + /** + * Get the timestamp of a file with a given virtual URL/storage path + * + * @param $virtualUrl string + * @return string|false + */ + public function getFileTimestamp( $virtualUrl ) { + $path = $this->resolveToStoragePath( $virtualUrl ); + return $this->backend->getFileTimestamp( array( 'src' => $path ) ); + } + + /** + * Get the sha1 of a file with a given virtual URL/storage path + * + * @param $virtualUrl string + * @return string|false + */ + public function getFileSha1( $virtualUrl ) { + $path = $this->resolveToStoragePath( $virtualUrl ); + $tmpFile = $this->backend->getLocalReference( array( 'src' => $path ) ); + if ( !$tmpFile ) { + return false; + } + return $tmpFile->getSha1Base36(); + } + + /** + * Attempt to stream a file with the given virtual URL/storage path + * + * @param $virtualUrl string + * @param $headers Array Additional HTTP headers to send on success + * @return bool Success + */ + public function streamFile( $virtualUrl, $headers = array() ) { + $path = $this->resolveToStoragePath( $virtualUrl ); + $params = array( 'src' => $path, 'headers' => $headers ); + return $this->backend->streamFile( $params )->isOK(); + } + + /** + * Call a callback function for every public regular file in the repository. + * This only acts on the current version of files, not any old versions. + * May use either the database or the filesystem. + * + * @param $callback Array|string + * @return void + */ + public function enumFiles( $callback ) { + $this->enumFilesInStorage( $callback ); + } + + /** + * Call a callback function for every public file in the repository. + * May use either the database or the filesystem. 
+ * + * @param $callback Array|string + * @return void + */ + protected function enumFilesInStorage( $callback ) { + $publicRoot = $this->getZonePath( 'public' ); + $numDirs = 1 << ( $this->hashLevels * 4 ); + // Use a priori assumptions about directory structure + // to reduce the tree height of the scanning process. + for ( $flatIndex = 0; $flatIndex < $numDirs; $flatIndex++ ) { + $hexString = sprintf( "%0{$this->hashLevels}x", $flatIndex ); + $path = $publicRoot; + for ( $hexPos = 0; $hexPos < $this->hashLevels; $hexPos++ ) { + $path .= '/' . substr( $hexString, 0, $hexPos + 1 ); + } + $iterator = $this->backend->getFileList( array( 'dir' => $path ) ); + foreach ( $iterator as $name ) { + // Each item returned is a public file + call_user_func( $callback, "{$path}/{$name}" ); + } + } } /** * Determine if a relative path is valid, i.e. not blank or involving directory traveral + * + * @param $filename string + * @return bool */ - function validateFilename( $filename ) { + public function validateFilename( $filename ) { if ( strval( $filename ) == '' ) { return false; } @@ -550,11 +1283,11 @@ abstract class FileRepo { * Use the same traversal protection as Title::secureAndSplit() */ if ( strpos( $filename, '.' ) !== false && - ( $filename === '.' || $filename === '..' || - strpos( $filename, './' ) === 0 || - strpos( $filename, '../' ) === 0 || - strpos( $filename, '/./' ) !== false || - strpos( $filename, '/../' ) !== false ) ) + ( $filename === '.' || $filename === '..' 
|| + strpos( $filename, './' ) === 0 || + strpos( $filename, '../' ) === 0 || + strpos( $filename, '/./' ) !== false || + strpos( $filename, '/../' ) !== false ) ) { return false; } else { @@ -562,29 +1295,65 @@ abstract class FileRepo { } } - /**#@+ - * Path disclosure protection functions - */ - function paranoidClean( $param ) { return '[hidden]'; } - function passThrough( $param ) { return $param; } - /** * Get a callback function to use for cleaning error message parameters + * + * @return Array */ function getErrorCleanupFunction() { switch ( $this->pathDisclosureProtection ) { case 'none': $callback = array( $this, 'passThrough' ); break; + case 'simple': + $callback = array( $this, 'simpleClean' ); + break; default: // 'paranoid' $callback = array( $this, 'paranoidClean' ); } return $callback; } - /**#@-*/ + + /** + * Path disclosure protection function + * + * @param $param string + * @return string + */ + function paranoidClean( $param ) { + return '[hidden]'; + } + + /** + * Path disclosure protection function + * + * @param $param string + * @return string + */ + function simpleClean( $param ) { + global $IP; + if ( !isset( $this->simpleCleanPairs ) ) { + $this->simpleCleanPairs = array( + $IP => '$IP', // sanity + ); + } + return strtr( $param, $this->simpleCleanPairs ); + } + + /** + * Path disclosure protection function + * + * @param $param string + * @return string + */ + function passThrough( $param ) { + return $param; + } /** * Create a new fatal error + * + * @return FileRepoStatus */ function newFatal( $message /*, parameters...*/ ) { $params = func_get_args(); @@ -594,6 +1363,8 @@ abstract class FileRepo { /** * Create a new good result + * + * @return FileRepoStatus */ function newGood( $value = null ) { return FileRepoStatus::newGood( $this, $value ); @@ -601,9 +1372,10 @@ abstract class FileRepo { /** * Delete files in the deleted directory if they are not referenced in the filearchive table + * * STUB */ - function cleanupDeletedBatch( 
$storageKeys ) {} + public function cleanupDeletedBatch( $storageKeys ) {} /** * Checks if there is a redirect named as $title. If there is, return the @@ -613,7 +1385,7 @@ abstract class FileRepo { * @param $title Title of image * @return Bool */ - function checkRedirect( $title ) { + public function checkRedirect( Title $title ) { return false; } @@ -624,20 +1396,11 @@ abstract class FileRepo { * STUB * @param $title Title of image */ - function invalidateImageRedirect( $title ) {} + public function invalidateImageRedirect( Title $title ) {} /** - * Get an array or iterator of file objects for files that have a given - * SHA-1 content hash. + * Get the human-readable name of the repo * - * STUB - */ - function findBySha1( $hash ) { - return array(); - } - - /** - * Get the human-readable name of the repo. * @return string */ public function getDisplayName() { @@ -654,11 +1417,10 @@ abstract class FileRepo { * * @return bool */ - function isLocal() { + public function isLocal() { return $this->getName() == 'local'; } - /** * Get a key on the primary cache for this repository. * Returns false if the repository's cache is not accessible at this site. @@ -674,6 +1436,8 @@ abstract class FileRepo { * Get a key for this repo in the local cache domain. These cache keys are * not shared with remote instances of the repo. * The parameters are the parts of the key, as for wfMemcKey(). 
+ * + * @return string */ function getLocalCacheKey( /*...*/ ) { $args = func_get_args(); @@ -686,7 +1450,7 @@ abstract class FileRepo { * * @return UploadStash */ - function getUploadStash() { + public function getUploadStash() { return new UploadStash( $this ); } } diff --git a/includes/filerepo/ForeignAPIRepo.php b/includes/filerepo/ForeignAPIRepo.php index 502b8c1d..e544defb 100644 --- a/includes/filerepo/ForeignAPIRepo.php +++ b/includes/filerepo/ForeignAPIRepo.php @@ -32,20 +32,16 @@ class ForeignAPIRepo extends FileRepo { var $apiThumbCacheExpiry = 86400; /* 24*60*60 */ /* Redownload thumbnail files after a month */ var $fileCacheExpiry = 2592000; /* 86400*30 */ - /* Local image directory */ - var $directory; - var $thumbDir; protected $mQueryCache = array(); protected $mFileExists = array(); function __construct( $info ) { + global $wgLocalFileRepo; parent::__construct( $info ); - global $wgUploadDirectory; // http://commons.wikimedia.org/w/api.php $this->mApiBase = isset( $info['apibase'] ) ? $info['apibase'] : null; - $this->directory = isset( $info['directory'] ) ? $info['directory'] : $wgUploadDirectory; if( isset( $info['apiThumbCacheExpiry'] ) ) { $this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry']; @@ -59,17 +55,11 @@ class ForeignAPIRepo extends FileRepo { } // If we can cache thumbs we can guess sane defaults for these if( $this->canCacheThumbs() && !$this->url ) { - global $wgLocalFileRepo; $this->url = $wgLocalFileRepo['url']; } if( $this->canCacheThumbs() && !$this->thumbUrl ) { $this->thumbUrl = $this->url . 
'/thumb'; } - if ( isset( $info['thumbDir'] ) ) { - $this->thumbDir = $info['thumbDir']; - } else { - $this->thumbDir = "{$this->directory}/thumb"; - } } /** @@ -97,6 +87,10 @@ class ForeignAPIRepo extends FileRepo { return false; } + function concatenate( $fileList, $targetPath, $flags = 0 ){ + return false; + } + function append( $srcPath, $toAppendPath, $flags = 0 ){ return false; } @@ -280,9 +274,9 @@ class ForeignAPIRepo extends FileRepo { $localFilename = $localPath . "/" . $fileName; $localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) . rawurlencode( $name ) . "/" . rawurlencode( $fileName ); - if( file_exists( $localFilename ) && isset( $metadata['timestamp'] ) ) { + if( $this->fileExists( $localFilename ) && isset( $metadata['timestamp'] ) ) { wfDebug( __METHOD__ . " Thumbnail was already downloaded before\n" ); - $modified = filemtime( $localFilename ); + $modified = $this->getFileTimestamp( $localFilename ); $remoteModified = strtotime( $metadata['timestamp'] ); $current = time(); $diff = abs( $modified - $current ); @@ -299,16 +293,12 @@ class ForeignAPIRepo extends FileRepo { wfDebug( __METHOD__ . " Could not download thumb\n" ); return false; } - if ( !is_dir($localPath) ) { - if( !wfMkdirParents($localPath) ) { - wfDebug( __METHOD__ . " could not create directory $localPath for thumb\n" ); - return $foreignUrl; - } - } # @todo FIXME: Delete old thumbs that aren't being used. Maintenance script? wfSuppressWarnings(); - if( !file_put_contents( $localFilename, $thumb ) ) { + $backend = $this->getBackend(); + $op = array( 'op' => 'create', 'dst' => $localFilename, 'content' => $thumb ); + if( !$backend->doOperation( $op )->isOK() ) { wfRestoreWarnings(); wfDebug( __METHOD__ . 
" could not write to thumb path\n" ); return $foreignUrl; @@ -335,17 +325,14 @@ class ForeignAPIRepo extends FileRepo { } /** - * Get the local directory corresponding to one of the three basic zones + * Get the local directory corresponding to one of the basic zones */ function getZonePath( $zone ) { - switch ( $zone ) { - case 'public': - return $this->directory; - case 'thumb': - return $this->thumbDir; - default: - return false; + $supported = array( 'public', 'thumb' ); + if ( in_array( $zone, $supported ) ) { + return parent::getZonePath( $zone ); } + return false; } /** @@ -388,4 +375,8 @@ class ForeignAPIRepo extends FileRepo { return false; } } + + function enumFiles( $callback ) { + throw new MWException( 'enumFiles is not supported by ' . get_class( $this ) ); + } } diff --git a/includes/filerepo/ForeignDBViaLBRepo.php b/includes/filerepo/ForeignDBViaLBRepo.php index 4c530b51..28b48b5e 100644 --- a/includes/filerepo/ForeignDBViaLBRepo.php +++ b/includes/filerepo/ForeignDBViaLBRepo.php @@ -30,6 +30,7 @@ class ForeignDBViaLBRepo extends LocalRepo { function getSlaveDB() { return wfGetDB( DB_SLAVE, array(), $this->wiki ); } + function hasSharedCache() { return $this->hasSharedCache; } diff --git a/includes/filerepo/LocalRepo.php b/includes/filerepo/LocalRepo.php index 9089f4d7..cc23fa31 100644 --- a/includes/filerepo/LocalRepo.php +++ b/includes/filerepo/LocalRepo.php @@ -10,19 +10,20 @@ /** * A repository that stores files in the local filesystem and registers them * in the wiki's own database. This is the most commonly used repository class. 
+ * * @ingroup FileRepo */ -class LocalRepo extends FSRepo { - var $fileFactory = array( 'LocalFile', 'newFromTitle' ); - var $fileFactoryKey = array( 'LocalFile', 'newFromKey' ); - var $oldFileFactory = array( 'OldLocalFile', 'newFromTitle' ); - var $oldFileFactoryKey = array( 'OldLocalFile', 'newFromKey' ); - var $fileFromRowFactory = array( 'LocalFile', 'newFromRow' ); - var $oldFileFromRowFactory = array( 'OldLocalFile', 'newFromRow' ); +class LocalRepo extends FileRepo { + var $fileFactory = array( 'LocalFile' , 'newFromTitle' ); + var $fileFactoryKey = array( 'LocalFile' , 'newFromKey' ); + var $fileFromRowFactory = array( 'LocalFile' , 'newFromRow' ); + var $oldFileFactory = array( 'OldLocalFile', 'newFromTitle' ); + var $oldFileFactoryKey = array( 'OldLocalFile', 'newFromKey' ); + var $oldFileFromRowFactory = array( 'OldLocalFile', 'newFromRow' ); /** * @throws MWException - * @param $row + * @param $row * @return File */ function newFileFromRow( $row ) { @@ -55,6 +56,7 @@ class LocalRepo extends FSRepo { * @return FileRepoStatus */ function cleanupDeletedBatch( $storageKeys ) { + $backend = $this->backend; // convenience $root = $this->getZonePath( 'deleted' ); $dbw = $this->getMasterDB(); $status = $this->newGood(); @@ -63,25 +65,14 @@ class LocalRepo extends FSRepo { $hashPath = $this->getDeletedHashPath( $key ); $path = "$root/$hashPath$key"; $dbw->begin(); - $inuse = $dbw->selectField( 'filearchive', '1', - array( 'fa_storage_group' => 'deleted', 'fa_storage_key' => $key ), - __METHOD__, array( 'FOR UPDATE' ) ); - if( !$inuse ) { - $sha1 = self::getHashFromKey( $key ); - $ext = substr( $key, strcspn( $key, '.' ) + 1 ); - $ext = File::normalizeExtension($ext); - $inuse = $dbw->selectField( 'oldimage', '1', - array( 'oi_sha1' => $sha1, - 'oi_archive_name ' . 
$dbw->buildLike( $dbw->anyString(), ".$ext" ), - $dbw->bitAnd('oi_deleted', File::DELETED_FILE) => File::DELETED_FILE ), - __METHOD__, array( 'FOR UPDATE' ) ); - } - if ( !$inuse ) { + // Check for usage in deleted/hidden files and pre-emptively + // lock the key to avoid any future use until we are finished. + $deleted = $this->deletedFileHasKey( $key, 'lock' ); + $hidden = $this->hiddenFileHasKey( $key, 'lock' ); + if ( !$deleted && !$hidden ) { // not in use now wfDebug( __METHOD__ . ": deleting $key\n" ); - wfSuppressWarnings(); - $unlink = unlink( $path ); - wfRestoreWarnings(); - if ( !$unlink ) { + $op = array( 'op' => 'delete', 'src' => $path ); + if ( !$backend->doOperation( $op )->isOK() ) { $status->error( 'undelete-cleanup-error', $path ); $status->failCount++; } @@ -95,6 +86,45 @@ class LocalRepo extends FSRepo { } /** + * Check if a deleted (filearchive) file has this sha1 key + * + * @param $key String File storage key (base-36 sha1 key with file extension) + * @param $lock String|null Use "lock" to lock the row via FOR UPDATE + * @return bool File with this key is in use + */ + protected function deletedFileHasKey( $key, $lock = null ) { + $options = ( $lock === 'lock' ) ? array( 'FOR UPDATE' ) : array(); + + $dbw = $this->getMasterDB(); + return (bool)$dbw->selectField( 'filearchive', '1', + array( 'fa_storage_group' => 'deleted', 'fa_storage_key' => $key ), + __METHOD__, $options + ); + } + + /** + * Check if a hidden (revision delete) file has this sha1 key + * + * @param $key String File storage key (base-36 sha1 key with file extension) + * @param $lock String|null Use "lock" to lock the row via FOR UPDATE + * @return bool File with this key is in use + */ + protected function hiddenFileHasKey( $key, $lock = null ) { + $options = ( $lock === 'lock' ) ? array( 'FOR UPDATE' ) : array(); + + $sha1 = self::getHashFromKey( $key ); + $ext = File::normalizeExtension( substr( $key, strcspn( $key, '.' 
) + 1 ) ); + + $dbw = $this->getMasterDB(); + return (bool)$dbw->selectField( 'oldimage', '1', + array( 'oi_sha1' => $sha1, + 'oi_archive_name ' . $dbw->buildLike( $dbw->anyString(), ".$ext" ), + $dbw->bitAnd( 'oi_deleted', File::DELETED_FILE ) => File::DELETED_FILE ), + __METHOD__, $options + ); + } + + /** * Gets the SHA1 hash from a storage key * * @param string $key @@ -108,16 +138,12 @@ class LocalRepo extends FSRepo { * Checks if there is a redirect named as $title * * @param $title Title of file + * @return bool */ - function checkRedirect( $title ) { + function checkRedirect( Title $title ) { global $wgMemc; - if( is_string( $title ) ) { - $title = Title::newFromText( $title ); - } - if( $title instanceof Title && $title->getNamespace() == NS_MEDIA ) { - $title = Title::makeTitle( NS_FILE, $title->getText() ); - } + $title = File::normalizeTitle( $title, 'exception' ); $memcKey = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) ); if ( $memcKey === false ) { @@ -161,6 +187,7 @@ class LocalRepo extends FSRepo { /** * Function link Title::getArticleID(). * We can't say Title object, what database it should use, so we duplicate that function here. + * * @param $title Title */ protected function getArticleID( $title ) { @@ -169,20 +196,23 @@ class LocalRepo extends FSRepo { } $dbr = $this->getSlaveDB(); $id = $dbr->selectField( - 'page', // Table - 'page_id', //Field - array( //Conditions + 'page', // Table + 'page_id', //Field + array( //Conditions 'page_namespace' => $title->getNamespace(), 'page_title' => $title->getDBkey(), ), - __METHOD__ //Function name + __METHOD__ //Function name ); return $id; } /** - * Get an array or iterator of file objects for files that have a given + * Get an array or iterator of file objects for files that have a given * SHA-1 content hash. 
+ * + * @param $hash String a sha1 hash to look for + * @return Array */ function findBySha1( $hash ) { $dbr = $this->getSlaveDB(); @@ -219,6 +249,8 @@ class LocalRepo extends FSRepo { * Get a key on the primary cache for this repository. * Returns false if the repository's cache is not accessible at this site. * The parameters are the parts of the key, as for wfMemcKey(). + * + * @return string */ function getSharedCacheKey( /*...*/ ) { $args = func_get_args(); @@ -229,8 +261,9 @@ class LocalRepo extends FSRepo { * Invalidates image redirect cache related to that image * * @param $title Title of page + * @return void */ - function invalidateImageRedirect( $title ) { + function invalidateImageRedirect( Title $title ) { global $wgMemc; $memcKey = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) ); if ( $memcKey ) { diff --git a/includes/filerepo/NullRepo.php b/includes/filerepo/NullRepo.php index cac3e5d8..65318f40 100644 --- a/includes/filerepo/NullRepo.php +++ b/includes/filerepo/NullRepo.php @@ -44,4 +44,7 @@ class NullRepo extends FileRepo { function findFile( $title, $options = array() ) { return false; } + function concatenate( $fileList, $targetPath, $flags = 0 ) { + return false; + } } diff --git a/includes/filerepo/README b/includes/filerepo/README index db46ff8a..885a1ded 100644 --- a/includes/filerepo/README +++ b/includes/filerepo/README @@ -42,18 +42,19 @@ Tim Starling, June 2007 Structure: -File.php defines an abstract class File. - ForeignAPIFile.php extends File. - LocalFile.php extends File. - ForeignDBFile.php extends LocalFile - Image.php extends LocalFile - UnregisteredLocalFile.php extends File. -FileRepo.php defined an abstract class FileRepo. - ForeignAPIRepo.php extends FileRepo +File defines an abstract class File. + ForeignAPIFile extends File. + LocalFile extends File. + ForeignDBFile extends LocalFile + Image extends LocalFile + UnregisteredLocalFile extends File. + UploadStashFile extends UnregisteredLocalFile. 
+FileRepo defines an abstract class FileRepo. + ForeignAPIRepo extends FileRepo FSRepo extends FileRepo - LocalRepo.php extends FSRepo - ForeignDBRepo.php extends LocalRepo - ForeignDBViaLBRepo.php extends LocalRepo + LocalRepo extends FSRepo + ForeignDBRepo extends LocalRepo + ForeignDBViaLBRepo extends LocalRepo NullRepo extends FileRepo Russ Nelson, March 2011 diff --git a/includes/filerepo/RepoGroup.php b/includes/filerepo/RepoGroup.php index d4875908..334ef2b8 100644 --- a/includes/filerepo/RepoGroup.php +++ b/includes/filerepo/RepoGroup.php @@ -7,10 +7,6 @@ */ /** - * @defgroup FileRepo FileRepo - */ - -/** * Prioritized list of file repositories * * @ingroup FileRepo @@ -56,6 +52,9 @@ class RepoGroup { /** * Set the singleton instance to a given object + * Used by extensions which hook into the Repo chain. + * It's not enough to just create a superclass ... you have + * to get people to call into it even though all they know is RepoGroup::singleton() * * @param $instance RepoGroup */ @@ -105,22 +104,15 @@ class RepoGroup { if ( !$this->reposInitialised ) { $this->initialiseRepos(); } - if ( !($title instanceof Title) ) { - $title = Title::makeTitleSafe( NS_FILE, $title ); - if ( !is_object( $title ) ) { - return false; - } - } - - if ( $title->getNamespace() != NS_MEDIA && $title->getNamespace() != NS_FILE ) { - throw new MWException( __METHOD__ . ' received an Title object with incorrect namespace' ); + $title = File::normalizeTitle( $title ); + if ( !$title ) { + return false; } # Check the cache if ( empty( $options['ignoreRedirect'] ) && empty( $options['private'] ) - && empty( $options['bypassCache'] ) - && $title->getNamespace() == NS_FILE ) + && empty( $options['bypassCache'] ) ) { $useCache = true; $time = isset( $options['time'] ) ? 
$options['time'] : ''; @@ -176,10 +168,10 @@ class RepoGroup { if ( !is_array( $item ) ) { $item = array( 'title' => $item ); } - if ( !( $item['title'] instanceof Title ) ) - $item['title'] = Title::makeTitleSafe( NS_FILE, $item['title'] ); - if ( $item['title'] ) + $item['title'] = File::normalizeTitle( $item['title'] ); + if ( $item['title'] ) { $items[$item['title']->getDBkey()] = $item; + } } $images = $this->localRepo->findFiles( $items ); @@ -198,7 +190,7 @@ class RepoGroup { /** * Interface for FileRepo::checkRedirect() */ - function checkRedirect( $title ) { + function checkRedirect( Title $title ) { if ( !$this->reposInitialised ) { $this->initialiseRepos(); } @@ -370,14 +362,14 @@ class RepoGroup { $repo = $this->getRepo( $repoName ); return $repo->getFileProps( $fileName ); } else { - return File::getPropsFromPath( $fileName ); + return FSFile::getPropsFromPath( $fileName ); } } /** * Limit cache memory */ - function trimCache() { + protected function trimCache() { while ( count( $this->cache ) >= self::MAX_CACHE_SIZE ) { reset( $this->cache ); $key = key( $this->cache ); @@ -385,4 +377,19 @@ class RepoGroup { unset( $this->cache[$key] ); } } + + /** + * Clear RepoGroup process cache used for finding a file + * @param $title Title|null Title of the file or null to clear all files + */ + public function clearCache( Title $title = null ) { + if ( $title == null ) { + $this->cache = array(); + } else { + $dbKey = $title->getDBkey(); + if ( isset( $this->cache[$dbKey] ) ) { + unset( $this->cache[$dbKey] ); + } + } + } } diff --git a/includes/filerepo/backend/FSFile.php b/includes/filerepo/backend/FSFile.php new file mode 100644 index 00000000..54dd1359 --- /dev/null +++ b/includes/filerepo/backend/FSFile.php @@ -0,0 +1,233 @@ +<?php +/** + * @file + * @ingroup FileBackend + */ + +/** + * Class representing a non-directory file on the file system + * + * @ingroup FileBackend + */ +class FSFile { + protected $path; // path to file + + /** + * Sets up the file 
object + * + * @param String $path Path to temporary file on local disk + */ + public function __construct( $path ) { + if ( FileBackend::isStoragePath( $path ) ) { + throw new MWException( __METHOD__ . " given storage path `$path`." ); + } + $this->path = $path; + } + + /** + * Returns the file system path + * + * @return String + */ + public function getPath() { + return $this->path; + } + + /** + * Checks if the file exists + * + * @return bool + */ + public function exists() { + return is_file( $this->path ); + } + + /** + * Get the file size in bytes + * + * @return int|false + */ + public function getSize() { + return filesize( $this->path ); + } + + /** + * Get the file's last-modified timestamp + * + * @return string|false TS_MW timestamp or false on failure + */ + public function getTimestamp() { + wfSuppressWarnings(); + $timestamp = filemtime( $this->path ); + wfRestoreWarnings(); + if ( $timestamp !== false ) { + $timestamp = wfTimestamp( TS_MW, $timestamp ); + } + return $timestamp; + } + + /** + * Guess the MIME type from the file contents alone + * + * @return string + */ + public function getMimeType() { + return MimeMagic::singleton()->guessMimeType( $this->path, false ); + } + + /** + * Get an associative array containing information about + * a file with the given storage path. + * + * @param $ext Mixed: the file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. 
+ * + * @return array + */ + public function getProps( $ext = true ) { + wfProfileIn( __METHOD__ ); + wfDebug( __METHOD__.": Getting file info for $this->path\n" ); + + $info = self::placeholderProps(); + $info['fileExists'] = $this->exists(); + + if ( $info['fileExists'] ) { + $magic = MimeMagic::singleton(); + + # get the file extension + if ( $ext === true ) { + $ext = self::extensionFromPath( $this->path ); + } + + # mime type according to file contents + $info['file-mime'] = $this->getMimeType(); + # logical mime type + $info['mime'] = $magic->improveTypeFromExtension( $info['file-mime'], $ext ); + + list( $info['major_mime'], $info['minor_mime'] ) = File::splitMime( $info['mime'] ); + $info['media_type'] = $magic->getMediaType( $this->path, $info['mime'] ); + + # Get size in bytes + $info['size'] = $this->getSize(); + + # Height, width and metadata + $handler = MediaHandler::getHandler( $info['mime'] ); + if ( $handler ) { + $tempImage = (object)array(); + $info['metadata'] = $handler->getMetadata( $tempImage, $this->path ); + $gis = $handler->getImageSize( $tempImage, $this->path, $info['metadata'] ); + if ( is_array( $gis ) ) { + $info = $this->extractImageSizeInfo( $gis ) + $info; + } + } + $info['sha1'] = $this->getSha1Base36(); + + wfDebug(__METHOD__.": $this->path loaded, {$info['size']} bytes, {$info['mime']}.\n"); + } else { + wfDebug(__METHOD__.": $this->path NOT FOUND!\n"); + } + + wfProfileOut( __METHOD__ ); + return $info; + } + + /** + * Placeholder file properties to use for files that don't exist + * + * @return Array + */ + public static function placeholderProps() { + $info = array(); + $info['fileExists'] = false; + $info['mime'] = null; + $info['media_type'] = MEDIATYPE_UNKNOWN; + $info['metadata'] = ''; + $info['sha1'] = ''; + $info['width'] = 0; + $info['height'] = 0; + $info['bits'] = 0; + return $info; + } + + /** + * Exract image size information + * + * @return Array + */ + protected function extractImageSizeInfo( array $gis ) { + 
$info = array(); + # NOTE: $gis[2] contains a code for the image type. This is no longer used. + $info['width'] = $gis[0]; + $info['height'] = $gis[1]; + if ( isset( $gis['bits'] ) ) { + $info['bits'] = $gis['bits']; + } else { + $info['bits'] = 0; + } + return $info; + } + + /** + * Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case + * encoding, zero padded to 31 digits. + * + * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 + * fairly neatly. + * + * @return false|string False on failure + */ + public function getSha1Base36() { + wfProfileIn( __METHOD__ ); + + wfSuppressWarnings(); + $hash = sha1_file( $this->path ); + wfRestoreWarnings(); + if ( $hash !== false ) { + $hash = wfBaseConvert( $hash, 16, 36, 31 ); + } + + wfProfileOut( __METHOD__ ); + return $hash; + } + + /** + * Get the final file extension from a file system path + * + * @param $path string + * @return string + */ + public static function extensionFromPath( $path ) { + $i = strrpos( $path, '.' ); + return strtolower( $i ? substr( $path, $i + 1 ) : '' ); + } + + /** + * Get an associative array containing information about a file in the local filesystem. + * + * @param $path String: absolute local filesystem path + * @param $ext Mixed: the file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. + * + * @return array + */ + public static function getPropsFromPath( $path, $ext = true ) { + $fsFile = new self( $path ); + return $fsFile->getProps( $ext ); + } + + /** + * Get a SHA-1 hash of a file in the local filesystem, in base-36 lower case + * encoding, zero padded to 31 digits. + * + * 160 log 2 / log 36 = 30.95, so the 160-bit hash fills 31 digits in base 36 + * fairly neatly. 
+ * + * @param $path string + * + * @return false|string False on failure + */ + public static function getSha1Base36FromPath( $path ) { + $fsFile = new self( $path ); + return $fsFile->getSha1Base36(); + } +} diff --git a/includes/filerepo/backend/FSFileBackend.php b/includes/filerepo/backend/FSFileBackend.php new file mode 100644 index 00000000..1a4c44ad --- /dev/null +++ b/includes/filerepo/backend/FSFileBackend.php @@ -0,0 +1,600 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * Class for a file system (FS) based file backend. + * + * All "containers" each map to a directory under the backend's base directory. + * For backwards-compatibility, some container paths can be set to custom paths. + * The wiki ID will not be used in any custom paths, so this should be avoided. + * + * Having directories with thousands of files will diminish performance. + * Sharding can be accomplished by using FileRepo-style hash paths. + * + * Status messages should avoid mentioning the internal FS paths. + * PHP warnings are assumed to be logged rather than output. + * + * @ingroup FileBackend + * @since 1.19 + */ +class FSFileBackend extends FileBackendStore { + protected $basePath; // string; directory holding the container directories + /** @var Array Map of container names to root paths */ + protected $containerPaths = array(); // for custom container paths + protected $fileMode; // integer; file permission mode + + protected $hadWarningErrors = array(); + + /** + * @see FileBackendStore::__construct() + * Additional $config params include: + * basePath : File system directory that holds containers. + * containerPaths : Map of container names to custom file system directories. + * This should only be used for backwards-compatibility. + * fileMode : Octal UNIX file permissions to use on files stored. 
+ */ + public function __construct( array $config ) { + parent::__construct( $config ); + + // Remove any possible trailing slash from directories + if ( isset( $config['basePath'] ) ) { + $this->basePath = rtrim( $config['basePath'], '/' ); // remove trailing slash + } else { + $this->basePath = null; // none; containers must have explicit paths + } + + if ( isset( $config['containerPaths'] ) ) { + $this->containerPaths = (array)$config['containerPaths']; + foreach ( $this->containerPaths as &$path ) { + $path = rtrim( $path, '/' ); // remove trailing slash + } + } + + $this->fileMode = isset( $config['fileMode'] ) + ? $config['fileMode'] + : 0644; + } + + /** + * @see FileBackendStore::resolveContainerPath() + */ + protected function resolveContainerPath( $container, $relStoragePath ) { + // Check that container has a root directory + if ( isset( $this->containerPaths[$container] ) || isset( $this->basePath ) ) { + // Check for sane relative paths (assume the base paths are OK) + if ( $this->isLegalRelPath( $relStoragePath ) ) { + return $relStoragePath; + } + } + return null; + } + + /** + * Sanity check a relative file system path for validity + * + * @param $path string Normalized relative path + * @return bool + */ + protected function isLegalRelPath( $path ) { + // Check for file names longer than 255 chars + if ( preg_match( '![^/]{256}!', $path ) ) { // ext3/NTFS + return false; + } + if ( wfIsWindows() ) { // NTFS + return !preg_match( '![:*?"<>|]!', $path ); + } else { + return true; + } + } + + /** + * Given the short (unresolved) and full (resolved) name of + * a container, return the file system path of the container. 
+ * + * @param $shortCont string + * @param $fullCont string + * @return string|null + */ + protected function containerFSRoot( $shortCont, $fullCont ) { + if ( isset( $this->containerPaths[$shortCont] ) ) { + return $this->containerPaths[$shortCont]; + } elseif ( isset( $this->basePath ) ) { + return "{$this->basePath}/{$fullCont}"; + } + return null; // no container base path defined + } + + /** + * Get the absolute file system path for a storage path + * + * @param $storagePath string Storage path + * @return string|null + */ + protected function resolveToFSPath( $storagePath ) { + list( $fullCont, $relPath ) = $this->resolveStoragePathReal( $storagePath ); + if ( $relPath === null ) { + return null; // invalid + } + list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $storagePath ); + $fsPath = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid + if ( $relPath != '' ) { + $fsPath .= "/{$relPath}"; + } + return $fsPath; + } + + /** + * @see FileBackendStore::isPathUsableInternal() + */ + public function isPathUsableInternal( $storagePath ) { + $fsPath = $this->resolveToFSPath( $storagePath ); + if ( $fsPath === null ) { + return false; // invalid + } + $parentDir = dirname( $fsPath ); + + if ( file_exists( $fsPath ) ) { + $ok = is_file( $fsPath ) && is_writable( $fsPath ); + } else { + $ok = is_dir( $parentDir ) && is_writable( $parentDir ); + } + + return $ok; + } + + /** + * @see FileBackendStore::doStoreInternal() + */ + protected function doStoreInternal( array $params ) { + $status = Status::newGood(); + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); 
+ return $status; + } + } + + $ok = copy( $params['src'], $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-store', $params['src'], $params['dst'] ); + return $status; + } + + $this->chmod( $dest ); + + return $status; + } + + /** + * @see FileBackendStore::doCopyInternal() + */ + protected function doCopyInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } else { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } + + $ok = copy( $source, $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } + + $this->chmod( $dest ); + + return $status; + } + + /** + * @see FileBackendStore::doMoveInternal() + */ + protected function doMoveInternal( array $params ) { + $status = Status::newGood(); + + $source = $this->resolveToFSPath( $params['src'] ); + if ( $source === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + $dest = $this->resolveToFSPath( $params['dst'] ); + if ( $dest === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + if ( file_exists( $dest ) ) { + if ( !empty( $params['overwrite'] ) ) { + // Windows does not support moving over existing files + if ( wfIsWindows() ) { + $ok = unlink( $dest ); + if ( !$ok ) { + $status->fatal( 'backend-fail-delete', $params['dst'] ); + return $status; + } + } + } else { + $status->fatal( 
'backend-fail-alreadyexists', $params['dst'] );
				return $status;
			}
		}

		$ok = rename( $source, $dest );
		clearstatcache(); // file no longer at source
		if ( !$ok ) {
			$status->fatal( 'backend-fail-move', $params['src'], $params['dst'] );
			return $status;
		}

		return $status;
	}

	/**
	 * Delete the regular file that a storage path resolves to.
	 *
	 * @see FileBackendStore::doDeleteInternal()
	 *
	 * @param $params Array Reads 'src' and the optional 'ignoreMissingSource' flag
	 * @return Status Fatal if the path cannot be resolved, if the source is not
	 *   a regular file (unless 'ignoreMissingSource' is set) or if unlink() fails
	 */
	protected function doDeleteInternal( array $params ) {
		$status = Status::newGood();

		$source = $this->resolveToFSPath( $params['src'] );
		if ( $source === null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['src'] );
			return $status;
		}

		if ( !is_file( $source ) ) {
			if ( empty( $params['ignoreMissingSource'] ) ) {
				$status->fatal( 'backend-fail-delete', $params['src'] );
			}
			return $status; // do nothing; either OK or bad status
		}

		$ok = unlink( $source );
		if ( !$ok ) {
			$status->fatal( 'backend-fail-delete', $params['src'] );
			return $status;
		}

		return $status;
	}

	/**
	 * Write a string to the file that a storage path resolves to.
	 *
	 * @see FileBackendStore::doCreateInternal()
	 *
	 * @param $params Array Reads 'dst', 'content' and the optional 'overwrite' flag
	 * @return Status Fatal if the path cannot be resolved, if the destination
	 *   exists and 'overwrite' is not set, or if removing/writing the file fails
	 */
	protected function doCreateInternal( array $params ) {
		$status = Status::newGood();

		$dest = $this->resolveToFSPath( $params['dst'] );
		if ( $dest === null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['dst'] );
			return $status;
		}

		if ( file_exists( $dest ) ) {
			if ( !empty( $params['overwrite'] ) ) {
				// Remove the old file first so the write starts from a clean slate
				$ok = unlink( $dest );
				if ( !$ok ) {
					$status->fatal( 'backend-fail-delete', $params['dst'] );
					return $status;
				}
			} else {
				$status->fatal( 'backend-fail-alreadyexists', $params['dst'] );
				return $status;
			}
		}

		$bytes = file_put_contents( $dest, $params['content'] );
		if ( $bytes === false ) {
			$status->fatal( 'backend-fail-create', $params['dst'] );
			return $status;
		}

		$this->chmod( $dest );

		return $status;
	}

	/**
	 * Create a directory (and its parents) and check that it is usable.
	 *
	 * @see FileBackendStore::doPrepareInternal()
	 *
	 * @param $fullCont string Passed through to containerFSRoot()
	 * @param $dirRel string Directory relative to the container root; may be ''
	 * @param $params Array Reads 'dir' (the storage path, used for messages)
	 * @return Status Fatal if the directory cannot be created, written or read
	 */
	protected function doPrepareInternal( $fullCont, $dirRel, array $params ) {
		$status = Status::newGood();
		list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] );
		$contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid
		$dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot;
		if ( !wfMkdirParents( $dir ) ) { // make directory and its parents
			$status->fatal( 'directorycreateerror', $params['dir'] );
		} elseif ( !is_writable( $dir ) ) {
			$status->fatal( 'directoryreadonlyerror', $params['dir'] );
		} elseif ( !is_readable( $dir ) ) {
			$status->fatal( 'directorynotreadableerror', $params['dir'] );
		}
		return $status;
	}

	/**
	 * Drop an empty index.html into a directory and/or a "Deny from all"
	 * .htaccess into the container root, when the matching flag is set.
	 *
	 * @see FileBackendStore::doSecureInternal()
	 *
	 * @param $fullCont string Passed through to containerFSRoot()
	 * @param $dirRel string Directory relative to the container root; may be ''
	 * @param $params Array Reads 'dir' and the optional 'noListing' and
	 *   'noAccess' flags
	 * @return Status Fatal if one of the files could not be written
	 */
	protected function doSecureInternal( $fullCont, $dirRel, array $params ) {
		$status = Status::newGood();
		list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] );
		$contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid
		$dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot;
		// Seed new directories with a blank index.html, to prevent crawling...
		if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) {
			$bytes = file_put_contents( "{$dir}/index.html", '' );
			// Writing an empty string succeeds with 0 bytes, so only a strict
			// false is a failure; a loose "!$bytes" check would always fail here.
			if ( $bytes === false ) {
				$status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' );
				return $status;
			}
		}
		// Add a .htaccess file to the root of the container...
		if ( !empty( $params['noAccess'] ) ) {
			if ( !file_exists( "{$contRoot}/.htaccess" ) ) {
				$bytes = file_put_contents( "{$contRoot}/.htaccess", "Deny from all\n" );
				// Content is non-empty, so 0 bytes written is a failure as well
				if ( !$bytes ) {
					$storeDir = "mwstore://{$this->name}/{$shortCont}";
					$status->fatal( 'backend-fail-create', "{$storeDir}/.htaccess" );
					return $status;
				}
			}
		}
		return $status;
	}

	/**
	 * Remove a directory if it is empty; failures are silently ignored.
	 *
	 * @see FileBackendStore::doCleanInternal()
	 *
	 * @param $fullCont string Passed through to containerFSRoot()
	 * @param $dirRel string Directory relative to the container root; may be ''
	 * @param $params Array Reads 'dir'
	 * @return Status Always the good status created at the top
	 */
	protected function doCleanInternal( $fullCont, $dirRel, array $params ) {
		$status = Status::newGood();
		list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] );
		$contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid
		$dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot;
		wfSuppressWarnings();
		if ( is_dir( $dir ) ) {
			rmdir( $dir ); // remove directory if empty
		}
		wfRestoreWarnings();
		return $status;
	}

	/**
	 * Stat the regular file that a storage path resolves to.
	 *
	 * @see FileBackendStore::doGetFileStat()
	 *
	 * @param $params Array Reads 'src'
	 * @return Array|false|null Array with 'mtime' (TS_MW) and 'size' on success,
	 *   false if the path is invalid or no regular file exists there, and
	 *   null if a warning was raised while checking
	 */
	protected function doGetFileStat( array $params ) {
		$source = $this->resolveToFSPath( $params['src'] );
		if ( $source === null ) {
			return false; // invalid storage path
		}

		$this->trapWarnings(); // don't trust 'false' if there were errors
		$stat = is_file( $source ) ? stat( $source ) : false; // regular files only
		$hadError = $this->untrapWarnings();

		if ( $stat ) {
			return array(
				'mtime' => wfTimestamp( TS_MW, $stat['mtime'] ),
				'size' => $stat['size']
			);
		} elseif ( !$hadError ) {
			return false; // file does not exist
		} else {
			return null; // failure
		}
	}

	/**
	 * Clear PHP's stat cache; $paths is not consulted.
	 *
	 * @see FileBackendStore::doClearCache()
	 *
	 * @param $paths Array|null Unused here
	 */
	protected function doClearCache( array $paths = null ) {
		clearstatcache(); // clear the PHP file stat cache
	}

	/**
	 * Get an iterator over the files under a directory.
	 *
	 * @see FileBackendStore::getFileListInternal()
	 *
	 * @param $fullCont string Passed through to containerFSRoot()
	 * @param $dirRel string Directory relative to the container root; may be ''
	 * @param $params Array Reads 'dir'
	 * @return FSFileBackendFileList|Array|null Empty array if the directory
	 *   does not exist, null if it is not readable
	 */
	public function getFileListInternal( $fullCont, $dirRel, array $params ) {
		list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] );
		$contRoot = $this->containerFSRoot( $shortCont, $fullCont ); // must be valid
		$dir = ( $dirRel != '' ) ? "{$contRoot}/{$dirRel}" : $contRoot;
		$exists = is_dir( $dir );
		if ( !$exists ) {
			wfDebug( __METHOD__ . "() given directory does not exist: '$dir'\n" );
			return array(); // nothing under this dir
		}
		$readable = is_readable( $dir );
		if ( !$readable ) {
			wfDebug( __METHOD__ . "() given directory is unreadable: '$dir'\n" );
			return null; // bad permissions?
		}
		return new FSFileBackendFileList( $dir );
	}

	/**
	 * Wrap the resolved file system path in an FSFile (no copy is made).
	 *
	 * @see FileBackendStore::getLocalReference()
	 *
	 * @param $params Array Reads 'src'
	 * @return FSFile|null Null if the storage path cannot be resolved
	 */
	public function getLocalReference( array $params ) {
		$source = $this->resolveToFSPath( $params['src'] );
		if ( $source === null ) {
			return null;
		}
		return new FSFile( $source );
	}

	/**
	 * Copy the file at a storage path into a new temporary file.
	 *
	 * @see FileBackendStore::getLocalCopy()
	 *
	 * @param $params Array Reads 'src'
	 * @return TempFSFile|null Null if the path cannot be resolved, the
	 *   temporary file cannot be created or the copy fails
	 */
	public function getLocalCopy( array $params ) {
		$source = $this->resolveToFSPath( $params['src'] );
		if ( $source === null ) {
			return null;
		}

		// Create a new temporary file with the same extension...
		$ext = FileBackend::extensionFromPath( $params['src'] );
		$tmpFile = TempFSFile::factory( wfBaseName( $source ) . '_', $ext );
		if ( !$tmpFile ) {
			return null;
		}
		$tmpPath = $tmpFile->getPath();

		// Copy the source file over the temp file
		$ok = copy( $source, $tmpPath );
		if ( !$ok ) {
			return null;
		}

		$this->chmod( $tmpPath );

		return $tmpFile;
	}

	/**
	 * Chmod a file to $this->fileMode, suppressing the warnings
	 *
	 * @param $path string Absolute file system path
	 * @return bool Success
	 */
	protected function chmod( $path ) {
		wfSuppressWarnings();
		$ok = chmod( $path, $this->fileMode );
		wfRestoreWarnings();

		return $ok;
	}

	/**
	 * Listen for E_WARNING errors and track whether any happen.
	 * Calls nest: each call pushes a fresh flag that the matching
	 * untrapWarnings() call pops.
	 *
	 * @return bool Always false
	 */
	protected function trapWarnings() {
		$this->hadWarningErrors[] = false; // push to stack
		set_error_handler( array( $this, 'handleWarning' ), E_WARNING );
		// NOTE(review): this is not the error handler's own return value, so it
		// does not select PHP's normal handler; doGetFileStat() ignores it.
		return false;
	}

	/**
	 * Stop listening for E_WARNING errors and return true if any happened
	 *
	 * @return bool
	 */
	protected function untrapWarnings() {
		restore_error_handler(); // restore previous handler
		return array_pop( $this->hadWarningErrors ); // pop from stack
	}

	/**
	 * Error handler installed by trapWarnings(): flags the innermost trap.
	 *
	 * @return bool Always true
	 */
	private function handleWarning() {
		$this->hadWarningErrors[count( $this->hadWarningErrors ) - 1] = true;
		return true; // suppress from PHP handler
	}
}

/**
 * Wrapper around RecursiveDirectoryIterator that catches
 * exception or does any custom behavior that we may want.
 * Do not use this class from places outside FSFileBackend.
*
 * @ingroup FileBackend
 */
class FSFileBackendFileList implements Iterator {
	/** @var RecursiveIteratorIterator|null Null when the directory cannot be iterated */
	protected $iter;
	protected $suffixStart; // integer
	protected $pos = 0; // integer

	/**
	 * @param $dir string file system directory
	 */
	public function __construct( $dir ) {
		$dir = realpath( $dir ); // normalize
		if ( $dir === false ) {
			// realpath() could not resolve the directory; behave like an
			// empty listing instead of handing a bad path to the iterator.
			$this->suffixStart = 0;
			$this->iter = null;
			return;
		}
		$this->suffixStart = strlen( $dir ) + 1; // size of "path/to/dir/"
		try {
			# Get an iterator that will return leaf nodes (non-directories)
			if ( MWInit::classExists( 'FilesystemIterator' ) ) { // PHP >= 5.3
				# RecursiveDirectoryIterator extends FilesystemIterator.
				# FilesystemIterator::SKIP_DOTS default is inconsistent in PHP 5.3.x.
				$flags = FilesystemIterator::CURRENT_AS_FILEINFO | FilesystemIterator::SKIP_DOTS;
				$this->iter = new RecursiveIteratorIterator(
					new RecursiveDirectoryIterator( $dir, $flags ) );
			} else { // PHP < 5.3
				# RecursiveDirectoryIterator extends DirectoryIterator
				$this->iter = new RecursiveIteratorIterator(
					new RecursiveDirectoryIterator( $dir ) );
			}
		} catch ( UnexpectedValueException $e ) {
			$this->iter = null; // bad permissions? deleted?
		}
	}

	/**
	 * @return string|null Path relative to the listed directory, with
	 *   forward slashes; null if the underlying iterator is unavailable
	 */
	public function current() {
		if ( $this->iter === null ) {
			return null; // valid() is false in this state
		}
		// Return only the relative path and normalize slashes to FileBackend-style
		// Make sure to use the realpath since the suffix is based upon that
		return str_replace( '\\', '/',
			substr( realpath( $this->iter->current() ), $this->suffixStart ) );
	}

	/**
	 * @return integer Zero-based position, counted by next() calls
	 */
	public function key() {
		return $this->pos;
	}

	/**
	 * Advance the iterator; a failure while advancing ends the listing.
	 */
	public function next() {
		if ( $this->iter !== null ) {
			try {
				$this->iter->next();
			} catch ( UnexpectedValueException $e ) {
				$this->iter = null;
			}
		}
		++$this->pos;
	}

	/**
	 * Restart the iteration; a failure while rewinding ends the listing.
	 */
	public function rewind() {
		$this->pos = 0;
		if ( $this->iter === null ) {
			return; // nothing to rewind; calling a method on null would be fatal
		}
		try {
			$this->iter->rewind();
		} catch ( UnexpectedValueException $e ) {
			$this->iter = null;
		}
	}

	/**
	 * @return bool False once the iterator is exhausted or unavailable
	 */
	public function valid() {
		return $this->iter && $this->iter->valid();
	}
}
diff --git a/includes/filerepo/backend/FileBackend.php b/includes/filerepo/backend/FileBackend.php new file mode 100644 index 00000000..9433bcb4 --- /dev/null +++ b/includes/filerepo/backend/FileBackend.php @@ -0,0 +1,1739 @@
<?php
/**
 * @defgroup FileBackend File backend
 * @ingroup FileRepo
 *
 * This module regroup classes meant for MediaWiki to interacts with
 */

/**
 * @file
 * @ingroup FileBackend
 * @author Aaron Schulz
 */

/**
 * Base class for all file backend classes (including multi-write backends).
 *
 * This class defines the methods as abstract that subclasses must implement.
 * Outside callers can assume that all backends will have these functions.
 *
 * All "storage paths" are of the format "mwstore://backend/container/path".
 * The paths use UNIX file system (FS) notation, though any particular backend may
 * not actually be using a local filesystem. Therefore, the paths are only virtual.
 *
 * Backend contents are stored under wiki-specific container names by default.
 * For legacy reasons, this has no effect for the FS backend class, and per-wiki
 * segregation must be done by setting the container paths appropriately.
+ * + * FS-based backends are somewhat more restrictive due to the existence of real + * directory files; a regular file cannot have the same name as a directory. Other + * backends with virtual directories may not have this limitation. Callers should + * store files in such a way that no files and directories are under the same path. + * + * Methods should avoid throwing exceptions at all costs. + * As a corollary, external dependencies should be kept to a minimum. + * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileBackend { + protected $name; // string; unique backend name + protected $wikiId; // string; unique wiki name + protected $readOnly; // string; read-only explanation message + /** @var LockManager */ + protected $lockManager; + + /** + * Create a new backend instance from configuration. + * This should only be called from within FileBackendGroup. + * + * $config includes: + * 'name' : The unique name of this backend. + * This should consist of alphanumeric, '-', and '_' characters. + * This name should not be changed after use. + * 'wikiId' : Prefix to container names that is unique to this wiki. + * This should consist of alphanumeric, '-', and '_' characters. + * 'lockManager' : Registered name of a file lock manager to use. + * 'readOnly' : Write operations are disallowed if this is a non-empty string. + * It should be an explanation for the backend being read-only. + * + * @param $config Array + */ + public function __construct( array $config ) { + $this->name = $config['name']; + if ( !preg_match( '!^[a-zA-Z0-9-_]{1,255}$!', $this->name ) ) { + throw new MWException( "Backend name `{$this->name}` is invalid." ); + } + $this->wikiId = isset( $config['wikiId'] ) + ? $config['wikiId'] + : wfWikiID(); // e.g. "my_wiki-en_" + $this->lockManager = ( $config['lockManager'] instanceof LockManager ) + ?
$config['lockManager'] + : LockManagerGroup::singleton()->get( $config['lockManager'] ); + $this->readOnly = isset( $config['readOnly'] ) + ? (string)$config['readOnly'] + : ''; + } + + /** + * Get the unique backend name. + * We may have multiple different backends of the same type. + * For example, we can have two Swift backends using different proxies. + * + * @return string + */ + final public function getName() { + return $this->name; + } + + /** + * Check if this backend is read-only + * + * @return bool + */ + final public function isReadOnly() { + return ( $this->readOnly != '' ); + } + + /** + * Get an explanatory message if this backend is read-only + * + * @return string|false Returns false if the backend is not read-only + */ + final public function getReadOnlyReason() { + return ( $this->readOnly != '' ) ? $this->readOnly : false; + } + + /** + * This is the main entry point into the backend for write operations. + * Callers supply an ordered list of operations to perform as a transaction. + * Files will be locked, the stat cache cleared, and then the operations attempted. + * If any serious errors occur, all attempted operations will be rolled back. + * + * $ops is an array of arrays. The outer array holds a list of operations. + * Each inner array is a set of key value pairs that specify an operation.
+ * + * Supported operations and their parameters: + * a) Create a new file in storage with the contents of a string + * array( + * 'op' => 'create', + * 'dst' => <storage path>, + * 'content' => <string of new file contents>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * b) Copy a file system file into storage + * array( + * 'op' => 'store', + * 'src' => <file system path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * c) Copy a file within storage + * array( + * 'op' => 'copy', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * d) Move a file within storage + * array( + * 'op' => 'move', + * 'src' => <storage path>, + * 'dst' => <storage path>, + * 'overwrite' => <boolean>, + * 'overwriteSame' => <boolean> + * ) + * e) Delete a file within storage + * array( + * 'op' => 'delete', + * 'src' => <storage path>, + * 'ignoreMissingSource' => <boolean> + * ) + * f) Do nothing (no-op) + * array( + * 'op' => 'null', + * ) + * + * Boolean flags for operations (operation-specific): + * 'ignoreMissingSource' : The operation will simply succeed and do + * nothing if the source file does not exist. + * 'overwrite' : Any destination file will be overwritten. + * 'overwriteSame' : An error will not be given if a file already + * exists at the destination that has the same + * contents as the new contents to be written there. + * + * $opts is an associative of boolean flags, including: + * 'force' : Errors that would normally cause a rollback do not. + * The remaining operations are still attempted if any fail. + * 'nonLocking' : No locks are acquired for the operations. + * This can increase performance for non-critical writes. + * This has no effect unless the 'force' flag is set. + * 'allowStale' : Don't require the latest available data. + * This can increase performance for non-critical writes. 
+ * This has no effect unless the 'force' flag is set. + * + * Remarks on locking: + * File system paths given to operations should refer to files that are + * already locked or otherwise safe from modification from other processes. + * Normally these files will be new temp files, which should be adequate. + * + * Return value: + * This returns a Status, which contains all warnings and fatals that occurred + * during the operation. The 'failCount', 'successCount', and 'success' members + * will reflect each operation attempted. The status will be "OK" unless: + * a) unexpected operation errors occurred (network partitions, disk full...) + * b) significant operation errors occurred and 'force' was not set + * + * @param $ops Array List of operations to execute in order + * @param $opts Array Batch operation options + * @return Status + */ + final public function doOperations( array $ops, array $opts = array() ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + if ( empty( $opts['force'] ) ) { // sanity + unset( $opts['nonLocking'] ); + unset( $opts['allowStale'] ); + } + return $this->doOperationsInternal( $ops, $opts ); + } + + /** + * @see FileBackend::doOperations() + */ + abstract protected function doOperationsInternal( array $ops, array $opts ); + + /** + * Same as doOperations() except it takes a single operation. + * If you are doing a batch of operations that should either + * all succeed or all fail, then use that function instead. + * + * @see FileBackend::doOperations() + * + * @param $op Array Operation + * @param $opts Array Operation options + * @return Status + */ + final public function doOperation( array $op, array $opts = array() ) { + return $this->doOperations( array( $op ), $opts ); + } + + /** + * Performs a single create operation. + * This sets $params['op'] to 'create' and passes it to doOperation().
+ * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function create( array $params, array $opts = array() ) { + $params['op'] = 'create'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single store operation. + * This sets $params['op'] to 'store' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function store( array $params, array $opts = array() ) { + $params['op'] = 'store'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single copy operation. + * This sets $params['op'] to 'copy' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function copy( array $params, array $opts = array() ) { + $params['op'] = 'copy'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single move operation. + * This sets $params['op'] to 'move' and passes it to doOperation(). + * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function move( array $params, array $opts = array() ) { + $params['op'] = 'move'; + return $this->doOperation( $params, $opts ); + } + + /** + * Performs a single delete operation. + * This sets $params['op'] to 'delete' and passes it to doOperation(). 
+ * + * @see FileBackend::doOperation() + * + * @param $params Array Operation parameters + * @param $opts Array Operation options + * @return Status + */ + final public function delete( array $params, array $opts = array() ) { + $params['op'] = 'delete'; + return $this->doOperation( $params, $opts ); + } + + /** + * Concatenate a list of storage files into a single file system file. + * The target path should refer to a file that is already locked or + * otherwise safe from modification from other processes. Normally, + * the file will be a new temp file, which should be adequate. + * $params include: + * srcs : ordered source storage paths (e.g. chunk1, chunk2, ...) + * dst : file system path to 0-byte temp file + * + * @param $params Array Operation parameters + * @return Status + */ + abstract public function concatenate( array $params ); + + /** + * Prepare a storage directory for usage. + * This will create any required containers and parent directories. + * Backends using key/value stores only need to create the container. + * + * $params include: + * dir : storage directory + * + * @param $params Array + * @return Status + */ + final public function prepare( array $params ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doPrepare( $params ); + } + + /** + * @see FileBackend::prepare() + */ + abstract protected function doPrepare( array $params ); + + /** + * Take measures to block web access to a storage directory and + * the container it belongs to. FS backends might add .htaccess + * files whereas key/value store backends might restrict container + * access to the auth user that represents end-users in web request. + * This is not guaranteed to actually do anything. 
+ * + * $params include: + * dir : storage directory + * noAccess : try to deny file access + * noListing : try to deny file listing + * + * @param $params Array + * @return Status + */ + final public function secure( array $params ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + $status = $this->doPrepare( $params ); // dir must exist to restrict it + if ( $status->isOK() ) { + $status->merge( $this->doSecure( $params ) ); + } + return $status; + } + + /** + * @see FileBackend::secure() + */ + abstract protected function doSecure( array $params ); + + /** + * Delete a storage directory if it is empty. + * Backends using key/value stores may do nothing unless the directory + * is that of an empty container, in which case it should be deleted. + * + * $params include: + * dir : storage directory + * + * @param $params Array + * @return Status + */ + final public function clean( array $params ) { + if ( $this->isReadOnly() ) { + return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly ); + } + return $this->doClean( $params ); + } + + /** + * @see FileBackend::clean() + */ + abstract protected function doClean( array $params ); + + /** + * Check if a file exists at a storage path in the backend. + * This returns false if only a directory exists at the path. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return bool|null Returns null on failure + */ + abstract public function fileExists( array $params ); + + /** + * Get the last-modified timestamp of the file at a storage path. 
+ * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return string|false TS_MW timestamp or false on failure + */ + abstract public function getFileTimestamp( array $params ); + + /** + * Get the contents of a file at a storage path in the backend. + * This should be avoided for potentially large files. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return string|false Returns false on failure + */ + abstract public function getFileContents( array $params ); + + /** + * Get the size (bytes) of a file at a storage path in the backend. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return integer|false Returns false on failure + */ + abstract public function getFileSize( array $params ); + + /** + * Get quick information about a file at a storage path in the backend. + * If the file does not exist, then this returns false. + * Otherwise, the result is an associative array that includes: + * mtime : the last-modified timestamp (TS_MW) + * size : the file size (bytes) + * Additional values may be included for internal use only. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return Array|false|null Returns null on failure + */ + abstract public function getFileStat( array $params ); + + /** + * Get a SHA-1 hash of the file at a storage path in the backend. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return string|false Hash string or false on failure + */ + abstract public function getFileSha1Base36( array $params ); + + /** + * Get the properties of the file at a storage path in the backend. + * Returns FSFile::placeholderProps() on failure. 
+ * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return Array + */ + abstract public function getFileProps( array $params ); + + /** + * Stream the file at a storage path in the backend. + * If the file does not exist, a 404 error will be given. + * Appropriate HTTP headers (Status, Content-Type, Content-Length) + * must be sent if streaming began, while none should be sent otherwise. + * Implementations should flush the output buffer before sending data. + * + * $params include: + * src : source storage path + * headers : additional HTTP headers to send on success + * latest : use the latest available data + * + * @param $params Array + * @return Status + */ + abstract public function streamFile( array $params ); + + /** + * Returns a file system file, identical to the file at a storage path. + * The file returned is either: + * a) A local copy of the file at a storage path in the backend. + * The temporary copy will have the same extension as the source. + * b) An original of the file at a storage path in the backend. + * Temporary files may be purged when the file object falls out of scope. + * + * Write operations should *never* be done on this file as some backends + * may do internal tracking or may be instances of FileBackendMultiWrite. + * In that latter case, there are copies of the file that must stay in sync. + * Additionally, further calls to this function may return the same file. + * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return FSFile|null Returns null on failure + */ + abstract public function getLocalReference( array $params ); + + /** + * Get a local copy on disk of the file at a storage path in the backend. + * The temporary copy will have the same file extension as the source. + * Temporary files may be purged when the file object falls out of scope.
+ * + * $params include: + * src : source storage path + * latest : use the latest available data + * + * @param $params Array + * @return TempFSFile|null Returns null on failure + */ + abstract public function getLocalCopy( array $params ); + + /** + * Get an iterator to list out all stored files under a storage directory. + * If the directory is of the form "mwstore://backend/container", + * then all files in the container should be listed. + * If the directory is of form "mwstore://backend/container/dir", + * then all files under that container directory should be listed. + * Results should be storage paths relative to the given directory. + * + * Storage backends with eventual consistency might return stale data. + * + * $params include: + * dir : storage path directory + * + * @return Traversable|Array|null Returns null on failure + */ + abstract public function getFileList( array $params ); + + /** + * Invalidate any in-process file existence and property cache. + * If $paths is given, then only the cache for those files will be cleared. + * + * @param $paths Array Storage paths (optional) + * @return void + */ + public function clearCache( array $paths = null ) {} + + /** + * Lock the files at the given storage paths in the backend. + * This will either lock all the files or none (on failure). + * + * Callers should consider using getScopedFileLocks() instead. + * + * @param $paths Array Storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function lockFiles( array $paths, $type ) { + return $this->lockManager->lock( $paths, $type ); + } + + /** + * Unlock the files at the given storage paths in the backend. 
+ * + * @param $paths Array Storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function unlockFiles( array $paths, $type ) { + return $this->lockManager->unlock( $paths, $type ); + } + + /** + * Lock the files at the given storage paths in the backend. + * This will either lock all the files or none (on failure). + * On failure, the status object will be updated with errors. + * + * Once the return value goes out of scope, the locks will be released and + * the status updated. Unlock fatals will not change the status "OK" value. + * + * @param $paths Array Storage paths + * @param $type integer LockManager::LOCK_* constant + * @param $status Status Status to update on lock/unlock + * @return ScopedLock|null Returns null on failure + */ + final public function getScopedFileLocks( array $paths, $type, Status $status ) { + return ScopedLock::factory( $this->lockManager, $paths, $type, $status ); + } + + /** + * Check if a given path is a "mwstore://" path. + * This does not do any further validation or any existence checks. + * + * @param $path string + * @return bool + */ + final public static function isStoragePath( $path ) { + return ( strpos( $path, 'mwstore://' ) === 0 ); + } + + /** + * Split a storage path into a backend name, a container name, + * and a relative file path. The relative path may be the empty string. + * This does not do any path normalization or traversal checks. + * + * @param $storagePath string + * @return Array (backend, container, rel object) or (null, null, null) + */ + final public static function splitStoragePath( $storagePath ) { + if ( self::isStoragePath( $storagePath ) ) { + // Remove the "mwstore://" prefix and split the path + $parts = explode( '/', substr( $storagePath, 10 ), 3 ); + if ( count( $parts ) >= 2 && $parts[0] != '' && $parts[1] != '' ) { + if ( count( $parts ) == 3 ) { + return $parts; // e.g.
"backend/container/path" + } else { + return array( $parts[0], $parts[1], '' ); // e.g. "backend/container" + } + } + } + return array( null, null, null ); + } + + /** + * Normalize a storage path by cleaning up directory separators. + * Returns null if the path is not of the format of a valid storage path. + * + * @param $storagePath string + * @return string|null + */ + final public static function normalizeStoragePath( $storagePath ) { + list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath ); + if ( $relPath !== null ) { // must be for this backend + $relPath = self::normalizeContainerPath( $relPath ); + if ( $relPath !== null ) { + return ( $relPath != '' ) + ? "mwstore://{$backend}/{$container}/{$relPath}" + : "mwstore://{$backend}/{$container}"; + } + } + return null; + } + + /** + * Validate and normalize a relative storage path. + * Null is returned if the path involves directory traversal. + * Traversal is insecure for FS backends and broken for others. + * + * @param $path string Storage path relative to a container + * @return string|null + */ + final protected static function normalizeContainerPath( $path ) { + // Normalize directory separators + $path = strtr( $path, '\\', '/' ); + // Collapse any consecutive directory separators + $path = preg_replace( '![/]{2,}!', '/', $path ); + // Remove any leading directory separator + $path = ltrim( $path, '/' ); + // Use the same traversal protection as Title::secureAndSplit() + if ( strpos( $path, '.' ) !== false ) { + if ( + $path === '.' || + $path === '..' || + strpos( $path, './' ) === 0 || + strpos( $path, '../' ) === 0 || + strpos( $path, '/./' ) !== false || + strpos( $path, '/../' ) !== false + ) { + return null; + } + } + return $path; + } + + /** + * Get the parent storage directory of a storage path. + * This returns a path like "mwstore://backend/container", + * "mwstore://backend/container/...", or null if there is no parent. 
+ * + * @param $storagePath string + * @return string|null + */ + final public static function parentStoragePath( $storagePath ) { + $storagePath = dirname( $storagePath ); + list( $b, $cont, $rel ) = self::splitStoragePath( $storagePath ); + return ( $rel === null ) ? null : $storagePath; + } + + /** + * Get the final extension from a storage or FS path + * + * @param $path string + * @return string + */ + final public static function extensionFromPath( $path ) { + $i = strrpos( $path, '.' ); + return strtolower( $i ? substr( $path, $i + 1 ) : '' ); + } +} + +/** + * @brief Base class for all backends associated with a particular storage medium. + * + * This class defines the methods as abstract that subclasses must implement. + * Outside callers should *not* use functions with "Internal" in the name. + * + * The FileBackend operations are implemented using basic functions + * such as storeInternal(), copyInternal(), deleteInternal() and the like. + * This class is also responsible for path resolution and sanitization. + * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileBackendStore extends FileBackend { + /** @var Array Map of paths to small (RAM/disk) cache items */ + protected $cache = array(); // (storage path => key => value) + protected $maxCacheSize = 100; // integer; max paths with entries + /** @var Array Map of paths to large (RAM/disk) cache items */ + protected $expensiveCache = array(); // (storage path => key => value) + protected $maxExpensiveCacheSize = 10; // integer; max paths with entries + + /** @var Array Map of container names to sharding settings */ + protected $shardViaHashLevels = array(); // (container name => config array) + + protected $maxFileSize = 1000000000; // integer bytes (1GB) + + /** + * Get the maximum allowable file size given backend + * medium restrictions and basic performance constraints. + * Do not call this function from places outside FileBackend and FileOp. 
+ * + * @return integer Bytes + */ + final public function maxFileSizeInternal() { + return $this->maxFileSize; + } + + /** + * Check if a file can be created at a given storage path. + * FS backends should check if the parent directory exists and the file is writable. + * Backends using key/value stores should check if the container exists. + * + * @param $storagePath string + * @return bool + */ + abstract public function isPathUsableInternal( $storagePath ); + + /** + * Create a file in the backend with the given contents. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * content : the raw file contents + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function createInternal( array $params ) { + wfProfileIn( __METHOD__ ); + if ( strlen( $params['content'] ) > $this->maxFileSizeInternal() ) { + $status = Status::newFatal( 'backend-fail-create', $params['dst'] ); + } else { + $status = $this->doCreateInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + } + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::createInternal() + */ + abstract protected function doCreateInternal( array $params ); + + /** + * Store a file into the backend from a file on disk. + * Do not call this function from places outside FileBackend and FileOp. 
+ * + * $params include: + * src : source path on disk + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function storeInternal( array $params ) { + wfProfileIn( __METHOD__ ); + if ( filesize( $params['src'] ) > $this->maxFileSizeInternal() ) { + $status = Status::newFatal( 'backend-fail-store', $params['dst'] ); + } else { + $status = $this->doStoreInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + } + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::storeInternal() + */ + abstract protected function doStoreInternal( array $params ); + + /** + * Copy a file from one storage path to another in the backend. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * src : source storage path + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function copyInternal( array $params ) { + wfProfileIn( __METHOD__ ); + $status = $this->doCopyInternal( $params ); + $this->clearCache( array( $params['dst'] ) ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::copyInternal() + */ + abstract protected function doCopyInternal( array $params ); + + /** + * Delete a file at the storage path. + * Do not call this function from places outside FileBackend and FileOp. 
+ * + * $params include: + * src : source storage path + * ignoreMissingSource : do nothing if the source file does not exist + * + * @param $params Array + * @return Status + */ + final public function deleteInternal( array $params ) { + wfProfileIn( __METHOD__ ); + $status = $this->doDeleteInternal( $params ); + $this->clearCache( array( $params['src'] ) ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::deleteInternal() + */ + abstract protected function doDeleteInternal( array $params ); + + /** + * Move a file from one storage path to another in the backend. + * Do not call this function from places outside FileBackend and FileOp. + * + * $params include: + * src : source storage path + * dst : destination storage path + * overwrite : overwrite any file that exists at the destination + * + * @param $params Array + * @return Status + */ + final public function moveInternal( array $params ) { + wfProfileIn( __METHOD__ ); + $status = $this->doMoveInternal( $params ); + $this->clearCache( array( $params['src'], $params['dst'] ) ); + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackendStore::moveInternal() + */ + protected function doMoveInternal( array $params ) { + // Copy source to dest + $status = $this->copyInternal( $params ); + if ( $status->isOK() ) { + // Delete source (only fails due to races or medium going down) + $status->merge( $this->deleteInternal( array( 'src' => $params['src'] ) ) ); + $status->setResult( true, $status->value ); // ignore delete() errors + } + return $status; + } + + /** + * @see FileBackend::concatenate() + */ + final public function concatenate( array $params ) { + wfProfileIn( __METHOD__ ); + $status = Status::newGood(); + + // Try to lock the source files for the scope of this function + $scopeLockS = $this->getScopedFileLocks( $params['srcs'], LockManager::LOCK_UW, $status ); + if ( $status->isOK() ) { + // Actually do the concatenation + $status->merge( 
$this->doConcatenate( $params ) );
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Default implementation of concatenate(): append every source chunk,
	 * in the order given, to the local file named by $params['dst'].
	 * That file must already exist and be empty.
	 *
	 * @see FileBackendStore::concatenate()
	 *
	 * @param $params Array Uses 'srcs' (ordered list of storage paths)
	 *                      and 'dst' (path of the local file to fill)
	 * @return Status
	 */
	protected function doConcatenate( array $params ) {
		$status = Status::newGood();
		$tmpPath = $params['dst']; // convenience

		// Check that the specified temp file is valid...
		wfSuppressWarnings();
		$ok = ( is_file( $tmpPath ) && !filesize( $tmpPath ) );
		wfRestoreWarnings();
		if ( !$ok ) { // not present or not empty
			$status->fatal( 'backend-fail-opentemp', $tmpPath );
			return $status;
		}

		// Build up the temp file using the source chunks (in order)...
		$tmpHandle = fopen( $tmpPath, 'ab' );
		if ( $tmpHandle === false ) {
			$status->fatal( 'backend-fail-opentemp', $tmpPath );
			return $status;
		}
		foreach ( $params['srcs'] as $virtualSource ) {
			// Get a local FS version of the chunk
			$tmpFile = $this->getLocalReference( array( 'src' => $virtualSource ) );
			if ( !$tmpFile ) {
				fclose( $tmpHandle ); // do not leak the destination handle
				$status->fatal( 'backend-fail-read', $virtualSource );
				return $status;
			}
			// Get a handle to the local FS version
			$sourceHandle = fopen( $tmpFile->getPath(), 'r' );
			if ( $sourceHandle === false ) {
				fclose( $tmpHandle );
				$status->fatal( 'backend-fail-read', $virtualSource );
				return $status;
			}
			// Append chunk to file. stream_copy_to_stream() returns the number
			// of bytes copied, so only a strict false is a failure; a zero-byte
			// chunk legitimately copies 0 bytes.
			if ( stream_copy_to_stream( $sourceHandle, $tmpHandle ) === false ) {
				fclose( $sourceHandle );
				fclose( $tmpHandle );
				$status->fatal( 'backend-fail-writetemp', $tmpPath );
				return $status;
			}
			fclose( $sourceHandle );
		}
		if ( !fclose( $tmpHandle ) ) {
			$status->fatal( 'backend-fail-closetemp', $tmpPath );
			return $status;
		}

		clearstatcache(); // temp file changed

		return $status;
	}

	/**
	 * @see FileBackend::doPrepare()
	 */
	final protected function doPrepare( array $params ) {
		wfProfileIn( __METHOD__ );

		$status = Status::newGood();
		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir
=== null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
			wfProfileOut( __METHOD__ );
			return $status; // invalid storage path
		}

		if ( $shard !== null ) { // confined to a single container/shard
			$status->merge( $this->doPrepareInternal( $fullCont, $dir, $params ) );
		} else { // directory is on several shards
			wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
			list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
			foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
				$status->merge( $this->doPrepareInternal( "{$fullCont}{$suffix}", $dir, $params ) );
			}
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * @see FileBackendStore::doPrepare()
	 */
	protected function doPrepareInternal( $container, $dir, array $params ) {
		return Status::newGood();
	}

	/**
	 * @see FileBackend::doSecure()
	 */
	final protected function doSecure( array $params ) {
		wfProfileIn( __METHOD__ );
		$status = Status::newGood();

		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir === null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
			wfProfileOut( __METHOD__ );
			return $status; // invalid storage path
		}

		if ( $shard !== null ) { // confined to a single container/shard
			$status->merge( $this->doSecureInternal( $fullCont, $dir, $params ) );
		} else { // directory is on several shards
			wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
			list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
			foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
				$status->merge( $this->doSecureInternal( "{$fullCont}{$suffix}", $dir, $params ) );
			}
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * @see FileBackendStore::doSecure()
	 */
	protected function doSecureInternal( $container, $dir, array $params ) {
		return Status::newGood();
	}

	/**
	 * @see FileBackend::doClean()
	 */
	final protected function doClean( array $params ) {
		wfProfileIn( __METHOD__ );
		$status = Status::newGood();

		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir === null ) {
			$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
			wfProfileOut( __METHOD__ );
			return $status; // invalid storage path
		}

		// Attempt to lock this directory...
		$filesLockEx = array( $params['dir'] );
		$scopedLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			wfProfileOut( __METHOD__ );
			return $status; // abort
		}

		if ( $shard !== null ) { // confined to a single container/shard
			$status->merge( $this->doCleanInternal( $fullCont, $dir, $params ) );
		} else { // directory is on several shards
			wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
			list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
			foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
				$status->merge( $this->doCleanInternal( "{$fullCont}{$suffix}", $dir, $params ) );
			}
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * @see FileBackendStore::doClean()
	 */
	protected function doCleanInternal( $container, $dir, array $params ) {
		return Status::newGood();
	}

	/**
	 * @see FileBackend::fileExists()
	 */
	final public function fileExists( array $params ) {
		wfProfileIn( __METHOD__ );
		$stat = $this->getFileStat( $params );
		wfProfileOut( __METHOD__ );
		return ( $stat === null ) ? null : (bool)$stat; // null => failure
	}

	/**
	 * @see FileBackend::getFileTimestamp()
	 */
	final public function getFileTimestamp( array $params ) {
		wfProfileIn( __METHOD__ );
		$stat = $this->getFileStat( $params );
		wfProfileOut( __METHOD__ );
		return $stat ? $stat['mtime'] : false;
	}

	/**
	 * @see FileBackend::getFileSize()
	 */
	final public function getFileSize( array $params ) {
		wfProfileIn( __METHOD__ );
		$stat = $this->getFileStat( $params );
		wfProfileOut( __METHOD__ );
		return $stat ? $stat['size'] : false;
	}

	/**
	 * Get stat information for a file, going through the per-path stat cache.
	 * A cached entry is reused unless $params['latest'] is set and the entry
	 * was not itself fetched with 'latest' set.
	 *
	 * @see FileBackend::getFileStat()
	 *
	 * @param $params Array Uses 'src' (storage path) and optionally 'latest'
	 * @return Array|bool|null Stat array (cached or from doGetFileStat()),
	 *   false if 'src' is not a valid storage path; any other non-array
	 *   result of doGetFileStat() is passed through without being cached
	 */
	final public function getFileStat( array $params ) {
		wfProfileIn( __METHOD__ );
		$path = self::normalizeStoragePath( $params['src'] );
		if ( $path === null ) {
			wfProfileOut( __METHOD__ ); // keep profiler calls balanced on early exit
			return false; // invalid storage path
		}
		$latest = !empty( $params['latest'] );
		if ( isset( $this->cache[$path]['stat'] ) ) {
			// If we want the latest data, check that this cached
			// value was in fact fetched with the latest available data.
			if ( !$latest || $this->cache[$path]['stat']['latest'] ) {
				wfProfileOut( __METHOD__ );
				return $this->cache[$path]['stat'];
			}
		}
		$stat = $this->doGetFileStat( $params );
		if ( is_array( $stat ) ) { // don't cache negatives
			$this->trimCache(); // limit memory
			$this->cache[$path]['stat'] = $stat;
			$this->cache[$path]['stat']['latest'] = $latest;
		}
		wfProfileOut( __METHOD__ );
		return $stat;
	}

	/**
	 * @see FileBackendStore::getFileStat()
	 */
	abstract protected function doGetFileStat( array $params );

	/**
	 * @see FileBackend::getFileContents()
	 */
	public function getFileContents( array $params ) {
		wfProfileIn( __METHOD__ );
		$tmpFile = $this->getLocalReference( $params );
		if ( !$tmpFile ) {
			wfProfileOut( __METHOD__ );
			return false;
		}
		wfSuppressWarnings();
		$data = file_get_contents( $tmpFile->getPath() );
		wfRestoreWarnings();
		wfProfileOut( __METHOD__ );
		return $data;
	}

	/**
	 * Get the base-36 SHA-1 of a file, going through the per-path cache.
	 *
	 * The cache is keyed by the normalized storage path, the same key that
	 * getFileStat() and clearCache() use, so that clearing the cache for a
	 * path also drops its hash. A path that cannot be normalized is still
	 * handed to doGetFileSha1Base36(), but its result is never cached.
	 *
	 * @see FileBackend::getFileSha1Base36()
	 *
	 * @param $params Array Uses 'src' (storage path)
	 * @return string|bool Result of doGetFileSha1Base36() (possibly cached)
	 */
	final public function getFileSha1Base36( array $params ) {
		wfProfileIn( __METHOD__ );
		$path = self::normalizeStoragePath( $params['src'] );
		if ( $path !== null && isset( $this->cache[$path]['sha1'] ) ) {
			wfProfileOut( __METHOD__ );
			return $this->cache[$path]['sha1'];
		}
		$hash = $this->doGetFileSha1Base36( $params );
		if ( $hash && $path !== null ) { // don't cache negatives
			$this->trimCache(); // limit memory
			$this->cache[$path]['sha1'] = $hash;
		}
		wfProfileOut( __METHOD__ );
		return $hash;
	}

	/**
	 * @see FileBackendStore::getFileSha1Base36()
	 */
	protected function doGetFileSha1Base36( array $params ) {
		$fsFile = $this->getLocalReference( $params );
		if ( !$fsFile ) {
			return false;
		} else {
			return $fsFile->getSha1Base36();
		}
	}

	/**
	 * @see FileBackend::getFileProps()
	 */
	final public function getFileProps( array $params ) {
		wfProfileIn( __METHOD__ );
		$fsFile = $this->getLocalReference( $params );
		$props = $fsFile ?
$fsFile->getProps() : FSFile::placeholderProps();
		wfProfileOut( __METHOD__ );
		return $props;
	}

	/**
	 * Get a local file object for a stored file, reusing a previously
	 * fetched copy from the expensive per-path cache when there is one.
	 *
	 * The cache is keyed by the normalized storage path so that
	 * clearCache(), which normalizes the paths it is given, actually
	 * evicts the entry. A path that cannot be normalized is still handed
	 * to getLocalCopy(), but its result is never cached.
	 *
	 * @see FileBackend::getLocalReference()
	 *
	 * @param $params Array Uses 'src' (storage path)
	 * @return mixed Result of getLocalCopy() (possibly cached); falsey on failure
	 */
	public function getLocalReference( array $params ) {
		wfProfileIn( __METHOD__ );
		$path = self::normalizeStoragePath( $params['src'] );
		if ( $path !== null && isset( $this->expensiveCache[$path]['localRef'] ) ) {
			wfProfileOut( __METHOD__ );
			return $this->expensiveCache[$path]['localRef'];
		}
		$tmpFile = $this->getLocalCopy( $params );
		if ( $tmpFile && $path !== null ) { // don't cache negatives
			$this->trimExpensiveCache(); // limit memory
			$this->expensiveCache[$path]['localRef'] = $tmpFile;
		}
		wfProfileOut( __METHOD__ );
		return $tmpFile;
	}

	/**
	 * @see FileBackend::streamFile()
	 */
	final public function streamFile( array $params ) {
		wfProfileIn( __METHOD__ );
		$status = Status::newGood();

		$info = $this->getFileStat( $params );
		if ( !$info ) { // let StreamFile handle the 404
			$status->fatal( 'backend-fail-notexists', $params['src'] );
		}

		// Set output buffer and HTTP headers for stream
		$extraHeaders = isset( $params['headers'] ) ?
$params['headers'] : array();
		$res = StreamFile::prepareForStream( $params['src'], $info, $extraHeaders );
		if ( $res == StreamFile::NOT_MODIFIED ) {
			// do nothing; client cache is up to date
		} elseif ( $res == StreamFile::READY_STREAM ) {
			$status = $this->doStreamFile( $params );
		} else {
			$status->fatal( 'backend-fail-stream', $params['src'] );
		}

		wfProfileOut( __METHOD__ );
		return $status;
	}

	/**
	 * Default implementation of the actual streaming step: fetch a local
	 * reference to the file and write its contents to the output buffer.
	 *
	 * @see FileBackendStore::streamFile()
	 *
	 * @param $params Array Uses 'src' (storage path)
	 * @return Status Fatal 'backend-fail-stream' if no local reference could
	 *   be obtained or the file could not be read
	 */
	protected function doStreamFile( array $params ) {
		$status = Status::newGood();

		$fsFile = $this->getLocalReference( $params );
		if ( !$fsFile ) {
			$status->fatal( 'backend-fail-stream', $params['src'] );
		} elseif ( readfile( $fsFile->getPath() ) === false ) {
			// readfile() returns the number of bytes sent, which is 0 for an
			// empty file; only a strict false means the read failed.
			$status->fatal( 'backend-fail-stream', $params['src'] );
		}

		return $status;
	}

	/**
	 * @copydoc FileBackend::getFileList()
	 */
	final public function getFileList( array $params ) {
		list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
		if ( $dir === null ) { // invalid storage path
			return null;
		}
		if ( $shard !== null ) {
			// File listing is confined to a single container/shard
			return $this->getFileListInternal( $fullCont, $dir, $params );
		} else {
			wfDebug( __METHOD__ .
": iterating over all container shards.\n" ); + // File listing spans multiple containers/shards + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); + return new FileBackendStoreShardListIterator( $this, + $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); + } + } + + /** + * Do not call this function from places outside FileBackend + * + * @see FileBackendStore::getFileList() + * + * @param $container string Resolved container name + * @param $dir string Resolved path relative to container + * @param $params Array + * @return Traversable|Array|null + */ + abstract public function getFileListInternal( $container, $dir, array $params ); + + /** + * Get the list of supported operations and their corresponding FileOp classes. + * + * @return Array + */ + protected function supportedOperations() { + return array( + 'store' => 'StoreFileOp', + 'copy' => 'CopyFileOp', + 'move' => 'MoveFileOp', + 'delete' => 'DeleteFileOp', + 'create' => 'CreateFileOp', + 'null' => 'NullFileOp' + ); + } + + /** + * Return a list of FileOp objects from a list of operations. + * Do not call this function from places outside FileBackend. + * + * The result must have the same number of items as the input. + * An exception is thrown if an unsupported operation is requested. + * + * @param $ops Array Same format as doOperations() + * @return Array List of FileOp objects + * @throws MWException + */ + final public function getOperations( array $ops ) { + $supportedOps = $this->supportedOperations(); + + $performOps = array(); // array of FileOp objects + // Build up ordered array of FileOps... + foreach ( $ops as $operation ) { + $opName = $operation['op']; + if ( isset( $supportedOps[$opName] ) ) { + $class = $supportedOps[$opName]; + // Get params for this operation + $params = $operation; + // Append the FileOp class + $performOps[] = new $class( $this, $params ); + } else { + throw new MWException( "Operation `$opName` is not supported." 
); + } + } + + return $performOps; + } + + /** + * @see FileBackend::doOperationsInternal() + */ + protected function doOperationsInternal( array $ops, array $opts ) { + wfProfileIn( __METHOD__ ); + $status = Status::newGood(); + + // Build up a list of FileOps... + $performOps = $this->getOperations( $ops ); + + // Acquire any locks as needed... + if ( empty( $opts['nonLocking'] ) ) { + // Build up a list of files to lock... + $filesLockEx = $filesLockSh = array(); + foreach ( $performOps as $fileOp ) { + $filesLockSh = array_merge( $filesLockSh, $fileOp->storagePathsRead() ); + $filesLockEx = array_merge( $filesLockEx, $fileOp->storagePathsChanged() ); + } + // Optimization: if doing an EX lock anyway, don't also set an SH one + $filesLockSh = array_diff( $filesLockSh, $filesLockEx ); + // Get a shared lock on the parent directory of each path changed + $filesLockSh = array_merge( $filesLockSh, array_map( 'dirname', $filesLockEx ) ); + // Try to lock those files for the scope of this function... + $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status ); + $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status ); + if ( !$status->isOK() ) { + wfProfileOut( __METHOD__ ); + return $status; // abort + } + } + + // Clear any cache entries (after locks acquired) + $this->clearCache(); + + // Actually attempt the operation batch... 
+ $subStatus = FileOp::attemptBatch( $performOps, $opts ); + + // Merge errors into status fields + $status->merge( $subStatus ); + $status->success = $subStatus->success; // not done in merge() + + wfProfileOut( __METHOD__ ); + return $status; + } + + /** + * @see FileBackend::clearCache() + */ + final public function clearCache( array $paths = null ) { + if ( is_array( $paths ) ) { + $paths = array_map( 'FileBackend::normalizeStoragePath', $paths ); + $paths = array_filter( $paths, 'strlen' ); // remove nulls + } + if ( $paths === null ) { + $this->cache = array(); + $this->expensiveCache = array(); + } else { + foreach ( $paths as $path ) { + unset( $this->cache[$path] ); + unset( $this->expensiveCache[$path] ); + } + } + $this->doClearCache( $paths ); + } + + /** + * Clears any additional stat caches for storage paths + * + * @see FileBackend::clearCache() + * + * @param $paths Array Storage paths (optional) + * @return void + */ + protected function doClearCache( array $paths = null ) {} + + /** + * Prune the inexpensive cache if it is too big to add an item + * + * @return void + */ + protected function trimCache() { + if ( count( $this->cache ) >= $this->maxCacheSize ) { + reset( $this->cache ); + unset( $this->cache[key( $this->cache )] ); + } + } + + /** + * Prune the expensive cache if it is too big to add an item + * + * @return void + */ + protected function trimExpensiveCache() { + if ( count( $this->expensiveCache ) >= $this->maxExpensiveCacheSize ) { + reset( $this->expensiveCache ); + unset( $this->expensiveCache[key( $this->expensiveCache )] ); + } + } + + /** + * Check if a container name is valid. + * This checks for for length and illegal characters. + * + * @param $container string + * @return bool + */ + final protected static function isValidContainerName( $container ) { + // This accounts for Swift and S3 restrictions while leaving room + // for things like '.xxx' (hex shard chars) or '.seg' (segments). 
+ // This disallows directory separators or traversal characters. + // Note that matching strings URL encode to the same string; + // in Swift, the length restriction is *after* URL encoding. + return preg_match( '/^[a-z0-9][a-z0-9-_]{0,199}$/i', $container ); + } + + /** + * Splits a storage path into an internal container name, + * an internal relative file name, and a container shard suffix. + * Any shard suffix is already appended to the internal container name. + * This also checks that the storage path is valid and within this backend. + * + * If the container is sharded but a suffix could not be determined, + * this means that the path can only refer to a directory and can only + * be scanned by looking in all the container shards. + * + * @param $storagePath string + * @return Array (container, path, container suffix) or (null, null, null) if invalid + */ + final protected function resolveStoragePath( $storagePath ) { + list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath ); + if ( $backend === $this->name ) { // must be for this backend + $relPath = self::normalizeContainerPath( $relPath ); + if ( $relPath !== null ) { + // Get shard for the normalized path if this container is sharded + $cShard = $this->getContainerShard( $container, $relPath ); + // Validate and sanitize the relative path (backend-specific) + $relPath = $this->resolveContainerPath( $container, $relPath ); + if ( $relPath !== null ) { + // Prepend any wiki ID prefix to the container name + $container = $this->fullContainerName( $container ); + if ( self::isValidContainerName( $container ) ) { + // Validate and sanitize the container name (backend-specific) + $container = $this->resolveContainerName( "{$container}{$cShard}" ); + if ( $container !== null ) { + return array( $container, $relPath, $cShard ); + } + } + } + } + } + return array( null, null, null ); + } + + /** + * Like resolveStoragePath() except null values are returned if + * the container is sharded 
and the shard could not be determined. + * + * @see FileBackendStore::resolveStoragePath() + * + * @param $storagePath string + * @return Array (container, path) or (null, null) if invalid + */ + final protected function resolveStoragePathReal( $storagePath ) { + list( $container, $relPath, $cShard ) = $this->resolveStoragePath( $storagePath ); + if ( $cShard !== null ) { + return array( $container, $relPath ); + } + return array( null, null ); + } + + /** + * Get the container name shard suffix for a given path. + * Any empty suffix means the container is not sharded. + * + * @param $container string Container name + * @param $relStoragePath string Storage path relative to the container + * @return string|null Returns null if shard could not be determined + */ + final protected function getContainerShard( $container, $relPath ) { + list( $levels, $base, $repeat ) = $this->getContainerHashLevels( $container ); + if ( $levels == 1 || $levels == 2 ) { + // Hash characters are either base 16 or 36 + $char = ( $base == 36 ) ? '[0-9a-z]' : '[0-9a-f]'; + // Get a regex that represents the shard portion of paths. + // The concatenation of the captures gives us the shard. + if ( $levels === 1 ) { // 16 or 36 shards per container + $hashDirRegex = '(' . $char . ')'; + } else { // 256 or 1296 shards per container + if ( $repeat ) { // verbose hash dir format (e.g. "a/ab/abc") + $hashDirRegex = $char . '/(' . $char . '{2})'; + } else { // short hash dir format (e.g. "a/b/c") + $hashDirRegex = '(' . $char . ')/(' . $char . ')'; + } + } + // Allow certain directories to be above the hash dirs so as + // to work with FileRepo (e.g. "archive/a/ab" or "temp/a/ab"). + // They must be 2+ chars to avoid any hash directory ambiguity. + $m = array(); + if ( preg_match( "!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $m ) ) { + return '.' . 
implode( '', array_slice( $m, 1 ) ); + } + return null; // failed to match + } + return ''; // no sharding + } + + /** + * Get the sharding config for a container. + * If greater than 0, then all file storage paths within + * the container are required to be hashed accordingly. + * + * @param $container string + * @return Array (integer levels, integer base, repeat flag) or (0, 0, false) + */ + final protected function getContainerHashLevels( $container ) { + if ( isset( $this->shardViaHashLevels[$container] ) ) { + $config = $this->shardViaHashLevels[$container]; + $hashLevels = (int)$config['levels']; + if ( $hashLevels == 1 || $hashLevels == 2 ) { + $hashBase = (int)$config['base']; + if ( $hashBase == 16 || $hashBase == 36 ) { + return array( $hashLevels, $hashBase, $config['repeat'] ); + } + } + } + return array( 0, 0, false ); // no sharding + } + + /** + * Get a list of full container shard suffixes for a container + * + * @param $container string + * @return Array + */ + final protected function getContainerSuffixes( $container ) { + $shards = array(); + list( $digits, $base ) = $this->getContainerHashLevels( $container ); + if ( $digits > 0 ) { + $numShards = pow( $base, $digits ); + for ( $index = 0; $index < $numShards; $index++ ) { + $shards[] = '.' . wfBaseConvert( $index, 10, $base, $digits ); + } + } + return $shards; + } + + /** + * Get the full container name, including the wiki ID prefix + * + * @param $container string + * @return string + */ + final protected function fullContainerName( $container ) { + if ( $this->wikiId != '' ) { + return "{$this->wikiId}-$container"; + } else { + return $container; + } + } + + /** + * Resolve a container name, checking if it's allowed by the backend. + * This is intended for internal use, such as encoding illegal chars. + * Subclasses can override this to be more restrictive. 
+ * + * @param $container string + * @return string|null + */ + protected function resolveContainerName( $container ) { + return $container; + } + + /** + * Resolve a relative storage path, checking if it's allowed by the backend. + * This is intended for internal use, such as encoding illegal chars or perhaps + * getting absolute paths (e.g. FS based backends). Note that the relative path + * may be the empty string (e.g. the path is simply to the container). + * + * @param $container string Container name + * @param $relStoragePath string Storage path relative to the container + * @return string|null Path or null if not valid + */ + protected function resolveContainerPath( $container, $relStoragePath ) { + return $relStoragePath; + } +} + +/** + * FileBackendStore helper function to handle file listings that span container shards. + * Do not use this class from places outside of FileBackendStore. + * + * @ingroup FileBackend + */ +class FileBackendStoreShardListIterator implements Iterator { + /* @var FileBackendStore */ + protected $backend; + /* @var Array */ + protected $params; + /* @var Array */ + protected $shardSuffixes; + protected $container; // string + protected $directory; // string + + /* @var Traversable */ + protected $iter; + protected $curShard = 0; // integer + protected $pos = 0; // integer + + /** + * @param $backend FileBackendStore + * @param $container string Full storage container name + * @param $dir string Storage directory relative to container + * @param $suffixes Array List of container shard suffixes + * @param $params Array + */ + public function __construct( + FileBackendStore $backend, $container, $dir, array $suffixes, array $params + ) { + $this->backend = $backend; + $this->container = $container; + $this->directory = $dir; + $this->shardSuffixes = $suffixes; + $this->params = $params; + } + + public function current() { + if ( is_array( $this->iter ) ) { + return current( $this->iter ); + } else { + return 
$this->iter->current(); + } + } + + public function key() { + return $this->pos; + } + + public function next() { + ++$this->pos; + if ( is_array( $this->iter ) ) { + next( $this->iter ); + } else { + $this->iter->next(); + } + // Find the next non-empty shard if no elements are left + $this->nextShardIteratorIfNotValid(); + } + + /** + * If the iterator for this container shard is out of items, + * then move on to the next container that has items. + * If there are none, then it advances to the last container. + */ + protected function nextShardIteratorIfNotValid() { + while ( !$this->valid() ) { + if ( ++$this->curShard >= count( $this->shardSuffixes ) ) { + break; // no more container shards + } + $this->setIteratorFromCurrentShard(); + } + } + + protected function setIteratorFromCurrentShard() { + $suffix = $this->shardSuffixes[$this->curShard]; + $this->iter = $this->backend->getFileListInternal( + "{$this->container}{$suffix}", $this->directory, $this->params ); + } + + public function rewind() { + $this->pos = 0; + $this->curShard = 0; + $this->setIteratorFromCurrentShard(); + // Find the next non-empty shard if this one has no elements + $this->nextShardIteratorIfNotValid(); + } + + public function valid() { + if ( $this->iter == null ) { + return false; // some failure? 
+ } elseif ( is_array( $this->iter ) ) { + return ( current( $this->iter ) !== false ); // no paths can have this value + } else { + return $this->iter->valid(); + } + } +} diff --git a/includes/filerepo/backend/FileBackendGroup.php b/includes/filerepo/backend/FileBackendGroup.php new file mode 100644 index 00000000..73815cfb --- /dev/null +++ b/includes/filerepo/backend/FileBackendGroup.php @@ -0,0 +1,156 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * Class to handle file backend registration + * + * @ingroup FileBackend + * @since 1.19 + */ +class FileBackendGroup { + /** + * @var FileBackendGroup + */ + protected static $instance = null; + + /** @var Array (name => ('class' => string, 'config' => array, 'instance' => object)) */ + protected $backends = array(); + + protected function __construct() {} + protected function __clone() {} + + /** + * @return FileBackendGroup + */ + public static function singleton() { + if ( self::$instance == null ) { + self::$instance = new self(); + self::$instance->initFromGlobals(); + } + return self::$instance; + } + + /** + * Destroy the singleton instance + * + * @return void + */ + public static function destroySingleton() { + self::$instance = null; + } + + /** + * Register file backends from the global variables + * + * @return void + */ + protected function initFromGlobals() { + global $wgLocalFileRepo, $wgForeignFileRepos, $wgFileBackends; + + // Register explicitly defined backends + $this->register( $wgFileBackends ); + + $autoBackends = array(); + // Automatically create b/c backends for file repos... + $repos = array_merge( $wgForeignFileRepos, array( $wgLocalFileRepo ) ); + foreach ( $repos as $info ) { + $backendName = $info['backend']; + if ( is_object( $backendName ) || isset( $this->backends[$backendName] ) ) { + continue; // already defined (or set to the object for some reason) + } + $repoName = $info['name']; + // Local vars that used to be FSRepo members... 
+ $directory = $info['directory']; + $deletedDir = isset( $info['deletedDir'] ) + ? $info['deletedDir'] + : false; // deletion disabled + $thumbDir = isset( $info['thumbDir'] ) + ? $info['thumbDir'] + : "{$directory}/thumb"; + $fileMode = isset( $info['fileMode'] ) + ? $info['fileMode'] + : 0644; + // Get the FS backend configuration + $autoBackends[] = array( + 'name' => $backendName, + 'class' => 'FSFileBackend', + 'lockManager' => 'fsLockManager', + 'containerPaths' => array( + "{$repoName}-public" => "{$directory}", + "{$repoName}-thumb" => $thumbDir, + "{$repoName}-deleted" => $deletedDir, + "{$repoName}-temp" => "{$directory}/temp" + ), + 'fileMode' => $fileMode, + ); + } + + // Register implicitly defined backends + $this->register( $autoBackends ); + } + + /** + * Register an array of file backend configurations + * + * @param $configs Array + * @return void + * @throws MWException + */ + protected function register( array $configs ) { + foreach ( $configs as $config ) { + if ( !isset( $config['name'] ) ) { + throw new MWException( "Cannot register a backend with no name." ); + } + $name = $config['name']; + if ( !isset( $config['class'] ) ) { + throw new MWException( "Cannot register backend `{$name}` with no class." ); + } + $class = $config['class']; + + unset( $config['class'] ); // backend won't need this + $this->backends[$name] = array( + 'class' => $class, + 'config' => $config, + 'instance' => null + ); + } + } + + /** + * Get the backend object with a given name + * + * @param $name string + * @return FileBackend + * @throws MWException + */ + public function get( $name ) { + if ( !isset( $this->backends[$name] ) ) { + throw new MWException( "No backend defined with the name `$name`." 
); + } + // Lazy-load the actual backend instance + if ( !isset( $this->backends[$name]['instance'] ) ) { + $class = $this->backends[$name]['class']; + $config = $this->backends[$name]['config']; + $this->backends[$name]['instance'] = new $class( $config ); + } + return $this->backends[$name]['instance']; + } + + /** + * Get an appropriate backend object from a storage path + * + * @param $storagePath string + * @return FileBackend|null Backend or null on failure + */ + public function backendFromPath( $storagePath ) { + list( $backend, $c, $p ) = FileBackend::splitStoragePath( $storagePath ); + if ( $backend !== null && isset( $this->backends[$backend] ) ) { + return $this->get( $backend ); + } + return null; + } +} diff --git a/includes/filerepo/backend/FileBackendMultiWrite.php b/includes/filerepo/backend/FileBackendMultiWrite.php new file mode 100644 index 00000000..c0f1ac57 --- /dev/null +++ b/includes/filerepo/backend/FileBackendMultiWrite.php @@ -0,0 +1,420 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * This class defines a multi-write backend. Multiple backends can be + * registered to this proxy backend and it will act as a single backend. + * Use this when all access to those backends is through this proxy backend. + * At least one of the backends must be declared the "master" backend. + * + * Only use this class when transitioning from one storage system to another. + * + * Read operations are only done on the 'master' backend for consistency. + * Write operations are performed on all backends, in the order defined. + * If an operation fails on one backend it will be rolled back from the others. 
+ * + * @ingroup FileBackend + * @since 1.19 + */ +class FileBackendMultiWrite extends FileBackend { + /** @var Array Prioritized list of FileBackendStore objects */ + protected $backends = array(); // array of (backend index => backends) + protected $masterIndex = -1; // integer; index of master backend + protected $syncChecks = 0; // integer bitfield + + /* Possible internal backend consistency checks */ + const CHECK_SIZE = 1; + const CHECK_TIME = 2; + + /** + * Construct a proxy backend that consists of several internal backends. + * Additional $config params include: + * 'backends' : Array of backend config and multi-backend settings. + * Each value is the config used in the constructor of a + * FileBackendStore class, but with these additional settings: + * 'class' : The name of the backend class + * 'isMultiMaster' : This must be set for one backend. + * 'syncChecks' : Integer bitfield of internal backend sync checks to perform. + * Possible bits include self::CHECK_SIZE and self::CHECK_TIME. + * The checks are done before allowing any file operations. + * @param $config Array + */ + public function __construct( array $config ) { + parent::__construct( $config ); + $namesUsed = array(); + // Construct backends here rather than via registration + // to keep these backends hidden from outside the proxy. + foreach ( $config['backends'] as $index => $config ) { + $name = $config['name']; + if ( isset( $namesUsed[$name] ) ) { // don't break FileOp predicates + throw new MWException( "Two or more backends defined with the name $name." ); + } + $namesUsed[$name] = 1; + if ( !isset( $config['class'] ) ) { + throw new MWException( 'No class given for a backend config.' ); + } + $class = $config['class']; + $this->backends[$index] = new $class( $config ); + if ( !empty( $config['isMultiMaster'] ) ) { + if ( $this->masterIndex >= 0 ) { + throw new MWException( 'More than one master backend defined.' 
); + } + $this->masterIndex = $index; + } + } + if ( $this->masterIndex < 0 ) { // need backends and must have a master + throw new MWException( 'No master backend defined.' ); + } + $this->syncChecks = isset( $config['syncChecks'] ) + ? $config['syncChecks'] + : self::CHECK_SIZE; + } + + /** + * @see FileBackend::doOperationsInternal() + */ + final protected function doOperationsInternal( array $ops, array $opts ) { + $status = Status::newGood(); + + $performOps = array(); // list of FileOp objects + $filesRead = $filesChanged = array(); // storage paths used + // Build up a list of FileOps. The list will have all the ops + // for one backend, then all the ops for the next, and so on. + // These batches of ops are all part of a continuous array. + // Also build up a list of files read/changed... + foreach ( $this->backends as $index => $backend ) { + $backendOps = $this->substOpBatchPaths( $ops, $backend ); + // Add on the operation batch for this backend + $performOps = array_merge( $performOps, $backend->getOperations( $backendOps ) ); + if ( $index == 0 ) { // first batch + // Get the files used for these operations. Each backend has a batch of + // the same operations, so we only need to get them from the first batch. + foreach ( $performOps as $fileOp ) { + $filesRead = array_merge( $filesRead, $fileOp->storagePathsRead() ); + $filesChanged = array_merge( $filesChanged, $fileOp->storagePathsChanged() ); + } + // Get the paths under the proxy backend's name + $filesRead = $this->unsubstPaths( $filesRead ); + $filesChanged = $this->unsubstPaths( $filesChanged ); + } + } + + // Try to lock those files for the scope of this function... 
+ if ( empty( $opts['nonLocking'] ) ) { + $filesLockSh = array_diff( $filesRead, $filesChanged ); // optimization + $filesLockEx = $filesChanged; + // Get a shared lock on the parent directory of each path changed + $filesLockSh = array_merge( $filesLockSh, array_map( 'dirname', $filesLockEx ) ); + // Try to lock those files for the scope of this function... + $scopeLockS = $this->getScopedFileLocks( $filesLockSh, LockManager::LOCK_UW, $status ); + $scopeLockE = $this->getScopedFileLocks( $filesLockEx, LockManager::LOCK_EX, $status ); + if ( !$status->isOK() ) { + return $status; // abort + } + } + + // Clear any cache entries (after locks acquired) + $this->clearCache(); + + // Do a consistency check to see if the backends agree + if ( count( $this->backends ) > 1 ) { + $status->merge( $this->consistencyCheck( array_merge( $filesRead, $filesChanged ) ) ); + if ( !$status->isOK() ) { + return $status; // abort + } + } + + // Actually attempt the operation batch... + $subStatus = FileOp::attemptBatch( $performOps, $opts ); + + $success = array(); + $failCount = $successCount = 0; + // Make 'success', 'successCount', and 'failCount' fields reflect + // the overall operation, rather than all the batches for each backend. + // Do this by only using success values from the master backend's batch. 
+ $batchStart = $this->masterIndex * count( $ops ); + $batchEnd = $batchStart + count( $ops ) - 1; + for ( $i = $batchStart; $i <= $batchEnd; $i++ ) { + if ( !isset( $subStatus->success[$i] ) ) { + break; // failed out before trying this op + } elseif ( $subStatus->success[$i] ) { + ++$successCount; + } else { + ++$failCount; + } + $success[] = $subStatus->success[$i]; + } + $subStatus->success = $success; + $subStatus->successCount = $successCount; + $subStatus->failCount = $failCount; + + // Merge errors into status fields + $status->merge( $subStatus ); + $status->success = $subStatus->success; // not done in merge() + + return $status; + } + + /** + * Check that a set of files are consistent across all internal backends + * + * @param $paths Array + * @return Status + */ + public function consistencyCheck( array $paths ) { + $status = Status::newGood(); + if ( $this->syncChecks == 0 ) { + return $status; // skip checks + } + + $mBackend = $this->backends[$this->masterIndex]; + foreach ( array_unique( $paths ) as $path ) { + $params = array( 'src' => $path, 'latest' => true ); + // Stat the file on the 'master' backend + $mStat = $mBackend->getFileStat( $this->substOpPaths( $params, $mBackend ) ); + // Check of all clone backends agree with the master... 
+ foreach ( $this->backends as $index => $cBackend ) { + if ( $index === $this->masterIndex ) { + continue; // master + } + $cStat = $cBackend->getFileStat( $this->substOpPaths( $params, $cBackend ) ); + if ( $mStat ) { // file is in master + if ( !$cStat ) { // file should exist + $status->fatal( 'backend-fail-synced', $path ); + continue; + } + if ( $this->syncChecks & self::CHECK_SIZE ) { + if ( $cStat['size'] != $mStat['size'] ) { // wrong size + $status->fatal( 'backend-fail-synced', $path ); + continue; + } + } + if ( $this->syncChecks & self::CHECK_TIME ) { + $mTs = wfTimestamp( TS_UNIX, $mStat['mtime'] ); + $cTs = wfTimestamp( TS_UNIX, $cStat['mtime'] ); + if ( abs( $mTs - $cTs ) > 30 ) { // outdated file somewhere + $status->fatal( 'backend-fail-synced', $path ); + continue; + } + } + } else { // file is not in master + if ( $cStat ) { // file should not exist + $status->fatal( 'backend-fail-synced', $path ); + } + } + } + } + + return $status; + } + + /** + * Substitute the backend name in storage path parameters + * for a set of operations with that of a given internal backend. 
+ * + * @param $ops Array List of file operation arrays + * @param $backend FileBackendStore + * @return Array + */ + protected function substOpBatchPaths( array $ops, FileBackendStore $backend ) { + $newOps = array(); // operations + foreach ( $ops as $op ) { + $newOp = $op; // operation + foreach ( array( 'src', 'srcs', 'dst', 'dir' ) as $par ) { + if ( isset( $newOp[$par] ) ) { // string or array + $newOp[$par] = $this->substPaths( $newOp[$par], $backend ); + } + } + $newOps[] = $newOp; + } + return $newOps; + } + + /** + * Same as substOpBatchPaths() but for a single operation + * + * @param $op File operation array + * @param $backend FileBackendStore + * @return Array + */ + protected function substOpPaths( array $ops, FileBackendStore $backend ) { + $newOps = $this->substOpBatchPaths( array( $ops ), $backend ); + return $newOps[0]; + } + + /** + * Substitute the backend of storage paths with an internal backend's name + * + * @param $paths Array|string List of paths or single string path + * @param $backend FileBackendStore + * @return Array|string + */ + protected function substPaths( $paths, FileBackendStore $backend ) { + return preg_replace( + '!^mwstore://' . preg_quote( $this->name ) . 
'/!', + StringUtils::escapeRegexReplacement( "mwstore://{$backend->getName()}/" ), + $paths // string or array + ); + } + + /** + * Substitute the backend of internal storage paths with the proxy backend's name + * + * @param $paths Array|string List of paths or single string path + * @return Array|string + */ + protected function unsubstPaths( $paths ) { + return preg_replace( + '!^mwstore://([^/]+)!', + StringUtils::escapeRegexReplacement( "mwstore://{$this->name}" ), + $paths // string or array + ); + } + + /** + * @see FileBackend::doPrepare() + */ + public function doPrepare( array $params ) { + $status = Status::newGood(); + foreach ( $this->backends as $backend ) { + $realParams = $this->substOpPaths( $params, $backend ); + $status->merge( $backend->doPrepare( $realParams ) ); + } + return $status; + } + + /** + * @see FileBackend::doSecure() + */ + public function doSecure( array $params ) { + $status = Status::newGood(); + foreach ( $this->backends as $backend ) { + $realParams = $this->substOpPaths( $params, $backend ); + $status->merge( $backend->doSecure( $realParams ) ); + } + return $status; + } + + /** + * @see FileBackend::doClean() + */ + public function doClean( array $params ) { + $status = Status::newGood(); + foreach ( $this->backends as $backend ) { + $realParams = $this->substOpPaths( $params, $backend ); + $status->merge( $backend->doClean( $realParams ) ); + } + return $status; + } + + /** + * @see FileBackend::getFileList() + */ + public function concatenate( array $params ) { + // We are writing to an FS file, so we don't need to do this per-backend + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->concatenate( $realParams ); + } + + /** + * @see FileBackend::fileExists() + */ + public function fileExists( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return 
$this->backends[$this->masterIndex]->fileExists( $realParams ); + } + + /** + * @see FileBackend::getFileTimestamp() + */ + public function getFileTimestamp( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileTimestamp( $realParams ); + } + + /** + * @see FileBackend::getFileSize() + */ + public function getFileSize( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileSize( $realParams ); + } + + /** + * @see FileBackend::getFileStat() + */ + public function getFileStat( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileStat( $realParams ); + } + + /** + * @see FileBackend::getFileContents() + */ + public function getFileContents( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileContents( $realParams ); + } + + /** + * @see FileBackend::getFileSha1Base36() + */ + public function getFileSha1Base36( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileSha1Base36( $realParams ); + } + + /** + * @see FileBackend::getFileProps() + */ + public function getFileProps( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileProps( $realParams ); + } + + /** + * @see FileBackend::streamFile() + */ + public function streamFile( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->streamFile( $realParams ); + } + + /** + * @see FileBackend::getLocalReference() + */ + 
public function getLocalReference( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getLocalReference( $realParams ); + } + + /** + * @see FileBackend::getLocalCopy() + */ + public function getLocalCopy( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getLocalCopy( $realParams ); + } + + /** + * @see FileBackend::getFileList() + */ + public function getFileList( array $params ) { + $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] ); + return $this->backends[$this->masterIndex]->getFileList( $realParams ); + } + + /** + * @see FileBackend::clearCache() + */ + public function clearCache( array $paths = null ) { + foreach ( $this->backends as $backend ) { + $realPaths = is_array( $paths ) ? $this->substPaths( $paths, $backend ) : null; + $backend->clearCache( $realPaths ); + } + } +} diff --git a/includes/filerepo/backend/FileOp.php b/includes/filerepo/backend/FileOp.php new file mode 100644 index 00000000..5844c9f2 --- /dev/null +++ b/includes/filerepo/backend/FileOp.php @@ -0,0 +1,697 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Aaron Schulz + */ + +/** + * Helper class for representing operations with transaction support. + * Do not use this class from places outside FileBackend. + * + * Methods called from attemptBatch() should avoid throwing exceptions at all costs. + * FileOp objects should be lightweight in order to support large arrays in memory. 
+ * + * @ingroup FileBackend + * @since 1.19 + */ +abstract class FileOp { + /** @var Array */ + protected $params = array(); + /** @var FileBackendStore */ + protected $backend; + + protected $state = self::STATE_NEW; // integer + protected $failed = false; // boolean + protected $useLatest = true; // boolean + + protected $sourceSha1; // string + protected $destSameAsSource; // boolean + + /* Object life-cycle */ + const STATE_NEW = 1; + const STATE_CHECKED = 2; + const STATE_ATTEMPTED = 3; + + /* Timeout related parameters */ + const MAX_BATCH_SIZE = 1000; + const TIME_LIMIT_SEC = 300; // 5 minutes + + /** + * Build a new file operation transaction + * + * @params $backend FileBackendStore + * @params $params Array + * @throws MWException + */ + final public function __construct( FileBackendStore $backend, array $params ) { + $this->backend = $backend; + list( $required, $optional ) = $this->allowedParams(); + foreach ( $required as $name ) { + if ( isset( $params[$name] ) ) { + $this->params[$name] = $params[$name]; + } else { + throw new MWException( "File operation missing parameter '$name'." ); + } + } + foreach ( $optional as $name ) { + if ( isset( $params[$name] ) ) { + $this->params[$name] = $params[$name]; + } + } + $this->params = $params; + } + + /** + * Allow stale data for file reads and existence checks + * + * @return void + */ + final protected function allowStaleReads() { + $this->useLatest = false; + } + + /** + * Attempt a series of file operations. + * Callers are responsible for handling file locking. + * + * $opts is an array of options, including: + * 'force' : Errors that would normally cause a rollback do not. + * The remaining operations are still attempted if any fail. + * 'allowStale' : Don't require the latest available data. + * This can increase performance for non-critical writes. + * This has no effect unless the 'force' flag is set. 
+ * + * The resulting Status will be "OK" unless: + * a) unexpected operation errors occurred (network partitions, disk full...) + * b) significant operation errors occured and 'force' was not set + * + * @param $performOps Array List of FileOp operations + * @param $opts Array Batch operation options + * @return Status + */ + final public static function attemptBatch( array $performOps, array $opts ) { + $status = Status::newGood(); + + $allowStale = !empty( $opts['allowStale'] ); + $ignoreErrors = !empty( $opts['force'] ); + + $n = count( $performOps ); + if ( $n > self::MAX_BATCH_SIZE ) { + $status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE ); + return $status; + } + + $predicates = FileOp::newPredicates(); // account for previous op in prechecks + // Do pre-checks for each operation; abort on failure... + foreach ( $performOps as $index => $fileOp ) { + if ( $allowStale ) { + $fileOp->allowStaleReads(); // allow potentially stale reads + } + $subStatus = $fileOp->precheck( $predicates ); + $status->merge( $subStatus ); + if ( !$subStatus->isOK() ) { // operation failed? + $status->success[$index] = false; + ++$status->failCount; + if ( !$ignoreErrors ) { + return $status; // abort + } + } + } + + if ( $ignoreErrors ) { + # Treat all precheck() fatals as merely warnings + $status->setResult( true, $status->value ); + } + + // Restart PHP's execution timer and set the timeout to safe amount. + // This handles cases where the operations take a long time or where we are + // already running low on time left. The old timeout is restored afterwards. + # @TODO: re-enable this for when the number of batches is high. + #$scopedTimeLimit = new FileOpScopedPHPTimeout( self::TIME_LIMIT_SEC ); + + // Attempt each operation... 
+ foreach ( $performOps as $index => $fileOp ) { + if ( $fileOp->failed() ) { + continue; // nothing to do + } + $subStatus = $fileOp->attempt(); + $status->merge( $subStatus ); + if ( $subStatus->isOK() ) { + $status->success[$index] = true; + ++$status->successCount; + } else { + $status->success[$index] = false; + ++$status->failCount; + // We can't continue (even with $ignoreErrors) as $predicates is wrong. + // Log the remaining ops as failed for recovery... + for ( $i = ($index + 1); $i < count( $performOps ); $i++ ) { + $performOps[$i]->logFailure( 'attempt_aborted' ); + } + return $status; // bail out + } + } + + return $status; + } + + /** + * Get the value of the parameter with the given name + * + * @param $name string + * @return mixed Returns null if the parameter is not set + */ + final public function getParam( $name ) { + return isset( $this->params[$name] ) ? $this->params[$name] : null; + } + + /** + * Check if this operation failed precheck() or attempt() + * + * @return bool + */ + final public function failed() { + return $this->failed; + } + + /** + * Get a new empty predicates array for precheck() + * + * @return Array + */ + final public static function newPredicates() { + return array( 'exists' => array(), 'sha1' => array() ); + } + + /** + * Check preconditions of the operation without writing anything + * + * @param $predicates Array + * @return Status + */ + final public function precheck( array &$predicates ) { + if ( $this->state !== self::STATE_NEW ) { + return Status::newFatal( 'fileop-fail-state', self::STATE_NEW, $this->state ); + } + $this->state = self::STATE_CHECKED; + $status = $this->doPrecheck( $predicates ); + if ( !$status->isOK() ) { + $this->failed = true; + } + return $status; + } + + /** + * Attempt the operation, backing up files as needed; this must be reversible + * + * @return Status + */ + final public function attempt() { + if ( $this->state !== self::STATE_CHECKED ) { + return Status::newFatal( 
'fileop-fail-state', self::STATE_CHECKED, $this->state ); + } elseif ( $this->failed ) { // failed precheck + return Status::newFatal( 'fileop-fail-attempt-precheck' ); + } + $this->state = self::STATE_ATTEMPTED; + $status = $this->doAttempt(); + if ( !$status->isOK() ) { + $this->failed = true; + $this->logFailure( 'attempt' ); + } + return $status; + } + + /** + * Get the file operation parameters + * + * @return Array (required params list, optional params list) + */ + protected function allowedParams() { + return array( array(), array() ); + } + + /** + * Get a list of storage paths read from for this operation + * + * @return Array + */ + public function storagePathsRead() { + return array(); + } + + /** + * Get a list of storage paths written to for this operation + * + * @return Array + */ + public function storagePathsChanged() { + return array(); + } + + /** + * @return Status + */ + protected function doPrecheck( array &$predicates ) { + return Status::newGood(); + } + + /** + * @return Status + */ + protected function doAttempt() { + return Status::newGood(); + } + + /** + * Check for errors with regards to the destination file already existing. + * This also updates the destSameAsSource and sourceSha1 member variables. + * A bad status will be returned if there is no chance it can be overwritten. + * + * @param $predicates Array + * @return Status + */ + protected function precheckDestExistence( array $predicates ) { + $status = Status::newGood(); + // Get hash of source file/string and the destination file + $this->sourceSha1 = $this->getSourceSha1Base36(); // FS file or data string + if ( $this->sourceSha1 === null ) { // file in storage? 
+ $this->sourceSha1 = $this->fileSha1( $this->params['src'], $predicates ); + } + $this->destSameAsSource = false; + if ( $this->fileExists( $this->params['dst'], $predicates ) ) { + if ( $this->getParam( 'overwrite' ) ) { + return $status; // OK + } elseif ( $this->getParam( 'overwriteSame' ) ) { + $dhash = $this->fileSha1( $this->params['dst'], $predicates ); + // Check if hashes are valid and match each other... + if ( !strlen( $this->sourceSha1 ) || !strlen( $dhash ) ) { + $status->fatal( 'backend-fail-hashes' ); + } elseif ( $this->sourceSha1 !== $dhash ) { + // Give an error if the files are not identical + $status->fatal( 'backend-fail-notsame', $this->params['dst'] ); + } else { + $this->destSameAsSource = true; // OK + } + return $status; // do nothing; either OK or bad status + } else { + $status->fatal( 'backend-fail-alreadyexists', $this->params['dst'] ); + return $status; + } + } + return $status; + } + + /** + * precheckDestExistence() helper function to get the source file SHA-1. + * Subclasses should overwride this iff the source is not in storage. 
+ * + * @return string|false Returns false on failure + */ + protected function getSourceSha1Base36() { + return null; // N/A + } + + /** + * Check if a file will exist in storage when this operation is attempted + * + * @param $source string Storage path + * @param $predicates Array + * @return bool + */ + final protected function fileExists( $source, array $predicates ) { + if ( isset( $predicates['exists'][$source] ) ) { + return $predicates['exists'][$source]; // previous op assures this + } else { + $params = array( 'src' => $source, 'latest' => $this->useLatest ); + return $this->backend->fileExists( $params ); + } + } + + /** + * Get the SHA-1 of a file in storage when this operation is attempted + * + * @param $source string Storage path + * @param $predicates Array + * @return string|false + */ + final protected function fileSha1( $source, array $predicates ) { + if ( isset( $predicates['sha1'][$source] ) ) { + return $predicates['sha1'][$source]; // previous op assures this + } else { + $params = array( 'src' => $source, 'latest' => $this->useLatest ); + return $this->backend->getFileSha1Base36( $params ); + } + } + + /** + * Log a file operation failure and preserve any temp files + * + * @param $action string + * @return void + */ + final protected function logFailure( $action ) { + $params = $this->params; + $params['failedAction'] = $action; + try { + wfDebugLog( 'FileOperation', + get_class( $this ) . ' failed:' . serialize( $params ) ); + } catch ( Exception $e ) { + // bad config? debug log error? + } + } +} + +/** + * FileOp helper class to expand PHP execution time for a function. + * On construction, set_time_limit() is called and set to $seconds. + * When the object goes out of scope, the timer is restarted, with + * the original time limit minus the time the object existed. 
+ */ +class FileOpScopedPHPTimeout { + protected $startTime; // float; seconds + protected $oldTimeout; // integer; seconds + + protected static $stackDepth = 0; // integer + protected static $totalCalls = 0; // integer + protected static $totalElapsed = 0; // float; seconds + + /* Prevent callers in infinite loops from running forever */ + const MAX_TOTAL_CALLS = 1000000; + const MAX_TOTAL_TIME = 300; // seconds + + /** + * @param $seconds integer + */ + public function __construct( $seconds ) { + if ( ini_get( 'max_execution_time' ) > 0 ) { // CLI uses 0 + if ( self::$totalCalls >= self::MAX_TOTAL_CALLS ) { + trigger_error( "Maximum invocations of " . __CLASS__ . " exceeded." ); + } elseif ( self::$totalElapsed >= self::MAX_TOTAL_TIME ) { + trigger_error( "Time limit within invocations of " . __CLASS__ . " exceeded." ); + } elseif ( self::$stackDepth > 0 ) { // recursion guard + trigger_error( "Resursive invocation of " . __CLASS__ . " attempted." ); + } else { + $this->oldTimeout = ini_set( 'max_execution_time', $seconds ); + $this->startTime = microtime( true ); + ++self::$stackDepth; + ++self::$totalCalls; // proof against < 1us scopes + } + } + } + + /** + * Restore the original timeout. + * This does not account for the timer value on __construct(). + */ + public function __destruct() { + if ( $this->oldTimeout ) { + $elapsed = microtime( true ) - $this->startTime; + // Note: a limit of 0 is treated as "forever" + set_time_limit( max( 1, $this->oldTimeout - (int)$elapsed ) ); + // If each scoped timeout is for less than one second, we end up + // restoring the original timeout without any decrease in value. + // Thus web scripts in an infinite loop can run forever unless we + // take some measures to prevent this. Track total time and calls. + self::$totalElapsed += $elapsed; + --self::$stackDepth; + } + } +} + +/** + * Store a file into the backend from a file on the file system. 
+ * Parameters similar to FileBackendStore::storeInternal(), which include: + * src : source path on file system + * dst : destination storage path + * overwrite : do nothing and pass if an identical file exists at destination + * overwriteSame : override any existing file at destination + */ +class StoreFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists on the file system + if ( !is_file( $this->params['src'] ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if the source file is too big + } elseif ( filesize( $this->params['src'] ) > $this->backend->maxFileSizeInternal() ) { + $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-store', $this->params['src'], $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Store the file at the destination + if ( !$this->destSameAsSource ) { + $status->merge( $this->backend->storeInternal( $this->params ) ); + } + return $status; + } + + protected function getSourceSha1Base36() { + wfSuppressWarnings(); + $hash = sha1_file( $this->params['src'] ); + wfRestoreWarnings(); + if ( $hash !== false ) { + $hash = wfBaseConvert( $hash, 16, 36, 31 ); + } + 
return $hash; + } + + public function storagePathsChanged() { + return array( $this->params['dst'] ); + } +} + +/** + * Create a file in the backend with the given content. + * Parameters similar to FileBackendStore::createInternal(), which include: + * content : the raw file contents + * dst : destination storage path + * overwrite : do nothing and pass if an identical file exists at destination + * overwriteSame : override any existing file at destination + */ +class CreateFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'content', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source data is too big + if ( strlen( $this->getParam( 'content' ) ) > $this->backend->maxFileSizeInternal() ) { + $status->fatal( 'backend-fail-create', $this->params['dst'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-create', $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Create the file at the destination + if ( !$this->destSameAsSource ) { + $status->merge( $this->backend->createInternal( $this->params ) ); + } + return $status; + } + + protected function getSourceSha1Base36() { + return wfBaseConvert( sha1( $this->params['content'] ), 16, 36, 31 ); + } + + public function storagePathsChanged() { + return array( $this->params['dst'] ); + } +} + +/** + * Copy a file from one storage path to 
another in the backend. + * Parameters similar to FileBackendStore::copyInternal(), which include: + * src : source storage path + * dst : destination storage path + * overwrite : do nothing and pass if an identical file exists at destination + * overwriteSame : override any existing file at destination + */ +class CopyFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists + if ( !$this->fileExists( $this->params['src'], $predicates ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-copy', $this->params['src'], $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Do nothing if the src/dst paths are the same + if ( $this->params['src'] !== $this->params['dst'] ) { + // Copy the file into the destination + if ( !$this->destSameAsSource ) { + $status->merge( $this->backend->copyInternal( $this->params ) ); + } + } + return $status; + } + + public function storagePathsRead() { + return array( $this->params['src'] ); + } + + public function storagePathsChanged() { + return array( $this->params['dst'] ); + } +} + +/** + * Move a file from one storage path to another in the backend. 
+ * Parameters similar to FileBackendStore::moveInternal(), which include: + * src : source storage path + * dst : destination storage path + * overwrite : override any existing file at destination + * overwriteSame : do nothing and pass if an identical file exists at destination + */ +class MoveFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src', 'dst' ), array( 'overwrite', 'overwriteSame' ) ); + } + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists + if ( !$this->fileExists( $this->params['src'], $predicates ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + // Check if a file can be placed at the destination + } elseif ( !$this->backend->isPathUsableInternal( $this->params['dst'] ) ) { + $status->fatal( 'backend-fail-move', $this->params['src'], $this->params['dst'] ); + return $status; + } + // Check if destination file exists + $status->merge( $this->precheckDestExistence( $predicates ) ); + if ( $status->isOK() ) { + // Update file existence predicates + $predicates['exists'][$this->params['src']] = false; + $predicates['sha1'][$this->params['src']] = false; + $predicates['exists'][$this->params['dst']] = true; + $predicates['sha1'][$this->params['dst']] = $this->sourceSha1; + } + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + // Do nothing if the src/dst paths are the same + if ( $this->params['src'] !== $this->params['dst'] ) { + if ( !$this->destSameAsSource ) { + // Move the file into the destination + $status->merge( $this->backend->moveInternal( $this->params ) ); + } else { + // Just delete source as the destination needs no changes + $params = array( 'src' => $this->params['src'] ); + $status->merge( $this->backend->deleteInternal( $params ) ); + } + } + return $status; + } + + public function storagePathsRead() { + return
array( $this->params['src'] ); + } + + public function storagePathsChanged() { + // A move removes the source as well as writing the destination, so both paths change + return array( $this->params['src'], $this->params['dst'] ); + } +} + +/** + * Delete a file at the given storage path from the backend. + * Parameters similar to FileBackendStore::deleteInternal(), which include: + * src : source storage path + * ignoreMissingSource : don't return an error if the file does not exist + */ +class DeleteFileOp extends FileOp { + protected function allowedParams() { + return array( array( 'src' ), array( 'ignoreMissingSource' ) ); + } + + protected $needsDelete = true; + + protected function doPrecheck( array &$predicates ) { + $status = Status::newGood(); + // Check if the source file exists + if ( !$this->fileExists( $this->params['src'], $predicates ) ) { + if ( !$this->getParam( 'ignoreMissingSource' ) ) { + $status->fatal( 'backend-fail-notexists', $this->params['src'] ); + return $status; + } + $this->needsDelete = false; + } + // Update file existence predicates + $predicates['exists'][$this->params['src']] = false; + $predicates['sha1'][$this->params['src']] = false; + return $status; // safe to call attempt() + } + + protected function doAttempt() { + $status = Status::newGood(); + if ( $this->needsDelete ) { + // Delete the source file + $status->merge( $this->backend->deleteInternal( $this->params ) ); + } + return $status; + } + + public function storagePathsChanged() { + return array( $this->params['src'] ); + } +} + +/** + * Placeholder operation that has no params and does nothing + */ +class NullFileOp extends FileOp {} diff --git a/includes/filerepo/backend/SwiftFileBackend.php b/includes/filerepo/backend/SwiftFileBackend.php new file mode 100644 index 00000000..a287f488 --- /dev/null +++ b/includes/filerepo/backend/SwiftFileBackend.php @@ -0,0 +1,877 @@ +<?php +/** + * @file + * @ingroup FileBackend + * @author Russ Nelson + * @author Aaron Schulz + */ + +/** + * Class for an OpenStack Swift based file backend.
+ * + * This requires the SwiftCloudFiles MediaWiki extension, which includes + * the php-cloudfiles library (https://github.com/rackspace/php-cloudfiles). + * php-cloudfiles requires the curl, fileinfo, and mb_string PHP extensions. + * + * Status messages should avoid mentioning the Swift account name. + * Likewise, error suppression should be used to avoid path disclosure. + * + * @ingroup FileBackend + * @since 1.19 + */ +class SwiftFileBackend extends FileBackendStore { + /** @var CF_Authentication */ + protected $auth; // Swift authentication handler + protected $authTTL; // integer seconds + protected $swiftAnonUser; // string; username to handle unauthenticated requests + protected $maxContCacheSize = 100; // integer; max containers with entries + + /** @var CF_Connection */ + protected $conn; // Swift connection handle + protected $connStarted = 0; // integer UNIX timestamp + protected $connContainers = array(); // container object cache + + /** + * @see FileBackendStore::__construct() + * Additional $config params include: + * swiftAuthUrl : Swift authentication server URL + * swiftUser : Swift user used by MediaWiki (account:username) + * swiftKey : Swift authentication key for the above user + * swiftAuthTTL : Swift authentication TTL (seconds) + * swiftAnonUser : Swift user used for end-user requests (account:username) + * shardViaHashLevels : Map of container names to sharding config with: + * 'base' : base of hash characters, 16 or 36 + * 'levels' : the number of hash levels (and digits) + * 'repeat' : hash subdirectories are prefixed with all the + * parent hash directory names (e.g. "a/ab/abc") + */ + public function __construct( array $config ) { + parent::__construct( $config ); + // Required settings + $this->auth = new CF_Authentication( + $config['swiftUser'], + $config['swiftKey'], + null, // account; unused + $config['swiftAuthUrl'] + ); + // Optional settings + $this->authTTL = isset( $config['swiftAuthTTL'] ) + ? 
$config['swiftAuthTTL'] + : 120; // some sane number + $this->swiftAnonUser = isset( $config['swiftAnonUser'] ) + ? $config['swiftAnonUser'] + : ''; + $this->shardViaHashLevels = isset( $config['shardViaHashLevels'] ) + ? $config['shardViaHashLevels'] + : ''; + } + + /** + * @see FileBackendStore::resolveContainerPath() + */ + protected function resolveContainerPath( $container, $relStoragePath ) { + if ( strlen( urlencode( $relStoragePath ) ) > 1024 ) { + return null; // too long for Swift + } + return $relStoragePath; + } + + /** + * @see FileBackendStore::isPathUsableInternal() + */ + public function isPathUsableInternal( $storagePath ) { + list( $container, $rel ) = $this->resolveStoragePathReal( $storagePath ); + if ( $rel === null ) { + return false; // invalid + } + + try { + $this->getContainer( $container ); + return true; // container exists + } catch ( NoSuchContainerException $e ) { + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? 
+ $this->logException( $e, __METHOD__, array( 'path' => $storagePath ) ); + } + + return false; + } + + /** + * @see FileBackendStore::doCreateInternal() + */ + protected function doCreateInternal( array $params ) { + $status = Status::newGood(); + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the destination container and object + try { + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-create', $params['dst'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Get a SHA-1 hash of the object + $sha1Hash = wfBaseConvert( sha1( $params['content'] ), 16, 36, 31 ); + + // (c) Actually create the object + try { + // Create a fresh CF_Object with no fields preloaded. + // We don't want to preserve headers, metadata, and such. + $obj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD + // Note: metadata keys stored as [Upper case char][[Lower case char]...] + $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + // Manually set the ETag (https://github.com/rackspace/php-cloudfiles/issues/59). + // The MD5 here will be checked within Swift against its own MD5. 
+ $obj->set_etag( md5( $params['content'] ) ); + // Use the same content type as StreamFile for security + $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); + // Actually write the object in Swift + $obj->write( $params['content'] ); + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doStoreInternal() + */ + protected function doStoreInternal( array $params ) { + $status = Status::newGood(); + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the destination container and object + try { + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Get a SHA-1 hash of the object + $sha1Hash = sha1_file( $params['src'] ); + if ( $sha1Hash === false ) { // source doesn't exist? 
+ $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } + $sha1Hash = wfBaseConvert( $sha1Hash, 16, 36, 31 ); + + // (c) Actually store the object + try { + // Create a fresh CF_Object with no fields preloaded. + // We don't want to preserve headers, metadata, and such. + $obj = new CF_Object( $dContObj, $dstRel, false, false ); // skip HEAD + // Note: metadata keys stored as [Upper case char][[Lower case char]...] + $obj->metadata = array( 'Sha1base36' => $sha1Hash ); + // The MD5 here will be checked within Swift against its own MD5. + $obj->set_etag( md5_file( $params['src'] ) ); + // Use the same content type as StreamFile for security + $obj->content_type = StreamFile::contentTypeFromPath( $params['dst'] ); + // Actually write the object in Swift + $obj->load_from_filename( $params['src'], True ); // calls $obj->write() + } catch ( BadContentTypeException $e ) { + $status->fatal( 'backend-fail-contenttype', $params['dst'] ); + } catch ( IOException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doCopyInternal() + */ + protected function doCopyInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + list( $dstCont, $dstRel ) = $this->resolveStoragePathReal( $params['dst'] ); + if ( $dstRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); + return $status; + } + + // (a) Check the source/destination containers and destination object + try { + $sContObj = $this->getContainer( $srcCont ); + $dContObj = $this->getContainer( $dstCont ); + if ( empty( $params['overwrite'] ) && + $this->fileExists( array( 'src' => $params['dst'], 'latest' => 1 ) ) ) + { + $status->fatal( 'backend-fail-alreadyexists', $params['dst'] ); + return $status; + } + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Actually copy the file to the destination + try { + $sContObj->copy_object_to( $srcRel, $dContObj, $dstRel ); + } catch ( NoSuchObjectException $e ) { // source object does not exist + $status->fatal( 'backend-fail-copy', $params['src'], $params['dst'] ); + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doDeleteInternal() + */ + protected function doDeleteInternal( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; + } + + try { + $sContObj = $this->getContainer( $srcCont ); + $sContObj->delete_object( $srcRel ); + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } catch ( NoSuchObjectException $e ) { + if ( empty( $params['ignoreMissingSource'] ) ) { + $status->fatal( 'backend-fail-delete', $params['src'] ); + } + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::doPrepareInternal() + */ + protected function doPrepareInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + // (a) Check if container already exists + try { + $contObj = $this->getContainer( $fullCont ); + // NoSuchContainerException not thrown: container must exist + return $status; // already exists + } catch ( NoSuchContainerException $e ) { + // NoSuchContainerException thrown: container does not exist + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Create container as needed + try { + $contObj = $this->createContainer( $fullCont ); + if ( $this->swiftAnonUser != '' ) { + // Make container public to end-users... + $status->merge( $this->setContainerAccess( + $contObj, + array( $this->auth->username, $this->swiftAnonUser ), // read + array( $this->auth->username ) // write + ) ); + } + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + return $status; + } + + /** + * @see FileBackendStore::doSecureInternal() + */ + protected function doSecureInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + if ( $this->swiftAnonUser != '' ) { + // Restrict container from end-users... + try { + // doPrepareInternal() should have been called, + // so the Swift container should already exist... + $contObj = $this->getContainer( $fullCont ); // normally a cache hit + // NoSuchContainerException not thrown: container must exist + if ( !isset( $contObj->mw_wasSecured ) ) { + $status->merge( $this->setContainerAccess( + $contObj, + array( $this->auth->username ), // read + array( $this->auth->username ) // write + ) ); + // @TODO: when php-cloudfiles supports container + // metadata, we can make use of that to avoid RTTs + $contObj->mw_wasSecured = true; // avoid useless RTTs + } + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + } + } + + return $status; + } + + /** + * @see FileBackendStore::doCleanInternal() + */ + protected function doCleanInternal( $fullCont, $dir, array $params ) { + $status = Status::newGood(); + + // Only containers themselves can be removed, all else is virtual + if ( $dir != '' ) { + return $status; // nothing to do + } + + // (a) Check the container + try { + $contObj = $this->getContainer( $fullCont, true ); + } catch ( NoSuchContainerException $e ) { + return $status; // ok, nothing to do + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + // (b) Delete the container if empty + if ( $contObj->object_count == 0 ) { + try { + $this->deleteContainer( $fullCont ); + } catch ( NoSuchContainerException $e ) { + return $status; // race? + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? 
+ $status->fatal( 'backend-fail-internal', $this->name ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + } + + return $status; + } + + /** + * @see FileBackendStore::doGetFileStat() + * + * @param $params Array Must include 'src' (storage path); also passed to headersFromParams() + * @return Array|false|null Map with 'mtime', 'size' and 'sha1' keys; false if the path + * is invalid or the container/object does not exist; null on a backend error + */ + protected function doGetFileStat( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return false; // invalid storage path + } + + $stat = false; + try { + $contObj = $this->getContainer( $srcCont ); + $srcObj = $contObj->get_object( $srcRel, $this->headersFromParams( $params ) ); + $this->addMissingMetadata( $srcObj, $params['src'] ); + $stat = array( + // Convert dates like "Tue, 03 Jan 2012 22:01:04 GMT" to TS_MW + 'mtime' => wfTimestamp( TS_MW, $srcObj->last_modified ), + 'size' => $srcObj->content_length, + 'sha1' => $srcObj->metadata['Sha1base36'] + ); + } catch ( NoSuchContainerException $e ) { + } catch ( NoSuchObjectException $e ) { + } catch ( InvalidResponseException $e ) { + $stat = null; + } catch ( Exception $e ) { // some other exception?
+ $stat = null; + $this->logException( $e, __METHOD__, $params ); + } + + return $stat; + } + + /** + * Fill in any missing object metadata and save it to Swift + * + * @param $obj CF_Object + * @param $path string Storage path to object + * @return bool Success + * @throws Exception cloudfiles exceptions + */ + protected function addMissingMetadata( CF_Object $obj, $path ) { + if ( isset( $obj->metadata['Sha1base36'] ) ) { + return true; // nothing to do + } + $status = Status::newGood(); + $scopeLockS = $this->getScopedFileLocks( array( $path ), LockManager::LOCK_UW, $status ); + if ( $status->isOK() ) { + $tmpFile = $this->getLocalCopy( array( 'src' => $path, 'latest' => 1 ) ); + if ( $tmpFile ) { + $hash = $tmpFile->getSha1Base36(); + if ( $hash !== false ) { + $obj->metadata['Sha1base36'] = $hash; + $obj->sync_metadata(); // save to Swift + return true; // success + } + } + } + $obj->metadata['Sha1base36'] = false; + return false; // failed + } + + /** + * @see FileBackend::getFileContents() + */ + public function getFileContents( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return false; // invalid storage path + } + + if ( !$this->fileExists( $params ) ) { + return null; + } + + $data = false; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD request + $data = $obj->read( $this->headersFromParams( $params ) ); + } catch ( NoSuchContainerException $e ) { + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? 
+ $this->logException( $e, __METHOD__, $params ); + } + + return $data; + } + + /** + * @see FileBackendStore::getFileListInternal() + */ + public function getFileListInternal( $fullCont, $dir, array $params ) { + return new SwiftFileBackendFileList( $this, $fullCont, $dir ); + } + + /** + * Do not call this function outside of SwiftFileBackendFileList + * + * @param $fullCont string Resolved container name + * @param $dir string Resolved storage directory with no trailing slash + * @param $after string Storage path of file to list items after + * @param $limit integer Max number of items to list + * @return Array + */ + public function getFileListPageInternal( $fullCont, $dir, $after, $limit ) { + $files = array(); + + try { + $container = $this->getContainer( $fullCont ); + $prefix = ( $dir == '' ) ? null : "{$dir}/"; + $files = $container->list_objects( $limit, $after, $prefix ); + } catch ( NoSuchContainerException $e ) { + } catch ( NoSuchObjectException $e ) { + } catch ( InvalidResponseException $e ) { + } catch ( Exception $e ) { // some other exception? 
+ $this->logException( $e, __METHOD__, array( 'cont' => $fullCont, 'dir' => $dir ) ); + } + + return $files; + } + + /** + * @see FileBackendStore::doGetFileSha1base36() + */ + public function doGetFileSha1base36( array $params ) { + $stat = $this->getFileStat( $params ); + if ( $stat ) { + return $stat['sha1']; + } else { + return false; + } + } + + /** + * @see FileBackendStore::doStreamFile() + * + * @param $params Array Must include 'src' (storage path); also passed to headersFromParams() + * @return Status Fatal on an invalid path, a missing container, or a stream/connection failure + */ + protected function doStreamFile( array $params ) { + $status = Status::newGood(); + + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + $status->fatal( 'backend-fail-invalidpath', $params['src'] ); + return $status; // unresolvable path: there is no container or object to stream + } + + try { + $cont = $this->getContainer( $srcCont ); + } catch ( NoSuchContainerException $e ) { + $status->fatal( 'backend-fail-stream', $params['src'] ); + return $status; + } catch ( InvalidResponseException $e ) { + $status->fatal( 'backend-fail-connect', $this->name ); + return $status; + } catch ( Exception $e ) { // some other exception? + $status->fatal( 'backend-fail-stream', $params['src'] ); + $this->logException( $e, __METHOD__, $params ); + return $status; + } + + try { + $output = fopen( 'php://output', 'wb' ); + $obj = new CF_Object( $cont, $srcRel, false, false ); // skip HEAD request + $obj->stream( $output, $this->headersFromParams( $params ) ); + } catch ( InvalidResponseException $e ) { // 404? connection problem? + $status->fatal( 'backend-fail-stream', $params['src'] ); + } catch ( Exception $e ) { // some other exception?
+ $status->fatal( 'backend-fail-stream', $params['src'] ); + $this->logException( $e, __METHOD__, $params ); + } + + return $status; + } + + /** + * @see FileBackendStore::getLocalCopy() + */ + public function getLocalCopy( array $params ) { + list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] ); + if ( $srcRel === null ) { + return null; + } + + if ( !$this->fileExists( $params ) ) { + return null; + } + + $tmpFile = null; + try { + $sContObj = $this->getContainer( $srcCont ); + $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD + // Get source file extension + $ext = FileBackend::extensionFromPath( $srcRel ); + // Create a new temporary file... + $tmpFile = TempFSFile::factory( wfBaseName( $srcRel ) . '_', $ext ); + if ( $tmpFile ) { + $handle = fopen( $tmpFile->getPath(), 'wb' ); + if ( $handle ) { + $obj->stream( $handle, $this->headersFromParams( $params ) ); + fclose( $handle ); + } else { + $tmpFile = null; // couldn't open temp file + } + } + } catch ( NoSuchContainerException $e ) { + $tmpFile = null; + } catch ( InvalidResponseException $e ) { + $tmpFile = null; + } catch ( Exception $e ) { // some other exception? + $tmpFile = null; + $this->logException( $e, __METHOD__, $params ); + } + + return $tmpFile; + } + + /** + * Get headers to send to Swift when reading a file based + * on a FileBackend params array, e.g. that of getLocalCopy(). + * $params is currently only checked for a 'latest' flag. 
+ * + * @param $params Array + * @return Array + */ + protected function headersFromParams( array $params ) { + $hdrs = array(); + if ( !empty( $params['latest'] ) ) { + $hdrs[] = 'X-Newest: true'; + } + return $hdrs; + } + + /** + * Set read/write permissions for a Swift container + * + * @param $contObj CF_Container Swift container + * @param $readGrps Array Swift users who can read (account:user) + * @param $writeGrps Array Swift users who can write (account:user) + * @return Status + */ + protected function setContainerAccess( + CF_Container $contObj, array $readGrps, array $writeGrps + ) { + $creds = $contObj->cfs_auth->export_credentials(); + + $url = $creds['storage_url'] . '/' . rawurlencode( $contObj->name ); + + // Note: 10 second timeout consistent with php-cloudfiles + $req = new CurlHttpRequest( $url, array( 'method' => 'POST', 'timeout' => 10 ) ); + $req->setHeader( 'X-Auth-Token', $creds['auth_token'] ); + $req->setHeader( 'X-Container-Read', implode( ',', $readGrps ) ); + $req->setHeader( 'X-Container-Write', implode( ',', $writeGrps ) ); + + return $req->execute(); // should return 204 + } + + /** + * Get a connection to the Swift proxy + * + * @return CF_Connection|false + * @throws InvalidResponseException + */ + protected function getConnection() { + if ( $this->conn === false ) { + throw new InvalidResponseException; // failed last attempt + } + // Session keys expire after a while, so we renew them periodically + if ( $this->conn && ( time() - $this->connStarted ) > $this->authTTL ) { + $this->conn->close(); // close active cURL connections + $this->conn = null; + } + // Authenticate with proxy and get a session key... 
+ if ( $this->conn === null ) { + $this->connContainers = array(); + try { + $this->auth->authenticate(); + $this->conn = new CF_Connection( $this->auth ); + $this->connStarted = time(); + } catch ( AuthenticationException $e ) { + $this->conn = false; // don't keep re-trying + } catch ( InvalidResponseException $e ) { + $this->conn = false; // don't keep re-trying + } + } + if ( !$this->conn ) { + throw new InvalidResponseException; // auth/connection problem + } + return $this->conn; + } + + /** + * @see FileBackendStore::doClearCache() + */ + protected function doClearCache( array $paths = null ) { + $this->connContainers = array(); // clear container object cache + } + + /** + * Get a Swift container object, possibly from process cache. + * Use $reCache if the file count or byte count is needed. + * + * @param $container string Container name + * @param $reCache bool Refresh the process cache + * @return CF_Container + */ + protected function getContainer( $container, $reCache = false ) { + $conn = $this->getConnection(); // Swift proxy connection + if ( $reCache ) { + unset( $this->connContainers[$container] ); // purge cache + } + if ( !isset( $this->connContainers[$container] ) ) { + $contObj = $conn->get_container( $container ); + // NoSuchContainerException not thrown: container must exist + if ( count( $this->connContainers ) >= $this->maxContCacheSize ) { // trim cache? 
+ reset( $this->connContainers ); + $key = key( $this->connContainers ); + unset( $this->connContainers[$key] ); + } + $this->connContainers[$container] = $contObj; // cache it + } + return $this->connContainers[$container]; + } + + /** + * Create a Swift container + * + * @param $container string Container name + * @return CF_Container + */ + protected function createContainer( $container ) { + $conn = $this->getConnection(); // Swift proxy connection + $contObj = $conn->create_container( $container ); + $this->connContainers[$container] = $contObj; // cache it + return $contObj; + } + + /** + * Delete a Swift container + * + * @param $container string Container name + * @return void + */ + protected function deleteContainer( $container ) { + $conn = $this->getConnection(); // Swift proxy connection + $conn->delete_container( $container ); + unset( $this->connContainers[$container] ); // purge cache + } + + /** + * Log an unexpected exception for this backend + * + * @param $e Exception + * @param $func string + * @param $params Array + * @return void + */ + protected function logException( Exception $e, $func, array $params ) { + wfDebugLog( 'SwiftBackend', + get_class( $e ) . " in '{$func}' (given '" . serialize( $params ) . "')" . + ( $e instanceof InvalidResponseException + ? ": {$e->getMessage()}" + : "" + ) + ); + } +} + +/** + * SwiftFileBackend helper class to page through object listings. + * Swift also has a listing limit of 10,000 objects for sanity. + * Do not use this class from places outside SwiftFileBackend. 
+ * + * @ingroup FileBackend + */ +class SwiftFileBackendFileList implements Iterator { + /** @var Array */ + protected $bufferIter = array(); + protected $bufferAfter = null; // string; list items *after* this path + protected $pos = 0; // integer + + /** @var SwiftFileBackend */ + protected $backend; + protected $container; // + protected $dir; // string storage directory + protected $suffixStart; // integer + + const PAGE_SIZE = 5000; // file listing buffer size + + /** + * @param $backend SwiftFileBackend + * @param $fullCont string Resolved container name + * @param $dir string Resolved directory relative to container + */ + public function __construct( SwiftFileBackend $backend, $fullCont, $dir ) { + $this->backend = $backend; + $this->container = $fullCont; + $this->dir = $dir; + if ( substr( $this->dir, -1 ) === '/' ) { + $this->dir = substr( $this->dir, 0, -1 ); // remove trailing slash + } + if ( $this->dir == '' ) { // whole container + $this->suffixStart = 0; + } else { // dir within container + $this->suffixStart = strlen( $this->dir ) + 1; // size of "path/to/dir/" + } + } + + public function current() { + return substr( current( $this->bufferIter ), $this->suffixStart ); + } + + public function key() { + return $this->pos; + } + + public function next() { + // Advance to the next file in the page + next( $this->bufferIter ); + ++$this->pos; + // Check if there are no files left in this page and + // advance to the next page if this page was not empty. 
+ if ( !$this->valid() && count( $this->bufferIter ) ) { + $this->bufferAfter = end( $this->bufferIter ); + $this->bufferIter = $this->backend->getFileListPageInternal( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE + ); + } + } + + public function rewind() { + $this->pos = 0; + $this->bufferAfter = null; + $this->bufferIter = $this->backend->getFileListPageInternal( + $this->container, $this->dir, $this->bufferAfter, self::PAGE_SIZE + ); + } + + public function valid() { + return ( current( $this->bufferIter ) !== false ); // no paths can have this value + } +} diff --git a/includes/filerepo/backend/TempFSFile.php b/includes/filerepo/backend/TempFSFile.php new file mode 100644 index 00000000..7843d6cd --- /dev/null +++ b/includes/filerepo/backend/TempFSFile.php @@ -0,0 +1,92 @@ +<?php +/** + * @file + * @ingroup FileBackend + */ + +/** + * This class is used to hold the location and do limited manipulation + * of files stored temporarily (usually this will be $wgTmpDirectory) + * + * @ingroup FileBackend + */ +class TempFSFile extends FSFile { + protected $canDelete = false; // bool; garbage collect the temp file + + /** @var Array of active temp files to purge on shutdown */ + protected static $instances = array(); + + /** + * Make a new temporary file on the file system. + * Temporary files may be purged when the file object falls out of scope. + * + * @param $prefix string + * @param $extension string + * @return TempFSFile|null + */ + public static function factory( $prefix, $extension = '' ) { + $base = wfTempDir() . '/' . $prefix . dechex( mt_rand( 0, 99999999 ) ); + $ext = ( $extension != '' ) ? 
".{$extension}" : ""; + for ( $attempt = 1; true; $attempt++ ) { + $path = "{$base}-{$attempt}{$ext}"; + wfSuppressWarnings(); + $newFileHandle = fopen( $path, 'x' ); + wfRestoreWarnings(); + if ( $newFileHandle ) { + fclose( $newFileHandle ); + break; // got it + } + if ( $attempt >= 15 ) { + return null; // give up + } + } + $tmpFile = new self( $path ); + $tmpFile->canDelete = true; // safely instantiated + return $tmpFile; + } + + /** + * Purge this file off the file system + * + * @return bool Success + */ + public function purge() { + $this->canDelete = false; // done + wfSuppressWarnings(); + $ok = unlink( $this->path ); + wfRestoreWarnings(); + return $ok; + } + + /** + * Clean up the temporary file only after an object goes out of scope + * + * @param $object Object + * @return void + */ + public function bind( $object ) { + if ( is_object( $object ) ) { + $object->tempFSFileReferences[] = $this; + } + } + + /** + * Set flag to not clean up after the temporary file + * + * @return void + */ + public function preserve() { + $this->canDelete = false; + } + + /** + * Cleans up after the temporary file by deleting it + */ + function __destruct() { + if ( $this->canDelete ) { + wfSuppressWarnings(); + unlink( $this->path ); + wfRestoreWarnings(); + } + } +} diff --git a/includes/filerepo/backend/lockmanager/DBLockManager.php b/includes/filerepo/backend/lockmanager/DBLockManager.php new file mode 100644 index 00000000..045056ea --- /dev/null +++ b/includes/filerepo/backend/lockmanager/DBLockManager.php @@ -0,0 +1,469 @@ +<?php + +/** + * Version of LockManager based on using DB table locks. + * This is meant for multi-wiki systems that may share files. + * All locks are blocking, so it might be useful to set a small + * lock-wait timeout via server config to curtail deadlocks. + * + * All lock requests for a resource, identified by a hash string, will map + * to one bucket. 
Each bucket maps to one or several peer DBs, each on their + * own server, all having the filelocks.sql tables (with row-level locking). + * A majority of peer DBs must agree for a lock to be acquired. + * + * Caching is used to avoid hitting servers that are down. + * + * @ingroup LockManager + * @since 1.19 + */ +class DBLockManager extends LockManager { + /** @var Array Map of DB names to server config */ + protected $dbServers; // (DB name => server config array) + /** @var Array Map of bucket indexes to peer DB lists */ + protected $dbsByBucket; // (bucket index => (ldb1, ldb2, ...)) + /** @var BagOStuff */ + protected $statusCache; + + protected $lockExpiry; // integer number of seconds + protected $safeDelay; // integer number of seconds + + protected $session = 0; // random base-36 string; generated in the constructor + /** @var Array Map of Database connections (DB name => Database) */ + protected $conns = array(); + + /** + * Construct a new instance from configuration. + * + * $config parameters include: + * 'dbServers' : Associative array of DB names to server configuration. + * Configuration is an associative array that includes: + * 'host' - DB server name + * 'dbname' - DB name + * 'type' - DB type (mysql,postgres,...) + * 'user' - DB user + * 'password' - DB user password + * 'tablePrefix' - DB table prefix + * 'flags' - DB flags (see DatabaseBase) + * 'dbsByBucket' : Array of 1-16 consecutive integer keys, starting from 0, + * each having an odd-numbered list of DB names (peers) as values. + * Any DB named 'localDBMaster' will automatically use the DB master + * settings for this wiki (without the need for a dbServers entry). + * 'lockExpiry' : Lock timeout (seconds) for dropped connections. [optional] + * This tells the DB server how long to wait before assuming + * connection failure and releasing all the locks for a session. + * + * @param Array $config + */ + public function __construct( array $config ) { + $this->dbServers = isset( $config['dbServers'] ) + ? 
$config['dbServers'] + : array(); // likely just using 'localDBMaster' + // Sanitize dbsByBucket config to prevent PHP errors + $this->dbsByBucket = array_filter( $config['dbsByBucket'], 'is_array' ); + $this->dbsByBucket = array_values( $this->dbsByBucket ); // consecutive + + if ( isset( $config['lockExpiry'] ) ) { + $this->lockExpiry = $config['lockExpiry']; + } else { + $met = ini_get( 'max_execution_time' ); + $this->lockExpiry = $met ? $met : 60; // use some sane amount if 0 + } + $this->safeDelay = ( $this->lockExpiry <= 0 ) + ? 60 // pick a safe-ish number to match DB timeout default + : $this->lockExpiry; // cover worst case + + foreach ( $this->dbsByBucket as $bucket ) { + if ( count( $bucket ) > 1 ) { + // Tracks peers that couldn't be queried recently to avoid lengthy + // connection timeouts. This is useless if each bucket has one peer. + $this->statusCache = wfGetMainCache(); + break; + } + } + + $this->session = ''; + for ( $i = 0; $i < 5; $i++ ) { + $this->session .= mt_rand( 0, 2147483647 ); + } + $this->session = wfBaseConvert( sha1( $this->session ), 16, 36, 31 ); + } + + /** + * @see LockManager::doLock() + */ + protected function doLock( array $paths, $type ) { + $status = Status::newGood(); + + $pathsToLock = array(); + // Get locks that need to be acquired (buckets => locks)... + foreach ( $paths as $path ) { + if ( isset( $this->locksHeld[$path][$type] ) ) { + ++$this->locksHeld[$path][$type]; + } elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { + $this->locksHeld[$path][$type] = 1; + } else { + $bucket = $this->getBucketFromKey( $path ); + $pathsToLock[$bucket][] = $path; + } + } + + $lockedPaths = array(); // files locked in this attempt + // Attempt to acquire these locks... + foreach ( $pathsToLock as $bucket => $paths ) { + // Try to acquire the locks for this bucket + $res = $this->doLockingQueryAll( $bucket, $paths, $type ); + if ( $res === 'cantacquire' ) { + // Resources already locked by another process. 
+ // Abort and unlock everything we just locked. + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } elseif ( $res !== true ) { + // Couldn't contact any DBs for this bucket. + // Abort and unlock everything we just locked. + $status->fatal( 'lockmanager-fail-db-bucket', $bucket ); + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } + // Record these locks as active + foreach ( $paths as $path ) { + $this->locksHeld[$path][$type] = 1; // locked + } + // Keep track of what locks were made in this attempt + $lockedPaths = array_merge( $lockedPaths, $paths ); + } + + return $status; + } + + /** + * @see LockManager::doUnlock() + */ + protected function doUnlock( array $paths, $type ) { + $status = Status::newGood(); + + foreach ( $paths as $path ) { + if ( !isset( $this->locksHeld[$path] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } elseif ( !isset( $this->locksHeld[$path][$type] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } else { + --$this->locksHeld[$path][$type]; + if ( $this->locksHeld[$path][$type] <= 0 ) { + unset( $this->locksHeld[$path][$type] ); + } + if ( !count( $this->locksHeld[$path] ) ) { + unset( $this->locksHeld[$path] ); // no SH or EX locks left for key + } + } + } + + // Reference count the locks held and COMMIT when zero + if ( !count( $this->locksHeld ) ) { + $status->merge( $this->finishLockTransactions() ); + } + + return $status; + } + + /** + * Get a connection to a lock DB and acquire locks on $paths. + * This does not use GET_LOCK() per http://bugs.mysql.com/bug.php?id=1118. 
+ * + * @param $lockDb string + * @param $paths Array + * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @return bool Resources able to be locked + * @throws DBError + */ + protected function doLockingQuery( $lockDb, array $paths, $type ) { + if ( $type == self::LOCK_EX ) { // writer locks + $db = $this->getConnection( $lockDb ); + if ( !$db ) { + return false; // bad config + } + $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + # Build up values for INSERT clause + $data = array(); + foreach ( $keys as $key ) { + $data[] = array( 'fle_key' => $key ); + } + # Wait on any existing writers and block new ones if we get in + $db->insert( 'filelocks_exclusive', $data, __METHOD__ ); + } + return true; + } + + /** + * Attempt to acquire locks with the peers for a bucket. + * This should avoid throwing any exceptions. + * + * @param $bucket integer + * @param $paths Array List of resource keys to lock + * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @return bool|string One of (true, 'cantacquire', 'dberrors') + */ + protected function doLockingQueryAll( $bucket, array $paths, $type ) { + $yesVotes = 0; // locks made on trustable DBs + $votesLeft = count( $this->dbsByBucket[$bucket] ); // remaining DBs + $quorum = floor( $votesLeft/2 + 1 ); // simple majority + // Get votes for each DB, in order, until we have enough... 
+ foreach ( $this->dbsByBucket[$bucket] as $lockDb ) { + // Check that DB is not *known* to be down + if ( $this->cacheCheckFailures( $lockDb ) ) { + try { + // Attempt to acquire the lock on this DB + if ( !$this->doLockingQuery( $lockDb, $paths, $type ) ) { + return 'cantacquire'; // vetoed; resource locked + } + ++$yesVotes; // success for this peer + if ( $yesVotes >= $quorum ) { + return true; // lock obtained + } + } catch ( DBConnectionError $e ) { + $this->cacheRecordFailure( $lockDb ); + } catch ( DBError $e ) { + if ( $this->lastErrorIndicatesLocked( $lockDb ) ) { + return 'cantacquire'; // vetoed; resource locked + } + } + } + --$votesLeft; + $votesNeeded = $quorum - $yesVotes; + if ( $votesNeeded > $votesLeft ) { + // The remaining peers cannot provide enough votes to reach the quorum + break; // short-circuit + } + } + // At this point, we must not have met the quorum + return 'dberrors'; // not enough votes to ensure correctness + } + + /** + * Get (or reuse) a connection to a lock DB + * + * @param $lockDb string + * @return Database|null Null if $lockDb has no usable configuration + * @throws DBError + */ + protected function getConnection( $lockDb ) { + if ( !isset( $this->conns[$lockDb] ) ) { + $db = null; + if ( $lockDb === 'localDBMaster' ) { + $lb = wfGetLBFactory()->newMainLB(); + $db = $lb->getConnection( DB_MASTER ); + } elseif ( isset( $this->dbServers[$lockDb] ) ) { + $config = $this->dbServers[$lockDb]; + $db = DatabaseBase::factory( $config['type'], $config ); + } + if ( !$db ) { + return null; // config error? + } + $this->conns[$lockDb] = $db; + $this->conns[$lockDb]->clearFlag( DBO_TRX ); + # If the connection drops, try to avoid letting the DB rollback + # and release the locks before the file operations are finished. + # This won't handle the case of DB server restarts however. 
+ $options = array(); + if ( $this->lockExpiry > 0 ) { + $options['connTimeout'] = $this->lockExpiry; + } + $this->conns[$lockDb]->setSessionOptions( $options ); + $this->initConnection( $lockDb, $this->conns[$lockDb] ); + } + if ( !$this->conns[$lockDb]->trxLevel() ) { + $this->conns[$lockDb]->begin(); // start transaction + } + return $this->conns[$lockDb]; + } + + /** + * Do additional initialization for new lock DB connection + * + * @param $lockDb string + * @param $db DatabaseBase + * @return void + * @throws DBError + */ + protected function initConnection( $lockDb, DatabaseBase $db ) {} + + /** + * Finish the open transaction on each lock DB connection. + * This issues a ROLLBACK rather than a COMMIT, which discards the lock rows. + * This should avoid throwing any exceptions. + * + * @return Status + */ + protected function finishLockTransactions() { + $status = Status::newGood(); + foreach ( $this->conns as $lockDb => $db ) { + if ( $db->trxLevel() ) { // in transaction + try { + $db->rollback(); // finish transaction and kill any rows + } catch ( DBError $e ) { + $status->fatal( 'lockmanager-fail-db-release', $lockDb ); + } + } + } + return $status; + } + + /** + * Check if the last DB error for $lockDb indicates + * that a requested resource was locked by another process. + * This should avoid throwing any exceptions. + * + * @param $lockDb string + * @return bool + */ + protected function lastErrorIndicatesLocked( $lockDb ) { + if ( isset( $this->conns[$lockDb] ) ) { // sanity + $db = $this->conns[$lockDb]; + return ( $db->wasDeadlock() || $db->wasLockTimeout() ); + } + return false; + } + + /** + * Checks if the DB has not recently had connection/query errors. + * This just avoids wasting time on doomed connection attempts. 
+ * + * @param $lockDb string + * @return bool + */ + protected function cacheCheckFailures( $lockDb ) { + if ( $this->statusCache && $this->safeDelay > 0 ) { + $path = $this->getMissKey( $lockDb ); + $misses = $this->statusCache->get( $path ); + return !$misses; + } + return true; + } + + /** + * Log a lock request failure to the cache + * + * @param $lockDb string + * @return bool Success + */ + protected function cacheRecordFailure( $lockDb ) { + if ( $this->statusCache && $this->safeDelay > 0 ) { + $path = $this->getMissKey( $lockDb ); + $misses = $this->statusCache->get( $path ); + if ( $misses ) { + return $this->statusCache->incr( $path ); + } else { + return $this->statusCache->add( $path, 1, $this->safeDelay ); + } + } + return true; + } + + /** + * Get a cache key for recent query misses for a DB + * + * @param $lockDb string + * @return string + */ + protected function getMissKey( $lockDb ) { + return 'lockmanager:querymisses:' . str_replace( ' ', '_', $lockDb ); + } + + /** + * Get the bucket for resource path. + * This should avoid throwing any exceptions. + * + * @param $path string + * @return integer + */ + protected function getBucketFromKey( $path ) { + $prefix = substr( sha1( $path ), 0, 2 ); // first 2 hex chars (8 bits) + return intval( base_convert( $prefix, 16, 10 ) ) % count( $this->dbsByBucket ); + } + + /** + * Make sure remaining locks get cleared for sanity + */ + function __destruct() { + foreach ( $this->conns as $lockDb => $db ) { + if ( $db->trxLevel() ) { // in transaction + try { + $db->rollback(); // finish transaction and kill any rows + } catch ( DBError $e ) { + // oh well + } + } + $db->close(); + } + } +} + +/** + * MySQL version of DBLockManager that supports shared locks. + * All locks are non-blocking, which avoids deadlocks. 
+ * + * @ingroup LockManager + */ +class MySqlLockManager extends DBLockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + protected function initConnection( $lockDb, DatabaseBase $db ) { + # Let this transaction see lock rows from other transactions + $db->query( "SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;" ); + } + + protected function doLockingQuery( $lockDb, array $paths, $type ) { + $db = $this->getConnection( $lockDb ); + if ( !$db ) { + return false; + } + $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + # Build up values for INSERT clause + $data = array(); + foreach ( $keys as $key ) { + $data[] = array( 'fls_key' => $key, 'fls_session' => $this->session ); + } + # Block new writers... + $db->insert( 'filelocks_shared', $data, __METHOD__, array( 'IGNORE' ) ); + # Actually do the locking queries... + if ( $type == self::LOCK_SH ) { // reader locks + # Bail if there are any existing writers... + $blocked = $db->selectField( 'filelocks_exclusive', '1', + array( 'fle_key' => $keys ), + __METHOD__ + ); + # Prospective writers that haven't yet updated filelocks_exclusive + # will recheck filelocks_shared after doing so and bail due to our entry. + } else { // writer locks + $encSession = $db->addQuotes( $this->session ); + # Bail if there are any existing writers... + # This may detect readers, but the safe check for them is below. + # Note: if two writers come at the same time, both bail :) + $blocked = $db->selectField( 'filelocks_shared', '1', + array( 'fls_key' => $keys, "fls_session != $encSession" ), + __METHOD__ + ); + if ( !$blocked ) { + # Build up values for INSERT clause + $data = array(); + foreach ( $keys as $key ) { + $data[] = array( 'fle_key' => $key ); + } + # Block new readers/writers... 
+ $db->insert( 'filelocks_exclusive', $data, __METHOD__ ); + # Bail if there are any existing readers... + $blocked = $db->selectField( 'filelocks_shared', '1', + array( 'fls_key' => $keys, "fls_session != $encSession" ), + __METHOD__ + ); + } + } + return !$blocked; + } +} diff --git a/includes/filerepo/backend/lockmanager/FSLockManager.php b/includes/filerepo/backend/lockmanager/FSLockManager.php new file mode 100644 index 00000000..42074fd3 --- /dev/null +++ b/includes/filerepo/backend/lockmanager/FSLockManager.php @@ -0,0 +1,202 @@ +<?php + +/** + * Simple version of LockManager based on using FS lock files. + * All locks are non-blocking, which avoids deadlocks. + * + * This should work fine for small sites running off one server. + * Do not use this with 'lockDirectory' set to an NFS mount unless the + * NFS client is at least version 2.6.12. Otherwise, the BSD flock() + * locks will be ignored; see http://nfs.sourceforge.net/#section_d. + * + * @ingroup LockManager + * @since 1.19 + */ +class FSLockManager extends LockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + protected $lockDir; // global dir for all servers + + /** @var Array Map of (locked key => lock type => lock file handle) */ + protected $handles = array(); + + /** + * Construct a new instance from configuration. 
+ * + * $config includes: + * 'lockDirectory' : Directory containing the lock files + * + * @param array $config + */ + function __construct( array $config ) { + parent::__construct( $config ); + $this->lockDir = $config['lockDirectory']; + } + + protected function doLock( array $paths, $type ) { + $status = Status::newGood(); + + $lockedPaths = array(); // files locked in this attempt + foreach ( $paths as $path ) { + $status->merge( $this->doSingleLock( $path, $type ) ); + if ( $status->isOK() ) { + $lockedPaths[] = $path; + } else { + // Abort and unlock everything + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } + } + + return $status; + } + + protected function doUnlock( array $paths, $type ) { + $status = Status::newGood(); + + foreach ( $paths as $path ) { + $status->merge( $this->doSingleUnlock( $path, $type ) ); + } + + return $status; + } + + /** + * Lock a single resource key + * + * @param $path string + * @param $type integer + * @return Status + */ + protected function doSingleLock( $path, $type ) { + $status = Status::newGood(); + + if ( isset( $this->locksHeld[$path][$type] ) ) { + ++$this->locksHeld[$path][$type]; + } elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { + $this->locksHeld[$path][$type] = 1; + } else { + wfSuppressWarnings(); + $handle = fopen( $this->getLockPath( $path ), 'a+' ); + wfRestoreWarnings(); + if ( !$handle ) { // lock dir missing? + wfMkdirParents( $this->lockDir ); + $handle = fopen( $this->getLockPath( $path ), 'a+' ); // try again + } + if ( $handle ) { + // Either a shared or exclusive lock + $lock = ( $type == self::LOCK_SH ) ? 
LOCK_SH : LOCK_EX; + if ( flock( $handle, $lock | LOCK_NB ) ) { + // Record this lock as active + $this->locksHeld[$path][$type] = 1; + $this->handles[$path][$type] = $handle; + } else { + fclose( $handle ); + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + } else { + $status->fatal( 'lockmanager-fail-openlock', $path ); + } + } + + return $status; + } + + /** + * Unlock a single resource key + * + * @param $path string + * @param $type integer + * @return Status + */ + protected function doSingleUnlock( $path, $type ) { + $status = Status::newGood(); + + if ( !isset( $this->locksHeld[$path] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } elseif ( !isset( $this->locksHeld[$path][$type] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } else { + $handlesToClose = array(); + --$this->locksHeld[$path][$type]; + if ( $this->locksHeld[$path][$type] <= 0 ) { + unset( $this->locksHeld[$path][$type] ); + // If a LOCK_SH comes in while we have a LOCK_EX, we don't + // actually add a handler, so check for handler existence. + if ( isset( $this->handles[$path][$type] ) ) { + // Mark this handle to be unlocked and closed + $handlesToClose[] = $this->handles[$path][$type]; + unset( $this->handles[$path][$type] ); + } + } + // Unlock handles to release locks and delete + // any lock files that end up with no locks on them... 
+ if ( wfIsWindows() ) { + // Windows: for any process, including this one, + // calling unlink() on a locked file will fail + $status->merge( $this->closeLockHandles( $path, $handlesToClose ) ); + $status->merge( $this->pruneKeyLockFiles( $path ) ); + } else { + // Unix: unlink() can be used on files currently open by this + // process and we must do so in order to avoid race conditions + $status->merge( $this->pruneKeyLockFiles( $path ) ); + $status->merge( $this->closeLockHandles( $path, $handlesToClose ) ); + } + } + + return $status; + } + + private function closeLockHandles( $path, array $handlesToClose ) { + $status = Status::newGood(); + foreach ( $handlesToClose as $handle ) { + wfSuppressWarnings(); + if ( !flock( $handle, LOCK_UN ) ) { + $status->fatal( 'lockmanager-fail-releaselock', $path ); + } + if ( !fclose( $handle ) ) { + $status->warning( 'lockmanager-fail-closelock', $path ); + } + wfRestoreWarnings(); + } + return $status; + } + + private function pruneKeyLockFiles( $path ) { + $status = Status::newGood(); + if ( !count( $this->locksHeld[$path] ) ) { + wfSuppressWarnings(); + # No locks are held for the lock file anymore + if ( !unlink( $this->getLockPath( $path ) ) ) { + $status->warning( 'lockmanager-fail-deletelock', $path ); + } + wfRestoreWarnings(); + unset( $this->locksHeld[$path] ); + unset( $this->handles[$path] ); + } + return $status; + } + + /** + * Get the path to the lock file for a key + * @param $path string + * @return string + */ + protected function getLockPath( $path ) { + $hash = self::sha1Base36( $path ); + return "{$this->lockDir}/{$hash}.lock"; + } + + function __destruct() { + // Make sure remaining locks get cleared for sanity + foreach ( $this->locksHeld as $path => $locks ) { + $this->doSingleUnlock( $path, self::LOCK_EX ); + $this->doSingleUnlock( $path, self::LOCK_SH ); + } + } +} diff --git a/includes/filerepo/backend/lockmanager/LSLockManager.php b/includes/filerepo/backend/lockmanager/LSLockManager.php new file 
mode 100644 index 00000000..b7ac743c --- /dev/null +++ b/includes/filerepo/backend/lockmanager/LSLockManager.php @@ -0,0 +1,295 @@ +<?php + +/** + * Manage locks using a lock daemon server. + * + * Version of LockManager based on using lock daemon servers. + * This is meant for multi-wiki systems that may share files. + * All locks are non-blocking, which avoids deadlocks. + * + * All lock requests for a resource, identified by a hash string, will map + * to one bucket. Each bucket maps to one or several peer servers, each + * running LockServerDaemon.php, listening on a designated TCP port. + * A majority of peers must agree for a lock to be acquired. + * + * @ingroup LockManager + * @since 1.19 + */ +class LSLockManager extends LockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_SH, + self::LOCK_EX => self::LOCK_EX + ); + + /** @var Array Map of server names to server config */ + protected $lockServers; // (server name => server config array) + /** @var Array Map of bucket indexes to peer server lists */ + protected $srvsByBucket; // (bucket index => (lsrv1, lsrv2, ...)) + + /** @var Array Map of server connections (server name => resource) */ + protected $conns = array(); + + protected $connTimeout; // float number of seconds + protected $session = ''; // random string; base-36 form of a SHA-1 hash + + /** + * Construct a new instance from configuration. + * + * $config parameters include: + * 'lockServers' : Associative array of server names to configuration. + * Configuration is an associative array that includes: + * 'host' - IP address/hostname + * 'port' - TCP port + * 'authKey' - Secret string the lock server uses + * 'srvsByBucket' : Array of 1-16 consecutive integer keys, starting from 0, + * each having an odd-numbered list of server names (peers) as values. + * 'connTimeout' : Lock server connection attempt timeout. 
[optional] + * + * @param Array $config + */ + public function __construct( array $config ) { + $this->lockServers = $config['lockServers']; + // Sanitize srvsByBucket config to prevent PHP errors + $this->srvsByBucket = array_filter( $config['srvsByBucket'], 'is_array' ); + $this->srvsByBucket = array_values( $this->srvsByBucket ); // consecutive + + if ( isset( $config['connTimeout'] ) ) { + $this->connTimeout = $config['connTimeout']; + } else { + $this->connTimeout = 3; // use some sane amount + } + + $this->session = ''; + for ( $i = 0; $i < 5; $i++ ) { + $this->session .= mt_rand( 0, 2147483647 ); + } + $this->session = wfBaseConvert( sha1( $this->session ), 16, 36, 31 ); + } + + protected function doLock( array $paths, $type ) { + $status = Status::newGood(); + + $pathsToLock = array(); + // Get locks that need to be acquired (buckets => locks)... + foreach ( $paths as $path ) { + if ( isset( $this->locksHeld[$path][$type] ) ) { + ++$this->locksHeld[$path][$type]; + } elseif ( isset( $this->locksHeld[$path][self::LOCK_EX] ) ) { + $this->locksHeld[$path][$type] = 1; + } else { + $bucket = $this->getBucketFromKey( $path ); + $pathsToLock[$bucket][] = $path; + } + } + + $lockedPaths = array(); // files locked in this attempt + // Attempt to acquire these locks... + foreach ( $pathsToLock as $bucket => $paths ) { + // Try to acquire the locks for this bucket + $res = $this->doLockingRequestAll( $bucket, $paths, $type ); + if ( $res === 'cantacquire' ) { + // Resources already locked by another process. + // Abort and unlock everything we just locked. + foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } elseif ( $res !== true ) { + // Couldn't contact any servers for this bucket. + // Abort and unlock everything we just locked. 
+ foreach ( $paths as $path ) { + $status->fatal( 'lockmanager-fail-acquirelock', $path ); + } + $status->merge( $this->doUnlock( $lockedPaths, $type ) ); + return $status; + } + // Record these locks as active + foreach ( $paths as $path ) { + $this->locksHeld[$path][$type] = 1; // locked + } + // Keep track of what locks were made in this attempt + $lockedPaths = array_merge( $lockedPaths, $paths ); + } + + return $status; + } + + protected function doUnlock( array $paths, $type ) { + $status = Status::newGood(); + + foreach ( $paths as $path ) { + if ( !isset( $this->locksHeld[$path] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } elseif ( !isset( $this->locksHeld[$path][$type] ) ) { + $status->warning( 'lockmanager-notlocked', $path ); + } else { + --$this->locksHeld[$path][$type]; + if ( $this->locksHeld[$path][$type] <= 0 ) { + unset( $this->locksHeld[$path][$type] ); + } + if ( !count( $this->locksHeld[$path] ) ) { + unset( $this->locksHeld[$path] ); // no SH or EX locks left for key + } + } + } + + // Reference count the locks held and release locks when zero + if ( !count( $this->locksHeld ) ) { + $status->merge( $this->releaseLocks() ); + } + + return $status; + } + + /** + * Get a connection to a lock server and acquire locks on $paths + * + * @param $lockSrv string + * @param $paths Array + * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @return bool Resources able to be locked + */ + protected function doLockingRequest( $lockSrv, array $paths, $type ) { + if ( $type == self::LOCK_SH ) { // reader locks + $type = 'SH'; + } elseif ( $type == self::LOCK_EX ) { // writer locks + $type = 'EX'; + } else { + return true; // ok... + } + + // Send out the command and get the response... 
+ $keys = array_unique( array_map( 'LockManager::sha1Base36', $paths ) ); + $response = $this->sendCommand( $lockSrv, 'ACQUIRE', $type, $keys ); + + return ( $response === 'ACQUIRED' ); + } + + /** + * Send a command and get back the response + * + * @param $lockSrv string + * @param $action string + * @param $type string + * @param $values Array + * @return string|false Trimmed response line, or false on connection/write/read failure + */ + protected function sendCommand( $lockSrv, $action, $type, $values ) { + $conn = $this->getConnection( $lockSrv ); + if ( !$conn ) { + return false; // no connection + } + $authKey = $this->lockServers[$lockSrv]['authKey']; + // Build the command as a flat string... + $values = implode( '|', $values ); + $key = sha1( $this->session . $action . $type . $values . $authKey ); + // Send out the command... + if ( fwrite( $conn, "{$this->session}:$key:$action:$type:$values\n" ) === false ) { + return false; + } + // Get the response... + $response = fgets( $conn ); + if ( $response === false ) { + return false; + } + return trim( $response ); + } + + /** + * Attempt to acquire locks with the peers for a bucket + * + * @param $bucket integer + * @param $paths Array List of resource keys to lock + * @param $type integer LockManager::LOCK_EX or LockManager::LOCK_SH + * @return bool|string One of (true, 'cantacquire', 'srverrors') + */ + protected function doLockingRequestAll( $bucket, array $paths, $type ) { + $yesVotes = 0; // locks made on trustable servers + $votesLeft = count( $this->srvsByBucket[$bucket] ); // remaining peers + $quorum = floor( $votesLeft/2 + 1 ); // simple majority + // Get votes for each peer, in order, until we have enough... 
+ foreach ( $this->srvsByBucket[$bucket] as $lockSrv ) { + // Attempt to acquire the lock on this peer + if ( !$this->doLockingRequest( $lockSrv, $paths, $type ) ) { + return 'cantacquire'; // vetoed; resource locked + } + ++$yesVotes; // success for this peer + if ( $yesVotes >= $quorum ) { + return true; // lock obtained + } + --$votesLeft; + $votesNeeded = $quorum - $yesVotes; + if ( $votesNeeded > $votesLeft ) { + // The remaining peers cannot provide enough votes to reach the quorum + break; // short-circuit + } + } + // At this point, we must not have met the quorum + return 'srverrors'; // not enough votes to ensure correctness + } + + /** + * Get (or reuse) a connection to a lock server + * + * @param $lockSrv string + * @return resource|null Null if the connection attempt failed + */ + protected function getConnection( $lockSrv ) { + if ( !isset( $this->conns[$lockSrv] ) ) { + $cfg = $this->lockServers[$lockSrv]; + wfSuppressWarnings(); + $errno = $errstr = ''; + $conn = fsockopen( $cfg['host'], $cfg['port'], $errno, $errstr, $this->connTimeout ); + wfRestoreWarnings(); + if ( $conn === false ) { + return null; + } + $sec = floor( $this->connTimeout ); + $usec = floor( ( $this->connTimeout - floor( $this->connTimeout ) ) * 1e6 ); + stream_set_timeout( $conn, $sec, $usec ); + $this->conns[$lockSrv] = $conn; + } + return $this->conns[$lockSrv]; + } + + /** + * Release all locks that this session is holding + * + * @return Status + */ + protected function releaseLocks() { + $status = Status::newGood(); + foreach ( $this->conns as $lockSrv => $conn ) { + $response = $this->sendCommand( $lockSrv, 'RELEASE_ALL', '', array() ); + if ( $response !== 'RELEASED_ALL' ) { + $status->fatal( 'lockmanager-fail-svr-release', $lockSrv ); + } + } + return $status; + } + + /** + * Get the bucket for resource path. + * This should avoid throwing any exceptions. 
+ * + * @param $path string + * @return integer + */ + protected function getBucketFromKey( $path ) { + $prefix = substr( sha1( $path ), 0, 2 ); // first 2 hex chars (8 bits) + return intval( base_convert( $prefix, 16, 10 ) ) % count( $this->srvsByBucket ); + } + + /** + * Make sure remaining locks get cleared for sanity + */ + function __destruct() { + $this->releaseLocks(); + foreach ( $this->conns as $conn ) { + fclose( $conn ); + } + } +} diff --git a/includes/filerepo/backend/lockmanager/LockManager.php b/includes/filerepo/backend/lockmanager/LockManager.php new file mode 100644 index 00000000..23603a4f --- /dev/null +++ b/includes/filerepo/backend/lockmanager/LockManager.php @@ -0,0 +1,182 @@ +<?php +/** + * @defgroup LockManager Lock management + * @ingroup FileBackend + */ + +/** + * @file + * @ingroup LockManager + * @author Aaron Schulz + */ + +/** + * Class for handling resource locking. + * + * Locks on resource keys can either be shared or exclusive. + * + * Implementations must keep track of what is locked by this process + * in-memory and support nested locking calls (using reference counting). + * At least LOCK_UW and LOCK_EX must be implemented. LOCK_SH can be a no-op. + * Locks should either be non-blocking or have low wait timeouts. + * + * Subclasses should avoid throwing exceptions at all costs. 
+ * + * @ingroup LockManager + * @since 1.19 + */ +abstract class LockManager { + /** @var Array Mapping of lock types to the type actually used */ + protected $lockTypeMap = array( + self::LOCK_SH => self::LOCK_SH, + self::LOCK_UW => self::LOCK_EX, // subclasses may use self::LOCK_SH + self::LOCK_EX => self::LOCK_EX + ); + + /** @var Array Map of (resource path => lock type => count) */ + protected $locksHeld = array(); + + /* Lock types; stronger locks have higher values */ + const LOCK_SH = 1; // shared lock (for reads) + const LOCK_UW = 2; // shared lock (for reads used to write elsewhere) + const LOCK_EX = 3; // exclusive lock (for writes) + + /** + * Construct a new instance from configuration + * + * @param $config Array + */ + public function __construct( array $config ) {} + + /** + * Lock the resources at the given abstract paths + * + * @param $paths Array List of resource names + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function lock( array $paths, $type = self::LOCK_EX ) { + return $this->doLock( array_unique( $paths ), $this->lockTypeMap[$type] ); + } + + /** + * Unlock the resources at the given abstract paths + * + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + final public function unlock( array $paths, $type = self::LOCK_EX ) { + return $this->doUnlock( array_unique( $paths ), $this->lockTypeMap[$type] ); + } + + /** + * Get the base 36 SHA-1 of a string, padded to 31 digits + * + * @param $path string + * @return string + */ + final protected static function sha1Base36( $path ) { + return wfBaseConvert( sha1( $path ), 16, 36, 31 ); + } + + /** + * Lock resources with the given keys and lock type + * + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + abstract protected function doLock( array $paths, $type ); + + /** + * Unlock resources with the 
given keys and lock type + * + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @return Status + */ + abstract protected function doUnlock( array $paths, $type ); +} + +/** + * Self releasing locks + * + * LockManager helper class to handle scoped locks, which + * release when an object is destroyed or goes out of scope. + * + * @ingroup LockManager + * @since 1.19 + */ +class ScopedLock { + /** @var LockManager */ + protected $manager; + /** @var Status */ + protected $status; + /** @var Array List of resource paths*/ + protected $paths; + + protected $type; // integer lock type + + /** + * @param $manager LockManager + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @param $status Status + */ + protected function __construct( + LockManager $manager, array $paths, $type, Status $status + ) { + $this->manager = $manager; + $this->paths = $paths; + $this->status = $status; + $this->type = $type; + } + + protected function __clone() {} + + /** + * Get a ScopedLock object representing a lock on resource paths. + * Any locks are released once this object goes out of scope. + * The status object is updated with any errors or warnings. 
+ * + * @param $manager LockManager + * @param $paths Array List of storage paths + * @param $type integer LockManager::LOCK_* constant + * @param $status Status + * @return ScopedLock|null Returns null on failure + */ + public static function factory( + LockManager $manager, array $paths, $type, Status $status + ) { + $lockStatus = $manager->lock( $paths, $type ); + $status->merge( $lockStatus ); + if ( $lockStatus->isOK() ) { + return new self( $manager, $paths, $type, $status ); + } + return null; + } + + function __destruct() { + $wasOk = $this->status->isOK(); + $this->status->merge( $this->manager->unlock( $this->paths, $this->type ) ); + if ( $wasOk ) { + // Make sure status is OK, despite any unlockFiles() fatals + $this->status->setResult( true, $this->status->value ); + } + } +} + +/** + * Simple version of LockManager that does nothing + * @since 1.19 + */ +class NullLockManager extends LockManager { + protected function doLock( array $paths, $type ) { + return Status::newGood(); + } + + protected function doUnlock( array $paths, $type ) { + return Status::newGood(); + } +} diff --git a/includes/filerepo/backend/lockmanager/LockManagerGroup.php b/includes/filerepo/backend/lockmanager/LockManagerGroup.php new file mode 100644 index 00000000..11e77972 --- /dev/null +++ b/includes/filerepo/backend/lockmanager/LockManagerGroup.php @@ -0,0 +1,89 @@ +<?php +/** + * Class to handle file lock manager registration + * + * @ingroup LockManager + * @author Aaron Schulz + * @since 1.19 + */ +class LockManagerGroup { + + /** + * @var LockManagerGroup + */ + protected static $instance = null; + + /** @var Array of (name => ('class' =>, 'config' =>, 'instance' =>)) */ + protected $managers = array(); + + protected function __construct() {} + protected function __clone() {} + + /** + * @return LockManagerGroup + */ + public static function singleton() { + if ( self::$instance == null ) { + self::$instance = new self(); + self::$instance->initFromGlobals(); + } + return 
self::$instance; + } + + /** + * Register lock managers from the global variables + * + * @return void + */ + protected function initFromGlobals() { + global $wgLockManagers; + + $this->register( $wgLockManagers ); + } + + /** + * Register an array of file lock manager configurations + * + * @param $configs Array + * @return void + * @throws MWException + */ + protected function register( array $configs ) { + foreach ( $configs as $config ) { + if ( !isset( $config['name'] ) ) { + throw new MWException( "Cannot register a lock manager with no name." ); + } + $name = $config['name']; + if ( !isset( $config['class'] ) ) { + throw new MWException( "Cannot register lock manager `{$name}` with no class." ); + } + $class = $config['class']; + unset( $config['class'] ); // lock manager won't need this + $this->managers[$name] = array( + 'class' => $class, + 'config' => $config, + 'instance' => null + ); + } + } + + /** + * Get the lock manager object with a given name + * + * @param $name string + * @return LockManager + * @throws MWException + */ + public function get( $name ) { + if ( !isset( $this->managers[$name] ) ) { + throw new MWException( "No lock manager defined with the name `$name`." 
); + } + // Lazy-load the actual lock manager instance + if ( !isset( $this->managers[$name]['instance'] ) ) { + $class = $this->managers[$name]['class']; + $config = $this->managers[$name]['config']; + $this->managers[$name]['instance'] = new $class( $config ); + } + return $this->managers[$name]['instance']; + } +} diff --git a/includes/filerepo/ArchivedFile.php b/includes/filerepo/file/ArchivedFile.php index 0d9e349b..3b9bd7f0 100644 --- a/includes/filerepo/ArchivedFile.php +++ b/includes/filerepo/file/ArchivedFile.php @@ -3,13 +3,13 @@ * Deleted file in the 'filearchive' table * * @file - * @ingroup FileRepo + * @ingroup FileAbstraction */ /** * Class representing a row of the 'filearchive' table * - * @ingroup FileRepo + * @ingroup FileAbstraction */ class ArchivedFile { /**#@+ @@ -73,8 +73,8 @@ class ArchivedFile { $this->dataLoaded = false; $this->exists = false; - if( is_object( $title ) ) { - $this->title = $title; + if( $title instanceof Title ) { + $this->title = File::normalizeTitle( $title, 'exception' ); $this->name = $title->getDBkey(); } @@ -86,7 +86,7 @@ class ArchivedFile { $this->key = $key; } - if ( !$id && !$key && !is_object( $title ) ) { + if ( !$id && !$key && !( $title instanceof Title ) ) { throw new MWException( "No specifications provided to ArchivedFile constructor." 
); } } @@ -177,6 +177,9 @@ class ArchivedFile { /** * Loads a file object from the filearchive table + * + * @param $row + * * @return ArchivedFile */ public static function newFromRow( $row ) { @@ -205,6 +208,8 @@ class ArchivedFile { /** * Return the associated title object + * + * @return Title */ public function getTitle() { return $this->title; @@ -212,16 +217,24 @@ class ArchivedFile { /** * Return the file name + * + * @return string */ public function getName() { return $this->name; } + /** + * @return int + */ public function getID() { $this->load(); return $this->id; } + /** + * @return bool + */ public function exists() { $this->load(); return $this->exists; @@ -229,6 +242,7 @@ class ArchivedFile { /** * Return the FileStore key + * @return string */ public function getKey() { $this->load(); @@ -237,6 +251,7 @@ class ArchivedFile { /** * Return the FileStore key (overriding base File class) + * @return string */ public function getStorageKey() { return $this->getKey(); @@ -244,6 +259,7 @@ class ArchivedFile { /** * Return the FileStore storage group + * @return string */ public function getGroup() { return $this->group; @@ -251,6 +267,7 @@ class ArchivedFile { /** * Return the width of the image + * @return int */ public function getWidth() { $this->load(); @@ -259,6 +276,7 @@ class ArchivedFile { /** * Return the height of the image + * @return int */ public function getHeight() { $this->load(); @@ -267,6 +285,7 @@ class ArchivedFile { /** * Get handler-specific metadata + * @return string */ public function getMetadata() { $this->load(); @@ -275,6 +294,7 @@ class ArchivedFile { /** * Return the size of the image file, in bytes + * @return int */ public function getSize() { $this->load(); @@ -283,6 +303,7 @@ class ArchivedFile { /** * Return the bits of the image file, in bytes + * @return int */ public function getBits() { $this->load(); @@ -291,6 +312,7 @@ class ArchivedFile { /** * Returns the mime type of the file. 
+ * @return string */ public function getMimeType() { $this->load(); @@ -326,6 +348,7 @@ class ArchivedFile { /** * Return the type of the media in the file. * Use the value returned by this function with the MEDIATYPE_xxx constants. + * @return string */ public function getMediaType() { $this->load(); @@ -334,6 +357,8 @@ class ArchivedFile { /** * Return upload timestamp. + * + * @return string */ public function getTimestamp() { $this->load(); @@ -342,6 +367,8 @@ class ArchivedFile { /** * Return the user ID of the uploader. + * + * @return int */ public function getUser() { $this->load(); @@ -354,6 +381,8 @@ class ArchivedFile { /** * Return the user name of the uploader. + * + * @return string */ public function getUserText() { $this->load(); @@ -366,6 +395,8 @@ class ArchivedFile { /** * Return upload description. + * + * @return string */ public function getDescription() { $this->load(); @@ -378,6 +409,8 @@ class ArchivedFile { /** * Return the user ID of the uploader. + * + * @return int */ public function getRawUser() { $this->load(); @@ -386,6 +419,8 @@ class ArchivedFile { /** * Return the user name of the uploader. + * + * @return string */ public function getRawUserText() { $this->load(); @@ -394,6 +429,8 @@ class ArchivedFile { /** * Return upload description. + * + * @return string */ public function getRawDescription() { $this->load(); @@ -424,10 +461,11 @@ class ArchivedFile { * Determine if the current user is allowed to view a particular * field of this FileStore image file, if it's marked as deleted. 
* @param $field Integer + * @param $user User object to check, or null to use $wgUser * @return bool */ - public function userCan( $field ) { + public function userCan( $field, User $user = null ) { $this->load(); - return Revision::userCanBitfield( $this->deleted, $field ); + return Revision::userCanBitfield( $this->deleted, $field, $user ); } } diff --git a/includes/filerepo/File.php b/includes/filerepo/file/File.php index 1fd6d452..f74fb678 100644 --- a/includes/filerepo/File.php +++ b/includes/filerepo/file/File.php @@ -1,9 +1,16 @@ <?php /** + * @defgroup FileAbstraction File abstraction + * @ingroup FileRepo + * + * Represents files in a repository. + */ + +/** * Base code for files. * * @file - * @ingroup FileRepo + * @ingroup FileAbstraction */ /** @@ -23,14 +30,21 @@ * The convenience functions wfLocalFile() and wfFindFile() should be sufficient * in most cases. * - * @ingroup FileRepo + * @ingroup FileAbstraction */ abstract class File { const DELETED_FILE = 1; const DELETED_COMMENT = 2; const DELETED_USER = 4; const DELETED_RESTRICTED = 8; - const RENDER_NOW = 1; + + /** Force rendering in the current process */ + const RENDER_NOW = 1; + /** + * Force rendering even if thumbnail already exist and using RENDER_NOW + * I.e. 
you have to pass both flags: File::RENDER_NOW | File::RENDER_FORCE + */ + const RENDER_FORCE = 2; const DELETE_SOURCE = 1; @@ -54,30 +68,88 @@ abstract class File { */ /** - * @var LocalRepo + * @var FileRepo|false */ var $repo; /** - * @var Title + * @var Title|false */ var $title; var $lastError, $redirected, $redirectedTitle; /** + * @var FSFile|false + */ + protected $fsFile; + + /** * @var MediaHandler */ protected $handler; /** - * Call this constructor from child classes + * @var string + */ + protected $url, $extension, $name, $path, $hashPath, $pageCount, $transformScript; + + /** + * @var bool + */ + protected $canRender, $isSafeFile; + + /** + * @var string Required Repository class type + */ + protected $repoClass = 'FileRepo'; + + /** + * Call this constructor from child classes. + * + * Both $title and $repo are optional, though some functions + * may return false or throw exceptions if they are not set. + * Most subclasses will want to call assertRepoDefined() here. + * + * @param $title Title|string|false + * @param $repo FileRepo|false */ function __construct( $title, $repo ) { + if ( $title !== false ) { // subclasses may not use MW titles + $title = self::normalizeTitle( $title, 'exception' ); + } $this->title = $title; $this->repo = $repo; } + /** + * Given a string or Title object return either a + * valid Title object with namespace NS_FILE or null + * + * @param $title Title|string + * @param $exception string|false Use 'exception' to throw an error on bad titles + * @return Title|null + */ + static function normalizeTitle( $title, $exception = false ) { + $ret = $title; + if ( $ret instanceof Title ) { + # Normalize NS_MEDIA -> NS_FILE + if ( $ret->getNamespace() == NS_MEDIA ) { + $ret = Title::makeTitleSafe( NS_FILE, $ret->getDBkey() ); + # Sanity check the title namespace + } elseif ( $ret->getNamespace() !== NS_FILE ) { + $ret = null; + } + } else { + # Convert strings to Title objects + $ret = Title::makeTitleSafe( NS_FILE, (string)$ret 
); + } + if ( !$ret && $exception !== false ) { + throw new MWException( "`$title` is not a valid file title." ); + } + return $ret; + } + function __get( $name ) { $function = array( $this, 'get' . ucfirst( $name ) ); if ( !is_callable( $function ) ) { @@ -123,8 +195,7 @@ abstract class File { static function checkExtensionCompatibility( File $old, $new ) { $oldMime = $old->getMimeType(); $n = strrpos( $new, '.' ); - $newExt = self::normalizeExtension( - $n ? substr( $new, $n + 1 ) : '' ); + $newExt = self::normalizeExtension( $n ? substr( $new, $n + 1 ) : '' ); $mimeMagic = MimeMagic::singleton(); return $mimeMagic->isMatchingExtension( $newExt, $oldMime ); } @@ -158,6 +229,7 @@ abstract class File { */ public function getName() { if ( !isset( $this->name ) ) { + $this->assertRepoDefined(); $this->name = $this->repo->getNameFromTitle( $this->title ); } return $this->name; @@ -179,9 +251,12 @@ abstract class File { /** * Return the associated title object - * @return Title + * + * @return Title|false */ - public function getTitle() { return $this->title; } + public function getTitle() { + return $this->title; + } /** * Return the title used to find this file @@ -202,6 +277,7 @@ abstract class File { */ public function getUrl() { if ( !isset( $this->url ) ) { + $this->assertRepoDefined(); $this->url = $this->repo->getZoneUrl( 'public' ) . '/' . $this->getUrlRel(); } return $this->url; @@ -218,6 +294,9 @@ abstract class File { return wfExpandUrl( $this->getUrl(), PROTO_RELATIVE ); } + /** + * @return string + */ public function getCanonicalUrl() { return wfExpandUrl( $this->getUrl(), PROTO_CANONICAL ); } @@ -226,11 +305,12 @@ abstract class File { * @return string */ function getViewURL() { - if( $this->mustRender()) { - if( $this->canRender() ) { + if ( $this->mustRender() ) { + if ( $this->canRender() ) { return $this->createThumb( $this->getWidth() ); } else { - wfDebug(__METHOD__.': supposed to render '.$this->getName().' 
('.$this->getMimeType()."), but can't!\n"); + wfDebug( __METHOD__.': supposed to render ' . $this->getName() . + ' (' . $this->getMimeType() . "), but can't!\n" ); return $this->getURL(); #hm... return NULL? } } else { @@ -239,7 +319,7 @@ abstract class File { } /** - * Return the full filesystem path to the file. Note that this does + * Return the storage path to the file. Note that this does * not mean that a file actually exists under that location. * * This path depends on whether directory hashing is active or not, @@ -253,21 +333,30 @@ abstract class File { */ public function getPath() { if ( !isset( $this->path ) ) { - $this->path = $this->repo->getZonePath('public') . '/' . $this->getRel(); + $this->assertRepoDefined(); + $this->path = $this->repo->getZonePath( 'public' ) . '/' . $this->getRel(); } return $this->path; } /** - * Alias for getPath() - * - * @deprecated since 1.18 Use getPath(). + * Get an FS copy or original of this file and return the path. + * Returns false on failure. Callers must not alter the file. + * Temporary files are cleared automatically. * - * @return string + * @return string|false */ - public function getFullPath() { - wfDeprecated( __METHOD__ ); - return $this->getPath(); + public function getLocalRefPath() { + $this->assertRepoDefined(); + if ( !isset( $this->fsFile ) ) { + $this->fsFile = $this->repo->getLocalReference( $this->getPath() ); + if ( !$this->fsFile ) { + $this->fsFile = false; // null => false; cache negative hits + } + } + return ( $this->fsFile ) + ? 
$this->fsFile->getPath() + : false; } /** @@ -292,6 +381,8 @@ abstract class File { * STUB * Overridden by LocalFile, UnregisteredLocalFile * + * @param $page int + * * @return false|number */ public function getHeight( $page = 1 ) { @@ -357,7 +448,7 @@ abstract class File { public function convertMetadataVersion($metadata, $version) { $handler = $this->getHandler(); if ( !is_array( $metadata ) ) { - //just to make the return type consistant + // Just to make the return type consistent $metadata = unserialize( $metadata ); } if ( $handler ) { @@ -402,7 +493,9 @@ abstract class File { * Overridden by LocalFile, * STUB */ - function getMediaType() { return MEDIATYPE_UNKNOWN; } + function getMediaType() { + return MEDIATYPE_UNKNOWN; + } /** * Checks if the output of transform() for this file is likely @@ -488,6 +581,8 @@ abstract class File { * @return bool */ protected function _getIsSafeFile() { + global $wgTrustedMediaFormats; + if ( $this->allowInlineDisplay() ) { return true; } @@ -495,8 +590,6 @@ abstract class File { return true; } - global $wgTrustedMediaFormats; - $type = $this->getMediaType(); $mime = $this->getMimeType(); #wfDebug("LocalFile::isSafeFile: type= $type, mime= $mime\n"); @@ -532,7 +625,7 @@ abstract class File { * @return bool */ function isTrustedFile() { - #this could be implemented to check a flag in the databas, + #this could be implemented to check a flag in the database, #look for signatures, etc return false; } @@ -545,7 +638,7 @@ abstract class File { * @return boolean Whether file exists in the repository. 
*/ public function exists() { - return $this->getPath() && file_exists( $this->path ); + return $this->getPath() && $this->repo->fileExists( $this->path ); } /** @@ -617,7 +710,8 @@ abstract class File { return null; } $extension = $this->getExtension(); - list( $thumbExt, $thumbMime ) = $this->handler->getThumbType( $extension, $this->getMimeType(), $params ); + list( $thumbExt, $thumbMime ) = $this->handler->getThumbType( + $extension, $this->getMimeType(), $params ); $thumbName = $this->handler->makeParamString( $params ) . '-' . $name; if ( $thumbExt != $extension ) { $thumbName .= ".$thumbExt"; @@ -648,85 +742,147 @@ abstract class File { $params['height'] = $height; } $thumb = $this->transform( $params ); - if( is_null( $thumb ) || $thumb->isError() ) return ''; + if ( is_null( $thumb ) || $thumb->isError() ) { + return ''; + } return $thumb->getUrl(); } /** + * Return either a MediaTransformError or placeholder thumbnail (if $wgIgnoreImageErrors) + * + * @param $thumbPath string Thumbnail storage path + * @param $thumbUrl string Thumbnail URL + * @param $params Array + * @param $flags integer + * @return MediaTransformOutput + */ + protected function transformErrorOutput( $thumbPath, $thumbUrl, $params, $flags ) { + global $wgIgnoreImageErrors; + + if ( $wgIgnoreImageErrors && !( $flags & self::RENDER_NOW ) ) { + return $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); + } else { + return new MediaTransformError( 'thumbnail_error', + $params['width'], 0, wfMsg( 'thumbnail-dest-create' ) ); + } + } + + /** * Transform a media file * * @param $params Array: an associative array of handler-specific parameters. * Typical keys are width, height and page. 
* @param $flags Integer: a bitfield, may contain self::RENDER_NOW to force rendering - * @return MediaTransformOutput | false + * @return MediaTransformOutput|false */ function transform( $params, $flags = 0 ) { - global $wgUseSquid, $wgIgnoreImageErrors, $wgThumbnailEpoch, $wgServer; + global $wgUseSquid, $wgIgnoreImageErrors, $wgThumbnailEpoch; wfProfileIn( __METHOD__ ); do { if ( !$this->canRender() ) { - // not a bitmap or renderable image, don't try. $thumb = $this->iconThumb(); - break; + break; // not a bitmap or renderable image, don't try } // Get the descriptionUrl to embed it as comment into the thumbnail. Bug 19791. - $descriptionUrl = $this->getDescriptionUrl(); + $descriptionUrl = $this->getDescriptionUrl(); if ( $descriptionUrl ) { $params['descriptionUrl'] = wfExpandUrl( $descriptionUrl, PROTO_CANONICAL ); } $script = $this->getTransformScript(); - if ( $script && !($flags & self::RENDER_NOW) ) { + if ( $script && !( $flags & self::RENDER_NOW ) ) { // Use a script to transform on client request, if possible $thumb = $this->handler->getScriptedTransform( $this, $script, $params ); - if( $thumb ) { + if ( $thumb ) { break; } } $normalisedParams = $params; $this->handler->normaliseParams( $this, $normalisedParams ); + $thumbName = $this->thumbName( $normalisedParams ); - $thumbPath = $this->getThumbPath( $thumbName ); $thumbUrl = $this->getThumbUrl( $thumbName ); + $thumbPath = $this->getThumbPath( $thumbName ); // final thumb path - if ( $this->repo && $this->repo->canTransformVia404() && !($flags & self::RENDER_NOW ) ) { - $thumb = $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); - break; - } - - wfDebug( __METHOD__.": Doing stat for $thumbPath\n" ); - $this->migrateThumbFile( $thumbName ); - if ( file_exists( $thumbPath )) { - $thumbTime = filemtime( $thumbPath ); - if ( $thumbTime !== FALSE && - gmdate( 'YmdHis', $thumbTime ) >= $wgThumbnailEpoch ) { - + if ( $this->repo ) { + // Defer rendering if a 404 handler is set up... 
+ if ( $this->repo->canTransformVia404() && !( $flags & self::RENDER_NOW ) ) { + wfDebug( __METHOD__ . " transformation deferred." ); + // XXX: Pass in the storage path even though we are not rendering anything + // and the path is supposed to be an FS path. This is due to getScalerType() + // getting called on the path and clobbering $thumb->getUrl() if it's false. $thumb = $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); break; } + // Clean up broken thumbnails as needed + $this->migrateThumbFile( $thumbName ); + // Check if an up-to-date thumbnail already exists... + wfDebug( __METHOD__.": Doing stat for $thumbPath\n" ); + if ( $this->repo->fileExists( $thumbPath ) && !( $flags & self::RENDER_FORCE ) ) { + $timestamp = $this->repo->getFileTimestamp( $thumbPath ); + if ( $timestamp !== false && $timestamp >= $wgThumbnailEpoch ) { + // XXX: Pass in the storage path even though we are not rendering anything + // and the path is supposed to be an FS path. This is due to getScalerType() + // getting called on the path and clobbering $thumb->getUrl() if it's false. + $thumb = $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); + $thumb->setStoragePath( $thumbPath ); + break; + } + } elseif ( $flags & self::RENDER_FORCE ) { + wfDebug( __METHOD__ . " forcing rendering per flag File::RENDER_FORCE\n" ); + } + } + + // Create a temp FS file with the same extension and the thumbnail + $thumbExt = FileBackend::extensionFromPath( $thumbPath ); + $tmpFile = TempFSFile::factory( 'transform_', $thumbExt ); + if ( !$tmpFile ) { + $thumb = $this->transformErrorOutput( $thumbPath, $thumbUrl, $params, $flags ); + break; } - $thumb = $this->handler->doTransform( $this, $thumbPath, $thumbUrl, $params ); + $tmpThumbPath = $tmpFile->getPath(); // path of 0-byte temp file - // Ignore errors if requested - if ( !$thumb ) { + // Actually render the thumbnail... 
+ $thumb = $this->handler->doTransform( $this, $tmpThumbPath, $thumbUrl, $params ); + $tmpFile->bind( $thumb ); // keep alive with $thumb + + if ( !$thumb ) { // bad params? $thumb = null; - } elseif ( $thumb->isError() ) { + } elseif ( $thumb->isError() ) { // transform error $this->lastError = $thumb->toText(); - if ( $wgIgnoreImageErrors && !($flags & self::RENDER_NOW) ) { - $thumb = $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); + // Ignore errors if requested + if ( $wgIgnoreImageErrors && !( $flags & self::RENDER_NOW ) ) { + $thumb = $this->handler->getTransform( $this, $tmpThumbPath, $thumbUrl, $params ); + } + } elseif ( $this->repo && $thumb->hasFile() && !$thumb->fileIsSource() ) { + $backend = $this->repo->getBackend(); + // Copy the thumbnail from the file system into storage. This avoids using + // FileRepo::store(); getThumbPath() uses a different zone in some subclasses. + $backend->prepare( array( 'dir' => dirname( $thumbPath ) ) ); + $status = $backend->store( + array( 'src' => $tmpThumbPath, 'dst' => $thumbPath, 'overwrite' => 1 ), + array( 'force' => 1, 'nonLocking' => 1, 'allowStale' => 1 ) + ); + if ( $status->isOK() ) { + $thumb->setStoragePath( $thumbPath ); + } else { + $thumb = $this->transformErrorOutput( $thumbPath, $thumbUrl, $params, $flags ); } } // Purge. Useful in the event of Core -> Squid connection failure or squid // purge collisions from elsewhere during failure. Don't keep triggering for // "thumbs" which have the main image URL though (bug 13776) - if ( $wgUseSquid && ( !$thumb || $thumb->isError() || $thumb->getUrl() != $this->getURL()) ) { - SquidUpdate::purge( array( $thumbUrl ) ); + if ( $wgUseSquid ) { + if ( !$thumb || $thumb->isError() || $thumb->getUrl() != $this->getURL() ) { + SquidUpdate::purge( array( $thumbUrl ) ); + } } - } while (false); + } while ( false ); wfProfileOut( __METHOD__ ); return is_object( $thumb ) ? 
$thumb : false; @@ -741,6 +897,7 @@ abstract class File { /** * Get a MediaHandler instance for this file + * * @return MediaHandler */ function getHandler() { @@ -752,16 +909,17 @@ abstract class File { /** * Get a ThumbnailImage representing a file type icon + * * @return ThumbnailImage */ function iconThumb() { global $wgStylePath, $wgStyleDirectory; $try = array( 'fileicon-' . $this->getExtension() . '.png', 'fileicon.png' ); - foreach( $try as $icon ) { + foreach ( $try as $icon ) { $path = '/common/images/icons/' . $icon; $filepath = $wgStyleDirectory . $path; - if( file_exists( $filepath ) ) { + if ( file_exists( $filepath ) ) { // always FS return new ThumbnailImage( $this, $wgStylePath . $path, 120, 120 ); } } @@ -789,8 +947,10 @@ abstract class File { * Purge shared caches such as thumbnails and DB data caching * STUB * Overridden by LocalFile + * @param $options Array Options, which include: + * 'forThumbRefresh' : The purging is only to refresh thumbnails */ - function purgeCache() {} + function purgeCache( $options = array() ) {} /** * Purge the file description page, but don't go after @@ -866,13 +1026,15 @@ abstract class File { */ function getHashPath() { if ( !isset( $this->hashPath ) ) { + $this->assertRepoDefined(); $this->hashPath = $this->repo->getHashPath( $this->getName() ); } return $this->hashPath; } /** - * Get the path of the file relative to the public zone root + * Get the path of the file relative to the public zone root. + * This function is overridden in OldLocalFile to be like getArchiveRel(). * * @return string */ @@ -881,16 +1043,7 @@ abstract class File { } /** - * Get urlencoded relative path of the file - * - * @return string - */ - function getUrlRel() { - return $this->getHashPath() . 
rawurlencode( $this->getName() ); - } - - /** - * Get the relative path for an archived file + * Get the path of an archived file relative to the public zone root * * @param $suffix bool|string if not false, the name of an archived thumbnail file * @@ -907,11 +1060,39 @@ abstract class File { } /** - * Get the relative path for an archived file's thumbs directory + * Get the path, relative to the thumbnail zone root, of the + * thumbnail directory or a particular file if $suffix is specified + * + * @param $suffix bool|string if not false, the name of a thumbnail file + * + * @return string + */ + function getThumbRel( $suffix = false ) { + $path = $this->getRel(); + if ( $suffix !== false ) { + $path .= '/' . $suffix; + } + return $path; + } + + /** + * Get urlencoded path of the file relative to the public zone root. + * This function is overridden in OldLocalFile to be like getArchiveUrl(). + * + * @return string + */ + function getUrlRel() { + return $this->getHashPath() . rawurlencode( $this->getName() ); + } + + /** + * Get the path, relative to the thumbnail zone root, for an archived file's thumbs directory * or a specific thumb if the $suffix is given. * * @param $archiveName string the timestamped name of an archived image * @param $suffix bool|string if not false, the name of a thumbnail file + * + * @return string */ function getArchiveThumbRel( $archiveName, $suffix = false ) { $path = 'archive/' . $this->getHashPath() . $archiveName . "/"; @@ -931,11 +1112,12 @@ abstract class File { * @return string */ function getArchivePath( $suffix = false ) { + $this->assertRepoDefined(); return $this->repo->getZonePath( 'public' ) . '/' . 
$this->getArchiveRel( $suffix ); } /** - * Get the path of the archived file's thumbs, or a particular thumb if $suffix is specified + * Get the path of an archived file's thumbs, or a particular thumb if $suffix is specified * * @param $archiveName string the timestamped name of an archived image * @param $suffix bool|string if not false, the name of a thumbnail file @@ -943,7 +1125,9 @@ abstract class File { * @return string */ function getArchiveThumbPath( $archiveName, $suffix = false ) { - return $this->repo->getZonePath( 'thumb' ) . '/' . $this->getArchiveThumbRel( $archiveName, $suffix ); + $this->assertRepoDefined(); + return $this->repo->getZonePath( 'thumb' ) . '/' . + $this->getArchiveThumbRel( $archiveName, $suffix ); } /** @@ -954,11 +1138,8 @@ abstract class File { * @return string */ function getThumbPath( $suffix = false ) { - $path = $this->repo->getZonePath( 'thumb' ) . '/' . $this->getRel(); - if ( $suffix !== false ) { - $path .= '/' . $suffix; - } - return $path; + $this->assertRepoDefined(); + return $this->repo->getZonePath( 'thumb' ) . '/' . $this->getThumbRel( $suffix ); } /** @@ -969,7 +1150,8 @@ abstract class File { * @return string */ function getArchiveUrl( $suffix = false ) { - $path = $this->repo->getZoneUrl('public') . '/archive/' . $this->getHashPath(); + $this->assertRepoDefined(); + $path = $this->repo->getZoneUrl( 'public' ) . '/archive/' . $this->getHashPath(); if ( $suffix === false ) { $path = substr( $path, 0, -1 ); } else { @@ -987,7 +1169,9 @@ abstract class File { * @return string */ function getArchiveThumbUrl( $archiveName, $suffix = false ) { - $path = $this->repo->getZoneUrl('thumb') . '/archive/' . $this->getHashPath() . rawurlencode( $archiveName ) . "/"; + $this->assertRepoDefined(); + $path = $this->repo->getZoneUrl( 'thumb' ) . '/archive/' . + $this->getHashPath() . rawurlencode( $archiveName ) . 
"/"; if ( $suffix === false ) { $path = substr( $path, 0, -1 ); } else { @@ -1004,7 +1188,8 @@ abstract class File { * @return path */ function getThumbUrl( $suffix = false ) { - $path = $this->repo->getZoneUrl('thumb') . '/' . $this->getUrlRel(); + $this->assertRepoDefined(); + $path = $this->repo->getZoneUrl( 'thumb' ) . '/' . $this->getUrlRel(); if ( $suffix !== false ) { $path .= '/' . rawurlencode( $suffix ); } @@ -1012,46 +1197,49 @@ abstract class File { } /** - * Get the virtual URL for an archived file's thumbs, or a specific thumb. + * Get the public zone virtual URL for a current version source file * * @param $suffix bool|string if not false, the name of a thumbnail file * * @return string */ - function getArchiveVirtualUrl( $suffix = false ) { - $path = $this->repo->getVirtualUrl() . '/public/archive/' . $this->getHashPath(); - if ( $suffix === false ) { - $path = substr( $path, 0, -1 ); - } else { - $path .= rawurlencode( $suffix ); + function getVirtualUrl( $suffix = false ) { + $this->assertRepoDefined(); + $path = $this->repo->getVirtualUrl() . '/public/' . $this->getUrlRel(); + if ( $suffix !== false ) { + $path .= '/' . rawurlencode( $suffix ); } return $path; } /** - * Get the virtual URL for a thumbnail file or directory + * Get the public zone virtual URL for an archived version source file * * @param $suffix bool|string if not false, the name of a thumbnail file * * @return string */ - function getThumbVirtualUrl( $suffix = false ) { - $path = $this->repo->getVirtualUrl() . '/thumb/' . $this->getUrlRel(); - if ( $suffix !== false ) { - $path .= '/' . rawurlencode( $suffix ); + function getArchiveVirtualUrl( $suffix = false ) { + $this->assertRepoDefined(); + $path = $this->repo->getVirtualUrl() . '/public/archive/' . 
$this->getHashPath(); + if ( $suffix === false ) { + $path = substr( $path, 0, -1 ); + } else { + $path .= rawurlencode( $suffix ); } return $path; } /** - * Get the virtual URL for the file itself + * Get the virtual URL for a thumbnail file or directory * * @param $suffix bool|string if not false, the name of a thumbnail file * * @return string */ - function getVirtualUrl( $suffix = false ) { - $path = $this->repo->getVirtualUrl() . '/public/' . $this->getUrlRel(); + function getThumbVirtualUrl( $suffix = false ) { + $this->assertRepoDefined(); + $path = $this->repo->getVirtualUrl() . '/thumb/' . $this->getUrlRel(); if ( $suffix !== false ) { $path .= '/' . rawurlencode( $suffix ); } @@ -1062,6 +1250,7 @@ abstract class File { * @return bool */ function isHashed() { + $this->assertRepoDefined(); return $this->repo->isHashed(); } @@ -1125,8 +1314,7 @@ abstract class File { * @return bool */ function isLocal() { - $repo = $this->getRepo(); - return $repo && $repo->isLocal(); + return $this->repo && $this->repo->isLocal(); } /** @@ -1141,7 +1329,7 @@ abstract class File { /** * Returns the repository * - * @return FileRepo + * @return FileRepo|false */ function getRepo() { return $this->repo; @@ -1306,7 +1494,11 @@ abstract class File { * @return string */ function getDescriptionUrl() { - return $this->repo->getDescriptionUrl( $this->getName() ); + if ( $this->repo ) { + return $this->repo->getDescriptionUrl( $this->getName() ); + } else { + return false; + } } /** @@ -1316,7 +1508,7 @@ abstract class File { */ function getDescriptionText() { global $wgMemc, $wgLang; - if ( !$this->repo->fetchDescription ) { + if ( !$this->repo || !$this->repo->fetchDescription ) { return false; } $renderUrl = $this->repo->getDescriptionRenderUrl( $this->getName(), $wgLang->getCode() ); @@ -1354,17 +1546,13 @@ abstract class File { } /** - * Get the 14-character timestamp of the file upload, or false if - * it doesn't exist + * Get the 14-character timestamp of the file upload * - * 
@return string + * @return string|false TS_MW timestamp or false on failure */ function getTimestamp() { - $path = $this->getPath(); - if ( !file_exists( $path ) ) { - return false; - } - return wfTimestamp( TS_MW, filemtime( $path ) ); + $this->assertRepoDefined(); + return $this->repo->getFileTimestamp( $this->getPath() ); } /** @@ -1373,7 +1561,8 @@ abstract class File { * @return string */ function getSha1() { - return self::sha1Base36( $this->getPath() ); + $this->assertRepoDefined(); + return $this->repo->getFileSha1( $this->getPath() ); } /** @@ -1396,9 +1585,10 @@ abstract class File { * field of this file, if it's marked as deleted. * STUB * @param $field Integer + * @param $user User object to check, or null to use $wgUser * @return Boolean */ - function userCan( $field ) { + function userCan( $field, User $user = null ) { return true; } @@ -1412,67 +1602,11 @@ abstract class File { * @return array */ static function getPropsFromPath( $path, $ext = true ) { - wfProfileIn( __METHOD__ ); wfDebug( __METHOD__.": Getting file info for $path\n" ); - $info = array( - 'fileExists' => file_exists( $path ) && !is_dir( $path ) - ); - $gis = false; - if ( $info['fileExists'] ) { - $magic = MimeMagic::singleton(); - - if ( $ext === true ) { - $i = strrpos( $path, '.' ); - $ext = strtolower( $i ? 
substr( $path, $i + 1 ) : '' ); - } - - # mime type according to file contents - $info['file-mime'] = $magic->guessMimeType( $path, false ); - # logical mime type - $info['mime'] = $magic->improveTypeFromExtension( $info['file-mime'], $ext ); + wfDeprecated( __METHOD__, '1.19' ); - list( $info['major_mime'], $info['minor_mime'] ) = self::splitMime( $info['mime'] ); - $info['media_type'] = $magic->getMediaType( $path, $info['mime'] ); - - # Get size in bytes - $info['size'] = filesize( $path ); - - # Height, width and metadata - $handler = MediaHandler::getHandler( $info['mime'] ); - if ( $handler ) { - $tempImage = (object)array(); - $info['metadata'] = $handler->getMetadata( $tempImage, $path ); - $gis = $handler->getImageSize( $tempImage, $path, $info['metadata'] ); - } else { - $gis = false; - $info['metadata'] = ''; - } - $info['sha1'] = self::sha1Base36( $path ); - - wfDebug(__METHOD__.": $path loaded, {$info['size']} bytes, {$info['mime']}.\n"); - } else { - $info['mime'] = null; - $info['media_type'] = MEDIATYPE_UNKNOWN; - $info['metadata'] = ''; - $info['sha1'] = ''; - wfDebug(__METHOD__.": $path NOT FOUND!\n"); - } - if( $gis ) { - # NOTE: $gis[2] contains a code for the image type. This is no longer used. 
- $info['width'] = $gis[0]; - $info['height'] = $gis[1]; - if ( isset( $gis['bits'] ) ) { - $info['bits'] = $gis['bits']; - } else { - $info['bits'] = 0; - } - } else { - $info['width'] = 0; - $info['height'] = 0; - $info['bits'] = 0; - } - wfProfileOut( __METHOD__ ); - return $info; + $fsFile = new FSFile( $path ); + return $fsFile->getProps(); } /** @@ -1487,14 +1621,10 @@ abstract class File { * @return false|string False on failure */ static function sha1Base36( $path ) { - wfSuppressWarnings(); - $hash = sha1_file( $path ); - wfRestoreWarnings(); - if ( $hash === false ) { - return false; - } else { - return wfBaseConvert( $hash, 16, 36, 31 ); - } + wfDeprecated( __METHOD__, '1.19' ); + + $fsFile = new FSFile( $path ); + return $fsFile->getSha1Base36(); } /** @@ -1566,11 +1696,24 @@ abstract class File { function isMissing() { return false; } + + /** + * Assert that $this->repo is set to a valid FileRepo instance + * @throws MWException + */ + protected function assertRepoDefined() { + if ( !( $this->repo instanceof $this->repoClass ) ) { + throw new MWException( "A {$this->repoClass} object is not set for this File.\n" ); + } + } + + /** + * Assert that $this->title is set to a Title + * @throws MWException + */ + protected function assertTitleDefined() { + if ( !( $this->title instanceof Title ) ) { + throw new MWException( "A Title object is not set for this File.\n" ); + } + } } -/** - * Aliases for backwards compatibility with 1.6 - */ -define( 'MW_IMG_DELETED_FILE', File::DELETED_FILE ); -define( 'MW_IMG_DELETED_COMMENT', File::DELETED_COMMENT ); -define( 'MW_IMG_DELETED_USER', File::DELETED_USER ); -define( 'MW_IMG_DELETED_RESTRICTED', File::DELETED_RESTRICTED ); diff --git a/includes/filerepo/ForeignAPIFile.php b/includes/filerepo/file/ForeignAPIFile.php index 53c4a3bd..681544fd 100644 --- a/includes/filerepo/ForeignAPIFile.php +++ b/includes/filerepo/file/ForeignAPIFile.php @@ -3,43 +3,47 @@ * Foreign file accessible through api.php requests. 
* * @file - * @ingroup FileRepo + * @ingroup FileAbstraction */ /** * Foreign file accessible through api.php requests. * Very hacky and inefficient, do not use :D * - * @ingroup FileRepo + * @ingroup FileAbstraction */ class ForeignAPIFile extends File { - private $mExists; + protected $repoClass = 'ForeignApiRepo'; + /** - * @param $title - * @param $repo ForeignApiRepo - * @param $info + * @param $title + * @param $repo ForeignApiRepo + * @param $info * @param bool $exists */ function __construct( $title, $repo, $info, $exists = false ) { parent::__construct( $title, $repo ); + $this->mInfo = $info; $this->mExists = $exists; + + $this->assertRepoDefined(); } /** - * @param $title Title - * @param $repo ForeignApiRepo + * @param $title Title + * @param $repo ForeignApiRepo * @return ForeignAPIFile|null */ - static function newFromTitle( $title, $repo ) { + static function newFromTitle( Title $title, $repo ) { $data = $repo->fetchImageQuery( array( - 'titles' => 'File:' . $title->getDBKey(), - 'iiprop' => self::getProps(), - 'prop' => 'imageinfo', + 'titles' => 'File:' . $title->getDBKey(), + 'iiprop' => self::getProps(), + 'prop' => 'imageinfo', 'iimetadataversion' => MediaHandler::getMetadataVersion() - ) ); + ) ); $info = $repo->getImageInfo( $data ); @@ -49,31 +53,31 @@ class ForeignAPIFile extends File { : -1; if( $lastRedirect >= 0 ) { $newtitle = Title::newFromText( $data['query']['redirects'][$lastRedirect]['to']); - $img = new ForeignAPIFile( $newtitle, $repo, $info, true ); + $img = new self( $newtitle, $repo, $info, true ); if( $img ) { $img->redirectedFrom( $title->getDBkey() ); } } else { - $img = new ForeignAPIFile( $title, $repo, $info, true ); + $img = new self( $title, $repo, $info, true ); } return $img; } else { return null; } } - + /** * Get the property string for iiprop and aiprop */ static function getProps() { return 'timestamp|user|comment|url|size|sha1|metadata|mime'; } - + // Dummy functions... 
public function exists() { return $this->mExists; } - + public function getPath() { return false; } @@ -100,18 +104,22 @@ class ForeignAPIFile extends File { public function getWidth( $page = 1 ) { return isset( $this->mInfo['width'] ) ? intval( $this->mInfo['width'] ) : 0; } - + + /** + * @param $page int + * @return int + */ public function getHeight( $page = 1 ) { return isset( $this->mInfo['height'] ) ? intval( $this->mInfo['height'] ) : 0; } - + public function getMetadata() { if ( isset( $this->mInfo['metadata'] ) ) { return serialize( self::parseMetadata( $this->mInfo['metadata'] ) ); } return null; } - + public static function parseMetadata( $metadata ) { if( !is_array( $metadata ) ) { return $metadata; @@ -122,11 +130,11 @@ class ForeignAPIFile extends File { } return $ret; } - + public function getSize() { return isset( $this->mInfo['size'] ) ? intval( $this->mInfo['size'] ) : null; } - + public function getUrl() { return isset( $this->mInfo['url'] ) ? strval( $this->mInfo['url'] ) : null; } @@ -134,25 +142,25 @@ class ForeignAPIFile extends File { public function getUser( $method='text' ) { return isset( $this->mInfo['user'] ) ? strval( $this->mInfo['user'] ) : null; } - + public function getDescription() { return isset( $this->mInfo['comment'] ) ? strval( $this->mInfo['comment'] ) : null; } function getSha1() { - return isset( $this->mInfo['sha1'] ) ? - wfBaseConvert( strval( $this->mInfo['sha1'] ), 16, 36, 31 ) : - null; + return isset( $this->mInfo['sha1'] ) + ? wfBaseConvert( strval( $this->mInfo['sha1'] ), 16, 36, 31 ) + : null; } - + function getTimestamp() { - return wfTimestamp( TS_MW, - isset( $this->mInfo['timestamp'] ) ? - strval( $this->mInfo['timestamp'] ) : - null + return wfTimestamp( TS_MW, + isset( $this->mInfo['timestamp'] ) + ? 
strval( $this->mInfo['timestamp'] ) + : null ); } - + function getMimeType() { if( !isset( $this->mInfo['mime'] ) ) { $magic = MimeMagic::singleton(); @@ -160,19 +168,19 @@ class ForeignAPIFile extends File { } return $this->mInfo['mime']; } - + /// @todo FIXME: May guess wrong on file types that can be eg audio or video function getMediaType() { $magic = MimeMagic::singleton(); return $magic->getMediaType( null, $this->getMimeType() ); } - + function getDescriptionUrl() { return isset( $this->mInfo['descriptionurl'] ) ? $this->mInfo['descriptionurl'] : false; } - + /** * Only useful if we're locally caching thumbs anyway... */ @@ -187,47 +195,58 @@ class ForeignAPIFile extends File { return null; } } - + function getThumbnails() { - $files = array(); $dir = $this->getThumbPath( $this->getName() ); - if ( is_dir( $dir ) ) { - $handle = opendir( $dir ); - if ( $handle ) { - while ( false !== ( $file = readdir($handle) ) ) { - if ( $file[0] != '.' ) { - $files[] = $file; - } - } - closedir( $handle ); - } + $iter = $this->repo->getBackend()->getFileList( array( 'dir' => $dir ) ); + + $files = array(); + foreach ( $iter as $file ) { + $files[] = $file; } + return $files; } - - function purgeCache() { - $this->purgeThumbnails(); + + /** + * @see File::purgeCache() + */ + function purgeCache( $options = array() ) { + $this->purgeThumbnails( $options ); $this->purgeDescriptionPage(); } - + function purgeDescriptionPage() { global $wgMemc, $wgContLang; + $url = $this->repo->getDescriptionRenderUrl( $this->getName(), $wgContLang->getCode() ); $key = $this->repo->getLocalCacheKey( 'RemoteFileDescription', 'url', md5($url) ); + $wgMemc->delete( $key ); } - - function purgeThumbnails() { + + function purgeThumbnails( $options = array() ) { global $wgMemc; + $key = $this->repo->getLocalCacheKey( 'ForeignAPIRepo', 'ThumbUrl', $this->getName() ); $wgMemc->delete( $key ); + $files = $this->getThumbnails(); + // Give media handler a chance to filter the purge list + $handler = 
$this->getHandler(); + if ( $handler ) { + $handler->filterThumbnailPurgeList( $files, $options ); + } + $dir = $this->getThumbPath( $this->getName() ); + $purgeList = array(); foreach ( $files as $file ) { - unlink( $dir . $file ); - } - if ( is_dir( $dir ) ) { - rmdir( $dir ); // Might have already gone away, spews errors if we don't. + $purgeList[] = "{$dir}{$file}"; } + + # Delete the thumbnails + $this->repo->cleanupBatch( $purgeList, FileRepo::SKIP_LOCKING ); + # Clear out the thumbnail directory if empty + $this->repo->getBackend()->clean( array( 'dir' => $dir ) ); } } diff --git a/includes/filerepo/ForeignDBFile.php b/includes/filerepo/file/ForeignDBFile.php index 09bee39c..191a712d 100644 --- a/includes/filerepo/ForeignDBFile.php +++ b/includes/filerepo/file/ForeignDBFile.php @@ -3,13 +3,13 @@ * Foreign file with an accessible MediaWiki database * * @file - * @ingroup FileRepo + * @ingroup FileAbstraction */ /** * Foreign file with an accessible MediaWiki database * - * @ingroup FileRepo + * @ingroup FileAbstraction */ class ForeignDBFile extends LocalFile { diff --git a/includes/filerepo/LocalFile.php b/includes/filerepo/file/LocalFile.php index 14da9122..0f8b4754 100644 --- a/includes/filerepo/LocalFile.php +++ b/includes/filerepo/file/LocalFile.php @@ -3,7 +3,7 @@ * Local file in the wiki's own database * * @file - * @ingroup FileRepo + * @ingroup FileAbstraction */ /** @@ -26,7 +26,7 @@ define( 'MW_FILE_VERSION', 8 ); * The convenience functions wfLocalFile() and wfFindFile() should be sufficient * in most cases. * - * @ingroup FileRepo + * @ingroup FileAbstraction */ class LocalFile extends File { /**#@+ @@ -58,6 +58,8 @@ class LocalFile extends File { /**#@-*/ + protected $repoClass = 'LocalRepo'; + /** * Create a LocalFile from a title * Do not call this except from inside a repo class. @@ -144,16 +146,15 @@ class LocalFile extends File { * Do not call this except from inside a repo class. 
*/ function __construct( $title, $repo ) { - if ( !is_object( $title ) ) { - throw new MWException( __CLASS__ . ' constructor given bogus title.' ); - } - parent::__construct( $title, $repo ); $this->metadata = ''; $this->historyLine = 0; $this->historyRes = null; $this->dataLoaded = false; + + $this->assertRepoDefined(); + $this->assertTitleDefined(); } /** @@ -233,7 +234,8 @@ class LocalFile extends File { * Load metadata from the file itself */ function loadFromFile() { - $this->setProps( self::getPropsFromPath( $this->getPath() ) ); + $props = $this->repo->getFileProps( $this->getVirtualUrl() ); + $this->setProps( $props ); } function getCacheFields( $prefix = 'img_' ) { @@ -383,6 +385,8 @@ class LocalFile extends File { function upgradeRow() { wfProfileIn( __METHOD__ ); + $this->lock(); // begin + $this->loadFromFile(); # Don't destroy file info of missing files @@ -403,19 +407,23 @@ class LocalFile extends File { $dbw->update( 'image', array( - 'img_width' => $this->width, - 'img_height' => $this->height, - 'img_bits' => $this->bits, + 'img_width' => $this->width, + 'img_height' => $this->height, + 'img_bits' => $this->bits, 'img_media_type' => $this->media_type, 'img_major_mime' => $major, 'img_minor_mime' => $minor, - 'img_metadata' => $this->metadata, - 'img_sha1' => $this->sha1, - ), array( 'img_name' => $this->getName() ), + 'img_metadata' => $this->metadata, + 'img_sha1' => $this->sha1, + ), + array( 'img_name' => $this->getName() ), __METHOD__ ); $this->saveToCache(); + + $this->unlock(); // done + wfProfileOut( __METHOD__ ); } @@ -456,7 +464,7 @@ class LocalFile extends File { function isMissing() { if ( $this->missing === null ) { - list( $fileExists ) = $this->repo->fileExistsBatch( array( $this->getVirtualUrl() ), FileRepo::FILES_ONLY ); + list( $fileExists ) = $this->repo->fileExists( $this->getVirtualUrl(), FileRepo::FILES_ONLY ); $this->missing = !$fileExists; } return $this->missing; @@ -581,8 +589,9 @@ class LocalFile extends File { */ 
function migrateThumbFile( $thumbName ) { $thumbDir = $this->getThumbPath(); - $thumbPath = "$thumbDir/$thumbName"; + /* Old code for bug 2532 + $thumbPath = "$thumbDir/$thumbName"; if ( is_dir( $thumbPath ) ) { // Directory where file should be // This happened occasionally due to broken migration code in 1.5 @@ -597,12 +606,11 @@ class LocalFile extends File { // Doesn't exist anymore clearstatcache(); } + */ - if ( is_file( $thumbDir ) ) { - // File where directory should be - unlink( $thumbDir ); - // Doesn't exist anymore - clearstatcache(); + if ( $this->repo->fileExists( $thumbDir, FileRepo::FILES_ONLY ) ) { + // Delete file where directory should be + $this->repo->cleanupBatch( array( $thumbDir ) ); } } @@ -623,21 +631,12 @@ class LocalFile extends File { } else { $dir = $this->getThumbPath(); } - $files = array(); - $files[] = $dir; - - if ( is_dir( $dir ) ) { - $handle = opendir( $dir ); - if ( $handle ) { - while ( false !== ( $file = readdir( $handle ) ) ) { - if ( $file { 0 } != '.' ) { - $files[] = $file; - } - } - - closedir( $handle ); - } + $backend = $this->repo->getBackend(); + $files = array( $dir ); + $iterator = $backend->getFileList( array( 'dir' => $dir ) ); + foreach ( $iterator as $file ) { + $files[] = $file; } return $files; @@ -674,36 +673,30 @@ class LocalFile extends File { /** * Delete all previously generated thumbnails, refresh metadata in memcached and purge the squid */ - function purgeCache() { + function purgeCache( $options = array() ) { // Refresh metadata cache $this->purgeMetadataCache(); // Delete thumbnails - $this->purgeThumbnails(); + $this->purgeThumbnails( $options ); // Purge squid cache for this file SquidUpdate::purge( array( $this->getURL() ) ); } /** - * Delete cached transformed files for archived files + * Delete cached transformed files for an archived version only. 
* @param $archiveName string name of the archived file */ function purgeOldThumbnails( $archiveName ) { global $wgUseSquid; - // get a list of old thumbnails and URLs + // Get a list of old thumbnails and URLs $files = $this->getThumbnails( $archiveName ); $dir = array_shift( $files ); $this->purgeThumbList( $dir, $files ); - // Directory should be empty, delete it too. This will probably suck on - // something like NFS or if the directory isn't actually empty, so hide - // the warnings :D - wfSuppressWarnings(); - if( !rmdir( $dir ) ) { - wfDebug( __METHOD__ . ": unable to remove archive directory: $dir\n" ); - } - wfRestoreWarnings(); + // Purge any custom thumbnail caches + wfRunHooks( 'LocalFilePurgeThumbnails', array( $this, $archiveName ) ); // Purge the squid if ( $wgUseSquid ) { @@ -715,17 +708,29 @@ class LocalFile extends File { } } - /** * Delete cached transformed files for the current version only. */ - function purgeThumbnails() { + function purgeThumbnails( $options = array() ) { global $wgUseSquid; - // get a list of thumbnails and URLs + + // Delete thumbnails $files = $this->getThumbnails(); + + // Give media handler a chance to filter the purge list + if ( !empty( $options['forThumbRefresh'] ) ) { + $handler = $this->getHandler(); + if ( $handler ) { + $handler->filterThumbnailPurgeList( $files, $options ); + } + } + $dir = array_shift( $files ); $this->purgeThumbList( $dir, $files ); + // Purge any custom thumbnail caches + wfRunHooks( 'LocalFilePurgeThumbnails', array( $this, false ) ); + // Purge the squid if ( $wgUseSquid ) { $urls = array(); @@ -741,19 +746,26 @@ class LocalFile extends File { * @param $dir string base dir of the files. * @param $files array of strings: relative filenames (to $dir) */ - function purgeThumbList($dir, $files) { - global $wgExcludeFromThumbnailPurge; + protected function purgeThumbList( $dir, $files ) { + $fileListDebug = strtr( + var_export( $files, true ), + array("\n"=>'') + ); + wfDebug( __METHOD__ . 
": $fileListDebug\n" ); - wfDebug( __METHOD__ . ": " . var_export( $files, true ) . "\n" ); + $purgeList = array(); foreach ( $files as $file ) { # Check that the base file name is part of the thumb name # This is a basic sanity check to avoid erasing unrelated directories if ( strpos( $file, $this->getName() ) !== false ) { - wfSuppressWarnings(); - unlink( "$dir/$file" ); - wfRestoreWarnings(); + $purgeList[] = "{$dir}/{$file}"; } } + + # Delete the thumbnails + $this->repo->cleanupBatch( $purgeList, FileRepo::SKIP_LOCKING ); + # Clear out the thumbnail directory if empty + $this->repo->getBackend()->clean( array( 'dir' => $dir ) ); } /** purgeDescription inherited */ @@ -858,7 +870,6 @@ class LocalFile extends File { } } - /** getFullPath inherited */ /** getHashPath inherited */ /** getRel inherited */ /** getUrlRel inherited */ @@ -873,7 +884,7 @@ class LocalFile extends File { /** * Upload a file and record it in the DB - * @param $srcPath String: source path or virtual URL + * @param $srcPath String: source storage path or virtual URL * @param $comment String: upload description * @param $pageText String: text to use for the new description page, * if a new description page is created @@ -888,16 +899,24 @@ class LocalFile extends File { * archive name, or an empty string if it was a new file. */ function upload( $srcPath, $comment, $pageText, $flags = 0, $props = false, $timestamp = false, $user = null ) { - $this->lock(); + global $wgContLang; + // truncate nicely or the DB will do it for us + // non-nicely (dangling multi-byte chars, non-truncated + // version in cache). + $comment = $wgContLang->truncate( $comment, 255 ); + $this->lock(); // begin $status = $this->publish( $srcPath, $flags ); - if ( $status->ok ) { + if ( $status->successCount > 0 ) { + # Essentially we are displacing any existing current file and saving + # a new current file at the old location. 
If just the first succeeded, + # we still need to displace the current DB entry and put in a new one. if ( !$this->recordUpload2( $status->value, $comment, $pageText, $props, $timestamp, $user ) ) { $status->fatal( 'filenotfound', $srcPath ); } } - $this->unlock(); + $this->unlock(); // done return $status; } @@ -968,82 +987,94 @@ class LocalFile extends File { # doesn't deadlock. SELECT FOR UPDATE causes a deadlock for every race condition. $dbw->insert( 'image', array( - 'img_name' => $this->getName(), - 'img_size' => $this->size, - 'img_width' => intval( $this->width ), - 'img_height' => intval( $this->height ), - 'img_bits' => $this->bits, - 'img_media_type' => $this->media_type, - 'img_major_mime' => $this->major_mime, - 'img_minor_mime' => $this->minor_mime, - 'img_timestamp' => $timestamp, + 'img_name' => $this->getName(), + 'img_size' => $this->size, + 'img_width' => intval( $this->width ), + 'img_height' => intval( $this->height ), + 'img_bits' => $this->bits, + 'img_media_type' => $this->media_type, + 'img_major_mime' => $this->major_mime, + 'img_minor_mime' => $this->minor_mime, + 'img_timestamp' => $timestamp, 'img_description' => $comment, - 'img_user' => $user->getId(), - 'img_user_text' => $user->getName(), - 'img_metadata' => $this->metadata, - 'img_sha1' => $this->sha1 + 'img_user' => $user->getId(), + 'img_user_text' => $user->getName(), + 'img_metadata' => $this->metadata, + 'img_sha1' => $this->sha1 ), __METHOD__, 'IGNORE' ); if ( $dbw->affectedRows() == 0 ) { + if ( $oldver == '' ) { // XXX + # (bug 34993) publish() can displace the current file and yet fail to save + # a new one. The next publish attempt will treat the file as a brand new file + # and pass an empty $oldver. Allow this bogus value so we can displace the + # `image` row to `oldimage`, leaving room for the new current file `image` row. + #throw new MWException( "Empty oi_archive_name. Database and storage out of sync?" 
); + } $reupload = true; - # Collision, this is an update of a file # Insert previous contents into oldimage $dbw->insertSelect( 'oldimage', 'image', array( - 'oi_name' => 'img_name', + 'oi_name' => 'img_name', 'oi_archive_name' => $dbw->addQuotes( $oldver ), - 'oi_size' => 'img_size', - 'oi_width' => 'img_width', - 'oi_height' => 'img_height', - 'oi_bits' => 'img_bits', - 'oi_timestamp' => 'img_timestamp', - 'oi_description' => 'img_description', - 'oi_user' => 'img_user', - 'oi_user_text' => 'img_user_text', - 'oi_metadata' => 'img_metadata', - 'oi_media_type' => 'img_media_type', - 'oi_major_mime' => 'img_major_mime', - 'oi_minor_mime' => 'img_minor_mime', - 'oi_sha1' => 'img_sha1' - ), array( 'img_name' => $this->getName() ), __METHOD__ + 'oi_size' => 'img_size', + 'oi_width' => 'img_width', + 'oi_height' => 'img_height', + 'oi_bits' => 'img_bits', + 'oi_timestamp' => 'img_timestamp', + 'oi_description' => 'img_description', + 'oi_user' => 'img_user', + 'oi_user_text' => 'img_user_text', + 'oi_metadata' => 'img_metadata', + 'oi_media_type' => 'img_media_type', + 'oi_major_mime' => 'img_major_mime', + 'oi_minor_mime' => 'img_minor_mime', + 'oi_sha1' => 'img_sha1' + ), + array( 'img_name' => $this->getName() ), + __METHOD__ ); # Update the current image row $dbw->update( 'image', array( /* SET */ - 'img_size' => $this->size, - 'img_width' => intval( $this->width ), - 'img_height' => intval( $this->height ), - 'img_bits' => $this->bits, - 'img_media_type' => $this->media_type, - 'img_major_mime' => $this->major_mime, - 'img_minor_mime' => $this->minor_mime, - 'img_timestamp' => $timestamp, + 'img_size' => $this->size, + 'img_width' => intval( $this->width ), + 'img_height' => intval( $this->height ), + 'img_bits' => $this->bits, + 'img_media_type' => $this->media_type, + 'img_major_mime' => $this->major_mime, + 'img_minor_mime' => $this->minor_mime, + 'img_timestamp' => $timestamp, 'img_description' => $comment, - 'img_user' => $user->getId(), - 'img_user_text' => 
$user->getName(), - 'img_metadata' => $this->metadata, - 'img_sha1' => $this->sha1 - ), array( /* WHERE */ - 'img_name' => $this->getName() - ), __METHOD__ + 'img_user' => $user->getId(), + 'img_user_text' => $user->getName(), + 'img_metadata' => $this->metadata, + 'img_sha1' => $this->sha1 + ), + array( 'img_name' => $this->getName() ), + __METHOD__ ); } else { # This is a new file # Update the image count - $dbw->begin(); - $site_stats = $dbw->tableName( 'site_stats' ); - $dbw->query( "UPDATE $site_stats SET ss_images=ss_images+1", __METHOD__ ); - $dbw->commit(); + $dbw->begin( __METHOD__ ); + $dbw->update( + 'site_stats', + array( 'ss_images = ss_images+1' ), + '*', + __METHOD__ + ); + $dbw->commit( __METHOD__ ); } $descTitle = $this->getTitle(); - $article = new ImagePage( $descTitle ); - $article->setFile( $this ); + $wikiPage = new WikiFilePage( $descTitle ); + $wikiPage->setFile( $this ); # Add the log entry $log = new LogPage( 'upload' ); @@ -1059,11 +1090,12 @@ class LocalFile extends File { $log->getRcComment(), false ); - $nullRevision->insertOn( $dbw ); - - wfRunHooks( 'NewRevisionFromEditComplete', array( $article, $nullRevision, $latest, $user ) ); - $article->updateRevisionOn( $dbw, $nullRevision ); + if (!is_null($nullRevision)) { + $nullRevision->insertOn( $dbw ); + wfRunHooks( 'NewRevisionFromEditComplete', array( $wikiPage, $nullRevision, $latest, $user ) ); + $wikiPage->updateRevisionOn( $dbw, $nullRevision ); + } # Invalidate the cache for the description page $descTitle->invalidateCache(); $descTitle->purgeSquid(); @@ -1071,7 +1103,7 @@ class LocalFile extends File { # New file; create the description page. # There's already a log entry, so don't make a second RC entry # Squid and file cache for the description page are purged by doEdit. 
- $article->doEdit( $pageText, $comment, EDIT_NEW | EDIT_SUPPRESS_RC ); + $wikiPage->doEdit( $pageText, $comment, EDIT_NEW | EDIT_SUPPRESS_RC, false, $user ); } # Commit the transaction now, in case something goes wrong later @@ -1135,7 +1167,7 @@ class LocalFile extends File { * archive name, or an empty string if it was a new file. */ function publishTo( $srcPath, $dstRel, $flags = 0 ) { - $this->lock(); + $this->lock(); // begin $archiveName = wfTimestamp( TS_MW ) . '!'. $this->getName(); $archiveRel = 'archive/' . $this->getHashPath() . $archiveName; @@ -1148,7 +1180,7 @@ class LocalFile extends File { $status->value = $archiveName; } - $this->unlock(); + $this->unlock(); // done return $status; } @@ -1172,7 +1204,7 @@ class LocalFile extends File { */ function move( $target ) { wfDebugLog( 'imagemove', "Got request to move {$this->name} to " . $target->getText() ); - $this->lock(); + $this->lock(); // begin $batch = new LocalFileMoveBatch( $this, $target ); $batch->addCurrent(); @@ -1182,7 +1214,7 @@ class LocalFile extends File { wfDebugLog( 'imagemove', "Finished moving {$this->name}" ); $this->purgeEverything(); - $this->unlock(); + $this->unlock(); // done if ( $status->isOk() ) { // Now switch the object @@ -1210,7 +1242,7 @@ class LocalFile extends File { * @return FileRepoStatus object. */ function delete( $reason, $suppress = false ) { - $this->lock(); + $this->lock(); // begin $batch = new LocalFileDeleteBatch( $this, $reason, $suppress ); $batch->addCurrent(); @@ -1233,7 +1265,7 @@ class LocalFile extends File { $this->purgeEverything(); } - $this->unlock(); + $this->unlock(); // done return $status; } @@ -1253,14 +1285,14 @@ class LocalFile extends File { * @return FileRepoStatus object. 
*/ function deleteOld( $archiveName, $reason, $suppress = false ) { - $this->lock(); + $this->lock(); // begin $batch = new LocalFileDeleteBatch( $this, $reason, $suppress ); $batch->addOld( $archiveName ); $this->purgeOldThumbnails( $archiveName ); $status = $batch->execute(); - $this->unlock(); + $this->unlock(); // done if ( $status->ok ) { $this->purgeDescription(); @@ -1345,7 +1377,9 @@ class LocalFile extends File { $this->load(); // Initialise now if necessary if ( $this->sha1 == '' && $this->fileExists ) { - $this->sha1 = File::sha1Base36( $this->getPath() ); + $this->lock(); // begin + + $this->sha1 = $this->repo->getFileSha1( $this->getPath() ); if ( !wfReadOnly() && strval( $this->sha1 ) != '' ) { $dbw = $this->repo->getMasterDB(); $dbw->update( 'image', @@ -1354,6 +1388,8 @@ class LocalFile extends File { __METHOD__ ); $this->saveToCache(); } + + $this->unlock(); // done } return $this->sha1; @@ -1403,7 +1439,7 @@ class LocalFile extends File { /** * Helper class for file deletion - * @ingroup FileRepo + * @ingroup FileAbstraction */ class LocalFileDeleteBatch { @@ -1704,7 +1740,7 @@ class LocalFileDeleteBatch { $files[$src] = $this->file->repo->getVirtualUrl( 'public' ) . '/' . 
rawurlencode( $src ); } - $result = $this->file->repo->fileExistsBatch( $files, FSRepo::FILES_ONLY ); + $result = $this->file->repo->fileExistsBatch( $files, FileRepo::FILES_ONLY ); foreach ( $batch as $batchItem ) { if ( $result[$batchItem[0]] ) { @@ -1720,7 +1756,7 @@ class LocalFileDeleteBatch { /** * Helper class for file undeletion - * @ingroup FileRepo + * @ingroup FileAbstraction */ class LocalFileRestoreBatch { /** @@ -1992,7 +2028,7 @@ class LocalFileRestoreBatch { foreach ( $triplets as $file ) $files[$file[0]] = $file[0]; - $result = $this->file->repo->fileExistsBatch( $files, FSRepo::FILES_ONLY ); + $result = $this->file->repo->fileExistsBatch( $files, FileRepo::FILES_ONLY ); foreach ( $triplets as $file ) { if ( $result[$file[0]] ) { @@ -2015,7 +2051,7 @@ class LocalFileRestoreBatch { rawurlencode( $repo->getDeletedHashPath( $file ) . $file ); } - $result = $repo->fileExistsBatch( $files, FSRepo::FILES_ONLY ); + $result = $repo->fileExistsBatch( $files, FileRepo::FILES_ONLY ); foreach ( $batch as $file ) { if ( $result[$file] ) { @@ -2068,10 +2104,21 @@ class LocalFileRestoreBatch { /** * Helper class for file movement - * @ingroup FileRepo + * @ingroup FileAbstraction */ class LocalFileMoveBatch { - var $file, $cur, $olds, $oldCount, $archive, $target, $db; + + /** + * @var File + */ + var $file; + + /** + * @var Title + */ + var $target; + + var $cur, $olds, $oldCount, $archive, $db; function __construct( File $file, Title $target ) { $this->file = $file; @@ -2148,7 +2195,7 @@ class LocalFileMoveBatch { // Copy the files into their new location $statusMove = $repo->storeBatch( $triplets ); - wfDebugLog( 'imagemove', "Moved files for {$this->file->name}: {$statusMove->successCount} successes, {$statusMove->failCount} failures" ); + wfDebugLog( 'imagemove', "Moved files for {$this->file->getName()}: {$statusMove->successCount} successes, {$statusMove->failCount} failures" ); if ( !$statusMove->isGood() ) { wfDebugLog( 'imagemove', "Error in moving 
files: " . $statusMove->getWikiText() ); $this->cleanupTarget( $triplets ); @@ -2158,7 +2205,7 @@ class LocalFileMoveBatch { $this->db->begin(); $statusDb = $this->doDBUpdates(); - wfDebugLog( 'imagemove', "Renamed {$this->file->name} in database: {$statusDb->successCount} successes, {$statusDb->failCount} failures" ); + wfDebugLog( 'imagemove', "Renamed {$this->file->getName()} in database: {$statusDb->successCount} successes, {$statusDb->failCount} failures" ); if ( !$statusDb->isGood() ) { $this->db->rollback(); // Something went wrong with the DB updates, so remove the target files @@ -2227,7 +2274,7 @@ class LocalFileMoveBatch { } /** - * Generate triplets for FSRepo::storeBatch(). + * Generate triplets for FileRepo::storeBatch(). */ function getMoveTriplets() { $moves = array_merge( array( $this->cur ), $this->olds ); @@ -2237,7 +2284,7 @@ class LocalFileMoveBatch { // $move: (oldRelativePath, newRelativePath) $srcUrl = $this->file->repo->getVirtualUrl() . '/public/' . rawurlencode( $move[0] ); $triplets[] = array( $srcUrl, 'public', $move[1] ); - wfDebugLog( 'imagemove', "Generated move triplet for {$this->file->name}: {$srcUrl} :: public :: {$move[1]}" ); + wfDebugLog( 'imagemove', "Generated move triplet for {$this->file->getName()}: {$srcUrl} :: public :: {$move[1]}" ); } return $triplets; @@ -2253,7 +2300,7 @@ class LocalFileMoveBatch { $files[$file[0]] = $file[0]; } - $result = $this->file->repo->fileExistsBatch( $files, FSRepo::FILES_ONLY ); + $result = $this->file->repo->fileExistsBatch( $files, FileRepo::FILES_ONLY ); $filteredTriplets = array(); foreach ( $triplets as $file ) { diff --git a/includes/filerepo/OldLocalFile.php b/includes/filerepo/file/OldLocalFile.php index bcb22c17..ebd83c4d 100644 --- a/includes/filerepo/OldLocalFile.php +++ b/includes/filerepo/file/OldLocalFile.php @@ -3,13 +3,13 @@ * Old file in the oldimage table * * @file - * @ingroup FileRepo + * @ingroup FileAbstraction */ /** * Class to represent a file in the oldimage table 
* - * @ingroup FileRepo + * @ingroup FileAbstraction */ class OldLocalFile extends LocalFile { var $requestedTime, $archive_name; @@ -212,11 +212,12 @@ class OldLocalFile extends LocalFile { * field of this image file, if it's marked as deleted. * * @param $field Integer + * @param $user User object to check, or null to use $wgUser * @return bool */ - function userCan( $field ) { + function userCan( $field, User $user = null ) { $this->load(); - return Revision::userCanBitfield( $this->deleted, $field ); + return Revision::userCanBitfield( $this->deleted, $field, $user ); } /** @@ -261,7 +262,7 @@ class OldLocalFile extends LocalFile { $dbw->begin(); $dstPath = $this->repo->getZonePath( 'public' ) . '/' . $this->getRel(); - $props = self::getPropsFromPath( $dstPath ); + $props = $this->repo->getFileProps( $dstPath ); if ( !$props['fileExists'] ) { return false; } diff --git a/includes/filerepo/UnregisteredLocalFile.php b/includes/filerepo/file/UnregisteredLocalFile.php index 2df9a9b5..cd9d3d02 100644 --- a/includes/filerepo/UnregisteredLocalFile.php +++ b/includes/filerepo/file/UnregisteredLocalFile.php @@ -3,12 +3,12 @@ * File without associated database record * * @file - * @ingroup FileRepo + * @ingroup FileAbstraction */ /** * A file object referring to either a standalone local file, or a file in a - * local repository with no database, for example an FSRepo repository. + * local repository with no database, for example an FileRepo repository. * * Read-only. * @@ -16,7 +16,7 @@ * lots of functions missing. It is used by the WebStore extension in the * standalone role. 
* - * @ingroup FileRepo + * @ingroup FileAbstraction */ class UnregisteredLocalFile extends File { var $title, $path, $mime, $dims; @@ -27,12 +27,12 @@ class UnregisteredLocalFile extends File { var $handler; /** - * @param $path - * @param $mime + * @param $path string Storage path + * @param $mime string * @return UnregisteredLocalFile */ static function newFromPath( $path, $mime ) { - return new UnregisteredLocalFile( false, false, $path, $mime ); + return new self( false, false, $path, $mime ); } /** @@ -41,13 +41,16 @@ class UnregisteredLocalFile extends File { * @return UnregisteredLocalFile */ static function newFromTitle( $title, $repo ) { - return new UnregisteredLocalFile( $title, $repo, false, false ); + return new self( $title, $repo, false, false ); } /** + * Create an UnregisteredLocalFile based on a path or a (title,repo) pair. + * A FileRepo object is not required here, unlike most other File classes. + * * @throws MWException - * @param $title string - * @param $repo FSRepo + * @param $title Title|false + * @param $repo FileRepo * @param $path string * @param $mime string */ @@ -55,18 +58,20 @@ class UnregisteredLocalFile extends File { if ( !( $title && $repo ) && !$path ) { throw new MWException( __METHOD__.': not enough parameters, must specify title and repo, or a full path' ); } - if ( $title ) { - $this->title = $title; + if ( $title instanceof Title ) { + $this->title = File::normalizeTitle( $title, 'exception' ); $this->name = $repo->getNameFromTitle( $title ); } else { $this->name = basename( $path ); - $this->title = Title::makeTitleSafe( NS_FILE, $this->name ); + $this->title = File::normalizeTitle( $this->name, 'exception' ); } $this->repo = $repo; if ( $path ) { $this->path = $path; } else { - $this->path = $repo->getRootDirectory() . '/' . $repo->getHashPath( $this->name ) . $this->name; + $this->assertRepoDefined(); + $this->path = $repo->getRootDirectory() . '/' . + $repo->getHashPath( $this->name ) . 
$this->name; } if ( $mime ) { $this->mime = $mime; @@ -74,7 +79,7 @@ class UnregisteredLocalFile extends File { $this->dims = array(); } - function getPageDimensions( $page = 1 ) { + private function cachePageDimensions( $page = 1 ) { if ( !isset( $this->dims[$page] ) ) { if ( !$this->getHandler() ) { return false; @@ -85,19 +90,19 @@ class UnregisteredLocalFile extends File { } function getWidth( $page = 1 ) { - $dim = $this->getPageDimensions( $page ); + $dim = $this->cachePageDimensions( $page ); return $dim['width']; } function getHeight( $page = 1 ) { - $dim = $this->getPageDimensions( $page ); + $dim = $this->cachePageDimensions( $page ); return $dim['height']; } function getMimeType() { if ( !isset( $this->mime ) ) { $magic = MimeMagic::singleton(); - $this->mime = $magic->guessMimeType( $this->path ); + $this->mime = $magic->guessMimeType( $this->getLocalRefPath() ); } return $this->mime; } @@ -106,7 +111,7 @@ class UnregisteredLocalFile extends File { if ( !$this->getHandler() ) { return false; } - return $this->handler->getImageSize( $this, $this->getPath() ); + return $this->handler->getImageSize( $this, $this->getLocalRefPath() ); } function getMetadata() { @@ -114,7 +119,7 @@ class UnregisteredLocalFile extends File { if ( !$this->getHandler() ) { $this->metadata = false; } else { - $this->metadata = $this->handler->getMetadata( $this, $this->getPath() ); + $this->metadata = $this->handler->getMetadata( $this, $this->getLocalRefPath() ); } } return $this->metadata; @@ -122,17 +127,19 @@ class UnregisteredLocalFile extends File { function getURL() { if ( $this->repo ) { - return $this->repo->getZoneUrl( 'public' ) . '/' . $this->repo->getHashPath( $this->name ) . rawurlencode( $this->name ); + return $this->repo->getZoneUrl( 'public' ) . '/' . + $this->repo->getHashPath( $this->name ) . 
rawurlencode( $this->name ); } else { return false; } } function getSize() { - if ( file_exists( $this->path ) ) { - return filesize( $this->path ); - } else { - return false; + $this->assertRepoDefined(); + $props = $this->repo->getFileProps( $this->path ); + if ( isset( $props['size'] ) ) { + return $props['size']; } + return false; // doesn't exist } } |