diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2011-12-03 13:29:22 +0100 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2011-12-03 13:29:22 +0100 |
commit | ca32f08966f1b51fcb19460f0996bb0c4048e6fe (patch) | |
tree | ec04cc15b867bc21eedca904cea9af0254531a11 /includes/cache | |
parent | a22fbfc60f36f5f7ee10d5ae6fe347340c2ee67c (diff) |
Update to MediaWiki 1.18.0
* Also update ArchLinux skin to match changes in MonoBook
* Use only css to hide our menu bar when printing
Diffstat (limited to 'includes/cache')
-rw-r--r-- | includes/cache/CacheDependency.php | 394 | ||||
-rw-r--r-- | includes/cache/HTMLCacheUpdate.php | 230 | ||||
-rw-r--r-- | includes/cache/HTMLFileCache.php | 250 | ||||
-rw-r--r-- | includes/cache/LinkBatch.php | 195 | ||||
-rw-r--r-- | includes/cache/LinkCache.php | 219 | ||||
-rw-r--r-- | includes/cache/MemcachedSessions.php | 98 | ||||
-rw-r--r-- | includes/cache/MessageCache.php | 971 | ||||
-rw-r--r-- | includes/cache/SquidUpdate.php | 226 |
8 files changed, 2583 insertions, 0 deletions
diff --git a/includes/cache/CacheDependency.php b/includes/cache/CacheDependency.php new file mode 100644 index 00000000..aa020664 --- /dev/null +++ b/includes/cache/CacheDependency.php @@ -0,0 +1,394 @@ +<?php +/** + * This class stores an arbitrary value along with its dependencies. + * Users should typically only use DependencyWrapper::getValueFromCache(), + * rather than instantiating one of these objects directly. + * @ingroup Cache + */ + +class DependencyWrapper { + var $value; + var $deps; + + /** + * Create an instance. + * @param $value Mixed: the user-supplied value + * @param $deps Mixed: a dependency or dependency array. All dependencies + * must be objects implementing CacheDependency. + */ + function __construct( $value = false, $deps = array() ) { + $this->value = $value; + + if ( !is_array( $deps ) ) { + $deps = array( $deps ); + } + + $this->deps = $deps; + } + + /** + * Returns true if any of the dependencies have expired + */ + function isExpired() { + foreach ( $this->deps as $dep ) { + if ( $dep->isExpired() ) { + return true; + } + } + + return false; + } + + /** + * Initialise dependency values in preparation for storing. This must be + * called before serialization. + */ + function initialiseDeps() { + foreach ( $this->deps as $dep ) { + $dep->loadDependencyValues(); + } + } + + /** + * Get the user-defined value + */ + function getValue() { + return $this->value; + } + + /** + * Store the wrapper to a cache + * + * @param $cache BagOStuff + * @param $key + * @param $expiry + */ + function storeToCache( $cache, $key, $expiry = 0 ) { + $this->initialiseDeps(); + $cache->set( $key, $this, $expiry ); + } + + /** + * Attempt to get a value from the cache. If the value is expired or missing, + * it will be generated with the callback function (if present), and the newly + * calculated value will be stored to the cache in a wrapper. 
+ * + * @param $cache BagOStuff a cache object such as $wgMemc + * @param $key String: the cache key + * @param $expiry Integer: the expiry timestamp or interval in seconds + * @param $callback Mixed: the callback for generating the value, or false + * @param $callbackParams Array: the function parameters for the callback + * @param $deps Array: the dependencies to store on a cache miss. Note: these + * are not the dependencies used on a cache hit! Cache hits use the stored + * dependency array. + * + * @return mixed The value, or null if it was not present in the cache and no + * callback was defined. + */ + static function getValueFromCache( $cache, $key, $expiry = 0, $callback = false, + $callbackParams = array(), $deps = array() ) + { + $obj = $cache->get( $key ); + + if ( is_object( $obj ) && $obj instanceof DependencyWrapper && !$obj->isExpired() ) { + $value = $obj->value; + } elseif ( $callback ) { + $value = call_user_func_array( $callback, $callbackParams ); + # Cache the newly-generated value + $wrapper = new DependencyWrapper( $value, $deps ); + $wrapper->storeToCache( $cache, $key, $expiry ); + } else { + $value = null; + } + + return $value; + } +} + +/** + * @ingroup Cache + */ +abstract class CacheDependency { + /** + * Returns true if the dependency is expired, false otherwise + */ + abstract function isExpired(); + + /** + * Hook to perform any expensive pre-serialize loading of dependency values. + */ + function loadDependencyValues() { } +} + +/** + * @ingroup Cache + */ +class FileDependency extends CacheDependency { + var $filename, $timestamp; + + /** + * Create a file dependency + * + * @param $filename String: the name of the file, preferably fully qualified + * @param $timestamp Mixed: the unix last modified timestamp, or false if the + * file does not exist. If omitted, the timestamp will be loaded from + * the file. + * + * A dependency on a nonexistent file will be triggered when the file is + * created. 
A dependency on an existing file will be triggered when the + * file is changed. + */ + function __construct( $filename, $timestamp = null ) { + $this->filename = $filename; + $this->timestamp = $timestamp; + } + + function __sleep() { + $this->loadDependencyValues(); + return array( 'filename', 'timestamp' ); + } + + function loadDependencyValues() { + if ( is_null( $this->timestamp ) ) { + if ( !file_exists( $this->filename ) ) { + # Dependency on a non-existent file + # This is a valid concept! + $this->timestamp = false; + } else { + $this->timestamp = filemtime( $this->filename ); + } + } + } + + /** + * @return bool + */ + function isExpired() { + if ( !file_exists( $this->filename ) ) { + if ( $this->timestamp === false ) { + # Still nonexistent + return false; + } else { + # Deleted + wfDebug( "Dependency triggered: {$this->filename} deleted.\n" ); + return true; + } + } else { + $lastmod = filemtime( $this->filename ); + if ( $lastmod > $this->timestamp ) { + # Modified or created + wfDebug( "Dependency triggered: {$this->filename} changed.\n" ); + return true; + } else { + # Not modified + return false; + } + } + } +} + +/** + * @ingroup Cache + */ +class TitleDependency extends CacheDependency { + var $titleObj; + var $ns, $dbk; + var $touched; + + /** + * Construct a title dependency + * @param $title Title + */ + function __construct( Title $title ) { + $this->titleObj = $title; + $this->ns = $title->getNamespace(); + $this->dbk = $title->getDBkey(); + } + + function loadDependencyValues() { + $this->touched = $this->getTitle()->getTouched(); + } + + /** + * Get rid of bulky Title object for sleep + * + * @return array + */ + function __sleep() { + return array( 'ns', 'dbk', 'touched' ); + } + + /** + * @return Title + */ + function getTitle() { + if ( !isset( $this->titleObj ) ) { + $this->titleObj = Title::makeTitle( $this->ns, $this->dbk ); + } + + return $this->titleObj; + } + + /** + * @return bool + */ + function isExpired() { + $touched = 
$this->getTitle()->getTouched(); + + if ( $this->touched === false ) { + if ( $touched === false ) { + # Still missing + return false; + } else { + # Created + return true; + } + } elseif ( $touched === false ) { + # Deleted + return true; + } elseif ( $touched > $this->touched ) { + # Updated + return true; + } else { + # Unmodified + return false; + } + } +} + +/** + * @ingroup Cache + */ +class TitleListDependency extends CacheDependency { + var $linkBatch; + var $timestamps; + + /** + * Construct a dependency on a list of titles + */ + function __construct( LinkBatch $linkBatch ) { + $this->linkBatch = $linkBatch; + } + + function calculateTimestamps() { + # Initialise values to false + $timestamps = array(); + + foreach ( $this->getLinkBatch()->data as $ns => $dbks ) { + if ( count( $dbks ) > 0 ) { + $timestamps[$ns] = array(); + + foreach ( $dbks as $dbk => $value ) { + $timestamps[$ns][$dbk] = false; + } + } + } + + # Do the query + if ( count( $timestamps ) ) { + $dbr = wfGetDB( DB_SLAVE ); + $where = $this->getLinkBatch()->constructSet( 'page', $dbr ); + $res = $dbr->select( + 'page', + array( 'page_namespace', 'page_title', 'page_touched' ), + $where, + __METHOD__ + ); + + foreach ( $res as $row ) { + $timestamps[$row->page_namespace][$row->page_title] = $row->page_touched; + } + } + + return $timestamps; + } + + function loadDependencyValues() { + $this->timestamps = $this->calculateTimestamps(); + } + + /** + * @return array + */ + function __sleep() { + return array( 'timestamps' ); + } + + function getLinkBatch() { + if ( !isset( $this->linkBatch ) ) { + $this->linkBatch = new LinkBatch; + $this->linkBatch->setArray( $this->timestamps ); + } + return $this->linkBatch; + } + + /** + * @return bool + */ + function isExpired() { + $newTimestamps = $this->calculateTimestamps(); + + foreach ( $this->timestamps as $ns => $dbks ) { + foreach ( $dbks as $dbk => $oldTimestamp ) { + $newTimestamp = $newTimestamps[$ns][$dbk]; + + if ( $oldTimestamp === false ) { 
+ if ( $newTimestamp === false ) { + # Still missing + } else { + # Created + return true; + } + } elseif ( $newTimestamp === false ) { + # Deleted + return true; + } elseif ( $newTimestamp > $oldTimestamp ) { + # Updated + return true; + } else { + # Unmodified + } + } + } + + return false; + } +} + +/** + * @ingroup Cache + */ +class GlobalDependency extends CacheDependency { + var $name, $value; + + function __construct( $name ) { + $this->name = $name; + $this->value = $GLOBALS[$name]; + } + + /** + * @return bool + */ + function isExpired() { + return $GLOBALS[$this->name] != $this->value; + } +} + +/** + * @ingroup Cache + */ +class ConstantDependency extends CacheDependency { + var $name, $value; + + function __construct( $name ) { + $this->name = $name; + $this->value = constant( $name ); + } + + /** + * @return bool + */ + function isExpired() { + return constant( $this->name ) != $this->value; + } +} diff --git a/includes/cache/HTMLCacheUpdate.php b/includes/cache/HTMLCacheUpdate.php new file mode 100644 index 00000000..d542800d --- /dev/null +++ b/includes/cache/HTMLCacheUpdate.php @@ -0,0 +1,230 @@ +<?php + +/** + * Class to invalidate the HTML cache of all the pages linking to a given title. + * Small numbers of links will be done immediately, large numbers are pushed onto + * the job queue. + * + * This class is designed to work efficiently with small numbers of links, and + * to work reasonably well with up to ~10^5 links. Above ~10^6 links, the memory + * and time requirements of loading all backlinked IDs in doUpdate() might become + * prohibitive. The requirements measured at Wikimedia are approximately: + * + * memory: 48 bytes per row + * time: 16us per row for the query plus processing + * + * The reason this query is done is to support partitioning of the job + * by backlinked ID. The memory issue could be allieviated by doing this query in + * batches, but of course LIMIT with an offset is inefficient on the DB side. 
+ * + * The class is nevertheless a vast improvement on the previous method of using + * File::getLinksTo() and Title::touchArray(), which uses about 2KB of memory per + * link. + * + * @ingroup Cache + */ +class HTMLCacheUpdate +{ + /** + * @var Title + */ + public $mTitle; + + public $mTable, $mPrefix, $mStart, $mEnd; + public $mRowsPerJob, $mRowsPerQuery; + + function __construct( $titleTo, $table, $start = false, $end = false ) { + global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery; + + $this->mTitle = $titleTo; + $this->mTable = $table; + $this->mStart = $start; + $this->mEnd = $end; + $this->mRowsPerJob = $wgUpdateRowsPerJob; + $this->mRowsPerQuery = $wgUpdateRowsPerQuery; + $this->mCache = $this->mTitle->getBacklinkCache(); + } + + public function doUpdate() { + if ( $this->mStart || $this->mEnd ) { + $this->doPartialUpdate(); + return; + } + + # Get an estimate of the number of rows from the BacklinkCache + $numRows = $this->mCache->getNumLinks( $this->mTable ); + if ( $numRows > $this->mRowsPerJob * 2 ) { + # Do fast cached partition + $this->insertJobs(); + } else { + # Get the links from the DB + $titleArray = $this->mCache->getLinks( $this->mTable ); + # Check if the row count estimate was correct + if ( $titleArray->count() > $this->mRowsPerJob * 2 ) { + # Not correct, do accurate partition + wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" ); + $this->insertJobsFromTitles( $titleArray ); + } else { + $this->invalidateTitles( $titleArray ); + } + } + } + + /** + * Update some of the backlinks, defined by a page ID range + */ + protected function doPartialUpdate() { + $titleArray = $this->mCache->getLinks( $this->mTable, $this->mStart, $this->mEnd ); + if ( $titleArray->count() <= $this->mRowsPerJob * 2 ) { + # This partition is small enough, do the update + $this->invalidateTitles( $titleArray ); + } else { + # Partitioning was excessively inaccurate. Divide the job further. 
+ # This can occur when a large number of links are added in a short + # period of time, say by updating a heavily-used template. + $this->insertJobsFromTitles( $titleArray ); + } + } + + /** + * Partition the current range given by $this->mStart and $this->mEnd, + * using a pre-calculated title array which gives the links in that range. + * Queue the resulting jobs. + * + * @param $titleArray array + */ + protected function insertJobsFromTitles( $titleArray ) { + # We make subpartitions in the sense that the start of the first job + # will be the start of the parent partition, and the end of the last + # job will be the end of the parent partition. + $jobs = array(); + $start = $this->mStart; # start of the current job + $numTitles = 0; + foreach ( $titleArray as $title ) { + $id = $title->getArticleID(); + # $numTitles is now the number of titles in the current job not + # including the current ID + if ( $numTitles >= $this->mRowsPerJob ) { + # Add a job up to but not including the current ID + $params = array( + 'table' => $this->mTable, + 'start' => $start, + 'end' => $id - 1 + ); + $jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params ); + $start = $id; + $numTitles = 0; + } + $numTitles++; + } + # Last job + $params = array( + 'table' => $this->mTable, + 'start' => $start, + 'end' => $this->mEnd + ); + $jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params ); + wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . 
" jobs\n" ); + + if ( count( $jobs ) < 2 ) { + # I don't think this is possible at present, but handling this case + # makes the code a bit more robust against future code updates and + # avoids a potential infinite loop of repartitioning + wfDebug( __METHOD__.": repartitioning failed!\n" ); + $this->invalidateTitles( $titleArray ); + return; + } + + Job::batchInsert( $jobs ); + } + + protected function insertJobs() { + $batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob ); + if ( !$batches ) { + return; + } + $jobs = array(); + foreach ( $batches as $batch ) { + $params = array( + 'table' => $this->mTable, + 'start' => $batch[0], + 'end' => $batch[1], + ); + $jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params ); + } + Job::batchInsert( $jobs ); + } + + /** + * Invalidate an array (or iterator) of Title objects, right now + */ + protected function invalidateTitles( $titleArray ) { + global $wgUseFileCache, $wgUseSquid; + + $dbw = wfGetDB( DB_MASTER ); + $timestamp = $dbw->timestamp(); + + # Get all IDs in this query into an array + $ids = array(); + foreach ( $titleArray as $title ) { + $ids[] = $title->getArticleID(); + } + + if ( !$ids ) { + return; + } + + # Update page_touched + $batches = array_chunk( $ids, $this->mRowsPerQuery ); + foreach ( $batches as $batch ) { + $dbw->update( 'page', + array( 'page_touched' => $timestamp ), + array( 'page_id IN (' . $dbw->makeList( $batch ) . ')' ), + __METHOD__ + ); + } + + # Update squid + if ( $wgUseSquid ) { + $u = SquidUpdate::newFromTitles( $titleArray ); + $u->doUpdate(); + } + + # Update file cache + if ( $wgUseFileCache ) { + foreach ( $titleArray as $title ) { + HTMLFileCache::clearFileCache( $title ); + } + } + } + +} + +/** + * Job wrapper for HTMLCacheUpdate. Gets run whenever a related + * job gets called from the queue. 
+ * + * @ingroup JobQueue + */ +class HTMLCacheUpdateJob extends Job { + var $table, $start, $end; + + /** + * Construct a job + * @param $title Title: the title linked to + * @param $params Array: job parameters (table, start and end page_ids) + * @param $id Integer: job id + */ + function __construct( $title, $params, $id = 0 ) { + parent::__construct( 'htmlCacheUpdate', $title, $params, $id ); + $this->table = $params['table']; + $this->start = $params['start']; + $this->end = $params['end']; + } + + public function run() { + $update = new HTMLCacheUpdate( $this->title, $this->table, $this->start, $this->end ); + $update->doUpdate(); + return true; + } +} diff --git a/includes/cache/HTMLFileCache.php b/includes/cache/HTMLFileCache.php new file mode 100644 index 00000000..1095da2c --- /dev/null +++ b/includes/cache/HTMLFileCache.php @@ -0,0 +1,250 @@ +<?php +/** + * Contain the HTMLFileCache class + * @file + * @ingroup Cache + */ + +/** + * Handles talking to the file cache, putting stuff in and taking it back out. + * Mostly called from Article.php for the emergency abort/fallback to cache. + * + * Global options that affect this module: + * - $wgCachePages + * - $wgCacheEpoch + * - $wgUseFileCache + * - $wgCacheDirectory + * - $wgFileCacheDirectory + * - $wgUseGzip + * + * @ingroup Cache + */ +class HTMLFileCache { + + /** + * @var Title + */ + var $mTitle; + var $mFileCache, $mType; + + public function __construct( $title, $type = 'view' ) { + $this->mTitle = $title; + $this->mType = ($type == 'raw' || $type == 'view' ) ? 
$type : false; + $this->fileCacheName(); // init name + } + + public function fileCacheName() { + if( !$this->mFileCache ) { + global $wgCacheDirectory, $wgFileCacheDirectory, $wgFileCacheDepth; + + if ( $wgFileCacheDirectory ) { + $dir = $wgFileCacheDirectory; + } elseif ( $wgCacheDirectory ) { + $dir = "$wgCacheDirectory/html"; + } else { + throw new MWException( 'Please set $wgCacheDirectory in LocalSettings.php if you wish to use the HTML file cache' ); + } + + # Store raw pages (like CSS hits) elsewhere + $subdir = ($this->mType === 'raw') ? 'raw/' : ''; + + $key = $this->mTitle->getPrefixedDbkey(); + if ( $wgFileCacheDepth > 0 ) { + $hash = md5( $key ); + for ( $i = 1; $i <= $wgFileCacheDepth; $i++ ) { + $subdir .= substr( $hash, 0, $i ) . '/'; + } + } + # Avoid extension confusion + $key = str_replace( '.', '%2E', urlencode( $key ) ); + $this->mFileCache = "{$dir}/{$subdir}{$key}.html"; + + if( $this->useGzip() ) { + $this->mFileCache .= '.gz'; + } + + wfDebug( __METHOD__ . ": {$this->mFileCache}\n" ); + } + return $this->mFileCache; + } + + public function isFileCached() { + if( $this->mType === false ) { + return false; + } + return file_exists( $this->fileCacheName() ); + } + + public function fileCacheTime() { + return wfTimestamp( TS_MW, filemtime( $this->fileCacheName() ) ); + } + + /** + * Check if pages can be cached for this request/user + * @return bool + */ + public static function useFileCache() { + global $wgUser, $wgUseFileCache, $wgShowIPinHeader, $wgRequest, $wgLang, $wgContLang; + if( !$wgUseFileCache ) { + return false; + } + // Get all query values + $queryVals = $wgRequest->getValues(); + foreach( $queryVals as $query => $val ) { + if( $query == 'title' || $query == 'curid' ) { + continue; + // Normal page view in query form can have action=view. + // Raw hits for pages also stored, like .css pages for example. 
+ } elseif( $query == 'action' && $val == 'view' ) { + continue; + } elseif( $query == 'usemsgcache' && $val == 'yes' ) { + continue; + // Below are header setting params + } elseif( $query == 'maxage' || $query == 'smaxage' || $query == 'ctype' || $query == 'gen' ) { + continue; + } else { + return false; + } + } + // Check for non-standard user language; this covers uselang, + // and extensions for auto-detecting user language. + $ulang = $wgLang->getCode(); + $clang = $wgContLang->getCode(); + // Check that there are no other sources of variation + return !$wgShowIPinHeader && !$wgUser->getId() && !$wgUser->getNewtalk() && $ulang == $clang; + } + + /** + * Check if up to date cache file exists + * @param $timestamp string + * + * @return bool + */ + public function isFileCacheGood( $timestamp = '' ) { + global $wgCacheEpoch; + + if( !$this->isFileCached() ) { + return false; + } + + $cachetime = $this->fileCacheTime(); + $good = $timestamp <= $cachetime && $wgCacheEpoch <= $cachetime; + + wfDebug( __METHOD__ . ": cachetime $cachetime, touched '{$timestamp}' epoch {$wgCacheEpoch}, good $good\n"); + return $good; + } + + public function useGzip() { + global $wgUseGzip; + return $wgUseGzip; + } + + /* In handy string packages */ + public function fetchRawText() { + return file_get_contents( $this->fileCacheName() ); + } + + public function fetchPageText() { + if( $this->useGzip() ) { + /* Why is there no gzfile_get_contents() or gzdecode()? */ + return implode( '', gzfile( $this->fileCacheName() ) ); + } else { + return $this->fetchRawText(); + } + } + + /* Working directory to/from output */ + public function loadFromFileCache() { + global $wgOut, $wgMimeType, $wgLanguageCode; + wfDebug( __METHOD__ . "()\n"); + $filename = $this->fileCacheName(); + // Raw pages should handle cache control on their own, + // even when using file cache. This reduces hits from clients. 
+ if( $this->mType !== 'raw' ) { + $wgOut->sendCacheControl(); + header( "Content-Type: $wgMimeType; charset=UTF-8" ); + header( "Content-Language: $wgLanguageCode" ); + } + + if( $this->useGzip() ) { + if( wfClientAcceptsGzip() ) { + header( 'Content-Encoding: gzip' ); + } else { + /* Send uncompressed */ + readgzfile( $filename ); + return; + } + } + readfile( $filename ); + $wgOut->disable(); // tell $wgOut that output is taken care of + } + + protected function checkCacheDirs() { + $filename = $this->fileCacheName(); + $mydir2 = substr($filename,0,strrpos($filename,'/')); # subdirectory level 2 + $mydir1 = substr($mydir2,0,strrpos($mydir2,'/')); # subdirectory level 1 + + wfMkdirParents( $mydir1 ); + wfMkdirParents( $mydir2 ); + } + + public function saveToFileCache( $text ) { + global $wgUseFileCache; + if( !$wgUseFileCache || strlen( $text ) < 512 ) { + // Disabled or empty/broken output (OOM and PHP errors) + return $text; + } + + wfDebug( __METHOD__ . "()\n", false); + + $this->checkCacheDirs(); + + $f = fopen( $this->fileCacheName(), 'w' ); + if($f) { + $now = wfTimestampNow(); + if( $this->useGzip() ) { + $rawtext = str_replace( '</html>', + '<!-- Cached/compressed '.$now." -->\n</html>", + $text ); + $text = gzencode( $rawtext ); + } else { + $text = str_replace( '</html>', + '<!-- Cached '.$now." 
-->\n</html>", + $text ); + } + fwrite( $f, $text ); + fclose( $f ); + if( $this->useGzip() ) { + if( wfClientAcceptsGzip() ) { + header( 'Content-Encoding: gzip' ); + return $text; + } else { + return $rawtext; + } + } else { + return $text; + } + } + return $text; + } + + public static function clearFileCache( $title ) { + global $wgUseFileCache; + + if ( !$wgUseFileCache ) { + return false; + } + + wfSuppressWarnings(); + + $fc = new self( $title, 'view' ); + unlink( $fc->fileCacheName() ); + + $fc = new self( $title, 'raw' ); + unlink( $fc->fileCacheName() ); + + wfRestoreWarnings(); + + return true; + } +} diff --git a/includes/cache/LinkBatch.php b/includes/cache/LinkBatch.php new file mode 100644 index 00000000..0bd869fc --- /dev/null +++ b/includes/cache/LinkBatch.php @@ -0,0 +1,195 @@ +<?php + +/** + * Class representing a list of titles + * The execute() method checks them all for existence and adds them to a LinkCache object + * + * @ingroup Cache + */ +class LinkBatch { + /** + * 2-d array, first index namespace, second index dbkey, value arbitrary + */ + var $data = array(); + + /** + * For debugging which method is using this class. + */ + protected $caller; + + function __construct( $arr = array() ) { + foreach( $arr as $item ) { + $this->addObj( $item ); + } + } + + /** + * Use ->setCaller( __METHOD__ ) to indicate which code is using this + * class. Only used in debugging output. 
+ * @since 1.17 + * + * @param $caller + */ + public function setCaller( $caller ) { + $this->caller = $caller; + } + + /** + * @param $title Title + */ + public function addObj( $title ) { + if ( is_object( $title ) ) { + $this->add( $title->getNamespace(), $title->getDBkey() ); + } else { + wfDebug( "Warning: LinkBatch::addObj got invalid title object\n" ); + } + } + + public function add( $ns, $dbkey ) { + if ( $ns < 0 ) { + return; + } + if ( !array_key_exists( $ns, $this->data ) ) { + $this->data[$ns] = array(); + } + + $this->data[$ns][str_replace( ' ', '_', $dbkey )] = 1; + } + + /** + * Set the link list to a given 2-d array + * First key is the namespace, second is the DB key, value arbitrary + * + * @param $array array + */ + public function setArray( $array ) { + $this->data = $array; + } + + /** + * Returns true if no pages have been added, false otherwise. + * + * @return bool + */ + public function isEmpty() { + return ($this->getSize() == 0); + } + + /** + * Returns the size of the batch. + * + * @return int + */ + public function getSize() { + return count( $this->data ); + } + + /** + * Do the query and add the results to the LinkCache object + * Return an array mapping PDBK to ID + */ + public function execute() { + $linkCache = LinkCache::singleton(); + return $this->executeInto( $linkCache ); + } + + /** + * Do the query and add the results to a given LinkCache object + * Return an array mapping PDBK to ID + */ + protected function executeInto( &$cache ) { + wfProfileIn( __METHOD__ ); + $res = $this->doQuery(); + $ids = $this->addResultToCache( $cache, $res ); + $this->doGenderQuery(); + wfProfileOut( __METHOD__ ); + return $ids; + } + + /** + * Add a ResultWrapper containing IDs and titles to a LinkCache object. + * As normal, titles will go into the static Title cache field. + * This function *also* stores extra fields of the title used for link + * parsing to avoid extra DB queries. 
+ * + * @param $cache + * @param $res + */ + public function addResultToCache( $cache, $res ) { + if ( !$res ) { + return array(); + } + + // For each returned entry, add it to the list of good links, and remove it from $remaining + + $ids = array(); + $remaining = $this->data; + foreach ( $res as $row ) { + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + $cache->addGoodLinkObj( $row->page_id, $title, $row->page_len, $row->page_is_redirect, $row->page_latest ); + $ids[$title->getPrefixedDBkey()] = $row->page_id; + unset( $remaining[$row->page_namespace][$row->page_title] ); + } + + // The remaining links in $data are bad links, register them as such + foreach ( $remaining as $ns => $dbkeys ) { + foreach ( $dbkeys as $dbkey => $unused ) { + $title = Title::makeTitle( $ns, $dbkey ); + $cache->addBadLinkObj( $title ); + $ids[$title->getPrefixedDBkey()] = 0; + } + } + return $ids; + } + + /** + * Perform the existence test query, return a ResultWrapper with page_id fields + */ + public function doQuery() { + if ( $this->isEmpty() ) { + return false; + } + wfProfileIn( __METHOD__ ); + + // This is similar to LinkHolderArray::replaceInternal + $dbr = wfGetDB( DB_SLAVE ); + $table = 'page'; + $fields = array( 'page_id', 'page_namespace', 'page_title', 'page_len', + 'page_is_redirect', 'page_latest' ); + $conds = $this->constructSet( 'page', $dbr ); + + // Do query + $caller = __METHOD__; + if ( strval( $this->caller ) !== '' ) { + $caller .= " (for {$this->caller})"; + } + $res = $dbr->select( $table, $fields, $conds, $caller ); + wfProfileOut( __METHOD__ ); + return $res; + } + + public function doGenderQuery() { + if ( $this->isEmpty() ) { + return false; + } + + global $wgContLang; + if ( !$wgContLang->needsGenderDistinction() ) { + return false; + } + + $genderCache = GenderCache::singleton(); + $genderCache->dolinkBatch( $this->data, $this->caller ); + } + + /** + * Construct a WHERE clause which will match all the given titles. 
+ * + * @param $prefix String: the appropriate table's field name prefix ('page', 'pl', etc) + * @param $db DatabaseBase object to use + * @return mixed string with SQL where clause fragment, or false if no items. + */ + public function constructSet( $prefix, $db ) { + return $db->makeWhereFrom2d( $this->data, "{$prefix}_namespace", "{$prefix}_title" ); + } +} diff --git a/includes/cache/LinkCache.php b/includes/cache/LinkCache.php new file mode 100644 index 00000000..aeb10eb0 --- /dev/null +++ b/includes/cache/LinkCache.php @@ -0,0 +1,219 @@ +<?php +/** + * Cache for article titles (prefixed DB keys) and ids linked from one source + * + * @ingroup Cache + */ +class LinkCache { + // Increment $mClassVer whenever old serialized versions of this class + // becomes incompatible with the new version. + private $mClassVer = 4; + + private $mGoodLinks, $mBadLinks; + private $mForUpdate; + + /** + * Get an instance of this class + * + * @return LinkCache + */ + static function &singleton() { + static $instance; + if ( !isset( $instance ) ) { + $instance = new LinkCache; + } + return $instance; + } + + function __construct() { + $this->mForUpdate = false; + $this->mGoodLinks = array(); + $this->mGoodLinkFields = array(); + $this->mBadLinks = array(); + } + + /** + * General accessor to get/set whether SELECT FOR UPDATE should be used + * + * @return bool + */ + public function forUpdate( $update = null ) { + return wfSetVar( $this->mForUpdate, $update ); + } + + /** + * @param $title + * @return array|int + */ + public function getGoodLinkID( $title ) { + if ( array_key_exists( $title, $this->mGoodLinks ) ) { + return $this->mGoodLinks[$title]; + } else { + return 0; + } + } + + /** + * Get a field of a title object from cache. + * If this link is not good, it will return NULL. 
+ * @param $title Title + * @param $field String: ('length','redirect','revision') + * @return mixed + */ + public function getGoodLinkFieldObj( $title, $field ) { + $dbkey = $title->getPrefixedDbKey(); + if ( array_key_exists( $dbkey, $this->mGoodLinkFields ) ) { + return $this->mGoodLinkFields[$dbkey][$field]; + } else { + return null; + } + } + + /** + * @param $title + * @return bool + */ + public function isBadLink( $title ) { + return array_key_exists( $title, $this->mBadLinks ); + } + + /** + * Add a link for the title to the link cache + * + * @param $id Integer: page's ID + * @param $title Title object + * @param $len Integer: text's length + * @param $redir Integer: whether the page is a redirect + * @param $revision Integer: latest revision's ID + */ + public function addGoodLinkObj( $id, $title, $len = -1, $redir = null, $revision = false ) { + $dbkey = $title->getPrefixedDbKey(); + $this->mGoodLinks[$dbkey] = intval( $id ); + $this->mGoodLinkFields[$dbkey] = array( + 'length' => intval( $len ), + 'redirect' => intval( $redir ), + 'revision' => intval( $revision ) ); + } + + /** + * @param $title Title + */ + public function addBadLinkObj( $title ) { + $dbkey = $title->getPrefixedDbKey(); + if ( !$this->isBadLink( $dbkey ) ) { + $this->mBadLinks[$dbkey] = 1; + } + } + + public function clearBadLink( $title ) { + unset( $this->mBadLinks[$title] ); + } + + /** + * @param $title Title + */ + public function clearLink( $title ) { + $dbkey = $title->getPrefixedDbKey(); + if( isset($this->mBadLinks[$dbkey]) ) { + unset($this->mBadLinks[$dbkey]); + } + if( isset($this->mGoodLinks[$dbkey]) ) { + unset($this->mGoodLinks[$dbkey]); + } + if( isset($this->mGoodLinkFields[$dbkey]) ) { + unset($this->mGoodLinkFields[$dbkey]); + } + } + + public function getGoodLinks() { return $this->mGoodLinks; } + public function getBadLinks() { return array_keys( $this->mBadLinks ); } + + /** + * Add a title to the link cache, return the page_id or zero if non-existent + * + * 
@param $title String: title to add + * @return Integer + */ + public function addLink( $title ) { + $nt = Title::newFromDBkey( $title ); + if( $nt ) { + return $this->addLinkObj( $nt ); + } else { + return 0; + } + } + + /** + * Add a title to the link cache, return the page_id or zero if non-existent + * + * @param $nt Title object to add + * @return Integer + */ + public function addLinkObj( $nt ) { + global $wgAntiLockFlags; + wfProfileIn( __METHOD__ ); + + $key = $nt->getPrefixedDBkey(); + if ( $this->isBadLink( $key ) || $nt->isExternal() ) { + wfProfileOut( __METHOD__ ); + return 0; + } + $id = $this->getGoodLinkID( $key ); + if ( $id != 0 ) { + wfProfileOut( __METHOD__ ); + return $id; + } + + if ( $key === '' ) { + wfProfileOut( __METHOD__ ); + return 0; + } + + # Some fields heavily used for linking... + if ( $this->mForUpdate ) { + $db = wfGetDB( DB_MASTER ); + if ( !( $wgAntiLockFlags & ALF_NO_LINK_LOCK ) ) { + $options = array( 'FOR UPDATE' ); + } else { + $options = array(); + } + } else { + $db = wfGetDB( DB_SLAVE ); + $options = array(); + } + + $s = $db->selectRow( 'page', + array( 'page_id', 'page_len', 'page_is_redirect', 'page_latest' ), + array( 'page_namespace' => $nt->getNamespace(), 'page_title' => $nt->getDBkey() ), + __METHOD__, $options ); + # Set fields... 
+ if ( $s !== false ) { + $id = intval( $s->page_id ); + $len = intval( $s->page_len ); + $redirect = intval( $s->page_is_redirect ); + $revision = intval( $s->page_latest ); + } else { + $id = 0; + $len = -1; + $redirect = 0; + $revision = 0; + } + + if ( $id == 0 ) { + $this->addBadLinkObj( $nt ); + } else { + $this->addGoodLinkObj( $id, $nt, $len, $redirect, $revision ); + } + wfProfileOut( __METHOD__ ); + return $id; + } + + /** + * Clears cache + */ + public function clear() { + $this->mGoodLinks = array(); + $this->mGoodLinkFields = array(); + $this->mBadLinks = array(); + } +} diff --git a/includes/cache/MemcachedSessions.php b/includes/cache/MemcachedSessions.php new file mode 100644 index 00000000..36733595 --- /dev/null +++ b/includes/cache/MemcachedSessions.php @@ -0,0 +1,98 @@ +<?php +/** + * This file gets included if $wgSessionsInMemcache is set in the config. + * It redirects session handling functions to store their data in memcached + * instead of the local filesystem. Depending on circumstances, it may also + * be necessary to change the cookie settings to work across hostnames. + * See: http://www.php.net/manual/en/function.session-set-save-handler.php + * + * @file + * @ingroup Cache + */ + +/** + * Get a cache key for the given session id. + * + * @param $id String: session id + * @return String: cache key + */ +function memsess_key( $id ) { + return wfMemcKey( 'session', $id ); +} + +/** + * Callback when opening a session. + * NOP: $wgMemc should be set up already. + * + * @param $save_path String: path used to store session files, unused + * @param $session_name String: session name + * @return Boolean: success + */ +function memsess_open( $save_path, $session_name ) { + return true; +} + +/** + * Callback when closing a session. + * NOP. + * + * @return Boolean: success + */ +function memsess_close() { + return true; +} + +/** + * Callback when reading session data. 
+ * + * @param $id String: session id + * @return Mixed: session data + */ +function memsess_read( $id ) { + global $wgMemc; + $data = $wgMemc->get( memsess_key( $id ) ); + if( ! $data ) return ''; + return $data; +} + +/** + * Callback when writing session data. + * + * @param $id String: session id + * @param $data Mixed: session data + * @return Boolean: success + */ +function memsess_write( $id, $data ) { + global $wgMemc; + $wgMemc->set( memsess_key( $id ), $data, 3600 ); + return true; +} + +/** + * Callback to destroy a session when calling session_destroy(). + * + * @param $id String: session id + * @return Boolean: success + */ +function memsess_destroy( $id ) { + global $wgMemc; + + $wgMemc->delete( memsess_key( $id ) ); + return true; +} + +/** + * Callback to execute garbage collection. + * NOP: Memcached performs garbage collection. + * + * @param $maxlifetime Integer: maximum session life time + * @return Boolean: success + */ +function memsess_gc( $maxlifetime ) { + return true; +} + +function memsess_write_close() { + session_write_close(); +} + diff --git a/includes/cache/MessageCache.php b/includes/cache/MessageCache.php new file mode 100644 index 00000000..79883844 --- /dev/null +++ b/includes/cache/MessageCache.php @@ -0,0 +1,971 @@ +<?php +/** + * @file + * @ingroup Cache + */ + +/** + * + */ +define( 'MSG_LOAD_TIMEOUT', 60 ); +define( 'MSG_LOCK_TIMEOUT', 10 ); +define( 'MSG_WAIT_TIMEOUT', 10 ); +define( 'MSG_CACHE_VERSION', 1 ); + +/** + * Message cache + * Performs various MediaWiki namespace-related functions + * @ingroup Cache + */ +class MessageCache { + /** + * Process local cache of loaded messages that are defined in + * MediaWiki namespace. First array level is a language code, + * second level is message key and the values are either message + * content prefixed with space, or !NONEXISTENT for negative + * caching. 
+ */ + protected $mCache; + + // Should mean that database cannot be used, but check + protected $mDisable; + + /// Lifetime for cache, used by object caching + protected $mExpiry; + + /** + * Message cache has it's own parser which it uses to transform + * messages. + */ + protected $mParserOptions, $mParser; + + /// Variable for tracking which variables are already loaded + protected $mLoadedLanguages = array(); + + /** + * Used for automatic detection of most used messages. + */ + protected $mRequestedMessages = array(); + + /** + * How long the message request counts are stored. Longer period gives + * better sample, but also takes longer to adapt changes. The counts + * are aggregrated per day, regardless of the value of this variable. + */ + protected static $mAdaptiveDataAge = 604800; // Is 7*24*3600 + + /** + * Filter the tail of less used messages that are requested more seldom + * than this factor times the number of request of most requested message. + * These messages are not loaded in the default set, but are still cached + * individually on demand with the normal cache expiry time. 
+ */ + protected static $mAdaptiveInclusionThreshold = 0.05; + + /** + * Singleton instance + * + * @var MessageCache + */ + private static $instance; + + /** + * @var bool + */ + protected $mInParser = false; + + /** + * Get the signleton instance of this class + * + * @since 1.18 + * @return MessageCache object + */ + public static function singleton() { + if ( is_null( self::$instance ) ) { + global $wgUseDatabaseMessages, $wgMsgCacheExpiry; + self::$instance = new self( wfGetMessageCacheStorage(), $wgUseDatabaseMessages, $wgMsgCacheExpiry ); + } + return self::$instance; + } + + /** + * Destroy the singleton instance + * + * @since 1.18 + */ + public static function destroyInstance() { + self::$instance = null; + } + + function __construct( $memCached, $useDB, $expiry ) { + if ( !$memCached ) { + $memCached = wfGetCache( CACHE_NONE ); + } + + $this->mMemc = $memCached; + $this->mDisable = !$useDB; + $this->mExpiry = $expiry; + } + + /** + * ParserOptions is lazy initialised. + * + * @return ParserOptions + */ + function getParserOptions() { + if ( !$this->mParserOptions ) { + $this->mParserOptions = new ParserOptions; + } + return $this->mParserOptions; + } + + /** + * Try to load the cache from a local file. + * Actual format of the file depends on the $wgLocalMessageCacheSerialized + * setting. + * + * @param $hash String: the hash of contents, to check validity. + * @param $code Mixed: Optional language code, see documenation of load(). + * @return false on failure. + */ + function loadFromLocal( $hash, $code ) { + global $wgCacheDirectory, $wgLocalMessageCacheSerialized; + + $filename = "$wgCacheDirectory/messages-" . wfWikiID() . 
"-$code"; + + # Check file existence + wfSuppressWarnings(); + $file = fopen( $filename, 'r' ); + wfRestoreWarnings(); + if ( !$file ) { + return false; // No cache file + } + + if ( $wgLocalMessageCacheSerialized ) { + // Check to see if the file has the hash specified + $localHash = fread( $file, 32 ); + if ( $hash === $localHash ) { + // All good, get the rest of it + $serialized = ''; + while ( !feof( $file ) ) { + $serialized .= fread( $file, 100000 ); + } + fclose( $file ); + return $this->setCache( unserialize( $serialized ), $code ); + } else { + fclose( $file ); + return false; // Wrong hash + } + } else { + $localHash = substr( fread( $file, 40 ), 8 ); + fclose( $file ); + if ( $hash != $localHash ) { + return false; // Wrong hash + } + + # Require overwrites the member variable or just shadows it? + require( $filename ); + return $this->setCache( $this->mCache, $code ); + } + } + + /** + * Save the cache to a local file. + */ + function saveToLocal( $serialized, $hash, $code ) { + global $wgCacheDirectory; + + $filename = "$wgCacheDirectory/messages-" . wfWikiID() . "-$code"; + wfMkdirParents( $wgCacheDirectory ); // might fail + + wfSuppressWarnings(); + $file = fopen( $filename, 'w' ); + wfRestoreWarnings(); + + if ( !$file ) { + wfDebug( "Unable to open local cache file for writing\n" ); + return; + } + + fwrite( $file, $hash . $serialized ); + fclose( $file ); + wfSuppressWarnings(); + chmod( $filename, 0666 ); + wfRestoreWarnings(); + } + + function saveToScript( $array, $hash, $code ) { + global $wgCacheDirectory; + + $filename = "$wgCacheDirectory/messages-" . wfWikiID() . "-$code"; + $tempFilename = $filename . 
'.tmp'; + wfMkdirParents( $wgCacheDirectory ); // might fail + + wfSuppressWarnings(); + $file = fopen( $tempFilename, 'w' ); + wfRestoreWarnings(); + + if ( !$file ) { + wfDebug( "Unable to open local cache file for writing\n" ); + return; + } + + fwrite( $file, "<?php\n//$hash\n\n \$this->mCache = array(" ); + + foreach ( $array as $key => $message ) { + $key = $this->escapeForScript( $key ); + $message = $this->escapeForScript( $message ); + fwrite( $file, "'$key' => '$message',\n" ); + } + + fwrite( $file, ");\n?>" ); + fclose( $file); + rename( $tempFilename, $filename ); + } + + function escapeForScript( $string ) { + $string = str_replace( '\\', '\\\\', $string ); + $string = str_replace( '\'', '\\\'', $string ); + return $string; + } + + /** + * Set the cache to $cache, if it is valid. Otherwise set the cache to false. + * + * @return bool + */ + function setCache( $cache, $code ) { + if ( isset( $cache['VERSION'] ) && $cache['VERSION'] == MSG_CACHE_VERSION ) { + $this->mCache[$code] = $cache; + return true; + } else { + return false; + } + } + + /** + * Loads messages from caches or from database in this order: + * (1) local message cache (if $wgUseLocalMessageCache is enabled) + * (2) memcached + * (3) from the database. + * + * When succesfully loading from (2) or (3), all higher level caches are + * updated for the newest version. + * + * Nothing is loaded if member variable mDisable is true, either manually + * set by calling code or if message loading fails (is this possible?). + * + * Returns true if cache is already populated or it was succesfully populated, + * or false if populating empty cache fails. Also returns true if MessageCache + * is disabled. + * + * @param $code String: language to which load messages + */ + function load( $code = false ) { + global $wgUseLocalMessageCache; + + if( !is_string( $code ) ) { + # This isn't really nice, so at least make a note about it and try to + # fall back + wfDebug( __METHOD__ . 
" called without providing a language code\n" ); + $code = 'en'; + } + + # Don't do double loading... + if ( isset( $this->mLoadedLanguages[$code] ) ) { + return true; + } + + # 8 lines of code just to say (once) that message cache is disabled + if ( $this->mDisable ) { + static $shownDisabled = false; + if ( !$shownDisabled ) { + wfDebug( __METHOD__ . ": disabled\n" ); + $shownDisabled = true; + } + return true; + } + + # Loading code starts + wfProfileIn( __METHOD__ ); + $success = false; # Keep track of success + $where = array(); # Debug info, delayed to avoid spamming debug log too much + $cacheKey = wfMemcKey( 'messages', $code ); # Key in memc for messages + + # (1) local cache + # Hash of the contents is stored in memcache, to detect if local cache goes + # out of date (due to update in other thread?) + if ( $wgUseLocalMessageCache ) { + wfProfileIn( __METHOD__ . '-fromlocal' ); + + $hash = $this->mMemc->get( wfMemcKey( 'messages', $code, 'hash' ) ); + if ( $hash ) { + $success = $this->loadFromLocal( $hash, $code ); + if ( $success ) $where[] = 'got from local cache'; + } + wfProfileOut( __METHOD__ . '-fromlocal' ); + } + + # (2) memcache + # Fails if nothing in cache, or in the wrong version. + if ( !$success ) { + wfProfileIn( __METHOD__ . '-fromcache' ); + $cache = $this->mMemc->get( $cacheKey ); + $success = $this->setCache( $cache, $code ); + if ( $success ) { + $where[] = 'got from global cache'; + $this->saveToCaches( $cache, false, $code ); + } + wfProfileOut( __METHOD__ . '-fromcache' ); + } + + # (3) + # Nothing in caches... 
so we need create one and store it in caches + if ( !$success ) { + $where[] = 'cache is empty'; + $where[] = 'loading from database'; + + $this->lock( $cacheKey ); + + # Limit the concurrency of loadFromDB to a single process + # This prevents the site from going down when the cache expires + $statusKey = wfMemcKey( 'messages', $code, 'status' ); + $success = $this->mMemc->add( $statusKey, 'loading', MSG_LOAD_TIMEOUT ); + if ( $success ) { + $cache = $this->loadFromDB( $code ); + $success = $this->setCache( $cache, $code ); + } + if ( $success ) { + $success = $this->saveToCaches( $cache, true, $code ); + if ( $success ) { + $this->mMemc->delete( $statusKey ); + } else { + $this->mMemc->set( $statusKey, 'error', 60 * 5 ); + wfDebug( "MemCached set error in MessageCache: restart memcached server!\n" ); + } + } + $this->unlock($cacheKey); + } + + if ( !$success ) { + # Bad luck... this should not happen + $where[] = 'loading FAILED - cache is disabled'; + $info = implode( ', ', $where ); + wfDebug( __METHOD__ . ": Loading $code... $info\n" ); + $this->mDisable = true; + $this->mCache = false; + } else { + # All good, just record the success + $info = implode( ', ', $where ); + wfDebug( __METHOD__ . ": Loading $code... $info\n" ); + $this->mLoadedLanguages[$code] = true; + } + wfProfileOut( __METHOD__ ); + return $success; + } + + /** + * Loads cacheable messages from the database. Messages bigger than + * $wgMaxMsgCacheEntrySize are assigned a special value, and are loaded + * on-demand from the database later. + * + * @param $code String: language code. + * @return Array: loaded messages for storing in caches. 
+ */ + function loadFromDB( $code ) { + wfProfileIn( __METHOD__ ); + global $wgMaxMsgCacheEntrySize, $wgLanguageCode, $wgAdaptiveMessageCache; + $dbr = wfGetDB( DB_SLAVE ); + $cache = array(); + + # Common conditions + $conds = array( + 'page_is_redirect' => 0, + 'page_namespace' => NS_MEDIAWIKI, + ); + + $mostused = array(); + if ( $wgAdaptiveMessageCache ) { + $mostused = $this->getMostUsedMessages(); + if ( $code !== $wgLanguageCode ) { + foreach ( $mostused as $key => $value ) { + $mostused[$key] = "$value/$code"; + } + } + } + + if ( count( $mostused ) ) { + $conds['page_title'] = $mostused; + } elseif ( $code !== $wgLanguageCode ) { + $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), "/$code" ); + } else { + # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses + # other than language code. + $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ); + } + + # Conditions to fetch oversized pages to ignore them + $bigConds = $conds; + $bigConds[] = 'page_len > ' . intval( $wgMaxMsgCacheEntrySize ); + + # Load titles for all oversized pages in the MediaWiki namespace + $res = $dbr->select( 'page', 'page_title', $bigConds, __METHOD__ . "($code)-big" ); + foreach ( $res as $row ) { + $cache[$row->page_title] = '!TOO BIG'; + } + + # Conditions to load the remaining pages with their contents + $smallConds = $conds; + $smallConds[] = 'page_latest=rev_id'; + $smallConds[] = 'rev_text_id=old_id'; + $smallConds[] = 'page_len <= ' . intval( $wgMaxMsgCacheEntrySize ); + + $res = $dbr->select( + array( 'page', 'revision', 'text' ), + array( 'page_title', 'old_text', 'old_flags' ), + $smallConds, + __METHOD__ . "($code)-small" + ); + + foreach ( $res as $row ) { + $text = Revision::getRevisionText( $row ); + if( $text === false ) { + // Failed to fetch data; possible ES errors? + // Store a marker to fetch on-demand as a workaround... + $entry = '!TOO BIG'; + wfDebugLog( 'MessageCache', __METHOD__ . 
": failed to load message page text for {$row->page_title} ($code)" ); + } else { + $entry = ' ' . $text; + } + $cache[$row->page_title] = $entry; + } + + foreach ( $mostused as $key ) { + if ( !isset( $cache[$key] ) ) { + $cache[$key] = '!NONEXISTENT'; + } + } + + $cache['VERSION'] = MSG_CACHE_VERSION; + wfProfileOut( __METHOD__ ); + return $cache; + } + + /** + * Updates cache as necessary when message page is changed + * + * @param $title String: name of the page changed. + * @param $text Mixed: new contents of the page. + */ + public function replace( $title, $text ) { + global $wgMaxMsgCacheEntrySize; + wfProfileIn( __METHOD__ ); + + if ( $this->mDisable ) { + wfProfileOut( __METHOD__ ); + return; + } + + list( $msg, $code ) = $this->figureMessage( $title ); + + $cacheKey = wfMemcKey( 'messages', $code ); + $this->load( $code ); + $this->lock( $cacheKey ); + + $titleKey = wfMemcKey( 'messages', 'individual', $title ); + + if ( $text === false ) { + # Article was deleted + $this->mCache[$code][$title] = '!NONEXISTENT'; + $this->mMemc->delete( $titleKey ); + } elseif ( strlen( $text ) > $wgMaxMsgCacheEntrySize ) { + # Check for size + $this->mCache[$code][$title] = '!TOO BIG'; + $this->mMemc->set( $titleKey, ' ' . $text, $this->mExpiry ); + } else { + $this->mCache[$code][$title] = ' ' . $text; + $this->mMemc->delete( $titleKey ); + } + + # Update caches + $this->saveToCaches( $this->mCache[$code], true, $code ); + $this->unlock( $cacheKey ); + + // Also delete cached sidebar... 
just in case it is affected + $codes = array( $code ); + if ( $code === 'en' ) { + // Delete all sidebars, like for example on action=purge on the + // sidebar messages + $codes = array_keys( Language::getLanguageNames() ); + } + + global $parserMemc; + foreach ( $codes as $code ) { + $sidebarKey = wfMemcKey( 'sidebar', $code ); + $parserMemc->delete( $sidebarKey ); + } + + // Update the message in the message blob store + global $wgContLang; + MessageBlobStore::updateMessage( $wgContLang->lcfirst( $msg ) ); + + wfRunHooks( 'MessageCacheReplace', array( $title, $text ) ); + + wfProfileOut( __METHOD__ ); + } + + /** + * Shortcut to update caches. + * + * @param $cache Array: cached messages with a version. + * @param $memc Bool: Wether to update or not memcache. + * @param $code String: Language code. + * @return False on somekind of error. + */ + protected function saveToCaches( $cache, $memc = true, $code = false ) { + wfProfileIn( __METHOD__ ); + global $wgUseLocalMessageCache, $wgLocalMessageCacheSerialized; + + $cacheKey = wfMemcKey( 'messages', $code ); + + if ( $memc ) { + $success = $this->mMemc->set( $cacheKey, $cache, $this->mExpiry ); + } else { + $success = true; + } + + # Save to local cache + if ( $wgUseLocalMessageCache ) { + $serialized = serialize( $cache ); + $hash = md5( $serialized ); + $this->mMemc->set( wfMemcKey( 'messages', $code, 'hash' ), $hash, $this->mExpiry ); + if ($wgLocalMessageCacheSerialized) { + $this->saveToLocal( $serialized, $hash, $code ); + } else { + $this->saveToScript( $cache, $hash, $code ); + } + } + + wfProfileOut( __METHOD__ ); + return $success; + } + + /** + * Represents a write lock on the messages key + * + * @return Boolean: success + */ + function lock( $key ) { + $lockKey = $key . ':lock'; + for ( $i = 0; $i < MSG_WAIT_TIMEOUT && !$this->mMemc->add( $lockKey, 1, MSG_LOCK_TIMEOUT ); $i++ ) { + sleep( 1 ); + } + + return $i >= MSG_WAIT_TIMEOUT; + } + + function unlock( $key ) { + $lockKey = $key . 
':lock'; + $this->mMemc->delete( $lockKey ); + } + + /** + * Get a message from either the content language or the user language. + * + * @param $key String: the message cache key + * @param $useDB Boolean: get the message from the DB, false to use only + * the localisation + * @param $langcode String: code of the language to get the message for, if + * it is a valid code create a language for that language, + * if it is a string but not a valid code then make a basic + * language object, if it is a false boolean then use the + * current users language (as a fallback for the old + * parameter functionality), or if it is a true boolean + * then use the wikis content language (also as a + * fallback). + * @param $isFullKey Boolean: specifies whether $key is a two part key + * "msg/lang". + */ + function get( $key, $useDB = true, $langcode = true, $isFullKey = false ) { + global $wgLanguageCode, $wgContLang; + + if ( is_int( $key ) ) { + // "Non-string key given" exception sometimes happens for numerical strings that become ints somewhere on their way here + $key = strval( $key ); + } + + if ( !is_string( $key ) ) { + throw new MWException( 'Non-string key given' ); + } + + if ( strval( $key ) === '' ) { + # Shortcut: the empty key is always missing + return false; + } + + $lang = wfGetLangObj( $langcode ); + if ( !$lang ) { + throw new MWException( "Bad lang code $langcode given" ); + } + + $langcode = $lang->getCode(); + + $message = false; + + # Normalise title-case input (with some inlining) + $lckey = str_replace( ' ', '_', $key ); + if ( ord( $key ) < 128 ) { + $lckey[0] = strtolower( $lckey[0] ); + $uckey = ucfirst( $lckey ); + } else { + $lckey = $wgContLang->lcfirst( $lckey ); + $uckey = $wgContLang->ucfirst( $lckey ); + } + + /** + * Record each message request, but only once per request. + * This information is not used unless $wgAdaptiveMessageCache + * is enabled. 
+ */ + $this->mRequestedMessages[$uckey] = true; + + # Try the MediaWiki namespace + if( !$this->mDisable && $useDB ) { + $title = $uckey; + if( !$isFullKey && ( $langcode != $wgLanguageCode ) ) { + $title .= '/' . $langcode; + } + $message = $this->getMsgFromNamespace( $title, $langcode ); + } + + # Try the array in the language object + if ( $message === false ) { + $message = $lang->getMessage( $lckey ); + if ( is_null( $message ) ) { + $message = false; + } + } + + # Try the array of another language + if( $message === false ) { + $parts = explode( '/', $lckey ); + # We may get calls for things that are http-urls from sidebar + # Let's not load nonexistent languages for those + # They usually have more than one slash. + if ( count( $parts ) == 2 && $parts[1] !== '' ) { + $message = Language::getMessageFor( $parts[0], $parts[1] ); + if ( is_null( $message ) ) { + $message = false; + } + } + } + + # Is this a custom message? Try the default language in the db... + if( ( $message === false || $message === '-' ) && + !$this->mDisable && $useDB && + !$isFullKey && ( $langcode != $wgLanguageCode ) ) { + $message = $this->getMsgFromNamespace( $uckey, $wgLanguageCode ); + } + + # Final fallback + if( $message === false ) { + return false; + } + + # Fix whitespace + $message = strtr( $message, + array( + # Fix for trailing whitespace, removed by textarea + ' ' => ' ', + # Fix for NBSP, converted to space by firefox + ' ' => "\xc2\xa0", + ' ' => "\xc2\xa0", + ) ); + + return $message; + } + + /** + * Get a message from the MediaWiki namespace, with caching. The key must + * first be converted to two-part lang/msg form if necessary. + * + * @param $title String: Message cache key with initial uppercase letter. + * @param $code String: code denoting the language to try. 
+ */ + function getMsgFromNamespace( $title, $code ) { + global $wgAdaptiveMessageCache; + + $this->load( $code ); + if ( isset( $this->mCache[$code][$title] ) ) { + $entry = $this->mCache[$code][$title]; + if ( substr( $entry, 0, 1 ) === ' ' ) { + return substr( $entry, 1 ); + } elseif ( $entry === '!NONEXISTENT' ) { + return false; + } elseif( $entry === '!TOO BIG' ) { + // Fall through and try invididual message cache below + } + } else { + // XXX: This is not cached in process cache, should it? + $message = false; + wfRunHooks( 'MessagesPreLoad', array( $title, &$message ) ); + if ( $message !== false ) { + return $message; + } + + /** + * If message cache is in normal mode, it is guaranteed + * (except bugs) that there is always entry (or placeholder) + * in the cache if message exists. Thus we can do minor + * performance improvement and return false early. + */ + if ( !$wgAdaptiveMessageCache ) { + return false; + } + } + + # Try the individual message cache + $titleKey = wfMemcKey( 'messages', 'individual', $title ); + $entry = $this->mMemc->get( $titleKey ); + if ( $entry ) { + if ( substr( $entry, 0, 1 ) === ' ' ) { + $this->mCache[$code][$title] = $entry; + return substr( $entry, 1 ); + } elseif ( $entry === '!NONEXISTENT' ) { + $this->mCache[$code][$title] = '!NONEXISTENT'; + return false; + } else { + # Corrupt/obsolete entry, delete it + $this->mMemc->delete( $titleKey ); + } + } + + # Try loading it from the database + $revision = Revision::newFromTitle( Title::makeTitle( NS_MEDIAWIKI, $title ) ); + if ( $revision ) { + $message = $revision->getText(); + if ($message === false) { + // A possibly temporary loading failure. + wfDebugLog( 'MessageCache', __METHOD__ . ": failed to load message page text for {$title->getDbKey()} ($code)" ); + } else { + $this->mCache[$code][$title] = ' ' . $message; + $this->mMemc->set( $titleKey, ' ' . 
$message, $this->mExpiry ); + } + } else { + $message = false; + $this->mCache[$code][$title] = '!NONEXISTENT'; + $this->mMemc->set( $titleKey, '!NONEXISTENT', $this->mExpiry ); + } + + return $message; + } + + /** + * @param $message string + * @param $interface bool + * @param $language + * @param $title Title + * @return string + */ + function transform( $message, $interface = false, $language = null, $title = null ) { + // Avoid creating parser if nothing to transform + if( strpos( $message, '{{' ) === false ) { + return $message; + } + + if ( $this->mInParser ) { + return $message; + } + + $parser = $this->getParser(); + if ( $parser ) { + $popts = $this->getParserOptions(); + $popts->setInterfaceMessage( $interface ); + $popts->setTargetLanguage( $language ); + + $userlang = $popts->setUserLang( $language ); + $this->mInParser = true; + $message = $parser->transformMsg( $message, $popts, $title ); + $this->mInParser = false; + $popts->setUserLang( $userlang ); + } + return $message; + } + + /** + * @return Parser + */ + function getParser() { + global $wgParser, $wgParserConf; + if ( !$this->mParser && isset( $wgParser ) ) { + # Do some initialisation so that we don't have to do it twice + $wgParser->firstCallInit(); + # Clone it and store it + $class = $wgParserConf['class']; + if ( $class == 'Parser_DiffTest' ) { + # Uncloneable + $this->mParser = new $class( $wgParserConf ); + } else { + $this->mParser = clone $wgParser; + } + } + return $this->mParser; + } + + /** + * @param $text string + * @param $string Title|string + * @param $title Title + * @param $interface bool + * @param $linestart bool + * @param $language + * @return ParserOutput + */ + public function parse( $text, $title = null, $linestart = true, $interface = false, $language = null ) { + if ( $this->mInParser ) { + return htmlspecialchars( $text ); + } + + $parser = $this->getParser(); + $popts = $this->getParserOptions(); + + if ( $interface ) { + $popts->setInterfaceMessage( true ); + } + 
if ( $language !== null ) { + $popts->setTargetLanguage( $language ); + } + + wfProfileIn( __METHOD__ ); + if ( !$title || !$title instanceof Title ) { + global $wgTitle; + $title = $wgTitle; + } + // Sometimes $wgTitle isn't set either... + if ( !$title ) { + # It's not uncommon having a null $wgTitle in scripts. See r80898 + # Create a ghost title in such case + $title = Title::newFromText( 'Dwimmerlaik' ); + } + + $this->mInParser = true; + $res = $parser->parse( $text, $title, $popts, $linestart ); + $this->mInParser = false; + + wfProfileOut( __METHOD__ ); + return $res; + } + + function disable() { + $this->mDisable = true; + } + + function enable() { + $this->mDisable = false; + } + + /** + * Clear all stored messages. Mainly used after a mass rebuild. + */ + function clear() { + $langs = Language::getLanguageNames( false ); + foreach ( array_keys($langs) as $code ) { + # Global cache + $this->mMemc->delete( wfMemcKey( 'messages', $code ) ); + # Invalidate all local caches + $this->mMemc->delete( wfMemcKey( 'messages', $code, 'hash' ) ); + } + $this->mLoadedLanguages = array(); + } + + public function figureMessage( $key ) { + global $wgLanguageCode; + $pieces = explode( '/', $key ); + if( count( $pieces ) < 2 ) { + return array( $key, $wgLanguageCode ); + } + + $lang = array_pop( $pieces ); + $validCodes = Language::getLanguageNames(); + if( !array_key_exists( $lang, $validCodes ) ) { + return array( $key, $wgLanguageCode ); + } + + $message = implode( '/', $pieces ); + return array( $message, $lang ); + } + + public static function logMessages() { + wfProfileIn( __METHOD__ ); + global $wgAdaptiveMessageCache; + if ( !$wgAdaptiveMessageCache || !self::$instance instanceof MessageCache ) { + wfProfileOut( __METHOD__ ); + return; + } + + $cachekey = wfMemckey( 'message-profiling' ); + $cache = wfGetCache( CACHE_DB ); + $data = $cache->get( $cachekey ); + + if ( !$data ) { + $data = array(); + } + + $age = self::$mAdaptiveDataAge; + $filterDate = substr( 
wfTimestamp( TS_MW, time() - $age ), 0, 8 ); + foreach ( array_keys( $data ) as $key ) { + if ( $key < $filterDate ) { + unset( $data[$key] ); + } + } + + $index = substr( wfTimestampNow(), 0, 8 ); + if ( !isset( $data[$index] ) ) { + $data[$index] = array(); + } + + foreach ( self::$instance->mRequestedMessages as $message => $_ ) { + if ( !isset( $data[$index][$message] ) ) { + $data[$index][$message] = 0; + } + $data[$index][$message]++; + } + + $cache->set( $cachekey, $data ); + wfProfileOut( __METHOD__ ); + } + + public function getMostUsedMessages() { + wfProfileIn( __METHOD__ ); + $cachekey = wfMemcKey( 'message-profiling' ); + $cache = wfGetCache( CACHE_DB ); + $data = $cache->get( $cachekey ); + if ( !$data ) { + wfProfileOut( __METHOD__ ); + return array(); + } + + $list = array(); + + foreach( $data as $messages ) { + foreach( $messages as $message => $count ) { + $key = $message; + if ( !isset( $list[$key] ) ) { + $list[$key] = 0; + } + $list[$key] += $count; + } + } + + $max = max( $list ); + foreach ( $list as $message => $count ) { + if ( $count < intval( $max * self::$mAdaptiveInclusionThreshold ) ) { + unset( $list[$message] ); + } + } + + wfProfileOut( __METHOD__ ); + return array_keys( $list ); + } + +} diff --git a/includes/cache/SquidUpdate.php b/includes/cache/SquidUpdate.php new file mode 100644 index 00000000..d47b5b5e --- /dev/null +++ b/includes/cache/SquidUpdate.php @@ -0,0 +1,226 @@ +<?php +/** + * See deferred.txt + * @file + * @ingroup Cache + */ + +/** + * Handles purging appropriate Squid URLs given a title (or titles) + * @ingroup Cache + */ +class SquidUpdate { + var $urlArr, $mMaxTitles; + + function __construct( $urlArr = Array(), $maxTitles = false ) { + global $wgMaxSquidPurgeTitles; + if ( $maxTitles === false ) { + $this->mMaxTitles = $wgMaxSquidPurgeTitles; + } else { + $this->mMaxTitles = $maxTitles; + } + if ( count( $urlArr ) > $this->mMaxTitles ) { + $urlArr = array_slice( $urlArr, 0, $this->mMaxTitles ); + } + 
$this->urlArr = $urlArr; + } + + /** + * @param $title Title + * + * @return SquidUpdate + */ + static function newFromLinksTo( &$title ) { + global $wgMaxSquidPurgeTitles; + wfProfileIn( __METHOD__ ); + + # Get a list of URLs linking to this page + $dbr = wfGetDB( DB_SLAVE ); + $res = $dbr->select( array( 'links', 'page' ), + array( 'page_namespace', 'page_title' ), + array( + 'pl_namespace' => $title->getNamespace(), + 'pl_title' => $title->getDBkey(), + 'pl_from=page_id' ), + __METHOD__ ); + $blurlArr = $title->getSquidURLs(); + if ( $dbr->numRows( $res ) <= $wgMaxSquidPurgeTitles ) { + foreach ( $res as $BL ) { + $tobj = Title::makeTitle( $BL->page_namespace, $BL->page_title ) ; + $blurlArr[] = $tobj->getInternalURL(); + } + } + + wfProfileOut( __METHOD__ ); + return new SquidUpdate( $blurlArr ); + } + + /** + * Create a SquidUpdate from an array of Title objects, or a TitleArray object + * + * @param $titles array + * @param $urlArr array + * + * @return SquidUpdate + */ + static function newFromTitles( $titles, $urlArr = array() ) { + global $wgMaxSquidPurgeTitles; + $i = 0; + foreach ( $titles as $title ) { + $urlArr[] = $title->getInternalURL(); + if ( $i++ > $wgMaxSquidPurgeTitles ) { + break; + } + } + return new SquidUpdate( $urlArr ); + } + + /** + * @param $title Title + * + * @return SquidUpdate + */ + static function newSimplePurge( &$title ) { + $urlArr = $title->getSquidURLs(); + return new SquidUpdate( $urlArr ); + } + + /** + * Purges the list of URLs passed to the constructor + */ + function doUpdate() { + SquidUpdate::purge( $this->urlArr ); + } + + /** + * Purges a list of Squids defined in $wgSquidServers. 
+ * $urlArr should contain the full URLs to purge as values + * (example: $urlArr[] = 'http://my.host/something') + * XXX report broken Squids per mail or log + * + * @param $urlArr array + * @return void + */ + static function purge( $urlArr ) { + global $wgSquidServers, $wgHTCPMulticastAddress, $wgHTCPPort; + + /*if ( (@$wgSquidServers[0]) == 'echo' ) { + echo implode("<br />\n", $urlArr) . "<br />\n"; + return; + }*/ + + if( !$urlArr ) { + return; + } + + if ( $wgHTCPMulticastAddress && $wgHTCPPort ) { + SquidUpdate::HTCPPurge( $urlArr ); + } + + wfProfileIn( __METHOD__ ); + + $maxSocketsPerSquid = 8; // socket cap per Squid + $urlsPerSocket = 400; // 400 seems to be a good tradeoff, opening a socket takes a while + $socketsPerSquid = ceil( count( $urlArr ) / $urlsPerSocket ); + if ( $socketsPerSquid > $maxSocketsPerSquid ) { + $socketsPerSquid = $maxSocketsPerSquid; + } + + $pool = new SquidPurgeClientPool; + $chunks = array_chunk( $urlArr, ceil( count( $urlArr ) / $socketsPerSquid ) ); + foreach ( $wgSquidServers as $server ) { + foreach ( $chunks as $chunk ) { + $client = new SquidPurgeClient( $server ); + foreach ( $chunk as $url ) { + $client->queuePurge( $url ); + } + $pool->addClient( $client ); + } + } + $pool->run(); + + wfProfileOut( __METHOD__ ); + } + + /** + * @throws MWException + * @param $urlArr array + */ + static function HTCPPurge( $urlArr ) { + global $wgHTCPMulticastAddress, $wgHTCPMulticastTTL, $wgHTCPPort; + wfProfileIn( __METHOD__ ); + + $htcpOpCLR = 4; // HTCP CLR + + // @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h) + if( !defined( "IPPROTO_IP" ) ) { + define( "IPPROTO_IP", 0 ); + define( "IP_MULTICAST_LOOP", 34 ); + define( "IP_MULTICAST_TTL", 33 ); + } + + // pfsockopen doesn't work because we need set_sock_opt + $conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP ); + if ( $conn ) { + // Set socket options + socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 ); + if ( $wgHTCPMulticastTTL != 1 ) + 
socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL, + $wgHTCPMulticastTTL ); + + foreach ( $urlArr as $url ) { + if( !is_string( $url ) ) { + throw new MWException( 'Bad purge URL' ); + } + $url = SquidUpdate::expand( $url ); + + // Construct a minimal HTCP request diagram + // as per RFC 2756 + // Opcode 'CLR', no response desired, no auth + $htcpTransID = rand(); + + $htcpSpecifier = pack( 'na4na*na8n', + 4, 'HEAD', strlen( $url ), $url, + 8, 'HTTP/1.0', 0 ); + + $htcpDataLen = 8 + 2 + strlen( $htcpSpecifier ); + $htcpLen = 4 + $htcpDataLen + 2; + + // Note! Squid gets the bit order of the first + // word wrong, wrt the RFC. Apparently no other + // implementation exists, so adapt to Squid + $htcpPacket = pack( 'nxxnCxNxxa*n', + $htcpLen, $htcpDataLen, $htcpOpCLR, + $htcpTransID, $htcpSpecifier, 2); + + // Send out + wfDebug( "Purging URL $url via HTCP\n" ); + socket_sendto( $conn, $htcpPacket, $htcpLen, 0, + $wgHTCPMulticastAddress, $wgHTCPPort ); + } + } else { + $errstr = socket_strerror( socket_last_error() ); + wfDebug( __METHOD__ . "(): Error opening UDP socket: $errstr\n" ); + } + wfProfileOut( __METHOD__ ); + } + + /** + * Expand local URLs to fully-qualified URLs using the internal protocol + * and host defined in $wgInternalServer. Input that's already fully- + * qualified will be passed through unchanged. + * + * This is used to generate purge URLs that may be either local to the + * main wiki or include a non-native host, such as images hosted on a + * second internal server. + * + * Client functions should not need to call this. + * + * @param $url string + * + * @return string + */ + static function expand( $url ) { + return wfExpandUrl( $url, PROTO_INTERNAL ); + } +} |