diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2006-10-11 18:12:39 +0000 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2006-10-11 18:12:39 +0000 |
commit | 183851b06bd6c52f3cae5375f433da720d410447 (patch) | |
tree | a477257decbf3360127f6739c2f9d0ec57a03d39 /includes/HTMLCacheUpdate.php |
MediaWiki 1.7.1 wiederhergestellt
Diffstat (limited to 'includes/HTMLCacheUpdate.php')
-rw-r--r-- | includes/HTMLCacheUpdate.php | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/includes/HTMLCacheUpdate.php b/includes/HTMLCacheUpdate.php new file mode 100644 index 00000000..47703b20 --- /dev/null +++ b/includes/HTMLCacheUpdate.php @@ -0,0 +1,230 @@ +<?php + +/** + * Class to invalidate the HTML cache of all the pages linking to a given title. + * Small numbers of links will be done immediately, large numbers are pushed onto + * the job queue. + * + * This class is designed to work efficiently with small numbers of links, and + * to work reasonably well with up to ~10^5 links. Above ~10^6 links, the memory + * and time requirements of loading all backlinked IDs in doUpdate() might become + * prohibitive. The requirements measured at Wikimedia are approximately: + * + * memory: 48 bytes per row + * time: 16us per row for the query plus processing + * + * The reason this query is done is to support partitioning of the job + * by backlinked ID. The memory issue could be allieviated by doing this query in + * batches, but of course LIMIT with an offset is inefficient on the DB side. + * + * The class is nevertheless a vast improvement on the previous method of using + * Image::getLinksTo() and Title::touchArray(), which uses about 2KB of memory per + * link. + */ +class HTMLCacheUpdate +{ + public $mTitle, $mTable, $mPrefix; + public $mRowsPerJob, $mRowsPerQuery; + + function __construct( $titleTo, $table ) { + global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery; + + $this->mTitle = $titleTo; + $this->mTable = $table; + $this->mRowsPerJob = $wgUpdateRowsPerJob; + $this->mRowsPerQuery = $wgUpdateRowsPerQuery; + } + + function doUpdate() { + # Fetch the IDs + $cond = $this->getToCondition(); + $dbr =& wfGetDB( DB_SLAVE ); + $res = $dbr->select( $this->mTable, $this->getFromField(), $cond, __METHOD__ ); + $resWrap = new ResultWrapper( $dbr, $res ); + if ( $dbr->numRows( $res ) != 0 ) { + if ( $dbr->numRows( $res ) > $this->mRowsPerJob ) { + $this->insertJobs( $resWrap ); + } else { + $this->invalidateIDs( $resWrap ); + } + } + $dbr->freeResult( $res ); + } + + function insertJobs( ResultWrapper $res ) { + $numRows = $res->numRows(); + $numBatches = ceil( $numRows / $this->mRowsPerJob ); + $realBatchSize = $numRows / $numBatches; + $boundaries = array(); + $start = false; + $jobs = array(); + do { + for ( $i = 0; $i < $realBatchSize - 1; $i++ ) { + $row = $res->fetchRow(); + if ( $row ) { + $id = $row[0]; + } else { + $id = false; + break; + } + } + if ( $id !== false ) { + // One less on the end to avoid duplicating the boundary + $job = new HTMLCacheUpdateJob( $this->mTitle, $this->mTable, $start, $id - 1 ); + } else { + $job = new HTMLCacheUpdateJob( $this->mTitle, $this->mTable, $start, false ); + } + $jobs[] = $job; + + $start = $id; + } while ( $start ); + + Job::batchInsert( $jobs ); + } + + function getPrefix() { + static $prefixes = array( + 'pagelinks' => 'pl', + 'imagelinks' => 'il', + 'categorylinks' => 'cl', + 'templatelinks' => 'tl', + + # Not needed + # 'externallinks' => 'el', + # 'langlinks' => 'll' + ); + + if ( is_null( $this->mPrefix ) ) { + $this->mPrefix = $prefixes[$this->mTable]; + if ( is_null( $this->mPrefix ) ) { + throw new MWException( "Invalid table type \"{$this->mTable}\" in " . __CLASS__ ); + } + } + return $this->mPrefix; + } + + function getFromField() { + return $this->getPrefix() . '_from'; + } + + function getToCondition() { + switch ( $this->mTable ) { + case 'pagelinks': + return array( + 'pl_namespace' => $this->mTitle->getNamespace(), + 'pl_title' => $this->mTitle->getDBkey() + ); + case 'templatelinks': + return array( + 'tl_namespace' => $this->mTitle->getNamespace(), + 'tl_title' => $this->mTitle->getDBkey() + ); + case 'imagelinks': + return array( 'il_to' => $this->mTitle->getDBkey() ); + case 'categorylinks': + return array( 'cl_to' => $this->mTitle->getDBkey() ); + } + throw new MWException( 'Invalid table type in ' . __CLASS__ ); + } + + /** + * Invalidate a set of IDs, right now + */ + function invalidateIDs( ResultWrapper $res ) { + global $wgUseFileCache, $wgUseSquid; + + if ( $res->numRows() == 0 ) { + return; + } + + $dbw =& wfGetDB( DB_MASTER ); + $timestamp = $dbw->timestamp(); + $done = false; + + while ( !$done ) { + # Get all IDs in this query into an array + $ids = array(); + for ( $i = 0; $i < $this->mRowsPerQuery; $i++ ) { + $row = $res->fetchRow(); + if ( $row ) { + $ids[] = $row[0]; + } else { + $done = true; + break; + } + } + + if ( !count( $ids ) ) { + break; + } + + # Update page_touched + $dbw->update( 'page', + array( 'page_touched' => $timestamp ), + array( 'page_id IN (' . $dbw->makeList( $ids ) . ')' ), + __METHOD__ + ); + + # Update squid + if ( $wgUseSquid || $wgUseFileCache ) { + $titles = Title::newFromIDs( $ids ); + if ( $wgUseSquid ) { + $u = SquidUpdate::newFromTitles( $titles ); + $u->doUpdate(); + } + + # Update file cache + if ( $wgUseFileCache ) { + foreach ( $titles as $title ) { + $cm = new CacheManager($title); + @unlink($cm->fileCacheName()); + } + } + } + } + } +} + +class HTMLCacheUpdateJob extends Job { + var $table, $start, $end; + + /** + * Construct a job + * @param Title $title The title linked to + * @param string $table The name of the link table. + * @param integer $start Beginning page_id or false for open interval + * @param integer $end End page_id or false for open interval + * @param integer $id job_id + */ + function __construct( $title, $table, $start, $end, $id = 0 ) { + $params = array( + 'table' => $table, + 'start' => $start, + 'end' => $end ); + parent::__construct( 'htmlCacheUpdate', $title, $params, $id ); + $this->table = $table; + $this->start = intval( $start ); + $this->end = intval( $end ); + } + + function run() { + $update = new HTMLCacheUpdate( $this->title, $this->table ); + + $fromField = $update->getFromField(); + $conds = $update->getToCondition(); + if ( $this->start ) { + $conds[] = "$fromField >= {$this->start}"; + } + if ( $this->end ) { + $conds[] = "$fromField <= {$this->end}"; + } + + $dbr =& wfGetDB( DB_SLAVE ); + $res = $dbr->select( $this->table, $fromField, $conds, __METHOD__ ); + $update->invalidateIDs( new ResultWrapper( $dbr, $res ) ); + $dbr->freeResult( $res ); + + return true; + } +} +?> |