diff options
Diffstat (limited to 'includes/deferred')
-rw-r--r-- | includes/deferred/CallableUpdate.php | 29 | ||||
-rw-r--r-- | includes/deferred/DataUpdate.php | 125 | ||||
-rw-r--r-- | includes/deferred/DeferredUpdates.php | 136 | ||||
-rw-r--r-- | includes/deferred/HTMLCacheUpdate.php | 70 | ||||
-rw-r--r-- | includes/deferred/LinksUpdate.php | 1019 | ||||
-rw-r--r-- | includes/deferred/SearchUpdate.php | 209 | ||||
-rw-r--r-- | includes/deferred/SiteStatsUpdate.php | 254 | ||||
-rw-r--r-- | includes/deferred/SqlDataUpdate.php | 159 | ||||
-rw-r--r-- | includes/deferred/SquidUpdate.php | 311 | ||||
-rw-r--r-- | includes/deferred/ViewCountUpdate.php | 119 |
10 files changed, 2431 insertions, 0 deletions
diff --git a/includes/deferred/CallableUpdate.php b/includes/deferred/CallableUpdate.php new file mode 100644 index 00000000..808626d0 --- /dev/null +++ b/includes/deferred/CallableUpdate.php @@ -0,0 +1,29 @@ +<?php + +/** + * Deferrable Update for closure/callback + */ +class MWCallableUpdate implements DeferrableUpdate { + /** + * @var Closure|callable + */ + private $callback; + + /** + * @param callable $callback + * @throws MWException + */ + public function __construct( $callback ) { + if ( !is_callable( $callback ) ) { + throw new MWException( 'Not a valid callback/closure!' ); + } + $this->callback = $callback; + } + + /** + * Run the update + */ + public function doUpdate() { + call_user_func( $this->callback ); + } +} diff --git a/includes/deferred/DataUpdate.php b/includes/deferred/DataUpdate.php new file mode 100644 index 00000000..ed12c601 --- /dev/null +++ b/includes/deferred/DataUpdate.php @@ -0,0 +1,125 @@ +<?php +/** + * Base code for update jobs that do something with some secondary + * data extracted from article. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Abstract base class for update jobs that do something with some secondary + * data extracted from article. 
+ * + * @note subclasses should NOT start or commit transactions in their doUpdate() method; + * a transaction will automatically be wrapped around the update. If need be, + * subclasses can override the beginTransaction() and commitTransaction() methods. + */ +abstract class DataUpdate implements DeferrableUpdate { + /** + * Constructor + */ + public function __construct() { + # noop + } + + /** + * Begin an appropriate transaction, if any. + * This default implementation does nothing. + */ + public function beginTransaction() { + //noop + } + + /** + * Commit the transaction started via beginTransaction, if any. + * This default implementation does nothing. + */ + public function commitTransaction() { + //noop + } + + /** + * Abort / roll back the transaction started via beginTransaction, if any. + * This default implementation does nothing. + */ + public function rollbackTransaction() { + //noop + } + + /** + * Convenience method, calls doUpdate() on every DataUpdate in the array. + * + * This method supports transactional logic by first calling beginTransaction() + * on all updates in the array, then calling doUpdate() on each, and, if all goes well, + * then calling commitTransaction() on each update. If an error occurs, + * rollbackTransaction() will be called on any update object that had beginTransaction() + * called but not yet commitTransaction(). + * + * This allows for limited transactional logic across multiple backends for storing + * secondary data. 
+ * + * @param array $updates A list of DataUpdate instances + * @throws Exception|null + */ + public static function runUpdates( $updates ) { + if ( empty( $updates ) ) { + return; # nothing to do + } + + $open_transactions = array(); + $exception = null; + + /** + * @var $update DataUpdate + * @var $trans DataUpdate + */ + + try { + // begin transactions + foreach ( $updates as $update ) { + $update->beginTransaction(); + $open_transactions[] = $update; + } + + // do work + foreach ( $updates as $update ) { + $update->doUpdate(); + } + + // commit transactions + while ( count( $open_transactions ) > 0 ) { + $trans = array_pop( $open_transactions ); + $trans->commitTransaction(); + } + } catch ( Exception $ex ) { + $exception = $ex; + wfDebug( "Caught exception, will rethrow after rollback: " . + $ex->getMessage() . "\n" ); + } + + // rollback remaining transactions + while ( count( $open_transactions ) > 0 ) { + $trans = array_pop( $open_transactions ); + $trans->rollbackTransaction(); + } + + if ( $exception ) { + throw $exception; // rethrow after cleanup + } + } +} diff --git a/includes/deferred/DeferredUpdates.php b/includes/deferred/DeferredUpdates.php new file mode 100644 index 00000000..b0c1899f --- /dev/null +++ b/includes/deferred/DeferredUpdates.php @@ -0,0 +1,136 @@ +<?php +/** + * Interface and manager for deferred updates. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Interface that deferrable updates should implement. Basically required so we + * can validate input on DeferredUpdates::addUpdate() + * + * @since 1.19 + */ +interface DeferrableUpdate { + /** + * Perform the actual work + */ + function doUpdate(); +} + +/** + * Class for managing the deferred updates. + * + * @since 1.19 + */ +class DeferredUpdates { + /** + * Store of updates to be deferred until the end of the request. + */ + private static $updates = array(); + + /** + * Add an update to the deferred list + * @param DeferrableUpdate $update Some object that implements doUpdate() + */ + public static function addUpdate( DeferrableUpdate $update ) { + array_push( self::$updates, $update ); + } + + /** + * HTMLCacheUpdates are the most common deferred update people use. This + * is a shortcut method for that. + * @see HTMLCacheUpdate::__construct() + * @param Title $title + * @param string $table + */ + public static function addHTMLCacheUpdate( $title, $table ) { + self::addUpdate( new HTMLCacheUpdate( $title, $table ) ); + } + + /** + * Add a callable update. 
In a lot of cases, we just need a callback/closure, + * defining a new DeferrableUpdate object is not necessary + * @see MWCallableUpdate::__construct() + * @param callable $callable + */ + public static function addCallableUpdate( $callable ) { + self::addUpdate( new MWCallableUpdate( $callable ) ); + } + + /** + * Do any deferred updates and clear the list + * + * @param string $commit Set to 'commit' to commit after every update to + * prevent lock contention + */ + public static function doUpdates( $commit = '' ) { + global $wgDeferredUpdateList; + + wfProfileIn( __METHOD__ ); + + $updates = array_merge( $wgDeferredUpdateList, self::$updates ); + + // No need to get master connections in case of empty updates array + if ( !count( $updates ) ) { + wfProfileOut( __METHOD__ ); + + return; + } + + $dbw = false; + $doCommit = $commit == 'commit'; + if ( $doCommit ) { + $dbw = wfGetDB( DB_MASTER ); + } + + while ( $updates ) { + self::clearPendingUpdates(); + + /** @var DeferrableUpdate $update */ + foreach ( $updates as $update ) { + try { + $update->doUpdate(); + + if ( $doCommit && $dbw->trxLevel() ) { + $dbw->commit( __METHOD__, 'flush' ); + } + } catch ( MWException $e ) { + // We don't want exceptions thrown during deferred updates to + // be reported to the user since the output is already sent. + // Instead we just log them. + if ( !$e instanceof ErrorPageError ) { + MWExceptionHandler::logException( $e ); + } + } + } + $updates = array_merge( $wgDeferredUpdateList, self::$updates ); + } + + wfProfileOut( __METHOD__ ); + } + + /** + * Clear all pending updates without performing them. Generally, you don't + * want or need to call this. Unit tests need it though. 
+ */ + public static function clearPendingUpdates() { + global $wgDeferredUpdateList; + $wgDeferredUpdateList = self::$updates = array(); + } +} diff --git a/includes/deferred/HTMLCacheUpdate.php b/includes/deferred/HTMLCacheUpdate.php new file mode 100644 index 00000000..54fa5943 --- /dev/null +++ b/includes/deferred/HTMLCacheUpdate.php @@ -0,0 +1,70 @@ +<?php +/** + * HTML cache invalidation of all pages linking to a given title. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * Class to invalidate the HTML cache of all the pages linking to a given title. 
+ * + * @ingroup Cache + */ +class HTMLCacheUpdate implements DeferrableUpdate { + /** @var Title */ + public $mTitle; + + /** @var string */ + public $mTable; + + /** + * @param Title $titleTo + * @param string $table + */ + function __construct( Title $titleTo, $table ) { + $this->mTitle = $titleTo; + $this->mTable = $table; + } + + public function doUpdate() { + wfProfileIn( __METHOD__ ); + + $job = new HTMLCacheUpdateJob( + $this->mTitle, + array( + 'table' => $this->mTable, + ) + Job::newRootJobParams( // "overall" refresh links job info + "htmlCacheUpdate:{$this->mTable}:{$this->mTitle->getPrefixedText()}" + ) + ); + + $count = $this->mTitle->getBacklinkCache()->getNumLinks( $this->mTable, 100 ); + if ( $count >= 100 ) { // many backlinks + JobQueueGroup::singleton()->push( $job ); + JobQueueGroup::singleton()->deduplicateRootJob( $job ); + } else { // few backlinks ($count might be off even if 0) + $dbw = wfGetDB( DB_MASTER ); + $dbw->onTransactionIdle( function () use ( $job ) { + $job->run(); // just do the purge query now + } ); + } + + wfProfileOut( __METHOD__ ); + } +} diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php new file mode 100644 index 00000000..45d26648 --- /dev/null +++ b/includes/deferred/LinksUpdate.php @@ -0,0 +1,1019 @@ +<?php +/** + * Updater for link tracking tables after a page edit. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Updates the link tracking tables (pagelinks, imagelinks, categorylinks, ...) of a page + * from the ParserOutput of that page. + * + * See docs/deferred.txt + */ +class LinksUpdate extends SqlDataUpdate { + // @todo make members protected, but make sure extensions don't break + + /** @var int Page ID of the article linked from */ + public $mId; + + /** @var Title Title object of the article linked from */ + public $mTitle; + + /** @var ParserOutput */ + public $mParserOutput; + + /** @var array Map of title strings to IDs for the links in the document */ + public $mLinks; + + /** @var array DB keys of the images used, in the array key only */ + public $mImages; + + /** @var array Map of title strings to IDs for the template references, including broken ones */ + public $mTemplates; + + /** @var array URLs of external links, array key only */ + public $mExternals; + + /** @var array Map of category names to sort keys */ + public $mCategories; + + /** @var array Map of language codes to titles */ + public $mInterlangs; + + /** @var array 2-D map of interwiki prefixes to DB keys, DB key in the inner array key */ + public $mInterwikis; + + /** @var array Map of arbitrary name to value */ + public $mProperties; + + /** @var DatabaseBase Database connection reference */ + public $mDb; + + /** @var array SELECT options to be used */ + public $mOptions; + + /** @var bool Whether to queue jobs for recursive updates */ + public $mRecursive; + + /** + * @var null|array Added links if calculated. + */ + private $linkInsertions = null; + + /** + * @var null|array Deleted links if calculated. + */ + private $linkDeletions = null; + + /** + * Constructor + * + * @param Title $title Title of the page we're updating + * @param ParserOutput $parserOutput Output from a full parse of this page + * @param bool $recursive Queue jobs for recursive updates? 
+ * @throws MWException If $title is not a Title, $parserOutput is not a ParserOutput, + * or the title has no article ID + */ + function __construct( $title, $parserOutput, $recursive = true ) { + parent::__construct( false ); // no implicit transaction + + if ( !( $title instanceof Title ) ) { + throw new MWException( "The calling convention to LinksUpdate::__construct() has changed. " . + "Please see WikiPage::doEditUpdates() for an invocation example.\n" ); + } + + if ( !( $parserOutput instanceof ParserOutput ) ) { + throw new MWException( "The calling convention to LinksUpdate::__construct() has changed. " . + "Please see WikiPage::doEditUpdates() for an invocation example.\n" ); + } + + $this->mTitle = $title; + $this->mId = $title->getArticleID(); + + if ( !$this->mId ) { + throw new MWException( "The Title object did not provide an article " . + "ID. Perhaps the page doesn't exist?" ); + } + + $this->mParserOutput = $parserOutput; + + $this->mLinks = $parserOutput->getLinks(); + $this->mImages = $parserOutput->getImages(); + $this->mTemplates = $parserOutput->getTemplates(); + $this->mExternals = $parserOutput->getExternalLinks(); + $this->mCategories = $parserOutput->getCategories(); + $this->mProperties = $parserOutput->getProperties(); + $this->mInterwikis = $parserOutput->getInterwikiLinks(); + + # Convert the format of the interlanguage links + # I didn't want to change it in the ParserOutput, because that array is passed all + # the way back to the skin, so either a skin API break would be required, or an + # inefficient back-conversion. + $ill = $parserOutput->getLanguageLinks(); + $this->mInterlangs = array(); + foreach ( $ill as $link ) { + # Use a separate local so the $title parameter is not overwritten with a string + list( $key, $langTitle ) = explode( ':', $link, 2 ); + $this->mInterlangs[$key] = $langTitle; + } + + foreach ( $this->mCategories as &$sortkey ) { + # If the sortkey is longer than 255 bytes, + # it is truncated by the DB, and then doesn't get + # matched when comparing existing vs current + # categories, causing bug 25254. + # Also, substr behaves weird when given "". 
+ if ( $sortkey !== '' ) { + $sortkey = substr( $sortkey, 0, 255 ); + } + } + # Break the reference left by the by-reference foreach, so a later write + # to $sortkey cannot silently overwrite the last category's sort key. + unset( $sortkey ); + + $this->mRecursive = $recursive; + + wfRunHooks( 'LinksUpdateConstructed', array( &$this ) ); + } + + /** + * Update link tables with outgoing links from an updated article + */ + public function doUpdate() { + wfRunHooks( 'LinksUpdate', array( &$this ) ); + $this->doIncrementalUpdate(); + wfRunHooks( 'LinksUpdateComplete', array( &$this ) ); + } + + protected function doIncrementalUpdate() { + wfProfileIn( __METHOD__ ); + + # Page links + $existing = $this->getExistingLinks(); + $this->linkDeletions = $this->getLinkDeletions( $existing ); + $this->linkInsertions = $this->getLinkInsertions( $existing ); + $this->incrTableUpdate( 'pagelinks', 'pl', $this->linkDeletions, $this->linkInsertions ); + + # Image links + $existing = $this->getExistingImages(); + + $imageDeletes = $this->getImageDeletions( $existing ); + $this->incrTableUpdate( 'imagelinks', 'il', $imageDeletes, + $this->getImageInsertions( $existing ) ); + + # Invalidate all image description pages which had links added or removed + $imageUpdates = $imageDeletes + array_diff_key( $this->mImages, $existing ); + $this->invalidateImageDescriptions( $imageUpdates ); + + # External links + $existing = $this->getExistingExternals(); + $this->incrTableUpdate( 'externallinks', 'el', $this->getExternalDeletions( $existing ), + $this->getExternalInsertions( $existing ) ); + + # Language links + $existing = $this->getExistingInterlangs(); + $this->incrTableUpdate( 'langlinks', 'll', $this->getInterlangDeletions( $existing ), + $this->getInterlangInsertions( $existing ) ); + + # Inline interwiki links + $existing = $this->getExistingInterwikis(); + $this->incrTableUpdate( 'iwlinks', 'iwl', $this->getInterwikiDeletions( $existing ), + $this->getInterwikiInsertions( $existing ) ); + + # Template links + $existing = $this->getExistingTemplates(); + $this->incrTableUpdate( 'templatelinks', 'tl', $this->getTemplateDeletions( $existing ), + 
$this->getTemplateInsertions( $existing ) ); + + # Category links + $existing = $this->getExistingCategories(); + + $categoryDeletes = $this->getCategoryDeletions( $existing ); + + $this->incrTableUpdate( 'categorylinks', 'cl', $categoryDeletes, + $this->getCategoryInsertions( $existing ) ); + + # Invalidate all categories which were added, deleted or changed (set symmetric difference) + $categoryInserts = array_diff_assoc( $this->mCategories, $existing ); + $categoryUpdates = $categoryInserts + $categoryDeletes; + $this->invalidateCategories( $categoryUpdates ); + $this->updateCategoryCounts( $categoryInserts, $categoryDeletes ); + + # Page properties + $existing = $this->getExistingProperties(); + + $propertiesDeletes = $this->getPropertyDeletions( $existing ); + + $this->incrTableUpdate( 'page_props', 'pp', $propertiesDeletes, + $this->getPropertyInsertions( $existing ) ); + + # Invalidate the necessary pages + $changed = $propertiesDeletes + array_diff_assoc( $this->mProperties, $existing ); + $this->invalidateProperties( $changed ); + + # Update the links table freshness for this title + $this->updateLinksTimestamp(); + + # Refresh links of all pages including this page + # This will be in a separate transaction + if ( $this->mRecursive ) { + $this->queueRecursiveJobs(); + } + + wfProfileOut( __METHOD__ ); + } + + /** + * Queue recursive jobs for this page + * + * Which means do LinksUpdate on all pages that include the current page, + * using the job queue. + */ + function queueRecursiveJobs() { + self::queueRecursiveJobsForTable( $this->mTitle, 'templatelinks' ); + if ( $this->mTitle->getNamespace() == NS_FILE ) { + // Process imagelinks in case the title is or was a redirect + self::queueRecursiveJobsForTable( $this->mTitle, 'imagelinks' ); + } + } + + /** + * Queue a RefreshLinks job for any table. + * + * @param Title $title Title to do job for + * @param string $table Table to use (e.g. 
'templatelinks') + */ + public static function queueRecursiveJobsForTable( Title $title, $table ) { + wfProfileIn( __METHOD__ ); + if ( $title->getBacklinkCache()->hasLinks( $table ) ) { + $job = new RefreshLinksJob( + $title, + array( + 'table' => $table, + 'recursive' => true, + ) + Job::newRootJobParams( // "overall" refresh links job info + "refreshlinks:{$table}:{$title->getPrefixedText()}" + ) + ); + JobQueueGroup::singleton()->push( $job ); + JobQueueGroup::singleton()->deduplicateRootJob( $job ); + } + wfProfileOut( __METHOD__ ); + } + + /** + * @param array $cats + */ + function invalidateCategories( $cats ) { + $this->invalidatePages( NS_CATEGORY, array_keys( $cats ) ); + } + + /** + * Update all the appropriate counts in the category table. + * @param array $added Associative array of category name => sort key + * @param array $deleted Associative array of category name => sort key + */ + function updateCategoryCounts( $added, $deleted ) { + $a = WikiPage::factory( $this->mTitle ); + $a->updateCategoryCounts( + array_keys( $added ), array_keys( $deleted ) + ); + } + + /** + * @param array $images + */ + function invalidateImageDescriptions( $images ) { + $this->invalidatePages( NS_FILE, array_keys( $images ) ); + } + + /** + * Update a table by doing a delete query then an insert query + * @param string $table Table name + * @param string $prefix Field name prefix + * @param array $deletions + * @param array $insertions Rows to insert + */ + function incrTableUpdate( $table, $prefix, $deletions, $insertions ) { + if ( $table == 'page_props' ) { + $fromField = 'pp_page'; + } else { + $fromField = "{$prefix}_from"; + } + $where = array( $fromField => $this->mId ); + if ( $table == 'pagelinks' || $table == 'templatelinks' || $table == 'iwlinks' ) { + if ( $table == 'iwlinks' ) { + $baseKey = 'iwl_prefix'; + } else { + $baseKey = "{$prefix}_namespace"; + } + $clause = $this->mDb->makeWhereFrom2d( $deletions, $baseKey, "{$prefix}_title" ); + if ( $clause ) { 
+ $where[] = $clause; + } else { + $where = false; + } + } else { + if ( $table == 'langlinks' ) { + $toField = 'll_lang'; + } elseif ( $table == 'page_props' ) { + $toField = 'pp_propname'; + } else { + $toField = $prefix . '_to'; + } + if ( count( $deletions ) ) { + $where[$toField] = array_keys( $deletions ); + } else { + $where = false; + } + } + if ( $where ) { + $this->mDb->delete( $table, $where, __METHOD__ ); + } + if ( count( $insertions ) ) { + $this->mDb->insert( $table, $insertions, __METHOD__, 'IGNORE' ); + wfRunHooks( 'LinksUpdateAfterInsert', array( $this, $table, $insertions ) ); + } + } + + /** + * Get an array of pagelinks insertions for passing to the DB + * Skips the titles specified by the 2-D array $existing + * @param array $existing + * @return array + */ + private function getLinkInsertions( $existing = array() ) { + $arr = array(); + foreach ( $this->mLinks as $ns => $dbkeys ) { + $diffs = isset( $existing[$ns] ) + ? array_diff_key( $dbkeys, $existing[$ns] ) + : $dbkeys; + foreach ( $diffs as $dbk => $id ) { + $arr[] = array( + 'pl_from' => $this->mId, + 'pl_from_namespace' => $this->mTitle->getNamespace(), + 'pl_namespace' => $ns, + 'pl_title' => $dbk + ); + } + } + + return $arr; + } + + /** + * Get an array of template insertions. Like getLinkInsertions() + * @param array $existing + * @return array + */ + private function getTemplateInsertions( $existing = array() ) { + $arr = array(); + foreach ( $this->mTemplates as $ns => $dbkeys ) { + $diffs = isset( $existing[$ns] ) ? 
array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys; + foreach ( $diffs as $dbk => $id ) { + $arr[] = array( + 'tl_from' => $this->mId, + 'tl_from_namespace' => $this->mTitle->getNamespace(), + 'tl_namespace' => $ns, + 'tl_title' => $dbk + ); + } + } + + return $arr; + } + + /** + * Get an array of image insertions + * Skips the names specified in $existing + * @param array $existing + * @return array + */ + private function getImageInsertions( $existing = array() ) { + $arr = array(); + $diffs = array_diff_key( $this->mImages, $existing ); + foreach ( $diffs as $iname => $dummy ) { + $arr[] = array( + 'il_from' => $this->mId, + 'il_from_namespace' => $this->mTitle->getNamespace(), + 'il_to' => $iname + ); + } + + return $arr; + } + + /** + * Get an array of externallinks insertions. Skips the names specified in $existing + * @param array $existing + * @return array + */ + private function getExternalInsertions( $existing = array() ) { + $arr = array(); + $diffs = array_diff_key( $this->mExternals, $existing ); + foreach ( $diffs as $url => $dummy ) { + foreach ( wfMakeUrlIndexes( $url ) as $index ) { + $arr[] = array( + 'el_id' => $this->mDb->nextSequenceValue( 'externallinks_el_id_seq' ), + 'el_from' => $this->mId, + 'el_to' => $url, + 'el_index' => $index, + ); + } + } + + return $arr; + } + + /** + * Get an array of category insertions + * + * @param array $existing Mapping existing category names to sort keys. 
If both + * match a link in $this, the link will be omitted from the output + * + * @return array + */ + private function getCategoryInsertions( $existing = array() ) { + global $wgContLang, $wgCategoryCollation; + $diffs = array_diff_assoc( $this->mCategories, $existing ); + $arr = array(); + foreach ( $diffs as $name => $prefix ) { + $nt = Title::makeTitleSafe( NS_CATEGORY, $name ); + $wgContLang->findVariantLink( $name, $nt, true ); + + if ( $this->mTitle->getNamespace() == NS_CATEGORY ) { + $type = 'subcat'; + } elseif ( $this->mTitle->getNamespace() == NS_FILE ) { + $type = 'file'; + } else { + $type = 'page'; + } + + # Treat custom sortkeys as a prefix, so that if multiple + # things are forced to sort as '*' or something, they'll + # sort properly in the category rather than in page_id + # order or such. + $sortkey = Collation::singleton()->getSortKey( + $this->mTitle->getCategorySortkey( $prefix ) ); + + $arr[] = array( + 'cl_from' => $this->mId, + 'cl_to' => $name, + 'cl_sortkey' => $sortkey, + 'cl_timestamp' => $this->mDb->timestamp(), + 'cl_sortkey_prefix' => $prefix, + 'cl_collation' => $wgCategoryCollation, + 'cl_type' => $type, + ); + } + + return $arr; + } + + /** + * Get an array of interlanguage link insertions + * + * @param array $existing Mapping existing language codes to titles + * + * @return array + */ + private function getInterlangInsertions( $existing = array() ) { + $diffs = array_diff_assoc( $this->mInterlangs, $existing ); + $arr = array(); + foreach ( $diffs as $lang => $title ) { + $arr[] = array( + 'll_from' => $this->mId, + 'll_lang' => $lang, + 'll_title' => $title + ); + } + + return $arr; + } + + /** + * Get an array of page property insertions + * @param array $existing + * @return array + */ + function getPropertyInsertions( $existing = array() ) { + $diffs = array_diff_assoc( $this->mProperties, $existing ); + + $arr = array(); + foreach ( array_keys( $diffs ) as $name ) { + $arr[] = $this->getPagePropRowData( $name ); + } + 
+ return $arr; + } + + /** + * Returns an associative array to be used for inserting a row into + * the page_props table. Besides the given property name, this will + * include the page id from $this->mId and any property value from + * $this->mProperties. + * + * The array returned will include the pp_sortkey field if this + * is present in the database (as indicated by $wgPagePropsHaveSortkey). + * The sortkey value is currently determined by getPropertySortKeyValue(). + * + * @note this assumes that $this->mProperties[$prop] is defined. + * + * @param string $prop The name of the property. + * + * @return array + */ + private function getPagePropRowData( $prop ) { + global $wgPagePropsHaveSortkey; + + $value = $this->mProperties[$prop]; + + $row = array( + 'pp_page' => $this->mId, + 'pp_propname' => $prop, + 'pp_value' => $value, + ); + + if ( $wgPagePropsHaveSortkey ) { + $row['pp_sortkey'] = $this->getPropertySortKeyValue( $value ); + } + + return $row; + } + + /** + * Determines the sort key for the given property value. + * This will return $value if it is a float or int, + * 1 or resp. 0 if it is a bool, and null otherwise. + * + * @note In the future, we may allow the sortkey to be specified explicitly + * in ParserOutput::setProperty. + * + * @param mixed $value + * + * @return float|null + */ + private function getPropertySortKeyValue( $value ) { + if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { + return floatval( $value ); + } + + return null; + } + + /** + * Get an array of interwiki insertions for passing to the DB + * Skips the titles specified by the 2-D array $existing + * @param array $existing + * @return array + */ + private function getInterwikiInsertions( $existing = array() ) { + $arr = array(); + foreach ( $this->mInterwikis as $prefix => $dbkeys ) { + $diffs = isset( $existing[$prefix] ) + ? 
array_diff_key( $dbkeys, $existing[$prefix] ) + : $dbkeys; + + foreach ( $diffs as $dbk => $id ) { + $arr[] = array( + 'iwl_from' => $this->mId, + 'iwl_prefix' => $prefix, + 'iwl_title' => $dbk + ); + } + } + + return $arr; + } + + /** + * Given an array of existing links, returns those links which are not in $this + * and thus should be deleted. + * @param array $existing + * @return array + */ + private function getLinkDeletions( $existing ) { + $del = array(); + foreach ( $existing as $ns => $dbkeys ) { + if ( isset( $this->mLinks[$ns] ) ) { + $del[$ns] = array_diff_key( $existing[$ns], $this->mLinks[$ns] ); + } else { + $del[$ns] = $existing[$ns]; + } + } + + return $del; + } + + /** + * Given an array of existing templates, returns those templates which are not in $this + * and thus should be deleted. + * @param array $existing + * @return array + */ + private function getTemplateDeletions( $existing ) { + $del = array(); + foreach ( $existing as $ns => $dbkeys ) { + if ( isset( $this->mTemplates[$ns] ) ) { + $del[$ns] = array_diff_key( $existing[$ns], $this->mTemplates[$ns] ); + } else { + $del[$ns] = $existing[$ns]; + } + } + + return $del; + } + + /** + * Given an array of existing images, returns those images which are not in $this + * and thus should be deleted. + * @param array $existing + * @return array + */ + private function getImageDeletions( $existing ) { + return array_diff_key( $existing, $this->mImages ); + } + + /** + * Given an array of existing external links, returns those links which are not + * in $this and thus should be deleted. + * @param array $existing + * @return array + */ + private function getExternalDeletions( $existing ) { + return array_diff_key( $existing, $this->mExternals ); + } + + /** + * Given an array of existing categories, returns those categories which are not in $this + * and thus should be deleted. 
+ * @param array $existing + * @return array + */ + private function getCategoryDeletions( $existing ) { + return array_diff_assoc( $existing, $this->mCategories ); + } + + /** + * Given an array of existing interlanguage links, returns those links which are not + * in $this and thus should be deleted. + * @param array $existing + * @return array + */ + private function getInterlangDeletions( $existing ) { + return array_diff_assoc( $existing, $this->mInterlangs ); + } + + /** + * Get array of properties which should be deleted. + * @param array $existing + * @return array + */ + function getPropertyDeletions( $existing ) { + return array_diff_assoc( $existing, $this->mProperties ); + } + + /** + * Given an array of existing interwiki links, returns those links which are not in $this + * and thus should be deleted. + * @param array $existing + * @return array + */ + private function getInterwikiDeletions( $existing ) { + $del = array(); + foreach ( $existing as $prefix => $dbkeys ) { + if ( isset( $this->mInterwikis[$prefix] ) ) { + $del[$prefix] = array_diff_key( $existing[$prefix], $this->mInterwikis[$prefix] ); + } else { + $del[$prefix] = $existing[$prefix]; + } + } + + return $del; + } + + /** + * Get an array of existing links, as a 2-D array + * + * @return array + */ + private function getExistingLinks() { + $res = $this->mDb->select( 'pagelinks', array( 'pl_namespace', 'pl_title' ), + array( 'pl_from' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + foreach ( $res as $row ) { + if ( !isset( $arr[$row->pl_namespace] ) ) { + $arr[$row->pl_namespace] = array(); + } + $arr[$row->pl_namespace][$row->pl_title] = 1; + } + + return $arr; + } + + /** + * Get an array of existing templates, as a 2-D array + * + * @return array + */ + private function getExistingTemplates() { + $res = $this->mDb->select( 'templatelinks', array( 'tl_namespace', 'tl_title' ), + array( 'tl_from' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + 
foreach ( $res as $row ) { + if ( !isset( $arr[$row->tl_namespace] ) ) { + $arr[$row->tl_namespace] = array(); + } + $arr[$row->tl_namespace][$row->tl_title] = 1; + } + + return $arr; + } + + /** + * Get an array of existing images, image names in the keys + * + * @return array + */ + private function getExistingImages() { + $res = $this->mDb->select( 'imagelinks', array( 'il_to' ), + array( 'il_from' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + foreach ( $res as $row ) { + $arr[$row->il_to] = 1; + } + + return $arr; + } + + /** + * Get an array of existing external links, URLs in the keys + * + * @return array + */ + private function getExistingExternals() { + $res = $this->mDb->select( 'externallinks', array( 'el_to' ), + array( 'el_from' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + foreach ( $res as $row ) { + $arr[$row->el_to] = 1; + } + + return $arr; + } + + /** + * Get an array of existing categories, with the name in the key and sort key in the value. + * + * @return array + */ + private function getExistingCategories() { + $res = $this->mDb->select( 'categorylinks', array( 'cl_to', 'cl_sortkey_prefix' ), + array( 'cl_from' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + foreach ( $res as $row ) { + $arr[$row->cl_to] = $row->cl_sortkey_prefix; + } + + return $arr; + } + + /** + * Get an array of existing interlanguage links, with the language code in the key and the + * title in the value. 
+ * + * @return array + */ + private function getExistingInterlangs() { + $res = $this->mDb->select( 'langlinks', array( 'll_lang', 'll_title' ), + array( 'll_from' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + foreach ( $res as $row ) { + $arr[$row->ll_lang] = $row->ll_title; + } + + return $arr; + } + + /** + * Get an array of existing inline interwiki links, as a 2-D array + * @return array (prefix => array(dbkey => 1)) + */ + protected function getExistingInterwikis() { + $res = $this->mDb->select( 'iwlinks', array( 'iwl_prefix', 'iwl_title' ), + array( 'iwl_from' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + foreach ( $res as $row ) { + if ( !isset( $arr[$row->iwl_prefix] ) ) { + $arr[$row->iwl_prefix] = array(); + } + $arr[$row->iwl_prefix][$row->iwl_title] = 1; + } + + return $arr; + } + + /** + * Get an array of existing page properties (rows of page_props for this page), + * with the property name in the key and the property value in the value. + * + * @return array Array of property names and values + */ + private function getExistingProperties() { + $res = $this->mDb->select( 'page_props', array( 'pp_propname', 'pp_value' ), + array( 'pp_page' => $this->mId ), __METHOD__, $this->mOptions ); + $arr = array(); + foreach ( $res as $row ) { + $arr[$row->pp_propname] = $row->pp_value; + } + + return $arr; + } + + /** + * Return the title object of the page being updated + * @return Title + */ + public function getTitle() { + return $this->mTitle; + } + + /** + * Returns parser output + * @since 1.19 + * @return ParserOutput + */ + public function getParserOutput() { + return $this->mParserOutput; + } + + /** + * Return the list of images used as generated by the parser + * @return array + */ + public function getImages() { + return $this->mImages; + } + + /** + * Invalidate any necessary link lists related to page property changes + * @param array $changed + */ + private function invalidateProperties( $changed ) { + global $wgPagePropLinkInvalidations; + + foreach ( $changed as
$name => $value ) { + if ( isset( $wgPagePropLinkInvalidations[$name] ) ) { + $inv = $wgPagePropLinkInvalidations[$name]; + if ( !is_array( $inv ) ) { + $inv = array( $inv ); + } + foreach ( $inv as $table ) { + $update = new HTMLCacheUpdate( $this->mTitle, $table ); + $update->doUpdate(); + } + } + } + } + + /** + * Fetch page links added by this LinksUpdate. Only available after the update is complete. + * @since 1.22 + * @return null|array Array of Titles + */ + public function getAddedLinks() { + if ( $this->linkInsertions === null ) { + return null; + } + $result = array(); + foreach ( $this->linkInsertions as $insertion ) { + $result[] = Title::makeTitle( $insertion['pl_namespace'], $insertion['pl_title'] ); + } + + return $result; + } + + /** + * Fetch page links removed by this LinksUpdate. Only available after the update is complete. + * @since 1.22 + * @return null|array Array of Titles + */ + public function getRemovedLinks() { + if ( $this->linkDeletions === null ) { + return null; + } + $result = array(); + foreach ( $this->linkDeletions as $ns => $titles ) { + foreach ( $titles as $title => $unused ) { + $result[] = Title::makeTitle( $ns, $title ); + } + } + + return $result; + } + + /** + * Update links table freshness + */ + protected function updateLinksTimestamp() { + if ( $this->mId ) { + // The link updates made here only reflect the freshness of the parser output + $timestamp = $this->mParserOutput->getCacheTime(); + $this->mDb->update( 'page', + array( 'page_links_updated' => $this->mDb->timestamp( $timestamp ) ), + array( 'page_id' => $this->mId ), + __METHOD__ + ); + } + } +} + +/** + * Update object handling the cleanup of links tables after a page was deleted. 
+ **/ +class LinksDeletionUpdate extends SqlDataUpdate { + /** @var WikiPage The WikiPage that was deleted */ + protected $mPage; + + /** + * Constructor + * + * @param WikiPage $page Page we are updating + * @throws MWException + */ + function __construct( WikiPage $page ) { + parent::__construct( false ); // no implicit transaction + + $this->mPage = $page; + + if ( !$page->exists() ) { + throw new MWException( "Page ID not known, perhaps the page doesn't exist?" ); + } + } + + /** + * Do some database updates after deletion + */ + public function doUpdate() { + $title = $this->mPage->getTitle(); + $id = $this->mPage->getId(); + + # Delete restrictions for it + $this->mDb->delete( 'page_restrictions', array( 'pr_page' => $id ), __METHOD__ ); + + # Fix category table counts + $cats = array(); + $res = $this->mDb->select( 'categorylinks', 'cl_to', array( 'cl_from' => $id ), __METHOD__ ); + + foreach ( $res as $row ) { + $cats[] = $row->cl_to; + } + + $this->mPage->updateCategoryCounts( array(), $cats ); + + # If using cascading deletes, we can skip some explicit deletes + if ( !$this->mDb->cascadingDeletes() ) { + # Delete outgoing links + $this->mDb->delete( 'pagelinks', array( 'pl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'imagelinks', array( 'il_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'categorylinks', array( 'cl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'templatelinks', array( 'tl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'externallinks', array( 'el_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'langlinks', array( 'll_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'iwlinks', array( 'iwl_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ ); + $this->mDb->delete( 'page_props', array( 'pp_page' => $id ), __METHOD__ ); + } + + # If using cleanup triggers, we can skip some manual deletes + if ( !$this->mDb->cleanupTriggers() ) { + # Clean up recentchanges 
entries... + $this->mDb->delete( 'recentchanges', + array( 'rc_type != ' . RC_LOG, + 'rc_namespace' => $title->getNamespace(), + 'rc_title' => $title->getDBkey() ), + __METHOD__ ); + $this->mDb->delete( 'recentchanges', + array( 'rc_type != ' . RC_LOG, 'rc_cur_id' => $id ), + __METHOD__ ); + } + } + + /** + * Update all the appropriate counts in the category table. + * Only the array keys (category names) are passed on; the sort keys are ignored. + * @param array $added Associative array of category name => sort key + * @param array $deleted Associative array of category name => sort key + */ + function updateCategoryCounts( $added, $deleted ) { + // This class only stores the deleted page in $mPage (no $mTitle is declared here), + // so delegate to that WikiPage instead of building one from an undefined property. + $this->mPage->updateCategoryCounts( + array_keys( $added ), array_keys( $deleted ) + ); + } +} diff --git a/includes/deferred/SearchUpdate.php b/includes/deferred/SearchUpdate.php new file mode 100644 index 00000000..5d084afd --- /dev/null +++ b/includes/deferred/SearchUpdate.php @@ -0,0 +1,209 @@ +<?php +/** + * Search index updater + * + * See deferred.txt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Search + */ + +/** + * Database independent search index updater + * + * @ingroup Search + */ +class SearchUpdate implements DeferrableUpdate { + /** @var int Page id being updated */ + private $id = 0; + + /** @var Title Title we're updating */ + private $title; + + /** @var Content|bool Content of the page (not text) */ + private $content; + + /** + * Constructor + * + * @param int $id Page id to update + * @param Title|string $title Title of page to update + * @param Content|string|bool $c Content of the page to update. Default: false. + * If a Content object, text will be gotten from it. String is for back-compat. + * Passing false tells the backend to just update the title, not the content + */ + public function __construct( $id, $title, $c = false ) { + if ( is_string( $title ) ) { + $nt = Title::newFromText( $title ); + } else { + $nt = $title; + } + + if ( $nt ) { + $this->id = $id; + // is_string() check is back-compat for ApprovedRevs + if ( is_string( $c ) ) { + $this->content = new TextContent( $c ); + } else { + $this->content = $c ?: false; + } + $this->title = $nt; + } else { + wfDebug( "SearchUpdate object created with invalid title '$title'\n" ); + } + } + + /** + * Perform actual update for the entry + */ + public function doUpdate() { + global $wgDisableSearchUpdate; + + if ( $wgDisableSearchUpdate || !$this->id ) { + return; + } + + wfProfileIn( __METHOD__ ); + + $page = WikiPage::newFromId( $this->id, WikiPage::READ_LATEST ); + + foreach ( SearchEngine::getSearchTypes() as $type ) { + $search = SearchEngine::create( $type ); + if ( !$search->supports( 'search-update' ) ) { + // Skip this backend before doing any title normalization work for it + continue; + } + + $indexTitle = $this->indexTitle( $search ); + $normalTitle = $search->normalizeText( $indexTitle ); + + if ( $page === null ) { + $search->delete( $this->id, $normalTitle ); + continue; + } elseif ( $this->content === false ) { + $search->updateTitle( $this->id, $normalTitle ); + continue;
+ } + + $text = $search->getTextFromContent( $this->title, $this->content ); + if ( !$search->textAlreadyUpdatedForIndex() ) { + $text = self::updateText( $text ); + } + + # Perform the actual update + $search->update( $this->id, $normalTitle, $search->normalizeText( $text ) ); + } + + wfProfileOut( __METHOD__ ); + } + + /** + * Clean text for indexing. Only really suitable for indexing in databases. + * If you're using a real search engine, you'll probably want to override + * this behavior and do something nicer with the original wikitext. + * @param string $text + * @return string + */ + public static function updateText( $text ) { + global $wgContLang; + + # Language-specific strip/conversion + $text = $wgContLang->normalizeForSearch( $text ); + $lc = SearchEngine::legalSearchChars() . '&#;'; + + wfProfileIn( __METHOD__ . '-regexps' ); + $text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/", + ' ', $wgContLang->lc( " " . $text . " " ) ); # Strip HTML markup + $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD", + "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings + + # Strip external URLs + $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF"; + $protos = "http|https|ftp|mailto|news|gopher"; + $pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/"; + $text = preg_replace( $pat, "\\1 \\3", $text ); + + $p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/"; + $p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/"; + $text = preg_replace( $p1, "\\1 ", $text ); + $text = preg_replace( $p2, "\\1 \\3 ", $text ); + + # Internal image links + $pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i"; + $text = preg_replace( $pat2, " \\1 \\3", $text ); + + $text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/", + "\\1\\2 \\2\\3", $text ); # Handle [[game]]s + + # Strip all remaining non-search characters + $text = preg_replace( "/[^{$lc}]+/", " ", $text ); + + # Handle 's, s' + # + # $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text ); + # $text = 
preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text ); + # + # These tail-anchored regexps are insanely slow. The worst case comes + # when Japanese or Chinese text (ie, no word spacing) is written on + # a wiki configured for Western UTF-8 mode. The Unicode characters are + # expanded to hex codes and the "words" are very long paragraph-length + # monstrosities. On a large page the above regexps may take over 20 + # seconds *each* on a 1GHz-level processor. + # + # Following are reversed versions which are consistently fast + # (about 3 milliseconds on 1GHz-level processor). + # + $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) ); + $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) ); + + # Strip wiki '' and ''' + $text = preg_replace( "/''[']*/", " ", $text ); + wfProfileOut( __METHOD__ . '-regexps' ); + + return $text; + } + + /** + * Get a string representation of a title suitable for + * including in a search index + * + * @param SearchEngine $search + * @return string A stripped-down title string ready for the search index + */ + private function indexTitle( SearchEngine $search ) { + global $wgContLang; + + $ns = $this->title->getNamespace(); + $title = $this->title->getText(); + + $lc = $search->legalSearchChars() . 
'&#;'; + $t = $wgContLang->normalizeForSearch( $title ); + $t = preg_replace( "/[^{$lc}]+/", ' ', $t ); + $t = $wgContLang->lc( $t ); + + # Handle 's, s' + $t = preg_replace( "/([{$lc}]+)'s( |$)/", "\\1 \\1's ", $t ); + $t = preg_replace( "/([{$lc}]+)s'( |$)/", "\\1s ", $t ); + + $t = preg_replace( "/\\s+/", ' ', $t ); + + if ( $ns == NS_FILE ) { + $t = preg_replace( "/ (png|gif|jpg|jpeg|ogg)$/", "", $t ); + } + return trim( $t ); + } +} diff --git a/includes/deferred/SiteStatsUpdate.php b/includes/deferred/SiteStatsUpdate.php new file mode 100644 index 00000000..7bfafee8 --- /dev/null +++ b/includes/deferred/SiteStatsUpdate.php @@ -0,0 +1,254 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Class for handling updates to the site_stats table + */ +class SiteStatsUpdate implements DeferrableUpdate { + /** @var int */ + protected $views = 0; + + /** @var int */ + protected $edits = 0; + + /** @var int */ + protected $pages = 0; + + /** @var int */ + protected $articles = 0; + + /** @var int */ + protected $users = 0; + + /** @var int */ + protected $images = 0; + + // @todo deprecate this constructor + function __construct( $views, $edits, $good, $pages = 0, $users = 0 ) { + $this->views = $views; + $this->edits = $edits; + $this->articles = $good; + $this->pages = $pages; + $this->users = $users; + } + + /** + * @param array $deltas + * @return SiteStatsUpdate + */ + public static function factory( array $deltas ) { + $update = new self( 0, 0, 0 ); + + $fields = array( 'views', 'edits', 'pages', 'articles', 'users', 'images' ); + foreach ( $fields as $field ) { + if ( isset( $deltas[$field] ) && $deltas[$field] ) { + $update->$field = $deltas[$field]; + } + } + + return $update; + } + + public function doUpdate() { + global $wgSiteStatsAsyncFactor; + + $rate = $wgSiteStatsAsyncFactor; // convenience + // If set to do so, only do actual DB updates 1 every $rate times. + // The other times, just update "pending delta" values in memcached. 
+ if ( $rate && ( $rate < 0 || mt_rand( 0, $rate - 1 ) != 0 ) ) { + $this->doUpdatePendingDeltas(); + } else { + // Need a separate transaction because this a global lock + wfGetDB( DB_MASTER )->onTransactionIdle( array( $this, 'tryDBUpdateInternal' ) ); + } + } + + /** + * Do not call this outside of SiteStatsUpdate + */ + public function tryDBUpdateInternal() { + global $wgSiteStatsAsyncFactor; + + $dbw = wfGetDB( DB_MASTER ); + $lockKey = wfMemcKey( 'site_stats' ); // prepend wiki ID + $pd = array(); + if ( $wgSiteStatsAsyncFactor ) { + // Lock the table so we don't have double DB/memcached updates + if ( !$dbw->lockIsFree( $lockKey, __METHOD__ ) + || !$dbw->lock( $lockKey, __METHOD__, 1 ) // 1 sec timeout + ) { + $this->doUpdatePendingDeltas(); + + return; + } + $pd = $this->getPendingDeltas(); + // Piggy-back the async deltas onto those of this stats update.... + $this->views += ( $pd['ss_total_views']['+'] - $pd['ss_total_views']['-'] ); + $this->edits += ( $pd['ss_total_edits']['+'] - $pd['ss_total_edits']['-'] ); + $this->articles += ( $pd['ss_good_articles']['+'] - $pd['ss_good_articles']['-'] ); + $this->pages += ( $pd['ss_total_pages']['+'] - $pd['ss_total_pages']['-'] ); + $this->users += ( $pd['ss_users']['+'] - $pd['ss_users']['-'] ); + $this->images += ( $pd['ss_images']['+'] - $pd['ss_images']['-'] ); + } + + // Build up an SQL query of deltas and apply them... 
+ $updates = ''; + $this->appendUpdate( $updates, 'ss_total_views', $this->views ); + $this->appendUpdate( $updates, 'ss_total_edits', $this->edits ); + $this->appendUpdate( $updates, 'ss_good_articles', $this->articles ); + $this->appendUpdate( $updates, 'ss_total_pages', $this->pages ); + $this->appendUpdate( $updates, 'ss_users', $this->users ); + $this->appendUpdate( $updates, 'ss_images', $this->images ); + if ( $updates != '' ) { + $dbw->update( 'site_stats', array( $updates ), array(), __METHOD__ ); + } + + if ( $wgSiteStatsAsyncFactor ) { + // Decrement the async deltas now that we applied them + $this->removePendingDeltas( $pd ); + // Commit the updates and unlock the table + $dbw->unlock( $lockKey, __METHOD__ ); + } + } + + /** + * @param DatabaseBase $dbw + * @return bool|mixed + */ + public static function cacheUpdate( $dbw ) { + global $wgActiveUserDays; + $dbr = wfGetDB( DB_SLAVE, array( 'SpecialStatistics', 'vslow' ) ); + # Get non-bot users than did some recent action other than making accounts. + # If account creation is included, the number gets inflated ~20+ fold on enwiki. + $activeUsers = $dbr->selectField( + 'recentchanges', + 'COUNT( DISTINCT rc_user_text )', + array( + 'rc_user != 0', + 'rc_bot' => 0, + 'rc_log_type != ' . $dbr->addQuotes( 'newusers' ) . ' OR rc_log_type IS NULL', + 'rc_timestamp >= ' . 
$dbr->addQuotes( $dbr->timestamp( wfTimestamp( TS_UNIX ) + - $wgActiveUserDays * 24 * 3600 ) ), + ), + __METHOD__ + ); + $dbw->update( + 'site_stats', + array( 'ss_active_users' => intval( $activeUsers ) ), + array( 'ss_row_id' => 1 ), + __METHOD__ + ); + + return $activeUsers; + } + + protected function doUpdatePendingDeltas() { + $this->adjustPending( 'ss_total_views', $this->views ); + $this->adjustPending( 'ss_total_edits', $this->edits ); + $this->adjustPending( 'ss_good_articles', $this->articles ); + $this->adjustPending( 'ss_total_pages', $this->pages ); + $this->adjustPending( 'ss_users', $this->users ); + $this->adjustPending( 'ss_images', $this->images ); + } + + /** + * @param string $sql + * @param string $field + * @param int $delta + */ + protected function appendUpdate( &$sql, $field, $delta ) { + if ( $delta ) { + if ( $sql ) { + $sql .= ','; + } + if ( $delta < 0 ) { + $sql .= "$field=$field-" . abs( $delta ); + } else { + $sql .= "$field=$field+" . abs( $delta ); + } + } + } + + /** + * @param string $type + * @param string $sign ('+' or '-') + * @return string + */ + private function getTypeCacheKey( $type, $sign ) { + return wfMemcKey( 'sitestatsupdate', 'pendingdelta', $type, $sign ); + } + + /** + * Adjust the pending deltas for a stat type. + * Each stat type has two pending counters, one for increments and decrements + * @param string $type + * @param int $delta Delta (positive or negative) + */ + protected function adjustPending( $type, $delta ) { + global $wgMemc; + + if ( $delta < 0 ) { // decrement + $key = $this->getTypeCacheKey( $type, '-' ); + } else { // increment + $key = $this->getTypeCacheKey( $type, '+' ); + } + + $magnitude = abs( $delta ); + if ( !$wgMemc->incr( $key, $magnitude ) ) { // not there? + if ( !$wgMemc->add( $key, $magnitude ) ) { // race? 
+ $wgMemc->incr( $key, $magnitude ); + } + } + } + + /** + * Get pending delta counters for each stat type + * @return array Positive and negative deltas for each type + */ + protected function getPendingDeltas() { + global $wgMemc; + + $pending = array(); + foreach ( array( 'ss_total_views', 'ss_total_edits', + 'ss_good_articles', 'ss_total_pages', 'ss_users', 'ss_images' ) as $type + ) { + // Get pending increments and pending decrements + $pending[$type]['+'] = (int)$wgMemc->get( $this->getTypeCacheKey( $type, '+' ) ); + $pending[$type]['-'] = (int)$wgMemc->get( $this->getTypeCacheKey( $type, '-' ) ); + } + + return $pending; + } + + /** + * Reduce pending delta counters after updates have been applied + * @param array $pd Result of getPendingDeltas(), used for DB update + */ + protected function removePendingDeltas( array $pd ) { + global $wgMemc; + + foreach ( $pd as $type => $deltas ) { + foreach ( $deltas as $sign => $magnitude ) { + // Lower the pending counter now that we applied these changes + $wgMemc->decr( $this->getTypeCacheKey( $type, $sign ), $magnitude ); + } + } + } +} diff --git a/includes/deferred/SqlDataUpdate.php b/includes/deferred/SqlDataUpdate.php new file mode 100644 index 00000000..9c58503f --- /dev/null +++ b/includes/deferred/SqlDataUpdate.php @@ -0,0 +1,159 @@ +<?php +/** + * Base code for update jobs that put some secondary data extracted + * from article content into the database. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Abstract base class for update jobs that put some secondary data extracted + * from article content into the database. + * + * @note subclasses should NOT start or commit transactions in their doUpdate() method, + * a transaction will automatically be wrapped around the update. Starting another + * one would break the outer transaction bracket. If need be, subclasses can override + * the beginTransaction() and commitTransaction() methods. + */ +abstract class SqlDataUpdate extends DataUpdate { + /** @var DatabaseBase Database connection reference */ + protected $mDb; + + /** @var array SELECT options to be used (array) */ + protected $mOptions; + + /** @var bool Whether a transaction is open on this object (internal use only!) */ + private $mHasTransaction; + + /** @var bool Whether this update should be wrapped in a transaction */ + protected $mUseTransaction; + + /** + * Constructor + * + * @param bool $withTransaction Whether this update should be wrapped in a + * transaction (default: true). A transaction is only started if no + * transaction is already in progress, see beginTransaction() for details. + */ + public function __construct( $withTransaction = true ) { + global $wgAntiLockFlags; + + parent::__construct(); + + if ( $wgAntiLockFlags & ALF_NO_LINK_LOCK ) { + $this->mOptions = array(); + } else { + $this->mOptions = array( 'FOR UPDATE' ); + } + + // @todo Get connection only when it's needed? Make sure that doesn't + // break anything, especially transactions! 
+ $this->mDb = wfGetDB( DB_MASTER ); + + // Store the flag in the declared $mUseTransaction property + $this->mUseTransaction = $withTransaction; + $this->mHasTransaction = false; + } + + /** + * Begin a database transaction, if $withTransaction was given as true in + * the constructor for this SqlDataUpdate. + * + * Because nested transactions are not supported by the Database class, + * this implementation checks Database::trxLevel() and only opens a + * transaction if none is already active. + */ + public function beginTransaction() { + if ( !$this->mUseTransaction ) { + return; + } + + // NOTE: nested transactions are not supported, only start a transaction if none is open + if ( $this->mDb->trxLevel() === 0 ) { + $this->mDb->begin( get_class( $this ) . '::beginTransaction' ); + $this->mHasTransaction = true; + } + } + + /** + * Commit the database transaction started via beginTransaction (if any). + */ + public function commitTransaction() { + if ( $this->mHasTransaction ) { + $this->mDb->commit( get_class( $this ) . '::commitTransaction' ); + $this->mHasTransaction = false; + } + } + + /** + * Abort the database transaction started via beginTransaction (if any). + */ + public function abortTransaction() { + if ( $this->mHasTransaction ) { //XXX: actually... maybe always? + $this->mDb->rollback( get_class( $this ) . '::abortTransaction' ); + $this->mHasTransaction = false; + } + } + + /** + * Invalidate the cache of a list of pages from a single namespace. + * This is intended for use by subclasses.
+ * + * @param int $namespace Namespace number + * @param array $dbkeys + */ + protected function invalidatePages( $namespace, array $dbkeys ) { + if ( $dbkeys === array() ) { + return; + } + + /** + * Determine which pages need to be updated + * This is necessary to prevent the job queue from smashing the DB with + * large numbers of concurrent invalidations of the same page + */ + $now = $this->mDb->timestamp(); + $ids = array(); + $res = $this->mDb->select( 'page', array( 'page_id' ), + array( + 'page_namespace' => $namespace, + 'page_title' => $dbkeys, + 'page_touched < ' . $this->mDb->addQuotes( $now ) + ), __METHOD__ + ); + + foreach ( $res as $row ) { + $ids[] = $row->page_id; + } + + if ( $ids === array() ) { + return; + } + + /** + * Do the update + * We still need the page_touched condition, in case the row has changed since + * the non-locking select above. + */ + $this->mDb->update( 'page', array( 'page_touched' => $now ), + array( + 'page_id' => $ids, + 'page_touched < ' . $this->mDb->addQuotes( $now ) + ), __METHOD__ + ); + } +} diff --git a/includes/deferred/SquidUpdate.php b/includes/deferred/SquidUpdate.php new file mode 100644 index 00000000..0dcff44a --- /dev/null +++ b/includes/deferred/SquidUpdate.php @@ -0,0 +1,311 @@ +<?php +/** + * Squid cache purging. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * Handles purging appropriate Squid URLs given a title (or titles) + * @ingroup Cache + */ +class SquidUpdate { + /** + * Collection of URLs to purge. + * @var array + */ + protected $urlArr; + + /** + * @param array $urlArr Collection of URLs to purge + * @param bool|int $maxTitles Maximum number of unique URLs to purge + */ + public function __construct( $urlArr = array(), $maxTitles = false ) { + global $wgMaxSquidPurgeTitles; + if ( $maxTitles === false ) { + $maxTitles = $wgMaxSquidPurgeTitles; + } + + // Remove duplicate URLs from list + $urlArr = array_unique( $urlArr ); + if ( count( $urlArr ) > $maxTitles ) { + // Truncate to desired maximum URL count + $urlArr = array_slice( $urlArr, 0, $maxTitles ); + } + $this->urlArr = $urlArr; + } + + /** + * Create a SquidUpdate from the given Title object. + * + * The resulting SquidUpdate will purge the given Title's URLs as well as + * the pages that link to it. Capped at $wgMaxSquidPurgeTitles total URLs. 
+ * + * @param Title $title + * @return SquidUpdate + */ + public static function newFromLinksTo( Title $title ) { + global $wgMaxSquidPurgeTitles; + wfProfileIn( __METHOD__ ); + + # Get a list of URLs linking to this page + # (the pl_* columns used below belong to the pagelinks table) + $dbr = wfGetDB( DB_SLAVE ); + $res = $dbr->select( array( 'pagelinks', 'page' ), + array( 'page_namespace', 'page_title' ), + array( + 'pl_namespace' => $title->getNamespace(), + 'pl_title' => $title->getDBkey(), + 'pl_from=page_id' ), + __METHOD__ ); + $blurlArr = $title->getSquidURLs(); + if ( $res->numRows() <= $wgMaxSquidPurgeTitles ) { + foreach ( $res as $BL ) { + $tobj = Title::makeTitle( $BL->page_namespace, $BL->page_title ); + $blurlArr[] = $tobj->getInternalURL(); + } + } + + wfProfileOut( __METHOD__ ); + + return new SquidUpdate( $blurlArr ); + } + + /** + * Create a SquidUpdate from an array of Title objects, or a TitleArray object + * + * @param array $titles + * @param array $urlArr + * @return SquidUpdate + */ + public static function newFromTitles( $titles, $urlArr = array() ) { + global $wgMaxSquidPurgeTitles; + $i = 0; + /** @var Title $title */ + foreach ( $titles as $title ) { + $urlArr[] = $title->getInternalURL(); + if ( $i++ > $wgMaxSquidPurgeTitles ) { + break; + } + } + + return new SquidUpdate( $urlArr ); + } + + /** + * @param Title $title + * @return SquidUpdate + */ + public static function newSimplePurge( Title $title ) { + $urlArr = $title->getSquidURLs(); + + return new SquidUpdate( $urlArr ); + } + + /** + * Purges the list of URLs passed to the constructor. + */ + public function doUpdate() { + self::purge( $this->urlArr ); + } + + /** + * Purges a list of Squids defined in $wgSquidServers.
+ * $urlArr should contain the full URLs to purge as values + * (example: $urlArr[] = 'http://my.host/something') + * XXX report broken Squids per mail or log + * + * @param array $urlArr List of full URLs to purge + */ + public static function purge( $urlArr ) { + global $wgSquidServers, $wgHTCPRouting; + + if ( !$urlArr ) { + return; + } + + wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urlArr ) ); + + if ( $wgHTCPRouting ) { + self::HTCPPurge( $urlArr ); + } + + wfProfileIn( __METHOD__ ); + + // Remove duplicate URLs + $urlArr = array_unique( $urlArr ); + // Maximum number of parallel connections per squid + $maxSocketsPerSquid = 8; + // Number of requests to send per socket + // 400 seems to be a good tradeoff, opening a socket takes a while + $urlsPerSocket = 400; + $socketsPerSquid = ceil( count( $urlArr ) / $urlsPerSocket ); + if ( $socketsPerSquid > $maxSocketsPerSquid ) { + $socketsPerSquid = $maxSocketsPerSquid; + } + + $pool = new SquidPurgeClientPool; + $chunks = array_chunk( $urlArr, ceil( count( $urlArr ) / $socketsPerSquid ) ); + foreach ( $wgSquidServers as $server ) { + foreach ( $chunks as $chunk ) { + $client = new SquidPurgeClient( $server ); + foreach ( $chunk as $url ) { + $client->queuePurge( $url ); + } + $pool->addClient( $client ); + } + } + $pool->run(); + + wfProfileOut( __METHOD__ ); + } + + /** + * Send Hyper Text Caching Protocol (HTCP) CLR requests. 
+	 *
+	 * One CLR packet is built per distinct URL and sent over UDP to every
+	 * host/port pair of the $wgHTCPRouting rule that matches the URL. URLs
+	 * without a matching rule are logged and skipped.
+	 *
+	 * @throws MWException If a URL is not a string or its routing rule lacks host/port
+	 * @param array $urlArr Collection of URLs to purge
+	 */
+	public static function HTCPPurge( $urlArr ) {
+		global $wgHTCPRouting, $wgHTCPMulticastTTL;
+		wfProfileIn( __METHOD__ );
+
+		// HTCP CLR operation
+		$htcpOpCLR = 4;
+
+		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
+		if ( !defined( "IPPROTO_IP" ) ) {
+			define( "IPPROTO_IP", 0 );
+			define( "IP_MULTICAST_LOOP", 34 );
+			define( "IP_MULTICAST_TTL", 33 );
+		}
+
+		// pfsockopen doesn't work because we need set_sock_opt
+		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
+		if ( !$conn ) {
+			$errstr = socket_strerror( socket_last_error() );
+			wfDebugLog( 'squid', __METHOD__ .
+				": Error opening UDP socket: $errstr" );
+			wfProfileOut( __METHOD__ );
+
+			return;
+		}
+
+		// Set socket options
+		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
+		if ( $wgHTCPMulticastTTL != 1 ) {
+			// Set multicast time to live (hop count) option on socket
+			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
+				$wgHTCPMulticastTTL );
+		}
+
+		// Remove duplicate URLs from collection
+		$urlArr = array_unique( $urlArr );
+		// Get sequential trx IDs for packet loss counting
+		$ids = UIDGenerator::newSequentialPerNodeIDs(
+			'squidhtcppurge', 32, count( $urlArr ), UIDGenerator::QUICK_VOLATILE
+		);
+
+		foreach ( $urlArr as $url ) {
+			if ( !is_string( $url ) ) {
+				// Release the socket before bailing out
+				socket_close( $conn );
+				wfProfileOut( __METHOD__ );
+				throw new MWException( 'Bad purge URL' );
+			}
+			$url = self::expand( $url );
+			$conf = self::getRuleForURL( $url, $wgHTCPRouting );
+			if ( !$conf ) {
+				wfDebugLog( 'squid', __METHOD__ .
+					": No HTCP rule configured for URL {$url} , skipping" );
+				continue;
+			}
+
+			if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
+				// Normalize single entries
+				$conf = array( $conf );
+			}
+			foreach ( $conf as $subconf ) {
+				if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
+					// Release the socket before bailing out
+					socket_close( $conn );
+					wfProfileOut( __METHOD__ );
+					throw new MWException( "Invalid HTCP rule for URL $url\n" );
+				}
+			}
+
+			// Construct a minimal HTCP request datagram
+			// as per RFC 2756
+			// Opcode 'CLR', no response desired, no auth
+			$htcpTransID = current( $ids );
+			next( $ids );
+
+			$htcpSpecifier = pack( 'na4na*na8n',
+				4, 'HEAD', strlen( $url ), $url,
+				8, 'HTTP/1.0', 0 );
+
+			$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
+			$htcpLen = 4 + $htcpDataLen + 2;
+
+			// Note! Squid gets the bit order of the first
+			// word wrong, wrt the RFC. Apparently no other
+			// implementation exists, so adapt to Squid
+			$htcpPacket = pack( 'nxxnCxNxxa*n',
+				$htcpLen, $htcpDataLen, $htcpOpCLR,
+				$htcpTransID, $htcpSpecifier, 2 );
+
+			wfDebugLog( 'squid', __METHOD__ .
+				": Purging URL $url via HTCP" );
+			foreach ( $conf as $subconf ) {
+				$sent = socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
+					$subconf['host'], $subconf['port'] );
+				if ( $sent === false ) {
+					// Purging is best effort: log the failure and carry on
+					$errstr = socket_strerror( socket_last_error( $conn ) );
+					wfDebugLog( 'squid', __METHOD__ .
+						": Error sending HTCP packet to {$subconf['host']}:{$subconf['port']}: $errstr" );
+				}
+			}
+		}
+
+		socket_close( $conn );
+		wfProfileOut( __METHOD__ );
+	}
+
+	/**
+	 * Expand local URLs to fully-qualified URLs using the internal protocol
+	 * and host defined in $wgInternalServer. Input that's already fully-
+	 * qualified will be passed through unchanged.
+	 *
+	 * This is used to generate purge URLs that may be either local to the
+	 * main wiki or include a non-native host, such as images hosted on a
+	 * second internal server.
+	 *
+	 * Client functions should not need to call this.
+	 *
+	 * @param string $url
+	 * @return string
+	 */
+	public static function expand( $url ) {
+		return wfExpandUrl( $url, PROTO_INTERNAL );
+	}
+
+	/**
+	 * Find the HTCP routing rule to use for a given URL.
+	 * @param string $url URL to match
+	 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
+	 * @return mixed Element of $rules that matched, or false if nothing matched
+	 */
+	private static function getRuleForURL( $url, $rules ) {
+		foreach ( $rules as $pattern => $target ) {
+			// An empty-string key matches every URL; any other key is
+			// applied to the URL as a regular expression.
+			if ( $pattern !== '' && !preg_match( $pattern, $url ) ) {
+				continue;
+			}
+
+			// Rules are tried in array order and the first hit wins
+			return $target;
+		}
+
+		return false;
+	}
+}
diff --git a/includes/deferred/ViewCountUpdate.php b/includes/deferred/ViewCountUpdate.php
new file mode 100644
index 00000000..8282295b
--- /dev/null
+++ b/includes/deferred/ViewCountUpdate.php
@@ -0,0 +1,119 @@
+<?php
+/**
+ * Update for the 'page_counter' field
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Update for the 'page_counter' field, when $wgDisableCounters is false.
+ *
+ * Depending on $wgHitcounterUpdateFreq, this will directly increment the
+ * 'page_counter' field or use the 'hitcounter' table and then collect the data
+ * from that table to update the 'page_counter' field in a batch operation.
+ */
+class ViewCountUpdate implements DeferrableUpdate {
+	/** @var int Page ID to increment the view count */
+	protected $id;
+
+	/**
+	 * Constructor
+	 *
+	 * @param int $id Page ID to increment the view count
+	 */
+	public function __construct( $id ) {
+		$this->id = intval( $id );
+	}
+
+	/**
+	 * Run the update
+	 *
+	 * Database errors are logged through MWExceptionHandler instead of being
+	 * propagated to the caller.
+	 */
+	public function doUpdate() {
+		global $wgHitcounterUpdateFreq;
+
+		$dbw = wfGetDB( DB_MASTER );
+
+		// With an update frequency of 1 or less, or on SQLite, increment the
+		// counter on the page row directly once the transaction is idle.
+		if ( $wgHitcounterUpdateFreq <= 1 || $dbw->getType() == 'sqlite' ) {
+			$id = $this->id;
+			$method = __METHOD__;
+			$dbw->onTransactionIdle( function () use ( $dbw, $id, $method ) {
+				try {
+					$dbw->update( 'page',
+						array( 'page_counter = page_counter + 1' ),
+						array( 'page_id' => $id ),
+						$method
+					);
+				} catch ( DBError $e ) {
+					MWExceptionHandler::logException( $e );
+				}
+			} );
+			return;
+		}
+
+		# Not important enough to warrant an error page in case of failure
+		try {
+			// Since `hitcounter` is non-transactional, the contention is minimal
+			$dbw->insert( 'hitcounter', array( 'hc_id' => $this->id ), __METHOD__ );
+			// Only attempt a collection on roughly one out of $checkfreq hits
+			$checkfreq = intval( $wgHitcounterUpdateFreq / 25 + 1 );
+			if ( rand() % $checkfreq == 0 && $dbw->lastErrno() == 0 ) {
+				$this->collect();
+			}
+		} catch ( DBError $e ) {
+			MWExceptionHandler::logException( $e );
+		}
+	}
+
+	/**
+	 * Fold the rows of the 'hitcounter' table into 'page_counter'.
+	 *
+	 * Does nothing while 'hitcounter' holds fewer than $wgHitcounterUpdateFreq
+	 * rows. Otherwise the hits are summed per page into a temporary table,
+	 * 'hitcounter' is emptied, the sums are added to 'page_counter' and the
+	 * temporary table is dropped.
+	 *
+	 * Exceptions from the database layer are passed on to the caller after the
+	 * previous ignore_user_abort() setting has been restored and the profiling
+	 * section has been closed.
+	 */
+	protected function collect() {
+		global $wgHitcounterUpdateFreq;
+
+		$dbw = wfGetDB( DB_MASTER );
+
+		$rown = $dbw->selectField( 'hitcounter', 'COUNT(*)', array(), __METHOD__ );
+		if ( $rown < $wgHitcounterUpdateFreq ) {
+			return;
+		}
+
+		wfProfileIn( __METHOD__ . '-collect' );
+		$old_user_abort = ignore_user_abort( true );
+
+		try {
+			$dbType = $dbw->getType();
+			$tabletype = $dbType == 'mysql' ? "ENGINE=HEAP " : '';
+			$hitcounterTable = $dbw->tableName( 'hitcounter' );
+			$acchitsTable = $dbw->tableName( 'acchits' );
+			$pageTable = $dbw->tableName( 'page' );
+
+			// Snapshot and empty `hitcounter` while it is locked
+			$dbw->lockTables( array(), array( 'hitcounter' ), __METHOD__, false );
+			$dbw->query( "CREATE TEMPORARY TABLE $acchitsTable $tabletype AS " .
+				"SELECT hc_id,COUNT(*) AS hc_n FROM $hitcounterTable " .
+				'GROUP BY hc_id', __METHOD__ );
+			$dbw->delete( 'hitcounter', '*', __METHOD__ );
+			$dbw->unlockTables( __METHOD__ );
+
+			// MySQL gets its multi-table UPDATE syntax, every other
+			// database type the UPDATE ... FROM form
+			if ( $dbType == 'mysql' ) {
+				$dbw->query( "UPDATE $pageTable,$acchitsTable SET page_counter=page_counter + hc_n " .
+					'WHERE page_id = hc_id', __METHOD__ );
+			} else {
+				$dbw->query( "UPDATE $pageTable SET page_counter=page_counter + hc_n " .
+					"FROM $acchitsTable WHERE page_id = hc_id", __METHOD__ );
+			}
+			$dbw->query( "DROP TABLE $acchitsTable", __METHOD__ );
+		} catch ( Exception $e ) {
+			// doUpdate() catches DBError and lets the request go on, so do not
+			// leave user aborts ignored or the profiling section open.
+			ignore_user_abort( $old_user_abort );
+			wfProfileOut( __METHOD__ . '-collect' );
+			throw $e;
+		}
+
+		ignore_user_abort( $old_user_abort );
+		wfProfileOut( __METHOD__ . '-collect' );
+	}
+}