From c1f9b1f7b1b77776192048005dcc66dcf3df2bfb Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Sat, 27 Dec 2014 15:41:37 +0100 Subject: Update to MediaWiki 1.24.1 --- includes/jobqueue/jobs/AssembleUploadChunksJob.php | 136 +++++++++++ includes/jobqueue/jobs/DoubleRedirectJob.php | 250 +++++++++++++++++++++ includes/jobqueue/jobs/DuplicateJob.php | 59 +++++ includes/jobqueue/jobs/EmaillingJob.php | 46 ++++ includes/jobqueue/jobs/EnotifNotifyJob.php | 57 +++++ includes/jobqueue/jobs/HTMLCacheUpdateJob.php | 162 +++++++++++++ includes/jobqueue/jobs/NullJob.php | 76 +++++++ includes/jobqueue/jobs/PublishStashedFileJob.php | 150 +++++++++++++ includes/jobqueue/jobs/RefreshLinksJob.php | 199 ++++++++++++++++ includes/jobqueue/jobs/RefreshLinksJob2.php | 141 ++++++++++++ includes/jobqueue/jobs/UploadFromUrlJob.php | 187 +++++++++++++++ 11 files changed, 1463 insertions(+) create mode 100644 includes/jobqueue/jobs/AssembleUploadChunksJob.php create mode 100644 includes/jobqueue/jobs/DoubleRedirectJob.php create mode 100644 includes/jobqueue/jobs/DuplicateJob.php create mode 100644 includes/jobqueue/jobs/EmaillingJob.php create mode 100644 includes/jobqueue/jobs/EnotifNotifyJob.php create mode 100644 includes/jobqueue/jobs/HTMLCacheUpdateJob.php create mode 100644 includes/jobqueue/jobs/NullJob.php create mode 100644 includes/jobqueue/jobs/PublishStashedFileJob.php create mode 100644 includes/jobqueue/jobs/RefreshLinksJob.php create mode 100644 includes/jobqueue/jobs/RefreshLinksJob2.php create mode 100644 includes/jobqueue/jobs/UploadFromUrlJob.php (limited to 'includes/jobqueue/jobs') diff --git a/includes/jobqueue/jobs/AssembleUploadChunksJob.php b/includes/jobqueue/jobs/AssembleUploadChunksJob.php new file mode 100644 index 00000000..9e9bda6f --- /dev/null +++ b/includes/jobqueue/jobs/AssembleUploadChunksJob.php @@ -0,0 +1,136 @@ +removeDuplicates = true; + } + + public function run() { + $scope = RequestContext::importScopedSession( $this->params['session'] ); + 
$context = RequestContext::getMain(); + try { + $user = $context->getUser(); + if ( !$user->isLoggedIn() ) { + $this->setLastError( "Could not load the author user from session." ); + + return false; + } + + if ( count( $_SESSION ) === 0 ) { + // Empty session probably indicates that we didn't associate + // with the session correctly. Note that being able to load + // the user does not necessarily mean the session was loaded. + // Most likely cause by suhosin.session.encrypt = On. + $this->setLastError( "Error associating with user session. " . + "Try setting suhosin.session.encrypt = Off" ); + + return false; + } + + UploadBase::setSessionStatus( + $this->params['filekey'], + array( 'result' => 'Poll', 'stage' => 'assembling', 'status' => Status::newGood() ) + ); + + $upload = new UploadFromChunks( $user ); + $upload->continueChunks( + $this->params['filename'], + $this->params['filekey'], + $context->getRequest() + ); + + // Combine all of the chunks into a local file and upload that to a new stash file + $status = $upload->concatenateChunks(); + if ( !$status->isGood() ) { + UploadBase::setSessionStatus( + $this->params['filekey'], + array( 'result' => 'Failure', 'stage' => 'assembling', 'status' => $status ) + ); + $this->setLastError( $status->getWikiText() ); + + return false; + } + + // We have a new filekey for the fully concatenated file + $newFileKey = $upload->getLocalFile()->getFileKey(); + + // Remove the old stash file row and first chunk file + $upload->stash->removeFileNoAuth( $this->params['filekey'] ); + + // Build the image info array while we have the local reference handy + $apiMain = new ApiMain(); // dummy object (XXX) + $imageInfo = $upload->getImageInfo( $apiMain->getResult() ); + + // Cleanup any temporary local file + $upload->cleanupTempFile(); + + // Cache the info so the user doesn't have to wait forever to get the final info + UploadBase::setSessionStatus( + $this->params['filekey'], + array( + 'result' => 'Success', + 'stage' => 
'assembling', + 'filekey' => $newFileKey, + 'imageinfo' => $imageInfo, + 'status' => Status::newGood() + ) + ); + } catch ( MWException $e ) { + UploadBase::setSessionStatus( + $this->params['filekey'], + array( + 'result' => 'Failure', + 'stage' => 'assembling', + 'status' => Status::newFatal( 'api-error-stashfailed' ) + ) + ); + $this->setLastError( get_class( $e ) . ": " . $e->getText() ); + // To be extra robust. + MWExceptionHandler::rollbackMasterChangesAndLog( $e ); + + return false; + } + + return true; + } + + public function getDeduplicationInfo() { + $info = parent::getDeduplicationInfo(); + if ( is_array( $info['params'] ) ) { + $info['params'] = array( 'filekey' => $info['params']['filekey'] ); + } + + return $info; + } + + public function allowRetries() { + return false; + } +} diff --git a/includes/jobqueue/jobs/DoubleRedirectJob.php b/includes/jobqueue/jobs/DoubleRedirectJob.php new file mode 100644 index 00000000..2561f2f1 --- /dev/null +++ b/includes/jobqueue/jobs/DoubleRedirectJob.php @@ -0,0 +1,250 @@ +" + * @param Title $redirTitle The title which has changed, redirects + * pointing to this title are fixed + * @param bool $destTitle Not used + */ + public static function fixRedirects( $reason, $redirTitle, $destTitle = false ) { + # Need to use the master to get the redirect table updated in the same transaction + $dbw = wfGetDB( DB_MASTER ); + $res = $dbw->select( + array( 'redirect', 'page' ), + array( 'page_namespace', 'page_title' ), + array( + 'page_id = rd_from', + 'rd_namespace' => $redirTitle->getNamespace(), + 'rd_title' => $redirTitle->getDBkey() + ), __METHOD__ ); + if ( !$res->numRows() ) { + return; + } + $jobs = array(); + foreach ( $res as $row ) { + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + if ( !$title ) { + continue; + } + + $jobs[] = new self( $title, array( + 'reason' => $reason, + 'redirTitle' => $redirTitle->getPrefixedDBkey() ) ); + # Avoid excessive memory usage + if ( count( $jobs ) > 10000 
) { + JobQueueGroup::singleton()->push( $jobs ); + $jobs = array(); + } + } + JobQueueGroup::singleton()->push( $jobs ); + } + + /** + * @param Title $title + * @param array|bool $params + */ + function __construct( $title, $params = false ) { + parent::__construct( 'fixDoubleRedirect', $title, $params ); + $this->reason = $params['reason']; + $this->redirTitle = Title::newFromText( $params['redirTitle'] ); + } + + /** + * @return bool + */ + function run() { + if ( !$this->redirTitle ) { + $this->setLastError( 'Invalid title' ); + + return false; + } + + $targetRev = Revision::newFromTitle( $this->title, false, Revision::READ_LATEST ); + if ( !$targetRev ) { + wfDebug( __METHOD__ . ": target redirect already deleted, ignoring\n" ); + + return true; + } + $content = $targetRev->getContent(); + $currentDest = $content ? $content->getRedirectTarget() : null; + if ( !$currentDest || !$currentDest->equals( $this->redirTitle ) ) { + wfDebug( __METHOD__ . ": Redirect has changed since the job was queued\n" ); + + return true; + } + + // Check for a suppression tag (used e.g. in periodically archived discussions) + $mw = MagicWord::get( 'staticredirect' ); + if ( $content->matchMagicWord( $mw ) ) { + wfDebug( __METHOD__ . ": skipping: suppressed with __STATICREDIRECT__\n" ); + + return true; + } + + // Find the current final destination + $newTitle = self::getFinalDestination( $this->redirTitle ); + if ( !$newTitle ) { + wfDebug( __METHOD__ . + ": skipping: single redirect, circular redirect or invalid redirect destination\n" ); + + return true; + } + if ( $newTitle->equals( $this->redirTitle ) ) { + // The redirect is already right, no need to change it + // This can happen if the page was moved back (say after vandalism) + wfDebug( __METHOD__ . 
" : skipping, already good\n" ); + } + + // Preserve fragment (bug 14904) + $newTitle = Title::makeTitle( $newTitle->getNamespace(), $newTitle->getDBkey(), + $currentDest->getFragment(), $newTitle->getInterwiki() ); + + // Fix the text + $newContent = $content->updateRedirect( $newTitle ); + + if ( $newContent->equals( $content ) ) { + $this->setLastError( 'Content unchanged???' ); + + return false; + } + + $user = $this->getUser(); + if ( !$user ) { + $this->setLastError( 'Invalid user' ); + + return false; + } + + // Save it + global $wgUser; + $oldUser = $wgUser; + $wgUser = $user; + $article = WikiPage::factory( $this->title ); + + // Messages: double-redirect-fixed-move, double-redirect-fixed-maintenance + $reason = wfMessage( 'double-redirect-fixed-' . $this->reason, + $this->redirTitle->getPrefixedText(), $newTitle->getPrefixedText() + )->inContentLanguage()->text(); + $article->doEditContent( $newContent, $reason, EDIT_UPDATE | EDIT_SUPPRESS_RC, false, $user ); + $wgUser = $oldUser; + + return true; + } + + /** + * Get the final destination of a redirect + * + * @param Title $title + * + * @return bool If the specified title is not a redirect, or if it is a circular redirect + */ + public static function getFinalDestination( $title ) { + $dbw = wfGetDB( DB_MASTER ); + + // Circular redirect check + $seenTitles = array(); + $dest = false; + + while ( true ) { + $titleText = $title->getPrefixedDBkey(); + if ( isset( $seenTitles[$titleText] ) ) { + wfDebug( __METHOD__, "Circular redirect detected, aborting\n" ); + + return false; + } + $seenTitles[$titleText] = true; + + if ( $title->isExternal() ) { + // If the target is interwiki, we have to break early (bug 40352). + // Otherwise it will look up a row in the local page table + // with the namespace/page of the interwiki target which can cause + // unexpected results (e.g. X -> foo:Bar -> Bar -> .. 
) + break; + } + + $row = $dbw->selectRow( + array( 'redirect', 'page' ), + array( 'rd_namespace', 'rd_title', 'rd_interwiki' ), + array( + 'rd_from=page_id', + 'page_namespace' => $title->getNamespace(), + 'page_title' => $title->getDBkey() + ), __METHOD__ ); + if ( !$row ) { + # No redirect from here, chain terminates + break; + } else { + $dest = $title = Title::makeTitle( + $row->rd_namespace, + $row->rd_title, + '', + $row->rd_interwiki + ); + } + } + + return $dest; + } + + /** + * Get a user object for doing edits, from a request-lifetime cache + * False will be returned if the user name specified in the + * 'double-redirect-fixer' message is invalid. + * + * @return User|bool + */ + function getUser() { + if ( !self::$user ) { + $username = wfMessage( 'double-redirect-fixer' )->inContentLanguage()->text(); + self::$user = User::newFromName( $username ); + # User::newFromName() can return false on a badly configured wiki. + if ( self::$user && !self::$user->isLoggedIn() ) { + self::$user->addToDatabase(); + } + } + + return self::$user; + } +} diff --git a/includes/jobqueue/jobs/DuplicateJob.php b/includes/jobqueue/jobs/DuplicateJob.php new file mode 100644 index 00000000..1fa6cefe --- /dev/null +++ b/includes/jobqueue/jobs/DuplicateJob.php @@ -0,0 +1,59 @@ +getTitle(), $job->getParams() ); + $djob->command = $job->getType(); + $djob->params = is_array( $djob->params ) ? 
$djob->params : array(); + $djob->params = array( 'isDuplicate' => true ) + $djob->params; + $djob->metadata = $job->metadata; + + return $djob; + } + + public function run() { + return true; + } +} diff --git a/includes/jobqueue/jobs/EmaillingJob.php b/includes/jobqueue/jobs/EmaillingJob.php new file mode 100644 index 00000000..df8ae63e --- /dev/null +++ b/includes/jobqueue/jobs/EmaillingJob.php @@ -0,0 +1,46 @@ +params['to'], + $this->params['from'], + $this->params['subj'], + $this->params['body'], + $this->params['replyto'] + ); + + return $status->isOK(); + } +} diff --git a/includes/jobqueue/jobs/EnotifNotifyJob.php b/includes/jobqueue/jobs/EnotifNotifyJob.php new file mode 100644 index 00000000..1ed99a58 --- /dev/null +++ b/includes/jobqueue/jobs/EnotifNotifyJob.php @@ -0,0 +1,57 @@ +params['editorID'] ) && $this->params['editorID'] ) { + $editor = User::newFromId( $this->params['editorID'] ); + // B/C, only the name might be given. + } else { + # @todo FIXME: newFromName could return false on a badly configured wiki. + $editor = User::newFromName( $this->params['editor'], false ); + } + $enotif->actuallyNotifyOnPageChange( + $editor, + $this->title, + $this->params['timestamp'], + $this->params['summary'], + $this->params['minorEdit'], + $this->params['oldid'], + $this->params['watchers'], + $this->params['pageStatus'] + ); + + return true; + } +} diff --git a/includes/jobqueue/jobs/HTMLCacheUpdateJob.php b/includes/jobqueue/jobs/HTMLCacheUpdateJob.php new file mode 100644 index 00000000..4d1e72c9 --- /dev/null +++ b/includes/jobqueue/jobs/HTMLCacheUpdateJob.php @@ -0,0 +1,162 @@ +) set. + * - b) Jobs to purge caches for a set of titles (the job title is ignored). + * These jobs have have (pages:(:(,),...) set. 
/**
 * Job to purge the cached HTML of pages.
 *
 * run() distinguishes these parameter layouts:
 *   - a) Recursive jobs ('recursive' is set): split into smaller jobs via
 *        BacklinkJobUtils::partitionBacklinkJob().
 *   - b) Per-page jobs ('pages' is set to a map of
 *        <page ID> => (<namespace>, <DB key>)); the job title is ignored.
 *   - c) B/C jobs that carry neither field: ('table','start','end') jobs are
 *        old-style range jobs, anything else is handled like (a).
 *
 * @ingroup JobQueue
 */
class HTMLCacheUpdateJob extends Job {
	/**
	 * @param Title $title
	 * @param array|string $params Job parameters
	 */
	function __construct( $title, $params = '' ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params );
		// Base backlink purge jobs can be de-duplicated
		$this->removeDuplicates = ( !isset( $params['range'] ) && !isset( $params['pages'] ) );
	}

	/**
	 * Either re-partition the job into smaller jobs or purge the listed pages.
	 *
	 * @return bool Always true
	 */
	function run() {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		static $expected = array( 'recursive', 'pages' ); // new jobs have one of these

		$oldRangeJob = false;
		if ( !array_intersect( array_keys( $this->params ), $expected ) ) {
			// B/C for older job params formats that lack these fields:
			// a) base jobs with just ("table") and b) range jobs with ("table","start","end")
			if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
				$oldRangeJob = true;
			} else {
				$this->params['recursive'] = true; // base job
			}
		}

		if ( !empty( $this->params['recursive'] ) ) {
			// Job to purge all (or a range of) backlink pages for a page.
			// Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
			// jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				// NOTE(review): originally annotated "jobs-per-title"; presumably the
				// number of pages handled by each leaf job - confirm in BacklinkJobUtils
				$wgUpdateRowsPerQuery,
				// Carry over information for de-duplication
				array( 'params' => $this->getRootJobParams() )
			);
			JobQueueGroup::singleton()->push( $jobs );
		} elseif ( isset( $this->params['pages'] ) ) {
			// Job to purge pages for a set of titles
			$this->invalidateTitles( $this->params['pages'] );
		} elseif ( $oldRangeJob ) {
			// B/C for job to purge a range of backlink pages for a given page
			$titleArray = $this->title->getBacklinkCache()->getLinks(
				$this->params['table'], $this->params['start'], $this->params['end'] );

			$pages = array(); // same format BacklinkJobUtils uses
			foreach ( $titleArray as $tl ) {
				$pages[$tl->getArticleID()] = array( $tl->getNamespace(), $tl->getDBkey() );
			}

			$jobs = array();
			// The keys of $pages are the page IDs that invalidateTitles() reads back with
			// array_keys(), so the chunks must keep them (preserve_keys = true); a plain
			// array_chunk() would renumber every chunk from 0.
			foreach ( array_chunk( $pages, $wgUpdateRowsPerJob, true ) as $pageChunk ) {
				$jobs[] = new HTMLCacheUpdateJob( $this->title,
					array(
						'table' => $this->params['table'],
						'pages' => $pageChunk
					) + $this->getRootJobParams() // carry over information for de-duplication
				);
			}
			JobQueueGroup::singleton()->push( $jobs );
		}

		return true;
	}

	/**
	 * Bump page_touched for the given pages, then purge the Squid and file
	 * caches (when enabled) for the pages that now carry the new timestamp.
	 *
	 * @param array $pages Map of (page ID => (namespace, DB key)) entries
	 */
	protected function invalidateTitles( array $pages ) {
		global $wgUpdateRowsPerQuery, $wgUseFileCache, $wgUseSquid;

		// Get all page IDs in this query into an array
		$pageIds = array_keys( $pages );
		if ( !$pageIds ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );

		// The page_touched field will need to be bumped for these pages.
		// Only bump it to the present time if no "rootJobTimestamp" was known.
		// If it is known, it can be used instead, which avoids invalidating output
		// that was in fact generated *after* the relevant dependency change time
		// (e.g. template edit). This is particularly useful since refreshLinks jobs
		// save back parser output and usually run along side htmlCacheUpdate jobs;
		// their saved output would be invalidated by using the current timestamp.
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
			$touchTimestamp = $this->params['rootJobTimestamp'];
		} else {
			$touchTimestamp = wfTimestampNow();
		}

		// Update page_touched (skipping pages already touched since the root job),
		// in batches of at most $wgUpdateRowsPerQuery rows per UPDATE.
		foreach ( array_chunk( $pageIds, $wgUpdateRowsPerQuery ) as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $dbw->timestamp( $touchTimestamp ) ),
				array( 'page_id' => $batch,
					// don't invalidate pages that were already invalidated
					"page_touched < " . $dbw->addQuotes( $dbw->timestamp( $touchTimestamp ) )
				),
				__METHOD__
			);
		}
		// Get the list of affected pages (races only mean something else did the purge)
		$titleArray = TitleArray::newFromResult( $dbw->select(
			'page',
			array( 'page_namespace', 'page_title' ),
			array( 'page_id' => $pageIds, 'page_touched' => $dbw->timestamp( $touchTimestamp ) ),
			__METHOD__
		) );

		// Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		// Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}

	/**
	 * @return int Number of entries in 'pages' when that parameter is set, otherwise 1
	 */
	public function workItemCount() {
		return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
	}
}
/**
 * No-op job. It can optionally sleep for a short while and/or re-enqueue
 * itself a number of times, which makes it usable as a placeholder job or
 * for exercising queue lock contention and performance.
 *
 * @par Example:
 * Inserting a null job in the configured job queue:
 * @code
 * $ php maintenance/eval.php
 * > $queue = JobQueueGroup::singleton();
 * > $job = new NullJob( Title::newMainPage(), array( 'lives' => 10 ) );
 * > $queue->push( $job );
 * @endcode
 * You can then confirm the job has been enqueued by using the showJobs.php
 * maintenance utility:
 * @code
 * $ php maintenance/showJobs.php --group
 * null: 1 queue; 0 claimed (0 active, 0 abandoned)
 * $
 * @endcode
 *
 * @ingroup JobQueue
 */
class NullJob extends Job {
	/**
	 * @param Title $title
	 * @param array $params Job parameters (lives, usleep)
	 */
	function __construct( $title, $params ) {
		parent::__construct( 'null', $title, $params );

		// Fill in whichever of the two tunables the caller left unset
		foreach ( array( 'lives' => 1, 'usleep' => 0 ) as $name => $fallback ) {
			if ( !isset( $this->params[$name] ) ) {
				$this->params[$name] = $fallback;
			}
		}

		$this->removeDuplicates = !empty( $this->params['removeDuplicates'] );
	}

	/**
	 * Sleep for 'usleep' microseconds (if positive), then push a copy of this
	 * job with one fewer life while more than one life remains.
	 *
	 * @return bool Always true
	 */
	public function run() {
		$pause = $this->params['usleep'];
		if ( $pause > 0 ) {
			usleep( $pause );
		}

		if ( $this->params['lives'] > 1 ) {
			$successorParams = $this->params;
			$successorParams['lives']--;
			JobQueueGroup::singleton()->push( new self( $this->title, $successorParams ) );
		}

		return true;
	}
}
/**
 * Job that uploads a file from the upload stash into the local file repo.
 *
 * Progress and the final outcome are recorded with
 * UploadBase::setSessionStatus() under the job's 'filekey' parameter, always
 * with 'stage' set to 'publish'.
 *
 * @ingroup Upload
 */
class PublishStashedFileJob extends Job {
	public function __construct( $title, $params ) {
		parent::__construct( 'PublishStashedFile', $title, $params );
		$this->removeDuplicates = true;
	}

	/**
	 * Verify the stashed file and publish it on behalf of the session's user.
	 *
	 * @return bool False when the user/session cannot be loaded, verification
	 *   or publishing fails, or an MWException is caught; true otherwise
	 */
	public function run() {
		// The returned value is never read in this method; presumably it is a scope
		// guard that must stay referenced until run() returns - TODO confirm
		$scope = RequestContext::importScopedSession( $this->params['session'] );
		$context = RequestContext::getMain();
		try {
			$uploader = $context->getUser();
			if ( !$uploader->isLoggedIn() ) {
				$this->setLastError( "Could not load the author user from session." );

				return false;
			}

			// Empty session probably indicates that we didn't associate
			// with the session correctly. Note that being able to load
			// the user does not necessarily mean the session was loaded.
			// Most likely cause by suhosin.session.encrypt = On.
			if ( count( $_SESSION ) === 0 ) {
				$this->setLastError( "Error associating with user session. " .
					"Try setting suhosin.session.encrypt = Off" );

				return false;
			}

			$this->recordPublishStatus( 'Poll', array( 'status' => Status::newGood() ) );

			$stashUpload = new UploadFromStash( $uploader );
			// @todo initialize() causes a GET, ideally we could frontload the antivirus
			// checks and anything else to the stash stage (which includes concatenation and
			// the local file is thus already there). That way, instead of GET+PUT, there could
			// just be a COPY operation from the stash to the public zone.
			$stashUpload->initialize( $this->params['filekey'], $this->params['filename'] );

			// Check if the local file checks out (this is generally a no-op)
			$check = $stashUpload->verifyUpload();
			if ( $check['status'] !== UploadBase::OK ) {
				$verifyStatus = Status::newFatal( 'verification-error' );
				$verifyStatus->value = array( 'verification' => $check );
				$this->recordPublishStatus( 'Failure', array( 'status' => $verifyStatus ) );
				$this->setLastError( "Could not verify upload." );

				return false;
			}

			// Upload the stashed file to a permanent location
			$publishStatus = $stashUpload->performUpload(
				$this->params['comment'],
				$this->params['text'],
				$this->params['watch'],
				$uploader
			);
			if ( !$publishStatus->isGood() ) {
				$this->recordPublishStatus( 'Failure', array( 'status' => $publishStatus ) );
				$this->setLastError( $publishStatus->getWikiText() );

				return false;
			}

			// Build the image info array while we have the local reference handy
			$apiMain = new ApiMain(); // dummy object (XXX)
			$imageInfo = $stashUpload->getImageInfo( $apiMain->getResult() );

			// Cleanup any temporary local file
			$stashUpload->cleanupTempFile();

			// Cache the info so the user doesn't have to wait forever to get the final info
			$this->recordPublishStatus( 'Success', array(
				'filename' => $stashUpload->getLocalFile()->getName(),
				'imageinfo' => $imageInfo,
				'status' => Status::newGood()
			) );
		} catch ( MWException $e ) {
			$this->recordPublishStatus(
				'Failure',
				array( 'status' => Status::newFatal( 'api-error-publishfailed' ) )
			);
			$this->setLastError( get_class( $e ) . ": " . $e->getText() );
			// To prevent potential database referential integrity issues.
			// See bug 32551.
			MWExceptionHandler::rollbackMasterChangesAndLog( $e );

			return false;
		}

		return true;
	}

	/**
	 * Store a status entry for this job's file key with 'stage' => 'publish'.
	 *
	 * @param string $result Value for the 'result' field ('Poll', 'Failure', 'Success')
	 * @param array $fields Remaining fields, appended after 'result' and 'stage'
	 */
	private function recordPublishStatus( $result, array $fields ) {
		UploadBase::setSessionStatus(
			$this->params['filekey'],
			array( 'result' => $result, 'stage' => 'publish' ) + $fields
		);
	}

	/**
	 * Reduce the parameters used for duplicate detection to the file key.
	 *
	 * @return array
	 */
	public function getDeduplicationInfo() {
		$dedupInfo = parent::getDeduplicationInfo();
		if ( !is_array( $dedupInfo['params'] ) ) {
			return $dedupInfo;
		}
		$dedupInfo['params'] = array( 'filekey' => $dedupInfo['params']['filekey'] );

		return $dedupInfo;
	}

	/**
	 * @return bool Always false
	 */
	public function allowRetries() {
		return false;
	}
}
/**
 * Job to update link tables for pages
 *
 * This job comes in a few variants:
 *   - a) Recursive jobs to update links for backlink pages for a given title.
 *        These jobs have (recursive:true,table:<table>) set.
 *   - b) Jobs to update links for a set of pages (the job title is ignored).
 *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set.
 *   - c) Jobs to update links for a single page (the job title)
 *        These jobs need no extra fields set.
 *
 * @ingroup JobQueue
 */
class RefreshLinksJob extends Job {
	/** Parses taking at least this many seconds are saved back to the parser cache */
	const PARSE_THRESHOLD_SEC = 1.0;

	/**
	 * @param Title $title
	 * @param array|string $params Job parameters
	 */
	function __construct( $title, $params = '' ) {
		parent::__construct( 'refreshLinks', $title, $params );
		// Base backlink update jobs and per-title update jobs can be de-duplicated.
		// If template A changes twice before any jobs run, a clean queue will have:
		//		(A base, A base)
		// The second job is ignored by the queue on insertion.
		// Suppose, many pages use template A, and that template itself uses template B.
		// An edit to both will first create two base jobs. A clean FIFO queue will have:
		//		(A base, B base)
		// When these jobs run, the queue will have per-title and remnant partition jobs:
		//		(titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
		// Some of these jobs will be the same, and will automatically be ignored by
		// the queue upon insertion. Some title jobs will run before the duplicate is
		// inserted, so the work will still be done twice in those cases. More titles
		// can be de-duplicated as the remnant jobs continue to be broken down. This
		// works best when the pages have few backlinks and/or the backlink sets for
		// pages A and B are almost identical (the original note also ties this to
		// the value of $wgUpdateRowsPerJob).
		$this->removeDuplicates = !isset( $params['range'] )
			&& ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
	}

	/**
	 * Partition a recursive job, or refresh links for the listed pages, or
	 * refresh links for the job title.
	 *
	 * NOTE(review): the result of runForTitle() is not propagated, so this
	 * reports success even when a title could not be processed.
	 *
	 * @return bool Always true
	 */
	function run() {
		global $wgUpdateRowsPerJob;

		if ( !empty( $this->params['recursive'] ) ) {
			// Job to update all (or a range of) backlink pages for a page.
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Avoid slave lag when fetching templates.
			// When the outermost job is run, we know that the caller that enqueued it must have
			// committed the relevant changes to the DB by now. At that point, record the master
			// position and pass it along as the job recursively breaks into smaller range jobs.
			// Hopefully, when leaf jobs are popped, the slaves will have reached that position.
			if ( isset( $this->params['masterPos'] ) ) {
				$extraParams['masterPos'] = $this->params['masterPos'];
			} elseif ( wfGetLB()->getServerCount() > 1 ) {
				$extraParams['masterPos'] = wfGetLB()->getMasterPos();
			} else {
				$extraParams['masterPos'] = false;
			}
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				1, // job-per-title
				array( 'params' => $extraParams )
			);
			JobQueueGroup::singleton()->push( $jobs );
		} elseif ( isset( $this->params['pages'] ) ) {
			// Job to update link tables for a set of titles
			foreach ( $this->params['pages'] as $nsAndKey ) {
				list( $ns, $dbKey ) = $nsAndKey;
				$this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
			}
		} else {
			// Job to update link tables for a given title
			$this->runForTitle( $this->title );
		}

		return true;
	}

	/**
	 * Re-parse (or reuse cached output for) one page and run its secondary
	 * data updates.
	 *
	 * @param Title|null $title
	 * @return bool False if the title is null or has no revision, else true
	 */
	protected function runForTitle( Title $title = null ) {
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		if ( is_null( $title ) ) {
			$this->setLastError( "refreshLinks: Invalid title" );
			return false;
		}

		// Wait for the DB of the current/next slave DB handle to catch up to the master.
		// This way, we get the correct page_latest for templates or files that just changed
		// milliseconds ago, having triggered this job to begin with.
		if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$page = WikiPage::factory( $title );

		// Fetch the current revision...
		$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
		if ( !$revision ) {
			$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
			return false; // XXX: what if it was just deleted?
		}
		$content = $revision->getContent( Revision::RAW );
		if ( !$content ) {
			// If there is no content, pretend the content is empty
			$content = $revision->getContentHandler()->makeEmptyContent();
		}

		$parserOutput = false;
		$parserOptions = $page->makeParserOptions( 'canonical' );
		// If page_touched changed after this root job (with a good slave lag skew factor),
		// then it is likely that any views of the pages already resulted in re-parses which
		// are now in cache. This can be reused to avoid expensive parsing in some cases.
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
			// Root job time plus 5 seconds of skew, converted to TS_MW once so that all
			// three comparisons below use the same format. The cache-time check used
			// to be compared against the raw UNIX integer instead.
			$skewedTimestamp = wfTimestamp(
				TS_MW,
				wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5
			);
			if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
				// Something already updated the backlinks since this job was made
				return true;
			}
			if ( $page->getTouched() > $skewedTimestamp ) {
				$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
				if ( $parserOutput && $parserOutput->getCacheTime() <= $skewedTimestamp ) {
					$parserOutput = false; // too stale
				}
			}
		}
		// Parse the current revision if no usable cached output was found...
		if ( !$parserOutput ) {
			$start = microtime( true );
			// Revision ID must be passed to the parser output to get revision variables correct
			$parserOutput = $content->getParserOutput(
				$title, $revision->getId(), $parserOptions, false );
			$elapsed = microtime( true ) - $start;
			// If it took a long time to render, then save this back to the cache to avoid
			// wasted CPU by other apaches or job runners. We don't want to always save to
			// cache as this can cause high cache I/O and LRU churn when a template changes.
			if ( $elapsed >= self::PARSE_THRESHOLD_SEC
				&& $page->isParserCacheUsed( $parserOptions, $revision->getId() )
				&& $parserOutput->isCacheable()
			) {
				$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
				ParserCache::singleton()->save(
					$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
				);
			}
		}

		$updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
		DataUpdate::runUpdates( $updates );

		InfoAction::invalidateCache( $title );

		return true;
	}

	/**
	 * Strip fields that would make otherwise identical jobs look different.
	 *
	 * @return array
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		if ( is_array( $info['params'] ) ) {
			// Don't let highly unique "masterPos" values ruin duplicate detection
			unset( $info['params']['masterPos'] );
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection.
			// The page list is a job parameter, so it has to be looked up under 'params'.
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	/**
	 * @return int Number of entries in 'pages' when that parameter is set, otherwise 1
	 */
	public function workItemCount() {
		return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
	}
}
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup JobQueue
+ */
+
+/**
+ * Background job to update links for titles in certain backlink range by page ID.
+ * Newer version for high use templates. This is deprecated by RefreshLinksPartitionJob.
+ *
+ * @ingroup JobQueue
+ * @deprecated since 1.23
+ */
+class RefreshLinksJob2 extends Job {
+	/**
+	 * @param Title|null $title Page whose backlinks get refreshed (run() rejects null)
+	 * @param array $params Job parameters; "table", "start", "end" and
+	 *  "masterPos" are the ones read by this class
+	 */
+	function __construct( $title, $params ) {
+		parent::__construct( 'refreshLinks2', $title, $params );
+		// Only base jobs (no "start" and no "end") are easy to de-duplicate
+		$hasRangeBound = isset( $params['start'] ) || isset( $params['end'] );
+		$this->removeDuplicates = !$hasRangeBound;
+	}
+
+	/**
+	 * Run a refreshLinks2 job: queue either per-title RefreshLinksJob jobs or
+	 * smaller refreshLinks2 range jobs for the backlinks of the job title.
+	 *
+	 * @return bool Success (false only when the job has no title)
+	 */
+	function run() {
+		global $wgUpdateRowsPerJob;
+
+		LinkCache::singleton()->clear();
+
+		if ( $this->title === null ) {
+			$this->error = "refreshLinks2: Invalid title";
+
+			return false;
+		}
+
+		// Back compat for pre-r94435 jobs, which carry no "table" parameter
+		$table = 'templatelinks';
+		if ( isset( $this->params['table'] ) ) {
+			$table = $this->params['table'];
+		}
+
+		// Avoid slave lag when fetching templates.
+		// By the time the outermost job runs, whoever enqueued it must already have
+		// committed the relevant changes to the DB. The master position is recorded
+		// then and handed down as the job recursively splits into smaller range jobs,
+		// in the hope that the slaves have reached it once the leaf jobs are popped.
+		$masterPos = false;
+		if ( isset( $this->params['masterPos'] ) ) {
+			$masterPos = $this->params['masterPos'];
+		} elseif ( wfGetLB()->getServerCount() > 1 ) {
+			$masterPos = wfGetLB()->getMasterPos();
+		}
+
+		$backlinkCache = $this->title->getBacklinkCache();
+
+		# A job with both range bounds is a partition job that inserts leaf jobs;
+		# anything else is a base job, which inserts the per-title jobs directly
+		# only if the backlink count does not exceed $wgUpdateRowsPerJob.
+		$isPartitionJob = isset( $this->params['start'], $this->params['end'] );
+		if ( $isPartitionJob
+			|| $backlinkCache->getNumLinks( $table, $wgUpdateRowsPerJob + 1 ) <= $wgUpdateRowsPerJob
+		) {
+			$newJobs = $this->getSingleTitleJobs( $table, $masterPos );
+		} else {
+			# Insert the partition jobs to make per-title jobs
+			$newJobs = array();
+			foreach ( $backlinkCache->partition( $table, $wgUpdateRowsPerJob ) as $range ) {
+				list( $rangeStart, $rangeEnd ) = $range;
+				$rangeParams = array(
+					'table' => $table,
+					'start' => $rangeStart,
+					'end' => $rangeEnd,
+					'masterPos' => $masterPos,
+				);
+				// carry over information for de-duplication
+				$newJobs[] = new self( $this->title, $rangeParams + $this->getRootJobParams() );
+			}
+		}
+
+		if ( count( $newJobs ) > 0 ) {
+			JobQueueGroup::singleton()->push( $newJobs );
+		}
+
+		return true;
+	}
+
+	/**
+	 * Build one RefreshLinksJob per backlink title in this job's range.
+	 *
+	 * @param string $table Link table passed to the backlink cache
+	 * @param mixed $masterPos Master position handed to every created job
+	 * @return array List of RefreshLinksJob objects
+	 */
+	protected function getSingleTitleJobs( $table, $masterPos ) {
+		# Base jobs have no "start"/"end" fields; false stands for a missing bound
+		$start = false;
+		if ( isset( $this->params['start'] ) ) {
+			$start = $this->params['start'];
+		}
+		$end = false;
+		if ( isset( $this->params['end'] ) ) {
+			$end = $this->params['end'];
+		}
+		$backlinkCache = $this->title->getBacklinkCache();
+		$titles = $backlinkCache->getLinks( $table, $start, $end );
+		# Convert into single page refresh links jobs.
+		# This handles well when in sapi mode and is useful in any case for job
+		# de-duplication. When many pages use template A and template A in turn
+		# uses template B, editing both creates many duplicate jobs. Roughly, for
+		# each page one of the "RefreshLinksJob" jobs runs first and removes the
+		# duplicates when it does. One page may still have its job popped while
+		# another page's job is buried within the logic of a refreshLinks2 job.
+		$jobs = array();
+		foreach ( $titles as $title ) {
+			// carry over information for de-duplication
+			$jobParams = array( 'masterPos' => $masterPos ) + $this->getRootJobParams();
+			$jobs[] = new RefreshLinksJob( $title, $jobParams );
+		}
+
+		return $jobs;
+	}
+
+	/**
+	 * @return array The parent's de-duplication info without params['masterPos']
+	 */
+	public function getDeduplicationInfo() {
+		$info = parent::getDeduplicationInfo();
+		if ( !is_array( $info['params'] ) ) {
+			return $info;
+		}
+		// Don't let highly unique "masterPos" values ruin duplicate detection
+		unset( $info['params']['masterPos'] );
+
+		return $info;
+	}
+}
diff --git a/includes/jobqueue/jobs/UploadFromUrlJob.php b/includes/jobqueue/jobs/UploadFromUrlJob.php
new file mode 100644
index 00000000..a09db15a
--- /dev/null
+++ b/includes/jobqueue/jobs/UploadFromUrlJob.php
@@ -0,0 +1,187 @@
+<?php
+/**
+ * Job for asynchronous upload-by-url.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup JobQueue
+ */
+
+/**
+ * Job for asynchronous upload-by-url.
+ *
+ * This job is in fact an interface to UploadFromUrl, which is designed such
+ * that it does not require any globals. If it does, fix it elsewhere, do not
+ * add globals in here.
+ *
+ * Job parameters read by this class: "url", "userName", "ignoreWarnings",
+ * "leaveMessage", "sessionId", "sessionKey", "comment", "pageText", "watch".
+ *
+ * @ingroup JobQueue
+ */
+class UploadFromUrlJob extends Job {
+	/** Key in $_SESSION under which the per-job result arrays are kept */
+	const SESSION_KEYNAME = 'wsUploadFromUrlJobData';
+
+	/** @var UploadFromUrl */
+	public $upload;
+
+	/** @var User */
+	protected $user;
+
+	/**
+	 * @param Title $title Target title; run() uses its text as the upload name
+	 * @param array $params Job parameters (see the class documentation)
+	 */
+	public function __construct( $title, $params ) {
+		parent::__construct( 'uploadFromUrl', $title, $params );
+	}
+
+	/**
+	 * Fetch the file from the URL, verify it, check for warnings and perform
+	 * the upload. The outcome of every step is reported through a user message
+	 * or the session rather than through the return value.
+	 *
+	 * @return bool Always true, also when the fetch, verification or upload failed
+	 */
+	public function run() {
+		global $wgCopyUploadAsyncTimeout;
+		# Initialize this object and the upload object
+		$this->upload = new UploadFromUrl();
+		$this->upload->initialize(
+			$this->title->getText(),
+			$this->params['url'],
+			false
+		);
+		$this->user = User::newFromName( $this->params['userName'] );
+
+		# Fetch the file
+		$opts = array();
+		if ( $wgCopyUploadAsyncTimeout ) {
+			$opts['timeout'] = $wgCopyUploadAsyncTimeout;
+		}
+		$status = $this->upload->fetchFile( $opts );
+		if ( !$status->isOk() ) {
+			$this->leaveMessage( $status );
+
+			return true;
+		}
+
+		# Verify upload
+		$result = $this->upload->verifyUpload();
+		if ( $result['status'] != UploadBase::OK ) {
+			$status = $this->upload->convertVerifyErrorToStatus( $result );
+			$this->leaveMessage( $status );
+
+			return true;
+		}
+
+		# Check warnings
+		if ( !$this->params['ignoreWarnings'] ) {
+			$warnings = $this->upload->checkWarnings();
+			if ( $warnings ) {
+
+				# Stash the upload
+				$key = $this->upload->stashFile();
+
+				// @todo FIXME: This has been broken for a while.
+				// User::leaveUserMessage() does not exist.
+				if ( $this->params['leaveMessage'] ) {
+					$this->user->leaveUserMessage(
+						wfMessage( 'upload-warning-subj' )->text(),
+						wfMessage( 'upload-warning-msg',
+							$key,
+							$this->params['url'] )->text()
+					);
+				} else {
+					wfSetupSession( $this->params['sessionId'] );
+					$this->storeResultInSession( 'Warning',
+						'warnings', $warnings );
+					session_write_close();
+				}
+
+				return true;
+			}
+		}
+
+		# Perform the upload
+		$status = $this->upload->performUpload(
+			$this->params['comment'],
+			$this->params['pageText'],
+			$this->params['watch'],
+			$this->user
+		);
+		$this->leaveMessage( $status );
+
+		return true;
+	}
+
+	/**
+	 * Leave a message on the user talk page or in the session according to
+	 * $params['leaveMessage'].
+	 *
+	 * In session mode the session named by $params['sessionId'] is set up,
+	 * the result is stored and the session is written and closed again.
+	 *
+	 * @param Status $status
+	 */
+	protected function leaveMessage( $status ) {
+		if ( $this->params['leaveMessage'] ) {
+			if ( $status->isGood() ) {
+				// @todo FIXME: user->leaveUserMessage does not exist.
+				$this->user->leaveUserMessage( wfMessage( 'upload-success-subj' )->text(),
+					wfMessage( 'upload-success-msg',
+						$this->upload->getTitle()->getText(),
+						$this->params['url']
+					)->text() );
+			} else {
+				// @todo FIXME: user->leaveUserMessage does not exist.
+				$this->user->leaveUserMessage( wfMessage( 'upload-failure-subj' )->text(),
+					wfMessage( 'upload-failure-msg',
+						$status->getWikiText(),
+						$this->params['url']
+					)->text() );
+			}
+		} else {
+			wfSetupSession( $this->params['sessionId'] );
+			if ( $status->isOk() ) {
+				$this->storeResultInSession( 'Success',
+					'filename', $this->upload->getLocalFile()->getName() );
+			} else {
+				$this->storeResultInSession( 'Failure',
+					'errors', $status->getErrorsArray() );
+			}
+			session_write_close();
+		}
+	}
+
+	/**
+	 * Store a result in the session data. Note that the caller is responsible
+	 * for appropriate session_start and session_write_close calls.
+	 *
+	 * @param string $result The result (Success|Warning|Failure)
+	 * @param string $dataKey The key of the extra data
+	 * @param mixed $dataValue The extra data itself
+	 */
+	protected function storeResultInSession( $result, $dataKey, $dataValue ) {
+		// Bound by reference, so the writes below land in $_SESSION itself
+		$session =& self::getSessionData( $this->params['sessionKey'] );
+		$session['result'] = $result;
+		$session[$dataKey] = $dataValue;
+	}
+
+	/**
+	 * Initialize the session data. Sets the initial result to queued.
+	 */
+	public function initializeSessionData() {
+		$session =& self::getSessionData( $this->params['sessionKey'] );
+		// Write through the reference; a variable-variable ($$session) here would
+		// target some other variable and leave the session data untouched.
+		$session['result'] = 'Queued';
+	}
+
+	/**
+	 * Get a reference to the session data array of one job, creating an
+	 * empty array for it first if none exists yet.
+	 *
+	 * @param string $key
+	 * @return mixed Reference to $_SESSION[self::SESSION_KEYNAME][$key]
+	 */
+	public static function &getSessionData( $key ) {
+		if ( !isset( $_SESSION[self::SESSION_KEYNAME][$key] ) ) {
+			$_SESSION[self::SESSION_KEYNAME][$key] = array();
+		}
+
+		return $_SESSION[self::SESSION_KEYNAME][$key];
+	}
+}
-- cgit v1.2.3-54-g00ecf