diff options
Diffstat (limited to 'maintenance/namespaceDupes.php')
-rw-r--r-- | maintenance/namespaceDupes.php | 190 |
1 files changed, 167 insertions, 23 deletions
diff --git a/maintenance/namespaceDupes.php b/maintenance/namespaceDupes.php index 96e01fe4..088f6776 100644 --- a/maintenance/namespaceDupes.php +++ b/maintenance/namespaceDupes.php @@ -39,9 +39,12 @@ class NamespaceConflictChecker extends Maintenance { */ protected $db; - private $resolvableCount = 0; + private $resolvablePages = 0; private $totalPages = 0; + private $resolvableLinks = 0; + private $totalLinks = 0; + public function __construct() { parent::__construct(); $this->mDescription = ""; @@ -172,7 +175,43 @@ class NamespaceConflictChecker extends Maintenance { } $this->output( "{$this->totalPages} pages to fix, " . - "{$this->resolvableCount} were resolvable.\n" ); + "{$this->resolvablePages} were resolvable.\n\n" ); + + foreach ( $spaces as $name => $ns ) { + if ( $ns != 0 ) { + // Fix up link destinations for non-interwiki links only. + // + // For example if a page has [[Foo:Bar]] and then a Foo namespace + // is introduced, pagelinks needs to be updated to have + // page_namespace = NS_FOO. + // + // If instead an interwiki prefix was introduced called "Foo", + // the link should instead be moved to the iwlinks table. If a new + // language is introduced called "Foo", or if there is a pagelink + // [[fr:Bar]] when interlanguage magic links are turned on, the + // link would have to be moved to the langlinks table. Let's put + // those cases in the too-hard basket for now. The consequences are + // not especially severe. + // + // @fixme Handle interwiki links, and pagelinks to Category:, File: + // which probably need reparsing. + + $this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options ); + $this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options ); + + // The redirect table has interwiki links randomly mixed in, we + // need to filter those out. For example [[w:Foo:Bar]] would + // have rd_interwiki=w and rd_namespace=0, which would match the + // query for a conflicting namespace "Foo" if filtering wasn't done. + $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options, + array( 'rd_interwiki' => null ) ); + $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options, + array( 'rd_interwiki' => '' ) ); + } + } + + $this->output( "{$this->totalLinks} links to fix, " . + "{$this->resolvableLinks} were resolvable.\n" ); return $ok; } @@ -215,7 +254,8 @@ class NamespaceConflictChecker extends Maintenance { // Find the new title and determine the action to take - $newTitle = $this->getDestinationTitle( $ns, $name, $row, $options ); + $newTitle = $this->getDestinationTitle( $ns, $name, + $row->page_namespace, $row->page_title, $options ); $logStatus = false; if ( !$newTitle ) { $logStatus = 'invalid title'; @@ -271,27 +311,102 @@ class NamespaceConflictChecker extends Maintenance { $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" ); if ( $options['fix'] ) { - $pageOK = $this->mergePage( $row->page_id, $newTitle ); + $pageOK = $this->mergePage( $row, $newTitle ); } break; } if ( $pageOK ) { - $this->resolvableCount++; + $this->resolvablePages++; } else { $ok = false; } } - // @fixme Also needs to do like self::getTargetList() on the - // *_namespace and *_title fields of pagelinks, templatelinks, and - // redirects, and schedule a LinksUpdate job or similar for each found - // *_from. - return $ok; } /** + * Check and repair the destination fields in a link table + * @param string $table The link table name + * @param string $fieldPrefix The field prefix in the link table + * @param int $ns Destination namespace id + * @param string $name + * @param array $options Associative array of validated command-line options + * @param array $extraConds Extra conditions for the SQL query + */ + private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, + $extraConds = array() + ) { + $batchConds = array(); + $fromField = "{$fieldPrefix}_from"; + $namespaceField = "{$fieldPrefix}_namespace"; + $titleField = "{$fieldPrefix}_title"; + $batchSize = 500; + while ( true ) { + $res = $this->db->select( + $table, + array( $fromField, $namespaceField, $titleField ), + array_merge( $batchConds, $extraConds, array( + $namespaceField => 0, + $titleField . $this->db->buildLike( "$name:", $this->db->anyString() ) + ) ), + __METHOD__, + array( + 'ORDER BY' => array( $titleField, $fromField ), + 'LIMIT' => $batchSize + ) + ); + + if ( $res->numRows() == 0 ) { + break; + } + foreach ( $res as $row ) { + $logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " . + "dbk={$row->$titleField}"; + $destTitle = $this->getDestinationTitle( $ns, $name, + $row->$namespaceField, $row->$titleField, $options ); + $this->totalLinks++; + if ( !$destTitle ) { + $this->output( "$table $logTitle *** INVALID\n" ); + continue; + } + $this->resolvableLinks++; + if ( !$options['fix'] ) { + $this->output( "$table $logTitle -> " . + $destTitle->getPrefixedDBkey() . " DRY RUN\n" ); + continue; + } + + $this->db->update( $table, + // SET + array( + $namespaceField => $destTitle->getNamespace(), + $titleField => $destTitle->getDBkey() + ), + // WHERE + array( + $namespaceField => 0, + $titleField => $row->$titleField, + $fromField => $row->$fromField + ), + __METHOD__ + ); + $this->output( "$table $logTitle -> " . + $destTitle->getPrefixedDBkey() . "\n" ); + } + $encLastTitle = $this->db->addQuotes( $row->$titleField ); + $encLastFrom = $this->db->addQuotes( $row->$fromField ); + + $batchConds = array( + "$titleField > $encLastTitle " . + "OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ); + + wfWaitForSlaves(); + } + } + + /** * Move the given pseudo-namespace, either replacing the colon with a hyphen * (useful for pseudo-namespaces that conflict with interwiki links) or move * them to another namespace if specified. @@ -338,21 +453,22 @@ class NamespaceConflictChecker extends Maintenance { } /** - * Get the preferred destination title for a given target page row. + * Get the preferred destination title for a given target page. * @param integer $ns The destination namespace ID * @param string $name The conflicting prefix - * @param stdClass $row + * @param integer $sourceNs The source namespace + * @param integer $sourceDbk The source DB key (i.e. page_title) * @param array $options Associative array of validated command-line options * @return Title|false */ - private function getDestinationTitle( $ns, $name, $row, $options ) { - $dbk = substr( $row->page_title, strlen( "$name:" ) ); + private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) { + $dbk = substr( $sourceDbk, strlen( "$name:" ) ); if ( $ns == 0 ) { // An interwiki; try an alternate encoding with '-' for ':' $dbk = "$name-" . $dbk; } $destNS = $ns; - if ( $row->page_namespace == NS_TALK && MWNamespace::isSubject( $ns ) ) { + if ( $sourceNs == NS_TALK && MWNamespace::isSubject( $ns ) ) { // This is an associated talk page moved with the --move-talk feature. $destNS = MWNamespace::getTalk( $destNS ); } @@ -392,8 +508,6 @@ class NamespaceConflictChecker extends Maintenance { /** * Move a page * - * @fixme Update pl_from_namespace etc. - * * @param integer $id The page_id * @param Title $newTitle The new title * @return bool @@ -409,8 +523,20 @@ class NamespaceConflictChecker extends Maintenance { ), __METHOD__ ); - // @fixme Needs updating the *_from_namespace fields in categorylinks, - // pagelinks, templatelinks and imagelinks. + // Update *_from_namespace in links tables + $fromNamespaceTables = array( + array( 'pagelinks', 'pl' ), + array( 'templatelinks', 'tl' ), + array( 'imagelinks', 'il' ) ); + foreach ( $fromNamespaceTables as $tableInfo ) { + list( $table, $fieldPrefix ) = $tableInfo; + $this->db->update( $table, + // SET + array( "{$fieldPrefix}_from_namespace" => $newTitle->getNamespace() ), + // WHERE + array( "{$fieldPrefix}_from" => $id ), + __METHOD__ ); + } return true; } @@ -444,7 +570,17 @@ class NamespaceConflictChecker extends Maintenance { * @param integer $id The page_id * @param Title $newTitle The new title */ - private function mergePage( $id, Title $newTitle ) { + private function mergePage( $row, Title $newTitle ) { + $id = $row->page_id; + + // Construct the WikiPage object we will need later, while the + // page_id still exists. Note that this cannot use makeTitleSafe(), + // we are deliberately constructing an invalid title. + $sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title ); + $sourceTitle->resetArticleID( $id ); + $wikiPage = new WikiPage( $sourceTitle ); + $wikiPage->loadPageData( 'fromdbmaster' ); + $destId = $newTitle->getArticleId(); $this->db->begin( __METHOD__ ); $this->db->update( 'revision', @@ -456,10 +592,18 @@ class NamespaceConflictChecker extends Maintenance { $this->db->delete( 'page', array( 'page_id' => $id ), __METHOD__ ); - // @fixme Need WikiPage::doDeleteUpdates() or similar to avoid orphan - // rows in the links tables. - + // Call LinksDeletionUpdate to delete outgoing links from the old title, + // and update category counts. + // + // Calling external code with a fake broken Title is a fairly dubious + // idea. It's necessary because it's quite a lot of code to duplicate, + // but that also makes it fragile since it would be easy for someone to + // accidentally introduce an assumption of title validity to the code we + // are calling. + $update = new LinksDeletionUpdate( $wikiPage ); + $update->doUpdate(); $this->db->commit( __METHOD__ ); + return true; } } |