From 9db190c7e736ec8d063187d4241b59feaf7dc2d1 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 22 Jun 2011 11:28:20 +0200 Subject: update to MediaWiki 1.17.0 --- maintenance/storage/checkStorage.php | 74 +++++++++++++++---------------- maintenance/storage/compressOld.inc | 21 +++++---- maintenance/storage/compressOld.php | 20 ++++----- maintenance/storage/dumpRev.php | 16 +++---- maintenance/storage/fixBug20757.php | 43 +++++++++++------- maintenance/storage/moveToExternal.php | 30 +++++-------- maintenance/storage/orphanStats.php | 13 +++--- maintenance/storage/recompressTracked.php | 70 ++++++++++++++--------------- maintenance/storage/resolveStubs.php | 21 ++++----- maintenance/storage/storageTypeStats.php | 7 ++- maintenance/storage/testCompression.php | 12 ++--- maintenance/storage/trackBlobs.php | 56 ++++++++++++++++++++--- 12 files changed, 211 insertions(+), 172 deletions(-) (limited to 'maintenance/storage') diff --git a/maintenance/storage/checkStorage.php b/maintenance/storage/checkStorage.php index 245c2fec..c288d682 100644 --- a/maintenance/storage/checkStorage.php +++ b/maintenance/storage/checkStorage.php @@ -9,7 +9,7 @@ define( 'CONCAT_HEADER', 'O:27:"concatenatedgziphistoryblob"' ); if ( !defined( 'MEDIAWIKI' ) ) { - require_once( dirname(__FILE__) . '/../commandLine.inc' ); + require_once( dirname( __FILE__ ) . '/../commandLine.inc' ); $cs = new CheckStorage; $fix = isset( $options['fix'] ); @@ -22,7 +22,7 @@ if ( !defined( 'MEDIAWIKI' ) ) { } -//---------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------- /** * @ingroup Maintenance ExternalStorage @@ -37,7 +37,7 @@ class CheckStorage { 'unfixable' => 'Unexpected errors with no automated fixing method', 'fixed' => 'Errors already fixed', 'fixable' => 'Errors which would already be fixed if --fix was specified', - ); + ); function check( $fix = false, $xml = '' ) { $fname = 'checkStorage'; @@ -63,14 +63,14 @@ class CheckStorage { for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) { $chunkEnd = $chunkStart + $chunkSize - 1; - //print "$chunkStart of $maxRevId\n"; + // print "$chunkStart of $maxRevId\n"; // Fetch revision rows $this->oldIdMap = array(); - $dbr->ping(); - $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ), + $dbr->ping(); + $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ), array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { + foreach ( $res as $row ) { $this->oldIdMap[$row->rev_id] = $row->rev_text_id; } $dbr->freeResult( $res ); @@ -83,9 +83,9 @@ class CheckStorage { $missingTextRows = array_flip( $this->oldIdMap ); $externalRevs = array(); $objectRevs = array(); - $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ), + $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ), 'old_id IN (' . implode( ',', $this->oldIdMap ) . 
')', $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { + foreach ( $res as $row ) { $flags = $row->old_flags; $id = $row->old_id; @@ -116,7 +116,7 @@ class CheckStorage { if ( $fix ) { $this->error( 'fixed', "Warning: old_flags set to 0", $id ); $dbw->ping(); - $dbw->update( 'text', array( 'old_flags' => '' ), + $dbw->update( 'text', array( 'old_flags' => '' ), array( 'old_id' => $id ), $fname ); echo "Fixed\n"; } else { @@ -137,15 +137,15 @@ class CheckStorage { $externalConcatBlobs = array(); $externalNormalBlobs = array(); if ( count( $externalRevs ) ) { - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { + foreach ( $res as $row ) { $urlParts = explode( '://', $row->old_text, 2 ); if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) { $this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id ); continue; } - list( $proto, $path ) = $urlParts; + list( $proto, ) = $urlParts; if ( $proto != 'DB' ) { $this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id ); continue; @@ -164,7 +164,7 @@ class CheckStorage { // Check external concat blobs for the right header $this->checkExternalConcatBlobs( $externalConcatBlobs ); - + // Check external normal blobs for existence if ( count( $externalNormalBlobs ) ) { if ( is_null( $this->dbStore ) ) { @@ -174,10 +174,10 @@ class CheckStorage { $blobIds = array_keys( $xBlobIds ); $extDb =& $this->dbStore->getSlave( $cluster ); $blobsTable = $this->dbStore->getTable( $extDb ); - $res = $extDb->select( $blobsTable, - array( 'blob_id' ), + $res = $extDb->select( $blobsTable, + array( 'blob_id' ), array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname ); - while ( $row = $extDb->fetchObject( $res ) ) { + foreach ( $res as $row ) { unset( $xBlobIds[$row->blob_id] ); } $extDb->freeResult( $res ); @@ -194,9 +194,9 @@ class CheckStorage { $curIds = array(); if ( count( $objectRevs ) ) { $headerLength = 300; - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { + foreach ( $res as $row ) { $oldId = $row->old_id; $matches = array(); if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) { @@ -245,9 +245,9 @@ class CheckStorage { $externalConcatBlobs = array(); if ( count( $concatBlobs ) ) { $headerLength = 300; - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ), array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { + foreach ( $res as $row ) { $flags = explode( ',', $row->old_flags ); if ( in_array( 'external', $flags ) ) { // Concat blob is in external storage? 
@@ -261,7 +261,7 @@ class CheckStorage { if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) { $externalConcatBlobs[$cluster][$id] = array(); } - $externalConcatBlobs[$cluster][$id] = array_merge( + $externalConcatBlobs[$cluster][$id] = array_merge( $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id] ); } @@ -270,7 +270,7 @@ class CheckStorage { $concatBlobs[$row->old_id] ); } } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) { - $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}", + $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}", $concatBlobs[$row->old_id] ); } # else good @@ -286,7 +286,7 @@ class CheckStorage { } print "\n\nErrors:\n"; - foreach( $this->errors as $name => $errors ) { + foreach ( $this->errors as $name => $errors ) { if ( count( $errors ) ) { $description = $this->errorDescriptions[$name]; echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n"; @@ -323,7 +323,7 @@ class CheckStorage { foreach ( $ids as $id ) { $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) ); } - print "$msg in text rows " . implode( ', ', $ids ) . + print "$msg in text rows " . implode( ', ', $ids ) . ", revisions " . implode( ', ', $revIds ) . "\n"; } else { $id = $ids; @@ -346,18 +346,18 @@ class CheckStorage { if ( is_null( $this->dbStore ) ) { $this->dbStore = new ExternalStoreDB; } - + foreach ( $externalConcatBlobs as $cluster => $oldIds ) { $blobIds = array_keys( $oldIds ); $extDb =& $this->dbStore->getSlave( $cluster ); $blobsTable = $this->dbStore->getTable( $extDb ); $headerLength = strlen( CONCAT_HEADER ); - $res = $extDb->select( $blobsTable, - array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ), + $res = $extDb->select( $blobsTable, + array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ), array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname ); - while ( $row = $extDb->fetchObject( $res ) ) { + foreach ( $res as $row ) { if ( strcasecmp( $row->header, CONCAT_HEADER ) ) { - $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL", + $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL", $oldIds[$row->blob_id] ); } unset( $oldIds[$row->blob_id] ); @@ -383,7 +383,7 @@ class CheckStorage { $revFileName = "$wgTmpDirectory/broken-revlist-$wgDBname"; $filteredXmlFileName = "$wgTmpDirectory/filtered-$wgDBname.xml"; - + // Write revision list if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) { echo "Error writing revision list, can't restore text\n"; @@ -393,8 +393,8 @@ class CheckStorage { // Run mwdumper echo "Filtering XML dump...\n"; $exitStatus = 0; - passthru( 'mwdumper ' . - wfEscapeShellArg( + passthru( 'mwdumper ' . + wfEscapeShellArg( "--output=file:$filteredXmlFileName", "--filter=revlist:$revFileName", $xml @@ -416,7 +416,7 @@ class CheckStorage { $dbw = wfGetDB( DB_MASTER ); $dbr->ping(); $dbw->ping(); - + $source = new ImportStreamSource( $file ); $importer = new WikiImporter( $source ); $importer->setRevisionCallback( array( &$this, 'importRevision' ) ); @@ -429,8 +429,8 @@ class CheckStorage { $id = $revision->getID(); $text = $revision->getText(); if ( $text === '' ) { - // This is what happens if the revision was broken at the time the - // dump was made. 
Unfortunately, it also happens if the revision was + // This is what happens if the revision was broken at the time the + // dump was made. Unfortunately, it also happens if the revision was // legitimately blank, so there's no way to tell the difference. To // be safe, we'll skip it and leave it broken $id = $id ? $id : ''; @@ -457,7 +457,7 @@ class CheckStorage { // Update the text row $dbw = wfGetDB( DB_MASTER ); - $dbw->update( 'text', + $dbw->update( 'text', array( 'old_flags' => $flags, 'old_text' => $text ), array( 'old_id' => $oldId ), $fname, array( 'LIMIT' => 1 ) diff --git a/maintenance/storage/compressOld.inc b/maintenance/storage/compressOld.inc index 981cfda5..93be5f75 100644 --- a/maintenance/storage/compressOld.inc +++ b/maintenance/storage/compressOld.inc @@ -18,12 +18,11 @@ function compressOldPages( $start = 0, $extdb = '' ) { break; } $last = $start; - while( $row = $dbw->fetchObject( $res ) ) { + foreach ( $res as $row ) { # print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n"; compressPage( $row, $extdb ); $last = $row->old_id; } - $dbw->freeResult( $res ); $start = $last + 1; # Deletion may leave long empty stretches print "$start...\n"; } while( true ); @@ -67,7 +66,7 @@ define( 'LS_INDIVIDUAL', 0 ); define( 'LS_CHUNKED', 1 ); /** @todo document */ -function compressWithConcat( $startId, $maxChunkSize, $beginDate, +function compressWithConcat( $startId, $maxChunkSize, $beginDate, $endDate, $extdb="", $maxPageId = false ) { $fname = 'compressWithConcat'; @@ -94,12 +93,12 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate, $pageConds[] = 'page_namespace<>0'; } if ( $queryExtra ) { - $pageConds[] = $queryExtra; + $pageConds[] = $queryExtra; } */ # For each article, get a list of revisions which fit the criteria - + # No recompression, use a condition on old_flags # Don't compress object type entities, because that might produce data loss when # overwriting bulk storage concat rows. Don't compress external references, because @@ -142,10 +141,10 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate, wfWaitForSlaves( 5 ); # Wake up - $dbr->ping(); + $dbr->ping(); # Get the page row - $pageRes = $dbr->select( 'page', + $pageRes = $dbr->select( 'page', array('page_id', 'page_namespace', 'page_title','page_latest'), $pageConds + array('page_id' => $pageId), $fname ); if ( $dbr->numRows( $pageRes ) == 0 ) { @@ -159,10 +158,10 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate, # Load revisions $revRes = $dbw->select( $tables, $fields, - array_merge( array( - 'rev_page' => $pageRow->page_id, + array_merge( array( + 'rev_page' => $pageRow->page_id, # Don't operate on the current revision - # Use < instead of <> in case the current revision has changed + # Use < instead of <> in case the current revision has changed # since the page select, which wasn't locking 'rev_id < ' . $pageRow->page_latest ), $conds ), @@ -170,7 +169,7 @@ function compressWithConcat( $startId, $maxChunkSize, $beginDate, $revLoadOptions ); $revs = array(); - while ( $revRow = $dbw->fetchObject( $revRes ) ) { + foreach ( $revRes as $revRow ) { $revs[] = $revRow; } diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php index 7ff102a5..bc05b340 100644 --- a/maintenance/storage/compressOld.php +++ b/maintenance/storage/compressOld.php @@ -25,10 +25,10 @@ */ $optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid', 'e' ); -require_once( dirname(__FILE__) . 
'/../commandLine.inc' ); +require_once( dirname( __FILE__ ) . '/../commandLine.inc' ); require_once( "compressOld.inc" ); -if( !function_exists( "gzdeflate" ) ) { +if ( !function_exists( "gzdeflate" ) ) { print "You must enable zlib support in PHP to compress old revisions!\n"; print "Please see http://www.php.net/manual/en/ref.zlib.php\n\n"; wfDie(); @@ -39,9 +39,9 @@ $defaults = array( 'c' => 20, 's' => 0, 'b' => '', - 'e' => '', - 'extdb' => '', - 'endid' => false, + 'e' => '', + 'extdb' => '', + 'endid' => false, ); $options = $options + $defaults; @@ -51,15 +51,15 @@ if ( $options['t'] != 'concat' && $options['t'] != 'gzip' ) { } if ( $options['extdb'] != '' ) { - print "Compressing database $wgDBname to external cluster {$options['extdb']}\n" . str_repeat('-', 76) . "\n\n"; + print "Compressing database $wgDBname to external cluster {$options['extdb']}\n" . str_repeat( '-', 76 ) . "\n\n"; } else { - print "Compressing database $wgDBname\n" . str_repeat('-', 76) . "\n\n"; + print "Compressing database $wgDBname\n" . str_repeat( '-', 76 ) . "\n\n"; } $success = true; if ( $options['t'] == 'concat' ) { - $success = compressWithConcat( $options['s'], $options['c'], $options['b'], - $options['e'], $options['extdb'], $options['endid'] ); + $success = compressWithConcat( $options['s'], $options['c'], $options['b'], + $options['e'], $options['extdb'], $options['endid'] ); } else { compressOldPages( $options['s'], $options['extdb'] ); } @@ -68,6 +68,6 @@ if ( $success ) { print "Done.\n"; } -exit(0); +exit( 0 ); diff --git a/maintenance/storage/dumpRev.php b/maintenance/storage/dumpRev.php index 95404244..b200d8af 100644 --- a/maintenance/storage/dumpRev.php +++ b/maintenance/storage/dumpRev.php @@ -18,7 +18,7 @@ * @ingroup Maintenance ExternalStorage */ -require_once( dirname(__FILE__) . '/../Maintenance.php' ); +require_once( dirname( __FILE__ ) . '/../Maintenance.php' ); class DumpRev extends Maintenance { public function __construct() { @@ -28,15 +28,15 @@ class DumpRev extends Maintenance { public function execute() { $dbr = wfGetDB( DB_SLAVE ); - $row = $dbr->selectRow( - array( 'text', 'revision' ), - array( 'old_flags', 'old_text' ), + $row = $dbr->selectRow( + array( 'text', 'revision' ), + array( 'old_flags', 'old_text' ), array( 'old_id=rev_text_id', 'rev_id' => $this->getArg() ) ); if ( !$row ) { $this->error( "Row not found", true ); } - + $flags = explode( ',', $row->old_flags ); $text = $row->old_text; if ( in_array( 'external', $flags ) ) { @@ -65,15 +65,15 @@ class DumpRev extends Maintenance { $obj = unserialize( $text ); $text = $obj->getText(); } - + if ( is_object( $text ) ) { $this->error( "Unexpectedly got object of type: " . get_class( $text ) ); } else { - $this->output( "Text length: " . strlen( $text ) ."\n" ); + $this->output( "Text length: " . strlen( $text ) . "\n" ); $this->output( substr( $text, 0, 100 ) . 
"\n" ); } } } $maintClass = "DumpRev"; -require_once( DO_MAINTENANCE ); +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/storage/fixBug20757.php b/maintenance/storage/fixBug20757.php index 922d4725..4aac1202 100644 --- a/maintenance/storage/fixBug20757.php +++ b/maintenance/storage/fixBug20757.php @@ -14,7 +14,7 @@ class FixBug20757 extends Maintenance { $this->addOption( 'dry-run', 'Report only' ); $this->addOption( 'start', 'old_id to start at', false, true ); } - + function execute() { $dbr = wfGetDB( DB_SLAVE ); $dbw = wfGetDB( DB_MASTER ); @@ -31,19 +31,29 @@ class FixBug20757 extends Maintenance { $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ ); + if ( $dbr->getType() == 'mysql' + && version_compare( $dbr->getServerVersion(), '4.1.0', '>=' ) ) + { + // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function + $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))'; + } else { + // No CONVERT() in MySQL 4.0 + $lowerLeft = 'LOWER(LEFT(old_text,22))'; + } + while ( true ) { print "ID: $startId / $totalRevs\r"; $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), - array( + array( 'old_id > ' . intval( $startId ), 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'', - 'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', + "$lowerLeft = 'o:15:\"historyblobstub\"'", ), __METHOD__, - array( + array( 'ORDER BY' => 'old_id', 'LIMIT' => $this->batchSize, ) @@ -68,7 +78,7 @@ class FixBug20757 extends Maintenance { } if ( !is_object( $obj ) ) { - print "{$row->old_id}: unrecoverable: unserialized to type " . + print "{$row->old_id}: unrecoverable: unserialized to type " . gettype( $obj ) . ", possible double-serialization\n"; ++$numBad; continue; @@ -120,22 +130,21 @@ class FixBug20757 extends Maintenance { } // Process the stubs - $stubsToFix = array(); foreach ( $stubs as $primaryId => $stub ) { $secondaryId = $stub['secondaryId']; if ( !isset( $trackedBlobs[$secondaryId] ) ) { // No tracked blob. Work out what went wrong - $secondaryRow = $dbr->selectRow( - 'text', + $secondaryRow = $dbr->selectRow( + 'text', array( 'old_flags', 'old_text' ), - array( 'old_id' => $secondaryId ), + array( 'old_id' => $secondaryId ), __METHOD__ ); if ( !$secondaryRow ) { print "$primaryId: unrecoverable: secondary row is missing\n"; ++$numBad; } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) { - // Not broken yet, and not in the tracked clusters so it won't get + // Not broken yet, and not in the tracked clusters so it won't get // broken by the current RCT run. 
++$numGood; } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) { @@ -196,7 +205,7 @@ class FixBug20757 extends Maintenance { __METHOD__ ); - // Add a blob_tracking row so that the new reference can be recompressed + // Add a blob_tracking row so that the new reference can be recompressed // without needing to run trackBlobs.php again $dbw->insert( 'blob_tracking', array( @@ -255,7 +264,7 @@ class FixBug20757 extends Maintenance { $dbr = wfGetDB( DB_SLAVE ); $map = array(); - $res = $dbr->select( 'revision', + $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ), array( 'rev_page' => $pageId ), __METHOD__ @@ -276,7 +285,7 @@ class FixBug20757 extends Maintenance { function isUnbrokenStub( $stub, $secondaryRow ) { $flags = explode( ',', $secondaryRow->old_flags ); $text = $secondaryRow->old_text; - if( in_array( 'external', $flags ) ) { + if ( in_array( 'external', $flags ) ) { $url = $text; @list( /* $proto */ , $path ) = explode( '://', $url, 2 ); if ( $path == "" ) { @@ -284,17 +293,17 @@ class FixBug20757 extends Maintenance { } $text = ExternalStore::fetchFromUrl( $url ); } - if( !in_array( 'object', $flags ) ) { + if ( !in_array( 'object', $flags ) ) { return false; } - if( in_array( 'gzip', $flags ) ) { + if ( in_array( 'gzip', $flags ) ) { $obj = unserialize( gzinflate( $text ) ); } else { $obj = unserialize( $text ); } - if( !is_object( $obj ) ) { + if ( !is_object( $obj ) ) { // Correct for old double-serialization bug. $obj = unserialize( $obj ); } @@ -310,5 +319,5 @@ class FixBug20757 extends Maintenance { } $maintClass = 'FixBug20757'; -require_once( DO_MAINTENANCE ); +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php index dc11856a..928cbf97 100644 --- a/maintenance/storage/moveToExternal.php +++ b/maintenance/storage/moveToExternal.php @@ -9,10 +9,8 @@ define( 'REPORTING_INTERVAL', 1 ); if ( !defined( 'MEDIAWIKI' ) ) { - $optionsWithArgs = array( 'e', 's' ); - - require_once( dirname(__FILE__) . '/../commandLine.inc' ); - require_once( 'ExternalStoreDB.php' ); + require_once( dirname( __FILE__ ) . '/../commandLine.inc' ); + require_once( dirname( __FILE__ ) . '/../../includes/ExternalStoreDB.php' ); require_once( 'resolveStubs.php' ); $fname = 'moveToExternal'; @@ -35,8 +33,6 @@ if ( !defined( 'MEDIAWIKI' ) ) { moveToExternal( $cluster, $maxID, $minID ); } - - function moveToExternal( $cluster, $maxID, $minID = 1 ) { $fname = 'moveToExternal'; $dbw = wfGetDB( DB_MASTER ); @@ -48,23 +44,22 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) { print "Moving text rows from $minID to $maxID to external storage\n"; $ext = new ExternalStoreDB; $numMoved = 0; - $numStubs = 0; - + for ( $block = 0; $block < $numBlocks; $block++ ) { $blockStart = $block * $blockSize + $minID; $blockEnd = $blockStart + $blockSize - 1; - - if ( !($block % REPORTING_INTERVAL) ) { + + if ( !( $block % REPORTING_INTERVAL ) ) { print "oldid=$blockStart, moved=$numMoved\n"; wfWaitForSlaves( 2 ); } - + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ), array( "old_id BETWEEN $blockStart AND $blockEnd", 'old_flags NOT ' . 
$dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), ), $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { + foreach ( $res as $row ) { # Resolve stubs $text = $row->old_text; $id = $row->old_id; @@ -73,13 +68,13 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) { } else { $flags = "{$row->old_flags},external"; } - + if ( strpos( $flags, 'object' ) !== false ) { $obj = unserialize( $text ); $className = strtolower( get_class( $obj ) ); if ( $className == 'historyblobstub' ) { - #resolveStub( $id, $row->old_text, $row->old_flags ); - #$numStubs++; + # resolveStub( $id, $row->old_text, $row->old_flags ); + # $numStubs++; continue; } elseif ( $className == 'historyblobcurstub' ) { $text = gzdeflate( $obj->getText() ); @@ -99,8 +94,8 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) { continue; } - #print "Storing " . strlen( $text ) . " bytes to $url\n"; - #print "old_id=$id\n"; + # print "Storing " . strlen( $text ) . " bytes to $url\n"; + # print "old_id=$id\n"; $url = $ext->store( $cluster, $text ); if ( !$url ) { @@ -112,7 +107,6 @@ function moveToExternal( $cluster, $maxID, $minID = 1 ) { array( 'old_id' => $id ), $fname ); $numMoved++; } - $dbr->freeResult( $res ); } } diff --git a/maintenance/storage/orphanStats.php b/maintenance/storage/orphanStats.php index 63f9025b..f30f07e4 100644 --- a/maintenance/storage/orphanStats.php +++ b/maintenance/storage/orphanStats.php @@ -20,7 +20,7 @@ * * @ingroup Maintenance ExternalStorage */ -require_once( dirname(__FILE__) . '/../Maintenance.php' ); +require_once( dirname( __FILE__ ) . '/../Maintenance.php' ); class OrphanStats extends Maintenance { public function __construct() { @@ -34,13 +34,12 @@ class OrphanStats extends Maintenance { } public function execute() { - $extDBs = array(); $dbr = wfGetDB( DB_SLAVE ); - if( !$dbr->tableExists( 'blob_orphans' ) ) { + if ( !$dbr->tableExists( 'blob_orphans' ) ) { $this->error( "blob_orphans doesn't seem to exist, need to run trackBlobs.php first", true ); } $res = $dbr->select( 'blob_orphans', '*', false, __METHOD__ ); - + $num = 0; $totalSize = 0; $hashes = array(); @@ -49,7 +48,7 @@ class OrphanStats extends Maintenance { foreach ( $res as $boRow ) { $extDB = $this->getDB( $boRow->bo_cluster ); $blobRow = $extDB->selectRow( 'blobs', '*', array( 'blob_id' => $boRow->bo_blob_id ), __METHOD__ ); - + $num++; $size = strlen( $blobRow->blob_text ); $totalSize += $size; @@ -61,11 +60,11 @@ class OrphanStats extends Maintenance { $this->output( "Number of orphans: $num\n" ); if ( $num > 0 ) { $this->output( "Average size: " . round( $totalSize / $num, 0 ) . " bytes\n" . - "Max size: $maxSize\n" . + "Max size: $maxSize\n" . "Number of unique texts: " . count( $hashes ) . "\n" ); } } } $maintClass = "OrphanStats"; -require_once( DO_MAINTENANCE ); +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/storage/recompressTracked.php b/maintenance/storage/recompressTracked.php index e43dbe5c..8974a74d 100644 --- a/maintenance/storage/recompressTracked.php +++ b/maintenance/storage/recompressTracked.php @@ -1,14 +1,14 @@ [... ...] Moves blobs indexed by trackBlobs.php to a specified list of destination clusters, and recompresses them in the process. Restartable. -Options: - --procs Set the number of child processes (default 1) +Options: + --procs Set the number of child processes (default 1) --copy-only Copy only, do not update the text table. Restart without this option to complete. 
--debug-log Log debugging data to the specified file --info-log Log progress messages to the specified file @@ -99,7 +99,7 @@ class RecompressTracked { } function logToFile( $msg, $file ) { - $header = '[' . date('d\TH:i:s') . '] ' . wfHostname() . ' ' . posix_getpid(); + $header = '[' . date( 'd\TH:i:s' ) . '] ' . wfHostname() . ' ' . posix_getpid(); if ( $this->slaveId !== false ) { $header .= "({$this->slaveId})"; } @@ -109,8 +109,8 @@ class RecompressTracked { /** * Wait until the selected slave has caught up to the master. - * This allows us to use the slave for things that were committed in a - * previous part of this batch process. + * This allows us to use the slave for things that were committed in a + * previous part of this batch process. */ function syncDBs() { $dbw = wfGetDB( DB_MASTER ); @@ -179,14 +179,14 @@ class RecompressTracked { $cmd .= " --$cmdOption"; } } - $cmd .= ' --child' . + $cmd .= ' --child' . ' --wiki ' . wfEscapeShellArg( wfWikiID() ) . ' ' . call_user_func_array( 'wfEscapeShellArg', $this->destClusters ); $this->slavePipes = $this->slaveProcs = array(); for ( $i = 0; $i < $this->numProcs; $i++ ) { $pipes = false; - $spec = array( + $spec = array( array( 'pipe', 'r' ), array( 'file', 'php://stdout', 'w' ), array( 'file', 'php://stderr', 'w' ) @@ -228,7 +228,7 @@ class RecompressTracked { function dispatch( /*...*/ ) { $args = func_get_args(); $pipes = $this->slavePipes; - $numPipes = stream_select( $x=array(), $pipes, $y=array(), 3600 ); + $numPipes = stream_select( $x = array(), $pipes, $y = array(), 3600 ); if ( !$numPipes ) { $this->critical( "Error waiting to write to slaves. Aborting" ); exit( 1 ); @@ -264,8 +264,8 @@ class RecompressTracked { if ( $this->noCount ) { $numPages = '[unknown]'; } else { - $numPages = $dbr->selectField( 'blob_tracking', - 'COUNT(DISTINCT bt_page)', + $numPages = $dbr->selectField( 'blob_tracking', + 'COUNT(DISTINCT bt_page)', # A condition is required so that this query uses the index array( 'bt_moved' => 0 ), __METHOD__ @@ -277,15 +277,15 @@ class RecompressTracked { $this->info( "Moving pages..." ); } while ( true ) { - $res = $dbr->select( 'blob_tracking', + $res = $dbr->select( 'blob_tracking', array( 'bt_page' ), - array( + array( 'bt_moved' => 0, 'bt_page > ' . $dbr->addQuotes( $startId ) ), __METHOD__, - array( - 'DISTINCT', + array( + 'DISTINCT', 'ORDER BY' => 'bt_page', 'LIMIT' => $this->batchSize, ) @@ -330,8 +330,8 @@ class RecompressTracked { if ( $this->noCount ) { $numOrphans = '[unknown]'; } else { - $numOrphans = $dbr->selectField( 'blob_tracking', - 'COUNT(DISTINCT bt_text_id)', + $numOrphans = $dbr->selectField( 'blob_tracking', + 'COUNT(DISTINCT bt_text_id)', array( 'bt_moved' => 0, 'bt_page' => 0 ), __METHOD__ ); if ( !$numOrphans ) { @@ -440,8 +440,8 @@ class RecompressTracked { $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); while ( true ) { - $res = $dbr->select( - array( 'blob_tracking', 'text' ), + $res = $dbr->select( + array( 'blob_tracking', 'text' ), '*', array( 'bt_page' => $pageId, @@ -451,7 +451,7 @@ class RecompressTracked { 'bt_text_id=old_id', ), __METHOD__, - array( + array( 'ORDER BY' => 'bt_text_id', 'LIMIT' => $this->batchSize ) @@ -496,7 +496,7 @@ class RecompressTracked { * * This is done in a single transaction to provide restartable behaviour * without data loss. - * + * * The transaction is kept short to reduce locking. 
*/ function moveTextRow( $textId, $url ) { @@ -536,16 +536,16 @@ class RecompressTracked { $dbr = wfGetDB( DB_SLAVE ); $startId = 0; - $conds = array_merge( $conds, array( + $conds = array_merge( $conds, array( 'bt_moved' => 0, 'bt_new_url IS NOT NULL' - )); + ) ); while ( true ) { $res = $dbr->select( 'blob_tracking', '*', array_merge( $conds, array( 'bt_text_id > ' . $dbr->addQuotes( $startId ) ) ), __METHOD__, - array( + array( 'ORDER BY' => 'bt_text_id', 'LIMIT' => $this->batchSize, ) @@ -592,17 +592,17 @@ class RecompressTracked { $this->finishIncompleteMoves( array( 'bt_text_id' => $textIds ) ); $this->syncDBs(); } - + $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); $res = wfGetDB( DB_SLAVE )->select( - array( 'text', 'blob_tracking' ), - array( 'old_id', 'old_text', 'old_flags' ), - array( + array( 'text', 'blob_tracking' ), + array( 'old_id', 'old_text', 'old_flags' ), + array( 'old_id' => $textIds, 'bt_text_id=old_id', 'bt_moved' => 0, - ), + ), __METHOD__, array( 'DISTINCT' ) ); @@ -610,10 +610,10 @@ class RecompressTracked { foreach ( $res as $row ) { $text = Revision::getRevisionText( $row ); if ( $text === false ) { - $this->critical( "Error: cannot load revision text for old_id=$textId" ); + $this->critical( "Error: cannot load revision text for old_id={$row->old_id}" ); continue; } - + if ( !$trx->addItem( $text, $row->old_id ) ) { $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); $trx->commit(); @@ -625,7 +625,7 @@ class RecompressTracked { $trx->commit(); } - /** + /** * Wait for slaves (quietly) */ function waitForSlaves() { @@ -704,14 +704,14 @@ class CgzCopyTransaction { // Check to see if the target text_ids have been moved already. // - // We originally read from the slave, so this can happen when a single - // text_id is shared between multiple pages. It's rare, but possible + // We originally read from the slave, so this can happen when a single + // text_id is shared between multiple pages. It's rare, but possible // if a delete/move/undelete cycle splits up a null edit. // // We do a locking read to prevent closer-run race conditions. $dbw = wfGetDB( DB_MASTER ); $dbw->begin(); - $res = $dbw->select( 'blob_tracking', + $res = $dbw->select( 'blob_tracking', array( 'bt_text_id', 'bt_moved' ), array( 'bt_text_id' => array_keys( $this->referrers ) ), __METHOD__, array( 'FOR UPDATE' ) ); diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php index 346151e9..2269e37f 100644 --- a/maintenance/storage/resolveStubs.php +++ b/maintenance/storage/resolveStubs.php @@ -9,7 +9,7 @@ define( 'REPORTING_INTERVAL', 100 ); if ( !defined( 'MEDIAWIKI' ) ) { $optionsWithArgs = array( 'm' ); - require_once( dirname(__FILE__) . '/../commandLine.inc' ); + require_once( dirname( __FILE__ ) . '/../commandLine.inc' ); resolveStubs(); } @@ -28,22 +28,19 @@ function resolveStubs() { for ( $b = 0; $b < $numBlocks; $b++ ) { wfWaitForSlaves( 2 ); - + printf( "%5.2f%%\n", $b / $numBlocks * 100 ); - $start = intval($maxID / $numBlocks) * $b + 1; - $end = intval($maxID / $numBlocks) * ($b + 1); - + $start = intval( $maxID / $numBlocks ) * $b + 1; + $end = intval( $maxID / $numBlocks ) * ( $b + 1 ); + $res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ), "old_id>=$start AND old_id<=$end " . - "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". 
- 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', + "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' " . + 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', $fname ); - while ( $row = $dbr->fetchObject( $res ) ) { + foreach ( $res as $row ) { resolveStub( $row->old_id, $row->old_text, $row->old_flags ); } - $dbr->freeResult( $res ); - - } print "100%\n"; } @@ -84,7 +81,7 @@ function resolveStub( $id, $stubText, $flags ) { } # Update the row - #print "oldid=$id\n"; + # print "oldid=$id\n"; $dbw->update( 'text', array( /* SET */ 'old_flags' => $newFlags, diff --git a/maintenance/storage/storageTypeStats.php b/maintenance/storage/storageTypeStats.php index 85858620..be86c531 100644 --- a/maintenance/storage/storageTypeStats.php +++ b/maintenance/storage/storageTypeStats.php @@ -1,6 +1,6 @@ $flagStats ) { foreach ( $flagStats as $class => $entry ) { - printf( $format, $flags, $class, $entry['count'], + printf( $format, $flags, $class, $entry['count'], sprintf( "%-13d - %-13d", $entry['first'], $entry['last'] ) ); } } @@ -94,5 +93,5 @@ SQL; } $maintClass = 'StorageTypeStats'; -require_once( DO_MAINTENANCE ); +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/maintenance/storage/testCompression.php b/maintenance/storage/testCompression.php index 9c96c9f8..e2718325 100644 --- a/maintenance/storage/testCompression.php +++ b/maintenance/storage/testCompression.php @@ -1,7 +1,7 @@ ] [--start=] [--limit=] \n"; @@ -26,10 +26,10 @@ $type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryB $dbr = wfGetDB( DB_SLAVE ); -$res = $dbr->select( +$res = $dbr->select( array( 'page', 'revision', 'text' ), '*', - array( + array( 'page_namespace' => $title->getNamespace(), 'page_title' => $title->getDBkey(), 'page_id=rev_page', @@ -56,9 +56,9 @@ foreach ( $res as $row ) { $serialized = serialize( $blob ); $t += microtime( true ); -#print_r( $blob->mDiffMap ); +# print_r( $blob->mDiffMap ); -printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n", +printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n", $type, count( $hashes ), $uncompressedSize / strlen( $serialized ), @@ -73,7 +73,7 @@ foreach ( $keys as $id => $key ) { $text = $blob->getItem( $key ); if ( md5( $text ) != $hashes[$id] ) { echo "Content hash mismatch for rev_id $id\n"; - #var_dump( $text ); + # var_dump( $text ); } } $t += microtime( true ); diff --git a/maintenance/storage/trackBlobs.php b/maintenance/storage/trackBlobs.php index 63327d53..15aeec3b 100644 --- a/maintenance/storage/trackBlobs.php +++ b/maintenance/storage/trackBlobs.php @@ -1,6 +1,6 @@ checkIntegrity(); $this->initTrackingTable(); $this->trackRevisions(); $this->trackOrphanText(); @@ -43,6 +44,47 @@ class TrackBlobs { } } + function checkIntegrity() { + echo "Doing integrity check...\n"; + $dbr = wfGetDB( DB_SLAVE ); + + // Scan for HistoryBlobStub objects in the text table (bug 20757) + + $exists = $dbr->selectField( 'text', 1, + 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' . + 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', + __METHOD__ + ); + + if ( $exists ) { + echo "Integrity check failed: found HistoryBlobStub objects in your text table.\n" . + "This script could destroy these objects if it continued. Run resolveStubs.php\n" . 
+ "to fix this.\n"; + exit( 1 ); + } + + // Scan the archive table for HistoryBlobStub objects or external flags (bug 22624) + $flags = $dbr->selectField( 'archive', 'ar_flags', + 'ar_flags LIKE \'%external%\' OR (' . + 'ar_flags LIKE \'%object%\' ' . + 'AND LOWER(CONVERT(LEFT(ar_text,22) USING latin1)) = \'o:15:"historyblobstub"\' )', + __METHOD__ + ); + + if ( strpos( $flags, 'external' ) !== false ) { + echo "Integrity check failed: found external storage pointers in your archive table.\n" . + "Run normaliseArchiveTable.php to fix this.\n"; + exit( 1 ); + } elseif ( $flags ) { + echo "Integrity check failed: found HistoryBlobStub objects in your archive table.\n" . + "These objects are probably already broken, continuing would make them\n" . + "unrecoverable. Run \"normaliseArchiveTable.php --fix-cgz-bug\" to fix this.\n"; + exit( 1 ); + } + + echo "Integrity check OK\n"; + } + function initTrackingTable() { $dbw = wfGetDB( DB_MASTER ); if ( $dbw->tableExists( 'blob_tracking' ) ) { @@ -170,9 +212,9 @@ class TrackBlobs { # Scan the text table for orphan text while ( true ) { - $res = $dbr->select( array( 'text', 'blob_tracking' ), + $res = $dbr->select( array( 'text', 'blob_tracking' ), array( 'old_id', 'old_flags', 'old_text' ), - array( + array( 'old_id>' . $dbr->addQuotes( $startId ), $textClause, 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), @@ -181,7 +223,7 @@ class TrackBlobs { __METHOD__, array( 'ORDER BY' => 'old_id', - 'LIMIT' => $this->batchSize + 'LIMIT' => $this->batchSize ), array( 'blob_tracking' => array( 'LEFT JOIN', 'bt_text_id=old_id' ) ) ); @@ -275,8 +317,8 @@ class TrackBlobs { // Build a bitmap of actual blob rows while ( true ) { - $res = $extDB->select( $table, - array( 'blob_id' ), + $res = $extDB->select( $table, + array( 'blob_id' ), array( 'blob_id > ' . $extDB->addQuotes( $startId ) ), __METHOD__, array( 'LIMIT' => $this->batchSize, 'ORDER BY' => 'blob_id' ) @@ -301,7 +343,7 @@ class TrackBlobs { // Find actual blobs that weren't tracked by the previous passes // This is a set-theoretic difference A \ B, or in bitwise terms, A & ~B $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) ); - + // Traverse the orphan list $insertBatch = array(); $id = 0; -- cgit v1.2.3-54-g00ecf