diff options
Diffstat (limited to 'maintenance/dumpHTML.php')
-rw-r--r-- | maintenance/dumpHTML.php | 99 |
1 files changed, 54 insertions, 45 deletions
diff --git a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php index 37a46465..5e347e4b 100644 --- a/maintenance/dumpHTML.php +++ b/maintenance/dumpHTML.php @@ -9,20 +9,25 @@ * Usage: * php dumpHTML.php [options...] * - * -d <dest> destination directory - * -s <start> start ID - * -e <end> end ID - * -k <skin> skin to use (defaults to dumphtml) - * --images only do image description pages - * --categories only do category pages - * --redirects only do redirects - * --special only do miscellaneous stuff - * --force-copy copy commons instead of symlink, needed for Wikimedia - * --interlang allow interlanguage links + * -d <dest> destination directory + * -s <start> start ID + * -e <end> end ID + * -k <skin> skin to use (defaults to htmldump) + * --no-overwrite skip existing HTML files + * --checkpoint <file> use a checkpoint file to allow restarting of interrupted dumps + * --slice <n/m> split the job into m segments and do the n'th one + * --images only do image description pages + * --categories only do category pages + * --redirects only do redirects + * --special only do miscellaneous stuff + * --force-copy copy commons instead of symlink, needed for Wikimedia + * --interlang allow interlanguage links + * --image-snapshot copy all images used to the destination directory + * --compress generate compressed version of the html pages */ -$optionsWithArgs = array( 's', 'd', 'e', 'k' ); +$optionsWithArgs = array( 's', 'd', 'e', 'k', 'checkpoint', 'slice' ); $profiling = false; @@ -40,7 +45,6 @@ require_once( "commandLine.inc" ); require_once( "dumpHTML.inc" ); error_reporting( E_ALL & (~E_NOTICE) ); -define( 'CHUNK_SIZE', 50 ); if ( !empty( $options['s'] ) ) { $start = $options['s']; @@ -58,10 +62,22 @@ if ( !empty( $options['e'] ) ) { if ( !empty( $options['d'] ) ) { $dest = $options['d']; } else { - $dest = 'static'; + $dest = "$IP/static"; } -$skin = isset( $options['k'] ) ? $options['k'] : 'dumphtml'; +$skin = isset( $options['k'] ) ? $options['k'] : 'htmldump'; + +if ( $options['slice'] ) { + $bits = explode( '/', $options['slice'] ); + if ( count( $bits ) != 2 || $bits[0] < 1 || $bits[0] > $bits[1] ) { + print "Invalid slice specification"; + exit; + } + $sliceNumerator = $bits[0]; + $sliceDenominator = $bits[1]; +} else { + $sliceNumerator = $sliceDenominator = 1; +} $wgHTMLDump = new DumpHTML( array( 'dest' => $dest, @@ -69,6 +85,14 @@ $wgHTMLDump = new DumpHTML( array( 'alternateScriptPath' => $options['interlang'], 'interwiki' => $options['interlang'], 'skin' => $skin, + 'makeSnapshot' => $options['image-snapshot'], + 'checkpointFile' => $options['checkpoint'], + 'startID' => $start, + 'endID' => $end, + 'sliceNumerator' => $sliceNumerator, + 'sliceDenominator' => $sliceDenominator, + 'noOverwrite' => $options['no-overwrite'], + 'compress' => $options['compress'], )); @@ -81,47 +105,32 @@ if ( $options['special'] ) { } elseif ( $options['redirects'] ) { $wgHTMLDump->doRedirects(); } else { - print("Creating static HTML dump in directory $dest. \n". - "Starting from page_id $start of $end.\n"); - + print "Creating static HTML dump in directory $dest. \n"; $dbr =& wfGetDB( DB_SLAVE ); $server = $dbr->getProperty( 'mServer' ); print "Using database {$server}\n"; - $wgHTMLDump->doArticles( $start, $end ); if ( !isset( $options['e'] ) ) { - $wgHTMLDump->doImageDescriptions(); - $wgHTMLDump->doCategories(); - $wgHTMLDump->doSpecials(); - } - - /* - if ( $end - $start > CHUNK_SIZE * 2 ) { - // Split the problem into smaller chunks, run them in different PHP instances - // This is a memory/resource leak workaround - print("Creating static HTML dump in directory $dest. \n". - "Starting from page_id $start of $end.\n"); - - chdir( "maintenance" ); - for ( $chunkStart = $start; $chunkStart < $end; $chunkStart += CHUNK_SIZE ) { - $chunkEnd = $chunkStart + CHUNK_SIZE - 1; - if ( $chunkEnd > $end ) { - $chunkEnd = $end; - } - passthru( "php dumpHTML.php -d " . wfEscapeShellArg( $dest ) . " -s $chunkStart -e $chunkEnd" ); - } - chdir( ".." ); - $d->doImageDescriptions(); - $d->doCategories(); - $d->doMainPage( $dest ); + $wgHTMLDump->doEverything(); } else { - $d->doArticles( $start, $end ); + $wgHTMLDump->doArticles(); } - */ } if ( isset( $options['debug'] ) ) { - print_r($GLOBALS); + #print_r($GLOBALS); + # Workaround for bug #36957 + $globals = array_keys( $GLOBALS ); + #sort( $globals ); + $sizes = array(); + foreach ( $globals as $name ) { + $sizes[$name] = strlen( serialize( $GLOBALS[$name] ) ); + } + arsort($sizes); + $sizes = array_slice( $sizes, 0, 20 ); + foreach ( $sizes as $name => $size ) { + printf( "%9d %s\n", $size, $name ); + } } if ( $profiling ) { |