From 222b01f5169f1c7e69762e0e8904c24f78f71882 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 28 Jul 2010 11:52:48 +0200 Subject: update to MediaWiki 1.16.0 --- maintenance/importImages.php | 168 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 133 insertions(+), 35 deletions(-) (limited to 'maintenance/importImages.php') diff --git a/maintenance/importImages.php b/maintenance/importImages.php index 7997b0d5..f0dd388a 100644 --- a/maintenance/importImages.php +++ b/maintenance/importImages.php @@ -2,17 +2,25 @@ /** * Maintenance script to import one or more images from the local file system into - * the wiki without using the web-based interface + * the wiki without using the web-based interface. + * + * "Smart import" additions: + * - aim: preserve the essential metadata (user, description) when importing medias from an existing wiki + * - process: + * - interface with the source wiki, don't use bare files only (see --source-wiki-url). + * - fetch metadata from source wiki for each file to import. + * - commit the fetched metadata to the destination wiki while submitting. * * @file * @ingroup Maintenance * @author Rob Church + * @author Mij */ -$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license' ); -require_once( 'commandLine.inc' ); -require_once( 'importImages.inc.php' ); -$added = $skipped = $overwritten = 0; +$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' ); +require_once( dirname(__FILE__) . '/commandLine.inc' ); +require_once( dirname(__FILE__) . '/importImages.inc' ); +$processed = $added = $ignored = $skipped = $overwritten = $failed = 0; echo( "Import Images\n\n" ); @@ -25,7 +33,7 @@ if( count( $args ) > 0 ) { if (isset($options['protect']) && isset($options['unprotect'])) die("Cannot specify both protect and unprotect. Only 1 is allowed.\n"); - if ($options['protect'] == 1) +if (isset($options['protect']) && $options['protect'] == 1) die("You must specify a protection option.\n"); # Prepare the list of allowed extensions @@ -45,8 +53,27 @@ if( count( $args ) > 0 ) { $user = User::newFromName( 'Maintenance script' ); $wgUser = $user; + # Get block check. If a value is given, this specified how often the check is performed + if ( isset( $options['check-userblock'] ) ) { + if ( !$options['check-userblock'] ) $checkUserBlock = 1; + else $checkUserBlock = (int)$options['check-userblock']; + } else { + $checkUserBlock = false; + } + + # Get --from + $from = @$options['from']; + + # Get sleep time. + $sleep = @$options['sleep']; + if ( $sleep ) $sleep = (int)$sleep; + + # Get limit number + $limit = @$options['limit']; + if ( $limit ) $limit = (int)$limit; + # Get the upload comment - $comment = 'Importing image file'; + $comment = NULL; if ( isset( $options['comment-file'] ) ) { $comment = file_get_contents( $options['comment-file'] ); @@ -76,6 +103,23 @@ if( count( $args ) > 0 ) { continue; } + if ( $from ) { + if ( $from == $title->getDBkey() ) { + $from = NULL; + } else { + $ignored++; + continue; + } + } + + if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) { + $user->clearInstanceCache( 'name' ); //reload from DB! + if ( $user->isBlocked() ) { + echo( $user->getName() . " was blocked! Aborting.\n" ); + break; + } + } + # Check existence $image = wfLocalFile( $title ); if( $image->exists() ) { @@ -88,36 +132,73 @@ if( count( $args ) > 0 ) { continue; } } else { - echo( "Importing {$base}..." ); - $svar = 'added'; - } + if ( isset( $options['skip-dupes'] ) ) { + $repo = $image->getRepo(); + $sha1 = File::sha1Base36( $file ); #XXX: we end up calculating this again when actually uploading. that sucks. - # Find comment text - $commentText = false; + $dupes = $repo->findBySha1( $sha1 ); - if ( $commentExt ) { - $f = findAuxFile( $file, $commentExt ); - if ( !$f ) { - echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " ); - } else { - $commentText = file_get_contents( $f ); - if ( !$f ) { - echo( " Failed to load comment file {$f}, using default comment. " ); + if ( $dupes ) { + echo( "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n" ); + $skipped++; + continue; } } - } - if ( !$commentText ) { - $commentText = $comment; + echo( "Importing {$base}..." ); + $svar = 'added'; } + if (isset( $options['source-wiki-url'])) { + /* find comment text directly from source wiki, through MW's API */ + $real_comment = getFileCommentFromSourceWiki($options['source-wiki-url'], $base); + if ($real_comment === false) + $commentText = $comment; + else + $commentText = $real_comment; + + /* find user directly from source wiki, through MW's API */ + $real_user = getFileUserFromSourceWiki($options['source-wiki-url'], $base); + if ($real_user === false) { + $wgUser = $user; + } else { + $wgUser = User::newFromName($real_user); + if ($wgUser === false) { + # user does not exist in target wiki + echo ("failed: user '$real_user' does not exist in target wiki."); + continue; + } + } + } else { + # Find comment text + $commentText = false; + + if ( $commentExt ) { + $f = findAuxFile( $file, $commentExt ); + if ( !$f ) { + echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " ); + } else { + $commentText = file_get_contents( $f ); + if ( !$f ) { + echo( " Failed to load comment file {$f}, using default comment. " ); + } + } + } + + if ( !$commentText ) { + $commentText = $comment; + } + } + + # Import the file if ( isset( $options['dry'] ) ) { - echo( " publishing {$file}... " ); + echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " ); } else { $archive = $image->publish( $file ); if( WikiError::isError( $archive ) || !$archive->isGood() ) { echo( "failed.\n" ); + $failed++; continue; } } @@ -141,7 +222,6 @@ if( count( $args ) > 0 ) { } - $$svar++; if ( isset( $options['dry'] ) ) { echo( "done.\n" ); } else if ( $image->recordUpload( $archive->value, $commentText, $license ) ) { @@ -164,14 +244,24 @@ if( count( $args ) > 0 ) { } else { echo( "failed.\n" ); + $svar = 'failed'; } + $$svar++; + $processed++; + + if ( $limit && $processed >= $limit ) + break; + + if ( $sleep ) + sleep( $sleep ); } # Print out some statistics echo( "\n" ); - foreach( array( 'count' => 'Found', 'added' => 'Added', - 'skipped' => 'Skipped', 'overwritten' => 'Overwritten' ) as $var => $desc ) { + foreach( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored', + 'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten', + 'failed' => 'Failed' ) as $var => $desc ) { if( $$var > 0 ) echo( "{$desc}: {$$var}\n" ); } @@ -184,14 +274,14 @@ if( count( $args ) > 0 ) { showUsage(); } -exit(); +exit(0); function showUsage( $reason = false ) { if( $reason ) { echo( $reason . "\n" ); } - echo << @@ -199,17 +289,25 @@ USAGE: php importImages.php [options] Options: --extensions= Comma-separated list of allowable extensions, defaults to \$wgFileExtensions ---overwrite Overwrite existing images if a conflicting-named image is found +--overwrite Overwrite existing images with the same name (default is to skip them) +--limit= Limit the number of images to process. Ignored or skipped images are not counted. +--from= Ignore all files until the one with the given name. Useful for resuming + aborted imports. should be the file's canonical database form. +--skip-dupes Skip images that were already uploaded under a different name (check SHA1) +--sleep= Sleep between files. Useful mostly for debugging. --user= Set username of uploader, default 'Maintenance script' ---comment= Set upload summary comment, default 'Importing image file' +--check-userblock Check if the user got blocked during import. +--comment= Set upload summary comment, default 'Importing image file'. --comment-file= Set upload summary comment the the content of . --comment-ext= Causes the comment for each file to be loaded from a file with the same name - but the extension . + but the extension . If a global comment is also given, it is appended. --license= Use an optional license template --dry Dry run, don't import anything --protect= Specify the protect value (autoconfirmed,sysop) --unprotect Unprotects all uploaded images +--source-wiki-url if specified, take User and Comment data for each imported file from this URL. + For example, --source-wiki-url="http://en.wikipedia.org/" -END; - exit(); -} \ No newline at end of file +TEXT; + exit(1); +} -- cgit v1.2.3-54-g00ecf